From 8197f489f4c4398391746a377c10501076b05168 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Mon, 6 Nov 2017 10:38:23 -0800 Subject: spi: imx: Fix failure path leak on GPIO request error correctly In commit 974488e4ce1e ("spi: imx: Fix failure path leak on GPIO request error"), spi_bitbang_start() was moved later in the probe sequence. But this doesn't work, as spi_bitbang_start() has to be called before requesting GPIOs because the GPIO data in the spi master is populated when the master is registered, and that doesn't happen until spi_bitbang_start() is called. The default only works if one uses one CS. So add a failure path call to spi_bitbang_stop() to fix the leak. CC: Shawn Guo CC: Sascha Hauer CC: Fabio Estevam CC: Mark Brown CC: Oleksij Rempel Signed-off-by: Trent Piepho Reviewed-by: Oleksij Rempel Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 5ddd32ba2521..301cdb721bad 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1613,6 +1613,11 @@ static int spi_imx_probe(struct platform_device *pdev) spi_imx->devtype_data->intctrl(spi_imx, 0); master->dev.of_node = pdev->dev.of_node; + ret = spi_bitbang_start(&spi_imx->bitbang); + if (ret) { + dev_err(&pdev->dev, "bitbang start failed with %d\n", ret); + goto out_clk_put; + } if (!spi_imx->slave_mode) { if (!master->cs_gpios) { @@ -1631,23 +1636,19 @@ static int spi_imx_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Can't get CS GPIO %i\n", master->cs_gpios[i]); - goto out_clk_put; + goto out_spi_bitbang; } } } - ret = spi_bitbang_start(&spi_imx->bitbang); - if (ret) { - dev_err(&pdev->dev, "bitbang start failed with %d\n", ret); - goto out_clk_put; - } - dev_info(&pdev->dev, "probed\n"); clk_disable(spi_imx->clk_ipg); clk_disable(spi_imx->clk_per); return ret; +out_spi_bitbang: + spi_bitbang_stop(&spi_imx->bitbang); out_clk_put: 
clk_disable_unprepare(spi_imx->clk_ipg); out_put_per: -- cgit v1.2.3 From fc9cab05837639ce3372870b09d26334fb15b157 Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Tue, 7 Nov 2017 12:31:14 +0800 Subject: ASoC: rt5514: The DSP clock can be calibrated by the other clock source Add the option for the DSP clock that can be calibrated by the other clock source. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- include/sound/rt5514.h | 2 + sound/soc/codecs/rt5514-spi.c | 1 + sound/soc/codecs/rt5514.c | 85 +++++++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/rt5514.h | 5 ++- 4 files changed, 92 insertions(+), 1 deletion(-) diff --git a/include/sound/rt5514.h b/include/sound/rt5514.h index ef18494769ee..64d027dbaaca 100644 --- a/include/sound/rt5514.h +++ b/include/sound/rt5514.h @@ -14,6 +14,8 @@ struct rt5514_platform_data { unsigned int dmic_init_delay; + const char *dsp_calib_clk_name; + unsigned int dsp_calib_clk_rate; }; #endif diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 12f2ecf3a4fe..b90d6d5d7ff8 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -370,6 +370,7 @@ int rt5514_spi_burst_read(unsigned int addr, u8 *rxbuf, size_t len) return true; } +EXPORT_SYMBOL_GPL(rt5514_spi_burst_read); /** * rt5514_spi_burst_write - Write data to SPI by rt5514 address. 
diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 2a5b5d74e697..61ccbc62125b 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -295,6 +295,33 @@ static int rt5514_dsp_voice_wake_up_get(struct snd_kcontrol *kcontrol, return 0; } +static int rt5514_calibration(struct rt5514_priv *rt5514, bool on) +{ + if (on) { + regmap_write(rt5514->regmap, RT5514_ANA_CTRL_PLL3, 0x0000000a); + regmap_update_bits(rt5514->regmap, RT5514_PLL_SOURCE_CTRL, 0xf, + 0xa); + regmap_update_bits(rt5514->regmap, RT5514_PWR_ANA1, 0x301, + 0x301); + regmap_write(rt5514->regmap, RT5514_PLL3_CALIB_CTRL4, + 0x80000000 | rt5514->pll3_cal_value); + regmap_write(rt5514->regmap, RT5514_PLL3_CALIB_CTRL1, + 0x8bb80800); + regmap_update_bits(rt5514->regmap, RT5514_PLL3_CALIB_CTRL5, + 0xc0000000, 0x80000000); + regmap_update_bits(rt5514->regmap, RT5514_PLL3_CALIB_CTRL5, + 0xc0000000, 0xc0000000); + } else { + regmap_update_bits(rt5514->regmap, RT5514_PLL3_CALIB_CTRL5, + 0xc0000000, 0x40000000); + regmap_update_bits(rt5514->regmap, RT5514_PWR_ANA1, 0x301, 0); + regmap_update_bits(rt5514->regmap, RT5514_PLL_SOURCE_CTRL, 0xf, + 0x4); + } + + return 0; +} + static int rt5514_dsp_voice_wake_up_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -302,6 +329,7 @@ static int rt5514_dsp_voice_wake_up_put(struct snd_kcontrol *kcontrol, struct rt5514_priv *rt5514 = snd_soc_component_get_drvdata(component); struct snd_soc_codec *codec = rt5514->codec; const struct firmware *fw = NULL; + u8 buf[8]; if (ucontrol->value.integer.value[0] == rt5514->dsp_enabled) return 0; @@ -310,6 +338,35 @@ static int rt5514_dsp_voice_wake_up_put(struct snd_kcontrol *kcontrol, rt5514->dsp_enabled = ucontrol->value.integer.value[0]; if (rt5514->dsp_enabled) { + if (rt5514->pdata.dsp_calib_clk_name && + !IS_ERR(rt5514->dsp_calib_clk)) { + if (clk_set_rate(rt5514->dsp_calib_clk, + rt5514->pdata.dsp_calib_clk_rate)) + dev_err(codec->dev, + "Can't set rate for mclk"); + + 
if (clk_prepare_enable(rt5514->dsp_calib_clk)) + dev_err(codec->dev, + "Can't enable dsp_calib_clk"); + + rt5514_calibration(rt5514, true); + + msleep(20); +#if IS_ENABLED(CONFIG_SND_SOC_RT5514_SPI) + rt5514_spi_burst_read(RT5514_PLL3_CALIB_CTRL6 | + RT5514_DSP_MAPPING, + (u8 *)&buf, sizeof(buf)); +#else + dev_err(codec->dev, "There is no SPI driver for" + " loading the firmware\n"); +#endif + rt5514->pll3_cal_value = buf[0] | buf[1] << 8 | + buf[2] << 16 | buf[3] << 24; + + rt5514_calibration(rt5514, false); + clk_disable_unprepare(rt5514->dsp_calib_clk); + } + rt5514_enable_dsp_prepare(rt5514); request_firmware(&fw, RT5514_FIRMWARE1, codec->dev); @@ -341,6 +398,20 @@ static int rt5514_dsp_voice_wake_up_put(struct snd_kcontrol *kcontrol, /* DSP run */ regmap_write(rt5514->i2c_regmap, 0x18002f00, 0x00055148); + + if (rt5514->pdata.dsp_calib_clk_name && + !IS_ERR(rt5514->dsp_calib_clk)) { + msleep(20); + + regmap_write(rt5514->i2c_regmap, 0x1800211c, + rt5514->pll3_cal_value); + regmap_write(rt5514->i2c_regmap, 0x18002124, + 0x00220012); + regmap_write(rt5514->i2c_regmap, 0x18002124, + 0x80220042); + regmap_write(rt5514->i2c_regmap, 0x18002124, + 0xe0220042); + } } else { regmap_multi_reg_write(rt5514->i2c_regmap, rt5514_i2c_patch, ARRAY_SIZE(rt5514_i2c_patch)); @@ -1024,12 +1095,22 @@ static int rt5514_set_bias_level(struct snd_soc_codec *codec, static int rt5514_probe(struct snd_soc_codec *codec) { struct rt5514_priv *rt5514 = snd_soc_codec_get_drvdata(codec); + struct platform_device *pdev = container_of(codec->dev, + struct platform_device, dev); rt5514->mclk = devm_clk_get(codec->dev, "mclk"); if (PTR_ERR(rt5514->mclk) == -EPROBE_DEFER) return -EPROBE_DEFER; + if (rt5514->pdata.dsp_calib_clk_name) { + rt5514->dsp_calib_clk = devm_clk_get(&pdev->dev, + rt5514->pdata.dsp_calib_clk_name); + if (PTR_ERR(rt5514->dsp_calib_clk) == -EPROBE_DEFER) + return -EPROBE_DEFER; + } + rt5514->codec = codec; + rt5514->pll3_cal_value = 0x0078b000; return 0; } @@ -1147,6 +1228,10 
@@ static int rt5514_parse_dp(struct rt5514_priv *rt5514, struct device *dev) { device_property_read_u32(dev, "realtek,dmic-init-delay-ms", &rt5514->pdata.dmic_init_delay); + device_property_read_string(dev, "realtek,dsp-calib-clk-name", + &rt5514->pdata.dsp_calib_clk_name); + device_property_read_u32(dev, "realtek,dsp-calib-clk-rate", + &rt5514->pdata.dsp_calib_clk_rate); return 0; } diff --git a/sound/soc/codecs/rt5514.h b/sound/soc/codecs/rt5514.h index 2dc40e6d8b3f..f0f3400ce6b1 100644 --- a/sound/soc/codecs/rt5514.h +++ b/sound/soc/codecs/rt5514.h @@ -34,7 +34,9 @@ #define RT5514_CLK_CTRL1 0x2104 #define RT5514_CLK_CTRL2 0x2108 #define RT5514_PLL3_CALIB_CTRL1 0x2110 +#define RT5514_PLL3_CALIB_CTRL4 0x2120 #define RT5514_PLL3_CALIB_CTRL5 0x2124 +#define RT5514_PLL3_CALIB_CTRL6 0x2128 #define RT5514_DELAY_BUF_CTRL1 0x2140 #define RT5514_DELAY_BUF_CTRL3 0x2148 #define RT5514_ASRC_IN_CTRL1 0x2180 @@ -272,7 +274,7 @@ struct rt5514_priv { struct rt5514_platform_data pdata; struct snd_soc_codec *codec; struct regmap *i2c_regmap, *regmap; - struct clk *mclk; + struct clk *mclk, *dsp_calib_clk; int sysclk; int sysclk_src; int lrck; @@ -281,6 +283,7 @@ struct rt5514_priv { int pll_in; int pll_out; int dsp_enabled; + unsigned int pll3_cal_value; }; #endif /* __RT5514_H__ */ -- cgit v1.2.3 From e4d0db60e8d25cc62b9b7e32c18e7f6acc136055 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Tue, 7 Nov 2017 15:23:17 +0800 Subject: ASoC: nau8540: reset state machine for channel phase sync The four channel ADCs in NAU85L40 have difference control registers, it is hard to synchronous these four channels without correct sequence. The phase difference will not be a constant and not to conjecture easily. It may be 2.55 degree, or more ,or less. Intended to prevent phase difference of channels, the solution as follows: (1)Channel_Sync need to be enabled. (2)Do soft reset without affecting register when recording done. 
Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8540.c | 23 ++++++++++++++++++++--- sound/soc/codecs/nau8540.h | 1 + 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c index f9c9933acffb..c10cbffa6314 100644 --- a/sound/soc/codecs/nau8540.c +++ b/sound/soc/codecs/nau8540.c @@ -233,6 +233,19 @@ static SOC_ENUM_SINGLE_DECL( static const struct snd_kcontrol_new digital_ch1_mux = SOC_DAPM_ENUM("Digital CH1 Select", digital_ch1_enum); +static int aiftx_power_control(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *k, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + + if (SND_SOC_DAPM_EVENT_OFF(event)) { + regmap_write(nau8540->regmap, NAU8540_REG_RST, 0x0001); + regmap_write(nau8540->regmap, NAU8540_REG_RST, 0x0000); + } + return 0; +} + static const struct snd_soc_dapm_widget nau8540_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("MICBIAS2", NAU8540_REG_MIC_BIAS, 11, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("MICBIAS1", NAU8540_REG_MIC_BIAS, 10, 0, NULL, 0), @@ -270,7 +283,8 @@ static const struct snd_soc_dapm_widget nau8540_dapm_widgets[] = { SND_SOC_DAPM_MUX("Digital CH1 Mux", SND_SOC_NOPM, 0, 0, &digital_ch1_mux), - SND_SOC_DAPM_AIF_OUT("AIFTX", "Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT_E("AIFTX", "Capture", 0, SND_SOC_NOPM, 0, 0, + aiftx_power_control, SND_SOC_DAPM_POST_PMD), }; static const struct snd_soc_dapm_route nau8540_dapm_routes[] = { @@ -710,9 +724,12 @@ static void nau8540_init_regs(struct nau8540 *nau8540) regmap_update_bits(regmap, NAU8540_REG_CLOCK_CTRL, NAU8540_CLK_ADC_EN | NAU8540_CLK_I2S_EN, NAU8540_CLK_ADC_EN | NAU8540_CLK_I2S_EN); - /* ADC OSR selection, CLK_ADC = Fs * OSR */ + /* ADC OSR selection, CLK_ADC = Fs * OSR; + * Channel time alignment enable. 
+ */ regmap_update_bits(regmap, NAU8540_REG_ADC_SAMPLE_RATE, - NAU8540_ADC_OSR_MASK, NAU8540_ADC_OSR_64); + NAU8540_CH_SYNC | NAU8540_ADC_OSR_MASK, + NAU8540_CH_SYNC | NAU8540_ADC_OSR_64); } static int __maybe_unused nau8540_suspend(struct snd_soc_codec *codec) diff --git a/sound/soc/codecs/nau8540.h b/sound/soc/codecs/nau8540.h index 5db5b224944d..14339f9bb01a 100644 --- a/sound/soc/codecs/nau8540.h +++ b/sound/soc/codecs/nau8540.h @@ -165,6 +165,7 @@ #define NAU8540_TDM_TX_MASK 0xf /* ADC_SAMPLE_RATE (0x3A) */ +#define NAU8540_CH_SYNC (0x1 << 14) #define NAU8540_ADC_OSR_MASK 0x3 #define NAU8540_ADC_OSR_256 0x3 #define NAU8540_ADC_OSR_128 0x2 -- cgit v1.2.3 From 14323ff8c21825e20810e893312f9321f9e4e72c Mon Sep 17 00:00:00 2001 From: John Hsu Date: Tue, 7 Nov 2017 15:23:18 +0800 Subject: ASoC: nau8540: PGA short to ground Change channel PGA input mode selection for better recording quality. The patch shorts the inputs to ground with 12kOhm differentially terminated. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8540.c | 7 +++++++ sound/soc/codecs/nau8540.h | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c index c10cbffa6314..8246486a588d 100644 --- a/sound/soc/codecs/nau8540.c +++ b/sound/soc/codecs/nau8540.c @@ -730,6 +730,13 @@ static void nau8540_init_regs(struct nau8540 *nau8540) regmap_update_bits(regmap, NAU8540_REG_ADC_SAMPLE_RATE, NAU8540_CH_SYNC | NAU8540_ADC_OSR_MASK, NAU8540_CH_SYNC | NAU8540_ADC_OSR_64); + /* PGA input mode selection */ + regmap_update_bits(regmap, NAU8540_REG_FEPGA1, + NAU8540_FEPGA1_MODCH2_SHT | NAU8540_FEPGA1_MODCH1_SHT, + NAU8540_FEPGA1_MODCH2_SHT | NAU8540_FEPGA1_MODCH1_SHT); + regmap_update_bits(regmap, NAU8540_REG_FEPGA2, + NAU8540_FEPGA2_MODCH4_SHT | NAU8540_FEPGA2_MODCH3_SHT, + NAU8540_FEPGA2_MODCH4_SHT | NAU8540_FEPGA2_MODCH3_SHT); } static int __maybe_unused nau8540_suspend(struct snd_soc_codec *codec) diff --git 
a/sound/soc/codecs/nau8540.h b/sound/soc/codecs/nau8540.h index 14339f9bb01a..7083d4821ce4 100644 --- a/sound/soc/codecs/nau8540.h +++ b/sound/soc/codecs/nau8540.h @@ -184,6 +184,18 @@ #define NAU8540_PRECHARGE_DIS (0x1 << 13) #define NAU8540_GLOBAL_BIAS_EN (0x1 << 12) +/* FEPGA1 (0x69) */ +#define NAU8540_FEPGA1_MODCH2_SHT_SFT 7 +#define NAU8540_FEPGA1_MODCH2_SHT (0x1 << NAU8540_FEPGA1_MODCH2_SHT_SFT) +#define NAU8540_FEPGA1_MODCH1_SHT_SFT 3 +#define NAU8540_FEPGA1_MODCH1_SHT (0x1 << NAU8540_FEPGA1_MODCH1_SHT_SFT) + +/* FEPGA2 (0x6A) */ +#define NAU8540_FEPGA2_MODCH4_SHT_SFT 7 +#define NAU8540_FEPGA2_MODCH4_SHT (0x1 << NAU8540_FEPGA2_MODCH4_SHT_SFT) +#define NAU8540_FEPGA2_MODCH3_SHT_SFT 3 +#define NAU8540_FEPGA2_MODCH3_SHT (0x1 << NAU8540_FEPGA2_MODCH3_SHT_SFT) + /* System Clock Source */ enum { -- cgit v1.2.3 From 6573c0510be611fb886d005b7b2321658dc5df87 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Tue, 7 Nov 2017 15:23:19 +0800 Subject: ASoC: nau8540: fix the record pop noise When the record starts, the driver turns on MICBIAS and the voltage is pulled up for an instant. If the receiver starts to capture the signal between the instant, there is an pop noise in the stream beginning. To avoid the pop noise, the driver makes a delay in the sequence. After MICBIAS powered up, the driver waits 300 ms for the voltage going down. Then turns on the ADC output, and sends signal to receiver. The pop noise can be erased. 
Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8540.c | 47 ++++++++++++++++++++++++++++++++++++++-------- sound/soc/codecs/nau8540.h | 2 ++ 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c index 8246486a588d..9565f9a181a9 100644 --- a/sound/soc/codecs/nau8540.c +++ b/sound/soc/codecs/nau8540.c @@ -233,6 +233,28 @@ static SOC_ENUM_SINGLE_DECL( static const struct snd_kcontrol_new digital_ch1_mux = SOC_DAPM_ENUM("Digital CH1 Select", digital_ch1_enum); +static int adc_power_control(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *k, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct nau8540 *nau8540 = snd_soc_codec_get_drvdata(codec); + + if (SND_SOC_DAPM_EVENT_ON(event)) { + msleep(300); + /* DO12 and DO34 pad output enable */ + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL1, + NAU8540_I2S_DO12_TRI, 0); + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL2, + NAU8540_I2S_DO34_TRI, 0); + } else if (SND_SOC_DAPM_EVENT_OFF(event)) { + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL1, + NAU8540_I2S_DO12_TRI, NAU8540_I2S_DO12_TRI); + regmap_update_bits(nau8540->regmap, NAU8540_REG_PCM_CTRL2, + NAU8540_I2S_DO34_TRI, NAU8540_I2S_DO34_TRI); + } + return 0; +} + static int aiftx_power_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *k, int event) { @@ -260,14 +282,18 @@ static const struct snd_soc_dapm_widget nau8540_dapm_widgets[] = { SND_SOC_DAPM_PGA("Frontend PGA3", NAU8540_REG_PWR, 14, 0, NULL, 0), SND_SOC_DAPM_PGA("Frontend PGA4", NAU8540_REG_PWR, 15, 0, NULL, 0), - SND_SOC_DAPM_ADC("ADC1", NULL, - NAU8540_REG_POWER_MANAGEMENT, 0, 0), - SND_SOC_DAPM_ADC("ADC2", NULL, - NAU8540_REG_POWER_MANAGEMENT, 1, 0), - SND_SOC_DAPM_ADC("ADC3", NULL, - NAU8540_REG_POWER_MANAGEMENT, 2, 0), - SND_SOC_DAPM_ADC("ADC4", NULL, - NAU8540_REG_POWER_MANAGEMENT, 3, 0), + SND_SOC_DAPM_ADC_E("ADC1", NULL, + 
NAU8540_REG_POWER_MANAGEMENT, 0, 0, adc_power_control, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_ADC_E("ADC2", NULL, + NAU8540_REG_POWER_MANAGEMENT, 1, 0, adc_power_control, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_ADC_E("ADC3", NULL, + NAU8540_REG_POWER_MANAGEMENT, 2, 0, adc_power_control, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_ADC_E("ADC4", NULL, + NAU8540_REG_POWER_MANAGEMENT, 3, 0, adc_power_control, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_PGA("ADC CH1", NAU8540_REG_ANALOG_PWR, 0, 0, NULL, 0), SND_SOC_DAPM_PGA("ADC CH2", NAU8540_REG_ANALOG_PWR, 1, 0, NULL, 0), @@ -737,6 +763,11 @@ static void nau8540_init_regs(struct nau8540 *nau8540) regmap_update_bits(regmap, NAU8540_REG_FEPGA2, NAU8540_FEPGA2_MODCH4_SHT | NAU8540_FEPGA2_MODCH3_SHT, NAU8540_FEPGA2_MODCH4_SHT | NAU8540_FEPGA2_MODCH3_SHT); + /* DO12 and DO34 pad output disable */ + regmap_update_bits(regmap, NAU8540_REG_PCM_CTRL1, + NAU8540_I2S_DO12_TRI, NAU8540_I2S_DO12_TRI); + regmap_update_bits(regmap, NAU8540_REG_PCM_CTRL2, + NAU8540_I2S_DO34_TRI, NAU8540_I2S_DO34_TRI); } static int __maybe_unused nau8540_suspend(struct snd_soc_codec *codec) diff --git a/sound/soc/codecs/nau8540.h b/sound/soc/codecs/nau8540.h index 7083d4821ce4..dceb04b23c19 100644 --- a/sound/soc/codecs/nau8540.h +++ b/sound/soc/codecs/nau8540.h @@ -146,6 +146,7 @@ #define NAU8540_I2S_DF_PCM_AB 0x3 /* PCM_CTRL1 (0x11) */ +#define NAU8540_I2S_DO12_TRI (0x1 << 15) #define NAU8540_I2S_LRC_DIV_SFT 12 #define NAU8540_I2S_LRC_DIV_MASK (0x3 << NAU8540_I2S_LRC_DIV_SFT) #define NAU8540_I2S_DO12_OE (0x1 << 4) @@ -156,6 +157,7 @@ #define NAU8540_I2S_BLK_DIV_MASK 0x7 /* PCM_CTRL1 (0x12) */ +#define NAU8540_I2S_DO34_TRI (0x1 << 15) #define NAU8540_I2S_DO34_OE (0x1 << 11) #define NAU8540_I2S_TSLOT_L_MASK 0x3ff -- cgit v1.2.3 From cf6b68d192138d67b49002b499eb507af0c8c56d Mon Sep 17 00:00:00 2001 From: John Hsu Date: Tue, 7 Nov 2017 17:06:32 +0800 Subject: ASoC: 
nau8824: move key irq after jd done It is possible to get the fake key press interruption when the codec do jack detection. We think it's proper to move the key interruption configuration after jack detection done. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8824.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 0240759f951c..b7b63ac037a7 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -843,6 +843,11 @@ static void nau8824_jdet_work(struct work_struct *work) event_mask |= SND_JACK_HEADSET; snd_soc_jack_report(nau8824->jack, event, event_mask); + /* Enable short key press and release interruption. */ + regmap_update_bits(regmap, NAU8824_REG_INTERRUPT_SETTING, + NAU8824_IRQ_KEY_RELEASE_DIS | + NAU8824_IRQ_KEY_SHORT_PRESS_DIS, 0); + nau8824_sema_release(nau8824); } @@ -850,13 +855,12 @@ static void nau8824_setup_auto_irq(struct nau8824 *nau8824) { struct regmap *regmap = nau8824->regmap; - /* Enable jack ejection, short key press and release interruption. */ + /* Enable jack ejection interruption. */ regmap_update_bits(regmap, NAU8824_REG_INTERRUPT_SETTING_1, NAU8824_IRQ_INSERT_EN | NAU8824_IRQ_EJECT_EN, NAU8824_IRQ_EJECT_EN); regmap_update_bits(regmap, NAU8824_REG_INTERRUPT_SETTING, - NAU8824_IRQ_EJECT_DIS | NAU8824_IRQ_KEY_RELEASE_DIS | - NAU8824_IRQ_KEY_SHORT_PRESS_DIS, 0); + NAU8824_IRQ_EJECT_DIS, 0); /* Enable internal VCO needed for interruptions */ nau8824_config_sysclk(nau8824, NAU8824_CLK_INTERNAL, 0); regmap_update_bits(regmap, NAU8824_REG_ENA_CTRL, -- cgit v1.2.3 From a2eb62edbd01064cee0c4c00854a25f04237605b Mon Sep 17 00:00:00 2001 From: John Hsu Date: Tue, 7 Nov 2017 17:06:33 +0800 Subject: ASoC: nau8824: condition for clock disable There are headphone and speaker outputs in NAU88L24. During the playback, the codec should not change the clock status when switching these outputs. 
Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8824.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index b7b63ac037a7..8a9a9939827e 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -811,7 +811,8 @@ static void nau8824_eject_jack(struct nau8824 *nau8824) NAU8824_JD_SLEEP_MODE, NAU8824_JD_SLEEP_MODE); /* Close clock for jack type detection at manual mode */ - nau8824_config_sysclk(nau8824, NAU8824_CLK_DIS, 0); + if (dapm->bias_level < SND_SOC_BIAS_PREPARE) + nau8824_config_sysclk(nau8824, NAU8824_CLK_DIS, 0); } static void nau8824_jdet_work(struct work_struct *work) @@ -862,7 +863,8 @@ static void nau8824_setup_auto_irq(struct nau8824 *nau8824) regmap_update_bits(regmap, NAU8824_REG_INTERRUPT_SETTING, NAU8824_IRQ_EJECT_DIS, 0); /* Enable internal VCO needed for interruptions */ - nau8824_config_sysclk(nau8824, NAU8824_CLK_INTERNAL, 0); + if (nau8824->dapm->bias_level < SND_SOC_BIAS_PREPARE) + nau8824_config_sysclk(nau8824, NAU8824_CLK_INTERNAL, 0); regmap_update_bits(regmap, NAU8824_REG_ENA_CTRL, NAU8824_JD_SLEEP_MODE, 0); } -- cgit v1.2.3 From cc20c4df1627dd515ea90dd20e2684a8a1c76693 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Nov 2017 14:30:36 +0100 Subject: ASoC: intel: initialize return value properly When CONFIG_SND_SOC_HDAC_HDMI is disabled, we can run into an uninitialized variable: sound/soc/intel/skylake/skl.c: In function 'skl_resume': sound/soc/intel/skylake/skl.c:326:6: error: 'ret' may be used uninitialized in this function [-Werror=maybe-uninitialized] I have run into this on today's linux-next kernel, but it appears that this is an older problem that was just hard to trigger with randconfig builds as CONFIG_SND_SOC_HDAC_HDMI would in effect be impossible to disable when having SND_SOC_INTEL_SKYLAKE enabled. 
Signed-off-by: Arnd Bergmann Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index 31d8634e8aa1..acb0ab470ca6 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -355,6 +355,7 @@ static int skl_resume(struct device *dev) if (ebus->cmd_dma_state) snd_hdac_bus_init_cmd_io(&ebus->bus); + ret = 0; } else { ret = _skl_resume(ebus); -- cgit v1.2.3 From a76d7f5454c688b52dc849e832cc4c6dd0975723 Mon Sep 17 00:00:00 2001 From: Akshu Agrawal Date: Fri, 3 Nov 2017 16:35:44 -0400 Subject: ASoC: AMD: Make the driver name consistent across files This fixes the issue of driver not getting auto loaded with MODULE_ALIAS. find /sys/devices -name modalias -print0 | xargs -0 grep 'audio' /sys/devices/pci0000:00/0000:00:01.0/acp_audio_dma.0.auto/modalias:platform:acp_audio_dma BUG=b:62103837 TEST=boot and check for device in lsmod Signed-off-by: Akshu Agrawal Reviewed-on: https://chromium-review.googlesource.com/678278 Tested-by: Jason Clinton Reviewed-by: Jason Clinton Signed-off-by: Alex Deucher Signed-off-by: Mark Brown --- sound/soc/amd/Makefile | 4 ++-- sound/soc/amd/acp-pcm-dma.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/amd/Makefile b/sound/soc/amd/Makefile index eed64ff6c73e..f07fd2e2870a 100644 --- a/sound/soc/amd/Makefile +++ b/sound/soc/amd/Makefile @@ -1,5 +1,5 @@ -snd-soc-acp-pcm-objs := acp-pcm-dma.o +acp_audio_dma-objs := acp-pcm-dma.o snd-soc-acp-rt5645-mach-objs := acp-rt5645.o -obj-$(CONFIG_SND_SOC_AMD_ACP) += snd-soc-acp-pcm.o +obj-$(CONFIG_SND_SOC_AMD_ACP) += acp_audio_dma.o obj-$(CONFIG_SND_SOC_AMD_CZ_RT5645_MACH) += snd-soc-acp-rt5645-mach.o diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index 73b58ee00383..95c61ecdd1dd 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -40,6 +40,8 @@ #define 
ST_MAX_BUFFER (ST_PLAYBACK_MAX_PERIOD_SIZE * PLAYBACK_MAX_NUM_PERIODS) #define ST_MIN_BUFFER ST_MAX_BUFFER +#define DRV_NAME "acp_audio_dma" + static const struct snd_pcm_hardware acp_pcm_hardware_playback = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP | @@ -1170,7 +1172,7 @@ static struct platform_driver acp_dma_driver = { .probe = acp_audio_probe, .remove = acp_audio_remove, .driver = { - .name = "acp_audio_dma", + .name = DRV_NAME, .pm = &acp_pm_ops, }, }; @@ -1181,4 +1183,4 @@ MODULE_AUTHOR("Vijendar.Mukunda@amd.com"); MODULE_AUTHOR("Maruthi.Bayyavarapu@amd.com"); MODULE_DESCRIPTION("AMD ACP PCM Driver"); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:acp-dma-audio"); +MODULE_ALIAS("platform:"DRV_NAME); -- cgit v1.2.3 From 640d5175a671cd0df0b9e3b5935dc80fc5248973 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Nov 2017 15:13:29 -0300 Subject: perf evlist: Set the correct idx when adding dummy events The evsel->idx field is used mainly to access the right bucket in per-event arrays such as the annotation ones, but also to set evsel->tracking, that in turn will decide what of the events will ask for PERF_RECORD_{MMAP,COMM,EXEC} to be generated, i.e. which perf_event_attr will have its mmap, etc fields set. When we were adding the "dummy" event using perf_evlist__add_dummy() we were not setting it correctly, which could result in multiple tracking events. Now that I'll try using a dummy event to be the tracking one when using 'perf record --delay', i.e. 
when we process the --delay setting we may already have the evlist set up, like with: perf record -e cycles,instructions --delay 1000 ./workload We will need to add a "dummy" event, then reset evsel->tracking for the first event, "cycles", and set it instead to the dummy one, and also setting its attr.enable_on_exec, so that we get the PERF_RECORD_MMAP, etc metadata events while waiting to enable the explicitly requested events, so let's get this straight and set the right evsel->idx. Cc: Adrian Hunter Cc: Bram Stolk Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-nrdfchshqxf7diszhxcecqb9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c6c891e154a6..ccb749f9a83f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -257,7 +257,7 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist) .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ }; - struct perf_evsel *evsel = perf_evsel__new(&attr); + struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries); if (evsel == NULL) return -ENOMEM; -- cgit v1.2.3 From d3dbf43c56f9176be325ce1cc72a44c8d3c210dc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Nov 2017 15:34:34 -0300 Subject: perf record: Generate PERF_RECORD_{MMAP,COMM,EXEC} with --delay When we use an initial delay, e.g.: 'perf record --delay 1000', we do not enable the events until that delay has passed after we started the workload, including the tracking event, i.e. the one for which we have attr.mmap, etc, enabled to ask the kernel to generate the PERF_RECORD_{MMAP,COMM,EXEC} metadata events that will then allow us to resolve addresses in samples to the map, dso and symbol. There will be a shadow that even synthesizing samples won't cover, i.e. 
the workload that we start and other processes forking while we wait for the initial delay to expire. So use a dummy event to be the tracking one and make it be enabled on exec. Before: # perf record --delay 1000 stress --cpu 1 --timeout 5 stress: info: [9029] dispatching hogs: 1 cpu, 0 io, 0 vm, 0 hdd stress: info: [9029] successful run completed in 5s [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.624 MB perf.data (15908 samples) ] # perf script | head :9031 9031 32001.826888: 1 cycles:ppp: ffffffff831aa30d event_function (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826893: 1 cycles:ppp: ffffffff8300d1a0 intel_bts_enable_local (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826895: 7 cycles:ppp: ffffffff83023870 sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826897: 103 cycles:ppp: ffffffff8300c331 intel_pmu_handle_irq (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826899: 1615 cycles:ppp: ffffffff830231f8 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826902: 26724 cycles:ppp: ffffffff8384c6a7 native_irq_return_iret (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826913: 329739 cycles:ppp: 7fb2a5410932 [unknown] ([unknown]) :9031 9031 32001.827033: 1225451 cycles:ppp: 7fb2a5410930 [unknown] ([unknown]) :9031 9031 32001.827474: 1391725 cycles:ppp: 7fb2a5410930 [unknown] ([unknown]) :9031 9031 32001.827978: 1233697 cycles:ppp: 7fb2a5410928 [unknown] ([unknown]) # After: # perf record --delay 1000 stress --cpu 1 --timeout 5 stress: info: [9741] dispatching hogs: 1 cpu, 0 io, 0 vm, 0 hdd stress: info: [9741] successful run completed in 5s [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.751 MB perf.data (15976 samples) ] # perf script | head stress 9742 32110.959106: 1 cycles:ppp: ffffffff831b26f6 __perf_event_task_sched_in (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959110: 1 
cycles:ppp: ffffffff8300c2e9 intel_pmu_handle_irq (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959112: 7 cycles:ppp: ffffffff830231e0 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959115: 101 cycles:ppp: ffffffff83023870 sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959117: 1533 cycles:ppp: ffffffff830231f8 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959119: 23992 cycles:ppp: ffffffff831b0900 ctx_sched_in (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959129: 329406 cycles:ppp: 7f4b1b661930 __random_r (/usr/lib64/libc-2.25.so) stress 9742 32110.959249: 1288322 cycles:ppp: 5566e1e7cbc9 hogcpu (/usr/bin/stress) stress 9742 32110.959712: 1464046 cycles:ppp: 7f4b1b66179e __random (/usr/lib64/libc-2.25.so) stress 9742 32110.960241: 1266918 cycles:ppp: 7f4b1b66195b __random_r (/usr/lib64/libc-2.25.so) # Reported-by: Bram Stolk Tested-by: Bram Stolk Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 6619a53ef757 ("perf record: Add --initial-delay option") Link: http://lkml.kernel.org/n/tip-nrdfchshqxf7diszhxcecqb9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3d7f33e19df2..5f78ce943407 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -339,6 +339,22 @@ static int record__open(struct record *rec) struct perf_evsel_config_term *err_term; int rc = 0; + /* + * For initial_delay we need to add a dummy event so that we can track + * PERF_RECORD_MMAP while we wait for the initial delay to enable the + * real events, the ones asked by the user. 
+ */ + if (opts->initial_delay) { + if (perf_evlist__add_dummy(evlist)) + return -ENOMEM; + + pos = perf_evlist__first(evlist); + pos->tracking = 0; + pos = perf_evlist__last(evlist); + pos->tracking = 1; + pos->attr.enable_on_exec = 1; + } + perf_evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, pos) { -- cgit v1.2.3 From a17c4ca0ddef659d33fb6661995bd74e1a6a6101 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:25 +0200 Subject: perf annotate: Add annotation_line struct In order to make the annotation support generic, add 'struct annotation_line', which will hold generic data common to annotation sources (such as the one for python scripts, coming on upcoming patches). Having this, we can add different annotation line support other than objdump disasm. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 34 +++++++++++++++++----------------- tools/perf/ui/gtk/annotate.c | 6 +++--- tools/perf/util/annotate.c | 20 ++++++++++---------- tools/perf/util/annotate.h | 20 ++++++++++++-------- 4 files changed, 42 insertions(+), 38 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 8f7f59d1a2b5..a8c2f7405a41 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -84,7 +84,7 @@ static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { if (annotate_browser__opts.hide_src_code) { - struct disasm_line *dl = list_entry(entry, struct disasm_line, node); + struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); return dl->offset == -1; } @@ -123,7 +123,7 @@ static int annotate_browser__cycles_width(struct annotate_browser *ab) static void annotate_browser__write(struct ui_browser *browser, void *entry, int 
row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *dl = list_entry(entry, struct disasm_line, node); + struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); struct browser_disasm_line *bdl = disasm_line__browser(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && @@ -286,7 +286,7 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) { - struct disasm_line *pos = list_prev_entry(cursor, node); + struct disasm_line *pos = list_prev_entry(cursor, al.node); const char *name; if (!pos) @@ -404,16 +404,16 @@ static void annotate_browser__set_top(struct annotate_browser *browser, browser->b.top_idx = browser->b.index = idx; while (browser->b.top_idx != 0 && back != 0) { - pos = list_entry(pos->node.prev, struct disasm_line, node); + pos = list_entry(pos->al.node.prev, struct disasm_line, al.node); - if (disasm_line__filter(&browser->b, &pos->node)) + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; --browser->b.top_idx; --back; } - browser->b.top = pos; + browser->b.top = &pos->al; browser->b.navkeypressed = true; } @@ -446,7 +446,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, pthread_mutex_lock(&notes->lock); - list_for_each_entry(pos, &notes->src->source, node) { + list_for_each_entry(pos, &notes->src->source, al.node) { struct browser_disasm_line *bpos = disasm_line__browser(pos); const char *path = NULL; double max_percent = 0.0; @@ -492,7 +492,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); - dl = list_entry(browser->b.top, struct disasm_line, node); + dl = list_entry(browser->b.top, struct disasm_line, al.node); bdl =
disasm_line__browser(dl); if (annotate_browser__opts.hide_src_code) { @@ -589,10 +589,10 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows struct disasm_line *pos; *idx = 0; - list_for_each_entry(pos, &notes->src->source, node) { + list_for_each_entry(pos, &notes->src->source, al.node) { if (pos->offset == offset) return pos; - if (!disasm_line__filter(&browser->b, &pos->node)) + if (!disasm_line__filter(&browser->b, &pos->al.node)) ++*idx; } @@ -630,8 +630,8 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows struct disasm_line *pos = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue(pos, &notes->src->source, node) { - if (disasm_line__filter(&browser->b, &pos->node)) + list_for_each_entry_continue(pos, &notes->src->source, al.node) { + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; ++*idx; @@ -669,8 +669,8 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse struct disasm_line *pos = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue_reverse(pos, &notes->src->source, node) { - if (disasm_line__filter(&browser->b, &pos->node)) + list_for_each_entry_continue_reverse(pos, &notes->src->source, al.node) { + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; --*idx; @@ -1134,7 +1134,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, notes = symbol__annotation(sym); browser.start = map__rip_2objdump(map, sym->start); - list_for_each_entry(pos, &notes->src->source, node) { + list_for_each_entry(pos, &notes->src->source, al.node) { struct browser_disasm_line *bpos; size_t line_len = strlen(pos->line); @@ -1174,8 +1174,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, annotate_browser__update_addr_width(&browser); ret = annotate_browser__run(&browser, evsel, hbt); - list_for_each_entry_safe(pos, n, &notes->src->source, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n,
&notes->src->source, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index fc7a2e105bfd..cf8092676c7a 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -119,7 +119,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); g_object_unref(GTK_TREE_MODEL(store)); - list_for_each_entry(pos, &notes->src->source, node) { + list_for_each_entry(pos, &notes->src->source, al.node) { GtkTreeIter iter; int ret = 0; @@ -148,8 +148,8 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, gtk_container_add(GTK_CONTAINER(window), view); - list_for_each_entry_safe(pos, n, &notes->src->source, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n, &notes->src->source, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index da1c4c4a0dd8..004e33dc897c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -931,12 +931,12 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r static void disasm__add(struct list_head *head, struct disasm_line *line) { - list_add_tail(&line->node, head); + list_add_tail(&line->al.node, head); } struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) { - list_for_each_entry_continue(pos, head, node) + list_for_each_entry_continue(pos, head, al.node) if (pos->offset >= 0) return pos; @@ -1122,7 +1122,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 1; if (queue != NULL) { - list_for_each_entry_from(queue, &notes->src->source, node) { + list_for_each_entry_from(queue, &notes->src->source, al.node) { if (queue == dl) break; disasm_line__print(queue, sym, start, evsel, len, @@ -1305,7 +1305,7 @@ static void
delete_last_nop(struct symbol *sym) struct disasm_line *dl; while (!list_empty(list)) { - dl = list_entry(list->prev, struct disasm_line, node); + dl = list_entry(list->prev, struct disasm_line, al.node); if (dl->ins.ops) { if (dl->ins.ops != &nop_ops) @@ -1317,7 +1317,7 @@ static void delete_last_nop(struct symbol *sym) return; } - list_del(&dl->node); + list_del(&dl->al.node); disasm_line__free(dl); } } @@ -1844,7 +1844,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); - list_for_each_entry(pos, &notes->src->source, node) { + list_for_each_entry(pos, &notes->src->source, al.node) { if (context && queue == NULL) { queue = pos; queue_len = 0; @@ -1874,7 +1874,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (!context) break; if (queue_len == context) - queue = list_entry(queue->node.next, typeof(*queue), node); + queue = list_entry(queue->al.node.next, typeof(*queue), al.node); else ++queue_len; break; @@ -1911,8 +1911,8 @@ void disasm__purge(struct list_head *head) { struct disasm_line *pos, *n; - list_for_each_entry_safe(pos, n, head, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n, head, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } } @@ -1939,7 +1939,7 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) struct disasm_line *pos; size_t printed = 0; - list_for_each_entry(pos, head, node) + list_for_each_entry(pos, head, al.node) printed += disasm_line__fprintf(pos, fp); return printed; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f6ba3560de5e..cc3cf6b50d55 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,15 +59,19 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; +struct annotation_line { + struct list_head node; +}; + struct disasm_line { - struct list_head node; - s64 offset; - char *line; - struct ins ins; - int line_nr; -
float ipc; - u64 cycles; - struct ins_operands ops; + struct annotation_line al; + s64 offset; + char *line; + struct ins ins; + int line_nr; + float ipc; + u64 cycles; + struct ins_operands ops; }; static inline bool disasm_line__has_offset(const struct disasm_line *dl) -- cgit v1.2.3 From d5490b9647e6e41b203186ed0d73b4103f139fda Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:26 +0200 Subject: perf annotate: Move line/offset into annotation_line struct Move the line/line_nr/offset members to the annotation_line struct to be used as generic members for any annotation source. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 45 ++++++++++++++++++++------------------- tools/perf/ui/gtk/annotate.c | 14 ++++++------ tools/perf/util/annotate.c | 41 ++++++++++++++++++----------------- tools/perf/util/annotate.h | 6 +++--- 4 files changed, 54 insertions(+), 52 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index a8c2f7405a41..73d921c3e3ec 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -84,8 +84,9 @@ static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { if (annotate_browser__opts.hide_src_code) { - struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - return dl->offset == -1; + struct annotation_line *al = list_entry(entry, struct annotation_line, node); + + return al->offset == -1; } return false; @@ -141,7 +142,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int percent_max = bdl->samples[i].percent; } - if ((row == 0) && (dl->offset == -1 || percent_max == 0.0)) { + if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { if (dl->ipc == 0.0 &&
dl->cycles == 0) show_title = true; @@ -149,7 +150,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int show_title = true; } - if (dl->offset != -1 && percent_max != 0.0) { + if (dl->al.offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, bdl->samples[i].percent, @@ -199,19 +200,19 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) width += 1; - if (!*dl->line) + if (!*dl->al.line) ui_browser__write_nstring(browser, " ", width - pcnt_width - cycles_width); - else if (dl->offset == -1) { - if (dl->line_nr && annotate_browser__opts.show_linenr) + else if (dl->al.offset == -1) { + if (dl->al.line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", - ab->addr_width + 1, dl->line_nr); + ab->addr_width + 1, dl->al.line_nr); else printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); ui_browser__write_nstring(browser, bf, printed); - ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width - cycles_width + 1); + ui_browser__write_nstring(browser, dl->al.line, width - printed - pcnt_width - cycles_width + 1); } else { - u64 addr = dl->offset; + u64 addr = dl->al.offset; int color = -1; if (!annotate_browser__opts.use_offset) @@ -247,7 +248,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__set_color(browser, color); if (dl->ins.ops && dl->ins.ops->scnprintf) { if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > dl->offset; + bool fwd = dl->ops.target.offset > dl->al.offset; ui_browser__write_graph(browser, fwd ? 
SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); @@ -452,7 +453,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, double max_percent = 0.0; int i; - if (pos->offset == -1) { + if (pos->al.offset == -1) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -464,8 +465,8 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, bpos->samples[i].percent = disasm__calc_percent(notes, evsel->idx + i, - pos->offset, - next ? next->offset : len, + pos->al.offset, + next ? next->al.offset : len, &path, &sample); bpos->samples[i].he = sample; @@ -590,7 +591,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows *idx = 0; list_for_each_entry(pos, &notes->src->source, al.node) { - if (pos->offset == offset) + if (pos->al.offset == offset) return pos; if (!disasm_line__filter(&browser->b, &pos->al.node)) ++*idx; @@ -636,7 +637,7 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows ++*idx; - if (pos->line && strstr(pos->line, s) != NULL) + if (pos->al.line && strstr(pos->al.line, s) != NULL) return pos; } @@ -675,7 +676,7 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse --*idx; - if (pos->line && strstr(pos->line, s) != NULL) + if (pos->al.line && strstr(pos->al.line, s) != NULL) return pos; } @@ -901,7 +902,7 @@ show_help: case K_RIGHT: if (browser->selection == NULL) ui_helpline__puts("Huh? No selection.
Report to linux-kernel@vger.kernel.org"); - else if (browser->selection->offset == -1) + else if (browser->selection->al.offset == -1) ui_helpline__puts("Actions are only available for assembly lines."); else if (!browser->selection->ins.ops) goto show_sup_ins; @@ -1136,13 +1137,13 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, list_for_each_entry(pos, &notes->src->source, al.node) { struct browser_disasm_line *bpos; - size_t line_len = strlen(pos->line); + size_t line_len = strlen(pos->al.line); if (browser.b.width < line_len) browser.b.width = line_len; bpos = disasm_line__browser(pos); bpos->idx = browser.nr_entries++; - if (pos->offset != -1) { + if (pos->al.offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly @@ -1151,8 +1152,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, * * E.g. copy_user_generic_unrolled */ - if (pos->offset < (s64)size) - browser.offsets[pos->offset] = pos; + if (pos->al.offset < (s64)size) + browser.offsets[pos->al.offset] = pos; } else bpos->idx_asm = -1; } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index cf8092676c7a..162f15712d2d 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -31,14 +31,14 @@ static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym, strcpy(buf, ""); - if (dl->offset == (s64) -1) + if (dl->al.offset == (s64) -1) return 0; symhist = annotation__histogram(symbol__annotation(sym), evidx); - if (!symbol_conf.event_group && !symhist->addr[dl->offset].nr_samples) + if (!symbol_conf.event_group && !symhist->addr[dl->al.offset].nr_samples) return 0; - percent = 100.0 * symhist->addr[dl->offset].nr_samples / symhist->nr_samples; + percent = 100.0 * symhist->addr[dl->al.offset].nr_samples / symhist->nr_samples; markup = perf_gtk__get_percent_color(percent); if (markup) @@ -57,16 +57,16 @@ static int perf_gtk__get_offset(char *buf, size_t size, struct symbol
*sym, strcpy(buf, ""); - if (dl->offset == (s64) -1) + if (dl->al.offset == (s64) -1) return 0; - return scnprintf(buf, size, "%"PRIx64, start + dl->offset); + return scnprintf(buf, size, "%"PRIx64, start + dl->al.offset); } static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl) { int ret = 0; - char *line = g_markup_escape_text(dl->line, -1); + char *line = g_markup_escape_text(dl->al.line, -1); const char *markup = ""; strcpy(buf, ""); @@ -74,7 +74,7 @@ static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl) if (!line) return 0; - if (dl->offset != (s64) -1) + if (dl->al.offset != (s64) -1) markup = NULL; if (markup) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 004e33dc897c..e8b69001229d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -886,14 +886,15 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); if (dl != NULL) { - dl->offset = offset; - dl->line = strdup(line); - dl->line_nr = line_nr; - if (dl->line == NULL) + dl->al.offset = offset; + dl->al.line = strdup(line); + dl->al.line_nr = line_nr; + + if (dl->al.line == NULL) goto out_delete; if (offset != -1) { - if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0) + if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; disasm_line__init_ins(dl, arch, map); @@ -903,7 +904,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, return dl; out_free_line: - zfree(&dl->line); + zfree(&dl->al.line); out_delete: free(dl); return NULL; @@ -911,7 +912,7 @@ out_delete: void disasm_line__free(struct disasm_line *dl) { - zfree(&dl->line); + zfree(&dl->al.line); if (dl->ins.ops && dl->ins.ops->free) dl->ins.ops->free(&dl->ops); else @@ -937,7 +938,7 @@ static void disasm__add(struct list_head *head, struct disasm_line *line) struct disasm_line *disasm__get_next_ip_line(struct 
list_head *head, struct disasm_line *pos) { list_for_each_entry_continue(pos, head, al.node) - if (pos->offset >= 0) + if (pos->al.offset >= 0) return pos; return NULL; @@ -1077,7 +1078,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st static const char *prev_line; static const char *prev_color; - if (dl->offset != -1) { + if (dl->al.offset != -1) { const char *path = NULL; double percent, max_percent = 0.0; double *ppercents = &percent; @@ -1086,7 +1087,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); - s64 offset = dl->offset; + s64 offset = dl->al.offset; const u64 addr = start + offset; struct disasm_line *next; struct block_range *br; @@ -1106,7 +1107,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st percent = disasm__calc_percent(notes, notes->src->lines ? i : evsel->idx + i, offset, - next ? next->offset : (s64) len, + next ? 
next->al.offset : (s64) len, &path, &sample); ppercents[i] = percent; @@ -1165,7 +1166,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st br = block_range__find(addr); color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); - color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); printf("\n"); @@ -1186,10 +1187,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - if (!*dl->line) + if (!*dl->al.line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", dl->line); + printf(" %*s: %s\n", width, " ", dl->al.line); } return 0; @@ -1311,9 +1312,9 @@ static void delete_last_nop(struct symbol *sym) if (dl->ins.ops != &nop_ops) return; } else { - if (!strstr(dl->line, " nop ") && - !strstr(dl->line, " nopl ") && - !strstr(dl->line, " nopw ")) + if (!strstr(dl->al.line, " nop ") && + !strstr(dl->al.line, " nopl ") && + !strstr(dl->al.line, " nopw ")) return; } @@ -1921,10 +1922,10 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) { size_t printed; - if (dl->offset == -1) - return fprintf(fp, "%s\n", dl->line); + if (dl->al.offset == -1) + return fprintf(fp, "%s\n", dl->al.line); - printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name); + printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name); if (dl->ops.raw[0] != '\0') { printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cc3cf6b50d55..b7ca62855760 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -61,14 +61,14 @@ struct annotation; struct annotation_line { struct list_head node; + s64 offset; + char *line; + int line_nr; }; struct disasm_line { struct annotation_line al; - s64 offset; - 
char *line; struct ins ins; - int line_nr; float ipc; u64 cycles; struct ins_operands ops; -- cgit v1.2.3 From 37236d5e0b6a765319dec3e64d828cb44ebecac6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:27 +0200 Subject: perf annotate: Move ipc/cycles into annotation_line struct Move ipc/cycles into annotation_line struct to be used as generic members for any annotation source. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 16 ++++++++-------- tools/perf/util/annotate.h | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 73d921c3e3ec..d1aff2f7cb6c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -144,7 +144,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { - if (dl->ipc == 0.0 && dl->cycles == 0) + if (dl->al.ipc == 0.0 && dl->al.cycles == 0) show_title = true; } else show_title = true; @@ -178,16 +178,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } } if (ab->have_cycles) { - if (dl->ipc) - ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc); + if (dl->al.ipc) + ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->al.ipc); else if (!show_title) ui_browser__write_nstring(browser, " ", IPC_WIDTH); else ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC"); - if (dl->cycles) + if (dl->al.cycles) ui_browser__printf(browser, "%*" PRIu64 " ", - CYCLES_WIDTH - 1, dl->cycles); + CYCLES_WIDTH - 1, dl->al.cycles); else if (!show_title) ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); else @@ -474,7 +474,7 @@ static void 
annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = bpos->samples[i].percent; } - if (max_percent < 0.01 && pos->ipc == 0) { + if (max_percent < 0.01 && pos->al.ipc == 0) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -994,7 +994,7 @@ static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, struct disasm_line *dl = browser->offsets[offset]; if (dl) - dl->ipc = ipc; + dl->al.ipc = ipc; } } } @@ -1025,7 +1025,7 @@ static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, count_and_fill(browser, ch->start, offset, ch); dl = browser->offsets[offset]; if (dl && ch->num_aggr) - dl->cycles = ch->cycles_aggr / ch->num_aggr; + dl->al.cycles = ch->cycles_aggr / ch->num_aggr; browser->have_cycles = true; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index b7ca62855760..a822c0a4987e 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -64,13 +64,13 @@ struct annotation_line { s64 offset; char *line; int line_nr; + float ipc; + u64 cycles; }; struct disasm_line { struct annotation_line al; struct ins ins; - float ipc; - u64 cycles; struct ins_operands ops; }; -- cgit v1.2.3 From c34df25b40c20b478634b954a709749aebdc241a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:28 +0200 Subject: perf annotate: Add symbol__annotate function Add symbol__annotate function to have generic annotation function to be called for all annotation sources. It calls the generic annotation init and then the specific annotation data retrieval function. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/annotate.c | 6 ++-- tools/perf/ui/gtk/annotate.c | 4 +-- tools/perf/util/annotate.c | 58 ++++++++++++++++++++++----------------- tools/perf/util/annotate.h | 6 ++-- 5 files changed, 42 insertions(+), 34 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 477a8699f0b5..adfeeb488f1a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__disassemble(sym, map, NULL, 0, NULL, NULL); + err = symbol__annotate(sym, map, NULL, 0, NULL, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d1aff2f7cb6c..d77994c1cba9 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1120,9 +1120,9 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - sizeof_bdl, &browser.arch, - perf_evsel__env_cpuid(evsel)); + err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + sizeof_bdl, &browser.arch, + perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 162f15712d2d..b498f1a92bb1 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,8 +169,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL); + err = 
symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + 0, NULL, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e8b69001229d..f0093918882d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1425,13 +1425,11 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid) +static int symbol__disassemble(struct symbol *sym, struct map *map, + size_t privsize, struct arch *arch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; - struct arch *arch = NULL; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1445,25 +1443,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map, if (err) return err; - arch_name = annotate__norm_arch(arch_name); - if (!arch_name) - return -1; - - arch = arch__find(arch_name); - if (arch == NULL) - return -ENOTSUP; - - if (parch) - *parch = arch; - - if (arch->init) { - err = arch->init(arch, cpuid); - if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); - return err; - } - } - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1581,6 +1560,35 @@ out_close_stdout: goto out_remove_tmp; } +int symbol__annotate(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch, char *cpuid) +{ + struct arch *arch; + int err; + + arch_name = annotate__norm_arch(arch_name); + if (!arch_name) + return -1; + + arch = arch__find(arch_name); + if (arch == NULL) + return -ENOTSUP; + + if (parch) + *parch = arch; + + if (arch->init) { + err = arch->init(arch, cpuid); + if (err) { + 
pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + return err; + } + } + + return symbol__disassemble(sym, map, privsize, arch); +} + static void insert_source_line(struct rb_root *root, struct source_line *src_line) { struct source_line *iter; @@ -1954,8 +1962,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + 0, NULL, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index a822c0a4987e..e577f9d13a58 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -173,9 +173,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid); +int symbol__annotate(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch, char *cpuid); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, -- cgit v1.2.3 From ea07c5aaed33d23875cd59da8b0892f76e882ccd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:29 +0200 Subject: perf annotate: Add struct annotate_args Adding struct annotate_args to reduce the number of arguments, that need to travel all the way to line allocation. This makes the code easier to read and ease up the changes for following patches. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f0093918882d..f5bd6826fa66 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -878,12 +878,17 @@ out_free_name: return -1; } -static struct disasm_line *disasm_line__new(s64 offset, char *line, - size_t privsize, int line_nr, +struct annotate_args { + size_t privsize; +}; + +static struct disasm_line *disasm_line__new(struct annotate_args *args, + s64 offset, char *line, + int line_nr, struct arch *arch, struct map *map) { - struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); + struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); if (dl != NULL) { dl->al.offset = offset; @@ -1217,8 +1222,8 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be parsed further according to type of the instruction. 
*/ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - struct arch *arch, - FILE *file, size_t privsize, + struct arch *arch, FILE *file, + struct annotate_args *args, int *line_nr) { struct annotation *notes = symbol__annotation(sym); @@ -1264,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr, arch, map); free(line); (*line_nr)++; @@ -1426,7 +1431,8 @@ static const char *annotate__norm_arch(const char *arch_name) } static int symbol__disassemble(struct symbol *sym, struct map *map, - size_t privsize, struct arch *arch) + struct annotate_args *args, + struct arch *arch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1526,7 +1532,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. 
*/ - if (symbol__parse_objdump_line(sym, map, arch, file, privsize, + if (symbol__parse_objdump_line(sym, map, arch, file, args, &lineno) < 0) break; nline++; @@ -1564,6 +1570,9 @@ int symbol__annotate(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize, struct arch **parch, char *cpuid) { + struct annotate_args args = { + .privsize = privsize, + }; struct arch *arch; int err; @@ -1586,7 +1595,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, privsize, arch); + return symbol__disassemble(sym, map, &args, arch); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) -- cgit v1.2.3 From 24fe7b88934b702442597662643222cd0a6a44a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:30 +0200 Subject: perf annotate: Add arch into struct annotate_args Add arch into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f5bd6826fa66..b4d3454618b0 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -880,12 +880,12 @@ out_free_name: struct annotate_args { size_t privsize; + struct arch *arch; }; static struct disasm_line *disasm_line__new(struct annotate_args *args, s64 offset, char *line, int line_nr, - struct arch *arch, struct map *map) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); @@ -902,7 +902,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args, if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, arch, map); + disasm_line__init_ins(dl, args->arch, map); } } @@ -1222,7 +1222,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be parsed further according to type of the instruction. 
*/ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - struct arch *arch, FILE *file, + FILE *file, struct annotate_args *args, int *line_nr) { @@ -1269,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr, arch, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr, map); free(line); (*line_nr)++; @@ -1431,8 +1431,7 @@ static const char *annotate__norm_arch(const char *arch_name) } static int symbol__disassemble(struct symbol *sym, struct map *map, - struct annotate_args *args, - struct arch *arch) + struct annotate_args *args) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1532,7 +1531,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, map, arch, file, args, + if (symbol__parse_objdump_line(sym, map, file, args, &lineno) < 0) break; nline++; @@ -1580,7 +1579,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, if (!arch_name) return -1; - arch = arch__find(arch_name); + args.arch = arch = arch__find(arch_name); if (arch == NULL) return -ENOTSUP; @@ -1595,7 +1594,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, &args, arch); + return symbol__disassemble(sym, map, &args); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) -- cgit v1.2.3 From 1a04db70dcbf621f9919e95456c372281779c053 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:31 +0200 Subject: perf annotate: Add map into struct annotate_args Add map into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b4d3454618b0..30da4402a3e4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -881,12 +881,12 @@ out_free_name: struct annotate_args { size_t privsize; struct arch *arch; + struct map *map; }; static struct disasm_line *disasm_line__new(struct annotate_args *args, s64 offset, char *line, - int line_nr, - struct map *map) + int line_nr) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); @@ -902,7 +902,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args, if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, args->arch, map); + disasm_line__init_ins(dl, args->arch, args->map); } } @@ -1221,11 +1221,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction. 
*/ -static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - FILE *file, +static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, struct annotate_args *args, int *line_nr) { + struct map *map = args->map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; char *line = NULL, *parsed_line, *tmp, *tmp2; @@ -1269,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr); free(line); (*line_nr)++; @@ -1430,9 +1430,9 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -static int symbol__disassemble(struct symbol *sym, struct map *map, - struct annotate_args *args) +static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { + struct map *map = args->map; struct dso *dso = map->dso; char command[PATH_MAX * 2]; FILE *file; @@ -1531,8 +1531,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. 
*/ - if (symbol__parse_objdump_line(sym, map, file, args, - &lineno) < 0) + if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0) break; nline++; } @@ -1571,6 +1570,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, { struct annotate_args args = { .privsize = privsize, + .map = map, }; struct arch *arch; int err; @@ -1594,7 +1594,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, &args); + return symbol__disassemble(sym, &args); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) -- cgit v1.2.3 From 4748834f96903f843719b02190f98e36b2c55192 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:32 +0200 Subject: perf annotate: Add offset/line/line_nr into struct annotate_args Add offset/line/line_nr into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 30da4402a3e4..681c9c4ce9f9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -882,23 +882,24 @@ struct annotate_args { size_t privsize; struct arch *arch; struct map *map; + s64 offset; + char *line; + int line_nr; }; -static struct disasm_line *disasm_line__new(struct annotate_args *args, - s64 offset, char *line, - int line_nr) +static struct disasm_line *disasm_line__new(struct annotate_args *args) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); if (dl != NULL) { - dl->al.offset = offset; - dl->al.line = strdup(line); - dl->al.line_nr = line_nr; + dl->al.offset = args->offset; + dl->al.line = strdup(args->line); + 
dl->al.line_nr = args->line_nr; if (dl->al.line == NULL) goto out_delete; - if (offset != -1) { + if (args->offset != -1) { if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; @@ -1269,7 +1270,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr); + args->offset = offset; + args->line = parsed_line; + args->line_nr = *line_nr; + + dl = disasm_line__new(args); free(line); (*line_nr)++; -- cgit v1.2.3 From d03a686ea6e77b25edacc3eed386cef870e8d248 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:33 +0200 Subject: perf annotate: Add evsel into struct annotation_line_args Add evsel into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. This change also allows us to move the arch name initialization under symbol__annotate function. Link: http://lkml.kernel.org/n/tip-a9ok53rrgt1s5e8uglyvy6qt@git.kernel.org Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 +- tools/perf/ui/gtk/annotate.c | 3 +-- tools/perf/util/annotate.c | 11 ++++++++--- tools/perf/util/annotate.h | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d77994c1cba9..3b72519c085f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1120,7 +1120,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + err = symbol__annotate(sym, map, evsel, sizeof_bdl, &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { diff --git a/tools/perf/ui/gtk/annotate.c 
b/tools/perf/ui/gtk/annotate.c index b498f1a92bb1..5e0a56df0b4c 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,8 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 681c9c4ce9f9..75f54eab22c8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -882,6 +882,7 @@ struct annotate_args { size_t privsize; struct arch *arch; struct map *map; + struct perf_evsel *evsel; s64 offset; char *line; int line_nr; @@ -1570,16 +1571,21 @@ out_close_stdout: } int symbol__annotate(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, + struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid) { struct annotate_args args = { .privsize = privsize, .map = map, + .evsel = evsel, }; + const char *arch_name = NULL; struct arch *arch; int err; + if (evsel) + arch_name = perf_evsel__env_arch(evsel); + arch_name = annotate__norm_arch(arch_name); if (!arch_name) return -1; @@ -1975,8 +1981,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__annotate(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e577f9d13a58..baf34032504a 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -174,7 +174,7 @@ int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map 
*map, - const char *arch_name, size_t privsize, + struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid); enum symbol_disassemble_errno { -- cgit v1.2.3 From c4c724364d398a9746410d5ff482e8c4c7228249 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:34 +0200 Subject: perf annotate: Add annotation_line__next function Rename disasm__get_next_ip_line() to annotation_line__next() to make it work over a generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 7 ++++--- tools/perf/util/annotate.c | 13 +++++++------ tools/perf/util/annotate.h | 3 ++- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 3b72519c085f..881ad6122057 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -440,7 +440,8 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos, *next; + struct annotation_line *next; + struct disasm_line *pos; s64 len = symbol__size(sym); browser->entries = RB_ROOT; @@ -458,7 +459,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - next = disasm__get_next_ip_line(&notes->src->source, pos); + next = annotation_line__next(&pos->al, &notes->src->source); for (i = 0; i < browser->nr_events; i++) { struct sym_hist_entry sample; @@ -466,7 +467,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, bpos->samples[i].percent = disasm__calc_percent(notes, evsel->idx + i, pos->al.offset, - next ? next->al.offset : len, + next ? 
next->offset : len, &path, &sample); bpos->samples[i].he = sample; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 75f54eab22c8..e7da88d7bb27 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -942,10 +942,11 @@ static void disasm__add(struct list_head *head, struct disasm_line *line) list_add_tail(&line->al.node, head); } -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head) { - list_for_each_entry_continue(pos, head, al.node) - if (pos->al.offset >= 0) + list_for_each_entry_continue(pos, head, node) + if (pos->offset >= 0) return pos; return NULL; @@ -1096,10 +1097,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st struct annotation *notes = symbol__annotation(sym); s64 offset = dl->al.offset; const u64 addr = start + offset; - struct disasm_line *next; + struct annotation_line *next; struct block_range *br; - next = disasm__get_next_ip_line(&notes->src->source, dl); + next = annotation_line__next(&dl->al, &notes->src->source); if (perf_evsel__is_group_event(evsel)) { nr_percent = evsel->nr_members; @@ -1114,7 +1115,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st percent = disasm__calc_percent(notes, notes->src->lines ? i : evsel->idx + i, offset, - next ? next->al.offset : (s64) len, + next ? 
next->offset : (s64) len, &path, &sample); ppercents[i] = percent; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index baf34032504a..43bef6cacbc4 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -85,7 +85,8 @@ struct sym_hist_entry { }; void disasm_line__free(struct disasm_line *dl); -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, -- cgit v1.2.3 From 82b9d7ff096b7e7ae3efaeb341ee673bb494bb61 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:35 +0200 Subject: perf annotate: Add annotation_line__add function Rename disasm__add() into annotation_line__add() to make it work over a generic struct annotation_line. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e7da88d7bb27..11c7743203a0 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -937,9 +937,9 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } -static void disasm__add(struct list_head *head, struct disasm_line *line) +static void annotation_line__add(struct annotation_line *al, struct list_head *head) { - list_add_tail(&line->al.node, head); + list_add_tail(&al->node, head); } struct annotation_line * @@ -1301,7 +1301,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, dl->ops.target.name = strdup(target.sym->name); } - disasm__add(&notes->src->source, dl); + annotation_line__add(&dl->al, &notes->src->source); return 0; } -- cgit v1.2.3 From 5b12adc849be011fd6d99a16e39d83afee43c0a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:36 +0200 Subject: perf annotate: Move rb_node to struct annotation_line Move rb_node to struct annotation_line to make struct annotation_line the rb tree node for sorted lines used in both stdio and TUI code. This way we can unite the sorted lines code for both TUI and stdio in the following patches. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-14-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 30 ++++++++++++++++-------------- tools/perf/util/annotate.h | 1 + 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 881ad6122057..cfde5a2ca3f4 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -26,7 +26,6 @@ struct disasm_line_samples { #define CYCLES_WIDTH 6 struct browser_disasm_line { - struct rb_node rb_node; u32 idx; int idx_asm; int jump_sources; @@ -362,9 +361,11 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct browser_disasm_line *a, - struct browser_disasm_line *b, int nr_pcnt) +static int disasm__cmp(struct disasm_line *da, + struct disasm_line *db, int nr_pcnt) { + struct browser_disasm_line *a = disasm_line__browser(da); + struct browser_disasm_line *b = disasm_line__browser(db); int i; for (i = 0; i < nr_pcnt; i++) { @@ -375,24 +376,24 @@ static int disasm__cmp(struct browser_disasm_line *a, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct browser_disasm_line *bdl, +static void disasm_rb_tree__insert(struct rb_root *root, struct disasm_line *dl, int nr_events) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; - struct browser_disasm_line *l; + struct disasm_line *l; while (*p != NULL) { parent = *p; - l = rb_entry(parent, struct browser_disasm_line, rb_node); + l = rb_entry(parent, struct disasm_line, al.rb_node); - if (disasm__cmp(bdl, l, nr_events)) + if (disasm__cmp(dl, l, nr_events)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&bdl->rb_node, parent, p); - rb_insert_color(&bdl->rb_node, root); + rb_link_node(&dl->al.rb_node, parent, p); + 
rb_insert_color(&dl->al.rb_node, root); } static void annotate_browser__set_top(struct annotate_browser *browser, @@ -425,8 +426,9 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct disasm_line *pos; u32 idx; - bpos = rb_entry(nd, struct browser_disasm_line, rb_node); - pos = ((struct disasm_line *)bpos) - 1; + pos = rb_entry(nd, struct disasm_line, al.rb_node); + bpos = disasm_line__browser(pos); + idx = bpos->idx; if (annotate_browser__opts.hide_src_code) idx = bpos->idx_asm; @@ -455,7 +457,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, int i; if (pos->al.offset == -1) { - RB_CLEAR_NODE(&bpos->rb_node); + RB_CLEAR_NODE(&pos->al.rb_node); continue; } @@ -476,10 +478,10 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, } if (max_percent < 0.01 && pos->al.ipc == 0) { - RB_CLEAR_NODE(&bpos->rb_node); + RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, bpos, + disasm_rb_tree__insert(&browser->entries, pos, browser->nr_events); } pthread_mutex_unlock(&notes->lock); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 43bef6cacbc4..6f01e6117936 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -61,6 +61,7 @@ struct annotation; struct annotation_line { struct list_head node; + struct rb_node rb_node; s64 offset; char *line; int line_nr; -- cgit v1.2.3 From c835e1914c4bcfdd41f43d270cafc6d8119d7782 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:37 +0200 Subject: perf annotate: Add annotation_line__(new|delete) functions Changing the way the annotation lines are allocated and adding annotation_line__(new|delete) functions to deal with this. 
Before the allocation schema was as follows: ----------------------------------------------------------- struct disasm_line | struct annotation_line | private space ----------------------------------------------------------- Where the private space is used in TUI code to store computed annotation data for events. The stdio code computes the data on the fly. The goal is to compute and store annotation line's data directly in the struct annotation_line itself, so this patch changes the line allocation schema as follows: ------------------------------------------------------------ privsize space | struct disasm_line | struct annotation_line ------------------------------------------------------------ Moving struct annotation_line to the end, because in following changes we will move here the non-fixed length event's data. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-15-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 4 ++- tools/perf/util/annotate.c | 63 ++++++++++++++++++++++++++++++++++----- tools/perf/util/annotate.h | 10 ++++++- 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index cfde5a2ca3f4..7ca5ae625cc9 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -76,7 +76,9 @@ struct annotate_browser { static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) { - return (struct browser_disasm_line *)(dl + 1); + struct annotation_line *al = &dl->al; + + return (void *) al - al->privsize; } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 11c7743203a0..7c74700ae6d7 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -888,14 +888,64 @@ struct 
annotate_args { int line_nr; }; +static void annotation_line__delete(struct annotation_line *al) +{ + void *ptr = (void *) al - al->privsize; + + zfree(&al->line); + free(ptr); +} + +/* + * Allocating the annotation line data with following + * structure: + * + * -------------------------------------- + * private space | struct annotation_line + * -------------------------------------- + * + * Size of the private space is stored in 'struct annotation_line'. + * + */ +static struct annotation_line * +annotation_line__new(struct annotate_args *args, size_t privsize) +{ + struct annotation_line *al; + size_t size = privsize + sizeof(*al); + + al = zalloc(size); + if (al) { + al = (void *) al + privsize; + al->privsize = privsize; + al->offset = args->offset; + al->line = strdup(args->line); + al->line_nr = args->line_nr; + } + + return al; +} + +/* + * Allocating the disasm annotation line data with + * following structure: + * + * ------------------------------------------------------------ + * privsize space | struct disasm_line | struct annotation_line + * ------------------------------------------------------------ + * + * We have 'struct annotation_line' member as last member + * of 'struct disasm_line' to have an easy access. 
+ * + */ static struct disasm_line *disasm_line__new(struct annotate_args *args) { - struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); + struct disasm_line *dl = NULL; + struct annotation_line *al; + size_t privsize = args->privsize + offsetof(struct disasm_line, al); - if (dl != NULL) { - dl->al.offset = args->offset; - dl->al.line = strdup(args->line); - dl->al.line_nr = args->line_nr; + al = annotation_line__new(args, privsize); + if (al != NULL) { + dl = disasm_line(al); if (dl->al.line == NULL) goto out_delete; @@ -919,14 +969,13 @@ out_delete: void disasm_line__free(struct disasm_line *dl) { - zfree(&dl->al.line); if (dl->ins.ops && dl->ins.ops->free) dl->ins.ops->free(&dl->ops); else ins__delete(&dl->ops); free((void *)dl->ins.name); dl->ins.name = NULL; - free(dl); + annotation_line__delete(&dl->al); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6f01e6117936..2e7a08afb04f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -67,14 +67,22 @@ struct annotation_line { int line_nr; float ipc; u64 cycles; + size_t privsize; }; struct disasm_line { - struct annotation_line al; struct ins ins; struct ins_operands ops; + + /* This needs to be at the end. */ + struct annotation_line al; }; +static inline struct disasm_line *disasm_line(struct annotation_line *al) +{ + return al ? container_of(al, struct disasm_line, al) : NULL; +} + static inline bool disasm_line__has_offset(const struct disasm_line *dl) { return dl->ops.target.offset_avail; -- cgit v1.2.3 From f8eb37bd7c33babc01d9c2e3074ce001eec6cfbb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:38 +0200 Subject: perf annotate: Add annotated_source__purge function Move disasm__purge() to annotated_source__purge() to make it work over a generic struct annotation_line. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 8 +++----- tools/perf/util/annotate.c | 12 ++++++------ tools/perf/util/annotate.h | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 7ca5ae625cc9..4c54d5e76008 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1084,7 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - struct disasm_line *pos, *n; + struct disasm_line *pos; struct annotation *notes; size_t size; struct map_symbol ms = { @@ -1180,10 +1180,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, annotate_browser__update_addr_width(&browser); ret = annotate_browser__run(&browser, evsel, hbt); - list_for_each_entry_safe(pos, n, &notes->src->source, al.node) { - list_del(&pos->al.node); - disasm_line__free(pos); - } + + annotated_source__purge(notes->src); out_free_offsets: free(browser.offsets); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7c74700ae6d7..0c2eb95ba90a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1985,13 +1985,13 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) } } -void disasm__purge(struct list_head *head) +void annotated_source__purge(struct annotated_source *as) { - struct disasm_line *pos, *n; + struct annotation_line *al, *n; - list_for_each_entry_safe(pos, n, head, al.node) { - list_del(&pos->al.node); - disasm_line__free(pos); + list_for_each_entry_safe(al, n, &as->source, node) { + list_del(&al->node); + disasm_line__free(disasm_line(al)); } } @@ -2047,7 +2047,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, if 
(print_lines) symbol__free_source_line(sym, len); - disasm__purge(&symbol__annotation(sym)->src->source); + annotated_source__purge(symbol__annotation(sym)->src); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 2e7a08afb04f..cb60cafae1fb 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -212,7 +212,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int min_pcnt, int max_lines, int context); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); -void disasm__purge(struct list_head *head); +void annotated_source__purge(struct annotated_source *as); bool ui__has_annotation(void); -- cgit v1.2.3 From 7e304557ead5b309d59807b2f05ed47f2c0076c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:39 +0200 Subject: perf annotate: Add samples into struct annotation_line Add samples array into struct annotation_line to hold the annotation data. The data is populated in the following patches. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-17-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 ++++++++ tools/perf/util/annotate.h | 17 ++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0c2eb95ba90a..313fb2e90dba 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -911,7 +911,14 @@ static struct annotation_line * annotation_line__new(struct annotate_args *args, size_t privsize) { struct annotation_line *al; + struct perf_evsel *evsel = args->evsel; size_t size = privsize + sizeof(*al); + int nr = 1; + + if (perf_evsel__is_group_event(evsel)) + nr = evsel->nr_members; + + size += sizeof(al->samples[0]) * nr; al = zalloc(size); if (al) { @@ -920,6 +927,7 @@ annotation_line__new(struct annotate_args *args, size_t privsize) al->offset = args->offset; al->line = strdup(args->line); al->line_nr = args->line_nr; + al->samples_nr = nr; } return al; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cb60cafae1fb..55bdd9015f33 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,6 +59,16 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; +struct sym_hist_entry { + u64 nr_samples; + u64 period; +}; + +struct annotation_data { + double percent; + struct sym_hist_entry he; +}; + struct annotation_line { struct list_head node; struct rb_node rb_node; @@ -68,6 +78,8 @@ struct annotation_line { float ipc; u64 cycles; size_t privsize; + int samples_nr; + struct annotation_data samples[0]; }; struct disasm_line { @@ -88,11 +100,6 @@ static inline bool disasm_line__has_offset(const struct disasm_line *dl) return dl->ops.target.offset_avail; } -struct sym_hist_entry { - u64 nr_samples; - u64 period; -}; - void 
disasm_line__free(struct disasm_line *dl); struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); -- cgit v1.2.3 From c20c6704bf2dafaba0d90c8310ef9e919fe4d2e2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 16 Nov 2017 04:36:51 +0000 Subject: ASoC: rcar: revert IOMMU support so far commit 4821d914fe74 ("ASoC: rsnd: use dma_sync_single_for_xxx() for IOMMU") had supported IOMMU, but it breaks normal sound "record" and both PulseAudio's "playback/record". The sound will be noisy. That commit was using dma_sync_single_for_xxx(), and driver should make sure memory is protected while the CPU or device is using it. But if driver returns current "residue" data size correctly on pointer function, player/recorder will access protected memory. IOMMU feature should be supported, but I don't know how to handle it without memory cache problem at this point. Thus, this patch simply reverts it to avoid current noisy sound. Tested-by: Hiroyuki Yokoyama Tested-by: Ryo Kodama Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 4 +-- sound/soc/sh/rcar/dma.c | 86 ++++-------------------------------------------- 2 files changed, 8 insertions(+), 82 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index c70eb2097816..f12a88a21dfa 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd) return snd_pcm_lib_preallocate_pages_for_all( rtd->pcm, - SNDRV_DMA_TYPE_CONTINUOUS, - snd_dma_continuous_data(GFP_KERNEL), + SNDRV_DMA_TYPE_DEV, + rtd->card->snd_card->dev, PREALLOC_BUFFER, PREALLOC_BUFFER_MAX); } diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index fd557abfe390..4d750bdf8e24 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -26,10 +26,7 @@ struct rsnd_dmaen { struct dma_chan *chan; dma_cookie_t cookie; - dma_addr_t dma_buf; unsigned int 
dma_len; - unsigned int dma_period; - unsigned int dma_cnt; }; struct rsnd_dmapp { @@ -71,38 +68,10 @@ static struct rsnd_mod mem = { /* * Audio DMAC */ -#define rsnd_dmaen_sync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 1) -#define rsnd_dmaen_unsync(dmaen, io, i) __rsnd_dmaen_sync(dmaen, io, i, 0) -static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io, - int i, int sync) -{ - struct device *dev = dmaen->chan->device->dev; - enum dma_data_direction dir; - int is_play = rsnd_io_is_play(io); - dma_addr_t buf; - int len, max; - size_t period; - - len = dmaen->dma_len; - period = dmaen->dma_period; - max = len / period; - i = i % max; - buf = dmaen->dma_buf + (period * i); - - dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE; - - if (sync) - dma_sync_single_for_device(dev, buf, period, dir); - else - dma_sync_single_for_cpu(dev, buf, period, dir); -} - static void __rsnd_dmaen_complete(struct rsnd_mod *mod, struct rsnd_dai_stream *io) { struct rsnd_priv *priv = rsnd_mod_to_priv(mod); - struct rsnd_dma *dma = rsnd_mod_to_dma(mod); - struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); bool elapsed = false; unsigned long flags; @@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod, */ spin_lock_irqsave(&priv->lock, flags); - if (rsnd_io_is_working(io)) { - rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt); - - /* - * Next period is already started. 
- * Let's sync Next Next period - * see - * rsnd_dmaen_start() - */ - rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2); - + if (rsnd_io_is_working(io)) elapsed = true; - dmaen->dma_cnt++; - } - spin_unlock_irqrestore(&priv->lock, flags); if (elapsed) @@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod, struct rsnd_dma *dma = rsnd_mod_to_dma(mod); struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma); - if (dmaen->chan) { - int is_play = rsnd_io_is_play(io); - + if (dmaen->chan) dmaengine_terminate_all(dmaen->chan); - dma_unmap_single(dmaen->chan->device->dev, - dmaen->dma_buf, dmaen->dma_len, - is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - } return 0; } @@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, struct device *dev = rsnd_priv_to_dev(priv); struct dma_async_tx_descriptor *desc; struct dma_slave_config cfg = {}; - dma_addr_t buf; - size_t len; - size_t period; int is_play = rsnd_io_is_play(io); - int i; int ret; cfg.direction = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; @@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, if (ret < 0) return ret; - len = snd_pcm_lib_buffer_bytes(substream); - period = snd_pcm_lib_period_bytes(substream); - buf = dma_map_single(dmaen->chan->device->dev, - substream->runtime->dma_area, - len, - is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (dma_mapping_error(dmaen->chan->device->dev, buf)) { - dev_err(dev, "dma map failed\n"); - return -EIO; - } - desc = dmaengine_prep_dma_cyclic(dmaen->chan, - buf, len, period, + substream->runtime->dma_addr, + snd_pcm_lib_buffer_bytes(substream), + snd_pcm_lib_period_bytes(substream), is_play ? 
DMA_MEM_TO_DEV : DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod, desc->callback = rsnd_dmaen_complete; desc->callback_param = rsnd_mod_get(dma); - dmaen->dma_buf = buf; - dmaen->dma_len = len; - dmaen->dma_period = period; - dmaen->dma_cnt = 0; - - /* - * synchronize this and next period - * see - * __rsnd_dmaen_complete() - */ - for (i = 0; i < 2; i++) - rsnd_dmaen_sync(dmaen, io, i); + dmaen->dma_len = snd_pcm_lib_buffer_bytes(substream); dmaen->cookie = dmaengine_submit(desc); if (dmaen->cookie < 0) { -- cgit v1.2.3 From 8c059a4676038967dd6efe614538c329b61e68a1 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Wed, 15 Nov 2017 11:52:32 -0800 Subject: spi: imx: Update device tree binding documentation Update documentation for gpio-cs and num-cs to reflect the standard SPI bindings. The dma properties are optional. Include a warning that native CS do not work in a commonly useful manner with this hardware/driver, and therefor most users probably should use GPIO based CS lines rather than native. CC: Mark Brown CC: Shawn Guo CC: Sascha Hauer CC: Fabio Estevam CC: Oleksij Rempel Signed-off-by: Trent Piepho Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt index 5bf13960f7f4..e3c48b20b1a6 100644 --- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt @@ -12,24 +12,30 @@ Required properties: - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc - reg : Offset and length of the register set for the device - interrupts : Should contain CSPI/eCSPI interrupt -- cs-gpios : Specifies the gpio pins to be used for chipselects. 
- clocks : Clock specifiers for both ipg and per clocks. - clock-names : Clock names should include both "ipg" and "per" See the clock consumer binding, Documentation/devicetree/bindings/clock/clock-bindings.txt -- dmas: DMA specifiers for tx and rx dma. See the DMA client binding, - Documentation/devicetree/bindings/dma/dma.txt -- dma-names: DMA request names should include "tx" and "rx" if present. -Obsolete properties: -- fsl,spi-num-chipselects : Contains the number of the chipselect +Recommended properties: +- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip +select lines can be used, they appear to always generate a pulse between each +word of a transfer. Most use cases will require GPIO based chip selects to +generate a valid transaction. Optional properties: +- num-cs : Number of total chip selects, see spi-bus.txt. +- dmas: DMA specifiers for tx and rx dma. See the DMA client binding, +Documentation/devicetree/bindings/dma/dma.txt. +- dma-names: DMA request names, if present, should include "tx" and "rx". - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register controlling the SPI_READY handling. Note that to enable the DRCTL consideration, the SPI_READY mode-flag needs to be set too. Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst). +Obsolete properties: +- fsl,spi-num-chipselects : Contains the number of the chipselect + Example: ecspi@70010000 { -- cgit v1.2.3 From 4c761ebfcb2d04ee36783c4c8c45ae00caf59d36 Mon Sep 17 00:00:00 2001 From: Naveen Manohar Date: Fri, 3 Nov 2017 19:15:02 +0530 Subject: ASoC: Intel: kbl: Modify map for Headset Playback to fix pop-noise Patch fixes wrong path in commit 0b06122fc8d0 ("ASoC: Intel: kbl: Add map for new DAIs for Multi-Playback & Echo Ref") which resulted in pop noise. Current topology for Headset results in unwanted pop noise, while switching from spk->hs at the start of Headset Playback. 
Hence re-introduced mixin-mixout dsp module in topology for headset playback pipe to fix the regression. And the corresponding modification for headset route is updated here. Fixes: 0b06122fc8d0 ("ASoC: Intel: kbl: Add map for new DAIs for Multi-Playback & Echo Ref") Signed-off-by: Naveen Manohar Signed-off-by: Sathya Prakash M R Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/kbl_rt5663_max98927.c | 2 +- sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c index 6f9a8bcf20f3..6dcad0a8a0d0 100644 --- a/sound/soc/intel/boards/kbl_rt5663_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c @@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = { { "ssp0 Tx", NULL, "spk_out" }, { "AIF Playback", NULL, "ssp1 Tx" }, - { "ssp1 Tx", NULL, "hs_out" }, + { "ssp1 Tx", NULL, "codec1_out" }, { "hs_in", NULL, "ssp1 Rx" }, { "ssp1 Rx", NULL, "AIF Capture" }, diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 6072164f2d43..271ae3c2c535 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = { { "ssp0 Tx", NULL, "spk_out" }, { "AIF Playback", NULL, "ssp1 Tx" }, - { "ssp1 Tx", NULL, "hs_out" }, + { "ssp1 Tx", NULL, "codec1_out" }, { "hs_in", NULL, "ssp1 Rx" }, { "ssp1 Rx", NULL, "AIF Capture" }, -- cgit v1.2.3 From 54f0a51a73acba08f047881c397e4040a573339b Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 13 Nov 2017 21:05:35 +0800 Subject: regulator: tps65218: Remove unused enum tps65218_regulators The enum tps65218_regulators is no longer being used after commit 2dc4940360d4 ("regulator: tps65218: Remove all the compatibles"). 
Signed-off-by: Axel Lin Reviewed-by: Keerthy Signed-off-by: Mark Brown --- drivers/regulator/tps65218-regulator.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c index bc489958fed7..b6f705292aef 100644 --- a/drivers/regulator/tps65218-regulator.c +++ b/drivers/regulator/tps65218-regulator.c @@ -28,9 +28,6 @@ #include #include -enum tps65218_regulators { DCDC1, DCDC2, DCDC3, DCDC4, - DCDC5, DCDC6, LDO1, LS3 }; - #define TPS65218_REGULATOR(_name, _of, _id, _type, _ops, _n, _vr, _vm, _er, \ _em, _cr, _cm, _lr, _nlr, _delay, _fuv, _sr, _sm) \ { \ -- cgit v1.2.3 From 5597bfb474d3ed84c1e0c73db620a257bbe127b6 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 13 Nov 2017 20:52:32 +0800 Subject: regulator: tps65218: Add NULL test for devm_kzalloc call Signed-off-by: Axel Lin Reviewed-by: Keerthy Signed-off-by: Mark Brown --- drivers/regulator/tps65218-regulator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c index b6f705292aef..1827185beacc 100644 --- a/drivers/regulator/tps65218-regulator.c +++ b/drivers/regulator/tps65218-regulator.c @@ -326,6 +326,8 @@ static int tps65218_regulator_probe(struct platform_device *pdev) /* Allocate memory for strobes */ tps->strobes = devm_kzalloc(&pdev->dev, sizeof(u8) * TPS65218_NUM_REGULATOR, GFP_KERNEL); + if (!tps->strobes) + return -ENOMEM; for (i = 0; i < ARRAY_SIZE(regulators); i++) { rdev = devm_regulator_register(&pdev->dev, ®ulators[i], -- cgit v1.2.3 From fe83b1b7d7d0ff11210f84f25b8e1ba1afbac76f Mon Sep 17 00:00:00 2001 From: John Hsu Date: Mon, 13 Nov 2017 10:16:17 +0800 Subject: ASoC: nau8540: improve FLL performance Add these parameters to improve the FLL performance. The comments show as follows: (1)ICTRL_LATCH: FLL DSP speed capability control When FLL running at high frequency with long decimal number, DSP needs to operate at high speed. 
FLL DSP can trade off performance against power consumption via ICTRL_LATCH (111 has the highest power consumption). The default setting can be used to reduce power. (2)CUTOFF500: loop filter cutoff frequency at 500 kHz. Enabling the cutoff frequency gives the best FLL performance but the highest power consumption. Enabling the FLL loop filter reduces FLL output noise, especially when (DCO frequency)/(FLL input reference frequency) is not an integer. (3)GAIN_ERR: FLL gain error correction threshold setting. The threshold is used for the comparison between the DCO frequency and the target frequency. The value 1111 is the most sensitive threshold, that is, 1111 gives the most accurate DCO-to-target frequency. However, the gain error setting conditionally and inversely depends on the FLL input reference clock rate. A higher FLL reference input frequency can only use a lower gain error, such as 0000 for an input reference from MCLK = 12.288 MHz. On the other hand, if the FLL reference input is from Frame Sync at 48 kHz, a higher gain error such as 1111 can be applied. 
Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8540.c | 21 ++++++++++++++------- sound/soc/codecs/nau8540.h | 5 +++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c index 9565f9a181a9..b08fb7e243c3 100644 --- a/sound/soc/codecs/nau8540.c +++ b/sound/soc/codecs/nau8540.c @@ -615,7 +615,8 @@ static void nau8540_fll_apply(struct regmap *regmap, NAU8540_CLK_SRC_MASK | NAU8540_CLK_MCLK_SRC_MASK, NAU8540_CLK_SRC_MCLK | fll_param->mclk_src); regmap_update_bits(regmap, NAU8540_REG_FLL1, - NAU8540_FLL_RATIO_MASK, fll_param->ratio); + NAU8540_FLL_RATIO_MASK | NAU8540_ICTRL_LATCH_MASK, + fll_param->ratio | (0x6 << NAU8540_ICTRL_LATCH_SFT)); /* FLL 16-bit fractional input */ regmap_write(regmap, NAU8540_REG_FLL2, fll_param->fll_frac); /* FLL 10-bit integer input */ @@ -636,13 +637,14 @@ static void nau8540_fll_apply(struct regmap *regmap, NAU8540_FLL_PDB_DAC_EN | NAU8540_FLL_LOOP_FTR_EN | NAU8540_FLL_FTR_SW_FILTER); regmap_update_bits(regmap, NAU8540_REG_FLL6, - NAU8540_SDM_EN, NAU8540_SDM_EN); + NAU8540_SDM_EN | NAU8540_CUTOFF500, + NAU8540_SDM_EN | NAU8540_CUTOFF500); } else { regmap_update_bits(regmap, NAU8540_REG_FLL5, NAU8540_FLL_PDB_DAC_EN | NAU8540_FLL_LOOP_FTR_EN | NAU8540_FLL_FTR_SW_MASK, NAU8540_FLL_FTR_SW_ACCU); - regmap_update_bits(regmap, - NAU8540_REG_FLL6, NAU8540_SDM_EN, 0); + regmap_update_bits(regmap, NAU8540_REG_FLL6, + NAU8540_SDM_EN | NAU8540_CUTOFF500, 0); } } @@ -657,17 +659,22 @@ static int nau8540_set_pll(struct snd_soc_codec *codec, int pll_id, int source, switch (pll_id) { case NAU8540_CLK_FLL_MCLK: regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, - NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_MCLK); + NAU8540_FLL_CLK_SRC_MASK | NAU8540_GAIN_ERR_MASK, + NAU8540_FLL_CLK_SRC_MCLK | 0); break; case NAU8540_CLK_FLL_BLK: regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, - NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_BLK); + 
NAU8540_FLL_CLK_SRC_MASK | NAU8540_GAIN_ERR_MASK, + NAU8540_FLL_CLK_SRC_BLK | + (0xf << NAU8540_GAIN_ERR_SFT)); break; case NAU8540_CLK_FLL_FS: regmap_update_bits(nau8540->regmap, NAU8540_REG_FLL3, - NAU8540_FLL_CLK_SRC_MASK, NAU8540_FLL_CLK_SRC_FS); + NAU8540_FLL_CLK_SRC_MASK | NAU8540_GAIN_ERR_MASK, + NAU8540_FLL_CLK_SRC_FS | + (0xf << NAU8540_GAIN_ERR_SFT)); break; default: diff --git a/sound/soc/codecs/nau8540.h b/sound/soc/codecs/nau8540.h index dceb04b23c19..732b490edf81 100644 --- a/sound/soc/codecs/nau8540.h +++ b/sound/soc/codecs/nau8540.h @@ -100,9 +100,13 @@ #define NAU8540_CLK_MCLK_SRC_MASK 0xf /* FLL1 (0x04) */ +#define NAU8540_ICTRL_LATCH_SFT 10 +#define NAU8540_ICTRL_LATCH_MASK (0x7 << NAU8540_ICTRL_LATCH_SFT) #define NAU8540_FLL_RATIO_MASK 0x7f /* FLL3 (0x06) */ +#define NAU8540_GAIN_ERR_SFT 12 +#define NAU8540_GAIN_ERR_MASK (0xf << NAU8540_GAIN_ERR_SFT) #define NAU8540_FLL_CLK_SRC_SFT 10 #define NAU8540_FLL_CLK_SRC_MASK (0x3 << NAU8540_FLL_CLK_SRC_SFT) #define NAU8540_FLL_CLK_SRC_MCLK (0 << NAU8540_FLL_CLK_SRC_SFT) @@ -127,6 +131,7 @@ /* FLL6 (0x9) */ #define NAU8540_DCO_EN (0x1 << 15) #define NAU8540_SDM_EN (0x1 << 14) +#define NAU8540_CUTOFF500 (0x1 << 13) /* PCM_CTRL0 (0x10) */ #define NAU8540_I2S_BP_SFT 7 -- cgit v1.2.3 From 080f773d226a9c2b0fa0d8f02107518c560f8b77 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Mon, 13 Nov 2017 10:36:28 +0800 Subject: ASoC: nau8824: change FVCO maximum threshold Change the maximum of FDCO which remains between 90MHz-100MHz. FDCO must be within the 90MHz-100MHz or the FFL cannot be guaranteed across the full range of operation. 
Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8824.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 8a9a9939827e..088e0cef4cb8 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -43,7 +43,7 @@ static bool nau8824_is_jack_inserted(struct nau8824 *nau8824); /* the parameter threshold of FLL */ #define NAU_FREF_MAX 13500000 -#define NAU_FVCO_MAX 124000000 +#define NAU_FVCO_MAX 100000000 #define NAU_FVCO_MIN 90000000 /* scaling for mclk from sysclk_src output */ -- cgit v1.2.3 From 00ac9562158e8541d60e91eb72aa9ed0674d4a6a Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Mon, 13 Nov 2017 08:47:21 +0100 Subject: spi: spi-fsl-dspi: add SPI_LSB_FIRST to driver capabilities The driver as well as the controller support the SPI lsb first mode. However, it's not possible to configure it e.g. when using spidev. Adding this flag to mode_bits resolves the issue and lsb first mode can be used. 
Signed-off-by: Kurt Kanzenbach Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index f652f70cb8db..02d3ed7f2558 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -980,7 +980,7 @@ static int dspi_probe(struct platform_device *pdev) master->dev.of_node = pdev->dev.of_node; master->cleanup = dspi_cleanup; - master->mode_bits = SPI_CPOL | SPI_CPHA; + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST; master->bits_per_word_mask = SPI_BPW_MASK(4) | SPI_BPW_MASK(8) | SPI_BPW_MASK(16); -- cgit v1.2.3 From bc6476d6c1edcb9b97621b5131bd169aa81f27db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Nov 2017 12:12:55 +0100 Subject: ASoC: da7218: fix fix child-node lookup Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent codec node was also prematurely freed. 
Fixes: 4d50934abd22 ("ASoC: da7218: Add da7218 codec driver") Signed-off-by: Johan Hovold Acked-by: Adam Thomson Signed-off-by: Mark Brown Cc: stable --- sound/soc/codecs/da7218.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c index b2d42ec1dcd9..56564ce90cb6 100644 --- a/sound/soc/codecs/da7218.c +++ b/sound/soc/codecs/da7218.c @@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec) } if (da7218->dev_id == DA7218_DEV_ID) { - hpldet_np = of_find_node_by_name(np, "da7218_hpldet"); + hpldet_np = of_get_child_by_name(np, "da7218_hpldet"); if (!hpldet_np) return pdata; -- cgit v1.2.3 From 15f8c5f2415bfac73f33a14bcd83422bcbfb5298 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Nov 2017 12:12:56 +0100 Subject: ASoC: twl4030: fix child-node lookup Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent codec node was also prematurely freed, while the child node was leaked. 
Fixes: 2d6d649a2e0f ("ASoC: twl4030: Support for DT booted kernel") Signed-off-by: Johan Hovold Signed-off-by: Mark Brown Cc: stable --- sound/soc/codecs/twl4030.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index c482b2e7a7d2..cfe72b9d4356 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev); struct device_node *twl4030_codec_node = NULL; - twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node, + twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node, "codec"); if (!pdata && twl4030_codec_node) { @@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) GFP_KERNEL); if (!pdata) { dev_err(codec->dev, "Can not allocate memory\n"); + of_node_put(twl4030_codec_node); return NULL; } twl4030_setup_pdata_of(pdata, twl4030_codec_node); + of_node_put(twl4030_codec_node); } return pdata; -- cgit v1.2.3 From 073ae601edc211383b62618effaaedaa8b1d22db Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:40 +0200 Subject: perf annotate: Add symbol__calc_percent function Add symbol__calc_percent function, that calculates annotation data for symbol and put the data in the struct annotation_line::samples array. Committer notes: Made symbol__calc_percent non static to be used in the next two patches, which will get some fixups from jolsa, doing it this way to keep this bisectable. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-18-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 62 +++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 1 + 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 313fb2e90dba..ff1036096347 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1628,6 +1628,62 @@ out_close_stdout: goto out_remove_tmp; } +static void calc_percent(struct sym_hist *hist, + struct annotation_data *sample, + s64 offset, s64 end) +{ + unsigned int hits = 0; + u64 period = 0; + + while (offset < end) { + hits += hist->addr[offset].nr_samples; + period += hist->addr[offset].period; + ++offset; + } + + if (hist->nr_samples) { + sample->he.period = period; + sample->he.nr_samples = hits; + sample->percent = 100.0 * hits / hist->nr_samples; + } +} + +static int annotation__calc_percent(struct annotation *notes, + struct perf_evsel *evsel, s64 len) +{ + struct annotation_line *al, *next; + + list_for_each_entry(al, ¬es->src->source, node) { + s64 end; + int i; + + if (al->offset == -1) + continue; + + next = annotation_line__next(al, ¬es->src->source); + end = next ? 
next->offset : len; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + struct sym_hist *hist; + + hist = annotation__histogram(notes, evsel->idx + i); + sample = &al->samples[i]; + + calc_percent(hist, sample, al->offset, end); + } + } + + return 0; +} + +int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) +{ + struct annotation *notes = symbol__annotation(sym); + + return annotation__calc_percent(notes, evsel, symbol__size(sym)); +} + int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid) @@ -1663,7 +1719,11 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, &args); + err = symbol__disassemble(sym, &args); + if (err) + return err; + + return symbol__calc_percent(sym, evsel); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 55bdd9015f33..6056840da4c9 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,6 +107,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r size_t disasm__fprintf(struct list_head *head, FILE *fp); double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, s64 end, const char **path, struct sym_hist_entry *sample); +int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { u64 nr_samples; -- cgit v1.2.3 From 8b4c74dc5cd40a3bc77f8bc2b6b7b33dc125e302 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:41 +0200 Subject: perf annotate: Add symbol__calc_lines function Replace symbol__get_source_line() with symbol__calc_lines(), which calculates the source line tree over the struct annotation_line. This will allow us to remove redundant struct source_line in following patches. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-19-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 186 ++++++++++++++++----------------------------- tools/perf/util/annotate.h | 2 + 2 files changed, 68 insertions(+), 120 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ff1036096347..96cf6767b5ce 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -892,6 +892,7 @@ static void annotation_line__delete(struct annotation_line *al) { void *ptr = (void *) al - al->privsize; + free_srcline(al->path); zfree(&al->line); free(ptr); } @@ -1726,21 +1727,21 @@ int symbol__annotate(struct symbol *sym, struct map *map, return symbol__calc_percent(sym, evsel); } -static void insert_source_line(struct rb_root *root, struct source_line *src_line) +static void insert_source_line(struct rb_root *root, struct annotation_line *al) { - struct source_line *iter; + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; int i, ret; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - ret = strcmp(iter->path, src_line->path); + ret = strcmp(iter->path, al->path); if (ret == 0) { - for (i = 0; i < src_line->nr_pcnt; i++) - iter->samples[i].percent_sum += src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + iter->samples[i].percent_sum += al->samples[i].percent; return; } @@ -1750,18 +1751,18 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin p = &(*p)->rb_right; } - for (i = 0; i < src_line->nr_pcnt; i++) - src_line->samples[i].percent_sum = src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + al->samples[i].percent_sum = al->samples[i].percent; - rb_link_node(&src_line->node, parent, p); - 
rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } -static int cmp_source_line(struct source_line *a, struct source_line *b) +static int cmp_source_line(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < a->nr_pcnt; i++) { + for (i = 0; i < a->samples_nr; i++) { if (a->samples[i].percent_sum == b->samples[i].percent_sum) continue; return a->samples[i].percent_sum > b->samples[i].percent_sum; @@ -1770,135 +1771,47 @@ static int cmp_source_line(struct source_line *a, struct source_line *b) return 0; } -static void __resort_source_line(struct rb_root *root, struct source_line *src_line) +static void __resort_source_line(struct rb_root *root, struct annotation_line *al) { - struct source_line *iter; + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - if (cmp_source_line(src_line, iter)) + if (cmp_source_line(al, iter)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; node = rb_first(src_root); while (node) { struct rb_node *next; - src_line = rb_entry(node, struct source_line, node); + al = rb_entry(node, struct annotation_line, rb_node); next = rb_next(node); rb_erase(node, src_root); - __resort_source_line(dest_root, src_line); + __resort_source_line(dest_root, al); node = next; } } -static void symbol__free_source_line(struct symbol *sym, int len) -{ - struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line = 
notes->src->lines; - size_t sizeof_src_line; - int i; - - sizeof_src_line = sizeof(*src_line) + - (sizeof(src_line->samples) * (src_line->nr_pcnt - 1)); - - for (i = 0; i < len; i++) { - free_srcline(src_line->path); - src_line = (void *)src_line + sizeof_src_line; - } - - zfree(¬es->src->lines); -} - -/* Get the filename:line for the colored entries */ -static int symbol__get_source_line(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, - struct rb_root *root, int len) -{ - u64 start; - int i, k; - int evidx = evsel->idx; - struct source_line *src_line; - struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); - struct rb_root tmp_root = RB_ROOT; - int nr_pcnt = 1; - u64 nr_samples = h->nr_samples; - size_t sizeof_src_line = sizeof(struct source_line); - - if (perf_evsel__is_group_event(evsel)) { - for (i = 1; i < evsel->nr_members; i++) { - h = annotation__histogram(notes, evidx + i); - nr_samples += h->nr_samples; - } - nr_pcnt = evsel->nr_members; - sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); - } - - if (!nr_samples) - return 0; - - src_line = notes->src->lines = calloc(len, sizeof_src_line); - if (!notes->src->lines) - return -1; - - start = map__rip_2objdump(map, sym->start); - - for (i = 0; i < len; i++) { - u64 offset; - double percent_max = 0.0; - - src_line->nr_pcnt = nr_pcnt; - - for (k = 0; k < nr_pcnt; k++) { - double percent = 0.0; - - h = annotation__histogram(notes, evidx + k); - nr_samples = h->addr[i].nr_samples; - if (h->nr_samples) - percent = 100.0 * nr_samples / h->nr_samples; - - if (percent > percent_max) - percent_max = percent; - src_line->samples[k].percent = percent; - src_line->samples[k].nr = nr_samples; - } - - if (percent_max <= 0.5) - goto next; - - offset = start + i; - src_line->path = get_srcline(map->dso, offset, NULL, - false, true); - insert_source_line(&tmp_root, src_line); - - next: - src_line = (void *)src_line + sizeof_src_line; - } - 
- resort_source_line(root, &tmp_root); - return 0; -} - static void print_summary(struct rb_root *root, const char *filename) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; printf("\nSorted summary for file %s\n", filename); @@ -1916,9 +1829,9 @@ static void print_summary(struct rb_root *root, const char *filename) char *path; int i; - src_line = rb_entry(node, struct source_line, node); - for (i = 0; i < src_line->nr_pcnt; i++) { - percent = src_line->samples[i].percent_sum; + al = rb_entry(node, struct annotation_line, rb_node); + for (i = 0; i < al->samples_nr; i++) { + percent = al->samples[i].percent_sum; color = get_percent_color(percent); color_fprintf(stdout, color, " %7.2f", percent); @@ -1926,7 +1839,7 @@ static void print_summary(struct rb_root *root, const char *filename) percent_max = percent; } - path = src_line->path; + path = al->path; color = get_percent_color(percent_max); color_fprintf(stdout, color, " %s\n", path); @@ -2091,29 +2004,62 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) return printed; } +static void annotation__calc_lines(struct annotation *notes, struct map *map, + struct rb_root *root, u64 start) +{ + struct annotation_line *al; + struct rb_root tmp_root = RB_ROOT; + + list_for_each_entry(al, ¬es->src->source, node) { + double percent_max = 0.0; + int i; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + + sample = &al->samples[i]; + + if (sample->percent > percent_max) + percent_max = sample->percent; + } + + if (percent_max <= 0.5) + continue; + + al->path = get_srcline(map->dso, start + al->offset, NULL, false, true); + insert_source_line(&tmp_root, al); + } + + resort_source_line(root, &tmp_root); +} + +static void symbol__calc_lines(struct symbol *sym, struct map *map, + struct rb_root *root) +{ + struct annotation *notes = symbol__annotation(sym); + u64 start = map__rip_2objdump(map, sym->start); + + annotation__calc_lines(notes, map, root, 
start); +} + int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - u64 len; if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; - len = symbol__size(sym); - if (print_lines) { srcline_full_filename = full_paths; - symbol__get_source_line(sym, map, evsel, &source_line, len); + symbol__calc_lines(sym, map, &source_line); print_summary(&source_line, dso->long_name); } symbol__annotate_printf(sym, map, evsel, full_paths, min_pcnt, max_lines, 0); - if (print_lines) - symbol__free_source_line(sym, len); annotated_source__purge(symbol__annotation(sym)->src); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6056840da4c9..927810b19f0d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -66,6 +66,7 @@ struct sym_hist_entry { struct annotation_data { double percent; + double percent_sum; struct sym_hist_entry he; }; @@ -78,6 +79,7 @@ struct annotation_line { float ipc; u64 cycles; size_t privsize; + char *path; int samples_nr; struct annotation_data samples[0]; }; -- cgit v1.2.3 From f681d593d1ce7d2fc665c4047b45f4316408b892 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:42 +0200 Subject: perf annotate: Remove disasm__calc_percent() from disasm_line__print() Remove disasm__calc_percent() from disasm_line__print(), because we already have the data calculated in struct annotation_line. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-20-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 3 +++ tools/perf/util/annotate.c | 59 ++++++++++++---------------------------------- 2 files changed, 18 insertions(+), 44 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index adfeeb488f1a..0789f95ca2f3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -226,6 +226,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, static void perf_top__show_details(struct perf_top *top) { struct hist_entry *he = top->sym_filter_entry; + struct perf_evsel *evsel = hists_to_evsel(he->hists); struct annotation *notes; struct symbol *symbol; int more; @@ -238,6 +239,8 @@ static void perf_top__show_details(struct perf_top *top) pthread_mutex_lock(¬es->lock); + symbol__calc_percent(symbol, evsel); + if (notes->src == NULL) goto out_unlock; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 96cf6767b5ce..209a25545542 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1145,41 +1145,19 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st static const char *prev_color; if (dl->al.offset != -1) { - const char *path = NULL; - double percent, max_percent = 0.0; - double *ppercents = &percent; - struct sym_hist_entry sample; - struct sym_hist_entry *psamples = &sample; + double max_percent = 0.0; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); s64 offset = dl->al.offset; const u64 addr = start + offset; - struct annotation_line *next; struct block_range *br; - next = annotation_line__next(&dl->al, ¬es->src->source); + for (i = 0; i < dl->al.samples_nr; i++) { + struct annotation_data *sample = &dl->al.samples[i]; - if (perf_evsel__is_group_event(evsel)) { - nr_percent = 
evsel->nr_members; - ppercents = calloc(nr_percent, sizeof(double)); - psamples = calloc(nr_percent, sizeof(struct sym_hist_entry)); - if (ppercents == NULL || psamples == NULL) { - return -1; - } - } - - for (i = 0; i < nr_percent; i++) { - percent = disasm__calc_percent(notes, - notes->src->lines ? i : evsel->idx + i, - offset, - next ? next->offset : (s64) len, - &path, &sample); - - ppercents[i] = percent; - psamples[i] = sample; - if (percent > max_percent) - max_percent = percent; + if (sample->percent > max_percent) + max_percent = sample->percent; } if (max_percent < min_pcnt) @@ -1204,28 +1182,28 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * the same color than the percentage. Don't print it * twice for close colored addr with the same filename:line */ - if (path) { - if (!prev_line || strcmp(prev_line, path) + if (dl->al.path) { + if (!prev_line || strcmp(prev_line, dl->al.path) || color != prev_color) { - color_fprintf(stdout, color, " %s", path); - prev_line = path; + color_fprintf(stdout, color, " %s", dl->al.path); + prev_line = dl->al.path; prev_color = color; } } for (i = 0; i < nr_percent; i++) { - percent = ppercents[i]; - sample = psamples[i]; - color = get_percent_color(percent); + struct annotation_data *sample = &dl->al.samples[i]; + + color = get_percent_color(sample->percent); if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %11" PRIu64, - sample.period); + sample->he.period); else if (symbol_conf.show_nr_samples) color_fprintf(stdout, color, " %7" PRIu64, - sample.nr_samples); + sample->he.nr_samples); else - color_fprintf(stdout, color, " %7.2f", percent); + color_fprintf(stdout, color, " %7.2f", sample->percent); } printf(" : "); @@ -1235,13 +1213,6 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); printf("\n"); - - if (ppercents != &percent) - 
free(ppercents); - - if (psamples != &sample) - free(psamples); - } else if (max_lines && printed >= max_lines) return 1; else { -- cgit v1.2.3 From e425da6caed1a2872e9543bba83488dbe4bbe3f3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:43 +0200 Subject: perf annotate: Remove disasm__calc_percent() from annotate_browser__calc_percent() Remove disasm__calc_percent() from annotate_browser__calc_percent(), because we already have the data calculated in struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-21-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4c54d5e76008..613682432940 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -444,17 +444,16 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct annotation_line *next; struct disasm_line *pos; - s64 len = symbol__size(sym); browser->entries = RB_ROOT; pthread_mutex_lock(&notes->lock); + symbol__calc_percent(sym, evsel); + list_for_each_entry(pos, &notes->src->source, al.node) { struct browser_disasm_line *bpos = disasm_line__browser(pos); - const char *path = NULL; double max_percent = 0.0; int i; @@ -463,17 +462,11 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - next = annotation_line__next(&pos->al, &notes->src->source); - for (i = 0; i < browser->nr_events; i++) { - struct sym_hist_entry sample; - - bpos->samples[i].percent = disasm__calc_percent(notes, - evsel->idx + i, - pos->al.offset, - next ?
next->offset : len, - &path, &sample); - bpos->samples[i].he = sample; + struct annotation_data *sample = &pos->al.samples[i]; + + bpos->samples[i].percent = sample->percent; + bpos->samples[i].he = sample->he; if (max_percent < bpos->samples[i].percent) max_percent = bpos->samples[i].percent; -- cgit v1.2.3 From 81e436a0b3a7a2f3ac0311674ce407b7cdd23f0b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:44 +0200 Subject: perf annotate: Remove disasm__calc_percent function Remove disasm__calc_percent() function, because it's no longer needed. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 44 -------------------------------------------- tools/perf/util/annotate.h | 2 -- 2 files changed, 46 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 209a25545542..29cf2a5ef620 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1010,50 +1010,6 @@ annotation_line__next(struct annotation_line *pos, struct list_head *head) return NULL; } -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample) -{ - struct source_line *src_line = notes->src->lines; - double percent = 0.0; - - sample->nr_samples = sample->period = 0; - - if (src_line) { - size_t sizeof_src_line = sizeof(*src_line) + - sizeof(src_line->samples) * (src_line->nr_pcnt - 1); - - while (offset < end) { - src_line = (void *)notes->src->lines + - (sizeof_src_line * offset); - - if (*path == NULL) - *path = src_line->path; - - percent += src_line->samples[evidx].percent; - sample->nr_samples += src_line->samples[evidx].nr; - offset++; - } - } else { - struct sym_hist *h = annotation__histogram(notes, evidx); - unsigned int hits = 0; - u64 period = 0; - - while (offset < end) { - 
hits += h->addr[offset].nr_samples; - period += h->addr[offset].period; - ++offset; - } - - if (h->nr_samples) { - sample->period = period; - sample->nr_samples = hits; - percent = 100.0 * hits / h->nr_samples; - } - } - - return percent; -} - static const char *annotate__address_color(struct block_range *br) { double cov = block_range__coverage(br); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 927810b19f0d..f98acb2ad721 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,8 +107,6 @@ struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample); int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { -- cgit v1.2.3 From fa1924eb4abcd756febc031d819ba75c3849ca45 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:45 +0200 Subject: perf annotate: Remove struct source_line Remove struct source_line*, no longer needed. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-23-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f98acb2ad721..4fc805a271d2 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -126,19 +126,6 @@ struct cyc_hist { u16 reset; }; -struct source_line_samples { - double percent; - double percent_sum; - u64 nr; -}; - -struct source_line { - struct rb_node node; - char *path; - int nr_pcnt; - struct source_line_samples samples[1]; -}; - /** struct annotated_source - symbols with hits have this attached as in sannotation * * @histogram: Array of addr hit histograms per event being monitored @@ -154,7 +141,6 @@ struct source_line { */ struct annotated_source { struct list_head source; - struct source_line *lines; int nr_histograms; size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; -- cgit v1.2.3 From 8f25b8197d43885a4cc19bea581e37bf46ed9958 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:46 +0200 Subject: perf annotate: Add annotation_line__print function Separate out the struct annotation_line display function; it will hold the generic line display code.
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-24-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 29cf2a5ef620..5c6f739ac3ac 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1189,6 +1189,18 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 0; } +static int +annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, + struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, + int max_lines, struct annotation_line *aq) +{ + struct disasm_line *dl = container_of(al, struct disasm_line, al); + struct disasm_line *queue = container_of(aq, struct disasm_line, al); + + return disasm_line__print(dl, sym, start, evsel, len, min_pcnt, printed, + max_lines, queue); +} + /* * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) * which looks like following @@ -1797,7 +1809,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, const char *evsel_name = perf_evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->idx); - struct disasm_line *pos, *queue = NULL; + struct annotation_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0; int more = 0; @@ -1830,15 +1842,19 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); - list_for_each_entry(pos, ¬es->src->source, al.node) { + list_for_each_entry(pos, ¬es->src->source, node) { + int err; + if (context && queue == NULL) { queue = pos; queue_len = 0; } - switch (disasm_line__print(pos, sym, start, evsel, len, - min_pcnt, 
printed, max_lines, - queue)) { + err = annotation_line__print(pos, sym, start, evsel, len, + min_pcnt, printed, max_lines, + queue); + + switch (err) { case 0: ++printed; if (context) { @@ -1860,7 +1876,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (!context) break; if (queue_len == context) - queue = list_entry(queue->al.node.next, typeof(*queue), al.node); + queue = list_entry(queue->node.next, typeof(*queue), node); else ++queue_len; break; -- cgit v1.2.3 From 29971f9a82a5d005b37d65fbb73edaf9073279b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:47 +0200 Subject: perf annotate: Factor annotation_line__print from disasm_line__print Move generic annotation line display code into annotation_line__print function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-25-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 69 ++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5c6f739ac3ac..cb065ca431ee 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1093,24 +1093,36 @@ static void annotate__branch_printf(struct block_range *br, u64 addr) } -static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start, - struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct disasm_line *queue) +static int disasm_line__print(struct disasm_line *dl, u64 start) { + s64 offset = dl->al.offset; + const u64 addr = start + offset; + struct block_range *br; + + br = block_range__find(addr); + color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); + annotate__branch_printf(br, addr); + return 0; +} + +static int 
+annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, + struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, + int max_lines, struct annotation_line *queue) +{ + struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; static const char *prev_color; - if (dl->al.offset != -1) { + if (al->offset != -1) { double max_percent = 0.0; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); - s64 offset = dl->al.offset; - const u64 addr = start + offset; - struct block_range *br; - for (i = 0; i < dl->al.samples_nr; i++) { - struct annotation_data *sample = &dl->al.samples[i]; + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample = &al->samples[i]; if (sample->percent > max_percent) max_percent = sample->percent; @@ -1123,11 +1135,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 1; if (queue != NULL) { - list_for_each_entry_from(queue, ¬es->src->source, al.node) { - if (queue == dl) + list_for_each_entry_from(queue, ¬es->src->source, node) { + if (queue == al) break; - disasm_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL); + annotation_line__print(queue, sym, start, evsel, len, + 0, 0, 1, NULL); } } @@ -1138,17 +1150,17 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * the same color than the percentage. 
Don't print it * twice for close colored addr with the same filename:line */ - if (dl->al.path) { - if (!prev_line || strcmp(prev_line, dl->al.path) + if (al->path) { + if (!prev_line || strcmp(prev_line, al->path) || color != prev_color) { - color_fprintf(stdout, color, " %s", dl->al.path); - prev_line = dl->al.path; + color_fprintf(stdout, color, " %s", al->path); + prev_line = al->path; prev_color = color; } } for (i = 0; i < nr_percent; i++) { - struct annotation_data *sample = &dl->al.samples[i]; + struct annotation_data *sample = &al->samples[i]; color = get_percent_color(sample->percent); @@ -1164,10 +1176,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st printf(" : "); - br = block_range__find(addr); - color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); - color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); - annotate__branch_printf(br, addr); + disasm_line__print(dl, start); printf("\n"); } else if (max_lines && printed >= max_lines) return 1; @@ -1180,27 +1189,15 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - if (!*dl->al.line) + if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", dl->al.line); + printf(" %*s: %s\n", width, " ", al->line); } return 0; } -static int -annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, - struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *aq) -{ - struct disasm_line *dl = container_of(al, struct disasm_line, al); - struct disasm_line *queue = container_of(aq, struct disasm_line, al); - - return disasm_line__print(dl, sym, start, evsel, len, min_pcnt, printed, - max_lines, queue); -} - /* * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) * which looks like following -- cgit v1.2.3 From 
3ab6db8d0f3b19b93a8de25e3b7ab5fdaac47679 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:48 +0200 Subject: perf annotate browser: Use samples data from struct annotation_line We now carry the data in 'struct annotation_line', so using it instead of samples from 'struct browser_disasm_line' and removing it and its setup. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-26-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 57 ++++++++++++++------------------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 613682432940..5d99429a03bc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -29,11 +29,6 @@ struct browser_disasm_line { u32 idx; int idx_asm; int jump_sources; - /* - * actual length of this array is saved on the nr_events field - * of the struct annotate_browser - */ - struct disasm_line_samples samples[1]; }; static struct annotate_browser_opt { @@ -76,9 +71,7 @@ struct annotate_browser { static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) { - struct annotation_line *al = &dl->al; - - return (void *) al - al->privsize; + return (void *) dl - sizeof(struct browser_disasm_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -139,8 +132,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bool show_title = false; for (i = 0; i < ab->nr_events; i++) { - if (bdl->samples[i].percent > percent_max) - percent_max = bdl->samples[i].percent; + if (dl->al.samples[i].percent > percent_max) + percent_max = dl->al.samples[i].percent; } if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { @@ -154,17 +147,17 @@ static void annotate_browser__write(struct ui_browser 
*browser, void *entry, int if (dl->al.offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, - bdl->samples[i].percent, + dl->al.samples[i].percent, current_entry); if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%11" PRIu64 " ", - bdl->samples[i].he.period); + dl->al.samples[i].he.period); } else if (annotate_browser__opts.show_nr_samples) { ui_browser__printf(browser, "%6" PRIu64 " ", - bdl->samples[i].he.nr_samples); + dl->al.samples[i].he.nr_samples); } else { ui_browser__printf(browser, "%6.2f ", - bdl->samples[i].percent); + dl->al.samples[i].percent); } } } else { @@ -363,11 +356,9 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct disasm_line *da, - struct disasm_line *db, int nr_pcnt) +static int disasm__cmp(struct annotation_line *a, + struct annotation_line *b, int nr_pcnt) { - struct browser_disasm_line *a = disasm_line__browser(da); - struct browser_disasm_line *b = disasm_line__browser(db); int i; for (i = 0; i < nr_pcnt; i++) { @@ -378,24 +369,24 @@ static int disasm__cmp(struct disasm_line *da, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct disasm_line *dl, +static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al, int nr_events) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; - struct disasm_line *l; + struct annotation_line *l; while (*p != NULL) { parent = *p; - l = rb_entry(parent, struct disasm_line, al.rb_node); + l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(dl, l, nr_events)) + if (disasm__cmp(al, l, nr_events)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&dl->al.rb_node, parent, p); - rb_insert_color(&dl->al.rb_node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } static void annotate_browser__set_top(struct 
annotate_browser *browser, @@ -453,7 +444,6 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, symbol__calc_percent(sym, evsel); list_for_each_entry(pos, ¬es->src->source, al.node) { - struct browser_disasm_line *bpos = disasm_line__browser(pos); double max_percent = 0.0; int i; @@ -465,18 +455,15 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, for (i = 0; i < browser->nr_events; i++) { struct annotation_data *sample = &pos->al.samples[i]; - bpos->samples[i].percent = sample->percent; - bpos->samples[i].he = sample->he; - - if (max_percent < bpos->samples[i].percent) - max_percent = bpos->samples[i].percent; + if (max_percent < sample->percent) + max_percent = sample->percent; } if (max_percent < 0.01 && pos->al.ipc == 0) { RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, pos, + disasm_rb_tree__insert(&browser->entries, &pos->al, browser->nr_events); } pthread_mutex_unlock(¬es->lock); @@ -1096,7 +1083,6 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, }; int ret = -1, err; int nr_pcnt = 1; - size_t sizeof_bdl = sizeof(struct browser_disasm_line); if (sym == NULL) return -1; @@ -1112,14 +1098,11 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, return -1; } - if (perf_evsel__is_group_event(evsel)) { + if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - sizeof_bdl += sizeof(struct disasm_line_samples) * - (nr_pcnt - 1); - } err = symbol__annotate(sym, map, evsel, - sizeof_bdl, &browser.arch, + sizeof(struct browser_disasm_line), &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; -- cgit v1.2.3 From b15636c62f3a32a8560ea6a32245ec49edd60c6b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:49 +0200 Subject: perf annotate browser: Do not pass nr_events in disasm_rb_tree__insert We now keep samples_nr in struct annotation_line, so there's no need to pass nr_events to 
disasm_rb_tree__insert function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-27-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5d99429a03bc..67e5955b3c6f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -356,12 +356,11 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct annotation_line *a, - struct annotation_line *b, int nr_pcnt) +static int disasm__cmp(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < nr_pcnt; i++) { + for (i = 0; i < a->samples_nr; i++) { if (a->samples[i].percent == b->samples[i].percent) continue; return a->samples[i].percent < b->samples[i].percent; @@ -369,8 +368,7 @@ static int disasm__cmp(struct annotation_line *a, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al, - int nr_events) +static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -380,7 +378,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l, nr_events)) + if (disasm__cmp(al, l)) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -452,7 +450,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - for (i = 0; i < browser->nr_events; i++) { + for (i = 0; i < pos->al.samples_nr; i++) { struct annotation_data *sample = &pos->al.samples[i]; if (max_percent < sample->percent) @@ -463,8 +461,7 @@ static void 
annotate_browser__calc_percent(struct annotate_browser *browser, RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, &pos->al, - browser->nr_events); + disasm_rb_tree__insert(&browser->entries, &pos->al); } pthread_mutex_unlock(¬es->lock); -- cgit v1.2.3 From 0d9579701fee0a482185ab4e8ee7f5ae86f8ae19 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:55:36 +0100 Subject: perf annotate browser: Rename struct browser_disasm_line to browser_line Rename struct browser_disasm_line to browser_line, because the browser operates now on generic lines and no longer on disasm lines. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105536.GA20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 67e5955b3c6f..5ed6c158af40 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -25,10 +25,10 @@ struct disasm_line_samples { #define IPC_WIDTH 6 #define CYCLES_WIDTH 6 -struct browser_disasm_line { - u32 idx; - int idx_asm; - int jump_sources; +struct browser_line { + u32 idx; + int idx_asm; + int jump_sources; }; static struct annotate_browser_opt { @@ -69,9 +69,9 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) +static inline struct browser_line *disasm_line__browser(struct disasm_line *dl) { - return (void *) dl - sizeof(struct browser_disasm_line); + return (void *) dl - sizeof(struct browser_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -119,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = 
container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_disasm_line *bdl = disasm_line__browser(dl); + struct browser_line *bdl = disasm_line__browser(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -302,7 +302,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = ab->selection, *target; - struct browser_disasm_line *btarget, *bcursor; + struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; @@ -413,7 +413,7 @@ static void annotate_browser__set_top(struct annotate_browser *browser, static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { - struct browser_disasm_line *bpos; + struct browser_line *bpos; struct disasm_line *pos; u32 idx; @@ -471,7 +471,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { struct disasm_line *dl; - struct browser_disasm_line *bdl; + struct browser_line *bdl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); @@ -1027,7 +1027,7 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser for (offset = 0; offset < size; ++offset) { struct disasm_line *dl = browser->offsets[offset], *dlt; - struct browser_disasm_line *bdlt; + struct browser_line *bdlt; if (!disasm_line__is_valid_jump(dl, sym)) continue; @@ -1099,7 +1099,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, nr_pcnt = evsel->nr_members; err = symbol__annotate(sym, map, evsel, - sizeof(struct browser_disasm_line), 
&browser.arch, + sizeof(struct browser_line), &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; @@ -1114,7 +1114,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, browser.start = map__rip_2objdump(map, sym->start); list_for_each_entry(pos, ¬es->src->source, al.node) { - struct browser_disasm_line *bpos; + struct browser_line *bpos; size_t line_len = strlen(pos->al.line); if (browser.b.width < line_len) -- cgit v1.2.3 From daf25d4303cbf1795535b6c0b7172ba6f12aa2bd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:55:52 +0100 Subject: perf annotate browser: Rename disasm_line__browser to browser_line Rename disasm_line__browser function to browser_line, because the browser got generic and is no longer disasm specific. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105552.GB20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5ed6c158af40..3691dc8cef4c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -69,7 +69,7 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_line *disasm_line__browser(struct disasm_line *dl) +static inline struct browser_line *browser_line(struct disasm_line *dl) { return (void *) dl - sizeof(struct browser_line); } @@ -119,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = disasm_line__browser(dl); + struct browser_line *bdl = browser_line(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool 
change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -319,8 +319,8 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (!target) return; - bcursor = disasm_line__browser(cursor); - btarget = disasm_line__browser(target); + bcursor = browser_line(cursor); + btarget = browser_line(target); if (annotate_browser__opts.hide_src_code) { from = bcursor->idx_asm; @@ -418,7 +418,7 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, u32 idx; pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = disasm_line__browser(pos); + bpos = browser_line(pos); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -476,7 +476,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) browser->b.seek(&browser->b, offset, SEEK_CUR); dl = list_entry(browser->b.top, struct disasm_line, al.node); - bdl = disasm_line__browser(dl); + bdl = browser_line(dl); if (annotate_browser__opts.hide_src_code) { if (bdl->idx_asm < offset) @@ -1040,7 +1040,7 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser if (dlt == NULL) continue; - bdlt = disasm_line__browser(dlt); + bdlt = browser_line(dlt); if (++bdlt->jump_sources > browser->max_jump_sources) browser->max_jump_sources = bdlt->jump_sources; @@ -1119,7 +1119,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (browser.b.width < line_len) browser.b.width = line_len; - bpos = disasm_line__browser(pos); + bpos = browser_line(pos); bpos->idx = browser.nr_entries++; if (pos->al.offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; -- cgit v1.2.3 From 7bcbcd589b15eae849d45540832ba4f9530c778e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:56:17 +0100 Subject: perf annotate browser: Change selection to struct annotation_line Use struct annotation_line as a browser::selection. 
We want to be able to use the annotate_browser for all sorts of source data, so it needs to be able to work over the generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105617.GC20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 63 +++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 3691dc8cef4c..657811669a6c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -47,26 +47,26 @@ static struct annotate_browser_opt { struct arch; struct annotate_browser { - struct ui_browser b; - struct rb_root entries; - struct rb_node *curr_hot; - struct disasm_line *selection; - struct disasm_line **offsets; - struct arch *arch; - int nr_events; - u64 start; - int nr_asm_entries; - int nr_entries; - int max_jump_sources; - int nr_jumps; - bool searching_backwards; - bool have_cycles; - u8 addr_width; - u8 jumps_width; - u8 target_width; - u8 min_addr_width; - u8 max_addr_width; - char search_bf[128]; + struct ui_browser b; + struct rb_root entries; + struct rb_node *curr_hot; + struct annotation_line *selection; + struct disasm_line **offsets; + struct arch *arch; + int nr_events; + u64 start; + int nr_asm_entries; + int nr_entries; + int max_jump_sources; + int nr_jumps; + bool searching_backwards; + bool have_cycles; + u8 addr_width; + u8 jumps_width; + u8 target_width; + u8 min_addr_width; + u8 max_addr_width; + char search_bf[128]; }; static inline struct browser_line *browser_line(struct disasm_line *dl) @@ -265,7 +265,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } if (current_entry) - ab->selection = dl; + ab->selection = &dl->al; } static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym) @@ -301,7 
+301,8 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *cursor = ab->selection, *target; + struct disasm_line *cursor = disasm_line(ab->selection); + struct disasm_line *target; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -526,7 +527,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, struct hist_browser_timer *hbt) { struct map_symbol *ms = browser->b.priv; - struct disasm_line *dl = browser->selection; + struct disasm_line *dl = disasm_line(browser->selection); struct annotation *notes; struct addr_map_symbol target = { .map = ms->map, @@ -584,7 +585,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows static bool annotate_browser__jump(struct annotate_browser *browser) { - struct disasm_line *dl = browser->selection; + struct disasm_line *dl = disasm_line(browser->selection); u64 offset; s64 idx; @@ -610,7 +611,7 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = browser->selection; + struct disasm_line *pos = disasm_line(browser->selection); *idx = browser->b.index; list_for_each_entry_continue(pos, &notes->src->source, al.node) { @@ -649,7 +650,7 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = browser->selection; + struct disasm_line *pos = disasm_line(browser->selection); *idx = browser->b.index; list_for_each_entry_continue_reverse(pos, &notes->src->source, al.node) { @@ -882,13
+883,16 @@ show_help: continue; case K_ENTER: case K_RIGHT: + { + struct disasm_line *dl = disasm_line(browser->selection); + if (browser->selection == NULL) ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); - else if (browser->selection->al.offset == -1) + else if (browser->selection->offset == -1) ui_helpline__puts("Actions are only available for assembly lines."); - else if (!browser->selection->ins.ops) + else if (!dl->ins.ops) goto show_sup_ins; - else if (ins__is_ret(&browser->selection->ins)) + else if (ins__is_ret(&dl->ins)) goto out; else if (!(annotate_browser__jump(browser) || annotate_browser__callq(browser, evsel, hbt))) { @@ -896,6 +900,7 @@ show_sup_ins: ui_helpline__puts("Actions are only available for function call/return & jump/branch instructions."); } continue; + } case 't': if (annotate_browser__opts.show_total_period) { annotate_browser__opts.show_total_period = false; -- cgit v1.2.3 From e1b60b5bd3c7a3f215e79fa911122aba59b3d984 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:53 +0200 Subject: perf annotate browser: Change offsets to struct annotation_line Use struct annotation_line as a browser::offsets array entry. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-31-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 49 +++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 657811669a6c..911f06ce0f1b 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -51,7 +51,7 @@ struct annotate_browser { struct rb_root entries; struct rb_node *curr_hot; struct annotation_line *selection; - struct disasm_line **offsets; + struct annotation_line **offsets; struct arch *arch; int nr_events; u64 start; @@ -303,6 +303,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = disasm_line(ab->selection); struct disasm_line *target; + struct annotation_line *al; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -316,10 +317,12 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (!disasm_line__is_valid_jump(cursor, sym)) return; - target = ab->offsets[cursor->ops.target.offset]; - if (!target) + al = ab->offsets[cursor->ops.target.offset]; + if (!al) return; + target = disasm_line(al); + bcursor = browser_line(cursor); btarget = browser_line(target); @@ -978,10 +981,10 @@ static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, return; for (offset = start; offset <= end; offset++) { - struct disasm_line *dl = browser->offsets[offset]; + struct annotation_line *al = browser->offsets[offset]; - if (dl) - dl->al.ipc = ipc; + if (al) + al->ipc = ipc; } } } @@ -1006,13 +1009,13 @@ static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, ch = 
&notes->src->cycles_hist[offset]; if (ch && ch->cycles) { - struct disasm_line *dl; + struct annotation_line *al; if (ch->have_start) count_and_fill(browser, ch->start, offset, ch); - dl = browser->offsets[offset]; - if (dl && ch->num_aggr) - dl->al.cycles = ch->cycles_aggr / ch->num_aggr; + al = browser->offsets[offset]; + if (al && ch->num_aggr) + al->cycles = ch->cycles_aggr / ch->num_aggr; browser->have_cycles = true; } } @@ -1031,13 +1034,18 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser return; for (offset = 0; offset < size; ++offset) { - struct disasm_line *dl = browser->offsets[offset], *dlt; + struct annotation_line *al = browser->offsets[offset]; + struct disasm_line *dl, *dlt; struct browser_line *bdlt; + dl = disasm_line(al); + if (!disasm_line__is_valid_jump(dl, sym)) continue; - dlt = browser->offsets[dl->ops.target.offset]; + al = browser->offsets[dl->ops.target.offset]; + dlt = disasm_line(al); + /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we * have to adjust to the previous offset?
@@ -1066,7 +1074,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - struct disasm_line *pos; + struct annotation_line *al; struct annotation *notes; size_t size; struct map_symbol ms = { @@ -1094,7 +1102,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - browser.offsets = zalloc(size * sizeof(struct disasm_line *)); + browser.offsets = zalloc(size * sizeof(struct annotation_line *)); if (browser.offsets == NULL) { ui__error("Not enough memory!"); return -1; @@ -1118,15 +1126,16 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, notes = symbol__annotation(sym); browser.start = map__rip_2objdump(map, sym->start); - list_for_each_entry(pos, &notes->src->source, al.node) { + list_for_each_entry(al, &notes->src->source, node) { + struct disasm_line *dl = disasm_line(al); struct browser_line *bpos; - size_t line_len = strlen(pos->al.line); + size_t line_len = strlen(al->line); if (browser.b.width < line_len) browser.b.width = line_len; - bpos = browser_line(pos); + bpos = browser_line(dl); bpos->idx = browser.nr_entries++; - if (pos->al.offset != -1) { + if (al->offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly @@ -1135,8 +1144,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, * * E.g. copy_user_generic_unrolled */ - if (pos->al.offset < (s64)size) - browser.offsets[pos->al.offset] = pos; + if (al->offset < (s64)size) + browser.offsets[al->offset] = al; } else bpos->idx_asm = -1; } -- cgit v1.2.3 From a5ef27020b4bc0785fabb2591eb670d3bc641257 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:54 +0200 Subject: perf annotate browser: Use struct annotation_line in browser_line Using struct annotation_line arg in browser_line function to make it generic.
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-32-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 60 ++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 911f06ce0f1b..fb83deb8c909 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -69,9 +69,12 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_line *browser_line(struct disasm_line *dl) +static inline struct browser_line *browser_line(struct annotation_line *al) { - return (void *) dl - sizeof(struct browser_line); + void *ptr = al; + + ptr = container_of(al, struct disasm_line, al); + return ptr - sizeof(struct browser_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -119,7 +122,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = browser_line(dl); + struct browser_line *bdl = browser_line(&dl->al); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -302,8 +305,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = disasm_line(ab->selection); - struct disasm_line *target; - struct annotation_line *al; + struct annotation_line *target; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -317,13 +319,9 @@ static void 
annotate_browser__draw_current_jump(struct ui_browser *browser) if (!disasm_line__is_valid_jump(cursor, sym)) return; - al = ab->offsets[cursor->ops.target.offset]; - if (!al) - return; - - target = disasm_line(al); + target = ab->offsets[cursor->ops.target.offset]; - bcursor = browser_line(cursor); + bcursor = browser_line(&cursor->al); btarget = browser_line(target); if (annotate_browser__opts.hide_src_code) { @@ -422,7 +420,7 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, u32 idx; pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = browser_line(pos); + bpos = browser_line(&pos->al); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -475,37 +473,37 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { struct disasm_line *dl; - struct browser_line *bdl; + struct browser_line *bl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); dl = list_entry(browser->b.top, struct disasm_line, al.node); - bdl = browser_line(dl); + bl = browser_line(&dl->al); if (annotate_browser__opts.hide_src_code) { - if (bdl->idx_asm < offset) - offset = bdl->idx; + if (bl->idx_asm < offset) + offset = bl->idx; browser->b.nr_entries = browser->nr_entries; annotate_browser__opts.hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); - browser->b.top_idx = bdl->idx - offset; - browser->b.index = bdl->idx; + browser->b.top_idx = bl->idx - offset; + browser->b.index = bl->idx; } else { - if (bdl->idx_asm < 0) { + if (bl->idx_asm < 0) { ui_helpline__puts("Only available for assembly lines."); browser->b.seek(&browser->b, -offset, SEEK_CUR); return false; } - if (bdl->idx_asm < offset) - offset = bdl->idx_asm; + if (bl->idx_asm < offset) + offset = bl->idx_asm; browser->b.nr_entries = browser->nr_asm_entries; annotate_browser__opts.hide_src_code = true; 
browser->b.seek(&browser->b, -offset, SEEK_CUR); - browser->b.top_idx = bdl->idx_asm - offset; - browser->b.index = bdl->idx_asm; + browser->b.top_idx = bl->idx_asm - offset; + browser->b.index = bl->idx_asm; } return true; @@ -1035,8 +1033,8 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser for (offset = 0; offset < size; ++offset) { struct annotation_line *al = browser->offsets[offset]; - struct disasm_line *dl, *dlt; - struct browser_line *bdlt; + struct disasm_line *dl; + struct browser_line *blt; dl = disasm_line(al); @@ -1044,18 +1042,17 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser continue; al = browser->offsets[dl->ops.target.offset]; - dlt = disasm_line(al); /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we * have to adjust to the previous offset? */ - if (dlt == NULL) + if (al == NULL) continue; - bdlt = browser_line(dlt); - if (++bdlt->jump_sources > browser->max_jump_sources) - browser->max_jump_sources = bdlt->jump_sources; + blt = browser_line(al); + if (++blt->jump_sources > browser->max_jump_sources) + browser->max_jump_sources = blt->jump_sources; ++browser->nr_jumps; } @@ -1127,13 +1124,12 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, browser.start = map__rip_2objdump(map, sym->start); list_for_each_entry(al, &notes->src->source, node) { - struct disasm_line *dl = disasm_line(al); struct browser_line *bpos; size_t line_len = strlen(al->line); if (browser.b.width < line_len) browser.b.width = line_len; - bpos = browser_line(dl); + bpos = browser_line(al); bpos->idx = browser.nr_entries++; if (al->offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; -- cgit v1.2.3 From 9213afbdf9562cd108e7ed03bd960d8acdfb49c1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:55 +0200 Subject: perf annotate browser: Use struct annotation_line in find functions Use struct annotation_line in find functions: annotate_browser__find_string
annotate_browser__find_string_reverse Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-33-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index fb83deb8c909..8f75e56aedc2 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -606,23 +606,23 @@ static bool annotate_browser__jump(struct annotate_browser *browser) } static -struct disasm_line *annotate_browser__find_string(struct annotate_browser *browser, +struct annotation_line *annotate_browser__find_string(struct annotate_browser *browser, char *s, s64 *idx) { struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = disasm_line(browser->selection); + struct annotation_line *al = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue(pos, &notes->src->source, al.node) { - if (disasm_line__filter(&browser->b, &pos->al.node)) + list_for_each_entry_continue(al, &notes->src->source, node) { + if (disasm_line__filter(&browser->b, &al->node)) continue; ++*idx; - if (pos->al.line && strstr(pos->al.line, s) != NULL) - return pos; + if (al->line && strstr(al->line, s) != NULL) + return al; } return NULL; @@ -630,38 +630,38 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows static bool __annotate_browser__search(struct annotate_browser *browser) { - struct disasm_line *dl; + struct annotation_line *al; s64 idx; - dl = annotate_browser__find_string(browser, browser->search_bf, &idx); - if (dl == NULL) { + al = annotate_browser__find_string(browser, browser->search_bf, &idx); + if (al == NULL) { ui_helpline__puts("String not found!");
return false; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, disasm_line(al), idx); browser->searching_backwards = false; return true; } static -struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browser *browser, +struct annotation_line *annotate_browser__find_string_reverse(struct annotate_browser *browser, char *s, s64 *idx) { struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = disasm_line(browser->selection); + struct annotation_line *al = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue_reverse(pos, ¬es->src->source, al.node) { - if (disasm_line__filter(&browser->b, &pos->al.node)) + list_for_each_entry_continue_reverse(al, ¬es->src->source, node) { + if (disasm_line__filter(&browser->b, &al->node)) continue; --*idx; - if (pos->al.line && strstr(pos->al.line, s) != NULL) - return pos; + if (al->line && strstr(al->line, s) != NULL) + return al; } return NULL; @@ -669,16 +669,16 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse static bool __annotate_browser__search_reverse(struct annotate_browser *browser) { - struct disasm_line *dl; + struct annotation_line *al; s64 idx; - dl = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx); - if (dl == NULL) { + al = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx); + if (al == NULL) { ui_helpline__puts("String not found!"); return false; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, disasm_line(al), idx); browser->searching_backwards = true; return true; } -- cgit v1.2.3 From ec03a77d7d28a2c2de246f67322c5d916852dd9d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:56 +0200 Subject: perf annotate browser: Use struct annotation_line in browser top Use struct annotation_line in browser::b::top. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-34-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 8f75e56aedc2..f0f27cf9db85 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -390,7 +390,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line } static void annotate_browser__set_top(struct annotate_browser *browser, - struct disasm_line *pos, u32 idx) + struct annotation_line *pos, u32 idx) { unsigned back; @@ -399,16 +399,16 @@ static void annotate_browser__set_top(struct annotate_browser *browser, browser->b.top_idx = browser->b.index = idx; while (browser->b.top_idx != 0 && back != 0) { - pos = list_entry(pos->al.node.prev, struct disasm_line, al.node); + pos = list_entry(pos->node.prev, struct annotation_line, node); - if (disasm_line__filter(&browser->b, &pos->al.node)) + if (disasm_line__filter(&browser->b, &pos->node)) continue; --browser->b.top_idx; --back; } - browser->b.top = &pos->al; + browser->b.top = pos; browser->b.navkeypressed = true; } @@ -416,11 +416,11 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { struct browser_line *bpos; - struct disasm_line *pos; + struct annotation_line *pos; u32 idx; - pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = browser_line(&pos->al); + pos = rb_entry(nd, struct annotation_line, rb_node); + bpos = browser_line(pos); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -472,13 +472,13 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { - struct disasm_line *dl; + 
struct annotation_line *al; struct browser_line *bl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); - dl = list_entry(browser->b.top, struct disasm_line, al.node); - bl = browser_line(&dl->al); + al = list_entry(browser->b.top, struct annotation_line, node); + bl = browser_line(al); if (annotate_browser__opts.hide_src_code) { if (bl->idx_asm < offset) @@ -600,7 +600,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) return true; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, &dl->al, idx); return true; } @@ -639,7 +639,7 @@ static bool __annotate_browser__search(struct annotate_browser *browser) return false; } - annotate_browser__set_top(browser, disasm_line(al), idx); + annotate_browser__set_top(browser, al, idx); browser->searching_backwards = false; return true; } @@ -678,7 +678,7 @@ static bool __annotate_browser__search_reverse(struct annotate_browser *browser) return false; } - annotate_browser__set_top(browser, disasm_line(al), idx); + annotate_browser__set_top(browser, al, idx); browser->searching_backwards = true; return true; } -- cgit v1.2.3 From a5433b3ec937765a1d7521bc6bb87f6e76497640 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:57 +0200 Subject: perf annotate browser: Add disasm_line__write function Factor disasm_line__write function from annotate_browser__write, which now keeps only generic display code. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-35-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 98 +++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 45 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index f0f27cf9db85..5a2f37a91feb 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -118,11 +118,37 @@ static int annotate_browser__cycles_width(struct annotate_browser *ab) return ab->have_cycles ? IPC_WIDTH + CYCLES_WIDTH : 0; } +static void disasm_line__write(struct disasm_line *dl, struct ui_browser *browser, + char *bf, size_t size) +{ + if (dl->ins.ops && dl->ins.ops->scnprintf) { + if (ins__is_jump(&dl->ins)) { + bool fwd = dl->ops.target.offset > dl->al.offset; + + ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : + SLSMG_UARROW_CHAR); + SLsmg_write_char(' '); + } else if (ins__is_call(&dl->ins)) { + ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); + SLsmg_write_char(' '); + } else if (ins__is_ret(&dl->ins)) { + ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); + SLsmg_write_char(' '); + } else { + ui_browser__write_nstring(browser, " ", 2); + } + } else { + ui_browser__write_nstring(browser, " ", 2); + } + + disasm_line__scnprintf(dl, bf, size, !annotate_browser__opts.use_offset); +} + static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = browser_line(&dl->al); + struct annotation_line *al = list_entry(entry, struct annotation_line, node); + struct browser_line *bl = browser_line(al); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = 
(!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -135,32 +161,32 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bool show_title = false; for (i = 0; i < ab->nr_events; i++) { - if (dl->al.samples[i].percent > percent_max) - percent_max = dl->al.samples[i].percent; + if (al->samples[i].percent > percent_max) + percent_max = al->samples[i].percent; } - if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { + if ((row == 0) && (al->offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { - if (dl->al.ipc == 0.0 && dl->al.cycles == 0) + if (al->ipc == 0.0 && al->cycles == 0) show_title = true; } else show_title = true; } - if (dl->al.offset != -1 && percent_max != 0.0) { + if (al->offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, - dl->al.samples[i].percent, + al->samples[i].percent, current_entry); if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%11" PRIu64 " ", - dl->al.samples[i].he.period); + al->samples[i].he.period); } else if (annotate_browser__opts.show_nr_samples) { ui_browser__printf(browser, "%6" PRIu64 " ", - dl->al.samples[i].he.nr_samples); + al->samples[i].he.nr_samples); } else { ui_browser__printf(browser, "%6.2f ", - dl->al.samples[i].percent); + al->samples[i].percent); } } } else { @@ -175,16 +201,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } } if (ab->have_cycles) { - if (dl->al.ipc) - ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->al.ipc); + if (al->ipc) + ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, al->ipc); else if (!show_title) ui_browser__write_nstring(browser, " ", IPC_WIDTH); else ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC"); - if (dl->al.cycles) + if (al->cycles) ui_browser__printf(browser, "%*" PRIu64 " ", - CYCLES_WIDTH - 1, dl->al.cycles); + CYCLES_WIDTH - 1, al->cycles); 
else if (!show_title) ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); else @@ -197,19 +223,19 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) width += 1; - if (!*dl->al.line) + if (!*al->line) ui_browser__write_nstring(browser, " ", width - pcnt_width - cycles_width); - else if (dl->al.offset == -1) { - if (dl->al.line_nr && annotate_browser__opts.show_linenr) + else if (al->offset == -1) { + if (al->line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", - ab->addr_width + 1, dl->al.line_nr); + ab->addr_width + 1, al->line_nr); else printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); ui_browser__write_nstring(browser, bf, printed); - ui_browser__write_nstring(browser, dl->al.line, width - printed - pcnt_width - cycles_width + 1); + ui_browser__write_nstring(browser, al->line, width - printed - pcnt_width - cycles_width + 1); } else { - u64 addr = dl->al.offset; + u64 addr = al->offset; int color = -1; if (!annotate_browser__opts.use_offset) @@ -218,13 +244,13 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!annotate_browser__opts.use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { - if (bdl->jump_sources) { + if (bl->jump_sources) { if (annotate_browser__opts.show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", ab->jumps_width, - bdl->jump_sources); - prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, + bl->jump_sources); + prev = annotate_browser__set_jumps_percent_color(ab, bl->jump_sources, current_entry); ui_browser__write_nstring(browser, bf, printed); ui_browser__set_color(browser, prev); @@ -243,32 +269,14 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__write_nstring(browser, bf, printed); if (change_color) ui_browser__set_color(browser, color); - if (dl->ins.ops && 
dl->ins.ops->scnprintf) { - if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > dl->al.offset; - - ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : - SLSMG_UARROW_CHAR); - SLsmg_write_char(' '); - } else if (ins__is_call(&dl->ins)) { - ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); - SLsmg_write_char(' '); - } else if (ins__is_ret(&dl->ins)) { - ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); - SLsmg_write_char(' '); - } else { - ui_browser__write_nstring(browser, " ", 2); - } - } else { - ui_browser__write_nstring(browser, " ", 2); - } - disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); + disasm_line__write(disasm_line(al), browser, bf, sizeof(bf)); + ui_browser__write_nstring(browser, bf, width - pcnt_width - cycles_width - 3 - printed); } if (current_entry) - ab->selection = &dl->al; + ab->selection = al; } static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym) -- cgit v1.2.3 From f48e7c407050e5f5f53a0fa9a266d83b001dd356 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:58 +0200 Subject: perf annotate: Align source and offset lines Align source with offset lines, which are more advanced, because of the address column. 
Before: : static void *worker_thread(void *__tdata) : { 0.00 : 48a971: push %rbp 0.00 : 48a972: mov %rsp,%rbp 0.00 : 48a975: sub $0x30,%rsp 0.00 : 48a979: mov %rdi,-0x28(%rbp) 0.00 : 48a97d: mov %fs:0x28,%rax 0.00 : 48a986: mov %rax,-0x8(%rbp) 0.00 : 48a98a: xor %eax,%eax : struct thread_data *td = __tdata; 0.00 : 48a98c: mov -0x28(%rbp),%rax 0.00 : 48a990: mov %rax,-0x10(%rbp) : int m = 0, i; 0.00 : 48a994: movl $0x0,-0x1c(%rbp) : int ret; : : for (i = 0; i < loops; i++) { 0.00 : 48a99b: movl $0x0,-0x18(%rbp) After: : static void *worker_thread(void *__tdata) : { 0.00 : 48a971: push %rbp 0.00 : 48a972: mov %rsp,%rbp 0.00 : 48a975: sub $0x30,%rsp 0.00 : 48a979: mov %rdi,-0x28(%rbp) 0.00 : 48a97d: mov %fs:0x28,%rax 0.00 : 48a986: mov %rax,-0x8(%rbp) 0.00 : 48a98a: xor %eax,%eax : struct thread_data *td = __tdata; 0.00 : 48a98c: mov -0x28(%rbp),%rax 0.00 : 48a990: mov %rax,-0x10(%rbp) : int m = 0, i; 0.00 : 48a994: movl $0x0,-0x1c(%rbp) : int ret; : : for (i = 0; i < loops; i++) { 0.00 : 48a99b: movl $0x0,-0x18(%rbp) It makes a bigger difference when displaying script sources, where the comment lines look oddly shifted from the lines which actually hold code. I'll send script support separately. Committer note: Do not use a fixed column width for the addresses, as kernel ones use more than 10 columns, look at the last offset and get the right width.
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index cb065ca431ee..eab4a8e3c679 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1092,15 +1092,14 @@ static void annotate__branch_printf(struct block_range *br, u64 addr) } } - -static int disasm_line__print(struct disasm_line *dl, u64 start) +static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width) { s64 offset = dl->al.offset; const u64 addr = start + offset; struct block_range *br; br = block_range__find(addr); - color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); + color_fprintf(stdout, annotate__address_color(br), " %*" PRIx64 ":", addr_fmt_width, addr); color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); return 0; @@ -1109,7 +1108,7 @@ static int disasm_line__print(struct disasm_line *dl, u64 start) static int annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *queue) + int max_lines, struct annotation_line *queue, int addr_fmt_width) { struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; @@ -1139,7 +1138,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (queue == al) break; annotation_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL); + 0, 0, 1, NULL, addr_fmt_width); } } @@ -1174,9 +1173,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start color_fprintf(stdout, color, " %7.2f", 
sample->percent); } - printf(" : "); + printf(" : "); - disasm_line__print(dl, start); + disasm_line__print(dl, start, addr_fmt_width); printf("\n"); } else if (max_lines && printed >= max_lines) return 1; @@ -1192,7 +1191,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", al->line); + printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line); } return 0; @@ -1796,6 +1795,19 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } +static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) +{ + char bf[32]; + struct annotation_line *line; + + list_for_each_entry_reverse(line, lines, node) { + if (line->offset != -1) + return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset); + } + + return 0; +} + int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context) @@ -1808,7 +1820,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, struct sym_hist *h = annotation__histogram(notes, evsel->idx); struct annotation_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); - int printed = 2, queue_len = 0; + int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; u64 len; int width = symbol_conf.show_total_period ? 
12 : 8; @@ -1839,6 +1851,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); + addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, start); + list_for_each_entry(pos, ¬es->src->source, node) { int err; @@ -1849,7 +1863,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, err = annotation_line__print(pos, sym, start, evsel, len, min_pcnt, printed, max_lines, - queue); + queue, addr_fmt_width); switch (err) { case 0: -- cgit v1.2.3 From 1873b710ace80b9437227a0f8f1a5dab18f49ec1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Nov 2017 13:41:35 -0300 Subject: tools headers: Synchronize kernel ABI headers wrt SPDX tags Two more, that were just in perf/core and thus weren't covered by Ingo's latest headers synch, kcmp.h and prctl.h, silencing this: Warning: Kernel ABI header at 'tools/include/uapi/linux/kcmp.h' differs from latest version at 'include/uapi/linux/kcmp.h' Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2a0r7iybyqpkftllyy5t9hfk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kcmp.h | 1 + tools/include/uapi/linux/prctl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h index 481e103da78e..ef1305010925 100644 --- a/tools/include/uapi/linux/kcmp.h +++ b/tools/include/uapi/linux/kcmp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_KCMP_H #define _UAPI_LINUX_KCMP_H diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index a8d0759a9e40..b640071421f7 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -1,3 +1,4 @@ +/* 
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _LINUX_PRCTL_H #define _LINUX_PRCTL_H -- cgit v1.2.3 From c2f1cead19b628d7a23d2cfc43e444af669f9eab Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Oct 2017 13:27:55 -0700 Subject: perf record: Fix -c/-F options for cpu event aliases The Intel PMU event aliases have a implicit period= specifier to set the default period. Unfortunately this breaks overriding these periods with -c or -F, because the alias terms look like they are user specified to the internal parser, and user specified event qualifiers override the command line options. Track that they are coming from aliases by adding a "weak" state to the term. Any weak terms don't override command line options. I only did it for -c/-F for now, I think that's the only case that's broken currently. Before: $ perf record -c 1000 -vv -e uops_issued.any ... { sample_period, sample_freq } 2000003 After: $ perf record -c 1000 -vv -e uops_issued.any ... { sample_period, sample_freq } 1000 Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171020202755.21410-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 12 ++++++++---- tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 2 ++ tools/perf/util/parse-events.h | 3 +++ tools/perf/util/pmu.c | 5 +++++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f894893c203d..bfde6a7a80f2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -733,12 +733,16 @@ static void apply_config_terms(struct perf_evsel *evsel, list_for_each_entry(term, config_terms, list) { switch (term->type) { case PERF_EVSEL__CONFIG_TERM_PERIOD: - attr->sample_period = term->val.period; - attr->freq = 0; + if (!(term->weak && opts->user_interval != ULLONG_MAX)) { + attr->sample_period = term->val.period; + attr->freq = 0; + } break; case PERF_EVSEL__CONFIG_TERM_FREQ: - 
attr->sample_freq = term->val.freq; - attr->freq = 1; + if (!(term->weak && opts->user_freq != UINT_MAX)) { + attr->sample_freq = term->val.freq; + attr->freq = 1; + } break; case PERF_EVSEL__CONFIG_TERM_TIME: if (term->val.time) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9277df96ffda..157f49e8a772 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -67,6 +67,7 @@ struct perf_evsel_config_term { bool overwrite; char *branch; } val; + bool weak; }; struct perf_stat_evsel; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a7fcd95961ef..170316795a18 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1116,6 +1116,7 @@ do { \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ __t->val.__name = __val; \ + __t->weak = term->weak; \ list_add_tail(&__t->list, head_terms); \ } while (0) @@ -2410,6 +2411,7 @@ static int new_term(struct parse_events_term **_term, *term = *temp; INIT_LIST_HEAD(&term->list); + term->weak = false; switch (term->type_val) { case PARSE_EVENTS__TERM_TYPE_NUM: diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index be337c266697..88108cd11b4c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -101,6 +101,9 @@ struct parse_events_term { /* error string indexes for within parsed string */ int err_term; int err_val; + + /* Coming from implicit alias */ + bool weak; }; struct parse_events_error { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 07cb2ac041d7..80fb1593913a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -405,6 +405,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, parse_events_terms__purge(&list); return ret; } + /* + * Weak terms don't override command line options, + * which we don't want for implicit terms in aliases. 
+ */ + cloned->weak = true; list_add_tail(&cloned->list, &list); } list_splice(&list, terms); -- cgit v1.2.3 From d0565132605f454179699a1b8a3276fc0f8cc87b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Oct 2017 13:27:54 -0700 Subject: perf evsel: Enable type checking for perf_evsel_config_term types Use a typed enum for the perf_evsel_config_term type enum. This allows gcc to do much stronger type checks, and also check for missing case statements. I removed the unused _MAX member from the number. It found one missing case. I'm not sure it's a real problem, so I just turned it into a BUG_ON for now. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171020202755.21410-1-andi@firstfloor.org [ Renamed the enum name to term_type as per jolsa's request ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 ++ tools/perf/util/evsel.h | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index bfde6a7a80f2..4376cdfaea49 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -779,6 +779,8 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_OVERWRITE: attr->write_backward = term->val.overwrite ? 1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_DRV_CFG: + BUG_ON(1); default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 157f49e8a772..0688880227e1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -38,7 +38,7 @@ struct cgroup_sel; * It is allocated within event parsing and attached to * perf_evsel::config_terms list head. 
*/ -enum { +enum term_type { PERF_EVSEL__CONFIG_TERM_PERIOD, PERF_EVSEL__CONFIG_TERM_FREQ, PERF_EVSEL__CONFIG_TERM_TIME, @@ -49,12 +49,11 @@ enum { PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_MAX, }; struct perf_evsel_config_term { struct list_head list; - int type; + enum term_type type; union { u64 period; u64 freq; -- cgit v1.2.3 From cbd5c1787bab4643e5959522275b46de94eba5ac Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 7 Nov 2017 16:22:46 -0800 Subject: perf trace: Fix an exit code of trace__symbols_init Currently if trace_event__register_resolver() fails, we return -errno, but we can't be sure that errno isn't zero in this case. Signed-off-by: Andrei Vagin Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vasily Averin Link: http://lkml.kernel.org/r/20171108002246.8924-2-avagin@openvz.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f2757d38c7d7..84debdbad327 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1152,12 +1152,14 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if (trace->host == NULL) return -ENOMEM; - if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0) - return -errno; + err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr); + if (err < 0) + goto out; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, evlist->threads, trace__tool_process, false, trace->opts.proc_map_timeout, 1); +out: if (err) symbol__exit(); -- cgit v1.2.3 From 86f5fe01cf3ad42e99e7655dec93e0e36ef65614 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Nov 2017 11:27:37 +0100 Subject: perf tools: Use shell function for perl cflags 
retrieval Using the shell function for perl CFLAGS retrieval instead of back quotes (``). Both execute shell with the command, but the latter is more explicit and seems to be the preferred way. Also we don't have any other use of the back quotes in perf Makefiles. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171108102739.30338-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 63f534a0902f..f6786fa2419f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -579,7 +579,7 @@ else PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) - PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` + PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) -- cgit v1.2.3 From 82806c3aae7496d6974429f3bd43ebeeef18b9b2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2017 12:03:40 -0300 Subject: perf evsel: Fix up leftover perf_evsel_stat usage via evsel->priv I forgot one conversion, which got noticed by Thomas when running: $ perf stat -e '{cpu-clock,instructions}' kill kill: not enough arguments Segmentation fault (core dumped) $ Fix it, those stats are in evsel->stats, not anymore in evsel->priv. 
Reported-by: Thomas-Mich Richter Tested-by: Thomas-Mich Richter Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: e669e833da8d ("perf evsel: Restore evsel->priv as a tool private area") Link: http://lkml.kernel.org/r/20171109150046.GN4333@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4376cdfaea49..cb9bcdb065ea 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1377,7 +1377,7 @@ perf_evsel__process_group_data(struct perf_evsel *leader, static int perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread) { - struct perf_stat_evsel *ps = leader->priv; + struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->attr.read_format; int size = perf_evsel__read_size(leader); u64 *data = ps->group_data; -- cgit v1.2.3 From 60dbcd2532dd7eec2f1e23a37b80ff85d8fb2953 Mon Sep 17 00:00:00 2001 From: Seonghyun Park Date: Thu, 9 Nov 2017 23:07:04 +0900 Subject: perf tests: Add missing WRITE_ASS for new fields of perf_event_attr Include newly added fields 'mmap2', 'comm_exec', 'use_clockid', 'namespaces', 'write_backward' and 'context_switch' from perf_event_attr to store_event(). 
Signed-off-by: Seonghyun Park Cc: Jiri Olsa Cc: Namhyung Kim Cc: Seonghyun Park Link: http://lkml.kernel.org/n/tip-vltn7pqhcv8h5fmo9cthk87q@git.kernel.org [ Fix log message to add 'write_backward', fix the patch to add 'use_clock_id' ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0e1367f90af5..97f64ad7fa08 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -124,6 +124,12 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, WRITE_ASS(exclude_guest, "d"); WRITE_ASS(exclude_callchain_kernel, "d"); WRITE_ASS(exclude_callchain_user, "d"); + WRITE_ASS(mmap2, "d"); + WRITE_ASS(comm_exec, "d"); + WRITE_ASS(context_switch, "d"); + WRITE_ASS(write_backward, "d"); + WRITE_ASS(namespaces, "d"); + WRITE_ASS(use_clockid, "d"); WRITE_ASS(wakeup_events, PRIu32); WRITE_ASS(bp_type, PRIu32); WRITE_ASS(config1, "llu"); -- cgit v1.2.3 From fa48c892645dfd3159e5aa6eb9cefd00d5cb347a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2017 16:04:26 -0300 Subject: perf script: Fix --per-event-dump for auxtrace synth evsels When processing PERF_RECORD_AUXTRACE_INFO several perf_evsel entries will be synthesized and inserted into session->evlist, eventually ending in perf_script.tool.sample(), which ends up calling builtin-script.c's process_event(), that expects evsel->priv to be a perf_evsel_script object with a valid FILE pointer in fp. So we need to intercept the processing of PERF_RECORD_AUXTRACE_INFO and then setup evsel->priv for these newly created perf_evsel instances, do it to fix the segfault in process_event() trying to use a NULL for that FILE pointer. 
Reported-by: Alexander Shishkin Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Wang Nan Cc: yuzhoujian Fixes: a14390fde64e ("perf script: Allow creating per-event dump files") Link: http://lkml.kernel.org/n/tip-bthnur8r8de01gxvn2qayx6e@git.kernel.org [ Merge fix by Ravi Bangoria before pushing upstream to preserv bisectability ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 68f36dc0344f..9b43bda45a41 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1955,6 +1955,16 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) struct perf_evsel *evsel; evlist__for_each_entry(script->session->evlist, evsel) { + /* + * Already setup? I.e. we may be called twice in cases like + * Intel PT, one for the intel_pt// and dummy events, then + * for the evsels syntheized from the auxtrace info. + * + * Ses perf_script__process_auxtrace_info. 
+ */ + if (evsel->priv != NULL) + continue; + evsel->priv = perf_evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; @@ -2838,6 +2848,25 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } +#ifdef HAVE_AUXTRACE_SUPPORT +static int perf_script__process_auxtrace_info(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + int ret = perf_event__process_auxtrace_info(tool, event, session); + + if (ret == 0) { + struct perf_script *script = container_of(tool, struct perf_script, tool); + + ret = perf_script__setup_per_event_dump(script); + } + + return ret; +} +#else +#define perf_script__process_auxtrace_info 0 +#endif + int cmd_script(int argc, const char **argv) { bool show_full_info = false; @@ -2866,7 +2895,7 @@ int cmd_script(int argc, const char **argv) .feature = perf_event__process_feature, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, - .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace_info = perf_script__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, .auxtrace_error = perf_event__process_auxtrace_error, .stat = perf_event__process_stat_event, -- cgit v1.2.3 From e795dd42b716ff36ebaa5384fd1be8458d6c9c34 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 8 Nov 2017 18:42:03 -0500 Subject: perf vendor events powerpc: Update POWER9 events The POWER9 hardware has dropped support for several events, added a few new events and changed the category for a couple of events. Update the POWER9 events in Linux to reflect these changes. 
Signed-off-by: Sukadev Bhattiprolu Cc: Jiri Olsa Cc: Michael Ellerman Cc: Madhavan Srinivasan Link: http://lkml.kernel.org/r/20171108201938.GA10985@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/powerpc/power9/cache.json | 5 - .../pmu-events/arch/powerpc/power9/frontend.json | 7 +- .../pmu-events/arch/powerpc/power9/marked.json | 27 +- .../perf/pmu-events/arch/powerpc/power9/other.json | 276 ++++++--------------- .../pmu-events/arch/powerpc/power9/pipeline.json | 14 +- tools/perf/pmu-events/arch/powerpc/power9/pmc.json | 2 +- .../arch/powerpc/power9/translation.json | 5 - 7 files changed, 88 insertions(+), 248 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json index 18f6645f2897..7945c5196c43 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json @@ -124,11 +124,6 @@ "EventName": "PM_CMPLU_STALL_LARX", "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied" }, - {, - "EventCode": "0x3006C", - "EventName": "PM_RUN_CYC_SMT2_MODE", - "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT2 mode" - }, {, "EventCode": "0x1C058", "EventName": "PM_DTLB_MISS_16G", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json index c63a919eda98..bd8361b5fd6a 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json @@ -1,9 +1,4 @@ [ - {, - "EventCode": "0x3E15C", - "EventName": "PM_MRK_L2_TM_ST_ABORT_SISTER", - "BriefDescription": "TM marked store abort for this thread" - }, {, "EventCode": "0x25044", "EventName": "PM_IPTEG_FROM_L31_MOD", @@ -369,4 +364,4 @@ "EventName": "PM_IPTEG_FROM_L31_ECO_MOD", "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) 
data from another core's ECO L3 on the same chip due to a instruction side request" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json index b9df54fb37e3..22f9f32060a8 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json @@ -1,9 +1,4 @@ [ - {, - "EventCode": "0x3C052", - "EventName": "PM_DATA_SYS_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load" - }, {, "EventCode": "0x3013E", "EventName": "PM_MRK_STALL_CMPLU_CYC", @@ -254,6 +249,11 @@ "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3", "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache" }, + {, + "EventCode": "0x3C052", + "EventName": "PM_DATA_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load" + }, {, "EventCode": "0x4D142", "EventName": "PM_MRK_DATA_FROM_L3", @@ -434,21 +434,6 @@ "EventName": "PM_ITLB_MISS", "BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed" }, - {, - "EventCode": "0x2D024", - "EventName": "PM_RADIX_PWC_L2_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache." - }, - {, - "EventCode": "0x3F056", - "EventName": "PM_RADIX_PWC_L3_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache." 
- }, - {, - "EventCode": "0x4E014", - "EventName": "PM_TM_TX_PASS_RUN_INST", - "BriefDescription": "Run instructions spent in successful transactions" - }, {, "EventCode": "0x1E044", "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT", @@ -644,4 +629,4 @@ "EventName": "PM_MRK_BR_MPRED_CMPL", "BriefDescription": "Marked Branch Mispredicted" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 54cc3be00fc2..5ce312973f1e 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -79,6 +79,11 @@ "EventName": "PM_RADIX_PWC_MISS", "BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache." }, + {, + "EventCode": "0x26882", + "EventName": "PM_L2_DC_INV", + "BriefDescription": "D-cache invalidates sent over the reload bus to the core" + }, {, "EventCode": "0x24048", "EventName": "PM_INST_FROM_LMEM", @@ -94,11 +99,6 @@ "EventName": "PM_TM_PASSED", "BriefDescription": "Number of TM transactions that passed" }, - {, - "EventCode": "0xD1A0", - "EventName": "PM_MRK_LSU_FLUSH_LHS", - "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed" - }, {, "EventCode": "0xF088", "EventName": "PM_LSU0_STORE_REJECT", @@ -127,7 +127,7 @@ {, "EventCode": "0xD08C", "EventName": "PM_LSU2_LDMX_FIN", - "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. 
This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])" + "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])." }, {, "EventCode": "0x300F8", @@ -204,11 +204,6 @@ "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC", "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load" }, - {, - "EventCode": "0xF0B4", - "EventName": "PM_DC_PREF_CONS_ALLOC", - "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch" - }, {, "EventCode": "0xF894", "EventName": "PM_LSU3_L1_CAM_CANCEL", @@ -219,21 +214,11 @@ "EventName": "PM_FLUSH_DISP_TLBIE", "BriefDescription": "Dispatch Flush: TLBIE" }, - {, - "EventCode": "0xD1A4", - "EventName": "PM_MRK_LSU_FLUSH_SAO", - "BriefDescription": "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush" - }, {, "EventCode": "0x4E11E", "EventName": "PM_MRK_DATA_FROM_DMEM_CYC", "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load" }, - {, - "EventCode": "0x5894", - "EventName": "PM_LWSYNC", - "BriefDescription": "Lwsync instruction decoded and transferred" - }, {, "EventCode": "0x14156", "EventName": "PM_MRK_DATA_FROM_L2_CYC", @@ -244,11 +229,6 @@ "EventName": "PM_RD_CLEARING_SC", "BriefDescription": "Read clearing SC" }, - {, - "EventCode": "0x50A0", - "EventName": "PM_HWSYNC", - "BriefDescription": "Hwsync instruction decoded 
and transferred" - }, {, "EventCode": "0x168B0", "EventName": "PM_L3_P1_NODE_PUMP", @@ -264,6 +244,11 @@ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load" }, + {, + "EventCode": "0x468AE", + "EventName": "PM_L3_P3_CO_RTY", + "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted" + }, {, "EventCode": "0x460A8", "EventName": "PM_SN_HIT", @@ -279,11 +264,6 @@ "EventName": "PM_DC_PREF_HW_ALLOC", "BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism" }, - {, - "EventCode": "0xF0BC", - "EventName": "PM_LS2_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0xD0AC", "EventName": "PM_SRQ_SYNC_CYC", @@ -379,26 +359,11 @@ "EventName": "PM_RUN_CYC_SMT4_MODE", "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode" }, - {, - "EventCode": "0x5088", - "EventName": "PM_DECODE_FUSION_OP_PRESERV", - "BriefDescription": "Destructive op operand preservation" - }, {, "EventCode": "0x1D14E", "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC", "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load" }, - {, - "EventCode": "0x509C", - "EventName": "PM_FORCED_NOP", - "BriefDescription": "Instruction was forced to execute as a nop because it was found to behave like a nop (have no effect) at decode time" - }, - {, - "EventCode": "0xC098", - "EventName": "PM_LS2_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to 
require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x20058", "EventName": "PM_DARQ1_10_12_ENTRIES", @@ -434,11 +399,6 @@ "EventName": "PM_LSU1_STORE_REJECT", "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" }, - {, - "EventCode": "0x4505E", - "EventName": "PM_FLOP_CMPL", - "BriefDescription": "Floating Point Operation Finished" - }, {, "EventCode": "0x1D144", "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT", @@ -480,14 +440,9 @@ "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued" }, {, - "EventCode": "0xC094", - "EventName": "PM_LS0_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, - {, - "EventCode": "0xF8BC", - "EventName": "PM_LS3_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. 
If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + "EventCode": "0x460AE", + "EventName": "PM_L3_P2_CO_RTY", + "BriefDescription": "L3 CO received retry port 2 (memory only), every retry counted" }, {, "EventCode": "0x58B0", @@ -504,11 +459,6 @@ "EventName": "PM_TM_ST_CONF", "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)" }, - {, - "EventCode": "0xD998", - "EventName": "PM_MRK_LSU_FLUSH_EMSH", - "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" - }, {, "EventCode": "0xF8A0", "EventName": "PM_NON_DATA_STORE", @@ -524,11 +474,6 @@ "EventName": "PM_BR_UNCOND", "BriefDescription": "Unconditional Branch Completed. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was covenrted to a Resolve." }, - {, - "EventCode": "0x1F056", - "EventName": "PM_RADIX_PWC_L1_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit." 
- }, {, "EventCode": "0xF8A8", "EventName": "PM_DC_PREF_FUZZY_CONF", @@ -544,6 +489,11 @@ "EventName": "PM_LSU2_TM_L1_MISS", "BriefDescription": "Load tm L1 miss" }, + {, + "EventCode": "0xC880", + "EventName": "PM_LS1_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x2894", "EventName": "PM_TM_OUTER_TEND", @@ -564,21 +514,11 @@ "EventName": "PM_MRK_LSU_DERAT_MISS", "BriefDescription": "Marked derat reload (miss) for any page size" }, - {, - "EventCode": "0x160A0", - "EventName": "PM_L3_PF_MISS_L3", - "BriefDescription": "L3 PF missed in L3" - }, {, "EventCode": "0x1C04A", "EventName": "PM_DATA_FROM_RL2L3_SHR", "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load" }, - {, - "EventCode": "0xD99C", - "EventName": "PM_MRK_LSU_FLUSH_UE", - "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" - }, {, "EventCode": "0x268B0", "EventName": "PM_L3_P1_GRP_PUMP", @@ -629,11 +569,6 @@ "EventName": "PM_TMA_REQ_L2", "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding" }, - {, - "EventCode": "0x5884", - "EventName": "PM_DECODE_LANES_NOT_AVAIL", - "BriefDescription": "Decode has something to transmit but dispatch lanes are not available" - }, {, "EventCode": "0x3C042", "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT", @@ -690,9 +625,9 @@ "BriefDescription": "False LHS match detected" }, {, - "EventCode": "0xD9A4", - "EventName": "PM_MRK_LSU_FLUSH_LARX_STCX", - "BriefDescription": "A larx is flushed because an older larx has an LMQ reservation for the same thread. A stcx is flushed because an older stcx is in the LMQ. 
The flush happens when the older larx/stcx relaunches" + "EventCode": "0xF0B0", + "EventName": "PM_L3_LD_PREF", + "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest" }, {, "EventCode": "0x4D012", @@ -715,9 +650,9 @@ "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" }, {, - "EventCode": "0xF8B8", - "EventName": "PM_LS1_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + "EventCode": "0x160A0", + "EventName": "PM_L3_PF_MISS_L3", + "BriefDescription": "L3 PF missed in L3" }, {, "EventCode": "0x408C", @@ -764,11 +699,6 @@ "EventName": "PM_TM_NESTED_TEND", "BriefDescription": "Completion time nested tend" }, - {, - "EventCode": "0x36084", - "EventName": "PM_L2_RCST_DISP", - "BriefDescription": "All D-side store dispatch attempts for this thread" - }, {, "EventCode": "0x368A0", "EventName": "PM_L3_PF_OFF_CHIP_CACHE", @@ -829,11 +759,6 @@ "EventName": "PM_L3_SN_USAGE", "BriefDescription": "Rotating sample of 16 snoop valids" }, - {, - "EventCode": "0x16084", - "EventName": "PM_L2_RCLD_DISP", - "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" - }, {, "EventCode": "0x1608C", "EventName": "PM_RC0_BUSY", @@ -842,7 +767,7 @@ {, "EventCode": "0x36082", "EventName": "PM_L2_LD_DISP", - "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." 
+ "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)" }, {, "EventCode": "0xF8B0", @@ -904,11 +829,6 @@ "EventName": "PM_IC_PREF_REQ", "BriefDescription": "Instruction prefetch requests" }, - {, - "EventCode": "0xC898", - "EventName": "PM_LS3_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x488C", "EventName": "PM_IC_PREF_WRITE", @@ -1017,7 +937,7 @@ {, "EventCode": "0x3E05E", "EventName": "PM_L3_CO_MEPF", - "BriefDescription": "L3 castouts in Mepf state for this thread" + "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request" }, {, "EventCode": "0x460A2", @@ -1204,11 +1124,6 @@ "EventName": "PM_TM_FAIL_NON_TX_CONFLICT", "BriefDescription": "Non transactional conflict from LSU, gets reported to TEXASR" }, - {, - "EventCode": "0xD198", - "EventName": "PM_MRK_LSU_FLUSH_ATOMIC", - "BriefDescription": "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed" - }, {, "EventCode": "0x201E0", "EventName": "PM_MRK_DATA_FROM_MEMORY", @@ -1294,11 +1209,6 @@ "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL", "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. 
Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)" }, - {, - "EventCode": "0xC894", - "EventName": "PM_LS1_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x360A2", "EventName": "PM_L3_L2_CO_HIT", @@ -1324,11 +1234,6 @@ "EventName": "PM_L2_CASTOUT_SHR", "BriefDescription": "L2 Castouts - Shared (Tx,Sx)" }, - {, - "EventCode": "0xD884", - "EventName": "PM_LSU3_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x26092", "EventName": "PM_L2_LD_MISS_64B", @@ -1362,12 +1267,12 @@ {, "EventCode": "0xD8A8", "EventName": "PM_ISLB_MISS", - "BriefDescription": "Instruction SLB miss - Total of all segment sizes" + "BriefDescription": "Instruction SLB Miss - Total of all segment sizes" }, {, - "EventCode": "0xD19C", - "EventName": "PM_MRK_LSU_FLUSH_RELAUNCH_MISS", - "BriefDescription": "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent" + "EventCode": "0x368AE", + "EventName": "PM_L3_P1_CO_RTY", + "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" }, {, "EventCode": "0x260A2", @@ -1384,6 +1289,11 @@ "EventName": "PM_CMPLU_STALL_NESTED_TBEGIN", "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. 
This is a short delay, and it includes ROT" }, + {, + "EventCode": "0xC084", + "EventName": "PM_LS2_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x1608E", "EventName": "PM_ST_CAUSED_FAIL", @@ -1409,11 +1319,6 @@ "EventName": "PM_CO_USAGE", "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running" }, - {, - "EventCode": "0xD084", - "EventName": "PM_LSU2_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x48B8", "EventName": "PM_BR_MPRED_TAKEN_TA", @@ -1449,30 +1354,25 @@ "EventName": "PM_DC_PREF_STRIDED_CONF", "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software." }, + {, + "EventCode": "0x36084", + "EventName": "PM_L2_RCST_DISP", + "BriefDescription": "All D-side store dispatch attempts for this thread" + }, {, "EventCode": "0x45054", "EventName": "PM_FMA_CMPL", "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. " }, - {, - "EventCode": "0x5090", - "EventName": "PM_SHL_ST_DISABLE", - "BriefDescription": "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)" - }, {, "EventCode": "0x201E8", "EventName": "PM_THRESH_EXC_512", "BriefDescription": "Threshold counter exceeded a value of 512" }, - {, - "EventCode": "0x5084", - "EventName": "PM_DECODE_FUSION_EXT_ADD", - "BriefDescription": "32-bit extended addition" - }, {, "EventCode": "0x36080", "EventName": "PM_L2_INST", - "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." 
+ "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)" }, {, "EventCode": "0x3504C", @@ -1554,21 +1454,11 @@ "EventName": "PM_MEM_RWITM", "BriefDescription": "Memory Read With Intent to Modify for this thread" }, - {, - "EventCode": "0x26882", - "EventName": "PM_L2_DC_INV", - "BriefDescription": "D-cache invalidates sent over the reload bus to the core" - }, {, "EventCode": "0xC090", "EventName": "PM_LSU_STCX", "BriefDescription": "STCX sent to nest, i.e. total" }, - {, - "EventCode": "0xD080", - "EventName": "PM_LSU0_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x2C120", "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT", @@ -1609,11 +1499,6 @@ "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT", "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request" }, - {, - "EventCode": "0xD9A0", - "EventName": "PM_MRK_LSU_FLUSH_LHL_SHL", - "BriefDescription": "The instruction was flushed because of a sequential load/store consistency. If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores)." - }, {, "EventCode": "0x35042", "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT", @@ -1692,7 +1577,7 @@ {, "EventCode": "0x2001A", "EventName": "PM_NTC_ALL_FIN", - "BriefDescription": "Cycles after all instructions have finished to group completed" + "BriefDescription": "Cycles after instruction finished to instruction completed." 
}, {, "EventCode": "0x3005A", @@ -1709,6 +1594,11 @@ "EventName": "PM_LSU1_L1_CAM_CANCEL", "BriefDescription": "ls1 l1 tm cam cancel" }, + {, + "EventCode": "0x268AE", + "EventName": "PM_L3_P3_PF_RTY", + "BriefDescription": "L3 PF received retry port 3, every retry counted" + }, {, "EventCode": "0xE884", "EventName": "PM_LS1_ERAT_MISS_PREF", @@ -1742,7 +1632,7 @@ {, "EventCode": "0x160B6", "EventName": "PM_L3_WI0_BUSY", - "BriefDescription": "Rotating sample of 8 WI valid" + "BriefDescription": "Rotating sample of 8 WI valid (duplicate)" }, {, "EventCode": "0x368AC", @@ -1790,9 +1680,9 @@ "BriefDescription": "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)" }, {, - "EventCode": "0x589C", - "EventName": "PM_PTESYNC", - "BriefDescription": "ptesync instruction counted when the instruction is decoded and transmitted" + "EventCode": "0x260AE", + "EventName": "PM_L3_P2_PF_RTY", + "BriefDescription": "L3 PF received retry port 2, every retry counted" }, {, "EventCode": "0x26086", @@ -1824,6 +1714,11 @@ "EventName": "PM_SHL_ST_DEP_CREATED", "BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled" }, + {, + "EventCode": "0x46882", + "EventName": "PM_L2_ST_HIT", + "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" + }, {, "EventCode": "0x360AC", "EventName": "PM_L3_SN0_BUSY", @@ -1844,11 +1739,6 @@ "EventName": "PM_L2_ST_MISS", "BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread" }, - {, - "EventCode": "0xF8B4", - "EventName": "PM_DC_PREF_XCONS_ALLOC", - "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch" - }, {, "EventCode": "0x35048", "EventName": "PM_IPTEG_FROM_DL2L3_SHR", @@ -1969,11 +1859,6 @@ "EventName": "PM_THRD_PRIO_2_3_CYC", "BriefDescription": "Cycles thread running at priority level 2 or 3" }, - {, - "EventCode": "0x10134", - "EventName": 
"PM_MRK_ST_DONE_L2", - "BriefDescription": "marked store completed in L2 ( RC machine done)" - }, {, "EventCode": "0x368B2", "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH", @@ -2004,11 +1889,6 @@ "EventName": "PM_L2_GRP_GUESS_WRONG", "BriefDescription": "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)" }, - {, - "EventCode": "0x368AE", - "EventName": "PM_L3_P1_CO_RTY", - "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" - }, {, "EventCode": "0xC0AC", "EventName": "PM_LSU_FLUSH_EMSH", @@ -2034,11 +1914,6 @@ "EventName": "PM_L2_GROUP_PUMP", "BriefDescription": "RC requests that were on group (aka nodel) pump attempts" }, - {, - "EventCode": "0xF0B0", - "EventName": "PM_L3_LD_PREF", - "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest" - }, {, "EventCode": "0x16080", "EventName": "PM_L2_LD", @@ -2049,6 +1924,11 @@ "EventName": "PM_MATH_FLOP_CMPL", "BriefDescription": "Math flop instruction completed" }, + {, + "EventCode": "0xC080", + "EventName": "PM_LS0_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x368B0", "EventName": "PM_L3_P1_SYS_PUMP", @@ -2119,11 +1999,6 @@ "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" }, - {, - "EventCode": "0xF0B8", - "EventName": "PM_LS0_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x20132", "EventName": "PM_MRK_DFU_FIN", @@ -2139,6 +2014,11 @@ "EventName": "PM_LSU_FLUSH_LHS", "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. 
If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed" }, + {, + "EventCode": "0x16084", + "EventName": "PM_L2_RCLD_DISP", + "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + }, {, "EventCode": "0x3F150", "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC", @@ -2224,11 +2104,6 @@ "EventName": "PM_IC_PREF_CANCEL_PAGE", "BriefDescription": "Prefetch Canceled due to page boundary" }, - {, - "EventCode": "0xF09C", - "EventName": "PM_SLB_TABLEWALK_CYC", - "BriefDescription": "Cycles when a tablewalk is pending on this thread on the SLB table" - }, {, "EventCode": "0x460AA", "EventName": "PM_L3_P0_CO_L31", @@ -2247,10 +2122,10 @@ {, "EventCode": "0x46082", "EventName": "PM_L2_ST_DISP", - "BriefDescription": "All successful D-side store dispatches for this thread " + "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" }, {, - "EventCode": "0x4609E", + "EventCode": "0x36880", "EventName": "PM_L2_INST_MISS", "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" }, @@ -2340,9 +2215,9 @@ "BriefDescription": "All ISU rejects" }, {, - "EventCode": "0x46882", - "EventName": "PM_L2_ST_HIT", - "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" + "EventCode": "0xC884", + "EventName": "PM_LS3_LD_VECTOR_FIN", + "BriefDescription": "" }, {, "EventCode": "0x360A8", @@ -2359,11 +2234,6 @@ "EventName": "PM_LSU_NCST", "BriefDescription": "Asserts when a i=1 store op is sent to the nest. No record of issue pipe (LS0/LS1) is maintained so this is for both pipes. Probably don't need separate LS0 and LS1" }, - {, - "EventCode": "0xD880", - "EventName": "PM_LSU1_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. 
The entry was not found in the Set prediction table" - }, {, "EventCode": "0xD0B8", "EventName": "PM_LSU_LMQ_FULL_CYC", @@ -2389,4 +2259,4 @@ "EventName": "PM_L3_PF_USAGE", "BriefDescription": "Rotating sample of 32 PF actives" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index bc2db636dabf..5af1abbe82c4 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -124,6 +124,11 @@ "EventName": "PM_PMC5_OVERFLOW", "BriefDescription": "Overflow from counter 5" }, + {, + "EventCode": "0x4505E", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operation Finished" + }, {, "EventCode": "0x2C018", "EventName": "PM_CMPLU_STALL_DMISS_L21_L31", @@ -389,11 +394,6 @@ "EventName": "PM_ICT_NOSLOT_BR_MPRED", "BriefDescription": "Ict empty for this thread due to branch mispred" }, - {, - "EventCode": "0x3405E", - "EventName": "PM_IFETCH_THROTTLE", - "BriefDescription": "Cycles in which Instruction fetch throttle was active." 
- }, {, "EventCode": "0x1F148", "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE", @@ -422,7 +422,7 @@ {, "EventCode": "0xD0A8", "EventName": "PM_DSLB_MISS", - "BriefDescription": "Data SLB Miss - Total of all segment sizes" + "BriefDescription": "gate_and(sd_pc_c0_comp_valid AND sd_pc_c0_comp_thread(0:1)=tid,sd_pc_c0_comp_ppc_count(0:3)) + gate_and(sd_pc_c1_comp_valid AND sd_pc_c1_comp_thread(0:1)=tid,sd_pc_c1_comp_ppc_count(0:3))" }, {, "EventCode": "0x4C058", @@ -549,4 +549,4 @@ "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json index 3ef8a10aac86..d0b89f930567 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json @@ -119,4 +119,4 @@ "EventName": "PM_1FLOP_CMPL", "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json index 8c0f12024afa..bc8e03d7a6b0 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json @@ -89,11 +89,6 @@ "EventName": "PM_STCX_FAIL", "BriefDescription": "stcx failed" }, - {, - "EventCode": "0x20112", - "EventName": "PM_MRK_NTF_FIN", - "BriefDescription": "Marked next to finish instruction finished" - }, {, "EventCode": "0x300F0", "EventName": "PM_ST_MISS_L1", -- cgit v1.2.3 From 4359dd88afb7cac8a98e32f6bdfe0b46c79bc3cd Mon Sep 17 00:00:00 2001 From: Thomas-Mich Richter Date: Tue, 7 Nov 2017 15:48:53 +0100 Subject: perf buildid-cache: Update help text for purge command Clarify the perf buildid-cache help 
text for the purge operation. The purge subcommand takes a list of files (binaries) as option parameter. Make the wording the same as for the add and remove operation. Signed-off-by: Thomas-Mich Richter Reviewed-by: Hendrik Brueckner Acked-by: Masami Hiramatsu Cc: Martin Schwidefsky LPU-Reference: 20171107144853.12925-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 3d354ba6e9c5..41db2cba77eb 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -325,8 +325,8 @@ int cmd_buildid_cache(int argc, const char **argv) "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), - OPT_STRING('p', "purge", &purge_name_list_str, "path list", - "path(s) to remove (remove old caches too)"), + OPT_STRING('p', "purge", &purge_name_list_str, "file list", + "file(s) to remove (remove old caches too)"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), -- cgit v1.2.3 From 35c0a81a97692cc0afe3d005c9a737bbde06e784 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Nov 2017 06:55:24 -0800 Subject: perf tools: Document some missing perf.data headers Document STAT and CACHE header entries. 
Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171109145528.23371-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.data-file-format.txt | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index e90c59c6d815..15e8b48077ba 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -238,6 +238,29 @@ struct auxtrace_index { struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT]; }; + HEADER_STAT = 19, + +This is merely a flag signifying that the data section contains data +recorded from perf stat record. + + HEADER_CACHE = 20, + +Description of the cache hierarchy. Based on the Linux sysfs format +in /sys/devices/system/cpu/cpu*/cache/ + + u32 version Currently always 1 + u32 number_of_cache_levels + +struct { + u32 level; + u32 line_size; + u32 sets; + u32 ways; + struct perf_header_string type; + struct perf_header_string size; + struct perf_header_string map; +}[number_of_cache_levels]; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, -- cgit v1.2.3 From 5039c8a28fa97b8dce7b363a5ecd4bee2b87bf03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Nov 2017 06:55:26 -0800 Subject: perf script: Allow printing period for non freq mode groups When using leader sampling the values of the not sampled but counted events are shown by perf script in "period". Currently printing period is only allowed when the main event has a period, that is it is in frequency mode. This implies that we cannot dump the values of counted events when the leader event is not in frequency mode. Just remove the check that the period must be set on all events. It will just be printed as 0 instead if it's not available. 
This fixes the following: $ perf record -c 100000 -e '{cycles,branches}:S' $ perf script -F event,period Further commentary by Jiri Olsa: The period will be the value of configured period, not 0: int perf_evsel__parse_sample(struct ... ... data->period = evsel->attr.sample_period; $ perf record -c 100000 $ perf script -F event,period | head -3 Failed to open /tmp/perf-2048.map, continuing without symbols 100000 cycles:ppp: 100000 cycles:ppp: other than that I think we can remove that check, because we will have always sane number in period Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171109145528.23371-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9b43bda45a41..ee7c7aaaae72 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -423,11 +423,6 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_CPU, allow_user_set)) return -EINVAL; - if (PRINT_FIELD(PERIOD) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_PERIOD, "PERIOD", - PERF_OUTPUT_PERIOD)) - return -EINVAL; - if (PRINT_FIELD(IREGS) && perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS)) -- cgit v1.2.3 From 958964f803b27baffd238708842b527a1d30e110 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Sun, 12 Nov 2017 10:10:46 +0900 Subject: perf top: Document missing options Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510449047-12941-2-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 4353262bc462..8a32cc77bead 100644 --- a/tools/perf/Documentation/perf-top.txt +++ 
b/tools/perf/Documentation/perf-top.txt @@ -268,6 +268,12 @@ INTERACTIVE PROMPTING KEYS [S]:: Stop annotation, return to full profile display. +[K]:: + Hide kernel symbols. + +[U]:: + Hide user symbols. + [z]:: Toggle event count zeroing across display updates. -- cgit v1.2.3 From 8fce3743cea47db86dd13ab4c479158a872271e8 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Sun, 12 Nov 2017 10:10:47 +0900 Subject: perf top: Remove a duplicate word Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510449047-12941-3-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0789f95ca2f3..68320ac5e9b0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -412,7 +412,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[S] stop annotation.\n"); fprintf(stdout, - "\t[K] hide kernel_symbols symbols. \t(%s)\n", + "\t[K] hide kernel symbols. \t(%s)\n", top->hide_kernel_symbols ? "yes" : "no"); fprintf(stdout, "\t[U] hide user symbols. \t(%s)\n", -- cgit v1.2.3 From d492326f160e44e08fcf132a63163b36dd8e8839 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:04 +0000 Subject: perf tests: Set evlist of test__backward_ring_buffer() to !overwrite Setting overwrite in perf_evlist__mmap() is meaningless because the event in this evlist is already have 'overwrite' postfix and goes to backward ring buffer automatically. Pass 'false' to perf_evlist__mmap() to make it similar to others. 
Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-3-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/backward-ring-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 71b9a0b613d2..43a8c6ac4070 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -59,7 +59,7 @@ static int do_test(struct perf_evlist *evlist, int mmap_pages, int err; char sbuf[STRERR_BUFSIZE]; - err = perf_evlist__mmap(evlist, mmap_pages, true); + err = perf_evlist__mmap(evlist, mmap_pages, false); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); -- cgit v1.2.3 From 677b0601768881934f658bebb1713c3c843893fa Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:05 +0000 Subject: perf tests: Set evlist of test__sw_clock_freq() to !overwrite Unsetting overwrite when calling perf_evlist__mmap() is harmless. This commit passes false to it, which makes it easier for following commits to eliminate the overwrite argument.
Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-4-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sw-clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 725a196991a8..c6937ed12e6b 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, 128, true); + err = perf_evlist__mmap(evlist, 128, false); if (err < 0) { pr_debug("failed to mmap event: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); -- cgit v1.2.3 From 301d724aa19add1c0cf3ec8cad0d10151d30393f Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:06 +0000 Subject: perf tests: Set evlist of test__basic_mmap() to !overwrite In this test, a large ring buffer is required so that all events can fit into it, so whether it is overwrite or not is meaningless. Change to !overwrite so that following commits can remove this argument.
Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-5-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5a8bf318f8a7..91f10d6d9ae2 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -94,7 +94,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[i] = 1 + rand() % 127; } - if (perf_evlist__mmap(evlist, 128, true) < 0) { + if (perf_evlist__mmap(evlist, 128, false) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; -- cgit v1.2.3 From a0e3dd79cdd8ad838cbcefeff530a15193f8336e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:07 +0000 Subject: perf tests: Set evlist of test__task_exit() to !overwrite Changing the ring buffer to !overwrite in this test is harmless because the test uses a very low frequency (1) and a very simple program (true). There should be only 3 events in the whole test, so overwriting cannot happen.
Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-6-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/task-exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index bc4a7344e274..98c098475e71 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -97,7 +97,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, 128, true) < 0) { + if (perf_evlist__mmap(evlist, 128, false) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; -- cgit v1.2.3 From 19993b82a571893e661afd90f1d77fa698785cee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 13 Nov 2017 16:06:29 -0300 Subject: perf machine: Guard against NULL in machine__exit() A recent fix for 'perf trace' introduced a bug where machine__exit(trace->host) could be called while trace->host was still NULL, so make this more robust by guarding against NULL, just like free() does. The problem happens, for instance, when !root users try to run 'perf trace': [acme@jouet linux]$ trace Error: No permissions to read /sys/kernel/debug/tracing/events/raw_syscalls/sys_(enter|exit) Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/tracing' perf: Segmentation fault Obtained 7 stack frames. 
[0x4f1b2e] /lib64/libc.so.6(+0x3671f) [0x7f43a1dd971f] [0x4f3fec] [0x47468b] [0x42a2db] /lib64/libc.so.6(__libc_start_main+0xe9) [0x7f43a1dc3509] [0x42a6c9] Segmentation fault (core dumped) [acme@jouet linux]$ Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrei Vagin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vasily Averin Cc: Wang Nan Fixes: 33974a414ce2 ("perf trace: Call machine__exit() at exit") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6a8d03c3d9b7..270f3223c6df 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -172,6 +172,9 @@ void machine__exit(struct machine *machine) { int i; + if (machine == NULL) + return; + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); -- cgit v1.2.3 From 2f0af8600e82e9f950fc32908386b9c639f88d48 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 Nov 2017 09:15:42 +0900 Subject: perf help: Fix a bug during strstart() conversion Commit 8e99b6d4533c changed prefixcmp() to strstarts() but failed to adjust the return value check in some places. This makes perf help print annoying output, like the example below, even for sane config items: $ perf help '.root': unsupported man viewer sub key. ...
Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Taeung Song Cc: Jiri Olsa Cc: Sihyeon Jang Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20171114001542.GA16464@sejong Fixes: 8e99b6d4533c ("tools include: Adopt strstarts() from the kernel") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index bd1fedef3d1c..a0f7ed2b869b 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -284,7 +284,7 @@ static int perf_help_config(const char *var, const char *value, void *cb) add_man_viewer(value); return 0; } - if (!strstarts(var, "man.")) + if (strstarts(var, "man.")) return add_man_viewer_info(var, value); return 0; @@ -314,7 +314,7 @@ static const char *cmd_to_page(const char *perf_cmd) if (!perf_cmd) return "perf"; - else if (!strstarts(perf_cmd, "perf")) + else if (strstarts(perf_cmd, "perf")) return perf_cmd; return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s; -- cgit v1.2.3 From 648388ae68e953b312e28eaf869fe6c01e2f70cc Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Nov 2017 08:55:40 +0530 Subject: perf annotate: Do not truncate instruction names at 6 chars There are many instructions, esp on PowerPC, whose mnemonics are longer than 6 characters. Using precision limit causes truncation of such mnemonics. Fix this by removing precision limit. Note that, 'width' is still 6, so alignment won't get affected for length <= 6. Before: li r11,-1 xscvdp vs1,vs1 add. r10,r10,r11 After: li r11,-1 xscvdpsxds vs1,vs1 add. 
r10,r10,r11 Reported-by: Donald Stence Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Taeung Song Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index eab4a8e3c679..30d74dabdc42 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -165,7 +165,7 @@ static void ins__delete(struct ins_operands *ops) static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); + return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw); } int ins__scnprintf(struct ins *ins, char *bf, size_t size, @@ -230,12 +230,12 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { if (ops->target.name) - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name); + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops); - return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr); + return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr); } static struct ins_ops call_ops = { @@ -299,7 +299,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, c++; } - return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64, + return scnprintf(bf, size, "%-6s %.*s%" PRIx64, ins->name, c ? 
c - ops->raw : 0, ops->raw, ops->target.offset); } @@ -372,7 +372,7 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size, if (ops->locked.ins.ops == NULL) return ins__raw_scnprintf(ins, bf, size, ops); - printed = scnprintf(bf, size, "%-6.6s ", ins->name); + printed = scnprintf(bf, size, "%-6s ", ins->name); return printed + ins__scnprintf(&ops->locked.ins, bf + printed, size - printed, ops->locked.ops); } @@ -448,7 +448,7 @@ out_free_source: static int mov__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s,%s", ins->name, + return scnprintf(bf, size, "%-6s %s,%s", ins->name, ops->source.name ?: ops->source.raw, ops->target.name ?: ops->target.raw); } @@ -488,7 +488,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops static int dec__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s", ins->name, + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name ?: ops->target.raw); } @@ -500,7 +500,7 @@ static struct ins_ops dec_ops = { static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, struct ins_operands *ops __maybe_unused) { - return scnprintf(bf, size, "%-6.6s", "nop"); + return scnprintf(bf, size, "%-6s", "nop"); } static struct ins_ops nop_ops = { @@ -990,7 +990,7 @@ void disasm_line__free(struct disasm_line *dl) int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) { if (raw || !dl->ins.ops) - return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw); + return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw); return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } -- cgit v1.2.3 From f231af789b11a2f1a3795acc3228a3e178a80c21 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 14 Nov 2017 08:18:46 +0100 Subject: perf test shell: Fix check open filename arg using 'perf trace' on s390x This 'perf 
test' case fails on s390x. The 'touch' command on s390x uses the 'openat' system call to open the file named on the command line: [root@s35lp76 perf]# perf probe -l probe:vfs_getname (on getname_flags:72@fs/namei.c with pathname) [root@s35lp76 perf]# perf trace -e open touch /tmp/abc 0.400 ( 0.015 ms): touch/27542 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3 [root@s35lp76 perf]# There is no 'open' system call for file '/tmp/abc'. Instead the 'openat' system call is used: [root@s35lp76 perf]# strace touch /tmp/abc execve("/usr/bin/touch", ["touch", "/tmp/abc"], 0x3ffd547ec98 /* 30 vars */) = 0 [...] openat(AT_FDCWD, "/tmp/abc", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666) = 3 [...] On s390x the 'egrep' command does not find a matching pattern and returns an error. Fix this for s390x by creating a platform-dependent command line that makes the 'perf trace' call listen to the 'openat' system call and produce the expected output. Signed-off-by: Thomas-Mich Richter Tested-by: Arnaldo Carvalho de Melo Cc: Hendrik Brueckner Cc: Thomas-Mich Richter LPU-Reference: 20171114071847.2381-1-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-3qf38jk0prz54rhmhyu871my@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 2e68c5f120da..2a9ef080efd0 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,8 +17,10 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - perf trace -e open touch $file 2>&1 | \ - egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open\(filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + test "$(uname -m)" = s390x && { 
svc="openat"; txt="dfd: +CWD, +"; } + + perf trace -e ${svc:-open} touch $file 2>&1 | \ + egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- cgit v1.2.3 From 0879e5e5f33c8a1eb01281ad920173664e68b266 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 14 Nov 2017 08:18:47 +0100 Subject: perf test shell: Fix test case probe libc's inet_pton on s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'perf test' case "probe libc's inet_pton & backtrace it with ping" fails on s390x. The reason is the 'realpath /lib64/ld*.so.* | uniq' line which returns 2 libraries: root@s35lp76 shell]# realpath /lib64/ld*.so.* | uniq /usr/lib64/ld-2.26.so /usr/lib64/ld_pre_smc.so.1.0.1 [root@s35lp76 shell] This output makes the "perf probe" command lines invalid. Use ldd tool to find out the libraries required by "bash" and check if symbol "inet_pton" is part of the "libc" library. Some distros do not have a /lib64 directory. I have also added a check for the existence of an IPv6 network interface before it is being used. Committer changes: We can't really use ldd for libc, as in some systems, such as x86_64, it has hardlinks and then ldd sees one and the kernel the other, so grep for libc in /proc/self/maps to get the one we'll receive from PERF_RECORD_MMAP. Thomas checked this change and acked it. 
Signed-off-by: Thomas-Mich Richter Tested-by: Arnaldo Carvalho de Melo Suggested-by: Hendrik Brückner Reviewed-by: Hendrik Brückner Link: http://lkml.kernel.org/r/20171114133409.GN8836@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 7a84d73324e3..8b3da21a08f1 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -10,8 +10,8 @@ . $(dirname $0)/lib/probe.sh -ld=$(realpath /lib64/ld*.so.* | uniq) -libc=$(echo $ld | sed 's/ld/libc/g') +libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') +nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254 trace_libc_inet_pton_backtrace() { idx=0 @@ -37,6 +37,9 @@ trace_libc_inet_pton_backtrace() { done } +# Check for IPv6 interface existence +ip a sh lo | fgrep -q inet6 || exit 2 + skip_if_no_perf_probe && \ perf probe -q $libc inet_pton && \ trace_libc_inet_pton_backtrace -- cgit v1.2.3 From 07d6f446a9e45b7e6e7438f8560e40d4dcfa0321 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:01:06 -0300 Subject: perf evlist: Add helper to check if attr.exclude_kernel is set in all evsels The warning about kptr_restrict needs to be emitted only when it is set and we ask for kernel space samples, so add a helper to help with that. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-fh7drty6yljei9gxxzer6eup@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 12 ++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 14 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ccb749f9a83f..b62e523a7035 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1786,3 +1786,15 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, state_err: return; } + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (!evsel->attr.exclude_kernel) + return false; + } + + return true; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e72ae64c11ac..491f69542920 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -312,4 +312,6 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *event); + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); #endif /* __PERF_EVLIST_H */ -- cgit v1.2.3 From 9c39ed90153d95d362004ed0d5e259ec46af3803 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:12:11 -0300 Subject: perf report: Ignore kptr_restrict when not sampling the kernel If none of the evsels has attr.exclude_kernel set to zero, no kernel samples, so no point in warning the user about problems in processing kernel samples, as there will be none. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7dn926v3at8txxkky92aesz2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1394cd8d96f7..af5dd038195e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -441,6 +441,9 @@ static void report__warn_kptr_restrict(const struct report *rep) struct map *kernel_map = machine__kernel_map(&rep->session->machines.host); struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL; + if (perf_evlist__exclude_kernel(rep->session->evlist)) + return; + if (kernel_map == NULL || (kernel_map->dso->hit && (kernel_kmap->ref_reloc_sym == NULL || -- cgit v1.2.3 From 6c4439545517c9d6155e85f1a508be38408fb0b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:03:19 -0300 Subject: perf record: Ignore kptr_restrict when not sampling the kernel If we're not sampling the kernel, we shouldn't care about kptr_restrict neither synthesize anything for assisting in resolving kernel samples, like the reference relocation symbol or kernel modules information. Before: $ cat /proc/sys/kernel/kptr_restrict /proc/sys/kernel/perf_event_paranoid 2 2 $ perf record sleep 1 WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. Couldn't record kernel reference relocation symbol Symbol resolution may be skewed if relocation was used (e.g. kexec). Check /proc/kallsyms permission or run as root. 
[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf evlist -v cycles:uppp: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 $ After: $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (10 samples) ] $ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-t025e9zftbx2b8cq2w01g5e5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 5f78ce943407..003255910c05 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -765,17 +765,19 @@ static int record__synthesize(struct record *rec, bool tail) goto out; } - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); - - err = perf_event__synthesize_modules(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. 
kexec).\n" - "Check /proc/modules permission or run as root.\n"); + if (!perf_evlist__exclude_kernel(rec->evlist)) { + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); + + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + } if (perf_guest) { machines__process_guests(&session->machines, @@ -1709,7 +1711,7 @@ int cmd_record(int argc, const char **argv) err = -ENOMEM; - if (symbol_conf.kptr_restrict) + if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) pr_warning( "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" "check /proc/sys/kernel/kptr_restrict.\n\n" -- cgit v1.2.3 From b89a5124d2089eec8f090dcd05dd88abaec0cbd2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 13:30:19 -0300 Subject: perf top: Ignore kptr_restrict when not sampling the kernel If all events have attr.exclude_kernel set, no need to look at kptr_restrict. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-yegpzg5bf2im69g0tfizqaqz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 68320ac5e9b0..865191281591 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -735,14 +735,16 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { - ui__warning( + if (!perf_evlist__exclude_kernel(top->session->evlist)) { + ui__warning( "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict.\n\n" "Kernel%s samples will not be resolved.\n", al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? " modules" : ""); - if (use_browser <= 0) - sleep(5); + if (use_browser <= 0) + sleep(5); + } machine->kptr_restrict_warned = true; } -- cgit v1.2.3 From 239fb4fed6c49110ebebe7378a84d96e3f0cf55d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Nov 2017 15:04:47 -0600 Subject: perf c2c: Fix spelling mistakes in browser help text Togle -> Toggle, lenght -> length. 
Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171114150447.f4b63bc5d97c83cdaa8bf7dc@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 17855c4626a0..f1da9b0833c0 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2224,9 +2224,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) struct hist_browser *browser; int key = -1; const char help[] = - " ENTER Togle callchains (if present) \n" - " n Togle Node details info \n" - " s Togle full lenght of symbol and source line columns \n" + " ENTER Toggle callchains (if present) \n" + " n Toggle Node details info \n" + " s Toggle full length of symbol and source line columns \n" " q Return back to cacheline list \n"; /* Display compact version first. */ @@ -2303,7 +2303,7 @@ static int perf_c2c__hists_browse(struct hists *hists) int key = -1; const char help[] = " d Display cacheline details \n" - " ENTER Togle callchains (if present) \n" + " ENTER Toggle callchains (if present) \n" " q Quit \n"; browser = perf_c2c_browser__new(hists); -- cgit v1.2.3 From 114bc191c37028d87a540251d93e7b328f4de3fe Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Nov 2017 15:04:52 -0600 Subject: perf evsel: Say which PMU Hardware event doesn't support sampling/overflow-interrupts Help identify to the user the event with the unsupported sampling error. Also suggest a corrective action. BEFORE: $ sudo ./oldperf record -e armv8_pmuv3/mem_access/,ccn/cycles/,armv8_pmuv3/l2d_cache/ true Error: PMU Hardware doesn't support sampling/overflow-interrupts. AFTER: $ sudo ./newperf record -e armv8_pmuv3/mem_access/,ccn/cycles/,armv8_pmuv3/l2d_cache/ true Error: ccn/cycles/: PMU Hardware doesn't support sampling/overflow-interrupts. 
Try 'perf stat' Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171114150452.e846f2e23684c7d7d8ee706f@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cb9bcdb065ea..b8e9def77f44 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2745,8 +2745,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, break; case EOPNOTSUPP: if (evsel->attr.sample_period != 0) - return scnprintf(msg, size, "%s", - "PMU Hardware doesn't support sampling/overflow-interrupts."); + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", + perf_evsel__name(evsel)); if (evsel->attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); -- cgit v1.2.3 From 38ba1daf8164d43d48b45c8e8deee4b20c21484d Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 22:06:49 +0900 Subject: perf lock: Document missing options Add man page entry for --force. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510837609-6277-1-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-lock.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index ab25be28c9dc..74d774592196 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -42,6 +42,10 @@ COMMON OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. +-f:: +--force:: + Don't complain, do it. 
+ REPORT OPTIONS -------------- -- cgit v1.2.3 From 742015ff12ae27324b8ad2d28e43da6743529bad Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 9 Aug 2017 18:14:06 +0200 Subject: perf: Fix header.size for namespace events Reset header size for namespace events, otherwise it only gets bigger in ctx iterations. Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Fixes: e422267322cd ("perf: Add PERF_RECORD_NAMESPACES to include namespaces related info") Link: http://lkml.kernel.org/n/tip-nlo4gonz9d4guyb8153ukzt0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 81dd57b9e5e3..aa21555972aa 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6640,6 +6640,7 @@ static void perf_event_namespaces_output(struct perf_event *event, struct perf_namespaces_event *namespaces_event = data; struct perf_output_handle handle; struct perf_sample_data sample; + u16 header_size = namespaces_event->event_id.header.size; int ret; if (!perf_event_namespaces_match(event)) @@ -6650,7 +6651,7 @@ static void perf_event_namespaces_output(struct perf_event *event, ret = perf_output_begin(&handle, event, namespaces_event->event_id.header.size); if (ret) - return; + goto out; namespaces_event->event_id.pid = perf_event_pid(event, namespaces_event->task); @@ -6662,6 +6663,8 @@ static void perf_event_namespaces_output(struct perf_event *event, perf_event__output_id_sample(event, &handle, &sample); perf_output_end(&handle); +out: + namespaces_event->event_id.header.size = header_size; } static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info, -- cgit v1.2.3 From 52186b8aa40f06350b33f8e4031879d389e2b9f2 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:02 +0900 Subject: perf inject: Document missing options Add the missing --force option to the man page. 
Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-1-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 87b2588d1cbd..a64d6588470e 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -60,6 +60,10 @@ include::itrace.txt[] found in the jitdumps files captured in the input perf.data file. Use this option if you are monitoring environment using JIT runtimes, such as Java, DART or V8. +-f:: +--force:: + Don't complain, do it. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] -- cgit v1.2.3 From 9b9d28a0087608052b39e7d9ee2f07b4e0fd6dca Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:03 +0900 Subject: perf trace: Document missing option, colons Add missing --force option to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-2-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index d53bea6bd571..6909cf1e0eea 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -86,18 +86,18 @@ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0- In per-thread mode with inheritance mode on (default), Events are captured only when the thread executes on the designated CPUs. Default is to monitor all CPUs. ---duration: +--duration:: Show only events that had a duration greater than N.M ms. 
---sched: +--sched:: Accrue thread runtime and provide a summary at the end of the session. --i ---input +-i:: +--input:: Process events from a given perf data file. --T ---time +-T:: +--time:: Print full timestamp rather time relative to first sample. --comm:: @@ -117,6 +117,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Show tool stats such as number of times fd->pathname was discovered thru hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. +-f:: +--force:: + Don't complain, do it. + -F=[all|min|maj]:: --pf=[all|min|maj]:: Trace pagefaults. Optionally, you can specify whether you want minor, -- cgit v1.2.3 From f4a30d2bee25b92f25086c81e33c80d767500097 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:04 +0900 Subject: perf timechart: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-3-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-timechart.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index df98d1c82688..ef0c7565bd5c 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -50,7 +50,9 @@ TIMECHART OPTIONS -p:: --process:: Select the processes to display, by name or PID - +-f:: +--force:: + Don't complain, do it. --symfs=:: Look for files with symbols relative to this directory. -n:: -- cgit v1.2.3 From e9b61e52c384f4af13404ad95161af58af08c908 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:05 +0900 Subject: perf sched: Document missing --force option Add --force to the man page. 
Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-4-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 55b67338548e..c7e50f263887 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -74,6 +74,10 @@ OPTIONS --dump-raw-trace=:: Display verbose dump of the sched data. +-f:: +--force:: + Don't complain, do it. + OPTIONS for 'perf sched map' ---------------------------- -- cgit v1.2.3 From deb368acf1731bf89c34b171094c4f8eff66ebd9 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:06 +0900 Subject: perf evlist: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-5-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-evlist.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index 6f7200fb85cf..c0a66400a960 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -20,6 +20,10 @@ OPTIONS --input=:: Input file name. (default: perf.data unless stdin is a fifo) +-f:: +--force:: + Don't complain, do it. + -F:: --freq=:: Show just the sample frequency used for each event. -- cgit v1.2.3 From 5a79eef4eccf0571e856eb13c0ffe19083d27474 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:07 +0900 Subject: perf buildid-cache: Document missing --force option Add --force to the man page. 
Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-6-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-buildid-cache.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 84681007f80f..73c2650bd0db 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -24,6 +24,9 @@ OPTIONS -a:: --add=:: Add specified file to the cache. +-f:: +--force:: + Don't complain, do it. -k:: --kcore:: Add specified kcore file to the cache. For the current host that is -- cgit v1.2.3 From 914eb9ca51117776d83e6761a1c555fb76f0ded2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 6 Aug 2017 16:39:39 +0200 Subject: perf callchain: Reset cursor arg instead of callchain_cursor We already pass cursor into thread__resolve_callchain function, so there's no point in resetting the global instance. 
Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-puk015qvuppao9m1xtdy9v7j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 270f3223c6df..64d255f6a537 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2204,7 +2204,7 @@ int thread__resolve_callchain(struct thread *thread, { int ret = 0; - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); if (callchain_param.order == ORDER_CALLEE) { ret = thread__resolve_callchain_sample(thread, cursor, -- cgit v1.2.3 From 3ad31d8a0df257c3f18c989119359c1f25cd009d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 16:07:05 +0200 Subject: perf evsel: Centralize perf_sample initialization Move the initialization bits into common place at the beginning of the function. Also removing some superfluous zero initialization for addr and transaction, because we zero all the data at the top. 
Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-1gv5t6fvv735t1rt3mxpy1h9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b8e9def77f44..03d7abcdc6b7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1983,6 +1983,8 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + data->id = -1ULL; + data->data_src = PERF_MEM_DATA_SRC_NONE; if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) @@ -2000,7 +2002,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (evsel->sample_size + sizeof(event->header) > event->header.size) return -EFAULT; - data->id = -1ULL; if (type & PERF_SAMPLE_IDENTIFIER) { data->id = *array; array++; @@ -2030,7 +2031,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->addr = 0; if (type & PERF_SAMPLE_ADDR) { data->addr = *array; array++; @@ -2194,14 +2194,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->data_src = PERF_MEM_DATA_SRC_NONE; if (type & PERF_SAMPLE_DATA_SRC) { OVERFLOW_CHECK_u64(array); data->data_src = *array; array++; } - data->transaction = 0; if (type & PERF_SAMPLE_TRANSACTION) { OVERFLOW_CHECK_u64(array); data->transaction = *array; -- cgit v1.2.3 From 014681208ea0d1a7e5ea2f014242e7d196d04c34 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:10:28 +0200 Subject: perf evlist: Add perf_evlist__parse_sample_timestamp function Add perf_evlist__parse_sample_timestamp to retrieve the timestamp of the sample. 
The idea is to use this function instead of the full sample parsing before we queue the sample. At that time only the timestamp is needed and we parse the sample once again later on delivery. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-o7syqo8lipj4or7renpu8e8y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 11 ++++++++ tools/perf/util/evlist.h | 4 +++ tools/perf/util/evsel.c | 65 +++++++++++++++++++++++++++++++++++++++++++----- tools/perf/util/evsel.h | 4 +++ 4 files changed, 78 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b62e523a7035..199bb82efbcd 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1582,6 +1582,17 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even return perf_evsel__parse_sample(evsel, event, sample); } +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp) +{ + struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (!evsel) + return -EFAULT; + return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); +} + size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) { struct perf_evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 491f69542920..4e8131dacbd7 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -205,6 +205,10 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample); +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp); + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct 
perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 03d7abcdc6b7..95853c51c0ca 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1962,6 +1962,20 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset, #define OVERFLOW_CHECK_u64(offset) \ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64)) +static int +perf_event__check_size(union perf_event *event, unsigned int sample_size) +{ + /* + * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes + * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to + * check the format does not go past the end of the event. + */ + if (sample_size + sizeof(event->header) > event->header.size) + return -EFAULT; + + return 0; +} + int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -1994,12 +2008,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array = event->sample.array; - /* - * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes - * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to - * check the format does not go past the end of the event. 
- */ - if (evsel->sample_size + sizeof(event->header) > event->header.size) + if (perf_event__check_size(event, evsel->sample_size)) return -EFAULT; if (type & PERF_SAMPLE_IDENTIFIER) { @@ -2232,6 +2241,50 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, return 0; } +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp) +{ + u64 type = evsel->attr.sample_type; + const u64 *array; + + if (!(type & PERF_SAMPLE_TIME)) + return -1; + + if (event->header.type != PERF_RECORD_SAMPLE) { + struct perf_sample data = { + .time = -1ULL, + }; + + if (!evsel->attr.sample_id_all) + return -1; + if (perf_evsel__parse_id_sample(evsel, event, &data)) + return -1; + + *timestamp = data.time; + return 0; + } + + array = event->sample.array; + + if (perf_event__check_size(event, evsel->sample_size)) + return -EFAULT; + + if (type & PERF_SAMPLE_IDENTIFIER) + array++; + + if (type & PERF_SAMPLE_IP) + array++; + + if (type & PERF_SAMPLE_TID) + array++; + + if (type & PERF_SAMPLE_TIME) + *timestamp = *array; + + return 0; +} + size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0688880227e1..c3663a70c9b9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -338,6 +338,10 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample); +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp); + static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) { return list_entry(evsel->node.next, struct perf_evsel, node); -- cgit v1.2.3 From dc83e1394083d6e12625a3158bf88396dfaec633 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:24:33 +0200 Subject: perf ordered_events: 
Pass timestamp arg in perf_session__queue_event There's no need to pass whole sample data, because it's only timestamp that is used. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xd1hpoze3kgb1rb639o3vehb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/util/ordered-events.c | 3 +-- tools/perf/util/ordered-events.h | 2 +- tools/perf/util/session.c | 6 +++--- tools/perf/util/session.h | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0c36f2ac6a0e..cd253db6917f 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -754,7 +754,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return -1; } - err = perf_session__queue_event(kvm->session, event, &sample, 0); + err = perf_session__queue_event(kvm->session, event, sample.time, 0); /* * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. 
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 8e09fd2d842f..bad9e0296e9a 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -157,9 +157,8 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve } int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - u64 timestamp = sample->time; struct ordered_event *oevent; if (!timestamp || timestamp == ~0ULL) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 96e5292d88e2..8c7a2948593e 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -45,7 +45,7 @@ struct ordered_events { }; int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5c412310f266..8976e417eab2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -873,9 +873,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused, } int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - return ordered_events__queue(&s->ordered_events, event, sample, file_offset); + return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset); } static void callchain__lbr_callstack_printf(struct perf_sample *sample) @@ -1517,7 +1517,7 @@ static s64 perf_session__process_event(struct perf_session *session, return ret; if 
(tool->ordered_events) { - ret = perf_session__queue_event(session, event, &sample, file_offset); + ret = perf_session__queue_event(session, event, sample.time, file_offset); if (ret != -ETIME) return ret; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 80bc80de8362..5b1c32b3694a 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -53,7 +53,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, int perf_session__process_events(struct perf_session *session); int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); -- cgit v1.2.3 From 93d10af26bb7159349158b721ba2e258291d53c3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:21:14 +0200 Subject: perf tools: Optimize sample parsing for ordered events Currently when using ordered events we parse the sample twice (the perf_evlist__parse_sample function). Once before we queue the sample for sorting: perf_session__process_event perf_evlist__parse_sample(sample) perf_session__queue_event(sample.time) And then when we deliver the sorted sample: ordered_events__deliver_event perf_evlist__parse_sample perf_session__deliver_event We can skip the initial full sample parsing by using perf_evlist__parse_sample_timestamp function, which got introduced earlier. 
The new path looks like: perf_session__process_event perf_evlist__parse_sample_timestamp perf_session__queue_event ordered_events__deliver_event perf_session__deliver_event perf_evlist__parse_sample It saves some instructions and is slightly faster: Before: Performance counter stats for './perf.old report --stdio' (5 runs): 64,396,007,225 cycles:u ( +- 0.97% ) 105,882,112,735 instructions:u # 1.64 insn per cycle ( +- 0.00% ) 21.618103465 seconds time elapsed ( +- 1.12% ) After: Performance counter stats for './perf report --stdio' (5 runs): 60,567,807,182 cycles:u ( +- 0.40% ) 104,853,333,514 instructions:u # 1.73 insn per cycle ( +- 0.00% ) 20.168895243 seconds time elapsed ( +- 0.32% ) Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 8 ++++---- tools/perf/util/session.c | 41 ++++++++++++++++++----------------------- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index cd253db6917f..597c7de9bec9 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, u64 *mmap_time) { union perf_event *event; - struct perf_sample sample; + u64 timestamp; s64 n = 0; int err; *mmap_time = ULLONG_MAX; while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { - err = perf_evlist__parse_sample(kvm->evlist, event, &sample); + err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp); if (err) { perf_evlist__mmap_consume(kvm->evlist, idx); pr_err("Failed to parse sample\n"); return -1; } - err = perf_session__queue_event(kvm->session, event, sample.time, 0); + err = perf_session__queue_event(kvm->session, event, timestamp, 0); /* * FIXME: Here we can't consume the event, as 
perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. @@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, /* save time stamp of our first sample for this mmap */ if (n == 0) - *mmap_time = sample.time; + *mmap_time = timestamp; /* limit events per mmap handled all at once */ n++; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8976e417eab2..df2857137908 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -27,7 +27,6 @@ static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset); @@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session) static int ordered_events__deliver_event(struct ordered_events *oe, struct ordered_event *event) { - struct perf_sample sample; struct perf_session *session = container_of(oe, struct perf_session, ordered_events); - int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); - - if (ret) { - pr_err("Can't parse sample, err = %d\n", ret); - return ret; - } - return perf_session__deliver_event(session, event->event, &sample, + return perf_session__deliver_event(session, event->event, session->tool, event->file_offset); } @@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines, static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { + struct perf_sample sample; int ret; - ret = auxtrace__process_event(session, event, sample, tool); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) return ret; if (ret > 0) return 0; return 
machines__deliver_event(&session->machines, session->evlist, - event, sample, tool, file_offset); + event, &sample, tool, file_offset); } static s64 perf_session__process_user_event(struct perf_session *session, @@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session, { struct perf_evlist *evlist = session->evlist; struct perf_tool *tool = session->tool; - struct perf_sample sample; int ret; if (session->header.needs_swap) @@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session, if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, file_offset); - /* - * For all kernel events we get the sample data - */ - ret = perf_evlist__parse_sample(evlist, event, &sample); - if (ret) - return ret; - if (tool->ordered_events) { - ret = perf_session__queue_event(session, event, sample.time, file_offset); + u64 timestamp; + + ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); + if (ret) + return ret; + + ret = perf_session__queue_event(session, event, timestamp, file_offset); if (ret != -ETIME) return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return perf_session__deliver_event(session, event, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) -- cgit v1.2.3 From b135e5ee1a0e325166c30b16cf5493fea44ede45 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Nov 2017 10:23:39 +0100 Subject: perf top: Fix window dimensions change handling The stdio perf top crashes when we change the terminal window size. The reason is that we assumed we get the perf_top pointer as a signal handler argument which is not the case. Changing the SIGWINCH handler logic to change global resize variable, which is checked in the main thread loop.
Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ysuzwz77oev1ftgvdscn9bpu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 865191281591..4cbd3dd14a33 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -77,6 +77,7 @@ #include "sane_ctype.h" static volatile int done; +static volatile int resize; #define HEADER_LINE_NR 5 @@ -86,10 +87,13 @@ static void perf_top__update_print_entries(struct perf_top *top) } static void perf_top__sig_winch(int sig __maybe_unused, - siginfo_t *info __maybe_unused, void *arg) + siginfo_t *info __maybe_unused, void *arg __maybe_unused) { - struct perf_top *top = arg; + resize = 1; +} +static void perf_top__resize(struct perf_top *top) +{ get_term_dimensions(&top->winsize); perf_top__update_print_entries(top); } @@ -480,7 +484,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) .sa_sigaction = perf_top__sig_winch, .sa_flags = SA_SIGINFO, }; - perf_top__sig_winch(SIGWINCH, NULL, top); + perf_top__resize(top); sigaction(SIGWINCH, &act, NULL); } else { signal(SIGWINCH, SIG_DFL); @@ -1035,6 +1039,11 @@ static int __cmd_top(struct perf_top *top) if (hits == top->samples) ret = perf_evlist__poll(top->evlist, 100); + + if (resize) { + perf_top__resize(top); + resize = 0; + } } ret = 0; -- cgit v1.2.3 From 244a1086aba97a6b673162fd6684c5c024b724db Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 14:30:57 +0100 Subject: perf top: Use signal interface for SIGWINCH handler There's no need for SA_SIGINFO data in SIGWINCH handler, switching it to register the handler via signal interface as we do for the rest of the signals in perf top. 
Signed-off-by: Jiri Olsa Tested-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-elxp1vdnaog1scaj13cx7cu0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4cbd3dd14a33..a29a98334f33 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -86,8 +86,7 @@ static void perf_top__update_print_entries(struct perf_top *top) top->print_entries = top->winsize.ws_row - HEADER_LINE_NR; } -static void perf_top__sig_winch(int sig __maybe_unused, - siginfo_t *info __maybe_unused, void *arg __maybe_unused) +static void winch_sig(int sig __maybe_unused) { resize = 1; } @@ -480,12 +479,8 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) case 'e': prompt_integer(&top->print_entries, "Enter display entries (lines)"); if (top->print_entries == 0) { - struct sigaction act = { - .sa_sigaction = perf_top__sig_winch, - .sa_flags = SA_SIGINFO, - }; perf_top__resize(top); - sigaction(SIGWINCH, &act, NULL); + signal(SIGWINCH, winch_sig); } else { signal(SIGWINCH, SIG_DFL); } @@ -1366,12 +1361,8 @@ int cmd_top(int argc, const char **argv) get_term_dimensions(&top.winsize); if (top.print_entries == 0) { - struct sigaction act = { - .sa_sigaction = perf_top__sig_winch, - .sa_flags = SA_SIGINFO, - }; perf_top__update_print_entries(&top); - sigaction(SIGWINCH, &act, NULL); + signal(SIGWINCH, winch_sig); } status = __cmd_top(&top); -- cgit v1.2.3 From a7eec4c677fe60c8760fa9054b578c743ff6a3ec Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 11:53:21 +0100 Subject: perf top: Fix crash when annotating symbol Ravi reported crash in perf top --stdio when annotating a function [1]. 
The issue was, that we don't pass evsel pointer into symbol__annotate() function, which got overlooked in the last annotation changes. [1] https://marc.info/?l=linux-kernel&m=151060884412702&w=2 Committer note: This fixes the crash, but makes it stumble into another bug, double locking the annotation data structures, that is in turn fixed by the next patch in this series. Signed-off-by: Jiri Olsa Tested-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6eol035redpoqvxqnuiqudtc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a29a98334f33..0077724fb24f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -99,6 +99,7 @@ static void perf_top__resize(struct perf_top *top) static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) { + struct perf_evsel *evsel = hists_to_evsel(he->hists); struct symbol *sym; struct annotation *notes; struct map *map; @@ -137,7 +138,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, NULL, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; -- cgit v1.2.3 From 9e4e0a9d2ef37c7bc60c32e2a3189bd1f04067a5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 12:05:59 +0100 Subject: perf tools: Change (symbol|annotation)__calc_percent return type to void There's no need for symbol__calc_percent and annotation__calc_percent functions to return any value, since it's always zero. Changing both functions to return void.
Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-z0gs28hh24m4gia1t1ctraye@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- tools/perf/util/annotate.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 30d74dabdc42..846abb4955ac 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1584,8 +1584,8 @@ static void calc_percent(struct sym_hist *hist, } } -static int annotation__calc_percent(struct annotation *notes, - struct perf_evsel *evsel, s64 len) +static void annotation__calc_percent(struct annotation *notes, + struct perf_evsel *evsel, s64 len) { struct annotation_line *al, *next; @@ -1609,15 +1609,13 @@ static int annotation__calc_percent(struct annotation *notes, calc_percent(hist, sample, al->offset, end); } } - - return 0; } -int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - return annotation__calc_percent(notes, evsel, symbol__size(sym)); + annotation__calc_percent(notes, evsel, symbol__size(sym)); } int symbol__annotate(struct symbol *sym, struct map *map, @@ -1656,10 +1654,11 @@ int symbol__annotate(struct symbol *sym, struct map *map, } err = symbol__disassemble(sym, &args); - if (err) - return err; + if (!err) + symbol__calc_percent(sym, evsel); + + return err; - return symbol__calc_percent(sym, evsel); } static void insert_source_line(struct rb_root *root, struct annotation_line *al) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 4fc805a271d2..6d7289e88fa3 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,7 +107,7 @@ struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head 
*head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); -int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { u64 nr_samples; -- cgit v1.2.3 From 05d3f1a1d5a3d37ca4b591d5524f5a5b159d0564 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 12:20:08 +0100 Subject: perf tools: Move symbol__calc_percent() call to outside symbol__disassemble() We need to call symbol__calc_percent() periodically for top, so it's no longer convenient to keep it in symbol__disassemble(). Let's separate the symbol__disassemble() to allocate and init the symbol annotation structs and symbol__calc_percent() to compute the lines percentages based on symbol hists data. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-gtnp8t4tb00q6lag07psn5nq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 ++ tools/perf/ui/gtk/annotate.c | 2 ++ tools/perf/util/annotate.c | 9 +++------ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5a2f37a91feb..03b7363a49c9 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1126,6 +1126,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, goto out_free_offsets; } + symbol__calc_percent(sym, evsel); + ui_helpline__push("Press ESC to exit"); notes = symbol__annotation(sym); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 5e0a56df0b4c..cdb5ecf91666 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -177,6 +177,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, return -1; } + symbol__calc_percent(sym, evsel); + if 
(perf_gtk__is_active_context(pgctx)) { window = pgctx->main_window; notebook = pgctx->notebook; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 846abb4955ac..22ea7936d92f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1653,12 +1653,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - err = symbol__disassemble(sym, &args); - if (!err) - symbol__calc_percent(sym, evsel); - - return err; - + return symbol__disassemble(sym, &args); } static void insert_source_line(struct rb_root *root, struct annotation_line *al) @@ -2005,6 +2000,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; + symbol__calc_percent(sym, evsel); + if (print_lines) { srcline_full_filename = full_paths; symbol__calc_lines(sym, map, &source_line); -- cgit v1.2.3 From 07dc8bc9a6b15f54d3ad962af74a096c7d7b42b4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 7 Nov 2017 10:08:01 +0000 Subject: netfilter: remove redundant assignment to e The assignment to variable e is redundant since the same assignment occurs just a few lines later, hence it can be removed. Cleans up clang warning for arp_tables, ip_tables and ip6_tables: warning: Value stored to 'e' is never read Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 1 - net/ipv4/netfilter/ip_tables.c | 1 - net/ipv6/netfilter/ip6_tables.c | 1 - 3 files changed, 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f88221aebc9d..0c3c944a7b72 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -373,7 +373,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = entry0 + newpos; } else { /* ... 
this is a fallthru */ newpos = pos + e->next_offset; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4cbe5e80f3bf..2e0d339028bb 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -439,7 +439,6 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = entry0 + newpos; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f06e25065a34..1d7ae9366335 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -458,7 +458,6 @@ mark_source_chains(const struct xt_table_info *newinfo, if (!xt_find_jump_offset(offsets, newpos, newinfo->number)) return 0; - e = entry0 + newpos; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; -- cgit v1.2.3 From 613d0776d3fe7eb28c695a63a5533a1ec8258c86 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 14:32:37 +0300 Subject: netfilter: exit_net cleanup check added Be sure that lists initialized in the net_init hook were returned to their initial state.
Signed-off-by: Vasily Averin Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 1 + net/netfilter/nf_tables_api.c | 7 +++++++ net/netfilter/nfnetlink_log.c | 5 +++++ net/netfilter/nfnetlink_queue.c | 5 +++++ net/netfilter/x_tables.c | 9 +++++++++ 5 files changed, 27 insertions(+) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 17b4ca562944..e35b8d074f06 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -819,6 +819,7 @@ static void clusterip_net_exit(struct net *net) cn->procdir = NULL; #endif nf_unregister_net_hook(net, &cip_arp_ops); + WARN_ON_ONCE(!list_empty(&cn->configs)); } static struct pernet_operations clusterip_net_ops = { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d8327b43e4dc..10798b357481 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5847,6 +5847,12 @@ static int __net_init nf_tables_init_net(struct net *net) return 0; } +static void __net_exit nf_tables_exit_net(struct net *net) +{ + WARN_ON_ONCE(!list_empty(&net->nft.af_info)); + WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); +} + int __nft_release_basechain(struct nft_ctx *ctx) { struct nft_rule *rule, *nr; @@ -5917,6 +5923,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, + .exit = nf_tables_exit_net, }; static int __init nf_tables_module_init(void) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index cad6498f10b0..1f511ed0fea3 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1093,10 +1093,15 @@ static int __net_init nfnl_log_net_init(struct net *net) static void __net_exit nfnl_log_net_exit(struct net *net) { + struct nfnl_log_net *log = nfnl_log_pernet(net); + unsigned int i; + #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_log", 
net->nf.proc_netfilter); #endif nf_log_unset(net, &nfulnl_logger); + for (i = 0; i < INSTANCE_BUCKETS; i++) + WARN_ON_ONCE(!hlist_empty(&log->instance_table[i])); } static struct pernet_operations nfnl_log_net_ops = { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index a16356cacec3..c09b36755ed7 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1512,10 +1512,15 @@ static int __net_init nfnl_queue_net_init(struct net *net) static void __net_exit nfnl_queue_net_exit(struct net *net) { + struct nfnl_queue_net *q = nfnl_queue_pernet(net); + unsigned int i; + nf_unregister_queue_handler(net); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); #endif + for (i = 0; i < INSTANCE_BUCKETS; i++) + WARN_ON_ONCE(!hlist_empty(&q->instance_table[i])); } static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index a77dd514297c..55802e97f906 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1729,8 +1729,17 @@ static int __net_init xt_net_init(struct net *net) return 0; } +static void __net_exit xt_net_exit(struct net *net) +{ + int i; + + for (i = 0; i < NFPROTO_NUMPROTO; i++) + WARN_ON_ONCE(!list_empty(&net->xt.tables[i])); +} + static struct pernet_operations xt_net_ops = { .init = xt_net_init, + .exit = xt_net_exit, }; static int __init xt_init(void) -- cgit v1.2.3 From bc7d811ace4ad39a3941089ca871633366878719 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 13 Nov 2017 09:09:40 +0100 Subject: netfilter: nf_ct_h323: Convert CHECK_BOUND macro to function It is bad practice to return in a macro; this patch moves the check into a function.
Signed-off-by: Eric Sesterhenn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 94 +++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 29 deletions(-) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index cf1bf2605c10..3d9a009ac147 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -103,7 +103,6 @@ struct bitstr { #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} -#define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND) static unsigned int get_len(struct bitstr *bs); static unsigned int get_bit(struct bitstr *bs); static unsigned int get_bits(struct bitstr *bs, unsigned int b); @@ -165,6 +164,14 @@ static unsigned int get_len(struct bitstr *bs) return v; } +static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes) +{ + if (*bs->cur + bytes > *bs->end) + return 1; + + return 0; +} + /****************************************************************************/ static unsigned int get_bit(struct bitstr *bs) { @@ -280,7 +287,8 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f, INC_BIT(bs); - CHECK_BOUND(bs, 0); + if (nf_h323_error_boundary(bs, 0)) + return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -293,11 +301,14 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f, PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); BYTE_ALIGN(bs); - CHECK_BOUND(bs, 1); + if (nf_h323_error_boundary(bs, 1)) + return H323_ERROR_BOUND; + len = *bs->cur++; bs->cur += len; + if (nf_h323_error_boundary(bs, 0)) + return H323_ERROR_BOUND; - CHECK_BOUND(bs, 0); return H323_ERROR_NONE; } @@ -330,7 +341,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, break; case UNCO: BYTE_ALIGN(bs); - CHECK_BOUND(bs, 2); + 
if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; len = get_len(bs); bs->cur += len; break; @@ -341,7 +353,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, PRINT("\n"); - CHECK_BOUND(bs, 0); + if (nf_h323_error_boundary(bs, 0)) + return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -357,7 +370,8 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f, INC_BITS(bs, f->sz); } - CHECK_BOUND(bs, 0); + if (nf_h323_error_boundary(bs, 0)) + return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -375,12 +389,14 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, len = f->lb; break; case WORD: /* 2-byte length */ - CHECK_BOUND(bs, 2); + if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; len = (*bs->cur++) << 8; len += (*bs->cur++) + f->lb; break; case SEMI: - CHECK_BOUND(bs, 2); + if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; len = get_len(bs); break; default: @@ -391,7 +407,8 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, bs->cur += len >> 3; bs->bit = len & 7; - CHECK_BOUND(bs, 0); + if (nf_h323_error_boundary(bs, 0)) + return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -409,7 +426,8 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f, BYTE_ALIGN(bs); INC_BITS(bs, (len << 2)); - CHECK_BOUND(bs, 0); + if (nf_h323_error_boundary(bs, 0)) + return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -440,12 +458,14 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, break; case BYTE: /* Range == 256 */ BYTE_ALIGN(bs); - CHECK_BOUND(bs, 1); + if (nf_h323_error_boundary(bs, 1)) + return H323_ERROR_BOUND; len = (*bs->cur++) + f->lb; break; case SEMI: BYTE_ALIGN(bs); - CHECK_BOUND(bs, 2); + if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; len = get_len(bs) + f->lb; break; default: /* 2 <= Range <= 255 */ @@ -458,7 +478,8 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, PRINT("\n"); - 
CHECK_BOUND(bs, 0); + if (nf_h323_error_boundary(bs, 0)) + return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -473,7 +494,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, switch (f->sz) { case BYTE: /* Range == 256 */ BYTE_ALIGN(bs); - CHECK_BOUND(bs, 1); + if (nf_h323_error_boundary(bs, 1)) + return H323_ERROR_BOUND; len = (*bs->cur++) + f->lb; break; default: /* 2 <= Range <= 255 */ @@ -484,7 +506,8 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, bs->cur += len << 1; - CHECK_BOUND(bs, 0); + if (nf_h323_error_boundary(bs, 0)) + return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -525,9 +548,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Decode */ if (son->attr & OPEN) { /* Open field */ - CHECK_BOUND(bs, 2); + if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; len = get_len(bs); - CHECK_BOUND(bs, len); + if (nf_h323_error_boundary(bs, len)) + return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name); @@ -556,7 +581,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get the extension bitmap */ bmp2_len = get_bits(bs, 7) + 1; - CHECK_BOUND(bs, (bmp2_len + 7) >> 3); + if (nf_h323_error_boundary(bs, (bmp2_len + 7) >> 3)) + return H323_ERROR_BOUND; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) @@ -567,9 +593,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, for (opt = 0; opt < bmp2_len; opt++, i++, son++) { /* Check Range */ if (i >= f->ub) { /* Newer Version? 
*/ - CHECK_BOUND(bs, 2); + if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; len = get_len(bs); - CHECK_BOUND(bs, len); + if (nf_h323_error_boundary(bs, len)) + return H323_ERROR_BOUND; bs->cur += len; continue; } @@ -583,9 +611,11 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, if (!((0x80000000 >> opt) & bmp2)) /* Not present */ continue; - CHECK_BOUND(bs, 2); + if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; len = get_len(bs); - CHECK_BOUND(bs, len); + if (nf_h323_error_boundary(bs, len)) + return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name); @@ -623,19 +653,22 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, switch (f->sz) { case BYTE: BYTE_ALIGN(bs); - CHECK_BOUND(bs, 1); + if (nf_h323_error_boundary(bs, 1)) + return H323_ERROR_BOUND; count = *bs->cur++; break; case WORD: BYTE_ALIGN(bs); - CHECK_BOUND(bs, 2); + if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; count = *bs->cur++; count <<= 8; count += *bs->cur++; break; case SEMI: BYTE_ALIGN(bs); - CHECK_BOUND(bs, 2); + if (nf_h323_error_boundary(bs, 2)) + return H323_ERROR_BOUND; count = get_len(bs); break; default: @@ -659,7 +692,8 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, if (son->attr & OPEN) { BYTE_ALIGN(bs); len = get_len(bs); - CHECK_BOUND(bs, len); + if (nf_h323_error_boundary(bs, len)) + return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name); @@ -728,7 +762,8 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, if (type >= f->ub) { /* Newer version? 
*/ BYTE_ALIGN(bs); len = get_len(bs); - CHECK_BOUND(bs, len); + if (nf_h323_error_boundary(bs, len)) + return H323_ERROR_BOUND; bs->cur += len; return H323_ERROR_NONE; } @@ -743,7 +778,8 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, if (ext || (son->attr & OPEN)) { BYTE_ALIGN(bs); len = get_len(bs); - CHECK_BOUND(bs, len); + if (nf_h323_error_boundary(bs, len)) + return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name); -- cgit v1.2.3 From ec8a8f3c31ddef0a7d9626c4b8a4baa30f3b80aa Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 13 Nov 2017 09:09:41 +0100 Subject: netfilter: nf_ct_h323: Extend nf_h323_error_boundary to work on bits as well This patch fixes several out of bounds memory reads by extending the nf_h323_error_boundary() function to work on bits as well and check the affected parts. Signed-off-by: Eric Sesterhenn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 92 +++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 3d9a009ac147..dc6347342e34 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -164,8 +164,13 @@ static unsigned int get_len(struct bitstr *bs) return v; } -static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes) +static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits) { + bits += bs->bit; + bytes += bits / BITS_PER_BYTE; + if (bits % BITS_PER_BYTE > 0) + bytes++; + if (*bs->cur + bytes > *bs->end) return 1; @@ -286,8 +291,7 @@ static int decode_bool(struct bitstr *bs, const struct field_t *f, PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); INC_BIT(bs); - - if (nf_h323_error_boundary(bs, 0)) + if (nf_h323_error_boundary(bs, 0, 0)) return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -301,12 +305,12 @@
static int decode_oid(struct bitstr *bs, const struct field_t *f, PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, 1)) + if (nf_h323_error_boundary(bs, 1, 0)) return H323_ERROR_BOUND; len = *bs->cur++; bs->cur += len; - if (nf_h323_error_boundary(bs, 0)) + if (nf_h323_error_boundary(bs, 0, 0)) return H323_ERROR_BOUND; return H323_ERROR_NONE; @@ -330,6 +334,8 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, bs->cur += 2; break; case CONS: /* 64K < Range < 4G */ + if (nf_h323_error_boundary(bs, 0, 2)) + return H323_ERROR_BOUND; len = get_bits(bs, 2) + 1; BYTE_ALIGN(bs); if (base && (f->attr & DECODE)) { /* timeToLive */ @@ -341,7 +347,7 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, break; case UNCO: BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs); bs->cur += len; @@ -353,7 +359,7 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, PRINT("\n"); - if (nf_h323_error_boundary(bs, 0)) + if (nf_h323_error_boundary(bs, 0, 0)) return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -370,7 +376,7 @@ static int decode_enum(struct bitstr *bs, const struct field_t *f, INC_BITS(bs, f->sz); } - if (nf_h323_error_boundary(bs, 0)) + if (nf_h323_error_boundary(bs, 0, 0)) return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -389,13 +395,13 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, len = f->lb; break; case WORD: /* 2-byte length */ - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = (*bs->cur++) << 8; len += (*bs->cur++) + f->lb; break; case SEMI: - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs); break; @@ -407,7 +413,7 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, bs->cur += len >> 3; bs->bit = len & 7; - if 
(nf_h323_error_boundary(bs, 0)) + if (nf_h323_error_boundary(bs, 0, 0)) return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -421,12 +427,14 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f, PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); /* 2 <= Range <= 255 */ + if (nf_h323_error_boundary(bs, 0, f->sz)) + return H323_ERROR_BOUND; len = get_bits(bs, f->sz) + f->lb; BYTE_ALIGN(bs); INC_BITS(bs, (len << 2)); - if (nf_h323_error_boundary(bs, 0)) + if (nf_h323_error_boundary(bs, 0, 0)) return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -458,17 +466,19 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, break; case BYTE: /* Range == 256 */ BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, 1)) + if (nf_h323_error_boundary(bs, 1, 0)) return H323_ERROR_BOUND; len = (*bs->cur++) + f->lb; break; case SEMI: BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs) + f->lb; break; default: /* 2 <= Range <= 255 */ + if (nf_h323_error_boundary(bs, 0, f->sz)) + return H323_ERROR_BOUND; len = get_bits(bs, f->sz) + f->lb; BYTE_ALIGN(bs); break; @@ -478,7 +488,7 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, PRINT("\n"); - if (nf_h323_error_boundary(bs, 0)) + if (nf_h323_error_boundary(bs, 0, 0)) return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -494,11 +504,13 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, switch (f->sz) { case BYTE: /* Range == 256 */ BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, 1)) + if (nf_h323_error_boundary(bs, 1, 0)) return H323_ERROR_BOUND; len = (*bs->cur++) + f->lb; break; default: /* 2 <= Range <= 255 */ + if (nf_h323_error_boundary(bs, 0, f->sz)) + return H323_ERROR_BOUND; len = get_bits(bs, f->sz) + f->lb; BYTE_ALIGN(bs); break; @@ -506,7 +518,7 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, bs->cur += len << 1; - if (nf_h323_error_boundary(bs, 
0)) + if (nf_h323_error_boundary(bs, 0, 0)) return H323_ERROR_BOUND; return H323_ERROR_NONE; } @@ -526,9 +538,13 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; /* Extensible? */ + if (nf_h323_error_boundary(bs, 0, 1)) + return H323_ERROR_BOUND; ext = (f->attr & EXT) ? get_bit(bs) : 0; /* Get fields bitmap */ + if (nf_h323_error_boundary(bs, 0, f->sz)) + return H323_ERROR_BOUND; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -548,10 +564,10 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Decode */ if (son->attr & OPEN) { /* Open field */ - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs); - if (nf_h323_error_boundary(bs, len)) + if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, @@ -580,8 +596,10 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, return H323_ERROR_NONE; /* Get the extension bitmap */ + if (nf_h323_error_boundary(bs, 0, 7)) + return H323_ERROR_BOUND; bmp2_len = get_bits(bs, 7) + 1; - if (nf_h323_error_boundary(bs, (bmp2_len + 7) >> 3)) + if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; @@ -593,10 +611,10 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, for (opt = 0; opt < bmp2_len; opt++, i++, son++) { /* Check Range */ if (i >= f->ub) { /* Newer Version? 
*/ - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs); - if (nf_h323_error_boundary(bs, len)) + if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; bs->cur += len; continue; @@ -611,10 +629,10 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, if (!((0x80000000 >> opt) & bmp2)) /* Not present */ continue; - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs); - if (nf_h323_error_boundary(bs, len)) + if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", @@ -653,13 +671,13 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, switch (f->sz) { case BYTE: BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, 1)) + if (nf_h323_error_boundary(bs, 1, 0)) return H323_ERROR_BOUND; count = *bs->cur++; break; case WORD: BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; count = *bs->cur++; count <<= 8; @@ -667,11 +685,13 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, break; case SEMI: BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, 2)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; count = get_len(bs); break; default: + if (nf_h323_error_boundary(bs, 0, f->sz)) + return H323_ERROR_BOUND; count = get_bits(bs, f->sz); break; } @@ -691,8 +711,10 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, for (i = 0; i < count; i++) { if (son->attr & OPEN) { BYTE_ALIGN(bs); + if (nf_h323_error_boundary(bs, 2, 0)) + return H323_ERROR_BOUND; len = get_len(bs); - if (nf_h323_error_boundary(bs, len)) + if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, @@ -744,11 +766,17 @@ static int 
decode_choice(struct bitstr *bs, const struct field_t *f, base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; /* Decode the choice index number */ + if (nf_h323_error_boundary(bs, 0, 1)) + return H323_ERROR_BOUND; if ((f->attr & EXT) && get_bit(bs)) { ext = 1; + if (nf_h323_error_boundary(bs, 0, 7)) + return H323_ERROR_BOUND; type = get_bits(bs, 7) + f->lb; } else { ext = 0; + if (nf_h323_error_boundary(bs, 0, f->sz)) + return H323_ERROR_BOUND; type = get_bits(bs, f->sz); if (type >= f->lb) return H323_ERROR_RANGE; @@ -761,8 +789,10 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, /* Check Range */ if (type >= f->ub) { /* Newer version? */ BYTE_ALIGN(bs); + if (nf_h323_error_boundary(bs, 2, 0)) + return H323_ERROR_BOUND; len = get_len(bs); - if (nf_h323_error_boundary(bs, len)) + if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; bs->cur += len; return H323_ERROR_NONE; @@ -777,8 +807,10 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, if (ext || (son->attr & OPEN)) { BYTE_ALIGN(bs); + if (nf_h323_error_boundary(bs, len, 0)) + return H323_ERROR_BOUND; len = get_len(bs); - if (nf_h323_error_boundary(bs, len)) + if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", -- cgit v1.2.3 From fbcd253d2448b8f168241e38f629a36c4c8c1e94 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 19 Nov 2017 21:27:28 +0100 Subject: netfilter: conntrack: lower timeout to RETRANS seconds if window is 0 When zero window is announced we can get into a situation where connection stays around forever: 1. One side announces zero window. 2. Other side closes. In this case, no FIN is sent (stuck in send queue). Unless other side opens the window up again conntrack stays in ESTABLISHED state for a very long time. 
Lets alleviate this by lowering the timeout to RETRANS (5 minutes), the other end should be sending zero window probes to keep the connection established as long as a socket still exists. Cc: Jozsef Kadlecsik Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b12fc07111d0..37ef35b861f2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1039,6 +1039,9 @@ static int tcp_packet(struct nf_conn *ct, IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; + else if (ct->proto.tcp.last_win == 0 && + timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) + timeout = timeouts[TCP_CONNTRACK_RETRANS]; else timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); -- cgit v1.2.3 From 542134c0375b5ca2b1d18490c02b8a20bfdd8d74 Mon Sep 17 00:00:00 2001 From: Eudean Sun Date: Tue, 21 Nov 2017 10:43:24 -0800 Subject: HID: cp2112: Fix I2C_BLOCK_DATA transactions The existing driver erroneously treats I2C_BLOCK_DATA and BLOCK_DATA commands the same. For I2C_BLOCK_DATA reads, the length of the read is provided in data->block[0], but the length itself should not be sent to the slave. In contrast, for BLOCK_DATA reads no length is specified since the length will be the first byte returned from the slave. When copying data back to the data buffer, for an I2C_BLOCK_DATA read we have to take care not to overwrite data->block[0] to avoid overwriting the length. A BLOCK_DATA read doesn't have this concern since the first byte returned by the device is the length and belongs in data->block[0]. 
For I2C_BLOCK_DATA writes, the length is also provided in data->block[0], but the length itself is not sent to the slave (in contrast to BLOCK_DATA writes where the length prefixes the data sent to the slave). This was tested on physical hardware using i2cdump with the i and s flags to test the behavior of I2C_BLOCK_DATA reads and BLOCK_DATA reads, respectively. Writes were not tested but the I2C_BLOCK_DATA write change is pretty simple to verify by inspection. Signed-off-by: Eudean Sun Signed-off-by: Jiri Kosina --- drivers/hid/hid-cp2112.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 68cdc962265b..271f31461da4 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -696,8 +696,16 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, (u8 *)&word, 2); break; case I2C_SMBUS_I2C_BLOCK_DATA: - size = I2C_SMBUS_BLOCK_DATA; - /* fallthrough */ + if (read_write == I2C_SMBUS_READ) { + read_length = data->block[0]; + count = cp2112_write_read_req(buf, addr, read_length, + command, NULL, 0); + } else { + count = cp2112_write_req(buf, addr, command, + data->block + 1, + data->block[0]); + } + break; case I2C_SMBUS_BLOCK_DATA: if (I2C_SMBUS_READ == read_write) { count = cp2112_write_read_req(buf, addr, @@ -785,6 +793,9 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, case I2C_SMBUS_WORD_DATA: data->word = le16_to_cpup((__le16 *)buf); break; + case I2C_SMBUS_I2C_BLOCK_DATA: + memcpy(data->block + 1, buf, read_length); + break; case I2C_SMBUS_BLOCK_DATA: if (read_length > I2C_SMBUS_BLOCK_MAX) { ret = -EPROTO; -- cgit v1.2.3 From 56986b07d17b4a19416e248aaca9367c241a824b Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 22 Nov 2017 13:59:19 +0800 Subject: ASoC: rt5645: reset RT5645_AD_DA_MIXER at probe RT5645_AD_DA_MIXER (0x29) register will not be reset to default after SW reset. So we have to write it to its default value in i2c_probe. 
Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 5f24df4fae8e..fcd02c2c76f1 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, regmap_read(regmap, RT5645_VENDOR_ID, &val); rt5645->v_id = val & 0xff; + regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080); + ret = regmap_register_patch(rt5645->regmap, init_list, ARRAY_SIZE(init_list)); if (ret != 0) -- cgit v1.2.3 From 254beff97b4714bac4ec8add5a6888c1adc1ad8f Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Fri, 24 Nov 2017 16:11:22 +0800 Subject: ASoC: rt5514: Make sure the DMIC delay will be happened after normal SUPPLY widgets power on The patch makes sure the DMIC delay will be happened after normal SUPPLY widgets power on. If there are some platforms that provide the MCLK using the SUPPLY widget, it will make sure the delay time is helpful. 
Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- sound/soc/codecs/rt5514.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c index 2a5b5d74e697..2dd6e9f990a4 100644 --- a/sound/soc/codecs/rt5514.c +++ b/sound/soc/codecs/rt5514.c @@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = { SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0), - SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0, rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU), SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1, -- cgit v1.2.3 From 5a1314fa697fc65cefaba64cd4699bfc3e6882a6 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 21 Nov 2017 10:09:02 +0100 Subject: spi: xilinx: Detect stall with Unknown commands When the core is configured in C_SPI_MODE > 0, it integrates a lookup table that automatically configures the core in dual or quad mode based on the command (first byte on the tx fifo). Unfortunately, that list mode_?_memory_*.mif does not contain all the commands supported by the flash. Since 4.14 spi-nor automatically tries to probe the flash using SFDP (command 0x5a), and that command is not part of the list_mode table. With the right combination of C_SPI_MODE and C_SPI_MEMORY this leads to a stall that can only be recovered from with a soft reset. This patch detects this kind of stall and returns -EIO to the caller on those commands. spi-nor can handle this error properly: m25p80 spi0.0: Detected stall. Check C_SPI_MODE and C_SPI_MEMORY.
0x21 0x2404 m25p80 spi0.0: SPI transfer failed: -5 spi_master spi0: failed to transfer one message from queue m25p80 spi0.0: s25sl064p (8192 Kbytes) Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-xilinx.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index bc7100b93dfc..e0b9fe1d0e37 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) while (remaining_words) { int n_words, tx_words, rx_words; u32 sr; + int stalled; n_words = min(remaining_words, xspi->buffer_size); @@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) /* Read out all the data from the Rx FIFO */ rx_words = n_words; + stalled = 10; while (rx_words) { + if (rx_words == n_words && !(stalled--) && + !(sr & XSPI_SR_TX_EMPTY_MASK) && + (sr & XSPI_SR_RX_EMPTY_MASK)) { + dev_err(&spi->dev, + "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n"); + xspi_init_hw(xspi); + return -EIO; + } + if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) { xilinx_spi_rx(xspi); rx_words--; -- cgit v1.2.3 From 5ddc3c656bfb5c90d0196ff72b908d0343fef85e Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 25 Nov 2017 15:48:32 -0800 Subject: Input: ims-pcu - fix typo in the error message 1. change "to" to "too". 2. move ")" to the front of "\n", which discovered by Joe Perches. 
Signed-off-by: Zhen Lei Reviewed-by: Joe Perches Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ims-pcu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index ae473123583b..3d51175c4d72 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1651,7 +1651,7 @@ ims_pcu_get_cdc_union_desc(struct usb_interface *intf) return union_desc; dev_err(&intf->dev, - "Union descriptor to short (%d vs %zd\n)", + "Union descriptor too short (%d vs %zd)\n", union_desc->bLength, sizeof(*union_desc)); return NULL; } -- cgit v1.2.3 From 10d900303f1c3a821eb0bef4e7b7ece16768fba4 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Sat, 25 Nov 2017 16:48:41 -0800 Subject: Input: elantech - add new icbody type 15 The touchpad of Lenovo Thinkpad L480 reports its version as 15. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index b84cd978fce2..a4aaa748e987 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1613,7 +1613,7 @@ static int elantech_set_properties(struct elantech_data *etd) case 5: etd->hw_version = 3; break; - case 6 ... 14: + case 6 ... 15: etd->hw_version = 4; break; default: -- cgit v1.2.3 From fd3e454366603c8e35b31c52195b1ea8798a8fff Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Wed, 15 Nov 2017 22:59:02 +0530 Subject: ACPI / NUMA: ia64: Parse all entries of SRAT memory affinity table In the current implementation, SRAT Memory Affinity Structure table parsing is restricted to the maximum number of memblocks allowed (NR_NODE_MEMBLKS). However, NR_NODE_MEMBLKS is defined individually as per architecture requirements.
Hence removing the restriction of SRAT Memory Affinity Structure parsing in ACPI driver code and let architecture code check for allowed memblocks count. This check is already there in the x86 code, so do the same on ia64. Signed-off-by: Ganapatrao Kulkarni Acked-by: Tony Luck Signed-off-by: Rafael J. Wysocki --- arch/ia64/kernel/acpi.c | 5 +++++ drivers/acpi/numa.c | 3 +-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 1d29b2f8726b..1dacbf5e9e09 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -504,6 +504,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) if (!(ma->flags & ACPI_SRAT_MEM_ENABLED)) return -1; + if (num_node_memblks >= NR_NODE_MEMBLKS) { + pr_err("NUMA: too many memblk ranges\n"); + return -EINVAL; + } + /* record this node in proximity bitmap */ pxm_bit_set(pxm); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 917f1cc0fda4..8ccaae3550d2 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -460,8 +460,7 @@ int __init acpi_numa_init(void) srat_proc, ARRAY_SIZE(srat_proc), 0); cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, - acpi_parse_memory_affinity, - NR_NODE_MEMBLKS); + acpi_parse_memory_affinity, 0); } /* SLIT: System Locality Information Table */ -- cgit v1.2.3 From 8f275615a623183584b1604789cdcb240342ffbc Mon Sep 17 00:00:00 2001 From: Jung-uk Kim Date: Fri, 17 Nov 2017 15:40:14 -0800 Subject: ACPICA: Fix an off-by-one error in acpi_get_timer_duration(). ACPICA commit b4fd33f3c2af014aeec978d46392d286fd7f52b3 Delta calculation has an off-by-one error when there is a rollover. For example, when start_ticks is 0x00FFFFFF and end_ticks is 0x00000000 (for 24-bit timer), delta_ticks should be 1 (one) but it was 0 (zero). Link: https://github.com/acpica/acpica/commit/b4fd33f3 Signed-off-by: Jung-uk Kim Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/hwtimer.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/acpica/hwtimer.c b/drivers/acpi/acpica/hwtimer.c index a2f4e25d45b1..5b4282902a83 100644 --- a/drivers/acpi/acpica/hwtimer.c +++ b/drivers/acpi/acpica/hwtimer.c @@ -150,10 +150,10 @@ ACPI_EXPORT_SYMBOL(acpi_get_timer) * ******************************************************************************/ acpi_status -acpi_get_timer_duration(u32 start_ticks, u32 end_ticks, u32 * time_elapsed) +acpi_get_timer_duration(u32 start_ticks, u32 end_ticks, u32 *time_elapsed) { acpi_status status; - u32 delta_ticks; + u64 delta_ticks; u64 quotient; ACPI_FUNCTION_TRACE(acpi_get_timer_duration); @@ -168,30 +168,29 @@ acpi_get_timer_duration(u32 start_ticks, u32 end_ticks, u32 * time_elapsed) return_ACPI_STATUS(AE_SUPPORT); } + if (start_ticks == end_ticks) { + *time_elapsed = 0; + return_ACPI_STATUS(AE_OK); + } + /* * Compute Tick Delta: * Handle (max one) timer rollovers on 24-bit versus 32-bit timers. 
*/ - if (start_ticks < end_ticks) { - delta_ticks = end_ticks - start_ticks; - } else if (start_ticks > end_ticks) { + delta_ticks = end_ticks; + if (start_ticks > end_ticks) { if ((acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) == 0) { /* 24-bit Timer */ - delta_ticks = - (((0x00FFFFFF - start_ticks) + - end_ticks) & 0x00FFFFFF); + delta_ticks |= (u64)1 << 24; } else { /* 32-bit Timer */ - delta_ticks = (0xFFFFFFFF - start_ticks) + end_ticks; + delta_ticks |= (u64)1 << 32; } - } else { /* start_ticks == end_ticks */ - - *time_elapsed = 0; - return_ACPI_STATUS(AE_OK); } + delta_ticks -= start_ticks; /* * Compute Duration (Requires a 64-bit multiply and divide): @@ -199,10 +198,10 @@ acpi_get_timer_duration(u32 start_ticks, u32 end_ticks, u32 * time_elapsed) * time_elapsed (microseconds) = * (delta_ticks * ACPI_USEC_PER_SEC) / ACPI_PM_TIMER_FREQUENCY; */ - status = acpi_ut_short_divide(((u64)delta_ticks) * ACPI_USEC_PER_SEC, + status = acpi_ut_short_divide(delta_ticks * ACPI_USEC_PER_SEC, ACPI_PM_TIMER_FREQUENCY, &quotient, NULL); - *time_elapsed = (u32) quotient; + *time_elapsed = (u32)quotient; return_ACPI_STATUS(status); } -- cgit v1.2.3 From 5c74663e20bca25fa37d4d488265367b8d6b975f Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Fri, 17 Nov 2017 15:40:15 -0800 Subject: ACPICA: Disassembler: reset parser_state's Aml pointer when parsing bad externals ACPICA commit e7e25137471d7f75960fdb8caf757db0426245ca Link: https://github.com/acpica/acpica/commit/e7e25137 Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/acpica/psobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c index 0bef6df71bba..8cbe36159376 100644 --- a/drivers/acpi/acpica/psobject.c +++ b/drivers/acpi/acpica/psobject.c @@ -382,6 +382,7 @@ acpi_ps_create_op(struct acpi_walk_state *walk_state, walk_state->aml - walk_state->parser_state.aml_start); walk_state->aml = walk_state->parser_state.aml + 2; + walk_state->parser_state.aml = walk_state->aml; return_ACPI_STATUS(AE_CTRL_PARSE_CONTINUE); } #endif -- cgit v1.2.3 From ee470f08b07d6c3a07a8d9dd7d1817e68cdf3b86 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Fri, 17 Nov 2017 15:40:16 -0800 Subject: ACPICA: disassembler: getting rid of error message ACPICA commit 7d542c6f97e27f7d0e90be1afd98097c3840e007 This error message tends to clutter up the disassembled ASL file with information that is unnecessary. Link: https://github.com/acpica/acpica/commit/7d542c6f Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/psobject.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c index 8cbe36159376..c0b179883ff2 100644 --- a/drivers/acpi/acpica/psobject.c +++ b/drivers/acpi/acpica/psobject.c @@ -372,15 +372,8 @@ acpi_ps_create_op(struct acpi_walk_state *walk_state, * external declaration opcode. Setting walk_state->Aml to * walk_state->parser_state.Aml + 2 moves increments the * walk_state->Aml past the object type and the paramcount of the - * external opcode. For the error message, only print the AML - * offset. We could attempt to print the name but this may cause - * a segmentation fault when printing the namepath because the - * AML may be incorrect. + * external opcode. 
*/ - acpi_os_printf - ("// Invalid external declaration at AML offset 0x%x.\n", - walk_state->aml - - walk_state->parser_state.aml_start); walk_state->aml = walk_state->parser_state.aml + 2; walk_state->parser_state.aml = walk_state->aml; return_ACPI_STATUS(AE_CTRL_PARSE_CONTINUE); -- cgit v1.2.3 From 29ad1f88ab7dd6c220ff9f5b9faed718b443f4a4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 17 Nov 2017 15:40:17 -0800 Subject: ACPICA: Avoid null pointer dereference on Op. ACPICA commit 08a00639b0d6756e8ba1421419fc3728904651d9 The calls to acpi_os_acquire_object can result in a null being assigned to Op (for example if a mutex acquire fails) which can lead to a null pointer dereference on Op on the call to ASL_CV_TRANSFER_COMMENTS (via function cv_transfer_comments). Move the block into the previous block that checks for a null Op so that we never can call cv_transfer_comments with a null Op. Detected by: coverity_scan CID#1371660 ("Dereference after null check") Link: https://github.com/acpica/acpica/commit/08a00639 Signed-off-by: Colin Ian King Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/psutils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c index 02642760cb93..e15b636b1d4b 100644 --- a/drivers/acpi/acpica/psutils.c +++ b/drivers/acpi/acpica/psutils.c @@ -158,10 +158,10 @@ union acpi_parse_object *acpi_ps_alloc_op(u16 opcode, u8 *aml) if (opcode == AML_SCOPE_OP) { acpi_gbl_current_scope = op; } - } - if (gbl_capture_comments) { - ASL_CV_TRANSFER_COMMENTS(op); + if (gbl_capture_comments) { + ASL_CV_TRANSFER_COMMENTS(op); + } } return (op); -- cgit v1.2.3 From 164a08cee1358e360c47fcb26a7720461d5853a5 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Fri, 17 Nov 2017 15:40:18 -0800 Subject: ACPICA: Dispatcher: Introduce timeout mechanism for infinite loop detection ACPICA commit 9605023e7e6d1f05581502766c8cf2905bcc03d9 This patch implements a new infinite loop detection mechanism to replace the old one, it uses acpi_os_get_timer() to limit loop execution into a determined time slice. This is useful in case some hardware/firmware operations really require the AML interpreter to wait while the old mechanism could expire too fast on recent machines. The new mechanism converts old acpi_gbl_max_loop_iterations to store the user configurable value for the new mechanism in order to allow users to be still able to configure this value for acpiexec via command line. This patch also removes wrong initilization code of acpi_gbl_max_loop_iterations accordingly (it should have been initialized by ACPI_INIT_GLOBAL, and the default value is also properly tuned for acpiexec). Reported by M. Foronda, fixed by Lv Zheng. Link: https://github.com/acpica/acpica/commit/9605023e Link: https://bugzilla.kernel.org/show_bug.cgi?id=156501 Reported-by: M. Foronda Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/aclocal.h | 2 +- drivers/acpi/acpica/dscontrol.c | 16 +++++++++------- drivers/acpi/acpica/utinit.c | 1 - include/acpi/acconfig.h | 4 ++-- include/acpi/acpixf.h | 4 ++-- include/acpi/actypes.h | 2 ++ 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 0d45b8bb1678..bed041d41596 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -622,7 +622,7 @@ struct acpi_control_state { union acpi_parse_object *predicate_op; u8 *aml_predicate_start; /* Start of if/while predicate */ u8 *package_end; /* End of if/while block */ - u32 loop_count; /* While() loop counter */ + u64 loop_timeout; /* While() loop timeout */ }; /* diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c index f470e81b0499..244075dbc03a 100644 --- a/drivers/acpi/acpica/dscontrol.c +++ b/drivers/acpi/acpica/dscontrol.c @@ -118,6 +118,8 @@ acpi_ds_exec_begin_control_op(struct acpi_walk_state *walk_state, control_state->control.package_end = walk_state->parser_state.pkg_end; control_state->control.opcode = op->common.aml_opcode; + control_state->control.loop_timeout = acpi_os_get_timer() + + (u64)(acpi_gbl_max_loop_iterations * ACPI_100NSEC_PER_SEC); /* Push the control state on this walk's control stack */ @@ -206,14 +208,14 @@ acpi_ds_exec_end_control_op(struct acpi_walk_state *walk_state, /* Predicate was true, the body of the loop was just executed */ /* - * This loop counter mechanism allows the interpreter to escape - * possibly infinite loops. This can occur in poorly written AML - * when the hardware does not respond within a while loop and the - * loop does not implement a timeout. + * This infinite loop detection mechanism allows the interpreter + * to escape possibly infinite loops. This can occur in poorly + * written AML when the hardware does not respond within a while + * loop and the loop does not implement a timeout. 
*/ - control_state->control.loop_count++; - if (control_state->control.loop_count > - acpi_gbl_max_loop_iterations) { + if (ACPI_TIME_AFTER(acpi_os_get_timer(), + control_state->control. + loop_timeout)) { status = AE_AML_INFINITE_LOOP; break; } diff --git a/drivers/acpi/acpica/utinit.c b/drivers/acpi/acpica/utinit.c index 23e766d1691d..45eeb0dcf283 100644 --- a/drivers/acpi/acpica/utinit.c +++ b/drivers/acpi/acpica/utinit.c @@ -206,7 +206,6 @@ acpi_status acpi_ut_init_globals(void) acpi_gbl_next_owner_id_offset = 0; acpi_gbl_debugger_configuration = DEBUGGER_THREADING; acpi_gbl_osi_mutex = NULL; - acpi_gbl_max_loop_iterations = ACPI_MAX_LOOP_COUNT; /* Hardware oriented */ diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h index 6db3b4668b1a..ffe364fa4040 100644 --- a/include/acpi/acconfig.h +++ b/include/acpi/acconfig.h @@ -145,9 +145,9 @@ #define ACPI_ADDRESS_RANGE_MAX 2 -/* Maximum number of While() loops before abort */ +/* Maximum time (default 30s) of While() loops before abort */ -#define ACPI_MAX_LOOP_COUNT 0x000FFFFF +#define ACPI_MAX_LOOP_TIMEOUT 30 /****************************************************************************** * diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index e1dd1a8d42b6..1a4322db0274 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -260,11 +260,11 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_osi_data, 0); ACPI_INIT_GLOBAL(u8, acpi_gbl_reduced_hardware, FALSE); /* - * Maximum number of While() loop iterations before forced method abort. + * Maximum timeout for While() loop iterations before forced method abort. * This mechanism is intended to prevent infinite loops during interpreter * execution within a host kernel. */ -ACPI_INIT_GLOBAL(u32, acpi_gbl_max_loop_iterations, ACPI_MAX_LOOP_COUNT); +ACPI_INIT_GLOBAL(u32, acpi_gbl_max_loop_iterations, ACPI_MAX_LOOP_TIMEOUT); /* * This mechanism is used to trace a specified AML method. 
The method is diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 4f077edb9b81..ddde2790a54a 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -468,6 +468,8 @@ typedef void *acpi_handle; /* Actually a ptr to a NS Node */ #define ACPI_NSEC_PER_MSEC 1000000L #define ACPI_NSEC_PER_SEC 1000000000L +#define ACPI_TIME_AFTER(a, b) ((s64)((b) - (a)) < 0) + /* Owner IDs are used to track namespace nodes for selective deletion */ typedef u8 acpi_owner_id; -- cgit v1.2.3 From a26f4df913140a43cf7e3cb89b12ac27d87df984 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:40:19 -0800 Subject: ACPICA: Rename AE_AML_INFINITE_LOOP exception ACPICA commit e017213698374e01225f641ba0917516d8e91427 More appropriately renamed to AE_AML_LOOP_TIMEOUT, now that a real timer is used for the implementation. Link: https://github.com/acpica/acpica/commit/e0172136 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dscontrol.c | 2 +- include/acpi/acexcep.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c index 244075dbc03a..4b6ebc2a2851 100644 --- a/drivers/acpi/acpica/dscontrol.c +++ b/drivers/acpi/acpica/dscontrol.c @@ -216,7 +216,7 @@ acpi_ds_exec_end_control_op(struct acpi_walk_state *walk_state, if (ACPI_TIME_AFTER(acpi_os_get_timer(), control_state->control. 
loop_timeout)) { - status = AE_AML_INFINITE_LOOP; + status = AE_AML_LOOP_TIMEOUT; break; } diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index 17d61b1f2511..e1f9fe47f69e 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -195,7 +195,7 @@ struct acpi_exception_info { #define AE_AML_CIRCULAR_REFERENCE EXCEP_AML (0x001E) #define AE_AML_BAD_RESOURCE_LENGTH EXCEP_AML (0x001F) #define AE_AML_ILLEGAL_ADDRESS EXCEP_AML (0x0020) -#define AE_AML_INFINITE_LOOP EXCEP_AML (0x0021) +#define AE_AML_LOOP_TIMEOUT EXCEP_AML (0x0021) #define AE_AML_UNINITIALIZED_NODE EXCEP_AML (0x0022) #define AE_AML_TARGET_TYPE EXCEP_AML (0x0023) @@ -368,8 +368,8 @@ static const struct acpi_exception_info acpi_gbl_exception_names_aml[] = { "The length of a Resource Descriptor in the AML is incorrect"), EXCEP_TXT("AE_AML_ILLEGAL_ADDRESS", "A memory, I/O, or PCI configuration address is invalid"), - EXCEP_TXT("AE_AML_INFINITE_LOOP", - "An apparent infinite AML While loop, method was aborted"), + EXCEP_TXT("AE_AML_LOOP_TIMEOUT", + "An AML While loop exceeded the maximum execution time"), EXCEP_TXT("AE_AML_UNINITIALIZED_NODE", "A namespace node is uninitialized or unresolved"), EXCEP_TXT("AE_AML_TARGET_TYPE", -- cgit v1.2.3 From 90adf776a9b01faa08f29d712de5ff0bf5ba1441 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:40:20 -0800 Subject: ACPICA: Tools: Deploy -vd option (build date/time) across all tools ACPICA commit 336131640a1574b86240b32eca3150195f9270d6 Common option for all tools. Link: https://github.com/acpica/acpica/commit/33613164 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/acapps.h | 3 +++ tools/power/acpi/tools/acpidump/apmain.c | 28 ++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/acapps.h b/drivers/acpi/acpica/acapps.h index 7a1a68b5ac5c..2243c8164b34 100644 --- a/drivers/acpi/acpica/acapps.h +++ b/drivers/acpi/acpica/acapps.h @@ -80,6 +80,9 @@ prefix, ACPICA_COPYRIGHT, \ prefix +#define ACPI_COMMON_BUILD_TIME \ + "Build date/time: %s %s\n", __DATE__, __TIME__ + /* Macros for usage messages */ #define ACPI_USAGE_HEADER(usage) \ diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 22c3b4ee1617..be418fba9441 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -79,7 +79,7 @@ struct ap_dump_action action_table[AP_MAX_ACTIONS]; u32 current_action = 0; #define AP_UTILITY_NAME "ACPI Binary Table Dump Utility" -#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:svxz" +#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:sv^xz" /****************************************************************************** * @@ -100,6 +100,7 @@ static void ap_display_usage(void) ACPI_OPTION("-r <Address>
", "Dump tables from specified RSDP"); ACPI_OPTION("-s", "Print table summaries only"); ACPI_OPTION("-v", "Display version information"); + ACPI_OPTION("-vd", "Display build date and time"); ACPI_OPTION("-z", "Verbose mode"); ACPI_USAGE_TEXT("\nTable Options:\n"); @@ -231,10 +232,29 @@ static int ap_do_options(int argc, char **argv) } continue; - case 'v': /* Revision/version */ + case 'v': /* -v: (Version): signon already emitted, just exit */ - acpi_os_printf(ACPI_COMMON_SIGNON(AP_UTILITY_NAME)); - return (1); + switch (acpi_gbl_optarg[0]) { + case '^': /* -v: (Version) */ + + fprintf(stderr, + ACPI_COMMON_SIGNON(AP_UTILITY_NAME)); + return (1); + + case 'd': + + fprintf(stderr, + ACPI_COMMON_SIGNON(AP_UTILITY_NAME)); + printf(ACPI_COMMON_BUILD_TIME); + return (1); + + default: + + printf("Unknown option: -v%s\n", + acpi_gbl_optarg); + return (-1); + } + break; case 'z': /* Verbose mode */ -- cgit v1.2.3 From b43eac6f3384b033259bd973d4067d11260b68b3 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Fri, 17 Nov 2017 15:40:21 -0800 Subject: ACPICA: iASL: change processing of external op namespace nodes for correctness ACPICA commit aa866a9b4f24bbec9f158d10325b486d7d12d90f The declaration External (ABCD.EFGH) creates two nodes in the namespace: ABCD and EFGH where ABCD is marked as an implicit external node. ABCD is labeled as implicit because there does not exist a specific External (ABCD) declaration. Before this change, the declaration External (ABCD.EFGH) and External (ABCD.EFGH.IJKL) creates the namespace nodes ABCD, EFGH, and IJKL where ABCD and EFGH are labeled as implicit external nodes. This is incorrect. The only implicit node should be ABCD because EFGH and IJKL are explicit nodes. This change fixes the labeling procecess of external op namespace nodes so that nodes are properly labeled as implicit external. Due to this commit, the below ASL code results in a compilation error. 
definition_block ("DSDT.aml", "DSDT", 0x02, "INTEL", "BDW ", 0x0) { External(\_SB.PCI0.GFX0, device_obj) External(\_SB.PCI0.GFX0.ALSI) Scope(\_SB) { Device(PCI0) { Device(GFX0) { Name(_ADR, 0x00020000) } } } } Link: https://github.com/acpica/acpica/commit/aa866a9b Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsaccess.c | 13 +++++++------ drivers/acpi/acpica/nssearch.c | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c index f2733f51ca8d..33e652a12fca 100644 --- a/drivers/acpi/acpica/nsaccess.c +++ b/drivers/acpi/acpica/nsaccess.c @@ -644,17 +644,18 @@ acpi_ns_lookup(union acpi_generic_state *scope_info, this_node->object; } } -#ifdef ACPI_ASL_COMPILER - if (!acpi_gbl_disasm_flag && - (this_node->flags & ANOBJ_IS_EXTERNAL)) { - this_node->flags |= IMPLICIT_EXTERNAL; - } -#endif } /* Special handling for the last segment (num_segments == 0) */ else { +#ifdef ACPI_ASL_COMPILER + if (!acpi_gbl_disasm_flag + && (this_node->flags & ANOBJ_IS_EXTERNAL)) { + this_node->flags &= ~IMPLICIT_EXTERNAL; + } +#endif + /* * Sanity typecheck of the target object: * diff --git a/drivers/acpi/acpica/nssearch.c b/drivers/acpi/acpica/nssearch.c index 5de8957f5ef0..e91dbee9235f 100644 --- a/drivers/acpi/acpica/nssearch.c +++ b/drivers/acpi/acpica/nssearch.c @@ -417,6 +417,7 @@ acpi_ns_search_and_enter(u32 target_name, if (flags & ACPI_NS_EXTERNAL || (walk_state && walk_state->opcode == AML_SCOPE_OP)) { new_node->flags |= ANOBJ_IS_EXTERNAL; + new_node->flags |= IMPLICIT_EXTERNAL; } #endif -- cgit v1.2.3 From 4c189c9da00edcb1e5cc82b9f46a02692b465440 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:40:22 -0800 Subject: ACPICA: iASL/Tools: Add support for PDTT, SDEV, TPM2 ACPI tables ACPICA commit 028d331522f239fa615148273f6d10e9deadb1b3 Full support for PDTT and SDEV Partial support for TPM2 due to odd layout of the optional 
fields of the table. Link: https://github.com/acpica/acpica/commit/028d3315 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++----- include/acpi/actbl2.h | 14 +++++++++- 2 files changed, 83 insertions(+), 8 deletions(-) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 7a89e6de94da..ea0c71ece407 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -69,9 +69,10 @@ #define ACPI_SIG_HEST "HEST" /* Hardware Error Source Table */ #define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */ #define ACPI_SIG_MSCT "MSCT" /* Maximum System Characteristics Table */ -#define ACPI_SIG_PDTT "PDTT" /* Processor Debug Trigger Table */ +#define ACPI_SIG_PDTT "PDTT" /* Platform Debug Trigger Table */ #define ACPI_SIG_PPTT "PPTT" /* Processor Properties Topology Table */ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ +#define ACPI_SIG_SDEV "SDEV" /* Secure Devices table */ #define ACPI_SIG_SLIT "SLIT" /* System Locality Distance Information Table */ #define ACPI_SIG_SRAT "SRAT" /* System Resource Affinity Table */ #define ACPI_SIG_NFIT "NFIT" /* NVDIMM Firmware Interface Table */ @@ -1283,7 +1284,7 @@ struct acpi_nfit_flush_address { /******************************************************************************* * - * PDTT - Processor Debug Trigger Table (ACPI 6.2) + * PDTT - Platform Debug Trigger Table (ACPI 6.2) * Version 0 * ******************************************************************************/ @@ -1301,14 +1302,14 @@ struct acpi_table_pdtt { * starting at array_offset. 
*/ struct acpi_pdtt_channel { - u16 sub_channel_id; + u8 subchannel_id; + u8 flags; }; -/* Mask and Flags for above */ +/* Flags for above */ -#define ACPI_PDTT_SUBCHANNEL_ID_MASK 0x00FF -#define ACPI_PDTT_RUNTIME_TRIGGER (1<<8) -#define ACPI_PPTT_WAIT_COMPLETION (1<<9) +#define ACPI_PDTT_RUNTIME_TRIGGER (1) +#define ACPI_PPTT_WAIT_COMPLETION (1<<1) /******************************************************************************* * @@ -1403,6 +1404,68 @@ struct acpi_table_sbst { u32 critical_level; }; +/******************************************************************************* + * + * SDEV - Secure Devices Table (ACPI 6.2) + * Version 1 + * + ******************************************************************************/ + +struct acpi_table_sdev { + struct acpi_table_header header; /* Common ACPI table header */ +}; + +struct acpi_sdev_header { + u8 type; + u8 flags; + u16 length; +}; + +/* Values for subtable type above */ + +enum acpi_sdev_type { + ACPI_SDEV_TYPE_NAMESPACE_DEVICE = 0, + ACPI_SDEV_TYPE_PCIE_ENDPOINT_DEVICE = 1, + ACPI_SDEV_TYPE_RESERVED = 2 /* 2 and greater are reserved */ +}; + +/* Values for flags above */ + +#define ACPI_SDEV_HANDOFF_TO_UNSECURE_OS (1) + +/* + * SDEV subtables + */ + +/* 0: Namespace Device Based Secure Device Structure */ + +struct acpi_sdev_namespace { + struct acpi_sdev_header header; + u16 device_id_offset; + u16 device_id_length; + u16 vendor_data_offset; + u16 vendor_data_length; +}; + +/* 1: PCIe Endpoint Device Based Device Structure */ + +struct acpi_sdev_pcie { + struct acpi_sdev_header header; + u16 segment; + u16 start_bus; + u16 path_offset; + u16 path_length; + u16 vendor_data_offset; + u16 vendor_data_length; +}; + +/* 1a: PCIe Endpoint path entry */ + +struct acpi_sdev_pcie_path { + u8 device; + u8 function; +}; + /******************************************************************************* * * SLIT - System Locality Distance Information Table diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h 
index 686b6f8c09dc..2623f9d72e46 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1246,6 +1246,8 @@ enum acpi_spmi_interface_types { * TCPA - Trusted Computing Platform Alliance table * Version 2 * + * TCG Hardware Interface Table for TPM 1.2 Clients and Servers + * * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0", * Version 1.2, Revision 8 * February 27, 2017 @@ -1310,6 +1312,8 @@ struct acpi_table_tcpa_server { * TPM2 - Trusted Platform Module (TPM) 2.0 Hardware Interface Table * Version 4 * + * TCG Hardware Interface Table for TPM 2.0 Clients and Servers + * * Conforms to "TCG ACPI Specification, Family 1.2 and 2.0", * Version 1.2, Revision 8 * February 27, 2017 @@ -1329,15 +1333,23 @@ struct acpi_table_tpm2 { /* Values for start_method above */ #define ACPI_TPM2_NOT_ALLOWED 0 +#define ACPI_TPM2_RESERVED1 1 #define ACPI_TPM2_START_METHOD 2 +#define ACPI_TPM2_RESERVED3 3 +#define ACPI_TPM2_RESERVED4 4 +#define ACPI_TPM2_RESERVED5 5 #define ACPI_TPM2_MEMORY_MAPPED 6 #define ACPI_TPM2_COMMAND_BUFFER 7 #define ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD 8 +#define ACPI_TPM2_RESERVED9 9 +#define ACPI_TPM2_RESERVED10 10 #define ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC 11 /* V1.2 Rev 8 */ +#define ACPI_TPM2_RESERVED 12 -/* Trailer appears after any start_method subtables */ +/* Optional trailer appears after any start_method subtables */ struct acpi_tpm2_trailer { + u8 method_parameters[12]; u32 minimum_log_length; /* Minimum length for the event log area */ u64 log_address; /* Address of the event log area */ }; -- cgit v1.2.3 From 19654f9f24a47d374e5b413c67d05ecb5a1df580 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Fri, 17 Nov 2017 15:40:23 -0800 Subject: ACPICA: Utilities: Cleanup style issue for bit clearing ACPICA commit b49a0e1e26f3c61df7113f18f441c83739eb5514 It's reported in Linux community that change to utmath.c contains a style problem: [.../utmath.c:137]: (style) Same expression on both sides of '^='. 
[.../utmath.c:174]: (style) Same expression on both sides of '^='. This patch fixes this problem. ACPICA BZ 1422, reported by David Binderman, fixed by Lv Zheng. Link: https://github.com/acpica/acpica/commit/b49a0e1e Link: https://bugs.acpica.org/show_bug.cgi?id=1422 Reported-by: David Binderman Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/utmath.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/utmath.c b/drivers/acpi/acpica/utmath.c index 5f9c680076c4..2055a858e5f5 100644 --- a/drivers/acpi/acpica/utmath.c +++ b/drivers/acpi/acpica/utmath.c @@ -134,7 +134,7 @@ acpi_status acpi_ut_short_shift_left(u64 operand, u32 count, u64 *out_result) if ((count & 63) >= 32) { operand_ovl.part.hi = operand_ovl.part.lo; - operand_ovl.part.lo ^= operand_ovl.part.lo; + operand_ovl.part.lo = 0; count = (count & 63) - 32; } ACPI_SHIFT_LEFT_64_BY_32(operand_ovl.part.hi, @@ -171,7 +171,7 @@ acpi_status acpi_ut_short_shift_right(u64 operand, u32 count, u64 *out_result) if ((count & 63) >= 32) { operand_ovl.part.lo = operand_ovl.part.hi; - operand_ovl.part.hi ^= operand_ovl.part.hi; + operand_ovl.part.hi = 0; count = (count & 63) - 32; } ACPI_SHIFT_RIGHT_64_BY_32(operand_ovl.part.hi, -- cgit v1.2.3 From 8ca86e82b9f6a98d5f5606ee9cb9ed3ae08c1529 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:40:24 -0800 Subject: ACPICA: Update version to 20170929 ACPICA commit 2399a7799d6e41ec5beca77ddff3acc77e1a4e0a Version 20170929 Link: https://github.com/acpica/acpica/commit/2399a779 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 1a4322db0274..f25668cce946 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20170831 +#define ACPI_CA_VERSION 0x20170929 #include #include -- cgit v1.2.3 From db53f7f0eba56a7d8e45d6b26042899fccc43755 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:16 -0800 Subject: ACPICA: Use local 64-bit divide support for string conversions ACPICA commit f230f4df26d07b97ef00be39156ecee64250447d On 32-bit platforms, 64-bit divide often requires a library function which may not be present in the build. Link: https://github.com/acpica/acpica/commit/f230f4df Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/utstrsuppt.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/acpica/utstrsuppt.c b/drivers/acpi/acpica/utstrsuppt.c index 965fb5cec94f..b2fc371c402e 100644 --- a/drivers/acpi/acpica/utstrsuppt.c +++ b/drivers/acpi/acpica/utstrsuppt.c @@ -52,10 +52,9 @@ static acpi_status acpi_ut_insert_digit(u64 *accumulated_value, u32 base, int ascii_digit); static acpi_status -acpi_ut_strtoul_multiply64(u64 multiplicand, u64 multiplier, u64 *out_product); +acpi_ut_strtoul_multiply64(u64 multiplicand, u32 base, u64 *out_product); -static acpi_status -acpi_ut_strtoul_add64(u64 addend1, u64 addend2, u64 *out_sum); +static acpi_status acpi_ut_strtoul_add64(u64 addend1, u32 digit, u64 *out_sum); /******************************************************************************* * @@ -357,7 +356,7 @@ acpi_ut_insert_digit(u64 *accumulated_value, u32 base, int ascii_digit) * FUNCTION: acpi_ut_strtoul_multiply64 * * PARAMETERS: multiplicand - Current accumulated converted integer - * 
multiplier - Base/Radix + * base - Base/Radix * out_product - Where the product is returned * * RETURN: Status and 64-bit product @@ -369,25 +368,32 @@ acpi_ut_insert_digit(u64 *accumulated_value, u32 base, int ascii_digit) ******************************************************************************/ static acpi_status -acpi_ut_strtoul_multiply64(u64 multiplicand, u64 multiplier, u64 *out_product) +acpi_ut_strtoul_multiply64(u64 multiplicand, u32 base, u64 *out_product) { u64 val; + u64 quotient; /* Exit if either operand is zero */ *out_product = 0; - if (!multiplicand || !multiplier) { + if (!multiplicand || !base) { return (AE_OK); } - /* Check for 64-bit overflow before the actual multiplication */ - - acpi_ut_short_divide(ACPI_UINT64_MAX, (u32)multiplier, &val, NULL); - if (multiplicand > val) { + /* + * Check for 64-bit overflow before the actual multiplication. + * + * Notes: 64-bit division is often not supported on 32-bit platforms + * (it requires a library function), Therefore ACPICA has a local + * 64-bit divide function. Also, Multiplier is currently only used + * as the radix (8/10/16), to the 64/32 divide will always work. 
+ */ acpi_ut_short_divide(ACPI_UINT64_MAX, base, &quotient, NULL); if (multiplicand > quotient) { return (AE_NUMERIC_OVERFLOW); } - val = multiplicand * multiplier; + val = multiplicand * base; /* Check for 32-bit overflow if necessary */ @@ -404,7 +410,7 @@ acpi_ut_strtoul_multiply64(u64 multiplicand, u64 multiplier, u64 *out_product) * FUNCTION: acpi_ut_strtoul_add64 * * PARAMETERS: addend1 - Current accumulated converted integer - * addend2 - New hex value/char + * digit - New hex value/char * out_sum - Where sum is returned (Accumulator) * * RETURN: Status and 64-bit sum @@ -415,17 +421,17 @@ acpi_ut_strtoul_multiply64(u64 multiplicand, u64 multiplier, u64 *out_product) * ******************************************************************************/ -static acpi_status acpi_ut_strtoul_add64(u64 addend1, u64 addend2, u64 *out_sum) +static acpi_status acpi_ut_strtoul_add64(u64 addend1, u32 digit, u64 *out_sum) { u64 sum; /* Check for 64-bit overflow before the actual addition */ - if ((addend1 > 0) && (addend2 > (ACPI_UINT64_MAX - addend1))) { + if ((addend1 > 0) && (digit > (ACPI_UINT64_MAX - addend1))) { return (AE_NUMERIC_OVERFLOW); } - sum = addend1 + addend2; + sum = addend1 + digit; /* Check for 32-bit overflow if necessary */ -- cgit v1.2.3 From e814109bee1aba30c650418e7412adb89654057b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:17 -0800 Subject: ACPICA: Update output from ACPI_EXCEPTION macro ACPICA commit b2858b2cc83e1481950a2c976f62d4e1d758bc85 Changes line prefix from "ACPI Exception" to simply "ACPI Error" to match the ACPI_ERROR macro. ACPI_EXCEPTION takes the ACPI status as an argument, decodes and displays it along with the error message. Link: https://github.com/acpica/acpica/commit/b2858b2c Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J.
Wysocki --- drivers/acpi/acpica/acutils.h | 3 --- drivers/acpi/acpica/utxferror.c | 8 ++++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index 83b75e9db7ef..bb97a18158ec 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -118,9 +118,6 @@ extern const char *acpi_gbl_ptyp_decode[]; #ifndef ACPI_MSG_ERROR #define ACPI_MSG_ERROR "ACPI Error: " #endif -#ifndef ACPI_MSG_EXCEPTION -#define ACPI_MSG_EXCEPTION "ACPI Exception: " -#endif #ifndef ACPI_MSG_WARNING #define ACPI_MSG_WARNING "ACPI Warning: " #endif diff --git a/drivers/acpi/acpica/utxferror.c b/drivers/acpi/acpica/utxferror.c index 950a1e500bfa..9da4f8ef2e77 100644 --- a/drivers/acpi/acpica/utxferror.c +++ b/drivers/acpi/acpica/utxferror.c @@ -96,8 +96,8 @@ ACPI_EXPORT_SYMBOL(acpi_error) * * RETURN: None * - * DESCRIPTION: Print "ACPI Exception" message with module/line/version info - * and decoded acpi_status. + * DESCRIPTION: Print an "ACPI Error" message with module/line/version + * info as well as decoded acpi_status. * ******************************************************************************/ void ACPI_INTERNAL_VAR_XFACE @@ -111,10 +111,10 @@ acpi_exception(const char *module_name, /* For AE_OK, just print the message */ if (ACPI_SUCCESS(status)) { - acpi_os_printf(ACPI_MSG_EXCEPTION); + acpi_os_printf(ACPI_MSG_ERROR); } else { - acpi_os_printf(ACPI_MSG_EXCEPTION "%s, ", + acpi_os_printf(ACPI_MSG_ERROR "%s, ", acpi_format_exception(status)); } -- cgit v1.2.3 From 53ae81e1896e8beb52c6d94fa2380f864e9bb4e7 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:18 -0800 Subject: ACPICA: Add an additional error message for EC timeouts ACPICA commit 24dd370b14711b7b3f31d7ca6326f9e0bd177c4e AE_TIME is seen to be returned from the EC driver/handler so often that an additional error message is added to help clarify the problem. 
Link: https://github.com/acpica/acpica/commit/24dd370b Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/evregion.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index 28b447ff92df..bb58419f0d61 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -298,6 +298,16 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, ACPI_EXCEPTION((AE_INFO, status, "Returned by Handler for [%s]", acpi_ut_get_region_name(region_obj->region. space_id))); + + /* + * Special case for an EC timeout. These are seen so frequently + * that an additional error message is helpful + */ + if ((region_obj->region.space_id == ACPI_ADR_SPACE_EC) && + (status == AE_TIME)) { + ACPI_ERROR((AE_INFO, + "Timeout from EC hardware or EC device driver")); + } } if (!(handler_desc->address_space.handler_flags & -- cgit v1.2.3 From 4c106aa411ee7c1919589f283a4f17888dfee387 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 17 Nov 2017 15:42:19 -0800 Subject: ACPICA: iasl: Add SMMUv3 device ID mapping index support ACPICA commit 5c371879e035122c5807752f42247fd091d107d6 SMMUv3 device ID mapping index is used for SMMUv3 MSI which is introduced in IORT spec revision c, add its support for iasl. Tested with iasl -t IORT then get the right SMMUv3 node in iort.asl. Link: https://github.com/acpica/acpica/commit/5c371879 Signed-off-by: Hanjun Guo Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actbl2.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 2623f9d72e46..0d60d5df14f8 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -810,6 +810,7 @@ struct acpi_iort_smmu_v3 { u8 pxm; u8 reserved1; u16 reserved2; + u32 id_mapping_index; }; /* Values for Model field above */ -- cgit v1.2.3 From b12ebe59353c76e08d4a956a2c8a6a13b3b98839 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Fri, 17 Nov 2017 15:42:20 -0800 Subject: ACPICA: ACPICA: style edits to utility function output, no functional change ACPICA commit 8070a23749f1c2eedec313f42f564b5375054137 Link: https://github.com/acpica/acpica/commit/8070a237 Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/uttrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c index 3c8de88ecbd5..28a302eb2015 100644 --- a/drivers/acpi/acpica/uttrack.c +++ b/drivers/acpi/acpica/uttrack.c @@ -717,7 +717,7 @@ exit: if (!num_outstanding) { ACPI_INFO(("No outstanding allocations")); } else { - ACPI_ERROR((AE_INFO, "%u(0x%X) Outstanding allocations", + ACPI_ERROR((AE_INFO, "%u (0x%X) Outstanding cache allocations", num_outstanding, num_outstanding)); } -- cgit v1.2.3 From 4b9b1de798b668c02942792e2b3be8587c04c030 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 17 Nov 2017 15:42:21 -0800 Subject: ACPICA: Trivial fix to spelling mistake in comment ACPICA commit 62c3ff46de977456155fdedbd9f294f2ff700520 Fix spelling mistake, "Reseved" -> "Reserved" Link: https://github.com/acpica/acpica/commit/62c3ff46 Signed-off-by: Colin Ian King Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actbl1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index ea0c71ece407..5ef905626c6d 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1163,7 +1163,7 @@ struct acpi_nfit_system_address { struct acpi_nfit_header header; u16 range_index; u16 flags; - u32 reserved; /* Reseved, must be zero */ + u32 reserved; /* Reserved, must be zero */ u32 proximity_domain; u8 range_guid[16]; u64 address; -- cgit v1.2.3 From 16ccf829201cef23edaa5524f3f91e05861c4455 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:22 -0800 Subject: ACPICA: Enhance error messages from namespace create/lookup operations ACPICA commit b09c8d7bdc8c5a3db0a8d38bfd6182c023885e12 1) Emit the full pathname (scope+name) instead of just the name 2) For AE_ALREADY_EXISTS and AE_NOT_FOUND, use the "firmware error" string to point to the true problem. Link: https://github.com/acpica/acpica/commit/b09c8d7b Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acmacros.h | 2 +- drivers/acpi/acpica/acnamesp.h | 3 + drivers/acpi/acpica/acutils.h | 12 ++-- drivers/acpi/acpica/dsfield.c | 28 +++++--- drivers/acpi/acpica/dsobject.c | 4 +- drivers/acpi/acpica/dsutils.c | 3 +- drivers/acpi/acpica/dswload.c | 6 +- drivers/acpi/acpica/dswload2.c | 13 ++-- drivers/acpi/acpica/nsconvert.c | 3 +- drivers/acpi/acpica/nsnames.c | 146 ++++++++++++++++++++++++++++++++++++++++ drivers/acpi/acpica/psargs.c | 2 +- drivers/acpi/acpica/uterror.c | 73 ++++++++++++++++++++ 12 files changed, 270 insertions(+), 25 deletions(-) diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h index c7f0c96cc00f..128a3d71b598 100644 --- a/drivers/acpi/acpica/acmacros.h +++ b/drivers/acpi/acpica/acmacros.h @@ -455,7 +455,7 @@ * the plist contains a set of parens to allow variable-length lists. 
* These macros are used for both the debug and non-debug versions of the code. */ -#define ACPI_ERROR_NAMESPACE(s, e) acpi_ut_namespace_error (AE_INFO, s, e); +#define ACPI_ERROR_NAMESPACE(s, p, e) acpi_ut_prefixed_namespace_error (AE_INFO, s, p, e); #define ACPI_ERROR_METHOD(s, n, p, e) acpi_ut_method_error (AE_INFO, s, n, p, e); #define ACPI_WARN_PREDEFINED(plist) acpi_ut_predefined_warning plist #define ACPI_INFO_PREDEFINED(plist) acpi_ut_predefined_info plist diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 54a0c51b3e37..2fb1bb78d85c 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -289,6 +289,9 @@ acpi_ns_build_normalized_path(struct acpi_namespace_node *node, char *acpi_ns_get_normalized_pathname(struct acpi_namespace_node *node, u8 no_trailing); +char *acpi_ns_build_prefixed_pathname(union acpi_generic_state *prefix_scope, + const char *internal_path); + char *acpi_ns_name_of_current_scope(struct acpi_walk_state *walk_state); acpi_status diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index bb97a18158ec..d38187ee11c8 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -126,10 +126,10 @@ extern const char *acpi_gbl_ptyp_decode[]; #endif #ifndef ACPI_MSG_BIOS_ERROR -#define ACPI_MSG_BIOS_ERROR "ACPI BIOS Error (bug): " +#define ACPI_MSG_BIOS_ERROR "Firmware Error (ACPI): " #endif #ifndef ACPI_MSG_BIOS_WARNING -#define ACPI_MSG_BIOS_WARNING "ACPI BIOS Warning (bug): " +#define ACPI_MSG_BIOS_WARNING "Firmware Warning (ACPI): " #endif /* @@ -734,9 +734,11 @@ acpi_ut_predefined_bios_error(const char *module_name, u8 node_flags, const char *format, ...); void -acpi_ut_namespace_error(const char *module_name, - u32 line_number, - const char *internal_name, acpi_status lookup_status); +acpi_ut_prefixed_namespace_error(const char *module_name, + u32 line_number, + union acpi_generic_state *prefix_scope, + const char *internal_name, + 
acpi_status lookup_status); void acpi_ut_method_error(const char *module_name, diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index 7bcf5f5ea029..0cab34a593d5 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -209,7 +209,8 @@ acpi_ds_create_buffer_field(union acpi_parse_object *op, ACPI_IMODE_LOAD_PASS1, flags, walk_state, &node); if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(arg->common.value.string, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + arg->common.value.string, status); return_ACPI_STATUS(status); } } @@ -383,7 +384,9 @@ acpi_ds_get_field_names(struct acpi_create_field_info *info, walk_state, &info->connection_node); if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(child->common. + ACPI_ERROR_NAMESPACE(walk_state-> + scope_info, + child->common. value.name, status); return_ACPI_STATUS(status); @@ -402,7 +405,8 @@ acpi_ds_get_field_names(struct acpi_create_field_info *info, ACPI_NS_DONT_OPEN_SCOPE, walk_state, &info->field_node); if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE((char *)&arg->named.name, + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + (char *)&arg->named.name, status); return_ACPI_STATUS(status); } else { @@ -498,7 +502,8 @@ acpi_ds_create_field(union acpi_parse_object *op, &region_node); #endif if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(arg->common.value.name, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + arg->common.value.name, status); return_ACPI_STATUS(status); } } @@ -618,7 +623,8 @@ acpi_ds_init_field_objects(union acpi_parse_object *op, ACPI_IMODE_LOAD_PASS1, flags, walk_state, &node); if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE((char *)&arg->named.name, + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + (char *)&arg->named.name, status); if (status != AE_ALREADY_EXISTS) { return_ACPI_STATUS(status); @@ -681,7 +687,8 @@ acpi_ds_create_bank_field(union acpi_parse_object *op, &region_node); #endif if (ACPI_FAILURE(status)) { -
ACPI_ERROR_NAMESPACE(arg->common.value.name, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + arg->common.value.name, status); return_ACPI_STATUS(status); } } @@ -695,7 +702,8 @@ acpi_ds_create_bank_field(union acpi_parse_object *op, ACPI_NS_SEARCH_PARENT, walk_state, &info.register_node); if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(arg->common.value.string, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + arg->common.value.string, status); return_ACPI_STATUS(status); } @@ -765,7 +773,8 @@ acpi_ds_create_index_field(union acpi_parse_object *op, ACPI_NS_SEARCH_PARENT, walk_state, &info.register_node); if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(arg->common.value.string, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + arg->common.value.string, status); return_ACPI_STATUS(status); } @@ -778,7 +787,8 @@ acpi_ds_create_index_field(union acpi_parse_object *op, ACPI_NS_SEARCH_PARENT, walk_state, &info.data_register_node); if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(arg->common.value.string, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + arg->common.value.string, status); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c index 82448551781b..b21fe084ffc8 100644 --- a/drivers/acpi/acpica/dsobject.c +++ b/drivers/acpi/acpica/dsobject.c @@ -112,7 +112,9 @@ acpi_ds_build_internal_object(struct acpi_walk_state *walk_state, acpi_namespace_node, &(op->common.node))); if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(op->common.value. + ACPI_ERROR_NAMESPACE(walk_state-> + scope_info, + op->common.value. 
string, status); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c index 0dabd9b95684..4c5faf629a83 100644 --- a/drivers/acpi/acpica/dsutils.c +++ b/drivers/acpi/acpica/dsutils.c @@ -583,7 +583,8 @@ acpi_ds_create_operand(struct acpi_walk_state *walk_state, } if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(name_string, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + name_string, status); } } diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index eaa859a89702..5771e4e4a99a 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -207,7 +207,8 @@ acpi_ds_load1_begin_op(struct acpi_walk_state *walk_state, } #endif if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(path, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, path, + status); return_ACPI_STATUS(status); } @@ -375,7 +376,8 @@ acpi_ds_load1_begin_op(struct acpi_walk_state *walk_state, } if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(path, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + path, status); return_ACPI_STATUS(status); } } diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c index aad83ef5a4ec..b3d0aaec8203 100644 --- a/drivers/acpi/acpica/dswload2.c +++ b/drivers/acpi/acpica/dswload2.c @@ -184,11 +184,14 @@ acpi_ds_load2_begin_op(struct acpi_walk_state *walk_state, if (status == AE_NOT_FOUND) { status = AE_OK; } else { - ACPI_ERROR_NAMESPACE(buffer_ptr, + ACPI_ERROR_NAMESPACE(walk_state-> + scope_info, + buffer_ptr, status); } #else - ACPI_ERROR_NAMESPACE(buffer_ptr, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + buffer_ptr, status); #endif return_ACPI_STATUS(status); } @@ -343,7 +346,8 @@ acpi_ds_load2_begin_op(struct acpi_walk_state *walk_state, } if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(buffer_ptr, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + buffer_ptr, status); return_ACPI_STATUS(status); } @@ -719,7 
+723,8 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state) */ op->common.node = new_node; } else { - ACPI_ERROR_NAMESPACE(arg->common.value.string, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, + arg->common.value.string, status); } break; diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c index 539d775bbc92..d55dcc82f434 100644 --- a/drivers/acpi/acpica/nsconvert.c +++ b/drivers/acpi/acpica/nsconvert.c @@ -495,7 +495,8 @@ acpi_ns_convert_to_reference(struct acpi_namespace_node *scope, /* Check if we are resolving a named reference within a package */ - ACPI_ERROR_NAMESPACE(original_object->string.pointer, status); + ACPI_ERROR_NAMESPACE(&scope_info, + original_object->string.pointer, status); goto error_exit; } diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c index a410760a0308..4203d4589b6d 100644 --- a/drivers/acpi/acpica/nsnames.c +++ b/drivers/acpi/acpica/nsnames.c @@ -49,6 +49,9 @@ #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsnames") +/* Local Prototypes */ +static void acpi_ns_normalize_pathname(char *original_path); + /******************************************************************************* * * FUNCTION: acpi_ns_get_external_pathname @@ -63,6 +66,7 @@ ACPI_MODULE_NAME("nsnames") * for error and debug statements. 
* ******************************************************************************/ + char *acpi_ns_get_external_pathname(struct acpi_namespace_node *node) { char *name_buffer; @@ -352,3 +356,145 @@ char *acpi_ns_get_normalized_pathname(struct acpi_namespace_node *node, return_PTR(name_buffer); } + +/******************************************************************************* + * + * FUNCTION: acpi_ns_build_prefixed_pathname + * + * PARAMETERS: prefix_scope - Scope/Path that prefixes the internal path + * internal_path - Name or path of the namespace node + * + * RETURN: None + * + * DESCRIPTION: Construct a fully qualified pathname from a concatenation of: + * 1) Path associated with the prefix_scope namespace node + * 2) External path representation of the Internal path + * + ******************************************************************************/ + +char *acpi_ns_build_prefixed_pathname(union acpi_generic_state *prefix_scope, + const char *internal_path) +{ + acpi_status status; + char *full_path = NULL; + char *external_path; + char *prefix_path = NULL; + u32 prefix_path_length = 0; + + /* If there is a prefix, get the pathname to it */ + + if (prefix_scope && prefix_scope->scope.node) { + prefix_path = + acpi_ns_get_normalized_pathname(prefix_scope->scope.node, + TRUE); + if (prefix_path) { + prefix_path_length = strlen(prefix_path); + } + } + + status = acpi_ns_externalize_name(ACPI_UINT32_MAX, internal_path, + NULL, &external_path); + if (ACPI_FAILURE(status)) { + goto cleanup; + } + + /* Merge the prefix path and the path. 
2 is for one dot and trailing null */ + + full_path = + ACPI_ALLOCATE_ZEROED(prefix_path_length + strlen(external_path) + + 2); + if (!full_path) { + goto cleanup; + } + + /* Don't merge if the External path is already fully qualified */ + + if (prefix_path && (*external_path != '\\') && (*external_path != '^')) { + strcat(full_path, prefix_path); + if (prefix_path[1]) { + strcat(full_path, "."); + } + } + + acpi_ns_normalize_pathname(external_path); + strcat(full_path, external_path); + +cleanup: + if (prefix_path) { + ACPI_FREE(prefix_path); + } + + return (full_path); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ns_normalize_pathname + * + * PARAMETERS: original_path - Path to be normalized, in External format + * + * RETURN: The original path is processed in-place + * + * DESCRIPTION: Remove trailing underscores from each element of a path. + * + * For example: \A___.B___.C___ becomes \A.B.C + * + ******************************************************************************/ + +static void acpi_ns_normalize_pathname(char *original_path) +{ + char *input_path = original_path; + char *new_path_buffer; + char *new_path; + u32 i; + + /* Allocate a temp buffer in which to construct the new path */ + + new_path_buffer = ACPI_ALLOCATE_ZEROED(strlen(input_path) + 1); + new_path = new_path_buffer; + if (!new_path_buffer) { + return; + } + + /* Special characters may appear at the beginning of the path */ + + if (*input_path == '\\') { + *new_path = *input_path; + new_path++; + input_path++; + } + + while (*input_path == '^') { + *new_path = *input_path; + new_path++; + input_path++; + } + + /* Remainder of the path */ + + while (*input_path) { + + /* Do one nameseg at a time */ + + for (i = 0; (i < ACPI_NAME_SIZE) && *input_path; i++) { + if ((i == 0) || (*input_path != '_')) { /* First char is allowed to be underscore */ + *new_path = *input_path; + new_path++; + } + + input_path++; + } + + /* Dot means 
that there are more namesegs to come */ + + if (*input_path == '.') { + *new_path = *input_path; + new_path++; + input_path++; + } + } + + *new_path = 0; + strcpy(original_path, new_path_buffer); + ACPI_FREE(new_path_buffer); +} diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c index eb9dfaca555f..171e2faa7c50 100644 --- a/drivers/acpi/acpica/psargs.c +++ b/drivers/acpi/acpica/psargs.c @@ -361,7 +361,7 @@ acpi_ps_get_next_namepath(struct acpi_walk_state *walk_state, /* Final exception check (may have been changed from code above) */ if (ACPI_FAILURE(status)) { - ACPI_ERROR_NAMESPACE(path, status); + ACPI_ERROR_NAMESPACE(walk_state->scope_info, path, status); if ((walk_state->parse_flags & ACPI_PARSE_MODE_MASK) == ACPI_PARSE_EXECUTE) { diff --git a/drivers/acpi/acpica/uterror.c b/drivers/acpi/acpica/uterror.c index e3368186e1c1..42388dcb5ccc 100644 --- a/drivers/acpi/acpica/uterror.c +++ b/drivers/acpi/acpica/uterror.c @@ -180,6 +180,78 @@ acpi_ut_predefined_bios_error(const char *module_name, va_end(arg_list); } +/******************************************************************************* + * + * FUNCTION: acpi_ut_prefixed_namespace_error + * + * PARAMETERS: module_name - Caller's module name (for error output) + * line_number - Caller's line number (for error output) + * prefix_scope - Scope/Path that prefixes the internal path + * internal_path - Name or path of the namespace node + * lookup_status - Exception code from NS lookup + * + * RETURN: None + * + * DESCRIPTION: Print error message with the full pathname constructed this way: + * + * prefix_scope_node_full_path.externalized_internal_path + * + * NOTE: 10/2017: Treat the major ns_lookup errors as firmware errors + * + ******************************************************************************/ + +void +acpi_ut_prefixed_namespace_error(const char *module_name, + u32 line_number, + union acpi_generic_state *prefix_scope, + const char *internal_path, + acpi_status lookup_status) 
+{ + char *full_path; + const char *message; + + /* + * Main cases: + * 1) Object creation, object must not already exist + * 2) Object lookup, object must exist + */ + switch (lookup_status) { + case AE_ALREADY_EXISTS: + + acpi_os_printf(ACPI_MSG_BIOS_ERROR); + message = "Failure creating"; + break; + + case AE_NOT_FOUND: + + acpi_os_printf(ACPI_MSG_BIOS_ERROR); + message = "Failure looking up"; + break; + + default: + + acpi_os_printf(ACPI_MSG_ERROR); + message = "Failure looking up"; + break; + } + + /* Concatenate the prefix path and the internal path */ + + full_path = + acpi_ns_build_prefixed_pathname(prefix_scope, internal_path); + + acpi_os_printf("%s [%s], %s", message, + full_path ? full_path : "Could not get pathname", + acpi_format_exception(lookup_status)); + + if (full_path) { + ACPI_FREE(full_path); + } + + ACPI_MSG_SUFFIX; +} + +#ifdef __OBSOLETE_FUNCTION /******************************************************************************* * * FUNCTION: acpi_ut_namespace_error @@ -240,6 +312,7 @@ acpi_ut_namespace_error(const char *module_name, ACPI_MSG_SUFFIX; ACPI_MSG_REDIRECT_END; } +#endif /******************************************************************************* * -- cgit v1.2.3 From dea4e83358f23bcb178e9cb23e9278ff5a8cea76 Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Fri, 17 Nov 2017 15:42:23 -0800 Subject: ACPICA: Namespace: fix memory leak from building prefixed pathname ACPICA commit f8ca5db30605467b851f86d152079631c27eeca8 Link: https://github.com/acpica/acpica/commit/f8ca5db3 Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/nsnames.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c index 4203d4589b6d..22c92d1a24d8 100644 --- a/drivers/acpi/acpica/nsnames.c +++ b/drivers/acpi/acpica/nsnames.c @@ -377,7 +377,7 @@ char *acpi_ns_build_prefixed_pathname(union acpi_generic_state *prefix_scope, { acpi_status status; char *full_path = NULL; - char *external_path; + char *external_path = NULL; char *prefix_path = NULL; u32 prefix_path_length = 0; @@ -423,6 +423,9 @@ cleanup: if (prefix_path) { ACPI_FREE(prefix_path); } + if (external_path) { + ACPI_FREE(external_path); + } return (full_path); } -- cgit v1.2.3 From 5ed4e5ca51cad5ddae09f59a95626731d1546244 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:24 -0800 Subject: ACPICA: ACPI 6.0A: Changes to the NFIT ACPI table ACPICA commit a42a086b8d682ab8dfbc4666cf6b9c8a5ee23a77 Adds a new subtable. Link: https://github.com/acpica/acpica/commit/a42a086b Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actbl1.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 5ef905626c6d..867f4136dbac 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1150,7 +1150,8 @@ enum acpi_nfit_type { ACPI_NFIT_TYPE_CONTROL_REGION = 4, ACPI_NFIT_TYPE_DATA_REGION = 5, ACPI_NFIT_TYPE_FLUSH_ADDRESS = 6, - ACPI_NFIT_TYPE_RESERVED = 7 /* 7 and greater are reserved */ + ACPI_NFIT_TYPE_CAPABILITIES = 7, + ACPI_NFIT_TYPE_RESERVED = 8 /* 8 and greater are reserved */ }; /* @@ -1282,6 +1283,69 @@ struct acpi_nfit_flush_address { u64 hint_address[1]; /* Variable length */ }; +/* 7: Platform Capabilities Structure */ + +struct acpi_nfit_capabilities { + struct acpi_nfit_header header; + u8 highest_capability; + u8 reserved[3]; /* Reserved, must be zero */ + u32 capabilities; + u32 reserved2; +}; + +/* Capabilities Flags */ + +#define ACPI_NFIT_CAPABILITY_CACHE_FLUSH (1) /* 00: Cache Flush to NVDIMM capable */ +#define ACPI_NFIT_CAPABILITY_MEM_FLUSH (1<<1) /* 01: Memory Flush to NVDIMM capable */ +#define ACPI_NFIT_CAPABILITY_MEM_MIRRORING (1<<2) /* 02: Memory Mirroring capable */ + +/* + * NFIT/DVDIMM device handle support - used as the _ADR for each NVDIMM + */ +struct nfit_device_handle { + u32 handle; +}; + +/* Device handle construction and extraction macros */ + +#define ACPI_NFIT_DIMM_NUMBER_MASK 0x0000000F +#define ACPI_NFIT_CHANNEL_NUMBER_MASK 0x000000F0 +#define ACPI_NFIT_MEMORY_ID_MASK 0x00000F00 +#define ACPI_NFIT_SOCKET_ID_MASK 0x0000F000 +#define ACPI_NFIT_NODE_ID_MASK 0x0FFF0000 + +#define ACPI_NFIT_DIMM_NUMBER_OFFSET 0 +#define ACPI_NFIT_CHANNEL_NUMBER_OFFSET 4 +#define ACPI_NFIT_MEMORY_ID_OFFSET 8 +#define ACPI_NFIT_SOCKET_ID_OFFSET 12 +#define ACPI_NFIT_NODE_ID_OFFSET 16 + +/* Macro to construct a NFIT/NVDIMM device handle */ + +#define ACPI_NFIT_BUILD_DEVICE_HANDLE(dimm, channel, memory, socket, node) \ + ((dimm) | \ + 
((channel) << ACPI_NFIT_CHANNEL_NUMBER_OFFSET) | \ + ((memory) << ACPI_NFIT_MEMORY_ID_OFFSET) | \ + ((socket) << ACPI_NFIT_SOCKET_ID_OFFSET) | \ + ((node) << ACPI_NFIT_NODE_ID_OFFSET)) + +/* Macros to extract individual fields from a NFIT/NVDIMM device handle */ + +#define ACPI_NFIT_GET_DIMM_NUMBER(handle) \ + ((handle) & ACPI_NFIT_DIMM_NUMBER_MASK) + +#define ACPI_NFIT_GET_CHANNEL_NUMBER(handle) \ + (((handle) & ACPI_NFIT_CHANNEL_NUMBER_MASK) >> ACPI_NFIT_CHANNEL_NUMBER_OFFSET) + +#define ACPI_NFIT_GET_MEMORY_ID(handle) \ + (((handle) & ACPI_NFIT_MEMORY_ID_MASK) >> ACPI_NFIT_MEMORY_ID_OFFSET) + +#define ACPI_NFIT_GET_SOCKET_ID(handle) \ + (((handle) & ACPI_NFIT_SOCKET_ID_MASK) >> ACPI_NFIT_SOCKET_ID_OFFSET) + +#define ACPI_NFIT_GET_NODE_ID(handle) \ + (((handle) & ACPI_NFIT_NODE_ID_MASK) >> ACPI_NFIT_NODE_ID_OFFSET) + /******************************************************************************* * * PDTT - Platform Debug Trigger Table (ACPI 6.2) -- cgit v1.2.3 From d41bf52e38e47ce4990857a5ffdfd7910307619a Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:25 -0800 Subject: ACPICA: Small typo fix, no functional change ACPICA commit 9b03c05305d856274c39f3adbddd8a98ef5d018e ACPI table header. Link: https://github.com/acpica/acpica/commit/9b03c053 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- include/acpi/actbl1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 867f4136dbac..178661f1b896 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1373,7 +1373,7 @@ struct acpi_pdtt_channel { /* Flags for above */ #define ACPI_PDTT_RUNTIME_TRIGGER (1) -#define ACPI_PPTT_WAIT_COMPLETION (1<<1) +#define ACPI_PDTT_WAIT_COMPLETION (1<<1) /******************************************************************************* * -- cgit v1.2.3 From 060c859d79ed8ead423a076e581af08d6496bf02 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:26 -0800 Subject: ACPICA: Debugger: add "background" command for method execution ACPICA commit d7b44738a48caa9f669b8dbf0024d456711aec31 Allows a single task to execute in the background, while control returns to the debugger prompt. Also, cleanup the debugger help screen. Link: https://github.com/acpica/acpica/commit/d7b44738 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/acdebug.h | 4 ++ drivers/acpi/acpica/aclocal.h | 13 ++-- drivers/acpi/acpica/dbexec.c | 110 +++++++++++++++++++++++++++++++- drivers/acpi/acpica/dbinput.c | 145 +++++++++++++++++++++++++----------------- 4 files changed, 206 insertions(+), 66 deletions(-) diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h index 71743e5252f5..54b8d9df9423 100644 --- a/drivers/acpi/acpica/acdebug.h +++ b/drivers/acpi/acpica/acdebug.h @@ -222,6 +222,10 @@ ACPI_DBR_DEPENDENT_RETURN_VOID(void void acpi_db_execute(char *name, char **args, acpi_object_type *types, u32 flags); +void +acpi_db_create_execution_thread(char *method_name_arg, + char **arguments, acpi_object_type *types); + void acpi_db_create_execution_threads(char *num_threads_arg, char *num_loops_arg, char *method_name_arg); diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index bed041d41596..a56675f0661e 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -1218,16 +1218,17 @@ struct acpi_db_method_info { acpi_object_type *types; /* - * Arguments to be passed to method for the command - * Threads - - * the Number of threads, ID of current thread and - * Index of current thread inside all them created. + * Arguments to be passed to method for the commands Threads and + * Background. Note, ACPI specifies a maximum of 7 arguments (0 - 6). + * + * For the Threads command, the Number of threads, ID of current + * thread and Index of current thread inside all them created. 
*/ char init_args; #ifdef ACPI_DEBUGGER - acpi_object_type arg_types[4]; + acpi_object_type arg_types[ACPI_METHOD_NUM_ARGS]; #endif - char *arguments[4]; + char *arguments[ACPI_METHOD_NUM_ARGS]; char num_threads_str[11]; char id_of_thread_str[11]; char index_of_thread_str[11]; diff --git a/drivers/acpi/acpica/dbexec.c b/drivers/acpi/acpica/dbexec.c index 3b30319752f0..ed088fceb18d 100644 --- a/drivers/acpi/acpica/dbexec.c +++ b/drivers/acpi/acpica/dbexec.c @@ -67,6 +67,8 @@ static acpi_status acpi_db_execution_walk(acpi_handle obj_handle, u32 nesting_level, void *context, void **return_value); +static void ACPI_SYSTEM_XFACE acpi_db_single_execution_thread(void *context); + /******************************************************************************* * * FUNCTION: acpi_db_delete_objects @@ -229,7 +231,7 @@ static acpi_status acpi_db_execute_setup(struct acpi_db_method_info *info) ACPI_FUNCTION_NAME(db_execute_setup); - /* Catenate the current scope to the supplied name */ + /* Concatenate the current scope to the supplied name */ info->pathname[0] = 0; if ((info->name[0] != '\\') && (info->name[0] != '/')) { @@ -609,6 +611,112 @@ static void ACPI_SYSTEM_XFACE acpi_db_method_thread(void *context) } } +/******************************************************************************* + * + * FUNCTION: acpi_db_single_execution_thread + * + * PARAMETERS: context - Method info struct + * + * RETURN: None + * + * DESCRIPTION: Create one thread and execute a method + * + ******************************************************************************/ + +static void ACPI_SYSTEM_XFACE acpi_db_single_execution_thread(void *context) +{ + struct acpi_db_method_info *info = context; + acpi_status status; + struct acpi_buffer return_obj; + + acpi_os_printf("\n"); + + status = acpi_db_execute_method(info, &return_obj); + if (ACPI_FAILURE(status)) { + acpi_os_printf("%s During evaluation of %s\n", + acpi_format_exception(status), info->pathname); + return; + } + + /* Display a 
return object, if any */ + + if (return_obj.length) { + acpi_os_printf("Evaluation of %s returned object %p, " + "external buffer length %X\n", + acpi_gbl_db_method_info.pathname, + return_obj.pointer, (u32)return_obj.length); + + acpi_db_dump_external_object(return_obj.pointer, 1); + } + + acpi_os_printf("\nBackground thread completed\n%c ", + ACPI_DEBUGGER_COMMAND_PROMPT); +} + +/******************************************************************************* + * + * FUNCTION: acpi_db_create_execution_thread + * + * PARAMETERS: method_name_arg - Control method to execute + * arguments - Array of arguments to the method + * types - Corresponding array of object types + * + * RETURN: None + * + * DESCRIPTION: Create a single thread to evaluate a namespace object. Handles + * arguments passed on command line for control methods. + * + ******************************************************************************/ + +void +acpi_db_create_execution_thread(char *method_name_arg, + char **arguments, acpi_object_type *types) +{ + acpi_status status; + u32 i; + + memset(&acpi_gbl_db_method_info, 0, sizeof(struct acpi_db_method_info)); + acpi_gbl_db_method_info.name = method_name_arg; + acpi_gbl_db_method_info.init_args = 1; + acpi_gbl_db_method_info.args = acpi_gbl_db_method_info.arguments; + acpi_gbl_db_method_info.types = acpi_gbl_db_method_info.arg_types; + + /* Setup method arguments, up to 7 (0-6) */ + + for (i = 0; (i < ACPI_METHOD_NUM_ARGS) && *arguments; i++) { + acpi_gbl_db_method_info.arguments[i] = *arguments; + arguments++; + + acpi_gbl_db_method_info.arg_types[i] = *types; + types++; + } + + status = acpi_db_execute_setup(&acpi_gbl_db_method_info); + if (ACPI_FAILURE(status)) { + return; + } + + /* Get the NS node, determines existence also */ + + status = acpi_get_handle(NULL, acpi_gbl_db_method_info.pathname, + &acpi_gbl_db_method_info.method); + if (ACPI_FAILURE(status)) { + acpi_os_printf("%s Could not get handle for %s\n", + acpi_format_exception(status), 
+ acpi_gbl_db_method_info.pathname); + return; + } + + status = acpi_os_execute(OSL_DEBUGGER_EXEC_THREAD, + acpi_db_single_execution_thread, + &acpi_gbl_db_method_info); + if (ACPI_FAILURE(status)) { + return; + } + + acpi_os_printf("\nBackground thread started\n"); +} + /******************************************************************************* * * FUNCTION: acpi_db_create_execution_threads diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c index 2626d79db064..954ca3b981a7 100644 --- a/drivers/acpi/acpica/dbinput.c +++ b/drivers/acpi/acpica/dbinput.c @@ -136,6 +136,7 @@ enum acpi_ex_debugger_commands { CMD_UNLOAD, CMD_TERMINATE, + CMD_BACKGROUND, CMD_THREADS, CMD_TEST, @@ -212,6 +213,7 @@ static const struct acpi_db_command_info acpi_gbl_db_commands[] = { {"UNLOAD", 1}, {"TERMINATE", 0}, + {"BACKGROUND", 1}, {"THREADS", 3}, {"TEST", 1}, @@ -222,9 +224,56 @@ static const struct acpi_db_command_info acpi_gbl_db_commands[] = { /* * Help for all debugger commands. First argument is the number of lines * of help to output for the command. + * + * Note: Some commands are not supported by the kernel-level version of + * the debugger. */ static const struct acpi_db_command_help acpi_gbl_db_command_help[] = { - {0, "\nGeneral-Purpose Commands:", "\n"}, + {0, "\nNamespace Access:", "\n"}, + {1, " Businfo", "Display system bus info\n"}, + {1, " Disassemble ", "Disassemble a control method\n"}, + {1, " Find (? 
is wildcard)", + "Find ACPI name(s) with wildcards\n"}, + {1, " Integrity", "Validate namespace integrity\n"}, + {1, " Methods", "Display list of loaded control methods\n"}, + {1, " Namespace [Object] [Depth]", + "Display loaded namespace tree/subtree\n"}, + {1, " Notify ", "Send a notification on Object\n"}, + {1, " Objects [ObjectType]", + "Display summary of all objects or just given type\n"}, + {1, " Owner [Depth]", + "Display loaded namespace by object owner\n"}, + {1, " Paths", "Display full pathnames of namespace objects\n"}, + {1, " Predefined", "Check all predefined names\n"}, + {1, " Prefix []", "Set or Get current execution prefix\n"}, + {1, " References ", "Find all references to object at addr\n"}, + {1, " Resources [DeviceName]", + "Display Device resources (no arg = all devices)\n"}, + {1, " Set N ", "Set value for named integer\n"}, + {1, " Template ", "Format/dump a Buffer/ResourceTemplate\n"}, + {1, " Type ", "Display object type\n"}, + + {0, "\nControl Method Execution:", "\n"}, + {1, " Evaluate [Arguments]", + "Evaluate object or control method\n"}, + {1, " Execute [Arguments]", "Synonym for Evaluate\n"}, +#ifdef ACPI_APPLICATION + {1, " Background [Arguments]", + "Evaluate object/method in a separate thread\n"}, + {1, " Thread ", + "Spawn threads to execute method(s)\n"}, +#endif + {1, " Debug [Arguments]", "Single-Step a control method\n"}, + {7, " [Arguments] formats:", "Control method argument formats\n"}, + {1, " Hex Integer", "Integer\n"}, + {1, " \"Ascii String\"", "String\n"}, + {1, " (Hex Byte List)", "Buffer\n"}, + {1, " (01 42 7A BF)", "Buffer example (4 bytes)\n"}, + {1, " [Package Element List]", "Package\n"}, + {1, " [0x01 0x1234 \"string\"]", + "Package example (3 elements)\n"}, + + {0, "\nMiscellaneous:", "\n"}, {1, " Allocations", "Display list of current memory allocations\n"}, {2, " Dump
|", "\n"}, {0, " [Byte|Word|Dword|Qword]", @@ -248,46 +297,30 @@ static const struct acpi_db_command_help acpi_gbl_db_command_help[] = { {1, " Stack", "Display CPU stack usage\n"}, {1, " Tables", "Info about current ACPI table(s)\n"}, {1, " Tables", "Display info about loaded ACPI tables\n"}, +#ifdef ACPI_APPLICATION + {1, " Terminate", "Delete namespace and all internal objects\n"}, +#endif {1, " ! ", "Execute command from history buffer\n"}, {1, " !!", "Execute last command again\n"}, - {0, "\nNamespace Access Commands:", "\n"}, - {1, " Businfo", "Display system bus info\n"}, - {1, " Disassemble ", "Disassemble a control method\n"}, - {1, " Find (? is wildcard)", - "Find ACPI name(s) with wildcards\n"}, - {1, " Integrity", "Validate namespace integrity\n"}, - {1, " Methods", "Display list of loaded control methods\n"}, - {1, " Namespace [Object] [Depth]", - "Display loaded namespace tree/subtree\n"}, - {1, " Notify ", "Send a notification on Object\n"}, - {1, " Objects [ObjectType]", - "Display summary of all objects or just given type\n"}, - {1, " Owner [Depth]", - "Display loaded namespace by object owner\n"}, - {1, " Paths", "Display full pathnames of namespace objects\n"}, - {1, " Predefined", "Check all predefined names\n"}, - {1, " Prefix []", "Set or Get current execution prefix\n"}, - {1, " References ", "Find all references to object at addr\n"}, - {1, " Resources [DeviceName]", - "Display Device resources (no arg = all devices)\n"}, - {1, " Set N ", "Set value for named integer\n"}, - {1, " Template ", "Format/dump a Buffer/ResourceTemplate\n"}, - {1, " Type ", "Display object type\n"}, + {0, "\nMethod and Namespace Debugging:", "\n"}, + {5, " Trace [] [Once]", + "Trace control method execution\n"}, + {1, " Enable", "Enable all messages\n"}, + {1, " Disable", "Disable tracing\n"}, + {1, " Method", "Enable method execution messages\n"}, + {1, " Opcode", "Enable opcode execution messages\n"}, + {3, " Test ", "Invoke a debug test\n"}, + {1, " Objects", 
"Read/write/compare all namespace data objects\n"}, + {1, " Predefined", + "Validate all ACPI predefined names (_STA, etc.)\n"}, + {1, " Execute predefined", + "Execute all predefined (public) methods\n"}, - {0, "\nControl Method Execution Commands:", "\n"}, + {0, "\nControl Method Single-Step Execution:", "\n"}, {1, " Arguments (or Args)", "Display method arguments\n"}, {1, " Breakpoint ", "Set an AML execution breakpoint\n"}, {1, " Call", "Run to next control method invocation\n"}, - {1, " Debug [Arguments]", "Single Step a control method\n"}, - {6, " Evaluate", "Synonym for Execute\n"}, - {5, " Execute [Arguments]", "Execute control method\n"}, - {1, " Hex Integer", "Integer method argument\n"}, - {1, " \"Ascii String\"", "String method argument\n"}, - {1, " (Hex Byte List)", "Buffer method argument\n"}, - {1, " [Package Element List]", "Package method argument\n"}, - {5, " Execute predefined", - "Execute all predefined (public) methods\n"}, {1, " Go", "Allow method to run to completion\n"}, {1, " Information", "Display info about the current method\n"}, {1, " Into", "Step into (not over) a method call\n"}, @@ -296,41 +329,24 @@ static const struct acpi_db_command_help acpi_gbl_db_command_help[] = { {1, " Results", "Display method result stack\n"}, {1, " Set <#> ", "Set method data (Arguments/Locals)\n"}, {1, " Stop", "Terminate control method\n"}, - {5, " Trace [] [Once]", - "Trace control method execution\n"}, - {1, " Enable", "Enable all messages\n"}, - {1, " Disable", "Disable tracing\n"}, - {1, " Method", "Enable method execution messages\n"}, - {1, " Opcode", "Enable opcode execution messages\n"}, {1, " Tree", "Display control method calling tree\n"}, {1, " ", "Single step next AML opcode (over calls)\n"}, #ifdef ACPI_APPLICATION - {0, "\nHardware Simulation Commands:", "\n"}, - {1, " EnableAcpi", "Enable ACPI (hardware) mode\n"}, - {1, " Event ", "Generate AcpiEvent (Fixed/GPE)\n"}, - {1, " Gpe [GpeBlockDevice]", "Simulate a GPE\n"}, - {1, " Gpes", 
"Display info on all GPE devices\n"}, - {1, " Sci", "Generate an SCI\n"}, - {1, " Sleep [SleepState]", "Simulate sleep/wake sequence(s) (0-5)\n"}, - - {0, "\nFile I/O Commands:", "\n"}, + {0, "\nFile Operations:", "\n"}, {1, " Close", "Close debug output file\n"}, {1, " Load ", "Load ACPI table from a file\n"}, {1, " Open ", "Open a file for debug output\n"}, {1, " Unload ", "Unload an ACPI table via namespace object\n"}, - {0, "\nUser Space Commands:", "\n"}, - {1, " Terminate", "Delete namespace and all internal objects\n"}, - {1, " Thread ", - "Spawn threads to execute method(s)\n"}, - - {0, "\nDebug Test Commands:", "\n"}, - {3, " Test ", "Invoke a debug test\n"}, - {1, " Objects", "Read/write/compare all namespace data objects\n"}, - {1, " Predefined", - "Execute all ACPI predefined names (_STA, etc.)\n"}, + {0, "\nHardware Simulation:", "\n"}, + {1, " EnableAcpi", "Enable ACPI (hardware) mode\n"}, + {1, " Event ", "Generate AcpiEvent (Fixed/GPE)\n"}, + {1, " Gpe [GpeBlockDevice]", "Simulate a GPE\n"}, + {1, " Gpes", "Display info on all GPE devices\n"}, + {1, " Sci", "Generate an SCI\n"}, + {1, " Sleep [SleepState]", "Simulate sleep/wake sequence(s) (0-5)\n"}, #endif {0, NULL, NULL} }; @@ -442,11 +458,15 @@ static void acpi_db_display_help(char *command) /* No argument to help, display help for all commands */ + acpi_os_printf("\nSummary of AML Debugger Commands\n\n"); + while (next->invocation) { acpi_os_printf("%-38s%s", next->invocation, next->description); next++; } + acpi_os_printf("\n"); + } else { /* Display help for all commands that match the subtring */ @@ -1087,6 +1107,13 @@ acpi_db_command_dispatch(char *input_buffer, /* acpi_initialize (NULL); */ break; + case CMD_BACKGROUND: + + acpi_db_create_execution_thread(acpi_gbl_db_args[1], + &acpi_gbl_db_args[2], + &acpi_gbl_db_arg_types[2]); + break; + case CMD_THREADS: acpi_db_create_execution_threads(acpi_gbl_db_args[1], -- cgit v1.2.3 From 692aa0cf5d459dfb747d8bbf8bee20afbf1fa746 Mon Sep 17 00:00:00 
2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:27 -0800 Subject: ACPICA: Update mutex error messages, no functional change ACPICA commit 22f2b0beb45d277841ed02bc613df1009e5b20cf Add mutex name on certain acquire/release errors. Link: https://github.com/acpica/acpica/commit/22f2b0be Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/utmutex.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index 586354788018..524ba931d5e8 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -286,8 +286,9 @@ acpi_status acpi_ut_acquire_mutex(acpi_mutex_handle mutex_id) acpi_gbl_mutex_info[mutex_id].thread_id = this_thread_id; } else { ACPI_EXCEPTION((AE_INFO, status, - "Thread %u could not acquire Mutex [0x%X]", - (u32)this_thread_id, mutex_id)); + "Thread %u could not acquire Mutex [%s] (0x%X)", + (u32)this_thread_id, + acpi_ut_get_mutex_name(mutex_id), mutex_id)); } return (status); @@ -322,8 +323,8 @@ acpi_status acpi_ut_release_mutex(acpi_mutex_handle mutex_id) */ if (acpi_gbl_mutex_info[mutex_id].thread_id == ACPI_MUTEX_NOT_ACQUIRED) { ACPI_ERROR((AE_INFO, - "Mutex [0x%X] is not acquired, cannot release", - mutex_id)); + "Mutex [%s] (0x%X) is not acquired, cannot release", + acpi_ut_get_mutex_name(mutex_id), mutex_id)); return (AE_NOT_ACQUIRED); } -- cgit v1.2.3 From 3e1dc644aa2fe816e6d91459dfd2e383e66be8c7 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:28 -0800 Subject: ACPICA: Update linkage for get mutex name interface ACPICA commit cb9a3906ec35da7a6e0b8972f8e6e7895e59c208 Always enable the function. Link: https://github.com/acpica/acpica/commit/cb9a3906 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/acutils.h | 4 ++-- drivers/acpi/acpica/utdecode.c | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index d38187ee11c8..8bb46d8623ca 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -230,10 +230,10 @@ u64 acpi_ut_implicit_strtoul64(char *string); */ acpi_status acpi_ut_init_globals(void); -#if defined(ACPI_DEBUG_OUTPUT) || defined(ACPI_DEBUGGER) - const char *acpi_ut_get_mutex_name(u32 mutex_id); +#if defined(ACPI_DEBUG_OUTPUT) || defined(ACPI_DEBUGGER) + const char *acpi_ut_get_notify_name(u32 notify_value, acpi_object_type type); #endif diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c index 02cd2c2d961a..55debbad487d 100644 --- a/drivers/acpi/acpica/utdecode.c +++ b/drivers/acpi/acpica/utdecode.c @@ -395,11 +395,6 @@ const char *acpi_ut_get_reference_name(union acpi_operand_object *object) return (acpi_gbl_ref_class_names[object->reference.class]); } -#if defined(ACPI_DEBUG_OUTPUT) || defined(ACPI_DEBUGGER) -/* - * Strings and procedures used for debug only - */ - /******************************************************************************* * * FUNCTION: acpi_ut_get_mutex_name @@ -433,6 +428,12 @@ const char *acpi_ut_get_mutex_name(u32 mutex_id) return (acpi_gbl_mutex_names[mutex_id]); } +#if defined(ACPI_DEBUG_OUTPUT) || defined(ACPI_DEBUGGER) + +/* + * Strings and procedures used for debug only + */ + /******************************************************************************* * * FUNCTION: acpi_ut_get_notify_name -- cgit v1.2.3 From e3860b5ecff12770a45bd11c8d4fb8ac0fffcce1 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 17 Nov 2017 15:42:29 -0800 Subject: ACPICA: ACPI 6.2: Additional PPTT flags ACPICA commit fba3ae99b2bc514ca34f0d7b2609c2a043582784 The ACPI 6.2 spec has flags to describe cache allocation, write back, and whether it is an instruction, data or 
unified cache. Link: https://github.com/acpica/acpica/commit/fba3ae99 Signed-off-by: Jeremy Linton Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- include/acpi/actbl1.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 178661f1b896..4c304bf4d591 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1441,6 +1441,20 @@ struct acpi_pptt_cache { #define ACPI_PPTT_MASK_CACHE_TYPE (0x0C) /* Cache type */ #define ACPI_PPTT_MASK_WRITE_POLICY (0x10) /* Write policy */ +/* Attributes describing cache */ +#define ACPI_PPTT_CACHE_READ_ALLOCATE (0x0) /* Cache line is allocated on read */ +#define ACPI_PPTT_CACHE_WRITE_ALLOCATE (0x01) /* Cache line is allocated on write */ +#define ACPI_PPTT_CACHE_RW_ALLOCATE (0x02) /* Cache line is allocated on read and write */ +#define ACPI_PPTT_CACHE_RW_ALLOCATE_ALT (0x03) /* Alternate representation of above */ + +#define ACPI_PPTT_CACHE_TYPE_DATA (0x0) /* Data cache */ +#define ACPI_PPTT_CACHE_TYPE_INSTR (1<<2) /* Instruction cache */ +#define ACPI_PPTT_CACHE_TYPE_UNIFIED (2<<2) /* Unified I & D cache */ +#define ACPI_PPTT_CACHE_TYPE_UNIFIED_ALT (3<<2) /* Alternate representation of above */ + +#define ACPI_PPTT_CACHE_POLICY_WB (0x0) /* Cache is write back */ +#define ACPI_PPTT_CACHE_POLICY_WT (1<<4) /* Cache is write through */ + /* 2: ID Structure */ struct acpi_pptt_id { -- cgit v1.2.3 From fb969d160cac2074f8eeefdc073e4797885839e7 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 17 Nov 2017 15:42:30 -0800 Subject: ACPICA: Update version to 20171110 ACPICA commit d0ef5a72ca7c01acbf590e4556d19edf69a5b5c4 Version 20171110. Link: https://github.com/acpica/acpica/commit/d0ef5a72 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index f25668cce946..e02610adc07d 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20170929 +#define ACPI_CA_VERSION 0x20171110 #include #include -- cgit v1.2.3 From 0d4b54c6fee87ff60b0bc1007ca487449698468d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Nov 2017 15:31:49 +0100 Subject: PM / core: Add LEAVE_SUSPENDED driver flag Define and document a new driver flag, DPM_FLAG_LEAVE_SUSPENDED, to instruct the PM core and middle-layer (bus type, PM domain, etc.) code that it is desirable to leave the device in runtime suspend after system-wide transitions to the working state (for example, the device may be slow to resume and it may be better to avoid resuming it right away). Generally, the middle-layer code involved in the handling of the device is expected to indicate to the PM core whether or not the device may be left in suspend with the help of the device's power.may_skip_resume status bit. That has to happen in the "noirq" phase of the preceding system suspend (or analogous) transition. The middle layer is then responsible for handling the device as appropriate in its "noirq" resume callback which is executed regardless of whether or not the device may be left suspended, but the other resume callbacks (except for ->complete) will be skipped automatically by the core if the device really can be left in suspend. The additional power.must_resume status bit introduced for the implementation of this mechanism is used internally by the PM core to track the requirement to resume the device (which may depend on its children etc). Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Ulf Hansson --- Documentation/driver-api/pm/devices.rst | 27 +++++++++++- drivers/base/power/main.c | 73 ++++++++++++++++++++++++++++++--- include/linux/pm.h | 16 ++++++-- 3 files changed, 105 insertions(+), 11 deletions(-) diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index 53c1b0b06da5..b0fe63c91f8d 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -788,6 +788,29 @@ must reflect the "active" status for runtime PM in that case. During system-wide resume from a sleep state it's easiest to put devices into the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`. -Refer to that document for more information regarding this particular issue as +[Refer to that document for more information regarding this particular issue as well as for information on the device runtime power management framework in -general. +general.] + +However, it often is desirable to leave devices in suspend after system +transitions to the working state, especially if those devices had been in +runtime suspend before the preceding system-wide suspend (or analogous) +transition. Device drivers can use the ``DPM_FLAG_LEAVE_SUSPENDED`` flag to +indicate to the PM core (and middle-layer code) that they prefer the specific +devices handled by them to be left suspended and they have no problems with +skipping their system-wide resume callbacks for this reason. Whether or not the +devices will actually be left in suspend may depend on their state before the +given system suspend-resume cycle and on the type of the system transition under +way. In particular, devices are not left suspended if that transition is a +restore from hibernation, as device states are not guaranteed to be reflected +by the information stored in the hibernation image in that case. 
+ +The middle-layer code involved in the handling of the device is expected to +indicate to the PM core if the device may be left in suspend by setting its +:c:member:`power.may_skip_resume` status bit which is checked by the PM core +during the "noirq" phase of the preceding system-wide suspend (or analogous) +transition. The middle layer is then responsible for handling the device as +appropriate in its "noirq" resume callback, which is executed regardless of +whether or not the device is left suspended, but the other resume callbacks +(except for ``->complete``) will be skipped automatically by the PM core if the +device really can be left in suspend. diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index db2f04415927..73ec6796d9e1 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -525,6 +525,18 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) /*------------------------- Resume routines -------------------------*/ +/** + * dev_pm_may_skip_resume - System-wide device resume optimization check. + * @dev: Target device. + * + * Checks whether or not the device may be left in suspend after a system-wide + * transition to the working state. + */ +bool dev_pm_may_skip_resume(struct device *dev) +{ + return !dev->power.must_resume && pm_transition.event != PM_EVENT_RESTORE; +} + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. @@ -573,6 +585,19 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn error = dpm_run_callback(callback, dev, state, info); dev->power.is_noirq_suspended = false; + if (dev_pm_may_skip_resume(dev)) { + /* + * The device is going to be left in suspend, but it might not + * have been in runtime suspend before the system suspended, so + * its runtime PM status needs to be updated to avoid confusing + * the runtime PM framework when runtime PM is enabled for the + * device again. 
+ */ + pm_runtime_set_suspended(dev); + dev->power.is_late_suspended = false; + dev->power.is_suspended = false; + } + Out: complete_all(&dev->power.completion); TRACE_RESUME(error); @@ -1074,6 +1099,22 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } +static void dpm_superior_set_must_resume(struct device *dev) +{ + struct device_link *link; + int idx; + + if (dev->parent) + dev->parent->power.must_resume = true; + + idx = device_links_read_lock(); + + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + link->supplier->power.must_resume = true; + + device_links_read_unlock(idx); +} + /** * __device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. @@ -1125,10 +1166,28 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a } error = dpm_run_callback(callback, dev, state, info); - if (!error) - dev->power.is_noirq_suspended = true; - else + if (error) { async_error = error; + goto Complete; + } + + dev->power.is_noirq_suspended = true; + + if (dev_pm_test_driver_flags(dev, DPM_FLAG_LEAVE_SUSPENDED)) { + /* + * The only safe strategy here is to require that if the device + * may not be left in suspend, resume callbacks must be invoked + * for it. 
+ */ + dev->power.must_resume = dev->power.must_resume || + !dev->power.may_skip_resume || + atomic_read(&dev->power.usage_count) > 1; + } else { + dev->power.must_resume = true; + } + + if (dev->power.must_resume) + dpm_superior_set_must_resume(dev); Complete: complete_all(&dev->power.completion); @@ -1485,6 +1544,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dev->power.direct_complete = false; } + dev->power.may_skip_resume = false; + dev->power.must_resume = false; + dpm_watchdog_set(&wd, dev); device_lock(dev); @@ -1650,8 +1712,9 @@ static int device_prepare(struct device *dev, pm_message_t state) if (dev->power.syscore) return 0; - WARN_ON(dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) && - !pm_runtime_enabled(dev)); + WARN_ON(!pm_runtime_enabled(dev) && + dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND | + DPM_FLAG_LEAVE_SUSPENDED)); /* * If a device's parent goes into runtime suspend at the wrong time, diff --git a/include/linux/pm.h b/include/linux/pm.h index 65d39115f06d..b5a40b713e9e 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -556,9 +556,10 @@ struct pm_subsys_data { * These flags can be set by device drivers at the probe time. They need not be * cleared by the drivers as the driver core will take care of that. * - * NEVER_SKIP: Do not skip system suspend/resume callbacks for the device. + * NEVER_SKIP: Do not skip all system suspend/resume callbacks for the device. * SMART_PREPARE: Check the return value of the driver's ->prepare callback. * SMART_SUSPEND: No need to resume the device from runtime suspend. + * LEAVE_SUSPENDED: Avoid resuming the device during system resume if possible. * * Setting SMART_PREPARE instructs bus types and PM domains which may want * system suspend/resume callbacks to be skipped for the device to return 0 from @@ -572,10 +573,14 @@ struct pm_subsys_data { * necessary from the driver's perspective. 
It also may cause them to skip * invocations of the ->suspend_late and ->suspend_noirq callbacks provided by * the driver if they decide to leave the device in runtime suspend. + * + * Setting LEAVE_SUSPENDED informs the PM core and middle-layer code that the + * driver prefers the device to be left in suspend after system resume. */ -#define DPM_FLAG_NEVER_SKIP BIT(0) -#define DPM_FLAG_SMART_PREPARE BIT(1) -#define DPM_FLAG_SMART_SUSPEND BIT(2) +#define DPM_FLAG_NEVER_SKIP BIT(0) +#define DPM_FLAG_SMART_PREPARE BIT(1) +#define DPM_FLAG_SMART_SUSPEND BIT(2) +#define DPM_FLAG_LEAVE_SUSPENDED BIT(3) struct dev_pm_info { pm_message_t power_state; @@ -597,6 +602,8 @@ struct dev_pm_info { bool wakeup_path:1; bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ + unsigned int must_resume:1; /* Owned by the PM core */ + unsigned int may_skip_resume:1; /* Set by subsystems */ #else unsigned int should_wakeup:1; #endif @@ -765,6 +772,7 @@ extern int pm_generic_poweroff_late(struct device *dev); extern int pm_generic_poweroff(struct device *dev); extern void pm_generic_complete(struct device *dev); +extern bool dev_pm_may_skip_resume(struct device *dev); extern bool dev_pm_smart_suspend_and_suspended(struct device *dev); #else /* !CONFIG_PM_SLEEP */ -- cgit v1.2.3 From bd755d770ac78e8eeda05877ba66cc66f151e10e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Nov 2017 15:33:52 +0100 Subject: PCI / PM: Support for LEAVE_SUSPENDED driver flag Add support for DPM_FLAG_LEAVE_SUSPENDED to the PCI bus type by making it (a) set the power.may_skip_resume status bit for devices that, from its perspective, may be left in suspend after system wakeup from sleep and (b) return early from pci_pm_resume_noirq() for devices whose remaining resume callbacks during the transition under way are going to be skipped by the PM core. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas --- Documentation/power/pci.txt | 11 +++++++++++ drivers/pci/pci-driver.c | 19 +++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index 704cd36079b8..8eaf9ee24d43 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -994,6 +994,17 @@ into D0 going forward), but if it is in runtime suspend in pci_pm_thaw_noirq(), the function will set the power.direct_complete flag for it (to make the PM core skip the subsequent "thaw" callbacks for it) and return. +Setting the DPM_FLAG_LEAVE_SUSPENDED flag means that the driver prefers the +device to be left in suspend after system-wide transitions to the working state. +This flag is checked by the PM core, but the PCI bus type informs the PM core +which devices may be left in suspend from its perspective (that happens during +the "noirq" phase of system-wide suspend and analogous transitions) and next it +uses the dev_pm_may_skip_resume() helper to decide whether or not to return from +pci_pm_resume_noirq() early, as the PM core will skip the remaining resume +callbacks for the device during the transition under way and will set its +runtime PM status to "suspended" if dev_pm_may_skip_resume() returns "true" for +it. + 3.2. 
Device Runtime Power Management ------------------------------------ In addition to providing device power management callbacks PCI device drivers diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 7f47bb72bf30..3cf2da22acf2 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -699,7 +699,7 @@ static void pci_pm_complete(struct device *dev) pm_generic_complete(dev); /* Resume device if platform firmware has put it in reset-power-on */ - if (dev->power.direct_complete && pm_resume_via_firmware()) { + if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) { pci_power_t pre_sleep_state = pci_dev->current_state; pci_update_current_state(pci_dev, pci_dev->current_state); @@ -783,8 +783,10 @@ static int pci_pm_suspend_noirq(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - if (dev_pm_smart_suspend_and_suspended(dev)) + if (dev_pm_smart_suspend_and_suspended(dev)) { + dev->power.may_skip_resume = true; return 0; + } if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); @@ -838,6 +840,16 @@ static int pci_pm_suspend_noirq(struct device *dev) Fixup: pci_fixup_device(pci_fixup_suspend_late, pci_dev); + /* + * If the target system sleep state is suspend-to-idle, it is sufficient + * to check whether or not the device's wakeup settings are good for + * runtime PM. Otherwise, the pm_resume_via_firmware() check will cause + * pci_pm_complete() to take care of fixing up the device's state + * anyway, if need be. 
+ */ + dev->power.may_skip_resume = device_may_wakeup(dev) || + !device_can_wakeup(dev); + return 0; } @@ -847,6 +859,9 @@ static int pci_pm_resume_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (dev_pm_may_skip_resume(dev)) + return 0; + /* * Devices with DPM_FLAG_SMART_SUSPEND may be left in runtime suspend * during system suspend, so update their runtime PM status to "active" -- cgit v1.2.3 From db68daff90ef79761cc0bba16f775b6027ea3a83 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Nov 2017 15:35:00 +0100 Subject: ACPI / PM: Support for LEAVE_SUSPENDED driver flag in ACPI PM domain Add support for DPM_FLAG_LEAVE_SUSPENDED to the ACPI PM domain by making it (a) set the power.may_skip_resume status bit for devices that, from its perspective, may be left in suspend after system wakeup from sleep and (b) return early from acpi_subsys_resume_noirq() for devices whose remaining resume callbacks during the transition under way are going to be skipped by the PM core. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- drivers/acpi/device_pm.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index e4ffaeec9ec2..5cfe794c36bd 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -990,7 +990,7 @@ void acpi_subsys_complete(struct device *dev) * the sleep state it is going out of and it has never been resumed till * now, resume it in case the firmware powered it up. 
*/ - if (dev->power.direct_complete && pm_resume_via_firmware()) + if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) pm_request_resume(dev); } EXPORT_SYMBOL_GPL(acpi_subsys_complete); @@ -1039,10 +1039,28 @@ EXPORT_SYMBOL_GPL(acpi_subsys_suspend_late); */ int acpi_subsys_suspend_noirq(struct device *dev) { - if (dev_pm_smart_suspend_and_suspended(dev)) + int ret; + + if (dev_pm_smart_suspend_and_suspended(dev)) { + dev->power.may_skip_resume = true; return 0; + } + + ret = pm_generic_suspend_noirq(dev); + if (ret) + return ret; + + /* + * If the target system sleep state is suspend-to-idle, it is sufficient + * to check whether or not the device's wakeup settings are good for + * runtime PM. Otherwise, the pm_resume_via_firmware() check will cause + * acpi_subsys_complete() to take care of fixing up the device's state + * anyway, if need be. + */ + dev->power.may_skip_resume = device_may_wakeup(dev) || + !device_can_wakeup(dev); - return pm_generic_suspend_noirq(dev); + return 0; } EXPORT_SYMBOL_GPL(acpi_subsys_suspend_noirq); @@ -1052,6 +1070,9 @@ EXPORT_SYMBOL_GPL(acpi_subsys_suspend_noirq); */ int acpi_subsys_resume_noirq(struct device *dev) { + if (dev_pm_may_skip_resume(dev)) + return 0; + /* * Devices with DPM_FLAG_SMART_SUSPEND may be left in runtime suspend * during system suspend, so update their runtime PM status to "active" -- cgit v1.2.3 From 57044031b0cb11325e1034394a4721484f9dc9fe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Nov 2017 02:16:55 +0100 Subject: ACPI / PM: Make it possible to ignore the system sleep blacklist The ACPI code supporting system transitions to sleep states uses an internal blacklist to apply special handling to some machines reported to behave incorrectly in some ways. 
However, some entries of that blacklist cover problematic as well as non-problematic systems, so give the users of the latter a chance to ignore the blacklist and run their systems in the default way by adding acpi_sleep=nobl to the kernel command line. For example, that allows the users of Dell XPS13 9360 systems not affected by the issue that caused the blacklist entry for this machine to be added by commit 71630b7a832f (ACPI / PM: Blacklist Low Power S0 Idle _DSM for Dell XPS13 9360) to use suspend-to-idle with the Low Power S0 Idle _DSM interface which in principle should be more energy-efficient than S3 on them. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 5 ++++- arch/x86/kernel/acpi/sleep.c | 2 ++ drivers/acpi/sleep.c | 10 ++++++++++ include/linux/acpi.h | 1 + 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6571fbfdb2a1..b125690d5dbc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -223,7 +223,7 @@ acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, nonvs, sci_force_enable } + old_ordering, nonvs, sci_force_enable, nobl } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep @@ -239,6 +239,9 @@ sci_force_enable causes the kernel to set SCI_EN directly on resume from S1/S3 (which is against the ACPI spec, but some broken systems don't work without it). + nobl causes the internal blacklist of systems known to + behave incorrectly in some ways with respect to system + suspend and resume to be ignored (use wisely). acpi_use_timer_override [HW,ACPI] Use timer override. 
For some broken Nvidia NF5 boards diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 7188aea91549..f1915b744052 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -138,6 +138,8 @@ static int __init acpi_sleep_setup(char *str) acpi_nvs_nosave_s3(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "nobl", 4) == 0) + acpi_sleep_no_blacklist(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 8082871b409a..15cd862a87c2 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -367,10 +367,20 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { {}, }; +static bool ignore_blacklist; + +void __init acpi_sleep_no_blacklist(void) +{ + ignore_blacklist = true; +} + static void __init acpi_sleep_dmi_check(void) { int year; + if (ignore_blacklist) + return; + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year >= 2012) acpi_nvs_nosave_s3(); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index dc1ebfeeb5ec..699655a9618b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -451,6 +451,7 @@ void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_nvs_nosave(void); void __init acpi_nvs_nosave_s3(void); +void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From bdfe4cebea11476d278b1b98dd0f7cdac8269d62 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 10 Nov 2017 17:26:54 +0800 Subject: arm64: allwinner: a64: add Ethernet PHY regulator for several boards On several A64 boards the Ethernet PHY is powered by the DC1SW regulator on the AXP803 PMIC. Add phy-supply property to these boards' emac node.
Signed-off-by: Icenowy Zheng Acked-by: Corentin LABBE Tested-by: Corentin LABBE Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts | 1 + arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts | 1 + arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 45bdbfb96126..4a8d3f83a36e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -75,6 +75,7 @@ pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; + phy-supply = <&reg_dc1sw>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 806442d3e846..604cdaedac38 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -77,6 +77,7 @@ pinctrl-0 = <&rmii_pins>; phy-mode = "rmii"; phy-handle = <&ext_rmii_phy1>; + phy-supply = <&reg_dc1sw>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 0eb2acedf8c3..a053a6ac5267 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -82,6 +82,7 @@ pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; + phy-supply = <&reg_dc1sw>; status = "okay"; }; -- cgit v1.2.3 From 544e92581a2ac44607d7cc602c6b54d18656f56d Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 13 Nov 2017 16:12:06 +0000 Subject: EDAC, octeon: Fix an uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an uninitialized variable warning in the Octeon EDAC
driver, as seen in MIPS cavium_octeon_defconfig builds since v4.14 with Codescape GNU Tools 2016.05-03: drivers/edac/octeon_edac-lmc.c In function ‘octeon_lmc_edac_poll_o2’: drivers/edac/octeon_edac-lmc.c:87:24: warning: ‘((long unsigned int*)&int_reg)[1]’ may \ be used uninitialized in this function [-Wmaybe-uninitialized] if (int_reg.s.sec_err || int_reg.s.ded_err) { ^ Initialise the whole int_reg variable to zero before the conditional assignments in the error injection case. Signed-off-by: James Hogan Acked-by: David Daney Cc: linux-edac Cc: linux-mips@linux-mips.org Cc: # 3.15+ Fixes: 1bc021e81565 ("EDAC: Octeon: Add error injection support") Link: http://lkml.kernel.org/r/20171113161206.20990-1-james.hogan@mips.com Signed-off-by: Borislav Petkov --- drivers/edac/octeon_edac-lmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/octeon_edac-lmc.c b/drivers/edac/octeon_edac-lmc.c index 9c1ffe3e912b..aeb222ca3ed1 100644 --- a/drivers/edac/octeon_edac-lmc.c +++ b/drivers/edac/octeon_edac-lmc.c @@ -78,6 +78,7 @@ static void octeon_lmc_edac_poll_o2(struct mem_ctl_info *mci) if (!pvt->inject) int_reg.u64 = cvmx_read_csr(CVMX_LMCX_INT(mci->mc_idx)); else { + int_reg.u64 = 0; if (pvt->error_type == 1) int_reg.s.sec_err = 1; if (pvt->error_type == 2) -- cgit v1.2.3 From 86a18ee21e5eecf56ca02aec24807ffa87bb57b6 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 13 Nov 2017 15:08:10 +0200 Subject: EDAC, ti: Add support for TI keystone and DRA7xx EDAC TI Keystone and DRA7xx SoCs have support for EDAC on DDR3 memory that can correct one bit errors and detect two bit errors. Add EDAC driver for this feature which plugs into the generic kernel EDAC framework. Signed-off-by: Tero Kristo Cc: linux-arm-kernel@lists.infradead.org Cc: linux-edac Cc: linux-omap@vger.kernel.org Link: http://lkml.kernel.org/r/1510578490-14510-1-git-send-email-t-kristo@ti.com [ Add SPDX tag and make _emif_get_id() use edac_printk().
] Signed-off-by: Borislav Petkov --- MAINTAINERS | 6 + drivers/edac/Kconfig | 7 + drivers/edac/Makefile | 1 + drivers/edac/ti_edac.c | 341 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 355 insertions(+) create mode 100644 drivers/edac/ti_edac.c diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..27abd1524042 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5140,6 +5140,12 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/skx_edac.c +EDAC-TI +M: Tero Kristo +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/ti_edac.c + EDIROL UA-101/UA-1000 DRIVER M: Clemens Ladisch L: alsa-devel@alsa-project.org (moderated for non-subscribers) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 96afb2aeed18..3c4017007647 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -457,4 +457,11 @@ config EDAC_XGENE Support for error detection and correction on the APM X-Gene family of SOCs. +config EDAC_TI + tristate "Texas Instruments DDR3 ECC Controller" + depends on ARCH_KEYSTONE || SOC_DRA7XX + help + Support for error detection and correction on the + TI SoCs. 
+ endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 0fd9ffa63299..b54912eb39af 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -78,3 +78,4 @@ obj-$(CONFIG_EDAC_THUNDERX) += thunderx_edac.o obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o +obj-$(CONFIG_EDAC_TI) += ti_edac.o diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c new file mode 100644 index 000000000000..6ac26d1b929f --- /dev/null +++ b/drivers/edac/ti_edac.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/ + * + * Texas Instruments DDR3 ECC error correction and detection driver + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "edac_module.h" + +/* EMIF controller registers */ +#define EMIF_SDRAM_CONFIG 0x008 +#define EMIF_IRQ_STATUS 0x0ac +#define EMIF_IRQ_ENABLE_SET 0x0b4 +#define EMIF_ECC_CTRL 0x110 +#define EMIF_1B_ECC_ERR_CNT 0x130 +#define EMIF_1B_ECC_ERR_THRSH 0x134 +#define EMIF_1B_ECC_ERR_ADDR_LOG 0x13c +#define EMIF_2B_ECC_ERR_ADDR_LOG 0x140 + +/* Bit definitions for EMIF_SDRAM_CONFIG */ +#define SDRAM_TYPE_SHIFT 29 +#define SDRAM_TYPE_MASK GENMASK(31, 29) +#define SDRAM_TYPE_DDR3 (3 << SDRAM_TYPE_SHIFT) +#define SDRAM_TYPE_DDR2 (2 << SDRAM_TYPE_SHIFT) +#define SDRAM_NARROW_MODE_MASK GENMASK(15, 14) +#define SDRAM_K2_NARROW_MODE_SHIFT 12 +#define SDRAM_K2_NARROW_MODE_MASK GENMASK(13, 12) +#define SDRAM_ROWSIZE_SHIFT 7 +#define SDRAM_ROWSIZE_MASK GENMASK(9, 7) +#define SDRAM_IBANK_SHIFT 4 +#define SDRAM_IBANK_MASK GENMASK(6, 4) +#define SDRAM_K2_IBANK_SHIFT 5 +#define SDRAM_K2_IBANK_MASK GENMASK(6, 5) +#define SDRAM_K2_EBANK_SHIFT 3 +#define SDRAM_K2_EBANK_MASK BIT(SDRAM_K2_EBANK_SHIFT) +#define SDRAM_PAGESIZE_SHIFT 0 +#define SDRAM_PAGESIZE_MASK GENMASK(2, 0) +#define SDRAM_K2_PAGESIZE_SHIFT 0 +#define SDRAM_K2_PAGESIZE_MASK GENMASK(1, 0) + +#define EMIF_1B_ECC_ERR_THRSH_SHIFT 24 + +/* IRQ bit definitions */ +#define EMIF_1B_ECC_ERR BIT(5) +#define EMIF_2B_ECC_ERR BIT(4) +#define EMIF_WR_ECC_ERR BIT(3) +#define EMIF_SYS_ERR BIT(0) +/* Bit 31 enables ECC and 28 enables RMW */ +#define ECC_ENABLED (BIT(31) | BIT(28)) + +#define EDAC_MOD_NAME "ti-emif-edac" + +enum { + EMIF_TYPE_DRA7, + EMIF_TYPE_K2 +}; + +struct ti_edac { + void __iomem *reg; +}; + +static u32 ti_edac_readl(struct ti_edac *edac, u16 offset) +{ + return readl_relaxed(edac->reg + offset); +} + +static void ti_edac_writel(struct ti_edac *edac, u32 val, u16 offset) +{ + writel_relaxed(val, edac->reg + offset); +} + +static irqreturn_t ti_edac_isr(int irq, void *data) +{ + struct mem_ctl_info *mci = data; + struct ti_edac *edac = 
mci->pvt_info; + u32 irq_status; + u32 err_addr; + int err_count; + + irq_status = ti_edac_readl(edac, EMIF_IRQ_STATUS); + + if (irq_status & EMIF_1B_ECC_ERR) { + err_addr = ti_edac_readl(edac, EMIF_1B_ECC_ERR_ADDR_LOG); + err_count = ti_edac_readl(edac, EMIF_1B_ECC_ERR_CNT); + ti_edac_writel(edac, err_count, EMIF_1B_ECC_ERR_CNT); + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, + err_addr >> PAGE_SHIFT, + err_addr & ~PAGE_MASK, -1, 0, 0, 0, + mci->ctl_name, "1B"); + } + + if (irq_status & EMIF_2B_ECC_ERR) { + err_addr = ti_edac_readl(edac, EMIF_2B_ECC_ERR_ADDR_LOG); + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + err_addr >> PAGE_SHIFT, + err_addr & ~PAGE_MASK, -1, 0, 0, 0, + mci->ctl_name, "2B"); + } + + if (irq_status & EMIF_WR_ECC_ERR) + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + 0, 0, -1, 0, 0, 0, + mci->ctl_name, "WR"); + + ti_edac_writel(edac, irq_status, EMIF_IRQ_STATUS); + + return IRQ_HANDLED; +} + +static void ti_edac_setup_dimm(struct mem_ctl_info *mci, u32 type) +{ + struct dimm_info *dimm; + struct ti_edac *edac = mci->pvt_info; + int bits; + u32 val; + u32 memsize; + + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, 0, 0, 0); + + val = ti_edac_readl(edac, EMIF_SDRAM_CONFIG); + + if (type == EMIF_TYPE_DRA7) { + bits = ((val & SDRAM_PAGESIZE_MASK) >> SDRAM_PAGESIZE_SHIFT) + 8; + bits += ((val & SDRAM_ROWSIZE_MASK) >> SDRAM_ROWSIZE_SHIFT) + 9; + bits += (val & SDRAM_IBANK_MASK) >> SDRAM_IBANK_SHIFT; + + if (val & SDRAM_NARROW_MODE_MASK) { + bits++; + dimm->dtype = DEV_X16; + } else { + bits += 2; + dimm->dtype = DEV_X32; + } + } else { + bits = 16; + bits += ((val & SDRAM_K2_PAGESIZE_MASK) >> + SDRAM_K2_PAGESIZE_SHIFT) + 8; + bits += (val & SDRAM_K2_IBANK_MASK) >> SDRAM_K2_IBANK_SHIFT; + bits += (val & SDRAM_K2_EBANK_MASK) >> SDRAM_K2_EBANK_SHIFT; + + val = (val & SDRAM_K2_NARROW_MODE_MASK) >> + SDRAM_K2_NARROW_MODE_SHIFT; + switch (val) { + case 0: + bits += 3; + dimm->dtype = DEV_X64; + break; + case 
1: + bits += 2; + dimm->dtype = DEV_X32; + break; + case 2: + bits++; + dimm->dtype = DEV_X16; + break; + } + } + + memsize = 1 << bits; + + dimm->nr_pages = memsize >> PAGE_SHIFT; + dimm->grain = 4; + if ((val & SDRAM_TYPE_MASK) == SDRAM_TYPE_DDR2) + dimm->mtype = MEM_DDR2; + else + dimm->mtype = MEM_DDR3; + + val = ti_edac_readl(edac, EMIF_ECC_CTRL); + if (val & ECC_ENABLED) + dimm->edac_mode = EDAC_SECDED; + else + dimm->edac_mode = EDAC_NONE; +} + +static const struct of_device_id ti_edac_of_match[] = { + { .compatible = "ti,emif-keystone", .data = (void *)EMIF_TYPE_K2 }, + { .compatible = "ti,emif-dra7xx", .data = (void *)EMIF_TYPE_DRA7 }, + {}, +}; + +static int _emif_get_id(struct device_node *node) +{ + struct device_node *np; + const __be32 *addrp; + u32 addr, my_addr; + int my_id = 0; + + addrp = of_get_address(node, 0, NULL, NULL); + my_addr = (u32)of_translate_address(node, addrp); + + for_each_matching_node(np, ti_edac_of_match) { + if (np == node) + continue; + + addrp = of_get_address(np, 0, NULL, NULL); + addr = (u32)of_translate_address(np, addrp); + + edac_printk(KERN_INFO, EDAC_MOD_NAME, + "addr=%x, my_addr=%x\n", + addr, my_addr); + + if (addr < my_addr) + my_id++; + } + + return my_id; +} + +static int ti_edac_probe(struct platform_device *pdev) +{ + int error_irq = 0, ret = -ENODEV; + struct device *dev = &pdev->dev; + struct resource *res; + void __iomem *reg; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + const struct of_device_id *id; + struct ti_edac *edac; + int emif_id; + + id = of_match_device(ti_edac_of_match, &pdev->dev); + if (!id) + return -ENODEV; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + reg = devm_ioremap_resource(dev, res); + if (IS_ERR(reg)) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "EMIF controller regs not defined\n"); + return PTR_ERR(reg); + } + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = 1; + + /* Allocate ID number for our EMIF controller */ + emif_id = 
_emif_get_id(pdev->dev.of_node); + if (emif_id < 0) + return -EINVAL; + + mci = edac_mc_alloc(emif_id, 1, layers, sizeof(*edac)); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + edac = mci->pvt_info; + edac->reg = reg; + platform_set_drvdata(pdev, mci); + + mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR2; + mci->edac_ctl_cap = EDAC_FLAG_SECDED | EDAC_FLAG_NONE; + mci->mod_name = EDAC_MOD_NAME; + mci->ctl_name = id->compatible; + mci->dev_name = dev_name(&pdev->dev); + + /* Setup memory layout */ + ti_edac_setup_dimm(mci, (u32)(id->data)); + + /* add EMIF ECC error handler */ + error_irq = platform_get_irq(pdev, 0); + if (!error_irq) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "EMIF irq number not defined.\n"); + goto err; + } + + ret = devm_request_irq(dev, error_irq, ti_edac_isr, 0, + "emif-edac-irq", mci); + if (ret) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "request_irq fail for EMIF EDAC irq\n"); + goto err; + } + + ret = edac_mc_add_mc(mci); + if (ret) { + edac_printk(KERN_ERR, EDAC_MOD_NAME, + "Failed to register mci: %d.\n", ret); + goto err; + } + + /* Generate an interrupt with each 1b error */ + ti_edac_writel(edac, 1 << EMIF_1B_ECC_ERR_THRSH_SHIFT, + EMIF_1B_ECC_ERR_THRSH); + + /* Enable interrupts */ + ti_edac_writel(edac, + EMIF_1B_ECC_ERR | EMIF_2B_ECC_ERR | EMIF_WR_ECC_ERR, + EMIF_IRQ_ENABLE_SET); + + return 0; + +err: + edac_mc_free(mci); + return ret; +} + +static int ti_edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + + return 0; +} + +static struct platform_driver ti_edac_driver = { + .probe = ti_edac_probe, + .remove = ti_edac_remove, + .driver = { + .name = EDAC_MOD_NAME, + .of_match_table = ti_edac_of_match, + }, +}; + +module_platform_driver(ti_edac_driver); + +MODULE_AUTHOR("Texas Instruments Inc."); +MODULE_DESCRIPTION("EDAC Driver for Texas Instruments DDR3 MC"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 
e866d87a7de38672f52c6e7567e88c3f82f937dc Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 23 Nov 2017 22:28:00 +0100 Subject: ASoC: wm2000: Delete an error message for a failed memory allocation in wm2000_i2c_probe() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm2000.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 23cde3a0dc11..ce936deed7e3 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -902,7 +902,6 @@ static int wm2000_i2c_probe(struct i2c_client *i2c, wm2000->anc_download_size, GFP_KERNEL); if (wm2000->anc_download == NULL) { - dev_err(&i2c->dev, "Out of memory\n"); ret = -ENOMEM; goto err_supplies; } -- cgit v1.2.3 From cdbd9b0c7211485e08d5128eccf9d0a24d38facd Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 24 Nov 2017 08:02:57 +0100 Subject: ASoC: wm2000: Fix a typo in a comment line Delete a duplicate character in a word of this description. Signed-off-by: Markus Elfring Acked-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm2000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index ce936deed7e3..480ceb07c3ed 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -13,7 +13,7 @@ * 'wm2000_anc.bin' by default (overridable via platform data) at * runtime and is expected to be in flat binary format. This is * generated by Wolfson configuration tools and includes - * system-specific callibration information. If supplied as a + * system-specific calibration information. 
If supplied as a * sequence of ASCII-encoded hexidecimal bytes this can be converted * into a flat binary with a command such as this on the command line: * -- cgit v1.2.3 From bf0842ba3afa85d37b35e65dedec9a994988e346 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 24 Nov 2017 08:18:14 +0100 Subject: ASoC: wm2000: Improve a size determination in wm2000_i2c_probe() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm2000.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 480ceb07c3ed..abfa052c07d8 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -826,8 +826,7 @@ static int wm2000_i2c_probe(struct i2c_client *i2c, int reg; u16 id; - wm2000 = devm_kzalloc(&i2c->dev, sizeof(struct wm2000_priv), - GFP_KERNEL); + wm2000 = devm_kzalloc(&i2c->dev, sizeof(*wm2000), GFP_KERNEL); if (!wm2000) return -ENOMEM; -- cgit v1.2.3 From cce7c0ac44832225ca86afff308cf6a5fb19cf2c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 24 Nov 2017 10:05:43 +0100 Subject: ASoC: wm8903: Delete an error message for a failed memory allocation in wm8903_i2c_probe() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Acked-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm8903.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 237eeb9a8b97..51eb7d61d446 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -2020,10 +2020,8 @@ static int wm8903_i2c_probe(struct i2c_client *i2c, wm8903->pdata = devm_kzalloc(&i2c->dev, sizeof(struct wm8903_platform_data), GFP_KERNEL); - if (wm8903->pdata == NULL) { - dev_err(&i2c->dev, "Failed to allocate pdata\n"); + if (!wm8903->pdata) return -ENOMEM; - } if (i2c->irq) { ret = wm8903_set_pdata_irq_trigger(i2c, wm8903->pdata); -- cgit v1.2.3 From 017b9b35cb107c0aeaad2ad770460c49e3f71395 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 24 Nov 2017 10:40:43 +0100 Subject: ASoC: wm8903: Improve two size determinations in wm8903_i2c_probe() Replace the specification of two data structures by pointer dereferences as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Acked-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm8903.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 51eb7d61d446..cba90f21161f 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1995,8 +1995,7 @@ static int wm8903_i2c_probe(struct i2c_client *i2c, unsigned int val, irq_pol; int ret, i; - wm8903 = devm_kzalloc(&i2c->dev, sizeof(struct wm8903_priv), - GFP_KERNEL); + wm8903 = devm_kzalloc(&i2c->dev, sizeof(*wm8903), GFP_KERNEL); if (wm8903 == NULL) return -ENOMEM; @@ -2017,9 +2016,8 @@ static int wm8903_i2c_probe(struct i2c_client *i2c, if (pdata) { wm8903->pdata = pdata; } else { - wm8903->pdata = devm_kzalloc(&i2c->dev, - sizeof(struct wm8903_platform_data), - GFP_KERNEL); + wm8903->pdata = devm_kzalloc(&i2c->dev, sizeof(*wm8903->pdata), + GFP_KERNEL); if (!wm8903->pdata) return -ENOMEM; -- cgit v1.2.3 From 2dbb29cd977fc281f71f8895abce7e382efe77e1 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 23 Nov 2017 21:30:07 +0100 Subject: ASoC: twl4030: Delete an error message for a failed memory allocation in twl4030_get_pdata() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/twl4030.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index cfe72b9d4356..90691701b082 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -240,7 +240,6 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec) sizeof(struct twl4030_codec_data), GFP_KERNEL); if (!pdata) { - dev_err(codec->dev, "Can not allocate memory\n"); of_node_put(twl4030_codec_node); return NULL; } -- cgit v1.2.3 From 14a07f1d8c4c64af29566316df0415052e8bdfe4 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 23 Nov 2017 20:42:20 +0100 Subject: ASoC: da7218: Delete two error messages for a failed memory allocation in da7218_of_to_pdata() Omit extra messages for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Adam Thomson Signed-off-by: Mark Brown --- sound/soc/codecs/da7218.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c index 56564ce90cb6..25ab7443d803 100644 --- a/sound/soc/codecs/da7218.c +++ b/sound/soc/codecs/da7218.c @@ -2455,10 +2455,8 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec) u32 of_val32; pdata = devm_kzalloc(codec->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - dev_warn(codec->dev, "Failed to allocate memory for pdata\n"); + if (!pdata) return NULL; - } if (of_property_read_u32(np, "dlg,micbias1-lvl-millivolt", &of_val32) >= 0) pdata->micbias1_lvl = da7218_of_micbias_lvl(codec, of_val32); @@ -2527,8 +2525,6 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec) hpldet_pdata = devm_kzalloc(codec->dev, sizeof(*hpldet_pdata), GFP_KERNEL); if (!hpldet_pdata) { - dev_warn(codec->dev, - "Failed to allocate memory for hpldet pdata\n"); of_node_put(hpldet_np); 
return pdata; } -- cgit v1.2.3 From 392b79e20b41cfdc174d31bd4b004bbd874de4d9 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 23 Nov 2017 20:50:44 +0100 Subject: ASoC: da7218: Improve a size determination in da7218_i2c_probe() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Adam Thomson Signed-off-by: Mark Brown --- sound/soc/codecs/da7218.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/da7218.c b/sound/soc/codecs/da7218.c index 25ab7443d803..96c644a15b11 100644 --- a/sound/soc/codecs/da7218.c +++ b/sound/soc/codecs/da7218.c @@ -3269,8 +3269,7 @@ static int da7218_i2c_probe(struct i2c_client *i2c, struct da7218_priv *da7218; int ret; - da7218 = devm_kzalloc(&i2c->dev, sizeof(struct da7218_priv), - GFP_KERNEL); + da7218 = devm_kzalloc(&i2c->dev, sizeof(*da7218), GFP_KERNEL); if (!da7218) return -ENOMEM; -- cgit v1.2.3 From 13d5ea5f10b7359cd0f846179fe0b2411e12cfeb Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 23 Nov 2017 17:56:54 +0100 Subject: ASoC: da7213: Delete an error message for a failed memory allocation in da7213_fw_to_pdata() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Acked-by: Adam Thomson Signed-off-by: Mark Brown --- sound/soc/codecs/da7213.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index 41d9b1da27c2..d1b77a0e3b74 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -1654,10 +1654,8 @@ static struct da7213_platform_data u32 fw_val32; pdata = devm_kzalloc(codec->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - dev_warn(codec->dev, "Failed to allocate memory for pdata\n"); + if (!pdata) return NULL; - } if (device_property_read_u32(dev, "dlg,micbias1-lvl", &fw_val32) >= 0) pdata->micbias1_lvl = da7213_of_micbias_lvl(codec, fw_val32); -- cgit v1.2.3 From 8080699a3649cb0a0aed2c650a7a3a76c2025b30 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 23 Nov 2017 18:15:30 +0100 Subject: ASoC: da7213: Improve a size determination in da7213_i2c_probe() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Acked-by: Adam Thomson Signed-off-by: Mark Brown --- sound/soc/codecs/da7213.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index d1b77a0e3b74..b2b4e90fc02a 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -1853,8 +1853,7 @@ static int da7213_i2c_probe(struct i2c_client *i2c, struct da7213_priv *da7213; int ret; - da7213 = devm_kzalloc(&i2c->dev, sizeof(struct da7213_priv), - GFP_KERNEL); + da7213 = devm_kzalloc(&i2c->dev, sizeof(*da7213), GFP_KERNEL); if (!da7213) return -ENOMEM; -- cgit v1.2.3 From 277631ccff503f2a8e8150574c9773a8383fc926 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 23 Nov 2017 09:26:08 +0100 Subject: ASoC: cs42l56: Delete an error message for a failed memory allocation in cs42l56_i2c_probe() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l56.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c index cb6ca85f1536..13ee46217d78 100644 --- a/sound/soc/codecs/cs42l56.c +++ b/sound/soc/codecs/cs42l56.c @@ -1210,11 +1210,9 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, pdata = devm_kzalloc(&i2c_client->dev, sizeof(struct cs42l56_platform_data), GFP_KERNEL); - if (!pdata) { - dev_err(&i2c_client->dev, - "could not allocate pdata\n"); + if (!pdata) return -ENOMEM; - } + if (i2c_client->dev.of_node) { ret = cs42l56_handle_of_data(i2c_client, &cs42l56->pdata); -- cgit v1.2.3 From e8d8b98c0a6edab5d6cc0292e1ed603d8d4ff5b6 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 23 Nov 2017 09:33:15 +0100 Subject: ASoC: cs42l56: Improve two size determinations in cs42l56_i2c_probe() Replace the specification of two data structures by pointer dereferences as the 
parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l56.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c index 13ee46217d78..fd7b8d32c2b2 100644 --- a/sound/soc/codecs/cs42l56.c +++ b/sound/soc/codecs/cs42l56.c @@ -1190,9 +1190,7 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, unsigned int alpha_rev, metal_rev; unsigned int reg; - cs42l56 = devm_kzalloc(&i2c_client->dev, - sizeof(struct cs42l56_private), - GFP_KERNEL); + cs42l56 = devm_kzalloc(&i2c_client->dev, sizeof(*cs42l56), GFP_KERNEL); if (cs42l56 == NULL) return -ENOMEM; cs42l56->dev = &i2c_client->dev; @@ -1207,8 +1205,7 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, if (pdata) { cs42l56->pdata = *pdata; } else { - pdata = devm_kzalloc(&i2c_client->dev, - sizeof(struct cs42l56_platform_data), + pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; -- cgit v1.2.3 From e04db58c1252305e8a4d4178a3cfdbef802cff74 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 22:08:06 +0100 Subject: ASoC: cs42l52: Delete an error message for a failed memory allocation in cs42l52_i2c_probe() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l52.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index 0d9c4a57301b..e8645f2ac0d9 100644 --- a/sound/soc/codecs/cs42l52.c +++ b/sound/soc/codecs/cs42l52.c @@ -1118,10 +1118,9 @@ static int cs42l52_i2c_probe(struct i2c_client *i2c_client, pdata = devm_kzalloc(&i2c_client->dev, sizeof(struct cs42l52_platform_data), GFP_KERNEL); - if (!pdata) { - dev_err(&i2c_client->dev, "could not allocate pdata\n"); + if (!pdata) return -ENOMEM; - } + if (i2c_client->dev.of_node) { if (of_property_read_bool(i2c_client->dev.of_node, "cirrus,mica-differential-cfg")) -- cgit v1.2.3 From cd9e0b8282653d8bdc2c8b799eeb26ee034cdd06 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 22:11:30 +0100 Subject: ASoC: cs42l52: Improve two size determinations in cs42l52_i2c_probe() Replace the specification of two data structures by pointer dereferences as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l52.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index e8645f2ac0d9..9731e5dff291 100644 --- a/sound/soc/codecs/cs42l52.c +++ b/sound/soc/codecs/cs42l52.c @@ -1100,8 +1100,7 @@ static int cs42l52_i2c_probe(struct i2c_client *i2c_client, unsigned int reg; u32 val32; - cs42l52 = devm_kzalloc(&i2c_client->dev, sizeof(struct cs42l52_private), - GFP_KERNEL); + cs42l52 = devm_kzalloc(&i2c_client->dev, sizeof(*cs42l52), GFP_KERNEL); if (cs42l52 == NULL) return -ENOMEM; cs42l52->dev = &i2c_client->dev; @@ -1115,9 +1114,8 @@ static int cs42l52_i2c_probe(struct i2c_client *i2c_client, if (pdata) { cs42l52->pdata = *pdata; } else { - pdata = devm_kzalloc(&i2c_client->dev, - sizeof(struct cs42l52_platform_data), - GFP_KERNEL); + pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata), + GFP_KERNEL); if (!pdata) return -ENOMEM; -- cgit v1.2.3 From af0f6c5820845b66c76296f281da5cf916d7e094 Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Wed, 22 Nov 2017 12:56:41 -0800 Subject: ASoC: ts3a227e: Map BTN_0 to KEY_PLAYPAUSE The Android 3.5mm Headset jack specification mentions that BTN_0 should be mapped to KEY_MEDIA, but this is less logical than KEY_PLAYPAUSE, which has much broader userspace support. For example, the Chrome OS userspace now supports KEY_PLAYPAUSE to toggle play/pause of videos and audio, but does not handle KEY_MEDIA. Furthermore, Android itself now supports KEY_PLAYPAUSE equivalently, as the new USB headset spec requires KEY_PLAYPAUSE for BTN_0. 
https://source.android.com/devices/accessories/headset/usb-headset-spec Signed-off-by: Benson Leung Signed-off-by: Mark Brown --- sound/soc/codecs/ts3a227e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/ts3a227e.c b/sound/soc/codecs/ts3a227e.c index 738e04b09116..1271e7e1fc78 100644 --- a/sound/soc/codecs/ts3a227e.c +++ b/sound/soc/codecs/ts3a227e.c @@ -241,7 +241,7 @@ int ts3a227e_enable_jack_detect(struct snd_soc_component *component, { struct ts3a227e *ts3a227e = snd_soc_component_get_drvdata(component); - snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_MEDIA); + snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_PLAYPAUSE); snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOICECOMMAND); snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP); snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN); -- cgit v1.2.3 From af1b1cefd735c919d3185ce06b549c2b121067ba Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Wed, 22 Nov 2017 12:56:42 -0800 Subject: ASoC: rk3399_gru_sound: Map BTN_0 to KEY_PLAYPAUSE The Android 3.5mm Headset jack specification mentions that BTN_0 should be mapped to KEY_MEDIA, but this is less logical than KEY_PLAYPAUSE, which has much broader userspace support. For example, the Chrome OS userspace now supports KEY_PLAYPAUSE to toggle play/pause of videos and audio, but does not handle KEY_MEDIA. Furthermore, Android itself now supports KEY_PLAYPAUSE equivalently, as the new USB headset spec requires KEY_PLAYPAUSE for BTN_0. 
https://source.android.com/devices/accessories/headset/usb-headset-spec Signed-off-by: Benson Leung Signed-off-by: Mark Brown --- sound/soc/rockchip/rk3399_gru_sound.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/rockchip/rk3399_gru_sound.c b/sound/soc/rockchip/rk3399_gru_sound.c index d64fbbd50544..fa6cd1de828b 100644 --- a/sound/soc/rockchip/rk3399_gru_sound.c +++ b/sound/soc/rockchip/rk3399_gru_sound.c @@ -206,7 +206,8 @@ static int rockchip_sound_da7219_init(struct snd_soc_pcm_runtime *rtd) return ret; } - snd_jack_set_key(rockchip_sound_jack.jack, SND_JACK_BTN_0, KEY_MEDIA); + snd_jack_set_key( + rockchip_sound_jack.jack, SND_JACK_BTN_0, KEY_PLAYPAUSE); snd_jack_set_key( rockchip_sound_jack.jack, SND_JACK_BTN_1, KEY_VOLUMEUP); snd_jack_set_key( -- cgit v1.2.3 From 5f6d1df8ced6bb52453149f96d9b7005058641b6 Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Wed, 22 Nov 2017 12:56:43 -0800 Subject: ASoC: qcom: apq8016-sbc: Map BTN_0 to KEY_PLAYPAUSE The Android 3.5mm Headset jack specification mentions that BTN_0 should be mapped to KEY_MEDIA, but this is less logical than KEY_PLAYPAUSE, which has much broader userspace support. For example, the Chrome OS userspace now supports KEY_PLAYPAUSE to toggle play/pause of videos and audio, but does not handle KEY_MEDIA. Furthermore, Android itself now supports KEY_PLAYPAUSE equivalently, as the new USB headset spec requires KEY_PLAYPAUSE for BTN_0. 
https://source.android.com/devices/accessories/headset/usb-headset-spec Signed-off-by: Benson Leung Signed-off-by: Mark Brown --- sound/soc/qcom/apq8016_sbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c index d49adc822a11..03851fedd1e2 100644 --- a/sound/soc/qcom/apq8016_sbc.c +++ b/sound/soc/qcom/apq8016_sbc.c @@ -92,7 +92,7 @@ static int apq8016_sbc_dai_init(struct snd_soc_pcm_runtime *rtd) jack = pdata->jack.jack; - snd_jack_set_key(jack, SND_JACK_BTN_0, KEY_MEDIA); + snd_jack_set_key(jack, SND_JACK_BTN_0, KEY_PLAYPAUSE); snd_jack_set_key(jack, SND_JACK_BTN_1, KEY_VOICECOMMAND); snd_jack_set_key(jack, SND_JACK_BTN_2, KEY_VOLUMEUP); snd_jack_set_key(jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN); -- cgit v1.2.3 From 8ee649283b1e542aedba007f6c828d6767c48e0d Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Wed, 22 Nov 2017 12:56:39 -0800 Subject: ASoC: Intel: kbl_rt5663_rt5514_max98927: Map BTN_0 to KEY_PLAYPAUSE The Android 3.5mm Headset jack specification mentions that BTN_0 should be mapped to KEY_MEDIA, but this is less logical than KEY_PLAYPAUSE, which has much broader userspace support. For example, the Chrome OS userspace now supports KEY_PLAYPAUSE to toggle play/pause of videos and audio, but does not handle KEY_MEDIA. Furthermore, Android itself now supports KEY_PLAYPAUSE equivalently, as the new USB headset spec requires KEY_PLAYPAUSE for BTN_0. 
https://source.android.com/devices/accessories/headset/usb-headset-spec Signed-off-by: Benson Leung Signed-off-by: Mark Brown --- sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index e7672831bc49..38512f0d1a73 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -195,7 +195,7 @@ static int kabylake_rt5663_codec_init(struct snd_soc_pcm_runtime *rtd) } jack = &ctx->kabylake_headset; - snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_MEDIA); + snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_PLAYPAUSE); snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOICECOMMAND); snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP); snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN); -- cgit v1.2.3 From 38a5882e4292d135cebabad0b56c9420dfdd80a5 Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Wed, 22 Nov 2017 12:56:40 -0800 Subject: ASoC: Intel: kbl_rt5663_max98927: Map BTN_0 to KEY_PLAYPAUSE The Android 3.5mm Headset jack specification mentions that BTN_0 should be mapped to KEY_MEDIA, but this is less logical than KEY_PLAYPAUSE, which has much broader userspace support. For example, the Chrome OS userspace now supports KEY_PLAYPAUSE to toggle play/pause of videos and audio, but does not handle KEY_MEDIA. Furthermore, Android itself now supports KEY_PLAYPAUSE equivalently, as the new USB headset spec requires KEY_PLAYPAUSE for BTN_0. 
https://source.android.com/devices/accessories/headset/usb-headset-spec Signed-off-by: Benson Leung Signed-off-by: Mark Brown --- sound/soc/intel/boards/kbl_rt5663_max98927.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c index 6f9a8bcf20f3..94a34db4f8c0 100644 --- a/sound/soc/intel/boards/kbl_rt5663_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c @@ -225,7 +225,7 @@ static int kabylake_rt5663_codec_init(struct snd_soc_pcm_runtime *rtd) } jack = &ctx->kabylake_headset; - snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_MEDIA); + snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_PLAYPAUSE); snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOICECOMMAND); snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP); snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN); -- cgit v1.2.3 From 4be0ffdf284046eb7289fa66cc7b2eb8d7efdc64 Mon Sep 17 00:00:00 2001 From: olivier moysan Date: Wed, 22 Nov 2017 16:02:25 +0100 Subject: ASoC: stm32: fix sync property description in SAI bindings SAI sync property must be described in SAI subnodes section, as it is a property of child node. This patch fixes commit 14f0e5f8d97e632695d92f41f2e91d10d8005d47 "ASoC: stm32: Add synchronization to SAI bindings". Signed-off-by: Olivier Moysan Acked-by: Rob Herring Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/st,stm32-sai.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt index 1f9cd7095337..b1acc1a256ba 100644 --- a/Documentation/devicetree/bindings/sound/st,stm32-sai.txt +++ b/Documentation/devicetree/bindings/sound/st,stm32-sai.txt @@ -20,11 +20,6 @@ Required properties: Optional properties: - resets: Reference to a reset controller asserting the SAI - - st,sync: specify synchronization mode. 
- By default SAI sub-block is in asynchronous mode. - This property sets SAI sub-block as slave of another SAI sub-block. - Must contain the phandle and index of the sai sub-block providing - the synchronization. SAI subnodes: Two subnodes corresponding to SAI sub-block instances A et B can be defined. @@ -44,6 +39,13 @@ SAI subnodes required properties: - pinctrl-names: should contain only value "default" - pinctrl-0: see Documentation/devicetree/bindings/pinctrl/pinctrl-stm32.txt +SAI subnodes Optional properties: + - st,sync: specify synchronization mode. + By default SAI sub-block is in asynchronous mode. + This property sets SAI sub-block as slave of another SAI sub-block. + Must contain the phandle and index of the sai sub-block providing + the synchronization. + The device node should contain one 'port' child node with one child 'endpoint' node, according to the bindings defined in Documentation/devicetree/bindings/ graph.txt. -- cgit v1.2.3 From 7dd0d835582ff72b0b3bd0f4fa074967dff1ce82 Mon Sep 17 00:00:00 2001 From: olivier moysan Date: Wed, 22 Nov 2017 16:02:26 +0100 Subject: ASoC: stm32: sai: simplify sync modes management Use function of_find_device_by_node() to retrieve SAI synchro provider device and private data. This allows removing the registration of probed SAI in a linked list.
Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai.c | 105 ++++++++++------------------------------------ 1 file changed, 22 insertions(+), 83 deletions(-) diff --git a/sound/soc/stm/stm32_sai.c b/sound/soc/stm/stm32_sai.c index d6f71a3406e9..0a1f06418bf4 100644 --- a/sound/soc/stm/stm32_sai.c +++ b/sound/soc/stm/stm32_sai.c @@ -28,16 +28,6 @@ #include "stm32_sai.h" -static LIST_HEAD(sync_providers); -static DEFINE_MUTEX(sync_mutex); - -struct sync_provider { - struct list_head link; - struct device_node *node; - int (*sync_conf)(void *data, int synco); - void *data; -}; - static const struct stm32_sai_conf stm32_sai_conf_f4 = { .version = SAI_STM32F4, }; @@ -70,9 +60,8 @@ static int stm32_sai_sync_conf_client(struct stm32_sai_data *sai, int synci) return 0; } -static int stm32_sai_sync_conf_provider(void *data, int synco) +static int stm32_sai_sync_conf_provider(struct stm32_sai_data *sai, int synco) { - struct stm32_sai_data *sai = (struct stm32_sai_data *)data; u32 prev_synco; int ret; @@ -103,73 +92,34 @@ static int stm32_sai_sync_conf_provider(void *data, int synco) return 0; } -static int stm32_sai_set_sync_provider(struct device_node *np, int synco) +static int stm32_sai_set_sync(struct stm32_sai_data *sai_client, + struct device_node *np_provider, + int synco, int synci) { - struct sync_provider *provider; + struct platform_device *pdev = of_find_device_by_node(np_provider); + struct stm32_sai_data *sai_provider; int ret; - mutex_lock(&sync_mutex); - list_for_each_entry(provider, &sync_providers, link) { - if (provider->node == np) { - ret = provider->sync_conf(provider->data, synco); - mutex_unlock(&sync_mutex); - return ret; - } + if (!pdev) { + dev_err(&sai_client->pdev->dev, + "Device not found for node %s\n", np_provider->name); + return -ENODEV; } - mutex_unlock(&sync_mutex); - /* SAI sync provider not found */ - return -ENODEV; -} - -static int stm32_sai_set_sync(struct stm32_sai_data *sai, - struct device_node 
*np_provider, - int synco, int synci) -{ - int ret; + sai_provider = platform_get_drvdata(pdev); + if (!sai_provider) { + dev_err(&sai_client->pdev->dev, + "SAI sync provider data not found\n"); + return -EINVAL; + } /* Configure sync client */ - stm32_sai_sync_conf_client(sai, synci); + ret = stm32_sai_sync_conf_client(sai_client, synci); + if (ret < 0) + return ret; /* Configure sync provider */ - ret = stm32_sai_set_sync_provider(np_provider, synco); - - return ret; -} - -static int stm32_sai_sync_add_provider(struct platform_device *pdev, - void *data) -{ - struct sync_provider *sp; - - sp = devm_kzalloc(&pdev->dev, sizeof(*sp), GFP_KERNEL); - if (!sp) - return -ENOMEM; - - sp->node = of_node_get(pdev->dev.of_node); - sp->data = data; - sp->sync_conf = &stm32_sai_sync_conf_provider; - - mutex_lock(&sync_mutex); - list_add(&sp->link, &sync_providers); - mutex_unlock(&sync_mutex); - - return 0; -} - -static void stm32_sai_sync_del_provider(struct device_node *np) -{ - struct sync_provider *sp; - - mutex_lock(&sync_mutex); - list_for_each_entry(sp, &sync_providers, link) { - if (sp->node == np) { - list_del(&sp->link); - of_node_put(sp->node); - break; - } - } - mutex_unlock(&sync_mutex); + return stm32_sai_sync_conf_provider(sai_provider, synco); } static int stm32_sai_probe(struct platform_device *pdev) @@ -179,7 +129,6 @@ static int stm32_sai_probe(struct platform_device *pdev) struct reset_control *rst; struct resource *res; const struct of_device_id *of_id; - int ret; sai = devm_kzalloc(&pdev->dev, sizeof(*sai), GFP_KERNEL); if (!sai) @@ -231,27 +180,17 @@ static int stm32_sai_probe(struct platform_device *pdev) reset_control_deassert(rst); } - ret = stm32_sai_sync_add_provider(pdev, sai); - if (ret < 0) - return ret; - sai->set_sync = &stm32_sai_set_sync; - sai->pdev = pdev; + sai->set_sync = &stm32_sai_set_sync; platform_set_drvdata(pdev, sai); - ret = of_platform_populate(np, NULL, NULL, &pdev->dev); - if (ret < 0) - stm32_sai_sync_del_provider(np); - - 
return ret; + return of_platform_populate(np, NULL, NULL, &pdev->dev); } static int stm32_sai_remove(struct platform_device *pdev) { of_platform_depopulate(&pdev->dev); - stm32_sai_sync_del_provider(pdev->dev.of_node); - return 0; } -- cgit v1.2.3 From 512d1bb4e86bd0fd4d665d4e454a3486236a419f Mon Sep 17 00:00:00 2001 From: olivier moysan Date: Wed, 22 Nov 2017 16:02:27 +0100 Subject: ASoC: stm32: sai: use devm_of_platform_populate() Use devm_of_platform_populate() instead of of_platform_depopulate() to simplify driver code. Signed-off-by: Benjamin Gaignard Signed-off-by: Olivier Moysan Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/sound/soc/stm/stm32_sai.c b/sound/soc/stm/stm32_sai.c index 0a1f06418bf4..d743b7dd52fb 100644 --- a/sound/soc/stm/stm32_sai.c +++ b/sound/soc/stm/stm32_sai.c @@ -124,7 +124,6 @@ static int stm32_sai_set_sync(struct stm32_sai_data *sai_client, static int stm32_sai_probe(struct platform_device *pdev) { - struct device_node *np = pdev->dev.of_node; struct stm32_sai_data *sai; struct reset_control *rst; struct resource *res; @@ -184,14 +183,7 @@ static int stm32_sai_probe(struct platform_device *pdev) sai->set_sync = &stm32_sai_set_sync; platform_set_drvdata(pdev, sai); - return of_platform_populate(np, NULL, NULL, &pdev->dev); -} - -static int stm32_sai_remove(struct platform_device *pdev) -{ - of_platform_depopulate(&pdev->dev); - - return 0; + return devm_of_platform_populate(&pdev->dev); } MODULE_DEVICE_TABLE(of, stm32_sai_ids); @@ -202,7 +194,6 @@ static struct platform_driver stm32_sai_driver = { .of_match_table = stm32_sai_ids, }, .probe = stm32_sai_probe, - .remove = stm32_sai_remove, }; module_platform_driver(stm32_sai_driver); -- cgit v1.2.3 From ddedd797943df21a2464420744d117e930a43af8 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 15:50:46 +0100 Subject: ASoC: cs42l73: Delete an error message for a failed memory allocation 
in cs42l73_i2c_probe() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l73.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c index 3df2c473ab88..978cfbbad408 100644 --- a/sound/soc/codecs/cs42l73.c +++ b/sound/soc/codecs/cs42l73.c @@ -1307,10 +1307,9 @@ static int cs42l73_i2c_probe(struct i2c_client *i2c_client, pdata = devm_kzalloc(&i2c_client->dev, sizeof(struct cs42l73_platform_data), GFP_KERNEL); - if (!pdata) { - dev_err(&i2c_client->dev, "could not allocate pdata\n"); + if (!pdata) return -ENOMEM; - } + if (i2c_client->dev.of_node) { if (of_property_read_u32(i2c_client->dev.of_node, "chgfreq", &val32) >= 0) -- cgit v1.2.3 From 68fa08c665e51b2fe100876692e57bca3aea7711 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 16:07:42 +0100 Subject: ASoC: cs42l73: Improve two size determinations in cs42l73_i2c_probe() Replace the specification of two data structures by pointer dereferences as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l73.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c index 978cfbbad408..dde37e569ade 100644 --- a/sound/soc/codecs/cs42l73.c +++ b/sound/soc/codecs/cs42l73.c @@ -1289,8 +1289,7 @@ static int cs42l73_i2c_probe(struct i2c_client *i2c_client, unsigned int reg; u32 val32; - cs42l73 = devm_kzalloc(&i2c_client->dev, sizeof(struct cs42l73_private), - GFP_KERNEL); + cs42l73 = devm_kzalloc(&i2c_client->dev, sizeof(*cs42l73), GFP_KERNEL); if (!cs42l73) return -ENOMEM; @@ -1304,9 +1303,8 @@ static int cs42l73_i2c_probe(struct i2c_client *i2c_client, if (pdata) { cs42l73->pdata = *pdata; } else { - pdata = devm_kzalloc(&i2c_client->dev, - sizeof(struct cs42l73_platform_data), - GFP_KERNEL); + pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata), + GFP_KERNEL); if (!pdata) return -ENOMEM; -- cgit v1.2.3 From 316c85c3db55588893f51907de3424f85020f0eb Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 17:17:48 +0100 Subject: ASoC: wm0010: Delete an error message for a failed memory allocation in wm0010_boot() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Acked-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm0010.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c index 4f5f5710b569..0147d2fb7b0a 100644 --- a/sound/soc/codecs/wm0010.c +++ b/sound/soc/codecs/wm0010.c @@ -655,11 +655,8 @@ static int wm0010_boot(struct snd_soc_codec *codec) ret = -ENOMEM; len = pll_rec.length + 8; out = kzalloc(len, GFP_KERNEL | GFP_DMA); - if (!out) { - dev_err(codec->dev, - "Failed to allocate RX buffer\n"); + if (!out) goto abort; - } img_swap = kzalloc(len, GFP_KERNEL | GFP_DMA); if (!img_swap) -- cgit v1.2.3 From 410afed04b130fc1c22f82b10b20aed1636d2f15 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 20:35:06 +0100 Subject: ASoC: cs35l32: Delete two error messages for a failed memory allocation in cs35l32_i2c_probe() Omit extra messages for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l32.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c index 7e9806206648..e41d8ebe6eea 100644 --- a/sound/soc/codecs/cs35l32.c +++ b/sound/soc/codecs/cs35l32.c @@ -358,10 +358,8 @@ static int cs35l32_i2c_probe(struct i2c_client *i2c_client, cs35l32 = devm_kzalloc(&i2c_client->dev, sizeof(struct cs35l32_private), GFP_KERNEL); - if (!cs35l32) { - dev_err(&i2c_client->dev, "could not allocate codec\n"); + if (!cs35l32) return -ENOMEM; - } i2c_set_clientdata(i2c_client, cs35l32); @@ -378,10 +376,9 @@ static int cs35l32_i2c_probe(struct i2c_client *i2c_client, pdata = devm_kzalloc(&i2c_client->dev, sizeof(struct cs35l32_platform_data), GFP_KERNEL); - if (!pdata) { - dev_err(&i2c_client->dev, "could not allocate pdata\n"); + if (!pdata) return -ENOMEM; - } + if (i2c_client->dev.of_node) { ret = cs35l32_handle_of_data(i2c_client, &cs35l32->pdata); -- cgit v1.2.3 From b28ad41ec831b099dccac1f84a9cfb40b0650724 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 20:40:47 +0100 Subject: ASoC: cs35l32: Improve two size determinations in cs35l32_i2c_probe() Replace the specification of two data structures by pointer dereferences as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l32.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c index e41d8ebe6eea..bc3a72e4c4ed 100644 --- a/sound/soc/codecs/cs35l32.c +++ b/sound/soc/codecs/cs35l32.c @@ -355,9 +355,7 @@ static int cs35l32_i2c_probe(struct i2c_client *i2c_client, unsigned int devid = 0; unsigned int reg; - - cs35l32 = devm_kzalloc(&i2c_client->dev, sizeof(struct cs35l32_private), - GFP_KERNEL); + cs35l32 = devm_kzalloc(&i2c_client->dev, sizeof(*cs35l32), GFP_KERNEL); if (!cs35l32) return -ENOMEM; @@ -373,9 +371,8 @@ static int cs35l32_i2c_probe(struct i2c_client *i2c_client, if (pdata) { cs35l32->pdata = *pdata; } else { - pdata = devm_kzalloc(&i2c_client->dev, - sizeof(struct cs35l32_platform_data), - GFP_KERNEL); + pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata), + GFP_KERNEL); if (!pdata) return -ENOMEM; -- cgit v1.2.3 From 4dbd91549200574c71e10176d0ca37beca6703ec Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 21:13:29 +0100 Subject: ASoC: cs35l34: Delete two error messages for a failed memory allocation in cs35l34_i2c_probe() Omit extra messages for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l34.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/cs35l34.c b/sound/soc/codecs/cs35l34.c index 1e05026bedca..6cbdbbe53940 100644 --- a/sound/soc/codecs/cs35l34.c +++ b/sound/soc/codecs/cs35l34.c @@ -1007,10 +1007,8 @@ static int cs35l34_i2c_probe(struct i2c_client *i2c_client, cs35l34 = devm_kzalloc(&i2c_client->dev, sizeof(struct cs35l34_private), GFP_KERNEL); - if (!cs35l34) { - dev_err(&i2c_client->dev, "could not allocate codec\n"); + if (!cs35l34) return -ENOMEM; - } i2c_set_clientdata(i2c_client, cs35l34); cs35l34->regmap = devm_regmap_init_i2c(i2c_client, &cs35l34_regmap); @@ -1047,11 +1045,9 @@ static int cs35l34_i2c_probe(struct i2c_client *i2c_client, pdata = devm_kzalloc(&i2c_client->dev, sizeof(struct cs35l34_platform_data), GFP_KERNEL); - if (!pdata) { - dev_err(&i2c_client->dev, - "could not allocate pdata\n"); + if (!pdata) return -ENOMEM; - } + if (i2c_client->dev.of_node) { ret = cs35l34_handle_of_data(i2c_client, pdata); if (ret != 0) -- cgit v1.2.3 From 7f9f3abd285433e224faaf854b165b189270c923 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 21:17:42 +0100 Subject: ASoC: cs35l34: Improve two size determinations in cs35l34_i2c_probe() Replace the specification of two data structures by pointer dereferences as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l34.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/cs35l34.c b/sound/soc/codecs/cs35l34.c index 6cbdbbe53940..0600d5264c4c 100644 --- a/sound/soc/codecs/cs35l34.c +++ b/sound/soc/codecs/cs35l34.c @@ -1004,9 +1004,7 @@ static int cs35l34_i2c_probe(struct i2c_client *i2c_client, unsigned int devid = 0; unsigned int reg; - cs35l34 = devm_kzalloc(&i2c_client->dev, - sizeof(struct cs35l34_private), - GFP_KERNEL); + cs35l34 = devm_kzalloc(&i2c_client->dev, sizeof(*cs35l34), GFP_KERNEL); if (!cs35l34) return -ENOMEM; @@ -1042,9 +1040,8 @@ static int cs35l34_i2c_probe(struct i2c_client *i2c_client, if (pdata) { cs35l34->pdata = *pdata; } else { - pdata = devm_kzalloc(&i2c_client->dev, - sizeof(struct cs35l34_platform_data), - GFP_KERNEL); + pdata = devm_kzalloc(&i2c_client->dev, sizeof(*pdata), + GFP_KERNEL); if (!pdata) return -ENOMEM; -- cgit v1.2.3 From 251c201bf4f8b5bf4f1ccb4f8920eed2e1f57580 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Mon, 27 Nov 2017 15:16:32 +0100 Subject: spi: a3700: Fix clk prescaling for coefficient over 15 The Armada 3700 SPI controller has 2 ranges of prescaler coefficients. One ranging from 0 to 15 by steps of 1, and one ranging from 0 to 30 by steps of 2. This commit fixes the prescaler coefficients that are over 15 so that it uses the correct range of values. The prescaling coefficient is rounded to the upper value if it is odd. This was tested on Espressobin with spidev and a logic analyser.
Signed-off-by: Maxime Chevallier Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-armada-3700.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 77fe55ce790c..d65345312527 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -79,6 +79,7 @@ #define A3700_SPI_BYTE_LEN BIT(5) #define A3700_SPI_CLK_PRESCALE BIT(0) #define A3700_SPI_CLK_PRESCALE_MASK (0x1f) +#define A3700_SPI_CLK_EVEN_OFFS (0x10) #define A3700_SPI_WFIFO_THRS_BIT 28 #define A3700_SPI_RFIFO_THRS_BIT 24 @@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi, prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz); + /* For prescaler values over 15, we can only set it by steps of 2. + * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to + * 30. We only use this range from 16 to 30. + */ + if (prescale > 15) + prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2); + val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG); val = val & ~A3700_SPI_CLK_PRESCALE_MASK; -- cgit v1.2.3 From fdaa451107ce543d345a339b4d5e20e8e4bac396 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 20 Nov 2017 20:27:56 -0800 Subject: ASoC: amd: Add error checking to probe function The acp_audio_dma does not perform sufficient error checking in its probe function. This can result in crashes if a critical error path is encountered. 
Fixes: 7c31335a03b6a ("ASoC: AMD: add AMD ASoC ACP 2.x DMA driver") Cc: Alex Deucher Cc: Dominik Behr Cc: Daniel Kurtz Signed-off-by: Guenter Roeck Reviewed-by: Alex Deucher Signed-off-by: Mark Brown --- sound/soc/amd/acp-pcm-dma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index 9f521a55d610..b5e41df6bb3a 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev) struct resource *res; const u32 *pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "Missing platform data\n"); + return -ENODEV; + } + audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data), GFP_KERNEL); if (audio_drv_data == NULL) @@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(audio_drv_data->acp_mmio)) + return PTR_ERR(audio_drv_data->acp_mmio); /* The following members gets populated in device 'open' * function. Till then interrupts are disabled in 'acp_init' -- cgit v1.2.3 From a094c2fa093cf7fd0fe23d15cc2abca4083c6a45 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 21 Nov 2017 10:09:03 +0100 Subject: spi: xilinx: Add support for xlnx,axi-quad-spi-1.00.a The driver has been successfully tested with Xilinx's core axi-quad-spi-1.00.a.
Documented on DS843: https://www.xilinx.com/support/documentation/ip_documentation/axi_quad_spi/v1_00_a/ds843_axi_quad_spi.pdf Cc: Mark Brown Cc: Rob Herring Cc: devicetree@vger.kernel.org Signed-off-by: Ricardo Ribalda Delgado Acked-by: Rob Herring Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-xilinx.txt | 2 +- drivers/spi/spi-xilinx.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/spi/spi-xilinx.txt b/Documentation/devicetree/bindings/spi/spi-xilinx.txt index c7b7856bd528..7bf61efc66c8 100644 --- a/Documentation/devicetree/bindings/spi/spi-xilinx.txt +++ b/Documentation/devicetree/bindings/spi/spi-xilinx.txt @@ -2,7 +2,7 @@ Xilinx SPI controller Device Tree Bindings ------------------------------------------------- Required properties: -- compatible : Should be "xlnx,xps-spi-2.00.a" or "xlnx,xps-spi-2.00.b" +- compatible : Should be "xlnx,xps-spi-2.00.a", "xlnx,xps-spi-2.00.b" or "xlnx,axi-quad-spi-1.00.a" - reg : Physical base address and size of SPI registers map. - interrupts : Property with a value describing the interrupt number. diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index e0b9fe1d0e37..63fedc49ae9c 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -381,6 +381,7 @@ static int xilinx_spi_find_buffer_size(struct xilinx_spi *xspi) } static const struct of_device_id xilinx_spi_of_match[] = { + { .compatible = "xlnx,axi-quad-spi-1.00.a", }, { .compatible = "xlnx,xps-spi-2.00.a", }, { .compatible = "xlnx,xps-spi-2.00.b", }, {} -- cgit v1.2.3 From 2e672ab2d491713541963afca3a5967ccc2376e9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 16:54:49 -0700 Subject: rcu: Avoid ->dynticks_nmi_nesting store tearing NMIs can nest, and store tearing could in theory happen on carries from one byte to the next. This commit therefore adds the WRITE_ONCE() macros preventing this. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f9c0ca2ccf0c..c5d960f86cf8 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1103,7 +1103,8 @@ void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); incby = 1; } - rdtp->dynticks_nmi_nesting += incby; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ + rdtp->dynticks_nmi_nesting + incby); barrier(); } @@ -1135,12 +1136,13 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { - rdtp->dynticks_nmi_nesting -= 2; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ + rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - rdtp->dynticks_nmi_nesting = 0; + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } -- cgit v1.2.3 From a0eb22bf64a755bb162b421120b9fbe7d012b85f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 19:45:10 -0700 Subject: rcu: Reduce dyntick-idle state space Both extended-quiescent-state entry and exit first update the nesting counter and then adjust the dyntick-idle state. This means that there are four states: (1) Both nesting and dyntick idle indicate idle, (2) Nesting indicates idle but dyntick idle does not, (3) Nesting indicates non-idle but dyntick idle does not, and (4) Both nesting and dyntick idle indicate non-idle. This commit simplifies the state space by eliminating #3 by reversing the order of updates on exit from extended quiescent state. Signed-off-by: Paul E.
McKenney --- kernel/rcu/tree.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c5d960f86cf8..49f661bb8ffe 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -928,21 +928,21 @@ void rcu_irq_exit_irqson(void) * we really have exited idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_eqs_exit_common(long long oldval, int user) +static void rcu_eqs_exit_common(long long newval, int user) { RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) rcu_dynticks_task_exit(); rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on exit: not idle task"), - oldval, rdtp->dynticks_nesting); + rdtp->dynticks_nesting, newval); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -967,8 +967,8 @@ static void rcu_eqs_exit(bool user) rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; } else { __this_cpu_inc(disable_rcu_irq_enter); + rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; - rcu_eqs_exit_common(oldval, user); __this_cpu_dec(disable_rcu_irq_enter); } } @@ -1037,7 +1037,7 @@ void rcu_user_exit(void) void rcu_irq_enter(void) { struct rcu_dynticks *rdtp; - long long oldval; + long long newval; lockdep_assert_irqs_disabled(); rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1046,14 +1046,13 @@ void rcu_irq_enter(void) if (rdtp->dynticks_nmi_nesting) return; - oldval = rdtp->dynticks_nesting; - rdtp->dynticks_nesting++; - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting == 0); - if 
(oldval) - trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); + newval = rdtp->dynticks_nesting + 1; + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && newval == 0); + if (rdtp->dynticks_nesting) + trace_rcu_dyntick(TPS("++="), rdtp->dynticks_nesting, newval); else - rcu_eqs_exit_common(oldval, true); + rcu_eqs_exit_common(newval, true); + rdtp->dynticks_nesting++; } /* -- cgit v1.2.3 From fd581a91ac16187625ec509414d08d37827472c4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Oct 2017 21:56:20 -0700 Subject: rcu: Move rcu_nmi_{enter,exit}() to prepare for consolidation This is a code-motion-only commit that prepares to define rcu_irq_enter() in terms of rcu_nmi_enter() and rcu_irq_exit() in terms of rcu_nmi_exit(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 150 +++++++++++++++++++++++++++--------------------------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 49f661bb8ffe..419f3c38e1b6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -866,6 +866,44 @@ void rcu_user_enter(void) } #endif /* CONFIG_NO_HZ_FULL */ +/** + * rcu_nmi_exit - inform RCU of exit from NMI context + * + * If we are returning from the outermost NMI handler that interrupted an + * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting + * to let the RCU grace-period handling know that the CPU is back to + * being RCU-idle. + * + * If you add or remove a call to rcu_nmi_exit(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. + */ +void rcu_nmi_exit(void) +{ + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + + /* + * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. + * (We are exiting an NMI handler, so RCU better be paying attention + * to us!) + */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); + WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); + + /* + * If the nesting level is not 1, the CPU wasn't RCU-idle, so + * leave it in non-RCU-idle state.
+ */ + if (rdtp->dynticks_nmi_nesting != 1) { + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ + rdtp->dynticks_nmi_nesting - 2); + return; + } + + /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ + rcu_dynticks_eqs_enter(); +} + /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle * @@ -1012,6 +1050,43 @@ void rcu_user_exit(void) } #endif /* CONFIG_NO_HZ_FULL */ +/** + * rcu_nmi_enter - inform RCU of entry to NMI context + * + * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and + * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know + * that the CPU is active. This implementation permits nested NMIs, as + * long as the nesting level does not overflow an int. (You will probably + * run out of stack space first.) + * + * If you add or remove a call to rcu_nmi_enter(), be sure to test + * with CONFIG_RCU_EQS_DEBUG=y. + */ +void rcu_nmi_enter(void) +{ + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + int incby = 2; + + /* Complain about underflow. */ + WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); + + /* + * If idle from RCU viewpoint, atomically increment ->dynticks + * to mark non-idle and increment ->dynticks_nmi_nesting by one. + * Otherwise, increment ->dynticks_nmi_nesting by two. This means + * if ->dynticks_nmi_nesting is equal to one, we are guaranteed + * to be in the outermost NMI handler that interrupted an RCU-idle + * period (observation due to Andy Lutomirski). + */ + if (rcu_dynticks_curr_cpu_in_eqs()) { + rcu_dynticks_eqs_exit(); + incby = 1; + } + WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. 
*/ + rdtp->dynticks_nmi_nesting + incby); + barrier(); +} + /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle * @@ -1070,81 +1145,6 @@ void rcu_irq_enter_irqson(void) local_irq_restore(flags); } -/** - * rcu_nmi_enter - inform RCU of entry to NMI context - * - * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and - * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know - * that the CPU is active. This implementation permits nested NMIs, as - * long as the nesting level does not overflow an int. (You will probably - * run out of stack space first.) - * - * If you add or remove a call to rcu_nmi_enter(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. - */ -void rcu_nmi_enter(void) -{ - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int incby = 2; - - /* Complain about underflow. */ - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); - - /* - * If idle from RCU viewpoint, atomically increment ->dynticks - * to mark non-idle and increment ->dynticks_nmi_nesting by one. - * Otherwise, increment ->dynticks_nmi_nesting by two. This means - * if ->dynticks_nmi_nesting is equal to one, we are guaranteed - * to be in the outermost NMI handler that interrupted an RCU-idle - * period (observation due to Andy Lutomirski). - */ - if (rcu_dynticks_curr_cpu_in_eqs()) { - rcu_dynticks_eqs_exit(); - incby = 1; - } - WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ - rdtp->dynticks_nmi_nesting + incby); - barrier(); -} - -/** - * rcu_nmi_exit - inform RCU of exit from NMI context - * - * If we are returning from the outermost NMI handler that interrupted an - * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting - * to let the RCU grace-period handling know that the CPU is back to - * being RCU-idle. - * - * If you add or remove a call to rcu_nmi_exit(), be sure to test - * with CONFIG_RCU_EQS_DEBUG=y. 
- */ -void rcu_nmi_exit(void) -{ - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - - /* - * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. - * (We are exiting an NMI handler, so RCU better be paying attention - * to us!) - */ - WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); - WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); - - /* - * If the nesting level is not 1, the CPU wasn't RCU-idle, so - * leave it in non-RCU-idle state. - */ - if (rdtp->dynticks_nmi_nesting != 1) { - WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ - rdtp->dynticks_nmi_nesting - 2); - return; - } - - /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ - rcu_dynticks_eqs_enter(); -} - /** * rcu_is_watching - see if RCU thinks that the current CPU is idle * -- cgit v1.2.3 From 6136d6e48a0138f6be5bb3427dbeb0ba07a546a4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 08:28:04 -0700 Subject: rcu: Clamp ->dynticks_nmi_nesting at eqs entry/exit In preparation for merging dyntick-idle irq handling into the NMI algorithm, clamp ->dynticks_nmi_nesting value to allow for interrupts that enter but never leave and vice versa. It is important that the clamping happen outside of the extended quiescent state. Otherwise, there will be short windows where irqs and NMIs fail to convince RCU to start watching. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 2 ++ kernel/rcu/tree.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 59c471de342a..f4a411964c41 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -56,6 +56,8 @@ #define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ DYNTICK_TASK_FLAG) +#define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) + /* * Grace-period counter management. 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 419f3c38e1b6..142cdd4a50c9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -818,6 +818,7 @@ static void rcu_eqs_enter(bool user) struct rcu_dynticks *rdtp; rdtp = this_cpu_ptr(&rcu_dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) @@ -1008,6 +1009,7 @@ static void rcu_eqs_exit(bool user) rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; __this_cpu_dec(disable_rcu_irq_enter); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } -- cgit v1.2.3 From 695b78b548d8a26288f041e907ff17758df9e1d5 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 20 Nov 2017 23:14:55 +0100 Subject: ASoC: fsl_ssi: AC'97 ops need regmap, clock and cleaning up on failure AC'97 ops (register read / write) need SSI regmap and clock, so they have to be set after them. We also need to set these ops back to NULL if we fail the probe. Signed-off-by: Maciej S. 
Szmigiero Acked-by: Nicolin Chen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/fsl/fsl_ssi.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index f2f51e06e22c..c3a83ed0297e 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1458,12 +1458,6 @@ static int fsl_ssi_probe(struct platform_device *pdev) sizeof(fsl_ssi_ac97_dai)); fsl_ac97_data = ssi_private; - - ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); - if (ret) { - dev_err(&pdev->dev, "could not set AC'97 ops\n"); - return ret; - } } else { /* Initialize this copy of the CPU DAI driver structure */ memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template, @@ -1574,6 +1568,14 @@ static int fsl_ssi_probe(struct platform_device *pdev) return ret; } + if (fsl_ssi_is_ac97(ssi_private)) { + ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); + if (ret) { + dev_err(&pdev->dev, "could not set AC'97 ops\n"); + goto error_ac97_ops; + } + } + ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, &ssi_private->cpu_dai_drv, 1); if (ret) { @@ -1657,6 +1659,10 @@ error_sound_card: fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); error_asoc_register: + if (fsl_ssi_is_ac97(ssi_private)) + snd_soc_set_ac97_ops(NULL); + +error_ac97_ops: if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); -- cgit v1.2.3 From 58721f5da4bcd5187566f4159a4fc88f70bf74f6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 10:42:22 -0700 Subject: rcu: Define rcu_irq_{enter,exit}() in terms of rcu_nmi_{enter,exit}() RCU currently uses two different mechanisms for tracking irqs and NMIs. This is unnecessary complexity: Given that NMIs can nest and given that RCU's tracking handles such nesting, the NMI tracking mechanism can also be used to track irqs. 
This commit therefore defines rcu_irq_enter() in terms of rcu_nmi_enter() and rcu_irq_exit() in terms of rcu_nmi_exit(). Unfortunately, callers must still distinguish between the irq and NMI functions because additional actions are taken when an irq interrupts idle or nohz_full usermode execution, and these actions cannot always be taken from NMI handlers. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 59 ++++++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 142cdd4a50c9..fde0e840563f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -266,6 +266,7 @@ void rcu_bh_qs(void) static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; @@ -914,8 +915,8 @@ void rcu_nmi_exit(void) * * This code assumes that the idle loop never does anything that might * result in unbalanced calls to irq_enter() and irq_exit(). If your - * architecture violates this assumption, RCU will give you what you - * deserve, good and hard. But very infrequently and irreproducibly. + * architecture's idle loop violates this assumption, RCU will give you what + * you deserve, good and hard. But very infrequently and irreproducibly. * * Use things like work queues to work around this limitation. * @@ -926,23 +927,14 @@ void rcu_nmi_exit(void) */ void rcu_irq_exit(void) { - struct rcu_dynticks *rdtp; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - rdtp = this_cpu_ptr(&rcu_dynticks); - - /* Page faults can happen in NMI handlers, so check... 
*/ - if (rdtp->dynticks_nmi_nesting) - return; - - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting < 1); - if (rdtp->dynticks_nesting <= 1) { - rcu_eqs_enter_common(true); - } else { - trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1); - rdtp->dynticks_nesting--; - } + if (rdtp->dynticks_nmi_nesting == 1) + rcu_prepare_for_idle(); + rcu_nmi_exit(); + if (rdtp->dynticks_nmi_nesting == 0) + rcu_dynticks_task_enter(); } /* @@ -1097,12 +1089,12 @@ void rcu_nmi_enter(void) * sections can occur. The caller must have disabled interrupts. * * Note that the Linux kernel is fully capable of entering an interrupt - * handler that it never exits, for example when doing upcalls to - * user mode! This code assumes that the idle loop never does upcalls to - * user mode. If your architecture does do upcalls from the idle loop (or - * does anything else that results in unbalanced calls to the irq_enter() - * and irq_exit() functions), RCU will give you what you deserve, good - * and hard. But very infrequently and irreproducibly. + * handler that it never exits, for example when doing upcalls to user mode! + * This code assumes that the idle loop never does upcalls to user mode. + * If your architecture's idle loop does do upcalls to user mode (or does + * anything else that results in unbalanced calls to the irq_enter() and + * irq_exit() functions), RCU will give you what you deserve, good and hard. + * But very infrequently and irreproducibly. * * Use things like work queues to work around this limitation. * @@ -1113,23 +1105,14 @@ void rcu_nmi_enter(void) */ void rcu_irq_enter(void) { - struct rcu_dynticks *rdtp; - long long newval; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - rdtp = this_cpu_ptr(&rcu_dynticks); - - /* Page faults can happen in NMI handlers, so check... 
*/ - if (rdtp->dynticks_nmi_nesting) - return; - - newval = rdtp->dynticks_nesting + 1; - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && newval == 0); - if (rdtp->dynticks_nesting) - trace_rcu_dyntick(TPS("++="), rdtp->dynticks_nesting, newval); - else - rcu_eqs_exit_common(newval, true); - rdtp->dynticks_nesting++; + if (rdtp->dynticks_nmi_nesting == 0) + rcu_dynticks_task_exit(); + rcu_nmi_enter(); + if (rdtp->dynticks_nmi_nesting == 1) + rcu_cleanup_after_idle(); } /* -- cgit v1.2.3 From 51a1fd30f13090be7750fed86cf3728afaf4e394 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 14:43:40 -0700 Subject: rcu: Make ->dynticks_nesting be a simple counter Now that ->dynticks_nesting counts only process-level dyntick-idle entry and exit, there is no need for the elaborate segmented counter with its guard fields and overflow checking. This commit therefore makes ->dynticks_nesting be a simple counter. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcu.h | 27 +-------------------------- kernel/rcu/tree.c | 40 ++++++++++++++++++++-------------------- kernel/rcu/tree.h | 1 - 3 files changed, 21 insertions(+), 47 deletions(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index f4a411964c41..afe0559d1867 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -30,32 +30,7 @@ #define RCU_TRACE(stmt) #endif /* #else #ifdef CONFIG_RCU_TRACE */ -/* - * Process-level increment to ->dynticks_nesting field. This allows for - * architectures that use half-interrupts and half-exceptions from - * process context. - * - * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH - * that counts the number of process-based reasons why RCU cannot - * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE - * is the value used to increment or decrement this field. 
- * - * The rest of the bits could in principle be used to count interrupts, - * but this would mean that a negative-one value in the interrupt - * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field. - * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK - * that is set to DYNTICK_TASK_FLAG upon initial exit from idle. - * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon - * initial exit from idle. - */ -#define DYNTICK_TASK_NEST_WIDTH 7 -#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1) -#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1) -#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2) -#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3) -#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ - DYNTICK_TASK_FLAG) - +/* Offset to allow for unmatched rcu_irq_{enter,exit}(). */ #define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index fde0e840563f..d123474fe829 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -265,7 +265,7 @@ void rcu_bh_qs(void) #endif static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks_nesting = 1, .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE, .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; @@ -813,6 +813,10 @@ static void rcu_eqs_enter_common(bool user) /* * Enter an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. + * + * We crowbar the ->dynticks_nmi_nesting field to zero to allow for + * the possibility of usermode upcalls having messed up our count + * of interrupt nesting level during the prior busy period. 
*/ static void rcu_eqs_enter(bool user) { @@ -821,11 +825,11 @@ static void rcu_eqs_enter(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - (rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); - if ((rdtp->dynticks_nesting & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) + rdtp->dynticks_nesting == 0); + if (rdtp->dynticks_nesting == 1) rcu_eqs_enter_common(user); else - rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; + rdtp->dynticks_nesting--; } /** @@ -836,10 +840,6 @@ static void rcu_eqs_enter(bool user) * critical sections can occur in irq handlers in idle, a possibility * handled by irq_enter() and irq_exit().) * - * We crowbar the ->dynticks_nesting field to zero to allow for - * the possibility of usermode upcalls having messed up our count - * of interrupt nesting level during the prior busy period. - * * If you add or remove a call to rcu_idle_enter(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. */ @@ -984,6 +984,10 @@ static void rcu_eqs_exit_common(long long newval, int user) /* * Exit an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. + * + * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to + * allow for the possibility of usermode upcalls messing up our count of + * interrupt nesting level during the busy period that is just now starting. 
*/ static void rcu_eqs_exit(bool user) { @@ -994,12 +998,12 @@ static void rcu_eqs_exit(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); - if (oldval & DYNTICK_TASK_NEST_MASK) { - rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; + if (oldval) { + rdtp->dynticks_nesting++; } else { __this_cpu_inc(disable_rcu_irq_enter); - rcu_eqs_exit_common(DYNTICK_TASK_EXIT_IDLE, user); - rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rcu_eqs_exit_common(1, user); + rdtp->dynticks_nesting = 1; __this_cpu_dec(disable_rcu_irq_enter); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } @@ -1011,11 +1015,6 @@ static void rcu_eqs_exit(bool user) * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to - * allow for the possibility of usermode upcalls messing up our count - * of interrupt nesting level during the busy period that is just - * now starting. - * * If you add or remove a call to rcu_idle_exit(), be sure to test with * CONFIG_RCU_EQS_DEBUG=y. 
*/ @@ -1219,7 +1218,8 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); */ static int rcu_is_cpu_rrupt_from_idle(void) { - return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1; + return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 && + __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1; } /* @@ -3709,7 +3709,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); rdp->dynticks = &per_cpu(rcu_dynticks, cpu); - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1); WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks))); rdp->cpu = cpu; rdp->rsp = rsp; @@ -3738,7 +3738,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !init_nocb_callback_list(rdp)) rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ - rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; + rdp->dynticks->dynticks_nesting = 1; rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 46a5d1991450..dbd7e3753bed 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -39,7 +39,6 @@ */ struct rcu_dynticks { long long dynticks_nesting; /* Track irq/process nesting level. */ - /* Process level is worth LLONG_MAX/2. */ int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ -- cgit v1.2.3 From 844ccdd7dce2c1a6ea9b437fcf8c3265b136e4a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Oct 2017 16:51:47 -0700 Subject: rcu: Eliminate rcu_irq_enter_disabled() Now that the irq path uses the rcu_nmi_{enter,exit}() algorithm, rcu_irq_enter() and rcu_irq_exit() may be used from any context. 
There is thus no need for rcu_irq_enter_disabled() and for the checks using it. This commit therefore eliminates rcu_irq_enter_disabled(). Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 1 - include/linux/rcutree.h | 1 - include/linux/tracepoint.h | 5 +---- kernel/rcu/tree.c | 22 ++-------------------- kernel/trace/trace.c | 11 ----------- 5 files changed, 3 insertions(+), 37 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index b3dbf9502fd0..ce9beec35e34 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -111,7 +111,6 @@ static inline void rcu_cpu_stall_reset(void) { } static inline void rcu_idle_enter(void) { } static inline void rcu_idle_exit(void) { } static inline void rcu_irq_enter(void) { } -static inline bool rcu_irq_enter_disabled(void) { return false; } static inline void rcu_irq_exit_irqson(void) { } static inline void rcu_irq_enter_irqson(void) { } static inline void rcu_irq_exit(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 37d6fd3b7ff8..fd996cdf1833 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -85,7 +85,6 @@ void rcu_irq_enter(void); void rcu_irq_exit(void); void rcu_irq_enter_irqson(void); void rcu_irq_exit_irqson(void); -bool rcu_irq_enter_disabled(void); void exit_rcu(void); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a26ffbe09e71..c94f466d57ef 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -137,11 +137,8 @@ extern void syscall_unregfunc(void); \ if (!(cond)) \ return; \ - if (rcucheck) { \ - if (WARN_ON_ONCE(rcu_irq_enter_disabled())) \ - return; \ + if (rcucheck) \ rcu_irq_enter_irqson(); \ - } \ rcu_read_lock_sched_notrace(); \ it_func_ptr = rcu_dereference_sched((tp)->funcs); \ if (it_func_ptr) { \ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d123474fe829..444aa2b3f24d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -270,20 +270,6 @@ static 
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), }; -/* - * There's a few places, currently just in the tracing infrastructure, - * that uses rcu_irq_enter() to make sure RCU is watching. But there's - * a small location where that will not even work. In those cases - * rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter() - * can be called. - */ -static DEFINE_PER_CPU(bool, disable_rcu_irq_enter); - -bool rcu_irq_enter_disabled(void) -{ - return this_cpu_read(disable_rcu_irq_enter); -} - /* * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state. @@ -792,10 +778,8 @@ static void rcu_eqs_enter_common(bool user) do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - __this_cpu_inc(disable_rcu_irq_enter); - rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */ - rcu_dynticks_eqs_enter(); /* After this, tracing works again. */ - __this_cpu_dec(disable_rcu_irq_enter); + rdtp->dynticks_nesting = 0; + rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); /* @@ -1001,10 +985,8 @@ static void rcu_eqs_exit(bool user) if (oldval) { rdtp->dynticks_nesting++; } else { - __this_cpu_inc(disable_rcu_irq_enter); rcu_eqs_exit_common(1, user); rdtp->dynticks_nesting = 1; - __this_cpu_dec(disable_rcu_irq_enter); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73e67b68c53b..dbce1be3bab8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2682,17 +2682,6 @@ void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, if (unlikely(in_nmi())) return; - /* - * It is possible that a function is being traced in a - * location that RCU is not watching. A call to - * rcu_irq_enter() will make sure that it is, but there's - * a few internal rcu functions that could be traced - * where that wont work either. 
In those cases, we just - * do nothing. - */ - if (unlikely(rcu_irq_enter_disabled())) - return; - rcu_irq_enter_irqson(); __ftrace_trace_stack(buffer, flags, skip, pc, NULL); rcu_irq_exit_irqson(); -- cgit v1.2.3 From b880b8056b31288323745a13930bc45cf4c86e9d Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 20 Nov 2017 23:16:07 +0100 Subject: ASoC: fsl_ssi: serialize AC'97 register access operations AC'97 register access operations (both read and write) on SSI use a single, shared set of SSI registers for AC'97 register address and data. This means that only one such access is possible at a time and so all these operations need to be serialized. Since an AC'97 register access operation in this driver takes 100us+ let's use a mutex for this. Use this opportunity to also change a default value returned from AC'97 register read function from -1 to 0, since that's what AC'97 specs require to be returned when unknown / undefined registers are read. Signed-off-by: Maciej S. Szmigiero Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index c3a83ed0297e..424bafaf51ef 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -265,6 +266,8 @@ struct fsl_ssi_private { u32 fifo_watermark; u32 dma_maxburst; + + struct mutex ac97_reg_lock; }; /* @@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, if (reg > 0x7f) return; + mutex_lock(&fsl_ac97_data->ac97_reg_lock); + ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { pr_err("ac97 write clk_prepare_enable failed: %d\n", ret); - return; + goto ret_unlock; } lreg = reg << 12; @@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, udelay(100); clk_disable_unprepare(fsl_ac97_data->clk); +
+ret_unlock: + mutex_unlock(&fsl_ac97_data->ac97_reg_lock); } static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, @@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, { struct regmap *regs = fsl_ac97_data->regs; - unsigned short val = -1; + unsigned short val = 0; u32 reg_val; unsigned int lreg; int ret; + mutex_lock(&fsl_ac97_data->ac97_reg_lock); + ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { pr_err("ac97 read clk_prepare_enable failed: %d\n", ret); - return -1; + goto ret_unlock; } lreg = (reg & 0x7f) << 12; @@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, clk_disable_unprepare(fsl_ac97_data->clk); +ret_unlock: + mutex_unlock(&fsl_ac97_data->ac97_reg_lock); return val; } @@ -1569,6 +1581,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) } if (fsl_ssi_is_ac97(ssi_private)) { + mutex_init(&ssi_private->ac97_reg_lock); ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); if (ret) { dev_err(&pdev->dev, "could not set AC'97 ops\n"); @@ -1663,6 +1676,9 @@ error_asoc_register: snd_soc_set_ac97_ops(NULL); error_ac97_ops: + if (fsl_ssi_is_ac97(ssi_private)) + mutex_destroy(&ssi_private->ac97_reg_lock); + if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); @@ -1681,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev) if (ssi_private->soc->imx) fsl_ssi_imx_clean(pdev, ssi_private); - if (fsl_ssi_is_ac97(ssi_private)) + if (fsl_ssi_is_ac97(ssi_private)) { snd_soc_set_ac97_ops(NULL); + mutex_destroy(&ssi_private->ac97_reg_lock); + } return 0; } -- cgit v1.2.3 From 0cab20cec0b663b7be8e2be5998d5a4113647f86 Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Sun, 19 Nov 2017 23:45:49 -0800 Subject: ASoC: pcm512x: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in sound/soc/codecs/snd-soc-pcm512x-spi.o see 
include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. Signed-off-by: Jesse Chan Signed-off-by: Mark Brown --- sound/soc/codecs/pcm512x-spi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c index 25c63510ae15..7cdd2dc4fd79 100644 --- a/sound/soc/codecs/pcm512x-spi.c +++ b/sound/soc/codecs/pcm512x-spi.c @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { }; module_spi_driver(pcm512x_spi_driver); + +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); +MODULE_AUTHOR("Mark Brown "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 346cccf88319344c9f513bd85df6ae2258e8a8ea Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Mon, 20 Nov 2017 18:23:19 +0800 Subject: ASoC: rt5514: Add the sanity check for the driver_data in the resume function If the rt5514 spi driver is loaded, but the snd_soc_platform_driver is not loaded by the correct DAI settings, snd_soc_platform_get_drvdata returns a NULL pointer in the resume function. 
Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- sound/soc/codecs/rt5514-spi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 2df91db765ac..ca6a90d8fc39 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev) if (device_may_wakeup(dev)) disable_irq_wake(irq); - if (rt5514_dsp->substream) { - rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf)); - if (buf[0] & RT5514_IRQ_STATUS_BIT) - rt5514_schedule_copy(rt5514_dsp); + if (rt5514_dsp) { + if (rt5514_dsp->substream) { + rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, + sizeof(buf)); + if (buf[0] & RT5514_IRQ_STATUS_BIT) + rt5514_schedule_copy(rt5514_dsp); + } } return 0; -- cgit v1.2.3 From 74231295c67ada29a4566272d8ac4886d09f3e83 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 20 Nov 2017 23:12:01 +0100 Subject: ASoC: fsl_ssi: remove duplicated flag setting in fsl_ssi_setup_reg_vals() We don't need to set CCSR_SSI_SIER_RFF0_EN / CCSR_SSI_SIER_TFE0_EN bits in reg->rx.sier / reg->tx.sier variables in a non-AC'97 mode considering we had just initialized these variables to these very values unconditionally a few lines earlier. Signed-off-by: Maciej S. 
Szmigiero Acked-by: Nicolin Chen Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 424bafaf51ef..9e97a0529f37 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -600,9 +600,7 @@ static void fsl_ssi_setup_reg_vals(struct fsl_ssi_private *ssi_private) if (!fsl_ssi_is_ac97(ssi_private)) { reg->rx.scr = CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_RE; - reg->rx.sier |= CCSR_SSI_SIER_RFF0_EN; reg->tx.scr = CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_TE; - reg->tx.sier |= CCSR_SSI_SIER_TFE0_EN; } if (ssi_private->use_dma) { -- cgit v1.2.3 From aa24163b2ee5c92120e32e99b5a93143a0f4258e Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Wed, 15 Nov 2017 19:50:14 +0530 Subject: cgroup/cpuset: remove circular dependency deadlock Remove circular dependency deadlock in a scenario where hotplug of CPU is being done while there is updation in cgroup and cpuset triggered from userspace. Process A => kthreadd => Process B => Process C => Process A Process A cpu_subsys_offline(); cpu_down(); _cpu_down(); percpu_down_write(&cpu_hotplug_lock); //held cpuhp_invoke_callback(); workqueue_offline_cpu(); queue_work_on(); // unbind_work on system_highpri_wq __queue_work(); insert_work(); wake_up_worker(); flush_work(); wait_for_completion(); worker_thread(); manage_workers(); create_worker(); kthread_create_on_node(); wake_up_process(kthreadd_task); kthreadd kthreadd(); kernel_thread(); do_fork(); copy_process(); percpu_down_read(&cgroup_threadgroup_rwsem); __rwsem_down_read_failed_common(); //waiting Process B kernfs_fop_write(); cgroup_file_write(); cgroup_procs_write(); percpu_down_write(&cgroup_threadgroup_rwsem); //held cgroup_attach_task(); cgroup_migrate(); cgroup_migrate_execute(); cpuset_can_attach(); mutex_lock(&cpuset_mutex); //waiting Process C kernfs_fop_write(); cgroup_file_write(); cpuset_write_resmask(); mutex_lock(&cpuset_mutex); //held update_cpumask(); 
update_cpumasks_hier(); rebuild_sched_domains_locked(); get_online_cpus(); percpu_down_read(&cpu_hotplug_lock); //waiting Eliminating deadlock by reversing the locking order for cpuset_mutex and cpu_hotplug_lock. Signed-off-by: Prateek Sood Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 53 ++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f7efa7b4d825..cab5fd1ee767 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -812,6 +812,18 @@ done: return ndoms; } +static void cpuset_sched_change_begin(void) +{ + cpus_read_lock(); + mutex_lock(&cpuset_mutex); +} + +static void cpuset_sched_change_end(void) +{ + mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); +} + /* * Rebuild scheduler domains. * @@ -821,16 +833,14 @@ done: * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_mutex held. Takes get_online_cpus(). */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_cpuslocked(void) { struct sched_domain_attr *attr; cpumask_var_t *doms; int ndoms; lockdep_assert_held(&cpuset_mutex); - get_online_cpus(); /* * We have raced with CPU hotplug. Don't do anything to avoid @@ -838,27 +848,25 @@ static void rebuild_sched_domains_locked(void) * Anyways, hotplug work item will rebuild sched domains. 
*/ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) - goto out; + return; /* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr); /* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); -out: - put_online_cpus(); } #else /* !CONFIG_SMP */ -static void rebuild_sched_domains_locked(void) +static void rebuild_sched_domains_cpuslocked(void) { } #endif /* CONFIG_SMP */ void rebuild_sched_domains(void) { - mutex_lock(&cpuset_mutex); - rebuild_sched_domains_locked(); - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_begin(); + rebuild_sched_domains_cpuslocked(); + cpuset_sched_change_end(); } /** @@ -944,7 +952,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) rcu_read_unlock(); if (need_rebuild_sched_domains) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); } /** @@ -1276,7 +1284,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) cs->relax_domain_level = val; if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); } return 0; @@ -1309,7 +1317,6 @@ static void update_tasks_flags(struct cpuset *cs) * * Call with cpuset_mutex held. 
*/ - static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on) { @@ -1342,7 +1349,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spin_unlock_irq(&callback_lock); if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) - rebuild_sched_domains_locked(); + rebuild_sched_domains_cpuslocked(); if (spread_flag_changed) update_tasks_flags(cs); @@ -1610,7 +1617,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = 0; - mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; @@ -1646,7 +1653,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); return retval; } @@ -1657,7 +1664,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = -ENODEV; - mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1670,7 +1677,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); return retval; } @@ -1709,7 +1716,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work); - mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1733,7 +1740,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_trial_cpuset(trialcs); out_unlock: - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); @@ -2034,14 +2041,14 @@ out_unlock: /* * If the cpuset being removed has its flag 
'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains_locked(). + * will call rebuild_sched_domains_cpuslocked(). */ static void cpuset_css_offline(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); - mutex_lock(&cpuset_mutex); + cpuset_sched_change_begin(); if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); @@ -2049,7 +2056,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); - mutex_unlock(&cpuset_mutex); + cpuset_sched_change_end(); } static void cpuset_css_free(struct cgroup_subsys_state *css) -- cgit v1.2.3 From 1599a185f0e6113be185b9fb809c621c73865829 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Wed, 15 Nov 2017 19:50:15 +0530 Subject: cpuset: Make cpuset hotplug synchronous Convert cpuset_hotplug_workfn() into synchronous call for cpu hotplug path. For memory hotplug path it still gets queued as a work item. Since cpuset_hotplug_workfn() can be made synchronous for cpu hotplug path, it is not required to wait for cpuset hotplug while thawing processes. 
Signed-off-by: Prateek Sood Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 6 ------ kernel/cgroup/cpuset.c | 41 ++++++++++++++++++++--------------------- kernel/power/process.c | 2 -- kernel/sched/core.c | 1 - 4 files changed, 20 insertions(+), 30 deletions(-) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1b8e41597ef5..2ab910f85154 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -52,9 +52,7 @@ static inline void cpuset_dec(void) extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); -extern void cpuset_wait_for_hotplug(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -167,15 +165,11 @@ static inline bool cpusets_enabled(void) { return false; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_force_rebuild(void) { } - static inline void cpuset_update_active_cpus(void) { partition_sched_domains(1, NULL, NULL); } -static inline void cpuset_wait_for_hotplug(void) { } - static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) { diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index cab5fd1ee767..227bc25d951d 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2277,15 +2277,8 @@ retry: mutex_unlock(&cpuset_mutex); } -static bool force_rebuild; - -void cpuset_force_rebuild(void) -{ - force_rebuild = true; -} - /** - * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset + * cpuset_hotplug - handle CPU/memory hotunplug for a cpuset * * This function is called after either CPU or memory configuration has * changed and updates cpuset accordingly. 
The top_cpuset is always @@ -2300,7 +2293,7 @@ void cpuset_force_rebuild(void) * Note that CPU offlining during suspend is ignored. We don't modify * cpusets across suspend/resume cycles at all. */ -static void cpuset_hotplug_workfn(struct work_struct *work) +static void cpuset_hotplug(bool use_cpu_hp_lock) { static cpumask_t new_cpus; static nodemask_t new_mems; @@ -2358,25 +2351,31 @@ static void cpuset_hotplug_workfn(struct work_struct *work) } /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated || force_rebuild) { - force_rebuild = false; - rebuild_sched_domains(); + if (cpus_updated) { + if (use_cpu_hp_lock) + rebuild_sched_domains(); + else { + /* Acquiring cpu_hotplug_lock is not required. + * When cpuset_hotplug() is called in hotplug path, + * cpu_hotplug_lock is held by the hotplug context + * which is waiting for cpuhp_thread_fun to indicate + * completion of callback. + */ + mutex_lock(&cpuset_mutex); + rebuild_sched_domains_cpuslocked(); + mutex_unlock(&cpuset_mutex); + } } } -void cpuset_update_active_cpus(void) +static void cpuset_hotplug_workfn(struct work_struct *work) { - /* - * We're inside cpu hotplug critical region which usually nests - * inside cgroup synchronization. Bounce actual hotplug processing - * to a work item to avoid reverse locking order. 
- */ - schedule_work(&cpuset_hotplug_work); + cpuset_hotplug(true); } -void cpuset_wait_for_hotplug(void) +void cpuset_update_active_cpus(void) { - flush_work(&cpuset_hotplug_work); + cpuset_hotplug(false); } /* diff --git a/kernel/power/process.c b/kernel/power/process.c index 7381d49a44db..c326d7235c5f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -204,8 +204,6 @@ void thaw_processes(void) __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); - cpuset_wait_for_hotplug(); - read_lock(&tasklist_lock); for_each_process_thread(g, p) { /* No other threads should have PF_SUSPEND_TASK set */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 75554f366fd3..88b3450b29ab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5624,7 +5624,6 @@ static void cpuset_cpu_active(void) * restore the original sched domains by considering the * cpuset configurations. */ - cpuset_force_rebuild(); } cpuset_update_active_cpus(); } -- cgit v1.2.3 From 7d229c668a114e80d6be62b00e21a73bdd9ba7b3 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Fri, 3 Nov 2017 17:27:49 +0200 Subject: main: kernel_start: move housekeeping_init() before workqueue_init_early() This is needed in order to allow the unbound workqueue to take housekeeping cpus into account. Signed-off-by: Tal Shorer Signed-off-by: Tejun Heo --- init/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index dfec3809e740..e96e3a14533c 100644 --- a/init/main.c +++ b/init/main.c @@ -588,6 +588,12 @@ asmlinkage __visible void __init start_kernel(void) local_irq_disable(); radix_tree_init(); + /* + * Set up housekeeping before setting up workqueues to allow the unbound + * workqueue to take non-housekeeping into account. + */ + housekeeping_init(); + /* * Allow workqueue creation and work item queueing/cancelling * early. 
Work item execution depends on kthreads and starts after @@ -605,7 +611,6 @@ asmlinkage __visible void __init start_kernel(void) early_irq_init(); init_IRQ(); tick_init(); - housekeeping_init(); rcu_init_nohz(); init_timers(); hrtimers_init(); -- cgit v1.2.3 From c98a9805096460567404799a7bd3149826affde7 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Fri, 3 Nov 2017 17:27:50 +0200 Subject: workqueue: respect isolated cpus when queueing an unbound work Initialize wq_unbound_cpumask to exclude cpus that were isolated by the cmdline's isolcpus parameter. Signed-off-by: Tal Shorer Signed-off-by: Tejun Heo --- kernel/workqueue.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8fdb710bfdd7..6a5658cb46da 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -4957,6 +4958,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) return -ENOMEM; + /* + * Not excluding isolated cpus on purpose. + * If the user wishes to include them, we allow that. + */ cpumask_and(cpumask, cpumask, cpu_possible_mask); if (!cpumask_empty(cpumask)) { apply_wqattrs_lock(); @@ -5555,7 +5560,7 @@ int __init workqueue_init_early(void) WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); - cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); + cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN)); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); -- cgit v1.2.3 From d3b0535216f04e7e149eaebe8e967c46bdf88dc3 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Fri, 17 Nov 2017 15:09:27 +0000 Subject: ASoC: da7219: Correct IRQ level in DT binding example Current DT binding documentation shows an example where the IRQ for the device is chosen to be ACTIVE_HIGH. 
This is incorrect as the device only supports ACTIVE_LOW, so this commit fixes that discrepancy. Signed-off-by: Adam Thomson Acked-by: Rob Herring Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/da7219.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/da7219.txt b/Documentation/devicetree/bindings/sound/da7219.txt index cf61681826b6..5b54d2d045c3 100644 --- a/Documentation/devicetree/bindings/sound/da7219.txt +++ b/Documentation/devicetree/bindings/sound/da7219.txt @@ -77,7 +77,7 @@ Example: reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; VDD-supply = <®_audio>; VDDMIC-supply = <®_audio>; -- cgit v1.2.3 From b7926c464d6479fc62a4297ca4f48a5da5fb0988 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Fri, 17 Nov 2017 15:09:28 +0000 Subject: ASoC: da7218: Correct IRQ level in DT binding example Current DT binding documentation shows an example where the IRQ for the device is chosen to be ACTIVE_HIGH. This is incorrect as the device only supports ACTIVE_LOW, so this commit fixes that discrepancy. 
Signed-off-by: Adam Thomson Acked-by: Rob Herring Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/da7218.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/da7218.txt b/Documentation/devicetree/bindings/sound/da7218.txt index 5ca5a709b6aa..3ab9dfef38d1 100644 --- a/Documentation/devicetree/bindings/sound/da7218.txt +++ b/Documentation/devicetree/bindings/sound/da7218.txt @@ -73,7 +73,7 @@ Example: compatible = "dlg,da7218"; reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; wakeup-source; VDD-supply = <®_audio>; -- cgit v1.2.3 From eaa53216c5909ae1567d15888e55d9b1d7269ca7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 9 Nov 2017 00:19:33 +0000 Subject: ASoC: don't use codec hw_write on uda1380 uda1380 driver is using codec hw_write/control_data, but it is redundant code. This patch cleanup these Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/uda1380.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index 926c81ae8185..44448023edb0 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -37,7 +37,7 @@ struct uda1380_priv { struct snd_soc_codec *codec; unsigned int dac_clk; struct work_struct work; - void *control_data; + struct i2c_client *i2c; }; /* @@ -92,6 +92,7 @@ static inline void uda1380_write_reg_cache(struct snd_soc_codec *codec, static int uda1380_write(struct snd_soc_codec *codec, unsigned int reg, unsigned int value) { + struct uda1380_priv *uda1380 = snd_soc_codec_get_drvdata(codec); u8 data[3]; /* data is @@ -111,10 +112,10 @@ static int uda1380_write(struct snd_soc_codec *codec, unsigned int reg, if (!snd_soc_codec_is_active(codec) && (reg >= UDA1380_MVOL)) return 0; pr_debug("uda1380: hw write %x val %x\n", reg, value); - if 
(codec->hw_write(codec->control_data, data, 3) == 3) { + if (i2c_master_send(uda1380->i2c, data, 3) == 3) { unsigned int val; - i2c_master_send(codec->control_data, data, 1); - i2c_master_recv(codec->control_data, data, 2); + i2c_master_send(uda1380->i2c, data, 1); + i2c_master_recv(uda1380->i2c, data, 2); val = (data[0]<<8) | data[1]; if (val != value) { pr_debug("uda1380: READ BACK VAL %x\n", @@ -130,6 +131,7 @@ static int uda1380_write(struct snd_soc_codec *codec, unsigned int reg, static void uda1380_sync_cache(struct snd_soc_codec *codec) { + struct uda1380_priv *uda1380 = snd_soc_codec_get_drvdata(codec); int reg; u8 data[3]; u16 *cache = codec->reg_cache; @@ -139,7 +141,7 @@ static void uda1380_sync_cache(struct snd_soc_codec *codec) data[0] = reg; data[1] = (cache[reg] & 0xff00) >> 8; data[2] = cache[reg] & 0x00ff; - if (codec->hw_write(codec->control_data, data, 3) != 3) + if (i2c_master_send(uda1380->i2c, data, 3) != 3) dev_err(codec->dev, "%s: write to reg 0x%x failed\n", __func__, reg); } @@ -148,6 +150,7 @@ static void uda1380_sync_cache(struct snd_soc_codec *codec) static int uda1380_reset(struct snd_soc_codec *codec) { struct uda1380_platform_data *pdata = codec->dev->platform_data; + struct uda1380_priv *uda1380 = snd_soc_codec_get_drvdata(codec); if (gpio_is_valid(pdata->gpio_reset)) { gpio_set_value(pdata->gpio_reset, 1); @@ -160,7 +163,7 @@ static int uda1380_reset(struct snd_soc_codec *codec) data[1] = 0; data[2] = 0; - if (codec->hw_write(codec->control_data, data, 3) != 3) { + if (i2c_master_send(uda1380->i2c, data, 3) != 3) { dev_err(codec->dev, "%s: failed\n", __func__); return -EIO; } @@ -695,9 +698,6 @@ static int uda1380_probe(struct snd_soc_codec *codec) uda1380->codec = codec; - codec->hw_write = (hw_write_t)i2c_master_send; - codec->control_data = uda1380->control_data; - if (!gpio_is_valid(pdata->gpio_power)) { ret = uda1380_reset(codec); if (ret) @@ -772,7 +772,7 @@ static int uda1380_i2c_probe(struct i2c_client *i2c, } 
i2c_set_clientdata(i2c, uda1380); - uda1380->control_data = i2c; + uda1380->i2c = i2c; ret = snd_soc_register_codec(&i2c->dev, &soc_codec_dev_uda1380, uda1380_dai, ARRAY_SIZE(uda1380_dai)); -- cgit v1.2.3 From ce9544dc8f9a83f9e8ff08eca30821edbe51d177 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 9 Nov 2017 00:19:48 +0000 Subject: ASoC: don't use codec hw_write on tlv320dac33 tlv320dac33 driver is using codec hw_write/control_data, but it is redundant code. This patch cleans these up. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320dac33.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c index 5b94a151539c..80a26418827c 100644 --- a/sound/soc/codecs/tlv320dac33.c +++ b/sound/soc/codecs/tlv320dac33.c @@ -121,7 +121,7 @@ struct tlv320dac33_priv { unsigned int uthr; enum dac33_state state; - void *control_data; + struct i2c_client *i2c; }; static const u8 dac33_reg[DAC33_CACHEREGNUM] = { @@ -200,7 +200,7 @@ static int dac33_read(struct snd_soc_codec *codec, unsigned int reg, /* If powered off, return the cached value */ if (dac33->chip_power) { - val = i2c_smbus_read_byte_data(codec->control_data, value[0]); + val = i2c_smbus_read_byte_data(dac33->i2c, value[0]); if (val < 0) { dev_err(codec->dev, "Read failed (%d)\n", val); value[0] = dac33_read_reg_cache(codec, reg); @@ -233,7 +233,7 @@ static int dac33_write(struct snd_soc_codec *codec, unsigned int reg, dac33_write_reg_cache(codec, data[0], data[1]); if (dac33->chip_power) { - ret = codec->hw_write(codec->control_data, data, 2); + ret = i2c_master_send(dac33->i2c, data, 2); if (ret != 2) dev_err(codec->dev, "Write failed (%d)\n", ret); else @@ -280,7 +280,7 @@ static int dac33_write16(struct snd_soc_codec *codec, unsigned int reg, if (dac33->chip_power) { /* We need to set autoincrement mode for 16 bit writes */ data[0] |= DAC33_I2C_ADDR_AUTOINC; - ret = 
codec->hw_write(codec->control_data, data, 3); + ret = i2c_master_send(dac33->i2c, data, 3); if (ret != 3) dev_err(codec->dev, "Write failed (%d)\n", ret); else @@ -1379,8 +1379,6 @@ static int dac33_soc_probe(struct snd_soc_codec *codec) struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec); int ret = 0; - codec->control_data = dac33->control_data; - codec->hw_write = (hw_write_t) i2c_master_send; dac33->codec = codec; /* Read the tlv320dac33 ID registers */ @@ -1499,7 +1497,7 @@ static int dac33_i2c_probe(struct i2c_client *client, if (dac33 == NULL) return -ENOMEM; - dac33->control_data = client; + dac33->i2c = client; mutex_init(&dac33->mutex); spin_lock_init(&dac33->lock); -- cgit v1.2.3 From fac3f5e20dcecc2aa03272c5d2d36d253883c6ce Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 9 Nov 2017 01:04:09 +0000 Subject: ASoC: don't use codec hw_write on cx20442/omap-ams-delta cx20442/omap-ams-delta driver is using codec hw_write/control_data, but it is redundant code. 
This patch cleanup these Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/cx20442.c | 23 ++++++++++------------- sound/soc/omap/ams-delta.c | 4 ++-- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index 46b1fbb66eba..2083f7eb9de2 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -26,7 +26,7 @@ struct cx20442_priv { - void *control_data; + struct tty_struct *tty; struct regulator *por; }; @@ -163,9 +163,9 @@ static int cx20442_write(struct snd_soc_codec *codec, unsigned int reg, if (reg >= codec->driver->reg_cache_size) return -EINVAL; - /* hw_write and control_data pointers required for talking to the modem + /* tty and write pointers required for talking to the modem * are expected to be set by the line discipline initialization code */ - if (!codec->hw_write || !cx20442->control_data) + if (!cx20442->tty || !cx20442->tty->ops->write) return -EIO; old = reg_cache[reg]; @@ -194,7 +194,7 @@ static int cx20442_write(struct snd_soc_codec *codec, unsigned int reg, return -ENOMEM; dev_dbg(codec->dev, "%s: %s\n", __func__, buf); - if (codec->hw_write(cx20442->control_data, buf, len) != len) + if (cx20442->tty->ops->write(cx20442->tty, buf, len) != len) return -EIO; return 0; @@ -252,8 +252,7 @@ static void v253_close(struct tty_struct *tty) cx20442 = snd_soc_codec_get_drvdata(codec); /* Prevent the codec driver from further accessing the modem */ - codec->hw_write = NULL; - cx20442->control_data = NULL; + cx20442->tty = NULL; codec->component.card->pop_time = 0; } @@ -276,12 +275,11 @@ static void v253_receive(struct tty_struct *tty, cx20442 = snd_soc_codec_get_drvdata(codec); - if (!cx20442->control_data) { + if (!cx20442->tty) { /* First modem response, complete setup procedure */ /* Set up codec driver access to modem controls */ - cx20442->control_data = tty; - codec->hw_write = (hw_write_t)tty->ops->write; + cx20442->tty = tty; 
codec->component.card->pop_time = 1; } } @@ -367,10 +365,9 @@ static int cx20442_codec_probe(struct snd_soc_codec *codec) cx20442->por = regulator_get(codec->dev, "POR"); if (IS_ERR(cx20442->por)) dev_warn(codec->dev, "failed to get the regulator"); - cx20442->control_data = NULL; + cx20442->tty = NULL; snd_soc_codec_set_drvdata(codec, cx20442); - codec->hw_write = NULL; codec->component.card->pop_time = 0; return 0; @@ -381,8 +378,8 @@ static int cx20442_codec_remove(struct snd_soc_codec *codec) { struct cx20442_priv *cx20442 = snd_soc_codec_get_drvdata(codec); - if (cx20442->control_data) { - struct tty_struct *tty = cx20442->control_data; + if (cx20442->tty) { + struct tty_struct *tty = cx20442->tty; tty_hangup(tty); } diff --git a/sound/soc/omap/ams-delta.c b/sound/soc/omap/ams-delta.c index d40219678700..cb72c1e57da0 100644 --- a/sound/soc/omap/ams-delta.c +++ b/sound/soc/omap/ams-delta.c @@ -105,7 +105,7 @@ static int ams_delta_set_audio_mode(struct snd_kcontrol *kcontrol, int pin, changed = 0; /* Refuse any mode changes if we are not able to control the codec. */ - if (!cx20442_codec->hw_write) + if (!cx20442_codec->component.card->pop_time) return -EUNATCH; if (ucontrol->value.enumerated.item[0] >= control->items) @@ -345,7 +345,7 @@ static void cx81801_receive(struct tty_struct *tty, if (!codec) return; - if (!codec->hw_write) { + if (!codec->component.card->pop_time) { /* First modem response, complete setup procedure */ /* Initialize timer used for config pulse generation */ -- cgit v1.2.3 From 3bd333677d6091d989068b0f7ff7aba975e62dc5 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 9 Nov 2017 01:04:37 +0000 Subject: ASoC: don't use codec hw_write on twl6040 twl6040 driver is using codec hw_write/control_data, but it is redundant code. 
This patch cleanup these Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/twl6040.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/twl6040.c b/sound/soc/codecs/twl6040.c index 1773ff84ee3b..a8e6941a1868 100644 --- a/sound/soc/codecs/twl6040.c +++ b/sound/soc/codecs/twl6040.c @@ -106,10 +106,12 @@ static const struct snd_pcm_hw_constraint_list sysclk_constraints[] = { { .count = ARRAY_SIZE(hp_rates), .list = hp_rates, }, }; +#define to_twl6040(codec) dev_get_drvdata((codec)->dev->parent) + static unsigned int twl6040_read(struct snd_soc_codec *codec, unsigned int reg) { struct twl6040_data *priv = snd_soc_codec_get_drvdata(codec); - struct twl6040 *twl6040 = codec->control_data; + struct twl6040 *twl6040 = to_twl6040(codec); u8 value; if (reg >= TWL6040_CACHEREGNUM) @@ -171,7 +173,7 @@ static inline void twl6040_update_dl12_cache(struct snd_soc_codec *codec, static int twl6040_write(struct snd_soc_codec *codec, unsigned int reg, unsigned int value) { - struct twl6040 *twl6040 = codec->control_data; + struct twl6040 *twl6040 = to_twl6040(codec); if (reg >= TWL6040_CACHEREGNUM) return -EIO; @@ -572,7 +574,7 @@ EXPORT_SYMBOL_GPL(twl6040_get_trim_value); int twl6040_get_hs_step_size(struct snd_soc_codec *codec) { - struct twl6040 *twl6040 = codec->control_data; + struct twl6040 *twl6040 = to_twl6040(codec); if (twl6040_get_revid(twl6040) < TWL6040_REV_ES1_3) /* For ES under ES_1.3 HS step is 2 mV */ @@ -830,7 +832,7 @@ static const struct snd_soc_dapm_route intercon[] = { static int twl6040_set_bias_level(struct snd_soc_codec *codec, enum snd_soc_bias_level level) { - struct twl6040 *twl6040 = codec->control_data; + struct twl6040 *twl6040 = to_twl6040(codec); struct twl6040_data *priv = snd_soc_codec_get_drvdata(codec); int ret = 0; @@ -922,7 +924,7 @@ static int twl6040_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_codec *codec = dai->codec; - 
struct twl6040 *twl6040 = codec->control_data; + struct twl6040 *twl6040 = to_twl6040(codec); struct twl6040_data *priv = snd_soc_codec_get_drvdata(codec); int ret; @@ -964,7 +966,7 @@ static int twl6040_set_dai_sysclk(struct snd_soc_dai *codec_dai, static void twl6040_mute_path(struct snd_soc_codec *codec, enum twl6040_dai_id id, int mute) { - struct twl6040 *twl6040 = codec->control_data; + struct twl6040 *twl6040 = to_twl6040(codec); struct twl6040_data *priv = snd_soc_codec_get_drvdata(codec); int hslctl, hsrctl, earctl; int hflctl, hfrctl; @@ -1108,7 +1110,6 @@ static struct snd_soc_dai_driver twl6040_dai[] = { static int twl6040_probe(struct snd_soc_codec *codec) { struct twl6040_data *priv; - struct twl6040 *twl6040 = dev_get_drvdata(codec->dev->parent); struct platform_device *pdev = to_platform_device(codec->dev); int ret = 0; @@ -1119,7 +1120,6 @@ static int twl6040_probe(struct snd_soc_codec *codec) snd_soc_codec_set_drvdata(codec, priv); priv->codec = codec; - codec->control_data = twl6040; priv->plug_irq = platform_get_irq(pdev, 0); if (priv->plug_irq < 0) { -- cgit v1.2.3 From 866b9c81ad8450eb1e2a0132b4bd9fd909afd647 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 9 Nov 2017 01:04:54 +0000 Subject: ASoC: don't use codec hw_write on max98926 max98926 driver is using codec hw_write/control_data, but it is redundant code. 
This patch cleanup these Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/max98926.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98926.c b/sound/soc/codecs/max98926.c index 03d07bf4d942..7b1d1b0fa879 100644 --- a/sound/soc/codecs/max98926.c +++ b/sound/soc/codecs/max98926.c @@ -490,7 +490,7 @@ static int max98926_probe(struct snd_soc_codec *codec) struct max98926_priv *max98926 = snd_soc_codec_get_drvdata(codec); max98926->codec = codec; - codec->control_data = max98926->regmap; + /* Hi-Z all the slots */ regmap_write(max98926->regmap, MAX98926_DOUT_HIZ_CFG4, 0xF0); return 0; -- cgit v1.2.3 From 68fea7cd3f3722cf3d3b7f30e9b5667133446936 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 9 Nov 2017 01:05:11 +0000 Subject: ASoC: don't use codec hw_write on max98927 max98927 driver is using codec hw_write/control_data, but it is redundant code. This patch cleanup these Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/max98927.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/max98927.c b/sound/soc/codecs/max98927.c index a1d39353719d..f701fdc81175 100644 --- a/sound/soc/codecs/max98927.c +++ b/sound/soc/codecs/max98927.c @@ -682,7 +682,6 @@ static int max98927_probe(struct snd_soc_codec *codec) struct max98927_priv *max98927 = snd_soc_codec_get_drvdata(codec); max98927->codec = codec; - codec->control_data = max98927->regmap; /* Software Reset */ regmap_write(max98927->regmap, -- cgit v1.2.3 From c001bf633a910cfc8a5b84b80634db4636bf1724 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 14 Nov 2017 01:04:08 +0000 Subject: ASoC: use internal reg_cache on uda1380 Codec reg_cache is legacy feature and very few driver only are using it. But, ALSA SoC framework needs to support it. Codec will be merged into Component in the future, so, let's remove legacy and unused feature from framework. 
This patch implements ALSA SoC reg_cache feature into driver, and don't use ALSA SoC framework's feature. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/uda1380.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index 44448023edb0..46a495b4da8d 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -38,6 +38,7 @@ struct uda1380_priv { unsigned int dac_clk; struct work_struct work; struct i2c_client *i2c; + u16 *reg_cache; }; /* @@ -63,7 +64,9 @@ static unsigned long uda1380_cache_dirty; static inline unsigned int uda1380_read_reg_cache(struct snd_soc_codec *codec, unsigned int reg) { - u16 *cache = codec->reg_cache; + struct uda1380_priv *uda1380 = snd_soc_codec_get_drvdata(codec); + u16 *cache = uda1380->reg_cache; + if (reg == UDA1380_RESET) return 0; if (reg >= UDA1380_CACHEREGNUM) @@ -77,7 +80,8 @@ static inline unsigned int uda1380_read_reg_cache(struct snd_soc_codec *codec, static inline void uda1380_write_reg_cache(struct snd_soc_codec *codec, u16 reg, unsigned int value) { - u16 *cache = codec->reg_cache; + struct uda1380_priv *uda1380 = snd_soc_codec_get_drvdata(codec); + u16 *cache = uda1380->reg_cache; if (reg >= UDA1380_CACHEREGNUM) return; @@ -134,7 +138,7 @@ static void uda1380_sync_cache(struct snd_soc_codec *codec) struct uda1380_priv *uda1380 = snd_soc_codec_get_drvdata(codec); int reg; u8 data[3]; - u16 *cache = codec->reg_cache; + u16 *cache = uda1380->reg_cache; /* Sync reg_cache with the hardware */ for (reg = 0; reg < UDA1380_MVOL; reg++) { @@ -722,16 +726,9 @@ static int uda1380_probe(struct snd_soc_codec *codec) static const struct snd_soc_codec_driver soc_codec_dev_uda1380 = { .probe = uda1380_probe, - .read = uda1380_read_reg_cache, - .write = uda1380_write, .set_bias_level = uda1380_set_bias_level, .suspend_bias_off = true, - .reg_cache_size = ARRAY_SIZE(uda1380_reg), - .reg_word_size = 
sizeof(u16), - .reg_cache_default = uda1380_reg, - .reg_cache_step = 1, - .component_driver = { .controls = uda1380_snd_controls, .num_controls = ARRAY_SIZE(uda1380_snd_controls), @@ -771,6 +768,13 @@ static int uda1380_i2c_probe(struct i2c_client *i2c, return ret; } + uda1380->reg_cache = devm_kmemdup(&i2c->dev, + uda1380_reg, + ARRAY_SIZE(uda1380_reg) * sizeof(u16), + GFP_KERNEL); + if (!uda1380->reg_cache) + return -ENOMEM; + i2c_set_clientdata(i2c, uda1380); uda1380->i2c = i2c; -- cgit v1.2.3 From c4305af43a80158fba0d2801be9a0e774634add0 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 14 Nov 2017 01:04:25 +0000 Subject: ASoC: use internal reg_cache on tlv320dac33 Codec reg_cache is legacy feature and very few driver only are using it. But, ALSA SoC framework needs to support it. Codec will be merged into Component in the future, so, let's remove legacy and unused feature from framework. This patch implements ALSA SoC reg_cache feature into driver, and don't use ALSA SoC framework's feature. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320dac33.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c index 80a26418827c..675f5b1b90a6 100644 --- a/sound/soc/codecs/tlv320dac33.c +++ b/sound/soc/codecs/tlv320dac33.c @@ -106,6 +106,7 @@ struct tlv320dac33_priv { int mode1_latency; /* latency caused by the i2c writes in * us */ u8 burst_bclkdiv; /* BCLK divider value in burst mode */ + u8 *reg_cache; unsigned int burst_rate; /* Interface speed in Burst modes */ int keep_bclk; /* Keep the BCLK continuously running @@ -173,7 +174,8 @@ static const u8 dac33_reg[DAC33_CACHEREGNUM] = { static inline unsigned int dac33_read_reg_cache(struct snd_soc_codec *codec, unsigned reg) { - u8 *cache = codec->reg_cache; + struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec); + u8 *cache = dac33->reg_cache; if (reg >= DAC33_CACHEREGNUM) return 0; @@ -183,7 +185,8 @@ static inline unsigned int dac33_read_reg_cache(struct snd_soc_codec *codec, static inline void dac33_write_reg_cache(struct snd_soc_codec *codec, u8 reg, u8 value) { - u8 *cache = codec->reg_cache; + struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec); + u8 *cache = dac33->reg_cache; if (reg >= DAC33_CACHEREGNUM) return; @@ -243,19 +246,6 @@ static int dac33_write(struct snd_soc_codec *codec, unsigned int reg, return ret; } -static int dac33_write_locked(struct snd_soc_codec *codec, unsigned int reg, - unsigned int value) -{ - struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec); - int ret; - - mutex_lock(&dac33->mutex); - ret = dac33_write(codec, reg, value); - mutex_unlock(&dac33->mutex); - - return ret; -} - #define DAC33_I2C_ADDR_AUTOINC 0x80 static int dac33_write16(struct snd_soc_codec *codec, unsigned int reg, unsigned int value) @@ -1432,13 +1422,9 @@ static int dac33_soc_remove(struct snd_soc_codec *codec) } static 
const struct snd_soc_codec_driver soc_codec_dev_tlv320dac33 = { - .read = dac33_read_reg_cache, - .write = dac33_write_locked, .set_bias_level = dac33_set_bias_level, .idle_bias_off = true, - .reg_cache_size = ARRAY_SIZE(dac33_reg), - .reg_word_size = sizeof(u8), - .reg_cache_default = dac33_reg, + .probe = dac33_soc_probe, .remove = dac33_soc_remove, @@ -1497,6 +1483,13 @@ static int dac33_i2c_probe(struct i2c_client *client, if (dac33 == NULL) return -ENOMEM; + dac33->reg_cache = devm_kmemdup(&client->dev, + dac33_reg, + ARRAY_SIZE(dac33_reg) * sizeof(u8), + GFP_KERNEL); + if (!dac33->reg_cache) + return -ENOMEM; + dac33->i2c = client; mutex_init(&dac33->mutex); spin_lock_init(&dac33->lock); -- cgit v1.2.3 From 39b5a0f80c07f41440f38761e4b8d36bf2072007 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 14 Nov 2017 01:04:42 +0000 Subject: ASoC: cx20442: don't use reg_cache Codec reg_cache is legacy feature and very few driver only are using it. But, ALSA SoC framework needs to support it. Codec will be merged into Component in the future, so, let's remove legacy and unused feature from framework. cx20442 is using reg_cache but it is only 1byte, and it doesn't use snd_soc_write/read/update_bits function which uses reg_cache. reg_cache user is only debugfs. Let's clean reg_cache for now. But let's keep .write function since it can be used for new additional feature. .read will not be used, let's remove. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/cx20442.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index 2083f7eb9de2..6b6f8e44369b 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -88,17 +88,6 @@ static const struct snd_soc_dapm_route cx20442_audio_map[] = { {"ADC", NULL, "Input Mixer"}, }; -static unsigned int cx20442_read_reg_cache(struct snd_soc_codec *codec, - unsigned int reg) -{ - u8 *reg_cache = codec->reg_cache; - - if (reg >= codec->driver->reg_cache_size) - return -EINVAL; - - return reg_cache[reg]; -} - enum v253_vls { V253_VLS_NONE = 0, V253_VLS_T, @@ -123,6 +112,8 @@ enum v253_vls { V253_VLS_TEST, }; +#if 0 +/* FIXME : these function will be re-used */ static int cx20442_pm_to_v253_vls(u8 value) { switch (value & ~(1 << CX20442_AGC)) { @@ -199,7 +190,7 @@ static int cx20442_write(struct snd_soc_codec *codec, unsigned int reg, return 0; } - +#endif /* * Line discpline related code @@ -399,11 +390,7 @@ static const struct snd_soc_codec_driver cx20442_codec_dev = { .probe = cx20442_codec_probe, .remove = cx20442_codec_remove, .set_bias_level = cx20442_set_bias_level, - .reg_cache_default = &cx20442_reg, - .reg_cache_size = 1, - .reg_word_size = sizeof(u8), - .read = cx20442_read_reg_cache, - .write = cx20442_write, + .component_driver = { .dapm_widgets = cx20442_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(cx20442_dapm_widgets), -- cgit v1.2.3 From 93a00c467fe998bf5716cbc9cabc127046054782 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 14 Nov 2017 01:04:58 +0000 Subject: ASoC: don't use snd_soc_write/read on twl6040 twl6040 doesn't use regmap nor reg_cache. Its write/read are done through twl6040_reg_write/read. This driver directly calling these functions, but sometimes using snd_soc_write/read. As part of cleanup, snd_soc_codec_driver::write, read will be removed soon. 
Then, write/read access through snd_soc_write/read will not work on this driver, since it doesn't use regmap nor reg_cache. This patch replaces snd_soc_write/read with twl6040_write/read. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/twl6040.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/codecs/twl6040.c b/sound/soc/codecs/twl6040.c index a8e6941a1868..3b895b4b451c 100644 --- a/sound/soc/codecs/twl6040.c +++ b/sound/soc/codecs/twl6040.c @@ -543,7 +543,7 @@ int twl6040_get_dl1_gain(struct snd_soc_codec *codec) if (snd_soc_dapm_get_pin_status(dapm, "HSOR") || snd_soc_dapm_get_pin_status(dapm, "HSOL")) { - u8 val = snd_soc_read(codec, TWL6040_REG_HSLCTL); + u8 val = twl6040_read(codec, TWL6040_REG_HSLCTL); if (val & TWL6040_HSDACMODE) /* HSDACL in LP mode */ return -8; /* -8dB */ @@ -1158,8 +1158,6 @@ static int twl6040_remove(struct snd_soc_codec *codec) static const struct snd_soc_codec_driver soc_codec_dev_twl6040 = { .probe = twl6040_probe, .remove = twl6040_remove, - .read = twl6040_read, - .write = twl6040_write, .set_bias_level = twl6040_set_bias_level, .suspend_bias_off = true, .ignore_pmdown_time = true, -- cgit v1.2.3 From 3bb0f7c31b1aedd0f85c675297031281799145d7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 14 Nov 2017 01:05:17 +0000 Subject: ASoC: don't use snd_soc_write/read on twl4030 twl4030 doesn't use regmap nor reg_cache. Its write/read are done through twl4030_reg_write/read. This driver calls these functions directly, but sometimes uses snd_soc_write/read. As part of cleanup, snd_soc_codec_driver::write, read will be removed soon. Then, write/read access through snd_soc_write/read will not work on this driver, since it doesn't use regmap nor reg_cache. This patch replaces snd_soc_write/read with twl4030_write/read.
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/twl4030.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index 90691701b082..8798182959c1 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -850,14 +850,14 @@ static int snd_soc_get_volsw_twl4030(struct snd_kcontrol *kcontrol, int mask = (1 << fls(max)) - 1; ucontrol->value.integer.value[0] = - (snd_soc_read(codec, reg) >> shift) & mask; + (twl4030_read(codec, reg) >> shift) & mask; if (ucontrol->value.integer.value[0]) ucontrol->value.integer.value[0] = max + 1 - ucontrol->value.integer.value[0]; if (shift != rshift) { ucontrol->value.integer.value[1] = - (snd_soc_read(codec, reg) >> rshift) & mask; + (twl4030_read(codec, reg) >> rshift) & mask; if (ucontrol->value.integer.value[1]) ucontrol->value.integer.value[1] = max + 1 - ucontrol->value.integer.value[1]; @@ -908,9 +908,9 @@ static int snd_soc_get_volsw_r2_twl4030(struct snd_kcontrol *kcontrol, int mask = (1<<fls(max))-1; ucontrol->value.integer.value[0] = - (snd_soc_read(codec, reg) >> shift) & mask; + (twl4030_read(codec, reg) >> shift) & mask; ucontrol->value.integer.value[1] = - (snd_soc_read(codec, reg2) >> shift) & mask; + (twl4030_read(codec, reg2) >> shift) & mask; if (ucontrol->value.integer.value[0]) ucontrol->value.integer.value[0] = @@ -2195,8 +2195,6 @@ static int twl4030_soc_remove(struct snd_soc_codec *codec) static const struct snd_soc_codec_driver soc_codec_dev_twl4030 = { .probe = twl4030_soc_probe, .remove = twl4030_soc_remove, - .read = twl4030_read, - .write = twl4030_write, .set_bias_level = twl4030_set_bias_level, .idle_bias_off = true, -- cgit v1.2.3 From 16a27dfd218566f9604d5542c6285395cfc6831c Mon Sep 17 00:00:00 2001 From: Albert Pool Date: Mon, 20 Nov 2017 14:20:09 +0100 Subject: ata: mediatek: Fix typo in module description Signed-off-by: Albert Pool Signed-off-by: Tejun Heo --- drivers/ata/ahci_mtk.c | 4 ++-- 1
file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/ahci_mtk.c b/drivers/ata/ahci_mtk.c index 80854f71559a..489452a64303 100644 --- a/drivers/ata/ahci_mtk.c +++ b/drivers/ata/ahci_mtk.c @@ -1,5 +1,5 @@ /* - * MeidaTek AHCI SATA driver + * MediaTek AHCI SATA driver * * Copyright (c) 2017 MediaTek Inc. * Author: Ryder Lee @@ -192,5 +192,5 @@ static struct platform_driver mtk_ahci_driver = { }; module_platform_driver(mtk_ahci_driver); -MODULE_DESCRIPTION("MeidaTek SATA AHCI Driver"); +MODULE_DESCRIPTION("MediaTek SATA AHCI Driver"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From ddf7005f32212f28669032651e09bd8d2245c35d Mon Sep 17 00:00:00 2001 From: Wang Long Date: Sun, 19 Nov 2017 16:08:37 -0500 Subject: debug cgroup: use task_css_set instead of rcu_dereference This macro `task_css_set` verifies that the caller is inside proper critical section if the kernel set CONFIG_PROVE_RCU=y. Signed-off-by: Wang Long Signed-off-by: Tejun Heo --- kernel/cgroup/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c index 5f780d8f6a9d..9caeda610249 100644 --- a/kernel/cgroup/debug.c +++ b/kernel/cgroup/debug.c @@ -50,7 +50,7 @@ static int current_css_set_read(struct seq_file *seq, void *v) spin_lock_irq(&css_set_lock); rcu_read_lock(); - cset = rcu_dereference(current->cgroups); + cset = task_css_set(current); refcnt = refcount_read(&cset->refcount); seq_printf(seq, "css_set %pK %d", cset, refcnt); if (refcnt > cset->nr_tasks) @@ -96,7 +96,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v) spin_lock_irq(&css_set_lock); rcu_read_lock(); - cset = rcu_dereference(current->cgroups); + cset = task_css_set(current); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; -- cgit v1.2.3 From c1da86c19ad6bfb77ceef3414c82269e8466f410 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 25 Nov 2017 15:49:49 +0530 Subject: pata_pdc2027x: Remove 
unnecessary error check Here, The function pdc_hardware_init always return zero. So it is not necessary to check its return value. Signed-off-by: Arvind Yadav Signed-off-by: Tejun Heo --- drivers/ata/pata_pdc2027x.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c index ffd8d33c6e0f..4a9d532bdbb8 100644 --- a/drivers/ata/pata_pdc2027x.c +++ b/drivers/ata/pata_pdc2027x.c @@ -649,7 +649,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host) * @host: target ATA host * @board_idx: board identifier */ -static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx) +static void pdc_hardware_init(struct ata_host *host, unsigned int board_idx) { long pll_clock; @@ -665,8 +665,6 @@ static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx) /* Adjust PLL control register */ pdc_adjust_pll(host, pll_clock, board_idx); - - return 0; } /** @@ -753,8 +751,7 @@ static int pdc2027x_init_one(struct pci_dev *pdev, //pci_enable_intx(pdev); /* initialize adapter */ - if (pdc_hardware_init(host, board_idx) != 0) - return -EIO; + pdc_hardware_init(host, board_idx); pci_set_master(pdev); return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt, @@ -778,8 +775,7 @@ static int pdc2027x_reinit_one(struct pci_dev *pdev) else board_idx = PDC_UDMA_133; - if (pdc_hardware_init(host, board_idx)) - return -EIO; + pdc_hardware_init(host, board_idx); ata_host_resume(host); return 0; -- cgit v1.2.3 From 20f9ceed72f127e4cc44c0358160e6e0118f823d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 25 Nov 2017 16:47:35 +0530 Subject: pata_pdc2027x : make pdc2027x_*_timing structures const Make these pdc2027x_*_timing structures const as it is never modified. 
Signed-off-by: Arvind Yadav Signed-off-by: Tejun Heo --- drivers/ata/pata_pdc2027x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c index 4a9d532bdbb8..6db2e34bd52f 100644 --- a/drivers/ata/pata_pdc2027x.c +++ b/drivers/ata/pata_pdc2027x.c @@ -82,7 +82,7 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed * is issued to the device. However, if the controller clock is 133MHz, * the following tables must be used. */ -static struct pdc2027x_pio_timing { +static const struct pdc2027x_pio_timing { u8 value0, value1, value2; } pdc2027x_pio_timing_tbl[] = { { 0xfb, 0x2b, 0xac }, /* PIO mode 0 */ @@ -92,7 +92,7 @@ static struct pdc2027x_pio_timing { { 0x23, 0x09, 0x25 }, /* PIO mode 4, IORDY on, Prefetch off */ }; -static struct pdc2027x_mdma_timing { +static const struct pdc2027x_mdma_timing { u8 value0, value1; } pdc2027x_mdma_timing_tbl[] = { { 0xdf, 0x5f }, /* MDMA mode 0 */ @@ -100,7 +100,7 @@ static struct pdc2027x_mdma_timing { { 0x69, 0x25 }, /* MDMA mode 2 */ }; -static struct pdc2027x_udma_timing { +static const struct pdc2027x_udma_timing { u8 value0, value1, value2; } pdc2027x_udma_timing_tbl[] = { { 0x4a, 0x0f, 0xd5 }, /* UDMA mode 0 */ -- cgit v1.2.3 From abee210500ed15a22787009d9210b9a34911afcc Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 27 Nov 2017 15:51:04 -0500 Subject: percpu: hack to let the CRIS architecture to boot until they clean up Commit 438a506180 ("percpu: don't forget to free the temporary struct pcpu_alloc_info") uncovered a problem on the CRIS architecture where the bootmem allocator is initialized with virtual addresses. Given it has: #define __va(x) ((void *)((unsigned long)(x) | 0x80000000)) then things just work out because the end result is the same whether you give this a physical or a virtual address. 
Until you call memblock_free_early(__pa(address)) that is, because values from __pa() don't match with the virtual addresses stuffed in the bootmem allocator anymore. Avoid freeing the temporary pcpu_alloc_info memory on that architecture until they fix things up to let the kernel boot like it did before. Signed-off-by: Nicolas Pitre Signed-off-by: Tejun Heo Fixes: 438a506180 ("percpu: don't forget to free the temporary struct pcpu_alloc_info") --- mm/percpu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/percpu.c b/mm/percpu.c index 79e3549cab0f..50e7fdf84055 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -2719,7 +2719,11 @@ void __init setup_per_cpu_areas(void) if (pcpu_setup_first_chunk(ai, fc) < 0) panic("Failed to initialize percpu areas."); +#ifdef CONFIG_CRIS +#warning "the CRIS architecture has physical and virtual addresses confused" +#else pcpu_free_alloc_info(ai); +#endif } #endif /* CONFIG_SMP */ -- cgit v1.2.3 From 8b1836c4b64386e9bc580438cae386ed31a43ab9 Mon Sep 17 00:00:00 2001 From: Jay Elliott Date: Wed, 15 Nov 2017 15:01:13 -0800 Subject: netfilter: conntrack: clamp timeouts to INT_MAX When the conntracking code multiplies a timeout by HZ, it can overflow from positive to negative; this causes it to instantly expire. To protect against this the multiplication is done in 64-bit so we can prevent it from exceeding INT_MAX.
Signed-off-by: Jay Elliott Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59c08997bfdf..66d72a8fa87f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1566,9 +1566,11 @@ static int ctnetlink_change_helper(struct nf_conn *ct, static int ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) { - u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); + u64 timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; - ct->timeout = nfct_time_stamp + timeout * HZ; + if (timeout > INT_MAX) + timeout = INT_MAX; + ct->timeout = nfct_time_stamp + (u32)timeout; if (test_bit(IPS_DYING_BIT, &ct->status)) return -ETIME; @@ -1768,6 +1770,7 @@ ctnetlink_create_conntrack(struct net *net, int err = -EINVAL; struct nf_conntrack_helper *helper; struct nf_conn_tstamp *tstamp; + u64 timeout; ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) @@ -1776,7 +1779,10 @@ ctnetlink_create_conntrack(struct net *net, if (!cda[CTA_TIMEOUT]) goto err1; - ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; + timeout = (u64)ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; + if (timeout > INT_MAX) + timeout = INT_MAX; + ct->timeout = (u32)timeout + nfct_time_stamp; rcu_read_lock(); if (cda[CTA_HELP]) { -- cgit v1.2.3 From a91d7fb97092d6b840af5899ded3b389603fd7f1 Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Tue, 28 Nov 2017 16:05:13 +0900 Subject: ASoC: rsnd: ssiu: clear SSI_MODE for non TDM Extended modes register SSI_MODE is set when SSI works in TDM Extended, but it isn't reset when SSI starts to work in other modes, thus causes issues. This patch clears SSI_MODE register when SSI works in modes other than TDM Extended.
Fixes: 186fadc132f0 ("ASoC: rsnd: add TDM Extend Mode support") Signed-off-by: Jiada Wang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssiu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 4d948757d300..6ff8a36c2c82 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, { int hdmi = rsnd_ssi_hdmi_port(io); int ret; + u32 mode = 0; ret = rsnd_ssiu_init(mod, io, priv); if (ret < 0) @@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod, * see * rsnd_ssi_config_init() */ - rsnd_mod_write(mod, SSI_MODE, 0x1); + mode = 0x1; } + rsnd_mod_write(mod, SSI_MODE, mode); + if (rsnd_ssi_use_busif(io)) { rsnd_mod_write(mod, SSI_BUSIF_ADINR, rsnd_get_adinr_bit(mod, io) | -- cgit v1.2.3 From 52cf373c37a684f8fc279d541307fad39d206376 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 28 Nov 2017 13:59:25 +0100 Subject: cgroup: properly init u64_stats Lockdep complains that the stats update is trying to register a non-static key. This is because u64_stats are using a seqlock on 32bit arches, which needs to be initialized before usage. 
Fixes: 041cd640b2f3 (cgroup: Implement cgroup2 basic CPU usage accounting) Signed-off-by: Lucas Stach Signed-off-by: Tejun Heo --- kernel/cgroup/stat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c index 133b465691d6..1e111dd455c4 100644 --- a/kernel/cgroup/stat.c +++ b/kernel/cgroup/stat.c @@ -296,8 +296,12 @@ int cgroup_stat_init(struct cgroup *cgrp) } /* ->updated_children list is self terminated */ - for_each_possible_cpu(cpu) - cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp; + for_each_possible_cpu(cpu) { + struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu); + + cstat->updated_children = cgrp; + u64_stats_init(&cstat->sync); + } prev_cputime_init(&cgrp->stat.prev_cputime); -- cgit v1.2.3 From ef2e8175eb19011f756469d4d14f4207bf7f289c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:27:09 +0000 Subject: ASoC: add snd_soc_disconnect_sync() Now, we have snd_card_disconnect_sync() on ALSA framework. snd_soc_disconnect_sync() is ASoC version of it. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 2 ++ sound/soc/soc-core.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/sound/soc.h b/include/sound/soc.h index 1a7323238c49..22f479e1da0a 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -494,6 +494,8 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num); int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num); #endif +void snd_soc_disconnect_sync(struct device *dev); + struct snd_pcm_substream *snd_soc_get_dai_substream(struct snd_soc_card *card, const char *dai_link, int stream); struct snd_soc_pcm_runtime *snd_soc_get_pcm_runtime(struct snd_soc_card *card, diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c0edac80df34..9047046c749c 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1392,6 +1392,16 @@ static int soc_init_dai_link(struct snd_soc_card *card, return 0; } +void snd_soc_disconnect_sync(struct device *dev) +{ + struct snd_soc_component *component = snd_soc_lookup_component(dev, NULL); + + if (!component || !component->card) + return; + + snd_card_disconnect_sync(component->card->snd_card); +} + /** * snd_soc_add_dai_link - Add a DAI link dynamically * @card: The ASoC card to which the DAI link is added -- cgit v1.2.3 From 180d9ef58104dfae78622d01910b9b7756701134 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 6 Nov 2017 08:41:37 +0000 Subject: ASoC: rsnd: call snd_soc_disconnect_sync() when remove Renesas R-Car sound driver should be stopped if unbinded during playbacking/capturing. Otherwise clock open/close counter mismatch happen. One note is that we can't skip from remove function (= return -Exxx) in such case if user used unbind. Because unbind function doesn't check return value from each driver's remove function. This means we must to stop and remove driver in remove function. Now ASoC has snd_soc_disconnect_sync() for this purpose. Let's use it. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index f12a88a21dfa..bd64dc6ec1c3 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1496,6 +1496,8 @@ static int rsnd_remove(struct platform_device *pdev) }; int ret = 0, i; + snd_soc_disconnect_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); for_each_rsnd_dai(rdai, priv, i) { -- cgit v1.2.3 From a78841479696efa557a2251c86462c936a208f90 Mon Sep 17 00:00:00 2001 From: Sunny Luo Date: Tue, 28 Nov 2017 21:29:24 +0800 Subject: spi: meson-axg: update compatible string for the Meson-AXG Update the compatible string to support Meson-AXG SoCs. Signed-off-by: Sunny Luo Signed-off-by: Yixun Lan Acked-by: Rob Herring Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-meson.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/spi/spi-meson.txt b/Documentation/devicetree/bindings/spi/spi-meson.txt index 825c39cae74a..b7f5e86fed22 100644 --- a/Documentation/devicetree/bindings/spi/spi-meson.txt +++ b/Documentation/devicetree/bindings/spi/spi-meson.txt @@ -27,7 +27,9 @@ The Meson SPICC is generic SPI controller for general purpose Full-Duplex communications with dedicated 16 words RX/TX PIO FIFOs. Required properties: - - compatible: should be "amlogic,meson-gx-spicc" on Amlogic GX SoCs. + - compatible: should be: + "amlogic,meson-gx-spicc" on Amlogic GX and compatible SoCs.
+ "amlogic,meson-axg-spicc" on Amlogic AXG and compatible SoCs - reg: physical base address and length of the controller registers - interrupts: The interrupt specifier - clock-names: Must contain "core" -- cgit v1.2.3 From a5db27c00da37654ba518b814925d4e9cd05259c Mon Sep 17 00:00:00 2001 From: Sunny Luo Date: Tue, 28 Nov 2017 21:29:25 +0800 Subject: spi: meson-axg: add SPICC driver support Add new compatible string to support SPICC controller which found at Amlogic Meson-AXG SoC. This is aiming at adding a couple of enhanced feature patches. Signed-off-by: Sunny Luo Signed-off-by: Yixun Lan Signed-off-by: Mark Brown --- drivers/spi/spi-meson-spicc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index 7f8429635502..5c82910e3480 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -599,6 +599,7 @@ static int meson_spicc_remove(struct platform_device *pdev) static const struct of_device_id meson_spicc_of_match[] = { { .compatible = "amlogic,meson-gx-spicc", }, + { .compatible = "amlogic,meson-axg-spicc", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, meson_spicc_of_match); -- cgit v1.2.3 From bd2b879a1ca55486fdb9dcac691bfd3dd79c83d6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 12:29:01 -0700 Subject: rcu: Add tracing to irq/NMI dyntick-idle transitions Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 14 ++++++++------ kernel/rcu/tree.c | 6 ++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 59d40c454aa0..4674b21247f7 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -421,16 +421,18 @@ TRACE_EVENT(rcu_fqs, /* * Tracepoint for dyntick-idle entry/exit events. 
These take a string - * as argument: "Start" for entering dyntick-idle mode, "End" for - * leaving it, "--=" for events moving towards idle, and "++=" for events - * moving away from idle. "Error on entry: not idle task" and "Error on - * exit: not idle task" indicate that a non-idle task is erroneously + * as argument: "Start" for entering dyntick-idle mode, "Startirq" for + * entering it from irq/NMI, "End" for leaving it, "Endirq" for leaving it + * to irq/NMI, "--=" for events moving towards idle, and "++=" for events + * moving away from idle. "Error on entry: not idle task" and "Error + * on exit: not idle task" indicate that a non-idle task is erroneously * toying with the idle loop. * * These events also take a pair of numbers, which indicate the nesting * depth before and after the event of interest. Note that task-related - * events use the upper bits of each number, while interrupt-related - * events use the lower bits. + * and interrupt-related events use two separate counters, and that the + * "++=" and "--=" events for irq/NMI will change the counter by two, + * otherwise by one. */ TRACE_EVENT(rcu_dyntick, diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 444aa2b3f24d..d069ba2d8412 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -880,12 +880,15 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { + trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, + rdtp->dynticks_nmi_nesting - 2); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ + trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } @@ -1057,6 +1060,9 @@ void rcu_nmi_enter(void) rcu_dynticks_eqs_exit(); incby = 1; } + trace_rcu_dyntick(incby == 1 ? 
TPS("Endirq") : TPS("++="), + rdtp->dynticks_nmi_nesting, + rdtp->dynticks_nmi_nesting + incby); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdtp->dynticks_nmi_nesting + incby); barrier(); -- cgit v1.2.3 From 84585aa8b6ad24e5bdfba9db4a320a6aeed192ab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 15:55:16 -0700 Subject: rcu: Shrink ->dynticks_{nmi_,}nesting from long long to long Because the ->dynticks_nesting field now only contains the process-based nesting level instead of a value encoding both the process nesting level and the irq "nesting" level, we no longer need a long long, even on 32-bit systems. This commit therefore changes both the ->dynticks_nesting and ->dynticks_nmi_nesting fields to long. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 8 ++++---- kernel/rcu/rcu.h | 2 +- kernel/rcu/tree.c | 6 +++--- kernel/rcu/tree.h | 4 ++-- kernel/rcu/tree_plugin.h | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 4674b21247f7..b0a48231ea0e 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -436,14 +436,14 @@ TRACE_EVENT(rcu_fqs, */ TRACE_EVENT(rcu_dyntick, - TP_PROTO(const char *polarity, long long oldnesting, long long newnesting), + TP_PROTO(const char *polarity, long oldnesting, long newnesting), TP_ARGS(polarity, oldnesting, newnesting), TP_STRUCT__entry( __field(const char *, polarity) - __field(long long, oldnesting) - __field(long long, newnesting) + __field(long, oldnesting) + __field(long, newnesting) ), TP_fast_assign( @@ -452,7 +452,7 @@ TRACE_EVENT(rcu_dyntick, __entry->newnesting = newnesting; ), - TP_printk("%s %llx %llx", __entry->polarity, + TP_printk("%s %lx %lx", __entry->polarity, __entry->oldnesting, __entry->newnesting) ); diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index afe0559d1867..6334f2c1abd0 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -31,7 +31,7 @@ 
#endif /* #else #ifdef CONFIG_RCU_TRACE */ /* Offset to allow for unmatched rcu_irq_{enter,exit}(). */ -#define DYNTICK_IRQ_NONIDLE ((INT_MAX / 2) + 1) +#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d069ba2d8412..92de3bacda07 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -946,7 +946,7 @@ void rcu_irq_exit_irqson(void) * we really have exited idle, and must do the appropriate accounting. * The caller must have disabled interrupts. */ -static void rcu_eqs_exit_common(long long newval, int user) +static void rcu_eqs_exit_common(long newval, int user) { RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) @@ -979,7 +979,7 @@ static void rcu_eqs_exit_common(long long newval, int user) static void rcu_eqs_exit(bool user) { struct rcu_dynticks *rdtp; - long long oldval; + long oldval; lockdep_assert_irqs_disabled(); rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1043,7 +1043,7 @@ void rcu_user_exit(void) void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); - int incby = 2; + long incby = 2; /* Complain about underflow. */ WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index dbd7e3753bed..6488a3b0e729 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -38,8 +38,8 @@ * Dynticks per-CPU state. */ struct rcu_dynticks { - long long dynticks_nesting; /* Track irq/process nesting level. */ - int dynticks_nmi_nesting; /* Track NMI nesting level. */ + long dynticks_nesting; /* Track process nesting level. */ + long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. */ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. 
*/ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index db85ca3975f1..e94e754464cd 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1687,7 +1687,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); delta = rdp->mynode->gpnum - rdp->rcu_iw_gpnum; - pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", + pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%ld softirq=%u/%u fqs=%ld %s\n", cpu, "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], -- cgit v1.2.3 From dec98900eae1e22467182e58688abe5fae98bd5f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 16:24:29 -0700 Subject: rcu: Add ->dynticks field to rcu_dyntick trace event Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 13 ++++++++----- kernel/rcu/tree.c | 16 +++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index b0a48231ea0e..d103de9f8c10 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -436,24 +436,27 @@ TRACE_EVENT(rcu_fqs, */ TRACE_EVENT(rcu_dyntick, - TP_PROTO(const char *polarity, long oldnesting, long newnesting), + TP_PROTO(const char *polarity, long oldnesting, long newnesting, atomic_t dynticks), - TP_ARGS(polarity, oldnesting, newnesting), + TP_ARGS(polarity, oldnesting, newnesting, dynticks), TP_STRUCT__entry( __field(const char *, polarity) __field(long, oldnesting) __field(long, newnesting) + __field(int, dynticks) ), TP_fast_assign( __entry->polarity = polarity; __entry->oldnesting = oldnesting; __entry->newnesting = newnesting; + __entry->dynticks = atomic_read(&dynticks); ), - TP_printk("%s %lx %lx", __entry->polarity, - __entry->oldnesting, __entry->newnesting) + TP_printk("%s %lx %lx %#3x", __entry->polarity, + __entry->oldnesting, __entry->newnesting, + __entry->dynticks & 0xfff) ); /* @@ -801,7 
+804,7 @@ TRACE_EVENT(rcu_barrier, grplo, grphi, gp_tasks) do { } \ while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) -#define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) +#define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0) #define trace_rcu_prep_idle(reason) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 92de3bacda07..5febb76809f6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -761,13 +761,13 @@ static void rcu_eqs_enter_common(bool user) struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); lockdep_assert_irqs_disabled(); - trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0); + trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0); + trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0, rdtp->dynticks); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -880,15 +880,14 @@ void rcu_nmi_exit(void) * leave it in non-RCU-idle state. */ if (rdtp->dynticks_nmi_nesting != 1) { - trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, - rdtp->dynticks_nmi_nesting - 2); + trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ rdtp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. 
*/ - trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0); + trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ rcu_dynticks_eqs_enter(); } @@ -953,14 +952,13 @@ static void rcu_eqs_exit_common(long newval, int user) rcu_dynticks_task_exit(); rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval, rdtp->dynticks); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - trace_rcu_dyntick(TPS("Error on exit: not idle task"), - rdtp->dynticks_nesting, newval); + trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, newval, rdtp->dynticks); rcu_ftrace_dump(DUMP_ORIG); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", current->pid, current->comm, @@ -1062,7 +1060,7 @@ void rcu_nmi_enter(void) } trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), rdtp->dynticks_nmi_nesting, - rdtp->dynticks_nmi_nesting + incby); + rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks); WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdtp->dynticks_nmi_nesting + incby); barrier(); -- cgit v1.2.3 From 914955e18ca09fc404d7fc3614bb04c96a03692c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 13:50:57 -0700 Subject: rcu: Stop duplicating lockdep checks in RCU's idle-entry code The three RCU_LOCKDEP_WARN() calls in rcu_eqs_enter_common() are redundant with other lockdep checks, so this commit removes them. Signed-off-by: Paul E. 
McKenney --- kernel/rcu/tree.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5febb76809f6..80cada11f544 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -781,17 +781,6 @@ static void rcu_eqs_enter_common(bool user) rdtp->dynticks_nesting = 0; rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); - - /* - * It is illegal to enter an extended quiescent state while - * in an RCU read-side critical section. - */ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), - "Illegal idle entry in RCU read-side critical section."); - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), - "Illegal idle entry in RCU-bh read-side critical section."); - RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), - "Illegal idle entry in RCU-sched read-side critical section."); } /* -- cgit v1.2.3 From 2342172fd6c148506456862d795c7f155baf6797 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 15:03:10 -0700 Subject: rcu: Avoid ->dynticks_nesting store tearing Although ->dynticks_nesting is updated only by process level, it is accessed from hardirq to check for interrupt-from-idle quiescent states. Store tearing is thus possible, so this commit applies WRITE_ONCE() to ->dynticks_nesting stores. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 80cada11f544..b2ded4d436c6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -778,7 +778,7 @@ static void rcu_eqs_enter_common(bool user) do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - rdtp->dynticks_nesting = 0; + WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. 
*/ rcu_dynticks_eqs_enter(); rcu_dynticks_task_enter(); } @@ -976,7 +976,7 @@ static void rcu_eqs_exit(bool user) rdtp->dynticks_nesting++; } else { rcu_eqs_exit_common(1, user); - rdtp->dynticks_nesting = 1; + WRITE_ONCE(rdtp->dynticks_nesting, 1); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } } @@ -3713,7 +3713,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !init_nocb_callback_list(rdp)) rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ - rdp->dynticks->dynticks_nesting = 1; + rdp->dynticks->dynticks_nesting = 1; /* CPU not up, no tearing. */ rcu_dynticks_eqs_online(); raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ -- cgit v1.2.3 From 215bba9f59e35e64b9936da62632b2fa3ede647c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 16:37:03 -0700 Subject: rcu: Fold rcu_eqs_enter_common() into rcu_eqs_enter() There is now only one call to rcu_eqs_enter_common() and there is no other reason to keep it separate. This commit therefore inlines it into its sole call site, saving a few lines of code in the process. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b2ded4d436c6..5c8a5796c71f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -749,16 +749,27 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * rcu_eqs_enter_common - current CPU is entering an extended quiescent state + * Enter an RCU extended quiescent state, which can be either the + * idle loop or adaptive-tickless usermode execution. * - * Enter idle, doing appropriate accounting. The caller must have - * disabled interrupts. 
+ * We crowbar the ->dynticks_nmi_nesting field to zero to allow for + * the possibility of usermode upcalls having messed up our count + * of interrupt nesting level during the prior busy period. */ -static void rcu_eqs_enter_common(bool user) +static void rcu_eqs_enter(bool user) { struct rcu_state *rsp; struct rcu_data *rdp; - struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + struct rcu_dynticks *rdtp; + + rdtp = this_cpu_ptr(&rcu_dynticks); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting == 0); + if (rdtp->dynticks_nesting != 1) { + rdtp->dynticks_nesting--; + return; + } lockdep_assert_irqs_disabled(); trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); @@ -783,28 +794,6 @@ static void rcu_eqs_enter_common(bool user) rcu_dynticks_task_enter(); } -/* - * Enter an RCU extended quiescent state, which can be either the - * idle loop or adaptive-tickless usermode execution. - * - * We crowbar the ->dynticks_nmi_nesting field to zero to allow for - * the possibility of usermode upcalls having messed up our count - * of interrupt nesting level during the prior busy period. - */ -static void rcu_eqs_enter(bool user) -{ - struct rcu_dynticks *rdtp; - - rdtp = this_cpu_ptr(&rcu_dynticks); - WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); - WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - rdtp->dynticks_nesting == 0); - if (rdtp->dynticks_nesting == 1) - rcu_eqs_enter_common(user); - else - rdtp->dynticks_nesting--; -} - /** * rcu_idle_enter - inform RCU that current CPU is entering idle * -- cgit v1.2.3 From 9dd238e28640d5514bbd0ff2d425f32409981d85 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 16:56:26 -0700 Subject: rcu: Fold rcu_eqs_exit_common() into rcu_eqs_exit() There is now only one call to rcu_eqs_exit_common() and there is no other reason to keep it separate. 
This commit therefore inlines it into its sole call site, saving a few lines of code in the process. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5c8a5796c71f..46a8e06bf03e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -916,34 +916,6 @@ void rcu_irq_exit_irqson(void) local_irq_restore(flags); } -/* - * rcu_eqs_exit_common - current CPU moving away from extended quiescent state - * - * If the new value of the ->dynticks_nesting counter was previously zero, - * we really have exited idle, and must do the appropriate accounting. - * The caller must have disabled interrupts. - */ -static void rcu_eqs_exit_common(long newval, int user) -{ - RCU_TRACE(struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);) - - rcu_dynticks_task_exit(); - rcu_dynticks_eqs_exit(); - rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, newval, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, newval, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } -} - /* * Exit an RCU extended quiescent state, which can be either the * idle loop or adaptive-tickless usermode execution. 
@@ -963,11 +935,25 @@ static void rcu_eqs_exit(bool user) WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); if (oldval) { rdtp->dynticks_nesting++; - } else { - rcu_eqs_exit_common(1, user); - WRITE_ONCE(rdtp->dynticks_nesting, 1); - WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); + return; + } + rcu_dynticks_task_exit(); + rcu_dynticks_eqs_exit(); + rcu_cleanup_after_idle(); + trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); + + trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, 1, rdtp->dynticks); + rcu_ftrace_dump(DUMP_ORIG); + WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", + current->pid, current->comm, + idle->pid, idle->comm); /* must be idle task! */ } + WRITE_ONCE(rdtp->dynticks_nesting, 1); + WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } /** -- cgit v1.2.3 From e68bbb266dcfed201d8d54a2828ef820d747f083 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Oct 2017 19:55:31 -0700 Subject: rcu: Simplify rcu_eqs_{enter,exit}() non-idle task debug code The code that checks for non-idle non-nohz_idle-usermode tasks invoking rcu_eqs_enter() and rcu_eqs_exit() prints a considerable quantity of helpful information. However, these checks fire rarely, so the extra complexity is no longer worth it. This commit therefore replaces this debug code with simple WARN_ON_ONCE() statements. Signed-off-by: Paul E. 
McKenney --- include/trace/events/rcu.h | 12 +++++------- kernel/rcu/tree.c | 24 ++---------------------- 2 files changed, 7 insertions(+), 29 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d103de9f8c10..adf47c635c8e 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -424,15 +424,13 @@ TRACE_EVENT(rcu_fqs, * as argument: "Start" for entering dyntick-idle mode, "Startirq" for * entering it from irq/NMI, "End" for leaving it, "Endirq" for leaving it * to irq/NMI, "--=" for events moving towards idle, and "++=" for events - * moving away from idle. "Error on entry: not idle task" and "Error - * on exit: not idle task" indicate that a non-idle task is erroneously - * toying with the idle loop. + * moving away from idle. * * These events also take a pair of numbers, which indicate the nesting - * depth before and after the event of interest. Note that task-related - * and interrupt-related events use two separate counters, and that the - * "++=" and "--=" events for irq/NMI will change the counter by two, - * otherwise by one. + * depth before and after the event of interest, and a third number that is + * the ->dynticks counter. Note that task-related and interrupt-related + * events use two separate counters, and that the "++=" and "--=" events + * for irq/NMI will change the counter by two, otherwise by one. 
*/ TRACE_EVENT(rcu_dyntick, diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 46a8e06bf03e..4d374d2bc925 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -773,17 +773,7 @@ static void rcu_eqs_enter(bool user) lockdep_assert_irqs_disabled(); trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on entry: not idle task"), rdtp->dynticks_nesting, 0, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); do_nocb_deferred_wakeup(rdp); @@ -941,17 +931,7 @@ static void rcu_eqs_exit(bool user) rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); - if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && - !user && !is_idle_task(current)) { - struct task_struct *idle __maybe_unused = - idle_task(smp_processor_id()); - - trace_rcu_dyntick(TPS("Error on exit: not idle task"), rdtp->dynticks_nesting, 1, rdtp->dynticks); - rcu_ftrace_dump(DUMP_ORIG); - WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", - current->pid, current->comm, - idle->pid, idle->comm); /* must be idle task! */ - } + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); WRITE_ONCE(rdtp->dynticks_nesting, 1); WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); } -- cgit v1.2.3 From 3af3999b9a325d462c9353389b7507c4b7bc5428 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 6 Oct 2017 13:48:14 -0700 Subject: doc: Update dyntick-idle design documentation for NMI/irq consolidation Signed-off-by: Paul E. McKenney --- .../Design/Data-Structures/Data-Structures.html | 46 +++++++++++++++------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index 38d6d800761f..1ac011de606e 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -1182,8 +1182,8 @@ CPU (and from tracing) unless otherwise stated. Its fields are as follows:
-  1   int dynticks_nesting;
-  2   int dynticks_nmi_nesting;
+  1   long dynticks_nesting;
+  2   long dynticks_nmi_nesting;
   3   atomic_t dynticks;
   4   bool rcu_need_heavy_qs;
   5   unsigned long rcu_qs_ctr;
@@ -1191,15 +1191,31 @@ Its fields are as follows:
 

The ->dynticks_nesting field counts the -nesting depth of normal interrupts. -In addition, this counter is incremented when exiting dyntick-idle -mode and decremented when entering it. +nesting depth of process execution, so that in normal circumstances +this counter has value zero or one. +NMIs, irqs, and tracers are counted by the ->dynticks_nmi_nesting +field. +Because NMIs cannot be masked, changes to this variable have to be +undertaken carefully using an algorithm provided by Andy Lutomirski. +The initial transition from idle adds one, and nested transitions +add two, so that a nesting level of five is represented by a +->dynticks_nmi_nesting value of nine. This counter can therefore be thought of as counting the number of reasons why this CPU cannot be permitted to enter dyntick-idle -mode, aside from non-maskable interrupts (NMIs). -NMIs are counted by the ->dynticks_nmi_nesting -field, except that NMIs that interrupt non-dyntick-idle execution -are not counted. +mode, aside from process-level transitions. + +

However, it turns out that when running in non-idle kernel context, +the Linux kernel is fully capable of entering interrupt handlers that +never exit and perhaps also vice versa. +Therefore, whenever the ->dynticks_nesting field is +incremented up from zero, the ->dynticks_nmi_nesting field +is set to a large positive number, and whenever the +->dynticks_nesting field is decremented down to zero, +the ->dynticks_nmi_nesting field is set to zero. +Assuming that the number of misnested interrupts is not sufficient +to overflow the counter, this approach corrects the +->dynticks_nmi_nesting field every time the corresponding +CPU enters the idle loop from process context.

The ->dynticks field counts the corresponding CPU's transitions to and from dyntick-idle mode, so that this counter @@ -1231,14 +1247,16 @@ in response.   Quick Quiz: - Why not just count all NMIs? - Wouldn't that be simpler and less error prone? + Why not simply combine the ->dynticks_nesting + and ->dynticks_nmi_nesting counters into a + single counter that just counts the number of reasons that + the corresponding CPU is non-idle? Answer: - It seems simpler only until you think hard about how to go about - updating the rcu_dynticks structure's - ->dynticks field. + Because this would fail in the presence of interrupts whose + handlers never return and of handlers that manage to return + from a made-up interrupt.   -- cgit v1.2.3 From 584c005951866792d552f21f7445e8104ce10f9c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 12 Oct 2017 18:12:57 -0400 Subject: tracing, rcu: Remove no longer used trace event rcu_prep_idle Commit c0f4dfd4f90 ("rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks") removed the only instances of trace_rcu_prep_idle, but did not remove the TRACE_EVENT() that creates it. As defined trace events take up memory within the kernel even when they are not used, this is a waste of space. Remove the obsolete event. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index adf47c635c8e..9bafeaf4e0e0 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -457,45 +457,6 @@ TRACE_EVENT(rcu_dyntick, __entry->dynticks & 0xfff) ); -/* - * Tracepoint for RCU preparation for idle, the goal being to get RCU - * processing done so that the current CPU can shut off its scheduling - * clock and enter dyntick-idle mode. 
One way to accomplish this is - * to drain all RCU callbacks from this CPU, and the other is to have - * done everything RCU requires for the current grace period. In this - * latter case, the CPU will be awakened at the end of the current grace - * period in order to process the remainder of its callbacks. - * - * These tracepoints take a string as argument: - * - * "No callbacks": Nothing to do, no callbacks on this CPU. - * "In holdoff": Nothing to do, holding off after unsuccessful attempt. - * "Begin holdoff": Attempt failed, don't retry until next jiffy. - * "Dyntick with callbacks": Entering dyntick-idle despite callbacks. - * "Dyntick with lazy callbacks": Entering dyntick-idle w/lazy callbacks. - * "More callbacks": Still more callbacks, try again to clear them out. - * "Callbacks drained": All callbacks processed, off to dyntick idle! - * "Timer": Timer fired to cause CPU to continue processing callbacks. - * "Demigrate": Timer fired on wrong CPU, woke up correct CPU. - * "Cleanup after idle": Idle exited, timer canceled. - */ -TRACE_EVENT(rcu_prep_idle, - - TP_PROTO(const char *reason), - - TP_ARGS(reason), - - TP_STRUCT__entry( - __field(const char *, reason) - ), - - TP_fast_assign( - __entry->reason = reason; - ), - - TP_printk("%s", __entry->reason) -); - /* * Tracepoint for the registration of a single RCU callback function. * The first argument is the type of RCU, the second argument is @@ -803,7 +764,6 @@ TRACE_EVENT(rcu_barrier, while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) #define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0) -#define trace_rcu_prep_idle(reason) do { } while (0) #define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0) #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \ do { } while (0) -- cgit v1.2.3 From d633198088bd9e358566c470ed182994403acc7a Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 10 Oct 2017 13:52:30 -0700 Subject: srcu: Prohibit call_srcu() use under raw spinlocks Invoking queue_delayed_work() while holding a raw spinlock is forbidden in -rt kernels, which is exactly what __call_srcu() does, indirectly via srcu_funnel_gp_start(). This commit therefore downgrades Tree SRCU's locking from raw to non-raw spinlocks, which works because call_srcu() is not ever called while holding a raw spinlock. Reported-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- include/linux/srcutree.h | 8 ++-- kernel/rcu/srcutree.c | 109 +++++++++++++++++++++++++++++------------------ 2 files changed, 72 insertions(+), 45 deletions(-) diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index a949f4f9e4d7..4eda108abee0 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -40,7 +40,7 @@ struct srcu_data { unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ /* Update-side state. */ - raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; + spinlock_t __private lock ____cacheline_internodealigned_in_smp; struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ @@ -58,7 +58,7 @@ struct srcu_data { * Node in SRCU combining tree, similar in function to rcu_data. */ struct srcu_node { - raw_spinlock_t __private lock; + spinlock_t __private lock; unsigned long srcu_have_cbs[4]; /* GP seq for children */ /* having CBs, but only */ /* is > ->srcu_gq_seq. */ @@ -78,7 +78,7 @@ struct srcu_struct { struct srcu_node *level[RCU_NUM_LVLS + 1]; /* First node at each level. */ struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ - raw_spinlock_t __private lock; /* Protect counters */ + spinlock_t __private lock; /* Protect counters */ struct mutex srcu_gp_mutex; /* Serialize GP work. */ unsigned int srcu_idx; /* Current rdr array element. 
*/ unsigned long srcu_gp_seq; /* Grace-period seq #. */ @@ -107,7 +107,7 @@ struct srcu_struct { #define __SRCU_STRUCT_INIT(name) \ { \ .sda = &name##_srcu_data, \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ .srcu_gp_seq_needed = 0 - 1, \ __SRCU_DEP_MAP_INIT(name) \ } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 6d5880089ff6..d5cea81378cc 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -53,6 +53,33 @@ static void srcu_invoke_callbacks(struct work_struct *work); static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); static void process_srcu(struct work_struct *work); +/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ +#define spin_lock_rcu_node(p) \ +do { \ + spin_lock(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) + +#define spin_lock_irq_rcu_node(p) \ +do { \ + spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define spin_unlock_irq_rcu_node(p) \ + spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) + +#define spin_lock_irqsave_rcu_node(p, flags) \ +do { \ + spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ + smp_mb__after_unlock_lock(); \ +} while (0) + +#define spin_unlock_irqrestore_rcu_node(p, flags) \ + spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ + /* * Initialize SRCU combining tree. Note that statically allocated * srcu_struct structures might already have srcu_read_lock() and @@ -77,7 +104,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) /* Each pass through this loop initializes one srcu_node structure. 
*/ rcu_for_each_node_breadth_first(sp, snp) { - raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock)); + spin_lock_init(&ACCESS_PRIVATE(snp, lock)); WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { @@ -111,7 +138,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) snp_first = sp->level[level]; for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(sp->sda, cpu); - raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); + spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; @@ -170,7 +197,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name, /* Don't re-initialize a lock while it is held. */ debug_check_no_locks_freed((void *)sp, sizeof(*sp)); lockdep_init_map(&sp->dep_map, name, key, 0); - raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); + spin_lock_init(&ACCESS_PRIVATE(sp, lock)); return init_srcu_struct_fields(sp, false); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -187,7 +214,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); */ int init_srcu_struct(struct srcu_struct *sp) { - raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); + spin_lock_init(&ACCESS_PRIVATE(sp, lock)); return init_srcu_struct_fields(sp, false); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -210,13 +237,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp) /* The smp_load_acquire() pairs with the smp_store_release(). */ if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. 
*/ - raw_spin_lock_irqsave_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(sp, flags); if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { - raw_spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(sp, flags); return; } init_srcu_struct_fields(sp, true); - raw_spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -513,7 +540,7 @@ static void srcu_gp_end(struct srcu_struct *sp) mutex_lock(&sp->srcu_cb_mutex); /* End the current grace period. */ - raw_spin_lock_irq_rcu_node(sp); + spin_lock_irq_rcu_node(sp); idx = rcu_seq_state(sp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); cbdelay = srcu_get_delay(sp); @@ -522,7 +549,7 @@ static void srcu_gp_end(struct srcu_struct *sp) gpseq = rcu_seq_current(&sp->srcu_gp_seq); if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) sp->srcu_gp_seq_needed_exp = gpseq; - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); mutex_unlock(&sp->srcu_gp_mutex); /* A new grace period can start at this point. But only one. 
*/ @@ -530,7 +557,7 @@ static void srcu_gp_end(struct srcu_struct *sp) idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); rcu_for_each_node_breadth_first(sp, snp) { - raw_spin_lock_irq_rcu_node(snp); + spin_lock_irq_rcu_node(snp); cbs = false; if (snp >= sp->level[rcu_num_lvls - 1]) cbs = snp->srcu_have_cbs[idx] == gpseq; @@ -540,7 +567,7 @@ static void srcu_gp_end(struct srcu_struct *sp) snp->srcu_gp_seq_needed_exp = gpseq; mask = snp->srcu_data_have_cbs[idx]; snp->srcu_data_have_cbs[idx] = 0; - raw_spin_unlock_irq_rcu_node(snp); + spin_unlock_irq_rcu_node(snp); if (cbs) srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); @@ -548,11 +575,11 @@ static void srcu_gp_end(struct srcu_struct *sp) if (!(gpseq & counter_wrap_check)) for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { sdp = per_cpu_ptr(sp->sda, cpu); - raw_spin_lock_irqsave_rcu_node(sdp, flags); + spin_lock_irqsave_rcu_node(sdp, flags); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) sdp->srcu_gp_seq_needed = gpseq; - raw_spin_unlock_irqrestore_rcu_node(sdp, flags); + spin_unlock_irqrestore_rcu_node(sdp, flags); } } @@ -560,17 +587,17 @@ static void srcu_gp_end(struct srcu_struct *sp) mutex_unlock(&sp->srcu_cb_mutex); /* Start a new grace period if needed. */ - raw_spin_lock_irq_rcu_node(sp); + spin_lock_irq_rcu_node(sp); gpseq = rcu_seq_current(&sp->srcu_gp_seq); if (!rcu_seq_state(gpseq) && ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { srcu_gp_start(sp); - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); /* Throttle expedited grace periods: Should be rare! */ srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff ? 
0 : SRCU_INTERVAL); } else { - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); } } @@ -590,18 +617,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, if (rcu_seq_done(&sp->srcu_gp_seq, s) || ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) return; - raw_spin_lock_irqsave_rcu_node(snp, flags); + spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { - raw_spin_unlock_irqrestore_rcu_node(snp, flags); + spin_unlock_irqrestore_rcu_node(snp, flags); return; } WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); - raw_spin_unlock_irqrestore_rcu_node(snp, flags); + spin_unlock_irqrestore_rcu_node(snp, flags); } - raw_spin_lock_irqsave_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(sp, flags); if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) sp->srcu_gp_seq_needed_exp = s; - raw_spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -623,12 +650,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, for (; snp != NULL; snp = snp->srcu_parent) { if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) return; /* GP already done and CBs recorded. */ - raw_spin_lock_irqsave_rcu_node(snp, flags); + spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { snp_seq = snp->srcu_have_cbs[idx]; if (snp == sdp->mynode && snp_seq == s) snp->srcu_data_have_cbs[idx] |= sdp->grpmask; - raw_spin_unlock_irqrestore_rcu_node(snp, flags); + spin_unlock_irqrestore_rcu_node(snp, flags); if (snp == sdp->mynode && snp_seq != s) { srcu_schedule_cbs_sdp(sdp, do_norm ? 
SRCU_INTERVAL @@ -644,11 +671,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, snp->srcu_data_have_cbs[idx] |= sdp->grpmask; if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) snp->srcu_gp_seq_needed_exp = s; - raw_spin_unlock_irqrestore_rcu_node(snp, flags); + spin_unlock_irqrestore_rcu_node(snp, flags); } /* Top of tree, must ensure the grace period will be started. */ - raw_spin_lock_irqsave_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(sp, flags); if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load @@ -667,7 +694,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, queue_delayed_work(system_power_efficient_wq, &sp->work, srcu_get_delay(sp)); } - raw_spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(sp, flags); } /* @@ -830,7 +857,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rhp->func = func; local_irq_save(flags); sdp = this_cpu_ptr(sp->sda); - raw_spin_lock_rcu_node(sdp); + spin_lock_rcu_node(sdp); rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&sp->srcu_gp_seq)); @@ -844,7 +871,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, sdp->srcu_gp_seq_needed_exp = s; needexp = true; } - raw_spin_unlock_irqrestore_rcu_node(sdp, flags); + spin_unlock_irqrestore_rcu_node(sdp, flags); if (needgp) srcu_funnel_gp_start(sp, sdp, s, do_norm); else if (needexp) @@ -900,7 +927,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) /* * Make sure that later code is ordered after the SRCU grace - * period. This pairs with the raw_spin_lock_irq_rcu_node() + * period. This pairs with the spin_lock_irq_rcu_node() * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed * because the current CPU might have been totally uninvolved with * (and thus unordered against) that grace period. 
@@ -1024,7 +1051,7 @@ void srcu_barrier(struct srcu_struct *sp) */ for_each_possible_cpu(cpu) { sdp = per_cpu_ptr(sp->sda, cpu); - raw_spin_lock_irq_rcu_node(sdp); + spin_lock_irq_rcu_node(sdp); atomic_inc(&sp->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); @@ -1033,7 +1060,7 @@ void srcu_barrier(struct srcu_struct *sp) debug_rcu_head_unqueue(&sdp->srcu_barrier_head); atomic_dec(&sp->srcu_barrier_cpu_cnt); } - raw_spin_unlock_irq_rcu_node(sdp); + spin_unlock_irq_rcu_node(sdp); } /* Remove the initial count, at which point reaching zero can happen. */ @@ -1082,17 +1109,17 @@ static void srcu_advance_state(struct srcu_struct *sp) */ idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - raw_spin_lock_irq_rcu_node(sp); + spin_lock_irq_rcu_node(sp); if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); mutex_unlock(&sp->srcu_gp_mutex); return; } idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) srcu_gp_start(sp); - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); if (idx != SRCU_STATE_IDLE) { mutex_unlock(&sp->srcu_gp_mutex); return; /* Someone else started the grace period. */ @@ -1141,19 +1168,19 @@ static void srcu_invoke_callbacks(struct work_struct *work) sdp = container_of(work, struct srcu_data, work.work); sp = sdp->sp; rcu_cblist_init(&ready_cbs); - raw_spin_lock_irq_rcu_node(sdp); + spin_lock_irq_rcu_node(sdp); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&sp->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { - raw_spin_unlock_irq_rcu_node(sdp); + spin_unlock_irq_rcu_node(sdp); return; /* Someone else on the job or nothing to do. */ } /* We are on the job! Extract and invoke ready callbacks. 
*/ sdp->srcu_cblist_invoking = true; rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); - raw_spin_unlock_irq_rcu_node(sdp); + spin_unlock_irq_rcu_node(sdp); rhp = rcu_cblist_dequeue(&ready_cbs); for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { debug_rcu_head_unqueue(rhp); @@ -1166,13 +1193,13 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Update counts, accelerate new callbacks, and if needed, * schedule another round of callback invocation. */ - raw_spin_lock_irq_rcu_node(sdp); + spin_lock_irq_rcu_node(sdp); rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, rcu_seq_snap(&sp->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); - raw_spin_unlock_irq_rcu_node(sdp); + spin_unlock_irq_rcu_node(sdp); if (more) srcu_schedule_cbs_sdp(sdp, 0); } @@ -1185,7 +1212,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) { bool pushgp = true; - raw_spin_lock_irq_rcu_node(sp); + spin_lock_irq_rcu_node(sp); if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ @@ -1195,7 +1222,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) /* Outstanding request and no GP. Start one. */ srcu_gp_start(sp); } - raw_spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(sp); if (pushgp) queue_delayed_work(system_power_efficient_wq, &sp->work, delay); -- cgit v1.2.3 From dac95906003fec1b4801115830cc14ec61c74960 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Oct 2017 11:23:10 -0700 Subject: torture: Suppress CPU stall warnings during shutdown ftrace dump The torture_shutdown() function directly invokes ftrace_dump(), which can result in RCU CPU stall warnings when the ftrace buffer is large, which it usually is. 
This commit therefore invokes rcu_ftrace_dump() in place of ftrace_dump(), suppressing RCU CPU stall warnings during this time. Signed-off-by: Paul E. McKenney --- kernel/torture.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/torture.c b/kernel/torture.c index 637e172835d8..52781e838541 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -47,6 +47,7 @@ #include #include #include +#include "rcu/rcu.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney "); @@ -500,7 +501,7 @@ static int torture_shutdown(void *arg) torture_shutdown_hook(); else VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping."); - ftrace_dump(DUMP_ALL); + rcu_ftrace_dump(DUMP_ALL); kernel_power_off(); /* Shut down the system. */ return 0; } -- cgit v1.2.3 From cf8d8b00518d9228d603fcd17de47c31deb70b8f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Oct 2017 11:00:33 -0700 Subject: torture: Prepare scripting for shift from %p to %pK Because %p prints "(null)" and %pK prints "0000000000000000" or (on 32-bit systems) "00000000", this commit adjusts torture-test scripting accordingly. Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/parse-torture.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh index f12c38909b00..5987e50cfeb4 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-torture.sh @@ -55,7 +55,7 @@ then exit fi -grep --binary-files=text 'torture:.*ver:' $file | grep --binary-files=text -v '(null)' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | +grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | awk ' BEGIN { ver = 0; -- cgit v1.2.3 From a0982dfa03efca6c239c52cabebcea4afb93ea6b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Oct 2017 16:24:28 -0700 Subject: sched: Stop resched_cpu() from sending IPIs to offline CPUs The rcutorture test suite occasionally provokes a splat due to invoking resched_cpu() on an offline CPU: WARNING: CPU: 2 PID: 8 at /home/paulmck/public_git/linux-rcu/arch/x86/kernel/smp.c:128 native_smp_send_reschedule+0x37/0x40 Modules linked in: CPU: 2 PID: 8 Comm: rcu_preempt Not tainted 4.14.0-rc4+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 task: ffff902ede9daf00 task.stack: ffff96c50010c000 RIP: 0010:native_smp_send_reschedule+0x37/0x40 RSP: 0018:ffff96c50010fdb8 EFLAGS: 00010096 RAX: 000000000000002e RBX: ffff902edaab4680 RCX: 0000000000000003 RDX: 0000000080000003 RSI: 0000000000000000 RDI: 00000000ffffffff RBP: ffff96c50010fdb8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 00000000299f36ae R12: 0000000000000001 R13: ffffffff9de64240 R14: 0000000000000001 R15: ffffffff9de64240 FS: 0000000000000000(0000) GS:ffff902edfc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 
00000000f7d4c642 CR3: 000000001e0e2000 CR4: 00000000000006e0 Call Trace: resched_curr+0x8f/0x1c0 resched_cpu+0x2c/0x40 rcu_implicit_dynticks_qs+0x152/0x220 force_qs_rnp+0x147/0x1d0 ? sync_rcu_exp_select_cpus+0x450/0x450 rcu_gp_kthread+0x5a9/0x950 kthread+0x142/0x180 ? force_qs_rnp+0x1d0/0x1d0 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x27/0x40 Code: 14 01 0f 92 c0 84 c0 74 14 48 8b 05 14 4f f4 00 be fd 00 00 00 ff 90 a0 00 00 00 5d c3 89 fe 48 c7 c7 38 89 ca 9d e8 e5 56 08 00 <0f> ff 5d c3 0f 1f 44 00 00 8b 05 52 9e 37 02 85 c0 75 38 55 48 ---[ end trace 26df9e5df4bba4ac ]--- This splat cannot be generated by expedited grace periods because they always invoke resched_cpu() on the current CPU, which is good because expedited grace periods require that resched_cpu() unconditionally succeed. However, other parts of RCU can tolerate resched_cpu() acting as a no-op, at least as long as it doesn't happen too often. This commit therefore makes resched_cpu() invoke resched_curr() only if the CPU is either online or is the current CPU. Signed-off-by: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra --- kernel/sched/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 75554f366fd3..c85dfb746f8c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -508,7 +508,8 @@ void resched_cpu(int cpu) unsigned long flags; raw_spin_lock_irqsave(&rq->lock, flags); - resched_curr(rq); + if (cpu_online(cpu) || cpu == smp_processor_id()) + resched_curr(rq); raw_spin_unlock_irqrestore(&rq->lock, flags); } -- cgit v1.2.3 From 2fe2582649aa2355f79acddb86bd4d6c5363eb63 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 13 Oct 2017 17:00:18 -0700 Subject: sched: Stop switched_to_rt() from sending IPIs to offline CPUs The rcutorture test suite occasionally provokes a splat due to invoking rt_mutex_lock() which needs to boost the priority of a task currently sitting on a runqueue that belongs to an offline CPU: WARNING: CPU: 0 PID: 12 at /home/paulmck/public_git/linux-rcu/arch/x86/kernel/smp.c:128 native_smp_send_reschedule+0x37/0x40 Modules linked in: CPU: 0 PID: 12 Comm: rcub/7 Not tainted 4.14.0-rc4+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 task: ffff9ed3de5f8cc0 task.stack: ffffbbf80012c000 RIP: 0010:native_smp_send_reschedule+0x37/0x40 RSP: 0018:ffffbbf80012fd10 EFLAGS: 00010082 RAX: 000000000000002f RBX: ffff9ed3dd9cb300 RCX: 0000000000000004 RDX: 0000000080000004 RSI: 0000000000000086 RDI: 00000000ffffffff RBP: ffffbbf80012fd10 R08: 000000000009da7a R09: 0000000000007b9d R10: 0000000000000001 R11: ffffffffbb57c2cd R12: 000000000000000d R13: ffff9ed3de5f8cc0 R14: 0000000000000061 R15: ffff9ed3ded59200 FS: 0000000000000000(0000) GS:ffff9ed3dea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000080686f0 CR3: 000000001b9e0000 CR4: 00000000000006f0 Call Trace: resched_curr+0x61/0xd0 switched_to_rt+0x8f/0xa0 rt_mutex_setprio+0x25c/0x410 task_blocks_on_rt_mutex+0x1b3/0x1f0 rt_mutex_slowlock+0xa9/0x1e0 rt_mutex_lock+0x29/0x30 rcu_boost_kthread+0x127/0x3c0 kthread+0x104/0x140 ? rcu_report_unblock_qs_rnp+0x90/0x90 ? 
kthread_create_on_node+0x40/0x40 ret_from_fork+0x22/0x30 Code: f0 00 0f 92 c0 84 c0 74 14 48 8b 05 34 74 c5 00 be fd 00 00 00 ff 90 a0 00 00 00 5d c3 89 fe 48 c7 c7 a0 c6 fc b9 e8 d5 b5 06 00 <0f> ff 5d c3 0f 1f 44 00 00 8b 05 a2 d1 13 02 85 c0 75 38 55 48 But the target task's priority has already been adjusted, so the only purpose of switched_to_rt() invoking resched_curr() is to wake up the CPU running some task that needs to be preempted by the boosted task. But the CPU is offline, which presumably means that the task must be migrated to some other CPU, and that this other CPU will undertake any needed preemption at the time of migration. Because the runqueue lock is held when resched_curr() is invoked, we know that the boosted task cannot go anywhere, so it is not necessary to invoke resched_curr() in this particular case. This commit therefore makes switched_to_rt() refrain from invoking resched_curr() when the target CPU is offline. Signed-off-by: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra --- kernel/sched/rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 4056c19ca3f0..f242f642ef53 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2206,7 +2206,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); #endif /* CONFIG_SMP */ - if (p->prio < rq->curr->prio) + if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq))) resched_curr(rq); } } -- cgit v1.2.3 From ffa53c5863ddb265f9a25729023f4d0409cdacf7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Oct 2017 06:48:39 -0700 Subject: netfilter: Eliminate cond_resched_rcu_qs() in favor of cond_resched() Now that cond_resched() also provides RCU quiescent states when needed, it can be used in place of cond_resched_rcu_qs(). This commit therefore makes this change. Signed-off-by: Paul E. 
McKenney Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: "David S. Miller" Cc: --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 85f643c1e227..4efaa3066c78 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1044,7 +1044,7 @@ static void gc_worker(struct work_struct *work) * we will just continue with next hash slot. */ rcu_read_unlock(); - cond_resched_rcu_qs(); + cond_resched(); } while (++buckets < goal); if (gc_work->exiting) -- cgit v1.2.3 From 50d4fb781287b47c4c2d455e3395783c1f06a3a5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Oct 2017 08:22:18 -0700 Subject: mm: Eliminate cond_resched_rcu_qs() in favor of cond_resched() Now that cond_resched() also provides RCU quiescent states when needed, it can be used in place of cond_resched_rcu_qs(). This commit therefore makes this change. Signed-off-by: Paul E. McKenney Cc: Andrew Morton Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka --- mm/mlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mlock.c b/mm/mlock.c index 30472d438794..f7f54fd2e13f 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -779,7 +779,7 @@ static int apply_mlockall_flags(int flags) /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); - cond_resched_rcu_qs(); + cond_resched(); } out: return 0; -- cgit v1.2.3 From 329b4130bc5eb2a1b123a652b985dbdb08d6b9a8 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 23 Nov 2017 13:21:55 +0300 Subject: ARC: Fix detection of dual-issue enabled As per PRM bit #0 ("D") in EXEC_CTRL enables dual-issue if set to 0, otherwise if set to 1 all instructions are executed one at a time, i.e. dual-issue is disabled. 
Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7ef7d9a8ff89..9d27331fe69a 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void) unsigned int exec_ctrl; READ_BCR(AUX_EXEC_CTRL, exec_ctrl); - cpu->extn.dual_enb = exec_ctrl & 1; + cpu->extn.dual_enb = !(exec_ctrl & 1); /* dual issue always present for this core */ cpu->extn.dual = 1; -- cgit v1.2.3 From d2890c3778b164fde587bc16583f3a1c87233ec5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 26 Nov 2017 23:16:49 -0800 Subject: crypto: rsa - fix buffer overread when stripping leading zeroes In rsa_get_n(), if the buffer contained all 0's and "FIPS mode" is enabled, we would read one byte past the end of the buffer while scanning the leading zeroes. Fix it by checking 'n_sz' before '!*ptr'. This bug was reachable by adding a specially crafted key of type "asymmetric" (requires CONFIG_RSA and CONFIG_X509_CERTIFICATE_PARSER). 
KASAN report: BUG: KASAN: slab-out-of-bounds in rsa_get_n+0x19e/0x1d0 crypto/rsa_helper.c:33 Read of size 1 at addr ffff88003501a708 by task keyctl/196 CPU: 1 PID: 196 Comm: keyctl Not tainted 4.14.0-09238-g1d3b78bbc6e9 #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 Call Trace: rsa_get_n+0x19e/0x1d0 crypto/rsa_helper.c:33 asn1_ber_decoder+0x82a/0x1fd0 lib/asn1_decoder.c:328 rsa_set_pub_key+0xd3/0x320 crypto/rsa.c:278 crypto_akcipher_set_pub_key ./include/crypto/akcipher.h:364 [inline] pkcs1pad_set_pub_key+0xae/0x200 crypto/rsa-pkcs1pad.c:117 crypto_akcipher_set_pub_key ./include/crypto/akcipher.h:364 [inline] public_key_verify_signature+0x270/0x9d0 crypto/asymmetric_keys/public_key.c:106 x509_check_for_self_signed+0x2ea/0x480 crypto/asymmetric_keys/x509_public_key.c:141 x509_cert_parse+0x46a/0x620 crypto/asymmetric_keys/x509_cert_parser.c:129 x509_key_preparse+0x61/0x750 crypto/asymmetric_keys/x509_public_key.c:174 asymmetric_key_preparse+0xa4/0x150 crypto/asymmetric_keys/asymmetric_type.c:388 key_create_or_update+0x4d4/0x10a0 security/keys/key.c:850 SYSC_add_key security/keys/keyctl.c:122 [inline] SyS_add_key+0xe8/0x290 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0x96 Allocated by task 196: __do_kmalloc mm/slab.c:3711 [inline] __kmalloc_track_caller+0x118/0x2e0 mm/slab.c:3726 kmemdup+0x17/0x40 mm/util.c:118 kmemdup ./include/linux/string.h:414 [inline] x509_cert_parse+0x2cb/0x620 crypto/asymmetric_keys/x509_cert_parser.c:106 x509_key_preparse+0x61/0x750 crypto/asymmetric_keys/x509_public_key.c:174 asymmetric_key_preparse+0xa4/0x150 crypto/asymmetric_keys/asymmetric_type.c:388 key_create_or_update+0x4d4/0x10a0 security/keys/key.c:850 SYSC_add_key security/keys/keyctl.c:122 [inline] SyS_add_key+0xe8/0x290 security/keys/keyctl.c:62 entry_SYSCALL_64_fastpath+0x1f/0x96 Fixes: 5a7de97309f5 ("crypto: rsa - return raw integers for the ASN.1 parser") Cc: # v4.8+ Cc: Tudor Ambarus Signed-off-by: Eric 
Biggers Reviewed-by: James Morris Reviewed-by: David Howells Signed-off-by: Herbert Xu --- crypto/rsa_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c index 0b66dc824606..cad395d70d78 100644 --- a/crypto/rsa_helper.c +++ b/crypto/rsa_helper.c @@ -30,7 +30,7 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag, return -EINVAL; if (fips_enabled) { - while (!*ptr && n_sz) { + while (n_sz && !*ptr) { ptr++; n_sz--; } -- cgit v1.2.3 From b32a7dc8aef1882fbf983eb354837488cc9d54dc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 27 Nov 2017 23:23:05 -0800 Subject: crypto: algif_aead - fix reference counting of null skcipher In the AEAD interface for AF_ALG, the reference to the "null skcipher" held by each tfm was being dropped in the wrong place -- when each af_alg_ctx was freed instead of when the aead_tfm was freed. As discovered by syzkaller, a specially crafted program could use this to cause the null skcipher to be freed while it is still in use. Fix it by dropping the reference in the right place. 
Fixes: 72548b093ee3 ("crypto: algif_aead - copy AAD from src to dst") Reported-by: syzbot Cc: # v4.14+ Signed-off-by: Eric Biggers Reviewed-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index aacae0837aff..9d73be28cf01 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -487,6 +487,7 @@ static void aead_release(void *private) struct aead_tfm *tfm = private; crypto_free_aead(tfm->aead); + crypto_put_default_null_skcipher2(); kfree(tfm); } @@ -519,7 +520,6 @@ static void aead_sock_destruct(struct sock *sk) unsigned int ivlen = crypto_aead_ivsize(tfm); af_alg_pull_tsgl(sk, ctx->used, NULL, 0); - crypto_put_default_null_skcipher2(); sock_kzfree_s(sk, ctx->iv, ivlen); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk); -- cgit v1.2.3 From 887207ed9e5812ed9239b6d07185a2d35dda91db Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 28 Nov 2017 00:46:24 -0800 Subject: crypto: af_alg - fix NULL pointer dereference in af_alg_free_areq_sgls() If allocating the ->tsgl member of 'struct af_alg_async_req' failed, during cleanup we dereferenced the NULL ->tsgl pointer in af_alg_free_areq_sgls(), because ->tsgl_entries was nonzero. Fix it by only freeing the ->tsgl list if it is non-NULL. This affected both algif_skcipher and algif_aead. 
Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management") Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Reported-by: syzbot Cc: # v4.14+ Signed-off-by: Eric Biggers Reviewed-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/af_alg.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 85cea9de324a..1e5353f62067 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -672,14 +672,15 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq) } tsgl = areq->tsgl; - for_each_sg(tsgl, sg, areq->tsgl_entries, i) { - if (!sg_page(sg)) - continue; - put_page(sg_page(sg)); - } + if (tsgl) { + for_each_sg(tsgl, sg, areq->tsgl_entries, i) { + if (!sg_page(sg)) + continue; + put_page(sg_page(sg)); + } - if (areq->tsgl && areq->tsgl_entries) sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl)); + } } EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls); -- cgit v1.2.3 From af3ff8045bbf3e32f1a448542e73abb4c8ceb6f1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 28 Nov 2017 18:01:38 -0800 Subject: crypto: hmac - require that the underlying hash algorithm is unkeyed Because the HMAC template didn't check that its underlying hash algorithm is unkeyed, trying to use "hmac(hmac(sha3-512-generic))" through AF_ALG or through KEYCTL_DH_COMPUTE resulted in the inner HMAC being used without having been keyed, resulting in sha3_update() being called without sha3_init(), causing a stack buffer overflow. This is a very old bug, but it seems to have only started causing real problems when SHA-3 support was added (requires CONFIG_CRYPTO_SHA3) because the innermost hash's state is ->import()ed from a zeroed buffer, and it just so happens that other hash algorithms are fine with that, but SHA-3 is not. However, there could be arch or hardware-dependent hash algorithms also affected; I couldn't test everything. 
Fix the bug by introducing a function crypto_shash_alg_has_setkey() which tests whether a shash algorithm is keyed. Then update the HMAC template to require that its underlying hash algorithm is unkeyed. Here is a reproducer: #include #include int main() { int algfd; struct sockaddr_alg addr = { .salg_type = "hash", .salg_name = "hmac(hmac(sha3-512-generic))", }; char key[4096] = { 0 }; algfd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(algfd, (const struct sockaddr *)&addr, sizeof(addr)); setsockopt(algfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)); } Here was the KASAN report from syzbot: BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:341 [inline] BUG: KASAN: stack-out-of-bounds in sha3_update+0xdf/0x2e0 crypto/sha3_generic.c:161 Write of size 4096 at addr ffff8801cca07c40 by task syzkaller076574/3044 CPU: 1 PID: 3044 Comm: syzkaller076574 Not tainted 4.14.0-mm1+ #25 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x137/0x190 mm/kasan/kasan.c:267 memcpy+0x37/0x50 mm/kasan/kasan.c:303 memcpy include/linux/string.h:341 [inline] sha3_update+0xdf/0x2e0 crypto/sha3_generic.c:161 crypto_shash_update+0xcb/0x220 crypto/shash.c:109 shash_finup_unaligned+0x2a/0x60 crypto/shash.c:151 crypto_shash_finup+0xc4/0x120 crypto/shash.c:165 hmac_finup+0x182/0x330 crypto/hmac.c:152 crypto_shash_finup+0xc4/0x120 crypto/shash.c:165 shash_digest_unaligned+0x9e/0xd0 crypto/shash.c:172 crypto_shash_digest+0xc4/0x120 crypto/shash.c:186 hmac_setkey+0x36a/0x690 crypto/hmac.c:66 crypto_shash_setkey+0xad/0x190 crypto/shash.c:64 shash_async_setkey+0x47/0x60 crypto/shash.c:207 crypto_ahash_setkey+0xaf/0x180 
crypto/ahash.c:200 hash_setkey+0x40/0x90 crypto/algif_hash.c:446 alg_setkey crypto/af_alg.c:221 [inline] alg_setsockopt+0x2a1/0x350 crypto/af_alg.c:254 SYSC_setsockopt net/socket.c:1851 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1830 entry_SYSCALL_64_fastpath+0x1f/0x96 Reported-by: syzbot Cc: Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/hmac.c | 6 +++++- crypto/shash.c | 5 +++-- include/crypto/internal/hash.h | 8 ++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/crypto/hmac.c b/crypto/hmac.c index 92871dc2a63e..e74730224f0a 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -195,11 +195,15 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb) salg = shash_attr_alg(tb[1], 0, 0); if (IS_ERR(salg)) return PTR_ERR(salg); + alg = &salg->base; + /* The underlying hash algorithm must be unkeyed */ err = -EINVAL; + if (crypto_shash_alg_has_setkey(salg)) + goto out_put_alg; + ds = salg->digestsize; ss = salg->statesize; - alg = &salg->base; if (ds > alg->cra_blocksize || ss < alg->cra_blocksize) goto out_put_alg; diff --git a/crypto/shash.c b/crypto/shash.c index 325a14da5827..e849d3ee2e27 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -25,11 +25,12 @@ static const struct crypto_type crypto_shash_type; -static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) { return -ENOSYS; } +EXPORT_SYMBOL_GPL(shash_no_setkey); static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f0b44c16e88f..c2bae8da642c 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -82,6 +82,14 @@ int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); void ahash_free_instance(struct crypto_instance *inst); +int shash_no_setkey(struct 
crypto_shash *tfm, const u8 *key, + unsigned int keylen); + +static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) +{ + return alg->setkey != shash_no_setkey; +} + int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, struct hash_alg_common *alg, struct crypto_instance *inst); -- cgit v1.2.3 From 9816ef6ecbc102b9bcbb1d83e12c7fb19924f38c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Nov 2017 11:58:03 +0300 Subject: scsi: lpfc: Use after free in lpfc_rq_buf_free() The error message dereferences "rqb_entry" so we need to print it first and then free the buffer. Fixes: 6c621a2229b0 ("scsi: lpfc: Separate NVMET RQ buffer posting from IO resources SGL/iocbq/context") Signed-off-by: Dan Carpenter Acked-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 56faeb049b4a..87c08ff37ddd 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -753,12 +753,12 @@ lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys); rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe); if (rc < 0) { - (rqbp->rqb_free_buffer)(phba, rqb_entry); lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6409 Cannot post to RQ %d: %x %x\n", rqb_entry->hrq->queue_id, rqb_entry->hrq->host_index, rqb_entry->hrq->hba_index); + (rqbp->rqb_free_buffer)(phba, rqb_entry); } else { list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list); rqbp->buffer_count++; -- cgit v1.2.3 From fe55e79536a37348dcb0b7177ee5fda6deccb99a Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Sat, 25 Nov 2017 19:38:10 +0100 Subject: scsi: libfc: fix ELS request handling The modification of fc_lport_recv_els_req() in commit fcabb09e59a7 ("scsi: libfc: directly call ELS request handlers") caused certain requests not to be handled at all. Fix that. 
Fixes: fcabb09e59a7 ("scsi: libfc: directly call ELS request handlers") Signed-off-by: Martin Wilck Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_lport.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 5da46052e179..21be672679fb 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -904,10 +904,14 @@ static void fc_lport_recv_els_req(struct fc_lport *lport, case ELS_FLOGI: if (!lport->point_to_multipoint) fc_lport_recv_flogi_req(lport, fp); + else + fc_rport_recv_req(lport, fp); break; case ELS_LOGO: if (fc_frame_sid(fp) == FC_FID_FLOGI) fc_lport_recv_logo_req(lport, fp); + else + fc_rport_recv_req(lport, fp); break; case ELS_RSCN: lport->tt.disc_recv_req(lport, fp); -- cgit v1.2.3 From d18539754d97876503275efc7d00a1901bb0cfad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Nov 2017 14:25:25 +0100 Subject: scsi: aacraid: address UBSAN warning regression As reported by Meelis Roos, my previous patch causes an incorrect calculation of the timeout, through an undefined signed integer overflow: [ 12.228155] UBSAN: Undefined behaviour in drivers/scsi/aacraid/commsup.c:2514:49 [ 12.228229] signed integer overflow: [ 12.228283] 964297611 * 250 cannot be represented in type 'long int' The problem is that doing a multiplication with HZ first and then dividing by USEC_PER_SEC worked correctly for 32-bit microseconds, but not for 32-bit nanoseconds, which would require up to 41 bits. This reworks the calculation to first convert the nanoseconds into jiffies, which should give us the same result as before and not overflow. Unfortunately I did not understand the exact intention of the algorithm, in particular the part where we add half a second, so it's possible that there is still a preexisting problem in this function. 
I added a comment that this would be handled more nicely using usleep_range(), which generally works better for waking up at a particular time than the current schedule_timeout() based implementation. I did not feel comfortable trying to implement that without being sure what the intent is here though. Fixes: 820f18865912 ("scsi: aacraid: use timespec64 instead of timeval") Tested-by: Meelis Roos Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index bec9f3193f60..80a8cb26cdea 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2482,8 +2482,8 @@ int aac_command_thread(void *data) /* Synchronize our watches */ if (((NSEC_PER_SEC - (NSEC_PER_SEC / HZ)) > now.tv_nsec) && (now.tv_nsec > (NSEC_PER_SEC / HZ))) - difference = (((NSEC_PER_SEC - now.tv_nsec) * HZ) - + NSEC_PER_SEC / 2) / NSEC_PER_SEC; + difference = HZ + HZ / 2 - + now.tv_nsec / (NSEC_PER_SEC / HZ); else { if (now.tv_nsec > NSEC_PER_SEC / 2) ++now.tv_sec; @@ -2507,6 +2507,10 @@ int aac_command_thread(void *data) if (kthread_should_stop()) break; + /* + * we probably want usleep_range() here instead of the + * jiffies computation + */ schedule_timeout(difference); if (kthread_should_stop()) -- cgit v1.2.3 From 45349821ab3a8d378b8f37e52c6fe1aa1b870c47 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 28 Nov 2017 16:26:57 +0100 Subject: scsi: bfa: fix access to bfad_im_port_s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 'cd21c605b2cf ("scsi: fc: provide fc_bsg_to_shost() helper")' changed access to bfa's 'struct bfad_im_port_s' by using shost_priv() instead of shost->hostdata[0]. 
This led to crashes like the one in the following back-trace: task: ffff880046375300 ti: ffff8800a2ef8000 task.ti: ffff8800a2ef8000 RIP: e030:[] [] bfa_fcport_get_attr+0x82/0x260 [bfa] RSP: e02b:ffff8800a2efba10 EFLAGS: 00010046 RAX: 575f415441536432 RBX: ffff8800a2efba28 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8800a2efba28 RDI: ffff880004dc31d8 RBP: ffff880004dc31d8 R08: 0000000000000000 R09: 0000000000000001 R10: ffff88011fadc468 R11: 0000000000000001 R12: ffff880004dc31f0 R13: 0000000000000200 R14: ffff880004dc61d0 R15: ffff880004947a10 FS: 00007feb1e489700(0000) GS:ffff88011fac0000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007ffe14e46c10 CR3: 00000000957b8000 CR4: 0000000000000660 Stack: ffff88001d4da000 ffff880004dc31c0 ffffffffa048a9df ffffffff81e56380 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [] bfad_iocmd_ioc_get_info+0x4f/0x220 [bfa] [] bfad_iocmd_handler+0xa00/0xd40 [bfa] [] bfad_im_bsg_request+0xee/0x1b0 [bfa] [] fc_bsg_dispatch+0x10b/0x1b0 [scsi_transport_fc] [] bsg_request_fn+0x11d/0x1c0 [] __blk_run_queue+0x2f/0x40 [] blk_execute_rq_nowait+0xa8/0x160 [] blk_execute_rq+0x77/0x120 [] bsg_ioctl+0x1b6/0x200 [] do_vfs_ioctl+0x2cd/0x4a0 [] SyS_ioctl+0x74/0x80 [] entry_SYSCALL_64_fastpath+0x12/0x6d Fixes: cd21c605b2cf ("scsi: fc: provide fc_bsg_to_shost() helper") Signed-off-by: Johannes Thumshirn Cc: Michal Koutný Reviewed-by: Hannes Reinecke Signed-off-by: Martin K.
Petersen --- drivers/scsi/bfa/bfad_bsg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index 72ca2a2e08e2..09ef68c8225f 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -3135,7 +3135,8 @@ bfad_im_bsg_vendor_request(struct bsg_job *job) struct fc_bsg_request *bsg_request = job->request; struct fc_bsg_reply *bsg_reply = job->reply; uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0]; - struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job)); + struct Scsi_Host *shost = fc_bsg_to_shost(job); + struct bfad_im_port_s *im_port = shost->hostdata[0]; struct bfad_s *bfad = im_port->bfad; void *payload_kbuf; int rc = -EINVAL; @@ -3350,7 +3351,8 @@ int bfad_im_bsg_els_ct_request(struct bsg_job *job) { struct bfa_bsg_data *bsg_data; - struct bfad_im_port_s *im_port = shost_priv(fc_bsg_to_shost(job)); + struct Scsi_Host *shost = fc_bsg_to_shost(job); + struct bfad_im_port_s *im_port = shost->hostdata[0]; struct bfad_s *bfad = im_port->bfad; bfa_bsg_fcpt_t *bsg_fcpt; struct bfad_fcxp *drv_fcxp; -- cgit v1.2.3 From ecaaab5649781c5a0effdaf298a925063020500e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 28 Nov 2017 20:56:59 -0800 Subject: crypto: salsa20 - fix blkcipher_walk API usage When asked to encrypt or decrypt 0 bytes, both the generic and x86 implementations of Salsa20 crash in blkcipher_walk_done(), either when doing 'kfree(walk->buffer)' or 'free_page((unsigned long)walk->page)', because walk->buffer and walk->page have not been initialized. The bug is that Salsa20 is calling blkcipher_walk_done() even when nothing is in 'walk.nbytes'. But blkcipher_walk_done() is only meant to be called when a nonzero number of bytes have been provided. The broken code is part of an optimization that tries to make only one call to salsa20_encrypt_bytes() to process inputs that are not evenly divisible by 64 bytes. 
To fix the bug, just remove this "optimization" and use the blkcipher_walk API the same way all the other users do. Reproducer: #include <linux/if_alg.h> #include <sys/socket.h> #include <unistd.h> int main() { int algfd, reqfd; struct sockaddr_alg addr = { .salg_type = "skcipher", .salg_name = "salsa20", }; char key[16] = { 0 }; algfd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(algfd, (void *)&addr, sizeof(addr)); reqfd = accept(algfd, 0, 0); setsockopt(algfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key)); read(reqfd, key, sizeof(key)); } Reported-by: syzbot Fixes: eb6f13eb9f81 ("[CRYPTO] salsa20_generic: Fix multi-page processing") Cc: # v2.6.25+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/salsa20_glue.c | 7 ------- crypto/salsa20_generic.c | 7 ------- 2 files changed, 14 deletions(-) diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index 399a29d067d6..cb91a64a99e7 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc, salsa20_ivsetup(ctx, walk.iv); - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - while (walk.nbytes >= 64) { salsa20_encrypt_bytes(ctx, walk.src.virt.addr, walk.dst.virt.addr, diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index f550b5d94630..d7da0eea5622 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -188,13 +188,6 @@ static int encrypt(struct blkcipher_desc *desc, salsa20_ivsetup(ctx, walk.iv); - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, - walk.src.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - while (walk.nbytes >= 64) { salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, walk.src.virt.addr, -- cgit v1.2.3 From 6a53b7593233ab9e4f96873ebacc0f653a55c3e1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 27 Nov 2017
11:15:16 -0800 Subject: xfrm: check id proto in validate_tmpl() syzbot reported a kernel warning in xfrm_state_fini(), which indicates that we have entries left in the list net->xfrm.state_all whose proto is zero. And xfrm_id_proto_match() doesn't consider them as a match with IPSEC_PROTO_ANY in this case. Proto with value 0 is probably not a valid value, at least verify_newsa_info() doesn't consider it valid either. This patch fixes it by checking the proto value in validate_tmpl() and rejecting invalid ones, like what iproute2 does in xfrm_xfrmproto_getbyname(). Reported-by: syzbot Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Cong Wang Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 983b0233767b..c2cfcc6fdb34 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1445,6 +1445,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) default: return -EINVAL; } + + switch (ut[i].id.proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: +#endif + case IPSEC_PROTO_ANY: + break; + default: + return -EINVAL; + } + } return 0; -- cgit v1.2.3 From 823dbb6eb08a2865bcd236b4f52b1b9de216418a Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 27 Nov 2017 23:33:29 +0100 Subject: ALSA: pcm: add SNDRV_PCM_FORMAT_{S,U}20 This format is similar to existing SNDRV_PCM_FORMAT_{S,U}20_3 that keep 20-bit PCM samples in 3 bytes, however i.MX6 platform SSI FIFO does not allow 3-byte accesses (including DMA) so a 4-byte (more conventional) format is needed for it. Signed-off-by: Maciej S. 
Szmigiero Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 8 ++++++++ include/sound/soc-dai.h | 2 ++ include/uapi/sound/asound.h | 9 +++++++++ sound/core/pcm_misc.c | 19 ++++++++++++++++++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 24febf9e177c..e054c583d3b3 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -169,6 +169,10 @@ struct snd_pcm_ops { #define SNDRV_PCM_FMTBIT_IMA_ADPCM _SNDRV_PCM_FMTBIT(IMA_ADPCM) #define SNDRV_PCM_FMTBIT_MPEG _SNDRV_PCM_FMTBIT(MPEG) #define SNDRV_PCM_FMTBIT_GSM _SNDRV_PCM_FMTBIT(GSM) +#define SNDRV_PCM_FMTBIT_S20_LE _SNDRV_PCM_FMTBIT(S20_LE) +#define SNDRV_PCM_FMTBIT_U20_LE _SNDRV_PCM_FMTBIT(U20_LE) +#define SNDRV_PCM_FMTBIT_S20_BE _SNDRV_PCM_FMTBIT(S20_BE) +#define SNDRV_PCM_FMTBIT_U20_BE _SNDRV_PCM_FMTBIT(U20_BE) #define SNDRV_PCM_FMTBIT_SPECIAL _SNDRV_PCM_FMTBIT(SPECIAL) #define SNDRV_PCM_FMTBIT_S24_3LE _SNDRV_PCM_FMTBIT(S24_3LE) #define SNDRV_PCM_FMTBIT_U24_3LE _SNDRV_PCM_FMTBIT(U24_3LE) @@ -202,6 +206,8 @@ struct snd_pcm_ops { #define SNDRV_PCM_FMTBIT_FLOAT SNDRV_PCM_FMTBIT_FLOAT_LE #define SNDRV_PCM_FMTBIT_FLOAT64 SNDRV_PCM_FMTBIT_FLOAT64_LE #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE +#define SNDRV_PCM_FMTBIT_S20 SNDRV_PCM_FMTBIT_S20_LE +#define SNDRV_PCM_FMTBIT_U20 SNDRV_PCM_FMTBIT_U20_LE #endif #ifdef SNDRV_BIG_ENDIAN #define SNDRV_PCM_FMTBIT_S16 SNDRV_PCM_FMTBIT_S16_BE @@ -213,6 +219,8 @@ struct snd_pcm_ops { #define SNDRV_PCM_FMTBIT_FLOAT SNDRV_PCM_FMTBIT_FLOAT_BE #define SNDRV_PCM_FMTBIT_FLOAT64 SNDRV_PCM_FMTBIT_FLOAT64_BE #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE +#define SNDRV_PCM_FMTBIT_S20 SNDRV_PCM_FMTBIT_S20_BE +#define SNDRV_PCM_FMTBIT_U20 SNDRV_PCM_FMTBIT_U20_BE #endif struct snd_pcm_file { diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 58acd00cae19..d970879944fc 100644 --- a/include/sound/soc-dai.h +++ 
b/include/sound/soc-dai.h @@ -102,6 +102,8 @@ struct snd_compr_stream; SNDRV_PCM_FMTBIT_S16_BE |\ SNDRV_PCM_FMTBIT_S20_3LE |\ SNDRV_PCM_FMTBIT_S20_3BE |\ + SNDRV_PCM_FMTBIT_S20_LE |\ + SNDRV_PCM_FMTBIT_S20_BE |\ SNDRV_PCM_FMTBIT_S24_3LE |\ SNDRV_PCM_FMTBIT_S24_3BE |\ SNDRV_PCM_FMTBIT_S32_LE |\ diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index c227ccba60ae..07d61583fd02 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -214,6 +214,11 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_IMA_ADPCM ((__force snd_pcm_format_t) 22) #define SNDRV_PCM_FORMAT_MPEG ((__force snd_pcm_format_t) 23) #define SNDRV_PCM_FORMAT_GSM ((__force snd_pcm_format_t) 24) +#define SNDRV_PCM_FORMAT_S20_LE ((__force snd_pcm_format_t) 25) /* in four bytes, LSB justified */ +#define SNDRV_PCM_FORMAT_S20_BE ((__force snd_pcm_format_t) 26) /* in four bytes, LSB justified */ +#define SNDRV_PCM_FORMAT_U20_LE ((__force snd_pcm_format_t) 27) /* in four bytes, LSB justified */ +#define SNDRV_PCM_FORMAT_U20_BE ((__force snd_pcm_format_t) 28) /* in four bytes, LSB justified */ +/* gap in the numbering for a future standard linear format */ #define SNDRV_PCM_FORMAT_SPECIAL ((__force snd_pcm_format_t) 31) #define SNDRV_PCM_FORMAT_S24_3LE ((__force snd_pcm_format_t) 32) /* in three bytes */ #define SNDRV_PCM_FORMAT_S24_3BE ((__force snd_pcm_format_t) 33) /* in three bytes */ @@ -248,6 +253,8 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_LE #define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_LE #define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE +#define SNDRV_PCM_FORMAT_S20 SNDRV_PCM_FORMAT_S20_LE +#define SNDRV_PCM_FORMAT_U20 SNDRV_PCM_FORMAT_U20_LE #endif #ifdef SNDRV_BIG_ENDIAN #define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_BE @@ -259,6 +266,8 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_BE #define 
SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_BE #define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE +#define SNDRV_PCM_FORMAT_S20 SNDRV_PCM_FORMAT_S20_BE +#define SNDRV_PCM_FORMAT_U20 SNDRV_PCM_FORMAT_U20_BE #endif typedef int __bitwise snd_pcm_subformat_t; diff --git a/sound/core/pcm_misc.c b/sound/core/pcm_misc.c index 9be81025372f..c4eb561d2008 100644 --- a/sound/core/pcm_misc.c +++ b/sound/core/pcm_misc.c @@ -163,13 +163,30 @@ static struct pcm_format_data pcm_formats[(INT)SNDRV_PCM_FORMAT_LAST+1] = { .width = 32, .phys = 32, .le = 0, .signd = 0, .silence = { 0x69, 0x69, 0x69, 0x69 }, }, - /* FIXME: the following three formats are not defined properly yet */ + /* FIXME: the following two formats are not defined properly yet */ [SNDRV_PCM_FORMAT_MPEG] = { .le = -1, .signd = -1, }, [SNDRV_PCM_FORMAT_GSM] = { .le = -1, .signd = -1, }, + [SNDRV_PCM_FORMAT_S20_LE] = { + .width = 20, .phys = 32, .le = 1, .signd = 1, + .silence = {}, + }, + [SNDRV_PCM_FORMAT_S20_BE] = { + .width = 20, .phys = 32, .le = 0, .signd = 1, + .silence = {}, + }, + [SNDRV_PCM_FORMAT_U20_LE] = { + .width = 20, .phys = 32, .le = 1, .signd = 0, + .silence = { 0x00, 0x00, 0x08, 0x00 }, + }, + [SNDRV_PCM_FORMAT_U20_BE] = { + .width = 20, .phys = 32, .le = 0, .signd = 0, + .silence = { 0x00, 0x08, 0x00, 0x00 }, + }, + /* FIXME: the following format is not defined properly yet */ [SNDRV_PCM_FORMAT_SPECIAL] = { .le = -1, .signd = -1, }, -- cgit v1.2.3 From a4a1b737032daf42e1e2ccd70bfceca56464ccac Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Nov 2017 12:58:51 +0000 Subject: ALSA: drivers: make array 'names' const, reduces object code size Don't populate array 'names' on the stack but instead make them static. 
Makes the object code smaller by 50 bytes: Before: text data bss dec hex filename 21237 9192 1120 31549 7b3d linux/sound/drivers/dummy.o After: text data bss dec hex filename 21095 9280 1120 31495 7b07 linux/sound/drivers/dummy.o (gcc version 7.2.0 x86_64) Signed-off-by: Colin Ian King Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/drivers/dummy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 7b2b1f766b00..69db45bc0197 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -830,7 +830,7 @@ static int snd_dummy_capsrc_put(struct snd_kcontrol *kcontrol, struct snd_ctl_el static int snd_dummy_iobox_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *info) { - const char *const names[] = { "None", "CD Player" }; + static const char *const names[] = { "None", "CD Player" }; return snd_ctl_enum_info(info, 1, 2, names); } -- cgit v1.2.3 From 044ace5e3fc95f8df1197b7eaeadee0b35dfcef5 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 11 Nov 2017 18:34:04 +0100 Subject: ALSA: hal2: Improve a size determination in hal2_create() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Signed-off-by: Takashi Iwai --- sound/mips/hal2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/mips/hal2.c b/sound/mips/hal2.c index 37d378a26a50..c8904e732aaa 100644 --- a/sound/mips/hal2.c +++ b/sound/mips/hal2.c @@ -814,7 +814,7 @@ static int hal2_create(struct snd_card *card, struct snd_hal2 **rchip) struct hpc3_regs *hpc3 = hpc3c0; int err; - hal2 = kzalloc(sizeof(struct snd_hal2), GFP_KERNEL); + hal2 = kzalloc(sizeof(*hal2), GFP_KERNEL); if (!hal2) return -ENOMEM; -- cgit v1.2.3 From cdc4398b399cad38e36be64faf3752aa12a44022 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 11 Nov 2017 20:02:07 +0100 Subject: ALSA: sgio2audio: Improve a size determination in snd_sgio2audio_create() Replace the specification of a data structure by a pointer dereference as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Takashi Iwai --- sound/mips/sgio2audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/mips/sgio2audio.c b/sound/mips/sgio2audio.c index 71c942162c25..9fb68b35de5a 100644 --- a/sound/mips/sgio2audio.c +++ b/sound/mips/sgio2audio.c @@ -840,7 +840,7 @@ static int snd_sgio2audio_create(struct snd_card *card, if (!(readq(&mace->perif.audio.control) & AUDIO_CONTROL_CODEC_PRESENT)) return -ENOENT; - chip = kzalloc(sizeof(struct snd_sgio2audio), GFP_KERNEL); + chip = kzalloc(sizeof(*chip), GFP_KERNEL); if (chip == NULL) return -ENOMEM; -- cgit v1.2.3 From 4d0272ca40dfe524ce8d1ae350f0a246ef849d4b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 22 Nov 2017 17:43:25 +0100 Subject: ALSA: gus: Delete an error message for a failed memory allocation in snd_gf1_dma_transfer_block() Omit an extra message for a memory allocation failure in this function. 
This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Takashi Iwai --- sound/isa/gus/gus_dma.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/isa/gus/gus_dma.c b/sound/isa/gus/gus_dma.c index 36c27c832360..7f95f452f106 100644 --- a/sound/isa/gus/gus_dma.c +++ b/sound/isa/gus/gus_dma.c @@ -201,10 +201,9 @@ int snd_gf1_dma_transfer_block(struct snd_gus_card * gus, struct snd_gf1_dma_block *block; block = kmalloc(sizeof(*block), atomic ? GFP_ATOMIC : GFP_KERNEL); - if (block == NULL) { - snd_printk(KERN_ERR "gf1: DMA transfer failure; not enough memory\n"); + if (!block) return -ENOMEM; - } + *block = *__block; block->next = NULL; -- cgit v1.2.3 From cf576fe5fd74cdabf7fc3fd8ac3b9abea9b964f8 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 16 Nov 2017 11:22:26 +0100 Subject: ALSA: korg1212: Delete a duplicate function call "release_firmware" in snd_korg1212_create() The function "release_firmware" is called in the current implementation of the function "_request_firmware" after a failure was detected. Link: https://elixir.free-electrons.com/linux/v4.14-rc8/source/drivers/base/firmware_class.c#L1196 Such a call should therefore not be repeated directly after the corresponding error information was received in the local variable "err" of the function "snd_korg1212_create". Thus remove a misplaced function call. 
Signed-off-by: Markus Elfring Signed-off-by: Takashi Iwai --- sound/pci/korg1212/korg1212.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/korg1212/korg1212.c b/sound/pci/korg1212/korg1212.c index c7b007164c99..4206ba44d8bb 100644 --- a/sound/pci/korg1212/korg1212.c +++ b/sound/pci/korg1212/korg1212.c @@ -2348,7 +2348,6 @@ static int snd_korg1212_create(struct snd_card *card, struct pci_dev *pci, err = request_firmware(&dsp_code, "korg/k1212.dsp", &pci->dev); if (err < 0) { - release_firmware(dsp_code); snd_printk(KERN_ERR "firmware not available\n"); snd_korg1212_free(korg1212); return err; -- cgit v1.2.3 From 6dbc6caf6602607edf45d486aaf1949888e5053e Mon Sep 17 00:00:00 2001 From: Yussuf Khalil Date: Sat, 25 Nov 2017 23:31:08 +0100 Subject: ALSA: ice1724: Fix resume issues with Prodigy 7.1 HiFi There are two issues after resuming from suspend on the Audiotrak Prodigy 7.1 HiFi: - the output volume is set to 100% - microphone input isn't working anymore This patch fixes these issues by reinitializing both codecs of the device and restoring the previous volumes during resume. 
Signed-off-by: Yussuf Khalil Signed-off-by: Takashi Iwai --- sound/pci/ice1712/prodigy_hifi.c | 131 ++++++++++++++++++++++++++++++--------- 1 file changed, 102 insertions(+), 29 deletions(-) diff --git a/sound/pci/ice1712/prodigy_hifi.c b/sound/pci/ice1712/prodigy_hifi.c index 2697402b5195..8dabd4d0211d 100644 --- a/sound/pci/ice1712/prodigy_hifi.c +++ b/sound/pci/ice1712/prodigy_hifi.c @@ -965,13 +965,32 @@ static int prodigy_hd2_add_controls(struct snd_ice1712 *ice) return 0; } +static void wm8766_init(struct snd_ice1712 *ice) +{ + static unsigned short wm8766_inits[] = { + WM8766_RESET, 0x0000, + WM8766_DAC_CTRL, 0x0120, + WM8766_INT_CTRL, 0x0022, /* I2S Normal Mode, 24 bit */ + WM8766_DAC_CTRL2, 0x0001, + WM8766_DAC_CTRL3, 0x0080, + WM8766_LDA1, 0x0100, + WM8766_LDA2, 0x0100, + WM8766_LDA3, 0x0100, + WM8766_RDA1, 0x0100, + WM8766_RDA2, 0x0100, + WM8766_RDA3, 0x0100, + WM8766_MUTE1, 0x0000, + WM8766_MUTE2, 0x0000, + }; + unsigned int i; -/* - * initialize the chip - */ -static int prodigy_hifi_init(struct snd_ice1712 *ice) + for (i = 0; i < ARRAY_SIZE(wm8766_inits); i += 2) + wm8766_spi_write(ice, wm8766_inits[i], wm8766_inits[i + 1]); +} + +static void wm8776_init(struct snd_ice1712 *ice) { - static unsigned short wm_inits[] = { + static unsigned short wm8776_inits[] = { /* These come first to reduce init pop noise */ WM_ADC_MUX, 0x0003, /* ADC mute */ /* 0x00c0 replaced by 0x0003 */ @@ -982,7 +1001,76 @@ static int prodigy_hifi_init(struct snd_ice1712 *ice) WM_POWERDOWN, 0x0008, /* All power-up except HP */ WM_RESET, 0x0000, /* reset */ }; - static unsigned short wm_inits2[] = { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(wm8776_inits); i += 2) + wm_put(ice, wm8776_inits[i], wm8776_inits[i + 1]); +} + +#ifdef CONFIG_PM_SLEEP +static int prodigy_hifi_resume(struct snd_ice1712 *ice) +{ + static unsigned short wm8776_reinit_registers[] = { + WM_MASTER_CTRL, + WM_DAC_INT, + WM_ADC_INT, + WM_OUT_MUX, + WM_HP_ATTEN_L, + WM_HP_ATTEN_R, + WM_PHASE_SWAP, + 
WM_DAC_CTRL2, + WM_ADC_ATTEN_L, + WM_ADC_ATTEN_R, + WM_ALC_CTRL1, + WM_ALC_CTRL2, + WM_ALC_CTRL3, + WM_NOISE_GATE, + WM_ADC_MUX, + /* no DAC attenuation here */ + }; + struct prodigy_hifi_spec *spec = ice->spec; + int i, ch; + + mutex_lock(&ice->gpio_mutex); + + /* reinitialize WM8776 and re-apply old register values */ + wm8776_init(ice); + schedule_timeout_uninterruptible(1); + for (i = 0; i < ARRAY_SIZE(wm8776_reinit_registers); i++) + wm_put(ice, wm8776_reinit_registers[i], + wm_get(ice, wm8776_reinit_registers[i])); + + /* reinitialize WM8766 and re-apply volumes for all DACs */ + wm8766_init(ice); + for (ch = 0; ch < 2; ch++) { + wm_set_vol(ice, WM_DAC_ATTEN_L + ch, + spec->vol[2 + ch], spec->master[ch]); + + wm8766_set_vol(ice, WM8766_LDA1 + ch, + spec->vol[0 + ch], spec->master[ch]); + + wm8766_set_vol(ice, WM8766_LDA2 + ch, + spec->vol[4 + ch], spec->master[ch]); + + wm8766_set_vol(ice, WM8766_LDA3 + ch, + spec->vol[6 + ch], spec->master[ch]); + } + + /* unmute WM8776 DAC */ + wm_put(ice, WM_DAC_MUTE, 0x00); + wm_put(ice, WM_DAC_CTRL1, 0x90); + + mutex_unlock(&ice->gpio_mutex); + return 0; +} +#endif + +/* + * initialize the chip + */ +static int prodigy_hifi_init(struct snd_ice1712 *ice) +{ + static unsigned short wm8776_defaults[] = { WM_MASTER_CTRL, 0x0022, /* 256fs, slave mode */ WM_DAC_INT, 0x0022, /* I2S, normal polarity, 24bit */ WM_ADC_INT, 0x0022, /* I2S, normal polarity, 24bit */ @@ -1010,22 +1098,6 @@ static int prodigy_hifi_init(struct snd_ice1712 *ice) WM_DAC_MUTE, 0x0000, /* DAC unmute */ WM_ADC_MUX, 0x0003, /* ADC unmute, both CD/Line On */ }; - static unsigned short wm8766_inits[] = { - WM8766_RESET, 0x0000, - WM8766_DAC_CTRL, 0x0120, - WM8766_INT_CTRL, 0x0022, /* I2S Normal Mode, 24 bit */ - WM8766_DAC_CTRL2, 0x0001, - WM8766_DAC_CTRL3, 0x0080, - WM8766_LDA1, 0x0100, - WM8766_LDA2, 0x0100, - WM8766_LDA3, 0x0100, - WM8766_RDA1, 0x0100, - WM8766_RDA2, 0x0100, - WM8766_RDA3, 0x0100, - WM8766_MUTE1, 0x0000, - WM8766_MUTE2, 0x0000, - }; - 
struct prodigy_hifi_spec *spec; unsigned int i; @@ -1052,16 +1124,17 @@ static int prodigy_hifi_init(struct snd_ice1712 *ice) ice->spec = spec; /* initialize WM8776 codec */ - for (i = 0; i < ARRAY_SIZE(wm_inits); i += 2) - wm_put(ice, wm_inits[i], wm_inits[i+1]); + wm8776_init(ice); schedule_timeout_uninterruptible(1); - for (i = 0; i < ARRAY_SIZE(wm_inits2); i += 2) - wm_put(ice, wm_inits2[i], wm_inits2[i+1]); + for (i = 0; i < ARRAY_SIZE(wm8776_defaults); i += 2) + wm_put(ice, wm8776_defaults[i], wm8776_defaults[i + 1]); - /* initialize WM8766 codec */ - for (i = 0; i < ARRAY_SIZE(wm8766_inits); i += 2) - wm8766_spi_write(ice, wm8766_inits[i], wm8766_inits[i+1]); + wm8766_init(ice); +#ifdef CONFIG_PM_SLEEP + ice->pm_resume = &prodigy_hifi_resume; + ice->pm_suspend_enabled = 1; +#endif return 0; } -- cgit v1.2.3 From df532185e8720baff120f55eb46058d270445d56 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 29 Nov 2017 02:38:27 +0000 Subject: ASoC: soc-core: add missing EXPORT_SYMBOL_GPL() for snd_soc_disconnect_sync Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 9047046c749c..345baa4f10c0 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1401,6 +1401,7 @@ void snd_soc_disconnect_sync(struct device *dev) snd_card_disconnect_sync(component->card->snd_card); } +EXPORT_SYMBOL_GPL(snd_soc_disconnect_sync); /** * snd_soc_add_dai_link - Add a DAI link dynamically -- cgit v1.2.3 From b2fb31bb7454d5479b1c7214ccd10c1af85a6245 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 29 Nov 2017 03:07:51 +0000 Subject: ASoC: rsnd: TDM 6ch needs 8ch clock for hw refine Renesas sound needs 8ch clock if TDM 6ch mode, and needs 2ch clock for 6ch or 8ch sound if Multi SSI mode. And these are related to before/after CTU (= Channel Transfer Unit). 
To calculate these we already have rsnd_runtime_channel_for_ssi() which returns runtime necessary channels. But, it is based on runtime->channels which is not yet set when hw refine. We need to use hw_params instead of runtime->xxx when hw refine, and it is not needed after runtime was set. This patch adds new hw_params on rsnd_dai_stream, and it will be removed on rsnd_hw_params(). This is very temporary dirty code, but it seems no choice at this point. Tested-by: Hiroyuki Yokoyama Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 92 +++++++++++++++++++++++++++++++++++------------- sound/soc/sh/rcar/rsnd.h | 15 ++++++-- 2 files changed, 80 insertions(+), 27 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index bd64dc6ec1c3..d76ad46a6fd9 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -197,16 +197,27 @@ int rsnd_io_is_working(struct rsnd_dai_stream *io) return 0; } -int rsnd_runtime_channel_original(struct rsnd_dai_stream *io) +int rsnd_runtime_channel_original_with_params(struct rsnd_dai_stream *io, + struct snd_pcm_hw_params *params) { struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - return runtime->channels; + /* + * params will be added when refine + * see + * __rsnd_soc_hw_rule_rate() + * __rsnd_soc_hw_rule_channels() + */ + if (params) + return params_channels(params); + else + return runtime->channels; } -int rsnd_runtime_channel_after_ctu(struct rsnd_dai_stream *io) +int rsnd_runtime_channel_after_ctu_with_params(struct rsnd_dai_stream *io, + struct snd_pcm_hw_params *params) { - int chan = rsnd_runtime_channel_original(io); + int chan = rsnd_runtime_channel_original_with_params(io, params); struct rsnd_mod *ctu_mod = rsnd_io_to_mod_ctu(io); if (ctu_mod) { @@ -219,12 +230,13 @@ int rsnd_runtime_channel_after_ctu(struct rsnd_dai_stream *io) return chan; } -int rsnd_runtime_channel_for_ssi(struct rsnd_dai_stream *io) +int
rsnd_runtime_channel_for_ssi_with_params(struct rsnd_dai_stream *io, + struct snd_pcm_hw_params *params) { struct rsnd_dai *rdai = rsnd_io_to_rdai(io); int chan = rsnd_io_is_play(io) ? - rsnd_runtime_channel_after_ctu(io) : - rsnd_runtime_channel_original(io); + rsnd_runtime_channel_after_ctu_with_params(io, params) : + rsnd_runtime_channel_original_with_params(io, params); /* Use Multi SSI */ if (rsnd_runtime_is_ssi_multi(io)) @@ -616,8 +628,6 @@ static int rsnd_soc_dai_trigger(struct snd_pcm_substream *substream, int cmd, switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: - rsnd_dai_stream_init(io, substream); - ret = rsnd_dai_call(init, io, priv); if (ret < 0) goto dai_trigger_end; @@ -639,7 +649,6 @@ static int rsnd_soc_dai_trigger(struct snd_pcm_substream *substream, int cmd, ret |= rsnd_dai_call(quit, io, priv); - rsnd_dai_stream_quit(io); break; default: ret = -EINVAL; @@ -784,8 +793,9 @@ static int rsnd_soc_hw_rule(struct rsnd_priv *priv, return snd_interval_refine(iv, &p); } -static int rsnd_soc_hw_rule_rate(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) +static int __rsnd_soc_hw_rule_rate(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule, + int is_play) { struct snd_interval *ic_ = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); struct snd_interval *ir = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); @@ -793,25 +803,37 @@ static int rsnd_soc_hw_rule_rate(struct snd_pcm_hw_params *params, struct snd_soc_dai *dai = rule->private; struct rsnd_dai *rdai = rsnd_dai_to_rdai(dai); struct rsnd_priv *priv = rsnd_rdai_to_priv(rdai); + struct rsnd_dai_stream *io = is_play ? 
&rdai->playback : &rdai->capture; /* * possible sampling rate limitation is same as * 2ch if it supports multi ssi + * and same as 8ch if TDM 6ch (see rsnd_ssi_config_init()) */ ic = *ic_; - if (1 < rsnd_rdai_ssi_lane_get(rdai)) { - ic.min = 2; - ic.max = 2; - } + ic.min = + ic.max = rsnd_runtime_channel_for_ssi_with_params(io, params); return rsnd_soc_hw_rule(priv, rsnd_soc_hw_rate_list, ARRAY_SIZE(rsnd_soc_hw_rate_list), &ic, ir); } +static int rsnd_soc_hw_rule_rate_playback(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + return __rsnd_soc_hw_rule_rate(params, rule, 1); +} + +static int rsnd_soc_hw_rule_rate_capture(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + return __rsnd_soc_hw_rule_rate(params, rule, 0); +} -static int rsnd_soc_hw_rule_channels(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) +static int __rsnd_soc_hw_rule_channels(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule, + int is_play) { struct snd_interval *ic_ = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS); struct snd_interval *ir = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); @@ -819,22 +841,34 @@ static int rsnd_soc_hw_rule_channels(struct snd_pcm_hw_params *params, struct snd_soc_dai *dai = rule->private; struct rsnd_dai *rdai = rsnd_dai_to_rdai(dai); struct rsnd_priv *priv = rsnd_rdai_to_priv(rdai); + struct rsnd_dai_stream *io = is_play ? 
&rdai->playback : &rdai->capture; /* * possible sampling rate limitation is same as * 2ch if it supports multi ssi + * and same as 8ch if TDM 6ch (see rsnd_ssi_config_init()) */ ic = *ic_; - if (1 < rsnd_rdai_ssi_lane_get(rdai)) { - ic.min = 2; - ic.max = 2; - } + ic.min = + ic.max = rsnd_runtime_channel_for_ssi_with_params(io, params); return rsnd_soc_hw_rule(priv, rsnd_soc_hw_channels_list, ARRAY_SIZE(rsnd_soc_hw_channels_list), ir, &ic); } +static int rsnd_soc_hw_rule_channels_playback(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + return __rsnd_soc_hw_rule_channels(params, rule, 1); +} + +static int rsnd_soc_hw_rule_channels_capture(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + return __rsnd_soc_hw_rule_channels(params, rule, 0); +} + static const struct snd_pcm_hardware rsnd_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | @@ -859,6 +893,8 @@ static int rsnd_soc_dai_startup(struct snd_pcm_substream *substream, int ret; int i; + rsnd_dai_stream_init(io, substream); + /* * Channel Limitation * It depends on Platform design @@ -886,11 +922,17 @@ static int rsnd_soc_dai_startup(struct snd_pcm_substream *substream, * It depends on Clock Master Mode */ if (rsnd_rdai_is_clk_master(rdai)) { + int is_play = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; + snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, - rsnd_soc_hw_rule_rate, dai, + is_play ? rsnd_soc_hw_rule_rate_playback : + rsnd_soc_hw_rule_rate_capture, + dai, SNDRV_PCM_HW_PARAM_CHANNELS, -1); snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, - rsnd_soc_hw_rule_channels, dai, + is_play ? 
rsnd_soc_hw_rule_channels_playback : + rsnd_soc_hw_rule_channels_capture, + dai, SNDRV_PCM_HW_PARAM_RATE, -1); } @@ -915,6 +957,8 @@ static void rsnd_soc_dai_shutdown(struct snd_pcm_substream *substream, * call rsnd_dai_call without spinlock */ rsnd_dai_call(nolock_stop, io, priv); + + rsnd_dai_stream_quit(io); } static const struct snd_soc_dai_ops rsnd_soc_dai_ops = { diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 57cd2bc773c2..ad6523595b0a 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -399,9 +399,18 @@ void rsnd_parse_connect_common(struct rsnd_dai *rdai, struct device_node *playback, struct device_node *capture); -int rsnd_runtime_channel_original(struct rsnd_dai_stream *io); -int rsnd_runtime_channel_after_ctu(struct rsnd_dai_stream *io); -int rsnd_runtime_channel_for_ssi(struct rsnd_dai_stream *io); +#define rsnd_runtime_channel_original(io) \ + rsnd_runtime_channel_original_with_params(io, NULL) +int rsnd_runtime_channel_original_with_params(struct rsnd_dai_stream *io, + struct snd_pcm_hw_params *params); +#define rsnd_runtime_channel_after_ctu(io) \ + rsnd_runtime_channel_after_ctu_with_params(io, NULL) +int rsnd_runtime_channel_after_ctu_with_params(struct rsnd_dai_stream *io, + struct snd_pcm_hw_params *params); +#define rsnd_runtime_channel_for_ssi(io) \ + rsnd_runtime_channel_for_ssi_with_params(io, NULL) +int rsnd_runtime_channel_for_ssi_with_params(struct rsnd_dai_stream *io, + struct snd_pcm_hw_params *params); int rsnd_runtime_is_ssi_multi(struct rsnd_dai_stream *io); int rsnd_runtime_is_ssi_tdm(struct rsnd_dai_stream *io); -- cgit v1.2.3 From e455b69ddf9b69326d0cab28d374faf3325489c9 Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Wed, 29 Nov 2017 17:08:03 +0800 Subject: misc: rtsx: Move Realtek Card Reader Driver to misc Because Realtek card reader drivers are pcie and usb drivers, and they bridge mmc subsystem and memstick subsystem, they are not mfd drivers. 
Greg and Lee Jones had a discussion about where to put the drivers, the result is that misc is a good place for them, so I move all files to misc. If I don't move them to a right place, I can't add any patch for this driver. Signed-off-by: Rui Feng Reviewed-by: Daniel Bristot de Oliveira Acked-by: Arnd Bergmann Acked-by: Ulf Hansson Acked-by: Greg Kroah-Hartman Tested-by: Perry Yuan Signed-off-by: Lee Jones --- drivers/memstick/host/Kconfig | 4 +- drivers/memstick/host/rtsx_pci_ms.c | 2 +- drivers/memstick/host/rtsx_usb_ms.c | 2 +- drivers/mfd/Kconfig | 21 - drivers/mfd/Makefile | 4 - drivers/mfd/rtl8411.c | 508 ------------ drivers/mfd/rts5209.c | 277 ------- drivers/mfd/rts5227.c | 374 --------- drivers/mfd/rts5229.c | 273 ------ drivers/mfd/rts5249.c | 741 ----------------- drivers/mfd/rtsx_pcr.c | 1569 ----------------------------------- drivers/mfd/rtsx_pcr.h | 103 --- drivers/mfd/rtsx_usb.c | 791 ------------------ drivers/misc/Kconfig | 5 + drivers/misc/Makefile | 1 + drivers/misc/cardreader/Kconfig | 20 + drivers/misc/cardreader/Makefile | 4 + drivers/misc/cardreader/rtl8411.c | 508 ++++++++++++ drivers/misc/cardreader/rts5209.c | 277 +++++++ drivers/misc/cardreader/rts5227.c | 374 +++++++++ drivers/misc/cardreader/rts5229.c | 273 ++++++ drivers/misc/cardreader/rts5249.c | 740 +++++++++++++++++ drivers/misc/cardreader/rtsx_pcr.c | 1569 +++++++++++++++++++++++++++++++++++ drivers/misc/cardreader/rtsx_pcr.h | 103 +++ drivers/misc/cardreader/rtsx_usb.c | 791 ++++++++++++++++++ drivers/mmc/host/Kconfig | 4 +- drivers/mmc/host/rtsx_pci_sdmmc.c | 2 +- drivers/mmc/host/rtsx_usb_sdmmc.c | 2 +- include/linux/mfd/rtsx_common.h | 50 -- include/linux/mfd/rtsx_pci.h | 1141 ------------------------- include/linux/mfd/rtsx_usb.h | 628 -------------- include/linux/rtsx_common.h | 50 ++ include/linux/rtsx_pci.h | 1141 +++++++++++++++++++++++++ include/linux/rtsx_usb.h | 628 ++++++++++++++ 34 files changed, 6492 insertions(+), 6488 deletions(-) delete mode 100644 
drivers/mfd/rtl8411.c delete mode 100644 drivers/mfd/rts5209.c delete mode 100644 drivers/mfd/rts5227.c delete mode 100644 drivers/mfd/rts5229.c delete mode 100644 drivers/mfd/rts5249.c delete mode 100644 drivers/mfd/rtsx_pcr.c delete mode 100644 drivers/mfd/rtsx_pcr.h delete mode 100644 drivers/mfd/rtsx_usb.c create mode 100644 drivers/misc/cardreader/Kconfig create mode 100644 drivers/misc/cardreader/Makefile create mode 100644 drivers/misc/cardreader/rtl8411.c create mode 100644 drivers/misc/cardreader/rts5209.c create mode 100644 drivers/misc/cardreader/rts5227.c create mode 100644 drivers/misc/cardreader/rts5229.c create mode 100644 drivers/misc/cardreader/rts5249.c create mode 100644 drivers/misc/cardreader/rtsx_pcr.c create mode 100644 drivers/misc/cardreader/rtsx_pcr.h create mode 100644 drivers/misc/cardreader/rtsx_usb.c delete mode 100644 include/linux/mfd/rtsx_common.h delete mode 100644 include/linux/mfd/rtsx_pci.h delete mode 100644 include/linux/mfd/rtsx_usb.h create mode 100644 include/linux/rtsx_common.h create mode 100644 include/linux/rtsx_pci.h create mode 100644 include/linux/rtsx_usb.h diff --git a/drivers/memstick/host/Kconfig b/drivers/memstick/host/Kconfig index 7310e32b5991..aa2b0786bbe9 100644 --- a/drivers/memstick/host/Kconfig +++ b/drivers/memstick/host/Kconfig @@ -45,7 +45,7 @@ config MEMSTICK_R592 config MEMSTICK_REALTEK_PCI tristate "Realtek PCI-E Memstick Card Interface Driver" - depends on MFD_RTSX_PCI + depends on MISC_RTSX_PCI help Say Y here to include driver code to support Memstick card interface of Realtek PCI-E card reader @@ -55,7 +55,7 @@ config MEMSTICK_REALTEK_PCI config MEMSTICK_REALTEK_USB tristate "Realtek USB Memstick Card Interface Driver" - depends on MFD_RTSX_USB + depends on MISC_RTSX_USB help Say Y here to include driver code to support Memstick card interface of Realtek RTS5129/39 series USB card reader diff --git a/drivers/memstick/host/rtsx_pci_ms.c b/drivers/memstick/host/rtsx_pci_ms.c index 
818fa94354ae..a44b4578ba4d 100644 --- a/drivers/memstick/host/rtsx_pci_ms.c +++ b/drivers/memstick/host/rtsx_pci_ms.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include struct realtek_pci_ms { diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index 2e3cf012ef48..4f64563df7de 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 1d20a800e967..1246ba1832d7 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -929,17 +929,6 @@ config MFD_RDC321X southbridge which provides access to GPIOs and Watchdog using the southbridge PCI device configuration space. -config MFD_RTSX_PCI - tristate "Realtek PCI-E card reader" - depends on PCI - select MFD_CORE - help - This supports for Realtek PCI-Express card reader including rts5209, - rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411, etc. - Realtek card reader supports access to many types of memory cards, - such as Memory Stick, Memory Stick Pro, Secure Digital and - MultiMediaCard. - config MFD_RT5033 tristate "Richtek RT5033 Power Management IC" depends on I2C @@ -953,16 +942,6 @@ config MFD_RT5033 sub-devices like charger, fuel gauge, flash LED, current source, LDO and Buck. -config MFD_RTSX_USB - tristate "Realtek USB card reader" - depends on USB - select MFD_CORE - help - Select this option to get support for Realtek USB 2.0 card readers - including RTS5129, RTS5139, RTS5179 and RTS5170. - Realtek card reader supports access to many types of memory cards, - such as Memory Stick Pro, Secure Digital and MultiMediaCard. 
- config MFD_RC5T583 bool "Ricoh RC5T583 Power Management system device" depends on I2C=y diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index d9474ade32e6..293e223c373d 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -19,10 +19,6 @@ obj-$(CONFIG_MFD_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_MFD_CROS_EC_SPI) += cros_ec_spi.o obj-$(CONFIG_MFD_EXYNOS_LPASS) += exynos-lpass.o -rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o -obj-$(CONFIG_MFD_RTSX_PCI) += rtsx_pci.o -obj-$(CONFIG_MFD_RTSX_USB) += rtsx_usb.o - obj-$(CONFIG_HTC_PASIC3) += htc-pasic3.o obj-$(CONFIG_HTC_I2CPLD) += htc-i2cpld.o diff --git a/drivers/mfd/rtl8411.c b/drivers/mfd/rtl8411.c deleted file mode 100644 index b3ae6592014a..000000000000 --- a/drivers/mfd/rtl8411.c +++ /dev/null @@ -1,508 +0,0 @@ -/* Driver for Realtek PCI-Express card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . 
- * - * Author: - * Wei WANG - * Roger Tseng - */ - -#include -#include -#include -#include - -#include "rtsx_pcr.h" - -static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr) -{ - u8 val; - - rtsx_pci_read_register(pcr, SYS_VER, &val); - return val & 0x0F; -} - -static int rtl8411b_is_qfn48(struct rtsx_pcr *pcr) -{ - u8 val = 0; - - rtsx_pci_read_register(pcr, RTL8411B_PACKAGE_MODE, &val); - - if (val & 0x2) - return 1; - else - return 0; -} - -static void rtl8411_fetch_vendor_settings(struct rtsx_pcr *pcr) -{ - u32 reg1 = 0; - u8 reg3 = 0; - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg1); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg1); - - if (!rtsx_vendor_setting_valid(reg1)) - return; - - pcr->aspm_en = rtsx_reg_to_aspm(reg1); - pcr->sd30_drive_sel_1v8 = - map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg1)); - pcr->card_drive_sel &= 0x3F; - pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg1); - - rtsx_pci_read_config_byte(pcr, PCR_SETTING_REG3, &reg3); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG3, reg3); - pcr->sd30_drive_sel_3v3 = rtl8411_reg_to_sd30_drive_sel_3v3(reg3); -} - -static void rtl8411b_fetch_vendor_settings(struct rtsx_pcr *pcr) -{ - u32 reg = 0; - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); - - if (!rtsx_vendor_setting_valid(reg)) - return; - - pcr->aspm_en = rtsx_reg_to_aspm(reg); - pcr->sd30_drive_sel_1v8 = - map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg)); - pcr->sd30_drive_sel_3v3 = - map_sd_drive(rtl8411b_reg_to_sd30_drive_sel_3v3(reg)); -} - -static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) -{ - rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07); -} - -static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr) -{ - rtsx_pci_init_cmd(pcr); - - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, - 0xFF, pcr->sd30_drive_sel_3v3); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL, - CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE);
- - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rtl8411b_extra_init_hw(struct rtsx_pcr *pcr) -{ - rtsx_pci_init_cmd(pcr); - - if (rtl8411b_is_qfn48(pcr)) - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, - CARD_PULL_CTL3, 0xFF, 0xF5); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, - 0xFF, pcr->sd30_drive_sel_3v3); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL, - CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, FUNC_FORCE_CTL, - 0x06, 0x00); - - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rtl8411_turn_on_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); -} - -static int rtl8411_turn_off_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); -} - -static int rtl8411_enable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); -} - -static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); -} - -static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card) -{ - int err; - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL, - BPP_LDO_POWB, BPP_LDO_SUSPEND); - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - /* To avoid too large in-rush current */ - udelay(150); - - err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, - BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON); - if (err < 0) - return err; - - udelay(150); - - err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, - BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON); - if (err < 0) - return err; - - udelay(150); - - err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, - BPP_POWER_MASK, BPP_POWER_ON); - if (err < 0) - return err; - - return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON); -} - -static int rtl8411_card_power_off(struct 
rtsx_pcr *pcr, int card) -{ - int err; - - err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, - BPP_POWER_MASK, BPP_POWER_OFF); - if (err < 0) - return err; - - return rtsx_pci_write_register(pcr, LDO_CTL, - BPP_LDO_POWB, BPP_LDO_SUSPEND); -} - -static int rtl8411_do_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage, - int bpp_tuned18_shift, int bpp_asic_1v8) -{ - u8 mask, val; - int err; - - mask = (BPP_REG_TUNED18 << bpp_tuned18_shift) | BPP_PAD_MASK; - if (voltage == OUTPUT_3V3) { - err = rtsx_pci_write_register(pcr, - SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); - if (err < 0) - return err; - val = (BPP_ASIC_3V3 << bpp_tuned18_shift) | BPP_PAD_3V3; - } else if (voltage == OUTPUT_1V8) { - err = rtsx_pci_write_register(pcr, - SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); - if (err < 0) - return err; - val = (bpp_asic_1v8 << bpp_tuned18_shift) | BPP_PAD_1V8; - } else { - return -EINVAL; - } - - return rtsx_pci_write_register(pcr, LDO_CTL, mask, val); -} - -static int rtl8411_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) -{ - return rtl8411_do_switch_output_voltage(pcr, voltage, - BPP_TUNED18_SHIFT_8411, BPP_ASIC_1V8); -} - -static int rtl8402_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) -{ - return rtl8411_do_switch_output_voltage(pcr, voltage, - BPP_TUNED18_SHIFT_8402, BPP_ASIC_2V0); -} - -static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr) -{ - unsigned int card_exist; - - card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); - card_exist &= CARD_EXIST; - if (!card_exist) { - /* Enable card CD */ - rtsx_pci_write_register(pcr, CD_PAD_CTL, - CD_DISABLE_MASK, CD_ENABLE); - /* Enable card interrupt */ - rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00); - return 0; - } - - if (hweight32(card_exist) > 1) { - rtsx_pci_write_register(pcr, CARD_PWR_CTL, - BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); - msleep(100); - - card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); - if (card_exist & MS_EXIST) - card_exist = MS_EXIST; - else if (card_exist 
& SD_EXIST) - card_exist = SD_EXIST; - else - card_exist = 0; - - rtsx_pci_write_register(pcr, CARD_PWR_CTL, - BPP_POWER_MASK, BPP_POWER_OFF); - - pcr_dbg(pcr, "After CD deglitch, card_exist = 0x%x\n", - card_exist); - } - - if (card_exist & MS_EXIST) { - /* Disable SD interrupt */ - rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40); - rtsx_pci_write_register(pcr, CD_PAD_CTL, - CD_DISABLE_MASK, MS_CD_EN_ONLY); - } else if (card_exist & SD_EXIST) { - /* Disable MS interrupt */ - rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80); - rtsx_pci_write_register(pcr, CD_PAD_CTL, - CD_DISABLE_MASK, SD_CD_EN_ONLY); - } - - return card_exist; -} - -static int rtl8411_conv_clk_and_div_n(int input, int dir) -{ - int output; - - if (dir == CLK_TO_DIV_N) - output = input * 4 / 5 - 2; - else - output = (input + 2) * 5 / 4; - - return output; -} - -static const struct pcr_ops rtl8411_pcr_ops = { - .fetch_vendor_settings = rtl8411_fetch_vendor_settings, - .extra_init_hw = rtl8411_extra_init_hw, - .optimize_phy = NULL, - .turn_on_led = rtl8411_turn_on_led, - .turn_off_led = rtl8411_turn_off_led, - .enable_auto_blink = rtl8411_enable_auto_blink, - .disable_auto_blink = rtl8411_disable_auto_blink, - .card_power_on = rtl8411_card_power_on, - .card_power_off = rtl8411_card_power_off, - .switch_output_voltage = rtl8411_switch_output_voltage, - .cd_deglitch = rtl8411_cd_deglitch, - .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, - .force_power_down = rtl8411_force_power_down, -}; - -static const struct pcr_ops rtl8402_pcr_ops = { - .fetch_vendor_settings = rtl8411_fetch_vendor_settings, - .extra_init_hw = rtl8411_extra_init_hw, - .optimize_phy = NULL, - .turn_on_led = rtl8411_turn_on_led, - .turn_off_led = rtl8411_turn_off_led, - .enable_auto_blink = rtl8411_enable_auto_blink, - .disable_auto_blink = rtl8411_disable_auto_blink, - .card_power_on = rtl8411_card_power_on, - .card_power_off = rtl8411_card_power_off, - .switch_output_voltage = rtl8402_switch_output_voltage, - 
.cd_deglitch = rtl8411_cd_deglitch, - .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, - .force_power_down = rtl8411_force_power_down, -}; - -static const struct pcr_ops rtl8411b_pcr_ops = { - .fetch_vendor_settings = rtl8411b_fetch_vendor_settings, - .extra_init_hw = rtl8411b_extra_init_hw, - .optimize_phy = NULL, - .turn_on_led = rtl8411_turn_on_led, - .turn_off_led = rtl8411_turn_off_led, - .enable_auto_blink = rtl8411_enable_auto_blink, - .disable_auto_blink = rtl8411_disable_auto_blink, - .card_power_on = rtl8411_card_power_on, - .card_power_off = rtl8411_card_power_off, - .switch_output_voltage = rtl8411_switch_output_voltage, - .cd_deglitch = rtl8411_cd_deglitch, - .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, - .force_power_down = rtl8411_force_power_down, -}; - -/* SD Pull Control Enable: - * SD_DAT[3:0] ==> pull up - * SD_CD ==> pull up - * SD_WP ==> pull up - * SD_CMD ==> pull up - * SD_CLK ==> pull down - */ -static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), - 0, -}; - -/* SD Pull Control Disable: - * SD_DAT[3:0] ==> pull down - * SD_CD ==> pull up - * SD_WP ==> pull down - * SD_CMD ==> pull down - * SD_CLK ==> pull down - */ -static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), - 0, -}; - -/* MS Pull Control Enable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05), - 
RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), - 0, -}; - -/* MS Pull Control Disable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), - 0, -}; - -static const u32 rtl8411b_qfn64_sd_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x09 | 0xD0), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), - 0, -}; - -static const u32 rtl8411b_qfn48_sd_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x69 | 0x90), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x08 | 0x11), - 0, -}; - -static const u32 rtl8411b_qfn64_sd_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), - 0, -}; - -static const u32 rtl8411b_qfn48_sd_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), - 0, -}; - -static const u32 rtl8411b_qfn64_ms_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05 | 0x50), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), - 0, -}; - -static const u32 rtl8411b_qfn48_ms_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - 
RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), - 0, -}; - -static const u32 rtl8411b_qfn64_ms_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), - 0, -}; - -static const u32 rtl8411b_qfn48_ms_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), - 0, -}; - -static void rtl8411_init_common_params(struct rtsx_pcr *pcr) -{ - pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; - pcr->num_slots = 2; - pcr->flags = 0; - pcr->card_drive_sel = RTL8411_CARD_DRIVE_DEFAULT; - pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; - pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; - pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(23, 7, 14); - pcr->rx_initial_phase = SET_CLOCK_PHASE(4, 3, 10); - pcr->ic_version = rtl8411_get_ic_version(pcr); -} - -void rtl8411_init_params(struct rtsx_pcr *pcr) -{ - rtl8411_init_common_params(pcr); - pcr->ops = &rtl8411_pcr_ops; - set_pull_ctrl_tables(pcr, rtl8411); -} - -void rtl8411b_init_params(struct rtsx_pcr *pcr) -{ - rtl8411_init_common_params(pcr); - pcr->ops = &rtl8411b_pcr_ops; - if (rtl8411b_is_qfn48(pcr)) - set_pull_ctrl_tables(pcr, rtl8411b_qfn48); - else - set_pull_ctrl_tables(pcr, rtl8411b_qfn64); -} - -void rtl8402_init_params(struct rtsx_pcr *pcr) -{ - rtl8411_init_common_params(pcr); - pcr->ops = &rtl8402_pcr_ops; - set_pull_ctrl_tables(pcr, rtl8411); -} diff --git a/drivers/mfd/rts5209.c b/drivers/mfd/rts5209.c deleted file mode 100644 index b95beecf767f..000000000000 --- a/drivers/mfd/rts5209.c +++ /dev/null @@ -1,277 +0,0 @@ -/* Driver for Realtek PCI-Express card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. 
All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . - * - * Author: - * Wei WANG - */ - -#include -#include -#include - -#include "rtsx_pcr.h" - -static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr) -{ - u8 val; - - val = rtsx_pci_readb(pcr, 0x1C); - return val & 0x0F; -} - -static void rts5209_fetch_vendor_settings(struct rtsx_pcr *pcr) -{ - u32 reg; - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); - - if (rts5209_vendor_setting1_valid(reg)) { - if (rts5209_reg_check_ms_pmos(reg)) - pcr->flags |= PCR_MS_PMOS; - pcr->aspm_en = rts5209_reg_to_aspm(reg); - } - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); - - if (rts5209_vendor_setting2_valid(reg)) { - pcr->sd30_drive_sel_1v8 = - rts5209_reg_to_sd30_drive_sel_1v8(reg); - pcr->sd30_drive_sel_3v3 = - rts5209_reg_to_sd30_drive_sel_3v3(reg); - pcr->card_drive_sel = rts5209_reg_to_card_drive_sel(reg); - } -} - -static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) -{ - rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07); -} - -static int rts5209_extra_init_hw(struct rtsx_pcr *pcr) -{ - rtsx_pci_init_cmd(pcr); - - /* Turn off LED */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03); - /* Reset ASPM state to default value */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0);
- /* Force CLKREQ# PIN to drive 0 to request clock */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); - /* Configure GPIO as output */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03); - /* Configure driving */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, - 0xFF, pcr->sd30_drive_sel_3v3); - - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5209_optimize_phy(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966); -} - -static int rts5209_turn_on_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); -} - -static int rts5209_turn_off_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); -} - -static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); -} - -static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); -} - -static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card) -{ - int err; - u8 pwr_mask, partial_pwr_on, pwr_on; - - pwr_mask = SD_POWER_MASK; - partial_pwr_on = SD_PARTIAL_POWER_ON; - pwr_on = SD_POWER_ON; - - if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) { - pwr_mask = MS_POWER_MASK; - partial_pwr_on = MS_PARTIAL_POWER_ON; - pwr_on = MS_POWER_ON; - } - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - pwr_mask, partial_pwr_on); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x04); - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - /* To avoid too large in-rush current */ - udelay(150); - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x00); - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card) 
-{ - u8 pwr_mask, pwr_off; - - pwr_mask = SD_POWER_MASK; - pwr_off = SD_POWER_OFF; - - if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) { - pwr_mask = MS_POWER_MASK; - pwr_off = MS_POWER_OFF; - } - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x06); - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5209_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) -{ - int err; - - if (voltage == OUTPUT_3V3) { - err = rtsx_pci_write_register(pcr, - SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); - if (err < 0) - return err; - err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); - if (err < 0) - return err; - } else if (voltage == OUTPUT_1V8) { - err = rtsx_pci_write_register(pcr, - SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); - if (err < 0) - return err; - err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24); - if (err < 0) - return err; - } else { - return -EINVAL; - } - - return 0; -} - -static const struct pcr_ops rts5209_pcr_ops = { - .fetch_vendor_settings = rts5209_fetch_vendor_settings, - .extra_init_hw = rts5209_extra_init_hw, - .optimize_phy = rts5209_optimize_phy, - .turn_on_led = rts5209_turn_on_led, - .turn_off_led = rts5209_turn_off_led, - .enable_auto_blink = rts5209_enable_auto_blink, - .disable_auto_blink = rts5209_disable_auto_blink, - .card_power_on = rts5209_card_power_on, - .card_power_off = rts5209_card_power_off, - .switch_output_voltage = rts5209_switch_output_voltage, - .cd_deglitch = NULL, - .conv_clk_and_div_n = NULL, - .force_power_down = rts5209_force_power_down, -}; - -/* SD Pull Control Enable: - * SD_DAT[3:0] ==> pull up - * SD_CD ==> pull up - * SD_WP ==> pull up - * SD_CMD ==> pull up - * SD_CLK ==> pull down - */ -static const u32 rts5209_sd_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL2, 
0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), - 0, -}; - -/* SD Pull Control Disable: - * SD_DAT[3:0] ==> pull down - * SD_CD ==> pull up - * SD_WP ==> pull down - * SD_CMD ==> pull down - * SD_CLK ==> pull down - */ -static const u32 rts5209_sd_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), - 0, -}; - -/* MS Pull Control Enable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rts5209_ms_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), - 0, -}; - -/* MS Pull Control Disable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rts5209_ms_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), - 0, -}; - -void rts5209_init_params(struct rtsx_pcr *pcr) -{ - pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | - EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT; - pcr->num_slots = 2; - pcr->ops = &rts5209_pcr_ops; - - pcr->flags = 0; - pcr->card_drive_sel = RTS5209_CARD_DRIVE_DEFAULT; - pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; - pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; - pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 16); - pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); - - pcr->ic_version = rts5209_get_ic_version(pcr); - pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl; - pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl; - pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl; - pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl; -} diff --git a/drivers/mfd/rts5227.c b/drivers/mfd/rts5227.c deleted file mode 100644 index ff296a4bf3d2..000000000000 --- a/drivers/mfd/rts5227.c +++ /dev/null @@ -1,374 +0,0 @@ -/* Driver for Realtek PCI-Express card reader - * - * Copyright(c) 2009-2013 Realtek 
Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . - * - * Author: - * Wei WANG - * Roger Tseng - */ - -#include -#include -#include - -#include "rtsx_pcr.h" - -static u8 rts5227_get_ic_version(struct rtsx_pcr *pcr) -{ - u8 val; - - rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); - return val & 0x0F; -} - -static void rts5227_fill_driving(struct rtsx_pcr *pcr, u8 voltage) -{ - u8 driving_3v3[4][3] = { - {0x13, 0x13, 0x13}, - {0x96, 0x96, 0x96}, - {0x7F, 0x7F, 0x7F}, - {0x96, 0x96, 0x96}, - }; - u8 driving_1v8[4][3] = { - {0x99, 0x99, 0x99}, - {0xAA, 0xAA, 0xAA}, - {0xFE, 0xFE, 0xFE}, - {0xB3, 0xB3, 0xB3}, - }; - u8 (*driving)[3], drive_sel; - - if (voltage == OUTPUT_3V3) { - driving = driving_3v3; - drive_sel = pcr->sd30_drive_sel_3v3; - } else { - driving = driving_1v8; - drive_sel = pcr->sd30_drive_sel_1v8; - } - - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, - 0xFF, driving[drive_sel][0]); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, - 0xFF, driving[drive_sel][1]); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, - 0xFF, driving[drive_sel][2]); -} - -static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr) -{ - u32 reg; - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); - - if (!rtsx_vendor_setting_valid(reg)) - return; - - pcr->aspm_en = 
rtsx_reg_to_aspm(reg); - pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); - pcr->card_drive_sel &= 0x3F; - pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); - pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); - if (rtsx_reg_check_reverse_socket(reg)) - pcr->flags |= PCR_REVERSE_SOCKET; -} - -static void rts5227_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) -{ - /* Set relink_time to 0 */ - rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, 0xFF, 0); - rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, 0xFF, 0); - rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0); - - if (pm_state == HOST_ENTER_S3) - rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x10); - - rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); -} - -static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) -{ - u16 cap; - - rtsx_pci_init_cmd(pcr); - - /* Configure GPIO as output */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); - /* Reset ASPM state to default value */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); - /* Switch LDO3318 source from DV33 to card_3v3 */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); - /* LED shine disabled, set initial shine cycle period */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); - /* Configure LTR */ - pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cap); - if (cap & PCI_EXP_DEVCTL2_LTR_EN) - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LTR_CTL, 0xFF, 0xA3); - /* Configure OBFF */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, 0x03, 0x03); - /* Configure driving */ - rts5227_fill_driving(pcr, OUTPUT_3V3); - /* Configure force_clock_req */ - if (pcr->flags & PCR_REVERSE_SOCKET) - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0xB8); - else - 
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0x88); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, pcr->reg_pm_ctrl3, 0x10, 0x00); - - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5227_optimize_phy(struct rtsx_pcr *pcr) -{ - int err; - - err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00); - if (err < 0) - return err; - - /* Optimize RX sensitivity */ - return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); -} - -static int rts5227_turn_on_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); -} - -static int rts5227_turn_off_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); -} - -static int rts5227_enable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); -} - -static int rts5227_disable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); -} - -static int rts5227_card_power_on(struct rtsx_pcr *pcr, int card) -{ - int err; - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK, SD_PARTIAL_POWER_ON); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x02); - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - /* To avoid too large in-rush current */ - udelay(150); - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK, SD_POWER_ON); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x06); - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5227_card_power_off(struct rtsx_pcr *pcr, int card) -{ - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK | PMOS_STRG_MASK, - SD_POWER_OFF | PMOS_STRG_400mA); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0X00); - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5227_switch_output_voltage(struct rtsx_pcr 
*pcr, u8 voltage) -{ - int err; - - if (voltage == OUTPUT_3V3) { - err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); - if (err < 0) - return err; - } else if (voltage == OUTPUT_1V8) { - err = rtsx_pci_write_phy_register(pcr, 0x11, 0x3C02); - if (err < 0) - return err; - err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C80 | 0x24); - if (err < 0) - return err; - } else { - return -EINVAL; - } - - /* set pad drive */ - rtsx_pci_init_cmd(pcr); - rts5227_fill_driving(pcr, voltage); - return rtsx_pci_send_cmd(pcr, 100); -} - -static const struct pcr_ops rts5227_pcr_ops = { - .fetch_vendor_settings = rts5227_fetch_vendor_settings, - .extra_init_hw = rts5227_extra_init_hw, - .optimize_phy = rts5227_optimize_phy, - .turn_on_led = rts5227_turn_on_led, - .turn_off_led = rts5227_turn_off_led, - .enable_auto_blink = rts5227_enable_auto_blink, - .disable_auto_blink = rts5227_disable_auto_blink, - .card_power_on = rts5227_card_power_on, - .card_power_off = rts5227_card_power_off, - .switch_output_voltage = rts5227_switch_output_voltage, - .cd_deglitch = NULL, - .conv_clk_and_div_n = NULL, - .force_power_down = rts5227_force_power_down, -}; - -/* SD Pull Control Enable: - * SD_DAT[3:0] ==> pull up - * SD_CD ==> pull up - * SD_WP ==> pull up - * SD_CMD ==> pull up - * SD_CLK ==> pull down - */ -static const u32 rts5227_sd_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), - 0, -}; - -/* SD Pull Control Disable: - * SD_DAT[3:0] ==> pull down - * SD_CD ==> pull up - * SD_WP ==> pull down - * SD_CMD ==> pull down - * SD_CLK ==> pull down - */ -static const u32 rts5227_sd_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), - 0, -}; - -/* MS Pull Control Enable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rts5227_ms_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), - 0, -}; - -/* MS Pull 
Control Disable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rts5227_ms_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), - 0, -}; - -void rts5227_init_params(struct rtsx_pcr *pcr) -{ - pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; - pcr->num_slots = 2; - pcr->ops = &rts5227_pcr_ops; - - pcr->flags = 0; - pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; - pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; - pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; - pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); - pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 7, 7); - - pcr->ic_version = rts5227_get_ic_version(pcr); - pcr->sd_pull_ctl_enable_tbl = rts5227_sd_pull_ctl_enable_tbl; - pcr->sd_pull_ctl_disable_tbl = rts5227_sd_pull_ctl_disable_tbl; - pcr->ms_pull_ctl_enable_tbl = rts5227_ms_pull_ctl_enable_tbl; - pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl; - - pcr->reg_pm_ctrl3 = PM_CTRL3; -} - -static int rts522a_optimize_phy(struct rtsx_pcr *pcr) -{ - int err; - - err = rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, D3_DELINK_MODE_EN, - 0x00); - if (err < 0) - return err; - - if (is_version(pcr, 0x522A, IC_VER_A)) { - err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, - PHY_RCR2_INIT_27S); - if (err) - return err; - - rtsx_pci_write_phy_register(pcr, PHY_RCR1, PHY_RCR1_INIT_27S); - rtsx_pci_write_phy_register(pcr, PHY_FLD0, PHY_FLD0_INIT_27S); - rtsx_pci_write_phy_register(pcr, PHY_FLD3, PHY_FLD3_INIT_27S); - rtsx_pci_write_phy_register(pcr, PHY_FLD4, PHY_FLD4_INIT_27S); - } - - return 0; -} - -static int rts522a_extra_init_hw(struct rtsx_pcr *pcr) -{ - rts5227_extra_init_hw(pcr); - - rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG, - FUNC_FORCE_UPME_XMT_DBG); - rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04); - rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); - rtsx_pci_write_register(pcr, 
PM_CLK_FORCE_CTL, 0xFF, 0x11); - - return 0; -} - -/* rts522a operations mainly derived from rts5227, except phy/hw init setting. - */ -static const struct pcr_ops rts522a_pcr_ops = { - .fetch_vendor_settings = rts5227_fetch_vendor_settings, - .extra_init_hw = rts522a_extra_init_hw, - .optimize_phy = rts522a_optimize_phy, - .turn_on_led = rts5227_turn_on_led, - .turn_off_led = rts5227_turn_off_led, - .enable_auto_blink = rts5227_enable_auto_blink, - .disable_auto_blink = rts5227_disable_auto_blink, - .card_power_on = rts5227_card_power_on, - .card_power_off = rts5227_card_power_off, - .switch_output_voltage = rts5227_switch_output_voltage, - .cd_deglitch = NULL, - .conv_clk_and_div_n = NULL, - .force_power_down = rts5227_force_power_down, -}; - -void rts522a_init_params(struct rtsx_pcr *pcr) -{ - rts5227_init_params(pcr); - - pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; -} diff --git a/drivers/mfd/rts5229.c b/drivers/mfd/rts5229.c deleted file mode 100644 index 9ed9dc84eac8..000000000000 --- a/drivers/mfd/rts5229.c +++ /dev/null @@ -1,273 +0,0 @@ -/* Driver for Realtek PCI-Express card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . 
- * - * Author: - * Wei WANG - */ - -#include -#include -#include - -#include "rtsx_pcr.h" - -static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr) -{ - u8 val; - - rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); - return val & 0x0F; -} - -static void rts5229_fetch_vendor_settings(struct rtsx_pcr *pcr) -{ - u32 reg; - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); - - if (!rtsx_vendor_setting_valid(reg)) - return; - - pcr->aspm_en = rtsx_reg_to_aspm(reg); - pcr->sd30_drive_sel_1v8 = - map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg)); - pcr->card_drive_sel &= 0x3F; - pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); - pcr->sd30_drive_sel_3v3 = - map_sd_drive(rtsx_reg_to_sd30_drive_sel_3v3(reg)); -} - -static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) -{ - rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); -} - -static int rts5229_extra_init_hw(struct rtsx_pcr *pcr) -{ - rtsx_pci_init_cmd(pcr); - - /* Configure GPIO as output */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); - /* Reset ASPM state to default value */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); - /* Force CLKREQ# PIN to drive 0 to request clock */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); - /* Switch LDO3318 source from DV33 to card_3v3 */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); - /* LED shine disabled, set initial shine cycle period */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); - /* Configure driving */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, - 0xFF, pcr->sd30_drive_sel_3v3); - - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5229_optimize_phy(struct rtsx_pcr *pcr) -{ - /* Optimize RX sensitivity 
*/ - return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); -} - -static int rts5229_turn_on_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); -} - -static int rts5229_turn_off_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); -} - -static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); -} - -static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); -} - -static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card) -{ - int err; - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK, SD_PARTIAL_POWER_ON); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x02); - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - /* To avoid too large in-rush current */ - udelay(150); - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK, SD_POWER_ON); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x06); - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card) -{ - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK | PMOS_STRG_MASK, - SD_POWER_OFF | PMOS_STRG_400mA); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x00); - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts5229_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) -{ - int err; - - if (voltage == OUTPUT_3V3) { - err = rtsx_pci_write_register(pcr, - SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); - if (err < 0) - return err; - err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); - if (err < 0) - return err; - } else if (voltage == OUTPUT_1V8) { - err = rtsx_pci_write_register(pcr, - SD30_DRIVE_SEL, 0x07, 
pcr->sd30_drive_sel_1v8); - if (err < 0) - return err; - err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24); - if (err < 0) - return err; - } else { - return -EINVAL; - } - - return 0; -} - -static const struct pcr_ops rts5229_pcr_ops = { - .fetch_vendor_settings = rts5229_fetch_vendor_settings, - .extra_init_hw = rts5229_extra_init_hw, - .optimize_phy = rts5229_optimize_phy, - .turn_on_led = rts5229_turn_on_led, - .turn_off_led = rts5229_turn_off_led, - .enable_auto_blink = rts5229_enable_auto_blink, - .disable_auto_blink = rts5229_disable_auto_blink, - .card_power_on = rts5229_card_power_on, - .card_power_off = rts5229_card_power_off, - .switch_output_voltage = rts5229_switch_output_voltage, - .cd_deglitch = NULL, - .conv_clk_and_div_n = NULL, - .force_power_down = rts5229_force_power_down, -}; - -/* SD Pull Control Enable: - * SD_DAT[3:0] ==> pull up - * SD_CD ==> pull up - * SD_WP ==> pull up - * SD_CMD ==> pull up - * SD_CLK ==> pull down - */ -static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), - 0, -}; - -/* For RTS5229 version C */ -static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9), - 0, -}; - -/* SD Pull Control Disable: - * SD_DAT[3:0] ==> pull down - * SD_CD ==> pull up - * SD_WP ==> pull down - * SD_CMD ==> pull down - * SD_CLK ==> pull down - */ -static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), - 0, -}; - -/* For RTS5229 version C */ -static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = { - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5), - 0, -}; - -/* MS Pull Control Enable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rts5229_ms_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), - 0, -}; - -/* 
MS Pull Control Disable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rts5229_ms_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), - 0, -}; - -void rts5229_init_params(struct rtsx_pcr *pcr) -{ - pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; - pcr->num_slots = 2; - pcr->ops = &rts5229_pcr_ops; - - pcr->flags = 0; - pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; - pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; - pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; - pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); - pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 6, 6); - - pcr->ic_version = rts5229_get_ic_version(pcr); - if (pcr->ic_version == IC_VER_C) { - pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2; - pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2; - } else { - pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1; - pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl1; - } - pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl; - pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl; -} diff --git a/drivers/mfd/rts5249.c b/drivers/mfd/rts5249.c deleted file mode 100644 index 7fcf37ba922c..000000000000 --- a/drivers/mfd/rts5249.c +++ /dev/null @@ -1,741 +0,0 @@ -/* Driver for Realtek PCI-Express card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . - * - * Author: - * Wei WANG - */ - -#include -#include -#include - -#include "rtsx_pcr.h" - -static u8 rts5249_get_ic_version(struct rtsx_pcr *pcr) -{ - u8 val; - - rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); - return val & 0x0F; -} - -static void rts5249_fill_driving(struct rtsx_pcr *pcr, u8 voltage) -{ - u8 driving_3v3[4][3] = { - {0x11, 0x11, 0x18}, - {0x55, 0x55, 0x5C}, - {0xFF, 0xFF, 0xFF}, - {0x96, 0x96, 0x96}, - }; - u8 driving_1v8[4][3] = { - {0xC4, 0xC4, 0xC4}, - {0x3C, 0x3C, 0x3C}, - {0xFE, 0xFE, 0xFE}, - {0xB3, 0xB3, 0xB3}, - }; - u8 (*driving)[3], drive_sel; - - if (voltage == OUTPUT_3V3) { - driving = driving_3v3; - drive_sel = pcr->sd30_drive_sel_3v3; - } else { - driving = driving_1v8; - drive_sel = pcr->sd30_drive_sel_1v8; - } - - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, - 0xFF, driving[drive_sel][0]); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, - 0xFF, driving[drive_sel][1]); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, - 0xFF, driving[drive_sel][2]); -} - -static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) -{ - u32 reg; - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); - - if (!rtsx_vendor_setting_valid(reg)) { - pcr_dbg(pcr, "skip fetch vendor setting\n"); - return; - } - - pcr->aspm_en = rtsx_reg_to_aspm(reg); - pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); - pcr->card_drive_sel &= 0x3F; - pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); - - rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); - pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); - pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); - if (rtsx_reg_check_reverse_socket(reg)) - pcr->flags |= PCR_REVERSE_SOCKET; -} - -static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) 
-{ - /* Set relink_time to 0 */ - rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, 0xFF, 0); - rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, 0xFF, 0); - rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0); - - if (pm_state == HOST_ENTER_S3) - rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, - D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); - - rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); -} - -static void rts5249_init_from_cfg(struct rtsx_pcr *pcr) -{ - struct rtsx_cr_option *option = &(pcr->option); - u32 lval; - - if (CHK_PCI_PID(pcr, PID_524A)) - rtsx_pci_read_config_dword(pcr, - PCR_ASPM_SETTING_REG1, &lval); - else - rtsx_pci_read_config_dword(pcr, - PCR_ASPM_SETTING_REG2, &lval); - - if (lval & ASPM_L1_1_EN_MASK) - rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); - - if (lval & ASPM_L1_2_EN_MASK) - rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); - - if (lval & PM_L1_1_EN_MASK) - rtsx_set_dev_flag(pcr, PM_L1_1_EN); - - if (lval & PM_L1_2_EN_MASK) - rtsx_set_dev_flag(pcr, PM_L1_2_EN); - - if (option->ltr_en) { - u16 val; - - pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); - if (val & PCI_EXP_DEVCTL2_LTR_EN) { - option->ltr_enabled = true; - option->ltr_active = true; - rtsx_set_ltr_latency(pcr, option->ltr_active_latency); - } else { - option->ltr_enabled = false; - } - } -} - -static int rts5249_init_from_hw(struct rtsx_pcr *pcr) -{ - struct rtsx_cr_option *option = &(pcr->option); - - if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN - | PM_L1_1_EN | PM_L1_2_EN)) - option->force_clkreq_0 = false; - else - option->force_clkreq_0 = true; - - return 0; -} - -static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) -{ - struct rtsx_cr_option *option = &(pcr->option); - - rts5249_init_from_cfg(pcr); - rts5249_init_from_hw(pcr); - - rtsx_pci_init_cmd(pcr); - - /* Rest L1SUB Config */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00); - /* Configure GPIO as output */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); - /* Reset 
ASPM state to default value */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); - /* Switch LDO3318 source from DV33 to card_3v3 */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); - /* LED shine disabled, set initial shine cycle period */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); - /* Configure driving */ - rts5249_fill_driving(pcr, OUTPUT_3V3); - if (pcr->flags & PCR_REVERSE_SOCKET) - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); - else - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); - - /* - * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced - * to drive low, and we forcibly request clock. - */ - if (option->force_clkreq_0) - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); - else - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, - FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); - - return rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); -} - -static int rts5249_optimize_phy(struct rtsx_pcr *pcr) -{ - int err; - - err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00); - if (err < 0) - return err; - - err = rtsx_pci_write_phy_register(pcr, PHY_REV, - PHY_REV_RESV | PHY_REV_RXIDLE_LATCHED | - PHY_REV_P1_EN | PHY_REV_RXIDLE_EN | - PHY_REV_CLKREQ_TX_EN | PHY_REV_RX_PWST | - PHY_REV_CLKREQ_DT_1_0 | PHY_REV_STOP_CLKRD | - PHY_REV_STOP_CLKWR); - if (err < 0) - return err; - - msleep(1); - - err = rtsx_pci_write_phy_register(pcr, PHY_BPCR, - PHY_BPCR_IBRXSEL | PHY_BPCR_IBTXSEL | - PHY_BPCR_IB_FILTER | PHY_BPCR_CMIRROR_EN); - if (err < 0) - return err; - - err = rtsx_pci_write_phy_register(pcr, PHY_PCR, - PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 | - PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | - PHY_PCR_RSSI_EN | PHY_PCR_RX10K); - if (err < 0) - return err; - - err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, - PHY_RCR2_EMPHASE_EN | PHY_RCR2_NADJR | - PHY_RCR2_CDR_SR_2 | 
PHY_RCR2_FREQSEL_12 | - PHY_RCR2_CDR_SC_12P | PHY_RCR2_CALIB_LATE); - if (err < 0) - return err; - - err = rtsx_pci_write_phy_register(pcr, PHY_FLD4, - PHY_FLD4_FLDEN_SEL | PHY_FLD4_REQ_REF | - PHY_FLD4_RXAMP_OFF | PHY_FLD4_REQ_ADDA | - PHY_FLD4_BER_COUNT | PHY_FLD4_BER_TIMER | - PHY_FLD4_BER_CHK_EN); - if (err < 0) - return err; - err = rtsx_pci_write_phy_register(pcr, PHY_RDR, - PHY_RDR_RXDSEL_1_9 | PHY_SSC_AUTO_PWD); - if (err < 0) - return err; - err = rtsx_pci_write_phy_register(pcr, PHY_RCR1, - PHY_RCR1_ADP_TIME_4 | PHY_RCR1_VCO_COARSE); - if (err < 0) - return err; - err = rtsx_pci_write_phy_register(pcr, PHY_FLD3, - PHY_FLD3_TIMER_4 | PHY_FLD3_TIMER_6 | - PHY_FLD3_RXDELINK); - if (err < 0) - return err; - - return rtsx_pci_write_phy_register(pcr, PHY_TUNE, - PHY_TUNE_TUNEREF_1_0 | PHY_TUNE_VBGSEL_1252 | - PHY_TUNE_SDBUS_33 | PHY_TUNE_TUNED18 | - PHY_TUNE_TUNED12 | PHY_TUNE_TUNEA12); -} - -static int rtsx_base_turn_on_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); -} - -static int rtsx_base_turn_off_led(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); -} - -static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); -} - -static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr) -{ - return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); -} - -static int rtsx_base_card_power_on(struct rtsx_pcr *pcr, int card) -{ - int err; - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK, SD_VCC_PARTIAL_POWER_ON); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x02); - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - msleep(5); - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK, SD_VCC_POWER_ON); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x06); - 
return rtsx_pci_send_cmd(pcr, 100); -} - -static int rtsx_base_card_power_off(struct rtsx_pcr *pcr, int card) -{ - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, - SD_POWER_MASK, SD_POWER_OFF); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, - LDO3318_PWR_MASK, 0x00); - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rtsx_base_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) -{ - int err; - u16 append; - - switch (voltage) { - case OUTPUT_3V3: - err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK, - PHY_TUNE_VOLTAGE_3V3); - if (err < 0) - return err; - break; - case OUTPUT_1V8: - append = PHY_TUNE_D18_1V8; - if (CHK_PCI_PID(pcr, 0x5249)) { - err = rtsx_pci_update_phy(pcr, PHY_BACR, - PHY_BACR_BASIC_MASK, 0); - if (err < 0) - return err; - append = PHY_TUNE_D18_1V7; - } - - err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK, - append); - if (err < 0) - return err; - break; - default: - pcr_dbg(pcr, "unknown output voltage %d\n", voltage); - return -EINVAL; - } - - /* set pad drive */ - rtsx_pci_init_cmd(pcr); - rts5249_fill_driving(pcr, voltage); - return rtsx_pci_send_cmd(pcr, 100); -} - -static void rts5249_set_aspm(struct rtsx_pcr *pcr, bool enable) -{ - struct rtsx_cr_option *option = &pcr->option; - u8 val = 0; - - if (pcr->aspm_enabled == enable) - return; - - if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { - if (enable) - val = pcr->aspm_en; - rtsx_pci_update_cfg_byte(pcr, - pcr->pcie_cap + PCI_EXP_LNKCTL, - ASPM_MASK_NEG, val); - } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { - u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; - - if (!enable) - val = FORCE_ASPM_CTL0; - rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); - } - - pcr->aspm_enabled = enable; -} - -static const struct pcr_ops rts5249_pcr_ops = { - .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, - .extra_init_hw = rts5249_extra_init_hw, - .optimize_phy = rts5249_optimize_phy, - .turn_on_led = 
rtsx_base_turn_on_led, - .turn_off_led = rtsx_base_turn_off_led, - .enable_auto_blink = rtsx_base_enable_auto_blink, - .disable_auto_blink = rtsx_base_disable_auto_blink, - .card_power_on = rtsx_base_card_power_on, - .card_power_off = rtsx_base_card_power_off, - .switch_output_voltage = rtsx_base_switch_output_voltage, - .force_power_down = rtsx_base_force_power_down, - .set_aspm = rts5249_set_aspm, -}; - -/* SD Pull Control Enable: - * SD_DAT[3:0] ==> pull up - * SD_CD ==> pull up - * SD_WP ==> pull up - * SD_CMD ==> pull up - * SD_CLK ==> pull down - */ -static const u32 rts5249_sd_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), - 0, -}; - -/* SD Pull Control Disable: - * SD_DAT[3:0] ==> pull down - * SD_CD ==> pull up - * SD_WP ==> pull down - * SD_CMD ==> pull down - * SD_CLK ==> pull down - */ -static const u32 rts5249_sd_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), - RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), - 0, -}; - -/* MS Pull Control Enable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rts5249_ms_pull_ctl_enable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), - 0, -}; - -/* MS Pull Control Disable: - * MS CD ==> pull up - * others ==> pull down - */ -static const u32 rts5249_ms_pull_ctl_disable_tbl[] = { - RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), - RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), - 0, -}; - -void rts5249_init_params(struct rtsx_pcr *pcr) -{ - struct rtsx_cr_option *option = &(pcr->option); - - pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; - pcr->num_slots = 2; - pcr->ops = &rts5249_pcr_ops; - - pcr->flags = 0; - pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; - 
pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; - pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; - pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16); - pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); - - pcr->ic_version = rts5249_get_ic_version(pcr); - pcr->sd_pull_ctl_enable_tbl = rts5249_sd_pull_ctl_enable_tbl; - pcr->sd_pull_ctl_disable_tbl = rts5249_sd_pull_ctl_disable_tbl; - pcr->ms_pull_ctl_enable_tbl = rts5249_ms_pull_ctl_enable_tbl; - pcr->ms_pull_ctl_disable_tbl = rts5249_ms_pull_ctl_disable_tbl; - - pcr->reg_pm_ctrl3 = PM_CTRL3; - - option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN - | LTR_L1SS_PWR_GATE_EN); - option->ltr_en = true; - - /* Init latency of active, idle, L1OFF to 60us, 300us, 3ms */ - option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; - option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; - option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; - option->dev_aspm_mode = DEV_ASPM_DYNAMIC; - option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; - option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5249_DEF; - option->ltr_l1off_snooze_sspwrgate = - LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF; -} - -static int rts524a_write_phy(struct rtsx_pcr *pcr, u8 addr, u16 val) -{ - addr = addr & 0x80 ? (addr & 0x7F) | 0x40 : addr; - - return __rtsx_pci_write_phy_register(pcr, addr, val); -} - -static int rts524a_read_phy(struct rtsx_pcr *pcr, u8 addr, u16 *val) -{ - addr = addr & 0x80 ? 
(addr & 0x7F) | 0x40 : addr; - - return __rtsx_pci_read_phy_register(pcr, addr, val); -} - -static int rts524a_optimize_phy(struct rtsx_pcr *pcr) -{ - int err; - - err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, - D3_DELINK_MODE_EN, 0x00); - if (err < 0) - return err; - - rtsx_pci_write_phy_register(pcr, PHY_PCR, - PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 | - PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | PHY_PCR_RSSI_EN); - rtsx_pci_write_phy_register(pcr, PHY_SSCCR3, - PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY); - - if (is_version(pcr, 0x524A, IC_VER_A)) { - rtsx_pci_write_phy_register(pcr, PHY_SSCCR3, - PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY); - rtsx_pci_write_phy_register(pcr, PHY_SSCCR2, - PHY_SSCCR2_PLL_NCODE | PHY_SSCCR2_TIME0 | - PHY_SSCCR2_TIME2_WIDTH); - rtsx_pci_write_phy_register(pcr, PHY_ANA1A, - PHY_ANA1A_TXR_LOOPBACK | PHY_ANA1A_RXT_BIST | - PHY_ANA1A_TXR_BIST | PHY_ANA1A_REV); - rtsx_pci_write_phy_register(pcr, PHY_ANA1D, - PHY_ANA1D_DEBUG_ADDR); - rtsx_pci_write_phy_register(pcr, PHY_DIG1E, - PHY_DIG1E_REV | PHY_DIG1E_D0_X_D1 | - PHY_DIG1E_RX_ON_HOST | PHY_DIG1E_RCLK_REF_HOST | - PHY_DIG1E_RCLK_TX_EN_KEEP | - PHY_DIG1E_RCLK_TX_TERM_KEEP | - PHY_DIG1E_RCLK_RX_EIDLE_ON | PHY_DIG1E_TX_TERM_KEEP | - PHY_DIG1E_RX_TERM_KEEP | PHY_DIG1E_TX_EN_KEEP | - PHY_DIG1E_RX_EN_KEEP); - } - - rtsx_pci_write_phy_register(pcr, PHY_ANA08, - PHY_ANA08_RX_EQ_DCGAIN | PHY_ANA08_SEL_RX_EN | - PHY_ANA08_RX_EQ_VAL | PHY_ANA08_SCP | PHY_ANA08_SEL_IPI); - - return 0; -} - -static int rts524a_extra_init_hw(struct rtsx_pcr *pcr) -{ - rts5249_extra_init_hw(pcr); - - rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, - FORCE_ASPM_L1_EN, FORCE_ASPM_L1_EN); - rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); - rtsx_pci_write_register(pcr, LDO_VCC_CFG1, LDO_VCC_LMT_EN, - LDO_VCC_LMT_EN); - rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); - if (is_version(pcr, 0x524A, IC_VER_A)) { - rtsx_pci_write_register(pcr, LDO_DV18_CFG, - 
LDO_DV18_SR_MASK, LDO_DV18_SR_DF); - rtsx_pci_write_register(pcr, LDO_VCC_CFG1, - LDO_VCC_REF_TUNE_MASK, LDO_VCC_REF_1V2); - rtsx_pci_write_register(pcr, LDO_VIO_CFG, - LDO_VIO_REF_TUNE_MASK, LDO_VIO_REF_1V2); - rtsx_pci_write_register(pcr, LDO_VIO_CFG, - LDO_VIO_SR_MASK, LDO_VIO_SR_DF); - rtsx_pci_write_register(pcr, LDO_DV12S_CFG, - LDO_REF12_TUNE_MASK, LDO_REF12_TUNE_DF); - rtsx_pci_write_register(pcr, SD40_LDO_CTL1, - SD40_VIO_TUNE_MASK, SD40_VIO_TUNE_1V7); - } - - return 0; -} - -static void rts5250_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) -{ - struct rtsx_cr_option *option = &(pcr->option); - - u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR); - int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST); - int aspm_L1_1, aspm_L1_2; - u8 val = 0; - - aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); - aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); - - if (active) { - /* Run, latency: 60us */ - if (aspm_L1_1) - val = option->ltr_l1off_snooze_sspwrgate; - } else { - /* L1off, latency: 300us */ - if (aspm_L1_2) - val = option->ltr_l1off_sspwrgate; - } - - if (aspm_L1_1 || aspm_L1_2) { - if (rtsx_check_dev_flag(pcr, - LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) { - if (card_exist) - val &= ~L1OFF_MBIAS2_EN_5250; - else - val |= L1OFF_MBIAS2_EN_5250; - } - } - rtsx_set_l1off_sub(pcr, val); -} - -static const struct pcr_ops rts524a_pcr_ops = { - .write_phy = rts524a_write_phy, - .read_phy = rts524a_read_phy, - .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, - .extra_init_hw = rts524a_extra_init_hw, - .optimize_phy = rts524a_optimize_phy, - .turn_on_led = rtsx_base_turn_on_led, - .turn_off_led = rtsx_base_turn_off_led, - .enable_auto_blink = rtsx_base_enable_auto_blink, - .disable_auto_blink = rtsx_base_disable_auto_blink, - .card_power_on = rtsx_base_card_power_on, - .card_power_off = rtsx_base_card_power_off, - .switch_output_voltage = rtsx_base_switch_output_voltage, - .force_power_down = rtsx_base_force_power_down, - .set_l1off_cfg_sub_d0 
= rts5250_set_l1off_cfg_sub_d0, - .set_aspm = rts5249_set_aspm, -}; - -void rts524a_init_params(struct rtsx_pcr *pcr) -{ - rts5249_init_params(pcr); - pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; - pcr->option.ltr_l1off_snooze_sspwrgate = - LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; - - pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; - pcr->ops = &rts524a_pcr_ops; -} - -static int rts525a_card_power_on(struct rtsx_pcr *pcr, int card) -{ - rtsx_pci_write_register(pcr, LDO_VCC_CFG1, - LDO_VCC_TUNE_MASK, LDO_VCC_3V3); - return rtsx_base_card_power_on(pcr, card); -} - -static int rts525a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) -{ - switch (voltage) { - case OUTPUT_3V3: - rtsx_pci_write_register(pcr, LDO_CONFIG2, - LDO_D3318_MASK, LDO_D3318_33V); - rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0); - break; - case OUTPUT_1V8: - rtsx_pci_write_register(pcr, LDO_CONFIG2, - LDO_D3318_MASK, LDO_D3318_18V); - rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, - SD_IO_USING_1V8); - break; - default: - return -EINVAL; - } - - rtsx_pci_init_cmd(pcr); - rts5249_fill_driving(pcr, voltage); - return rtsx_pci_send_cmd(pcr, 100); -} - -static int rts525a_optimize_phy(struct rtsx_pcr *pcr) -{ - int err; - - err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, - D3_DELINK_MODE_EN, 0x00); - if (err < 0) - return err; - - rtsx_pci_write_phy_register(pcr, _PHY_FLD0, - _PHY_FLD0_CLK_REQ_20C | _PHY_FLD0_RX_IDLE_EN | - _PHY_FLD0_BIT_ERR_RSTN | _PHY_FLD0_BER_COUNT | - _PHY_FLD0_BER_TIMER | _PHY_FLD0_CHECK_EN); - - rtsx_pci_write_phy_register(pcr, _PHY_ANA03, - _PHY_ANA03_TIMER_MAX | _PHY_ANA03_OOBS_DEB_EN | - _PHY_CMU_DEBUG_EN); - - if (is_version(pcr, 0x525A, IC_VER_A)) - rtsx_pci_write_phy_register(pcr, _PHY_REV0, - _PHY_REV0_FILTER_OUT | _PHY_REV0_CDR_BYPASS_PFD | - _PHY_REV0_CDR_RX_IDLE_BYPASS); - - return 0; -} - -static int rts525a_extra_init_hw(struct rtsx_pcr *pcr) -{ - rts5249_extra_init_hw(pcr); - - rtsx_pci_write_register(pcr, PCLK_CTL, 
PCLK_MODE_SEL, PCLK_MODE_SEL); - if (is_version(pcr, 0x525A, IC_VER_A)) { - rtsx_pci_write_register(pcr, L1SUB_CONFIG2, - L1SUB_AUTO_CFG, L1SUB_AUTO_CFG); - rtsx_pci_write_register(pcr, RREF_CFG, - RREF_VBGSEL_MASK, RREF_VBGSEL_1V25); - rtsx_pci_write_register(pcr, LDO_VIO_CFG, - LDO_VIO_TUNE_MASK, LDO_VIO_1V7); - rtsx_pci_write_register(pcr, LDO_DV12S_CFG, - LDO_D12_TUNE_MASK, LDO_D12_TUNE_DF); - rtsx_pci_write_register(pcr, LDO_AV12S_CFG, - LDO_AV12S_TUNE_MASK, LDO_AV12S_TUNE_DF); - rtsx_pci_write_register(pcr, LDO_VCC_CFG0, - LDO_VCC_LMTVTH_MASK, LDO_VCC_LMTVTH_2A); - rtsx_pci_write_register(pcr, OOBS_CONFIG, - OOBS_AUTOK_DIS | OOBS_VAL_MASK, 0x89); - } - - return 0; -} - -static const struct pcr_ops rts525a_pcr_ops = { - .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, - .extra_init_hw = rts525a_extra_init_hw, - .optimize_phy = rts525a_optimize_phy, - .turn_on_led = rtsx_base_turn_on_led, - .turn_off_led = rtsx_base_turn_off_led, - .enable_auto_blink = rtsx_base_enable_auto_blink, - .disable_auto_blink = rtsx_base_disable_auto_blink, - .card_power_on = rts525a_card_power_on, - .card_power_off = rtsx_base_card_power_off, - .switch_output_voltage = rts525a_switch_output_voltage, - .force_power_down = rtsx_base_force_power_down, - .set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0, - .set_aspm = rts5249_set_aspm, -}; - -void rts525a_init_params(struct rtsx_pcr *pcr) -{ - rts5249_init_params(pcr); - pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; - pcr->option.ltr_l1off_snooze_sspwrgate = - LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; - - pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; - pcr->ops = &rts525a_pcr_ops; -} - diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c deleted file mode 100644 index 590fb9aad77d..000000000000 --- a/drivers/mfd/rtsx_pcr.c +++ /dev/null @@ -1,1569 +0,0 @@ -/* Driver for Realtek PCI-Express card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . - * - * Author: - * Wei WANG - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rtsx_pcr.h" - -static bool msi_en = true; -module_param(msi_en, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(msi_en, "Enable MSI"); - -static DEFINE_IDR(rtsx_pci_idr); -static DEFINE_SPINLOCK(rtsx_pci_lock); - -static struct mfd_cell rtsx_pcr_cells[] = { - [RTSX_SD_CARD] = { - .name = DRV_NAME_RTSX_PCI_SDMMC, - }, - [RTSX_MS_CARD] = { - .name = DRV_NAME_RTSX_PCI_MS, - }, -}; - -static const struct pci_device_id rtsx_pci_ids[] = { - { PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x522A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x5287), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x5286), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, - { 0, } -}; - -MODULE_DEVICE_TABLE(pci, rtsx_pci_ids); - -static inline void 
rtsx_pci_enable_aspm(struct rtsx_pcr *pcr) -{ - rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, - 0xFC, pcr->aspm_en); -} - -static inline void rtsx_pci_disable_aspm(struct rtsx_pcr *pcr) -{ - rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, - 0xFC, 0); -} - -int rtsx_comm_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency) -{ - rtsx_pci_write_register(pcr, MSGTXDATA0, - MASK_8_BIT_DEF, (u8) (latency & 0xFF)); - rtsx_pci_write_register(pcr, MSGTXDATA1, - MASK_8_BIT_DEF, (u8)((latency >> 8) & 0xFF)); - rtsx_pci_write_register(pcr, MSGTXDATA2, - MASK_8_BIT_DEF, (u8)((latency >> 16) & 0xFF)); - rtsx_pci_write_register(pcr, MSGTXDATA3, - MASK_8_BIT_DEF, (u8)((latency >> 24) & 0xFF)); - rtsx_pci_write_register(pcr, LTR_CTL, LTR_TX_EN_MASK | - LTR_LATENCY_MODE_MASK, LTR_TX_EN_1 | LTR_LATENCY_MODE_SW); - - return 0; -} - -int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency) -{ - if (pcr->ops->set_ltr_latency) - return pcr->ops->set_ltr_latency(pcr, latency); - else - return rtsx_comm_set_ltr_latency(pcr, latency); -} - -static void rtsx_comm_set_aspm(struct rtsx_pcr *pcr, bool enable) -{ - struct rtsx_cr_option *option = &pcr->option; - - if (pcr->aspm_enabled == enable) - return; - - if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { - if (enable) - rtsx_pci_enable_aspm(pcr); - else - rtsx_pci_disable_aspm(pcr); - } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { - u8 mask = FORCE_ASPM_VAL_MASK; - u8 val = 0; - - if (enable) - val = pcr->aspm_en; - rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); - } - - pcr->aspm_enabled = enable; -} - -static void rtsx_disable_aspm(struct rtsx_pcr *pcr) -{ - if (pcr->ops->set_aspm) - pcr->ops->set_aspm(pcr, false); - else - rtsx_comm_set_aspm(pcr, false); -} - -int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val) -{ - rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, val); - - return 0; -} - -void rtsx_set_l1off_sub_cfg_d0(struct rtsx_pcr *pcr, int active) -{ - if 
(pcr->ops->set_l1off_cfg_sub_d0) - pcr->ops->set_l1off_cfg_sub_d0(pcr, active); -} - -static void rtsx_comm_pm_full_on(struct rtsx_pcr *pcr) -{ - struct rtsx_cr_option *option = &pcr->option; - - rtsx_disable_aspm(pcr); - - if (option->ltr_enabled) - rtsx_set_ltr_latency(pcr, option->ltr_active_latency); - - if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN)) - rtsx_set_l1off_sub_cfg_d0(pcr, 1); -} - -void rtsx_pm_full_on(struct rtsx_pcr *pcr) -{ - if (pcr->ops->full_on) - pcr->ops->full_on(pcr); - else - rtsx_comm_pm_full_on(pcr); -} - -void rtsx_pci_start_run(struct rtsx_pcr *pcr) -{ - /* If pci device removed, don't queue idle work any more */ - if (pcr->remove_pci) - return; - - if (pcr->state != PDEV_STAT_RUN) { - pcr->state = PDEV_STAT_RUN; - if (pcr->ops->enable_auto_blink) - pcr->ops->enable_auto_blink(pcr); - rtsx_pm_full_on(pcr); - } - - mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200)); -} -EXPORT_SYMBOL_GPL(rtsx_pci_start_run); - -int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data) -{ - int i; - u32 val = HAIMR_WRITE_START; - - val |= (u32)(addr & 0x3FFF) << 16; - val |= (u32)mask << 8; - val |= (u32)data; - - rtsx_pci_writel(pcr, RTSX_HAIMR, val); - - for (i = 0; i < MAX_RW_REG_CNT; i++) { - val = rtsx_pci_readl(pcr, RTSX_HAIMR); - if ((val & HAIMR_TRANS_END) == 0) { - if (data != (u8)val) - return -EIO; - return 0; - } - } - - return -ETIMEDOUT; -} -EXPORT_SYMBOL_GPL(rtsx_pci_write_register); - -int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data) -{ - u32 val = HAIMR_READ_START; - int i; - - val |= (u32)(addr & 0x3FFF) << 16; - rtsx_pci_writel(pcr, RTSX_HAIMR, val); - - for (i = 0; i < MAX_RW_REG_CNT; i++) { - val = rtsx_pci_readl(pcr, RTSX_HAIMR); - if ((val & HAIMR_TRANS_END) == 0) - break; - } - - if (i >= MAX_RW_REG_CNT) - return -ETIMEDOUT; - - if (data) - *data = (u8)(val & 0xFF); - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_pci_read_register); - -int __rtsx_pci_write_phy_register(struct 
rtsx_pcr *pcr, u8 addr, u16 val) -{ - int err, i, finished = 0; - u8 tmp; - - rtsx_pci_init_cmd(pcr); - - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA0, 0xFF, (u8)val); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA1, 0xFF, (u8)(val >> 8)); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x81); - - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - for (i = 0; i < 100000; i++) { - err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); - if (err < 0) - return err; - - if (!(tmp & 0x80)) { - finished = 1; - break; - } - } - - if (!finished) - return -ETIMEDOUT; - - return 0; -} - -int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val) -{ - if (pcr->ops->write_phy) - return pcr->ops->write_phy(pcr, addr, val); - - return __rtsx_pci_write_phy_register(pcr, addr, val); -} -EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register); - -int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) -{ - int err, i, finished = 0; - u16 data; - u8 *ptr, tmp; - - rtsx_pci_init_cmd(pcr); - - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x80); - - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - for (i = 0; i < 100000; i++) { - err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); - if (err < 0) - return err; - - if (!(tmp & 0x80)) { - finished = 1; - break; - } - } - - if (!finished) - return -ETIMEDOUT; - - rtsx_pci_init_cmd(pcr); - - rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA0, 0, 0); - rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA1, 0, 0); - - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - ptr = rtsx_pci_get_cmd_data(pcr); - data = ((u16)ptr[1] << 8) | ptr[0]; - - if (val) - *val = data; - - return 0; -} - -int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) -{ - if (pcr->ops->read_phy) - return pcr->ops->read_phy(pcr, addr, val); - - return 
__rtsx_pci_read_phy_register(pcr, addr, val); -} -EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register); - -void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr) -{ - rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); - rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); - - rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80); - rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80); -} -EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd); - -void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, - u8 cmd_type, u16 reg_addr, u8 mask, u8 data) -{ - unsigned long flags; - u32 val = 0; - u32 *ptr = (u32 *)(pcr->host_cmds_ptr); - - val |= (u32)(cmd_type & 0x03) << 30; - val |= (u32)(reg_addr & 0x3FFF) << 16; - val |= (u32)mask << 8; - val |= (u32)data; - - spin_lock_irqsave(&pcr->lock, flags); - ptr += pcr->ci; - if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) { - put_unaligned_le32(val, ptr); - ptr++; - pcr->ci++; - } - spin_unlock_irqrestore(&pcr->lock, flags); -} -EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd); - -void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr) -{ - u32 val = 1 << 31; - - rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); - - val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; - /* Hardware Auto Response */ - val |= 0x40000000; - rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); -} -EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait); - -int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout) -{ - struct completion trans_done; - u32 val = 1 << 31; - long timeleft; - unsigned long flags; - int err = 0; - - spin_lock_irqsave(&pcr->lock, flags); - - /* set up data structures for the wakeup system */ - pcr->done = &trans_done; - pcr->trans_result = TRANS_NOT_READY; - init_completion(&trans_done); - - rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); - - val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; - /* Hardware Auto Response */ - val |= 0x40000000; - rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); - - spin_unlock_irqrestore(&pcr->lock, flags); - - /* Wait for TRANS_OK_INT */ - timeleft = wait_for_completion_interruptible_timeout( - &trans_done, 
msecs_to_jiffies(timeout)); - if (timeleft <= 0) { - pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__); - err = -ETIMEDOUT; - goto finish_send_cmd; - } - - spin_lock_irqsave(&pcr->lock, flags); - if (pcr->trans_result == TRANS_RESULT_FAIL) - err = -EINVAL; - else if (pcr->trans_result == TRANS_RESULT_OK) - err = 0; - else if (pcr->trans_result == TRANS_NO_DEVICE) - err = -ENODEV; - spin_unlock_irqrestore(&pcr->lock, flags); - -finish_send_cmd: - spin_lock_irqsave(&pcr->lock, flags); - pcr->done = NULL; - spin_unlock_irqrestore(&pcr->lock, flags); - - if ((err < 0) && (err != -ENODEV)) - rtsx_pci_stop_cmd(pcr); - - if (pcr->finish_me) - complete(pcr->finish_me); - - return err; -} -EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd); - -static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, - dma_addr_t addr, unsigned int len, int end) -{ - u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi; - u64 val; - u8 option = SG_VALID | SG_TRANS_DATA; - - pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len); - - if (end) - option |= SG_END; - val = ((u64)addr << 32) | ((u64)len << 12) | option; - - put_unaligned_le64(val, ptr); - pcr->sgi++; -} - -int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int num_sg, bool read, int timeout) -{ - int err = 0, count; - - pcr_dbg(pcr, "--> %s: num_sg = %d\n", __func__, num_sg); - count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read); - if (count < 1) - return -EINVAL; - pcr_dbg(pcr, "DMA mapping count: %d\n", count); - - err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout); - - rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read); - - return err; -} -EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); - -int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int num_sg, bool read) -{ - enum dma_data_direction dir = read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; - - if (pcr->remove_pci) - return -EINVAL; - - if ((sglist == NULL) || (num_sg <= 0)) - return -EINVAL; - - return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir); -} -EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg); - -void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int num_sg, bool read) -{ - enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - - dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir); -} -EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg); - -int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int count, bool read, int timeout) -{ - struct completion trans_done; - struct scatterlist *sg; - dma_addr_t addr; - long timeleft; - unsigned long flags; - unsigned int len; - int i, err = 0; - u32 val; - u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE; - - if (pcr->remove_pci) - return -ENODEV; - - if ((sglist == NULL) || (count < 1)) - return -EINVAL; - - val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; - pcr->sgi = 0; - for_each_sg(sglist, sg, count, i) { - addr = sg_dma_address(sg); - len = sg_dma_len(sg); - rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1); - } - - spin_lock_irqsave(&pcr->lock, flags); - - pcr->done = &trans_done; - pcr->trans_result = TRANS_NOT_READY; - init_completion(&trans_done); - rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr); - rtsx_pci_writel(pcr, RTSX_HDBCTLR, val); - - spin_unlock_irqrestore(&pcr->lock, flags); - - timeleft = wait_for_completion_interruptible_timeout( - &trans_done, msecs_to_jiffies(timeout)); - if (timeleft <= 0) { - pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__); - err = -ETIMEDOUT; - goto out; - } - - spin_lock_irqsave(&pcr->lock, flags); - if (pcr->trans_result == TRANS_RESULT_FAIL) { - err = -EILSEQ; - if (pcr->dma_error_count < RTS_MAX_TIMES_FREQ_REDUCTION) - pcr->dma_error_count++; - } - - else if (pcr->trans_result == TRANS_NO_DEVICE) - err = -ENODEV; - spin_unlock_irqrestore(&pcr->lock, 
flags); - -out: - spin_lock_irqsave(&pcr->lock, flags); - pcr->done = NULL; - spin_unlock_irqrestore(&pcr->lock, flags); - - if ((err < 0) && (err != -ENODEV)) - rtsx_pci_stop_cmd(pcr); - - if (pcr->finish_me) - complete(pcr->finish_me); - - return err; -} -EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer); - -int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) -{ - int err; - int i, j; - u16 reg; - u8 *ptr; - - if (buf_len > 512) - buf_len = 512; - - ptr = buf; - reg = PPBUF_BASE2; - for (i = 0; i < buf_len / 256; i++) { - rtsx_pci_init_cmd(pcr); - - for (j = 0; j < 256; j++) - rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); - - err = rtsx_pci_send_cmd(pcr, 250); - if (err < 0) - return err; - - memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256); - ptr += 256; - } - - if (buf_len % 256) { - rtsx_pci_init_cmd(pcr); - - for (j = 0; j < buf_len % 256; j++) - rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); - - err = rtsx_pci_send_cmd(pcr, 250); - if (err < 0) - return err; - } - - memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256); - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf); - -int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) -{ - int err; - int i, j; - u16 reg; - u8 *ptr; - - if (buf_len > 512) - buf_len = 512; - - ptr = buf; - reg = PPBUF_BASE2; - for (i = 0; i < buf_len / 256; i++) { - rtsx_pci_init_cmd(pcr); - - for (j = 0; j < 256; j++) { - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, - reg++, 0xFF, *ptr); - ptr++; - } - - err = rtsx_pci_send_cmd(pcr, 250); - if (err < 0) - return err; - } - - if (buf_len % 256) { - rtsx_pci_init_cmd(pcr); - - for (j = 0; j < buf_len % 256; j++) { - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, - reg++, 0xFF, *ptr); - ptr++; - } - - err = rtsx_pci_send_cmd(pcr, 250); - if (err < 0) - return err; - } - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf); - -static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl) -{ - rtsx_pci_init_cmd(pcr); - - while (*tbl & 0xFFFF0000) { - 
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, - (u16)(*tbl >> 16), 0xFF, (u8)(*tbl)); - tbl++; - } - - return rtsx_pci_send_cmd(pcr, 100); -} - -int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card) -{ - const u32 *tbl; - - if (card == RTSX_SD_CARD) - tbl = pcr->sd_pull_ctl_enable_tbl; - else if (card == RTSX_MS_CARD) - tbl = pcr->ms_pull_ctl_enable_tbl; - else - return -EINVAL; - - return rtsx_pci_set_pull_ctl(pcr, tbl); -} -EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable); - -int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card) -{ - const u32 *tbl; - - if (card == RTSX_SD_CARD) - tbl = pcr->sd_pull_ctl_disable_tbl; - else if (card == RTSX_MS_CARD) - tbl = pcr->ms_pull_ctl_disable_tbl; - else - return -EINVAL; - - - return rtsx_pci_set_pull_ctl(pcr, tbl); -} -EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable); - -static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr) -{ - pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN; - - if (pcr->num_slots > 1) - pcr->bier |= MS_INT_EN; - - /* Enable Bus Interrupt */ - rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier); - - pcr_dbg(pcr, "RTSX_BIER: 0x%08x\n", pcr->bier); -} - -static inline u8 double_ssc_depth(u8 depth) -{ - return ((depth > 1) ? 
(depth - 1) : depth); -} - -static u8 revise_ssc_depth(u8 ssc_depth, u8 div) -{ - if (div > CLK_DIV_1) { - if (ssc_depth > (div - 1)) - ssc_depth -= (div - 1); - else - ssc_depth = SSC_DEPTH_4M; - } - - return ssc_depth; -} - -int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, - u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) -{ - int err, clk; - u8 n, clk_divider, mcu_cnt, div; - static const u8 depth[] = { - [RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M, - [RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M, - [RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M, - [RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K, - [RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K, - }; - - if (initial_mode) { - /* We use 250k(around) here, in initial stage */ - clk_divider = SD_CLK_DIVIDE_128; - card_clock = 30000000; - } else { - clk_divider = SD_CLK_DIVIDE_0; - } - err = rtsx_pci_write_register(pcr, SD_CFG1, - SD_CLK_DIVIDE_MASK, clk_divider); - if (err < 0) - return err; - - /* Reduce card clock by 20MHz each time a DMA transfer error occurs */ - if (card_clock == UHS_SDR104_MAX_DTR && - pcr->dma_error_count && - PCI_PID(pcr) == RTS5227_DEVICE_ID) - card_clock = UHS_SDR104_MAX_DTR - - (pcr->dma_error_count * 20000000); - - card_clock /= 1000000; - pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); - - clk = card_clock; - if (!initial_mode && double_clk) - clk = card_clock * 2; - pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", - clk, pcr->cur_clock); - - if (clk == pcr->cur_clock) - return 0; - - if (pcr->ops->conv_clk_and_div_n) - n = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); - else - n = (u8)(clk - 2); - if ((clk <= 2) || (n > MAX_DIV_N_PCR)) - return -EINVAL; - - mcu_cnt = (u8)(125/clk + 3); - if (mcu_cnt > 15) - mcu_cnt = 15; - - /* Make sure that the SSC clock div_n is not less than MIN_DIV_N_PCR */ - div = CLK_DIV_1; - while ((n < MIN_DIV_N_PCR) && (div < CLK_DIV_8)) { - if (pcr->ops->conv_clk_and_div_n) { - int dbl_clk = pcr->ops->conv_clk_and_div_n(n, - DIV_N_TO_CLK) 
* 2; - n = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk, - CLK_TO_DIV_N); - } else { - n = (n + 2) * 2 - 2; - } - div++; - } - pcr_dbg(pcr, "n = %d, div = %d\n", n, div); - - ssc_depth = depth[ssc_depth]; - if (double_clk) - ssc_depth = double_ssc_depth(ssc_depth); - - ssc_depth = revise_ssc_depth(ssc_depth, div); - pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); - - rtsx_pci_init_cmd(pcr); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, - CLK_LOW_FREQ, CLK_LOW_FREQ); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, - 0xFF, (div << 4) | mcu_cnt); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, - SSC_DEPTH_MASK, ssc_depth); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); - if (vpclk) { - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, - PHASE_NOT_RESET, 0); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, - PHASE_NOT_RESET, PHASE_NOT_RESET); - } - - err = rtsx_pci_send_cmd(pcr, 2000); - if (err < 0) - return err; - - /* Wait SSC clock stable */ - udelay(10); - err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); - if (err < 0) - return err; - - pcr->cur_clock = clk; - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock); - -int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card) -{ - if (pcr->ops->card_power_on) - return pcr->ops->card_power_on(pcr, card); - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on); - -int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card) -{ - if (pcr->ops->card_power_off) - return pcr->ops->card_power_off(pcr, card); - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off); - -int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card) -{ - static const unsigned int cd_mask[] = { - [RTSX_SD_CARD] = SD_EXIST, - [RTSX_MS_CARD] = MS_EXIST - }; - - if (!(pcr->flags & PCR_MS_PMOS)) { - /* When using single PMOS, accessing card is not permitted - * if the 
existing card is not the designated one. - */ - if (pcr->card_exist & (~cd_mask[card])) - return -EIO; - } - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_pci_card_exclusive_check); - -int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) -{ - if (pcr->ops->switch_output_voltage) - return pcr->ops->switch_output_voltage(pcr, voltage); - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_pci_switch_output_voltage); - -unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr) -{ - unsigned int val; - - val = rtsx_pci_readl(pcr, RTSX_BIPR); - if (pcr->ops->cd_deglitch) - val = pcr->ops->cd_deglitch(pcr); - - return val; -} -EXPORT_SYMBOL_GPL(rtsx_pci_card_exist); - -void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr) -{ - struct completion finish; - - pcr->finish_me = &finish; - init_completion(&finish); - - if (pcr->done) - complete(pcr->done); - - if (!pcr->remove_pci) - rtsx_pci_stop_cmd(pcr); - - wait_for_completion_interruptible_timeout(&finish, - msecs_to_jiffies(2)); - pcr->finish_me = NULL; -} -EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer); - -static void rtsx_pci_card_detect(struct work_struct *work) -{ - struct delayed_work *dwork; - struct rtsx_pcr *pcr; - unsigned long flags; - unsigned int card_detect = 0, card_inserted, card_removed; - u32 irq_status; - - dwork = to_delayed_work(work); - pcr = container_of(dwork, struct rtsx_pcr, carddet_work); - - pcr_dbg(pcr, "--> %s\n", __func__); - - mutex_lock(&pcr->pcr_mutex); - spin_lock_irqsave(&pcr->lock, flags); - - irq_status = rtsx_pci_readl(pcr, RTSX_BIPR); - pcr_dbg(pcr, "irq_status: 0x%08x\n", irq_status); - - irq_status &= CARD_EXIST; - card_inserted = pcr->card_inserted & irq_status; - card_removed = pcr->card_removed; - pcr->card_inserted = 0; - pcr->card_removed = 0; - - spin_unlock_irqrestore(&pcr->lock, flags); - - if (card_inserted || card_removed) { - pcr_dbg(pcr, "card_inserted: 0x%x, card_removed: 0x%x\n", - card_inserted, card_removed); - - if (pcr->ops->cd_deglitch) - 
card_inserted = pcr->ops->cd_deglitch(pcr); - - card_detect = card_inserted | card_removed; - - pcr->card_exist |= card_inserted; - pcr->card_exist &= ~card_removed; - } - - mutex_unlock(&pcr->pcr_mutex); - - if ((card_detect & SD_EXIST) && pcr->slots[RTSX_SD_CARD].card_event) - pcr->slots[RTSX_SD_CARD].card_event( - pcr->slots[RTSX_SD_CARD].p_dev); - if ((card_detect & MS_EXIST) && pcr->slots[RTSX_MS_CARD].card_event) - pcr->slots[RTSX_MS_CARD].card_event( - pcr->slots[RTSX_MS_CARD].p_dev); -} - -static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) -{ - struct rtsx_pcr *pcr = dev_id; - u32 int_reg; - - if (!pcr) - return IRQ_NONE; - - spin_lock(&pcr->lock); - - int_reg = rtsx_pci_readl(pcr, RTSX_BIPR); - /* Clear interrupt flag */ - rtsx_pci_writel(pcr, RTSX_BIPR, int_reg); - if ((int_reg & pcr->bier) == 0) { - spin_unlock(&pcr->lock); - return IRQ_NONE; - } - if (int_reg == 0xFFFFFFFF) { - spin_unlock(&pcr->lock); - return IRQ_HANDLED; - } - - int_reg &= (pcr->bier | 0x7FFFFF); - - if (int_reg & SD_INT) { - if (int_reg & SD_EXIST) { - pcr->card_inserted |= SD_EXIST; - } else { - pcr->card_removed |= SD_EXIST; - pcr->card_inserted &= ~SD_EXIST; - } - pcr->dma_error_count = 0; - } - - if (int_reg & MS_INT) { - if (int_reg & MS_EXIST) { - pcr->card_inserted |= MS_EXIST; - } else { - pcr->card_removed |= MS_EXIST; - pcr->card_inserted &= ~MS_EXIST; - } - } - - if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) { - if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) { - pcr->trans_result = TRANS_RESULT_FAIL; - if (pcr->done) - complete(pcr->done); - } else if (int_reg & TRANS_OK_INT) { - pcr->trans_result = TRANS_RESULT_OK; - if (pcr->done) - complete(pcr->done); - } - } - - if (pcr->card_inserted || pcr->card_removed) - schedule_delayed_work(&pcr->carddet_work, - msecs_to_jiffies(200)); - - spin_unlock(&pcr->lock); - return IRQ_HANDLED; -} - -static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr) -{ - pcr_dbg(pcr, "%s: pcr->msi_en = %d, pci->irq = %d\n", - __func__, 
pcr->msi_en, pcr->pci->irq); - - if (request_irq(pcr->pci->irq, rtsx_pci_isr, - pcr->msi_en ? 0 : IRQF_SHARED, - DRV_NAME_RTSX_PCI, pcr)) { - dev_err(&(pcr->pci->dev), - "rtsx_sdmmc: unable to grab IRQ %d, disabling device\n", - pcr->pci->irq); - return -1; - } - - pcr->irq = pcr->pci->irq; - pci_intx(pcr->pci, !pcr->msi_en); - - return 0; -} - -static void rtsx_enable_aspm(struct rtsx_pcr *pcr) -{ - if (pcr->ops->set_aspm) - pcr->ops->set_aspm(pcr, true); - else - rtsx_comm_set_aspm(pcr, true); -} - -static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr) -{ - struct rtsx_cr_option *option = &pcr->option; - - if (option->ltr_enabled) { - u32 latency = option->ltr_l1off_latency; - - if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN)) - mdelay(option->l1_snooze_delay); - - rtsx_set_ltr_latency(pcr, latency); - } - - if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN)) - rtsx_set_l1off_sub_cfg_d0(pcr, 0); - - rtsx_enable_aspm(pcr); -} - -void rtsx_pm_power_saving(struct rtsx_pcr *pcr) -{ - if (pcr->ops->power_saving) - pcr->ops->power_saving(pcr); - else - rtsx_comm_pm_power_saving(pcr); -} - -static void rtsx_pci_idle_work(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work); - - pcr_dbg(pcr, "--> %s\n", __func__); - - mutex_lock(&pcr->pcr_mutex); - - pcr->state = PDEV_STAT_IDLE; - - if (pcr->ops->disable_auto_blink) - pcr->ops->disable_auto_blink(pcr); - if (pcr->ops->turn_off_led) - pcr->ops->turn_off_led(pcr); - - rtsx_pm_power_saving(pcr); - - mutex_unlock(&pcr->pcr_mutex); -} - -#ifdef CONFIG_PM -static void rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state) -{ - if (pcr->ops->turn_off_led) - pcr->ops->turn_off_led(pcr); - - rtsx_pci_writel(pcr, RTSX_BIER, 0); - pcr->bier = 0; - - rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08); - rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, pm_state); - - if (pcr->ops->force_power_down) - 
pcr->ops->force_power_down(pcr, pm_state); -} -#endif - -static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) -{ - int err; - - pcr->pcie_cap = pci_find_capability(pcr->pci, PCI_CAP_ID_EXP); - rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); - - rtsx_pci_enable_bus_int(pcr); - - /* Power on SSC */ - err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); - if (err < 0) - return err; - - /* Wait SSC power stable */ - udelay(200); - - rtsx_pci_disable_aspm(pcr); - if (pcr->ops->optimize_phy) { - err = pcr->ops->optimize_phy(pcr); - if (err < 0) - return err; - } - - rtsx_pci_init_cmd(pcr); - - /* Set mcu_cnt to 7 to ensure data can be sampled properly */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07); - - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00); - /* Disable card clock */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0); - /* Reset delink mode */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0); - /* Card driving select */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_DRIVE_SEL, - 0xFF, pcr->card_drive_sel); - /* Enable SSC Clock */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, - 0xFF, SSC_8X_EN | SSC_SEL_4M); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); - /* Disable cd_pwr_save */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10); - /* Clear Link Ready Interrupt */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0, - LINK_RDY_INT, LINK_RDY_INT); - /* Enlarge the estimation window of PERST# glitch - * to reduce the chance of invalid card interrupt - */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80); - /* Update RC oscillator to 400k - * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1 - * 1: 2M 0: 400k - */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00); - /* Set interrupt write clear - * bit 1: U_elbi_if_rd_clr_en - * 1: Enable ELBI interrupt[31:22] & [7:0] flag read clear - * 0: ELBI interrupt flag[31:22] & [7:0] 
only can be write clear - */ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, NFTS_TX_CTRL, 0x02, 0); - - err = rtsx_pci_send_cmd(pcr, 100); - if (err < 0) - return err; - - switch (PCI_PID(pcr)) { - case PID_5250: - case PID_524A: - case PID_525A: - rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1); - break; - default: - break; - } - - /* Enable clk_request_n to enable clock power management */ - rtsx_pci_write_config_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL + 1, 1); - /* Enter L1 when host tx idle */ - rtsx_pci_write_config_byte(pcr, 0x70F, 0x5B); - - if (pcr->ops->extra_init_hw) { - err = pcr->ops->extra_init_hw(pcr); - if (err < 0) - return err; - } - - /* No CD interrupt if probing driver with card inserted. - * So we need to initialize pcr->card_exist here. - */ - if (pcr->ops->cd_deglitch) - pcr->card_exist = pcr->ops->cd_deglitch(pcr); - else - pcr->card_exist = rtsx_pci_readl(pcr, RTSX_BIPR) & CARD_EXIST; - - return 0; -} - -static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) -{ - int err; - - spin_lock_init(&pcr->lock); - mutex_init(&pcr->pcr_mutex); - - switch (PCI_PID(pcr)) { - default: - case 0x5209: - rts5209_init_params(pcr); - break; - - case 0x5229: - rts5229_init_params(pcr); - break; - - case 0x5289: - rtl8411_init_params(pcr); - break; - - case 0x5227: - rts5227_init_params(pcr); - break; - - case 0x522A: - rts522a_init_params(pcr); - break; - - case 0x5249: - rts5249_init_params(pcr); - break; - - case 0x524A: - rts524a_init_params(pcr); - break; - - case 0x525A: - rts525a_init_params(pcr); - break; - - case 0x5287: - rtl8411b_init_params(pcr); - break; - - case 0x5286: - rtl8402_init_params(pcr); - break; - } - - pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n", - PCI_PID(pcr), pcr->ic_version); - - pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot), - GFP_KERNEL); - if (!pcr->slots) - return -ENOMEM; - - if (pcr->ops->fetch_vendor_settings) - pcr->ops->fetch_vendor_settings(pcr); - - pcr_dbg(pcr, "pcr->aspm_en = 0x%x\n", pcr->aspm_en); - 
pcr_dbg(pcr, "pcr->sd30_drive_sel_1v8 = 0x%x\n", - pcr->sd30_drive_sel_1v8); - pcr_dbg(pcr, "pcr->sd30_drive_sel_3v3 = 0x%x\n", - pcr->sd30_drive_sel_3v3); - pcr_dbg(pcr, "pcr->card_drive_sel = 0x%x\n", - pcr->card_drive_sel); - pcr_dbg(pcr, "pcr->flags = 0x%x\n", pcr->flags); - - pcr->state = PDEV_STAT_IDLE; - err = rtsx_pci_init_hw(pcr); - if (err < 0) { - kfree(pcr->slots); - return err; - } - - return 0; -} - -static int rtsx_pci_probe(struct pci_dev *pcidev, - const struct pci_device_id *id) -{ - struct rtsx_pcr *pcr; - struct pcr_handle *handle; - u32 base, len; - int ret, i, bar = 0; - - dev_dbg(&(pcidev->dev), - ": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n", - pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device, - (int)pcidev->revision); - - ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)); - if (ret < 0) - return ret; - - ret = pci_enable_device(pcidev); - if (ret) - return ret; - - ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI); - if (ret) - goto disable; - - pcr = kzalloc(sizeof(*pcr), GFP_KERNEL); - if (!pcr) { - ret = -ENOMEM; - goto release_pci; - } - - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) { - ret = -ENOMEM; - goto free_pcr; - } - handle->pcr = pcr; - - idr_preload(GFP_KERNEL); - spin_lock(&rtsx_pci_lock); - ret = idr_alloc(&rtsx_pci_idr, pcr, 0, 0, GFP_NOWAIT); - if (ret >= 0) - pcr->id = ret; - spin_unlock(&rtsx_pci_lock); - idr_preload_end(); - if (ret < 0) - goto free_handle; - - pcr->pci = pcidev; - dev_set_drvdata(&pcidev->dev, handle); - - if (CHK_PCI_PID(pcr, 0x525A)) - bar = 1; - len = pci_resource_len(pcidev, bar); - base = pci_resource_start(pcidev, bar); - pcr->remap_addr = ioremap_nocache(base, len); - if (!pcr->remap_addr) { - ret = -ENOMEM; - goto free_handle; - } - - pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev), - RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr), - GFP_KERNEL); - if (pcr->rtsx_resv_buf == NULL) { - ret = -ENXIO; - goto unmap; - } - pcr->host_cmds_ptr = 
pcr->rtsx_resv_buf; - pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr; - pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN; - pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN; - - pcr->card_inserted = 0; - pcr->card_removed = 0; - INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect); - INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work); - - pcr->msi_en = msi_en; - if (pcr->msi_en) { - ret = pci_enable_msi(pcidev); - if (ret) - pcr->msi_en = false; - } - - ret = rtsx_pci_acquire_irq(pcr); - if (ret < 0) - goto disable_msi; - - pci_set_master(pcidev); - synchronize_irq(pcr->irq); - - ret = rtsx_pci_init_chip(pcr); - if (ret < 0) - goto disable_irq; - - for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) { - rtsx_pcr_cells[i].platform_data = handle; - rtsx_pcr_cells[i].pdata_size = sizeof(*handle); - } - ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells, - ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL); - if (ret < 0) - goto disable_irq; - - schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200)); - - return 0; - -disable_irq: - free_irq(pcr->irq, (void *)pcr); -disable_msi: - if (pcr->msi_en) - pci_disable_msi(pcr->pci); - dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, - pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); -unmap: - iounmap(pcr->remap_addr); -free_handle: - kfree(handle); -free_pcr: - kfree(pcr); -release_pci: - pci_release_regions(pcidev); -disable: - pci_disable_device(pcidev); - - return ret; -} - -static void rtsx_pci_remove(struct pci_dev *pcidev) -{ - struct pcr_handle *handle = pci_get_drvdata(pcidev); - struct rtsx_pcr *pcr = handle->pcr; - - pcr->remove_pci = true; - - /* Disable interrupts at the pcr level */ - spin_lock_irq(&pcr->lock); - rtsx_pci_writel(pcr, RTSX_BIER, 0); - pcr->bier = 0; - spin_unlock_irq(&pcr->lock); - - cancel_delayed_work_sync(&pcr->carddet_work); - cancel_delayed_work_sync(&pcr->idle_work); - - mfd_remove_devices(&pcidev->dev); - - dma_free_coherent(&(pcr->pci->dev), 
RTSX_RESV_BUF_LEN, - pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); - free_irq(pcr->irq, (void *)pcr); - if (pcr->msi_en) - pci_disable_msi(pcr->pci); - iounmap(pcr->remap_addr); - - pci_release_regions(pcidev); - pci_disable_device(pcidev); - - spin_lock(&rtsx_pci_lock); - idr_remove(&rtsx_pci_idr, pcr->id); - spin_unlock(&rtsx_pci_lock); - - kfree(pcr->slots); - kfree(pcr); - kfree(handle); - - dev_dbg(&(pcidev->dev), - ": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n", - pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device); -} - -#ifdef CONFIG_PM - -static int rtsx_pci_suspend(struct pci_dev *pcidev, pm_message_t state) -{ - struct pcr_handle *handle; - struct rtsx_pcr *pcr; - - dev_dbg(&(pcidev->dev), "--> %s\n", __func__); - - handle = pci_get_drvdata(pcidev); - pcr = handle->pcr; - - cancel_delayed_work(&pcr->carddet_work); - cancel_delayed_work(&pcr->idle_work); - - mutex_lock(&pcr->pcr_mutex); - - rtsx_pci_power_off(pcr, HOST_ENTER_S3); - - pci_save_state(pcidev); - pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0); - pci_disable_device(pcidev); - pci_set_power_state(pcidev, pci_choose_state(pcidev, state)); - - mutex_unlock(&pcr->pcr_mutex); - return 0; -} - -static int rtsx_pci_resume(struct pci_dev *pcidev) -{ - struct pcr_handle *handle; - struct rtsx_pcr *pcr; - int ret = 0; - - dev_dbg(&(pcidev->dev), "--> %s\n", __func__); - - handle = pci_get_drvdata(pcidev); - pcr = handle->pcr; - - mutex_lock(&pcr->pcr_mutex); - - pci_set_power_state(pcidev, PCI_D0); - pci_restore_state(pcidev); - ret = pci_enable_device(pcidev); - if (ret) - goto out; - pci_set_master(pcidev); - - ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00); - if (ret) - goto out; - - ret = rtsx_pci_init_hw(pcr); - if (ret) - goto out; - - schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200)); - -out: - mutex_unlock(&pcr->pcr_mutex); - return ret; -} - -static void rtsx_pci_shutdown(struct pci_dev *pcidev) -{ - struct pcr_handle 
*handle; - struct rtsx_pcr *pcr; - - dev_dbg(&(pcidev->dev), "--> %s\n", __func__); - - handle = pci_get_drvdata(pcidev); - pcr = handle->pcr; - rtsx_pci_power_off(pcr, HOST_ENTER_S1); - - pci_disable_device(pcidev); -} - -#else /* CONFIG_PM */ - -#define rtsx_pci_suspend NULL -#define rtsx_pci_resume NULL -#define rtsx_pci_shutdown NULL - -#endif /* CONFIG_PM */ - -static struct pci_driver rtsx_pci_driver = { - .name = DRV_NAME_RTSX_PCI, - .id_table = rtsx_pci_ids, - .probe = rtsx_pci_probe, - .remove = rtsx_pci_remove, - .suspend = rtsx_pci_suspend, - .resume = rtsx_pci_resume, - .shutdown = rtsx_pci_shutdown, -}; -module_pci_driver(rtsx_pci_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Wei WANG "); -MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver"); diff --git a/drivers/mfd/rtsx_pcr.h b/drivers/mfd/rtsx_pcr.h deleted file mode 100644 index ec784e04fe20..000000000000 --- a/drivers/mfd/rtsx_pcr.h +++ /dev/null @@ -1,103 +0,0 @@ -/* Driver for Realtek PCI-Express card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . 
- * - * Author: - * Wei WANG - */ - -#ifndef __RTSX_PCR_H -#define __RTSX_PCR_H - -#include - -#define MIN_DIV_N_PCR 80 -#define MAX_DIV_N_PCR 208 - -#define RTS522A_PM_CTRL3 0xFF7E - -#define RTS524A_PME_FORCE_CTL 0xFF78 -#define RTS524A_PM_CTRL3 0xFF7E - -#define LTR_ACTIVE_LATENCY_DEF 0x883C -#define LTR_IDLE_LATENCY_DEF 0x892C -#define LTR_L1OFF_LATENCY_DEF 0x9003 -#define L1_SNOOZE_DELAY_DEF 1 -#define LTR_L1OFF_SSPWRGATE_5249_DEF 0xAF -#define LTR_L1OFF_SSPWRGATE_5250_DEF 0xFF -#define LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF 0xAC -#define LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF 0xF8 -#define CMD_TIMEOUT_DEF 100 -#define ASPM_MASK_NEG 0xFC -#define MASK_8_BIT_DEF 0xFF - -int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val); -int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val); - -void rts5209_init_params(struct rtsx_pcr *pcr); -void rts5229_init_params(struct rtsx_pcr *pcr); -void rtl8411_init_params(struct rtsx_pcr *pcr); -void rtl8402_init_params(struct rtsx_pcr *pcr); -void rts5227_init_params(struct rtsx_pcr *pcr); -void rts522a_init_params(struct rtsx_pcr *pcr); -void rts5249_init_params(struct rtsx_pcr *pcr); -void rts524a_init_params(struct rtsx_pcr *pcr); -void rts525a_init_params(struct rtsx_pcr *pcr); -void rtl8411b_init_params(struct rtsx_pcr *pcr); - -static inline u8 map_sd_drive(int idx) -{ - u8 sd_drive[4] = { - 0x01, /* Type D */ - 0x02, /* Type C */ - 0x05, /* Type A */ - 0x03 /* Type B */ - }; - - return sd_drive[idx]; -} - -#define rtsx_vendor_setting_valid(reg) (!((reg) & 0x1000000)) -#define rts5209_vendor_setting1_valid(reg) (!((reg) & 0x80)) -#define rts5209_vendor_setting2_valid(reg) ((reg) & 0x80) - -#define rtsx_reg_to_aspm(reg) (((reg) >> 28) & 0x03) -#define rtsx_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 26) & 0x03) -#define rtsx_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x03) -#define rtsx_reg_to_card_drive_sel(reg) ((((reg) >> 25) & 0x01) << 6) -#define rtsx_reg_check_reverse_socket(reg) 
((reg) & 0x4000) -#define rts5209_reg_to_aspm(reg) (((reg) >> 5) & 0x03) -#define rts5209_reg_check_ms_pmos(reg) (!((reg) & 0x08)) -#define rts5209_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 3) & 0x07) -#define rts5209_reg_to_sd30_drive_sel_3v3(reg) ((reg) & 0x07) -#define rts5209_reg_to_card_drive_sel(reg) ((reg) >> 8) -#define rtl8411_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x07) -#define rtl8411b_reg_to_sd30_drive_sel_3v3(reg) ((reg) & 0x03) - -#define set_pull_ctrl_tables(pcr, __device) \ -do { \ - pcr->sd_pull_ctl_enable_tbl = __device##_sd_pull_ctl_enable_tbl; \ - pcr->sd_pull_ctl_disable_tbl = __device##_sd_pull_ctl_disable_tbl; \ - pcr->ms_pull_ctl_enable_tbl = __device##_ms_pull_ctl_enable_tbl; \ - pcr->ms_pull_ctl_disable_tbl = __device##_ms_pull_ctl_disable_tbl; \ -} while (0) - -/* generic operations */ -int rtsx_gops_pm_reset(struct rtsx_pcr *pcr); -int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency); -int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val); - -#endif diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c deleted file mode 100644 index 59d61b04c197..000000000000 --- a/drivers/mfd/rtsx_usb.c +++ /dev/null @@ -1,791 +0,0 @@ -/* Driver for Realtek USB card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . 
- * - * Author: - * Roger Tseng - */ -#include -#include -#include -#include -#include -#include -#include - -static int polling_pipe = 1; -module_param(polling_pipe, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(polling_pipe, "polling pipe (0: ctl, 1: bulk)"); - -static const struct mfd_cell rtsx_usb_cells[] = { - [RTSX_USB_SD_CARD] = { - .name = "rtsx_usb_sdmmc", - .pdata_size = 0, - }, - [RTSX_USB_MS_CARD] = { - .name = "rtsx_usb_ms", - .pdata_size = 0, - }, -}; - -static void rtsx_usb_sg_timed_out(struct timer_list *t) -{ - struct rtsx_ucr *ucr = from_timer(ucr, t, sg_timer); - - dev_dbg(&ucr->pusb_intf->dev, "%s: sg transfer timed out", __func__); - usb_sg_cancel(&ucr->current_sg); -} - -static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr, - unsigned int pipe, struct scatterlist *sg, int num_sg, - unsigned int length, unsigned int *act_len, int timeout) -{ - int ret; - - dev_dbg(&ucr->pusb_intf->dev, "%s: xfer %u bytes, %d entries\n", - __func__, length, num_sg); - ret = usb_sg_init(&ucr->current_sg, ucr->pusb_dev, pipe, 0, - sg, num_sg, length, GFP_NOIO); - if (ret) - return ret; - - ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout); - add_timer(&ucr->sg_timer); - usb_sg_wait(&ucr->current_sg); - if (!del_timer_sync(&ucr->sg_timer)) - ret = -ETIMEDOUT; - else - ret = ucr->current_sg.status; - - if (act_len) - *act_len = ucr->current_sg.bytes; - - return ret; -} - -int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, - void *buf, unsigned int len, int num_sg, - unsigned int *act_len, int timeout) -{ - if (timeout < 600) - timeout = 600; - - if (num_sg) - return rtsx_usb_bulk_transfer_sglist(ucr, pipe, - (struct scatterlist *)buf, num_sg, len, act_len, - timeout); - else - return usb_bulk_msg(ucr->pusb_dev, pipe, buf, len, act_len, - timeout); -} -EXPORT_SYMBOL_GPL(rtsx_usb_transfer_data); - -static inline void rtsx_usb_seq_cmd_hdr(struct rtsx_ucr *ucr, - u16 addr, u16 len, u8 seq_type) -{ - rtsx_usb_cmd_hdr_tag(ucr); - - 
ucr->cmd_buf[PACKET_TYPE] = seq_type; - ucr->cmd_buf[5] = (u8)(len >> 8); - ucr->cmd_buf[6] = (u8)len; - ucr->cmd_buf[8] = (u8)(addr >> 8); - ucr->cmd_buf[9] = (u8)addr; - - if (seq_type == SEQ_WRITE) - ucr->cmd_buf[STAGE_FLAG] = 0; - else - ucr->cmd_buf[STAGE_FLAG] = STAGE_R; -} - -static int rtsx_usb_seq_write_register(struct rtsx_ucr *ucr, - u16 addr, u16 len, u8 *data) -{ - u16 cmd_len = ALIGN(SEQ_WRITE_DATA_OFFSET + len, 4); - - if (!data) - return -EINVAL; - - if (cmd_len > IOBUF_SIZE) - return -EINVAL; - - rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_WRITE); - memcpy(ucr->cmd_buf + SEQ_WRITE_DATA_OFFSET, data, len); - - return rtsx_usb_transfer_data(ucr, - usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), - ucr->cmd_buf, cmd_len, 0, NULL, 100); -} - -static int rtsx_usb_seq_read_register(struct rtsx_ucr *ucr, - u16 addr, u16 len, u8 *data) -{ - int i, ret; - u16 rsp_len = round_down(len, 4); - u16 res_len = len - rsp_len; - - if (!data) - return -EINVAL; - - /* 4-byte aligned part */ - if (rsp_len) { - rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_READ); - ret = rtsx_usb_transfer_data(ucr, - usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), - ucr->cmd_buf, 12, 0, NULL, 100); - if (ret) - return ret; - - ret = rtsx_usb_transfer_data(ucr, - usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), - data, rsp_len, 0, NULL, 100); - if (ret) - return ret; - } - - /* unaligned part */ - for (i = 0; i < res_len; i++) { - ret = rtsx_usb_read_register(ucr, addr + rsp_len + i, - data + rsp_len + i); - if (ret) - return ret; - } - - return 0; -} - -int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) -{ - return rtsx_usb_seq_read_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); -} -EXPORT_SYMBOL_GPL(rtsx_usb_read_ppbuf); - -int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) -{ - return rtsx_usb_seq_write_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); -} -EXPORT_SYMBOL_GPL(rtsx_usb_write_ppbuf); - -int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, - u8 
mask, u8 data) -{ - u16 value, index; - - addr |= EP0_WRITE_REG_CMD << EP0_OP_SHIFT; - value = swab16(addr); - index = mask | data << 8; - - return usb_control_msg(ucr->pusb_dev, - usb_sndctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, - USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, index, NULL, 0, 100); -} -EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); - -int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) -{ - u16 value; - u8 *buf; - int ret; - - if (!data) - return -EINVAL; - - buf = kzalloc(sizeof(u8), GFP_KERNEL); - if (!buf) - return -ENOMEM; - - addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; - value = swab16(addr); - - ret = usb_control_msg(ucr->pusb_dev, - usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, 0, buf, 1, 100); - *data = *buf; - - kfree(buf); - return ret; -} -EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); - -void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, u16 reg_addr, - u8 mask, u8 data) -{ - int i; - - if (ucr->cmd_idx < (IOBUF_SIZE - CMD_OFFSET) / 4) { - i = CMD_OFFSET + ucr->cmd_idx * 4; - - ucr->cmd_buf[i++] = ((cmd_type & 0x03) << 6) | - (u8)((reg_addr >> 8) & 0x3F); - ucr->cmd_buf[i++] = (u8)reg_addr; - ucr->cmd_buf[i++] = mask; - ucr->cmd_buf[i++] = data; - - ucr->cmd_idx++; - } -} -EXPORT_SYMBOL_GPL(rtsx_usb_add_cmd); - -int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout) -{ - int ret; - - ucr->cmd_buf[CNT_H] = (u8)(ucr->cmd_idx >> 8); - ucr->cmd_buf[CNT_L] = (u8)(ucr->cmd_idx); - ucr->cmd_buf[STAGE_FLAG] = flag; - - ret = rtsx_usb_transfer_data(ucr, - usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), - ucr->cmd_buf, ucr->cmd_idx * 4 + CMD_OFFSET, - 0, NULL, timeout); - if (ret) { - rtsx_usb_clear_fsm_err(ucr); - return ret; - } - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_usb_send_cmd); - -int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout) -{ - if (rsp_len <= 0) - return -EINVAL; - - rsp_len = ALIGN(rsp_len, 
4); - - return rtsx_usb_transfer_data(ucr, - usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), - ucr->rsp_buf, rsp_len, 0, NULL, timeout); -} -EXPORT_SYMBOL_GPL(rtsx_usb_get_rsp); - -static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) -{ - int ret; - - rtsx_usb_init_cmd(ucr); - rtsx_usb_add_cmd(ucr, READ_REG_CMD, CARD_EXIST, 0x00, 0x00); - rtsx_usb_add_cmd(ucr, READ_REG_CMD, OCPSTAT, 0x00, 0x00); - ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); - if (ret) - return ret; - - ret = rtsx_usb_get_rsp(ucr, 2, 100); - if (ret) - return ret; - - *status = ((ucr->rsp_buf[0] >> 2) & 0x0f) | - ((ucr->rsp_buf[1] & 0x03) << 4); - - return 0; -} - -int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) -{ - int ret; - u16 *buf; - - if (!status) - return -EINVAL; - - if (polling_pipe == 0) { - buf = kzalloc(sizeof(u16), GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ret = usb_control_msg(ucr->pusb_dev, - usb_rcvctrlpipe(ucr->pusb_dev, 0), - RTSX_USB_REQ_POLL, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, 0, buf, 2, 100); - *status = *buf; - - kfree(buf); - } else { - ret = rtsx_usb_get_status_with_bulk(ucr, status); - } - - /* usb_control_msg may return positive when success */ - if (ret < 0) - return ret; - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_usb_get_card_status); - -static int rtsx_usb_write_phy_register(struct rtsx_ucr *ucr, u8 addr, u8 val) -{ - dev_dbg(&ucr->pusb_intf->dev, "Write 0x%x to phy register 0x%x\n", - val, addr); - - rtsx_usb_init_cmd(ucr); - - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VSTAIN, 0xFF, val); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, 0xFF, addr & 0x0F); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, - 0xFF, (addr >> 4) & 0x0F); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); - rtsx_usb_add_cmd(ucr, 
WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); - - return rtsx_usb_send_cmd(ucr, MODE_C, 100); -} - -int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, u8 data) -{ - rtsx_usb_init_cmd(ucr); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, addr, mask, data); - return rtsx_usb_send_cmd(ucr, MODE_C, 100); -} -EXPORT_SYMBOL_GPL(rtsx_usb_write_register); - -int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) -{ - int ret; - - if (data != NULL) - *data = 0; - - rtsx_usb_init_cmd(ucr); - rtsx_usb_add_cmd(ucr, READ_REG_CMD, addr, 0, 0); - ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); - if (ret) - return ret; - - ret = rtsx_usb_get_rsp(ucr, 1, 100); - if (ret) - return ret; - - if (data != NULL) - *data = ucr->rsp_buf[0]; - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_usb_read_register); - -static inline u8 double_ssc_depth(u8 depth) -{ - return (depth > 1) ? (depth - 1) : depth; -} - -static u8 revise_ssc_depth(u8 ssc_depth, u8 div) -{ - if (div > CLK_DIV_1) { - if (ssc_depth > div - 1) - ssc_depth -= (div - 1); - else - ssc_depth = SSC_DEPTH_2M; - } - - return ssc_depth; -} - -int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, - u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) -{ - int ret; - u8 n, clk_divider, mcu_cnt, div; - - if (!card_clock) { - ucr->cur_clk = 0; - return 0; - } - - if (initial_mode) { - /* We use 250k(around) here, in initial stage */ - clk_divider = SD_CLK_DIVIDE_128; - card_clock = 30000000; - } else { - clk_divider = SD_CLK_DIVIDE_0; - } - - ret = rtsx_usb_write_register(ucr, SD_CFG1, - SD_CLK_DIVIDE_MASK, clk_divider); - if (ret < 0) - return ret; - - card_clock /= 1000000; - dev_dbg(&ucr->pusb_intf->dev, - "Switch card clock to %dMHz\n", card_clock); - - if (!initial_mode && double_clk) - card_clock *= 2; - dev_dbg(&ucr->pusb_intf->dev, - "Internal SSC clock: %dMHz (cur_clk = %d)\n", - card_clock, ucr->cur_clk); - - if (card_clock == 
ucr->cur_clk) - return 0; - - /* Converting clock value into internal settings: n and div */ - n = card_clock - 2; - if ((card_clock <= 2) || (n > MAX_DIV_N)) - return -EINVAL; - - mcu_cnt = 60/card_clock + 3; - if (mcu_cnt > 15) - mcu_cnt = 15; - - /* Make sure that the SSC clock div_n is not less than MIN_DIV_N */ - - div = CLK_DIV_1; - while (n < MIN_DIV_N && div < CLK_DIV_4) { - n = (n + 2) * 2 - 2; - div++; - } - dev_dbg(&ucr->pusb_intf->dev, "n = %d, div = %d\n", n, div); - - if (double_clk) - ssc_depth = double_ssc_depth(ssc_depth); - - ssc_depth = revise_ssc_depth(ssc_depth, div); - dev_dbg(&ucr->pusb_intf->dev, "ssc_depth = %d\n", ssc_depth); - - rtsx_usb_init_cmd(ucr); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, CLK_CHANGE, CLK_CHANGE); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, - 0x3F, (div << 4) | mcu_cnt); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL2, - SSC_DEPTH_MASK, ssc_depth); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); - if (vpclk) { - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, - PHASE_NOT_RESET, 0); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, - PHASE_NOT_RESET, PHASE_NOT_RESET); - } - - ret = rtsx_usb_send_cmd(ucr, MODE_C, 2000); - if (ret < 0) - return ret; - - ret = rtsx_usb_write_register(ucr, SSC_CTL1, 0xff, - SSC_RSTB | SSC_8X_EN | SSC_SEL_4M); - if (ret < 0) - return ret; - - /* Wait SSC clock stable */ - usleep_range(100, 1000); - - ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0); - if (ret < 0) - return ret; - - ucr->cur_clk = card_clock; - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_usb_switch_clock); - -int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card) -{ - int ret; - u16 val; - u16 cd_mask[] = { - [RTSX_USB_SD_CARD] = (CD_MASK & ~SD_CD), - [RTSX_USB_MS_CARD] = (CD_MASK & ~MS_CD) - }; - - ret = rtsx_usb_get_card_status(ucr, &val); - /* 
- * If get status fails, return 0 (ok) for the exclusive check - * and let the flow fail at somewhere else. - */ - if (ret) - return 0; - - if (val & cd_mask[card]) - return -EIO; - - return 0; -} -EXPORT_SYMBOL_GPL(rtsx_usb_card_exclusive_check); - -static int rtsx_usb_reset_chip(struct rtsx_ucr *ucr) -{ - int ret; - u8 val; - - rtsx_usb_init_cmd(ucr); - - if (CHECK_PKG(ucr, LQFP48)) { - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, - LDO3318_PWR_MASK, LDO_SUSPEND); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, - FORCE_LDO_POWERB, FORCE_LDO_POWERB); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL1, - 0x30, 0x10); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL5, - 0x03, 0x01); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL6, - 0x0C, 0x04); - } - - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SYS_DUMMY0, NYET_MSAK, NYET_EN); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CD_DEGLITCH_WIDTH, 0xFF, 0x08); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, - CD_DEGLITCH_EN, XD_CD_DEGLITCH_EN, 0x0); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD30_DRIVE_SEL, - SD30_DRIVE_MASK, DRIVER_TYPE_D); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, - CARD_DRIVE_SEL, SD20_DRIVE_MASK, 0x0); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, LDO_POWER_CFG, 0xE0, 0x0); - - if (ucr->is_rts5179) - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, - CARD_PULL_CTL5, 0x03, 0x01); - - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_DMA1_CTL, - EXTEND_DMA1_ASYNC_SIGNAL, EXTEND_DMA1_ASYNC_SIGNAL); - rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_INT_PEND, - XD_INT | MS_INT | SD_INT, - XD_INT | MS_INT | SD_INT); - - ret = rtsx_usb_send_cmd(ucr, MODE_C, 100); - if (ret) - return ret; - - /* config non-crystal mode */ - rtsx_usb_read_register(ucr, CFG_MODE, &val); - if ((val & XTAL_FREE) || ((val & CLK_MODE_MASK) == CLK_MODE_NON_XTAL)) { - ret = rtsx_usb_write_phy_register(ucr, 0xC2, 0x7C); - if (ret) - return ret; - } - - return 0; -} - -static int rtsx_usb_init_chip(struct rtsx_ucr *ucr) -{ - int ret; - u8 val; - - 
rtsx_usb_clear_fsm_err(ucr); - - /* power on SSC */ - ret = rtsx_usb_write_register(ucr, - FPDCTL, SSC_POWER_MASK, SSC_POWER_ON); - if (ret) - return ret; - - usleep_range(100, 1000); - ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0x00); - if (ret) - return ret; - - /* determine IC version */ - ret = rtsx_usb_read_register(ucr, HW_VERSION, &val); - if (ret) - return ret; - - ucr->ic_version = val & HW_VER_MASK; - - /* determine package */ - ret = rtsx_usb_read_register(ucr, CARD_SHARE_MODE, &val); - if (ret) - return ret; - - if (val & CARD_SHARE_LQFP_SEL) { - ucr->package = LQFP48; - dev_dbg(&ucr->pusb_intf->dev, "Package: LQFP48\n"); - } else { - ucr->package = QFN24; - dev_dbg(&ucr->pusb_intf->dev, "Package: QFN24\n"); - } - - /* determine IC variations */ - rtsx_usb_read_register(ucr, CFG_MODE_1, &val); - if (val & RTS5179) { - ucr->is_rts5179 = true; - dev_dbg(&ucr->pusb_intf->dev, "Device is rts5179\n"); - } else { - ucr->is_rts5179 = false; - } - - return rtsx_usb_reset_chip(ucr); -} - -static int rtsx_usb_probe(struct usb_interface *intf, - const struct usb_device_id *id) -{ - struct usb_device *usb_dev = interface_to_usbdev(intf); - struct rtsx_ucr *ucr; - int ret; - - dev_dbg(&intf->dev, - ": Realtek USB Card Reader found at bus %03d address %03d\n", - usb_dev->bus->busnum, usb_dev->devnum); - - ucr = devm_kzalloc(&intf->dev, sizeof(*ucr), GFP_KERNEL); - if (!ucr) - return -ENOMEM; - - ucr->pusb_dev = usb_dev; - - ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE, - GFP_KERNEL, &ucr->iobuf_dma); - if (!ucr->iobuf) - return -ENOMEM; - - usb_set_intfdata(intf, ucr); - - ucr->vendor_id = id->idVendor; - ucr->product_id = id->idProduct; - ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf; - - mutex_init(&ucr->dev_mutex); - - ucr->pusb_intf = intf; - - /* initialize */ - ret = rtsx_usb_init_chip(ucr); - if (ret) - goto out_init_fail; - - /* initialize USB SG transfer timer */ - timer_setup(&ucr->sg_timer, rtsx_usb_sg_timed_out, 0); - - ret = 
mfd_add_hotplug_devices(&intf->dev, rtsx_usb_cells, - ARRAY_SIZE(rtsx_usb_cells)); - if (ret) - goto out_init_fail; - -#ifdef CONFIG_PM - intf->needs_remote_wakeup = 1; - usb_enable_autosuspend(usb_dev); -#endif - - return 0; - -out_init_fail: - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); - return ret; -} - -static void rtsx_usb_disconnect(struct usb_interface *intf) -{ - struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); - - dev_dbg(&intf->dev, "%s called\n", __func__); - - mfd_remove_devices(&intf->dev); - - usb_set_intfdata(ucr->pusb_intf, NULL); - usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, - ucr->iobuf_dma); -} - -#ifdef CONFIG_PM -static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message) -{ - struct rtsx_ucr *ucr = - (struct rtsx_ucr *)usb_get_intfdata(intf); - u16 val = 0; - - dev_dbg(&intf->dev, "%s called with pm message 0x%04x\n", - __func__, message.event); - - if (PMSG_IS_AUTO(message)) { - if (mutex_trylock(&ucr->dev_mutex)) { - rtsx_usb_get_card_status(ucr, &val); - mutex_unlock(&ucr->dev_mutex); - - /* Defer the autosuspend if card exists */ - if (val & (SD_CD | MS_CD)) - return -EAGAIN; - } else { - /* There is an ongoing operation*/ - return -EAGAIN; - } - } - - return 0; -} - -static int rtsx_usb_resume(struct usb_interface *intf) -{ - return 0; -} - -static int rtsx_usb_reset_resume(struct usb_interface *intf) -{ - struct rtsx_ucr *ucr = - (struct rtsx_ucr *)usb_get_intfdata(intf); - - rtsx_usb_reset_chip(ucr); - return 0; -} - -#else /* CONFIG_PM */ - -#define rtsx_usb_suspend NULL -#define rtsx_usb_resume NULL -#define rtsx_usb_reset_resume NULL - -#endif /* CONFIG_PM */ - - -static int rtsx_usb_pre_reset(struct usb_interface *intf) -{ - struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); - - mutex_lock(&ucr->dev_mutex); - return 0; -} - -static int rtsx_usb_post_reset(struct usb_interface *intf) -{ - struct rtsx_ucr *ucr = (struct rtsx_ucr 
*)usb_get_intfdata(intf); - - mutex_unlock(&ucr->dev_mutex); - return 0; -} - -static struct usb_device_id rtsx_usb_usb_ids[] = { - { USB_DEVICE(0x0BDA, 0x0129) }, - { USB_DEVICE(0x0BDA, 0x0139) }, - { USB_DEVICE(0x0BDA, 0x0140) }, - { } -}; -MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids); - -static struct usb_driver rtsx_usb_driver = { - .name = "rtsx_usb", - .probe = rtsx_usb_probe, - .disconnect = rtsx_usb_disconnect, - .suspend = rtsx_usb_suspend, - .resume = rtsx_usb_resume, - .reset_resume = rtsx_usb_reset_resume, - .pre_reset = rtsx_usb_pre_reset, - .post_reset = rtsx_usb_post_reset, - .id_table = rtsx_usb_usb_ids, - .supports_autosuspend = 1, - .soft_unbind = 1, -}; - -module_usb_driver(rtsx_usb_driver); - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Roger Tseng "); -MODULE_DESCRIPTION("Realtek USB Card Reader Driver"); diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index f1a5c2357b14..7c0fa24f9067 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -496,6 +496,10 @@ config PCI_ENDPOINT_TEST Enable this configuration option to enable the host side test driver for PCI Endpoint. 
+config MISC_RTSX + tristate + default MISC_RTSX_PCI || MISC_RTSX_USB + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" @@ -508,4 +512,5 @@ source "drivers/misc/mic/Kconfig" source "drivers/misc/genwqe/Kconfig" source "drivers/misc/echo/Kconfig" source "drivers/misc/cxl/Kconfig" +source "drivers/misc/cardreader/Kconfig" endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 5ca5f64df478..8d8cc096063b 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_CXL_BASE) += cxl/ obj-$(CONFIG_ASPEED_LPC_CTRL) += aspeed-lpc-ctrl.o obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o +obj-$(CONFIG_MISC_RTSX) += cardreader/ lkdtm-$(CONFIG_LKDTM) += lkdtm_core.o lkdtm-$(CONFIG_LKDTM) += lkdtm_bugs.o diff --git a/drivers/misc/cardreader/Kconfig b/drivers/misc/cardreader/Kconfig new file mode 100644 index 000000000000..e7d835a160bb --- /dev/null +++ b/drivers/misc/cardreader/Kconfig @@ -0,0 +1,20 @@ +config MISC_RTSX_PCI + tristate "Realtek PCI-E card reader" + depends on PCI + select MFD_CORE + help + This supports Realtek PCI-Express card readers, including rts5209, + rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411. + Realtek card readers support access to many types of memory cards, + such as Memory Stick, Memory Stick Pro, Secure Digital and + MultiMediaCard. + +config MISC_RTSX_USB + tristate "Realtek USB card reader" + depends on USB + select MFD_CORE + help + Select this option to get support for Realtek USB 2.0 card readers + including RTS5129, RTS5139, RTS5179 and RTS5170. + Realtek card readers support access to many types of memory cards, + such as Memory Stick Pro, Secure Digital and MultiMediaCard. 
diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile new file mode 100644 index 000000000000..78337b24fc62 --- /dev/null +++ b/drivers/misc/cardreader/Makefile @@ -0,0 +1,4 @@ +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o + +obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o +obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o diff --git a/drivers/misc/cardreader/rtl8411.c b/drivers/misc/cardreader/rtl8411.c new file mode 100644 index 000000000000..434fd070d3e3 --- /dev/null +++ b/drivers/misc/cardreader/rtl8411.c @@ -0,0 +1,508 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. 
+ * + * Author: + * Wei WANG + * Roger Tseng + */ + +#include +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, SYS_VER, &val); + return val & 0x0F; +} + +static int rtl8411b_is_qfn48(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + rtsx_pci_read_register(pcr, RTL8411B_PACKAGE_MODE, &val); + + if (val & 0x2) + return 1; + else + return 0; +} + +static void rtl8411_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg1 = 0; + u8 reg3 = 0; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®1); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg1); + + if (!rtsx_vendor_setting_valid(reg1)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg1); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg1)); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg1); + + rtsx_pci_read_config_byte(pcr, PCR_SETTING_REG3, ®3); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG3, reg3); + pcr->sd30_drive_sel_3v3 = rtl8411_reg_to_sd30_drive_sel_3v3(reg3); +} + +static void rtl8411b_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg = 0; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg)); + pcr->sd30_drive_sel_3v3 = + map_sd_drive(rtl8411b_reg_to_sd30_drive_sel_3v3(reg)); +} + +static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07); +} + +static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL, + CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE); 
+ + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtl8411b_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + if (rtl8411b_is_qfn48(pcr)) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + CARD_PULL_CTL3, 0xFF, 0xF5); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL, + CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, FUNC_FORCE_CTL, + 0x06, 0x00); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtl8411_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); +} + +static int rtl8411_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); +} + +static int rtl8411_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); +} + +static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); +} + +static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL, + BPP_LDO_POWB, BPP_LDO_SUSPEND); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON); + if (err < 0) + return err; + + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON); + if (err < 0) + return err; + + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_ON); + if (err < 0) + return err; + + return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON); +} + +static int rtl8411_card_power_off(struct 
rtsx_pcr *pcr, int card) +{ + int err; + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_OFF); + if (err < 0) + return err; + + return rtsx_pci_write_register(pcr, LDO_CTL, + BPP_LDO_POWB, BPP_LDO_SUSPEND); +} + +static int rtl8411_do_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage, + int bpp_tuned18_shift, int bpp_asic_1v8) +{ + u8 mask, val; + int err; + + mask = (BPP_REG_TUNED18 << bpp_tuned18_shift) | BPP_PAD_MASK; + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + val = (BPP_ASIC_3V3 << bpp_tuned18_shift) | BPP_PAD_3V3; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + val = (bpp_asic_1v8 << bpp_tuned18_shift) | BPP_PAD_1V8; + } else { + return -EINVAL; + } + + return rtsx_pci_write_register(pcr, LDO_CTL, mask, val); +} + +static int rtl8411_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + return rtl8411_do_switch_output_voltage(pcr, voltage, + BPP_TUNED18_SHIFT_8411, BPP_ASIC_1V8); +} + +static int rtl8402_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + return rtl8411_do_switch_output_voltage(pcr, voltage, + BPP_TUNED18_SHIFT_8402, BPP_ASIC_2V0); +} + +static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr) +{ + unsigned int card_exist; + + card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); + card_exist &= CARD_EXIST; + if (!card_exist) { + /* Enable card CD */ + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, CD_ENABLE); + /* Enable card interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00); + return 0; + } + + if (hweight32(card_exist) > 1) { + rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); + msleep(100); + + card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); + if (card_exist & MS_EXIST) + card_exist = MS_EXIST; + else if (card_exist 
& SD_EXIST) + card_exist = SD_EXIST; + else + card_exist = 0; + + rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_OFF); + + pcr_dbg(pcr, "After CD deglitch, card_exist = 0x%x\n", + card_exist); + } + + if (card_exist & MS_EXIST) { + /* Disable SD interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40); + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, MS_CD_EN_ONLY); + } else if (card_exist & SD_EXIST) { + /* Disable MS interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80); + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, SD_CD_EN_ONLY); + } + + return card_exist; +} + +static int rtl8411_conv_clk_and_div_n(int input, int dir) +{ + int output; + + if (dir == CLK_TO_DIV_N) + output = input * 4 / 5 - 2; + else + output = (input + 2) * 5 / 4; + + return output; +} + +static const struct pcr_ops rtl8411_pcr_ops = { + .fetch_vendor_settings = rtl8411_fetch_vendor_settings, + .extra_init_hw = rtl8411_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8411_switch_output_voltage, + .cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +static const struct pcr_ops rtl8402_pcr_ops = { + .fetch_vendor_settings = rtl8411_fetch_vendor_settings, + .extra_init_hw = rtl8411_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8402_switch_output_voltage, + 
.cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +static const struct pcr_ops rtl8411b_pcr_ops = { + .fetch_vendor_settings = rtl8411b_fetch_vendor_settings, + .extra_init_hw = rtl8411b_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8411_switch_output_voltage, + .cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05), + 
RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +static const u32 rtl8411b_qfn64_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x09 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x69 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x08 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + 
RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static void rtl8411_init_common_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->flags = 0; + pcr->card_drive_sel = RTL8411_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(23, 7, 14); + pcr->rx_initial_phase = SET_CLOCK_PHASE(4, 3, 10); + pcr->ic_version = rtl8411_get_ic_version(pcr); +} + +void rtl8411_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8411_pcr_ops; + set_pull_ctrl_tables(pcr, rtl8411); +} + +void rtl8411b_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8411b_pcr_ops; + if (rtl8411b_is_qfn48(pcr)) + set_pull_ctrl_tables(pcr, rtl8411b_qfn48); + else + set_pull_ctrl_tables(pcr, rtl8411b_qfn64); +} + +void rtl8402_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8402_pcr_ops; + set_pull_ctrl_tables(pcr, rtl8411); +} diff --git a/drivers/misc/cardreader/rts5209.c b/drivers/misc/cardreader/rts5209.c new file mode 100644 index 000000000000..ce68c48d8ec9 --- /dev/null +++ b/drivers/misc/cardreader/rts5209.c @@ -0,0 +1,277 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 
Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + val = rtsx_pci_readb(pcr, 0x1C); + return val & 0x0F; +} + +static void rts5209_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (rts5209_vendor_setting1_valid(reg)) { + if (rts5209_reg_check_ms_pmos(reg)) + pcr->flags |= PCR_MS_PMOS; + pcr->aspm_en = rts5209_reg_to_aspm(reg); + } + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + + if (rts5209_vendor_setting2_valid(reg)) { + pcr->sd30_drive_sel_1v8 = + rts5209_reg_to_sd30_drive_sel_1v8(reg); + pcr->sd30_drive_sel_3v3 = + rts5209_reg_to_sd30_drive_sel_3v3(reg); + pcr->card_drive_sel = rts5209_reg_to_card_drive_sel(reg); + } +} + +static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07); +} + +static int rts5209_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Turn off LED */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, 
WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Force CLKREQ# PIN to drive 0 to request clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03); + /* Configure driving */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_optimize_phy(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966); +} + +static int rts5209_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); +} + +static int rts5209_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); +} + +static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); +} + +static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); +} + +static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + u8 pwr_mask, partial_pwr_on, pwr_on; + + pwr_mask = SD_POWER_MASK; + partial_pwr_on = SD_PARTIAL_POWER_ON; + pwr_on = SD_POWER_ON; + + if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) { + pwr_mask = MS_POWER_MASK; + partial_pwr_on = MS_PARTIAL_POWER_ON; + pwr_on = MS_POWER_ON; + } + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + pwr_mask, partial_pwr_on); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x04); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int 
rts5209_card_power_off(struct rtsx_pcr *pcr, int card) +{ + u8 pwr_mask, pwr_off; + + pwr_mask = SD_POWER_MASK; + pwr_off = SD_POWER_OFF; + + if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) { + pwr_mask = MS_POWER_MASK; + pwr_off = MS_POWER_OFF; + } + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct pcr_ops rts5209_pcr_ops = { + .fetch_vendor_settings = rts5209_fetch_vendor_settings, + .extra_init_hw = rts5209_extra_init_hw, + .optimize_phy = rts5209_optimize_phy, + .turn_on_led = rts5209_turn_on_led, + .turn_off_led = rts5209_turn_off_led, + .enable_auto_blink = rts5209_enable_auto_blink, + .disable_auto_blink = rts5209_disable_auto_blink, + .card_power_on = rts5209_card_power_on, + .card_power_off = rts5209_card_power_off, + .switch_output_voltage = rts5209_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5209_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5209_sd_pull_ctl_enable_tbl[] = { + 
RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5209_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5209_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5209_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5209_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | + EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT; + pcr->num_slots = 2; + pcr->ops = &rts5209_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTS5209_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5209_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c new file mode 100644 index 000000000000..024dcba8d6c8 --- /dev/null +++ b/drivers/misc/cardreader/rts5227.c @@ -0,0 
+1,374 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * Roger Tseng + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5227_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5227_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x13, 0x13, 0x13}, + {0x96, 0x96, 0x96}, + {0x7F, 0x7F, 0x7F}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0x99, 0x99, 0x99}, + {0xAA, 0xAA, 0xAA}, + {0xFE, 0xFE, 0xFE}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", 
PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static void rts5227_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, 0xFF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, 0xFF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x10); + + rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); +} + +static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) +{ + u16 cap; + + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure LTR */ + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cap); + if (cap & PCI_EXP_DEVCTL2_LTR_EN) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LTR_CTL, 0xFF, 0xA3); + /* Configure OBFF */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, 0x03, 0x03); + /* Configure driving */ + rts5227_fill_driving(pcr, OUTPUT_3V3); + /* Configure force_clock_req */ + if (pcr->flags & 
PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0xB8); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0x88); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, pcr->reg_pm_ctrl3, 0x10, 0x00); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5227_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + /* Optimize RX sensitivity */ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); +} + +static int rts5227_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5227_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5227_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5227_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5227_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5227_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK | PMOS_STRG_MASK, + SD_POWER_OFF | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0X00); + return 
rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5227_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_phy_register(pcr, 0x11, 0x3C02); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C80 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5227_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static const struct pcr_ops rts5227_pcr_ops = { + .fetch_vendor_settings = rts5227_fetch_vendor_settings, + .extra_init_hw = rts5227_extra_init_hw, + .optimize_phy = rts5227_optimize_phy, + .turn_on_led = rts5227_turn_on_led, + .turn_off_led = rts5227_turn_off_led, + .enable_auto_blink = rts5227_enable_auto_blink, + .disable_auto_blink = rts5227_disable_auto_blink, + .card_power_on = rts5227_card_power_on, + .card_power_off = rts5227_card_power_off, + .switch_output_voltage = rts5227_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5227_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5227_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5227_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5227_ms_pull_ctl_enable_tbl[] = { + 
RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5227_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5227_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5227_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); + pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 7, 7); + + pcr->ic_version = rts5227_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5227_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5227_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5227_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = PM_CTRL3; +} + +static int rts522a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, D3_DELINK_MODE_EN, + 0x00); + if (err < 0) + return err; + + if (is_version(pcr, 0x522A, IC_VER_A)) { + err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, + PHY_RCR2_INIT_27S); + if (err) + return err; + + rtsx_pci_write_phy_register(pcr, PHY_RCR1, PHY_RCR1_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD0, PHY_FLD0_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD3, PHY_FLD3_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD4, PHY_FLD4_INIT_27S); + } + + return 0; +} + +static int rts522a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5227_extra_init_hw(pcr); + + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG, + FUNC_FORCE_UPME_XMT_DBG); + rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04); + 
rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 0xFF, 0x11); + + return 0; +} + +/* rts522a operations mainly derived from rts5227, except phy/hw init setting. + */ +static const struct pcr_ops rts522a_pcr_ops = { + .fetch_vendor_settings = rts5227_fetch_vendor_settings, + .extra_init_hw = rts522a_extra_init_hw, + .optimize_phy = rts522a_optimize_phy, + .turn_on_led = rts5227_turn_on_led, + .turn_off_led = rts5227_turn_off_led, + .enable_auto_blink = rts5227_enable_auto_blink, + .disable_auto_blink = rts5227_disable_auto_blink, + .card_power_on = rts5227_card_power_on, + .card_power_off = rts5227_card_power_off, + .switch_output_voltage = rts5227_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5227_force_power_down, +}; + +void rts522a_init_params(struct rtsx_pcr *pcr) +{ + rts5227_init_params(pcr); + + pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; +} diff --git a/drivers/misc/cardreader/rts5229.c b/drivers/misc/cardreader/rts5229.c new file mode 100644 index 000000000000..9119261337cc --- /dev/null +++ b/drivers/misc/cardreader/rts5229.c @@ -0,0 +1,273 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . 
+ * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5229_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg)); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_3v3(reg)); +} + +static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); +} + +static int rts5229_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Force CLKREQ# PIN to drive 0 to request clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure driving */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_optimize_phy(struct rtsx_pcr *pcr) +{ + /* Optimize RX sensitivity 
*/ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); +} + +static int rts5229_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5229_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK | PMOS_STRG_MASK, + SD_POWER_OFF | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, 
pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct pcr_ops rts5229_pcr_ops = { + .fetch_vendor_settings = rts5229_fetch_vendor_settings, + .extra_init_hw = rts5229_extra_init_hw, + .optimize_phy = rts5229_optimize_phy, + .turn_on_led = rts5229_turn_on_led, + .turn_off_led = rts5229_turn_off_led, + .enable_auto_blink = rts5229_enable_auto_blink, + .disable_auto_blink = rts5229_disable_auto_blink, + .card_power_on = rts5229_card_power_on, + .card_power_off = rts5229_card_power_off, + .switch_output_voltage = rts5229_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5229_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* For RTS5229 version C */ +static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* For RTS5229 version C */ +static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5229_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* 
MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5229_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5229_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5229_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); + pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 6, 6); + + pcr->ic_version = rts5229_get_ic_version(pcr); + if (pcr->ic_version == IC_VER_C) { + pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2; + pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2; + } else { + pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1; + pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl1; + } + pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c new file mode 100644 index 000000000000..dbe013abdb83 --- /dev/null +++ b/drivers/misc/cardreader/rts5249.c @@ -0,0 +1,740 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5249_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5249_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x11, 0x11, 0x18}, + {0x55, 0x55, 0x5C}, + {0xFF, 0xFF, 0xFF}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0xC4, 0xC4, 0xC4}, + {0x3C, 0x3C, 0x3C}, + {0xFE, 0xFE, 0xFE}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static void 
rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, 0xFF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, 0xFF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); +} + +static void rts5249_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + u32 lval; + + if (CHK_PCI_PID(pcr, PID_524A)) + rtsx_pci_read_config_dword(pcr, + PCR_ASPM_SETTING_REG1, &lval); + else + rtsx_pci_read_config_dword(pcr, + PCR_ASPM_SETTING_REG2, &lval); + + if (lval & ASPM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & ASPM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + + if (option->ltr_en) { + u16 val; + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); + if (val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } else { + option->ltr_enabled = false; + } + } +} + +static int rts5249_init_from_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; + + return 0; +} + +static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + rts5249_init_from_cfg(pcr); + rts5249_init_from_hw(pcr); + + rtsx_pci_init_cmd(pcr); + + /* Rest L1SUB Config */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00); + /* Configure GPIO as output */ + 
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure driving */ + rts5249_fill_driving(pcr, OUTPUT_3V3); + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + return rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); +} + +static int rts5249_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_REV, + PHY_REV_RESV | PHY_REV_RXIDLE_LATCHED | + PHY_REV_P1_EN | PHY_REV_RXIDLE_EN | + PHY_REV_CLKREQ_TX_EN | PHY_REV_RX_PWST | + PHY_REV_CLKREQ_DT_1_0 | PHY_REV_STOP_CLKRD | + PHY_REV_STOP_CLKWR); + if (err < 0) + return err; + + msleep(1); + + err = rtsx_pci_write_phy_register(pcr, PHY_BPCR, + PHY_BPCR_IBRXSEL | PHY_BPCR_IBTXSEL | + PHY_BPCR_IB_FILTER | PHY_BPCR_CMIRROR_EN); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_PCR, + PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 | + PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | + PHY_PCR_RSSI_EN | PHY_PCR_RX10K); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, 
PHY_RCR2, + PHY_RCR2_EMPHASE_EN | PHY_RCR2_NADJR | + PHY_RCR2_CDR_SR_2 | PHY_RCR2_FREQSEL_12 | + PHY_RCR2_CDR_SC_12P | PHY_RCR2_CALIB_LATE); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_FLD4, + PHY_FLD4_FLDEN_SEL | PHY_FLD4_REQ_REF | + PHY_FLD4_RXAMP_OFF | PHY_FLD4_REQ_ADDA | + PHY_FLD4_BER_COUNT | PHY_FLD4_BER_TIMER | + PHY_FLD4_BER_CHK_EN); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_RDR, + PHY_RDR_RXDSEL_1_9 | PHY_SSC_AUTO_PWD); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_RCR1, + PHY_RCR1_ADP_TIME_4 | PHY_RCR1_VCO_COARSE); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_FLD3, + PHY_FLD3_TIMER_4 | PHY_FLD3_TIMER_6 | + PHY_FLD3_RXDELINK); + if (err < 0) + return err; + + return rtsx_pci_write_phy_register(pcr, PHY_TUNE, + PHY_TUNE_TUNEREF_1_0 | PHY_TUNE_VBGSEL_1252 | + PHY_TUNE_SDBUS_33 | PHY_TUNE_TUNED18 | + PHY_TUNE_TUNED12 | PHY_TUNE_TUNEA12); +} + +static int rtsx_base_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rtsx_base_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rtsx_base_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + msleep(5); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_POWER_ON); + 
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtsx_base_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_OFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtsx_base_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u16 append; + + switch (voltage) { + case OUTPUT_3V3: + err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK, + PHY_TUNE_VOLTAGE_3V3); + if (err < 0) + return err; + break; + case OUTPUT_1V8: + append = PHY_TUNE_D18_1V8; + if (CHK_PCI_PID(pcr, 0x5249)) { + err = rtsx_pci_update_phy(pcr, PHY_BACR, + PHY_BACR_BASIC_MASK, 0); + if (err < 0) + return err; + append = PHY_TUNE_D18_1V7; + } + + err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK, + append); + if (err < 0) + return err; + break; + default: + pcr_dbg(pcr, "unknown output voltage %d\n", voltage); + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5249_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static void rts5249_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + if (enable) + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, + pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + if (!enable) + val = FORCE_ASPM_CTL0; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + + pcr->aspm_enabled = enable; +} + +static const struct pcr_ops rts5249_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = 
rts5249_extra_init_hw, + .optimize_phy = rts5249_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rtsx_base_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rtsx_base_switch_output_voltage, + .force_power_down = rtsx_base_force_power_down, + .set_aspm = rts5249_set_aspm, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5249_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5249_pcr_ops; + + 
pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5249_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5249_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5249_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5249_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5249_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = PM_CTRL3; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* Init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->dev_aspm_mode = DEV_ASPM_DYNAMIC; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5249_DEF; + option->ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF; +} + +static int rts524a_write_phy(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + addr = addr & 0x80 ? (addr & 0x7F) | 0x40 : addr; + + return __rtsx_pci_write_phy_register(pcr, addr, val); +} + +static int rts524a_read_phy(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + addr = addr & 0x80 ? 
(addr & 0x7F) | 0x40 : addr; + + return __rtsx_pci_read_phy_register(pcr, addr, val); +} + +static int rts524a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, + D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + rtsx_pci_write_phy_register(pcr, PHY_PCR, + PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 | + PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | PHY_PCR_RSSI_EN); + rtsx_pci_write_phy_register(pcr, PHY_SSCCR3, + PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY); + + if (is_version(pcr, 0x524A, IC_VER_A)) { + rtsx_pci_write_phy_register(pcr, PHY_SSCCR3, + PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY); + rtsx_pci_write_phy_register(pcr, PHY_SSCCR2, + PHY_SSCCR2_PLL_NCODE | PHY_SSCCR2_TIME0 | + PHY_SSCCR2_TIME2_WIDTH); + rtsx_pci_write_phy_register(pcr, PHY_ANA1A, + PHY_ANA1A_TXR_LOOPBACK | PHY_ANA1A_RXT_BIST | + PHY_ANA1A_TXR_BIST | PHY_ANA1A_REV); + rtsx_pci_write_phy_register(pcr, PHY_ANA1D, + PHY_ANA1D_DEBUG_ADDR); + rtsx_pci_write_phy_register(pcr, PHY_DIG1E, + PHY_DIG1E_REV | PHY_DIG1E_D0_X_D1 | + PHY_DIG1E_RX_ON_HOST | PHY_DIG1E_RCLK_REF_HOST | + PHY_DIG1E_RCLK_TX_EN_KEEP | + PHY_DIG1E_RCLK_TX_TERM_KEEP | + PHY_DIG1E_RCLK_RX_EIDLE_ON | PHY_DIG1E_TX_TERM_KEEP | + PHY_DIG1E_RX_TERM_KEEP | PHY_DIG1E_TX_EN_KEEP | + PHY_DIG1E_RX_EN_KEEP); + } + + rtsx_pci_write_phy_register(pcr, PHY_ANA08, + PHY_ANA08_RX_EQ_DCGAIN | PHY_ANA08_SEL_RX_EN | + PHY_ANA08_RX_EQ_VAL | PHY_ANA08_SCP | PHY_ANA08_SEL_IPI); + + return 0; +} + +static int rts524a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5249_extra_init_hw(pcr); + + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, + FORCE_ASPM_L1_EN, FORCE_ASPM_L1_EN); + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, LDO_VCC_LMT_EN, + LDO_VCC_LMT_EN); + rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); + if (is_version(pcr, 0x524A, IC_VER_A)) { + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + 
LDO_DV18_SR_MASK, LDO_DV18_SR_DF); + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, + LDO_VCC_REF_TUNE_MASK, LDO_VCC_REF_1V2); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_REF_TUNE_MASK, LDO_VIO_REF_1V2); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_SR_MASK, LDO_VIO_SR_DF); + rtsx_pci_write_register(pcr, LDO_DV12S_CFG, + LDO_REF12_TUNE_MASK, LDO_REF12_TUNE_DF); + rtsx_pci_write_register(pcr, SD40_LDO_CTL1, + SD40_VIO_TUNE_MASK, SD40_VIO_TUNE_1V7); + } + + return 0; +} + +static void rts5250_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &(pcr->option); + + u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR); + int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST); + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* Run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* L1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + if (aspm_L1_1 || aspm_L1_2) { + if (rtsx_check_dev_flag(pcr, + LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) { + if (card_exist) + val &= ~L1OFF_MBIAS2_EN_5250; + else + val |= L1OFF_MBIAS2_EN_5250; + } + } + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts524a_pcr_ops = { + .write_phy = rts524a_write_phy, + .read_phy = rts524a_read_phy, + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = rts524a_extra_init_hw, + .optimize_phy = rts524a_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rtsx_base_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rtsx_base_switch_output_voltage, + .force_power_down = rtsx_base_force_power_down, + .set_l1off_cfg_sub_d0 
= rts5250_set_l1off_cfg_sub_d0, + .set_aspm = rts5249_set_aspm, +}; + +void rts524a_init_params(struct rtsx_pcr *pcr) +{ + rts5249_init_params(pcr); + pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + pcr->option.ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + pcr->ops = &rts524a_pcr_ops; +} + +static int rts525a_card_power_on(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, + LDO_VCC_TUNE_MASK, LDO_VCC_3V3); + return rtsx_base_card_power_on(pcr, card); +} + +static int rts525a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + LDO_D3318_MASK, LDO_D3318_33V); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + LDO_D3318_MASK, LDO_D3318_18V); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, + SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + rtsx_pci_init_cmd(pcr); + rts5249_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts525a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, + D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + rtsx_pci_write_phy_register(pcr, _PHY_FLD0, + _PHY_FLD0_CLK_REQ_20C | _PHY_FLD0_RX_IDLE_EN | + _PHY_FLD0_BIT_ERR_RSTN | _PHY_FLD0_BER_COUNT | + _PHY_FLD0_BER_TIMER | _PHY_FLD0_CHECK_EN); + + rtsx_pci_write_phy_register(pcr, _PHY_ANA03, + _PHY_ANA03_TIMER_MAX | _PHY_ANA03_OOBS_DEB_EN | + _PHY_CMU_DEBUG_EN); + + if (is_version(pcr, 0x525A, IC_VER_A)) + rtsx_pci_write_phy_register(pcr, _PHY_REV0, + _PHY_REV0_FILTER_OUT | _PHY_REV0_CDR_BYPASS_PFD | + _PHY_REV0_CDR_RX_IDLE_BYPASS); + + return 0; +} + +static int rts525a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5249_extra_init_hw(pcr); + + rtsx_pci_write_register(pcr, PCLK_CTL, 
PCLK_MODE_SEL, PCLK_MODE_SEL); + if (is_version(pcr, 0x525A, IC_VER_A)) { + rtsx_pci_write_register(pcr, L1SUB_CONFIG2, + L1SUB_AUTO_CFG, L1SUB_AUTO_CFG); + rtsx_pci_write_register(pcr, RREF_CFG, + RREF_VBGSEL_MASK, RREF_VBGSEL_1V25); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_TUNE_MASK, LDO_VIO_1V7); + rtsx_pci_write_register(pcr, LDO_DV12S_CFG, + LDO_D12_TUNE_MASK, LDO_D12_TUNE_DF); + rtsx_pci_write_register(pcr, LDO_AV12S_CFG, + LDO_AV12S_TUNE_MASK, LDO_AV12S_TUNE_DF); + rtsx_pci_write_register(pcr, LDO_VCC_CFG0, + LDO_VCC_LMTVTH_MASK, LDO_VCC_LMTVTH_2A); + rtsx_pci_write_register(pcr, OOBS_CONFIG, + OOBS_AUTOK_DIS | OOBS_VAL_MASK, 0x89); + } + + return 0; +} + +static const struct pcr_ops rts525a_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = rts525a_extra_init_hw, + .optimize_phy = rts525a_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rts525a_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rts525a_switch_output_voltage, + .force_power_down = rtsx_base_force_power_down, + .set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0, + .set_aspm = rts5249_set_aspm, +}; + +void rts525a_init_params(struct rtsx_pcr *pcr) +{ + rts5249_init_params(pcr); + pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + pcr->option.ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + pcr->ops = &rts525a_pcr_ops; +} diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c new file mode 100644 index 000000000000..b60bd2a3ba64 --- /dev/null +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -0,0 +1,1569 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rtsx_pcr.h" + +static bool msi_en = true; +module_param(msi_en, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(msi_en, "Enable MSI"); + +static DEFINE_IDR(rtsx_pci_idr); +static DEFINE_SPINLOCK(rtsx_pci_lock); + +static struct mfd_cell rtsx_pcr_cells[] = { + [RTSX_SD_CARD] = { + .name = DRV_NAME_RTSX_PCI_SDMMC, + }, + [RTSX_MS_CARD] = { + .name = DRV_NAME_RTSX_PCI_MS, + }, +}; + +static const struct pci_device_id rtsx_pci_ids[] = { + { PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x522A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5287), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5286), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, rtsx_pci_ids); + +static inline void 
rtsx_pci_enable_aspm(struct rtsx_pcr *pcr) +{ + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + 0xFC, pcr->aspm_en); +} + +static inline void rtsx_pci_disable_aspm(struct rtsx_pcr *pcr) +{ + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + 0xFC, 0); +} + +int rtsx_comm_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency) +{ + rtsx_pci_write_register(pcr, MSGTXDATA0, + MASK_8_BIT_DEF, (u8) (latency & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA1, + MASK_8_BIT_DEF, (u8)((latency >> 8) & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA2, + MASK_8_BIT_DEF, (u8)((latency >> 16) & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA3, + MASK_8_BIT_DEF, (u8)((latency >> 24) & 0xFF)); + rtsx_pci_write_register(pcr, LTR_CTL, LTR_TX_EN_MASK | + LTR_LATENCY_MODE_MASK, LTR_TX_EN_1 | LTR_LATENCY_MODE_SW); + + return 0; +} + +int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency) +{ + if (pcr->ops->set_ltr_latency) + return pcr->ops->set_ltr_latency(pcr, latency); + else + return rtsx_comm_set_ltr_latency(pcr, latency); +} + +static void rtsx_comm_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + if (enable) + rtsx_pci_enable_aspm(pcr); + else + rtsx_pci_disable_aspm(pcr); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK; + u8 val = 0; + + if (enable) + val = pcr->aspm_en; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + + pcr->aspm_enabled = enable; +} + +static void rtsx_disable_aspm(struct rtsx_pcr *pcr) +{ + if (pcr->ops->set_aspm) + pcr->ops->set_aspm(pcr, false); + else + rtsx_comm_set_aspm(pcr, false); +} + +int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val) +{ + rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, val); + + return 0; +} + +void rtsx_set_l1off_sub_cfg_d0(struct rtsx_pcr *pcr, int active) +{ + if 
(pcr->ops->set_l1off_cfg_sub_d0) + pcr->ops->set_l1off_cfg_sub_d0(pcr, active); +} + +static void rtsx_comm_pm_full_on(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rtsx_disable_aspm(pcr); + + if (option->ltr_enabled) + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + + if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN)) + rtsx_set_l1off_sub_cfg_d0(pcr, 1); +} + +void rtsx_pm_full_on(struct rtsx_pcr *pcr) +{ + if (pcr->ops->full_on) + pcr->ops->full_on(pcr); + else + rtsx_comm_pm_full_on(pcr); +} + +void rtsx_pci_start_run(struct rtsx_pcr *pcr) +{ + /* If pci device removed, don't queue idle work any more */ + if (pcr->remove_pci) + return; + + if (pcr->state != PDEV_STAT_RUN) { + pcr->state = PDEV_STAT_RUN; + if (pcr->ops->enable_auto_blink) + pcr->ops->enable_auto_blink(pcr); + rtsx_pm_full_on(pcr); + } + + mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200)); +} +EXPORT_SYMBOL_GPL(rtsx_pci_start_run); + +int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data) +{ + int i; + u32 val = HAIMR_WRITE_START; + + val |= (u32)(addr & 0x3FFF) << 16; + val |= (u32)mask << 8; + val |= (u32)data; + + rtsx_pci_writel(pcr, RTSX_HAIMR, val); + + for (i = 0; i < MAX_RW_REG_CNT; i++) { + val = rtsx_pci_readl(pcr, RTSX_HAIMR); + if ((val & HAIMR_TRANS_END) == 0) { + if (data != (u8)val) + return -EIO; + return 0; + } + } + + return -ETIMEDOUT; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_register); + +int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data) +{ + u32 val = HAIMR_READ_START; + int i; + + val |= (u32)(addr & 0x3FFF) << 16; + rtsx_pci_writel(pcr, RTSX_HAIMR, val); + + for (i = 0; i < MAX_RW_REG_CNT; i++) { + val = rtsx_pci_readl(pcr, RTSX_HAIMR); + if ((val & HAIMR_TRANS_END) == 0) + break; + } + + if (i >= MAX_RW_REG_CNT) + return -ETIMEDOUT; + + if (data) + *data = (u8)(val & 0xFF); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_register); + +int __rtsx_pci_write_phy_register(struct 
rtsx_pcr *pcr, u8 addr, u16 val) +{ + int err, i, finished = 0; + u8 tmp; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA0, 0xFF, (u8)val); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA1, 0xFF, (u8)(val >> 8)); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x81); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + for (i = 0; i < 100000; i++) { + err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); + if (err < 0) + return err; + + if (!(tmp & 0x80)) { + finished = 1; + break; + } + } + + if (!finished) + return -ETIMEDOUT; + + return 0; +} + +int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + if (pcr->ops->write_phy) + return pcr->ops->write_phy(pcr, addr, val); + + return __rtsx_pci_write_phy_register(pcr, addr, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register); + +int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + int err, i, finished = 0; + u16 data; + u8 *ptr, tmp; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x80); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + for (i = 0; i < 100000; i++) { + err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); + if (err < 0) + return err; + + if (!(tmp & 0x80)) { + finished = 1; + break; + } + } + + if (!finished) + return -ETIMEDOUT; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA0, 0, 0); + rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA1, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + ptr = rtsx_pci_get_cmd_data(pcr); + data = ((u16)ptr[1] << 8) | ptr[0]; + + if (val) + *val = data; + + return 0; +} + +int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + if (pcr->ops->read_phy) + return pcr->ops->read_phy(pcr, addr, val); + + return 
__rtsx_pci_read_phy_register(pcr, addr, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register); + +void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + + rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80); + rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80); +} +EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd); + +void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, + u8 cmd_type, u16 reg_addr, u8 mask, u8 data) +{ + unsigned long flags; + u32 val = 0; + u32 *ptr = (u32 *)(pcr->host_cmds_ptr); + + val |= (u32)(cmd_type & 0x03) << 30; + val |= (u32)(reg_addr & 0x3FFF) << 16; + val |= (u32)mask << 8; + val |= (u32)data; + + spin_lock_irqsave(&pcr->lock, flags); + ptr += pcr->ci; + if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) { + put_unaligned_le32(val, ptr); + ptr++; + pcr->ci++; + } + spin_unlock_irqrestore(&pcr->lock, flags); +} +EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd); + +void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr) +{ + u32 val = 1 << 31; + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; + /* Hardware Auto Response */ + val |= 0x40000000; + rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait); + +int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout) +{ + struct completion trans_done; + u32 val = 1 << 31; + long timeleft; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&pcr->lock, flags); + + /* set up data structures for the wakeup system */ + pcr->done = &trans_done; + pcr->trans_result = TRANS_NOT_READY; + init_completion(&trans_done); + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; + /* Hardware Auto Response */ + val |= 0x40000000; + rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + /* Wait for TRANS_OK_INT */ + timeleft = wait_for_completion_interruptible_timeout( + &trans_done, 
msecs_to_jiffies(timeout)); + if (timeleft <= 0) { + pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__); + err = -ETIMEDOUT; + goto finish_send_cmd; + } + + spin_lock_irqsave(&pcr->lock, flags); + if (pcr->trans_result == TRANS_RESULT_FAIL) + err = -EINVAL; + else if (pcr->trans_result == TRANS_RESULT_OK) + err = 0; + else if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + spin_unlock_irqrestore(&pcr->lock, flags); + +finish_send_cmd: + spin_lock_irqsave(&pcr->lock, flags); + pcr->done = NULL; + spin_unlock_irqrestore(&pcr->lock, flags); + + if ((err < 0) && (err != -ENODEV)) + rtsx_pci_stop_cmd(pcr); + + if (pcr->finish_me) + complete(pcr->finish_me); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd); + +static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, + dma_addr_t addr, unsigned int len, int end) +{ + u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi; + u64 val; + u8 option = SG_VALID | SG_TRANS_DATA; + + pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len); + + if (end) + option |= SG_END; + val = ((u64)addr << 32) | ((u64)len << 12) | option; + + put_unaligned_le64(val, ptr); + pcr->sgi++; +} + +int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read, int timeout) +{ + int err = 0, count; + + pcr_dbg(pcr, "--> %s: num_sg = %d\n", __func__, num_sg); + count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read); + if (count < 1) + return -EINVAL; + pcr_dbg(pcr, "DMA mapping count: %d\n", count); + + err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout); + + rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); + +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; + + if (pcr->remove_pci) + return -EINVAL; + + if ((sglist == NULL) || (num_sg <= 0)) + return -EINVAL; + + return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg); + +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg); + +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int count, bool read, int timeout) +{ + struct completion trans_done; + struct scatterlist *sg; + dma_addr_t addr; + long timeleft; + unsigned long flags; + unsigned int len; + int i, err = 0; + u32 val; + u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE; + + if (pcr->remove_pci) + return -ENODEV; + + if ((sglist == NULL) || (count < 1)) + return -EINVAL; + + val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; + pcr->sgi = 0; + for_each_sg(sglist, sg, count, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1); + } + + spin_lock_irqsave(&pcr->lock, flags); + + pcr->done = &trans_done; + pcr->trans_result = TRANS_NOT_READY; + init_completion(&trans_done); + rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + timeleft = wait_for_completion_interruptible_timeout( + &trans_done, msecs_to_jiffies(timeout)); + if (timeleft <= 0) { + pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__); + err = -ETIMEDOUT; + goto out; + } + + spin_lock_irqsave(&pcr->lock, flags); + if (pcr->trans_result == TRANS_RESULT_FAIL) { + err = -EILSEQ; + if (pcr->dma_error_count < RTS_MAX_TIMES_FREQ_REDUCTION) + pcr->dma_error_count++; + } + + else if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + spin_unlock_irqrestore(&pcr->lock, 
flags); + +out: + spin_lock_irqsave(&pcr->lock, flags); + pcr->done = NULL; + spin_unlock_irqrestore(&pcr->lock, flags); + + if ((err < 0) && (err != -ENODEV)) + rtsx_pci_stop_cmd(pcr); + + if (pcr->finish_me) + complete(pcr->finish_me); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer); + +int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) +{ + int err; + int i, j; + u16 reg; + u8 *ptr; + + if (buf_len > 512) + buf_len = 512; + + ptr = buf; + reg = PPBUF_BASE2; + for (i = 0; i < buf_len / 256; i++) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < 256; j++) + rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + + memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256); + ptr += 256; + } + + if (buf_len % 256) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < buf_len % 256; j++) + rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf); + +int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) +{ + int err; + int i, j; + u16 reg; + u8 *ptr; + + if (buf_len > 512) + buf_len = 512; + + ptr = buf; + reg = PPBUF_BASE2; + for (i = 0; i < buf_len / 256; i++) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < 256; j++) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + reg++, 0xFF, *ptr); + ptr++; + } + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + if (buf_len % 256) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < buf_len % 256; j++) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + reg++, 0xFF, *ptr); + ptr++; + } + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf); + +static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl) +{ + rtsx_pci_init_cmd(pcr); + + while (*tbl & 0xFFFF0000) { + 
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + (u16)(*tbl >> 16), 0xFF, (u8)(*tbl)); + tbl++; + } + + return rtsx_pci_send_cmd(pcr, 100); +} + +int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card) +{ + const u32 *tbl; + + if (card == RTSX_SD_CARD) + tbl = pcr->sd_pull_ctl_enable_tbl; + else if (card == RTSX_MS_CARD) + tbl = pcr->ms_pull_ctl_enable_tbl; + else + return -EINVAL; + + return rtsx_pci_set_pull_ctl(pcr, tbl); +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable); + +int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card) +{ + const u32 *tbl; + + if (card == RTSX_SD_CARD) + tbl = pcr->sd_pull_ctl_disable_tbl; + else if (card == RTSX_MS_CARD) + tbl = pcr->ms_pull_ctl_disable_tbl; + else + return -EINVAL; + + + return rtsx_pci_set_pull_ctl(pcr, tbl); +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable); + +static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr) +{ + pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN; + + if (pcr->num_slots > 1) + pcr->bier |= MS_INT_EN; + + /* Enable Bus Interrupt */ + rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier); + + pcr_dbg(pcr, "RTSX_BIER: 0x%08x\n", pcr->bier); +} + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? 
(depth - 1) : depth); +} + +static u8 revise_ssc_depth(u8 ssc_depth, u8 div) +{ + if (div > CLK_DIV_1) { + if (ssc_depth > (div - 1)) + ssc_depth -= (div - 1); + else + ssc_depth = SSC_DEPTH_4M; + } + + return ssc_depth; +} + +int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u8 n, clk_divider, mcu_cnt, div; + static const u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K, + [RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K, + }; + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + /* Reduce card clock by 20MHz each time a DMA transfer error occurs */ + if (card_clock == UHS_SDR104_MAX_DTR && + pcr->dma_error_count && + PCI_PID(pcr) == RTS5227_DEVICE_ID) + card_clock = UHS_SDR104_MAX_DTR - + (pcr->dma_error_count * 20000000); + + card_clock /= 1000000; + pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + if (pcr->ops->conv_clk_and_div_n) + n = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); + else + n = (u8)(clk - 2); + if ((clk <= 2) || (n > MAX_DIV_N_PCR)) + return -EINVAL; + + mcu_cnt = (u8)(125/clk + 3); + if (mcu_cnt > 15) + mcu_cnt = 15; + + /* Make sure that the SSC clock div_n is not less than MIN_DIV_N_PCR */ + div = CLK_DIV_1; + while ((n < MIN_DIV_N_PCR) && (div < CLK_DIV_8)) { + if (pcr->ops->conv_clk_and_div_n) { + int dbl_clk = pcr->ops->conv_clk_and_div_n(n, + DIV_N_TO_CLK) 
* 2; + n = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk, + CLK_TO_DIV_N); + } else { + n = (n + 2) * 2 - 2; + } + div++; + } + pcr_dbg(pcr, "n = %d, div = %d\n", n, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + ssc_depth = revise_ssc_depth(ssc_depth, div); + pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(10); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock); + +int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card) +{ + if (pcr->ops->card_power_on) + return pcr->ops->card_power_on(pcr, card); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on); + +int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card) +{ + if (pcr->ops->card_power_off) + return pcr->ops->card_power_off(pcr, card); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off); + +int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card) +{ + static const unsigned int cd_mask[] = { + [RTSX_SD_CARD] = SD_EXIST, + [RTSX_MS_CARD] = MS_EXIST + }; + + if (!(pcr->flags & PCR_MS_PMOS)) { + /* When using single PMOS, accessing card is not permitted + * if the 
existing card is not the designated one. + */ + if (pcr->card_exist & (~cd_mask[card])) + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_exclusive_check); + +int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + if (pcr->ops->switch_output_voltage) + return pcr->ops->switch_output_voltage(pcr, voltage); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_switch_output_voltage); + +unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr) +{ + unsigned int val; + + val = rtsx_pci_readl(pcr, RTSX_BIPR); + if (pcr->ops->cd_deglitch) + val = pcr->ops->cd_deglitch(pcr); + + return val; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_exist); + +void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr) +{ + struct completion finish; + + pcr->finish_me = &finish; + init_completion(&finish); + + if (pcr->done) + complete(pcr->done); + + if (!pcr->remove_pci) + rtsx_pci_stop_cmd(pcr); + + wait_for_completion_interruptible_timeout(&finish, + msecs_to_jiffies(2)); + pcr->finish_me = NULL; +} +EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer); + +static void rtsx_pci_card_detect(struct work_struct *work) +{ + struct delayed_work *dwork; + struct rtsx_pcr *pcr; + unsigned long flags; + unsigned int card_detect = 0, card_inserted, card_removed; + u32 irq_status; + + dwork = to_delayed_work(work); + pcr = container_of(dwork, struct rtsx_pcr, carddet_work); + + pcr_dbg(pcr, "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + spin_lock_irqsave(&pcr->lock, flags); + + irq_status = rtsx_pci_readl(pcr, RTSX_BIPR); + pcr_dbg(pcr, "irq_status: 0x%08x\n", irq_status); + + irq_status &= CARD_EXIST; + card_inserted = pcr->card_inserted & irq_status; + card_removed = pcr->card_removed; + pcr->card_inserted = 0; + pcr->card_removed = 0; + + spin_unlock_irqrestore(&pcr->lock, flags); + + if (card_inserted || card_removed) { + pcr_dbg(pcr, "card_inserted: 0x%x, card_removed: 0x%x\n", + card_inserted, card_removed); + + if (pcr->ops->cd_deglitch) + 
card_inserted = pcr->ops->cd_deglitch(pcr); + + card_detect = card_inserted | card_removed; + + pcr->card_exist |= card_inserted; + pcr->card_exist &= ~card_removed; + } + + mutex_unlock(&pcr->pcr_mutex); + + if ((card_detect & SD_EXIST) && pcr->slots[RTSX_SD_CARD].card_event) + pcr->slots[RTSX_SD_CARD].card_event( + pcr->slots[RTSX_SD_CARD].p_dev); + if ((card_detect & MS_EXIST) && pcr->slots[RTSX_MS_CARD].card_event) + pcr->slots[RTSX_MS_CARD].card_event( + pcr->slots[RTSX_MS_CARD].p_dev); +} + +static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) +{ + struct rtsx_pcr *pcr = dev_id; + u32 int_reg; + + if (!pcr) + return IRQ_NONE; + + spin_lock(&pcr->lock); + + int_reg = rtsx_pci_readl(pcr, RTSX_BIPR); + /* Clear interrupt flag */ + rtsx_pci_writel(pcr, RTSX_BIPR, int_reg); + if ((int_reg & pcr->bier) == 0) { + spin_unlock(&pcr->lock); + return IRQ_NONE; + } + if (int_reg == 0xFFFFFFFF) { + spin_unlock(&pcr->lock); + return IRQ_HANDLED; + } + + int_reg &= (pcr->bier | 0x7FFFFF); + + if (int_reg & SD_INT) { + if (int_reg & SD_EXIST) { + pcr->card_inserted |= SD_EXIST; + } else { + pcr->card_removed |= SD_EXIST; + pcr->card_inserted &= ~SD_EXIST; + } + pcr->dma_error_count = 0; + } + + if (int_reg & MS_INT) { + if (int_reg & MS_EXIST) { + pcr->card_inserted |= MS_EXIST; + } else { + pcr->card_removed |= MS_EXIST; + pcr->card_inserted &= ~MS_EXIST; + } + } + + if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) { + if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) { + pcr->trans_result = TRANS_RESULT_FAIL; + if (pcr->done) + complete(pcr->done); + } else if (int_reg & TRANS_OK_INT) { + pcr->trans_result = TRANS_RESULT_OK; + if (pcr->done) + complete(pcr->done); + } + } + + if (pcr->card_inserted || pcr->card_removed) + schedule_delayed_work(&pcr->carddet_work, + msecs_to_jiffies(200)); + + spin_unlock(&pcr->lock); + return IRQ_HANDLED; +} + +static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr) +{ + pcr_dbg(pcr, "%s: pcr->msi_en = %d, pci->irq = %d\n", + __func__, 
pcr->msi_en, pcr->pci->irq); + + if (request_irq(pcr->pci->irq, rtsx_pci_isr, + pcr->msi_en ? 0 : IRQF_SHARED, + DRV_NAME_RTSX_PCI, pcr)) { + dev_err(&(pcr->pci->dev), + "rtsx_sdmmc: unable to grab IRQ %d, disabling device\n", + pcr->pci->irq); + return -1; + } + + pcr->irq = pcr->pci->irq; + pci_intx(pcr->pci, !pcr->msi_en); + + return 0; +} + +static void rtsx_enable_aspm(struct rtsx_pcr *pcr) +{ + if (pcr->ops->set_aspm) + pcr->ops->set_aspm(pcr, true); + else + rtsx_comm_set_aspm(pcr, true); +} + +static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ltr_enabled) { + u32 latency = option->ltr_l1off_latency; + + if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN)) + mdelay(option->l1_snooze_delay); + + rtsx_set_ltr_latency(pcr, latency); + } + + if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN)) + rtsx_set_l1off_sub_cfg_d0(pcr, 0); + + rtsx_enable_aspm(pcr); +} + +void rtsx_pm_power_saving(struct rtsx_pcr *pcr) +{ + if (pcr->ops->power_saving) + pcr->ops->power_saving(pcr); + else + rtsx_comm_pm_power_saving(pcr); +} + +static void rtsx_pci_idle_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work); + + pcr_dbg(pcr, "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + + pcr->state = PDEV_STAT_IDLE; + + if (pcr->ops->disable_auto_blink) + pcr->ops->disable_auto_blink(pcr); + if (pcr->ops->turn_off_led) + pcr->ops->turn_off_led(pcr); + + rtsx_pm_power_saving(pcr); + + mutex_unlock(&pcr->pcr_mutex); +} + +#ifdef CONFIG_PM +static void rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state) +{ + if (pcr->ops->turn_off_led) + pcr->ops->turn_off_led(pcr); + + rtsx_pci_writel(pcr, RTSX_BIER, 0); + pcr->bier = 0; + + rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08); + rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, pm_state); + + if (pcr->ops->force_power_down) + 
pcr->ops->force_power_down(pcr, pm_state); +} +#endif + +static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) +{ + int err; + + pcr->pcie_cap = pci_find_capability(pcr->pci, PCI_CAP_ID_EXP); + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + rtsx_pci_enable_bus_int(pcr); + + /* Power on SSC */ + err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + if (err < 0) + return err; + + /* Wait SSC power stable */ + udelay(200); + + rtsx_pci_disable_aspm(pcr); + if (pcr->ops->optimize_phy) { + err = pcr->ops->optimize_phy(pcr); + if (err < 0) + return err; + } + + rtsx_pci_init_cmd(pcr); + + /* Set mcu_cnt to 7 to ensure data can be sampled properly */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00); + /* Disable card clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0); + /* Reset delink mode */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0); + /* Card driving select */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_DRIVE_SEL, + 0xFF, pcr->card_drive_sel); + /* Enable SSC Clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, + 0xFF, SSC_8X_EN | SSC_SEL_4M); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + /* Disable cd_pwr_save */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10); + /* Clear Link Ready Interrupt */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0, + LINK_RDY_INT, LINK_RDY_INT); + /* Enlarge the estimation window of PERST# glitch + * to reduce the chance of invalid card interrupt + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80); + /* Update RC oscillator to 400k + * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1 + * 1: 2M 0: 400k + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00); + /* Set interrupt write clear + * bit 1: U_elbi_if_rd_clr_en + * 1: Enable ELBI interrupt[31:22] & [7:0] flag read clear + * 0: ELBI interrupt flag[31:22] & [7:0] 
only can be write clear + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, NFTS_TX_CTRL, 0x02, 0); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + switch (PCI_PID(pcr)) { + case PID_5250: + case PID_524A: + case PID_525A: + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1); + break; + default: + break; + } + + /* Enable clk_request_n to enable clock power management */ + rtsx_pci_write_config_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL + 1, 1); + /* Enter L1 when host tx idle */ + rtsx_pci_write_config_byte(pcr, 0x70F, 0x5B); + + if (pcr->ops->extra_init_hw) { + err = pcr->ops->extra_init_hw(pcr); + if (err < 0) + return err; + } + + /* No CD interrupt if probing driver with card inserted. + * So we need to initialize pcr->card_exist here. + */ + if (pcr->ops->cd_deglitch) + pcr->card_exist = pcr->ops->cd_deglitch(pcr); + else + pcr->card_exist = rtsx_pci_readl(pcr, RTSX_BIPR) & CARD_EXIST; + + return 0; +} + +static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) +{ + int err; + + spin_lock_init(&pcr->lock); + mutex_init(&pcr->pcr_mutex); + + switch (PCI_PID(pcr)) { + default: + case 0x5209: + rts5209_init_params(pcr); + break; + + case 0x5229: + rts5229_init_params(pcr); + break; + + case 0x5289: + rtl8411_init_params(pcr); + break; + + case 0x5227: + rts5227_init_params(pcr); + break; + + case 0x522A: + rts522a_init_params(pcr); + break; + + case 0x5249: + rts5249_init_params(pcr); + break; + + case 0x524A: + rts524a_init_params(pcr); + break; + + case 0x525A: + rts525a_init_params(pcr); + break; + + case 0x5287: + rtl8411b_init_params(pcr); + break; + + case 0x5286: + rtl8402_init_params(pcr); + break; + } + + pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n", + PCI_PID(pcr), pcr->ic_version); + + pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot), + GFP_KERNEL); + if (!pcr->slots) + return -ENOMEM; + + if (pcr->ops->fetch_vendor_settings) + pcr->ops->fetch_vendor_settings(pcr); + + pcr_dbg(pcr, "pcr->aspm_en = 0x%x\n", pcr->aspm_en); + 
pcr_dbg(pcr, "pcr->sd30_drive_sel_1v8 = 0x%x\n", + pcr->sd30_drive_sel_1v8); + pcr_dbg(pcr, "pcr->sd30_drive_sel_3v3 = 0x%x\n", + pcr->sd30_drive_sel_3v3); + pcr_dbg(pcr, "pcr->card_drive_sel = 0x%x\n", + pcr->card_drive_sel); + pcr_dbg(pcr, "pcr->flags = 0x%x\n", pcr->flags); + + pcr->state = PDEV_STAT_IDLE; + err = rtsx_pci_init_hw(pcr); + if (err < 0) { + kfree(pcr->slots); + return err; + } + + return 0; +} + +static int rtsx_pci_probe(struct pci_dev *pcidev, + const struct pci_device_id *id) +{ + struct rtsx_pcr *pcr; + struct pcr_handle *handle; + u32 base, len; + int ret, i, bar = 0; + + dev_dbg(&(pcidev->dev), + ": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n", + pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device, + (int)pcidev->revision); + + ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)); + if (ret < 0) + return ret; + + ret = pci_enable_device(pcidev); + if (ret) + return ret; + + ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI); + if (ret) + goto disable; + + pcr = kzalloc(sizeof(*pcr), GFP_KERNEL); + if (!pcr) { + ret = -ENOMEM; + goto release_pci; + } + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) { + ret = -ENOMEM; + goto free_pcr; + } + handle->pcr = pcr; + + idr_preload(GFP_KERNEL); + spin_lock(&rtsx_pci_lock); + ret = idr_alloc(&rtsx_pci_idr, pcr, 0, 0, GFP_NOWAIT); + if (ret >= 0) + pcr->id = ret; + spin_unlock(&rtsx_pci_lock); + idr_preload_end(); + if (ret < 0) + goto free_handle; + + pcr->pci = pcidev; + dev_set_drvdata(&pcidev->dev, handle); + + if (CHK_PCI_PID(pcr, 0x525A)) + bar = 1; + len = pci_resource_len(pcidev, bar); + base = pci_resource_start(pcidev, bar); + pcr->remap_addr = ioremap_nocache(base, len); + if (!pcr->remap_addr) { + ret = -ENOMEM; + goto free_handle; + } + + pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev), + RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr), + GFP_KERNEL); + if (pcr->rtsx_resv_buf == NULL) { + ret = -ENXIO; + goto unmap; + } + pcr->host_cmds_ptr = 
pcr->rtsx_resv_buf; + pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr; + pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN; + pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN; + + pcr->card_inserted = 0; + pcr->card_removed = 0; + INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect); + INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work); + + pcr->msi_en = msi_en; + if (pcr->msi_en) { + ret = pci_enable_msi(pcidev); + if (ret) + pcr->msi_en = false; + } + + ret = rtsx_pci_acquire_irq(pcr); + if (ret < 0) + goto disable_msi; + + pci_set_master(pcidev); + synchronize_irq(pcr->irq); + + ret = rtsx_pci_init_chip(pcr); + if (ret < 0) + goto disable_irq; + + for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) { + rtsx_pcr_cells[i].platform_data = handle; + rtsx_pcr_cells[i].pdata_size = sizeof(*handle); + } + ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells, + ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL); + if (ret < 0) + goto disable_irq; + + schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200)); + + return 0; + +disable_irq: + free_irq(pcr->irq, (void *)pcr); +disable_msi: + if (pcr->msi_en) + pci_disable_msi(pcr->pci); + dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, + pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); +unmap: + iounmap(pcr->remap_addr); +free_handle: + kfree(handle); +free_pcr: + kfree(pcr); +release_pci: + pci_release_regions(pcidev); +disable: + pci_disable_device(pcidev); + + return ret; +} + +static void rtsx_pci_remove(struct pci_dev *pcidev) +{ + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + pcr->remove_pci = true; + + /* Disable interrupts at the pcr level */ + spin_lock_irq(&pcr->lock); + rtsx_pci_writel(pcr, RTSX_BIER, 0); + pcr->bier = 0; + spin_unlock_irq(&pcr->lock); + + cancel_delayed_work_sync(&pcr->carddet_work); + cancel_delayed_work_sync(&pcr->idle_work); + + mfd_remove_devices(&pcidev->dev); + + dma_free_coherent(&(pcr->pci->dev), 
RTSX_RESV_BUF_LEN, + pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + pci_disable_msi(pcr->pci); + iounmap(pcr->remap_addr); + + pci_release_regions(pcidev); + pci_disable_device(pcidev); + + spin_lock(&rtsx_pci_lock); + idr_remove(&rtsx_pci_idr, pcr->id); + spin_unlock(&rtsx_pci_lock); + + kfree(pcr->slots); + kfree(pcr); + kfree(handle); + + dev_dbg(&(pcidev->dev), + ": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n", + pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device); +} + +#ifdef CONFIG_PM + +static int rtsx_pci_suspend(struct pci_dev *pcidev, pm_message_t state) +{ + struct pcr_handle *handle; + struct rtsx_pcr *pcr; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + handle = pci_get_drvdata(pcidev); + pcr = handle->pcr; + + cancel_delayed_work(&pcr->carddet_work); + cancel_delayed_work(&pcr->idle_work); + + mutex_lock(&pcr->pcr_mutex); + + rtsx_pci_power_off(pcr, HOST_ENTER_S3); + + pci_save_state(pcidev); + pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0); + pci_disable_device(pcidev); + pci_set_power_state(pcidev, pci_choose_state(pcidev, state)); + + mutex_unlock(&pcr->pcr_mutex); + return 0; +} + +static int rtsx_pci_resume(struct pci_dev *pcidev) +{ + struct pcr_handle *handle; + struct rtsx_pcr *pcr; + int ret = 0; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + handle = pci_get_drvdata(pcidev); + pcr = handle->pcr; + + mutex_lock(&pcr->pcr_mutex); + + pci_set_power_state(pcidev, PCI_D0); + pci_restore_state(pcidev); + ret = pci_enable_device(pcidev); + if (ret) + goto out; + pci_set_master(pcidev); + + ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00); + if (ret) + goto out; + + ret = rtsx_pci_init_hw(pcr); + if (ret) + goto out; + + schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200)); + +out: + mutex_unlock(&pcr->pcr_mutex); + return ret; +} + +static void rtsx_pci_shutdown(struct pci_dev *pcidev) +{ + struct pcr_handle 
*handle; + struct rtsx_pcr *pcr; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + handle = pci_get_drvdata(pcidev); + pcr = handle->pcr; + rtsx_pci_power_off(pcr, HOST_ENTER_S1); + + pci_disable_device(pcidev); +} + +#else /* CONFIG_PM */ + +#define rtsx_pci_suspend NULL +#define rtsx_pci_resume NULL +#define rtsx_pci_shutdown NULL + +#endif /* CONFIG_PM */ + +static struct pci_driver rtsx_pci_driver = { + .name = DRV_NAME_RTSX_PCI, + .id_table = rtsx_pci_ids, + .probe = rtsx_pci_probe, + .remove = rtsx_pci_remove, + .suspend = rtsx_pci_suspend, + .resume = rtsx_pci_resume, + .shutdown = rtsx_pci_shutdown, +}; +module_pci_driver(rtsx_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Wei WANG "); +MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver"); diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h new file mode 100644 index 000000000000..b0691c95b103 --- /dev/null +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -0,0 +1,103 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . 
+ * + * Author: + * Wei WANG + */ + +#ifndef __RTSX_PCR_H +#define __RTSX_PCR_H + +#include + +#define MIN_DIV_N_PCR 80 +#define MAX_DIV_N_PCR 208 + +#define RTS522A_PM_CTRL3 0xFF7E + +#define RTS524A_PME_FORCE_CTL 0xFF78 +#define RTS524A_PM_CTRL3 0xFF7E + +#define LTR_ACTIVE_LATENCY_DEF 0x883C +#define LTR_IDLE_LATENCY_DEF 0x892C +#define LTR_L1OFF_LATENCY_DEF 0x9003 +#define L1_SNOOZE_DELAY_DEF 1 +#define LTR_L1OFF_SSPWRGATE_5249_DEF 0xAF +#define LTR_L1OFF_SSPWRGATE_5250_DEF 0xFF +#define LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF 0xAC +#define LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF 0xF8 +#define CMD_TIMEOUT_DEF 100 +#define ASPM_MASK_NEG 0xFC +#define MASK_8_BIT_DEF 0xFF + +int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val); +int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val); + +void rts5209_init_params(struct rtsx_pcr *pcr); +void rts5229_init_params(struct rtsx_pcr *pcr); +void rtl8411_init_params(struct rtsx_pcr *pcr); +void rtl8402_init_params(struct rtsx_pcr *pcr); +void rts5227_init_params(struct rtsx_pcr *pcr); +void rts522a_init_params(struct rtsx_pcr *pcr); +void rts5249_init_params(struct rtsx_pcr *pcr); +void rts524a_init_params(struct rtsx_pcr *pcr); +void rts525a_init_params(struct rtsx_pcr *pcr); +void rtl8411b_init_params(struct rtsx_pcr *pcr); + +static inline u8 map_sd_drive(int idx) +{ + u8 sd_drive[4] = { + 0x01, /* Type D */ + 0x02, /* Type C */ + 0x05, /* Type A */ + 0x03 /* Type B */ + }; + + return sd_drive[idx]; +} + +#define rtsx_vendor_setting_valid(reg) (!((reg) & 0x1000000)) +#define rts5209_vendor_setting1_valid(reg) (!((reg) & 0x80)) +#define rts5209_vendor_setting2_valid(reg) ((reg) & 0x80) + +#define rtsx_reg_to_aspm(reg) (((reg) >> 28) & 0x03) +#define rtsx_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 26) & 0x03) +#define rtsx_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x03) +#define rtsx_reg_to_card_drive_sel(reg) ((((reg) >> 25) & 0x01) << 6) +#define rtsx_reg_check_reverse_socket(reg) 
((reg) & 0x4000) +#define rts5209_reg_to_aspm(reg) (((reg) >> 5) & 0x03) +#define rts5209_reg_check_ms_pmos(reg) (!((reg) & 0x08)) +#define rts5209_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 3) & 0x07) +#define rts5209_reg_to_sd30_drive_sel_3v3(reg) ((reg) & 0x07) +#define rts5209_reg_to_card_drive_sel(reg) ((reg) >> 8) +#define rtl8411_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x07) +#define rtl8411b_reg_to_sd30_drive_sel_3v3(reg) ((reg) & 0x03) + +#define set_pull_ctrl_tables(pcr, __device) \ +do { \ + pcr->sd_pull_ctl_enable_tbl = __device##_sd_pull_ctl_enable_tbl; \ + pcr->sd_pull_ctl_disable_tbl = __device##_sd_pull_ctl_disable_tbl; \ + pcr->ms_pull_ctl_enable_tbl = __device##_ms_pull_ctl_enable_tbl; \ + pcr->ms_pull_ctl_disable_tbl = __device##_ms_pull_ctl_disable_tbl; \ +} while (0) + +/* generic operations */ +int rtsx_gops_pm_reset(struct rtsx_pcr *pcr); +int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency); +int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val); + +#endif diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c new file mode 100644 index 000000000000..b97903ff1a72 --- /dev/null +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -0,0 +1,791 @@ +/* Driver for Realtek USB card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . 
+ * + * Author: + * Roger Tseng + */ +#include +#include +#include +#include +#include +#include +#include + +static int polling_pipe = 1; +module_param(polling_pipe, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(polling_pipe, "polling pipe (0: ctl, 1: bulk)"); + +static const struct mfd_cell rtsx_usb_cells[] = { + [RTSX_USB_SD_CARD] = { + .name = "rtsx_usb_sdmmc", + .pdata_size = 0, + }, + [RTSX_USB_MS_CARD] = { + .name = "rtsx_usb_ms", + .pdata_size = 0, + }, +}; + +static void rtsx_usb_sg_timed_out(struct timer_list *t) +{ + struct rtsx_ucr *ucr = from_timer(ucr, t, sg_timer); + + dev_dbg(&ucr->pusb_intf->dev, "%s: sg transfer timed out", __func__); + usb_sg_cancel(&ucr->current_sg); +} + +static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr, + unsigned int pipe, struct scatterlist *sg, int num_sg, + unsigned int length, unsigned int *act_len, int timeout) +{ + int ret; + + dev_dbg(&ucr->pusb_intf->dev, "%s: xfer %u bytes, %d entries\n", + __func__, length, num_sg); + ret = usb_sg_init(&ucr->current_sg, ucr->pusb_dev, pipe, 0, + sg, num_sg, length, GFP_NOIO); + if (ret) + return ret; + + ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout); + add_timer(&ucr->sg_timer); + usb_sg_wait(&ucr->current_sg); + if (!del_timer_sync(&ucr->sg_timer)) + ret = -ETIMEDOUT; + else + ret = ucr->current_sg.status; + + if (act_len) + *act_len = ucr->current_sg.bytes; + + return ret; +} + +int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, + void *buf, unsigned int len, int num_sg, + unsigned int *act_len, int timeout) +{ + if (timeout < 600) + timeout = 600; + + if (num_sg) + return rtsx_usb_bulk_transfer_sglist(ucr, pipe, + (struct scatterlist *)buf, num_sg, len, act_len, + timeout); + else + return usb_bulk_msg(ucr->pusb_dev, pipe, buf, len, act_len, + timeout); +} +EXPORT_SYMBOL_GPL(rtsx_usb_transfer_data); + +static inline void rtsx_usb_seq_cmd_hdr(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 seq_type) +{ + rtsx_usb_cmd_hdr_tag(ucr); + + 
ucr->cmd_buf[PACKET_TYPE] = seq_type; + ucr->cmd_buf[5] = (u8)(len >> 8); + ucr->cmd_buf[6] = (u8)len; + ucr->cmd_buf[8] = (u8)(addr >> 8); + ucr->cmd_buf[9] = (u8)addr; + + if (seq_type == SEQ_WRITE) + ucr->cmd_buf[STAGE_FLAG] = 0; + else + ucr->cmd_buf[STAGE_FLAG] = STAGE_R; +} + +static int rtsx_usb_seq_write_register(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 *data) +{ + u16 cmd_len = ALIGN(SEQ_WRITE_DATA_OFFSET + len, 4); + + if (!data) + return -EINVAL; + + if (cmd_len > IOBUF_SIZE) + return -EINVAL; + + rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_WRITE); + memcpy(ucr->cmd_buf + SEQ_WRITE_DATA_OFFSET, data, len); + + return rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, cmd_len, 0, NULL, 100); +} + +static int rtsx_usb_seq_read_register(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 *data) +{ + int i, ret; + u16 rsp_len = round_down(len, 4); + u16 res_len = len - rsp_len; + + if (!data) + return -EINVAL; + + /* 4-byte aligned part */ + if (rsp_len) { + rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_READ); + ret = rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, 12, 0, NULL, 100); + if (ret) + return ret; + + ret = rtsx_usb_transfer_data(ucr, + usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), + data, rsp_len, 0, NULL, 100); + if (ret) + return ret; + } + + /* unaligned part */ + for (i = 0; i < res_len; i++) { + ret = rtsx_usb_read_register(ucr, addr + rsp_len + i, + data + rsp_len + i); + if (ret) + return ret; + } + + return 0; +} + +int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) +{ + return rtsx_usb_seq_read_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); +} +EXPORT_SYMBOL_GPL(rtsx_usb_read_ppbuf); + +int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) +{ + return rtsx_usb_seq_write_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); +} +EXPORT_SYMBOL_GPL(rtsx_usb_write_ppbuf); + +int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, + u8 
mask, u8 data) +{ + u16 value, index; + + addr |= EP0_WRITE_REG_CMD << EP0_OP_SHIFT; + value = swab16(addr); + index = mask | data << 8; + + return usb_control_msg(ucr->pusb_dev, + usb_sndctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, NULL, 0, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); + +int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) +{ + u16 value; + u8 *buf; + int ret; + + if (!data) + return -EINVAL; + + buf = kzalloc(sizeof(u8), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; + value = swab16(addr); + + ret = usb_control_msg(ucr->pusb_dev, + usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, 0, buf, 1, 100); + *data = *buf; + + kfree(buf); + return ret; +} +EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); + +void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, u16 reg_addr, + u8 mask, u8 data) +{ + int i; + + if (ucr->cmd_idx < (IOBUF_SIZE - CMD_OFFSET) / 4) { + i = CMD_OFFSET + ucr->cmd_idx * 4; + + ucr->cmd_buf[i++] = ((cmd_type & 0x03) << 6) | + (u8)((reg_addr >> 8) & 0x3F); + ucr->cmd_buf[i++] = (u8)reg_addr; + ucr->cmd_buf[i++] = mask; + ucr->cmd_buf[i++] = data; + + ucr->cmd_idx++; + } +} +EXPORT_SYMBOL_GPL(rtsx_usb_add_cmd); + +int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout) +{ + int ret; + + ucr->cmd_buf[CNT_H] = (u8)(ucr->cmd_idx >> 8); + ucr->cmd_buf[CNT_L] = (u8)(ucr->cmd_idx); + ucr->cmd_buf[STAGE_FLAG] = flag; + + ret = rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, ucr->cmd_idx * 4 + CMD_OFFSET, + 0, NULL, timeout); + if (ret) { + rtsx_usb_clear_fsm_err(ucr); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_send_cmd); + +int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout) +{ + if (rsp_len <= 0) + return -EINVAL; + + rsp_len = ALIGN(rsp_len, 
4); + + return rtsx_usb_transfer_data(ucr, + usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), + ucr->rsp_buf, rsp_len, 0, NULL, timeout); +} +EXPORT_SYMBOL_GPL(rtsx_usb_get_rsp); + +static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) +{ + int ret; + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, CARD_EXIST, 0x00, 0x00); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, OCPSTAT, 0x00, 0x00); + ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); + if (ret) + return ret; + + ret = rtsx_usb_get_rsp(ucr, 2, 100); + if (ret) + return ret; + + *status = ((ucr->rsp_buf[0] >> 2) & 0x0f) | + ((ucr->rsp_buf[1] & 0x03) << 4); + + return 0; +} + +int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) +{ + int ret; + u16 *buf; + + if (!status) + return -EINVAL; + + if (polling_pipe == 0) { + buf = kzalloc(sizeof(u16), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(ucr->pusb_dev, + usb_rcvctrlpipe(ucr->pusb_dev, 0), + RTSX_USB_REQ_POLL, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, 0, buf, 2, 100); + *status = *buf; + + kfree(buf); + } else { + ret = rtsx_usb_get_status_with_bulk(ucr, status); + } + + /* usb_control_msg may return positive when success */ + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_get_card_status); + +static int rtsx_usb_write_phy_register(struct rtsx_ucr *ucr, u8 addr, u8 val) +{ + dev_dbg(&ucr->pusb_intf->dev, "Write 0x%x to phy register 0x%x\n", + val, addr); + + rtsx_usb_init_cmd(ucr); + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VSTAIN, 0xFF, val); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, 0xFF, addr & 0x0F); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, + 0xFF, (addr >> 4) & 0x0F); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, 
WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); + + return rtsx_usb_send_cmd(ucr, MODE_C, 100); +} + +int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, u8 data) +{ + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, addr, mask, data); + return rtsx_usb_send_cmd(ucr, MODE_C, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_write_register); + +int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) +{ + int ret; + + if (data != NULL) + *data = 0; + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, addr, 0, 0); + ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); + if (ret) + return ret; + + ret = rtsx_usb_get_rsp(ucr, 1, 100); + if (ret) + return ret; + + if (data != NULL) + *data = ucr->rsp_buf[0]; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_read_register); + +static inline u8 double_ssc_depth(u8 depth) +{ + return (depth > 1) ? (depth - 1) : depth; +} + +static u8 revise_ssc_depth(u8 ssc_depth, u8 div) +{ + if (div > CLK_DIV_1) { + if (ssc_depth > div - 1) + ssc_depth -= (div - 1); + else + ssc_depth = SSC_DEPTH_2M; + } + + return ssc_depth; +} + +int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int ret; + u8 n, clk_divider, mcu_cnt, div; + + if (!card_clock) { + ucr->cur_clk = 0; + return 0; + } + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + + ret = rtsx_usb_write_register(ucr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (ret < 0) + return ret; + + card_clock /= 1000000; + dev_dbg(&ucr->pusb_intf->dev, + "Switch card clock to %dMHz\n", card_clock); + + if (!initial_mode && double_clk) + card_clock *= 2; + dev_dbg(&ucr->pusb_intf->dev, + "Internal SSC clock: %dMHz (cur_clk = %d)\n", + card_clock, ucr->cur_clk); + + if (card_clock == 
ucr->cur_clk) + return 0; + + /* Converting clock value into internal settings: n and div */ + n = card_clock - 2; + if ((card_clock <= 2) || (n > MAX_DIV_N)) + return -EINVAL; + + mcu_cnt = 60/card_clock + 3; + if (mcu_cnt > 15) + mcu_cnt = 15; + + /* Make sure that the SSC clock div_n is not less than MIN_DIV_N */ + + div = CLK_DIV_1; + while (n < MIN_DIV_N && div < CLK_DIV_4) { + n = (n + 2) * 2 - 2; + div++; + } + dev_dbg(&ucr->pusb_intf->dev, "n = %d, div = %d\n", n, div); + + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + ssc_depth = revise_ssc_depth(ssc_depth, div); + dev_dbg(&ucr->pusb_intf->dev, "ssc_depth = %d\n", ssc_depth); + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, CLK_CHANGE, CLK_CHANGE); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, + 0x3F, (div << 4) | mcu_cnt); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + ret = rtsx_usb_send_cmd(ucr, MODE_C, 2000); + if (ret < 0) + return ret; + + ret = rtsx_usb_write_register(ucr, SSC_CTL1, 0xff, + SSC_RSTB | SSC_8X_EN | SSC_SEL_4M); + if (ret < 0) + return ret; + + /* Wait SSC clock stable */ + usleep_range(100, 1000); + + ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0); + if (ret < 0) + return ret; + + ucr->cur_clk = card_clock; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_switch_clock); + +int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card) +{ + int ret; + u16 val; + u16 cd_mask[] = { + [RTSX_USB_SD_CARD] = (CD_MASK & ~SD_CD), + [RTSX_USB_MS_CARD] = (CD_MASK & ~MS_CD) + }; + + ret = rtsx_usb_get_card_status(ucr, &val); + /* 
+ * If get status fails, return 0 (ok) for the exclusive check + * and let the flow fail at somewhere else. + */ + if (ret) + return 0; + + if (val & cd_mask[card]) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_card_exclusive_check); + +static int rtsx_usb_reset_chip(struct rtsx_ucr *ucr) +{ + int ret; + u8 val; + + rtsx_usb_init_cmd(ucr); + + if (CHECK_PKG(ucr, LQFP48)) { + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + LDO3318_PWR_MASK, LDO_SUSPEND); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + FORCE_LDO_POWERB, FORCE_LDO_POWERB); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL1, + 0x30, 0x10); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL5, + 0x03, 0x01); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL6, + 0x0C, 0x04); + } + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SYS_DUMMY0, NYET_MSAK, NYET_EN); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CD_DEGLITCH_WIDTH, 0xFF, 0x08); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CD_DEGLITCH_EN, XD_CD_DEGLITCH_EN, 0x0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD30_DRIVE_SEL, + SD30_DRIVE_MASK, DRIVER_TYPE_D); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CARD_DRIVE_SEL, SD20_DRIVE_MASK, 0x0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, LDO_POWER_CFG, 0xE0, 0x0); + + if (ucr->is_rts5179) + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CARD_PULL_CTL5, 0x03, 0x01); + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_DMA1_CTL, + EXTEND_DMA1_ASYNC_SIGNAL, EXTEND_DMA1_ASYNC_SIGNAL); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_INT_PEND, + XD_INT | MS_INT | SD_INT, + XD_INT | MS_INT | SD_INT); + + ret = rtsx_usb_send_cmd(ucr, MODE_C, 100); + if (ret) + return ret; + + /* config non-crystal mode */ + rtsx_usb_read_register(ucr, CFG_MODE, &val); + if ((val & XTAL_FREE) || ((val & CLK_MODE_MASK) == CLK_MODE_NON_XTAL)) { + ret = rtsx_usb_write_phy_register(ucr, 0xC2, 0x7C); + if (ret) + return ret; + } + + return 0; +} + +static int rtsx_usb_init_chip(struct rtsx_ucr *ucr) +{ + int ret; + u8 val; + + 
rtsx_usb_clear_fsm_err(ucr); + + /* power on SSC */ + ret = rtsx_usb_write_register(ucr, + FPDCTL, SSC_POWER_MASK, SSC_POWER_ON); + if (ret) + return ret; + + usleep_range(100, 1000); + ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0x00); + if (ret) + return ret; + + /* determine IC version */ + ret = rtsx_usb_read_register(ucr, HW_VERSION, &val); + if (ret) + return ret; + + ucr->ic_version = val & HW_VER_MASK; + + /* determine package */ + ret = rtsx_usb_read_register(ucr, CARD_SHARE_MODE, &val); + if (ret) + return ret; + + if (val & CARD_SHARE_LQFP_SEL) { + ucr->package = LQFP48; + dev_dbg(&ucr->pusb_intf->dev, "Package: LQFP48\n"); + } else { + ucr->package = QFN24; + dev_dbg(&ucr->pusb_intf->dev, "Package: QFN24\n"); + } + + /* determine IC variations */ + rtsx_usb_read_register(ucr, CFG_MODE_1, &val); + if (val & RTS5179) { + ucr->is_rts5179 = true; + dev_dbg(&ucr->pusb_intf->dev, "Device is rts5179\n"); + } else { + ucr->is_rts5179 = false; + } + + return rtsx_usb_reset_chip(ucr); +} + +static int rtsx_usb_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct usb_device *usb_dev = interface_to_usbdev(intf); + struct rtsx_ucr *ucr; + int ret; + + dev_dbg(&intf->dev, + ": Realtek USB Card Reader found at bus %03d address %03d\n", + usb_dev->bus->busnum, usb_dev->devnum); + + ucr = devm_kzalloc(&intf->dev, sizeof(*ucr), GFP_KERNEL); + if (!ucr) + return -ENOMEM; + + ucr->pusb_dev = usb_dev; + + ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE, + GFP_KERNEL, &ucr->iobuf_dma); + if (!ucr->iobuf) + return -ENOMEM; + + usb_set_intfdata(intf, ucr); + + ucr->vendor_id = id->idVendor; + ucr->product_id = id->idProduct; + ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf; + + mutex_init(&ucr->dev_mutex); + + ucr->pusb_intf = intf; + + /* initialize */ + ret = rtsx_usb_init_chip(ucr); + if (ret) + goto out_init_fail; + + /* initialize USB SG transfer timer */ + timer_setup(&ucr->sg_timer, rtsx_usb_sg_timed_out, 0); + + ret = 
mfd_add_hotplug_devices(&intf->dev, rtsx_usb_cells, + ARRAY_SIZE(rtsx_usb_cells)); + if (ret) + goto out_init_fail; + +#ifdef CONFIG_PM + intf->needs_remote_wakeup = 1; + usb_enable_autosuspend(usb_dev); +#endif + + return 0; + +out_init_fail: + usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, + ucr->iobuf_dma); + return ret; +} + +static void rtsx_usb_disconnect(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + dev_dbg(&intf->dev, "%s called\n", __func__); + + mfd_remove_devices(&intf->dev); + + usb_set_intfdata(ucr->pusb_intf, NULL); + usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, + ucr->iobuf_dma); +} + +#ifdef CONFIG_PM +static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct rtsx_ucr *ucr = + (struct rtsx_ucr *)usb_get_intfdata(intf); + u16 val = 0; + + dev_dbg(&intf->dev, "%s called with pm message 0x%04x\n", + __func__, message.event); + + if (PMSG_IS_AUTO(message)) { + if (mutex_trylock(&ucr->dev_mutex)) { + rtsx_usb_get_card_status(ucr, &val); + mutex_unlock(&ucr->dev_mutex); + + /* Defer the autosuspend if card exists */ + if (val & (SD_CD | MS_CD)) + return -EAGAIN; + } else { + /* There is an ongoing operation*/ + return -EAGAIN; + } + } + + return 0; +} + +static int rtsx_usb_resume(struct usb_interface *intf) +{ + return 0; +} + +static int rtsx_usb_reset_resume(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = + (struct rtsx_ucr *)usb_get_intfdata(intf); + + rtsx_usb_reset_chip(ucr); + return 0; +} + +#else /* CONFIG_PM */ + +#define rtsx_usb_suspend NULL +#define rtsx_usb_resume NULL +#define rtsx_usb_reset_resume NULL + +#endif /* CONFIG_PM */ + + +static int rtsx_usb_pre_reset(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + mutex_lock(&ucr->dev_mutex); + return 0; +} + +static int rtsx_usb_post_reset(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr 
*)usb_get_intfdata(intf); + + mutex_unlock(&ucr->dev_mutex); + return 0; +} + +static struct usb_device_id rtsx_usb_usb_ids[] = { + { USB_DEVICE(0x0BDA, 0x0129) }, + { USB_DEVICE(0x0BDA, 0x0139) }, + { USB_DEVICE(0x0BDA, 0x0140) }, + { } +}; +MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids); + +static struct usb_driver rtsx_usb_driver = { + .name = "rtsx_usb", + .probe = rtsx_usb_probe, + .disconnect = rtsx_usb_disconnect, + .suspend = rtsx_usb_suspend, + .resume = rtsx_usb_resume, + .reset_resume = rtsx_usb_reset_resume, + .pre_reset = rtsx_usb_pre_reset, + .post_reset = rtsx_usb_post_reset, + .id_table = rtsx_usb_usb_ids, + .supports_autosuspend = 1, + .soft_unbind = 1, +}; + +module_usb_driver(rtsx_usb_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Roger Tseng "); +MODULE_DESCRIPTION("Realtek USB Card Reader Driver"); diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 567028c9219a..cec8152b5ede 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -838,14 +838,14 @@ config MMC_USDHI6ROL0 config MMC_REALTEK_PCI tristate "Realtek PCI-E SD/MMC Card Interface Driver" - depends on MFD_RTSX_PCI + depends on MISC_RTSX_PCI help Say Y here to include driver code to support SD/MMC card interface of Realtek PCI-E card reader config MMC_REALTEK_USB tristate "Realtek USB SD/MMC Card Interface Driver" - depends on MFD_RTSX_USB + depends on MISC_RTSX_USB help Say Y here to include driver code to support SD/MMC card interface of Realtek RTS5129/39 series card reader diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index 0848dc0f882e..30bd8081307e 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include struct realtek_pci_sdmmc { diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c index 76da1687ab37..78422079ecfa 100644 --- a/drivers/mmc/host/rtsx_usb_sdmmc.c +++ 
b/drivers/mmc/host/rtsx_usb_sdmmc.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #if defined(CONFIG_LEDS_CLASS) || (defined(CONFIG_LEDS_CLASS_MODULE) && \ diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h deleted file mode 100644 index 443176ee1ab0..000000000000 --- a/include/linux/mfd/rtsx_common.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Driver for Realtek driver-based card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . 
- * - * Author: - * Wei WANG - */ - -#ifndef __RTSX_COMMON_H -#define __RTSX_COMMON_H - -#define DRV_NAME_RTSX_PCI "rtsx_pci" -#define DRV_NAME_RTSX_PCI_SDMMC "rtsx_pci_sdmmc" -#define DRV_NAME_RTSX_PCI_MS "rtsx_pci_ms" - -#define RTSX_REG_PAIR(addr, val) (((u32)(addr) << 16) | (u8)(val)) - -#define RTSX_SSC_DEPTH_4M 0x01 -#define RTSX_SSC_DEPTH_2M 0x02 -#define RTSX_SSC_DEPTH_1M 0x03 -#define RTSX_SSC_DEPTH_500K 0x04 -#define RTSX_SSC_DEPTH_250K 0x05 - -#define RTSX_SD_CARD 0 -#define RTSX_MS_CARD 1 - -#define CLK_TO_DIV_N 0 -#define DIV_N_TO_CLK 1 - -struct platform_device; - -struct rtsx_slot { - struct platform_device *p_dev; - void (*card_event)(struct platform_device *p_dev); -}; - -#endif diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h deleted file mode 100644 index a2a1318a3d0c..000000000000 --- a/include/linux/mfd/rtsx_pci.h +++ /dev/null @@ -1,1141 +0,0 @@ -/* Driver for Realtek PCI-Express card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see . 
- * - * Author: - * Wei WANG - */ - -#ifndef __RTSX_PCI_H -#define __RTSX_PCI_H - -#include -#include -#include - -#define MAX_RW_REG_CNT 1024 - -#define RTSX_HCBAR 0x00 -#define RTSX_HCBCTLR 0x04 -#define STOP_CMD (0x01 << 28) -#define READ_REG_CMD 0 -#define WRITE_REG_CMD 1 -#define CHECK_REG_CMD 2 - -#define RTSX_HDBAR 0x08 -#define SG_INT 0x04 -#define SG_END 0x02 -#define SG_VALID 0x01 -#define SG_NO_OP 0x00 -#define SG_TRANS_DATA (0x02 << 4) -#define SG_LINK_DESC (0x03 << 4) -#define RTSX_HDBCTLR 0x0C -#define SDMA_MODE 0x00 -#define ADMA_MODE (0x02 << 26) -#define STOP_DMA (0x01 << 28) -#define TRIG_DMA (0x01 << 31) - -#define RTSX_HAIMR 0x10 -#define HAIMR_TRANS_START (0x01 << 31) -#define HAIMR_READ 0x00 -#define HAIMR_WRITE (0x01 << 30) -#define HAIMR_READ_START (HAIMR_TRANS_START | HAIMR_READ) -#define HAIMR_WRITE_START (HAIMR_TRANS_START | HAIMR_WRITE) -#define HAIMR_TRANS_END (HAIMR_TRANS_START) - -#define RTSX_BIPR 0x14 -#define CMD_DONE_INT (1 << 31) -#define DATA_DONE_INT (1 << 30) -#define TRANS_OK_INT (1 << 29) -#define TRANS_FAIL_INT (1 << 28) -#define XD_INT (1 << 27) -#define MS_INT (1 << 26) -#define SD_INT (1 << 25) -#define GPIO0_INT (1 << 24) -#define OC_INT (1 << 23) -#define SD_WRITE_PROTECT (1 << 19) -#define XD_EXIST (1 << 18) -#define MS_EXIST (1 << 17) -#define SD_EXIST (1 << 16) -#define DELINK_INT GPIO0_INT -#define MS_OC_INT (1 << 23) -#define SD_OC_INT (1 << 22) - -#define CARD_INT (XD_INT | MS_INT | SD_INT) -#define NEED_COMPLETE_INT (DATA_DONE_INT | TRANS_OK_INT | TRANS_FAIL_INT) -#define RTSX_INT (CMD_DONE_INT | NEED_COMPLETE_INT | \ - CARD_INT | GPIO0_INT | OC_INT) -#define CARD_EXIST (XD_EXIST | MS_EXIST | SD_EXIST) - -#define RTSX_BIER 0x18 -#define CMD_DONE_INT_EN (1 << 31) -#define DATA_DONE_INT_EN (1 << 30) -#define TRANS_OK_INT_EN (1 << 29) -#define TRANS_FAIL_INT_EN (1 << 28) -#define XD_INT_EN (1 << 27) -#define MS_INT_EN (1 << 26) -#define SD_INT_EN (1 << 25) -#define GPIO0_INT_EN (1 << 24) -#define OC_INT_EN (1 << 
23) -#define DELINK_INT_EN GPIO0_INT_EN -#define MS_OC_INT_EN (1 << 23) -#define SD_OC_INT_EN (1 << 22) - - -/* - * macros for easy use - */ -#define rtsx_pci_writel(pcr, reg, value) \ - iowrite32(value, (pcr)->remap_addr + reg) -#define rtsx_pci_readl(pcr, reg) \ - ioread32((pcr)->remap_addr + reg) -#define rtsx_pci_writew(pcr, reg, value) \ - iowrite16(value, (pcr)->remap_addr + reg) -#define rtsx_pci_readw(pcr, reg) \ - ioread16((pcr)->remap_addr + reg) -#define rtsx_pci_writeb(pcr, reg, value) \ - iowrite8(value, (pcr)->remap_addr + reg) -#define rtsx_pci_readb(pcr, reg) \ - ioread8((pcr)->remap_addr + reg) - -#define rtsx_pci_read_config_byte(pcr, where, val) \ - pci_read_config_byte((pcr)->pci, where, val) - -#define rtsx_pci_write_config_byte(pcr, where, val) \ - pci_write_config_byte((pcr)->pci, where, val) - -#define rtsx_pci_read_config_dword(pcr, where, val) \ - pci_read_config_dword((pcr)->pci, where, val) - -#define rtsx_pci_write_config_dword(pcr, where, val) \ - pci_write_config_dword((pcr)->pci, where, val) - -#define STATE_TRANS_NONE 0 -#define STATE_TRANS_CMD 1 -#define STATE_TRANS_BUF 2 -#define STATE_TRANS_SG 3 - -#define TRANS_NOT_READY 0 -#define TRANS_RESULT_OK 1 -#define TRANS_RESULT_FAIL 2 -#define TRANS_NO_DEVICE 3 - -#define RTSX_RESV_BUF_LEN 4096 -#define HOST_CMDS_BUF_LEN 1024 -#define HOST_SG_TBL_BUF_LEN (RTSX_RESV_BUF_LEN - HOST_CMDS_BUF_LEN) -#define HOST_SG_TBL_ITEMS (HOST_SG_TBL_BUF_LEN / 8) -#define MAX_SG_ITEM_LEN 0x80000 -#define HOST_TO_DEVICE 0 -#define DEVICE_TO_HOST 1 - -#define OUTPUT_3V3 0 -#define OUTPUT_1V8 1 - -#define RTSX_PHASE_MAX 32 -#define RX_TUNING_CNT 3 - -#define MS_CFG 0xFD40 -#define SAMPLE_TIME_RISING 0x00 -#define SAMPLE_TIME_FALLING 0x80 -#define PUSH_TIME_DEFAULT 0x00 -#define PUSH_TIME_ODD 0x40 -#define NO_EXTEND_TOGGLE 0x00 -#define EXTEND_TOGGLE_CHK 0x20 -#define MS_BUS_WIDTH_1 0x00 -#define MS_BUS_WIDTH_4 0x10 -#define MS_BUS_WIDTH_8 0x18 -#define MS_2K_SECTOR_MODE 0x04 -#define MS_512_SECTOR_MODE 
0x00 -#define MS_TOGGLE_TIMEOUT_EN 0x00 -#define MS_TOGGLE_TIMEOUT_DISEN 0x01 -#define MS_NO_CHECK_INT 0x02 -#define MS_TPC 0xFD41 -#define MS_TRANS_CFG 0xFD42 -#define WAIT_INT 0x80 -#define NO_WAIT_INT 0x00 -#define NO_AUTO_READ_INT_REG 0x00 -#define AUTO_READ_INT_REG 0x40 -#define MS_CRC16_ERR 0x20 -#define MS_RDY_TIMEOUT 0x10 -#define MS_INT_CMDNK 0x08 -#define MS_INT_BREQ 0x04 -#define MS_INT_ERR 0x02 -#define MS_INT_CED 0x01 -#define MS_TRANSFER 0xFD43 -#define MS_TRANSFER_START 0x80 -#define MS_TRANSFER_END 0x40 -#define MS_TRANSFER_ERR 0x20 -#define MS_BS_STATE 0x10 -#define MS_TM_READ_BYTES 0x00 -#define MS_TM_NORMAL_READ 0x01 -#define MS_TM_WRITE_BYTES 0x04 -#define MS_TM_NORMAL_WRITE 0x05 -#define MS_TM_AUTO_READ 0x08 -#define MS_TM_AUTO_WRITE 0x0C -#define MS_INT_REG 0xFD44 -#define MS_BYTE_CNT 0xFD45 -#define MS_SECTOR_CNT_L 0xFD46 -#define MS_SECTOR_CNT_H 0xFD47 -#define MS_DBUS_H 0xFD48 - -#define SD_CFG1 0xFDA0 -#define SD_CLK_DIVIDE_0 0x00 -#define SD_CLK_DIVIDE_256 0xC0 -#define SD_CLK_DIVIDE_128 0x80 -#define SD_BUS_WIDTH_1BIT 0x00 -#define SD_BUS_WIDTH_4BIT 0x01 -#define SD_BUS_WIDTH_8BIT 0x02 -#define SD_ASYNC_FIFO_NOT_RST 0x10 -#define SD_20_MODE 0x00 -#define SD_DDR_MODE 0x04 -#define SD_30_MODE 0x08 -#define SD_CLK_DIVIDE_MASK 0xC0 -#define SD_CFG2 0xFDA1 -#define SD_CALCULATE_CRC7 0x00 -#define SD_NO_CALCULATE_CRC7 0x80 -#define SD_CHECK_CRC16 0x00 -#define SD_NO_CHECK_CRC16 0x40 -#define SD_NO_CHECK_WAIT_CRC_TO 0x20 -#define SD_WAIT_BUSY_END 0x08 -#define SD_NO_WAIT_BUSY_END 0x00 -#define SD_CHECK_CRC7 0x00 -#define SD_NO_CHECK_CRC7 0x04 -#define SD_RSP_LEN_0 0x00 -#define SD_RSP_LEN_6 0x01 -#define SD_RSP_LEN_17 0x02 -#define SD_RSP_TYPE_R0 0x04 -#define SD_RSP_TYPE_R1 0x01 -#define SD_RSP_TYPE_R1b 0x09 -#define SD_RSP_TYPE_R2 0x02 -#define SD_RSP_TYPE_R3 0x05 -#define SD_RSP_TYPE_R4 0x05 -#define SD_RSP_TYPE_R5 0x01 -#define SD_RSP_TYPE_R6 0x01 -#define SD_RSP_TYPE_R7 0x01 -#define SD_CFG3 0xFDA2 -#define SD_RSP_80CLK_TIMEOUT_EN 0x01 - 
-#define SD_STAT1 0xFDA3 -#define SD_CRC7_ERR 0x80 -#define SD_CRC16_ERR 0x40 -#define SD_CRC_WRITE_ERR 0x20 -#define SD_CRC_WRITE_ERR_MASK 0x1C -#define GET_CRC_TIME_OUT 0x02 -#define SD_TUNING_COMPARE_ERR 0x01 -#define SD_STAT2 0xFDA4 -#define SD_RSP_80CLK_TIMEOUT 0x01 - -#define SD_BUS_STAT 0xFDA5 -#define SD_CLK_TOGGLE_EN 0x80 -#define SD_CLK_FORCE_STOP 0x40 -#define SD_DAT3_STATUS 0x10 -#define SD_DAT2_STATUS 0x08 -#define SD_DAT1_STATUS 0x04 -#define SD_DAT0_STATUS 0x02 -#define SD_CMD_STATUS 0x01 -#define SD_PAD_CTL 0xFDA6 -#define SD_IO_USING_1V8 0x80 -#define SD_IO_USING_3V3 0x7F -#define TYPE_A_DRIVING 0x00 -#define TYPE_B_DRIVING 0x01 -#define TYPE_C_DRIVING 0x02 -#define TYPE_D_DRIVING 0x03 -#define SD_SAMPLE_POINT_CTL 0xFDA7 -#define DDR_FIX_RX_DAT 0x00 -#define DDR_VAR_RX_DAT 0x80 -#define DDR_FIX_RX_DAT_EDGE 0x00 -#define DDR_FIX_RX_DAT_14_DELAY 0x40 -#define DDR_FIX_RX_CMD 0x00 -#define DDR_VAR_RX_CMD 0x20 -#define DDR_FIX_RX_CMD_POS_EDGE 0x00 -#define DDR_FIX_RX_CMD_14_DELAY 0x10 -#define SD20_RX_POS_EDGE 0x00 -#define SD20_RX_14_DELAY 0x08 -#define SD20_RX_SEL_MASK 0x08 -#define SD_PUSH_POINT_CTL 0xFDA8 -#define DDR_FIX_TX_CMD_DAT 0x00 -#define DDR_VAR_TX_CMD_DAT 0x80 -#define DDR_FIX_TX_DAT_14_TSU 0x00 -#define DDR_FIX_TX_DAT_12_TSU 0x40 -#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 -#define DDR_FIX_TX_CMD_14_AHEAD 0x20 -#define SD20_TX_NEG_EDGE 0x00 -#define SD20_TX_14_AHEAD 0x10 -#define SD20_TX_SEL_MASK 0x10 -#define DDR_VAR_SDCLK_POL_SWAP 0x01 -#define SD_CMD0 0xFDA9 -#define SD_CMD_START 0x40 -#define SD_CMD1 0xFDAA -#define SD_CMD2 0xFDAB -#define SD_CMD3 0xFDAC -#define SD_CMD4 0xFDAD -#define SD_CMD5 0xFDAE -#define SD_BYTE_CNT_L 0xFDAF -#define SD_BYTE_CNT_H 0xFDB0 -#define SD_BLOCK_CNT_L 0xFDB1 -#define SD_BLOCK_CNT_H 0xFDB2 -#define SD_TRANSFER 0xFDB3 -#define SD_TRANSFER_START 0x80 -#define SD_TRANSFER_END 0x40 -#define SD_STAT_IDLE 0x20 -#define SD_TRANSFER_ERR 0x10 -#define SD_TM_NORMAL_WRITE 0x00 -#define SD_TM_AUTO_WRITE_3 0x01 -#define 
SD_TM_AUTO_WRITE_4 0x02 -#define SD_TM_AUTO_READ_3 0x05 -#define SD_TM_AUTO_READ_4 0x06 -#define SD_TM_CMD_RSP 0x08 -#define SD_TM_AUTO_WRITE_1 0x09 -#define SD_TM_AUTO_WRITE_2 0x0A -#define SD_TM_NORMAL_READ 0x0C -#define SD_TM_AUTO_READ_1 0x0D -#define SD_TM_AUTO_READ_2 0x0E -#define SD_TM_AUTO_TUNING 0x0F -#define SD_CMD_STATE 0xFDB5 -#define SD_CMD_IDLE 0x80 - -#define SD_DATA_STATE 0xFDB6 -#define SD_DATA_IDLE 0x80 - -#define SRCTL 0xFC13 - -#define DCM_DRP_CTL 0xFC23 -#define DCM_RESET 0x08 -#define DCM_LOCKED 0x04 -#define DCM_208M 0x00 -#define DCM_TX 0x01 -#define DCM_RX 0x02 -#define DCM_DRP_TRIG 0xFC24 -#define DRP_START 0x80 -#define DRP_DONE 0x40 -#define DCM_DRP_CFG 0xFC25 -#define DRP_WRITE 0x80 -#define DRP_READ 0x00 -#define DCM_WRITE_ADDRESS_50 0x50 -#define DCM_WRITE_ADDRESS_51 0x51 -#define DCM_READ_ADDRESS_00 0x00 -#define DCM_READ_ADDRESS_51 0x51 -#define DCM_DRP_WR_DATA_L 0xFC26 -#define DCM_DRP_WR_DATA_H 0xFC27 -#define DCM_DRP_RD_DATA_L 0xFC28 -#define DCM_DRP_RD_DATA_H 0xFC29 -#define SD_VPCLK0_CTL 0xFC2A -#define SD_VPCLK1_CTL 0xFC2B -#define PHASE_SELECT_MASK 0x1F -#define SD_DCMPS0_CTL 0xFC2C -#define SD_DCMPS1_CTL 0xFC2D -#define SD_VPTX_CTL SD_VPCLK0_CTL -#define SD_VPRX_CTL SD_VPCLK1_CTL -#define PHASE_CHANGE 0x80 -#define PHASE_NOT_RESET 0x40 -#define SD_DCMPS_TX_CTL SD_DCMPS0_CTL -#define SD_DCMPS_RX_CTL SD_DCMPS1_CTL -#define DCMPS_CHANGE 0x80 -#define DCMPS_CHANGE_DONE 0x40 -#define DCMPS_ERROR 0x20 -#define DCMPS_CURRENT_PHASE 0x1F -#define CARD_CLK_SOURCE 0xFC2E -#define CRC_FIX_CLK (0x00 << 0) -#define CRC_VAR_CLK0 (0x01 << 0) -#define CRC_VAR_CLK1 (0x02 << 0) -#define SD30_FIX_CLK (0x00 << 2) -#define SD30_VAR_CLK0 (0x01 << 2) -#define SD30_VAR_CLK1 (0x02 << 2) -#define SAMPLE_FIX_CLK (0x00 << 4) -#define SAMPLE_VAR_CLK0 (0x01 << 4) -#define SAMPLE_VAR_CLK1 (0x02 << 4) -#define CARD_PWR_CTL 0xFD50 -#define PMOS_STRG_MASK 0x10 -#define PMOS_STRG_800mA 0x10 -#define PMOS_STRG_400mA 0x00 -#define SD_POWER_OFF 0x03 -#define 
SD_PARTIAL_POWER_ON 0x01 -#define SD_POWER_ON 0x00 -#define SD_POWER_MASK 0x03 -#define MS_POWER_OFF 0x0C -#define MS_PARTIAL_POWER_ON 0x04 -#define MS_POWER_ON 0x00 -#define MS_POWER_MASK 0x0C -#define BPP_POWER_OFF 0x0F -#define BPP_POWER_5_PERCENT_ON 0x0E -#define BPP_POWER_10_PERCENT_ON 0x0C -#define BPP_POWER_15_PERCENT_ON 0x08 -#define BPP_POWER_ON 0x00 -#define BPP_POWER_MASK 0x0F -#define SD_VCC_PARTIAL_POWER_ON 0x02 -#define SD_VCC_POWER_ON 0x00 -#define CARD_CLK_SWITCH 0xFD51 -#define RTL8411B_PACKAGE_MODE 0xFD51 -#define CARD_SHARE_MODE 0xFD52 -#define CARD_SHARE_MASK 0x0F -#define CARD_SHARE_MULTI_LUN 0x00 -#define CARD_SHARE_NORMAL 0x00 -#define CARD_SHARE_48_SD 0x04 -#define CARD_SHARE_48_MS 0x08 -#define CARD_SHARE_BAROSSA_SD 0x01 -#define CARD_SHARE_BAROSSA_MS 0x02 -#define CARD_DRIVE_SEL 0xFD53 -#define MS_DRIVE_8mA (0x01 << 6) -#define MMC_DRIVE_8mA (0x01 << 4) -#define XD_DRIVE_8mA (0x01 << 2) -#define GPIO_DRIVE_8mA 0x01 -#define RTS5209_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | MMC_DRIVE_8mA |\ - XD_DRIVE_8mA | GPIO_DRIVE_8mA) -#define RTL8411_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | MMC_DRIVE_8mA |\ - XD_DRIVE_8mA) -#define RTSX_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | GPIO_DRIVE_8mA) - -#define CARD_STOP 0xFD54 -#define SPI_STOP 0x01 -#define XD_STOP 0x02 -#define SD_STOP 0x04 -#define MS_STOP 0x08 -#define SPI_CLR_ERR 0x10 -#define XD_CLR_ERR 0x20 -#define SD_CLR_ERR 0x40 -#define MS_CLR_ERR 0x80 -#define CARD_OE 0xFD55 -#define SD_OUTPUT_EN 0x04 -#define MS_OUTPUT_EN 0x08 -#define CARD_AUTO_BLINK 0xFD56 -#define CARD_GPIO_DIR 0xFD57 -#define CARD_GPIO 0xFD58 -#define CARD_DATA_SOURCE 0xFD5B -#define PINGPONG_BUFFER 0x01 -#define RING_BUFFER 0x00 -#define SD30_CLK_DRIVE_SEL 0xFD5A -#define DRIVER_TYPE_A 0x05 -#define DRIVER_TYPE_B 0x03 -#define DRIVER_TYPE_C 0x02 -#define DRIVER_TYPE_D 0x01 -#define CARD_SELECT 0xFD5C -#define SD_MOD_SEL 2 -#define MS_MOD_SEL 3 -#define SD30_DRIVE_SEL 0xFD5E -#define CFG_DRIVER_TYPE_A 0x02 -#define CFG_DRIVER_TYPE_B 0x03 
-#define CFG_DRIVER_TYPE_C 0x01 -#define CFG_DRIVER_TYPE_D 0x00 -#define SD30_CMD_DRIVE_SEL 0xFD5E -#define SD30_DAT_DRIVE_SEL 0xFD5F -#define CARD_CLK_EN 0xFD69 -#define SD_CLK_EN 0x04 -#define MS_CLK_EN 0x08 -#define SDIO_CTRL 0xFD6B -#define CD_PAD_CTL 0xFD73 -#define CD_DISABLE_MASK 0x07 -#define MS_CD_DISABLE 0x04 -#define SD_CD_DISABLE 0x02 -#define XD_CD_DISABLE 0x01 -#define CD_DISABLE 0x07 -#define CD_ENABLE 0x00 -#define MS_CD_EN_ONLY 0x03 -#define SD_CD_EN_ONLY 0x05 -#define XD_CD_EN_ONLY 0x06 -#define FORCE_CD_LOW_MASK 0x38 -#define FORCE_CD_XD_LOW 0x08 -#define FORCE_CD_SD_LOW 0x10 -#define FORCE_CD_MS_LOW 0x20 -#define CD_AUTO_DISABLE 0x40 -#define FPDCTL 0xFC00 -#define SSC_POWER_DOWN 0x01 -#define SD_OC_POWER_DOWN 0x02 -#define ALL_POWER_DOWN 0x07 -#define OC_POWER_DOWN 0x06 -#define PDINFO 0xFC01 - -#define CLK_CTL 0xFC02 -#define CHANGE_CLK 0x01 -#define CLK_LOW_FREQ 0x01 - -#define CLK_DIV 0xFC03 -#define CLK_DIV_1 0x01 -#define CLK_DIV_2 0x02 -#define CLK_DIV_4 0x03 -#define CLK_DIV_8 0x04 -#define CLK_SEL 0xFC04 - -#define SSC_DIV_N_0 0xFC0F -#define SSC_DIV_N_1 0xFC10 -#define SSC_CTL1 0xFC11 -#define SSC_RSTB 0x80 -#define SSC_8X_EN 0x40 -#define SSC_FIX_FRAC 0x20 -#define SSC_SEL_1M 0x00 -#define SSC_SEL_2M 0x08 -#define SSC_SEL_4M 0x10 -#define SSC_SEL_8M 0x18 -#define SSC_CTL2 0xFC12 -#define SSC_DEPTH_MASK 0x07 -#define SSC_DEPTH_DISALBE 0x00 -#define SSC_DEPTH_4M 0x01 -#define SSC_DEPTH_2M 0x02 -#define SSC_DEPTH_1M 0x03 -#define SSC_DEPTH_500K 0x04 -#define SSC_DEPTH_250K 0x05 -#define RCCTL 0xFC14 - -#define FPGA_PULL_CTL 0xFC1D -#define OLT_LED_CTL 0xFC1E -#define GPIO_CTL 0xFC1F - -#define LDO_CTL 0xFC1E -#define BPP_ASIC_1V7 0x00 -#define BPP_ASIC_1V8 0x01 -#define BPP_ASIC_1V9 0x02 -#define BPP_ASIC_2V0 0x03 -#define BPP_ASIC_2V7 0x04 -#define BPP_ASIC_2V8 0x05 -#define BPP_ASIC_3V2 0x06 -#define BPP_ASIC_3V3 0x07 -#define BPP_REG_TUNED18 0x07 -#define BPP_TUNED18_SHIFT_8402 5 -#define BPP_TUNED18_SHIFT_8411 4 -#define BPP_PAD_MASK 
0x04 -#define BPP_PAD_3V3 0x04 -#define BPP_PAD_1V8 0x00 -#define BPP_LDO_POWB 0x03 -#define BPP_LDO_ON 0x00 -#define BPP_LDO_SUSPEND 0x02 -#define BPP_LDO_OFF 0x03 -#define SYS_VER 0xFC32 - -#define CARD_PULL_CTL1 0xFD60 -#define CARD_PULL_CTL2 0xFD61 -#define CARD_PULL_CTL3 0xFD62 -#define CARD_PULL_CTL4 0xFD63 -#define CARD_PULL_CTL5 0xFD64 -#define CARD_PULL_CTL6 0xFD65 - -/* PCI Express Related Registers */ -#define IRQEN0 0xFE20 -#define IRQSTAT0 0xFE21 -#define DMA_DONE_INT 0x80 -#define SUSPEND_INT 0x40 -#define LINK_RDY_INT 0x20 -#define LINK_DOWN_INT 0x10 -#define IRQEN1 0xFE22 -#define IRQSTAT1 0xFE23 -#define TLPRIEN 0xFE24 -#define TLPRISTAT 0xFE25 -#define TLPTIEN 0xFE26 -#define TLPTISTAT 0xFE27 -#define DMATC0 0xFE28 -#define DMATC1 0xFE29 -#define DMATC2 0xFE2A -#define DMATC3 0xFE2B -#define DMACTL 0xFE2C -#define DMA_RST 0x80 -#define DMA_BUSY 0x04 -#define DMA_DIR_TO_CARD 0x00 -#define DMA_DIR_FROM_CARD 0x02 -#define DMA_EN 0x01 -#define DMA_128 (0 << 4) -#define DMA_256 (1 << 4) -#define DMA_512 (2 << 4) -#define DMA_1024 (3 << 4) -#define DMA_PACK_SIZE_MASK 0x30 -#define BCTL 0xFE2D -#define RBBC0 0xFE2E -#define RBBC1 0xFE2F -#define RBDAT 0xFE30 -#define RBCTL 0xFE34 -#define CFGADDR0 0xFE35 -#define CFGADDR1 0xFE36 -#define CFGDATA0 0xFE37 -#define CFGDATA1 0xFE38 -#define CFGDATA2 0xFE39 -#define CFGDATA3 0xFE3A -#define CFGRWCTL 0xFE3B -#define PHYRWCTL 0xFE3C -#define PHYDATA0 0xFE3D -#define PHYDATA1 0xFE3E -#define PHYADDR 0xFE3F -#define MSGRXDATA0 0xFE40 -#define MSGRXDATA1 0xFE41 -#define MSGRXDATA2 0xFE42 -#define MSGRXDATA3 0xFE43 -#define MSGTXDATA0 0xFE44 -#define MSGTXDATA1 0xFE45 -#define MSGTXDATA2 0xFE46 -#define MSGTXDATA3 0xFE47 -#define MSGTXCTL 0xFE48 -#define LTR_CTL 0xFE4A -#define LTR_TX_EN_MASK BIT(7) -#define LTR_TX_EN_1 BIT(7) -#define LTR_TX_EN_0 0 -#define LTR_LATENCY_MODE_MASK BIT(6) -#define LTR_LATENCY_MODE_HW 0 -#define LTR_LATENCY_MODE_SW BIT(6) -#define OBFF_CFG 0xFE4C - -#define CDRESUMECTL 0xFE52 -#define 
WAKE_SEL_CTL 0xFE54 -#define PCLK_CTL 0xFE55 -#define PCLK_MODE_SEL 0x20 -#define PME_FORCE_CTL 0xFE56 - -#define ASPM_FORCE_CTL 0xFE57 -#define FORCE_ASPM_CTL0 0x10 -#define FORCE_ASPM_VAL_MASK 0x03 -#define FORCE_ASPM_L1_EN 0x02 -#define FORCE_ASPM_L0_EN 0x01 -#define FORCE_ASPM_NO_ASPM 0x00 -#define PM_CLK_FORCE_CTL 0xFE58 -#define FUNC_FORCE_CTL 0xFE59 -#define FUNC_FORCE_UPME_XMT_DBG 0x02 -#define PERST_GLITCH_WIDTH 0xFE5C -#define CHANGE_LINK_STATE 0xFE5B -#define RESET_LOAD_REG 0xFE5E -#define EFUSE_CONTENT 0xFE5F -#define HOST_SLEEP_STATE 0xFE60 -#define HOST_ENTER_S1 1 -#define HOST_ENTER_S3 2 - -#define SDIO_CFG 0xFE70 -#define PM_EVENT_DEBUG 0xFE71 -#define PME_DEBUG_0 0x08 -#define NFTS_TX_CTRL 0xFE72 - -#define PWR_GATE_CTRL 0xFE75 -#define PWR_GATE_EN 0x01 -#define LDO3318_PWR_MASK 0x06 -#define LDO_ON 0x00 -#define LDO_SUSPEND 0x04 -#define LDO_OFF 0x06 -#define PWD_SUSPEND_EN 0xFE76 -#define LDO_PWR_SEL 0xFE78 - -#define L1SUB_CONFIG1 0xFE8D -#define L1SUB_CONFIG2 0xFE8E -#define L1SUB_AUTO_CFG 0x02 -#define L1SUB_CONFIG3 0xFE8F -#define L1OFF_MBIAS2_EN_5250 BIT(7) - -#define DUMMY_REG_RESET_0 0xFE90 - -#define AUTOLOAD_CFG_BASE 0xFF00 -#define PETXCFG 0xFF03 -#define FORCE_CLKREQ_DELINK_MASK BIT(7) -#define FORCE_CLKREQ_LOW 0x80 -#define FORCE_CLKREQ_HIGH 0x00 - -#define PM_CTRL1 0xFF44 -#define CD_RESUME_EN_MASK 0xF0 - -#define PM_CTRL2 0xFF45 -#define PM_CTRL3 0xFF46 -#define SDIO_SEND_PME_EN 0x80 -#define FORCE_RC_MODE_ON 0x40 -#define FORCE_RX50_LINK_ON 0x20 -#define D3_DELINK_MODE_EN 0x10 -#define USE_PESRTB_CTL_DELINK 0x08 -#define DELAY_PIN_WAKE 0x04 -#define RESET_PIN_WAKE 0x02 -#define PM_WAKE_EN 0x01 -#define PM_CTRL4 0xFF47 - -/* Memory mapping */ -#define SRAM_BASE 0xE600 -#define RBUF_BASE 0xF400 -#define PPBUF_BASE1 0xF800 -#define PPBUF_BASE2 0xFA00 -#define IMAGE_FLAG_ADDR0 0xCE80 -#define IMAGE_FLAG_ADDR1 0xCE81 - -#define RREF_CFG 0xFF6C -#define RREF_VBGSEL_MASK 0x38 -#define RREF_VBGSEL_1V25 0x28 - -#define OOBS_CONFIG 0xFF6E 
-#define OOBS_AUTOK_DIS 0x80 -#define OOBS_VAL_MASK 0x1F - -#define LDO_DV18_CFG 0xFF70 -#define LDO_DV18_SR_MASK 0xC0 -#define LDO_DV18_SR_DF 0x40 - -#define LDO_CONFIG2 0xFF71 -#define LDO_D3318_MASK 0x07 -#define LDO_D3318_33V 0x07 -#define LDO_D3318_18V 0x02 - -#define LDO_VCC_CFG0 0xFF72 -#define LDO_VCC_LMTVTH_MASK 0x30 -#define LDO_VCC_LMTVTH_2A 0x10 - -#define LDO_VCC_CFG1 0xFF73 -#define LDO_VCC_REF_TUNE_MASK 0x30 -#define LDO_VCC_REF_1V2 0x20 -#define LDO_VCC_TUNE_MASK 0x07 -#define LDO_VCC_1V8 0x04 -#define LDO_VCC_3V3 0x07 -#define LDO_VCC_LMT_EN 0x08 - -#define LDO_VIO_CFG 0xFF75 -#define LDO_VIO_SR_MASK 0xC0 -#define LDO_VIO_SR_DF 0x40 -#define LDO_VIO_REF_TUNE_MASK 0x30 -#define LDO_VIO_REF_1V2 0x20 -#define LDO_VIO_TUNE_MASK 0x07 -#define LDO_VIO_1V7 0x03 -#define LDO_VIO_1V8 0x04 -#define LDO_VIO_3V3 0x07 - -#define LDO_DV12S_CFG 0xFF76 -#define LDO_REF12_TUNE_MASK 0x18 -#define LDO_REF12_TUNE_DF 0x10 -#define LDO_D12_TUNE_MASK 0x07 -#define LDO_D12_TUNE_DF 0x04 - -#define LDO_AV12S_CFG 0xFF77 -#define LDO_AV12S_TUNE_MASK 0x07 -#define LDO_AV12S_TUNE_DF 0x04 - -#define SD40_LDO_CTL1 0xFE7D -#define SD40_VIO_TUNE_MASK 0x70 -#define SD40_VIO_TUNE_1V7 0x30 -#define SD_VIO_LDO_1V8 0x40 -#define SD_VIO_LDO_3V3 0x70 - -/* Phy register */ -#define PHY_PCR 0x00 -#define PHY_PCR_FORCE_CODE 0xB000 -#define PHY_PCR_OOBS_CALI_50 0x0800 -#define PHY_PCR_OOBS_VCM_08 0x0200 -#define PHY_PCR_OOBS_SEN_90 0x0040 -#define PHY_PCR_RSSI_EN 0x0002 -#define PHY_PCR_RX10K 0x0001 - -#define PHY_RCR0 0x01 -#define PHY_RCR1 0x02 -#define PHY_RCR1_ADP_TIME_4 0x0400 -#define PHY_RCR1_VCO_COARSE 0x001F -#define PHY_RCR1_INIT_27S 0x0A1F -#define PHY_SSCCR2 0x02 -#define PHY_SSCCR2_PLL_NCODE 0x0A00 -#define PHY_SSCCR2_TIME0 0x001C -#define PHY_SSCCR2_TIME2_WIDTH 0x0003 - -#define PHY_RCR2 0x03 -#define PHY_RCR2_EMPHASE_EN 0x8000 -#define PHY_RCR2_NADJR 0x4000 -#define PHY_RCR2_CDR_SR_2 0x0100 -#define PHY_RCR2_FREQSEL_12 0x0040 -#define PHY_RCR2_CDR_SC_12P 0x0010 -#define 
PHY_RCR2_CALIB_LATE 0x0002 -#define PHY_RCR2_INIT_27S 0xC152 -#define PHY_SSCCR3 0x03 -#define PHY_SSCCR3_STEP_IN 0x2740 -#define PHY_SSCCR3_CHECK_DELAY 0x0008 -#define _PHY_ANA03 0x03 -#define _PHY_ANA03_TIMER_MAX 0x2700 -#define _PHY_ANA03_OOBS_DEB_EN 0x0040 -#define _PHY_CMU_DEBUG_EN 0x0008 - -#define PHY_RTCR 0x04 -#define PHY_RDR 0x05 -#define PHY_RDR_RXDSEL_1_9 0x4000 -#define PHY_SSC_AUTO_PWD 0x0600 -#define PHY_TCR0 0x06 -#define PHY_TCR1 0x07 -#define PHY_TUNE 0x08 -#define PHY_TUNE_TUNEREF_1_0 0x4000 -#define PHY_TUNE_VBGSEL_1252 0x0C00 -#define PHY_TUNE_SDBUS_33 0x0200 -#define PHY_TUNE_TUNED18 0x01C0 -#define PHY_TUNE_TUNED12 0X0020 -#define PHY_TUNE_TUNEA12 0x0004 -#define PHY_TUNE_VOLTAGE_MASK 0xFC3F -#define PHY_TUNE_VOLTAGE_3V3 0x03C0 -#define PHY_TUNE_D18_1V8 0x0100 -#define PHY_TUNE_D18_1V7 0x0080 -#define PHY_ANA08 0x08 -#define PHY_ANA08_RX_EQ_DCGAIN 0x5000 -#define PHY_ANA08_SEL_RX_EN 0x0400 -#define PHY_ANA08_RX_EQ_VAL 0x03C0 -#define PHY_ANA08_SCP 0x0020 -#define PHY_ANA08_SEL_IPI 0x0004 - -#define PHY_IMR 0x09 -#define PHY_BPCR 0x0A -#define PHY_BPCR_IBRXSEL 0x0400 -#define PHY_BPCR_IBTXSEL 0x0100 -#define PHY_BPCR_IB_FILTER 0x0080 -#define PHY_BPCR_CMIRROR_EN 0x0040 - -#define PHY_BIST 0x0B -#define PHY_RAW_L 0x0C -#define PHY_RAW_H 0x0D -#define PHY_RAW_DATA 0x0E -#define PHY_HOST_CLK_CTRL 0x0F -#define PHY_DMR 0x10 -#define PHY_BACR 0x11 -#define PHY_BACR_BASIC_MASK 0xFFF3 -#define PHY_IER 0x12 -#define PHY_BCSR 0x13 -#define PHY_BPR 0x14 -#define PHY_BPNR2 0x15 -#define PHY_BPNR 0x16 -#define PHY_BRNR2 0x17 -#define PHY_BENR 0x18 -#define PHY_REV 0x19 -#define PHY_REV_RESV 0xE000 -#define PHY_REV_RXIDLE_LATCHED 0x1000 -#define PHY_REV_P1_EN 0x0800 -#define PHY_REV_RXIDLE_EN 0x0400 -#define PHY_REV_CLKREQ_TX_EN 0x0200 -#define PHY_REV_CLKREQ_RX_EN 0x0100 -#define PHY_REV_CLKREQ_DT_1_0 0x0040 -#define PHY_REV_STOP_CLKRD 0x0020 -#define PHY_REV_RX_PWST 0x0008 -#define PHY_REV_STOP_CLKWR 0x0004 -#define _PHY_REV0 0x19 -#define 
_PHY_REV0_FILTER_OUT 0x3800 -#define _PHY_REV0_CDR_BYPASS_PFD 0x0100 -#define _PHY_REV0_CDR_RX_IDLE_BYPASS 0x0002 - -#define PHY_FLD0 0x1A -#define PHY_ANA1A 0x1A -#define PHY_ANA1A_TXR_LOOPBACK 0x2000 -#define PHY_ANA1A_RXT_BIST 0x0500 -#define PHY_ANA1A_TXR_BIST 0x0040 -#define PHY_ANA1A_REV 0x0006 -#define PHY_FLD0_INIT_27S 0x2546 -#define PHY_FLD1 0x1B -#define PHY_FLD2 0x1C -#define PHY_FLD3 0x1D -#define PHY_FLD3_TIMER_4 0x0800 -#define PHY_FLD3_TIMER_6 0x0020 -#define PHY_FLD3_RXDELINK 0x0004 -#define PHY_FLD3_INIT_27S 0x0004 -#define PHY_ANA1D 0x1D -#define PHY_ANA1D_DEBUG_ADDR 0x0004 -#define _PHY_FLD0 0x1D -#define _PHY_FLD0_CLK_REQ_20C 0x8000 -#define _PHY_FLD0_RX_IDLE_EN 0x1000 -#define _PHY_FLD0_BIT_ERR_RSTN 0x0800 -#define _PHY_FLD0_BER_COUNT 0x01E0 -#define _PHY_FLD0_BER_TIMER 0x001E -#define _PHY_FLD0_CHECK_EN 0x0001 - -#define PHY_FLD4 0x1E -#define PHY_FLD4_FLDEN_SEL 0x4000 -#define PHY_FLD4_REQ_REF 0x2000 -#define PHY_FLD4_RXAMP_OFF 0x1000 -#define PHY_FLD4_REQ_ADDA 0x0800 -#define PHY_FLD4_BER_COUNT 0x00E0 -#define PHY_FLD4_BER_TIMER 0x000A -#define PHY_FLD4_BER_CHK_EN 0x0001 -#define PHY_FLD4_INIT_27S 0x5C7F -#define PHY_DIG1E 0x1E -#define PHY_DIG1E_REV 0x4000 -#define PHY_DIG1E_D0_X_D1 0x1000 -#define PHY_DIG1E_RX_ON_HOST 0x0800 -#define PHY_DIG1E_RCLK_REF_HOST 0x0400 -#define PHY_DIG1E_RCLK_TX_EN_KEEP 0x0040 -#define PHY_DIG1E_RCLK_TX_TERM_KEEP 0x0020 -#define PHY_DIG1E_RCLK_RX_EIDLE_ON 0x0010 -#define PHY_DIG1E_TX_TERM_KEEP 0x0008 -#define PHY_DIG1E_RX_TERM_KEEP 0x0004 -#define PHY_DIG1E_TX_EN_KEEP 0x0002 -#define PHY_DIG1E_RX_EN_KEEP 0x0001 -#define PHY_DUM_REG 0x1F - -#define PCR_ASPM_SETTING_REG1 0x160 -#define PCR_ASPM_SETTING_REG2 0x168 - -#define PCR_SETTING_REG1 0x724 -#define PCR_SETTING_REG2 0x814 -#define PCR_SETTING_REG3 0x747 - -#define rtsx_pci_init_cmd(pcr) ((pcr)->ci = 0) - -#define RTS5227_DEVICE_ID 0x5227 -#define RTS_MAX_TIMES_FREQ_REDUCTION 8 - -struct rtsx_pcr; - -struct pcr_handle { - struct rtsx_pcr *pcr; -}; - -struct 
pcr_ops { - int (*write_phy)(struct rtsx_pcr *pcr, u8 addr, u16 val); - int (*read_phy)(struct rtsx_pcr *pcr, u8 addr, u16 *val); - int (*extra_init_hw)(struct rtsx_pcr *pcr); - int (*optimize_phy)(struct rtsx_pcr *pcr); - int (*turn_on_led)(struct rtsx_pcr *pcr); - int (*turn_off_led)(struct rtsx_pcr *pcr); - int (*enable_auto_blink)(struct rtsx_pcr *pcr); - int (*disable_auto_blink)(struct rtsx_pcr *pcr); - int (*card_power_on)(struct rtsx_pcr *pcr, int card); - int (*card_power_off)(struct rtsx_pcr *pcr, int card); - int (*switch_output_voltage)(struct rtsx_pcr *pcr, - u8 voltage); - unsigned int (*cd_deglitch)(struct rtsx_pcr *pcr); - int (*conv_clk_and_div_n)(int clk, int dir); - void (*fetch_vendor_settings)(struct rtsx_pcr *pcr); - void (*force_power_down)(struct rtsx_pcr *pcr, u8 pm_state); - - void (*set_aspm)(struct rtsx_pcr *pcr, bool enable); - int (*set_ltr_latency)(struct rtsx_pcr *pcr, u32 latency); - int (*set_l1off_sub)(struct rtsx_pcr *pcr, u8 val); - void (*set_l1off_cfg_sub_d0)(struct rtsx_pcr *pcr, int active); - void (*full_on)(struct rtsx_pcr *pcr); - void (*power_saving)(struct rtsx_pcr *pcr); -}; - -enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN}; - -#define ASPM_L1_1_EN_MASK BIT(3) -#define ASPM_L1_2_EN_MASK BIT(2) -#define PM_L1_1_EN_MASK BIT(1) -#define PM_L1_2_EN_MASK BIT(0) - -#define ASPM_L1_1_EN BIT(0) -#define ASPM_L1_2_EN BIT(1) -#define PM_L1_1_EN BIT(2) -#define PM_L1_2_EN BIT(3) -#define LTR_L1SS_PWR_GATE_EN BIT(4) -#define L1_SNOOZE_TEST_EN BIT(5) -#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) - -enum dev_aspm_mode { - DEV_ASPM_DISABLE = 0, - DEV_ASPM_DYNAMIC, - DEV_ASPM_BACKDOOR, - DEV_ASPM_STATIC, -}; - -/* - * struct rtsx_cr_option - card reader option - * @dev_flags: device flags - * @force_clkreq_0: force clock request - * @ltr_en: enable ltr mode flag - * @ltr_enabled: ltr mode in configure space flag - * @ltr_active: ltr mode status - * @ltr_active_latency: ltr mode active latency - * @ltr_idle_latency: ltr mode idle 
latency - * @ltr_l1off_latency: ltr mode l1off latency - * @dev_aspm_mode: device aspm mode - * @l1_snooze_delay: l1 snooze delay - * @ltr_l1off_sspwrgate: ltr l1off sspwrgate - * @ltr_l1off_snooze_sspwrgate: ltr l1off snooze sspwrgate - */ -struct rtsx_cr_option { - u32 dev_flags; - bool force_clkreq_0; - bool ltr_en; - bool ltr_enabled; - bool ltr_active; - u32 ltr_active_latency; - u32 ltr_idle_latency; - u32 ltr_l1off_latency; - enum dev_aspm_mode dev_aspm_mode; - u32 l1_snooze_delay; - u8 ltr_l1off_sspwrgate; - u8 ltr_l1off_snooze_sspwrgate; -}; - -#define rtsx_set_dev_flag(cr, flag) \ - ((cr)->option.dev_flags |= (flag)) -#define rtsx_clear_dev_flag(cr, flag) \ - ((cr)->option.dev_flags &= ~(flag)) -#define rtsx_check_dev_flag(cr, flag) \ - ((cr)->option.dev_flags & (flag)) - -struct rtsx_pcr { - struct pci_dev *pci; - unsigned int id; - int pcie_cap; - struct rtsx_cr_option option; - - /* pci resources */ - unsigned long addr; - void __iomem *remap_addr; - int irq; - - /* host reserved buffer */ - void *rtsx_resv_buf; - dma_addr_t rtsx_resv_buf_addr; - - void *host_cmds_ptr; - dma_addr_t host_cmds_addr; - int ci; - - void *host_sg_tbl_ptr; - dma_addr_t host_sg_tbl_addr; - int sgi; - - u32 bier; - char trans_result; - - unsigned int card_inserted; - unsigned int card_removed; - unsigned int card_exist; - - struct delayed_work carddet_work; - struct delayed_work idle_work; - - spinlock_t lock; - struct mutex pcr_mutex; - struct completion *done; - struct completion *finish_me; - - unsigned int cur_clock; - bool remove_pci; - bool msi_en; - -#define EXTRA_CAPS_SD_SDR50 (1 << 0) -#define EXTRA_CAPS_SD_SDR104 (1 << 1) -#define EXTRA_CAPS_SD_DDR50 (1 << 2) -#define EXTRA_CAPS_MMC_HSDDR (1 << 3) -#define EXTRA_CAPS_MMC_HS200 (1 << 4) -#define EXTRA_CAPS_MMC_8BIT (1 << 5) - u32 extra_caps; - -#define IC_VER_A 0 -#define IC_VER_B 1 -#define IC_VER_C 2 -#define IC_VER_D 3 - u8 ic_version; - - u8 sd30_drive_sel_1v8; - u8 sd30_drive_sel_3v3; - u8 card_drive_sel; 
-#define ASPM_L1_EN 0x02 - u8 aspm_en; - bool aspm_enabled; - -#define PCR_MS_PMOS (1 << 0) -#define PCR_REVERSE_SOCKET (1 << 1) - u32 flags; - - u32 tx_initial_phase; - u32 rx_initial_phase; - - const u32 *sd_pull_ctl_enable_tbl; - const u32 *sd_pull_ctl_disable_tbl; - const u32 *ms_pull_ctl_enable_tbl; - const u32 *ms_pull_ctl_disable_tbl; - - const struct pcr_ops *ops; - enum PDEV_STAT state; - - u16 reg_pm_ctrl3; - - int num_slots; - struct rtsx_slot *slots; - - u8 dma_error_count; -}; - -#define PID_524A 0x524A -#define PID_5249 0x5249 -#define PID_5250 0x5250 -#define PID_525A 0x525A - -#define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) -#define PCI_VID(pcr) ((pcr)->pci->vendor) -#define PCI_PID(pcr) ((pcr)->pci->device) -#define is_version(pcr, pid, ver) \ - (CHK_PCI_PID(pcr, pid) && (pcr)->ic_version == (ver)) -#define pcr_dbg(pcr, fmt, arg...) \ - dev_dbg(&(pcr)->pci->dev, fmt, ##arg) - -#define SDR104_PHASE(val) ((val) & 0xFF) -#define SDR50_PHASE(val) (((val) >> 8) & 0xFF) -#define DDR50_PHASE(val) (((val) >> 16) & 0xFF) -#define SDR104_TX_PHASE(pcr) SDR104_PHASE((pcr)->tx_initial_phase) -#define SDR50_TX_PHASE(pcr) SDR50_PHASE((pcr)->tx_initial_phase) -#define DDR50_TX_PHASE(pcr) DDR50_PHASE((pcr)->tx_initial_phase) -#define SDR104_RX_PHASE(pcr) SDR104_PHASE((pcr)->rx_initial_phase) -#define SDR50_RX_PHASE(pcr) SDR50_PHASE((pcr)->rx_initial_phase) -#define DDR50_RX_PHASE(pcr) DDR50_PHASE((pcr)->rx_initial_phase) -#define SET_CLOCK_PHASE(sdr104, sdr50, ddr50) \ - (((ddr50) << 16) | ((sdr50) << 8) | (sdr104)) - -void rtsx_pci_start_run(struct rtsx_pcr *pcr); -int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data); -int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data); -int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val); -int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val); -void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr); -void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, - u8 
cmd_type, u16 reg_addr, u8 mask, u8 data); -void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); -int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); -int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int num_sg, bool read, int timeout); -int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int num_sg, bool read); -void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int num_sg, bool read); -int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int count, bool read, int timeout); -int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); -int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); -int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card); -int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card); -int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, - u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); -int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card); -int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card); -int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card); -int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage); -unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr); -void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr); - -static inline u8 *rtsx_pci_get_cmd_data(struct rtsx_pcr *pcr) -{ - return (u8 *)(pcr->host_cmds_ptr); -} - -static inline int rtsx_pci_update_cfg_byte(struct rtsx_pcr *pcr, int addr, - u8 mask, u8 append) -{ - int err; - u8 val; - - err = pci_read_config_byte(pcr->pci, addr, &val); - if (err < 0) - return err; - return pci_write_config_byte(pcr->pci, addr, (val & mask) | append); -} - -static inline void rtsx_pci_write_be32(struct rtsx_pcr *pcr, u16 reg, u32 val) -{ - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg, 0xFF, val >> 24); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg + 1, 0xFF, val >> 16); 
- rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg + 2, 0xFF, val >> 8); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg + 3, 0xFF, val); -} - -static inline int rtsx_pci_update_phy(struct rtsx_pcr *pcr, u8 addr, - u16 mask, u16 append) -{ - int err; - u16 val; - - err = rtsx_pci_read_phy_register(pcr, addr, &val); - if (err < 0) - return err; - - return rtsx_pci_write_phy_register(pcr, addr, (val & mask) | append); -} - -#endif diff --git a/include/linux/mfd/rtsx_usb.h b/include/linux/mfd/rtsx_usb.h deleted file mode 100644 index c446e4fd6b5c..000000000000 --- a/include/linux/mfd/rtsx_usb.h +++ /dev/null @@ -1,628 +0,0 @@ -/* Driver for Realtek RTS5139 USB card reader - * - * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, see <http://www.gnu.org/licenses/>.
- * - * Author: - * Roger Tseng - */ - -#ifndef __RTSX_USB_H -#define __RTSX_USB_H - -#include <linux/usb.h> - -/* related module names */ -#define RTSX_USB_SD_CARD 0 -#define RTSX_USB_MS_CARD 1 - -/* endpoint numbers */ -#define EP_BULK_OUT 1 -#define EP_BULK_IN 2 -#define EP_INTR_IN 3 - -/* USB vendor requests */ -#define RTSX_USB_REQ_REG_OP 0x00 -#define RTSX_USB_REQ_POLL 0x02 - -/* miscellaneous parameters */ -#define MIN_DIV_N 60 -#define MAX_DIV_N 120 - -#define MAX_PHASE 15 -#define RX_TUNING_CNT 3 - -#define QFN24 0 -#define LQFP48 1 -#define CHECK_PKG(ucr, pkg) ((ucr)->package == (pkg)) - -/* data structures */ -struct rtsx_ucr { - u16 vendor_id; - u16 product_id; - - int package; - u8 ic_version; - bool is_rts5179; - - unsigned int cur_clk; - - u8 *cmd_buf; - unsigned int cmd_idx; - u8 *rsp_buf; - - struct usb_device *pusb_dev; - struct usb_interface *pusb_intf; - struct usb_sg_request current_sg; - unsigned char *iobuf; - dma_addr_t iobuf_dma; - - struct timer_list sg_timer; - struct mutex dev_mutex; -}; - -/* buffer size */ -#define IOBUF_SIZE 1024 - -/* prototypes of exported functions */ -extern int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status); - -extern int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data); -extern int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, - u8 data); - -extern int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, - u8 data); -extern int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, - u8 *data); - -extern void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, - u16 reg_addr, u8 mask, u8 data); -extern int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout); -extern int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout); -extern int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, - void *buf, unsigned int len, int use_sg, - unsigned int *act_len, int timeout); - -extern int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8
*buf, int buf_len); -extern int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len); -extern int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, - u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); -extern int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card); - -/* card status */ -#define SD_CD 0x01 -#define MS_CD 0x02 -#define XD_CD 0x04 -#define CD_MASK (SD_CD | MS_CD | XD_CD) -#define SD_WP 0x08 - -/* reader command field offset & parameters */ -#define READ_REG_CMD 0 -#define WRITE_REG_CMD 1 -#define CHECK_REG_CMD 2 - -#define PACKET_TYPE 4 -#define CNT_H 5 -#define CNT_L 6 -#define STAGE_FLAG 7 -#define CMD_OFFSET 8 -#define SEQ_WRITE_DATA_OFFSET 12 - -#define BATCH_CMD 0 -#define SEQ_READ 1 -#define SEQ_WRITE 2 - -#define STAGE_R 0x01 -#define STAGE_DI 0x02 -#define STAGE_DO 0x04 -#define STAGE_MS_STATUS 0x08 -#define STAGE_XD_STATUS 0x10 -#define MODE_C 0x00 -#define MODE_CR (STAGE_R) -#define MODE_CDIR (STAGE_R | STAGE_DI) -#define MODE_CDOR (STAGE_R | STAGE_DO) - -#define EP0_OP_SHIFT 14 -#define EP0_READ_REG_CMD 2 -#define EP0_WRITE_REG_CMD 3 - -#define rtsx_usb_cmd_hdr_tag(ucr) \ - do { \ - ucr->cmd_buf[0] = 'R'; \ - ucr->cmd_buf[1] = 'T'; \ - ucr->cmd_buf[2] = 'C'; \ - ucr->cmd_buf[3] = 'R'; \ - } while (0) - -static inline void rtsx_usb_init_cmd(struct rtsx_ucr *ucr) -{ - rtsx_usb_cmd_hdr_tag(ucr); - ucr->cmd_idx = 0; - ucr->cmd_buf[PACKET_TYPE] = BATCH_CMD; -} - -/* internal register address */ -#define FPDCTL 0xFC00 -#define SSC_DIV_N_0 0xFC07 -#define SSC_CTL1 0xFC09 -#define SSC_CTL2 0xFC0A -#define CFG_MODE 0xFC0E -#define CFG_MODE_1 0xFC0F -#define RCCTL 0xFC14 -#define SOF_WDOG 0xFC28 -#define SYS_DUMMY0 0xFC30 - -#define MS_BLKEND 0xFD30 -#define MS_READ_START 0xFD31 -#define MS_READ_COUNT 0xFD32 -#define MS_WRITE_START 0xFD33 -#define MS_WRITE_COUNT 0xFD34 -#define MS_COMMAND 0xFD35 -#define MS_OLD_BLOCK_0 0xFD36 -#define MS_OLD_BLOCK_1 0xFD37 -#define MS_NEW_BLOCK_0 0xFD38 -#define 
MS_NEW_BLOCK_1 0xFD39 -#define MS_LOG_BLOCK_0 0xFD3A -#define MS_LOG_BLOCK_1 0xFD3B -#define MS_BUS_WIDTH 0xFD3C -#define MS_PAGE_START 0xFD3D -#define MS_PAGE_LENGTH 0xFD3E -#define MS_CFG 0xFD40 -#define MS_TPC 0xFD41 -#define MS_TRANS_CFG 0xFD42 -#define MS_TRANSFER 0xFD43 -#define MS_INT_REG 0xFD44 -#define MS_BYTE_CNT 0xFD45 -#define MS_SECTOR_CNT_L 0xFD46 -#define MS_SECTOR_CNT_H 0xFD47 -#define MS_DBUS_H 0xFD48 - -#define CARD_DMA1_CTL 0xFD5C -#define CARD_PULL_CTL1 0xFD60 -#define CARD_PULL_CTL2 0xFD61 -#define CARD_PULL_CTL3 0xFD62 -#define CARD_PULL_CTL4 0xFD63 -#define CARD_PULL_CTL5 0xFD64 -#define CARD_PULL_CTL6 0xFD65 -#define CARD_EXIST 0xFD6F -#define CARD_INT_PEND 0xFD71 - -#define LDO_POWER_CFG 0xFD7B - -#define SD_CFG1 0xFDA0 -#define SD_CFG2 0xFDA1 -#define SD_CFG3 0xFDA2 -#define SD_STAT1 0xFDA3 -#define SD_STAT2 0xFDA4 -#define SD_BUS_STAT 0xFDA5 -#define SD_PAD_CTL 0xFDA6 -#define SD_SAMPLE_POINT_CTL 0xFDA7 -#define SD_PUSH_POINT_CTL 0xFDA8 -#define SD_CMD0 0xFDA9 -#define SD_CMD1 0xFDAA -#define SD_CMD2 0xFDAB -#define SD_CMD3 0xFDAC -#define SD_CMD4 0xFDAD -#define SD_CMD5 0xFDAE -#define SD_BYTE_CNT_L 0xFDAF -#define SD_BYTE_CNT_H 0xFDB0 -#define SD_BLOCK_CNT_L 0xFDB1 -#define SD_BLOCK_CNT_H 0xFDB2 -#define SD_TRANSFER 0xFDB3 -#define SD_CMD_STATE 0xFDB5 -#define SD_DATA_STATE 0xFDB6 -#define SD_VPCLK0_CTL 0xFC2A -#define SD_VPCLK1_CTL 0xFC2B -#define SD_DCMPS0_CTL 0xFC2C -#define SD_DCMPS1_CTL 0xFC2D - -#define CARD_DMA1_CTL 0xFD5C - -#define HW_VERSION 0xFC01 - -#define SSC_CLK_FPGA_SEL 0xFC02 -#define CLK_DIV 0xFC03 -#define SFSM_ED 0xFC04 - -#define CD_DEGLITCH_WIDTH 0xFC20 -#define CD_DEGLITCH_EN 0xFC21 -#define AUTO_DELINK_EN 0xFC23 - -#define FPGA_PULL_CTL 0xFC1D -#define CARD_CLK_SOURCE 0xFC2E - -#define CARD_SHARE_MODE 0xFD51 -#define CARD_DRIVE_SEL 0xFD52 -#define CARD_STOP 0xFD53 -#define CARD_OE 0xFD54 -#define CARD_AUTO_BLINK 0xFD55 -#define CARD_GPIO 0xFD56 -#define SD30_DRIVE_SEL 0xFD57 - -#define CARD_DATA_SOURCE 0xFD5D 
-#define CARD_SELECT 0xFD5E - -#define CARD_CLK_EN 0xFD79 -#define CARD_PWR_CTL 0xFD7A - -#define OCPCTL 0xFD80 -#define OCPPARA1 0xFD81 -#define OCPPARA2 0xFD82 -#define OCPSTAT 0xFD83 - -#define HS_USB_STAT 0xFE01 -#define HS_VCONTROL 0xFE26 -#define HS_VSTAIN 0xFE27 -#define HS_VLOADM 0xFE28 -#define HS_VSTAOUT 0xFE29 - -#define MC_IRQ 0xFF00 -#define MC_IRQEN 0xFF01 -#define MC_FIFO_CTL 0xFF02 -#define MC_FIFO_BC0 0xFF03 -#define MC_FIFO_BC1 0xFF04 -#define MC_FIFO_STAT 0xFF05 -#define MC_FIFO_MODE 0xFF06 -#define MC_FIFO_RD_PTR0 0xFF07 -#define MC_FIFO_RD_PTR1 0xFF08 -#define MC_DMA_CTL 0xFF10 -#define MC_DMA_TC0 0xFF11 -#define MC_DMA_TC1 0xFF12 -#define MC_DMA_TC2 0xFF13 -#define MC_DMA_TC3 0xFF14 -#define MC_DMA_RST 0xFF15 - -#define RBUF_SIZE_MASK 0xFBFF -#define RBUF_BASE 0xF000 -#define PPBUF_BASE1 0xF800 -#define PPBUF_BASE2 0xFA00 - -/* internal register value macros */ -#define POWER_OFF 0x03 -#define PARTIAL_POWER_ON 0x02 -#define POWER_ON 0x00 -#define POWER_MASK 0x03 -#define LDO3318_PWR_MASK 0x0C -#define LDO_ON 0x00 -#define LDO_SUSPEND 0x08 -#define LDO_OFF 0x0C -#define DV3318_AUTO_PWR_OFF 0x10 -#define FORCE_LDO_POWERB 0x60 - -/* LDO_POWER_CFG */ -#define TUNE_SD18_MASK 0x1C -#define TUNE_SD18_1V7 0x00 -#define TUNE_SD18_1V8 (0x01 << 2) -#define TUNE_SD18_1V9 (0x02 << 2) -#define TUNE_SD18_2V0 (0x03 << 2) -#define TUNE_SD18_2V7 (0x04 << 2) -#define TUNE_SD18_2V8 (0x05 << 2) -#define TUNE_SD18_2V9 (0x06 << 2) -#define TUNE_SD18_3V3 (0x07 << 2) - -/* CLK_DIV */ -#define CLK_CHANGE 0x80 -#define CLK_DIV_1 0x00 -#define CLK_DIV_2 0x01 -#define CLK_DIV_4 0x02 -#define CLK_DIV_8 0x03 - -#define SSC_POWER_MASK 0x01 -#define SSC_POWER_DOWN 0x01 -#define SSC_POWER_ON 0x00 - -#define FPGA_VER 0x80 -#define HW_VER_MASK 0x0F - -#define EXTEND_DMA1_ASYNC_SIGNAL 0x02 - -/* CFG_MODE*/ -#define XTAL_FREE 0x80 -#define CLK_MODE_MASK 0x03 -#define CLK_MODE_12M_XTAL 0x00 -#define CLK_MODE_NON_XTAL 0x01 -#define CLK_MODE_24M_OSC 0x02 -#define CLK_MODE_48M_OSC 
0x03 - -/* CFG_MODE_1*/ -#define RTS5179 0x02 - -#define NYET_EN 0x01 -#define NYET_MSAK 0x01 - -#define SD30_DRIVE_MASK 0x07 -#define SD20_DRIVE_MASK 0x03 - -#define DISABLE_SD_CD 0x08 -#define DISABLE_MS_CD 0x10 -#define DISABLE_XD_CD 0x20 -#define SD_CD_DEGLITCH_EN 0x01 -#define MS_CD_DEGLITCH_EN 0x02 -#define XD_CD_DEGLITCH_EN 0x04 - -#define CARD_SHARE_LQFP48 0x04 -#define CARD_SHARE_QFN24 0x00 -#define CARD_SHARE_LQFP_SEL 0x04 -#define CARD_SHARE_XD 0x00 -#define CARD_SHARE_SD 0x01 -#define CARD_SHARE_MS 0x02 -#define CARD_SHARE_MASK 0x03 - - -/* SD30_DRIVE_SEL */ -#define DRIVER_TYPE_A 0x05 -#define DRIVER_TYPE_B 0x03 -#define DRIVER_TYPE_C 0x02 -#define DRIVER_TYPE_D 0x01 - -/* SD_BUS_STAT */ -#define SD_CLK_TOGGLE_EN 0x80 -#define SD_CLK_FORCE_STOP 0x40 -#define SD_DAT3_STATUS 0x10 -#define SD_DAT2_STATUS 0x08 -#define SD_DAT1_STATUS 0x04 -#define SD_DAT0_STATUS 0x02 -#define SD_CMD_STATUS 0x01 - -/* SD_PAD_CTL */ -#define SD_IO_USING_1V8 0x80 -#define SD_IO_USING_3V3 0x7F -#define TYPE_A_DRIVING 0x00 -#define TYPE_B_DRIVING 0x01 -#define TYPE_C_DRIVING 0x02 -#define TYPE_D_DRIVING 0x03 - -/* CARD_CLK_EN */ -#define SD_CLK_EN 0x04 -#define MS_CLK_EN 0x08 - -/* CARD_SELECT */ -#define SD_MOD_SEL 2 -#define MS_MOD_SEL 3 - -/* CARD_SHARE_MODE */ -#define CARD_SHARE_LQFP48 0x04 -#define CARD_SHARE_QFN24 0x00 -#define CARD_SHARE_LQFP_SEL 0x04 -#define CARD_SHARE_XD 0x00 -#define CARD_SHARE_SD 0x01 -#define CARD_SHARE_MS 0x02 -#define CARD_SHARE_MASK 0x03 - -/* SSC_CTL1 */ -#define SSC_RSTB 0x80 -#define SSC_8X_EN 0x40 -#define SSC_FIX_FRAC 0x20 -#define SSC_SEL_1M 0x00 -#define SSC_SEL_2M 0x08 -#define SSC_SEL_4M 0x10 -#define SSC_SEL_8M 0x18 - -/* SSC_CTL2 */ -#define SSC_DEPTH_MASK 0x03 -#define SSC_DEPTH_DISALBE 0x00 -#define SSC_DEPTH_2M 0x01 -#define SSC_DEPTH_1M 0x02 -#define SSC_DEPTH_512K 0x03 - -/* SD_VPCLK0_CTL */ -#define PHASE_CHANGE 0x80 -#define PHASE_NOT_RESET 0x40 - -/* SD_TRANSFER */ -#define SD_TRANSFER_START 0x80 -#define SD_TRANSFER_END 0x40 
-#define SD_STAT_IDLE 0x20 -#define SD_TRANSFER_ERR 0x10 -#define SD_TM_NORMAL_WRITE 0x00 -#define SD_TM_AUTO_WRITE_3 0x01 -#define SD_TM_AUTO_WRITE_4 0x02 -#define SD_TM_AUTO_READ_3 0x05 -#define SD_TM_AUTO_READ_4 0x06 -#define SD_TM_CMD_RSP 0x08 -#define SD_TM_AUTO_WRITE_1 0x09 -#define SD_TM_AUTO_WRITE_2 0x0A -#define SD_TM_NORMAL_READ 0x0C -#define SD_TM_AUTO_READ_1 0x0D -#define SD_TM_AUTO_READ_2 0x0E -#define SD_TM_AUTO_TUNING 0x0F - -/* SD_CFG1 */ -#define SD_CLK_DIVIDE_0 0x00 -#define SD_CLK_DIVIDE_256 0xC0 -#define SD_CLK_DIVIDE_128 0x80 -#define SD_CLK_DIVIDE_MASK 0xC0 -#define SD_BUS_WIDTH_1BIT 0x00 -#define SD_BUS_WIDTH_4BIT 0x01 -#define SD_BUS_WIDTH_8BIT 0x02 -#define SD_ASYNC_FIFO_RST 0x10 -#define SD_20_MODE 0x00 -#define SD_DDR_MODE 0x04 -#define SD_30_MODE 0x08 - -/* SD_CFG2 */ -#define SD_CALCULATE_CRC7 0x00 -#define SD_NO_CALCULATE_CRC7 0x80 -#define SD_CHECK_CRC16 0x00 -#define SD_NO_CHECK_CRC16 0x40 -#define SD_WAIT_CRC_TO_EN 0x20 -#define SD_WAIT_BUSY_END 0x08 -#define SD_NO_WAIT_BUSY_END 0x00 -#define SD_CHECK_CRC7 0x00 -#define SD_NO_CHECK_CRC7 0x04 -#define SD_RSP_LEN_0 0x00 -#define SD_RSP_LEN_6 0x01 -#define SD_RSP_LEN_17 0x02 -#define SD_RSP_TYPE_R0 0x04 -#define SD_RSP_TYPE_R1 0x01 -#define SD_RSP_TYPE_R1b 0x09 -#define SD_RSP_TYPE_R2 0x02 -#define SD_RSP_TYPE_R3 0x05 -#define SD_RSP_TYPE_R4 0x05 -#define SD_RSP_TYPE_R5 0x01 -#define SD_RSP_TYPE_R6 0x01 -#define SD_RSP_TYPE_R7 0x01 - -/* SD_STAT1 */ -#define SD_CRC7_ERR 0x80 -#define SD_CRC16_ERR 0x40 -#define SD_CRC_WRITE_ERR 0x20 -#define SD_CRC_WRITE_ERR_MASK 0x1C -#define GET_CRC_TIME_OUT 0x02 -#define SD_TUNING_COMPARE_ERR 0x01 - -/* SD_DATA_STATE */ -#define SD_DATA_IDLE 0x80 - -/* CARD_DATA_SOURCE */ -#define PINGPONG_BUFFER 0x01 -#define RING_BUFFER 0x00 - -/* CARD_OE */ -#define SD_OUTPUT_EN 0x04 -#define MS_OUTPUT_EN 0x08 - -/* CARD_STOP */ -#define SD_STOP 0x04 -#define MS_STOP 0x08 -#define SD_CLR_ERR 0x40 -#define MS_CLR_ERR 0x80 - -/* CARD_CLK_SOURCE */ -#define 
CRC_FIX_CLK (0x00 << 0) -#define CRC_VAR_CLK0 (0x01 << 0) -#define CRC_VAR_CLK1 (0x02 << 0) -#define SD30_FIX_CLK (0x00 << 2) -#define SD30_VAR_CLK0 (0x01 << 2) -#define SD30_VAR_CLK1 (0x02 << 2) -#define SAMPLE_FIX_CLK (0x00 << 4) -#define SAMPLE_VAR_CLK0 (0x01 << 4) -#define SAMPLE_VAR_CLK1 (0x02 << 4) - -/* SD_SAMPLE_POINT_CTL */ -#define DDR_FIX_RX_DAT 0x00 -#define DDR_VAR_RX_DAT 0x80 -#define DDR_FIX_RX_DAT_EDGE 0x00 -#define DDR_FIX_RX_DAT_14_DELAY 0x40 -#define DDR_FIX_RX_CMD 0x00 -#define DDR_VAR_RX_CMD 0x20 -#define DDR_FIX_RX_CMD_POS_EDGE 0x00 -#define DDR_FIX_RX_CMD_14_DELAY 0x10 -#define SD20_RX_POS_EDGE 0x00 -#define SD20_RX_14_DELAY 0x08 -#define SD20_RX_SEL_MASK 0x08 - -/* SD_PUSH_POINT_CTL */ -#define DDR_FIX_TX_CMD_DAT 0x00 -#define DDR_VAR_TX_CMD_DAT 0x80 -#define DDR_FIX_TX_DAT_14_TSU 0x00 -#define DDR_FIX_TX_DAT_12_TSU 0x40 -#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 -#define DDR_FIX_TX_CMD_14_AHEAD 0x20 -#define SD20_TX_NEG_EDGE 0x00 -#define SD20_TX_14_AHEAD 0x10 -#define SD20_TX_SEL_MASK 0x10 -#define DDR_VAR_SDCLK_POL_SWAP 0x01 - -/* MS_CFG */ -#define SAMPLE_TIME_RISING 0x00 -#define SAMPLE_TIME_FALLING 0x80 -#define PUSH_TIME_DEFAULT 0x00 -#define PUSH_TIME_ODD 0x40 -#define NO_EXTEND_TOGGLE 0x00 -#define EXTEND_TOGGLE_CHK 0x20 -#define MS_BUS_WIDTH_1 0x00 -#define MS_BUS_WIDTH_4 0x10 -#define MS_BUS_WIDTH_8 0x18 -#define MS_2K_SECTOR_MODE 0x04 -#define MS_512_SECTOR_MODE 0x00 -#define MS_TOGGLE_TIMEOUT_EN 0x00 -#define MS_TOGGLE_TIMEOUT_DISEN 0x01 -#define MS_NO_CHECK_INT 0x02 - -/* MS_TRANS_CFG */ -#define WAIT_INT 0x80 -#define NO_WAIT_INT 0x00 -#define NO_AUTO_READ_INT_REG 0x00 -#define AUTO_READ_INT_REG 0x40 -#define MS_CRC16_ERR 0x20 -#define MS_RDY_TIMEOUT 0x10 -#define MS_INT_CMDNK 0x08 -#define MS_INT_BREQ 0x04 -#define MS_INT_ERR 0x02 -#define MS_INT_CED 0x01 - -/* MS_TRANSFER */ -#define MS_TRANSFER_START 0x80 -#define MS_TRANSFER_END 0x40 -#define MS_TRANSFER_ERR 0x20 -#define MS_BS_STATE 0x10 -#define MS_TM_READ_BYTES 0x00 -#define 
MS_TM_NORMAL_READ 0x01 -#define MS_TM_WRITE_BYTES 0x04 -#define MS_TM_NORMAL_WRITE 0x05 -#define MS_TM_AUTO_READ 0x08 -#define MS_TM_AUTO_WRITE 0x0C -#define MS_TM_SET_CMD 0x06 -#define MS_TM_COPY_PAGE 0x07 -#define MS_TM_MULTI_READ 0x02 -#define MS_TM_MULTI_WRITE 0x03 - -/* MC_FIFO_CTL */ -#define FIFO_FLUSH 0x01 - -/* MC_DMA_RST */ -#define DMA_RESET 0x01 - -/* MC_DMA_CTL */ -#define DMA_TC_EQ_0 0x80 -#define DMA_DIR_TO_CARD 0x00 -#define DMA_DIR_FROM_CARD 0x02 -#define DMA_EN 0x01 -#define DMA_128 (0 << 2) -#define DMA_256 (1 << 2) -#define DMA_512 (2 << 2) -#define DMA_1024 (3 << 2) -#define DMA_PACK_SIZE_MASK 0x0C - -/* CARD_INT_PEND */ -#define XD_INT 0x10 -#define MS_INT 0x08 -#define SD_INT 0x04 - -/* LED operations*/ -static inline int rtsx_usb_turn_on_led(struct rtsx_ucr *ucr) -{ - return rtsx_usb_ep0_write_register(ucr, CARD_GPIO, 0x03, 0x02); -} - -static inline int rtsx_usb_turn_off_led(struct rtsx_ucr *ucr) -{ - return rtsx_usb_ep0_write_register(ucr, CARD_GPIO, 0x03, 0x03); -} - -/* HW error clearing */ -static inline void rtsx_usb_clear_fsm_err(struct rtsx_ucr *ucr) -{ - rtsx_usb_ep0_write_register(ucr, SFSM_ED, 0xf8, 0xf8); -} - -static inline void rtsx_usb_clear_dma_err(struct rtsx_ucr *ucr) -{ - rtsx_usb_ep0_write_register(ucr, MC_FIFO_CTL, - FIFO_FLUSH, FIFO_FLUSH); - rtsx_usb_ep0_write_register(ucr, MC_DMA_RST, DMA_RESET, DMA_RESET); -} -#endif /* __RTS51139_H */ diff --git a/include/linux/rtsx_common.h b/include/linux/rtsx_common.h new file mode 100644 index 000000000000..443176ee1ab0 --- /dev/null +++ b/include/linux/rtsx_common.h @@ -0,0 +1,50 @@ +/* Driver for Realtek driver-based card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Author: + * Wei WANG + */ + +#ifndef __RTSX_COMMON_H +#define __RTSX_COMMON_H + +#define DRV_NAME_RTSX_PCI "rtsx_pci" +#define DRV_NAME_RTSX_PCI_SDMMC "rtsx_pci_sdmmc" +#define DRV_NAME_RTSX_PCI_MS "rtsx_pci_ms" + +#define RTSX_REG_PAIR(addr, val) (((u32)(addr) << 16) | (u8)(val)) + +#define RTSX_SSC_DEPTH_4M 0x01 +#define RTSX_SSC_DEPTH_2M 0x02 +#define RTSX_SSC_DEPTH_1M 0x03 +#define RTSX_SSC_DEPTH_500K 0x04 +#define RTSX_SSC_DEPTH_250K 0x05 + +#define RTSX_SD_CARD 0 +#define RTSX_MS_CARD 1 + +#define CLK_TO_DIV_N 0 +#define DIV_N_TO_CLK 1 + +struct platform_device; + +struct rtsx_slot { + struct platform_device *p_dev; + void (*card_event)(struct platform_device *p_dev); +}; + +#endif diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h new file mode 100644 index 000000000000..82abac70b3db --- /dev/null +++ b/include/linux/rtsx_pci.h @@ -0,0 +1,1141 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>.
+ * + * Author: + * Wei WANG + */ + +#ifndef __RTSX_PCI_H +#define __RTSX_PCI_H + +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/rtsx_common.h> + +#define MAX_RW_REG_CNT 1024 + +#define RTSX_HCBAR 0x00 +#define RTSX_HCBCTLR 0x04 +#define STOP_CMD (0x01 << 28) +#define READ_REG_CMD 0 +#define WRITE_REG_CMD 1 +#define CHECK_REG_CMD 2 + +#define RTSX_HDBAR 0x08 +#define SG_INT 0x04 +#define SG_END 0x02 +#define SG_VALID 0x01 +#define SG_NO_OP 0x00 +#define SG_TRANS_DATA (0x02 << 4) +#define SG_LINK_DESC (0x03 << 4) +#define RTSX_HDBCTLR 0x0C +#define SDMA_MODE 0x00 +#define ADMA_MODE (0x02 << 26) +#define STOP_DMA (0x01 << 28) +#define TRIG_DMA (0x01 << 31) + +#define RTSX_HAIMR 0x10 +#define HAIMR_TRANS_START (0x01 << 31) +#define HAIMR_READ 0x00 +#define HAIMR_WRITE (0x01 << 30) +#define HAIMR_READ_START (HAIMR_TRANS_START | HAIMR_READ) +#define HAIMR_WRITE_START (HAIMR_TRANS_START | HAIMR_WRITE) +#define HAIMR_TRANS_END (HAIMR_TRANS_START) + +#define RTSX_BIPR 0x14 +#define CMD_DONE_INT (1 << 31) +#define DATA_DONE_INT (1 << 30) +#define TRANS_OK_INT (1 << 29) +#define TRANS_FAIL_INT (1 << 28) +#define XD_INT (1 << 27) +#define MS_INT (1 << 26) +#define SD_INT (1 << 25) +#define GPIO0_INT (1 << 24) +#define OC_INT (1 << 23) +#define SD_WRITE_PROTECT (1 << 19) +#define XD_EXIST (1 << 18) +#define MS_EXIST (1 << 17) +#define SD_EXIST (1 << 16) +#define DELINK_INT GPIO0_INT +#define MS_OC_INT (1 << 23) +#define SD_OC_INT (1 << 22) + +#define CARD_INT (XD_INT | MS_INT | SD_INT) +#define NEED_COMPLETE_INT (DATA_DONE_INT | TRANS_OK_INT | TRANS_FAIL_INT) +#define RTSX_INT (CMD_DONE_INT | NEED_COMPLETE_INT | \ + CARD_INT | GPIO0_INT | OC_INT) +#define CARD_EXIST (XD_EXIST | MS_EXIST | SD_EXIST) + +#define RTSX_BIER 0x18 +#define CMD_DONE_INT_EN (1 << 31) +#define DATA_DONE_INT_EN (1 << 30) +#define TRANS_OK_INT_EN (1 << 29) +#define TRANS_FAIL_INT_EN (1 << 28) +#define XD_INT_EN (1 << 27) +#define MS_INT_EN (1 << 26) +#define SD_INT_EN (1 << 25) +#define GPIO0_INT_EN (1 << 24) +#define OC_INT_EN (1 <<
23) +#define DELINK_INT_EN GPIO0_INT_EN +#define MS_OC_INT_EN (1 << 23) +#define SD_OC_INT_EN (1 << 22) + + +/* + * macros for easy use + */ +#define rtsx_pci_writel(pcr, reg, value) \ + iowrite32(value, (pcr)->remap_addr + reg) +#define rtsx_pci_readl(pcr, reg) \ + ioread32((pcr)->remap_addr + reg) +#define rtsx_pci_writew(pcr, reg, value) \ + iowrite16(value, (pcr)->remap_addr + reg) +#define rtsx_pci_readw(pcr, reg) \ + ioread16((pcr)->remap_addr + reg) +#define rtsx_pci_writeb(pcr, reg, value) \ + iowrite8(value, (pcr)->remap_addr + reg) +#define rtsx_pci_readb(pcr, reg) \ + ioread8((pcr)->remap_addr + reg) + +#define rtsx_pci_read_config_byte(pcr, where, val) \ + pci_read_config_byte((pcr)->pci, where, val) + +#define rtsx_pci_write_config_byte(pcr, where, val) \ + pci_write_config_byte((pcr)->pci, where, val) + +#define rtsx_pci_read_config_dword(pcr, where, val) \ + pci_read_config_dword((pcr)->pci, where, val) + +#define rtsx_pci_write_config_dword(pcr, where, val) \ + pci_write_config_dword((pcr)->pci, where, val) + +#define STATE_TRANS_NONE 0 +#define STATE_TRANS_CMD 1 +#define STATE_TRANS_BUF 2 +#define STATE_TRANS_SG 3 + +#define TRANS_NOT_READY 0 +#define TRANS_RESULT_OK 1 +#define TRANS_RESULT_FAIL 2 +#define TRANS_NO_DEVICE 3 + +#define RTSX_RESV_BUF_LEN 4096 +#define HOST_CMDS_BUF_LEN 1024 +#define HOST_SG_TBL_BUF_LEN (RTSX_RESV_BUF_LEN - HOST_CMDS_BUF_LEN) +#define HOST_SG_TBL_ITEMS (HOST_SG_TBL_BUF_LEN / 8) +#define MAX_SG_ITEM_LEN 0x80000 +#define HOST_TO_DEVICE 0 +#define DEVICE_TO_HOST 1 + +#define OUTPUT_3V3 0 +#define OUTPUT_1V8 1 + +#define RTSX_PHASE_MAX 32 +#define RX_TUNING_CNT 3 + +#define MS_CFG 0xFD40 +#define SAMPLE_TIME_RISING 0x00 +#define SAMPLE_TIME_FALLING 0x80 +#define PUSH_TIME_DEFAULT 0x00 +#define PUSH_TIME_ODD 0x40 +#define NO_EXTEND_TOGGLE 0x00 +#define EXTEND_TOGGLE_CHK 0x20 +#define MS_BUS_WIDTH_1 0x00 +#define MS_BUS_WIDTH_4 0x10 +#define MS_BUS_WIDTH_8 0x18 +#define MS_2K_SECTOR_MODE 0x04 +#define MS_512_SECTOR_MODE 
0x00 +#define MS_TOGGLE_TIMEOUT_EN 0x00 +#define MS_TOGGLE_TIMEOUT_DISEN 0x01 +#define MS_NO_CHECK_INT 0x02 +#define MS_TPC 0xFD41 +#define MS_TRANS_CFG 0xFD42 +#define WAIT_INT 0x80 +#define NO_WAIT_INT 0x00 +#define NO_AUTO_READ_INT_REG 0x00 +#define AUTO_READ_INT_REG 0x40 +#define MS_CRC16_ERR 0x20 +#define MS_RDY_TIMEOUT 0x10 +#define MS_INT_CMDNK 0x08 +#define MS_INT_BREQ 0x04 +#define MS_INT_ERR 0x02 +#define MS_INT_CED 0x01 +#define MS_TRANSFER 0xFD43 +#define MS_TRANSFER_START 0x80 +#define MS_TRANSFER_END 0x40 +#define MS_TRANSFER_ERR 0x20 +#define MS_BS_STATE 0x10 +#define MS_TM_READ_BYTES 0x00 +#define MS_TM_NORMAL_READ 0x01 +#define MS_TM_WRITE_BYTES 0x04 +#define MS_TM_NORMAL_WRITE 0x05 +#define MS_TM_AUTO_READ 0x08 +#define MS_TM_AUTO_WRITE 0x0C +#define MS_INT_REG 0xFD44 +#define MS_BYTE_CNT 0xFD45 +#define MS_SECTOR_CNT_L 0xFD46 +#define MS_SECTOR_CNT_H 0xFD47 +#define MS_DBUS_H 0xFD48 + +#define SD_CFG1 0xFDA0 +#define SD_CLK_DIVIDE_0 0x00 +#define SD_CLK_DIVIDE_256 0xC0 +#define SD_CLK_DIVIDE_128 0x80 +#define SD_BUS_WIDTH_1BIT 0x00 +#define SD_BUS_WIDTH_4BIT 0x01 +#define SD_BUS_WIDTH_8BIT 0x02 +#define SD_ASYNC_FIFO_NOT_RST 0x10 +#define SD_20_MODE 0x00 +#define SD_DDR_MODE 0x04 +#define SD_30_MODE 0x08 +#define SD_CLK_DIVIDE_MASK 0xC0 +#define SD_CFG2 0xFDA1 +#define SD_CALCULATE_CRC7 0x00 +#define SD_NO_CALCULATE_CRC7 0x80 +#define SD_CHECK_CRC16 0x00 +#define SD_NO_CHECK_CRC16 0x40 +#define SD_NO_CHECK_WAIT_CRC_TO 0x20 +#define SD_WAIT_BUSY_END 0x08 +#define SD_NO_WAIT_BUSY_END 0x00 +#define SD_CHECK_CRC7 0x00 +#define SD_NO_CHECK_CRC7 0x04 +#define SD_RSP_LEN_0 0x00 +#define SD_RSP_LEN_6 0x01 +#define SD_RSP_LEN_17 0x02 +#define SD_RSP_TYPE_R0 0x04 +#define SD_RSP_TYPE_R1 0x01 +#define SD_RSP_TYPE_R1b 0x09 +#define SD_RSP_TYPE_R2 0x02 +#define SD_RSP_TYPE_R3 0x05 +#define SD_RSP_TYPE_R4 0x05 +#define SD_RSP_TYPE_R5 0x01 +#define SD_RSP_TYPE_R6 0x01 +#define SD_RSP_TYPE_R7 0x01 +#define SD_CFG3 0xFDA2 +#define SD_RSP_80CLK_TIMEOUT_EN 0x01 + 
+#define SD_STAT1 0xFDA3 +#define SD_CRC7_ERR 0x80 +#define SD_CRC16_ERR 0x40 +#define SD_CRC_WRITE_ERR 0x20 +#define SD_CRC_WRITE_ERR_MASK 0x1C +#define GET_CRC_TIME_OUT 0x02 +#define SD_TUNING_COMPARE_ERR 0x01 +#define SD_STAT2 0xFDA4 +#define SD_RSP_80CLK_TIMEOUT 0x01 + +#define SD_BUS_STAT 0xFDA5 +#define SD_CLK_TOGGLE_EN 0x80 +#define SD_CLK_FORCE_STOP 0x40 +#define SD_DAT3_STATUS 0x10 +#define SD_DAT2_STATUS 0x08 +#define SD_DAT1_STATUS 0x04 +#define SD_DAT0_STATUS 0x02 +#define SD_CMD_STATUS 0x01 +#define SD_PAD_CTL 0xFDA6 +#define SD_IO_USING_1V8 0x80 +#define SD_IO_USING_3V3 0x7F +#define TYPE_A_DRIVING 0x00 +#define TYPE_B_DRIVING 0x01 +#define TYPE_C_DRIVING 0x02 +#define TYPE_D_DRIVING 0x03 +#define SD_SAMPLE_POINT_CTL 0xFDA7 +#define DDR_FIX_RX_DAT 0x00 +#define DDR_VAR_RX_DAT 0x80 +#define DDR_FIX_RX_DAT_EDGE 0x00 +#define DDR_FIX_RX_DAT_14_DELAY 0x40 +#define DDR_FIX_RX_CMD 0x00 +#define DDR_VAR_RX_CMD 0x20 +#define DDR_FIX_RX_CMD_POS_EDGE 0x00 +#define DDR_FIX_RX_CMD_14_DELAY 0x10 +#define SD20_RX_POS_EDGE 0x00 +#define SD20_RX_14_DELAY 0x08 +#define SD20_RX_SEL_MASK 0x08 +#define SD_PUSH_POINT_CTL 0xFDA8 +#define DDR_FIX_TX_CMD_DAT 0x00 +#define DDR_VAR_TX_CMD_DAT 0x80 +#define DDR_FIX_TX_DAT_14_TSU 0x00 +#define DDR_FIX_TX_DAT_12_TSU 0x40 +#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 +#define DDR_FIX_TX_CMD_14_AHEAD 0x20 +#define SD20_TX_NEG_EDGE 0x00 +#define SD20_TX_14_AHEAD 0x10 +#define SD20_TX_SEL_MASK 0x10 +#define DDR_VAR_SDCLK_POL_SWAP 0x01 +#define SD_CMD0 0xFDA9 +#define SD_CMD_START 0x40 +#define SD_CMD1 0xFDAA +#define SD_CMD2 0xFDAB +#define SD_CMD3 0xFDAC +#define SD_CMD4 0xFDAD +#define SD_CMD5 0xFDAE +#define SD_BYTE_CNT_L 0xFDAF +#define SD_BYTE_CNT_H 0xFDB0 +#define SD_BLOCK_CNT_L 0xFDB1 +#define SD_BLOCK_CNT_H 0xFDB2 +#define SD_TRANSFER 0xFDB3 +#define SD_TRANSFER_START 0x80 +#define SD_TRANSFER_END 0x40 +#define SD_STAT_IDLE 0x20 +#define SD_TRANSFER_ERR 0x10 +#define SD_TM_NORMAL_WRITE 0x00 +#define SD_TM_AUTO_WRITE_3 0x01 +#define 
SD_TM_AUTO_WRITE_4 0x02 +#define SD_TM_AUTO_READ_3 0x05 +#define SD_TM_AUTO_READ_4 0x06 +#define SD_TM_CMD_RSP 0x08 +#define SD_TM_AUTO_WRITE_1 0x09 +#define SD_TM_AUTO_WRITE_2 0x0A +#define SD_TM_NORMAL_READ 0x0C +#define SD_TM_AUTO_READ_1 0x0D +#define SD_TM_AUTO_READ_2 0x0E +#define SD_TM_AUTO_TUNING 0x0F +#define SD_CMD_STATE 0xFDB5 +#define SD_CMD_IDLE 0x80 + +#define SD_DATA_STATE 0xFDB6 +#define SD_DATA_IDLE 0x80 + +#define SRCTL 0xFC13 + +#define DCM_DRP_CTL 0xFC23 +#define DCM_RESET 0x08 +#define DCM_LOCKED 0x04 +#define DCM_208M 0x00 +#define DCM_TX 0x01 +#define DCM_RX 0x02 +#define DCM_DRP_TRIG 0xFC24 +#define DRP_START 0x80 +#define DRP_DONE 0x40 +#define DCM_DRP_CFG 0xFC25 +#define DRP_WRITE 0x80 +#define DRP_READ 0x00 +#define DCM_WRITE_ADDRESS_50 0x50 +#define DCM_WRITE_ADDRESS_51 0x51 +#define DCM_READ_ADDRESS_00 0x00 +#define DCM_READ_ADDRESS_51 0x51 +#define DCM_DRP_WR_DATA_L 0xFC26 +#define DCM_DRP_WR_DATA_H 0xFC27 +#define DCM_DRP_RD_DATA_L 0xFC28 +#define DCM_DRP_RD_DATA_H 0xFC29 +#define SD_VPCLK0_CTL 0xFC2A +#define SD_VPCLK1_CTL 0xFC2B +#define PHASE_SELECT_MASK 0x1F +#define SD_DCMPS0_CTL 0xFC2C +#define SD_DCMPS1_CTL 0xFC2D +#define SD_VPTX_CTL SD_VPCLK0_CTL +#define SD_VPRX_CTL SD_VPCLK1_CTL +#define PHASE_CHANGE 0x80 +#define PHASE_NOT_RESET 0x40 +#define SD_DCMPS_TX_CTL SD_DCMPS0_CTL +#define SD_DCMPS_RX_CTL SD_DCMPS1_CTL +#define DCMPS_CHANGE 0x80 +#define DCMPS_CHANGE_DONE 0x40 +#define DCMPS_ERROR 0x20 +#define DCMPS_CURRENT_PHASE 0x1F +#define CARD_CLK_SOURCE 0xFC2E +#define CRC_FIX_CLK (0x00 << 0) +#define CRC_VAR_CLK0 (0x01 << 0) +#define CRC_VAR_CLK1 (0x02 << 0) +#define SD30_FIX_CLK (0x00 << 2) +#define SD30_VAR_CLK0 (0x01 << 2) +#define SD30_VAR_CLK1 (0x02 << 2) +#define SAMPLE_FIX_CLK (0x00 << 4) +#define SAMPLE_VAR_CLK0 (0x01 << 4) +#define SAMPLE_VAR_CLK1 (0x02 << 4) +#define CARD_PWR_CTL 0xFD50 +#define PMOS_STRG_MASK 0x10 +#define PMOS_STRG_800mA 0x10 +#define PMOS_STRG_400mA 0x00 +#define SD_POWER_OFF 0x03 +#define 
SD_PARTIAL_POWER_ON 0x01 +#define SD_POWER_ON 0x00 +#define SD_POWER_MASK 0x03 +#define MS_POWER_OFF 0x0C +#define MS_PARTIAL_POWER_ON 0x04 +#define MS_POWER_ON 0x00 +#define MS_POWER_MASK 0x0C +#define BPP_POWER_OFF 0x0F +#define BPP_POWER_5_PERCENT_ON 0x0E +#define BPP_POWER_10_PERCENT_ON 0x0C +#define BPP_POWER_15_PERCENT_ON 0x08 +#define BPP_POWER_ON 0x00 +#define BPP_POWER_MASK 0x0F +#define SD_VCC_PARTIAL_POWER_ON 0x02 +#define SD_VCC_POWER_ON 0x00 +#define CARD_CLK_SWITCH 0xFD51 +#define RTL8411B_PACKAGE_MODE 0xFD51 +#define CARD_SHARE_MODE 0xFD52 +#define CARD_SHARE_MASK 0x0F +#define CARD_SHARE_MULTI_LUN 0x00 +#define CARD_SHARE_NORMAL 0x00 +#define CARD_SHARE_48_SD 0x04 +#define CARD_SHARE_48_MS 0x08 +#define CARD_SHARE_BAROSSA_SD 0x01 +#define CARD_SHARE_BAROSSA_MS 0x02 +#define CARD_DRIVE_SEL 0xFD53 +#define MS_DRIVE_8mA (0x01 << 6) +#define MMC_DRIVE_8mA (0x01 << 4) +#define XD_DRIVE_8mA (0x01 << 2) +#define GPIO_DRIVE_8mA 0x01 +#define RTS5209_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | MMC_DRIVE_8mA |\ + XD_DRIVE_8mA | GPIO_DRIVE_8mA) +#define RTL8411_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | MMC_DRIVE_8mA |\ + XD_DRIVE_8mA) +#define RTSX_CARD_DRIVE_DEFAULT (MS_DRIVE_8mA | GPIO_DRIVE_8mA) + +#define CARD_STOP 0xFD54 +#define SPI_STOP 0x01 +#define XD_STOP 0x02 +#define SD_STOP 0x04 +#define MS_STOP 0x08 +#define SPI_CLR_ERR 0x10 +#define XD_CLR_ERR 0x20 +#define SD_CLR_ERR 0x40 +#define MS_CLR_ERR 0x80 +#define CARD_OE 0xFD55 +#define SD_OUTPUT_EN 0x04 +#define MS_OUTPUT_EN 0x08 +#define CARD_AUTO_BLINK 0xFD56 +#define CARD_GPIO_DIR 0xFD57 +#define CARD_GPIO 0xFD58 +#define CARD_DATA_SOURCE 0xFD5B +#define PINGPONG_BUFFER 0x01 +#define RING_BUFFER 0x00 +#define SD30_CLK_DRIVE_SEL 0xFD5A +#define DRIVER_TYPE_A 0x05 +#define DRIVER_TYPE_B 0x03 +#define DRIVER_TYPE_C 0x02 +#define DRIVER_TYPE_D 0x01 +#define CARD_SELECT 0xFD5C +#define SD_MOD_SEL 2 +#define MS_MOD_SEL 3 +#define SD30_DRIVE_SEL 0xFD5E +#define CFG_DRIVER_TYPE_A 0x02 +#define CFG_DRIVER_TYPE_B 0x03 
+#define CFG_DRIVER_TYPE_C 0x01 +#define CFG_DRIVER_TYPE_D 0x00 +#define SD30_CMD_DRIVE_SEL 0xFD5E +#define SD30_DAT_DRIVE_SEL 0xFD5F +#define CARD_CLK_EN 0xFD69 +#define SD_CLK_EN 0x04 +#define MS_CLK_EN 0x08 +#define SDIO_CTRL 0xFD6B +#define CD_PAD_CTL 0xFD73 +#define CD_DISABLE_MASK 0x07 +#define MS_CD_DISABLE 0x04 +#define SD_CD_DISABLE 0x02 +#define XD_CD_DISABLE 0x01 +#define CD_DISABLE 0x07 +#define CD_ENABLE 0x00 +#define MS_CD_EN_ONLY 0x03 +#define SD_CD_EN_ONLY 0x05 +#define XD_CD_EN_ONLY 0x06 +#define FORCE_CD_LOW_MASK 0x38 +#define FORCE_CD_XD_LOW 0x08 +#define FORCE_CD_SD_LOW 0x10 +#define FORCE_CD_MS_LOW 0x20 +#define CD_AUTO_DISABLE 0x40 +#define FPDCTL 0xFC00 +#define SSC_POWER_DOWN 0x01 +#define SD_OC_POWER_DOWN 0x02 +#define ALL_POWER_DOWN 0x07 +#define OC_POWER_DOWN 0x06 +#define PDINFO 0xFC01 + +#define CLK_CTL 0xFC02 +#define CHANGE_CLK 0x01 +#define CLK_LOW_FREQ 0x01 + +#define CLK_DIV 0xFC03 +#define CLK_DIV_1 0x01 +#define CLK_DIV_2 0x02 +#define CLK_DIV_4 0x03 +#define CLK_DIV_8 0x04 +#define CLK_SEL 0xFC04 + +#define SSC_DIV_N_0 0xFC0F +#define SSC_DIV_N_1 0xFC10 +#define SSC_CTL1 0xFC11 +#define SSC_RSTB 0x80 +#define SSC_8X_EN 0x40 +#define SSC_FIX_FRAC 0x20 +#define SSC_SEL_1M 0x00 +#define SSC_SEL_2M 0x08 +#define SSC_SEL_4M 0x10 +#define SSC_SEL_8M 0x18 +#define SSC_CTL2 0xFC12 +#define SSC_DEPTH_MASK 0x07 +#define SSC_DEPTH_DISALBE 0x00 +#define SSC_DEPTH_4M 0x01 +#define SSC_DEPTH_2M 0x02 +#define SSC_DEPTH_1M 0x03 +#define SSC_DEPTH_500K 0x04 +#define SSC_DEPTH_250K 0x05 +#define RCCTL 0xFC14 + +#define FPGA_PULL_CTL 0xFC1D +#define OLT_LED_CTL 0xFC1E +#define GPIO_CTL 0xFC1F + +#define LDO_CTL 0xFC1E +#define BPP_ASIC_1V7 0x00 +#define BPP_ASIC_1V8 0x01 +#define BPP_ASIC_1V9 0x02 +#define BPP_ASIC_2V0 0x03 +#define BPP_ASIC_2V7 0x04 +#define BPP_ASIC_2V8 0x05 +#define BPP_ASIC_3V2 0x06 +#define BPP_ASIC_3V3 0x07 +#define BPP_REG_TUNED18 0x07 +#define BPP_TUNED18_SHIFT_8402 5 +#define BPP_TUNED18_SHIFT_8411 4 +#define BPP_PAD_MASK 
0x04 +#define BPP_PAD_3V3 0x04 +#define BPP_PAD_1V8 0x00 +#define BPP_LDO_POWB 0x03 +#define BPP_LDO_ON 0x00 +#define BPP_LDO_SUSPEND 0x02 +#define BPP_LDO_OFF 0x03 +#define SYS_VER 0xFC32 + +#define CARD_PULL_CTL1 0xFD60 +#define CARD_PULL_CTL2 0xFD61 +#define CARD_PULL_CTL3 0xFD62 +#define CARD_PULL_CTL4 0xFD63 +#define CARD_PULL_CTL5 0xFD64 +#define CARD_PULL_CTL6 0xFD65 + +/* PCI Express Related Registers */ +#define IRQEN0 0xFE20 +#define IRQSTAT0 0xFE21 +#define DMA_DONE_INT 0x80 +#define SUSPEND_INT 0x40 +#define LINK_RDY_INT 0x20 +#define LINK_DOWN_INT 0x10 +#define IRQEN1 0xFE22 +#define IRQSTAT1 0xFE23 +#define TLPRIEN 0xFE24 +#define TLPRISTAT 0xFE25 +#define TLPTIEN 0xFE26 +#define TLPTISTAT 0xFE27 +#define DMATC0 0xFE28 +#define DMATC1 0xFE29 +#define DMATC2 0xFE2A +#define DMATC3 0xFE2B +#define DMACTL 0xFE2C +#define DMA_RST 0x80 +#define DMA_BUSY 0x04 +#define DMA_DIR_TO_CARD 0x00 +#define DMA_DIR_FROM_CARD 0x02 +#define DMA_EN 0x01 +#define DMA_128 (0 << 4) +#define DMA_256 (1 << 4) +#define DMA_512 (2 << 4) +#define DMA_1024 (3 << 4) +#define DMA_PACK_SIZE_MASK 0x30 +#define BCTL 0xFE2D +#define RBBC0 0xFE2E +#define RBBC1 0xFE2F +#define RBDAT 0xFE30 +#define RBCTL 0xFE34 +#define CFGADDR0 0xFE35 +#define CFGADDR1 0xFE36 +#define CFGDATA0 0xFE37 +#define CFGDATA1 0xFE38 +#define CFGDATA2 0xFE39 +#define CFGDATA3 0xFE3A +#define CFGRWCTL 0xFE3B +#define PHYRWCTL 0xFE3C +#define PHYDATA0 0xFE3D +#define PHYDATA1 0xFE3E +#define PHYADDR 0xFE3F +#define MSGRXDATA0 0xFE40 +#define MSGRXDATA1 0xFE41 +#define MSGRXDATA2 0xFE42 +#define MSGRXDATA3 0xFE43 +#define MSGTXDATA0 0xFE44 +#define MSGTXDATA1 0xFE45 +#define MSGTXDATA2 0xFE46 +#define MSGTXDATA3 0xFE47 +#define MSGTXCTL 0xFE48 +#define LTR_CTL 0xFE4A +#define LTR_TX_EN_MASK BIT(7) +#define LTR_TX_EN_1 BIT(7) +#define LTR_TX_EN_0 0 +#define LTR_LATENCY_MODE_MASK BIT(6) +#define LTR_LATENCY_MODE_HW 0 +#define LTR_LATENCY_MODE_SW BIT(6) +#define OBFF_CFG 0xFE4C + +#define CDRESUMECTL 0xFE52 +#define 
WAKE_SEL_CTL 0xFE54 +#define PCLK_CTL 0xFE55 +#define PCLK_MODE_SEL 0x20 +#define PME_FORCE_CTL 0xFE56 + +#define ASPM_FORCE_CTL 0xFE57 +#define FORCE_ASPM_CTL0 0x10 +#define FORCE_ASPM_VAL_MASK 0x03 +#define FORCE_ASPM_L1_EN 0x02 +#define FORCE_ASPM_L0_EN 0x01 +#define FORCE_ASPM_NO_ASPM 0x00 +#define PM_CLK_FORCE_CTL 0xFE58 +#define FUNC_FORCE_CTL 0xFE59 +#define FUNC_FORCE_UPME_XMT_DBG 0x02 +#define PERST_GLITCH_WIDTH 0xFE5C +#define CHANGE_LINK_STATE 0xFE5B +#define RESET_LOAD_REG 0xFE5E +#define EFUSE_CONTENT 0xFE5F +#define HOST_SLEEP_STATE 0xFE60 +#define HOST_ENTER_S1 1 +#define HOST_ENTER_S3 2 + +#define SDIO_CFG 0xFE70 +#define PM_EVENT_DEBUG 0xFE71 +#define PME_DEBUG_0 0x08 +#define NFTS_TX_CTRL 0xFE72 + +#define PWR_GATE_CTRL 0xFE75 +#define PWR_GATE_EN 0x01 +#define LDO3318_PWR_MASK 0x06 +#define LDO_ON 0x00 +#define LDO_SUSPEND 0x04 +#define LDO_OFF 0x06 +#define PWD_SUSPEND_EN 0xFE76 +#define LDO_PWR_SEL 0xFE78 + +#define L1SUB_CONFIG1 0xFE8D +#define L1SUB_CONFIG2 0xFE8E +#define L1SUB_AUTO_CFG 0x02 +#define L1SUB_CONFIG3 0xFE8F +#define L1OFF_MBIAS2_EN_5250 BIT(7) + +#define DUMMY_REG_RESET_0 0xFE90 + +#define AUTOLOAD_CFG_BASE 0xFF00 +#define PETXCFG 0xFF03 +#define FORCE_CLKREQ_DELINK_MASK BIT(7) +#define FORCE_CLKREQ_LOW 0x80 +#define FORCE_CLKREQ_HIGH 0x00 + +#define PM_CTRL1 0xFF44 +#define CD_RESUME_EN_MASK 0xF0 + +#define PM_CTRL2 0xFF45 +#define PM_CTRL3 0xFF46 +#define SDIO_SEND_PME_EN 0x80 +#define FORCE_RC_MODE_ON 0x40 +#define FORCE_RX50_LINK_ON 0x20 +#define D3_DELINK_MODE_EN 0x10 +#define USE_PESRTB_CTL_DELINK 0x08 +#define DELAY_PIN_WAKE 0x04 +#define RESET_PIN_WAKE 0x02 +#define PM_WAKE_EN 0x01 +#define PM_CTRL4 0xFF47 + +/* Memory mapping */ +#define SRAM_BASE 0xE600 +#define RBUF_BASE 0xF400 +#define PPBUF_BASE1 0xF800 +#define PPBUF_BASE2 0xFA00 +#define IMAGE_FLAG_ADDR0 0xCE80 +#define IMAGE_FLAG_ADDR1 0xCE81 + +#define RREF_CFG 0xFF6C +#define RREF_VBGSEL_MASK 0x38 +#define RREF_VBGSEL_1V25 0x28 + +#define OOBS_CONFIG 0xFF6E 
+#define OOBS_AUTOK_DIS 0x80 +#define OOBS_VAL_MASK 0x1F + +#define LDO_DV18_CFG 0xFF70 +#define LDO_DV18_SR_MASK 0xC0 +#define LDO_DV18_SR_DF 0x40 + +#define LDO_CONFIG2 0xFF71 +#define LDO_D3318_MASK 0x07 +#define LDO_D3318_33V 0x07 +#define LDO_D3318_18V 0x02 + +#define LDO_VCC_CFG0 0xFF72 +#define LDO_VCC_LMTVTH_MASK 0x30 +#define LDO_VCC_LMTVTH_2A 0x10 + +#define LDO_VCC_CFG1 0xFF73 +#define LDO_VCC_REF_TUNE_MASK 0x30 +#define LDO_VCC_REF_1V2 0x20 +#define LDO_VCC_TUNE_MASK 0x07 +#define LDO_VCC_1V8 0x04 +#define LDO_VCC_3V3 0x07 +#define LDO_VCC_LMT_EN 0x08 + +#define LDO_VIO_CFG 0xFF75 +#define LDO_VIO_SR_MASK 0xC0 +#define LDO_VIO_SR_DF 0x40 +#define LDO_VIO_REF_TUNE_MASK 0x30 +#define LDO_VIO_REF_1V2 0x20 +#define LDO_VIO_TUNE_MASK 0x07 +#define LDO_VIO_1V7 0x03 +#define LDO_VIO_1V8 0x04 +#define LDO_VIO_3V3 0x07 + +#define LDO_DV12S_CFG 0xFF76 +#define LDO_REF12_TUNE_MASK 0x18 +#define LDO_REF12_TUNE_DF 0x10 +#define LDO_D12_TUNE_MASK 0x07 +#define LDO_D12_TUNE_DF 0x04 + +#define LDO_AV12S_CFG 0xFF77 +#define LDO_AV12S_TUNE_MASK 0x07 +#define LDO_AV12S_TUNE_DF 0x04 + +#define SD40_LDO_CTL1 0xFE7D +#define SD40_VIO_TUNE_MASK 0x70 +#define SD40_VIO_TUNE_1V7 0x30 +#define SD_VIO_LDO_1V8 0x40 +#define SD_VIO_LDO_3V3 0x70 + +/* Phy register */ +#define PHY_PCR 0x00 +#define PHY_PCR_FORCE_CODE 0xB000 +#define PHY_PCR_OOBS_CALI_50 0x0800 +#define PHY_PCR_OOBS_VCM_08 0x0200 +#define PHY_PCR_OOBS_SEN_90 0x0040 +#define PHY_PCR_RSSI_EN 0x0002 +#define PHY_PCR_RX10K 0x0001 + +#define PHY_RCR0 0x01 +#define PHY_RCR1 0x02 +#define PHY_RCR1_ADP_TIME_4 0x0400 +#define PHY_RCR1_VCO_COARSE 0x001F +#define PHY_RCR1_INIT_27S 0x0A1F +#define PHY_SSCCR2 0x02 +#define PHY_SSCCR2_PLL_NCODE 0x0A00 +#define PHY_SSCCR2_TIME0 0x001C +#define PHY_SSCCR2_TIME2_WIDTH 0x0003 + +#define PHY_RCR2 0x03 +#define PHY_RCR2_EMPHASE_EN 0x8000 +#define PHY_RCR2_NADJR 0x4000 +#define PHY_RCR2_CDR_SR_2 0x0100 +#define PHY_RCR2_FREQSEL_12 0x0040 +#define PHY_RCR2_CDR_SC_12P 0x0010 +#define 
PHY_RCR2_CALIB_LATE 0x0002 +#define PHY_RCR2_INIT_27S 0xC152 +#define PHY_SSCCR3 0x03 +#define PHY_SSCCR3_STEP_IN 0x2740 +#define PHY_SSCCR3_CHECK_DELAY 0x0008 +#define _PHY_ANA03 0x03 +#define _PHY_ANA03_TIMER_MAX 0x2700 +#define _PHY_ANA03_OOBS_DEB_EN 0x0040 +#define _PHY_CMU_DEBUG_EN 0x0008 + +#define PHY_RTCR 0x04 +#define PHY_RDR 0x05 +#define PHY_RDR_RXDSEL_1_9 0x4000 +#define PHY_SSC_AUTO_PWD 0x0600 +#define PHY_TCR0 0x06 +#define PHY_TCR1 0x07 +#define PHY_TUNE 0x08 +#define PHY_TUNE_TUNEREF_1_0 0x4000 +#define PHY_TUNE_VBGSEL_1252 0x0C00 +#define PHY_TUNE_SDBUS_33 0x0200 +#define PHY_TUNE_TUNED18 0x01C0 +#define PHY_TUNE_TUNED12 0X0020 +#define PHY_TUNE_TUNEA12 0x0004 +#define PHY_TUNE_VOLTAGE_MASK 0xFC3F +#define PHY_TUNE_VOLTAGE_3V3 0x03C0 +#define PHY_TUNE_D18_1V8 0x0100 +#define PHY_TUNE_D18_1V7 0x0080 +#define PHY_ANA08 0x08 +#define PHY_ANA08_RX_EQ_DCGAIN 0x5000 +#define PHY_ANA08_SEL_RX_EN 0x0400 +#define PHY_ANA08_RX_EQ_VAL 0x03C0 +#define PHY_ANA08_SCP 0x0020 +#define PHY_ANA08_SEL_IPI 0x0004 + +#define PHY_IMR 0x09 +#define PHY_BPCR 0x0A +#define PHY_BPCR_IBRXSEL 0x0400 +#define PHY_BPCR_IBTXSEL 0x0100 +#define PHY_BPCR_IB_FILTER 0x0080 +#define PHY_BPCR_CMIRROR_EN 0x0040 + +#define PHY_BIST 0x0B +#define PHY_RAW_L 0x0C +#define PHY_RAW_H 0x0D +#define PHY_RAW_DATA 0x0E +#define PHY_HOST_CLK_CTRL 0x0F +#define PHY_DMR 0x10 +#define PHY_BACR 0x11 +#define PHY_BACR_BASIC_MASK 0xFFF3 +#define PHY_IER 0x12 +#define PHY_BCSR 0x13 +#define PHY_BPR 0x14 +#define PHY_BPNR2 0x15 +#define PHY_BPNR 0x16 +#define PHY_BRNR2 0x17 +#define PHY_BENR 0x18 +#define PHY_REV 0x19 +#define PHY_REV_RESV 0xE000 +#define PHY_REV_RXIDLE_LATCHED 0x1000 +#define PHY_REV_P1_EN 0x0800 +#define PHY_REV_RXIDLE_EN 0x0400 +#define PHY_REV_CLKREQ_TX_EN 0x0200 +#define PHY_REV_CLKREQ_RX_EN 0x0100 +#define PHY_REV_CLKREQ_DT_1_0 0x0040 +#define PHY_REV_STOP_CLKRD 0x0020 +#define PHY_REV_RX_PWST 0x0008 +#define PHY_REV_STOP_CLKWR 0x0004 +#define _PHY_REV0 0x19 +#define 
_PHY_REV0_FILTER_OUT 0x3800 +#define _PHY_REV0_CDR_BYPASS_PFD 0x0100 +#define _PHY_REV0_CDR_RX_IDLE_BYPASS 0x0002 + +#define PHY_FLD0 0x1A +#define PHY_ANA1A 0x1A +#define PHY_ANA1A_TXR_LOOPBACK 0x2000 +#define PHY_ANA1A_RXT_BIST 0x0500 +#define PHY_ANA1A_TXR_BIST 0x0040 +#define PHY_ANA1A_REV 0x0006 +#define PHY_FLD0_INIT_27S 0x2546 +#define PHY_FLD1 0x1B +#define PHY_FLD2 0x1C +#define PHY_FLD3 0x1D +#define PHY_FLD3_TIMER_4 0x0800 +#define PHY_FLD3_TIMER_6 0x0020 +#define PHY_FLD3_RXDELINK 0x0004 +#define PHY_FLD3_INIT_27S 0x0004 +#define PHY_ANA1D 0x1D +#define PHY_ANA1D_DEBUG_ADDR 0x0004 +#define _PHY_FLD0 0x1D +#define _PHY_FLD0_CLK_REQ_20C 0x8000 +#define _PHY_FLD0_RX_IDLE_EN 0x1000 +#define _PHY_FLD0_BIT_ERR_RSTN 0x0800 +#define _PHY_FLD0_BER_COUNT 0x01E0 +#define _PHY_FLD0_BER_TIMER 0x001E +#define _PHY_FLD0_CHECK_EN 0x0001 + +#define PHY_FLD4 0x1E +#define PHY_FLD4_FLDEN_SEL 0x4000 +#define PHY_FLD4_REQ_REF 0x2000 +#define PHY_FLD4_RXAMP_OFF 0x1000 +#define PHY_FLD4_REQ_ADDA 0x0800 +#define PHY_FLD4_BER_COUNT 0x00E0 +#define PHY_FLD4_BER_TIMER 0x000A +#define PHY_FLD4_BER_CHK_EN 0x0001 +#define PHY_FLD4_INIT_27S 0x5C7F +#define PHY_DIG1E 0x1E +#define PHY_DIG1E_REV 0x4000 +#define PHY_DIG1E_D0_X_D1 0x1000 +#define PHY_DIG1E_RX_ON_HOST 0x0800 +#define PHY_DIG1E_RCLK_REF_HOST 0x0400 +#define PHY_DIG1E_RCLK_TX_EN_KEEP 0x0040 +#define PHY_DIG1E_RCLK_TX_TERM_KEEP 0x0020 +#define PHY_DIG1E_RCLK_RX_EIDLE_ON 0x0010 +#define PHY_DIG1E_TX_TERM_KEEP 0x0008 +#define PHY_DIG1E_RX_TERM_KEEP 0x0004 +#define PHY_DIG1E_TX_EN_KEEP 0x0002 +#define PHY_DIG1E_RX_EN_KEEP 0x0001 +#define PHY_DUM_REG 0x1F + +#define PCR_ASPM_SETTING_REG1 0x160 +#define PCR_ASPM_SETTING_REG2 0x168 + +#define PCR_SETTING_REG1 0x724 +#define PCR_SETTING_REG2 0x814 +#define PCR_SETTING_REG3 0x747 + +#define rtsx_pci_init_cmd(pcr) ((pcr)->ci = 0) + +#define RTS5227_DEVICE_ID 0x5227 +#define RTS_MAX_TIMES_FREQ_REDUCTION 8 + +struct rtsx_pcr; + +struct pcr_handle { + struct rtsx_pcr *pcr; +}; + +struct 
pcr_ops {	/* callback table; struct rtsx_pcr refers to one through its const 'ops' member */
+	int (*write_phy)(struct rtsx_pcr *pcr, u8 addr, u16 val);
+	int (*read_phy)(struct rtsx_pcr *pcr, u8 addr, u16 *val);	/* result stored through @val */
+	int (*extra_init_hw)(struct rtsx_pcr *pcr);
+	int (*optimize_phy)(struct rtsx_pcr *pcr);
+	int (*turn_on_led)(struct rtsx_pcr *pcr);
+	int (*turn_off_led)(struct rtsx_pcr *pcr);
+	int (*enable_auto_blink)(struct rtsx_pcr *pcr);
+	int (*disable_auto_blink)(struct rtsx_pcr *pcr);
+	int (*card_power_on)(struct rtsx_pcr *pcr, int card);
+	int (*card_power_off)(struct rtsx_pcr *pcr, int card);
+	int (*switch_output_voltage)(struct rtsx_pcr *pcr,
+			u8 voltage);	/* NOTE(review): presumably OUTPUT_3V3 / OUTPUT_1V8 -- confirm */
+	unsigned int (*cd_deglitch)(struct rtsx_pcr *pcr);
+	int (*conv_clk_and_div_n)(int clk, int dir);	/* the only callback not passed the pcr */
+	void (*fetch_vendor_settings)(struct rtsx_pcr *pcr);
+	void (*force_power_down)(struct rtsx_pcr *pcr, u8 pm_state);
+
+	void (*set_aspm)(struct rtsx_pcr *pcr, bool enable);
+	int (*set_ltr_latency)(struct rtsx_pcr *pcr, u32 latency);
+	int (*set_l1off_sub)(struct rtsx_pcr *pcr, u8 val);
+	void (*set_l1off_cfg_sub_d0)(struct rtsx_pcr *pcr, int active);
+	void (*full_on)(struct rtsx_pcr *pcr);
+	void (*power_saving)(struct rtsx_pcr *pcr);
+};
+
+enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN};	/* type of struct rtsx_pcr's 'state' member */
+
+#define ASPM_L1_1_EN_MASK BIT(3)	/* note: *_MASK bit order is the reverse of the *_EN flags below */
+#define ASPM_L1_2_EN_MASK BIT(2)
+#define PM_L1_1_EN_MASK BIT(1)
+#define PM_L1_2_EN_MASK BIT(0)
+
+#define ASPM_L1_1_EN BIT(0)	/* NOTE(review): presumably bits of rtsx_cr_option.dev_flags -- confirm */
+#define ASPM_L1_2_EN BIT(1)
+#define PM_L1_1_EN BIT(2)
+#define PM_L1_2_EN BIT(3)
+#define LTR_L1SS_PWR_GATE_EN BIT(4)
+#define L1_SNOOZE_TEST_EN BIT(5)
+#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6)
+
+enum dev_aspm_mode {	/* type of rtsx_cr_option.dev_aspm_mode */
+	DEV_ASPM_DISABLE = 0,
+	DEV_ASPM_DYNAMIC,
+	DEV_ASPM_BACKDOOR,
+	DEV_ASPM_STATIC,
+};
+
+/*
+ * struct rtsx_cr_option - card reader option
+ * @dev_flags: device flags
+ * @force_clkreq_0: force clock request
+ * @ltr_en: enable ltr mode flag
+ * @ltr_enabled: ltr mode in configure space flag
+ * @ltr_active: ltr mode status
+ * @ltr_active_latency: ltr mode active latency
+ * @ltr_idle_latency: ltr mode idle
latency
+ * @ltr_l1off_latency: ltr mode l1off latency
+ * @dev_aspm_mode: device aspm mode
+ * @l1_snooze_delay: l1 snooze delay
+ * @ltr_l1off_sspwrgate: ltr l1off sspwrgate
+ * @ltr_l1off_snooze_sspwrgate: ltr l1off snooze sspwrgate
+ */
+struct rtsx_cr_option {
+	u32 dev_flags;	/* bit mask updated through rtsx_set/clear/check_dev_flag() */
+	bool force_clkreq_0;
+	bool ltr_en;
+	bool ltr_enabled;
+	bool ltr_active;
+	u32 ltr_active_latency;
+	u32 ltr_idle_latency;
+	u32 ltr_l1off_latency;
+	enum dev_aspm_mode dev_aspm_mode;
+	u32 l1_snooze_delay;
+	u8 ltr_l1off_sspwrgate;
+	u8 ltr_l1off_snooze_sspwrgate;
+};
+
+#define rtsx_set_dev_flag(cr, flag) \
+	((cr)->option.dev_flags |= (flag))	/* cr needs an 'option' member (struct rtsx_pcr has one) */
+#define rtsx_clear_dev_flag(cr, flag) \
+	((cr)->option.dev_flags &= ~(flag))
+#define rtsx_check_dev_flag(cr, flag) \
+	((cr)->option.dev_flags & (flag))	/* non-zero if any bit of flag is set */
+
+struct rtsx_pcr {
+	struct pci_dev *pci;	/* dereferenced by the config-space, PCI_VID/PCI_PID and pcr_dbg macros */
+	unsigned int id;
+	int pcie_cap;
+	struct rtsx_cr_option option;	/* accessed through the rtsx_*_dev_flag() macros */
+
+	/* pci resources */
+	unsigned long addr;
+	void __iomem *remap_addr;	/* base for rtsx_pci_readl()/writel() and the w/b variants */
+	int irq;
+
+	/* host reserved buffer */
+	void *rtsx_resv_buf;
+	dma_addr_t rtsx_resv_buf_addr;
+
+	void *host_cmds_ptr;	/* returned as u8 * by rtsx_pci_get_cmd_data() */
+	dma_addr_t host_cmds_addr;
+	int ci;	/* reset to 0 by rtsx_pci_init_cmd() */
+
+	void *host_sg_tbl_ptr;
+	dma_addr_t host_sg_tbl_addr;
+	int sgi;
+
+	u32 bier;
+	char trans_result;	/* NOTE(review): presumably one of TRANS_* -- confirm */
+
+	unsigned int card_inserted;
+	unsigned int card_removed;
+	unsigned int card_exist;
+
+	struct delayed_work carddet_work;
+	struct delayed_work idle_work;
+
+	spinlock_t lock;
+	struct mutex pcr_mutex;
+	struct completion *done;
+	struct completion *finish_me;
+
+	unsigned int cur_clock;
+	bool remove_pci;
+	bool msi_en;
+
+#define EXTRA_CAPS_SD_SDR50 (1 << 0)
+#define EXTRA_CAPS_SD_SDR104 (1 << 1)
+#define EXTRA_CAPS_SD_DDR50 (1 << 2)
+#define EXTRA_CAPS_MMC_HSDDR (1 << 3)
+#define EXTRA_CAPS_MMC_HS200 (1 << 4)
+#define EXTRA_CAPS_MMC_8BIT (1 << 5)
+	u32 extra_caps;	/* NOTE(review): presumably a mask of the EXTRA_CAPS_* bits above -- confirm */
+
+#define IC_VER_A 0
+#define IC_VER_B 1
+#define IC_VER_C 2
+#define IC_VER_D 3
+	u8 ic_version;	/* compared against 'ver' by is_version() */
+
+	u8 sd30_drive_sel_1v8;
+	u8 sd30_drive_sel_3v3;
+	u8 card_drive_sel;
+#define ASPM_L1_EN 0x02
+	u8 aspm_en;
+	bool aspm_enabled;
+
+#define PCR_MS_PMOS (1 << 0)
+#define PCR_REVERSE_SOCKET (1 << 1)
+	u32 flags;	/* NOTE(review): presumably a mask of the PCR_* bits above -- confirm */
+
+	u32 tx_initial_phase;	/* packed as by SET_CLOCK_PHASE(): [7:0] SDR104, [15:8] SDR50, [23:16] DDR50 */
+	u32 rx_initial_phase;	/* same packing; unpacked by the *_RX_PHASE() macros */
+
+	const u32 *sd_pull_ctl_enable_tbl;
+	const u32 *sd_pull_ctl_disable_tbl;
+	const u32 *ms_pull_ctl_enable_tbl;
+	const u32 *ms_pull_ctl_disable_tbl;
+
+	const struct pcr_ops *ops;	/* callback table, see struct pcr_ops */
+	enum PDEV_STAT state;
+
+	u16 reg_pm_ctrl3;
+
+	int num_slots;
+	struct rtsx_slot *slots;
+
+	u8 dma_error_count;
+};
+
+#define PID_524A 0x524A
+#define PID_5249 0x5249
+#define PID_5250 0x5250
+#define PID_525A 0x525A
+
+#define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid))
+#define PCI_VID(pcr) ((pcr)->pci->vendor)
+#define PCI_PID(pcr) ((pcr)->pci->device)
+#define is_version(pcr, pid, ver) \
+	(CHK_PCI_PID(pcr, pid) && (pcr)->ic_version == (ver))	/* device ID and IC revision both match */
+#define pcr_dbg(pcr, fmt, arg...) \
+	dev_dbg(&(pcr)->pci->dev, fmt, ##arg)
+
+#define SDR104_PHASE(val) ((val) & 0xFF)	/* bits 7:0 */
+#define SDR50_PHASE(val) (((val) >> 8) & 0xFF)	/* bits 15:8 */
+#define DDR50_PHASE(val) (((val) >> 16) & 0xFF)	/* bits 23:16 */
+#define SDR104_TX_PHASE(pcr) SDR104_PHASE((pcr)->tx_initial_phase)
+#define SDR50_TX_PHASE(pcr) SDR50_PHASE((pcr)->tx_initial_phase)
+#define DDR50_TX_PHASE(pcr) DDR50_PHASE((pcr)->tx_initial_phase)
+#define SDR104_RX_PHASE(pcr) SDR104_PHASE((pcr)->rx_initial_phase)
+#define SDR50_RX_PHASE(pcr) SDR50_PHASE((pcr)->rx_initial_phase)
+#define DDR50_RX_PHASE(pcr) DDR50_PHASE((pcr)->rx_initial_phase)
+#define SET_CLOCK_PHASE(sdr104, sdr50, ddr50) \
+	(((ddr50) << 16) | ((sdr50) << 8) | (sdr104))	/* inverse of the *_PHASE() extractors; inputs are not masked */
+
+void rtsx_pci_start_run(struct rtsx_pcr *pcr);
+int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data);
+int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data);
+int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val);
+int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val);
+void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr);
+void rtsx_pci_add_cmd(struct rtsx_pcr *pcr,
+		u8
cmd_type, u16 reg_addr, u8 mask, u8 data); +void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); +int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); +int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read, int timeout); +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int count, bool read, int timeout); +int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); +int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); +int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card); +int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card); +int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); +int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card); +int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card); +int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card); +int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage); +unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr); +void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr); + +static inline u8 *rtsx_pci_get_cmd_data(struct rtsx_pcr *pcr) +{ + return (u8 *)(pcr->host_cmds_ptr); +} + +static inline int rtsx_pci_update_cfg_byte(struct rtsx_pcr *pcr, int addr, + u8 mask, u8 append) +{ + int err; + u8 val; + + err = pci_read_config_byte(pcr->pci, addr, &val); + if (err < 0) + return err; + return pci_write_config_byte(pcr->pci, addr, (val & mask) | append); +} + +static inline void rtsx_pci_write_be32(struct rtsx_pcr *pcr, u16 reg, u32 val) +{ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg, 0xFF, val >> 24); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg + 1, 0xFF, val >> 16); 
+ rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg + 2, 0xFF, val >> 8); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, reg + 3, 0xFF, val); +} + +static inline int rtsx_pci_update_phy(struct rtsx_pcr *pcr, u8 addr, + u16 mask, u16 append) +{ + int err; + u16 val; + + err = rtsx_pci_read_phy_register(pcr, addr, &val); + if (err < 0) + return err; + + return rtsx_pci_write_phy_register(pcr, addr, (val & mask) | append); +} + +#endif diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h new file mode 100644 index 000000000000..c446e4fd6b5c --- /dev/null +++ b/include/linux/rtsx_usb.h @@ -0,0 +1,628 @@ +/* Driver for Realtek RTS5139 USB card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . 
+ * + * Author: + * Roger Tseng + */ + +#ifndef __RTSX_USB_H +#define __RTSX_USB_H + +#include + +/* related module names */ +#define RTSX_USB_SD_CARD 0 +#define RTSX_USB_MS_CARD 1 + +/* endpoint numbers */ +#define EP_BULK_OUT 1 +#define EP_BULK_IN 2 +#define EP_INTR_IN 3 + +/* USB vendor requests */ +#define RTSX_USB_REQ_REG_OP 0x00 +#define RTSX_USB_REQ_POLL 0x02 + +/* miscellaneous parameters */ +#define MIN_DIV_N 60 +#define MAX_DIV_N 120 + +#define MAX_PHASE 15 +#define RX_TUNING_CNT 3 + +#define QFN24 0 +#define LQFP48 1 +#define CHECK_PKG(ucr, pkg) ((ucr)->package == (pkg)) + +/* data structures */ +struct rtsx_ucr { + u16 vendor_id; + u16 product_id; + + int package; + u8 ic_version; + bool is_rts5179; + + unsigned int cur_clk; + + u8 *cmd_buf; + unsigned int cmd_idx; + u8 *rsp_buf; + + struct usb_device *pusb_dev; + struct usb_interface *pusb_intf; + struct usb_sg_request current_sg; + unsigned char *iobuf; + dma_addr_t iobuf_dma; + + struct timer_list sg_timer; + struct mutex dev_mutex; +}; + +/* buffer size */ +#define IOBUF_SIZE 1024 + +/* prototypes of exported functions */ +extern int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status); + +extern int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data); +extern int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, + u8 data); + +extern int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, + u8 data); +extern int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, + u8 *data); + +extern void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, + u16 reg_addr, u8 mask, u8 data); +extern int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout); +extern int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout); +extern int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, + void *buf, unsigned int len, int use_sg, + unsigned int *act_len, int timeout); + +extern int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 
*buf, int buf_len); +extern int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len); +extern int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); +extern int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card); + +/* card status */ +#define SD_CD 0x01 +#define MS_CD 0x02 +#define XD_CD 0x04 +#define CD_MASK (SD_CD | MS_CD | XD_CD) +#define SD_WP 0x08 + +/* reader command field offset & parameters */ +#define READ_REG_CMD 0 +#define WRITE_REG_CMD 1 +#define CHECK_REG_CMD 2 + +#define PACKET_TYPE 4 +#define CNT_H 5 +#define CNT_L 6 +#define STAGE_FLAG 7 +#define CMD_OFFSET 8 +#define SEQ_WRITE_DATA_OFFSET 12 + +#define BATCH_CMD 0 +#define SEQ_READ 1 +#define SEQ_WRITE 2 + +#define STAGE_R 0x01 +#define STAGE_DI 0x02 +#define STAGE_DO 0x04 +#define STAGE_MS_STATUS 0x08 +#define STAGE_XD_STATUS 0x10 +#define MODE_C 0x00 +#define MODE_CR (STAGE_R) +#define MODE_CDIR (STAGE_R | STAGE_DI) +#define MODE_CDOR (STAGE_R | STAGE_DO) + +#define EP0_OP_SHIFT 14 +#define EP0_READ_REG_CMD 2 +#define EP0_WRITE_REG_CMD 3 + +#define rtsx_usb_cmd_hdr_tag(ucr) \ + do { \ + ucr->cmd_buf[0] = 'R'; \ + ucr->cmd_buf[1] = 'T'; \ + ucr->cmd_buf[2] = 'C'; \ + ucr->cmd_buf[3] = 'R'; \ + } while (0) + +static inline void rtsx_usb_init_cmd(struct rtsx_ucr *ucr) +{ + rtsx_usb_cmd_hdr_tag(ucr); + ucr->cmd_idx = 0; + ucr->cmd_buf[PACKET_TYPE] = BATCH_CMD; +} + +/* internal register address */ +#define FPDCTL 0xFC00 +#define SSC_DIV_N_0 0xFC07 +#define SSC_CTL1 0xFC09 +#define SSC_CTL2 0xFC0A +#define CFG_MODE 0xFC0E +#define CFG_MODE_1 0xFC0F +#define RCCTL 0xFC14 +#define SOF_WDOG 0xFC28 +#define SYS_DUMMY0 0xFC30 + +#define MS_BLKEND 0xFD30 +#define MS_READ_START 0xFD31 +#define MS_READ_COUNT 0xFD32 +#define MS_WRITE_START 0xFD33 +#define MS_WRITE_COUNT 0xFD34 +#define MS_COMMAND 0xFD35 +#define MS_OLD_BLOCK_0 0xFD36 +#define MS_OLD_BLOCK_1 0xFD37 +#define MS_NEW_BLOCK_0 0xFD38 +#define 
MS_NEW_BLOCK_1 0xFD39 +#define MS_LOG_BLOCK_0 0xFD3A +#define MS_LOG_BLOCK_1 0xFD3B +#define MS_BUS_WIDTH 0xFD3C +#define MS_PAGE_START 0xFD3D +#define MS_PAGE_LENGTH 0xFD3E +#define MS_CFG 0xFD40 +#define MS_TPC 0xFD41 +#define MS_TRANS_CFG 0xFD42 +#define MS_TRANSFER 0xFD43 +#define MS_INT_REG 0xFD44 +#define MS_BYTE_CNT 0xFD45 +#define MS_SECTOR_CNT_L 0xFD46 +#define MS_SECTOR_CNT_H 0xFD47 +#define MS_DBUS_H 0xFD48 + +#define CARD_DMA1_CTL 0xFD5C +#define CARD_PULL_CTL1 0xFD60 +#define CARD_PULL_CTL2 0xFD61 +#define CARD_PULL_CTL3 0xFD62 +#define CARD_PULL_CTL4 0xFD63 +#define CARD_PULL_CTL5 0xFD64 +#define CARD_PULL_CTL6 0xFD65 +#define CARD_EXIST 0xFD6F +#define CARD_INT_PEND 0xFD71 + +#define LDO_POWER_CFG 0xFD7B + +#define SD_CFG1 0xFDA0 +#define SD_CFG2 0xFDA1 +#define SD_CFG3 0xFDA2 +#define SD_STAT1 0xFDA3 +#define SD_STAT2 0xFDA4 +#define SD_BUS_STAT 0xFDA5 +#define SD_PAD_CTL 0xFDA6 +#define SD_SAMPLE_POINT_CTL 0xFDA7 +#define SD_PUSH_POINT_CTL 0xFDA8 +#define SD_CMD0 0xFDA9 +#define SD_CMD1 0xFDAA +#define SD_CMD2 0xFDAB +#define SD_CMD3 0xFDAC +#define SD_CMD4 0xFDAD +#define SD_CMD5 0xFDAE +#define SD_BYTE_CNT_L 0xFDAF +#define SD_BYTE_CNT_H 0xFDB0 +#define SD_BLOCK_CNT_L 0xFDB1 +#define SD_BLOCK_CNT_H 0xFDB2 +#define SD_TRANSFER 0xFDB3 +#define SD_CMD_STATE 0xFDB5 +#define SD_DATA_STATE 0xFDB6 +#define SD_VPCLK0_CTL 0xFC2A +#define SD_VPCLK1_CTL 0xFC2B +#define SD_DCMPS0_CTL 0xFC2C +#define SD_DCMPS1_CTL 0xFC2D + +#define CARD_DMA1_CTL 0xFD5C + +#define HW_VERSION 0xFC01 + +#define SSC_CLK_FPGA_SEL 0xFC02 +#define CLK_DIV 0xFC03 +#define SFSM_ED 0xFC04 + +#define CD_DEGLITCH_WIDTH 0xFC20 +#define CD_DEGLITCH_EN 0xFC21 +#define AUTO_DELINK_EN 0xFC23 + +#define FPGA_PULL_CTL 0xFC1D +#define CARD_CLK_SOURCE 0xFC2E + +#define CARD_SHARE_MODE 0xFD51 +#define CARD_DRIVE_SEL 0xFD52 +#define CARD_STOP 0xFD53 +#define CARD_OE 0xFD54 +#define CARD_AUTO_BLINK 0xFD55 +#define CARD_GPIO 0xFD56 +#define SD30_DRIVE_SEL 0xFD57 + +#define CARD_DATA_SOURCE 0xFD5D 
+#define CARD_SELECT 0xFD5E + +#define CARD_CLK_EN 0xFD79 +#define CARD_PWR_CTL 0xFD7A + +#define OCPCTL 0xFD80 +#define OCPPARA1 0xFD81 +#define OCPPARA2 0xFD82 +#define OCPSTAT 0xFD83 + +#define HS_USB_STAT 0xFE01 +#define HS_VCONTROL 0xFE26 +#define HS_VSTAIN 0xFE27 +#define HS_VLOADM 0xFE28 +#define HS_VSTAOUT 0xFE29 + +#define MC_IRQ 0xFF00 +#define MC_IRQEN 0xFF01 +#define MC_FIFO_CTL 0xFF02 +#define MC_FIFO_BC0 0xFF03 +#define MC_FIFO_BC1 0xFF04 +#define MC_FIFO_STAT 0xFF05 +#define MC_FIFO_MODE 0xFF06 +#define MC_FIFO_RD_PTR0 0xFF07 +#define MC_FIFO_RD_PTR1 0xFF08 +#define MC_DMA_CTL 0xFF10 +#define MC_DMA_TC0 0xFF11 +#define MC_DMA_TC1 0xFF12 +#define MC_DMA_TC2 0xFF13 +#define MC_DMA_TC3 0xFF14 +#define MC_DMA_RST 0xFF15 + +#define RBUF_SIZE_MASK 0xFBFF +#define RBUF_BASE 0xF000 +#define PPBUF_BASE1 0xF800 +#define PPBUF_BASE2 0xFA00 + +/* internal register value macros */ +#define POWER_OFF 0x03 +#define PARTIAL_POWER_ON 0x02 +#define POWER_ON 0x00 +#define POWER_MASK 0x03 +#define LDO3318_PWR_MASK 0x0C +#define LDO_ON 0x00 +#define LDO_SUSPEND 0x08 +#define LDO_OFF 0x0C +#define DV3318_AUTO_PWR_OFF 0x10 +#define FORCE_LDO_POWERB 0x60 + +/* LDO_POWER_CFG */ +#define TUNE_SD18_MASK 0x1C +#define TUNE_SD18_1V7 0x00 +#define TUNE_SD18_1V8 (0x01 << 2) +#define TUNE_SD18_1V9 (0x02 << 2) +#define TUNE_SD18_2V0 (0x03 << 2) +#define TUNE_SD18_2V7 (0x04 << 2) +#define TUNE_SD18_2V8 (0x05 << 2) +#define TUNE_SD18_2V9 (0x06 << 2) +#define TUNE_SD18_3V3 (0x07 << 2) + +/* CLK_DIV */ +#define CLK_CHANGE 0x80 +#define CLK_DIV_1 0x00 +#define CLK_DIV_2 0x01 +#define CLK_DIV_4 0x02 +#define CLK_DIV_8 0x03 + +#define SSC_POWER_MASK 0x01 +#define SSC_POWER_DOWN 0x01 +#define SSC_POWER_ON 0x00 + +#define FPGA_VER 0x80 +#define HW_VER_MASK 0x0F + +#define EXTEND_DMA1_ASYNC_SIGNAL 0x02 + +/* CFG_MODE*/ +#define XTAL_FREE 0x80 +#define CLK_MODE_MASK 0x03 +#define CLK_MODE_12M_XTAL 0x00 +#define CLK_MODE_NON_XTAL 0x01 +#define CLK_MODE_24M_OSC 0x02 +#define CLK_MODE_48M_OSC 
0x03 + +/* CFG_MODE_1*/ +#define RTS5179 0x02 + +#define NYET_EN 0x01 +#define NYET_MSAK 0x01 + +#define SD30_DRIVE_MASK 0x07 +#define SD20_DRIVE_MASK 0x03 + +#define DISABLE_SD_CD 0x08 +#define DISABLE_MS_CD 0x10 +#define DISABLE_XD_CD 0x20 +#define SD_CD_DEGLITCH_EN 0x01 +#define MS_CD_DEGLITCH_EN 0x02 +#define XD_CD_DEGLITCH_EN 0x04 + +#define CARD_SHARE_LQFP48 0x04 +#define CARD_SHARE_QFN24 0x00 +#define CARD_SHARE_LQFP_SEL 0x04 +#define CARD_SHARE_XD 0x00 +#define CARD_SHARE_SD 0x01 +#define CARD_SHARE_MS 0x02 +#define CARD_SHARE_MASK 0x03 + + +/* SD30_DRIVE_SEL */ +#define DRIVER_TYPE_A 0x05 +#define DRIVER_TYPE_B 0x03 +#define DRIVER_TYPE_C 0x02 +#define DRIVER_TYPE_D 0x01 + +/* SD_BUS_STAT */ +#define SD_CLK_TOGGLE_EN 0x80 +#define SD_CLK_FORCE_STOP 0x40 +#define SD_DAT3_STATUS 0x10 +#define SD_DAT2_STATUS 0x08 +#define SD_DAT1_STATUS 0x04 +#define SD_DAT0_STATUS 0x02 +#define SD_CMD_STATUS 0x01 + +/* SD_PAD_CTL */ +#define SD_IO_USING_1V8 0x80 +#define SD_IO_USING_3V3 0x7F +#define TYPE_A_DRIVING 0x00 +#define TYPE_B_DRIVING 0x01 +#define TYPE_C_DRIVING 0x02 +#define TYPE_D_DRIVING 0x03 + +/* CARD_CLK_EN */ +#define SD_CLK_EN 0x04 +#define MS_CLK_EN 0x08 + +/* CARD_SELECT */ +#define SD_MOD_SEL 2 +#define MS_MOD_SEL 3 + +/* CARD_SHARE_MODE */ +#define CARD_SHARE_LQFP48 0x04 +#define CARD_SHARE_QFN24 0x00 +#define CARD_SHARE_LQFP_SEL 0x04 +#define CARD_SHARE_XD 0x00 +#define CARD_SHARE_SD 0x01 +#define CARD_SHARE_MS 0x02 +#define CARD_SHARE_MASK 0x03 + +/* SSC_CTL1 */ +#define SSC_RSTB 0x80 +#define SSC_8X_EN 0x40 +#define SSC_FIX_FRAC 0x20 +#define SSC_SEL_1M 0x00 +#define SSC_SEL_2M 0x08 +#define SSC_SEL_4M 0x10 +#define SSC_SEL_8M 0x18 + +/* SSC_CTL2 */ +#define SSC_DEPTH_MASK 0x03 +#define SSC_DEPTH_DISALBE 0x00 +#define SSC_DEPTH_2M 0x01 +#define SSC_DEPTH_1M 0x02 +#define SSC_DEPTH_512K 0x03 + +/* SD_VPCLK0_CTL */ +#define PHASE_CHANGE 0x80 +#define PHASE_NOT_RESET 0x40 + +/* SD_TRANSFER */ +#define SD_TRANSFER_START 0x80 +#define SD_TRANSFER_END 0x40 
+#define SD_STAT_IDLE 0x20 +#define SD_TRANSFER_ERR 0x10 +#define SD_TM_NORMAL_WRITE 0x00 +#define SD_TM_AUTO_WRITE_3 0x01 +#define SD_TM_AUTO_WRITE_4 0x02 +#define SD_TM_AUTO_READ_3 0x05 +#define SD_TM_AUTO_READ_4 0x06 +#define SD_TM_CMD_RSP 0x08 +#define SD_TM_AUTO_WRITE_1 0x09 +#define SD_TM_AUTO_WRITE_2 0x0A +#define SD_TM_NORMAL_READ 0x0C +#define SD_TM_AUTO_READ_1 0x0D +#define SD_TM_AUTO_READ_2 0x0E +#define SD_TM_AUTO_TUNING 0x0F + +/* SD_CFG1 */ +#define SD_CLK_DIVIDE_0 0x00 +#define SD_CLK_DIVIDE_256 0xC0 +#define SD_CLK_DIVIDE_128 0x80 +#define SD_CLK_DIVIDE_MASK 0xC0 +#define SD_BUS_WIDTH_1BIT 0x00 +#define SD_BUS_WIDTH_4BIT 0x01 +#define SD_BUS_WIDTH_8BIT 0x02 +#define SD_ASYNC_FIFO_RST 0x10 +#define SD_20_MODE 0x00 +#define SD_DDR_MODE 0x04 +#define SD_30_MODE 0x08 + +/* SD_CFG2 */ +#define SD_CALCULATE_CRC7 0x00 +#define SD_NO_CALCULATE_CRC7 0x80 +#define SD_CHECK_CRC16 0x00 +#define SD_NO_CHECK_CRC16 0x40 +#define SD_WAIT_CRC_TO_EN 0x20 +#define SD_WAIT_BUSY_END 0x08 +#define SD_NO_WAIT_BUSY_END 0x00 +#define SD_CHECK_CRC7 0x00 +#define SD_NO_CHECK_CRC7 0x04 +#define SD_RSP_LEN_0 0x00 +#define SD_RSP_LEN_6 0x01 +#define SD_RSP_LEN_17 0x02 +#define SD_RSP_TYPE_R0 0x04 +#define SD_RSP_TYPE_R1 0x01 +#define SD_RSP_TYPE_R1b 0x09 +#define SD_RSP_TYPE_R2 0x02 +#define SD_RSP_TYPE_R3 0x05 +#define SD_RSP_TYPE_R4 0x05 +#define SD_RSP_TYPE_R5 0x01 +#define SD_RSP_TYPE_R6 0x01 +#define SD_RSP_TYPE_R7 0x01 + +/* SD_STAT1 */ +#define SD_CRC7_ERR 0x80 +#define SD_CRC16_ERR 0x40 +#define SD_CRC_WRITE_ERR 0x20 +#define SD_CRC_WRITE_ERR_MASK 0x1C +#define GET_CRC_TIME_OUT 0x02 +#define SD_TUNING_COMPARE_ERR 0x01 + +/* SD_DATA_STATE */ +#define SD_DATA_IDLE 0x80 + +/* CARD_DATA_SOURCE */ +#define PINGPONG_BUFFER 0x01 +#define RING_BUFFER 0x00 + +/* CARD_OE */ +#define SD_OUTPUT_EN 0x04 +#define MS_OUTPUT_EN 0x08 + +/* CARD_STOP */ +#define SD_STOP 0x04 +#define MS_STOP 0x08 +#define SD_CLR_ERR 0x40 +#define MS_CLR_ERR 0x80 + +/* CARD_CLK_SOURCE */ +#define 
CRC_FIX_CLK (0x00 << 0) +#define CRC_VAR_CLK0 (0x01 << 0) +#define CRC_VAR_CLK1 (0x02 << 0) +#define SD30_FIX_CLK (0x00 << 2) +#define SD30_VAR_CLK0 (0x01 << 2) +#define SD30_VAR_CLK1 (0x02 << 2) +#define SAMPLE_FIX_CLK (0x00 << 4) +#define SAMPLE_VAR_CLK0 (0x01 << 4) +#define SAMPLE_VAR_CLK1 (0x02 << 4) + +/* SD_SAMPLE_POINT_CTL */ +#define DDR_FIX_RX_DAT 0x00 +#define DDR_VAR_RX_DAT 0x80 +#define DDR_FIX_RX_DAT_EDGE 0x00 +#define DDR_FIX_RX_DAT_14_DELAY 0x40 +#define DDR_FIX_RX_CMD 0x00 +#define DDR_VAR_RX_CMD 0x20 +#define DDR_FIX_RX_CMD_POS_EDGE 0x00 +#define DDR_FIX_RX_CMD_14_DELAY 0x10 +#define SD20_RX_POS_EDGE 0x00 +#define SD20_RX_14_DELAY 0x08 +#define SD20_RX_SEL_MASK 0x08 + +/* SD_PUSH_POINT_CTL */ +#define DDR_FIX_TX_CMD_DAT 0x00 +#define DDR_VAR_TX_CMD_DAT 0x80 +#define DDR_FIX_TX_DAT_14_TSU 0x00 +#define DDR_FIX_TX_DAT_12_TSU 0x40 +#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 +#define DDR_FIX_TX_CMD_14_AHEAD 0x20 +#define SD20_TX_NEG_EDGE 0x00 +#define SD20_TX_14_AHEAD 0x10 +#define SD20_TX_SEL_MASK 0x10 +#define DDR_VAR_SDCLK_POL_SWAP 0x01 + +/* MS_CFG */ +#define SAMPLE_TIME_RISING 0x00 +#define SAMPLE_TIME_FALLING 0x80 +#define PUSH_TIME_DEFAULT 0x00 +#define PUSH_TIME_ODD 0x40 +#define NO_EXTEND_TOGGLE 0x00 +#define EXTEND_TOGGLE_CHK 0x20 +#define MS_BUS_WIDTH_1 0x00 +#define MS_BUS_WIDTH_4 0x10 +#define MS_BUS_WIDTH_8 0x18 +#define MS_2K_SECTOR_MODE 0x04 +#define MS_512_SECTOR_MODE 0x00 +#define MS_TOGGLE_TIMEOUT_EN 0x00 +#define MS_TOGGLE_TIMEOUT_DISEN 0x01 +#define MS_NO_CHECK_INT 0x02 + +/* MS_TRANS_CFG */ +#define WAIT_INT 0x80 +#define NO_WAIT_INT 0x00 +#define NO_AUTO_READ_INT_REG 0x00 +#define AUTO_READ_INT_REG 0x40 +#define MS_CRC16_ERR 0x20 +#define MS_RDY_TIMEOUT 0x10 +#define MS_INT_CMDNK 0x08 +#define MS_INT_BREQ 0x04 +#define MS_INT_ERR 0x02 +#define MS_INT_CED 0x01 + +/* MS_TRANSFER */ +#define MS_TRANSFER_START 0x80 +#define MS_TRANSFER_END 0x40 +#define MS_TRANSFER_ERR 0x20 +#define MS_BS_STATE 0x10 +#define MS_TM_READ_BYTES 0x00 +#define 
MS_TM_NORMAL_READ 0x01 +#define MS_TM_WRITE_BYTES 0x04 +#define MS_TM_NORMAL_WRITE 0x05 +#define MS_TM_AUTO_READ 0x08 +#define MS_TM_AUTO_WRITE 0x0C +#define MS_TM_SET_CMD 0x06 +#define MS_TM_COPY_PAGE 0x07 +#define MS_TM_MULTI_READ 0x02 +#define MS_TM_MULTI_WRITE 0x03 + +/* MC_FIFO_CTL */ +#define FIFO_FLUSH 0x01 + +/* MC_DMA_RST */ +#define DMA_RESET 0x01 + +/* MC_DMA_CTL */ +#define DMA_TC_EQ_0 0x80 +#define DMA_DIR_TO_CARD 0x00 +#define DMA_DIR_FROM_CARD 0x02 +#define DMA_EN 0x01 +#define DMA_128 (0 << 2) +#define DMA_256 (1 << 2) +#define DMA_512 (2 << 2) +#define DMA_1024 (3 << 2) +#define DMA_PACK_SIZE_MASK 0x0C + +/* CARD_INT_PEND */ +#define XD_INT 0x10 +#define MS_INT 0x08 +#define SD_INT 0x04 + +/* LED operations*/ +static inline int rtsx_usb_turn_on_led(struct rtsx_ucr *ucr) +{ + return rtsx_usb_ep0_write_register(ucr, CARD_GPIO, 0x03, 0x02); +} + +static inline int rtsx_usb_turn_off_led(struct rtsx_ucr *ucr) +{ + return rtsx_usb_ep0_write_register(ucr, CARD_GPIO, 0x03, 0x03); +} + +/* HW error clearing */ +static inline void rtsx_usb_clear_fsm_err(struct rtsx_ucr *ucr) +{ + rtsx_usb_ep0_write_register(ucr, SFSM_ED, 0xf8, 0xf8); +} + +static inline void rtsx_usb_clear_dma_err(struct rtsx_ucr *ucr) +{ + rtsx_usb_ep0_write_register(ucr, MC_FIFO_CTL, + FIFO_FLUSH, FIFO_FLUSH); + rtsx_usb_ep0_write_register(ucr, MC_DMA_RST, DMA_RESET, DMA_RESET); +} +#endif /* __RTS51139_H */ -- cgit v1.2.3 From 65bedda1feec4f57e1322a200853cc29079b01c6 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 29 Nov 2017 03:08:35 +0000 Subject: ASoC: rsnd: dma.c: spin lock is no longer needed in IRQ handler DMA handler had needed to calculate pointer before, but it doesn't need now. Thus, we can remove unnecessary spin lock from DMAC handler. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/dma.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index 4d750bdf8e24..41de23417c4a 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -71,25 +71,7 @@ static struct rsnd_mod mem = { static void __rsnd_dmaen_complete(struct rsnd_mod *mod, struct rsnd_dai_stream *io) { - struct rsnd_priv *priv = rsnd_mod_to_priv(mod); - bool elapsed = false; - unsigned long flags; - - /* - * Renesas sound Gen1 needs 1 DMAC, - * Gen2 needs 2 DMAC. - * In Gen2 case, it are Audio-DMAC, and Audio-DMAC-peri-peri. - * But, Audio-DMAC-peri-peri doesn't have interrupt, - * and this driver is assuming that here. - */ - spin_lock_irqsave(&priv->lock, flags); - if (rsnd_io_is_working(io)) - elapsed = true; - - spin_unlock_irqrestore(&priv->lock, flags); - - if (elapsed) rsnd_dai_period_elapsed(io); } -- cgit v1.2.3 From 5da4e04ae480aac5274dd020af3dfa21935028f7 Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Wed, 29 Nov 2017 17:09:56 +0800 Subject: misc: rtsx: Add support for RTS5260 Add support for new chip rts5260. In order to support rts5260, the definitions of some internal registers and workflow have to be modified and are different from its predecessors and OCP function is added for RTS5260. So we need this patch to ensure RTS5260 can work. 
Signed-off-by: Rui Feng Reviewed-by: Daniel Bristot de Oliveira Tested-by: Perry Yuan Signed-off-by: Lee Jones --- drivers/misc/cardreader/Kconfig | 2 +- drivers/misc/cardreader/Makefile | 2 +- drivers/misc/cardreader/rts5260.c | 748 +++++++++++++++++++++++++++++++++++++ drivers/misc/cardreader/rts5260.h | 45 +++ drivers/misc/cardreader/rtsx_pcr.c | 123 +++++- drivers/misc/cardreader/rtsx_pcr.h | 10 + include/linux/rtsx_pci.h | 234 +++++++++++- 7 files changed, 1157 insertions(+), 7 deletions(-) create mode 100644 drivers/misc/cardreader/rts5260.c create mode 100644 drivers/misc/cardreader/rts5260.h diff --git a/drivers/misc/cardreader/Kconfig b/drivers/misc/cardreader/Kconfig index e7d835a160bb..69e815e32a8c 100644 --- a/drivers/misc/cardreader/Kconfig +++ b/drivers/misc/cardreader/Kconfig @@ -4,7 +4,7 @@ config MISC_RTSX_PCI select MFD_CORE help This supports for Realtek PCI-Express card reader including rts5209, - rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411. + rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411, rts5260. Realtek card readers support access to many types of memory cards, such as Memory Stick, Memory Stick Pro, Secure Digital and MultiMediaCard. diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile index 78337b24fc62..9fabfcc6fa7a 100644 --- a/drivers/misc/cardreader/Makefile +++ b/drivers/misc/cardreader/Makefile @@ -1,4 +1,4 @@ -rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c new file mode 100644 index 000000000000..3b308640282d --- /dev/null +++ b/drivers/misc/cardreader/rts5260.c @@ -0,0 +1,748 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2016-2017 Realtek Semiconductor Corp. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Author: + * Steven FENG + * Rui FENG + * Wei WANG + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/rtsx_pci.h> + +#include "rts5260.h" +#include "rtsx_pcr.h" + +static u8 rts5260_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & IC_VERSION_MASK; +} + +static void rts5260_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[6][3] = { + {0x94, 0x94, 0x94}, + {0x11, 0x11, 0x18}, + {0x55, 0x55, 0x5C}, + {0x94, 0x94, 0x94}, + {0x94, 0x94, 0x94}, + {0xFF, 0xFF, 0xFF}, + }; + u8 driving_1v8[6][3] = { + {0x9A, 0x89, 0x89}, + {0xC4, 0xC4, 0xC4}, + {0x3C, 0x3C, 0x3C}, + {0x9B, 0x99, 0x99}, + {0x9A, 0x89, 0x89}, + {0xFE, 0xFE, 0xFE}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 
0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN); +} + +static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_EN); +} + +static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_DISABLE); +} + +static int rts5260_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, RTS5260_REG_GPIO_CTL0, + RTS5260_REG_GPIO_MASK, RTS5260_REG_GPIO_ON); +} + +static int rts5260_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, RTS5260_REG_GPIO_CTL0, + RTS5260_REG_GPIO_MASK, RTS5260_REG_GPIO_OFF); +} + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5260_sd_pull_ctl_enable_tbl[] = { 
+ RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5260_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5260_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5260_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +static int sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK + | SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF, + CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + + return 0; +} + +static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2, + DV331812_VDD1, DV331812_VDD1); + err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + if (err < 0) + return err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG0, + 
RTS5260_DVCC_TUNE_MASK, RTS5260_DVCC_33); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG1, + LDO_POW_SDVDD1_MASK, LDO_POW_SDVDD1_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2, + DV331812_POWERON, DV331812_POWERON); + err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + + msleep(20); + + if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 || + pcr->extra_caps & EXTRA_CAPS_SD_SDR104) + sd_set_sample_push_timing_sd30(pcr); + + /* Initialize SD_CFG1 register */ + rtsx_pci_write_register(pcr, SD_CFG1, 0xFF, + SD_CLK_DIVIDE_128 | SD_20_MODE); + + rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL, + 0xFF, SD20_RX_POS_EDGE); + rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0); + rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR, + SD_STOP | SD_CLR_ERR); + + /* Reset SD_CFG3 register */ + rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0); + rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG, + SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 | + SD30_CLK_STOP_CFG0, 0); + + rtsx_pci_write_register(pcr, REG_PRE_RW_MODE, EN_INFINITE_MODE, 0); + + return err; +} + +static int rts5260_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + DV331812_VDD1, DV331812_VDD1); + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + DV331812_MASK, DV331812_33); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + DV331812_VDD1, DV331812_VDD1); + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + DV331812_MASK, DV331812_17); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, + SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5260_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); +} + +static void rts5260_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, 
RTSX_HDBCTLR, STOP_DMA); + rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0, + RTS5260_DMA_RST | RTS5260_ADMA3_RST, + RTS5260_DMA_RST | RTS5260_ADMA3_RST); + rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH); +} + +static void rts5260_card_before_power_off(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rts5260_stop_cmd(pcr); + rts5260_switch_output_voltage(pcr, OUTPUT_3V3); + + if (option->ocp_en) + rtsx_pci_disable_ocp(pcr); +} + +static int rts5260_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + + rts5260_card_before_power_off(pcr); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG1, + LDO_POW_SDVDD1_MASK, LDO_POW_SDVDD1_OFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2, + DV331812_POWERON, DV331812_POWEROFF); + err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + + return err; +} + +static void rts5260_init_ocp(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) { + u8 mask, val; + + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN); + rtsx_pci_write_register(pcr, RTS5260_DVIO_CTRL, + RTS5260_DVIO_OCP_EN | + RTS5260_DVIO_OCP_CL_EN, + RTS5260_DVIO_OCP_EN | + RTS5260_DVIO_OCP_CL_EN); + + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_THD_MASK, + option->sd_400mA_ocp_thd); + + rtsx_pci_write_register(pcr, RTS5260_DVIO_CTRL, + RTS5260_DVIO_OCP_THD_MASK, + RTS5260_DVIO_OCP_THD_350); + + rtsx_pci_write_register(pcr, RTS5260_DV331812_CFG, + RTS5260_DV331812_OCP_THD_MASK, + RTS5260_DV331812_OCP_THD_210); + + mask = SD_OCP_GLITCH_MASK | SDVIO_OCP_GLITCH_MASK; + val = pcr->hw_param.ocp_glitch; + rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val); + + rtsx_pci_enable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN, 0); + rtsx_pci_write_register(pcr, 
RTS5260_DVIO_CTRL, + RTS5260_DVIO_OCP_EN | + RTS5260_DVIO_OCP_CL_EN, 0); + } +} + +static void rts5260_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, 0); + + val = SD_OCP_INT_EN | SD_DETECT_EN; + val |= SDVIO_OCP_INT_EN | SDVIO_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_DETECT_EN | DV3318_OCP_INT_EN, + DV3318_DETECT_EN | DV3318_OCP_INT_EN); +} + +static void rts5260_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + + mask = SD_OCP_INT_EN | SD_DETECT_EN; + mask |= SDVIO_OCP_INT_EN | SDVIO_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_DETECT_EN | DV3318_OCP_INT_EN, 0); + + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, + OC_POWER_DOWN); +} + +int rts5260_get_ocpstat(struct rtsx_pcr *pcr, u8 *val) +{ + return rtsx_pci_read_register(pcr, REG_OCPSTAT, val); +} + +int rts5260_get_ocpstat2(struct rtsx_pcr *pcr, u8 *val) +{ + return rtsx_pci_read_register(pcr, REG_DV3318_OCPSTAT, val); +} + +void rts5260_clear_ocpstat(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + u8 val = 0; + + mask = SD_OCP_INT_CLR | SD_OC_CLR; + mask |= SDVIO_OCP_INT_CLR | SDVIO_OC_CLR; + val = SD_OCP_INT_CLR | SD_OC_CLR; + val |= SDVIO_OCP_INT_CLR | SDVIO_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR); + udelay(10); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR, 0); +} + +void rts5260_process_ocp(struct rtsx_pcr *pcr) +{ + if (!pcr->option.ocp_en) + return; + + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + rts5260_get_ocpstat2(pcr, &pcr->ocp_stat2); + if (pcr->card_exist & SD_EXIST) + sd_power_off_card3v3(pcr); + else if (pcr->card_exist & 
MS_EXIST) + ms_power_off_card3v3(pcr); + + if (!(pcr->card_exist & MS_EXIST) && !(pcr->card_exist & SD_EXIST)) { + if ((pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER | + SDVIO_OC_NOW | SDVIO_OC_EVER)) || + (pcr->ocp_stat2 & (DV3318_OCP_NOW | DV3318_OCP_EVER))) + rtsx_pci_clear_ocpstat(pcr); + pcr->ocp_stat = 0; + pcr->ocp_stat2 = 0; + } + + if ((pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER | + SDVIO_OC_NOW | SDVIO_OC_EVER)) || + (pcr->ocp_stat2 & (DV3318_OCP_NOW | DV3318_OCP_EVER))) { + if (pcr->card_exist & SD_EXIST) + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + else if (pcr->card_exist & MS_EXIST) + rtsx_pci_write_register(pcr, CARD_OE, MS_OUTPUT_EN, 0); + } +} + +int rts5260_init_hw(struct rtsx_pcr *pcr) +{ + int err; + + rtsx_pci_init_ocp(pcr); + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG1, + AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE); + /* Rest L1SUB Config */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CLK_FORCE_CTL, + CLK_PM_EN, CLK_PM_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWD_SUSPEND_EN, 0xFF, 0xFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + PWR_GATE_EN, PWR_GATE_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, REG_VREF, + PWD_SUSPND_EN, PWD_SUSPND_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RBCTL, + U_AUTO_DMA_EN_MASK, U_AUTO_DMA_DISABLE); + + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, + OBFF_EN_MASK, OBFF_DISABLE); + + err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + if (err < 0) + return err; + + return 0; +} + +static void rts5260_pwr_saving_setting(struct rtsx_pcr *pcr) +{ + int lss_l1_1, lss_l1_2; + + lss_l1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN) + | rtsx_check_dev_flag(pcr, PM_L1_1_EN); + lss_l1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN) + | 
rtsx_check_dev_flag(pcr, PM_L1_2_EN); + + if (lss_l1_2) { + pcr_dbg(pcr, "Set parameters for L1.2."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_2_EN); + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_2_PD_FE_EN); + } else if (lss_l1_1) { + pcr_dbg(pcr, "Set parameters for L1.1."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_1_EN); + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_1_PD_FE_EN); + } else { + pcr_dbg(pcr, "Set parameters for L1."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_0_EN); + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_0_PD_FE_EN); + } + + rtsx_pci_write_register(pcr, CFG_L1_0_PCIE_DPHY_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_PCIE_MAC_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_SD30_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_SD40_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_SYS_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + /*Option cut APHY*/ + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_0, + 0xFF, CFG_PCIE_APHY_OFF_0_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_1, + 0xFF, CFG_PCIE_APHY_OFF_1_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_2, + 0xFF, CFG_PCIE_APHY_OFF_2_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_3, + 0xFF, CFG_PCIE_APHY_OFF_3_DEFAULT); + /*CDR DEC*/ + rtsx_pci_write_register(pcr, PWC_CDR, 0xFF, PWC_CDR_DEFAULT); + /*PWMPFM*/ + rtsx_pci_write_register(pcr, CFG_LP_FPWM_VALUE, + 0xFF, CFG_LP_FPWM_VALUE_DEFAULT); + /*No Power Saving WA*/ + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_MISC_RET_VALUE, + 0xFF, CFG_L1_0_CRC_MISC_RET_VALUE_DEFAULT); +} + +static void rts5260_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + u32 lval; + + rtsx_pci_read_config_dword(pcr, 
PCR_ASPM_SETTING_5260, &lval); + + if (lval & ASPM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & ASPM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + + rts5260_pwr_saving_setting(pcr); + + if (option->ltr_en) { + u16 val; + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); + if (val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } else { + option->ltr_enabled = false; + } + } + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; +} + +static int rts5260_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + /* Set mcu_cnt to 7 to ensure data can be sampled properly */ + rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07); + rtsx_pci_write_register(pcr, SSC_DIV_N_0, 0xFF, 0x5D); + + rts5260_init_from_cfg(pcr); + + /* force no MDIO*/ + rtsx_pci_write_register(pcr, RTS5260_AUTOLOAD_CFG4, + 0xFF, RTS5260_MIMO_DISABLE); + /*Modify SDVCC Tune Default Parameters!*/ + rtsx_pci_write_register(pcr, LDO_VCC_CFG0, + RTS5260_DVCC_TUNE_MASK, RTS5260_DVCC_33); + + rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); + + rts5260_init_hw(pcr); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced to drive + * low to request the clock. Direct writes: no cmd batch is open here. 
+ */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + return 0; +} + +void rts5260_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + if (enable) + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + if (!enable) + val = FORCE_ASPM_CTL0; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + + pcr->aspm_enabled = enable; +} + +static void rts5260_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR); + int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST); + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + if (aspm_L1_1 || aspm_L1_2) { + if (rtsx_check_dev_flag(pcr, + LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) { + if (card_exist) + val &= ~L1OFF_MBIAS2_EN_5250; + else + val |= L1OFF_MBIAS2_EN_5250; + } + } + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts5260_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .turn_on_led = rts5260_turn_on_led, + .turn_off_led = rts5260_turn_off_led, + .extra_init_hw = rts5260_extra_init_hw, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = 
rtsx_base_disable_auto_blink, + .card_power_on = rts5260_card_power_on, + .card_power_off = rts5260_card_power_off, + .switch_output_voltage = rts5260_switch_output_voltage, + .force_power_down = rtsx_base_force_power_down, + .stop_cmd = rts5260_stop_cmd, + .set_aspm = rts5260_set_aspm, + .set_l1off_cfg_sub_d0 = rts5260_set_l1off_cfg_sub_d0, + .enable_ocp = rts5260_enable_ocp, + .disable_ocp = rts5260_disable_ocp, + .init_ocp = rts5260_init_ocp, + .process_ocp = rts5260_process_ocp, + .get_ocpstat = rts5260_get_ocpstat, + .clear_ocpstat = rts5260_clear_ocpstat, +}; + +void rts5260_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + struct rtsx_hw_param *hw_param = &pcr->hw_param; + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5260_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5260_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5260_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5260_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5260_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + + pcr->ops = &rts5260_pcr_ops; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->dev_aspm_mode = DEV_ASPM_DYNAMIC; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + 
option->ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + option->ocp_en = 1; + if (option->ocp_en) + hw_param->interrupt_en |= SD_OC_INT_EN; + hw_param->ocp_glitch = SD_OCP_GLITCH_10M | SDVIO_OCP_GLITCH_800U; + option->sd_400mA_ocp_thd = RTS5260_DVCC_OCP_THD_550; + option->sd_800mA_ocp_thd = RTS5260_DVCC_OCP_THD_970; +} diff --git a/drivers/misc/cardreader/rts5260.h b/drivers/misc/cardreader/rts5260.h new file mode 100644 index 000000000000..53a1411c8868 --- /dev/null +++ b/drivers/misc/cardreader/rts5260.h @@ -0,0 +1,45 @@ +#ifndef __RTS5260_H__ +#define __RTS5260_H__ + +#define RTS5260_DVCC_CTRL 0xFF73 +#define RTS5260_DVCC_OCP_EN (0x01 << 7) +#define RTS5260_DVCC_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DVCC_POWERON (0x01 << 3) +#define RTS5260_DVCC_OCP_CL_EN (0x01 << 2) + +#define RTS5260_DVIO_CTRL 0xFF75 +#define RTS5260_DVIO_OCP_EN (0x01 << 7) +#define RTS5260_DVIO_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DVIO_POWERON (0x01 << 3) +#define RTS5260_DVIO_OCP_CL_EN (0x01 << 2) + +#define RTS5260_DV331812_CFG 0xFF71 +#define RTS5260_DV331812_OCP_EN (0x01 << 7) +#define RTS5260_DV331812_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DV331812_POWERON (0x01 << 3) +#define RTS5260_DV331812_SEL (0x01 << 2) +#define RTS5260_DV331812_VDD1 (0x01 << 2) +#define RTS5260_DV331812_VDD2 (0x00 << 2) + +#define RTS5260_DV331812_OCP_THD_120 (0x00 << 4) +#define RTS5260_DV331812_OCP_THD_140 (0x01 << 4) +#define RTS5260_DV331812_OCP_THD_160 (0x02 << 4) +#define RTS5260_DV331812_OCP_THD_180 (0x03 << 4) +#define RTS5260_DV331812_OCP_THD_210 (0x04 << 4) +#define RTS5260_DV331812_OCP_THD_240 (0x05 << 4) +#define RTS5260_DV331812_OCP_THD_270 (0x06 << 4) +#define RTS5260_DV331812_OCP_THD_300 (0x07 << 4) + +#define RTS5260_DVIO_OCP_THD_250 (0x00 << 4) +#define RTS5260_DVIO_OCP_THD_300 (0x01 << 4) +#define RTS5260_DVIO_OCP_THD_350 (0x02 << 4) +#define RTS5260_DVIO_OCP_THD_400 (0x03 << 4) +#define RTS5260_DVIO_OCP_THD_450 (0x04 << 4) +#define RTS5260_DVIO_OCP_THD_500 
(0x05 << 4) +#define RTS5260_DVIO_OCP_THD_550 (0x06 << 4) +#define RTS5260_DVIO_OCP_THD_600 (0x07 << 4) + +#define RTS5260_DVCC_OCP_THD_550 (0x00 << 4) +#define RTS5260_DVCC_OCP_THD_970 (0x05 << 4) + +#endif diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index b60bd2a3ba64..99adc67bbf73 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -62,6 +62,7 @@ static const struct pci_device_id rtsx_pci_ids[] = { { PCI_DEVICE(0x10EC, 0x5286), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5260), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { 0, } }; @@ -334,6 +335,9 @@ EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register); void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr) { + if (pcr->ops->stop_cmd) + return pcr->ops->stop_cmd(pcr); + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); @@ -826,7 +830,7 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, return err; /* Wait SSC clock stable */ - udelay(10); + udelay(SSC_CLOCK_STABLE_WAIT); err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); if (err < 0) return err; @@ -963,6 +967,20 @@ static void rtsx_pci_card_detect(struct work_struct *work) pcr->slots[RTSX_MS_CARD].p_dev); } +void rtsx_pci_process_ocp(struct rtsx_pcr *pcr) +{ + if (pcr->ops->process_ocp) + pcr->ops->process_ocp(pcr); +} + +int rtsx_pci_process_ocp_interrupt(struct rtsx_pcr *pcr) +{ + if (pcr->option.ocp_en) + rtsx_pci_process_ocp(pcr); + + return 0; +} + static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) { struct rtsx_pcr *pcr = dev_id; @@ -987,6 +1005,9 @@ static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) int_reg &= (pcr->bier | 0x7FFFFF); + if (int_reg & SD_OC_INT) + rtsx_pci_process_ocp_interrupt(pcr); + if (int_reg & SD_INT) { if (int_reg & SD_EXIST) { pcr->card_inserted 
|= SD_EXIST; @@ -1119,6 +1140,102 @@ static void rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state) } #endif +void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = SD_OCP_INT_EN | SD_DETECT_EN; + + if (pcr->ops->enable_ocp) + pcr->ops->enable_ocp(pcr); + else + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + +} + +void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = SD_OCP_INT_EN | SD_DETECT_EN; + + if (pcr->ops->disable_ocp) + pcr->ops->disable_ocp(pcr); + else + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); +} + +void rtsx_pci_init_ocp(struct rtsx_pcr *pcr) +{ + if (pcr->ops->init_ocp) { + pcr->ops->init_ocp(pcr); + } else { + struct rtsx_cr_option *option = &(pcr->option); + + if (option->ocp_en) { + u8 val = option->sd_400mA_ocp_thd; + + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, 0); + rtsx_pci_write_register(pcr, REG_OCPPARA1, + SD_OCP_TIME_MASK, SD_OCP_TIME_800); + rtsx_pci_write_register(pcr, REG_OCPPARA2, + SD_OCP_THD_MASK, val); + rtsx_pci_write_register(pcr, REG_OCPGLITCH, + SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch); + rtsx_pci_enable_ocp(pcr); + } else { + /* OC power down */ + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, + OC_POWER_DOWN); + } + } +} + +int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val) +{ + if (pcr->ops->get_ocpstat) + return pcr->ops->get_ocpstat(pcr, val); + else + return rtsx_pci_read_register(pcr, REG_OCPSTAT, val); +} + +void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr) +{ + if (pcr->ops->clear_ocpstat) { + pcr->ops->clear_ocpstat(pcr); + } else { + u8 mask = SD_OCP_INT_CLR | SD_OC_CLR; + u8 val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + } +} + +int sd_power_off_card3v3(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN | + MS_CLK_EN | SD40_CLK_EN, 0); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + + rtsx_pci_card_power_off(pcr, 
RTSX_SD_CARD); + + msleep(50); + + rtsx_pci_card_pull_ctl_disable(pcr, RTSX_SD_CARD); + + return 0; +} + +int ms_power_off_card3v3(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN | + MS_CLK_EN | SD40_CLK_EN, 0); + + rtsx_pci_card_pull_ctl_disable(pcr, RTSX_MS_CARD); + + rtsx_pci_write_register(pcr, CARD_OE, MS_OUTPUT_EN, 0); + rtsx_pci_card_power_off(pcr, RTSX_MS_CARD); + + return 0; +} + static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) { int err; @@ -1189,6 +1306,7 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) case PID_5250: case PID_524A: case PID_525A: + case PID_5260: rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1); break; default: @@ -1265,6 +1383,9 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) case 0x5286: rtl8402_init_params(pcr); break; + case 0x5260: + rts5260_init_params(pcr); + break; } pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n", diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h index b0691c95b103..c544e35d0154 100644 --- a/drivers/misc/cardreader/rtsx_pcr.h +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -44,6 +44,8 @@ #define ASPM_MASK_NEG 0xFC #define MASK_8_BIT_DEF 0xFF +#define SSC_CLOCK_STABLE_WAIT 130 + int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val); int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val); @@ -57,6 +59,7 @@ void rts5249_init_params(struct rtsx_pcr *pcr); void rts524a_init_params(struct rtsx_pcr *pcr); void rts525a_init_params(struct rtsx_pcr *pcr); void rtl8411b_init_params(struct rtsx_pcr *pcr); +void rts5260_init_params(struct rtsx_pcr *pcr); static inline u8 map_sd_drive(int idx) { @@ -99,5 +102,12 @@ do { \ int rtsx_gops_pm_reset(struct rtsx_pcr *pcr); int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency); int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val); +void rtsx_pci_init_ocp(struct rtsx_pcr *pcr); +void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr); +void rtsx_pci_enable_ocp(struct 
rtsx_pcr *pcr); +int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val); +void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr); +int sd_power_off_card3v3(struct rtsx_pcr *pcr); +int ms_power_off_card3v3(struct rtsx_pcr *pcr); #endif diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 82abac70b3db..a44670e1c537 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -203,6 +203,7 @@ #define SD_DDR_MODE 0x04 #define SD_30_MODE 0x08 #define SD_CLK_DIVIDE_MASK 0xC0 +#define SD_MODE_SELECT_MASK 0x0C #define SD_CFG2 0xFDA1 #define SD_CALCULATE_CRC7 0x00 #define SD_NO_CALCULATE_CRC7 0x80 @@ -226,6 +227,7 @@ #define SD_RSP_TYPE_R6 0x01 #define SD_RSP_TYPE_R7 0x01 #define SD_CFG3 0xFDA2 +#define SD30_CLK_END_EN 0x10 #define SD_RSP_80CLK_TIMEOUT_EN 0x01 #define SD_STAT1 0xFDA3 @@ -309,6 +311,12 @@ #define SD_DATA_STATE 0xFDB6 #define SD_DATA_IDLE 0x80 +#define REG_SD_STOP_SDCLK_CFG 0xFDB8 +#define SD30_CLK_STOP_CFG_EN 0x04 +#define SD30_CLK_STOP_CFG1 0x02 +#define SD30_CLK_STOP_CFG0 0x01 +#define REG_PRE_RW_MODE 0xFD70 +#define EN_INFINITE_MODE 0x01 #define SRCTL 0xFC13 @@ -434,6 +442,7 @@ #define CARD_CLK_EN 0xFD69 #define SD_CLK_EN 0x04 #define MS_CLK_EN 0x08 +#define SD40_CLK_EN 0x10 #define SDIO_CTRL 0xFD6B #define CD_PAD_CTL 0xFD73 #define CD_DISABLE_MASK 0x07 @@ -453,8 +462,8 @@ #define FPDCTL 0xFC00 #define SSC_POWER_DOWN 0x01 #define SD_OC_POWER_DOWN 0x02 -#define ALL_POWER_DOWN 0x07 -#define OC_POWER_DOWN 0x06 +#define ALL_POWER_DOWN 0x03 +#define OC_POWER_DOWN 0x02 #define PDINFO 0xFC01 #define CLK_CTL 0xFC02 @@ -490,6 +499,9 @@ #define FPGA_PULL_CTL 0xFC1D #define OLT_LED_CTL 0xFC1E +#define LED_SHINE_MASK 0x08 +#define LED_SHINE_EN 0x08 +#define LED_SHINE_DISABLE 0x00 #define GPIO_CTL 0xFC1F #define LDO_CTL 0xFC1E @@ -511,7 +523,11 @@ #define BPP_LDO_ON 0x00 #define BPP_LDO_SUSPEND 0x02 #define BPP_LDO_OFF 0x03 +#define EFUSE_CTL 0xFC30 +#define EFUSE_ADD 0xFC31 #define SYS_VER 0xFC32 +#define EFUSE_DATAL 0xFC34 +#define 
EFUSE_DATAH 0xFC35 #define CARD_PULL_CTL1 0xFD60 #define CARD_PULL_CTL2 0xFD61 @@ -553,6 +569,9 @@ #define RBBC1 0xFE2F #define RBDAT 0xFE30 #define RBCTL 0xFE34 +#define U_AUTO_DMA_EN_MASK 0x20 +#define U_AUTO_DMA_DISABLE 0x00 +#define RB_FLUSH 0x80 #define CFGADDR0 0xFE35 #define CFGADDR1 0xFE36 #define CFGDATA0 0xFE37 @@ -581,6 +600,8 @@ #define LTR_LATENCY_MODE_HW 0 #define LTR_LATENCY_MODE_SW BIT(6) #define OBFF_CFG 0xFE4C +#define OBFF_EN_MASK 0x03 +#define OBFF_DISABLE 0x00 #define CDRESUMECTL 0xFE52 #define WAKE_SEL_CTL 0xFE54 @@ -595,6 +616,7 @@ #define FORCE_ASPM_L0_EN 0x01 #define FORCE_ASPM_NO_ASPM 0x00 #define PM_CLK_FORCE_CTL 0xFE58 +#define CLK_PM_EN 0x01 #define FUNC_FORCE_CTL 0xFE59 #define FUNC_FORCE_UPME_XMT_DBG 0x02 #define PERST_GLITCH_WIDTH 0xFE5C @@ -620,14 +642,23 @@ #define LDO_PWR_SEL 0xFE78 #define L1SUB_CONFIG1 0xFE8D +#define AUX_CLK_ACTIVE_SEL_MASK 0x01 +#define MAC_CKSW_DONE 0x00 #define L1SUB_CONFIG2 0xFE8E #define L1SUB_AUTO_CFG 0x02 #define L1SUB_CONFIG3 0xFE8F #define L1OFF_MBIAS2_EN_5250 BIT(7) #define DUMMY_REG_RESET_0 0xFE90 +#define IC_VERSION_MASK 0x0F +#define REG_VREF 0xFE97 +#define PWD_SUSPND_EN 0x10 +#define RTS5260_DMA_RST_CTL_0 0xFEBF +#define RTS5260_DMA_RST 0x80 +#define RTS5260_ADMA3_RST 0x40 #define AUTOLOAD_CFG_BASE 0xFF00 +#define RELINK_TIME_MASK 0x01 #define PETXCFG 0xFF03 #define FORCE_CLKREQ_DELINK_MASK BIT(7) #define FORCE_CLKREQ_LOW 0x80 @@ -667,15 +698,24 @@ #define LDO_DV18_CFG 0xFF70 #define LDO_DV18_SR_MASK 0xC0 #define LDO_DV18_SR_DF 0x40 +#define DV331812_MASK 0x70 +#define DV331812_33 0x70 +#define DV331812_17 0x30 #define LDO_CONFIG2 0xFF71 #define LDO_D3318_MASK 0x07 #define LDO_D3318_33V 0x07 #define LDO_D3318_18V 0x02 +#define DV331812_VDD1 0x04 +#define DV331812_POWERON 0x08 +#define DV331812_POWEROFF 0x00 #define LDO_VCC_CFG0 0xFF72 #define LDO_VCC_LMTVTH_MASK 0x30 #define LDO_VCC_LMTVTH_2A 0x10 +/*RTS5260*/ +#define RTS5260_DVCC_TUNE_MASK 0x70 +#define RTS5260_DVCC_33 0x70 #define LDO_VCC_CFG1 
0xFF73 #define LDO_VCC_REF_TUNE_MASK 0x30 @@ -684,6 +724,10 @@ #define LDO_VCC_1V8 0x04 #define LDO_VCC_3V3 0x07 #define LDO_VCC_LMT_EN 0x08 +/*RTS5260*/ +#define LDO_POW_SDVDD1_MASK 0x08 +#define LDO_POW_SDVDD1_ON 0x08 +#define LDO_POW_SDVDD1_OFF 0x00 #define LDO_VIO_CFG 0xFF75 #define LDO_VIO_SR_MASK 0xC0 @@ -711,6 +755,160 @@ #define SD_VIO_LDO_1V8 0x40 #define SD_VIO_LDO_3V3 0x70 +#define RTS5260_AUTOLOAD_CFG4 0xFF7F +#define RTS5260_MIMO_DISABLE 0x8A + +#define RTS5260_REG_GPIO_CTL0 0xFC1A +#define RTS5260_REG_GPIO_MASK 0x01 +#define RTS5260_REG_GPIO_ON 0x01 +#define RTS5260_REG_GPIO_OFF 0x00 + +#define PWR_GLOBAL_CTRL 0xF200 +#define PCIE_L1_2_EN 0x0C +#define PCIE_L1_1_EN 0x0A +#define PCIE_L1_0_EN 0x09 +#define PWR_FE_CTL 0xF201 +#define PCIE_L1_2_PD_FE_EN 0x0C +#define PCIE_L1_1_PD_FE_EN 0x0A +#define PCIE_L1_0_PD_FE_EN 0x09 +#define CFG_PCIE_APHY_OFF_0 0xF204 +#define CFG_PCIE_APHY_OFF_0_DEFAULT 0xBF +#define CFG_PCIE_APHY_OFF_1 0xF205 +#define CFG_PCIE_APHY_OFF_1_DEFAULT 0xFF +#define CFG_PCIE_APHY_OFF_2 0xF206 +#define CFG_PCIE_APHY_OFF_2_DEFAULT 0x01 +#define CFG_PCIE_APHY_OFF_3 0xF207 +#define CFG_PCIE_APHY_OFF_3_DEFAULT 0x00 +#define CFG_L1_0_PCIE_MAC_RET_VALUE 0xF20C +#define CFG_L1_0_PCIE_DPHY_RET_VALUE 0xF20E +#define CFG_L1_0_SYS_RET_VALUE 0xF210 +#define CFG_L1_0_CRC_MISC_RET_VALUE 0xF212 +#define CFG_L1_0_CRC_SD30_RET_VALUE 0xF214 +#define CFG_L1_0_CRC_SD40_RET_VALUE 0xF216 +#define CFG_LP_FPWM_VALUE 0xF219 +#define CFG_LP_FPWM_VALUE_DEFAULT 0x18 +#define PWC_CDR 0xF253 +#define PWC_CDR_DEFAULT 0x03 +#define CFG_L1_0_RET_VALUE_DEFAULT 0x1B +#define CFG_L1_0_CRC_MISC_RET_VALUE_DEFAULT 0x0C + +/* OCPCTL */ +#define SD_DETECT_EN 0x08 +#define SD_OCP_INT_EN 0x04 +#define SD_OCP_INT_CLR 0x02 +#define SD_OC_CLR 0x01 + +#define SDVIO_DETECT_EN (1 << 7) +#define SDVIO_OCP_INT_EN (1 << 6) +#define SDVIO_OCP_INT_CLR (1 << 5) +#define SDVIO_OC_CLR (1 << 4) + +/* OCPSTAT */ +#define SD_OCP_DETECT 0x08 +#define SD_OC_NOW 0x04 +#define SD_OC_EVER 0x02 + 
+#define SDVIO_OC_NOW (1 << 6) +#define SDVIO_OC_EVER (1 << 5) + +#define REG_OCPCTL 0xFD6A +#define REG_OCPSTAT 0xFD6E +#define REG_OCPGLITCH 0xFD6C +#define REG_OCPPARA1 0xFD6B +#define REG_OCPPARA2 0xFD6D + +/* rts5260 DV3318 OCP-related registers */ +#define REG_DV3318_OCPCTL 0xFD89 +#define DV3318_OCP_TIME_MASK 0xF0 +#define DV3318_DETECT_EN 0x08 +#define DV3318_OCP_INT_EN 0x04 +#define DV3318_OCP_INT_CLR 0x02 +#define DV3318_OCP_CLR 0x01 + +#define REG_DV3318_OCPSTAT 0xFD8A +#define DV3318_OCP_GlITCH_TIME_MASK 0xF0 +#define DV3318_OCP_DETECT 0x08 +#define DV3318_OCP_NOW 0x04 +#define DV3318_OCP_EVER 0x02 + +#define SD_OCP_GLITCH_MASK 0x0F + +/* OCPPARA1 */ +#define SDVIO_OCP_TIME_60 0x00 +#define SDVIO_OCP_TIME_100 0x10 +#define SDVIO_OCP_TIME_200 0x20 +#define SDVIO_OCP_TIME_400 0x30 +#define SDVIO_OCP_TIME_600 0x40 +#define SDVIO_OCP_TIME_800 0x50 +#define SDVIO_OCP_TIME_1100 0x60 +#define SDVIO_OCP_TIME_MASK 0x70 + +#define SD_OCP_TIME_60 0x00 +#define SD_OCP_TIME_100 0x01 +#define SD_OCP_TIME_200 0x02 +#define SD_OCP_TIME_400 0x03 +#define SD_OCP_TIME_600 0x04 +#define SD_OCP_TIME_800 0x05 +#define SD_OCP_TIME_1100 0x06 +#define SD_OCP_TIME_MASK 0x07 + +/* OCPPARA2 */ +#define SDVIO_OCP_THD_190 0x00 +#define SDVIO_OCP_THD_250 0x10 +#define SDVIO_OCP_THD_320 0x20 +#define SDVIO_OCP_THD_380 0x30 +#define SDVIO_OCP_THD_440 0x40 +#define SDVIO_OCP_THD_500 0x50 +#define SDVIO_OCP_THD_570 0x60 +#define SDVIO_OCP_THD_630 0x70 +#define SDVIO_OCP_THD_MASK 0x70 + +#define SD_OCP_THD_450 0x00 +#define SD_OCP_THD_550 0x01 +#define SD_OCP_THD_650 0x02 +#define SD_OCP_THD_750 0x03 +#define SD_OCP_THD_850 0x04 +#define SD_OCP_THD_950 0x05 +#define SD_OCP_THD_1050 0x06 +#define SD_OCP_THD_1150 0x07 +#define SD_OCP_THD_MASK 0x07 + +#define SDVIO_OCP_GLITCH_MASK 0xF0 +#define SDVIO_OCP_GLITCH_NONE 0x00 +#define SDVIO_OCP_GLITCH_50U 0x10 +#define SDVIO_OCP_GLITCH_100U 0x20 +#define SDVIO_OCP_GLITCH_200U 0x30 +#define SDVIO_OCP_GLITCH_600U 0x40 +#define SDVIO_OCP_GLITCH_800U 
0x50 +#define SDVIO_OCP_GLITCH_1M 0x60 +#define SDVIO_OCP_GLITCH_2M 0x70 +#define SDVIO_OCP_GLITCH_3M 0x80 +#define SDVIO_OCP_GLITCH_4M 0x90 +#define SDVIO_OCP_GLIVCH_5M 0xA0 +#define SDVIO_OCP_GLITCH_6M 0xB0 +#define SDVIO_OCP_GLITCH_7M 0xC0 +#define SDVIO_OCP_GLITCH_8M 0xD0 +#define SDVIO_OCP_GLITCH_9M 0xE0 +#define SDVIO_OCP_GLITCH_10M 0xF0 + +#define SD_OCP_GLITCH_MASK 0x0F +#define SD_OCP_GLITCH_NONE 0x00 +#define SD_OCP_GLITCH_50U 0x01 +#define SD_OCP_GLITCH_100U 0x02 +#define SD_OCP_GLITCH_200U 0x03 +#define SD_OCP_GLITCH_600U 0x04 +#define SD_OCP_GLITCH_800U 0x05 +#define SD_OCP_GLITCH_1M 0x06 +#define SD_OCP_GLITCH_2M 0x07 +#define SD_OCP_GLITCH_3M 0x08 +#define SD_OCP_GLITCH_4M 0x09 +#define SD_OCP_GLIVCH_5M 0x0A +#define SD_OCP_GLITCH_6M 0x0B +#define SD_OCP_GLITCH_7M 0x0C +#define SD_OCP_GLITCH_8M 0x0D +#define SD_OCP_GLITCH_9M 0x0E +#define SD_OCP_GLITCH_10M 0x0F + /* Phy register */ #define PHY_PCR 0x00 #define PHY_PCR_FORCE_CODE 0xB000 @@ -857,6 +1055,7 @@ #define PCR_ASPM_SETTING_REG1 0x160 #define PCR_ASPM_SETTING_REG2 0x168 +#define PCR_ASPM_SETTING_5260 0x178 #define PCR_SETTING_REG1 0x724 #define PCR_SETTING_REG2 0x814 @@ -890,6 +1089,7 @@ struct pcr_ops { int (*conv_clk_and_div_n)(int clk, int dir); void (*fetch_vendor_settings)(struct rtsx_pcr *pcr); void (*force_power_down)(struct rtsx_pcr *pcr, u8 pm_state); + void (*stop_cmd)(struct rtsx_pcr *pcr); void (*set_aspm)(struct rtsx_pcr *pcr, bool enable); int (*set_ltr_latency)(struct rtsx_pcr *pcr, u32 latency); @@ -897,6 +1097,12 @@ struct pcr_ops { void (*set_l1off_cfg_sub_d0)(struct rtsx_pcr *pcr, int active); void (*full_on)(struct rtsx_pcr *pcr); void (*power_saving)(struct rtsx_pcr *pcr); + void (*enable_ocp)(struct rtsx_pcr *pcr); + void (*disable_ocp)(struct rtsx_pcr *pcr); + void (*init_ocp)(struct rtsx_pcr *pcr); + void (*process_ocp)(struct rtsx_pcr *pcr); + int (*get_ocpstat)(struct rtsx_pcr *pcr, u8 *val); + void (*clear_ocpstat)(struct rtsx_pcr *pcr); }; enum PDEV_STAT 
{PDEV_STAT_IDLE, PDEV_STAT_RUN}; @@ -935,6 +1141,9 @@ enum dev_aspm_mode { * @l1_snooze_delay: l1 snooze delay * @ltr_l1off_sspwrgate: ltr l1off sspwrgate * @ltr_l1off_snooze_sspwrgate: ltr l1off snooze sspwrgate + * @ocp_en: enable ocp flag + * @sd_400mA_ocp_thd: 400mA ocp thd + * @sd_800mA_ocp_thd: 800mA ocp thd */ struct rtsx_cr_option { u32 dev_flags; @@ -949,6 +1158,19 @@ struct rtsx_cr_option { u32 l1_snooze_delay; u8 ltr_l1off_sspwrgate; u8 ltr_l1off_snooze_sspwrgate; + bool ocp_en; + u8 sd_400mA_ocp_thd; + u8 sd_800mA_ocp_thd; +}; + +/* + * struct rtsx_hw_param - card reader hardware param + * @interrupt_en: indicate which interrupt enable + * @ocp_glitch: ocp glitch time + */ +struct rtsx_hw_param { + u32 interrupt_en; + u8 ocp_glitch; }; #define rtsx_set_dev_flag(cr, flag) \ @@ -963,6 +1185,7 @@ struct rtsx_pcr { unsigned int id; int pcie_cap; struct rtsx_cr_option option; + struct rtsx_hw_param hw_param; /* pci resources */ unsigned long addr; @@ -1042,12 +1265,15 @@ struct rtsx_pcr { struct rtsx_slot *slots; u8 dma_error_count; + u8 ocp_stat; + u8 ocp_stat2; }; #define PID_524A 0x524A -#define PID_5249 0x5249 -#define PID_5250 0x5250 +#define PID_5249 0x5249 +#define PID_5250 0x5250 #define PID_525A 0x525A +#define PID_5260 0x5260 #define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) #define PCI_VID(pcr) ((pcr)->pci->vendor) -- cgit v1.2.3 From 226d7449135ffc62866c06d73b28cac90b3f31e4 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Fri, 24 Nov 2017 18:08:26 +0800 Subject: ASoC: nau8825: disable crosstalk by default The driver makes the crosstalk function disabled by default which can simplify the codec function. The platform may not need this function, and disabling it reduces the potential risk. Therefore, we change the property "nuvoton,crosstalk-bypass" to "nuvoton,crosstalk-enable". The crosstalk measurement is enabled if the property is set. Otherwise, it is disabled.
Besides, add more conditions in the entry point of the crosstalk sequence to disable the function completely. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/nau8825.txt | 4 ++-- sound/soc/codecs/nau8825.c | 23 ++++++++++++---------- sound/soc/codecs/nau8825.h | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/nau8825.txt b/Documentation/devicetree/bindings/sound/nau8825.txt index 2f5e973285a6..d16d96839bcb 100644 --- a/Documentation/devicetree/bindings/sound/nau8825.txt +++ b/Documentation/devicetree/bindings/sound/nau8825.txt @@ -69,7 +69,7 @@ Optional properties: - nuvoton,jack-insert-debounce: number from 0 to 7 that sets debounce time to 2^(n+2) ms - nuvoton,jack-eject-debounce: number from 0 to 7 that sets debounce time to 2^(n+2) ms - - nuvoton,crosstalk-bypass: make crosstalk function bypass if set. + - nuvoton,crosstalk-enable: make crosstalk function enable if set. - clocks: list of phandle and clock specifier pairs according to common clock bindings for the clocks described in clock-names @@ -98,7 +98,7 @@ Example: nuvoton,short-key-debounce = <2>; nuvoton,jack-insert-debounce = <7>; nuvoton,jack-eject-debounce = <7>; - nuvoton,crosstalk-bypass; + nuvoton,crosstalk-enable; clock-names = "mclk"; clocks = <&tegra_car TEGRA210_CLK_CLK_OUT_2>; diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 714ce17da717..d3c1a02f1e15 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -815,11 +815,12 @@ static void nau8825_xtalk_work(struct work_struct *work) static void nau8825_xtalk_cancel(struct nau8825 *nau8825) { - /* If the xtalk_protect is true, that means the process is still - * on going. The driver forces to cancel the cross talk task and + /* If the crosstalk is enabled and the process is on going, + * the driver forces to cancel the crosstalk task and * restores the configuration to original status.
*/ - if (nau8825->xtalk_protect) { + if (nau8825->xtalk_enable && nau8825->xtalk_state != + NAU8825_XTALK_DONE) { cancel_work_sync(&nau8825->xtalk_work); nau8825_xtalk_clean(nau8825); } @@ -1686,7 +1687,7 @@ static irqreturn_t nau8825_interrupt(int irq, void *data) } else if (active_irq & NAU8825_HEADSET_COMPLETION_IRQ) { if (nau8825_is_jack_inserted(regmap)) { event |= nau8825_jack_insert(nau8825); - if (!nau8825->xtalk_bypass && !nau8825->high_imped) { + if (nau8825->xtalk_enable && !nau8825->high_imped) { /* Apply the cross talk suppression in the * headset without high impedance. */ @@ -1732,8 +1733,10 @@ static irqreturn_t nau8825_interrupt(int irq, void *data) nau8825->xtalk_event_mask = event_mask; } } else if (active_irq & NAU8825_IMPEDANCE_MEAS_IRQ) { - schedule_work(&nau8825->xtalk_work); - clear_irq = NAU8825_IMPEDANCE_MEAS_IRQ; + if (nau8825->xtalk_enable) { + schedule_work(&nau8825->xtalk_work); + clear_irq = NAU8825_IMPEDANCE_MEAS_IRQ; + } } else if ((active_irq & NAU8825_JACK_INSERTION_IRQ_MASK) == NAU8825_JACK_INSERTION_DETECTED) { /* One more step to check GPIO status directly. 
Thus, the @@ -2440,8 +2443,8 @@ static void nau8825_print_device_properties(struct nau8825 *nau8825) nau8825->jack_insert_debounce); dev_dbg(dev, "jack-eject-debounce: %d\n", nau8825->jack_eject_debounce); - dev_dbg(dev, "crosstalk-bypass: %d\n", - nau8825->xtalk_bypass); + dev_dbg(dev, "crosstalk-enable: %d\n", + nau8825->xtalk_enable); } static int nau8825_read_device_properties(struct device *dev, @@ -2506,8 +2509,8 @@ static int nau8825_read_device_properties(struct device *dev, &nau8825->jack_eject_debounce); if (ret) nau8825->jack_eject_debounce = 0; - nau8825->xtalk_bypass = device_property_read_bool(dev, - "nuvoton,crosstalk-bypass"); + nau8825->xtalk_enable = device_property_read_bool(dev, + "nuvoton,crosstalk-enable"); nau8825->mclk = devm_clk_get(dev, "mclk"); if (PTR_ERR(nau8825->mclk) == -EPROBE_DEFER) { diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h index 8aee5c8647ae..199d6ea4dcdc 100644 --- a/sound/soc/codecs/nau8825.h +++ b/sound/soc/codecs/nau8825.h @@ -476,7 +476,7 @@ struct nau8825 { int xtalk_event_mask; bool xtalk_protect; int imp_rms[NAU8825_XTALK_IMM]; - int xtalk_bypass; + int xtalk_enable; }; int nau8825_enable_jack_detect(struct snd_soc_codec *codec, -- cgit v1.2.3 From c997a92a78161af605b314cbe6cf636663999652 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 22 Nov 2017 00:55:14 +0100 Subject: ASoC: fsl_ssi: call _fsl_ssi_set_dai_fmt() just once in AC'97 mode In AC'97 mode we configure and start SSI RX / TX on probe path via a call to _fsl_ssi_set_dai_fmt() function. We don't need to call this function again later and in fact don't want to do it since this function temporarily sets STCR, SRCR and SCR to some intermediate values. Signed-off-by: Maciej S. 
Szmigiero Acked-by: Nicolin Chen Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 9e97a0529f37..939d1b8894dc 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1079,6 +1079,9 @@ static int fsl_ssi_set_dai_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) { struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(cpu_dai); + if (fsl_ssi_is_ac97(ssi_private)) + return 0; + return _fsl_ssi_set_dai_fmt(cpu_dai->dev, ssi_private, fmt); } -- cgit v1.2.3 From 01ca485171e3253f3aee555437519c0d316d4b0c Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 22 Nov 2017 00:54:26 +0100 Subject: ASoC: fsl_ssi: only enable proper channel slots in AC'97 mode We need to make sure that only proper channel slots (in SACCST register) are enabled at playback start time since some AC'97 CODECs (like VT1613 on UDOO board) were observed requesting via SLOTREQ spurious ones just after an AC'97 link is started but before the CODEC is configured by its driver. When a bit for some channel slot is set in a SLOTREQ request then SSI sets the relevant bit in SACCST automatically, which then 'sticks' until it is manually unset. The SACCST register is not writable directly, we have to use SACCDIS and SACCEN registers to configure it instead (these aren't normal registers: writing a '1' bit at some position in SACCEN sets the relevant bit in SACCST; SACCDIS operates in a similar way but allows unsetting bits in SACCST). Theoretically, this should be necessary only for the very first playback but since some CODECs are so untrustworthy and extra channel slots enabled mean ruined playback let's play safe here and make sure that no extra slots are enabled in SACCST every time a playback is started. Signed-off-by: Maciej S. 
Szmigiero Acked-by: Nicolin Chen Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 52 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 939d1b8894dc..20ef09e1a395 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -577,8 +577,54 @@ static void fsl_ssi_rx_config(struct fsl_ssi_private *ssi_private, bool enable) fsl_ssi_config(ssi_private, enable, &ssi_private->rxtx_reg_val.rx); } +static void fsl_ssi_tx_ac97_saccst_setup(struct fsl_ssi_private *ssi_private) +{ + struct regmap *regs = ssi_private->regs; + + /* no SACC{ST,EN,DIS} regs on imx21-class SSI */ + if (!ssi_private->soc->imx21regs) { + /* + * Note that these below aren't just normal registers. + * They are a way to disable or enable bits in SACCST + * register: + * - writing a '1' bit at some position in SACCEN sets the + * relevant bit in SACCST, + * - writing a '1' bit at some position in SACCDIS unsets + * the relevant bit in SACCST register. + * + * The two writes below first disable all channels slots, + * then enable just slots 3 & 4 ("PCM Playback Left Channel" + * and "PCM Playback Right Channel"). + */ + regmap_write(regs, CCSR_SSI_SACCDIS, 0xff); + regmap_write(regs, CCSR_SSI_SACCEN, 0x300); + } +} + static void fsl_ssi_tx_config(struct fsl_ssi_private *ssi_private, bool enable) { + /* + * Why are we setting up SACCST everytime we are starting a + * playback? + * Some CODECs (like VT1613 CODEC on UDOO board) like to + * (sometimes) set extra bits in their SLOTREQ requests. + * When a bit is set in a SLOTREQ request then SSI sets the + * relevant bit in SACCST automatically (it is enough if a bit was + * set in a SLOTREQ just once, bits in SACCST are 'sticky'). + * If an extra slot gets enabled that's a disaster for playback + * because some of normal left or right channel samples are + * redirected instead to this extra slot. 
+ * + * A workaround implemented in fsl-asoc-card of setting an + * appropriate CODEC register so that slots 3 & 4 (the normal + * stereo playback slots) are used for S/PDIF seems to mostly fix + * this issue on the UDOO board but since this CODEC is so + * untrustworthy let's play safe here and make sure that no extra + * slots are enabled every time a playback is started. + */ + if (enable && fsl_ssi_is_ac97(ssi_private)) + fsl_ssi_tx_ac97_saccst_setup(ssi_private); + fsl_ssi_config(ssi_private, enable, &ssi_private->rxtx_reg_val.tx); } @@ -633,12 +679,6 @@ static void fsl_ssi_setup_ac97(struct fsl_ssi_private *ssi_private) regmap_write(regs, CCSR_SSI_SACNT, CCSR_SSI_SACNT_AC97EN | CCSR_SSI_SACNT_FV); - /* no SACC{ST,EN,DIS} regs on imx21-class SSI */ - if (!ssi_private->soc->imx21regs) { - regmap_write(regs, CCSR_SSI_SACCDIS, 0xff); - regmap_write(regs, CCSR_SSI_SACCEN, 0x300); - } - /* * Enable SSI, Transmit and Receive. AC97 has to communicate with the * codec before a stream is started. -- cgit v1.2.3 From b89b6925bb9d48926d7ba713d3f13b14fc35c544 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 16 Nov 2017 11:55:18 -0800 Subject: ASoC: fsl_asrc: Fix typo in a field define ASRFSTi_IAEi has an 11-bit offset as its _SHIFT macro defines. So this patch just fixes that. 
Reported-by: Laurent Charpentier Signed-off-by: Nicolin Chen Reviewed-by: Fabio Estevam Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_asrc.h b/sound/soc/fsl/fsl_asrc.h index 0f163abe4ba3..52c27a358933 100644 --- a/sound/soc/fsl/fsl_asrc.h +++ b/sound/soc/fsl/fsl_asrc.h @@ -260,8 +260,8 @@ #define ASRFSTi_OUTPUT_FIFO_SHIFT 12 #define ASRFSTi_OUTPUT_FIFO_MASK (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT) #define ASRFSTi_IAEi_SHIFT 11 -#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_OAFi_SHIFT) -#define ASRFSTi_IAEi (1 << ASRFSTi_OAFi_SHIFT) +#define ASRFSTi_IAEi_MASK (1 << ASRFSTi_IAEi_SHIFT) +#define ASRFSTi_IAEi (1 << ASRFSTi_IAEi_SHIFT) #define ASRFSTi_INPUT_FIFO_WIDTH 7 #define ASRFSTi_INPUT_FIFO_SHIFT 0 #define ASRFSTi_INPUT_FIFO_MASK ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1) -- cgit v1.2.3 From 2befc01bf7358ab635ca362f3f5ba37d99ef31c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 19 Nov 2017 06:09:06 +0100 Subject: MAINTAINERS: regulator: Add Documentation/power/regulator/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Neuschäfer Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..a0d4e85d511a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14629,6 +14629,7 @@ W: http://www.slimlogic.co.uk/?p=48 T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git S: Supported F: Documentation/devicetree/bindings/regulator/ +F: Documentation/power/regulator/ F: drivers/regulator/ F: include/dt-bindings/regulator/ F: include/linux/regulator/ -- cgit v1.2.3 From 983ba99a8f275aae9614a30469d0f480550fbc24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 19 Nov 2017 06:09:07 +0100 Subject: regulator: Update code examples in documentation 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This involves using the REGULATOR_SUPPLY initializer macro and reindenting some of the code. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Mark Brown --- Documentation/power/regulator/machine.txt | 36 ++++++++++++++----------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.txt index 757e3b53dc11..eff4dcaaa252 100644 --- a/Documentation/power/regulator/machine.txt +++ b/Documentation/power/regulator/machine.txt @@ -23,16 +23,12 @@ struct regulator_consumer_supply { e.g. for the machine above static struct regulator_consumer_supply regulator1_consumers[] = { -{ - .dev_name = "dev_name(consumer B)", - .supply = "Vcc", -},}; + REGULATOR_SUPPLY("Vcc", "consumer B"), +}; static struct regulator_consumer_supply regulator2_consumers[] = { -{ - .dev = "dev_name(consumer A"), - .supply = "Vcc", -},}; + REGULATOR_SUPPLY("Vcc", "consumer A"), +}; This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2 to the 'Vcc' supply for Consumer A. @@ -78,20 +74,20 @@ static struct regulator_init_data regulator2_data = { Finally the regulator devices must be registered in the usual manner. 
static struct platform_device regulator_devices[] = { -{ - .name = "regulator", - .id = DCDC_1, - .dev = { - .platform_data = &regulator1_data, + { + .name = "regulator", + .id = DCDC_1, + .dev = { + .platform_data = &regulator1_data, + }, }, -}, -{ - .name = "regulator", - .id = DCDC_2, - .dev = { - .platform_data = &regulator2_data, + { + .name = "regulator", + .id = DCDC_2, + .dev = { + .platform_data = &regulator2_data, + }, }, -}, }; /* register regulator 1 device */ platform_device_register(&regulator_devices[0]); -- cgit v1.2.3 From a1a68fcaf165a6ed202d8e29a692c559e10106c4 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 20 Nov 2017 15:27:28 +0800 Subject: regmap: Remove the redundant config to select hwspinlock The hwspinlock was changed to a bool by commit d048236dfdfe ("hwspinlock: Change hwspinlock to a bool"), so we do not need the REGMAP_HWSPINLOCK config to select hwspinlock or not. Signed-off-by: Baolin Wang Signed-off-by: Mark Brown --- drivers/base/regmap/Kconfig | 4 ---- drivers/base/regmap/regmap.c | 11 ++--------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig index 3a1535d812d8..0368fd7b3a41 100644 --- a/drivers/base/regmap/Kconfig +++ b/drivers/base/regmap/Kconfig @@ -6,7 +6,6 @@ config REGMAP default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ) select IRQ_DOMAIN if REGMAP_IRQ - select REGMAP_HWSPINLOCK if HWSPINLOCK=y bool config REGCACHE_COMPRESSED @@ -38,6 +37,3 @@ config REGMAP_MMIO config REGMAP_IRQ bool - -config REGMAP_HWSPINLOCK - bool diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 8d516a9bfc01..f25ab18ca057 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -414,7 +414,6 @@ static unsigned int regmap_parse_64_native(const void *buf) } #endif -#ifdef REGMAP_HWSPINLOCK static void regmap_lock_hwlock(void *__map) { struct regmap *map = __map; @@ -457,7
+456,6 @@ static void regmap_unlock_hwlock_irqrestore(void *__map) hwspin_unlock_irqrestore(map->hwlock, &map->spinlock_flags); } -#endif static void regmap_lock_mutex(void *__map) { @@ -674,7 +672,6 @@ struct regmap *__regmap_init(struct device *dev, map->unlock = config->unlock; map->lock_arg = config->lock_arg; } else if (config->hwlock_id) { -#ifdef REGMAP_HWSPINLOCK map->hwlock = hwspin_lock_request_specific(config->hwlock_id); if (!map->hwlock) { ret = -ENXIO; @@ -697,10 +694,6 @@ struct regmap *__regmap_init(struct device *dev, } map->lock_arg = map; -#else - ret = -EINVAL; - goto err_map; -#endif } else { if ((bus && bus->fast_io) || config->fast_io) { @@ -1116,7 +1109,7 @@ err_range: regmap_range_exit(map); kfree(map->work_buf); err_hwlock: - if (IS_ENABLED(REGMAP_HWSPINLOCK) && map->hwlock) + if (map->hwlock) hwspin_lock_free(map->hwlock); err_map: kfree(map); @@ -1305,7 +1298,7 @@ void regmap_exit(struct regmap *map) kfree(async->work_buf); kfree(async); } - if (IS_ENABLED(REGMAP_HWSPINLOCK) && map->hwlock) + if (map->hwlock) hwspin_lock_free(map->hwlock); kfree(map); } -- cgit v1.2.3 From 494665a0f91b51443ca14ee1dca6d96cfd003b38 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 20 Nov 2017 02:45:36 +0000 Subject: ASoC: soc-utils: add const to dummy_codec Let's align dummy_platform and dummy_codec Signed-off-by: Kuninori Morimoto Reviewed-by: Simon Horman Signed-off-by: Mark Brown --- sound/soc/soc-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index e30aacbcfc29..bcd3da2739e2 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -288,7 +288,7 @@ static const struct snd_soc_platform_driver dummy_platform = { .ops = &dummy_dma_ops, }; -static struct snd_soc_codec_driver dummy_codec; +static const struct snd_soc_codec_driver dummy_codec; #define STUB_RATES SNDRV_PCM_RATE_8000_192000 #define STUB_FORMATS (SNDRV_PCM_FMTBIT_S8 | \ -- cgit v1.2.3 From 
10582635dc8f4f99448c5dcddac38cc18a72dfde Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Mon, 27 Nov 2017 23:34:44 +0100 Subject: ASoC: fsl_ssi: add 20-bit sample format for AC'97 and use it for capture When testing AC'97 capture on UDOO board (currently the only user of fsl_ssi driver in the AC'97 mode) it became obvious that there is a massive distortion above a certain, small input signal level. This problem has been traced to silicon errata ERR003778: "In AC97, 16-bit mode, received data is shifted by 4-bit locations" that has "No fix scheduled". This errata suggests a workaround of doing a 4-bit shift back in SDMA script for this specific operation mode, however our SDMA scripts are shared between various SoC peripherals so we can't really modify them. There is a simple way to avoid this problem, however, that is to disallow recording in 16-bit mode and only support it in AC'97-native 20-bit mode. We have to use a 4-byte format for this since SSI FIFOs do not allow 3-byte accesses (and these aren't supported by imx-sdma driver anyway). With this change the capture distortion is gone. We can also add this format as an additional one supported for playback, using this opportunity to make sure that we use CPU-endian-native formats in AC'97 mode as we already do in I2S mode. There is no problem in using different bit widths in playback and capture in AC'97 mode so allow this, too. Signed-off-by: Maciej S.
Szmigiero Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 20ef09e1a395..c350117c8e31 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1278,14 +1278,15 @@ static struct snd_soc_dai_driver fsl_ssi_ac97_dai = { .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_48000, - .formats = SNDRV_PCM_FMTBIT_S16_LE, + .formats = SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S20, }, .capture = { .stream_name = "AC97 Capture", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_48000, - .formats = SNDRV_PCM_FMTBIT_S16_LE, + /* 16-bit capture is broken (errata ERR003778) */ + .formats = SNDRV_PCM_FMTBIT_S20, }, .ops = &fsl_ssi_dai_ops, }; @@ -1557,11 +1558,12 @@ static int fsl_ssi_probe(struct platform_device *pdev) /* Are the RX and the TX clocks locked? */ if (!of_find_property(np, "fsl,ssi-asynchronous", NULL)) { - if (!fsl_ssi_is_ac97(ssi_private)) + if (!fsl_ssi_is_ac97(ssi_private)) { ssi_private->cpu_dai_drv.symmetric_rates = 1; + ssi_private->cpu_dai_drv.symmetric_samplebits = 1; + } ssi_private->cpu_dai_drv.symmetric_channels = 1; - ssi_private->cpu_dai_drv.symmetric_samplebits = 1; } /* Determine the FIFO depth. */ -- cgit v1.2.3 From 5c9afbda911ce20b3f2181d1e440a0222e1027dd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 17 Nov 2017 22:37:53 +0100 Subject: dmaengine: ioat: Fix error handling path If the last test in 'ioat_dma_self_test()' fails, we must release all the allocated resources and not just part of them. 
Signed-off-by: Christophe JAILLET Acked-by: Dave Jiang Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 2f31d3d0caa6..7792a9186f9c 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -390,7 +390,7 @@ static int ioat_dma_self_test(struct ioatdma_device *ioat_dma) if (memcmp(src, dest, IOAT_TEST_SIZE)) { dev_err(dev, "Self-test copy failed compare, disabling\n"); err = -ENODEV; - goto free_resources; + goto unmap_dma; } unmap_dma: -- cgit v1.2.3 From 62a277d43d47e74972de44d33bd3763e31992414 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 20 Nov 2017 08:28:14 -0600 Subject: dmaengine: at_hdmac: fix potential NULL pointer dereference in atc_prep_dma_interleaved _xt_ is being dereferenced before it is null checked, hence there is a potential null pointer dereference. Fix this by moving the pointer dereference after _xt_ has been null checked. This issue was detected with the help of Coccinelle. Fixes: 4483320e241c ("dmaengine: Use Pointer xt after NULL check.") Signed-off-by: Gustavo A. R. 
Silva Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index fbab271b3bf9..a861b5b4d443 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -708,7 +708,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan, unsigned long flags) { struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct data_chunk *first = xt->sgl; + struct data_chunk *first; struct at_desc *desc = NULL; size_t xfer_count; unsigned int dwidth; @@ -720,6 +720,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan, if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) return NULL; + first = xt->sgl; + dev_info(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", __func__, &xt->src_start, &xt->dst_start, xt->numf, -- cgit v1.2.3 From bc2bd45b1f7f35b80335367f682c0ae5b2f37911 Mon Sep 17 00:00:00 2001 From: Sriram Periyasamy Date: Wed, 22 Nov 2017 17:39:46 +0530 Subject: ASoC: Intel: Skylake: Parse nhlt and register clock device When NHLT endpoint is present for a SSP then we create clock for that SSP. MCLK is consistent across endpoints and configuration for an SSP, so query only for first endpoint for an SSP. For SCLK/SCLKFS, the best fit is queried from the NHLT configurations which matches the clock rate requested. Best fit is decided based on below: 1. If rate matches with multiple configurations, then the first configuration is selected. 2. If for a selected fs and bits_per_sample, there are multiple endpoint configuration match, then the configuration with max number of channels is selected. So, the user has to set the rate which fits max number of channels So we create a platform device and pass clock information parsed as platform data. Signed-off-by: Sriram Periyasamy Signed-off-by: Jaikrishna Nemallapudi Signed-off-by: Subhransu S. 
Prusty Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-i2s.h | 64 ++++++++++++++ sound/soc/intel/skylake/skl-nhlt.c | 155 ++++++++++++++++++++++++++++++++++ sound/soc/intel/skylake/skl-ssp-clk.h | 79 +++++++++++++++++ sound/soc/intel/skylake/skl.c | 93 ++++++++++++++++++++ sound/soc/intel/skylake/skl.h | 4 + 5 files changed, 395 insertions(+) create mode 100644 sound/soc/intel/skylake/skl-i2s.h create mode 100644 sound/soc/intel/skylake/skl-ssp-clk.h diff --git a/sound/soc/intel/skylake/skl-i2s.h b/sound/soc/intel/skylake/skl-i2s.h new file mode 100644 index 000000000000..dcf819bc688f --- /dev/null +++ b/sound/soc/intel/skylake/skl-i2s.h @@ -0,0 +1,64 @@ +/* + * skl-i2s.h - i2s blob mapping + * + * Copyright (C) 2017 Intel Corp + * Author: Subhransu S. Prusty < subhransu.s.prusty@intel.com> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + */ + +#ifndef __SOUND_SOC_SKL_I2S_H +#define __SOUND_SOC_SKL_I2S_H + +#define SKL_I2S_MAX_TIME_SLOTS 8 +#define SKL_MCLK_DIV_CLK_SRC_MASK GENMASK(17, 16) + +#define SKL_MNDSS_DIV_CLK_SRC_MASK GENMASK(21, 20) +#define SKL_SHIFT(x) (ffs(x) - 1) +#define SKL_MCLK_DIV_RATIO_MASK GENMASK(11, 0) + +struct skl_i2s_config { + u32 ssc0; + u32 ssc1; + u32 sscto; + u32 sspsp; + u32 sstsa; + u32 ssrsa; + u32 ssc2; + u32 sspsp2; + u32 ssc3; + u32 ssioc; +} __packed; + +struct skl_i2s_config_mclk { + u32 mdivctrl; + u32 mdivr; +}; + +/** + * struct skl_i2s_config_blob_legacy - Structure defines I2S Gateway + * configuration legacy blob + * + * @gtw_attr: Gateway attribute for the I2S Gateway + * @tdm_ts_group: TDM slot mapping against channels in the Gateway. + * @i2s_cfg: I2S HW registers + * @mclk: MCLK clock source and divider values + */ +struct skl_i2s_config_blob_legacy { + u32 gtw_attr; + u32 tdm_ts_group[SKL_I2S_MAX_TIME_SLOTS]; + struct skl_i2s_config i2s_cfg; + struct skl_i2s_config_mclk mclk; +}; + +#endif /* __SOUND_SOC_SKL_I2S_H */ diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index e7d766d56c8e..4d2136c0389a 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -19,6 +19,7 @@ */ #include #include "skl.h" +#include "skl-i2s.h" /* Unique identification for getting NHLT blobs */ static guid_t osc_guid = @@ -262,3 +263,157 @@ void skl_nhlt_remove_sysfs(struct skl *skl) sysfs_remove_file(&dev->kobj, &dev_attr_platform_id.attr); } + +/* + * Queries NHLT for all the fmt configuration for a particular endpoint and + * stores all possible rates supported in a rate table for the corresponding + * sclk/sclkfs. 
+ */ +void skl_get_ssp_clks(struct skl *skl, struct skl_ssp_clk *ssp_clks, + struct nhlt_fmt *fmt, u8 id) +{ + struct skl_i2s_config_blob_legacy *i2s_config; + struct skl_clk_parent_src *parent; + struct skl_ssp_clk *sclk, *sclkfs; + struct nhlt_fmt_cfg *fmt_cfg; + struct wav_fmt_ext *wav_fmt; + unsigned long rate = 0; + bool present = false; + int rate_index = 0; + u16 channels, bps; + u8 clk_src; + int i, j; + u32 fs; + + sclk = &ssp_clks[SKL_SCLK_OFS]; + sclkfs = &ssp_clks[SKL_SCLKFS_OFS]; + + if (fmt->fmt_count == 0) + return; + + for (i = 0; i < fmt->fmt_count; i++) { + fmt_cfg = &fmt->fmt_config[i]; + wav_fmt = &fmt_cfg->fmt_ext; + + channels = wav_fmt->fmt.channels; + bps = wav_fmt->fmt.bits_per_sample; + fs = wav_fmt->fmt.samples_per_sec; + + /* + * In case of TDM configuration on a ssp, there can + * be more than one blob in which channel masks are + * different for each usecase for a specific rate and bps. + * But the sclk rate will be generated for the total + * number of channels used for that endpoint. + * + * So for the given fs and bps, choose blob which has + * the superset of all channels for that endpoint and + * derive the rate. 
+ */ + for (j = i; j < fmt->fmt_count; j++) { + fmt_cfg = &fmt->fmt_config[j]; + wav_fmt = &fmt_cfg->fmt_ext; + if ((fs == wav_fmt->fmt.samples_per_sec) && + (bps == wav_fmt->fmt.bits_per_sample)) + channels = max_t(u16, channels, + wav_fmt->fmt.channels); + } + + rate = channels * bps * fs; + + /* check if the rate is added already to the given SSP's sclk */ + for (j = 0; (sclk[id].rate_cfg[j].rate != 0) && + (j < SKL_MAX_CLK_RATES); j++) { + if (sclk[id].rate_cfg[j].rate == rate) { + present = true; + break; + } + } + + /* Fill rate and parent for sclk/sclkfs */ + if (!present) { + /* MCLK Divider Source Select */ + i2s_config = (struct skl_i2s_config_blob_legacy *) + fmt->fmt_config[0].config.caps; + clk_src = ((i2s_config->mclk.mdivctrl) + & SKL_MNDSS_DIV_CLK_SRC_MASK) >> + SKL_SHIFT(SKL_MNDSS_DIV_CLK_SRC_MASK); + + parent = skl_get_parent_clk(clk_src); + + /* + * Do not copy the config data if there is no parent + * clock available for this clock source select + */ + if (!parent) + continue; + + sclk[id].rate_cfg[rate_index].rate = rate; + sclk[id].rate_cfg[rate_index].config = fmt_cfg; + sclkfs[id].rate_cfg[rate_index].rate = rate; + sclkfs[id].rate_cfg[rate_index].config = fmt_cfg; + sclk[id].parent_name = parent->name; + sclkfs[id].parent_name = parent->name; + + rate_index++; + } + } +} + +void skl_get_mclk(struct skl *skl, struct skl_ssp_clk *mclk, + struct nhlt_fmt *fmt, u8 id) +{ + struct skl_i2s_config_blob_legacy *i2s_config; + struct nhlt_specific_cfg *fmt_cfg; + struct skl_clk_parent_src *parent; + u32 clkdiv, div_ratio; + u8 clk_src; + + fmt_cfg = &fmt->fmt_config[0].config; + i2s_config = (struct skl_i2s_config_blob_legacy *)fmt_cfg->caps; + + /* MCLK Divider Source Select */ + clk_src = ((i2s_config->mclk.mdivctrl) & SKL_MCLK_DIV_CLK_SRC_MASK) >> + SKL_SHIFT(SKL_MCLK_DIV_CLK_SRC_MASK); + + clkdiv = i2s_config->mclk.mdivr & SKL_MCLK_DIV_RATIO_MASK; + + /* bypass divider */ + div_ratio = 1; + + if (clkdiv != SKL_MCLK_DIV_RATIO_MASK) + /* Divider is 
2 + clkdiv */ + div_ratio = clkdiv + 2; + + /* Calculate MCLK rate from source using div value */ + parent = skl_get_parent_clk(clk_src); + if (!parent) + return; + + mclk[id].rate_cfg[0].rate = parent->rate/div_ratio; + mclk[id].rate_cfg[0].config = &fmt->fmt_config[0]; + mclk[id].parent_name = parent->name; +} + +void skl_get_clks(struct skl *skl, struct skl_ssp_clk *ssp_clks) +{ + struct nhlt_acpi_table *nhlt = (struct nhlt_acpi_table *)skl->nhlt; + struct nhlt_endpoint *epnt; + struct nhlt_fmt *fmt; + int i; + u8 id; + + epnt = (struct nhlt_endpoint *)nhlt->desc; + for (i = 0; i < nhlt->endpoint_count; i++) { + if (epnt->linktype == NHLT_LINK_SSP) { + id = epnt->virtual_bus_id; + + fmt = (struct nhlt_fmt *)(epnt->config.caps + + epnt->config.size); + + skl_get_ssp_clks(skl, ssp_clks, fmt, id); + skl_get_mclk(skl, ssp_clks, fmt, id); + } + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); + } +} diff --git a/sound/soc/intel/skylake/skl-ssp-clk.h b/sound/soc/intel/skylake/skl-ssp-clk.h new file mode 100644 index 000000000000..c9ea84004260 --- /dev/null +++ b/sound/soc/intel/skylake/skl-ssp-clk.h @@ -0,0 +1,79 @@ +/* + * skl-ssp-clk.h - Skylake ssp clock information and ipc structure + * + * Copyright (C) 2017 Intel Corp + * Author: Jaikrishna Nemallapudi + * Author: Subhransu S. Prusty + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + */ + +#ifndef SOUND_SOC_SKL_SSP_CLK_H +#define SOUND_SOC_SKL_SSP_CLK_H + +#define SKL_MAX_SSP 6 +/* xtal/cardinal/pll, parent of ssp clocks and mclk */ +#define SKL_MAX_CLK_SRC 3 +#define SKL_MAX_SSP_CLK_TYPES 3 /* mclk, sclk, sclkfs */ + +#define SKL_MAX_CLK_CNT (SKL_MAX_SSP * SKL_MAX_SSP_CLK_TYPES) + +/* Max number of configurations supported for each clock */ +#define SKL_MAX_CLK_RATES 10 + +#define SKL_SCLK_OFS SKL_MAX_SSP +#define SKL_SCLKFS_OFS (SKL_SCLK_OFS + SKL_MAX_SSP) + +enum skl_clk_type { + SKL_MCLK, + SKL_SCLK, + SKL_SCLK_FS, +}; + +enum skl_clk_src_type { + SKL_XTAL, + SKL_CARDINAL, + SKL_PLL, +}; + +struct skl_clk_parent_src { + u8 clk_id; + const char *name; + unsigned long rate; + const char *parent_name; +}; + +struct skl_clk_rate_cfg_table { + unsigned long rate; + void *config; +}; + +/* + * rate for mclk will be in rates[0]. For sclk and sclkfs, rates[] store + * all possible clocks ssp can generate for that platform. + */ +struct skl_ssp_clk { + const char *name; + const char *parent_name; + struct skl_clk_rate_cfg_table rate_cfg[SKL_MAX_CLK_RATES]; +}; + +struct skl_clk_pdata { + struct skl_clk_parent_src *parent_clks; + int num_clks; + struct skl_ssp_clk *ssp_clks; + void *pvt_data; +}; + +#endif /* SOUND_SOC_SKL_SSP_CLK_H */ diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index acb0ab470ca6..63e5456ef401 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -436,6 +436,23 @@ static int skl_free(struct hdac_ext_bus *ebus) return 0; } +/* + * For each ssp there are 3 clocks (mclk/sclk/sclkfs). + * e.g. for ssp0, clocks will be named as + * "ssp0_mclk", "ssp0_sclk", "ssp0_sclkfs" + * So for skl+, there are 6 ssps, so 18 clocks will be created. 
+ */ +static struct skl_ssp_clk skl_ssp_clks[] = { + {.name = "ssp0_mclk"}, {.name = "ssp1_mclk"}, {.name = "ssp2_mclk"}, + {.name = "ssp3_mclk"}, {.name = "ssp4_mclk"}, {.name = "ssp5_mclk"}, + {.name = "ssp0_sclk"}, {.name = "ssp1_sclk"}, {.name = "ssp2_sclk"}, + {.name = "ssp3_sclk"}, {.name = "ssp4_sclk"}, {.name = "ssp5_sclk"}, + {.name = "ssp0_sclkfs"}, {.name = "ssp1_sclkfs"}, + {.name = "ssp2_sclkfs"}, + {.name = "ssp3_sclkfs"}, {.name = "ssp4_sclkfs"}, + {.name = "ssp5_sclkfs"}, +}; + static int skl_machine_device_register(struct skl *skl, void *driver_data) { struct hdac_bus *bus = ebus_to_hbus(&skl->ebus); @@ -510,6 +527,74 @@ static void skl_dmic_device_unregister(struct skl *skl) platform_device_unregister(skl->dmic_dev); } +static struct skl_clk_parent_src skl_clk_src[] = { + { .clk_id = SKL_XTAL, .name = "xtal" }, + { .clk_id = SKL_CARDINAL, .name = "cardinal", .rate = 24576000 }, + { .clk_id = SKL_PLL, .name = "pll", .rate = 96000000 }, +}; + +struct skl_clk_parent_src *skl_get_parent_clk(u8 clk_id) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(skl_clk_src); i++) { + if (skl_clk_src[i].clk_id == clk_id) + return &skl_clk_src[i]; + } + + return NULL; +} + +void init_skl_xtal_rate(int pci_id) +{ + switch (pci_id) { + case 0x9d70: + case 0x9d71: + skl_clk_src[0].rate = 24000000; + return; + + default: + skl_clk_src[0].rate = 19200000; + return; + } +} + +static int skl_clock_device_register(struct skl *skl) +{ + struct platform_device_info pdevinfo = {NULL}; + struct skl_clk_pdata *clk_pdata; + + clk_pdata = devm_kzalloc(&skl->pci->dev, sizeof(*clk_pdata), + GFP_KERNEL); + if (!clk_pdata) + return -ENOMEM; + + init_skl_xtal_rate(skl->pci->device); + + clk_pdata->parent_clks = skl_clk_src; + clk_pdata->ssp_clks = skl_ssp_clks; + clk_pdata->num_clks = ARRAY_SIZE(skl_ssp_clks); + + /* Query NHLT to fill the rates and parent */ + skl_get_clks(skl, clk_pdata->ssp_clks); + clk_pdata->pvt_data = skl; + + /* Register Platform device */ + pdevinfo.parent 
= &skl->pci->dev; + pdevinfo.id = -1; + pdevinfo.name = "skl-ssp-clk"; + pdevinfo.data = clk_pdata; + pdevinfo.size_data = sizeof(*clk_pdata); + skl->clk_dev = platform_device_register_full(&pdevinfo); + return PTR_ERR_OR_ZERO(skl->clk_dev); +} + +static void skl_clock_device_unregister(struct skl *skl) +{ + if (skl->clk_dev) + platform_device_unregister(skl->clk_dev); +} + /* * Probe the given codec address */ @@ -792,6 +877,11 @@ static int skl_probe(struct pci_dev *pci, /* check if dsp is there */ if (bus->ppcap) { + /* create device for dsp clk */ + err = skl_clock_device_register(skl); + if (err < 0) + goto out_clk_free; + err = skl_machine_device_register(skl, (void *)pci_id->driver_data); if (err < 0) @@ -823,6 +913,8 @@ out_dsp_free: skl_free_dsp(skl); out_mach_free: skl_machine_device_unregister(skl); +out_clk_free: + skl_clock_device_unregister(skl); out_nhlt_free: skl_nhlt_free(skl->nhlt); out_free: @@ -873,6 +965,7 @@ static void skl_remove(struct pci_dev *pci) skl_free_dsp(skl); skl_machine_device_unregister(skl); skl_dmic_device_unregister(skl); + skl_clock_device_unregister(skl); skl_nhlt_remove_sysfs(skl); skl_nhlt_free(skl->nhlt); skl_free(ebus); diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h index e00cde8200dd..554ad6b5a823 100644 --- a/sound/soc/intel/skylake/skl.h +++ b/sound/soc/intel/skylake/skl.h @@ -25,6 +25,7 @@ #include #include #include "skl-nhlt.h" +#include "skl-ssp-clk.h" #define SKL_SUSPEND_DELAY 2000 @@ -52,6 +53,7 @@ struct skl { unsigned int init_done:1; /* delayed init status */ struct platform_device *dmic_dev; struct platform_device *i2s_dev; + struct platform_device *clk_dev; struct snd_soc_platform *platform; struct snd_soc_dai_driver *dais; @@ -125,6 +127,8 @@ const struct skl_dsp_ops *skl_get_dsp_ops(int pci_id); void skl_update_d0i3c(struct device *dev, bool enable); int skl_nhlt_create_sysfs(struct skl *skl); void skl_nhlt_remove_sysfs(struct skl *skl); +void skl_get_clks(struct skl *skl, struct 
skl_ssp_clk *ssp_clks); +struct skl_clk_parent_src *skl_get_parent_clk(u8 clk_id); struct skl_module_cfg; -- cgit v1.2.3 From ea261bd02a671e2dd60380053dddffedab81644d Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Tue, 21 Nov 2017 17:15:45 +0000 Subject: ASoC: intel: byt: Introduce new map for dual mics The RT5651 codec has 3 analog inputs. Some laptops have two different internal analog microphones on the external case. Add a new custom quirk mapping the two internal mics on IN1P / IN2P, leaving the headset mic on IN3P. Signed-off-by: Carlo Caione Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5651.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index d955836c6870..e3d5e6ea707f 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -38,6 +38,7 @@ enum { BYT_RT5651_DMIC_MAP, BYT_RT5651_IN1_MAP, BYT_RT5651_IN2_MAP, + BYT_RT5651_IN1_IN2_MAP, }; #define BYT_RT5651_MAP(quirk) ((quirk) & GENMASK(7, 0)) @@ -171,6 +172,13 @@ static const struct snd_soc_dapm_route byt_rt5651_intmic_in2_map[] = { {"IN2P", NULL, "Internal Mic"}, }; +static const struct snd_soc_dapm_route byt_rt5651_intmic_in1_in2_map[] = { + {"Internal Mic", NULL, "micbias1"}, + {"IN1P", NULL, "Internal Mic"}, + {"IN2P", NULL, "Internal Mic"}, + {"IN3P", NULL, "Headset Mic"}, +}; + static const struct snd_kcontrol_new byt_rt5651_controls[] = { SOC_DAPM_PIN_SWITCH("Headphone"), SOC_DAPM_PIN_SWITCH("Headset Mic"), @@ -281,6 +289,10 @@ static int byt_rt5651_init(struct snd_soc_pcm_runtime *runtime) custom_map = byt_rt5651_intmic_in2_map; num_routes = ARRAY_SIZE(byt_rt5651_intmic_in2_map); break; + case BYT_RT5651_IN1_IN2_MAP: + custom_map = byt_rt5651_intmic_in1_in2_map; + num_routes = ARRAY_SIZE(byt_rt5651_intmic_in1_in2_map); + break; default: custom_map = byt_rt5651_intmic_dmic_map; num_routes = ARRAY_SIZE(byt_rt5651_intmic_dmic_map); -- cgit v1.2.3 From 
56fa898be862053327b2ff8abfa0a6e7f350f81d Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Tue, 21 Nov 2017 17:15:46 +0000 Subject: ASoC: intel: byt: Fix quirk for KIANO laptop This laptop actually has two different analog mics, not just one. Fix the quirk to reflect the correct configuration. Signed-off-by: Carlo Caione Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5651.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index e3d5e6ea707f..488ec48f296a 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -264,7 +264,7 @@ static const struct dmi_system_id byt_rt5651_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "KIANO"), DMI_MATCH(DMI_PRODUCT_NAME, "KIANO SlimNote 14.2"), }, - .driver_data = (void *)(BYT_RT5651_IN2_MAP), + .driver_data = (void *)(BYT_RT5651_IN1_IN2_MAP), }, {} }; -- cgit v1.2.3 From 15d8374874ded0bec37ef27f8301a6d54032c0e5 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 14 Nov 2017 14:43:27 +0000 Subject: mfd: cros ec: spi: Don't send first message too soon On the Tegra124 Nyan-Big chromebook the very first SPI message sent to the EC is failing. The Tegra SPI driver configures the SPI chip-selects to be active-high by default (and always has for many years). The EC SPI requires an active-low chip-select and so the Tegra chip-select is reconfigured to be active-low when the EC SPI driver calls spi_setup(). The problem is that if the first SPI message to the EC is sent too soon after reconfiguring the SPI chip-select, it fails. The EC SPI driver prevents back-to-back SPI messages being sent too soon by keeping track of the time the last transfer was sent via the variable 'last_transfer_ns'. To prevent the very first transfer being sent too soon, initialise the 'last_transfer_ns' variable after calling spi_setup() and before sending the first SPI message.
Cc: Signed-off-by: Jon Hunter Reviewed-by: Brian Norris Reviewed-by: Douglas Anderson Acked-by: Benson Leung Signed-off-by: Lee Jones --- drivers/mfd/cros_ec_spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index c9714072e224..a14196e95e9b 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -667,6 +667,7 @@ static int cros_ec_spi_probe(struct spi_device *spi) sizeof(struct ec_response_get_protocol_info); ec_dev->dout_size = sizeof(struct ec_host_request); + ec_spi->last_transfer_ns = ktime_get_ns(); err = cros_ec_register(ec_dev); if (err) { -- cgit v1.2.3 From 0a423772de2f3d7b00899987884f62f63ae00dcb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 11 Nov 2017 16:38:43 +0100 Subject: mfd: twl4030-audio: Fix sibling-node lookup A helper purported to look up a child node based on its name was using the wrong of-helper and ended up prematurely freeing the parent of-node while leaking any matching node. To make things worse, any matching node would not even necessarily be a child node as the whole device tree was searched depth-first starting at the parent. 
Fixes: 019a7e6b7b31 ("mfd: twl4030-audio: Add DT support") Cc: stable # 3.7 Signed-off-by: Johan Hovold Acked-by: Peter Ujfalusi Signed-off-by: Lee Jones --- drivers/mfd/twl4030-audio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c index da16bf45fab4..dc94ffc6321a 100644 --- a/drivers/mfd/twl4030-audio.c +++ b/drivers/mfd/twl4030-audio.c @@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void) EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk); static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->codec) return true; - if (of_find_node_by_name(node, "codec")) + node = of_get_child_by_name(parent, "codec"); + if (node) { + of_node_put(node); return true; + } return false; } -- cgit v1.2.3 From 85e9b13cbb130a3209f21bd7933933399c389ffe Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 11 Nov 2017 16:38:44 +0100 Subject: mfd: twl6040: Fix child-node lookup Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. To make things worse, the parent node was prematurely freed, while the child node was leaked. Note that the CONFIG_OF compile guard can be removed as of_get_child_by_name() provides a !CONFIG_OF implementation which always fails. 
Cc: stable # 3.5 Fixes: 37e13cecaa14 ("mfd: Add support for Device Tree to twl6040") Fixes: ca2cad6ae38e ("mfd: Fix twl6040 build failure") Signed-off-by: Johan Hovold Acked-by: Peter Ujfalusi Signed-off-by: Lee Jones --- drivers/mfd/twl6040.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index d66502d36ba0..dd19f17a1b63 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = { }; -static bool twl6040_has_vibra(struct device_node *node) +static bool twl6040_has_vibra(struct device_node *parent) { -#ifdef CONFIG_OF - if (of_find_node_by_name(node, "vibra")) + struct device_node *node; + + node = of_get_child_by_name(parent, "vibra"); + if (node) { + of_node_put(node); return true; -#endif + } + return false; } -- cgit v1.2.3 From 001dde9400d5c3e9e2ce2abe06c1efa70a25dfde Mon Sep 17 00:00:00 2001 From: Shawn Nematbakhsh Date: Wed, 27 Sep 2017 14:35:27 -0700 Subject: mfd: cros ec: spi: Fix "in progress" error signaling For host commands that take a long time to process, cros ec can return early by signaling a EC_RES_IN_PROGRESS result. The host must then poll status with EC_CMD_GET_COMMS_STATUS until completion of the command. None of the above applies when data link errors are encountered. When errors such as EC_SPI_PAST_END are encountered during command transmission, it usually means the command was not received by the EC. Treating such errors as if they were 'EC_RES_IN_PROGRESS' results is almost always the wrong decision, and can result in host commands silently being lost. 
Reported-by: Jon Hunter Signed-off-by: Shawn Nematbakhsh Reviewed-by: Brian Norris Tested-by: Jon Hunter Signed-off-by: Lee Jones --- drivers/mfd/cros_ec_spi.c | 52 ++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index a14196e95e9b..59c82cdcf48d 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, u8 *ptr; u8 *rx_buf; u8 sum; + u8 rx_byte; int ret = 0, final_ret; len = cros_ec_prepare_tx(ec_dev, ec_msg); @@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, if (!ret) { /* Verify that EC can process command */ for (i = 0; i < len; i++) { - switch (rx_buf[i]) { - case EC_SPI_PAST_END: - case EC_SPI_RX_BAD_DATA: - case EC_SPI_NOT_READY: - ret = -EAGAIN; - ec_msg->result = EC_RES_IN_PROGRESS; - default: + rx_byte = rx_buf[i]; + if (rx_byte == EC_SPI_PAST_END || + rx_byte == EC_SPI_RX_BAD_DATA || + rx_byte == EC_SPI_NOT_READY) { + ret = -EREMOTEIO; break; } - if (ret) - break; } - if (!ret) - ret = cros_ec_spi_receive_packet(ec_dev, - ec_msg->insize + sizeof(*response)); - } else { - dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); } + if (!ret) + ret = cros_ec_spi_receive_packet(ec_dev, + ec_msg->insize + sizeof(*response)); + else + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + final_ret = terminate_request(ec_dev); spi_bus_unlock(ec_spi->spi->master); @@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, int i, len; u8 *ptr; u8 *rx_buf; + u8 rx_byte; int sum; int ret = 0, final_ret; @@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, if (!ret) { /* Verify that EC can process command */ for (i = 0; i < len; i++) { - switch (rx_buf[i]) { - case EC_SPI_PAST_END: - case EC_SPI_RX_BAD_DATA: - case EC_SPI_NOT_READY: - ret = -EAGAIN; - ec_msg->result 
= EC_RES_IN_PROGRESS; - default: + rx_byte = rx_buf[i]; + if (rx_byte == EC_SPI_PAST_END || + rx_byte == EC_SPI_RX_BAD_DATA || + rx_byte == EC_SPI_NOT_READY) { + ret = -EREMOTEIO; break; } - if (ret) - break; } - if (!ret) - ret = cros_ec_spi_receive_response(ec_dev, - ec_msg->insize + EC_MSG_TX_PROTO_BYTES); - } else { - dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); } + if (!ret) + ret = cros_ec_spi_receive_response(ec_dev, + ec_msg->insize + EC_MSG_TX_PROTO_BYTES); + else + dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret); + final_ret = terminate_request(ec_dev); spi_bus_unlock(ec_spi->spi->master); -- cgit v1.2.3 From 3f27bb5f00dc10609c2704cd39a130c8155a8510 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 21 Nov 2017 09:41:11 +0100 Subject: tools headers: Follow the upstream UAPI header version 100% differ from the kernel Remove this from check-headers.sh: opts="--ignore-blank-lines --ignore-space-change" as the easiest policy is to just follow the upstream UAPI header version 100%. Pure space-only changes are comparatively rare. Signed-off-by: Ingo Molnar Cc: Adrian Hunter Link: http://lkml.kernel.org/r/20171121084111.y6p5zwqso2cbms5s@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/check-headers.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 77406d25e521..e66a8a7bcced 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -45,7 +45,6 @@ include/uapi/asm-generic/mman-common.h check () { file=$1 - opts="--ignore-blank-lines --ignore-space-change" shift while [ -n "$*" ]; do -- cgit v1.2.3 From 4ca69ca9db3ae51ac7cc0bd1af7961b7a3ba5b87 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 23 Nov 2017 08:46:23 +0100 Subject: perf test: Disable test cases 19 and 20 on s390x The s390x CPU sampling and measurement facilities do not support perf events of type PERF_TYPE_BREAKPOINT. 
The test cases are executed and fail with -ENOENT due to missing hardware support. Disable the execution of both test cases based on a platform check. This is the same approach as done for PowerPC. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Martin Schwidefsky LPU-Reference: 20171123074623.20817-1-tmricht@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-uqvoy6a1tsu8jddo5jjg4h85@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bp_signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index 335b695f4970..a467615c5a0e 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -296,7 +296,7 @@ bool test__bp_signal_is_supported(void) * instruction breakpoint using the perf event interface. * Once it's there we can release this. */ -#ifdef __powerpc__ +#if defined(__powerpc__) || defined(__s390x__) return false; #else return true; -- cgit v1.2.3 From bfd8f72c2778f5bd63dc9eb6d23bd7a0d99cff6d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 13:42:58 -0800 Subject: perf record: Synthesize unit/scale/... in event update Move the code to synthesize event updates for scale/unit/cpus to a common utility file, and use it both from stat and record. This allows accessing scale and other extra qualifiers from perf script.
Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171117214300.32746-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 9 ++++++ tools/perf/builtin-stat.c | 62 +++-------------------------------------- tools/perf/util/header.c | 68 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 5 ++++ 4 files changed, 86 insertions(+), 58 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 003255910c05..b92d6d67bca8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -372,6 +372,8 @@ try_again: ui__error("%s\n", msg); goto out; } + + pos->supported = true; } if (perf_evlist__apply_filters(evlist, &pos)) { @@ -784,6 +786,13 @@ static int record__synthesize(struct record *rec, bool tail) perf_event__synthesize_guest_os, tool); } + err = perf_event__synthesize_extra_attr(&rec->tool, + rec->evlist, + process_synthesized_event, + data->is_pipe); + if (err) + goto out; + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, process_synthesized_event, opts->sample_address, opts->proc_map_timeout, 1); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 59af5a8419e2..a027b4712e48 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -458,19 +458,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static bool has_unit(struct perf_evsel *counter) -{ - return counter->unit && *counter->unit; -} - -static bool has_scale(struct perf_evsel *counter) -{ - return counter->scale != 1; -} - static int perf_stat_synthesize_config(bool is_pipe) { - struct perf_evsel *counter; int err; if (is_pipe) { @@ -482,53 +471,10 @@ static int perf_stat_synthesize_config(bool is_pipe) } } - /* - * Synthesize other events stuff not carried within - * attr event - unit, scale, name - */ - 
evlist__for_each_entry(evsel_list, counter) { - if (!counter->supported) - continue; - - /* - * Synthesize unit and scale only if it's defined. - */ - if (has_unit(counter)) { - err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel unit.\n"); - return err; - } - } - - if (has_scale(counter)) { - err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel scale.\n"); - return err; - } - } - - if (counter->own_cpus) { - err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel scale.\n"); - return err; - } - } - - /* - * Name is needed only for pipe output, - * perf.data carries event names. - */ - if (is_pipe) { - err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel name.\n"); - return err; - } - } - } + err = perf_event__synthesize_extra_attr(NULL, + evsel_list, + process_synthesized_event, + is_pipe); err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, process_synthesized_event, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7c0e9d587bfa..5890e08e0754 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3258,6 +3258,74 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, return err; } +static bool has_unit(struct perf_evsel *counter) +{ + return counter->unit && *counter->unit; +} + +static bool has_scale(struct perf_evsel *counter) +{ + return counter->scale != 1; +} + +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe) +{ + struct perf_evsel *counter; + int err; + + /* + * Synthesize other events stuff not carried within + * attr event - unit, scale, name + */ + 
evlist__for_each_entry(evsel_list, counter) { + if (!counter->supported) + continue; + + /* + * Synthesize unit and scale only if it's defined. + */ + if (has_unit(counter)) { + err = perf_event__synthesize_event_update_unit(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel unit.\n"); + return err; + } + } + + if (has_scale(counter)) { + err = perf_event__synthesize_event_update_scale(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel counter.\n"); + return err; + } + } + + if (counter->own_cpus) { + err = perf_event__synthesize_event_update_cpus(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel cpus.\n"); + return err; + } + } + + /* + * Name is needed only for pipe output, + * perf.data carries event names. + */ + if (is_pipe) { + err = perf_event__synthesize_event_update_name(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel name.\n"); + return err; + } + } + } + return 0; +} + int perf_event__process_attr(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_evlist **pevlist) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 29ccbfdf8724..91befc3b550d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -107,6 +107,11 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe); + int perf_event__process_feature(struct perf_tool *tool, union perf_event *event, struct perf_session *session); -- cgit v1.2.3 From 373565d285e8d2113f1b6c0a2e461b9c8d0da1c9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 13:42:59 -0800 Subject: perf record: Synthesize thread map and cpu map Synthesize the per attr thread maps and cpu maps in 'perf record'. 
This allows code from 'perf stat' called from 'perf script' to access this information. Committer testing: Please see the PERF_RECORD_THREAD_MAP and PERF_RECORD_CPU_MAP records, added by this patch: $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf report -D | grep PERF_RECORD_ | head 0xe8 [0x20]: PERF_RECORD_TIME_CONV: unhandled! 0x108 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 23568 0x130 [0x18]: PERF_RECORD_CPU_MAP: 0-3 0 0x148 [0x28]: PERF_RECORD_COMM: perf:23568/23568 0x570 [0x8]: PERF_RECORD_FINISHED_ROUND 445342677837144 0x170 [0x28]: PERF_RECORD_COMM exec: sleep:23568/23568 445342677847339 0x198 [0x68]: PERF_RECORD_MMAP2 23568/23568: [0x564c943a4000(0x208000) @ 0 fd:00 3147174 2566255743]: r-xp /usr/bin/sleep 445342677862450 0x200 [0x70]: PERF_RECORD_MMAP2 23568/23568: [0x7f25968a8000(0x229000) @ 0 fd:00 3151761 2566238119]: r-xp /usr/lib64/ld-2.25.so 445342677873174 0x270 [0x60]: PERF_RECORD_MMAP2 23568/23568: [0x7ffc98176000(0x2000) @ 0 00:00 0 0]: r-xp [vdso] 445342677891928 0x2d0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4002): 23568/23568: 0xffffffff8f84c7e7 period: 1 addr: 0 $ Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20171117214300.32746-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b92d6d67bca8..e304bc47fe9b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -793,6 +793,21 @@ static int record__synthesize(struct record *rec, bool tail) if (err) goto out; + err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads, + process_synthesized_event, + NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = 
perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus, + process_synthesized_event, NULL); + if (err < 0) { + pr_err("Couldn't synthesize cpu map.\n"); + return err; + } + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, process_synthesized_event, opts->sample_address, opts->proc_map_timeout, 1); -- cgit v1.2.3 From 4bd1bef8bba2f99ff472ae3617864dda301f81bd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 13:43:00 -0800 Subject: perf script: Allow computing 'perf stat' style metrics Add support for computing 'perf stat' style metrics in 'perf script'. When using leader sampling we can get metrics for each sampling period by computing formulas over the values of the different group members. This allows things like fine grained IPC tracking through sampling, much more fine grained than with 'perf stat'. The metric is still averaged over the sampling period, it is not just for the sampling point. This patch adds a new metric output field for 'perf script' that uses the existing 'perf stat' metrics infrastructure to compute any metrics supported by 'perf stat'. For example to sample IPC: $ perf record -e '{ref-cycles,cycles,instructions}:S' -a sleep 1 $ perf script -F metric,ip,sym,time,cpu,comm ... 
alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: metric: 0.13 insn per cycle swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: metric: 0.23 insn per cycle qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: metric: 0.46 insn per cycle :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: metric: 0.45 insn per cycle TopDown: This requires disabling SMT if you have it enabled, because SMT would require sampling per core, which is not supported. $ perf record -e '{ref-cycles,topdown-fetch-bubbles,\ topdown-recovery-bubbles,\ topdown-slots-retired,topdown-total-slots,\ topdown-slots-issued}:S' -a sleep 1 $ perf script --header -I -F cpu,ip,sym,event,metric,period ... 
[000] 121108 ref-cycles: ffffffff8165222e copy_user_enhanced_fast_string [000] 190350 topdown-fetch-bubbles: ffffffff8165222e copy_user_enhanced_fast_string [000] 2055 topdown-recovery-bubbles: ffffffff8165222e copy_user_enhanced_fast_string [000] 148729 topdown-slots-retired: ffffffff8165222e copy_user_enhanced_fast_string [000] 144324 topdown-total-slots: ffffffff8165222e copy_user_enhanced_fast_string [000] 160852 topdown-slots-issued: ffffffff8165222e copy_user_enhanced_fast_string [000] metric: 33.0% frontend bound [000] metric: 3.5% bad speculation [000] metric: 25.8% retiring [000] metric: 37.7% backend bound [000] 112112 ref-cycles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 357222 topdown-fetch-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 3325 topdown-recovery-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 323553 topdown-slots-retired: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 270507 topdown-total-slots: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 341226 topdown-slots-issued: ffffffff8165aec8 _raw_spin_lock_irqsave [000] metric: 33.0% frontend bound [000] metric: 2.9% bad speculation [000] metric: 29.9% retiring [000] metric: 34.2% backend bound ... v2: Use evsel->priv for new fields Port to new base line, support fp output. Handle stats in ->stats, not ->priv Minor cleanups Extra explanation about the use of the term 'averaging', from Andi in the thread in the Link: tag below: The current samples contains the sum of event counts for a sampling period. EventA-1 EventA-2 EventA-3 EventA-4 EventB-1 EventB-2 EventC-3 gap with no events overflow |-----------------------------------------------------------------| period-start period-end ^ ^ | | previous sample current sample So EventA = 4 and EventB = 3 at the sample point I generate a metric, let's say EventA / EventB. It applies to the whole period. But the metric is over a longer time which does not have the same behavior. 
For example the gap above doesn't have any events, while they are clustered at the beginning and end of the sample period. But we're summing everything together. The metric doesn't know that the gap is different than the busy period. That's what I'm trying to express with averaging. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171117214300.32746-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 10 +++- tools/perf/builtin-script.c | 97 +++++++++++++++++++++++++++++++- tools/perf/util/metricgroup.c | 4 ++ 3 files changed, 108 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2811fcf684cb..974ceb12c7f3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr. + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -217,6 +217,14 @@ OPTIONS The brstackoff field will print an offset into a specific dso/binary. + With the metric option perf script can compute metrics for + sampling periods, similar to perf stat. This requires + specifying a group with multiple metrics with the :S option + for perf record. perf will sample on the first event, and + compute metrics for all the events in the group. Please note + that the metric computed is averaged over the whole sampling + period, not just for the sample point. 
+ -k:: --vmlinux=:: vmlinux pathname diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ee7c7aaaae72..39d8b55f0db3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -22,6 +22,7 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/stat.h" +#include "util/color.h" #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" @@ -90,6 +91,7 @@ enum perf_output_field { PERF_OUTPUT_SYNTH = 1U << 25, PERF_OUTPUT_PHYS_ADDR = 1U << 26, PERF_OUTPUT_UREGS = 1U << 27, + PERF_OUTPUT_METRIC = 1U << 28, }; struct output_option { @@ -124,6 +126,7 @@ struct output_option { {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, {.str = "synth", .field = PERF_OUTPUT_SYNTH}, {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, + {.str = "metric", .field = PERF_OUTPUT_METRIC}, }; enum { @@ -215,12 +218,20 @@ struct perf_evsel_script { char *filename; FILE *fp; u64 samples; + /* For metric output */ + u64 val; + int gnum; }; +static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel) +{ + return (struct perf_evsel_script *)evsel->priv; +} + static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel, struct perf_data *data) { - struct perf_evsel_script *es = malloc(sizeof(*es)); + struct perf_evsel_script *es = zalloc(sizeof(*es)); if (es != NULL) { if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0) @@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel es->fp = fopen(es->filename, "w"); if (es->fp == NULL) goto out_free_filename; - es->samples = 0; } return es; @@ -1472,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp) return fprintf(fp, "%-*s", maxlen, out); } +struct metric_ctx { + struct perf_sample *sample; + struct thread *thread; + struct perf_evsel *evsel; + FILE *fp; +}; + +static void script_print_metric(void *ctx, const char *color, + 
const char *fmt, + const char *unit, double val) +{ + struct metric_ctx *mctx = ctx; + + if (!fmt) + return; + perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + mctx->fp); + fputs("\tmetric: ", mctx->fp); + if (color) + color_fprintf(mctx->fp, color, fmt, val); + else + printf(fmt, val); + fprintf(mctx->fp, " %s\n", unit); +} + +static void script_new_line(void *ctx) +{ + struct metric_ctx *mctx = ctx; + + perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + mctx->fp); + fputs("\tmetric: ", mctx->fp); +} + +static void perf_sample__fprint_metric(struct perf_script *script, + struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + FILE *fp) +{ + struct perf_stat_output_ctx ctx = { + .print_metric = script_print_metric, + .new_line = script_new_line, + .ctx = &(struct metric_ctx) { + .sample = sample, + .thread = thread, + .evsel = evsel, + .fp = fp, + }, + .force_header = false, + }; + struct perf_evsel *ev2; + static bool init; + u64 val; + + if (!init) { + perf_stat__init_shadow_stats(); + init = true; + } + if (!evsel->stats) + perf_evlist__alloc_stats(script->session->evlist, false); + if (evsel_script(evsel->leader)->gnum++ == 0) + perf_stat__reset_shadow_stats(); + val = sample->period * evsel->scale; + perf_stat__update_shadow_stats(evsel, + val, + sample->cpu); + evsel_script(evsel)->val = val; + if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { + for_each_group_member (ev2, evsel->leader) { + perf_stat__print_shadow_stats(ev2, + evsel_script(ev2)->val, + sample->cpu, + &ctx, + NULL); + } + evsel_script(evsel->leader)->gnum = 0; + } +} + static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, @@ -1559,6 +1649,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(PHYS_ADDR)) fprintf(fp, "%16" PRIx64, sample->phys_addr); fprintf(fp, "\n"); + + if (PRINT_FIELD(METRIC)) + 
perf_sample__fprint_metric(script, thread, evsel, sample, fp); } static struct scripting_ops *scripting_ops; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 0ddd9c199227..6fd709017bbc 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -38,6 +38,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct metric_event me = { .evsel = evsel }; + + if (!metric_events) + return NULL; + nd = rblist__find(metric_events, &me); if (nd) return container_of(nd, struct metric_event, nd); -- cgit v1.2.3 From 2e38e661f00603584fa5a64acdf580b400bad570 Mon Sep 17 00:00:00 2001 From: Hansuk Hong Date: Fri, 24 Nov 2017 01:05:46 +0900 Subject: perf buildid-cache: Document for Node.js USDT Add a tip for Node.js USDT(User-Level Statically Defined Tracing) probes in tips.txt Signed-off-by: Hansuk Hong Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171123160546.9722-1-flavono123@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/tips.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index db0ca3063eae..3dd1dbe28407 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -32,3 +32,4 @@ Order by the overhead of source file name and line number: perf report -s srclin System-wide collection from all CPUs: perf record -a Show current config key-value pairs: perf config --list Show user configuration overrides: perf config --user --list +To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node` -- cgit v1.2.3 From f250b09c779550e4a7a412dae6d3ad34d5201019 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Nov 2017 15:35:04 -0300 Subject: perf report: Fix -D output for user metadata events The PERF_RECORD_USER_ events are synthesized by the tool to assist in processing the PERF_RECORD_ ones generated 
by the kernel, the printing of that information doesn't come with a perf_sample structure, so, when dumping the event fields using 'perf report -D' there were columns that end up not being printed. To tidy up a bit this, fake a perf_sample structure with zeroes to have the missing columns printed and avoid the occasional surprise with that. Before: 0 0x45b8 [0x68]: PERF_RECORD_MMAP -1/0: [0xffffffffc12ec000(0x4000) @ 0]: x /lib/modules/4.14.0+/kernel/fs/nls/nls_utf8.ko 0x4620 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 27820 0x4648 [0x18]: PERF_RECORD_CPU_MAP: 0-3 0 0x4660 [0x28]: PERF_RECORD_COMM: perf:27820/27820 0x4a58 [0x8]: PERF_RECORD_FINISHED_ROUND 447723433020976 0x4688 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 27820/27820: 0xffffffff8f1b6d7a period: 1 addr: 0 After: $ perf report -D | grep PERF_RECORD_ | head 0 0xe8 [0x20]: PERF_RECORD_TIME_CONV: unhandled! 0 0x108 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 32555 0 0x130 [0x18]: PERF_RECORD_CPU_MAP: 0-3 0 0x148 [0x28]: PERF_RECORD_COMM: perf:32555/32555 0 0x4e8 [0x8]: PERF_RECORD_FINISHED_ROUND 448743409421205 0x170 [0x28]: PERF_RECORD_COMM exec: sleep:32555/32555 448743409431883 0x198 [0x68]: PERF_RECORD_MMAP2 32555/32555: [0x55e11d75a000(0x208000) @ 0 fd:00 3147174 2566255743]: r-xp /usr/bin/sleep 448743409443873 0x200 [0x70]: PERF_RECORD_MMAP2 32555/32555: [0x7f0ced316000(0x229000) @ 0 fd:00 3151761 2566238119]: r-xp /usr/lib64/ld-2.25.so 448743409454790 0x270 [0x60]: PERF_RECORD_MMAP2 32555/32555: [0x7ffe84f6d000(0x2000) @ 0 00:00 0 0]: r-xp [vdso] 448743409479500 0x2d0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4002): 32555/32555: 0xffffffff8f84c7e7 period: 1 addr: 0 $ Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Thomas Gleixner Fixes: 9aefcab0de47 ("perf session: Consolidate the dump code") Link: https://lkml.kernel.org/n/tip-todcu15x0cwgppkh1gi6uhru@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 3 ++- 1 file changed, 
2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index df2857137908..54e30f1bcbd7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1348,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; + struct perf_sample sample = { .time = 0, }; int fd = perf_data__fd(session->data); int err; - dump_event(session->evlist, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, &sample); /* These events are processed right away */ switch (event->header.type) { -- cgit v1.2.3 From c2653297311612b0ead3b72b3629bfd963af2273 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 10:35:25 +0200 Subject: perf intel-pt: Improve build messages for files that differ from the kernel Print file names of files that differ. For example, instead of: Warning: Intel PT: x86 instruction decoder differs from kernel print: Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h' Reported-by: Ingo Molnar Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Adrian Hunter Link: http://lkml.kernel.org/r/1511253326-22308-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/Build | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 10e0814bb8d2..1b704fbea9de 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -11,15 +11,21 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c 
$(OUTPUT)util/intel-pt-decoder/inat-tables.c @(diff -I 2>&1 | grep -q 'option requires an argument' && \ - test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ - diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ - diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ - diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ - || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true + test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \ + ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \ + ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \ 
+ (echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -- cgit v1.2.3 From 17a68b835921c4664929376447cae5e3374ce2a7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 27 Nov 2017 16:11:49 -0300 Subject: Documentation: Add Arnaldo Melo to list of enforcement statement endorsers Add my name to the list. Signed-off-by: Arnaldo Carvalho de Melo --- Documentation/process/kernel-enforcement-statement.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst index b3170671a1df..bfa6a78103d8 100644 --- a/Documentation/process/kernel-enforcement-statement.rst +++ b/Documentation/process/kernel-enforcement-statement.rst @@ -118,6 +118,7 @@ we might work for today, have in the past, or will in the future. - Mike Marshall - Chris Mason - Paul E. McKenney + - Arnaldo Carvalho de Melo - David S. 
Miller - Ingo Molnar - Kuninori Morimoto -- cgit v1.2.3 From da8df83957b179e5edc1029f637e5b69eff44967 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 29 Nov 2017 22:48:11 -0800 Subject: Input: joystick/analog - riscv has get_cycles() Fixes: drivers/input/joystick/analog.c:176:2: warning: #warning Precise timer not defined for this architecture. [-Wcpp] Signed-off-by: Olof Johansson Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/analog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 3d8ff09eba57..c868a878c84f 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -163,7 +163,7 @@ static unsigned int get_time_pit(void) #define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "TSC" -#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) +#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) || defined(CONFIG_TILE) #define GET_TIME(x) do { x = get_cycles(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "get_cycles" -- cgit v1.2.3 From 4c83c071b7849ca3e8072284a8587669d8ba6a3d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 16 Nov 2017 16:09:29 -0800 Subject: Input: elants_i2c - do not clobber interrupt trigger on x86 This is similar to commit a4b0a58bb142 ("Input: elan_i2c - do not clobber interrupt trigger on x86") On x86 we historically used falling edge interrupts in the driver because that's how first Chrome devices were configured. They also did not use ACPI to enumerate I2C devices (because back then there was no kernel support for that), so trigger was hard-coded in the driver. 
However the controller behavior is much more reliable if we use level triggers, and that is how we configured ARM devices, and how we want to configure newer x86 devices as well. All newer x86 boxes have their I2C devices enumerated in ACPI. Let's see if platform code (ACPI, DT) described interrupt and specified particular trigger type, and if so, let's use it instead of always clobbering trigger with IRQF_TRIGGER_FALLING. We will still use this trigger type as a fallback if platform code left interrupt trigger unconfigured. Reviewed-by: Guenter Roeck Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index e102d7764bc2..a458e5ec9e41 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1261,10 +1262,13 @@ static int elants_i2c_probe(struct i2c_client *client, } /* - * Systems using device tree should set up interrupt via DTS, - * the rest will use the default falling edge interrupts. + * Platform code (ACPI, DTS) should normally set up interrupt + * for us, but in case it did not let's fall back to using falling + * edge to be compatible with older Chromebooks. */ - irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING; + irqflags = irq_get_trigger_type(client->irq); + if (!irqflags) + irqflags = IRQF_TRIGGER_FALLING; error = devm_request_threaded_irq(&client->dev, client->irq, NULL, elants_i2c_irq, -- cgit v1.2.3 From f883199d17b87e86a4ebd50bdee69285814bdce7 Mon Sep 17 00:00:00 2001 From: Xiaolei Li Date: Thu, 2 Nov 2017 10:05:07 +0800 Subject: mtd: nand: mtk: use nand_reset() to reset NAND devices in resume function Previously, we only select chips and then send reset command to a NAND device during resuming nand driver. There is a lack of deselecting chips.
It is advised to reset and initialize a NAND device using nand_reset(). Signed-off-by: Xiaolei Li Reviewed-by: Matthias Brugger Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mtk_nand.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c index d86a7d131cc0..6d0101e13ef6 100644 --- a/drivers/mtd/nand/mtk_nand.c +++ b/drivers/mtd/nand/mtk_nand.c @@ -1540,7 +1540,6 @@ static int mtk_nfc_resume(struct device *dev) struct mtk_nfc *nfc = dev_get_drvdata(dev); struct mtk_nfc_nand_chip *chip; struct nand_chip *nand; - struct mtd_info *mtd; int ret; u32 i; @@ -1553,11 +1552,8 @@ static int mtk_nfc_resume(struct device *dev) /* reset NAND chip if VCC was powered off */ list_for_each_entry(chip, &nfc->chips, node) { nand = &chip->nand; - mtd = nand_to_mtd(nand); - for (i = 0; i < chip->nsels; i++) { - nand->select_chip(mtd, i); - nand->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); - } + for (i = 0; i < chip->nsels; i++) + nand_reset(nand, i); } return 0; -- cgit v1.2.3 From b13a9735ae74d4fa9e4d53b4dcfdd779997c8e0e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 3 Nov 2017 15:31:47 -0500 Subject: mtd: nand: gpmi: replace _manual_ swap with swap macro Make use of the swap macro and remove unnecessary variables swap. This makes the code easier to read and maintain. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Han Xu Signed-off-by: Boris Brezillon --- drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 50f8d4a1b983..9e365d488b6c 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -1487,12 +1487,8 @@ static int gpmi_ecc_read_page_raw(struct mtd_info *mtd, * See the layout description for a detailed explanation on why this * is needed. 
*/ - if (this->swap_block_mark) { - u8 swap = tmp_buf[0]; - - tmp_buf[0] = tmp_buf[mtd->writesize]; - tmp_buf[mtd->writesize] = swap; - } + if (this->swap_block_mark) + swap(tmp_buf[0], tmp_buf[mtd->writesize]); /* * Copy the metadata section into the oob buffer (this section is @@ -1615,12 +1611,8 @@ static int gpmi_ecc_write_page_raw(struct mtd_info *mtd, * See the layout description for a detailed explanation on why this * is needed. */ - if (this->swap_block_mark) { - u8 swap = tmp_buf[0]; - - tmp_buf[0] = tmp_buf[mtd->writesize]; - tmp_buf[mtd->writesize] = swap; - } + if (this->swap_block_mark) + swap(tmp_buf[0], tmp_buf[mtd->writesize]); chip->write_buf(mtd, tmp_buf, mtd->writesize + mtd->oobsize); -- cgit v1.2.3 From df467899da0b71465760b4e35127bce837244eee Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 8 Nov 2017 17:00:27 +0100 Subject: mtd: nand: fix interpretation of NAND_CMD_NONE in nand_command[_lp]() Some drivers (like nand_hynix.c) call ->cmdfunc() with NAND_CMD_NONE and a column address and expect the controller to only send address cycles. Right now, the default ->cmdfunc() implementations provided by the core do not filter out the command cycle in this case and forwards the request to the controller driver through the ->cmd_ctrl() method. The thing is, NAND controller drivers can get this wrong and send a command cycle with a NAND_CMD_NONE opcode and since NAND_CMD_NONE is -1, and the command field is usually casted to an u8, we end up sending the 0xFF command which is actually a RESET operation. Add conditions in nand_command[_lp]() functions to skip sending the initial command cycle when command == NAND_CMD_NONE.
Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 6135d007a068..630048f5abdc 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -710,7 +710,8 @@ static void nand_command(struct mtd_info *mtd, unsigned int command, chip->cmd_ctrl(mtd, readcmd, ctrl); ctrl &= ~NAND_CTRL_CHANGE; } - chip->cmd_ctrl(mtd, command, ctrl); + if (command != NAND_CMD_NONE) + chip->cmd_ctrl(mtd, command, ctrl); /* Address cycle, when necessary */ ctrl = NAND_CTRL_ALE | NAND_CTRL_CHANGE; @@ -738,6 +739,7 @@ static void nand_command(struct mtd_info *mtd, unsigned int command, */ switch (command) { + case NAND_CMD_NONE: case NAND_CMD_PAGEPROG: case NAND_CMD_ERASE1: case NAND_CMD_ERASE2: @@ -831,7 +833,9 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, } /* Command latch cycle */ - chip->cmd_ctrl(mtd, command, NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE); + if (command != NAND_CMD_NONE) + chip->cmd_ctrl(mtd, command, + NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE); if (column != -1 || page_addr != -1) { int ctrl = NAND_CTRL_CHANGE | NAND_NCE | NAND_ALE; @@ -866,6 +870,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, */ switch (command) { + case NAND_CMD_NONE: case NAND_CMD_CACHEDPROG: case NAND_CMD_PAGEPROG: case NAND_CMD_ERASE1: -- cgit v1.2.3 From 26f0740ed6a211bd81edc9118526636e814bd650 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 13 Nov 2017 10:59:01 +0100 Subject: mtd: nand: hynix: Don't wait after applying new read-retry params Setting read-retry parameters has no impact on the R/B pin, so waiting for the chip to be ready is useless. 
Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_hynix.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/mtd/nand/nand_hynix.c b/drivers/mtd/nand/nand_hynix.c index 985751eda317..72d98cbff4ca 100644 --- a/drivers/mtd/nand/nand_hynix.c +++ b/drivers/mtd/nand/nand_hynix.c @@ -83,7 +83,6 @@ static int hynix_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode) struct nand_chip *chip = mtd_to_nand(mtd); struct hynix_nand *hynix = nand_get_manufacturer_data(chip); const u8 *values; - int status; int i; values = hynix->read_retry->values + @@ -112,10 +111,6 @@ static int hynix_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode) /* Apply the new settings. */ chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1); - status = chip->waitfunc(mtd, chip); - if (status & NAND_STATUS_FAIL) - return -EIO; - return 0; } -- cgit v1.2.3 From 51f493ae71adc2c49a317a13c38e54e1cdf46005 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 30 Nov 2017 10:15:02 +0000 Subject: ASoC: codecs: msm8916-wcd: Fix supported formats This codec is configurable for only 16 bit and 32 bit samples, so reflect this in the supported formats also remove 24bit sample from supported list. 
Signed-off-by: Srinivas Kandagatla Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/msm8916-wcd-analog.c | 2 +- sound/soc/codecs/msm8916-wcd-digital.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 5f3c42c4f74a..066ea2f4ce7b 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -267,7 +267,7 @@ #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\ SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000) #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S32_LE) static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4; diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index a10a724eb448..13354d6304a8 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -194,7 +194,7 @@ SNDRV_PCM_RATE_32000 | \ SNDRV_PCM_RATE_48000) #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S32_LE) struct msm8916_wcd_digital_priv { struct clk *ahbclk, *mclk; @@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream, RX_I2S_CTL_RX_I2S_MODE_MASK, RX_I2S_CTL_RX_I2S_MODE_16); break; - case SNDRV_PCM_FORMAT_S24_LE: + case SNDRV_PCM_FORMAT_S32_LE: snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL, TX_I2S_CTL_TX_I2S_MODE_MASK, TX_I2S_CTL_TX_I2S_MODE_32); -- cgit v1.2.3 From f00e0030bcbf49936d265330f6e0b8c739ad90c3 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 11:13:52 -0600 Subject: ASoC: tlv320aic31xx: Rename property for reset GPIO The property used to specify a GPIO intended for reset is "reset-gpios", but this binding uses "gpio-reset". 
It is not compatible with newer methods used to fetch GPIO pins and to prevent the spread of this error to other bindings let's rename to be more standard. We also standardize the pin as active-low, different device trees have marked the GPIO different ways, luckily the driver currently uses the low-level GPIO set function which does not respect the active-low flag, but future changes may change this. This is an active-low reset, mark it as such. Lastly, add an example of use for this property. [Rewrote the title & first paragraph of the commit message for clarity -- broonie] Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/tlv320aic31xx.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt b/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt index 6fbba562eaa7..5b3c33bb99e5 100644 --- a/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt +++ b/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt @@ -22,7 +22,7 @@ Required properties: Optional properties: -- gpio-reset - gpio pin number used for codec reset +- reset-gpios - GPIO specification for the active low RESET input. - ai31xx-micbias-vg - MicBias Voltage setting 1 or MICBIAS_2_0V - MICBIAS output is powered to 2.0V 2 or MICBIAS_2_5V - MICBIAS output is powered to 2.5V @@ -30,6 +30,10 @@ Optional properties: If this node is not mentioned or if the value is unknown, then micbias is set to 2.0V. +Deprecated properties: + +- gpio-reset - gpio pin number used for codec reset + CODEC output pins: * HPL * HPR @@ -48,6 +52,7 @@ CODEC input pins: The pins can be used in referring sound node's audio-routing property. 
Example: +#include #include tlv320aic31xx: tlv320aic31xx@18 { @@ -56,6 +61,8 @@ tlv320aic31xx: tlv320aic31xx@18 { ai31xx-micbias-vg = ; + reset-gpios = <&gpio1 17 GPIO_ACTIVE_LOW>; + HPVDD-supply = <&regulator>; SPRVDD-supply = <&regulator>; SPLVDD-supply = <&regulator>; -- cgit v1.2.3 From a825f31f93281bbe7126b25801deb476d07aaf82 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 11:13:54 -0600 Subject: ASoC: tlv320aic31xx: Use standard reset GPIO OF name The correct DT property for specifying a GPIO used for reset is "reset-gpios", fix this here. [Retitled for accuracy -- broonie] Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index e2862372c26e..4837f25b0760 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1279,9 +1279,16 @@ static void aic31xx_pdata_from_of(struct aic31xx_priv *aic31xx) aic31xx->pdata.micbias_vg = MICBIAS_2_0V; } - ret = of_get_named_gpio(np, "gpio-reset", 0); - if (ret > 0) + ret = of_get_named_gpio(np, "reset-gpios", 0); + if (ret > 0) { aic31xx->pdata.gpio_reset = ret; + } else { + ret = of_get_named_gpio(np, "gpio-reset", 0); + if (ret > 0) { + dev_warn(aic31xx->dev, "Using deprecated property \"gpio-reset\", please update your DT"); + aic31xx->pdata.gpio_reset = ret; + } + } } #else /* CONFIG_OF */ static void aic31xx_pdata_from_of(struct aic31xx_priv *aic31xx) -- cgit v1.2.3 From 943293232ca45988ba0aa693b51025c58e1189ca Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 11:13:53 -0600 Subject: ASoC: tlv320aic3x: Rename property for reset GPIO The property used to specify a GPIO intended for reset is "reset-gpios", but this binding uses "gpio-reset".
It is not compatible with newer methods used to fetch GPIO pins and to prevent the spread of this error to other bindings let's rename to be more standard. We also standardize the pin as active-low, different device trees have marked the GPIO different ways, luckily the driver currently uses the low-level GPIO set function which does not respect the active-low flag, but future changes may change this. This is an active-low reset, mark it as such. Lastly, add an example of use for this property. [Rewrote title & first paragraph for clarity & accuracy -- broonie] Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/tlv320aic3x.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/tlv320aic3x.txt b/Documentation/devicetree/bindings/sound/tlv320aic3x.txt index ba5b45c483f5..9796c4639262 100644 --- a/Documentation/devicetree/bindings/sound/tlv320aic3x.txt +++ b/Documentation/devicetree/bindings/sound/tlv320aic3x.txt @@ -17,7 +17,7 @@ Required properties: Optional properties: -- gpio-reset - gpio pin number used for codec reset +- reset-gpios - GPIO specification for the active low RESET input. - ai3x-gpio-func - - AIC3X_GPIO1 & AIC3X_GPIO2 Functionality - Not supported on tlv320aic3104 - ai3x-micbias-vg - MicBias Voltage required. @@ -34,6 +34,10 @@ Optional properties: - AVDD-supply, IOVDD-supply, DRVDD-supply, DVDD-supply : power supplies for the device as covered in Documentation/devicetree/bindings/regulator/regulator.txt +Deprecated properties: + +- gpio-reset - gpio pin number used for codec reset + CODEC output pins: * LLOUT * RLOUT @@ -61,10 +65,14 @@ The pins can be used in referring sound node's audio-routing property. 
Example: +#include + tlv320aic3x: tlv320aic3x@1b { compatible = "ti,tlv320aic3x"; reg = <0x1b>; + reset-gpios = <&gpio1 17 GPIO_ACTIVE_LOW>; + AVDD-supply = <&regulator>; IOVDD-supply = <&regulator>; DRVDD-supply = <&regulator>; -- cgit v1.2.3 From 025f8449818c46770f5652a0263f8cfb89d01455 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 11:13:55 -0600 Subject: ASoC: tlv320aic3x: Use standard reset GPIO OF name The correct DT property for specifying a GPIO used for reset is "reset-gpios", fix this here. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic3x.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 06f92571eba4..b751cad545da 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1804,11 +1804,18 @@ static int aic3x_i2c_probe(struct i2c_client *i2c, if (!ai3x_setup) return -ENOMEM; - ret = of_get_named_gpio(np, "gpio-reset", 0); - if (ret >= 0) + ret = of_get_named_gpio(np, "reset-gpios", 0); + if (ret >= 0) { aic3x->gpio_reset = ret; - else - aic3x->gpio_reset = -1; + } else { + ret = of_get_named_gpio(np, "gpio-reset", 0); + if (ret > 0) { + dev_warn(&i2c->dev, "Using deprecated property \"gpio-reset\", please update your DT"); + aic3x->gpio_reset = ret; + } else { + aic3x->gpio_reset = -1; + } + } if (of_property_read_u32_array(np, "ai3x-gpio-func", ai3x_setup->gpio_func, 2) >= 0) { -- cgit v1.2.3 From e3fee43a968fd39dcc56be3757fcdfe250964125 Mon Sep 17 00:00:00 2001 From: John Hsu Date: Thu, 30 Nov 2017 10:13:17 +0800 Subject: ASoC: nau8825: set clear_irq when imm IRQ happened Although the crosstalk is disabled, it is better to set clear_irq properly when the impedance measurement interrupt happens. It can avoid that the driver clears other IRQs by accident if the active_irq has another IRQ events.
Signed-off-by: John Hsu Reviewed-by: Wu-Cheng Li Tested-by: Wu-Cheng Li Signed-off-by: Mark Brown --- sound/soc/codecs/nau8825.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index d3c1a02f1e15..603cd72c2a25 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -1733,10 +1733,9 @@ static irqreturn_t nau8825_interrupt(int irq, void *data) nau8825->xtalk_event_mask = event_mask; } } else if (active_irq & NAU8825_IMPEDANCE_MEAS_IRQ) { - if (nau8825->xtalk_enable) { + if (nau8825->xtalk_enable) schedule_work(&nau8825->xtalk_work); - clear_irq = NAU8825_IMPEDANCE_MEAS_IRQ; - } + clear_irq = NAU8825_IMPEDANCE_MEAS_IRQ; } else if ((active_irq & NAU8825_JACK_INSERTION_IRQ_MASK) == NAU8825_JACK_INSERTION_DETECTED) { /* One more step to check GPIO status directly. Thus, the -- cgit v1.2.3 From b1c52b7e7cc6a57128869008b84d98a4de78fe2d Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:42 -0600 Subject: ASoC: tlv320aic31xx: File header and copyright cleanup Fix header copyright tags, while we are here, also switch to SPDX and fixup MODULE tags to match. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 26 +++++++++----------------- sound/soc/codecs/tlv320aic31xx.h | 15 ++++----------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 4837f25b0760..b98d9b1f216f 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1,22 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * ALSA SoC TLV320AIC31XX codec driver + * ALSA SoC TLV320AIC31xx CODEC Driver * - * Copyright (C) 2014 Texas Instruments, Inc. 
- * - * Author: Jyri Sarha + * Copyright (C) 2014-2017 Texas Instruments Incorporated - http://www.ti.com/ + * Jyri Sarha * * Based on ground work by: Ajit Kulkarni * - * This package is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * THIS PACKAGE IS PROVIDED AS IS AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. - * - * The TLV320AIC31xx series of audio codec is a low-power, highly integrated - * high performance codec which provides a stereo DAC, a mono ADC, + * The TLV320AIC31xx series of audio codecs are low-power, highly integrated + * high performance codecs which provides a stereo DAC, a mono ADC, * and mono/stereo Class-D speaker driver. */ @@ -1414,6 +1406,6 @@ static struct i2c_driver aic31xx_i2c_driver = { module_i2c_driver(aic31xx_i2c_driver); -MODULE_DESCRIPTION("ASoC TLV320AIC3111 codec driver"); -MODULE_AUTHOR("Jyri Sarha"); -MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jyri Sarha "); +MODULE_DESCRIPTION("ASoC TLV320AIC31xx CODEC Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 730fb2058869..a9ea2f99eba0 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -1,17 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * ALSA SoC TLV320AIC31XX codec driver - * - * Copyright (C) 2013 Texas Instruments, Inc. - * - * This package is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ * ALSA SoC TLV320AIC31xx CODEC Driver Definitions * + * Copyright (C) 2014-2017 Texas Instruments Incorporated - http://www.ti.com/ */ + #ifndef _TLV320AIC31XX_H #define _TLV320AIC31XX_H -- cgit v1.2.3 From c7734e8e7eddf065c15e865b8de4224b01f03409 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:43 -0600 Subject: ASoC: tlv320aic31xx: Change aic31xx_power_off return type to void The return value is not checked, and even if it was there is nothing we could do about it and messages are already printed. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index b98d9b1f216f..0563a49cc5e4 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1081,16 +1081,13 @@ static int aic31xx_power_on(struct snd_soc_codec *codec) return 0; } -static int aic31xx_power_off(struct snd_soc_codec *codec) +static void aic31xx_power_off(struct snd_soc_codec *codec) { struct aic31xx_priv *aic31xx = snd_soc_codec_get_drvdata(codec); - int ret = 0; regcache_cache_only(aic31xx->regmap, true); - ret = regulator_bulk_disable(ARRAY_SIZE(aic31xx->supplies), - aic31xx->supplies); - - return ret; + regulator_bulk_disable(ARRAY_SIZE(aic31xx->supplies), + aic31xx->supplies); } static int aic31xx_set_bias_level(struct snd_soc_codec *codec, -- cgit v1.2.3 From 09303601bacda714220048a61f6864f88594b231 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:44 -0600 Subject: ASoC: tlv320aic31xx: Move ACPI table next to OF table Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 0563a49cc5e4..d974e8651e30 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1285,6 +1285,14 @@ static void aic31xx_pdata_from_of(struct aic31xx_priv *aic31xx) } #endif /* CONFIG_OF */ +#ifdef CONFIG_ACPI +static const struct acpi_device_id aic31xx_acpi_match[] = { + { "10TI3100", 0 }, + { } +}; +MODULE_DEVICE_TABLE(acpi, aic31xx_acpi_match); +#endif + static int aic31xx_device_init(struct aic31xx_priv *aic31xx) { int ret, i; @@ -1382,14 +1390,6 @@ static const struct i2c_device_id aic31xx_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, aic31xx_i2c_id); -#ifdef CONFIG_ACPI -static const struct acpi_device_id aic31xx_acpi_match[] = { - { "10TI3100", 0 }, - { } -}; -MODULE_DEVICE_TABLE(acpi, aic31xx_acpi_match); -#endif - static struct i2c_driver aic31xx_i2c_driver = { .driver = { .name = "tlv320aic31xx-codec", -- cgit v1.2.3 From 737e0b7b67bdfe24090fab2852044bb283282fc5 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:46 -0600 Subject: ASoC: tlv320aic31xx: Fix GPIO1 register definition GPIO1 control register is number 51, fix this here. Fixes: bafcbfe429eb ("ASoC: tlv320aic31xx: Make the register values human readable") Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/tlv320aic31xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 730fb2058869..1ff3edb7bbb6 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -116,7 +116,7 @@ struct aic31xx_pdata { /* INT2 interrupt control */ #define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) /* GPIO1 control */ -#define AIC31XX_GPIO1 AIC31XX_REG(0, 50) +#define AIC31XX_GPIO1 AIC31XX_REG(0, 51) #define AIC31XX_DACPRB AIC31XX_REG(0, 60) /* ADC Instruction Set Register */ -- cgit v1.2.3 From 108884e6c5aa731c6704000e27444d51d090078b Mon Sep 17 00:00:00 2001 From: Tamaki Nishino Date: Thu, 30 Nov 2017 20:27:52 +0900 Subject: ALSA: usb-audio: Change the semantics of the enable option This patch changes the semantics of the enable option for snd-usb-audio in order to allow users to disable a device specified by either or both of the vendor id and the product id. Signed-off-by: Tamaki Nishino Signed-off-by: Takashi Iwai --- sound/usb/card.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 23d1d23aefec..8018d56cfecc 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -585,15 +585,24 @@ static int usb_audio_probe(struct usb_interface *intf, * now look for an empty slot and create a new card instance */ for (i = 0; i < SNDRV_CARDS; i++) - if (enable[i] && ! 
usb_chip[i] && + if (!usb_chip[i] && (vid[i] == -1 || vid[i] == USB_ID_VENDOR(id)) && (pid[i] == -1 || pid[i] == USB_ID_PRODUCT(id))) { - err = snd_usb_audio_create(intf, dev, i, quirk, - id, &chip); - if (err < 0) + if (enable[i]) { + err = snd_usb_audio_create(intf, dev, i, quirk, + id, &chip); + if (err < 0) + goto __error; + chip->pm_intf = intf; + break; + } else if (vid[i] != -1 || pid[i] != -1) { + dev_info(&dev->dev, + "device (%04x:%04x) is disabled\n", + USB_ID_VENDOR(id), + USB_ID_PRODUCT(id)); + err = -ENOENT; goto __error; - chip->pm_intf = intf; - break; + } } if (!chip) { dev_err(&dev->dev, "no available usb audio device\n"); -- cgit v1.2.3 From 8d26fdfcb45dc420115b267ac9d6b3ac13457f1b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Nov 2017 14:35:08 +0100 Subject: spi: Fix double "when" Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7b2170bfd6e7..bc6bb325d1bf 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when - * when not using a GPIO line) + * not using a GPIO line) * * @statistics: statistics for the spi_device * -- cgit v1.2.3 From 5d8e614f6cf8850657edbd1859391a2ae45b4488 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Nov 2017 14:38:50 +0100 Subject: spi: sh-msiof: Use dev_warn_once() instead of open-coding Use the helper introduced by commit e135303bd5bebcd2 ("device: Add dev__once variants") instead of open-coding the same functionality. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index fcd261f98b9f..81a9144f5442 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -912,9 +912,8 @@ static int sh_msiof_transfer_one(struct spi_master *master, ret = sh_msiof_dma_once(p, tx_buf, rx_buf, l); if (ret == -EAGAIN) { - pr_warn_once("%s %s: DMA not available, falling back to PIO\n", - dev_driver_string(&p->pdev->dev), - dev_name(&p->pdev->dev)); + dev_warn_once(&p->pdev->dev, + "DMA not available, falling back to PIO\n"); break; } if (ret) -- cgit v1.2.3 From 3b2323c2c1c4acf8961cfcdddcee9889daaa21e3 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 26 Nov 2017 20:20:59 -0800 Subject: perf bench futex: Use cpumaps It was reported that the whole futex bench breaks when dealing with non-contiguously numbered cpus. $ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online $ ./perf bench futex all perf: pthread_create: Operation not permitted Run summary [PID 14934]: 7 threads, each .... James had implemented an approach with cpumaps that use an in house flavor. Instead of re-inventing the wheel, I've redone the patch such that we use the perf's util/cpumap.c interface instead. Applies to all futex benchmarks. 
Suggested-by: Arnaldo Carvalho de Melo Originally-from: James Yang Signed-off-by: Davidlohr Bueso Cc: Davidlohr Bueso Cc: Kim Phillips Link: http://lkml.kernel.org/r/20171127042101.3659-2-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 19 ++++++++++++------- tools/perf/bench/futex-lock-pi.c | 23 ++++++++++++++--------- tools/perf/bench/futex-requeue.c | 22 +++++++++++++--------- tools/perf/bench/futex-wake-parallel.c | 24 +++++++++++++++--------- tools/perf/bench/futex-wake.c | 18 +++++++++++------- 5 files changed, 65 insertions(+), 41 deletions(-) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 58ae6ed8f38b..2defb6df7fd0 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -24,6 +24,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -118,11 +119,12 @@ static void print_summary(void) int bench_futex_hash(int argc, const char **argv) { int ret = 0; - cpu_set_t cpu; + cpu_set_t cpuset; struct sigaction act; - unsigned int i, ncpus; + unsigned int i; pthread_attr_t thread_attr; struct worker *worker = NULL; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); if (argc) { @@ -130,14 +132,16 @@ int bench_futex_hash(int argc, const char **argv) exit(EXIT_FAILURE); } - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + goto errmem; sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); if (!nthreads) /* default to the number of CPUs */ - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) @@ -163,10 +167,10 @@ int bench_futex_hash(int argc, const char **argv) if (!worker[i].futex) goto errmem; - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); 
+ ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -217,6 +221,7 @@ int bench_futex_hash(int argc, const char **argv) print_summary(); free(worker); + free(cpu); return ret; errmem: err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 08653ae8a8c4..8e9c4753e304 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -15,6 +15,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -32,7 +33,7 @@ static struct worker *worker; static unsigned int nsecs = 10; static bool silent = false, multi = false; static bool done = false, fshared = false; -static unsigned int ncpus, nthreads = 0; +static unsigned int nthreads = 0; static int futex_flag = 0; struct timeval start, end, runtime; static pthread_mutex_t thread_lock; @@ -113,9 +114,10 @@ static void *workerfn(void *arg) return NULL; } -static void create_threads(struct worker *w, pthread_attr_t thread_attr) +static void create_threads(struct worker *w, pthread_attr_t thread_attr, + struct cpu_map *cpu) { - cpu_set_t cpu; + cpu_set_t cpuset; unsigned int i; threads_starting = nthreads; @@ -130,10 +132,10 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr) } else worker[i].futex = &global_futex; - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) @@ -147,19 +149,22 @@ int bench_futex_lock_pi(int argc, const char **argv) unsigned int i; struct sigaction act; pthread_attr_t thread_attr; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, 
bench_futex_lock_pi_usage, 0); if (argc) goto err; - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + err(EXIT_FAILURE, "calloc"); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); if (!nthreads) - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) @@ -180,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_attr_init(&thread_attr); gettimeofday(&start, NULL); - create_threads(worker, thread_attr); + create_threads(worker, thread_attr, cpu); pthread_attr_destroy(&thread_attr); pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 1058c194608a..fc692efa0c05 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -22,6 +22,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -40,7 +41,7 @@ static bool done = false, silent = false, fshared = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats requeuetime_stats, requeued_stats; -static unsigned int ncpus, threads_starting, nthreads = 0; +static unsigned int threads_starting, nthreads = 0; static int futex_flag = 0; static const struct option options[] = { @@ -83,19 +84,19 @@ static void *workerfn(void *arg __maybe_unused) } static void block_threads(pthread_t *w, - pthread_attr_t thread_attr) + pthread_attr_t thread_attr, struct cpu_map *cpu) { - cpu_set_t cpu; + cpu_set_t cpuset; unsigned int i; threads_starting = nthreads; /* create and block all threads */ for (i = 0; i < nthreads; i++) { - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, 
"pthread_attr_setaffinity_np"); if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) @@ -116,19 +117,22 @@ int bench_futex_requeue(int argc, const char **argv) unsigned int i, j; struct sigaction act; pthread_attr_t thread_attr; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0); if (argc) goto err; - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + err(EXIT_FAILURE, "cpu_map__new"); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); if (!nthreads) - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) @@ -156,7 +160,7 @@ int bench_futex_requeue(int argc, const char **argv) struct timeval start, end, runtime; /* create, launch & block all threads */ - block_threads(worker, thread_attr); + block_threads(worker, thread_attr, cpu); /* make sure all threads are already blocked */ pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index b4732dad9f89..4488c27e8a43 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -21,6 +21,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -43,7 +44,7 @@ static unsigned int nblocked_threads = 0, nwaking_threads = 0; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats waketime_stats, wakeup_stats; -static unsigned int ncpus, threads_starting; +static unsigned int threads_starting; static int futex_flag = 0; static const struct option options[] = { @@ -119,19 +120,20 @@ static void *blocked_workerfn(void *arg __maybe_unused) return NULL; } -static void block_threads(pthread_t *w, pthread_attr_t thread_attr) +static void block_threads(pthread_t *w, pthread_attr_t thread_attr, + struct cpu_map *cpu) { - cpu_set_t cpu; + cpu_set_t cpuset; unsigned int i; 
threads_starting = nblocked_threads; /* create and block all threads */ for (i = 0; i < nblocked_threads; i++) { - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) @@ -205,6 +207,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) struct sigaction act; pthread_attr_t thread_attr; struct thread_data *waking_worker; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_wake_parallel_usage, 0); @@ -217,9 +220,12 @@ int bench_futex_wake_parallel(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + err(EXIT_FAILURE, "calloc"); + if (!nblocked_threads) - nblocked_threads = ncpus; + nblocked_threads = cpu->nr; /* some sanity checks */ if (nwaking_threads > nblocked_threads || !nwaking_threads) @@ -259,7 +265,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "calloc"); /* create, launch & block all threads */ - block_threads(blocked_worker, thread_attr); + block_threads(blocked_worker, thread_attr, cpu); /* make sure all threads are already blocked */ pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 8c5c0b6b5c97..e8181ad7d088 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -22,6 +22,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -89,19 +90,19 @@ static void print_summary(void) } static void block_threads(pthread_t *w, - pthread_attr_t thread_attr) + pthread_attr_t thread_attr, struct cpu_map *cpu) { - cpu_set_t cpu; + 
cpu_set_t cpuset; unsigned int i; threads_starting = nthreads; /* create and block all threads */ for (i = 0; i < nthreads; i++) { - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) @@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv) unsigned int i, j; struct sigaction act; pthread_attr_t thread_attr; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0); if (argc) { @@ -129,7 +131,9 @@ int bench_futex_wake(int argc, const char **argv) exit(EXIT_FAILURE); } - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + err(EXIT_FAILURE, "calloc"); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; @@ -161,7 +165,7 @@ int bench_futex_wake(int argc, const char **argv) struct timeval start, end, runtime; /* create, launch & block all threads */ - block_threads(worker, thread_attr); + block_threads(worker, thread_attr, cpu); /* make sure all threads are already blocked */ pthread_mutex_lock(&thread_lock); -- cgit v1.2.3 From 34832dc44d44d7ea586617d99895e8cfc840be03 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 17 Nov 2017 12:09:12 +0100 Subject: mtd: nand: gpmi-nand: Remove wrong Kconfig help text The GPMI nand Kconfig help texts mentions that the GPMI nand driver might conflict with SD cards. The only conflict there might really be is that both controllers use the same pins, but this is resolved by the pincontroller setup in the device tree. In any way the GPMI driver can safely be enabled, the text is just wrong. Remove it. 
Signed-off-by: Sascha Hauer Reviewed-by: Fabio Estevam Acked-by: Han Xu Signed-off-by: Boris Brezillon --- drivers/mtd/nand/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index bb48aafed9a2..859eb7790c46 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -376,9 +376,7 @@ config MTD_NAND_GPMI_NAND Enables NAND Flash support for IMX23, IMX28 or IMX6. The GPMI controller is very powerful, with the help of BCH module, it can do the hardware ECC. The GPMI supports several - NAND flashs at the same time. The GPMI may conflicts with other - block, such as SD card. So pay attention to it when you enable - the GPMI. + NAND flashs at the same time. config MTD_NAND_BRCMNAND tristate "Broadcom STB NAND controller" -- cgit v1.2.3 From d822401d1c6898a4a4ee03977b78b8cec402e88a Mon Sep 17 00:00:00 2001 From: Jesse Chan Date: Mon, 20 Nov 2017 12:57:13 -0800 Subject: mtd: nand: denali_pci: add missing MODULE_DESCRIPTION/AUTHOR/LICENSE This change resolves a new compile-time warning when built as a loadable module: WARNING: modpost: missing MODULE_LICENSE() in drivers/mtd/nand/denali_pci.o see include/linux/module.h for more information This adds the license as "GPL v2", which matches the header of the file. MODULE_DESCRIPTION and MODULE_AUTHOR are also added. 
Signed-off-by: Jesse Chan Acked-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- drivers/mtd/nand/denali_pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/denali_pci.c b/drivers/mtd/nand/denali_pci.c index 57fb7ae31412..49cb3e1f8bd0 100644 --- a/drivers/mtd/nand/denali_pci.c +++ b/drivers/mtd/nand/denali_pci.c @@ -125,3 +125,7 @@ static struct pci_driver denali_pci_driver = { .remove = denali_pci_remove, }; module_pci_driver(denali_pci_driver); + +MODULE_DESCRIPTION("PCI driver for Denali NAND controller"); +MODULE_AUTHOR("Intel Corporation and its suppliers"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From bccb06c353af3764ca86d9da47652458e6c2eb41 Mon Sep 17 00:00:00 2001 From: Jagdish Gediya Date: Thu, 23 Nov 2017 17:04:31 +0530 Subject: mtd: nand: ifc: update bufnum mask for ver >= 2.0.0 Bufnum mask is used to calculate page position in the internal SRAM. As IFC version 2.0.0 has 16KB of internal SRAM as compared to older versions which had 8KB. Hence bufnum mask needs to be updated. Signed-off-by: Jagdish Gediya Signed-off-by: Prabhakar Kushwaha Signed-off-by: Boris Brezillon --- drivers/mtd/nand/fsl_ifc_nand.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 9e03bac7f34c..bbdd68a54d68 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -916,6 +916,13 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) if (ctrl->version >= FSL_IFC_VERSION_1_1_0) fsl_ifc_sram_init(priv); + /* + * As IFC version 2.0.0 has 16KB of internal SRAM as compared to older + * versions which had 8KB. Hence bufnum mask needs to be updated. 
+ */ + if (ctrl->version >= FSL_IFC_VERSION_2_0_0) + priv->bufnum_mask = (priv->bufnum_mask * 2) + 1; + return 0; } -- cgit v1.2.3 From c9e916a4b462104cdd463b8749cf1345f3ad0577 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 23 Nov 2017 22:18:43 +0900 Subject: mtd: nand: remove unnecessary extern from driver headers 'extern' is not necessary for function declarations. scripts/checkpatch.pl with --strict option reports the following: CHECK: extern prototypes should be avoided in .h files Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- drivers/mtd/nand/denali.h | 4 +-- drivers/mtd/nand/gpmi-nand/gpmi-nand.h | 46 +++++++++++++++++----------------- drivers/mtd/nand/sm_common.h | 2 +- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/mtd/nand/denali.h b/drivers/mtd/nand/denali.h index 2911066dacac..9ad33d237378 100644 --- a/drivers/mtd/nand/denali.h +++ b/drivers/mtd/nand/denali.h @@ -329,7 +329,7 @@ struct denali_nand_info { #define DENALI_CAP_DMA_64BIT BIT(1) int denali_calc_ecc_bytes(int step_size, int strength); -extern int denali_init(struct denali_nand_info *denali); -extern void denali_remove(struct denali_nand_info *denali); +int denali_init(struct denali_nand_info *denali); +void denali_remove(struct denali_nand_info *denali); #endif /* __DENALI_H__ */ diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h index a45e4ce13d10..06c1f993912c 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h @@ -268,31 +268,31 @@ struct timing_threshold { }; /* Common Services */ -extern int common_nfc_set_geometry(struct gpmi_nand_data *); -extern struct dma_chan *get_dma_chan(struct gpmi_nand_data *); -extern void prepare_data_dma(struct gpmi_nand_data *, - enum dma_data_direction dr); -extern int start_dma_without_bch_irq(struct gpmi_nand_data *, - struct dma_async_tx_descriptor *); -extern int start_dma_with_bch_irq(struct gpmi_nand_data *, - 
struct dma_async_tx_descriptor *); +int common_nfc_set_geometry(struct gpmi_nand_data *); +struct dma_chan *get_dma_chan(struct gpmi_nand_data *); +void prepare_data_dma(struct gpmi_nand_data *, + enum dma_data_direction dr); +int start_dma_without_bch_irq(struct gpmi_nand_data *, + struct dma_async_tx_descriptor *); +int start_dma_with_bch_irq(struct gpmi_nand_data *, + struct dma_async_tx_descriptor *); /* GPMI-NAND helper function library */ -extern int gpmi_init(struct gpmi_nand_data *); -extern int gpmi_extra_init(struct gpmi_nand_data *); -extern void gpmi_clear_bch(struct gpmi_nand_data *); -extern void gpmi_dump_info(struct gpmi_nand_data *); -extern int bch_set_geometry(struct gpmi_nand_data *); -extern int gpmi_is_ready(struct gpmi_nand_data *, unsigned chip); -extern int gpmi_send_command(struct gpmi_nand_data *); -extern void gpmi_begin(struct gpmi_nand_data *); -extern void gpmi_end(struct gpmi_nand_data *); -extern int gpmi_read_data(struct gpmi_nand_data *); -extern int gpmi_send_data(struct gpmi_nand_data *); -extern int gpmi_send_page(struct gpmi_nand_data *, - dma_addr_t payload, dma_addr_t auxiliary); -extern int gpmi_read_page(struct gpmi_nand_data *, - dma_addr_t payload, dma_addr_t auxiliary); +int gpmi_init(struct gpmi_nand_data *); +int gpmi_extra_init(struct gpmi_nand_data *); +void gpmi_clear_bch(struct gpmi_nand_data *); +void gpmi_dump_info(struct gpmi_nand_data *); +int bch_set_geometry(struct gpmi_nand_data *); +int gpmi_is_ready(struct gpmi_nand_data *, unsigned chip); +int gpmi_send_command(struct gpmi_nand_data *); +void gpmi_begin(struct gpmi_nand_data *); +void gpmi_end(struct gpmi_nand_data *); +int gpmi_read_data(struct gpmi_nand_data *); +int gpmi_send_data(struct gpmi_nand_data *); +int gpmi_send_page(struct gpmi_nand_data *, + dma_addr_t payload, dma_addr_t auxiliary); +int gpmi_read_page(struct gpmi_nand_data *, + dma_addr_t payload, dma_addr_t auxiliary); void gpmi_copy_bits(u8 *dst, size_t dst_bit_off, const u8 *src, 
size_t src_bit_off, diff --git a/drivers/mtd/nand/sm_common.h b/drivers/mtd/nand/sm_common.h index d3e028e58b0f..1581671b05ae 100644 --- a/drivers/mtd/nand/sm_common.h +++ b/drivers/mtd/nand/sm_common.h @@ -36,7 +36,7 @@ struct sm_oob { #define SM_SMALL_OOB_SIZE 8 -extern int sm_register_device(struct mtd_info *mtd, int smartmedia); +int sm_register_device(struct mtd_info *mtd, int smartmedia); static inline int sm_sector_valid(struct sm_oob *oob) -- cgit v1.2.3 From 8a8c8ba1c8a65522f07fd3ccbae94712c471e683 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 23 Nov 2017 22:32:28 +0900 Subject: mtd: nand: denali: rename misleading dma_buf to tmp_buf The "dma_buf" is not used for a DMA bounce buffer, but for arranging the transferred data for the syndrome page layout. In fact, it is used in the PIO mode as well, so "dma_buf" is a misleading name. Rename it to "tmp_buf". Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- drivers/mtd/nand/denali.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index 5124f8ae8c04..34008a02ddb0 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -710,12 +710,12 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, int ecc_steps = chip->ecc.steps; int ecc_size = chip->ecc.size; int ecc_bytes = chip->ecc.bytes; - void *dma_buf = denali->buf; + void *tmp_buf = denali->buf; int oob_skip = denali->oob_skip_bytes; size_t size = writesize + oobsize; int ret, i, pos, len; - ret = denali_data_xfer(denali, dma_buf, size, page, 1, 0); + ret = denali_data_xfer(denali, tmp_buf, size, page, 1, 0); if (ret) return ret; @@ -730,11 +730,11 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, else if (pos + len > writesize) len = writesize - pos; - memcpy(buf, dma_buf + pos, len); + memcpy(buf, tmp_buf + pos, len); buf += len; if (len < ecc_size) { 
len = ecc_size - len; - memcpy(buf, dma_buf + writesize + oob_skip, + memcpy(buf, tmp_buf + writesize + oob_skip, len); buf += len; } @@ -745,7 +745,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *oob = chip->oob_poi; /* BBM at the beginning of the OOB area */ - memcpy(oob, dma_buf + writesize, oob_skip); + memcpy(oob, tmp_buf + writesize, oob_skip); oob += oob_skip; /* OOB ECC */ @@ -758,11 +758,11 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, else if (pos + len > writesize) len = writesize - pos; - memcpy(oob, dma_buf + pos, len); + memcpy(oob, tmp_buf + pos, len); oob += len; if (len < ecc_bytes) { len = ecc_bytes - len; - memcpy(oob, dma_buf + writesize + oob_skip, + memcpy(oob, tmp_buf + writesize + oob_skip, len); oob += len; } @@ -770,7 +770,7 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, /* OOB free */ len = oobsize - (oob - chip->oob_poi); - memcpy(oob, dma_buf + size - len, len); + memcpy(oob, tmp_buf + size - len, len); } return 0; @@ -841,7 +841,7 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, int ecc_steps = chip->ecc.steps; int ecc_size = chip->ecc.size; int ecc_bytes = chip->ecc.bytes; - void *dma_buf = denali->buf; + void *tmp_buf = denali->buf; int oob_skip = denali->oob_skip_bytes; size_t size = writesize + oobsize; int i, pos, len; @@ -851,7 +851,7 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, * This simplifies the logic. 
*/ if (!buf || !oob_required) - memset(dma_buf, 0xff, size); + memset(tmp_buf, 0xff, size); /* Arrange the buffer for syndrome payload/ecc layout */ if (buf) { @@ -864,11 +864,11 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, else if (pos + len > writesize) len = writesize - pos; - memcpy(dma_buf + pos, buf, len); + memcpy(tmp_buf + pos, buf, len); buf += len; if (len < ecc_size) { len = ecc_size - len; - memcpy(dma_buf + writesize + oob_skip, buf, + memcpy(tmp_buf + writesize + oob_skip, buf, len); buf += len; } @@ -879,7 +879,7 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *oob = chip->oob_poi; /* BBM at the beginning of the OOB area */ - memcpy(dma_buf + writesize, oob, oob_skip); + memcpy(tmp_buf + writesize, oob, oob_skip); oob += oob_skip; /* OOB ECC */ @@ -892,11 +892,11 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, else if (pos + len > writesize) len = writesize - pos; - memcpy(dma_buf + pos, oob, len); + memcpy(tmp_buf + pos, oob, len); oob += len; if (len < ecc_bytes) { len = ecc_bytes - len; - memcpy(dma_buf + writesize + oob_skip, oob, + memcpy(tmp_buf + writesize + oob_skip, oob, len); oob += len; } @@ -904,10 +904,10 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, /* OOB free */ len = oobsize - (oob - chip->oob_poi); - memcpy(dma_buf + size - len, oob, len); + memcpy(tmp_buf + size - len, oob, len); } - return denali_data_xfer(denali, dma_buf, size, page, 1, 1); + return denali_data_xfer(denali, tmp_buf, size, page, 1, 1); } static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From e719135881f00c01ca400abb8a5dadaf297a24f9 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Wed, 29 Nov 2017 18:23:56 +0100 Subject: xfrm: fix XFRMA_OUTPUT_MARK policy entry This seems to be an obvious typo, NLA_U32 is type of the attribute, not its (minimal) length. 
Fixes: 077fbac405bf ("net: xfrm: support setting an output mark.") Signed-off-by: Michal Kubecek Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c2cfcc6fdb34..ff58c37469d6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2485,7 +2485,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, - [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 }, + [XFRMA_OUTPUT_MARK] = { .type = NLA_U32 }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { -- cgit v1.2.3 From 4ce3dbe397d7b6b15f272ae757c78c35e9e4b61d Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Tue, 28 Nov 2017 19:55:40 +0200 Subject: xfrm: Fix xfrm_input() to verify state is valid when (encap_type < 0) Code path when (encap_type < 0) does not verify the state is valid before progressing. This will result in a crash if, for instance, x->km.state == XFRM_STATE_ACQ. 
Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Signed-off-by: Aviv Heller Signed-off-by: Yevgeny Kliteynik Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 347ab31574d5..da6447389ffb 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -207,7 +207,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) xfrm_address_t *daddr; struct xfrm_mode *inner_mode; u32 mark = skb->mark; - unsigned int family; + unsigned int family = AF_UNSPEC; int decaps = 0; int async = 0; bool xfrm_gro = false; @@ -216,6 +216,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0) { x = xfrm_input_state(skb); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + if (x->km.state == XFRM_STATE_ACQ) + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + else + XFRM_INC_STATS(net, + LINUX_MIB_XFRMINSTATEINVALID); + goto drop; + } + family = x->outer_mode->afinfo->family; /* An encap_type of -1 indicates async resumption. */ -- cgit v1.2.3 From ddc47e4404b58f03e98345398fb12d38fe291512 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 29 Nov 2017 06:53:55 +0100 Subject: xfrm: Fix stack-out-of-bounds read on socket policy lookup. When we do tunnel or beet mode, we pass saddr and daddr from the template to xfrm_state_find(), this is ok. On transport mode, we pass the addresses from the flowi, assuming that the IP addresses (and address family) don't change during transformation. This assumption is wrong in the IPv4 mapped IPv6 case, packet is IPv4 and template is IPv6. Fix this by catching address family mismatches of the policy and the flow already before we do the lookup.
Reported-by: syzbot Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9542975eb2f9..038ec68f6901 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, family); + bool match; int err = 0; + if (pol->family != family) { + pol = NULL; + goto out; + } + + match = xfrm_selector_match(&pol->selector, fl, family); if (match) { if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { pol = NULL; -- cgit v1.2.3 From 56075f6072e7fdac302cff4e1b4c93b64ced99ab Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Sun, 26 Nov 2017 15:34:04 +1100 Subject: HID: holtekff: move MODULE_* parameters out of #ifdef block If you compile with: CONFIG_HID_HOLTEK=m CONFIG_HOLTEK_FF is not set You get the following warning: WARNING: modpost: missing MODULE_LICENSE() in drivers/hid/hid-holtekff.o see include/linux/module.h for more information Fix this by moving the module info out of the #ifdef CONFIG_HOLTEK_FF block and into the un-guarded part of the file. 
Signed-off-by: Daniel Axtens Acked-by: Anssi Hannula Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-holtekff.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c index 9325545fc3ae..edc0f64bb584 100644 --- a/drivers/hid/hid-holtekff.c +++ b/drivers/hid/hid-holtekff.c @@ -32,10 +32,6 @@ #ifdef CONFIG_HOLTEK_FF -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Anssi Hannula "); -MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices"); - /* * These commands and parameters are currently known: * @@ -223,3 +219,7 @@ static struct hid_driver holtek_driver = { .probe = holtek_probe, }; module_hid_driver(holtek_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Anssi Hannula "); +MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices"); -- cgit v1.2.3 From eb94555e9e97c9983461214046b4d72c4ab4ba70 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 30 Nov 2017 18:01:28 +0100 Subject: mtd: nand: use usual return values for the ->erase() hook Avoid using specific defined values for checking returned status of the ->erase() hook. Instead, use usual negative error values on failure, zero otherwise. Signed-off-by: Miquel Raynal Acked-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- drivers/mtd/nand/denali.c | 2 +- drivers/mtd/nand/docg4.c | 7 ++++++- drivers/mtd/nand/nand_base.c | 10 ++++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index 34008a02ddb0..3e19861a46c6 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -951,7 +951,7 @@ static int denali_erase(struct mtd_info *mtd, int page) irq_status = denali_wait_for_irq(denali, INTR__ERASE_COMP | INTR__ERASE_FAIL); - return irq_status & INTR__ERASE_COMP ? 0 : NAND_STATUS_FAIL; + return irq_status & INTR__ERASE_COMP ? 
0 : -EIO; } static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr, diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c index 2436cbc71662..45c01b4b34c7 100644 --- a/drivers/mtd/nand/docg4.c +++ b/drivers/mtd/nand/docg4.c @@ -900,6 +900,7 @@ static int docg4_erase_block(struct mtd_info *mtd, int page) struct docg4_priv *doc = nand_get_controller_data(nand); void __iomem *docptr = doc->virtadr; uint16_t g4_page; + int status; dev_dbg(doc->dev, "%s: page %04x\n", __func__, page); @@ -939,7 +940,11 @@ static int docg4_erase_block(struct mtd_info *mtd, int page) poll_status(doc); write_nop(docptr); - return nand->waitfunc(mtd, nand); + status = nand->waitfunc(mtd, nand); + if (status < 0) + return status; + + return status & NAND_STATUS_FAIL ? -EIO : 0; } static int write_page(struct mtd_info *mtd, struct nand_chip *nand, diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 630048f5abdc..eacc3f39cafd 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2989,11 +2989,17 @@ out: static int single_erase(struct mtd_info *mtd, int page) { struct nand_chip *chip = mtd_to_nand(mtd); + int status; + /* Send commands to erase a block */ chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page); chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1); - return chip->waitfunc(mtd, chip); + status = chip->waitfunc(mtd, chip); + if (status < 0) + return status; + + return status & NAND_STATUS_FAIL ? 
-EIO : 0; } /** @@ -3077,7 +3083,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, status = chip->erase(mtd, page & chip->pagemask); /* See if block erase succeeded */ - if (status & NAND_STATUS_FAIL) { + if (status) { pr_debug("%s: failed erase, page 0x%08x\n", __func__, page); instr->state = MTD_ERASE_FAILED; -- cgit v1.2.3 From 8f52df50d9366f770a894d14ef724e5e04574e98 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 30 Nov 2017 21:16:56 -0800 Subject: leds: pm8058: Silence pointer to integer size warning The pointer returned by of_device_get_match_data() doesn't have the same size as u32 on 64-bit architectures, causing a compile warning when compile-testing the driver on such platform. Cast the return value of of_device_get_match_data() to unsigned long and then to u32 to silence this warning. Fixes: 7f866986e705 ("leds: add PM8058 LEDs driver") Signed-off-by: Bjorn Andersson Reviewed-by: Linus Walleij Acked-by: Pavel Machek Signed-off-by: Lee Jones --- drivers/leds/leds-pm8058.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/leds/leds-pm8058.c b/drivers/leds/leds-pm8058.c index a52674327857..8988ba3b2d65 100644 --- a/drivers/leds/leds-pm8058.c +++ b/drivers/leds/leds-pm8058.c @@ -106,7 +106,7 @@ static int pm8058_led_probe(struct platform_device *pdev) if (!led) return -ENOMEM; - led->ledtype = (u32)of_device_get_match_data(&pdev->dev); + led->ledtype = (u32)(unsigned long)of_device_get_match_data(&pdev->dev); map = dev_get_regmap(pdev->dev.parent, NULL); if (!map) { -- cgit v1.2.3 From fe77d8257c4d838c5976557ddb87bd789f312412 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 29 Nov 2017 10:25:02 +0100 Subject: batman-adv: Always initialize fragment header priority The batman-adv unicast fragment header contains 3 bits for the priority of the packet. These bits will be initialized when the skb->priority contains a value between 256 and 263.
But otherwise, the uninitialized bits from the stack will be used. Fixes: c0f25c802b33 ("batman-adv: Include frame priority in fragment header") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index a98cf1104a30..ebe6e38934e4 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -499,6 +499,8 @@ int batadv_frag_send_packet(struct sk_buff *skb, */ if (skb->priority >= 256 && skb->priority <= 263) frag_header.priority = skb->priority - 256; + else + frag_header.priority = 0; ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); ether_addr_copy(frag_header.dest, orig_node->orig); -- cgit v1.2.3 From 198a62ddffa4a4ffaeb741f642b7b52f2d91ae9b Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 29 Nov 2017 10:50:42 +0100 Subject: batman-adv: Fix check of retrieved orig_gw in batadv_v_gw_is_eligible The batadv_v_gw_is_eligible function already assumes that orig_node is not NULL. But batadv_gw_node_get may have failed to find the originator. It must therefore be checked whether the batadv_gw_node_get failed and not whether orig_node is NULL to detect this error. Fixes: 50164d8f500f ("batman-adv: B.A.T.M.A.N. 
V - implement GW selection logic") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 341ceab8338d..e0e2bfcd6b3e 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -814,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, } orig_gw = batadv_gw_node_get(bat_priv, orig_node); - if (!orig_node) + if (!orig_gw) goto out; if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) -- cgit v1.2.3 From b09b1c3bc0b7f391d00ea9e0c5970b1bbbe86eca Mon Sep 17 00:00:00 2001 From: "Ughreja, Rakesh A" Date: Fri, 1 Dec 2017 14:43:17 +0530 Subject: ASoC: hdac_hdmi: introduce macro to access HDMI private data This patch replaces the direct access of HDMI private data with macro hdev_to_hdmi_priv in order to prepare the code to remove hdac_ext_device usage in the subsequent patch. 
Signed-off-by: Rakesh Ughreja Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 68 +++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index f3b4f4dfae6a..05af2299579b 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -138,6 +138,8 @@ struct hdac_hdmi_priv { struct hdac_hdmi_drv_data *drv_data; }; +#define hdev_to_hdmi_priv(_hdev) ((to_ehdac_device(_hdev))->private_data) + static struct hdac_hdmi_pcm * hdac_hdmi_get_pcm_from_cvt(struct hdac_hdmi_priv *hdmi, struct hdac_hdmi_cvt *cvt) @@ -351,7 +353,7 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *hdac, struct hdmi_audio_infoframe frame; struct hdac_hdmi_pin *pin = port->pin; struct dp_audio_infoframe dp_ai; - struct hdac_hdmi_priv *hdmi = hdac->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&hdac->hdac); struct hdac_hdmi_cvt *cvt = pcm->cvt; u8 *dip; int ret; @@ -433,7 +435,7 @@ static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai, int slots, int slot_width) { struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_pcm *pcm; @@ -453,7 +455,7 @@ static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hparams, struct snd_soc_dai *dai) { struct hdac_ext_device *hdac = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdac->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&hdac->hdac); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_port *port; struct hdac_hdmi_pcm *pcm; @@ -566,7 +568,7 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct hdac_ext_device *hdac = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv 
*hdmi = hdac->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&hdac->hdac); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_cvt *cvt; struct hdac_hdmi_port *port; @@ -609,7 +611,7 @@ static void hdac_hdmi_pcm_close(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct hdac_ext_device *hdac = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdac->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&hdac->hdac); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_pcm *pcm; @@ -633,8 +635,7 @@ static int hdac_hdmi_query_cvt_params(struct hdac_device *hdac, struct hdac_hdmi_cvt *cvt) { unsigned int chans; - struct hdac_ext_device *edev = to_ehdac_device(hdac); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); int err; chans = get_wcaps(hdac, cvt->nid); @@ -696,7 +697,7 @@ static void hdac_hdmi_fill_route(struct snd_soc_dapm_route *route, static struct hdac_hdmi_pcm *hdac_hdmi_get_pcm(struct hdac_ext_device *edev, struct hdac_hdmi_port *port) { - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pcm *pcm = NULL; struct hdac_hdmi_port *p; @@ -782,7 +783,7 @@ static int hdac_hdmi_cvt_output_widget_event(struct snd_soc_dapm_widget *w, { struct hdac_hdmi_cvt *cvt = w->priv; struct hdac_ext_device *edev = to_hda_ext_device(w->dapm->dev); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pcm *pcm; dev_dbg(&edev->hdac.dev, "%s: widget: %s event: %x\n", @@ -864,7 +865,7 @@ static int hdac_hdmi_set_pin_port_mux(struct snd_kcontrol *kcontrol, struct snd_soc_dapm_context *dapm = w->dapm; struct hdac_hdmi_port *port = w->priv; struct hdac_ext_device *edev = to_hda_ext_device(dapm->dev); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = 
hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pcm *pcm = NULL; const char *cvt_name = e->texts[ucontrol->value.enumerated.item[0]]; @@ -922,7 +923,7 @@ static int hdac_hdmi_create_pin_port_muxs(struct hdac_ext_device *edev, struct snd_soc_dapm_widget *widget, const char *widget_name) { - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pin *pin = port->pin; struct snd_kcontrol_new *kc; struct hdac_hdmi_cvt *cvt; @@ -990,7 +991,7 @@ static void hdac_hdmi_add_pinmux_cvt_route(struct hdac_ext_device *edev, struct snd_soc_dapm_widget *widgets, struct snd_soc_dapm_route *route, int rindex) { - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); const struct snd_kcontrol_new *kc; struct soc_enum *se; int mux_index = hdmi->num_cvt + hdmi->num_ports; @@ -1033,7 +1034,7 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm) struct snd_soc_dapm_widget *widgets; struct snd_soc_dapm_route *route; struct hdac_ext_device *edev = to_hda_ext_device(dapm->dev); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct snd_soc_dai_driver *dai_drv = dapm->component->dai_drv; char widget_name[NAME_SIZE]; struct hdac_hdmi_cvt *cvt; @@ -1134,7 +1135,7 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm) static int hdac_hdmi_init_dai_map(struct hdac_ext_device *edev) { - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_cvt *cvt; int dai_id = 0; @@ -1161,7 +1162,7 @@ static int hdac_hdmi_init_dai_map(struct hdac_ext_device *edev) static int hdac_hdmi_add_cvt(struct hdac_ext_device *edev, hda_nid_t nid) { - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = 
hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_cvt *cvt; char name[NAME_SIZE]; @@ -1209,7 +1210,7 @@ static void hdac_hdmi_present_sense(struct hdac_hdmi_pin *pin, struct hdac_hdmi_port *port) { struct hdac_ext_device *edev = pin->edev; - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pcm *pcm; int size = 0; int port_id = -1; @@ -1304,7 +1305,7 @@ static int hdac_hdmi_add_ports(struct hdac_hdmi_priv *hdmi, static int hdac_hdmi_add_pin(struct hdac_ext_device *edev, hda_nid_t nid) { - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pin *pin; int ret; @@ -1336,8 +1337,7 @@ static int hdac_hdmi_add_pin(struct hdac_ext_device *edev, hda_nid_t nid) static void hdac_hdmi_skl_enable_all_pins(struct hdac_device *hdac) { unsigned int vendor_param; - struct hdac_ext_device *edev = to_ehdac_device(hdac); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); unsigned int vendor_nid = hdmi->drv_data->vendor_nid; vendor_param = snd_hdac_codec_read(hdac, vendor_nid, 0, @@ -1355,8 +1355,7 @@ static void hdac_hdmi_skl_enable_all_pins(struct hdac_device *hdac) static void hdac_hdmi_skl_enable_dp12(struct hdac_device *hdac) { unsigned int vendor_param; - struct hdac_ext_device *edev = to_ehdac_device(hdac); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); unsigned int vendor_nid = hdmi->drv_data->vendor_nid; vendor_param = snd_hdac_codec_read(hdac, vendor_nid, 0, @@ -1452,9 +1451,9 @@ static int hdac_hdmi_parse_and_map_nid(struct hdac_ext_device *edev, hda_nid_t nid; int i, num_nodes; struct hdac_device *hdac = &edev->hdac; - struct hdac_hdmi_priv *hdmi = edev->private_data; struct hdac_hdmi_cvt *temp_cvt, *cvt_next; struct hdac_hdmi_pin *temp_pin, *pin_next; + struct hdac_hdmi_priv *hdmi = 
hdev_to_hdmi_priv(hdac); int ret; hdac_hdmi_skl_enable_all_pins(hdac); @@ -1537,7 +1536,7 @@ free_widgets: static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe) { struct hdac_ext_device *edev = aptr; - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pin *pin = NULL; struct hdac_hdmi_port *hport = NULL; struct snd_soc_codec *codec = edev->scodec; @@ -1614,7 +1613,7 @@ static int create_fill_jack_kcontrols(struct snd_soc_card *card, char *name; int i = 0, j; struct snd_soc_codec *codec = edev->scodec; - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); kc = devm_kcalloc(codec->dev, hdmi->num_ports, sizeof(*kc), GFP_KERNEL); @@ -1652,7 +1651,7 @@ int hdac_hdmi_jack_port_init(struct snd_soc_codec *codec, struct snd_soc_dapm_context *dapm) { struct hdac_ext_device *edev = snd_soc_codec_get_drvdata(codec); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pin *pin; struct snd_soc_dapm_widget *widgets; struct snd_soc_dapm_route *route; @@ -1728,7 +1727,7 @@ int hdac_hdmi_jack_init(struct snd_soc_dai *dai, int device, { struct snd_soc_codec *codec = dai->codec; struct hdac_ext_device *edev = snd_soc_codec_get_drvdata(codec); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pcm *pcm; struct snd_pcm *snd_pcm; int err; @@ -1791,7 +1790,7 @@ static void hdac_hdmi_present_sense_all_pins(struct hdac_ext_device *edev, static int hdmi_codec_probe(struct snd_soc_codec *codec) { struct hdac_ext_device *edev = snd_soc_codec_get_drvdata(codec); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(&codec->component); struct hdac_ext_link 
*hlink = NULL; @@ -1870,7 +1869,7 @@ static int hdmi_codec_prepare(struct device *dev) static void hdmi_codec_complete(struct device *dev) { struct hdac_ext_device *edev = to_hda_ext_device(dev); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_device *hdac = &edev->hdac; /* Power up afg */ @@ -1904,8 +1903,7 @@ static const struct snd_soc_codec_driver hdmi_hda_codec = { static void hdac_hdmi_get_chmap(struct hdac_device *hdac, int pcm_idx, unsigned char *chmap) { - struct hdac_ext_device *edev = to_ehdac_device(hdac); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx); memcpy(chmap, pcm->chmap, ARRAY_SIZE(pcm->chmap)); @@ -1915,7 +1913,7 @@ static void hdac_hdmi_set_chmap(struct hdac_device *hdac, int pcm_idx, unsigned char *chmap, int prepared) { struct hdac_ext_device *edev = to_ehdac_device(hdac); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx); struct hdac_hdmi_port *port; @@ -1936,8 +1934,7 @@ static void hdac_hdmi_set_chmap(struct hdac_device *hdac, int pcm_idx, static bool is_hdac_hdmi_pcm_attached(struct hdac_device *hdac, int pcm_idx) { - struct hdac_ext_device *edev = to_ehdac_device(hdac); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx); if (!pcm) @@ -1951,8 +1948,7 @@ static bool is_hdac_hdmi_pcm_attached(struct hdac_device *hdac, int pcm_idx) static int hdac_hdmi_get_spk_alloc(struct hdac_device *hdac, int pcm_idx) { - struct hdac_ext_device *edev = to_ehdac_device(hdac); - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); struct hdac_hdmi_pcm *pcm = 
get_hdmi_pcm_from_id(hdmi, pcm_idx); struct hdac_hdmi_port *port; @@ -2058,7 +2054,7 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev) { - struct hdac_hdmi_priv *hdmi = edev->private_data; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_pin *pin, *pin_next; struct hdac_hdmi_cvt *cvt, *cvt_next; struct hdac_hdmi_pcm *pcm, *pcm_next; -- cgit v1.2.3 From 72bc39cf53fabf56907f9d6c4b120fc49d9abc95 Mon Sep 17 00:00:00 2001 From: "Ughreja, Rakesh A" Date: Fri, 1 Dec 2017 14:43:18 +0530 Subject: ASoC: hdac_hdmi: clean up hdac_ext_device variable names Existing code uses hdac and edev inconsistently to represent hdac_ext_device structure which creates confusion because hdac is used even to represent hdac_device. So this patch replaces all the variable instances of hdac_ext_device with edev. In the later patch all the variable instances of hdac_device will be replaced with hdev. This prepares the code base to remove the usage of hdac_ext_device data structures done in the subsequent patches. Signed-off-by: Rakesh Ughreja Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 82 ++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 05af2299579b..98a695ba9373 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -197,18 +197,18 @@ static void hdac_hdmi_jack_report(struct hdac_hdmi_pcm *pcm, /* * Get the no devices that can be connected to a port on the Pin widget. 
*/ -static int hdac_hdmi_get_port_len(struct hdac_ext_device *hdac, hda_nid_t nid) +static int hdac_hdmi_get_port_len(struct hdac_ext_device *edev, hda_nid_t nid) { unsigned int caps; unsigned int type, param; - caps = get_wcaps(&hdac->hdac, nid); + caps = get_wcaps(&edev->hdac, nid); type = get_wcaps_type(caps); if (!(caps & AC_WCAP_DIGITAL) || (type != AC_WID_PIN)) return 0; - param = snd_hdac_read_parm_uncached(&hdac->hdac, nid, + param = snd_hdac_read_parm_uncached(&edev->hdac, nid, AC_PAR_DEVLIST_LEN); if (param == -1) return param; @@ -221,10 +221,10 @@ static int hdac_hdmi_get_port_len(struct hdac_ext_device *hdac, hda_nid_t nid) * id selected on the pin. Return 0 means the first port entry * is selected or MST is not supported. */ -static int hdac_hdmi_port_select_get(struct hdac_ext_device *hdac, +static int hdac_hdmi_port_select_get(struct hdac_ext_device *edev, struct hdac_hdmi_port *port) { - return snd_hdac_codec_read(&hdac->hdac, port->pin->nid, + return snd_hdac_codec_read(&edev->hdac, port->pin->nid, 0, AC_VERB_GET_DEVICE_SEL, 0); } @@ -232,7 +232,7 @@ static int hdac_hdmi_port_select_get(struct hdac_ext_device *hdac, * Sets the selected port entry for the configuring Pin widget verb. * returns error if port set is not equal to port get otherwise success */ -static int hdac_hdmi_port_select_set(struct hdac_ext_device *hdac, +static int hdac_hdmi_port_select_set(struct hdac_ext_device *edev, struct hdac_hdmi_port *port) { int num_ports; @@ -241,7 +241,7 @@ static int hdac_hdmi_port_select_set(struct hdac_ext_device *hdac, return 0; /* AC_PAR_DEVLIST_LEN is 0 based. 
*/ - num_ports = hdac_hdmi_get_port_len(hdac, port->pin->nid); + num_ports = hdac_hdmi_get_port_len(edev, port->pin->nid); if (num_ports < 0) return -EIO; @@ -252,13 +252,13 @@ static int hdac_hdmi_port_select_set(struct hdac_ext_device *hdac, if (num_ports + 1 < port->id) return 0; - snd_hdac_codec_write(&hdac->hdac, port->pin->nid, 0, + snd_hdac_codec_write(&edev->hdac, port->pin->nid, 0, AC_VERB_SET_DEVICE_SEL, port->id); - if (port->id != hdac_hdmi_port_select_get(hdac, port)) + if (port->id != hdac_hdmi_port_select_get(edev, port)) return -EIO; - dev_dbg(&hdac->hdac.dev, "Selected the port=%d\n", port->id); + dev_dbg(&edev->hdac.dev, "Selected the port=%d\n", port->id); return 0; } @@ -323,14 +323,14 @@ format_constraint: } static void -hdac_hdmi_set_dip_index(struct hdac_ext_device *hdac, hda_nid_t pin_nid, +hdac_hdmi_set_dip_index(struct hdac_ext_device *edev, hda_nid_t pin_nid, int packet_index, int byte_index) { int val; val = (packet_index << 5) | (byte_index & 0x1f); - snd_hdac_codec_write(&hdac->hdac, pin_nid, 0, + snd_hdac_codec_write(&edev->hdac, pin_nid, 0, AC_VERB_SET_HDMI_DIP_INDEX, val); } @@ -346,14 +346,14 @@ struct dp_audio_infoframe { u8 LFEPBL01_LSV36_DM_INH7; }; -static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *hdac, +static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *edev, struct hdac_hdmi_pcm *pcm, struct hdac_hdmi_port *port) { uint8_t buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AUDIO_INFOFRAME_SIZE]; struct hdmi_audio_infoframe frame; struct hdac_hdmi_pin *pin = port->pin; struct dp_audio_infoframe dp_ai; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&hdac->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_cvt *cvt = pcm->cvt; u8 *dip; int ret; @@ -362,11 +362,11 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *hdac, u8 conn_type; int channels, ca; - ca = snd_hdac_channel_allocation(&hdac->hdac, port->eld.info.spk_alloc, + ca = 
snd_hdac_channel_allocation(&edev->hdac, port->eld.info.spk_alloc, pcm->channels, pcm->chmap_set, true, pcm->chmap); channels = snd_hdac_get_active_channels(ca); - hdmi->chmap.ops.set_channel_count(&hdac->hdac, cvt->nid, channels); + hdmi->chmap.ops.set_channel_count(&edev->hdac, cvt->nid, channels); snd_hdac_setup_channel_mapping(&hdmi->chmap, pin->nid, false, ca, pcm->channels, pcm->chmap, pcm->chmap_set); @@ -399,32 +399,32 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *hdac, break; default: - dev_err(&hdac->hdac.dev, "Invalid connection type: %d\n", + dev_err(&edev->hdac.dev, "Invalid connection type: %d\n", conn_type); return -EIO; } /* stop infoframe transmission */ - hdac_hdmi_set_dip_index(hdac, pin->nid, 0x0, 0x0); - snd_hdac_codec_write(&hdac->hdac, pin->nid, 0, + hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0); + snd_hdac_codec_write(&edev->hdac, pin->nid, 0, AC_VERB_SET_HDMI_DIP_XMIT, AC_DIPXMIT_DISABLE); /* Fill infoframe. Index auto-incremented */ - hdac_hdmi_set_dip_index(hdac, pin->nid, 0x0, 0x0); + hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0); if (conn_type == DRM_ELD_CONN_TYPE_HDMI) { for (i = 0; i < sizeof(buffer); i++) - snd_hdac_codec_write(&hdac->hdac, pin->nid, 0, + snd_hdac_codec_write(&edev->hdac, pin->nid, 0, AC_VERB_SET_HDMI_DIP_DATA, buffer[i]); } else { for (i = 0; i < sizeof(dp_ai); i++) - snd_hdac_codec_write(&hdac->hdac, pin->nid, 0, + snd_hdac_codec_write(&edev->hdac, pin->nid, 0, AC_VERB_SET_HDMI_DIP_DATA, dip[i]); } /* Start infoframe */ - hdac_hdmi_set_dip_index(hdac, pin->nid, 0x0, 0x0); - snd_hdac_codec_write(&hdac->hdac, pin->nid, 0, + hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0); + snd_hdac_codec_write(&edev->hdac, pin->nid, 0, AC_VERB_SET_HDMI_DIP_XMIT, AC_DIPXMIT_BEST); return 0; @@ -454,8 +454,8 @@ static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai, static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hparams, struct snd_soc_dai *dai) { 
- struct hdac_ext_device *hdac = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&hdac->hdac); + struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_port *port; struct hdac_hdmi_pcm *pcm; @@ -468,7 +468,7 @@ static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream, return -ENODEV; if ((!port->eld.monitor_present) || (!port->eld.eld_valid)) { - dev_err(&hdac->hdac.dev, + dev_err(&edev->hdac.dev, "device is not configured for this pin:port%d:%d\n", port->pin->nid, port->id); return -ENODEV; @@ -488,28 +488,28 @@ static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream, return 0; } -static int hdac_hdmi_query_port_connlist(struct hdac_ext_device *hdac, +static int hdac_hdmi_query_port_connlist(struct hdac_ext_device *edev, struct hdac_hdmi_pin *pin, struct hdac_hdmi_port *port) { - if (!(get_wcaps(&hdac->hdac, pin->nid) & AC_WCAP_CONN_LIST)) { - dev_warn(&hdac->hdac.dev, + if (!(get_wcaps(&edev->hdac, pin->nid) & AC_WCAP_CONN_LIST)) { + dev_warn(&edev->hdac.dev, "HDMI: pin %d wcaps %#x does not support connection list\n", - pin->nid, get_wcaps(&hdac->hdac, pin->nid)); + pin->nid, get_wcaps(&edev->hdac, pin->nid)); return -EINVAL; } - if (hdac_hdmi_port_select_set(hdac, port) < 0) + if (hdac_hdmi_port_select_set(edev, port) < 0) return -EIO; - port->num_mux_nids = snd_hdac_get_connections(&hdac->hdac, pin->nid, + port->num_mux_nids = snd_hdac_get_connections(&edev->hdac, pin->nid, port->mux_nids, HDA_MAX_CONNECTIONS); if (port->num_mux_nids == 0) - dev_warn(&hdac->hdac.dev, + dev_warn(&edev->hdac.dev, "No connections found for pin:port %d:%d\n", pin->nid, port->id); - dev_dbg(&hdac->hdac.dev, "num_mux_nids %d for pin:port %d:%d\n", + dev_dbg(&edev->hdac.dev, "num_mux_nids %d for pin:port %d:%d\n", port->num_mux_nids, pin->nid, port->id); return port->num_mux_nids; @@ -567,8 +567,8 
@@ static struct hdac_hdmi_port *hdac_hdmi_get_port_from_cvt( static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct hdac_ext_device *hdac = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&hdac->hdac); + struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_cvt *cvt; struct hdac_hdmi_port *port; @@ -577,7 +577,7 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream, dai_map = &hdmi->dai_map[dai->id]; cvt = dai_map->cvt; - port = hdac_hdmi_get_port_from_cvt(hdac, hdmi, cvt); + port = hdac_hdmi_get_port_from_cvt(edev, hdmi, cvt); /* * To make PA and other userland happy. @@ -588,7 +588,7 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream, if ((!port->eld.monitor_present) || (!port->eld.eld_valid)) { - dev_warn(&hdac->hdac.dev, + dev_warn(&edev->hdac.dev, "Failed: present?:%d ELD valid?:%d pin:port: %d:%d\n", port->eld.monitor_present, port->eld.eld_valid, port->pin->nid, port->id); @@ -610,8 +610,8 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream, static void hdac_hdmi_pcm_close(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct hdac_ext_device *hdac = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&hdac->hdac); + struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_pcm *pcm; -- cgit v1.2.3 From f0c5ebebacf3cc246e51e8814f5d4b22179f37bd Mon Sep 17 00:00:00 2001 From: "Ughreja, Rakesh A" Date: Fri, 1 Dec 2017 14:43:19 +0530 Subject: ASoC: hdac_hdmi: clean up hdac_device variable names This patch renames all the variable instances of hdac_device with hdev to prepare the code base to remove the usage of hdac_ext_device data 
structures done in the following patches. Existing code uses hdev and hdac as variable names for hdac_device as well as hdac_ext_device, which creates confusion. Signed-off-by: Rakesh Ughreja Signed-off-by: Mark Brown --- include/sound/hdaudio_ext.h | 4 +- sound/hda/ext/hdac_ext_bus.c | 2 +- sound/soc/codecs/hdac_hdmi.c | 312 +++++++++++++++++++++---------------------- 3 files changed, 159 insertions(+), 159 deletions(-) diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index ca00130cb028..9c14e21dda85 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -193,7 +193,7 @@ struct hda_dai_map { * @pvt_data - private data, for asoc contains asoc codec object */ struct hdac_ext_device { - struct hdac_device hdac; + struct hdac_device hdev; struct hdac_ext_bus *ebus; /* soc-dai to nid map */ @@ -213,7 +213,7 @@ struct hdac_ext_dma_params { u8 stream_tag; }; #define to_ehdac_device(dev) (container_of((dev), \ - struct hdac_ext_device, hdac)) + struct hdac_ext_device, hdev)) /* * HD-audio codec base driver */ diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c index 31b510c5ca0b..0daf31383084 100644 --- a/sound/hda/ext/hdac_ext_bus.c +++ b/sound/hda/ext/hdac_ext_bus.c @@ -146,7 +146,7 @@ int snd_hdac_ext_bus_device_init(struct hdac_ext_bus *ebus, int addr) edev = kzalloc(sizeof(*edev), GFP_KERNEL); if (!edev) return -ENOMEM; - hdev = &edev->hdac; + hdev = &edev->hdev; edev->ebus = ebus; snprintf(name, sizeof(name), "ehdaudio%dD%d", ebus->idx, addr); diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 98a695ba9373..3a35ede7027d 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -171,7 +171,7 @@ static void hdac_hdmi_jack_report(struct hdac_hdmi_pcm *pcm, * ports. 
*/ if (pcm->jack_event == 0) { - dev_dbg(&edev->hdac.dev, + dev_dbg(&edev->hdev.dev, "jack report for pcm=%d\n", pcm->pcm_id); snd_soc_jack_report(pcm->jack, SND_JACK_AVOUT, @@ -202,13 +202,13 @@ static int hdac_hdmi_get_port_len(struct hdac_ext_device *edev, hda_nid_t nid) unsigned int caps; unsigned int type, param; - caps = get_wcaps(&edev->hdac, nid); + caps = get_wcaps(&edev->hdev, nid); type = get_wcaps_type(caps); if (!(caps & AC_WCAP_DIGITAL) || (type != AC_WID_PIN)) return 0; - param = snd_hdac_read_parm_uncached(&edev->hdac, nid, + param = snd_hdac_read_parm_uncached(&edev->hdev, nid, AC_PAR_DEVLIST_LEN); if (param == -1) return param; @@ -224,7 +224,7 @@ static int hdac_hdmi_get_port_len(struct hdac_ext_device *edev, hda_nid_t nid) static int hdac_hdmi_port_select_get(struct hdac_ext_device *edev, struct hdac_hdmi_port *port) { - return snd_hdac_codec_read(&edev->hdac, port->pin->nid, + return snd_hdac_codec_read(&edev->hdev, port->pin->nid, 0, AC_VERB_GET_DEVICE_SEL, 0); } @@ -252,13 +252,13 @@ static int hdac_hdmi_port_select_set(struct hdac_ext_device *edev, if (num_ports + 1 < port->id) return 0; - snd_hdac_codec_write(&edev->hdac, port->pin->nid, 0, + snd_hdac_codec_write(&edev->hdev, port->pin->nid, 0, AC_VERB_SET_DEVICE_SEL, port->id); if (port->id != hdac_hdmi_port_select_get(edev, port)) return -EIO; - dev_dbg(&edev->hdac.dev, "Selected the port=%d\n", port->id); + dev_dbg(&edev->hdev.dev, "Selected the port=%d\n", port->id); return 0; } @@ -278,9 +278,9 @@ static struct hdac_hdmi_pcm *get_hdmi_pcm_from_id(struct hdac_hdmi_priv *hdmi, static inline struct hdac_ext_device *to_hda_ext_device(struct device *dev) { - struct hdac_device *hdac = dev_to_hdac_dev(dev); + struct hdac_device *hdev = dev_to_hdac_dev(dev); - return to_ehdac_device(hdac); + return to_ehdac_device(hdev); } static unsigned int sad_format(const u8 *sad) @@ -330,7 +330,7 @@ hdac_hdmi_set_dip_index(struct hdac_ext_device *edev, hda_nid_t pin_nid, val = (packet_index << 5) | 
(byte_index & 0x1f); - snd_hdac_codec_write(&edev->hdac, pin_nid, 0, + snd_hdac_codec_write(&edev->hdev, pin_nid, 0, AC_VERB_SET_HDMI_DIP_INDEX, val); } @@ -353,7 +353,7 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *edev, struct hdmi_audio_infoframe frame; struct hdac_hdmi_pin *pin = port->pin; struct dp_audio_infoframe dp_ai; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_cvt *cvt = pcm->cvt; u8 *dip; int ret; @@ -362,11 +362,11 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *edev, u8 conn_type; int channels, ca; - ca = snd_hdac_channel_allocation(&edev->hdac, port->eld.info.spk_alloc, + ca = snd_hdac_channel_allocation(&edev->hdev, port->eld.info.spk_alloc, pcm->channels, pcm->chmap_set, true, pcm->chmap); channels = snd_hdac_get_active_channels(ca); - hdmi->chmap.ops.set_channel_count(&edev->hdac, cvt->nid, channels); + hdmi->chmap.ops.set_channel_count(&edev->hdev, cvt->nid, channels); snd_hdac_setup_channel_mapping(&hdmi->chmap, pin->nid, false, ca, pcm->channels, pcm->chmap, pcm->chmap_set); @@ -399,14 +399,14 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *edev, break; default: - dev_err(&edev->hdac.dev, "Invalid connection type: %d\n", + dev_err(&edev->hdev.dev, "Invalid connection type: %d\n", conn_type); return -EIO; } /* stop infoframe transmission */ hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0); - snd_hdac_codec_write(&edev->hdac, pin->nid, 0, + snd_hdac_codec_write(&edev->hdev, pin->nid, 0, AC_VERB_SET_HDMI_DIP_XMIT, AC_DIPXMIT_DISABLE); @@ -414,17 +414,17 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *edev, hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0); if (conn_type == DRM_ELD_CONN_TYPE_HDMI) { for (i = 0; i < sizeof(buffer); i++) - snd_hdac_codec_write(&edev->hdac, pin->nid, 0, + snd_hdac_codec_write(&edev->hdev, pin->nid, 0, AC_VERB_SET_HDMI_DIP_DATA, 
buffer[i]); } else { for (i = 0; i < sizeof(dp_ai); i++) - snd_hdac_codec_write(&edev->hdac, pin->nid, 0, + snd_hdac_codec_write(&edev->hdev, pin->nid, 0, AC_VERB_SET_HDMI_DIP_DATA, dip[i]); } /* Start infoframe */ hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0); - snd_hdac_codec_write(&edev->hdac, pin->nid, 0, + snd_hdac_codec_write(&edev->hdev, pin->nid, 0, AC_VERB_SET_HDMI_DIP_XMIT, AC_DIPXMIT_BEST); return 0; @@ -435,11 +435,11 @@ static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai, int slots, int slot_width) { struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_pcm *pcm; - dev_dbg(&edev->hdac.dev, "%s: strm_tag: %d\n", __func__, tx_mask); + dev_dbg(&edev->hdev.dev, "%s: strm_tag: %d\n", __func__, tx_mask); dai_map = &hdmi->dai_map[dai->id]; @@ -455,7 +455,7 @@ static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hparams, struct snd_soc_dai *dai) { struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_port *port; struct hdac_hdmi_pcm *pcm; @@ -468,7 +468,7 @@ static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream, return -ENODEV; if ((!port->eld.monitor_present) || (!port->eld.eld_valid)) { - dev_err(&edev->hdac.dev, + dev_err(&edev->hdev.dev, "device is not configured for this pin:port%d:%d\n", port->pin->nid, port->id); return -ENODEV; @@ -492,24 +492,24 @@ static int hdac_hdmi_query_port_connlist(struct hdac_ext_device *edev, struct hdac_hdmi_pin *pin, struct hdac_hdmi_port *port) { - if (!(get_wcaps(&edev->hdac, pin->nid) & AC_WCAP_CONN_LIST)) { - dev_warn(&edev->hdac.dev, + if (!(get_wcaps(&edev->hdev, 
pin->nid) & AC_WCAP_CONN_LIST)) { + dev_warn(&edev->hdev.dev, "HDMI: pin %d wcaps %#x does not support connection list\n", - pin->nid, get_wcaps(&edev->hdac, pin->nid)); + pin->nid, get_wcaps(&edev->hdev, pin->nid)); return -EINVAL; } if (hdac_hdmi_port_select_set(edev, port) < 0) return -EIO; - port->num_mux_nids = snd_hdac_get_connections(&edev->hdac, pin->nid, + port->num_mux_nids = snd_hdac_get_connections(&edev->hdev, pin->nid, port->mux_nids, HDA_MAX_CONNECTIONS); if (port->num_mux_nids == 0) - dev_warn(&edev->hdac.dev, + dev_warn(&edev->hdev.dev, "No connections found for pin:port %d:%d\n", pin->nid, port->id); - dev_dbg(&edev->hdac.dev, "num_mux_nids %d for pin:port %d:%d\n", + dev_dbg(&edev->hdev.dev, "num_mux_nids %d for pin:port %d:%d\n", port->num_mux_nids, pin->nid, port->id); return port->num_mux_nids; @@ -568,7 +568,7 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_cvt *cvt; struct hdac_hdmi_port *port; @@ -588,7 +588,7 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream, if ((!port->eld.monitor_present) || (!port->eld.eld_valid)) { - dev_warn(&edev->hdac.dev, + dev_warn(&edev->hdev.dev, "Failed: present?:%d ELD valid?:%d pin:port: %d:%d\n", port->eld.monitor_present, port->eld.eld_valid, port->pin->nid, port->id); @@ -611,7 +611,7 @@ static void hdac_hdmi_pcm_close(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_pcm *pcm; @@ -632,13 +632,13 @@ static void hdac_hdmi_pcm_close(struct 
snd_pcm_substream *substream, } static int -hdac_hdmi_query_cvt_params(struct hdac_device *hdac, struct hdac_hdmi_cvt *cvt) +hdac_hdmi_query_cvt_params(struct hdac_device *hdev, struct hdac_hdmi_cvt *cvt) { unsigned int chans; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev); int err; - chans = get_wcaps(hdac, cvt->nid); + chans = get_wcaps(hdev, cvt->nid); chans = get_wcaps_channels(chans); cvt->params.channels_min = 2; @@ -647,12 +647,12 @@ hdac_hdmi_query_cvt_params(struct hdac_device *hdac, struct hdac_hdmi_cvt *cvt) if (chans > hdmi->chmap.channels_max) hdmi->chmap.channels_max = chans; - err = snd_hdac_query_supported_pcm(hdac, cvt->nid, + err = snd_hdac_query_supported_pcm(hdev, cvt->nid, &cvt->params.rates, &cvt->params.formats, &cvt->params.maxbps); if (err < 0) - dev_err(&hdac->dev, + dev_err(&hdev->dev, "Failed to query pcm params for nid %d: %d\n", cvt->nid, err); @@ -697,7 +697,7 @@ static void hdac_hdmi_fill_route(struct snd_soc_dapm_route *route, static struct hdac_hdmi_pcm *hdac_hdmi_get_pcm(struct hdac_ext_device *edev, struct hdac_hdmi_port *port) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pcm *pcm = NULL; struct hdac_hdmi_port *p; @@ -717,9 +717,9 @@ static struct hdac_hdmi_pcm *hdac_hdmi_get_pcm(struct hdac_ext_device *edev, static void hdac_hdmi_set_power_state(struct hdac_ext_device *edev, hda_nid_t nid, unsigned int pwr_state) { - if (get_wcaps(&edev->hdac, nid) & AC_WCAP_POWER) { - if (!snd_hdac_check_power_state(&edev->hdac, nid, pwr_state)) - snd_hdac_codec_write(&edev->hdac, nid, 0, + if (get_wcaps(&edev->hdev, nid) & AC_WCAP_POWER) { + if (!snd_hdac_check_power_state(&edev->hdev, nid, pwr_state)) + snd_hdac_codec_write(&edev->hdev, nid, 0, AC_VERB_SET_POWER_STATE, pwr_state); } } @@ -727,8 +727,8 @@ static void hdac_hdmi_set_power_state(struct hdac_ext_device *edev, static 
void hdac_hdmi_set_amp(struct hdac_ext_device *edev, hda_nid_t nid, int val) { - if (get_wcaps(&edev->hdac, nid) & AC_WCAP_OUT_AMP) - snd_hdac_codec_write(&edev->hdac, nid, 0, + if (get_wcaps(&edev->hdev, nid) & AC_WCAP_OUT_AMP) + snd_hdac_codec_write(&edev->hdev, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, val); } @@ -740,7 +740,7 @@ static int hdac_hdmi_pin_output_widget_event(struct snd_soc_dapm_widget *w, struct hdac_ext_device *edev = to_hda_ext_device(w->dapm->dev); struct hdac_hdmi_pcm *pcm; - dev_dbg(&edev->hdac.dev, "%s: widget: %s event: %x\n", + dev_dbg(&edev->hdev.dev, "%s: widget: %s event: %x\n", __func__, w->name, event); pcm = hdac_hdmi_get_pcm(edev, port); @@ -756,7 +756,7 @@ static int hdac_hdmi_pin_output_widget_event(struct snd_soc_dapm_widget *w, hdac_hdmi_set_power_state(edev, port->pin->nid, AC_PWRST_D0); /* Enable out path for this pin widget */ - snd_hdac_codec_write(&edev->hdac, port->pin->nid, 0, + snd_hdac_codec_write(&edev->hdev, port->pin->nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); hdac_hdmi_set_amp(edev, port->pin->nid, AMP_OUT_UNMUTE); @@ -767,7 +767,7 @@ static int hdac_hdmi_pin_output_widget_event(struct snd_soc_dapm_widget *w, hdac_hdmi_set_amp(edev, port->pin->nid, AMP_OUT_MUTE); /* Disable out path for this pin widget */ - snd_hdac_codec_write(&edev->hdac, port->pin->nid, 0, + snd_hdac_codec_write(&edev->hdev, port->pin->nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0); hdac_hdmi_set_power_state(edev, port->pin->nid, AC_PWRST_D3); @@ -783,10 +783,10 @@ static int hdac_hdmi_cvt_output_widget_event(struct snd_soc_dapm_widget *w, { struct hdac_hdmi_cvt *cvt = w->priv; struct hdac_ext_device *edev = to_hda_ext_device(w->dapm->dev); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pcm *pcm; - dev_dbg(&edev->hdac.dev, "%s: widget: %s event: %x\n", + dev_dbg(&edev->hdev.dev, "%s: widget: %s event: %x\n", __func__, w->name, event); pcm = 
hdac_hdmi_get_pcm_from_cvt(hdmi, cvt); @@ -798,23 +798,23 @@ static int hdac_hdmi_cvt_output_widget_event(struct snd_soc_dapm_widget *w, hdac_hdmi_set_power_state(edev, cvt->nid, AC_PWRST_D0); /* Enable transmission */ - snd_hdac_codec_write(&edev->hdac, cvt->nid, 0, + snd_hdac_codec_write(&edev->hdev, cvt->nid, 0, AC_VERB_SET_DIGI_CONVERT_1, 1); /* Category Code (CC) to zero */ - snd_hdac_codec_write(&edev->hdac, cvt->nid, 0, + snd_hdac_codec_write(&edev->hdev, cvt->nid, 0, AC_VERB_SET_DIGI_CONVERT_2, 0); - snd_hdac_codec_write(&edev->hdac, cvt->nid, 0, + snd_hdac_codec_write(&edev->hdev, cvt->nid, 0, AC_VERB_SET_CHANNEL_STREAMID, pcm->stream_tag); - snd_hdac_codec_write(&edev->hdac, cvt->nid, 0, + snd_hdac_codec_write(&edev->hdev, cvt->nid, 0, AC_VERB_SET_STREAM_FORMAT, pcm->format); break; case SND_SOC_DAPM_POST_PMD: - snd_hdac_codec_write(&edev->hdac, cvt->nid, 0, + snd_hdac_codec_write(&edev->hdev, cvt->nid, 0, AC_VERB_SET_CHANNEL_STREAMID, 0); - snd_hdac_codec_write(&edev->hdac, cvt->nid, 0, + snd_hdac_codec_write(&edev->hdev, cvt->nid, 0, AC_VERB_SET_STREAM_FORMAT, 0); hdac_hdmi_set_power_state(edev, cvt->nid, AC_PWRST_D3); @@ -832,7 +832,7 @@ static int hdac_hdmi_pin_mux_widget_event(struct snd_soc_dapm_widget *w, struct hdac_ext_device *edev = to_hda_ext_device(w->dapm->dev); int mux_idx; - dev_dbg(&edev->hdac.dev, "%s: widget: %s event: %x\n", + dev_dbg(&edev->hdev.dev, "%s: widget: %s event: %x\n", __func__, w->name, event); if (!kc) @@ -845,7 +845,7 @@ static int hdac_hdmi_pin_mux_widget_event(struct snd_soc_dapm_widget *w, return -EIO; if (mux_idx > 0) { - snd_hdac_codec_write(&edev->hdac, port->pin->nid, 0, + snd_hdac_codec_write(&edev->hdev, port->pin->nid, 0, AC_VERB_SET_CONNECT_SEL, (mux_idx - 1)); } @@ -865,7 +865,7 @@ static int hdac_hdmi_set_pin_port_mux(struct snd_kcontrol *kcontrol, struct snd_soc_dapm_context *dapm = w->dapm; struct hdac_hdmi_port *port = w->priv; struct hdac_ext_device *edev = to_hda_ext_device(dapm->dev); - struct 
hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pcm *pcm = NULL; const char *cvt_name = e->texts[ucontrol->value.enumerated.item[0]]; @@ -923,7 +923,7 @@ static int hdac_hdmi_create_pin_port_muxs(struct hdac_ext_device *edev, struct snd_soc_dapm_widget *widget, const char *widget_name) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pin *pin = port->pin; struct snd_kcontrol_new *kc; struct hdac_hdmi_cvt *cvt; @@ -935,17 +935,17 @@ static int hdac_hdmi_create_pin_port_muxs(struct hdac_ext_device *edev, int i = 0; int num_items = hdmi->num_cvt + 1; - kc = devm_kzalloc(&edev->hdac.dev, sizeof(*kc), GFP_KERNEL); + kc = devm_kzalloc(&edev->hdev.dev, sizeof(*kc), GFP_KERNEL); if (!kc) return -ENOMEM; - se = devm_kzalloc(&edev->hdac.dev, sizeof(*se), GFP_KERNEL); + se = devm_kzalloc(&edev->hdev.dev, sizeof(*se), GFP_KERNEL); if (!se) return -ENOMEM; snprintf(kc_name, NAME_SIZE, "Pin %d port %d Input", pin->nid, port->id); - kc->name = devm_kstrdup(&edev->hdac.dev, kc_name, GFP_KERNEL); + kc->name = devm_kstrdup(&edev->hdev.dev, kc_name, GFP_KERNEL); if (!kc->name) return -ENOMEM; @@ -963,24 +963,24 @@ static int hdac_hdmi_create_pin_port_muxs(struct hdac_ext_device *edev, se->mask = roundup_pow_of_two(se->items) - 1; sprintf(mux_items, "NONE"); - items[i] = devm_kstrdup(&edev->hdac.dev, mux_items, GFP_KERNEL); + items[i] = devm_kstrdup(&edev->hdev.dev, mux_items, GFP_KERNEL); if (!items[i]) return -ENOMEM; list_for_each_entry(cvt, &hdmi->cvt_list, head) { i++; sprintf(mux_items, "cvt %d", cvt->nid); - items[i] = devm_kstrdup(&edev->hdac.dev, mux_items, GFP_KERNEL); + items[i] = devm_kstrdup(&edev->hdev.dev, mux_items, GFP_KERNEL); if (!items[i]) return -ENOMEM; } - se->texts = devm_kmemdup(&edev->hdac.dev, items, + se->texts = devm_kmemdup(&edev->hdev.dev, items, (num_items * sizeof(char 
*)), GFP_KERNEL); if (!se->texts) return -ENOMEM; - return hdac_hdmi_fill_widget_info(&edev->hdac.dev, widget, + return hdac_hdmi_fill_widget_info(&edev->hdev.dev, widget, snd_soc_dapm_mux, port, widget_name, NULL, kc, 1, hdac_hdmi_pin_mux_widget_event, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_REG); @@ -991,7 +991,7 @@ static void hdac_hdmi_add_pinmux_cvt_route(struct hdac_ext_device *edev, struct snd_soc_dapm_widget *widgets, struct snd_soc_dapm_route *route, int rindex) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); const struct snd_kcontrol_new *kc; struct soc_enum *se; int mux_index = hdmi->num_cvt + hdmi->num_ports; @@ -1034,7 +1034,7 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm) struct snd_soc_dapm_widget *widgets; struct snd_soc_dapm_route *route; struct hdac_ext_device *edev = to_hda_ext_device(dapm->dev); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct snd_soc_dai_driver *dai_drv = dapm->component->dai_drv; char widget_name[NAME_SIZE]; struct hdac_hdmi_cvt *cvt; @@ -1135,7 +1135,7 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm) static int hdac_hdmi_init_dai_map(struct hdac_ext_device *edev) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_cvt *cvt; int dai_id = 0; @@ -1151,7 +1151,7 @@ static int hdac_hdmi_init_dai_map(struct hdac_ext_device *edev) dai_id++; if (dai_id == HDA_MAX_CVTS) { - dev_warn(&edev->hdac.dev, + dev_warn(&edev->hdev.dev, "Max dais supported: %d\n", dai_id); break; } @@ -1162,7 +1162,7 @@ static int hdac_hdmi_init_dai_map(struct hdac_ext_device *edev) static int hdac_hdmi_add_cvt(struct hdac_ext_device *edev, hda_nid_t nid) { - struct hdac_hdmi_priv *hdmi = 
hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_cvt *cvt; char name[NAME_SIZE]; @@ -1177,7 +1177,7 @@ static int hdac_hdmi_add_cvt(struct hdac_ext_device *edev, hda_nid_t nid) list_add_tail(&cvt->head, &hdmi->cvt_list); hdmi->num_cvt++; - return hdac_hdmi_query_cvt_params(&edev->hdac, cvt); + return hdac_hdmi_query_cvt_params(&edev->hdev, cvt); } static int hdac_hdmi_parse_eld(struct hdac_ext_device *edev, @@ -1189,7 +1189,7 @@ static int hdac_hdmi_parse_eld(struct hdac_ext_device *edev, >> DRM_ELD_VER_SHIFT; if (ver != ELD_VER_CEA_861D && ver != ELD_VER_PARTIAL) { - dev_err(&edev->hdac.dev, "HDMI: Unknown ELD version %d\n", ver); + dev_err(&edev->hdev.dev, "HDMI: Unknown ELD version %d\n", ver); return -EINVAL; } @@ -1197,7 +1197,7 @@ static int hdac_hdmi_parse_eld(struct hdac_ext_device *edev, DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT; if (mnl > ELD_MAX_MNL) { - dev_err(&edev->hdac.dev, "HDMI: MNL Invalid %d\n", mnl); + dev_err(&edev->hdev.dev, "HDMI: MNL Invalid %d\n", mnl); return -EINVAL; } @@ -1210,7 +1210,7 @@ static void hdac_hdmi_present_sense(struct hdac_hdmi_pin *pin, struct hdac_hdmi_port *port) { struct hdac_ext_device *edev = pin->edev; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pcm *pcm; int size = 0; int port_id = -1; @@ -1228,7 +1228,7 @@ static void hdac_hdmi_present_sense(struct hdac_hdmi_pin *pin, if (pin->mst_capable) port_id = port->id; - size = snd_hdac_acomp_get_eld(&edev->hdac, pin->nid, port_id, + size = snd_hdac_acomp_get_eld(&edev->hdev, pin->nid, port_id, &port->eld.monitor_present, port->eld.eld_buffer, ELD_MAX_SIZE); @@ -1251,7 +1251,7 @@ static void hdac_hdmi_present_sense(struct hdac_hdmi_pin *pin, if (!port->eld.monitor_present || !port->eld.eld_valid) { - dev_err(&edev->hdac.dev, "%s: disconnect for pin:port %d:%d\n", + dev_err(&edev->hdev.dev, "%s: disconnect for 
pin:port %d:%d\n", __func__, pin->nid, port->id); /* @@ -1305,7 +1305,7 @@ static int hdac_hdmi_add_ports(struct hdac_hdmi_priv *hdmi, static int hdac_hdmi_add_pin(struct hdac_ext_device *edev, hda_nid_t nid) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pin *pin; int ret; @@ -1334,38 +1334,38 @@ static int hdac_hdmi_add_pin(struct hdac_ext_device *edev, hda_nid_t nid) #define INTEL_EN_DP12 0x02 /* enable DP 1.2 features */ #define INTEL_EN_ALL_PIN_CVTS 0x01 /* enable 2nd & 3rd pins and convertors */ -static void hdac_hdmi_skl_enable_all_pins(struct hdac_device *hdac) +static void hdac_hdmi_skl_enable_all_pins(struct hdac_device *hdev) { unsigned int vendor_param; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev); unsigned int vendor_nid = hdmi->drv_data->vendor_nid; - vendor_param = snd_hdac_codec_read(hdac, vendor_nid, 0, + vendor_param = snd_hdac_codec_read(hdev, vendor_nid, 0, INTEL_GET_VENDOR_VERB, 0); if (vendor_param == -1 || vendor_param & INTEL_EN_ALL_PIN_CVTS) return; vendor_param |= INTEL_EN_ALL_PIN_CVTS; - vendor_param = snd_hdac_codec_read(hdac, vendor_nid, 0, + vendor_param = snd_hdac_codec_read(hdev, vendor_nid, 0, INTEL_SET_VENDOR_VERB, vendor_param); if (vendor_param == -1) return; } -static void hdac_hdmi_skl_enable_dp12(struct hdac_device *hdac) +static void hdac_hdmi_skl_enable_dp12(struct hdac_device *hdev) { unsigned int vendor_param; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev); unsigned int vendor_nid = hdmi->drv_data->vendor_nid; - vendor_param = snd_hdac_codec_read(hdac, vendor_nid, 0, + vendor_param = snd_hdac_codec_read(hdev, vendor_nid, 0, INTEL_GET_VENDOR_VERB, 0); if (vendor_param == -1 || vendor_param & INTEL_EN_DP12) return; /* enable DP1.2 mode */ vendor_param |= INTEL_EN_DP12; - vendor_param = 
snd_hdac_codec_read(hdac, vendor_nid, 0, + vendor_param = snd_hdac_codec_read(hdev, vendor_nid, 0, INTEL_SET_VENDOR_VERB, vendor_param); if (vendor_param == -1) return; @@ -1383,7 +1383,7 @@ static const struct snd_soc_dai_ops hdmi_dai_ops = { * Each converter can support a stream independently. So a dai is created * based on the number of converter queried. */ -static int hdac_hdmi_create_dais(struct hdac_device *hdac, +static int hdac_hdmi_create_dais(struct hdac_device *hdev, struct snd_soc_dai_driver **dais, struct hdac_hdmi_priv *hdmi, int num_dais) { @@ -1396,20 +1396,20 @@ static int hdac_hdmi_create_dais(struct hdac_device *hdac, u64 formats; int ret; - hdmi_dais = devm_kzalloc(&hdac->dev, + hdmi_dais = devm_kzalloc(&hdev->dev, (sizeof(*hdmi_dais) * num_dais), GFP_KERNEL); if (!hdmi_dais) return -ENOMEM; list_for_each_entry(cvt, &hdmi->cvt_list, head) { - ret = snd_hdac_query_supported_pcm(hdac, cvt->nid, + ret = snd_hdac_query_supported_pcm(hdev, cvt->nid, &rates, &formats, &bps); if (ret) return ret; sprintf(dai_name, "intel-hdmi-hifi%d", i+1); - hdmi_dais[i].name = devm_kstrdup(&hdac->dev, + hdmi_dais[i].name = devm_kstrdup(&hdev->dev, dai_name, GFP_KERNEL); if (!hdmi_dais[i].name) @@ -1417,7 +1417,7 @@ static int hdac_hdmi_create_dais(struct hdac_device *hdac, snprintf(name, sizeof(name), "hifi%d", i+1); hdmi_dais[i].playback.stream_name = - devm_kstrdup(&hdac->dev, name, GFP_KERNEL); + devm_kstrdup(&hdev->dev, name, GFP_KERNEL); if (!hdmi_dais[i].playback.stream_name) return -ENOMEM; @@ -1450,29 +1450,29 @@ static int hdac_hdmi_parse_and_map_nid(struct hdac_ext_device *edev, { hda_nid_t nid; int i, num_nodes; - struct hdac_device *hdac = &edev->hdac; struct hdac_hdmi_cvt *temp_cvt, *cvt_next; struct hdac_hdmi_pin *temp_pin, *pin_next; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); + struct hdac_device *hdev = &edev->hdev; int ret; - hdac_hdmi_skl_enable_all_pins(hdac); - 
hdac_hdmi_skl_enable_dp12(hdac); + hdac_hdmi_skl_enable_all_pins(hdev); + hdac_hdmi_skl_enable_dp12(hdev); - num_nodes = snd_hdac_get_sub_nodes(hdac, hdac->afg, &nid); + num_nodes = snd_hdac_get_sub_nodes(hdev, hdev->afg, &nid); if (!nid || num_nodes <= 0) { - dev_warn(&hdac->dev, "HDMI: failed to get afg sub nodes\n"); + dev_warn(&hdev->dev, "HDMI: failed to get afg sub nodes\n"); return -EINVAL; } - hdac->num_nodes = num_nodes; - hdac->start_nid = nid; + hdev->num_nodes = num_nodes; + hdev->start_nid = nid; - for (i = 0; i < hdac->num_nodes; i++, nid++) { + for (i = 0; i < hdev->num_nodes; i++, nid++) { unsigned int caps; unsigned int type; - caps = get_wcaps(hdac, nid); + caps = get_wcaps(hdev, nid); type = get_wcaps_type(caps); if (!(caps & AC_WCAP_DIGITAL)) @@ -1494,16 +1494,16 @@ static int hdac_hdmi_parse_and_map_nid(struct hdac_ext_device *edev, } } - hdac->end_nid = nid; + hdev->end_nid = nid; if (!hdmi->num_pin || !hdmi->num_cvt) { ret = -EIO; goto free_widgets; } - ret = hdac_hdmi_create_dais(hdac, dais, hdmi, hdmi->num_cvt); + ret = hdac_hdmi_create_dais(hdev, dais, hdmi, hdmi->num_cvt); if (ret) { - dev_err(&hdac->dev, "Failed to create dais with err: %d\n", + dev_err(&hdev->dev, "Failed to create dais with err: %d\n", ret); goto free_widgets; } @@ -1536,7 +1536,7 @@ free_widgets: static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe) { struct hdac_ext_device *edev = aptr; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pin *pin = NULL; struct hdac_hdmi_port *hport = NULL; struct snd_soc_codec *codec = edev->scodec; @@ -1545,7 +1545,7 @@ static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe) /* Don't know how this mapping is derived */ hda_nid_t pin_nid = port + 0x04; - dev_dbg(&edev->hdac.dev, "%s: for pin:%d port=%d\n", __func__, + dev_dbg(&edev->hdev.dev, "%s: for pin:%d port=%d\n", __func__, pin_nid, pipe); /* @@ -1558,7 
+1558,7 @@ static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe) SNDRV_CTL_POWER_D0) return; - if (atomic_read(&edev->hdac.in_pm)) + if (atomic_read(&edev->hdev.in_pm)) return; list_for_each_entry(pin, &hdmi->pin_list, head) { @@ -1613,7 +1613,7 @@ static int create_fill_jack_kcontrols(struct snd_soc_card *card, char *name; int i = 0, j; struct snd_soc_codec *codec = edev->scodec; - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); kc = devm_kcalloc(codec->dev, hdmi->num_ports, sizeof(*kc), GFP_KERNEL); @@ -1651,7 +1651,7 @@ int hdac_hdmi_jack_port_init(struct snd_soc_codec *codec, struct snd_soc_dapm_context *dapm) { struct hdac_ext_device *edev = snd_soc_codec_get_drvdata(codec); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pin *pin; struct snd_soc_dapm_widget *widgets; struct snd_soc_dapm_route *route; @@ -1727,7 +1727,7 @@ int hdac_hdmi_jack_init(struct snd_soc_dai *dai, int device, { struct snd_soc_codec *codec = dai->codec; struct hdac_ext_device *edev = snd_soc_codec_get_drvdata(codec); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pcm *pcm; struct snd_pcm *snd_pcm; int err; @@ -1749,7 +1749,7 @@ int hdac_hdmi_jack_init(struct snd_soc_dai *dai, int device, if (snd_pcm) { err = snd_hdac_add_chmap_ctls(snd_pcm, device, &hdmi->chmap); if (err < 0) { - dev_err(&edev->hdac.dev, + dev_err(&edev->hdev.dev, "chmap control add failed with err: %d for pcm: %d\n", err, device); kfree(pcm); @@ -1790,7 +1790,7 @@ static void hdac_hdmi_present_sense_all_pins(struct hdac_ext_device *edev, static int hdmi_codec_probe(struct snd_soc_codec *codec) { struct hdac_ext_device *edev = snd_soc_codec_get_drvdata(codec); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct 
hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(&codec->component); struct hdac_ext_link *hlink = NULL; @@ -1802,9 +1802,9 @@ static int hdmi_codec_probe(struct snd_soc_codec *codec) * hold the ref while we probe, also no need to drop the ref on * exit, we call pm_runtime_suspend() so that will do for us */ - hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev)); + hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdev.dev)); if (!hlink) { - dev_err(&edev->hdac.dev, "hdac link not found\n"); + dev_err(&edev->hdev.dev, "hdac link not found\n"); return -EIO; } @@ -1817,7 +1817,7 @@ static int hdmi_codec_probe(struct snd_soc_codec *codec) aops.audio_ptr = edev; ret = snd_hdac_i915_register_notifier(&aops); if (ret < 0) { - dev_err(&edev->hdac.dev, "notifier register failed: err: %d\n", + dev_err(&edev->hdev.dev, "notifier register failed: err: %d\n", ret); return ret; } @@ -1830,9 +1830,9 @@ static int hdmi_codec_probe(struct snd_soc_codec *codec) * hdac_device core already sets the state to active and calls * get_noresume. So enable runtime and set the device to suspend. 
*/ - pm_runtime_enable(&edev->hdac.dev); - pm_runtime_put(&edev->hdac.dev); - pm_runtime_suspend(&edev->hdac.dev); + pm_runtime_enable(&edev->hdev.dev); + pm_runtime_put(&edev->hdev.dev); + pm_runtime_suspend(&edev->hdev.dev); return 0; } @@ -1841,7 +1841,7 @@ static int hdmi_codec_remove(struct snd_soc_codec *codec) { struct hdac_ext_device *edev = snd_soc_codec_get_drvdata(codec); - pm_runtime_disable(&edev->hdac.dev); + pm_runtime_disable(&edev->hdev.dev); return 0; } @@ -1849,9 +1849,9 @@ static int hdmi_codec_remove(struct snd_soc_codec *codec) static int hdmi_codec_prepare(struct device *dev) { struct hdac_ext_device *edev = to_hda_ext_device(dev); - struct hdac_device *hdac = &edev->hdac; + struct hdac_device *hdev = &edev->hdev; - pm_runtime_get_sync(&edev->hdac.dev); + pm_runtime_get_sync(&edev->hdev.dev); /* * Power down afg. @@ -1860,7 +1860,7 @@ static int hdmi_codec_prepare(struct device *dev) * is received. So setting power state is ensured without using loop * to read the state. 
*/ - snd_hdac_codec_read(hdac, hdac->afg, 0, AC_VERB_SET_POWER_STATE, + snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, AC_PWRST_D3); return 0; @@ -1869,15 +1869,15 @@ static int hdmi_codec_prepare(struct device *dev) static void hdmi_codec_complete(struct device *dev) { struct hdac_ext_device *edev = to_hda_ext_device(dev); - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); - struct hdac_device *hdac = &edev->hdac; + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); + struct hdac_device *hdev = &edev->hdev; /* Power up afg */ - snd_hdac_codec_read(hdac, hdac->afg, 0, AC_VERB_SET_POWER_STATE, + snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, AC_PWRST_D0); - hdac_hdmi_skl_enable_all_pins(&edev->hdac); - hdac_hdmi_skl_enable_dp12(&edev->hdac); + hdac_hdmi_skl_enable_all_pins(&edev->hdev); + hdac_hdmi_skl_enable_dp12(&edev->hdev); /* * As the ELD notify callback request is not entertained while the @@ -1887,7 +1887,7 @@ static void hdmi_codec_complete(struct device *dev) */ hdac_hdmi_present_sense_all_pins(edev, hdmi, false); - pm_runtime_put_sync(&edev->hdac.dev); + pm_runtime_put_sync(&edev->hdev.dev); } #else #define hdmi_codec_prepare NULL @@ -1900,20 +1900,20 @@ static const struct snd_soc_codec_driver hdmi_hda_codec = { .idle_bias_off = true, }; -static void hdac_hdmi_get_chmap(struct hdac_device *hdac, int pcm_idx, +static void hdac_hdmi_get_chmap(struct hdac_device *hdev, int pcm_idx, unsigned char *chmap) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev); struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx); memcpy(chmap, pcm->chmap, ARRAY_SIZE(pcm->chmap)); } -static void hdac_hdmi_set_chmap(struct hdac_device *hdac, int pcm_idx, +static void hdac_hdmi_set_chmap(struct hdac_device *hdev, int pcm_idx, unsigned char *chmap, int prepared) { - struct hdac_ext_device *edev = to_ehdac_device(hdac); - struct hdac_hdmi_priv *hdmi = 
hdev_to_hdmi_priv(hdac); + struct hdac_ext_device *edev = to_ehdac_device(hdev); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev); struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx); struct hdac_hdmi_port *port; @@ -1932,9 +1932,9 @@ static void hdac_hdmi_set_chmap(struct hdac_device *hdac, int pcm_idx, mutex_unlock(&pcm->lock); } -static bool is_hdac_hdmi_pcm_attached(struct hdac_device *hdac, int pcm_idx) +static bool is_hdac_hdmi_pcm_attached(struct hdac_device *hdev, int pcm_idx) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev); struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx); if (!pcm) @@ -1946,9 +1946,9 @@ static bool is_hdac_hdmi_pcm_attached(struct hdac_device *hdac, int pcm_idx) return true; } -static int hdac_hdmi_get_spk_alloc(struct hdac_device *hdac, int pcm_idx) +static int hdac_hdmi_get_spk_alloc(struct hdac_device *hdev, int pcm_idx) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev); struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx); struct hdac_hdmi_port *port; @@ -1979,30 +1979,30 @@ static struct hdac_hdmi_drv_data intel_drv_data = { static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) { - struct hdac_device *codec = &edev->hdac; + struct hdac_device *hdev = &edev->hdev; struct hdac_hdmi_priv *hdmi_priv; struct snd_soc_dai_driver *hdmi_dais = NULL; struct hdac_ext_link *hlink = NULL; int num_dais = 0; int ret = 0; - struct hdac_driver *hdrv = drv_to_hdac_driver(codec->dev.driver); - const struct hda_device_id *hdac_id = hdac_get_device_id(codec, hdrv); + struct hdac_driver *hdrv = drv_to_hdac_driver(hdev->dev.driver); + const struct hda_device_id *hdac_id = hdac_get_device_id(hdev, hdrv); /* hold the ref while we probe */ - hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev)); + hlink = snd_hdac_ext_bus_get_link(edev->ebus, 
dev_name(&edev->hdev.dev)); if (!hlink) { - dev_err(&edev->hdac.dev, "hdac link not found\n"); + dev_err(&edev->hdev.dev, "hdac link not found\n"); return -EIO; } snd_hdac_ext_bus_link_get(edev->ebus, hlink); - hdmi_priv = devm_kzalloc(&codec->dev, sizeof(*hdmi_priv), GFP_KERNEL); + hdmi_priv = devm_kzalloc(&hdev->dev, sizeof(*hdmi_priv), GFP_KERNEL); if (hdmi_priv == NULL) return -ENOMEM; edev->private_data = hdmi_priv; - snd_hdac_register_chmap_ops(codec, &hdmi_priv->chmap); + snd_hdac_register_chmap_ops(hdev, &hdmi_priv->chmap); hdmi_priv->chmap.ops.get_chmap = hdac_hdmi_get_chmap; hdmi_priv->chmap.ops.set_chmap = hdac_hdmi_set_chmap; hdmi_priv->chmap.ops.is_pcm_attached = is_hdac_hdmi_pcm_attached; @@ -2017,7 +2017,7 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) else hdmi_priv->drv_data = &intel_drv_data; - dev_set_drvdata(&codec->dev, edev); + dev_set_drvdata(&hdev->dev, edev); INIT_LIST_HEAD(&hdmi_priv->pin_list); INIT_LIST_HEAD(&hdmi_priv->cvt_list); @@ -2028,9 +2028,9 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) * Turned off in the runtime_suspend during the first explicit * pm_runtime_suspend call. 
*/ - ret = snd_hdac_display_power(edev->hdac.bus, true); + ret = snd_hdac_display_power(edev->hdev.bus, true); if (ret < 0) { - dev_err(&edev->hdac.dev, + dev_err(&edev->hdev.dev, "Cannot turn on display power on i915 err: %d\n", ret); return ret; @@ -2038,13 +2038,13 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) ret = hdac_hdmi_parse_and_map_nid(edev, &hdmi_dais, &num_dais); if (ret < 0) { - dev_err(&codec->dev, + dev_err(&hdev->dev, "Failed in parse and map nid with err: %d\n", ret); return ret; } /* ASoC specific initialization */ - ret = snd_soc_register_codec(&codec->dev, &hdmi_hda_codec, + ret = snd_soc_register_codec(&hdev->dev, &hdmi_hda_codec, hdmi_dais, num_dais); snd_hdac_ext_bus_link_put(edev->ebus, hlink); @@ -2054,14 +2054,14 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev) { - struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdac); + struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); struct hdac_hdmi_pin *pin, *pin_next; struct hdac_hdmi_cvt *cvt, *cvt_next; struct hdac_hdmi_pcm *pcm, *pcm_next; struct hdac_hdmi_port *port, *port_next; int i; - snd_soc_unregister_codec(&edev->hdac.dev); + snd_soc_unregister_codec(&edev->hdev.dev); list_for_each_entry_safe(pcm, pcm_next, &hdmi->pcm_list, head) { pcm->cvt = NULL; @@ -2097,8 +2097,8 @@ static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev) static int hdac_hdmi_runtime_suspend(struct device *dev) { struct hdac_ext_device *edev = to_hda_ext_device(dev); - struct hdac_device *hdac = &edev->hdac; - struct hdac_bus *bus = hdac->bus; + struct hdac_device *hdev = &edev->hdev; + struct hdac_bus *bus = hdev->bus; struct hdac_ext_bus *ebus = hbus_to_ebus(bus); struct hdac_ext_link *hlink = NULL; int err; @@ -2116,7 +2116,7 @@ static int hdac_hdmi_runtime_suspend(struct device *dev) * is received. So setting power state is ensured without using loop * to read the state. 
*/ - snd_hdac_codec_read(hdac, hdac->afg, 0, AC_VERB_SET_POWER_STATE, + snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, AC_PWRST_D3); err = snd_hdac_display_power(bus, false); if (err < 0) { @@ -2138,8 +2138,8 @@ static int hdac_hdmi_runtime_suspend(struct device *dev) static int hdac_hdmi_runtime_resume(struct device *dev) { struct hdac_ext_device *edev = to_hda_ext_device(dev); - struct hdac_device *hdac = &edev->hdac; - struct hdac_bus *bus = hdac->bus; + struct hdac_device *hdev = &edev->hdev; + struct hdac_bus *bus = hdev->bus; struct hdac_ext_bus *ebus = hbus_to_ebus(bus); struct hdac_ext_link *hlink = NULL; int err; @@ -2164,11 +2164,11 @@ static int hdac_hdmi_runtime_resume(struct device *dev) return err; } - hdac_hdmi_skl_enable_all_pins(&edev->hdac); - hdac_hdmi_skl_enable_dp12(&edev->hdac); + hdac_hdmi_skl_enable_all_pins(&edev->hdev); + hdac_hdmi_skl_enable_dp12(&edev->hdev); /* Power up afg */ - snd_hdac_codec_read(hdac, hdac->afg, 0, AC_VERB_SET_POWER_STATE, + snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, AC_PWRST_D0); return 0; -- cgit v1.2.3 From 56ae83f11db58a3a62d0d309efb84544f2c02e3b Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 30 Nov 2017 18:56:16 +0100 Subject: ASoC: tfa9879: Export OF device ID as module alias The I2C core always reports a MODALIAS of the form i2c:<device> even if the device was registered via OF. This means that exporting the OF device ID table device aliases in the module is not needed. But in order to change how the core reports modaliases to user-space, it's better to export it.
Before this patch: $ modinfo sound/soc/codecs/snd-soc-tfa9879.ko | grep alias alias: i2c:tfa9879 After this patch: $ modinfo sound/soc/codecs/snd-soc-tfa9879.ko | grep alias alias: i2c:tfa9879 alias: of:N*T*Cnxp,tfa9879C* alias: of:N*T*Cnxp,tfa9879 Signed-off-by: Javier Martinez Canillas Signed-off-by: Mark Brown --- sound/soc/codecs/tfa9879.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/tfa9879.c b/sound/soc/codecs/tfa9879.c index f8dd67ca0744..e7ca764b5729 100644 --- a/sound/soc/codecs/tfa9879.c +++ b/sound/soc/codecs/tfa9879.c @@ -316,6 +316,7 @@ static const struct of_device_id tfa9879_of_match[] = { { .compatible = "nxp,tfa9879", }, { } }; +MODULE_DEVICE_TABLE(of, tfa9879_of_match); static struct i2c_driver tfa9879_i2c_driver = { .driver = { -- cgit v1.2.3 From 8d6fb0bce2021baf056344cb0abb2df00c5fe6d5 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 29 Nov 2017 21:47:10 +0530 Subject: ASoC: ep93xx-ac97: Fix platform_get_irq's error checking The platform_get_irq() function returns a negative value if an error occurs, and zero or a positive number on success. Checking the platform_get_irq() result only for zero is not correct. Signed-off-by: Arvind Yadav Signed-off-by: Mark Brown --- sound/soc/cirrus/ep93xx-ac97.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/cirrus/ep93xx-ac97.c b/sound/soc/cirrus/ep93xx-ac97.c index bbf7a9266a99..cd5a939ad608 100644 --- a/sound/soc/cirrus/ep93xx-ac97.c +++ b/sound/soc/cirrus/ep93xx-ac97.c @@ -365,7 +365,7 @@ static int ep93xx_ac97_probe(struct platform_device *pdev) { struct ep93xx_ac97_info *info; struct resource *res; - unsigned int irq; + int irq; int ret; info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); @@ -378,8 +378,8 @@ static int ep93xx_ac97_probe(struct platform_device *pdev) return PTR_ERR(info->regs); irq = platform_get_irq(pdev, 0); - if (!irq) - return -ENODEV; + if (irq <= 0) + return irq < 0 ?
irq : -ENODEV; ret = devm_request_irq(&pdev->dev, irq, ep93xx_ac97_interrupt, IRQF_TRIGGER_HIGH, pdev->name, info); -- cgit v1.2.3 From d6e2c4ffdf67de68e0263630525d2b521132d66a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 29 Nov 2017 21:47:11 +0530 Subject: ASoC: mt8173: Fix platform_get_irq's error checking The platform_get_irq() function returns a negative value if an error occurs, and zero or a positive number on success. Checking the platform_get_irq() result only for zero is not correct. Signed-off-by: Arvind Yadav Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8173/mt8173-afe-pcm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c index 8a643a35d3d4..c7f7f8add5d9 100644 --- a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c +++ b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c @@ -1083,7 +1083,7 @@ static int mt8173_afe_init_audio_clk(struct mtk_base_afe *afe) static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) { int ret, i; - unsigned int irq_id; + int irq_id; struct mtk_base_afe *afe; struct mt8173_afe_private *afe_priv; struct resource *res; @@ -1105,9 +1105,9 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) afe->dev = &pdev->dev; irq_id = platform_get_irq(pdev, 0); - if (!irq_id) { + if (irq_id <= 0) { dev_err(afe->dev, "np %s no irq\n", afe->dev->of_node->name); - return -ENXIO; + return irq_id < 0 ? irq_id : -ENXIO; } ret = devm_request_irq(afe->dev, irq_id, mt8173_afe_irq_handler, 0, "Afe_ISR_Handle", (void *)afe); -- cgit v1.2.3 From fa8cc38165c2f6f73bf947087b3cdc5dd9b9b560 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 29 Nov 2017 21:47:12 +0530 Subject: ASoC: nuc900: Fix platform_get_irq's error checking The platform_get_irq() function returns a negative value if an error occurs, and zero or a positive number on success. Checking the platform_get_irq() result only for zero is not correct.
Signed-off-by: Arvind Yadav Signed-off-by: Mark Brown --- sound/soc/nuc900/nuc900-ac97.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/nuc900/nuc900-ac97.c b/sound/soc/nuc900/nuc900-ac97.c index b6615affe571..5e4fbd2d3479 100644 --- a/sound/soc/nuc900/nuc900-ac97.c +++ b/sound/soc/nuc900/nuc900-ac97.c @@ -346,8 +346,8 @@ static int nuc900_ac97_drvprobe(struct platform_device *pdev) } nuc900_audio->irq_num = platform_get_irq(pdev, 0); - if (!nuc900_audio->irq_num) { - ret = -EBUSY; + if (nuc900_audio->irq_num <= 0) { + ret = nuc900_audio->irq_num < 0 ? nuc900_audio->irq_num : -EBUSY; goto out; } -- cgit v1.2.3 From eee44bfcf931428d7e94a9ae2092d687386a135a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 29 Nov 2017 21:47:13 +0530 Subject: ASoC: intel: sst: Handle return value of platform_get_irq platform_get_irq() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst/sst_acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/atom/sst/sst_acpi.c b/sound/soc/intel/atom/sst/sst_acpi.c index 32d6e02e2104..6cd481bec275 100644 --- a/sound/soc/intel/atom/sst/sst_acpi.c +++ b/sound/soc/intel/atom/sst/sst_acpi.c @@ -236,6 +236,9 @@ static int sst_platform_get_resources(struct intel_sst_drv *ctx) /* Find the IRQ */ ctx->irq_num = platform_get_irq(pdev, ctx->pdata->res_info->acpi_ipc_irq_index); + if (ctx->irq_num <= 0) + return ctx->irq_num < 0 ? ctx->irq_num : -EIO; + return 0; } -- cgit v1.2.3 From 00a5cc096774fbc9ac979765fa820e7c8d9121c4 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 29 Nov 2017 21:47:14 +0530 Subject: ASoC: intel: mfld: Handle return value of platform_get_irq platform_get_irq() can fail here and we must check its return value. 
Signed-off-by: Arvind Yadav Signed-off-by: Mark Brown --- sound/soc/intel/boards/mfld_machine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/boards/mfld_machine.c b/sound/soc/intel/boards/mfld_machine.c index 6f44acfb4aae..7cb44fdde1ee 100644 --- a/sound/soc/intel/boards/mfld_machine.c +++ b/sound/soc/intel/boards/mfld_machine.c @@ -372,6 +372,8 @@ static int snd_mfld_mc_probe(struct platform_device *pdev) /* retrive the irq number */ irq = platform_get_irq(pdev, 0); + if (irq <= 0) + return irq < 0 ? irq : -ENODEV; /* audio interrupt base of SRAM location where * interrupts are stored by System FW */ -- cgit v1.2.3 From ca7840fb47208a3521f3b60c1b78a2f0c59b4dc5 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:45 -0600 Subject: ASoC: tlv320aic31xx: General source formatting cleanup Simple non-functional changes including: * Fix spelling errors * Reformat code for easier reading * Remove unneeded code * Remove assignments that are always overridden * Normalize function return paths Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 63 ++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index d974e8651e30..07c014501e5e 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -136,8 +136,7 @@ static const struct regmap_config aic31xx_i2c_regmap = { .max_register = 12 * 128, }; -#define AIC31XX_NUM_SUPPLIES 6 -static const char * const aic31xx_supply_names[AIC31XX_NUM_SUPPLIES] = { +static const char * const aic31xx_supply_names[] = { "HPVDD", "SPRVDD", "SPLVDD", @@ -146,6 +145,8 @@ static const char * const aic31xx_supply_names[AIC31XX_NUM_SUPPLIES] = { "DVDD", }; +#define AIC31XX_NUM_SUPPLIES ARRAY_SIZE(aic31xx_supply_names) + struct aic31xx_disable_nb { struct notifier_block nb; struct aic31xx_priv *aic31xx; @@ -177,7 +178,7 @@ struct aic31xx_rate_divs { u8 madc; }; -/* ADC dividers can be disabled by cofiguring them to 0 */ +/* ADC dividers can be disabled by configuring them to 0 */ static const struct aic31xx_rate_divs aic31xx_divs[] = { /* mclk/p rate pll: j d dosr ndac mdac aors nadc madc */ /* 8k rate */ @@ -832,11 +833,17 @@ static int aic31xx_setup_pll(struct snd_soc_codec *codec, dev_dbg(codec->dev, "pll %d.%04d/%d dosr %d n %d m %d aosr %d n %d m %d bclk_n %d\n", - aic31xx_divs[i].pll_j, aic31xx_divs[i].pll_d, - aic31xx->p_div, aic31xx_divs[i].dosr, - aic31xx_divs[i].ndac, aic31xx_divs[i].mdac, - aic31xx_divs[i].aosr, aic31xx_divs[i].nadc, - aic31xx_divs[i].madc, bclk_n); + aic31xx_divs[i].pll_j, + aic31xx_divs[i].pll_d, + aic31xx->p_div, + aic31xx_divs[i].dosr, + aic31xx_divs[i].ndac, + aic31xx_divs[i].mdac, + aic31xx_divs[i].aosr, + aic31xx_divs[i].nadc, + aic31xx_divs[i].madc, + bclk_n + ); return 0; } @@ -973,8 +980,9 @@ static int aic31xx_set_dai_sysclk(struct snd_soc_dai *codec_dai, dev_dbg(codec->dev, "## %s: clk_id = %d, freq = %d, dir = %d\n", __func__, 
clk_id, freq, dir); - for (i = 1; freq/i > 20000000 && i < 8; i++) - ; + for (i = 1; i < 8; i++) + if (freq / i <= 20000000) + break; if (freq/i > 20000000) { dev_err(aic31xx->dev, "%s: Too high mclk frequency %u\n", __func__, freq); @@ -982,9 +990,9 @@ static int aic31xx_set_dai_sysclk(struct snd_soc_dai *codec_dai, } aic31xx->p_div = i; - for (i = 0; i < ARRAY_SIZE(aic31xx_divs) && - aic31xx_divs[i].mclk_p != freq/aic31xx->p_div; i++) - ; + for (i = 0; i < ARRAY_SIZE(aic31xx_divs); i++) + if (aic31xx_divs[i].mclk_p == freq / aic31xx->p_div) + break; if (i == ARRAY_SIZE(aic31xx_divs)) { dev_err(aic31xx->dev, "%s: Unsupported frequency %d\n", __func__, freq); @@ -996,6 +1004,7 @@ static int aic31xx_set_dai_sysclk(struct snd_soc_dai *codec_dai, clk_id << AIC31XX_PLL_CLKIN_SHIFT); aic31xx->sysclk = freq; + return 0; } @@ -1057,7 +1066,7 @@ static void aic31xx_clk_off(struct snd_soc_codec *codec) static int aic31xx_power_on(struct snd_soc_codec *codec) { struct aic31xx_priv *aic31xx = snd_soc_codec_get_drvdata(codec); - int ret = 0; + int ret; ret = regulator_bulk_enable(ARRAY_SIZE(aic31xx->supplies), aic31xx->supplies); @@ -1070,7 +1079,7 @@ static int aic31xx_power_on(struct snd_soc_codec *codec) } regcache_cache_only(aic31xx->regmap, false); ret = regcache_sync(aic31xx->regmap); - if (ret != 0) { + if (ret) { dev_err(codec->dev, "Failed to restore cache: %d\n", ret); regcache_cache_only(aic31xx->regmap, true); @@ -1078,6 +1087,7 @@ static int aic31xx_power_on(struct snd_soc_codec *codec) aic31xx->supplies); return ret; } + return 0; } @@ -1126,14 +1136,11 @@ static int aic31xx_set_bias_level(struct snd_soc_codec *codec, static int aic31xx_codec_probe(struct snd_soc_codec *codec) { - int ret = 0; struct aic31xx_priv *aic31xx = snd_soc_codec_get_drvdata(codec); - int i; + int i, ret; dev_dbg(aic31xx->dev, "## %s\n", __func__); - aic31xx = snd_soc_codec_get_drvdata(codec); - aic31xx->codec = codec; for (i = 0; i < ARRAY_SIZE(aic31xx->supplies); i++) { @@ -1158,8 
+1165,10 @@ static int aic31xx_codec_probe(struct snd_soc_codec *codec) return ret; ret = aic31xx_add_widgets(codec); + if (ret) + return ret; - return ret; + return 0; } static int aic31xx_codec_remove(struct snd_soc_codec *codec) @@ -1322,10 +1331,12 @@ static int aic31xx_device_init(struct aic31xx_priv *aic31xx) ret = devm_regulator_bulk_get(aic31xx->dev, ARRAY_SIZE(aic31xx->supplies), aic31xx->supplies); - if (ret != 0) + if (ret) { dev_err(aic31xx->dev, "Failed to request supplies: %d\n", ret); + return ret; + } - return ret; + return 0; } static int aic31xx_i2c_probe(struct i2c_client *i2c, @@ -1333,18 +1344,15 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, { struct aic31xx_priv *aic31xx; int ret; - const struct regmap_config *regmap_config; dev_dbg(&i2c->dev, "## %s: %s codec_type = %d\n", __func__, id->name, (int) id->driver_data); - regmap_config = &aic31xx_i2c_regmap; - aic31xx = devm_kzalloc(&i2c->dev, sizeof(*aic31xx), GFP_KERNEL); - if (aic31xx == NULL) + if (!aic31xx) return -ENOMEM; - aic31xx->regmap = devm_regmap_init_i2c(i2c, regmap_config); + aic31xx->regmap = devm_regmap_init_i2c(i2c, &aic31xx_i2c_regmap); if (IS_ERR(aic31xx->regmap)) { ret = PTR_ERR(aic31xx->regmap); dev_err(&i2c->dev, "Failed to allocate register map: %d\n", @@ -1400,7 +1408,6 @@ static struct i2c_driver aic31xx_i2c_driver = { .remove = aic31xx_i2c_remove, .id_table = aic31xx_i2c_id, }; - module_i2c_driver(aic31xx_i2c_driver); MODULE_AUTHOR("Jyri Sarha "); -- cgit v1.2.3 From 12eb4d66ba2e14072b54f37f5a4a6f70457e228a Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:47 -0600 Subject: ASoC: tlv320aic31xx: Reformat header file using GENMASK and BIT macros We also move the comments describing the registers to after the register definition to remove non-uniform vertical white-space, this makes cross-referencing with the datasheet much easier. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.h | 320 ++++++++++++++++----------------------- 1 file changed, 133 insertions(+), 187 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 6efea0485392..15ac7cba86fe 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -10,20 +10,21 @@ #define AIC31XX_RATES SNDRV_PCM_RATE_8000_192000 -#define AIC31XX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE \ - | SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S24_LE \ - | SNDRV_PCM_FMTBIT_S32_LE) +#define AIC31XX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S20_3LE | \ + SNDRV_PCM_FMTBIT_S24_3LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE) - -#define AIC31XX_STEREO_CLASS_D_BIT 0x1 -#define AIC31XX_MINIDSP_BIT 0x2 -#define DAC31XX_BIT 0x4 +#define AIC31XX_STEREO_CLASS_D_BIT BIT(1) +#define AIC31XX_MINIDSP_BIT BIT(2) +#define DAC31XX_BIT BIT(3) enum aic31xx_type { AIC3100 = 0, AIC3110 = AIC31XX_STEREO_CLASS_D_BIT, AIC3120 = AIC31XX_MINIDSP_BIT, - AIC3111 = (AIC31XX_STEREO_CLASS_D_BIT | AIC31XX_MINIDSP_BIT), + AIC3111 = AIC31XX_STEREO_CLASS_D_BIT | AIC31XX_MINIDSP_BIT, DAC3100 = DAC31XX_BIT, DAC3101 = DAC31XX_BIT | AIC31XX_STEREO_CLASS_D_BIT, }; @@ -36,222 +37,167 @@ struct aic31xx_pdata { #define AIC31XX_REG(page, reg) ((page * 128) + reg) -/* Page Control Register */ -#define AIC31XX_PAGECTL AIC31XX_REG(0, 0) +#define AIC31XX_PAGECTL AIC31XX_REG(0, 0) /* Page Control Register */ /* Page 0 Registers */ -/* Software reset register */ -#define AIC31XX_RESET AIC31XX_REG(0, 1) -/* OT FLAG register */ -#define AIC31XX_OT_FLAG AIC31XX_REG(0, 3) -/* Clock clock Gen muxing, Multiplexers*/ -#define AIC31XX_CLKMUX AIC31XX_REG(0, 4) -/* PLL P and R-VAL register */ -#define AIC31XX_PLLPR AIC31XX_REG(0, 5) -/* PLL J-VAL register */ -#define AIC31XX_PLLJ AIC31XX_REG(0, 6) -/* PLL D-VAL MSB register */ -#define AIC31XX_PLLDMSB AIC31XX_REG(0, 7) -/* PLL D-VAL 
LSB register */ -#define AIC31XX_PLLDLSB AIC31XX_REG(0, 8) -/* DAC NDAC_VAL register*/ -#define AIC31XX_NDAC AIC31XX_REG(0, 11) -/* DAC MDAC_VAL register */ -#define AIC31XX_MDAC AIC31XX_REG(0, 12) -/* DAC OSR setting register 1, MSB value */ -#define AIC31XX_DOSRMSB AIC31XX_REG(0, 13) -/* DAC OSR setting register 2, LSB value */ -#define AIC31XX_DOSRLSB AIC31XX_REG(0, 14) +#define AIC31XX_RESET AIC31XX_REG(0, 1) /* Software reset register */ +#define AIC31XX_OT_FLAG AIC31XX_REG(0, 3) /* OT FLAG register */ +#define AIC31XX_CLKMUX AIC31XX_REG(0, 4) /* Clock clock Gen muxing, Multiplexers*/ +#define AIC31XX_PLLPR AIC31XX_REG(0, 5) /* PLL P and R-VAL register */ +#define AIC31XX_PLLJ AIC31XX_REG(0, 6) /* PLL J-VAL register */ +#define AIC31XX_PLLDMSB AIC31XX_REG(0, 7) /* PLL D-VAL MSB register */ +#define AIC31XX_PLLDLSB AIC31XX_REG(0, 8) /* PLL D-VAL LSB register */ +#define AIC31XX_NDAC AIC31XX_REG(0, 11) /* DAC NDAC_VAL register*/ +#define AIC31XX_MDAC AIC31XX_REG(0, 12) /* DAC MDAC_VAL register */ +#define AIC31XX_DOSRMSB AIC31XX_REG(0, 13) /* DAC OSR setting register 1, MSB value */ +#define AIC31XX_DOSRLSB AIC31XX_REG(0, 14) /* DAC OSR setting register 2, LSB value */ #define AIC31XX_MINI_DSP_INPOL AIC31XX_REG(0, 16) -/* Clock setting register 8, PLL */ -#define AIC31XX_NADC AIC31XX_REG(0, 18) -/* Clock setting register 9, PLL */ -#define AIC31XX_MADC AIC31XX_REG(0, 19) -/* ADC Oversampling (AOSR) Register */ -#define AIC31XX_AOSR AIC31XX_REG(0, 20) -/* Clock setting register 9, Multiplexers */ -#define AIC31XX_CLKOUTMUX AIC31XX_REG(0, 25) -/* Clock setting register 10, CLOCKOUT M divider value */ -#define AIC31XX_CLKOUTMVAL AIC31XX_REG(0, 26) -/* Audio Interface Setting Register 1 */ -#define AIC31XX_IFACE1 AIC31XX_REG(0, 27) -/* Audio Data Slot Offset Programming */ -#define AIC31XX_DATA_OFFSET AIC31XX_REG(0, 28) -/* Audio Interface Setting Register 2 */ -#define AIC31XX_IFACE2 AIC31XX_REG(0, 29) -/* Clock setting register 11, BCLK N Divider */ -#define 
AIC31XX_BCLKN AIC31XX_REG(0, 30) -/* Audio Interface Setting Register 3, Secondary Audio Interface */ -#define AIC31XX_IFACESEC1 AIC31XX_REG(0, 31) -/* Audio Interface Setting Register 4 */ -#define AIC31XX_IFACESEC2 AIC31XX_REG(0, 32) -/* Audio Interface Setting Register 5 */ -#define AIC31XX_IFACESEC3 AIC31XX_REG(0, 33) -/* I2C Bus Condition */ -#define AIC31XX_I2C AIC31XX_REG(0, 34) -/* ADC FLAG */ -#define AIC31XX_ADCFLAG AIC31XX_REG(0, 36) -/* DAC Flag Registers */ -#define AIC31XX_DACFLAG1 AIC31XX_REG(0, 37) +#define AIC31XX_NADC AIC31XX_REG(0, 18) /* Clock setting register 8, PLL */ +#define AIC31XX_MADC AIC31XX_REG(0, 19) /* Clock setting register 9, PLL */ +#define AIC31XX_AOSR AIC31XX_REG(0, 20) /* ADC Oversampling (AOSR) Register */ +#define AIC31XX_CLKOUTMUX AIC31XX_REG(0, 25) /* Clock setting register 9, Multiplexers */ +#define AIC31XX_CLKOUTMVAL AIC31XX_REG(0, 26) /* Clock setting register 10, CLOCKOUT M divider value */ +#define AIC31XX_IFACE1 AIC31XX_REG(0, 27) /* Audio Interface Setting Register 1 */ +#define AIC31XX_DATA_OFFSET AIC31XX_REG(0, 28) /* Audio Data Slot Offset Programming */ +#define AIC31XX_IFACE2 AIC31XX_REG(0, 29) /* Audio Interface Setting Register 2 */ +#define AIC31XX_BCLKN AIC31XX_REG(0, 30) /* Clock setting register 11, BCLK N Divider */ +#define AIC31XX_IFACESEC1 AIC31XX_REG(0, 31) /* Audio Interface Setting Register 3, Secondary Audio Interface */ +#define AIC31XX_IFACESEC2 AIC31XX_REG(0, 32) /* Audio Interface Setting Register 4 */ +#define AIC31XX_IFACESEC3 AIC31XX_REG(0, 33) /* Audio Interface Setting Register 5 */ +#define AIC31XX_I2C AIC31XX_REG(0, 34) /* I2C Bus Condition */ +#define AIC31XX_ADCFLAG AIC31XX_REG(0, 36) /* ADC FLAG */ +#define AIC31XX_DACFLAG1 AIC31XX_REG(0, 37) /* DAC Flag Registers */ #define AIC31XX_DACFLAG2 AIC31XX_REG(0, 38) -/* Sticky Interrupt flag (overflow) */ -#define AIC31XX_OFFLAG AIC31XX_REG(0, 39) -/* Sticy DAC Interrupt flags */ -#define AIC31XX_INTRDACFLAG AIC31XX_REG(0, 44) -/* Sticy ADC 
Interrupt flags */ -#define AIC31XX_INTRADCFLAG AIC31XX_REG(0, 45) -/* DAC Interrupt flags 2 */ -#define AIC31XX_INTRDACFLAG2 AIC31XX_REG(0, 46) -/* ADC Interrupt flags 2 */ -#define AIC31XX_INTRADCFLAG2 AIC31XX_REG(0, 47) -/* INT1 interrupt control */ -#define AIC31XX_INT1CTRL AIC31XX_REG(0, 48) -/* INT2 interrupt control */ -#define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) -/* GPIO1 control */ -#define AIC31XX_GPIO1 AIC31XX_REG(0, 51) - +#define AIC31XX_OFFLAG AIC31XX_REG(0, 39) /* Sticky Interrupt flag (overflow) */ +#define AIC31XX_INTRDACFLAG AIC31XX_REG(0, 44) /* Sticy DAC Interrupt flags */ +#define AIC31XX_INTRADCFLAG AIC31XX_REG(0, 45) /* Sticy ADC Interrupt flags */ +#define AIC31XX_INTRDACFLAG2 AIC31XX_REG(0, 46) /* DAC Interrupt flags 2 */ +#define AIC31XX_INTRADCFLAG2 AIC31XX_REG(0, 47) /* ADC Interrupt flags 2 */ +#define AIC31XX_INT1CTRL AIC31XX_REG(0, 48) /* INT1 interrupt control */ +#define AIC31XX_INT2CTRL AIC31XX_REG(0, 49) /* INT2 interrupt control */ +#define AIC31XX_GPIO1 AIC31XX_REG(0, 51) /* GPIO1 control */ #define AIC31XX_DACPRB AIC31XX_REG(0, 60) -/* ADC Instruction Set Register */ -#define AIC31XX_ADCPRB AIC31XX_REG(0, 61) -/* DAC channel setup register */ -#define AIC31XX_DACSETUP AIC31XX_REG(0, 63) -/* DAC Mute and volume control register */ -#define AIC31XX_DACMUTE AIC31XX_REG(0, 64) -/* Left DAC channel digital volume control */ -#define AIC31XX_LDACVOL AIC31XX_REG(0, 65) -/* Right DAC channel digital volume control */ -#define AIC31XX_RDACVOL AIC31XX_REG(0, 66) -/* Headset detection */ -#define AIC31XX_HSDETECT AIC31XX_REG(0, 67) -/* ADC Digital Mic */ -#define AIC31XX_ADCSETUP AIC31XX_REG(0, 81) -/* ADC Digital Volume Control Fine Adjust */ -#define AIC31XX_ADCFGA AIC31XX_REG(0, 82) -/* ADC Digital Volume Control Coarse Adjust */ -#define AIC31XX_ADCVOL AIC31XX_REG(0, 83) - +#define AIC31XX_ADCPRB AIC31XX_REG(0, 61) /* ADC Instruction Set Register */ +#define AIC31XX_DACSETUP AIC31XX_REG(0, 63) /* DAC channel setup register */ +#define 
AIC31XX_DACMUTE AIC31XX_REG(0, 64) /* DAC Mute and volume control register */ +#define AIC31XX_LDACVOL AIC31XX_REG(0, 65) /* Left DAC channel digital volume control */ +#define AIC31XX_RDACVOL AIC31XX_REG(0, 66) /* Right DAC channel digital volume control */ +#define AIC31XX_HSDETECT AIC31XX_REG(0, 67) /* Headset detection */ +#define AIC31XX_ADCSETUP AIC31XX_REG(0, 81) /* ADC Digital Mic */ +#define AIC31XX_ADCFGA AIC31XX_REG(0, 82) /* ADC Digital Volume Control Fine Adjust */ +#define AIC31XX_ADCVOL AIC31XX_REG(0, 83) /* ADC Digital Volume Control Coarse Adjust */ /* Page 1 Registers */ -/* Headphone drivers */ -#define AIC31XX_HPDRIVER AIC31XX_REG(1, 31) -/* Class-D Speakear Amplifier */ -#define AIC31XX_SPKAMP AIC31XX_REG(1, 32) -/* HP Output Drivers POP Removal Settings */ -#define AIC31XX_HPPOP AIC31XX_REG(1, 33) -/* Output Driver PGA Ramp-Down Period Control */ -#define AIC31XX_SPPGARAMP AIC31XX_REG(1, 34) -/* DAC_L and DAC_R Output Mixer Routing */ -#define AIC31XX_DACMIXERROUTE AIC31XX_REG(1, 35) -/* Left Analog Vol to HPL */ -#define AIC31XX_LANALOGHPL AIC31XX_REG(1, 36) -/* Right Analog Vol to HPR */ -#define AIC31XX_RANALOGHPR AIC31XX_REG(1, 37) -/* Left Analog Vol to SPL */ -#define AIC31XX_LANALOGSPL AIC31XX_REG(1, 38) -/* Right Analog Vol to SPR */ -#define AIC31XX_RANALOGSPR AIC31XX_REG(1, 39) -/* HPL Driver */ -#define AIC31XX_HPLGAIN AIC31XX_REG(1, 40) -/* HPR Driver */ -#define AIC31XX_HPRGAIN AIC31XX_REG(1, 41) -/* SPL Driver */ -#define AIC31XX_SPLGAIN AIC31XX_REG(1, 42) -/* SPR Driver */ -#define AIC31XX_SPRGAIN AIC31XX_REG(1, 43) -/* HP Driver Control */ -#define AIC31XX_HPCONTROL AIC31XX_REG(1, 44) -/* MIC Bias Control */ -#define AIC31XX_MICBIAS AIC31XX_REG(1, 46) -/* MIC PGA*/ -#define AIC31XX_MICPGA AIC31XX_REG(1, 47) -/* Delta-Sigma Mono ADC Channel Fine-Gain Input Selection for P-Terminal */ -#define AIC31XX_MICPGAPI AIC31XX_REG(1, 48) -/* ADC Input Selection for M-Terminal */ -#define AIC31XX_MICPGAMI AIC31XX_REG(1, 49) -/* Input CM 
Settings */ -#define AIC31XX_MICPGACM AIC31XX_REG(1, 50) - -/* Bits, masks and shifts */ +#define AIC31XX_HPDRIVER AIC31XX_REG(1, 31) /* Headphone drivers */ +#define AIC31XX_SPKAMP AIC31XX_REG(1, 32) /* Class-D Speakear Amplifier */ +#define AIC31XX_HPPOP AIC31XX_REG(1, 33) /* HP Output Drivers POP Removal Settings */ +#define AIC31XX_SPPGARAMP AIC31XX_REG(1, 34) /* Output Driver PGA Ramp-Down Period Control */ +#define AIC31XX_DACMIXERROUTE AIC31XX_REG(1, 35) /* DAC_L and DAC_R Output Mixer Routing */ +#define AIC31XX_LANALOGHPL AIC31XX_REG(1, 36) /* Left Analog Vol to HPL */ +#define AIC31XX_RANALOGHPR AIC31XX_REG(1, 37) /* Right Analog Vol to HPR */ +#define AIC31XX_LANALOGSPL AIC31XX_REG(1, 38) /* Left Analog Vol to SPL */ +#define AIC31XX_RANALOGSPR AIC31XX_REG(1, 39) /* Right Analog Vol to SPR */ +#define AIC31XX_HPLGAIN AIC31XX_REG(1, 40) /* HPL Driver */ +#define AIC31XX_HPRGAIN AIC31XX_REG(1, 41) /* HPR Driver */ +#define AIC31XX_SPLGAIN AIC31XX_REG(1, 42) /* SPL Driver */ +#define AIC31XX_SPRGAIN AIC31XX_REG(1, 43) /* SPR Driver */ +#define AIC31XX_HPCONTROL AIC31XX_REG(1, 44) /* HP Driver Control */ +#define AIC31XX_MICBIAS AIC31XX_REG(1, 46) /* MIC Bias Control */ +#define AIC31XX_MICPGA AIC31XX_REG(1, 47) /* MIC PGA*/ +#define AIC31XX_MICPGAPI AIC31XX_REG(1, 48) /* Delta-Sigma Mono ADC Channel Fine-Gain Input Selection for P-Terminal */ +#define AIC31XX_MICPGAMI AIC31XX_REG(1, 49) /* ADC Input Selection for M-Terminal */ +#define AIC31XX_MICPGACM AIC31XX_REG(1, 50) /* Input CM Settings */ + +/* Bits, masks, and shifts */ /* AIC31XX_CLKMUX */ -#define AIC31XX_PLL_CLKIN_MASK 0x0c -#define AIC31XX_PLL_CLKIN_SHIFT 2 -#define AIC31XX_PLL_CLKIN_MCLK 0 -#define AIC31XX_CODEC_CLKIN_MASK 0x03 -#define AIC31XX_CODEC_CLKIN_SHIFT 0 -#define AIC31XX_CODEC_CLKIN_PLL 3 -#define AIC31XX_CODEC_CLKIN_BCLK 1 - -/* AIC31XX_PLLPR, AIC31XX_NDAC, AIC31XX_MDAC, AIC31XX_NADC, AIC31XX_MADC, - AIC31XX_BCLKN */ -#define AIC31XX_PLL_MASK 0x7f -#define AIC31XX_PM_MASK 0x80 
+#define AIC31XX_PLL_CLKIN_MASK GENMASK(3, 2) +#define AIC31XX_PLL_CLKIN_SHIFT (2) +#define AIC31XX_PLL_CLKIN_MCLK 0x00 +#define AIC31XX_PLL_CLKIN_BCKL 0x01 +#define AIC31XX_PLL_CLKIN_GPIO1 0x02 +#define AIC31XX_PLL_CLKIN_DIN 0x03 +#define AIC31XX_CODEC_CLKIN_MASK GENMASK(1, 0) +#define AIC31XX_CODEC_CLKIN_SHIFT (0) +#define AIC31XX_CODEC_CLKIN_MCLK 0x00 +#define AIC31XX_CODEC_CLKIN_BCLK 0x01 +#define AIC31XX_CODEC_CLKIN_GPIO1 0x02 +#define AIC31XX_CODEC_CLKIN_PLL 0x03 + +/* AIC31XX_PLLPR */ +/* AIC31XX_NDAC */ +/* AIC31XX_MDAC */ +/* AIC31XX_NADC */ +/* AIC31XX_MADC */ +/* AIC31XX_BCLKN */ +#define AIC31XX_PLL_MASK GENMASK(6, 0) +#define AIC31XX_PM_MASK BIT(7) /* AIC31XX_IFACE1 */ -#define AIC31XX_WORD_LEN_16BITS 0x00 -#define AIC31XX_WORD_LEN_20BITS 0x01 -#define AIC31XX_WORD_LEN_24BITS 0x02 -#define AIC31XX_WORD_LEN_32BITS 0x03 -#define AIC31XX_IFACE1_DATALEN_MASK 0x30 -#define AIC31XX_IFACE1_DATALEN_SHIFT (4) -#define AIC31XX_IFACE1_DATATYPE_MASK 0xC0 +#define AIC31XX_IFACE1_DATATYPE_MASK GENMASK(7, 6) #define AIC31XX_IFACE1_DATATYPE_SHIFT (6) #define AIC31XX_I2S_MODE 0x00 #define AIC31XX_DSP_MODE 0x01 #define AIC31XX_RIGHT_JUSTIFIED_MODE 0x02 #define AIC31XX_LEFT_JUSTIFIED_MODE 0x03 -#define AIC31XX_IFACE1_MASTER_MASK 0x0C -#define AIC31XX_BCLK_MASTER 0x08 -#define AIC31XX_WCLK_MASTER 0x04 +#define AIC31XX_IFACE1_DATALEN_MASK GENMASK(5, 4) +#define AIC31XX_IFACE1_DATALEN_SHIFT (4) +#define AIC31XX_WORD_LEN_16BITS 0x00 +#define AIC31XX_WORD_LEN_20BITS 0x01 +#define AIC31XX_WORD_LEN_24BITS 0x02 +#define AIC31XX_WORD_LEN_32BITS 0x03 +#define AIC31XX_IFACE1_MASTER_MASK GENMASK(3, 2) +#define AIC31XX_BCLK_MASTER BIT(2) +#define AIC31XX_WCLK_MASTER BIT(3) /* AIC31XX_DATA_OFFSET */ -#define AIC31XX_DATA_OFFSET_MASK 0xFF +#define AIC31XX_DATA_OFFSET_MASK GENMASK(7, 0) /* AIC31XX_IFACE2 */ -#define AIC31XX_BCLKINV_MASK 0x08 -#define AIC31XX_BDIVCLK_MASK 0x03 +#define AIC31XX_BCLKINV_MASK BIT(3) +#define AIC31XX_BDIVCLK_MASK GENMASK(1, 0) #define AIC31XX_DAC2BCLK 0x00 
#define AIC31XX_DACMOD2BCLK 0x01 #define AIC31XX_ADC2BCLK 0x02 #define AIC31XX_ADCMOD2BCLK 0x03 /* AIC31XX_ADCFLAG */ -#define AIC31XX_ADCPWRSTATUS_MASK 0x40 +#define AIC31XX_ADCPWRSTATUS_MASK BIT(6) /* AIC31XX_DACFLAG1 */ -#define AIC31XX_LDACPWRSTATUS_MASK 0x80 -#define AIC31XX_RDACPWRSTATUS_MASK 0x08 -#define AIC31XX_HPLDRVPWRSTATUS_MASK 0x20 -#define AIC31XX_HPRDRVPWRSTATUS_MASK 0x02 -#define AIC31XX_SPLDRVPWRSTATUS_MASK 0x10 -#define AIC31XX_SPRDRVPWRSTATUS_MASK 0x01 +#define AIC31XX_LDACPWRSTATUS_MASK BIT(7) +#define AIC31XX_HPLDRVPWRSTATUS_MASK BIT(5) +#define AIC31XX_SPLDRVPWRSTATUS_MASK BIT(4) +#define AIC31XX_RDACPWRSTATUS_MASK BIT(3) +#define AIC31XX_HPRDRVPWRSTATUS_MASK BIT(1) +#define AIC31XX_SPRDRVPWRSTATUS_MASK BIT(0) /* AIC31XX_INTRDACFLAG */ -#define AIC31XX_HPSCDETECT_MASK 0x80 -#define AIC31XX_BUTTONPRESS_MASK 0x20 -#define AIC31XX_HSPLUG_MASK 0x10 -#define AIC31XX_LDRCTHRES_MASK 0x08 -#define AIC31XX_RDRCTHRES_MASK 0x04 -#define AIC31XX_DACSINT_MASK 0x02 -#define AIC31XX_DACAINT_MASK 0x01 +#define AIC31XX_HPLSCDETECT BIT(7) +#define AIC31XX_HPRSCDETECT BIT(6) +#define AIC31XX_BUTTONPRESS BIT(5) +#define AIC31XX_HSPLUG BIT(4) +#define AIC31XX_LDRCTHRES BIT(3) +#define AIC31XX_RDRCTHRES BIT(2) +#define AIC31XX_DACSINT BIT(1) +#define AIC31XX_DACAINT BIT(0) /* AIC31XX_INT1CTRL */ -#define AIC31XX_HSPLUGDET_MASK 0x80 -#define AIC31XX_BUTTONPRESSDET_MASK 0x40 -#define AIC31XX_DRCTHRES_MASK 0x20 -#define AIC31XX_AGCNOISE_MASK 0x10 -#define AIC31XX_OC_MASK 0x08 -#define AIC31XX_ENGINE_MASK 0x04 +#define AIC31XX_HSPLUGDET BIT(7) +#define AIC31XX_BUTTONPRESSDET BIT(6) +#define AIC31XX_DRCTHRES BIT(5) +#define AIC31XX_AGCNOISE BIT(4) +#define AIC31XX_SC BIT(3) +#define AIC31XX_ENGINE BIT(2) /* AIC31XX_DACSETUP */ -#define AIC31XX_SOFTSTEP_MASK 0x03 +#define AIC31XX_SOFTSTEP_MASK GENMASK(1, 0) /* AIC31XX_DACMUTE */ -#define AIC31XX_DACMUTE_MASK 0x0C +#define AIC31XX_DACMUTE_MASK GENMASK(3, 2) /* AIC31XX_MICBIAS */ -#define AIC31XX_MICBIAS_MASK 0x03 
-#define AIC31XX_MICBIAS_SHIFT 0 +#define AIC31XX_MICBIAS_MASK GENMASK(1, 0) +#define AIC31XX_MICBIAS_SHIFT 0 #endif /* _TLV320AIC31XX_H */ -- cgit v1.2.3 From e88c3881361cee9b778bf4e4ded43da7a0917ce1 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:48 -0600 Subject: ASoC: tlv320aic31xx: Merge init function into probe The function aic31xx_device_init() is only called from probe and does nothing that logically shouldn't be in probe, remove this unneeded function call and move its code into probe where it was called. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 55 ++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 33 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 07c014501e5e..c84febd991a0 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1302,9 +1302,29 @@ static const struct acpi_device_id aic31xx_acpi_match[] = { MODULE_DEVICE_TABLE(acpi, aic31xx_acpi_match); #endif -static int aic31xx_device_init(struct aic31xx_priv *aic31xx) +static int aic31xx_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) { - int ret, i; + struct aic31xx_priv *aic31xx; + int i, ret; + + dev_dbg(&i2c->dev, "## %s: %s codec_type = %d\n", __func__, + id->name, (int)id->driver_data); + + aic31xx = devm_kzalloc(&i2c->dev, sizeof(*aic31xx), GFP_KERNEL); + if (!aic31xx) + return -ENOMEM; + + aic31xx->regmap = devm_regmap_init_i2c(i2c, &aic31xx_i2c_regmap); + if (IS_ERR(aic31xx->regmap)) { + ret = PTR_ERR(aic31xx->regmap); + dev_err(&i2c->dev, "Failed to allocate register map: %d\n", + ret); + return ret; + } + aic31xx->dev = &i2c->dev; + + aic31xx->pdata.codec_type = id->driver_data; dev_set_drvdata(aic31xx->dev, aic31xx); @@ -1336,37 +1356,6 @@ static int aic31xx_device_init(struct aic31xx_priv *aic31xx) return ret; } - return 0; -} - -static int aic31xx_i2c_probe(struct i2c_client 
*i2c, - const struct i2c_device_id *id) -{ - struct aic31xx_priv *aic31xx; - int ret; - - dev_dbg(&i2c->dev, "## %s: %s codec_type = %d\n", __func__, - id->name, (int) id->driver_data); - - aic31xx = devm_kzalloc(&i2c->dev, sizeof(*aic31xx), GFP_KERNEL); - if (!aic31xx) - return -ENOMEM; - - aic31xx->regmap = devm_regmap_init_i2c(i2c, &aic31xx_i2c_regmap); - if (IS_ERR(aic31xx->regmap)) { - ret = PTR_ERR(aic31xx->regmap); - dev_err(&i2c->dev, "Failed to allocate register map: %d\n", - ret); - return ret; - } - aic31xx->dev = &i2c->dev; - - aic31xx->pdata.codec_type = id->driver_data; - - ret = aic31xx_device_init(aic31xx); - if (ret) - return ret; - if (aic31xx->pdata.codec_type & DAC31XX_BIT) return snd_soc_register_codec(&i2c->dev, &soc_codec_driver_aic31xx, -- cgit v1.2.3 From b6b247cd5e37560e410c88b108e7408dafe60c15 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:49 -0600 Subject: ASoC: tlv320aic31xx: Switch GPIO handling to use gpiod_* API Move to using newer gpiod_* GPIO handling functions. This simplifies the code and eases dropping platform data in the next patch. Also remember GPIO are active low, so set "1" to reset. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index c84febd991a0..ab03a19f6aaa 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -157,6 +157,7 @@ struct aic31xx_priv { u8 i2c_regs_status; struct device *dev; struct regmap *regmap; + struct gpio_desc *gpio_reset; struct aic31xx_pdata pdata; struct regulator_bulk_data supplies[AIC31XX_NUM_SUPPLIES]; struct aic31xx_disable_nb disable_nb[AIC31XX_NUM_SUPPLIES]; @@ -1020,8 +1021,8 @@ static int aic31xx_regulator_event(struct notifier_block *nb, * Put codec to reset and as at least one of the * supplies was disabled. 
*/ - if (gpio_is_valid(aic31xx->pdata.gpio_reset)) - gpio_set_value(aic31xx->pdata.gpio_reset, 0); + if (aic31xx->gpio_reset) + gpiod_set_value(aic31xx->gpio_reset, 1); regcache_mark_dirty(aic31xx->regmap); dev_dbg(aic31xx->dev, "## %s: DISABLE received\n", __func__); @@ -1073,8 +1074,8 @@ static int aic31xx_power_on(struct snd_soc_codec *codec) if (ret) return ret; - if (gpio_is_valid(aic31xx->pdata.gpio_reset)) { - gpio_set_value(aic31xx->pdata.gpio_reset, 1); + if (aic31xx->gpio_reset) { + gpiod_set_value(aic31xx->gpio_reset, 0); udelay(100); } regcache_cache_only(aic31xx->regmap, false); @@ -1334,15 +1335,11 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, else if (aic31xx->dev->of_node) aic31xx_pdata_from_of(aic31xx); - if (aic31xx->pdata.gpio_reset) { - ret = devm_gpio_request_one(aic31xx->dev, - aic31xx->pdata.gpio_reset, - GPIOF_OUT_INIT_HIGH, - "aic31xx-reset-pin"); - if (ret < 0) { - dev_err(aic31xx->dev, "not able to acquire gpio\n"); - return ret; - } + aic31xx->gpio_reset = devm_gpiod_get_optional(aic31xx->dev, "reset", + GPIOD_OUT_LOW); + if (IS_ERR(aic31xx->gpio_reset)) { + dev_err(aic31xx->dev, "not able to acquire gpio\n"); + return PTR_ERR(aic31xx->gpio_reset); } for (i = 0; i < ARRAY_SIZE(aic31xx->supplies); i++) -- cgit v1.2.3 From c6b8c779213dfe2a31e12400b1a2cf2a9a843236 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:52 -0600 Subject: ASoC: tlv320aic31xx: Check clock and divider before division If our set_sysclk DAI callback has not been called yet p_div will be 0 and dividing by this will cause an error. Print an error message and leave before this. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index ab03a19f6aaa..05e6d194d6a9 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -754,11 +754,17 @@ static int aic31xx_setup_pll(struct snd_soc_codec *codec, { struct aic31xx_priv *aic31xx = snd_soc_codec_get_drvdata(codec); int bclk_score = snd_soc_params_to_frame_size(params); - int mclk_p = aic31xx->sysclk / aic31xx->p_div; + int mclk_p; int bclk_n = 0; int match = -1; int i; + if (!aic31xx->sysclk || !aic31xx->p_div) { + dev_err(codec->dev, "Master clock not supplied\n"); + return -EINVAL; + } + mclk_p = aic31xx->sysclk / aic31xx->p_div; + /* Use PLL as CODEC_CLKIN and DAC_CLK as BDIV_CLKIN */ snd_soc_update_bits(codec, AIC31XX_CLKMUX, AIC31XX_CODEC_CLKIN_MASK, AIC31XX_CODEC_CLKIN_PLL); -- cgit v1.2.3 From 77f8b3cfc33cd4231cc2748bcac9f43b9eea546c Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:53 -0600 Subject: ASoC: tlv320aic31xx: Add CODEC clock slave support This CODEC supports being the WCLK and/or BCLK slave, add support for this here. Also make the alert into an error as alert is more urgent than needed here and is rarely used. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 05e6d194d6a9..d77cc36d7360 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -925,8 +925,16 @@ static int aic31xx_set_dai_fmt(struct snd_soc_dai *codec_dai, case SND_SOC_DAIFMT_CBM_CFM: iface_reg1 |= AIC31XX_BCLK_MASTER | AIC31XX_WCLK_MASTER; break; + case SND_SOC_DAIFMT_CBS_CFM: + iface_reg1 |= AIC31XX_WCLK_MASTER; + break; + case SND_SOC_DAIFMT_CBM_CFS: + iface_reg1 |= AIC31XX_BCLK_MASTER; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; default: - dev_alert(codec->dev, "Invalid DAI master/slave interface\n"); + dev_err(codec->dev, "Invalid DAI master/slave interface\n"); return -EINVAL; } -- cgit v1.2.3 From dcb407b257af06fa58b0544ec01ec9e0d3927e02 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Wed, 29 Nov 2017 15:32:54 -0600 Subject: ASoC: tlv320aic31xx: Handle inverted BCLK in non-DSP modes Currently BCLK inverting is only handled when the DAI format is DSP, but the BCLK may be inverted in any supported mode. Without this using this CODEC in any other mode than DSP with the BCLK inverted leads to bad sampling timing and very poor audio quality. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index d77cc36d7360..38fd6ea275fb 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -938,6 +938,18 @@ static int aic31xx_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } + /* signal polarity */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + iface_reg2 |= AIC31XX_BCLKINV_MASK; + break; + default: + dev_err(codec->dev, "Invalid DAI clock signal polarity\n"); + return -EINVAL; + } + /* interface format */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: @@ -945,16 +957,12 @@ static int aic31xx_set_dai_fmt(struct snd_soc_dai *codec_dai, case SND_SOC_DAIFMT_DSP_A: dsp_a_val = 0x1; /* fall through */ case SND_SOC_DAIFMT_DSP_B: - /* NOTE: BCLKINV bit value 1 equas NB and 0 equals IB */ - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_NB_NF: - iface_reg2 |= AIC31XX_BCLKINV_MASK; - break; - case SND_SOC_DAIFMT_IB_NF: - break; - default: - return -EINVAL; - } + /* + * NOTE: This CODEC samples on the falling edge of BCLK in + * DSP mode, this is inverted compared to what most DAIs + * expect, so we invert for this mode + */ + iface_reg2 ^= AIC31XX_BCLKINV_MASK; iface_reg1 |= (AIC31XX_DSP_MODE << AIC31XX_IFACE1_DATATYPE_SHIFT); break; -- cgit v1.2.3 From c18a7ac3398d0cef29749f9568666db8321aa4c9 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Mon, 6 Nov 2017 11:41:04 +0100 Subject: memory: omap-gpmc: Make 'bank-width' property optional Error out only if both 'bank-width' and 'gpmc,device-width' are missing. As 'bank-width' is mostly used for NOR devices and all other devices must use 'gpmc,device-width' update the error message accordingly. 
Signed-off-by: Ladislav Michl Signed-off-by: Roger Quadros --- drivers/memory/omap-gpmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index a385a35c7de9..0e30ee1c8677 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -2077,8 +2077,9 @@ static int gpmc_probe_generic_child(struct platform_device *pdev, } else { ret = of_property_read_u32(child, "bank-width", &gpmc_s.device_width); - if (ret < 0) { - dev_err(&pdev->dev, "%pOF has no 'bank-width' property\n", + if (ret < 0 && !gpmc_s.device_width) { + dev_err(&pdev->dev, + "%pOF has no 'gpmc,device-width' property\n", child); goto err; } -- cgit v1.2.3 From 974a6b20518602310637bd8ac9ad348bf8a864d6 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 1 Dec 2017 11:47:56 +0100 Subject: batman-adv: Fix kernel-doc for timer functions The commit e99e88a9d2b0 ("treewide: setup_timer() -> timer_setup()") changed the argument name and type of the timer function but didn't adjust the kernel-doc of these functions. Signed-off-by: Sven Eckelmann Acked-by: Kees Cook Signed-off-by: Simon Wunderlich --- net/batman-adv/tp_meter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 15cd2139381e..ebc4e2241c77 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -482,7 +482,7 @@ static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars) /** * batadv_tp_sender_timeout - timer that fires in case of packet loss - * @arg: address of the related tp_vars + * @t: address to timer_list inside tp_vars * * If fired it means that there was packet loss. 
* Switch to Slow Start, set the ss_threshold to half of the current cwnd and @@ -1106,7 +1106,7 @@ static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars) /** * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is * reached without received ack - * @arg: address of the related tp_vars + * @t: address to timer_list inside tp_vars */ static void batadv_tp_receiver_shutdown(struct timer_list *t) { -- cgit v1.2.3 From 741f5afbba70ff3cddcc5bba2595d9a44fa722e5 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sat, 2 Dec 2017 17:36:45 +0100 Subject: ARM: dts: rockchip: add cpu0-regulator on rk3066a-marsboard The rk3066 also has operating points now, but without adjusting the cpu-regulator will break once higher voltages are needed for a specific frequency, so add the needed cpu0-regulator. Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3066a-marsboard.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/rk3066a-marsboard.dts b/arch/arm/boot/dts/rk3066a-marsboard.dts index c6d92c25df42..d23ee6d911ac 100644 --- a/arch/arm/boot/dts/rk3066a-marsboard.dts +++ b/arch/arm/boot/dts/rk3066a-marsboard.dts @@ -83,6 +83,10 @@ }; }; +&cpu0 { + cpu0-supply = <&vdd_arm>; +}; + &i2c1 { status = "okay"; clock-frequency = <400000>; -- cgit v1.2.3 From f9ecc83f8d723372976df8eda3193726d7a24fcb Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 1 Dec 2017 13:37:12 -0500 Subject: eeprom: at24: fix I2C device selection for runtime PM The at24 driver creates dummy I2C devices to access offsets in the chip that are outside the area supported using a single I2C address. It is not meaningful to use runtime PM for such devices; the system firmware (ACPI) does not know about these devices, nor was runtime PM enabled for them. Always use the real device instead of the dummy ones.
Fixes: 98e8201039af ("eeprom: at24: enable runtime pm support") Signed-off-by: Sakari Ailus Tested-by: Sven Van Asbroeck on a 24AA16/24LC16B [Bartosz: rebased on top of previous fixes for 4.15, tweaked the commit message] [Sven: fixed Bartosz's rebase] Signed-off-by: Sven Van Asbroeck Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 305a7a464d09..20b4f26d30d7 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -562,7 +562,7 @@ static ssize_t at24_eeprom_write_i2c(struct at24_data *at24, const char *buf, static int at24_read(void *priv, unsigned int off, void *val, size_t count) { struct at24_data *at24 = priv; - struct i2c_client *client; + struct device *dev = &at24->client[0]->dev; char *buf = val; int ret; @@ -572,11 +572,9 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) if (off + count > at24->chip.byte_len) return -EINVAL; - client = at24_translate_offset(at24, &off); - - ret = pm_runtime_get_sync(&client->dev); + ret = pm_runtime_get_sync(dev); if (ret < 0) { - pm_runtime_put_noidle(&client->dev); + pm_runtime_put_noidle(dev); return ret; } @@ -592,7 +590,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) status = at24->read_func(at24, buf, off, count); if (status < 0) { mutex_unlock(&at24->lock); - pm_runtime_put(&client->dev); + pm_runtime_put(dev); return status; } buf += status; @@ -602,7 +600,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) mutex_unlock(&at24->lock); - pm_runtime_put(&client->dev); + pm_runtime_put(dev); return 0; } @@ -610,7 +608,7 @@ static int at24_read(void *priv, unsigned int off, void *val, size_t count) static int at24_write(void *priv, unsigned int off, void *val, size_t count) { struct at24_data *at24 = priv; - struct i2c_client *client; + 
struct device *dev = &at24->client[0]->dev; char *buf = val; int ret; @@ -620,11 +618,9 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) if (off + count > at24->chip.byte_len) return -EINVAL; - client = at24_translate_offset(at24, &off); - - ret = pm_runtime_get_sync(&client->dev); + ret = pm_runtime_get_sync(dev); if (ret < 0) { - pm_runtime_put_noidle(&client->dev); + pm_runtime_put_noidle(dev); return ret; } @@ -640,7 +636,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) status = at24->write_func(at24, buf, off, count); if (status < 0) { mutex_unlock(&at24->lock); - pm_runtime_put(&client->dev); + pm_runtime_put(dev); return status; } buf += status; @@ -650,7 +646,7 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count) mutex_unlock(&at24->lock); - pm_runtime_put(&client->dev); + pm_runtime_put(dev); return 0; } -- cgit v1.2.3 From fc82228a5e3860502dbf3bfa4a9570cb7093cf7f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 3 Dec 2017 20:38:01 -0500 Subject: ext4: support fast symlinks from ext3 file systems 407cd7fb83c0 (ext4: change fast symlink test to not rely on i_blocks) broke ~10 years old ext3 file systems created by 2.6.17. Any ELF executable fails because the /lib/ld-linux.so.2 fast symlink cannot be read anymore. The patch assumed fast symlinks were created in a specific way, but that's not true on these really old file systems. The new behavior is apparently needed only with the large EA inode feature. Revert to the old behavior if the large EA inode feature is not set. This makes my old VM boot again. 
Fixes: 407cd7fb83c0 (ext4: change fast symlink test to not rely on i_blocks) Signed-off-by: Andi Kleen Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7df2c5644e59..534a9130f625 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -149,6 +149,15 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, */ int ext4_inode_is_fast_symlink(struct inode *inode) { + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + int ea_blocks = EXT4_I(inode)->i_file_acl ? + EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; + + if (ext4_has_inline_data(inode)) + return 0; + + return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); + } return S_ISLNK(inode->i_mode) && inode->i_size && (inode->i_size < EXT4_N_BLOCKS * 4); } -- cgit v1.2.3 From c894aa97577e47d3066b27b32499ecf899bfa8b0 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Sun, 3 Dec 2017 22:52:51 -0500 Subject: ext4: fix fdatasync(2) after fallocate(2) operation Currently, fallocate(2) with KEEP_SIZE followed by a fdatasync(2) then crash, we'll see wrong allocated block number (stat -c %b), the blocks allocated beyond EOF are all lost. fstests generic/468 exposes this bug. Commit 67a7d5f561f4 ("ext4: fix fdatasync(2) after extent manipulation operations") fixed all the other extent manipulation operation paths such as hole punch, zero range, collapse range etc., but forgot the fallocate case. So similarly, fix it by recording the correct journal tid in ext4 inode in fallocate(2) path, so that ext4_sync_file() will wait for the right tid to be committed on fdatasync(2). This addresses the test failure in xfstests test generic/468. 
Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 07bca11749d4..c941251ac0c0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4722,6 +4722,7 @@ retry: EXT4_INODE_EOFBLOCKS); } ext4_mark_inode_dirty(handle, inode); + ext4_update_inode_fsync_trans(handle, inode, 1); ret2 = ext4_journal_stop(handle); if (ret2) break; -- cgit v1.2.3 From 51bd7125f740a1bc9e79cd117d7d50f1dc67a446 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 4 Dec 2017 14:19:56 +1100 Subject: misc: rtsx: Fix symbol clashes drivers/staging/rts5208/sd.o: In function `.sd_power_off_card3v3': (.text+0x5760): multiple definition of `.sd_power_off_card3v3' drivers/misc/cardreader/rtsx_pcr.o:(.text+0x4630): first defined here drivers/staging/rts5208/sd.o:(.opd+0x378): multiple definition of `sd_power_off_card3v3' drivers/misc/cardreader/rtsx_pcr.o:(.opd+0x4f8): first defined here drivers/staging/rts5208/ms.o: In function `.ms_power_off_card3v3': (.text+0x5e70): multiple definition of `.ms_power_off_card3v3' drivers/misc/cardreader/rtsx_pcr.o:(.text+0x46e0): first defined here drivers/staging/rts5208/ms.o:(.opd+0x360): multiple definition of `ms_power_off_card3v3' drivers/misc/cardreader/rtsx_pcr.o:(.opd+0x510): first defined here Fixes: 5da4e04ae480 ("misc: rtsx: Add support for RTS5260") Signed-off-by: Stephen Rothwell Signed-off-by: Lee Jones --- drivers/misc/cardreader/rts5260.c | 4 ++-- drivers/misc/cardreader/rtsx_pcr.c | 4 ++-- drivers/misc/cardreader/rtsx_pcr.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 3b308640282d..07cb93abf685 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -426,9 +426,9 @@ void rts5260_process_ocp(struct rtsx_pcr *pcr) rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); 
rts5260_get_ocpstat2(pcr, &pcr->ocp_stat2); if (pcr->card_exist & SD_EXIST) - sd_power_off_card3v3(pcr); + rtsx_sd_power_off_card3v3(pcr); else if (pcr->card_exist & MS_EXIST) - ms_power_off_card3v3(pcr); + rtsx_ms_power_off_card3v3(pcr); if (!(pcr->card_exist & MS_EXIST) && !(pcr->card_exist & SD_EXIST)) { if ((pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER | diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index 99adc67bbf73..5345170fc57b 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -1208,7 +1208,7 @@ void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr) } } -int sd_power_off_card3v3(struct rtsx_pcr *pcr) +int rtsx_sd_power_off_card3v3(struct rtsx_pcr *pcr) { rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN | MS_CLK_EN | SD40_CLK_EN, 0); @@ -1223,7 +1223,7 @@ int sd_power_off_card3v3(struct rtsx_pcr *pcr) return 0; } -int ms_power_off_card3v3(struct rtsx_pcr *pcr) +int rtsx_ms_power_off_card3v3(struct rtsx_pcr *pcr) { rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN | MS_CLK_EN | SD40_CLK_EN, 0); diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h index c544e35d0154..6ea1655db0bb 100644 --- a/drivers/misc/cardreader/rtsx_pcr.h +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -107,7 +107,7 @@ void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr); void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr); int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val); void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr); -int sd_power_off_card3v3(struct rtsx_pcr *pcr); -int ms_power_off_card3v3(struct rtsx_pcr *pcr); +int rtsx_sd_power_off_card3v3(struct rtsx_pcr *pcr); +int rtsx_ms_power_off_card3v3(struct rtsx_pcr *pcr); #endif -- cgit v1.2.3 From 4b380c42f7d00a395feede754f0bc2292eebe6e5 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sun, 3 Dec 2017 12:12:45 -0800 Subject: netfilter: nfnetlink_cthelper: Add missing permission checks The capability check in nfnetlink_rcv() verifies 
that the caller has CAP_NET_ADMIN in the namespace that "owns" the netlink socket. However, nfnl_cthelper_list is shared by all net namespaces on the system. An unprivileged user can create user and net namespaces in which he holds CAP_NET_ADMIN to bypass the netlink_net_capable() check: $ nfct helper list nfct v1.4.4: netlink error: Operation not permitted $ vpnns -- nfct helper list { .name = ftp, .queuenum = 0, .l3protonum = 2, .l4protonum = 6, .priv_data_len = 24, .status = enabled, }; Add capable() checks in nfnetlink_cthelper, as this is cleaner than trying to generalize the solution. Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 41628b393673..d33ce6d5ebce 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -407,6 +408,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth; int ret = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -611,6 +615,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth; bool tuple_set = false; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nfnl_cthelper_dump_table, @@ -678,6 +685,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, struct nfnl_cthelper *nlcth, *n; int j = 0, ret; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); -- cgit v1.2.3 From 6ab405114b0b229151ef06f4e31c7834dd09d0c0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 1 Dec 2017 01:46:07 +0100 Subject: netfilter: xt_bpf: add overflow checks Check 
whether inputs from userspace are too long (explicit length field too big or string not null-terminated) to avoid out-of-bounds reads. As far as I can tell, this can at worst lead to very limited kernel heap memory disclosure or oopses. This bug can be triggered by an unprivileged user even if the xt_bpf module is not loaded: iptables is available in network namespaces, and the xt_bpf module can be autoloaded. Triggering the bug with a classic BPF filter with fake length 0x1000 causes the following KASAN report: ================================================================== BUG: KASAN: slab-out-of-bounds in bpf_prog_create+0x84/0xf0 Read of size 32768 at addr ffff8801eff2c494 by task test/4627 CPU: 0 PID: 4627 Comm: test Not tainted 4.15.0-rc1+ #1 [...] Call Trace: dump_stack+0x5c/0x85 print_address_description+0x6a/0x260 kasan_report+0x254/0x370 ? bpf_prog_create+0x84/0xf0 memcpy+0x1f/0x50 bpf_prog_create+0x84/0xf0 bpf_mt_check+0x90/0xd6 [xt_bpf] [...] Allocated by task 4627: kasan_kmalloc+0xa0/0xd0 __kmalloc_node+0x47/0x60 xt_alloc_table_info+0x41/0x70 [x_tables] [...] The buggy address belongs to the object at ffff8801eff2c3c0 which belongs to the cache kmalloc-2048 of size 2048 The buggy address is located 212 bytes inside of 2048-byte region [ffff8801eff2c3c0, ffff8801eff2cbc0) [...] 
================================================================== Fixes: e6f30c731718 ("netfilter: x_tables: add xt_bpf match") Signed-off-by: Jann Horn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_bpf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 041da0d9c06f..1f7fbd3c7e5a 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -27,6 +27,9 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, { struct sock_fprog_kern program; + if (len > XT_BPF_MAX_NUM_INSTR) + return -EINVAL; + program.len = len; program.filter = insns; @@ -55,6 +58,9 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) mm_segment_t oldfs = get_fs(); int retval, fd; + if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX) + return -EINVAL; + set_fs(KERNEL_DS); fd = bpf_obj_get_user(path, 0); set_fs(oldfs); -- cgit v1.2.3 From 912d7985f3cef1b901a4fd9fede549b919fe7ac3 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 9 Nov 2017 16:35:35 -0600 Subject: ARM: dts: rockchip: fix rk3288 iep-IOMMU interrupts property cells The interrupts property in the iep-IOMMU node for the rk3288 dts file has a spurious extra cell causing a dtc warning: Warning (interrupts_property): interrupts size is (16), expected multiple of 12 in /iommu@ff900800 Remove the extra cell. 
Signed-off-by: Rob Herring Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index cd24894ee5c6..6102e4e7f35c 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -956,7 +956,7 @@ iep_mmu: iommu@ff900800 { compatible = "rockchip,iommu"; reg = <0x0 0xff900800 0x0 0x40>; - interrupts = ; + interrupts = ; interrupt-names = "iep_mmu"; #iommu-cells = <0>; status = "disabled"; -- cgit v1.2.3 From 5ba7dcfe77037b67016263ea597a8b431692ecab Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 3 Dec 2017 11:26:45 +0100 Subject: batman-adv: Fix lock for ogm cnt access in batadv_iv_ogm_calc_tq The originator node object orig_neigh_node is used when accessing the bcast_own(_sum) and real_packet_count information. The access to them has to be protected with the spinlock in orig_neigh_node. But the function uses the lock in orig_node instead. This is incorrect because they could be two different originator node objects.
Fixes: 0ede9f41b217 ("batman-adv: protect bit operations to count OGMs with spinlock") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 1b659ab652fb..bbe8414b6ee7 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1214,7 +1214,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, orig_node->last_seen = jiffies; /* find packet count of corresponding one hop neighbor */ - spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); if_num = if_incoming->if_num; orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); @@ -1224,7 +1224,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, } else { neigh_rq_count = 0; } - spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ if (orig_eq_count > neigh_rq_count) -- cgit v1.2.3 From 5a93bae2c382c588f437ce0395e8032ae287dc36 Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Thu, 19 Oct 2017 14:32:33 +0800 Subject: tracing: Fix code comments in trace.c Naming in code comments for tracing_snapshot, tracing_snapshot_alloc and trace_pid_filter_add_remove_task don't match the real function names. And latency_trace has been removed from tracing directory. Fix them. 
Link: http://lkml.kernel.org/r/1508394753-20887-1-git-send-email-chuhu@redhat.com Fixes: cab5037 ("tracing/ftrace: Enable snapshot function trigger") Fixes: 886b5b7 ("tracing: remove /debug/tracing/latency_trace") Signed-off-by: Chunyu Hu [ Replaced /sys/kernel/debug/tracing with /sys/kernel/tracing ] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73e67b68c53b..5815ec16edd4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -362,7 +362,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct } /** - * trace_pid_filter_add_remove - Add or remove a task from a pid_list + * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list * @pid_list: The list to modify * @self: The current task for fork or NULL for exit * @task: The task to add or remove @@ -925,7 +925,7 @@ static void tracing_snapshot_instance(struct trace_array *tr) } /** - * trace_snapshot - take a snapshot of the current buffer. + * tracing_snapshot - take a snapshot of the current buffer. * * This causes a swap between the snapshot buffer and the current live * tracing buffer. You can use this to take snapshots of the live @@ -1004,9 +1004,9 @@ int tracing_alloc_snapshot(void) EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); /** - * trace_snapshot_alloc - allocate and take a snapshot of the current buffer. + * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. * - * This is similar to trace_snapshot(), but it will allocate the + * This is similar to tracing_snapshot(), but it will allocate the * snapshot buffer if it isn't already allocated. Use this only * where it is safe to sleep, as the allocation may sleep. * @@ -1303,7 +1303,7 @@ unsigned long __read_mostly tracing_thresh; /* * Copy the new maximum trace into the separate maximum-trace * structure.
(this way the maximum trace is permanently saved, - * for later retrieval via /sys/kernel/debug/tracing/latency_trace) + * for later retrieval via /sys/kernel/tracing/tracing_max_latency) */ static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) -- cgit v1.2.3 From 250d0c7754aa37c6443f07f1f5f591e2806295d8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 10:32:13 +0200 Subject: tracing: always define trace_{irq,preempt}_{enable_disable} We get a build error in the irqsoff tracer in some configurations: kernel/trace/trace_irqsoff.c: In function 'trace_preempt_on': kernel/trace/trace_irqsoff.c:855:2: error: implicit declaration of function 'trace_preempt_enable_rcuidle'; did you mean 'trace_irq_enable_rcuidle'? [-Werror=implicit-function-declaration] trace_preempt_enable_rcuidle(a0, a1); The problem is that trace_preempt_enable_rcuidle() has different definition based on multiple Kconfig symbols, but not all combinations have a valid definition. This changes the conditions so that we always get exactly one definition of each of the four tracing macros. I have not tried to verify that these definitions are sensible, but now we can build all randconfig combinations again. 
Link: http://lkml.kernel.org/r/20171019083230.2450779-1-arnd@arndb.de Fixes: d59158162e03 ("tracing: Add support for preempt and irq enable/disable events") Acked-by: Joel Fernandes Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/preemptirq.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h index f5024c560d8f..9c4eb33c5a1d 100644 --- a/include/trace/events/preemptirq.h +++ b/include/trace/events/preemptirq.h @@ -56,15 +56,18 @@ DEFINE_EVENT(preemptirq_template, preempt_enable, #include -#else /* !CONFIG_PREEMPTIRQ_EVENTS */ +#endif /* !CONFIG_PREEMPTIRQ_EVENTS */ +#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PROVE_LOCKING) #define trace_irq_enable(...) #define trace_irq_disable(...) -#define trace_preempt_enable(...) -#define trace_preempt_disable(...) #define trace_irq_enable_rcuidle(...) #define trace_irq_disable_rcuidle(...) +#endif + +#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || !defined(CONFIG_DEBUG_PREEMPT) +#define trace_preempt_enable(...) +#define trace_preempt_disable(...) #define trace_preempt_enable_rcuidle(...) #define trace_preempt_disable_rcuidle(...) - #endif -- cgit v1.2.3 From 90e406f96f630c07d631a021fd4af10aac913e77 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 30 Nov 2017 11:39:43 +0800 Subject: tracing: Allocate mask_str buffer dynamically The default NR_CPUS can be very large, but actual possible nr_cpu_ids usually is very small. For my x86 distribution, the NR_CPUS is 8192 and nr_cpu_ids is 4. About 2 pages are wasted. Most machines don't have so many CPUs, so defining an array with NR_CPUS just wastes memory. So let's allocate the buffer dynamically when needed. With this change, the mutex tracing_cpumask_update_lock also can be removed now, which was used to protect mask_str.
Link: http://lkml.kernel.org/r/1512013183-19107-1-git-send-email-changbin.du@intel.com Fixes: 36dfe9252bd4c ("ftrace: make use of tracing_cpumask") Cc: stable@vger.kernel.org Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5815ec16edd4..9f3f043ba3b7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4178,37 +4178,30 @@ static const struct file_operations show_traces_fops = { .llseek = seq_lseek, }; -/* - * The tracer itself will not take this lock, but still we want - * to provide a consistent cpumask to user-space: - */ -static DEFINE_MUTEX(tracing_cpumask_update_lock); - -/* - * Temporary storage for the character representation of the - * CPU bitmask (and one more byte for the newline): - */ -static char mask_str[NR_CPUS + 1]; - static ssize_t tracing_cpumask_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct trace_array *tr = file_inode(filp)->i_private; + char *mask_str; int len; - mutex_lock(&tracing_cpumask_update_lock); + len = snprintf(NULL, 0, "%*pb\n", + cpumask_pr_args(tr->tracing_cpumask)) + 1; + mask_str = kmalloc(len, GFP_KERNEL); + if (!mask_str) + return -ENOMEM; - len = snprintf(mask_str, count, "%*pb\n", + len = snprintf(mask_str, len, "%*pb\n", cpumask_pr_args(tr->tracing_cpumask)); if (len >= count) { count = -EINVAL; goto out_err; } - count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); + count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); out_err: - mutex_unlock(&tracing_cpumask_update_lock); + kfree(mask_str); return count; } @@ -4228,8 +4221,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, if (err) goto err_unlock; - mutex_lock(&tracing_cpumask_update_lock); - local_irq_disable(); arch_spin_lock(&tr->max_lock); for_each_tracing_cpu(cpu) { @@ -4252,8 
+4243,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, local_irq_enable(); cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); - - mutex_unlock(&tracing_cpumask_update_lock); free_cpumask_var(tracing_cpumask_new); return count; -- cgit v1.2.3 From 2dde6b0034dbc050957cdb6539ce28eca57e8cdf Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Nov 2017 11:39:57 +0100 Subject: tracing: make PREEMPTIRQ_EVENTS depend on TRACING When CONFIG_TRACING is disabled, the new preemptirq events tracer produces a build failure: In file included from kernel/trace/trace_irqsoff.c:17:0: kernel/trace/trace.h: In function 'trace_test_and_set_recursion': kernel/trace/trace.h:542:28: error: 'struct task_struct' has no member named 'trace_recursion' Adding an explicit dependency avoids the broken configuration. Link: http://lkml.kernel.org/r/20171103104031.270375-1-arnd@arndb.de Fixes: d59158162e03 ("tracing: Add support for preempt and irq enable/disable events") Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index af7dad126c13..904c952ac383 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -164,6 +164,7 @@ config PREEMPTIRQ_EVENTS bool "Enable trace events for preempt and irq disable/enable" select TRACE_IRQFLAGS depends on DEBUG_PREEMPT || !PROVE_LOCKING + depends on TRACING default n help Enable tracing of disable and enable events for preemption and irqs. 
-- cgit v1.2.3 From c4bfd39d7fa5203d4b387c283d360e9a108e85b3 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 17 May 2017 17:14:15 -0700 Subject: ring-buffer: Remove unused function __rb_data_page_index() This fixes the following warning when building with clang: kernel/trace/ring_buffer.c:1842:1: error: unused function '__rb_data_page_index' [-Werror,-Wunused-function] Link: http://lkml.kernel.org/r/20170518001415.5223-1-mka@chromium.org Reviewed-by: Douglas Anderson Signed-off-by: Matthias Kaehlcke Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 91874a95060d..c87766c1c204 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1799,12 +1799,6 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val) } EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); -static __always_inline void * -__rb_data_page_index(struct buffer_data_page *bpage, unsigned index) -{ - return bpage->data + index; -} - static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) { return bpage->page->data + index; -- cgit v1.2.3 From a773d419275bf54854ca6cfda8f2594ed2790faa Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 2 Jun 2017 13:20:25 +0300 Subject: tracing: Pass export pointer as argument to ->write() By passing an export descriptor to the write function, users don't need to keep a global static pointer and can rely on container_of() to fetch their own structure. 
Link: http://lkml.kernel.org/r/20170602102025.5140-1-felipe.balbi@linux.intel.com Acked-by: Steven Rostedt (VMware) Reviewed-by: Chunyan Zhang Signed-off-by: Felipe Balbi Signed-off-by: Steven Rostedt (VMware) --- drivers/hwtracing/stm/ftrace.c | 6 ++++-- include/linux/trace.h | 2 +- kernel/trace/trace.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/hwtracing/stm/ftrace.c b/drivers/hwtracing/stm/ftrace.c index bd126a7c6da2..7da75644c750 100644 --- a/drivers/hwtracing/stm/ftrace.c +++ b/drivers/hwtracing/stm/ftrace.c @@ -42,9 +42,11 @@ static struct stm_ftrace { * @len: length of the data packet */ static void notrace -stm_ftrace_write(const void *buf, unsigned int len) +stm_ftrace_write(struct trace_export *export, const void *buf, unsigned int len) { - stm_source_write(&stm_ftrace.data, STM_FTRACE_CHAN, buf, len); + struct stm_ftrace *stm = container_of(export, struct stm_ftrace, ftrace); + + stm_source_write(&stm->data, STM_FTRACE_CHAN, buf, len); } static int stm_ftrace_link(struct stm_source_data *data) diff --git a/include/linux/trace.h b/include/linux/trace.h index d24991c1fef3..b95ffb2188ab 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -18,7 +18,7 @@ */ struct trace_export { struct trace_export __rcu *next; - void (*write)(const void *, unsigned int); + void (*write)(struct trace_export *, const void *, unsigned int); }; int register_ftrace_export(struct trace_export *export); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3f043ba3b7..59518b8126d0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2415,7 +2415,7 @@ trace_process_export(struct trace_export *export, entry = ring_buffer_event_data(event); size = ring_buffer_event_length(event); - export->write(entry, size); + export->write(export, entry, size); } static DEFINE_MUTEX(ftrace_export_lock); -- cgit v1.2.3 From 4ab53fe612e21b0f509a3b468c56706364de98df Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Nov 2017 
11:12:27 +0000 Subject: PM: Provide a config snippet for disabling PM A frequent source of build problems is poor handling of optional PM support; almost all development is done with the PM options enabled but they can be turned off. Currently few if any of the build test services do this as standard as there is no standard config for it and the use of selects and def_bool means that simply setting CONFIG_PM=n doesn't do what is expected. To make this easier provide a fragment that can be used with KCONFIG_ALLCONFIG to force PM off. CONFIG_XEN is disabled as Xen uses hibernation callbacks which end up turning on power management on architectures with Xen. Some cpuidle implementations on ARM select PM so CONFIG_CPU_IDLE is disabled, and some ARM architectures unconditionally enable PM so they are also disabled. Signed-off-by: Mark Brown Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 1 + kernel/configs/nopm.config | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 kernel/configs/nopm.config diff --git a/MAINTAINERS b/MAINTAINERS index d4fdcb12616c..ae6e590c5125 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10890,6 +10890,7 @@ F: include/linux/pm.h F: include/linux/pm_* F: include/linux/powercap.h F: drivers/powercap/ +F: kernel/configs/nopm.config POWER STATE COORDINATION INTERFACE (PSCI) M: Mark Rutland diff --git a/kernel/configs/nopm.config b/kernel/configs/nopm.config new file mode 100644 index 000000000000..81ff07863576 --- /dev/null +++ b/kernel/configs/nopm.config @@ -0,0 +1,15 @@ +CONFIG_PM=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n + +# Triggers PM on OMAP +CONFIG_CPU_IDLE=n + +# Triggers enablement via hibernate callbacks +CONFIG_XEN=n + +# ARM/ARM64 architectures that select PM unconditionally +CONFIG_ARCH_OMAP2PLUS_TYPICAL=n +CONFIG_ARCH_RENESAS=n +CONFIG_ARCH_TEGRA=n +CONFIG_ARCH_VEXPRESS=n -- cgit v1.2.3 From 045149e6a22119e5bf0d16a0b24a4173a2abb71d Mon Sep 17 00:00:00 2001 From: "Rafael J.
Wysocki" Date: Thu, 23 Nov 2017 01:23:16 +0100 Subject: cpufreq: Clean up cpufreq_parse_governor() Drop an unnecessary local variable from cpufreq_parse_governor() and rearrange the code in there to make it easier to follow. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 41d148af7748..4d76b7c57b7a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -604,16 +604,15 @@ static struct cpufreq_governor *find_governor(const char *str_governor) static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, struct cpufreq_governor **governor) { - int err = -EINVAL; - if (cpufreq_driver->setpolicy) { if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { *policy = CPUFREQ_POLICY_PERFORMANCE; - err = 0; - } else if (!strncasecmp(str_governor, "powersave", - CPUFREQ_NAME_LEN)) { + return 0; + } + + if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { *policy = CPUFREQ_POLICY_POWERSAVE; - err = 0; + return 0; } } else { struct cpufreq_governor *t; @@ -621,26 +620,29 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, mutex_lock(&cpufreq_governor_mutex); t = find_governor(str_governor); - - if (t == NULL) { + if (!t) { int ret; mutex_unlock(&cpufreq_governor_mutex); + ret = request_module("cpufreq_%s", str_governor); + if (ret) + return -EINVAL; + mutex_lock(&cpufreq_governor_mutex); - if (ret == 0) - t = find_governor(str_governor); + t = find_governor(str_governor); } - if (t != NULL) { + mutex_unlock(&cpufreq_governor_mutex); + + if (t) { *governor = t; - err = 0; + return 0; } - - mutex_unlock(&cpufreq_governor_mutex); } - return err; + + return -EINVAL; } /** -- cgit v1.2.3 From ae0ff89f36b282ef32ff0f73e847352ea625464c Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 23 Nov 2017 01:24:05 +0100 Subject: cpufreq: Pass policy pointer to cpufreq_parse_governor() Pass policy pointer to cpufreq_parse_governor() instead of passing pointers to two members of it so as to make the code slightly more straightforward. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 4d76b7c57b7a..8f356c4befda 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -601,17 +601,17 @@ static struct cpufreq_governor *find_governor(const char *str_governor) /** * cpufreq_parse_governor - parse a governor string */ -static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, - struct cpufreq_governor **governor) +static int cpufreq_parse_governor(char *str_governor, + struct cpufreq_policy *policy) { if (cpufreq_driver->setpolicy) { if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { - *policy = CPUFREQ_POLICY_PERFORMANCE; + policy->policy = CPUFREQ_POLICY_PERFORMANCE; return 0; } if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { - *policy = CPUFREQ_POLICY_POWERSAVE; + policy->policy = CPUFREQ_POLICY_POWERSAVE; return 0; } } else { @@ -637,7 +637,7 @@ static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, mutex_unlock(&cpufreq_governor_mutex); if (t) { - *governor = t; + policy->governor = t; return 0; } } @@ -762,8 +762,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, if (ret != 1) return -EINVAL; - if (cpufreq_parse_governor(str_governor, &new_policy.policy, - &new_policy.governor)) + if (cpufreq_parse_governor(str_governor, &new_policy)) return -EINVAL; ret = cpufreq_set_policy(policy, &new_policy); @@ -1046,8 +1045,7 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) if (policy->last_policy) 
new_policy.policy = policy->last_policy; else - cpufreq_parse_governor(gov->name, &new_policy.policy, - NULL); + cpufreq_parse_governor(gov->name, &new_policy); } /* set default policy */ return cpufreq_set_policy(policy, &new_policy); -- cgit v1.2.3 From 70d1ff71161b1c56c6d025e6a957bc878dfd940b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 Nov 2017 01:30:16 +0100 Subject: cpufreq: Drop pointless return statement Drop a pointless return statement from cpufreq_unregister_governor(). Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 8f356c4befda..d2a22de4e4d2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2160,7 +2160,6 @@ void cpufreq_unregister_governor(struct cpufreq_governor *governor) mutex_lock(&cpufreq_governor_mutex); list_del(&governor->governor_list); mutex_unlock(&cpufreq_governor_mutex); - return; } EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); -- cgit v1.2.3 From a8b149d32b663c1a4105273295184b78f53d33cf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 Nov 2017 14:27:07 +0100 Subject: cpufreq: Fix governor module removal race It is possible to remove a cpufreq governor module after cpufreq_parse_governor() has returned success in store_scaling_governor() and before cpufreq_set_policy() acquires a reference to it, because the governor list is not protected during that period and nothing prevents the governor from being unregistered then. Prevent that from happening by acquiring an extra reference to the governor module temporarily in cpufreq_parse_governor(), under cpufreq_governor_mutex, and dropping it in store_scaling_governor(), when cpufreq_set_policy() returns. Note that the second cpufreq_parse_governor() call site is fine, because it only cares about the policy member of new_policy. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d2a22de4e4d2..421f318c0e66 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -633,6 +633,8 @@ static int cpufreq_parse_governor(char *str_governor, t = find_governor(str_governor); } + if (t && !try_module_get(t->owner)) + t = NULL; mutex_unlock(&cpufreq_governor_mutex); @@ -766,6 +768,10 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, return -EINVAL; ret = cpufreq_set_policy(policy, &new_policy); + + if (new_policy.governor) + module_put(new_policy.governor->owner); + return ret ? ret : count; } -- cgit v1.2.3 From 4b688b1c48dbbc6b1c939899f992985e2b03150c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 1 Dec 2017 15:26:06 +0100 Subject: ACPICA: Rename variable to match upstream There is a variable name mismatch in acpi_ut_strtoul_multiply64() between the ACPICA code in the kernel and the corresponding upstream code which may be problematic if changes to this particular piece of code are made upstream and ported to Linux, so rename the variable in question to match its name in the upstream code. Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/utstrsuppt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/utstrsuppt.c b/drivers/acpi/acpica/utstrsuppt.c index b2fc371c402e..97f48d71f9e6 100644 --- a/drivers/acpi/acpica/utstrsuppt.c +++ b/drivers/acpi/acpica/utstrsuppt.c @@ -370,7 +370,7 @@ acpi_ut_insert_digit(u64 *accumulated_value, u32 base, int ascii_digit) static acpi_status acpi_ut_strtoul_multiply64(u64 multiplicand, u32 base, u64 *out_product) { - u64 val; + u64 product; u64 quotient; /* Exit if either operand is zero */ @@ -393,15 +393,15 @@ acpi_ut_strtoul_multiply64(u64 multiplicand, u32 base, u64 *out_product) return (AE_NUMERIC_OVERFLOW); } - val = multiplicand * base; + product = multiplicand * base; /* Check for 32-bit overflow if necessary */ - if ((acpi_gbl_integer_bit_width == 32) && (val > ACPI_UINT32_MAX)) { + if ((acpi_gbl_integer_bit_width == 32) && (product > ACPI_UINT32_MAX)) { return (AE_NUMERIC_OVERFLOW); } - *out_product = val; + *out_product = product; return (AE_OK); } -- cgit v1.2.3 From afc567a4977b2d798e05153dd131a3c8d4758c0c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 25 Nov 2017 00:27:26 -0500 Subject: dm table: fix regression from improper dm_dev_internal.count refcount_t conversion Multiple refcounts are needed if the device was already added. The micro-optimization of setting the refcount to 1 on first added (rather than fall thru to a common refcount_inc) lost sight of the fact that the refcount_inc is also needed for the case when the device already exists and the mode need not be upgraded. 
Fixes: 2a0b4682e0 ("dm: convert dm_dev_internal.count from atomic_t to refcount_t") Reported-by: Zdenek Kabelac Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 88130b5d95f9..aaffd0c0ee9a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -453,14 +453,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, refcount_set(&dd->count, 1); list_add(&dd->list, &t->devices); + goto out; } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) { r = upgrade_mode(dd, mode, t->md); if (r) return r; - refcount_inc(&dd->count); } - + refcount_inc(&dd->count); +out: *result = dd->dm_dev; return 0; } -- cgit v1.2.3 From 7e6358d244e4706fe612a77b9c36519a33600ac0 Mon Sep 17 00:00:00 2001 From: "monty_pavel@sina.com" Date: Sat, 25 Nov 2017 01:43:50 +0800 Subject: dm: fix various targets to dm_register_target after module __init resources created A NULL pointer is seen if two concurrent "vgchange -ay -K " processes race to load the dm-thin-pool module: PID: 25992 TASK: ffff883cd7d23500 CPU: 4 COMMAND: "vgchange" #0 [ffff883cd743d600] machine_kexec at ffffffff81038fa9 #1 [ffff883cd743d660] crash_kexec at ffffffff810c5992 #2 [ffff883cd743d730] oops_end at ffffffff81515c90 #3 [ffff883cd743d760] no_context at ffffffff81049f1b #4 [ffff883cd743d7b0] __bad_area_nosemaphore at ffffffff8104a1a5 #5 [ffff883cd743d800] bad_area at ffffffff8104a2ce #6 [ffff883cd743d830] __do_page_fault at ffffffff8104aa6f #7 [ffff883cd743d950] do_page_fault at ffffffff81517bae #8 [ffff883cd743d980] page_fault at ffffffff81514f95 [exception RIP: kmem_cache_alloc+108] RIP: ffffffff8116ef3c RSP: ffff883cd743da38 RFLAGS: 00010046 RAX: 0000000000000004 RBX: ffffffff81121b90 RCX: ffff881bf1e78cc0 RDX: 0000000000000000 RSI: 00000000000000d0 RDI: 0000000000000000 RBP: ffff883cd743da68 R8: ffff881bf1a4eb00 R9:
0000000080042000 R10: 0000000000002000 R11: 0000000000000000 R12: 00000000000000d0 R13: 0000000000000000 R14: 00000000000000d0 R15: 0000000000000246 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #9 [ffff883cd743da70] mempool_alloc_slab at ffffffff81121ba5 #10 [ffff883cd743da80] mempool_create_node at ffffffff81122083 #11 [ffff883cd743dad0] mempool_create at ffffffff811220f4 #12 [ffff883cd743dae0] pool_ctr at ffffffffa08de049 [dm_thin_pool] #13 [ffff883cd743dbd0] dm_table_add_target at ffffffffa0005f2f [dm_mod] #14 [ffff883cd743dc30] table_load at ffffffffa0008ba9 [dm_mod] #15 [ffff883cd743dc90] ctl_ioctl at ffffffffa0009dc4 [dm_mod] The race results in a NULL pointer because: Process A (vgchange -ay -K): a. send DM_LIST_VERSIONS_CMD ioctl; b. pool_target not registered; c. modprobe dm_thin_pool and wait until end. Process B (vgchange -ay -K): a. send DM_LIST_VERSIONS_CMD ioctl; b. pool_target registered; c. table_load->dm_table_add_target->pool_ctr; d. _new_mapping_cache is NULL and panic. Note: 1. process A and process B are two concurrent processes. 2. pool_target can be detected by process B but _new_mapping_cache initialization has not ended. To fix dm-thin-pool, and other targets (cache, multipath, and snapshot) with the same problem, simply dm_register_target() after all resources created during module init (as labelled with __init) are finished.
Cc: stable@vger.kernel.org Signed-off-by: monty Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 12 +++++------ drivers/md/dm-mpath.c | 18 ++++++++--------- drivers/md/dm-snap.c | 48 ++++++++++++++++++++++---------------------- drivers/md/dm-thin.c | 22 +++++++++----------- 4 files changed, 49 insertions(+), 51 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index cf23a14f9c6a..47407e43b96a 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3472,18 +3472,18 @@ static int __init dm_cache_init(void) { int r; - r = dm_register_target(&cache_target); - if (r) { - DMERR("cache target registration failed: %d", r); - return r; - } - migration_cache = KMEM_CACHE(dm_cache_migration, 0); if (!migration_cache) { dm_unregister_target(&cache_target); return -ENOMEM; } + r = dm_register_target(&cache_target); + if (r) { + DMERR("cache target registration failed: %d", r); + return r; + } + return 0; } diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c8faa2b85842..35a2a2fa477f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1957,13 +1957,6 @@ static int __init dm_multipath_init(void) { int r; - r = dm_register_target(&multipath_target); - if (r < 0) { - DMERR("request-based register failed %d", r); - r = -EINVAL; - goto bad_register_target; - } - kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); if (!kmultipathd) { DMERR("failed to create workqueue kmpathd"); @@ -1985,13 +1978,20 @@ static int __init dm_multipath_init(void) goto bad_alloc_kmpath_handlerd; } + r = dm_register_target(&multipath_target); + if (r < 0) { + DMERR("request-based register failed %d", r); + r = -EINVAL; + goto bad_register_target; + } + return 0; +bad_register_target: + destroy_workqueue(kmpath_handlerd); bad_alloc_kmpath_handlerd: destroy_workqueue(kmultipathd); bad_alloc_kmultipathd: - dm_unregister_target(&multipath_target); -bad_register_target: return r; } diff --git 
a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 1113b42e1eda..a0613bd8ed00 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2411,24 +2411,6 @@ static int __init dm_snapshot_init(void) return r; } - r = dm_register_target(&snapshot_target); - if (r < 0) { - DMERR("snapshot target register failed %d", r); - goto bad_register_snapshot_target; - } - - r = dm_register_target(&origin_target); - if (r < 0) { - DMERR("Origin target register failed %d", r); - goto bad_register_origin_target; - } - - r = dm_register_target(&merge_target); - if (r < 0) { - DMERR("Merge target register failed %d", r); - goto bad_register_merge_target; - } - r = init_origin_hash(); if (r) { DMERR("init_origin_hash failed."); @@ -2449,19 +2431,37 @@ static int __init dm_snapshot_init(void) goto bad_pending_cache; } + r = dm_register_target(&snapshot_target); + if (r < 0) { + DMERR("snapshot target register failed %d", r); + goto bad_register_snapshot_target; + } + + r = dm_register_target(&origin_target); + if (r < 0) { + DMERR("Origin target register failed %d", r); + goto bad_register_origin_target; + } + + r = dm_register_target(&merge_target); + if (r < 0) { + DMERR("Merge target register failed %d", r); + goto bad_register_merge_target; + } + return 0; -bad_pending_cache: - kmem_cache_destroy(exception_cache); -bad_exception_cache: - exit_origin_hash(); -bad_origin_hash: - dm_unregister_target(&merge_target); bad_register_merge_target: dm_unregister_target(&origin_target); bad_register_origin_target: dm_unregister_target(&snapshot_target); bad_register_snapshot_target: + kmem_cache_destroy(pending_cache); +bad_pending_cache: + kmem_cache_destroy(exception_cache); +bad_exception_cache: + exit_origin_hash(); +bad_origin_hash: dm_exception_store_exit(); return r; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 89e5dff9b4cf..f91d771fff4b 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4355,30 +4355,28 @@ static struct target_type thin_target = 
{ static int __init dm_thin_init(void) { - int r; + int r = -ENOMEM; pool_table_init(); + _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0); + if (!_new_mapping_cache) + return r; + r = dm_register_target(&thin_target); if (r) - return r; + goto bad_new_mapping_cache; r = dm_register_target(&pool_target); if (r) - goto bad_pool_target; - - r = -ENOMEM; - - _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0); - if (!_new_mapping_cache) - goto bad_new_mapping_cache; + goto bad_thin_target; return 0; -bad_new_mapping_cache: - dm_unregister_target(&pool_target); -bad_pool_target: +bad_thin_target: dm_unregister_target(&thin_target); +bad_new_mapping_cache: + kmem_cache_destroy(_new_mapping_cache); return r; } -- cgit v1.2.3 From c6766aae8e08008a8860271448f22ab71cb848b4 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:03:32 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on wm8998 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm8998.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/wm8998.c b/sound/soc/codecs/wm8998.c index 2d211dbe7422..1288e1f67dcf 100644 --- a/sound/soc/codecs/wm8998.c +++ b/sound/soc/codecs/wm8998.c @@ -1275,9 +1275,11 @@ static int wm8998_codec_probe(struct snd_soc_codec *codec) struct wm8998_priv *priv = snd_soc_codec_get_drvdata(codec); struct snd_soc_dapm_context *dapm = snd_soc_codec_get_dapm(codec); struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); + struct arizona *arizona = priv->core.arizona; int ret; - priv->core.arizona->dapm = dapm; + arizona->dapm = dapm; + snd_soc_codec_init_regmap(codec, arizona->regmap); ret = arizona_init_spk(codec); if (ret < 0) @@ -1313,17 +1315,9 @@ static unsigned int wm8998_digital_vu[] = { ARIZONA_DAC_DIGITAL_VOLUME_5R, }; -static struct regmap *wm8998_get_regmap(struct device *dev) -{ - struct wm8998_priv *priv = dev_get_drvdata(dev); - - return priv->core.arizona->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_wm8998 = { .probe = wm8998_codec_probe, .remove = wm8998_codec_remove, - .get_regmap = wm8998_get_regmap, .idle_bias_off = true, -- cgit v1.2.3 From 1e4a36afe04b8edd9affc41371822a79599b98e0 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:03:48 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on wm8997 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm8997.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/wm8997.c b/sound/soc/codecs/wm8997.c index 77f512767273..cac9b3e7e15d 100644 --- a/sound/soc/codecs/wm8997.c +++ b/sound/soc/codecs/wm8997.c @@ -1062,8 +1062,11 @@ static int wm8997_codec_probe(struct snd_soc_codec *codec) struct snd_soc_dapm_context *dapm = snd_soc_codec_get_dapm(codec); struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); struct wm8997_priv *priv = snd_soc_codec_get_drvdata(codec); + struct arizona *arizona = priv->core.arizona; int ret; + snd_soc_codec_init_regmap(codec, arizona->regmap); + ret = arizona_init_spk(codec); if (ret < 0) return ret; @@ -1095,17 +1098,9 @@ static unsigned int wm8997_digital_vu[] = { ARIZONA_DAC_DIGITAL_VOLUME_5R, }; -static struct regmap *wm8997_get_regmap(struct device *dev) -{ - struct wm8997_priv *priv = dev_get_drvdata(dev); - - return priv->core.arizona->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_wm8997 = { .probe = wm8997_codec_probe, .remove = wm8997_codec_remove, - .get_regmap = wm8997_get_regmap, .idle_bias_off = true, -- cgit v1.2.3 From 5a81eb5356c87ad60f65378464ba92a7d645ce1d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:04:02 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on wm8994 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm8994.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index f91b49e1ece3..21ffd6403173 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -3993,6 +3993,8 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec) unsigned int reg; int ret, i; + snd_soc_codec_init_regmap(codec, control->regmap); + wm8994->hubs.codec = codec; mutex_init(&wm8994->accdet_lock); @@ -4434,19 +4436,11 @@ static int wm8994_codec_remove(struct snd_soc_codec *codec) return 0; } -static struct regmap *wm8994_get_regmap(struct device *dev) -{ - struct wm8994 *control = dev_get_drvdata(dev->parent); - - return control->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_wm8994 = { .probe = wm8994_codec_probe, .remove = wm8994_codec_remove, .suspend = wm8994_codec_suspend, .resume = wm8994_codec_resume, - .get_regmap = wm8994_get_regmap, .set_bias_level = wm8994_set_bias_level, }; -- cgit v1.2.3 From 74c76497946e1f15b51d603735eb8273ac89381e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:04:17 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on wm8400 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm8400.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c index 6c59fb933bd6..a36adf881bca 100644 --- a/sound/soc/codecs/wm8400.c +++ b/sound/soc/codecs/wm8400.c @@ -1285,6 +1285,7 @@ static int wm8400_codec_probe(struct snd_soc_codec *codec) if (priv == NULL) return -ENOMEM; + snd_soc_codec_init_regmap(codec, wm8400->regmap); snd_soc_codec_set_drvdata(codec, priv); priv->wm8400 = wm8400; @@ -1325,17 +1326,9 @@ static int wm8400_codec_remove(struct snd_soc_codec *codec) return 0; } -static struct regmap *wm8400_get_regmap(struct device *dev) -{ - struct wm8400 *wm8400 = dev_get_platdata(dev); - - return wm8400->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_wm8400 = { .probe = wm8400_codec_probe, .remove = wm8400_codec_remove, - .get_regmap = wm8400_get_regmap, .set_bias_level = wm8400_set_bias_level, .suspend_bias_off = true, -- cgit v1.2.3 From 40b84884617d9f07663862770d2574a71e313bc7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:04:31 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on wm8350 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm8350.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index 2efc5b41ad0f..fc79c6725d06 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -1472,6 +1472,8 @@ static int wm8350_codec_probe(struct snd_soc_codec *codec) GFP_KERNEL); if (priv == NULL) return -ENOMEM; + + snd_soc_codec_init_regmap(codec, wm8350->regmap); snd_soc_codec_set_drvdata(codec, priv); priv->wm8350 = wm8350; @@ -1580,17 +1582,9 @@ static int wm8350_codec_remove(struct snd_soc_codec *codec) return 0; } -static struct regmap *wm8350_get_regmap(struct device *dev) -{ - struct wm8350 *wm8350 = dev_get_platdata(dev); - - return wm8350->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_wm8350 = { .probe = wm8350_codec_probe, .remove = wm8350_codec_remove, - .get_regmap = wm8350_get_regmap, .set_bias_level = wm8350_set_bias_level, .suspend_bias_off = true, -- cgit v1.2.3 From 893d7cbea2fa259573e4ba36075e74f489f6dd28 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:04:45 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on wm5110 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm5110.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 6ed1e1f9ce51..fb0cf9c61f48 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -2280,9 +2280,11 @@ static int wm5110_codec_probe(struct snd_soc_codec *codec) struct snd_soc_dapm_context *dapm = snd_soc_codec_get_dapm(codec); struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); struct wm5110_priv *priv = snd_soc_codec_get_drvdata(codec); + struct arizona *arizona = priv->core.arizona; int i, ret; - priv->core.arizona->dapm = dapm; + arizona->dapm = dapm; + snd_soc_codec_init_regmap(codec, arizona->regmap); ret = arizona_init_spk(codec); if (ret < 0) @@ -2344,17 +2346,9 @@ static unsigned int wm5110_digital_vu[] = { ARIZONA_DAC_DIGITAL_VOLUME_6R, }; -static struct regmap *wm5110_get_regmap(struct device *dev) -{ - struct wm5110_priv *priv = dev_get_drvdata(dev); - - return priv->core.arizona->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_wm5110 = { .probe = wm5110_codec_probe, .remove = wm5110_codec_remove, - .get_regmap = wm5110_get_regmap, .idle_bias_off = true, -- cgit v1.2.3 From 2d6e28c368aabbdc841c44a2213a5ee16cfd5ae2 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:05:01 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on wm5102 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm5102.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index 4f0481d3c7a7..fc066caa1918 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -1935,8 +1935,11 @@ static int wm5102_codec_probe(struct snd_soc_codec *codec) struct snd_soc_dapm_context *dapm = snd_soc_codec_get_dapm(codec); struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); struct wm5102_priv *priv = snd_soc_codec_get_drvdata(codec); + struct arizona *arizona = priv->core.arizona; int ret; + snd_soc_codec_init_regmap(codec, arizona->regmap); + ret = wm_adsp2_codec_probe(&priv->core.adsp[0], codec); if (ret) return ret; @@ -1989,17 +1992,9 @@ static unsigned int wm5102_digital_vu[] = { ARIZONA_DAC_DIGITAL_VOLUME_5R, }; -static struct regmap *wm5102_get_regmap(struct device *dev) -{ - struct wm5102_priv *priv = dev_get_drvdata(dev); - - return priv->core.arizona->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_wm5102 = { .probe = wm5102_codec_probe, .remove = wm5102_codec_remove, - .get_regmap = wm5102_get_regmap, .idle_bias_off = true, -- cgit v1.2.3 From 3047ec50c3ebbe082217dc20ec0db4f3e5c7abea Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:05:15 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on si476x To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/si476x.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/si476x.c b/sound/soc/codecs/si476x.c index 354dc0d64f11..7b91ee267b4e 100644 --- a/sound/soc/codecs/si476x.c +++ b/sound/soc/codecs/si476x.c @@ -231,14 +231,17 @@ static struct snd_soc_dai_driver si476x_dai = { .ops = &si476x_dai_ops, }; -static struct regmap *si476x_get_regmap(struct device *dev) +static int si476x_probe(struct snd_soc_component *component) { - return dev_get_regmap(dev->parent, NULL); + snd_soc_component_init_regmap(component, + dev_get_regmap(component->dev->parent, NULL)); + + return 0; } static const struct snd_soc_codec_driver soc_codec_dev_si476x = { - .get_regmap = si476x_get_regmap, .component_driver = { + .probe = si476x_probe, .dapm_widgets = si476x_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(si476x_dapm_widgets), .dapm_routes = si476x_dapm_routes, -- cgit v1.2.3 From 33953d851d37f8b62b96d70fdccd35b62dffebe8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:05:31 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on mc13783 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/mc13783.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/mc13783.c b/sound/soc/codecs/mc13783.c index 4fd8d1dc4eef..be7a45f05bbf 100644 --- a/sound/soc/codecs/mc13783.c +++ b/sound/soc/codecs/mc13783.c @@ -610,6 +610,9 @@ static int mc13783_probe(struct snd_soc_codec *codec) { struct mc13783_priv *priv = snd_soc_codec_get_drvdata(codec); + snd_soc_codec_init_regmap(codec, + dev_get_regmap(codec->dev->parent, NULL)); + /* these are the reset values */ mc13xxx_reg_write(priv->mc13xxx, MC13783_AUDIO_RX0, 0x25893); mc13xxx_reg_write(priv->mc13xxx, MC13783_AUDIO_RX1, 0x00d35A); @@ -728,15 +731,9 @@ static struct snd_soc_dai_driver mc13783_dai_sync[] = { } }; -static struct regmap *mc13783_get_regmap(struct device *dev) -{ - return dev_get_regmap(dev->parent, NULL); -} - static const struct snd_soc_codec_driver soc_codec_dev_mc13783 = { .probe = mc13783_probe, .remove = mc13783_remove, - .get_regmap = mc13783_get_regmap, .component_driver = { .controls = mc13783_control_list, .num_controls = ARRAY_SIZE(mc13783_control_list), -- cgit v1.2.3 From 3fa8c49f27c15df259b7b8f94eb126ae491893fd Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Mon, 4 Dec 2017 18:36:10 +0100 Subject: arm64: dts: rockchip: fix trailing 0 in rk3328 tsadc interrupts Probably due to some copy-paste mistake, the tsadc of rk3328 ended up with a 0 as 4th element that shouldn't be there, as interrupts on the rk3328 only have multiples of 3, making dtc complain. So remove it. 
Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 41d61840fb99..2426da631938 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -514,7 +514,7 @@ tsadc: tsadc@ff250000 { compatible = "rockchip,rk3328-tsadc"; reg = <0x0 0xff250000 0x0 0x100>; - interrupts = ; + interrupts = ; assigned-clocks = <&cru SCLK_TSADC>; assigned-clock-rates = <50000>; clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>; -- cgit v1.2.3 From 60e1780ef27c626c7eaabae6103a218102b6e6ba Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:05:46 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on cq93vc To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/cq93vc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/cq93vc.c b/sound/soc/codecs/cq93vc.c index 6ed2cc374768..3bf93652bb31 100644 --- a/sound/soc/codecs/cq93vc.c +++ b/sound/soc/codecs/cq93vc.c @@ -121,17 +121,19 @@ static struct snd_soc_dai_driver cq93vc_dai = { .ops = &cq93vc_dai_ops, }; -static struct regmap *cq93vc_get_regmap(struct device *dev) +static int cq93vc_probe(struct snd_soc_component *component) { - struct davinci_vc *davinci_vc = dev->platform_data; + struct davinci_vc *davinci_vc = component->dev->platform_data; - return davinci_vc->regmap; + snd_soc_component_init_regmap(component, davinci_vc->regmap); + + return 0; } static const struct snd_soc_codec_driver soc_codec_dev_cq93vc = { .set_bias_level = cq93vc_set_bias_level, - .get_regmap = cq93vc_get_regmap, .component_driver = { + .probe = cq93vc_probe, .controls = 
cq93vc_snd_controls, .num_controls = ARRAY_SIZE(cq93vc_snd_controls), }, -- cgit v1.2.3 From ba8dd49f9431497c6b13514cfab3065b01057247 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:06:00 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on cs47l24 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/cs47l24.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/cs47l24.c b/sound/soc/codecs/cs47l24.c index 94c0209977d0..be2750680838 100644 --- a/sound/soc/codecs/cs47l24.c +++ b/sound/soc/codecs/cs47l24.c @@ -1120,9 +1120,11 @@ static int cs47l24_codec_probe(struct snd_soc_codec *codec) struct snd_soc_dapm_context *dapm = snd_soc_codec_get_dapm(codec); struct snd_soc_component *component = snd_soc_dapm_to_component(dapm); struct cs47l24_priv *priv = snd_soc_codec_get_drvdata(codec); + struct arizona *arizona = priv->core.arizona; int ret; - priv->core.arizona->dapm = dapm; + arizona->dapm = dapm; + snd_soc_codec_init_regmap(codec, arizona->regmap); ret = arizona_init_spk(codec); if (ret < 0) @@ -1175,17 +1177,9 @@ static unsigned int cs47l24_digital_vu[] = { ARIZONA_DAC_DIGITAL_VOLUME_4L, }; -static struct regmap *cs47l24_get_regmap(struct device *dev) -{ - struct cs47l24_priv *priv = dev_get_drvdata(dev); - - return priv->core.arizona->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_cs47l24 = { .probe = cs47l24_codec_probe, .remove = cs47l24_codec_remove, - .get_regmap = cs47l24_get_regmap, .idle_bias_off = true, -- cgit v1.2.3 From 79b4885967cff17e0344b451e8fc4782013f0c9c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:06:15 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on 88pm860x To setup regmap, ALSA SoC has 
snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/88pm860x-codec.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/sound/soc/codecs/88pm860x-codec.c b/sound/soc/codecs/88pm860x-codec.c index 848c5fe49bc7..be8ea723dff9 100644 --- a/sound/soc/codecs/88pm860x-codec.c +++ b/sound/soc/codecs/88pm860x-codec.c @@ -1319,6 +1319,7 @@ static int pm860x_probe(struct snd_soc_codec *codec) int i, ret; pm860x->codec = codec; + snd_soc_codec_init_regmap(codec, pm860x->regmap); for (i = 0; i < 4; i++) { ret = request_threaded_irq(pm860x->irq[i], NULL, @@ -1348,18 +1349,10 @@ static int pm860x_remove(struct snd_soc_codec *codec) return 0; } -static struct regmap *pm860x_get_regmap(struct device *dev) -{ - struct pm860x_priv *pm860x = dev_get_drvdata(dev); - - return pm860x->regmap; -} - static const struct snd_soc_codec_driver soc_codec_dev_pm860x = { .probe = pm860x_probe, .remove = pm860x_remove, .set_bias_level = pm860x_set_bias_level, - .get_regmap = pm860x_get_regmap, .component_driver = { .controls = pm860x_snd_controls, -- cgit v1.2.3 From 42e193cdcb9e197e788638bd33bab3d1905815f6 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:06:29 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on msm8916 To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. 
Let's use snd_soc_component_init_regmap() and remove .get_regmap Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/msm8916-wcd-analog.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 066ea2f4ce7b..44062bb7bf2f 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -712,6 +712,8 @@ static int pm8916_wcd_analog_probe(struct snd_soc_codec *codec) return err; } + snd_soc_codec_init_regmap(codec, + dev_get_regmap(codec->dev->parent, NULL)); snd_soc_codec_set_drvdata(codec, priv); priv->pmic_rev = snd_soc_read(codec, CDC_D_REVISION1); priv->codec_version = snd_soc_read(codec, CDC_D_PERPH_SUBTYPE); @@ -943,11 +945,6 @@ static int pm8916_wcd_analog_set_jack(struct snd_soc_codec *codec, return 0; } -static struct regmap *pm8916_get_regmap(struct device *dev) -{ - return dev_get_regmap(dev->parent, NULL); -} - static irqreturn_t mbhc_btn_release_irq_handler(int irq, void *arg) { struct pm8916_wcd_analog_priv *priv = arg; @@ -1082,7 +1079,6 @@ static const struct snd_soc_codec_driver pm8916_wcd_analog = { .probe = pm8916_wcd_analog_probe, .remove = pm8916_wcd_analog_remove, .set_jack = pm8916_wcd_analog_set_jack, - .get_regmap = pm8916_get_regmap, .component_driver = { .controls = pm8916_wcd_analog_snd_controls, .num_controls = ARRAY_SIZE(pm8916_wcd_analog_snd_controls), -- cgit v1.2.3 From 4b952275d2e7c6b32671a121ea5303a467bf2a0f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 Nov 2017 06:06:44 +0000 Subject: ASoC: use snd_soc_component_init_regmap() on atmel-classd To setup regmap, ALSA SoC has snd_soc_component_init_regmap() and .get_regmap. But these are duplicated feature. This driver is using .get_regmap and set regmap by using dev_get_regmap(), but it is automatically done by snd_soc_component_add_unlocked(). let's remove .get_regmap. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/atmel/atmel-classd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c index 8445edd06737..ebabed69f0e6 100644 --- a/sound/soc/atmel/atmel-classd.c +++ b/sound/soc/atmel/atmel-classd.c @@ -308,15 +308,9 @@ static int atmel_classd_codec_resume(struct snd_soc_codec *codec) return regcache_sync(dd->regmap); } -static struct regmap *atmel_classd_codec_get_remap(struct device *dev) -{ - return dev_get_regmap(dev, NULL); -} - static struct snd_soc_codec_driver soc_codec_dev_classd = { .probe = atmel_classd_codec_probe, .resume = atmel_classd_codec_resume, - .get_regmap = atmel_classd_codec_get_remap, .component_driver = { .controls = atmel_classd_snd_controls, .num_controls = ARRAY_SIZE(atmel_classd_snd_controls), -- cgit v1.2.3 From 7afa535eb107d587b22ffbbbaaeb4a0b87b94496 Mon Sep 17 00:00:00 2001 From: "Mukunda, Vijendar" Date: Mon, 4 Dec 2017 20:46:24 +0530 Subject: ASoC: amd: added error checks in dma driver added additional error checks in acp dma driver v2: printed error codes for acp init & acp deinit failure cases. 
Signed-off-by: Vijendar Mukunda Signed-off-by: Mark Brown --- sound/soc/amd/acp-pcm-dma.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index b5e41df6bb3a..c33a512283a4 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -850,6 +850,9 @@ static snd_pcm_uframes_t acp_dma_pointer(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; struct audio_substream_data *rtd = runtime->private_data; + if (!rtd) + return -EINVAL; + buffersize = frames_to_bytes(runtime, runtime->buffer_size); bytescount = acp_get_byte_count(rtd->acp_mmio, substream->stream); @@ -875,6 +878,8 @@ static int acp_dma_prepare(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; struct audio_substream_data *rtd = runtime->private_data; + if (!rtd) + return -EINVAL; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { config_acp_dma_channel(rtd->acp_mmio, SYSRAM_TO_ACP_CH_NUM, PLAYBACK_START_DMA_DESCR_CH12, @@ -1091,7 +1096,11 @@ static int acp_audio_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, audio_drv_data); /* Initialize the ACP */ - acp_init(audio_drv_data->acp_mmio, audio_drv_data->asic_type); + status = acp_init(audio_drv_data->acp_mmio, audio_drv_data->asic_type); + if (status) { + dev_err(&pdev->dev, "ACP Init failed status:%d\n", status); + return status; + } status = snd_soc_register_platform(&pdev->dev, &acp_asoc_platform); if (status != 0) { @@ -1108,9 +1117,12 @@ static int acp_audio_probe(struct platform_device *pdev) static int acp_audio_remove(struct platform_device *pdev) { + int status; struct audio_drv_data *adata = dev_get_drvdata(&pdev->dev); - acp_deinit(adata->acp_mmio); + status = acp_deinit(adata->acp_mmio); + if (status) + dev_err(&pdev->dev, "ACP Deinit failed status:%d\n", status); snd_soc_unregister_platform(&pdev->dev); 
pm_runtime_disable(&pdev->dev); @@ -1120,9 +1132,14 @@ static int acp_audio_remove(struct platform_device *pdev) static int acp_pcm_resume(struct device *dev) { u16 bank; + int status; struct audio_drv_data *adata = dev_get_drvdata(dev); - acp_init(adata->acp_mmio, adata->asic_type); + status = acp_init(adata->acp_mmio, adata->asic_type); + if (status) { + dev_err(dev, "ACP Init failed status:%d\n", status); + return status; + } if (adata->play_stream && adata->play_stream->runtime) { /* For Stoney, Memory gating is disabled,i.e SRAM Banks @@ -1154,18 +1171,26 @@ static int acp_pcm_resume(struct device *dev) static int acp_pcm_runtime_suspend(struct device *dev) { + int status; struct audio_drv_data *adata = dev_get_drvdata(dev); - acp_deinit(adata->acp_mmio); + status = acp_deinit(adata->acp_mmio); + if (status) + dev_err(dev, "ACP Deinit failed status:%d\n", status); acp_reg_write(0, adata->acp_mmio, mmACP_EXTERNAL_INTR_ENB); return 0; } static int acp_pcm_runtime_resume(struct device *dev) { + int status; struct audio_drv_data *adata = dev_get_drvdata(dev); - acp_init(adata->acp_mmio, adata->asic_type); + status = acp_init(adata->acp_mmio, adata->asic_type); + if (status) { + dev_err(dev, "ACP Init failed status:%d\n", status); + return status; + } acp_reg_write(1, adata->acp_mmio, mmACP_EXTERNAL_INTR_ENB); return 0; } -- cgit v1.2.3 From 8e79ec98e1f613f6fda5d91b16f5e38cf0bd4627 Mon Sep 17 00:00:00 2001 From: Guneshwor Singh Date: Mon, 4 Dec 2017 10:30:11 +0530 Subject: ASoC: Intel: Skylake: Make local functions static Some functions are local to the source and do not need to be in global scope, so make them static. 
Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-nhlt.c | 4 ++-- sound/soc/intel/skylake/skl.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index 4d2136c0389a..ca5dc2be7b68 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -269,7 +269,7 @@ void skl_nhlt_remove_sysfs(struct skl *skl) * stores all possible rates supported in a rate table for the corresponding * sclk/sclkfs. */ -void skl_get_ssp_clks(struct skl *skl, struct skl_ssp_clk *ssp_clks, +static void skl_get_ssp_clks(struct skl *skl, struct skl_ssp_clk *ssp_clks, struct nhlt_fmt *fmt, u8 id) { struct skl_i2s_config_blob_legacy *i2s_config; @@ -360,7 +360,7 @@ void skl_get_ssp_clks(struct skl *skl, struct skl_ssp_clk *ssp_clks, } } -void skl_get_mclk(struct skl *skl, struct skl_ssp_clk *mclk, +static void skl_get_mclk(struct skl *skl, struct skl_ssp_clk *mclk, struct nhlt_fmt *fmt, u8 id) { struct skl_i2s_config_blob_legacy *i2s_config; diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index 63e5456ef401..a89592b2850e 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -545,7 +545,7 @@ struct skl_clk_parent_src *skl_get_parent_clk(u8 clk_id) return NULL; } -void init_skl_xtal_rate(int pci_id) +static void init_skl_xtal_rate(int pci_id) { switch (pci_id) { case 0x9d70: -- cgit v1.2.3 From 446c4724cc7174429ce381e5948e58da07274944 Mon Sep 17 00:00:00 2001 From: Guneshwor Singh Date: Mon, 4 Dec 2017 10:30:12 +0530 Subject: ASoC: Intel: Skylake: Fix descriptions for exported function args Fix a few incorrect descriptions for arguments in exported functions. 
Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-sst-utils.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c index 369ef7ce981c..746df24bfd82 100644 --- a/sound/soc/intel/skylake/skl-sst-utils.c +++ b/sound/soc/intel/skylake/skl-sst-utils.c @@ -178,7 +178,8 @@ static inline int skl_pvtid_128(struct uuid_module *module) * skl_get_pvt_id: generate a private id for use as module id * * @ctx: driver context - * @mconfig: module configuration data + * @uuid_mod: module's uuid + * @instance_id: module's instance id * * This generates a 128 bit private unique id for a module TYPE so that * module instance is unique @@ -208,7 +209,8 @@ EXPORT_SYMBOL_GPL(skl_get_pvt_id); * skl_put_pvt_id: free up the private id allocated * * @ctx: driver context - * @mconfig: module configuration data + * @uuid_mod: module's uuid + * @pvt_id: module pvt id * * This frees a 128 bit private unique id previously generated */ -- cgit v1.2.3 From 8f1a1df58e011e54ffcc2acd09249579e9467544 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 1 Dec 2017 22:32:35 -0800 Subject: ASoC: eukrea-tlv320: Remove include line of fsl_ssi.h The machine driver links both imx-ssi (legacy non-DT driver) and fsl_ssi (up-to-date DT based driver). So it also includes both imx-ssi.h and fsl_ssi.h header files. This creates a limitation for two header files -- they can't define anything with identical names. Since the eukrea-tlv320 machine driver now does not really need anything being defined in the fsl_ssi header file, and it's also going to take some time to clean up two SSI drivers, this patch takes a quick way to remove the #include fsl_ssi.h line for now. It can be added back once the header files are done refactoring. The eukrea-tlv320 driver is still compiled successfully without any error using imx_v6_v7_defconfig, after removing it.
Signed-off-by: Nicolin Chen Signed-off-by: Mark Brown --- sound/soc/fsl/eukrea-tlv320.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/fsl/eukrea-tlv320.c b/sound/soc/fsl/eukrea-tlv320.c index 84ef6385736c..191426a6d9ad 100644 --- a/sound/soc/fsl/eukrea-tlv320.c +++ b/sound/soc/fsl/eukrea-tlv320.c @@ -29,7 +29,6 @@ #include "../codecs/tlv320aic23.h" #include "imx-ssi.h" -#include "fsl_ssi.h" #include "imx-audmux.h" #define CODEC_CLOCK 12000000 -- cgit v1.2.3 From 70424d8e6e15abd32e189130be220d0063e082bc Mon Sep 17 00:00:00 2001 From: John Hsu Date: Fri, 1 Dec 2017 10:01:50 +0800 Subject: ASoC: nau8825: improve semaphore control After reviewing the crosstalk protection, there are two flaws in semaphore control. The first one is that the semaphore releases are not enough; and the other is that down_interruptible has a risk of making the ISR sleep. Therefore, the driver adds more releases before the functions return. Take down_trylock to replace down_interruptible. The ISR can control the protection as well and never sleep by semaphore. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8825.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 603cd72c2a25..5778eadbf9e6 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -245,13 +245,14 @@ static const unsigned short logtable[256] = { * tasks are allowed to acquire the semaphore, calling this function will * put the task to sleep. If the semaphore is not released within the * specified number of jiffies, this function returns. - * Acquires the semaphore without jiffies. If no more tasks are allowed - * to acquire the semaphore, calling this function will put the task to - * sleep until the semaphore is released. * If the semaphore is not released within the specified number of jiffies, - * this function returns -ETIME.
- * If the sleep is interrupted by a signal, this function will return -EINTR. - * It returns 0 if the semaphore was acquired successfully. + * this function returns -ETIME. If the sleep is interrupted by a signal, + * this function will return -EINTR. It returns 0 if the semaphore was + * acquired successfully. + * + * Acquires the semaphore without jiffies. Try to acquire the semaphore + * atomically. Returns 0 if the semaphore has been acquired successfully + * or 1 if it it cannot be acquired. */ static int nau8825_sema_acquire(struct nau8825 *nau8825, long timeout) { @@ -262,8 +263,8 @@ static int nau8825_sema_acquire(struct nau8825 *nau8825, long timeout) if (ret < 0) dev_warn(nau8825->dev, "Acquire semaphore timeout\n"); } else { - ret = down_interruptible(&nau8825->xtalk_sem); - if (ret < 0) + ret = down_trylock(&nau8825->xtalk_sem); + if (ret) dev_warn(nau8825->dev, "Acquire semaphore fail\n"); } @@ -1246,8 +1247,10 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream, regmap_read(nau8825->regmap, NAU8825_REG_DAC_CTRL1, &osr); osr &= NAU8825_DAC_OVERSAMPLE_MASK; if (nau8825_clock_check(nau8825, substream->stream, - params_rate(params), osr)) + params_rate(params), osr)) { + nau8825_sema_release(nau8825); return -EINVAL; + } regmap_update_bits(nau8825->regmap, NAU8825_REG_CLK_DIVIDER, NAU8825_CLK_DAC_SRC_MASK, osr_dac_sel[osr].clk_src << NAU8825_CLK_DAC_SRC_SFT); @@ -1255,8 +1258,10 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream, regmap_read(nau8825->regmap, NAU8825_REG_ADC_RATE, &osr); osr &= NAU8825_ADC_SYNC_DOWN_MASK; if (nau8825_clock_check(nau8825, substream->stream, - params_rate(params), osr)) + params_rate(params), osr)) { + nau8825_sema_release(nau8825); return -EINVAL; + } regmap_update_bits(nau8825->regmap, NAU8825_REG_CLK_DIVIDER, NAU8825_CLK_ADC_SRC_MASK, osr_adc_sel[osr].clk_src << NAU8825_CLK_ADC_SRC_SFT); @@ -1273,8 +1278,10 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream, bclk_div = 1; 
else if (bclk_fs <= 128) bclk_div = 0; - else + else { + nau8825_sema_release(nau8825); return -EINVAL; + } regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2, NAU8825_I2S_LRC_DIV_MASK | NAU8825_I2S_BLK_DIV_MASK, ((bclk_div + 1) << NAU8825_I2S_LRC_DIV_SFT) | bclk_div); @@ -1294,6 +1301,7 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream, val_len |= NAU8825_I2S_DL_32; break; default: + nau8825_sema_release(nau8825); return -EINVAL; } @@ -1312,8 +1320,6 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec); unsigned int ctrl1_val = 0, ctrl2_val = 0; - nau8825_sema_acquire(nau8825, 3 * HZ); - switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFM: ctrl2_val |= NAU8825_I2S_MS_MASTER; @@ -1355,6 +1361,8 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) return -EINVAL; } + nau8825_sema_acquire(nau8825, 3 * HZ); + regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL1, NAU8825_I2S_DL_MASK | NAU8825_I2S_DF_MASK | NAU8825_I2S_BP_MASK | NAU8825_I2S_PCMB_MASK, @@ -1701,7 +1709,7 @@ static irqreturn_t nau8825_interrupt(int irq, void *data) int ret; nau8825->xtalk_protect = true; ret = nau8825_sema_acquire(nau8825, 0); - if (ret < 0) + if (ret) nau8825->xtalk_protect = false; } /* Startup cross talk detection process */ @@ -2383,7 +2391,7 @@ static int __maybe_unused nau8825_resume(struct snd_soc_codec *codec) regcache_sync(nau8825->regmap); nau8825->xtalk_protect = true; ret = nau8825_sema_acquire(nau8825, 0); - if (ret < 0) + if (ret) nau8825->xtalk_protect = false; enable_irq(nau8825->irq); -- cgit v1.2.3 From adf6895754e2503d994a765535fd1813f8834674 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 Nov 2017 19:42:52 -0800 Subject: acpi, nfit: fix health event notification Integration testing with a BIOS that generates injected health event notifications fails to communicate those events to 
userspace. The nfit driver neglects to link the ACPI DIMM device with the necessary driver data so acpi_nvdimm_notify() fails this lookup: nfit_mem = dev_get_drvdata(dev); if (nfit_mem && nfit_mem->flags_attr) sysfs_notify_dirent(nfit_mem->flags_attr); Add the necessary linkage when installing the notification handler and clean it up when the nfit driver instance is torn down. Cc: Cc: Toshi Kani Cc: Vishal Verma Fixes: ba9c8dd3c222 ("acpi, nfit: add dimm device notification support") Reported-by: Daniel Osawa Tested-by: Daniel Osawa Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index ff2580e7611d..abeb4df4f22e 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dev_name(&adev_dimm->dev)); return -ENXIO; } + /* + * Record nfit_mem for the notification path to track back to + * the nfit sysfs attributes for this dimm device object. + */ + dev_set_drvdata(&adev_dimm->dev, nfit_mem); /* * Until standardization materializes we need to consider 4 @@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data) sysfs_put(nfit_mem->flags_attr); nfit_mem->flags_attr = NULL; } - if (adev_dimm) + if (adev_dimm) { acpi_remove_notify_handler(adev_dimm->handle, ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + dev_set_drvdata(&adev_dimm->dev, NULL); + } } mutex_unlock(&acpi_desc->init_mutex); } -- cgit v1.2.3 From fa25b4f59f1de9536104b632ec4c4c0b8f8e54ec Mon Sep 17 00:00:00 2001 From: John Hsu Date: Fri, 1 Dec 2017 10:01:37 +0800 Subject: ASoC: nau8825: improve crosstalk measurement protection The sequence of crosstalk measurement is not robust. The driver add more protections to make it stronger. The improvements as follows: (1)Give backup table the default value. The values are the same with the default of hardware registers. 
The impact will decline once restoring the backup table in the wrong way. (2)Add xtalk_baktab_initialized flag. The flag can keep the initiation status of backup table. It helps to backup and restore the backup-table correctly. (3)Add cancel parameter in the restore function. The volume ramping should do in crosstalk DONE state only. Otherwise, the delay action is not allowed because the restore may happen during the jack eject interruption. (4)Add xtalk_protect condition check before scheduling work in ISR. It makes sure the semaphore hold during the crosstalk measurement. The sequence is still under protection from playback interference. Signed-off-by: John Hsu Signed-off-by: Mark Brown --- sound/soc/codecs/nau8825.c | 45 +++++++++++++++++++++++++++++++-------------- sound/soc/codecs/nau8825.h | 1 + 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 5778eadbf9e6..2aea642b4a5d 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -194,10 +194,10 @@ static const struct reg_default nau8825_reg_defaults[] = { /* register backup table when cross talk detection */ static struct reg_default nau8825_xtalk_baktab[] = { - { NAU8825_REG_ADC_DGAIN_CTRL, 0 }, + { NAU8825_REG_ADC_DGAIN_CTRL, 0x00cf }, { NAU8825_REG_HSVOL_CTRL, 0 }, - { NAU8825_REG_DACL_CTRL, 0 }, - { NAU8825_REG_DACR_CTRL, 0 }, + { NAU8825_REG_DACL_CTRL, 0x00cf }, + { NAU8825_REG_DACR_CTRL, 0x02cf }, }; static const unsigned short logtable[256] = { @@ -455,22 +455,32 @@ static void nau8825_xtalk_backup(struct nau8825 *nau8825) { int i; + if (nau8825->xtalk_baktab_initialized) + return; + /* Backup some register values to backup table */ for (i = 0; i < ARRAY_SIZE(nau8825_xtalk_baktab); i++) regmap_read(nau8825->regmap, nau8825_xtalk_baktab[i].reg, &nau8825_xtalk_baktab[i].def); + + nau8825->xtalk_baktab_initialized = true; } -static void nau8825_xtalk_restore(struct nau8825 *nau8825) +static void 
nau8825_xtalk_restore(struct nau8825 *nau8825, bool cause_cancel) { int i, volume; + if (!nau8825->xtalk_baktab_initialized) + return; + /* Restore register values from backup table; When the driver restores - * the headphone volumem, it needs recover to original level gradually - * with 3dB per step for less pop noise. + * the headphone volume in XTALK_DONE state, it needs recover to + * original level gradually with 3dB per step for less pop noise. + * Otherwise, the restore should do ASAP. */ for (i = 0; i < ARRAY_SIZE(nau8825_xtalk_baktab); i++) { - if (nau8825_xtalk_baktab[i].reg == NAU8825_REG_HSVOL_CTRL) { + if (!cause_cancel && nau8825_xtalk_baktab[i].reg == + NAU8825_REG_HSVOL_CTRL) { /* Ramping up the volume change to reduce pop noise */ volume = nau8825_xtalk_baktab[i].def & NAU8825_HPR_VOL_MASK; @@ -480,6 +490,8 @@ static void nau8825_xtalk_restore(struct nau8825 *nau8825) regmap_write(nau8825->regmap, nau8825_xtalk_baktab[i].reg, nau8825_xtalk_baktab[i].def); } + + nau8825->xtalk_baktab_initialized = false; } static void nau8825_xtalk_prepare_dac(struct nau8825 *nau8825) @@ -645,7 +657,7 @@ static void nau8825_xtalk_clean_adc(struct nau8825 *nau8825) NAU8825_POWERUP_ADCL | NAU8825_ADC_VREFSEL_MASK, 0); } -static void nau8825_xtalk_clean(struct nau8825 *nau8825) +static void nau8825_xtalk_clean(struct nau8825 *nau8825, bool cause_cancel) { /* Enable internal VCO needed for interruptions */ nau8825_configure_sysclk(nau8825, NAU8825_CLK_INTERNAL, 0); @@ -661,7 +673,7 @@ static void nau8825_xtalk_clean(struct nau8825 *nau8825) NAU8825_I2S_MS_MASK | NAU8825_I2S_LRC_DIV_MASK | NAU8825_I2S_BLK_DIV_MASK, NAU8825_I2S_MS_SLAVE); /* Restore value of specific register for cross talk */ - nau8825_xtalk_restore(nau8825); + nau8825_xtalk_restore(nau8825, cause_cancel); } static void nau8825_xtalk_imm_start(struct nau8825 *nau8825, int vol) @@ -780,7 +792,7 @@ static void nau8825_xtalk_measure(struct nau8825 *nau8825) dev_dbg(nau8825->dev, "cross talk sidetone: %x\n", 
sidetone); regmap_write(nau8825->regmap, NAU8825_REG_DAC_DGAIN_CTRL, (sidetone << 8) | sidetone); - nau8825_xtalk_clean(nau8825); + nau8825_xtalk_clean(nau8825, false); nau8825->xtalk_state = NAU8825_XTALK_DONE; break; default: @@ -823,7 +835,7 @@ static void nau8825_xtalk_cancel(struct nau8825 *nau8825) if (nau8825->xtalk_enable && nau8825->xtalk_state != NAU8825_XTALK_DONE) { cancel_work_sync(&nau8825->xtalk_work); - nau8825_xtalk_clean(nau8825); + nau8825_xtalk_clean(nau8825, true); } /* Reset parameters for cross talk suppression function */ nau8825_sema_reset(nau8825); @@ -1713,8 +1725,11 @@ static irqreturn_t nau8825_interrupt(int irq, void *data) nau8825->xtalk_protect = false; } /* Startup cross talk detection process */ - nau8825->xtalk_state = NAU8825_XTALK_PREPARE; - schedule_work(&nau8825->xtalk_work); + if (nau8825->xtalk_protect) { + nau8825->xtalk_state = + NAU8825_XTALK_PREPARE; + schedule_work(&nau8825->xtalk_work); + } } else { /* The cross talk suppression shouldn't apply * in the headset with high impedance. 
Thus, @@ -1741,7 +1756,8 @@ static irqreturn_t nau8825_interrupt(int irq, void *data) nau8825->xtalk_event_mask = event_mask; } } else if (active_irq & NAU8825_IMPEDANCE_MEAS_IRQ) { - if (nau8825->xtalk_enable) + /* crosstalk detection enable and process on going */ + if (nau8825->xtalk_enable && nau8825->xtalk_protect) schedule_work(&nau8825->xtalk_work); clear_irq = NAU8825_IMPEDANCE_MEAS_IRQ; } else if ((active_irq & NAU8825_JACK_INSERTION_IRQ_MASK) == @@ -2578,6 +2594,7 @@ static int nau8825_i2c_probe(struct i2c_client *i2c, */ nau8825->xtalk_state = NAU8825_XTALK_DONE; nau8825->xtalk_protect = false; + nau8825->xtalk_baktab_initialized = false; sema_init(&nau8825->xtalk_sem, 1); INIT_WORK(&nau8825->xtalk_work, nau8825_xtalk_work); diff --git a/sound/soc/codecs/nau8825.h b/sound/soc/codecs/nau8825.h index 199d6ea4dcdc..f7e732125882 100644 --- a/sound/soc/codecs/nau8825.h +++ b/sound/soc/codecs/nau8825.h @@ -477,6 +477,7 @@ struct nau8825 { bool xtalk_protect; int imp_rms[NAU8825_XTALK_IMM]; int xtalk_enable; + bool xtalk_baktab_initialized; /* True if initialized. */ }; int nau8825_enable_jack_detect(struct snd_soc_codec *codec, -- cgit v1.2.3 From a7e6425ea5816947d3cb51fbf57351207c074383 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Oct 2017 08:25:02 -0700 Subject: workqueue: Eliminate cond_resched_rcu_qs() in favor of cond_resched() Now that cond_resched() also provides RCU quiescent states when needed, it can be used in place of cond_resched_rcu_qs(). This commit therefore makes this change. Signed-off-by: Paul E. McKenney Cc: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8fdb710bfdd7..aee7eaab05cb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2135,7 +2135,7 @@ __acquires(&pool->lock) * stop_machine. At the same time, report a quiescent RCU state so * the same condition doesn't freeze RCU. 
*/ - cond_resched_rcu_qs(); + cond_resched(); spin_lock_irq(&pool->lock); -- cgit v1.2.3 From e31d28b6ab8f0431e2288edb02723269f54d1471 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Oct 2017 08:26:32 -0700 Subject: trace: Eliminate cond_resched_rcu_qs() in favor of cond_resched() Now that cond_resched() also provides RCU quiescent states when needed, it can be used in place of cond_resched_rcu_qs(). This commit therefore makes this change. Signed-off-by: Paul E. McKenney Cc: Steven Rostedt Cc: Ingo Molnar --- kernel/trace/trace_benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c index 79f838a75077..22fee766081b 100644 --- a/kernel/trace/trace_benchmark.c +++ b/kernel/trace/trace_benchmark.c @@ -165,7 +165,7 @@ static int benchmark_event_kthread(void *arg) * this thread will never voluntarily schedule which would * block synchronize_rcu_tasks() indefinitely. */ - cond_resched_rcu_qs(); + cond_resched(); } return 0; -- cgit v1.2.3 From edf22f4ca26babcd8cba4f049c6be53f0e73bcc1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Oct 2017 08:31:12 -0700 Subject: softirq: Eliminate cond_resched_rcu_qs() in favor of cond_resched() Now that cond_resched() also provides RCU quiescent states when needed, it can be used in place of cond_resched_rcu_qs(). This commit therefore makes this change. Signed-off-by: Paul E. McKenney Cc: NeilBrown Cc: Ingo Molnar --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 2f5e87f1bae2..24d243ef8e71 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -665,7 +665,7 @@ static void run_ksoftirqd(unsigned int cpu) */ __do_softirq(); local_irq_enable(); - cond_resched_rcu_qs(); + cond_resched(); return; } local_irq_enable(); -- cgit v1.2.3 From 388a4c88064e7e62602b4d92ca127f0b0c9b305a Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 24 Oct 2017 08:39:34 -0700 Subject: fs: Eliminate cond_resched_rcu_qs() in favor of cond_resched() Now that cond_resched() also provides RCU quiescent states when needed, it can be used in place of cond_resched_rcu_qs(). This commit therefore makes this change. Signed-off-by: Paul E. McKenney Cc: Alexander Viro Cc: --- fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/file.c b/fs/file.c index 3b080834b870..fc0eeb812e2c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -391,7 +391,7 @@ static struct fdtable *close_files(struct files_struct * files) struct file * file = xchg(&fdt->fd[i], NULL); if (file) { filp_close(file, files); - cond_resched_rcu_qs(); + cond_resched(); } } i++; -- cgit v1.2.3 From f2b1760aedba1d8394636ba31b9e864e82527528 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Oct 2017 08:42:41 -0700 Subject: doc: Eliminate cond_resched_rcu_qs() in favor of cond_resched() Now that cond_resched() also provides RCU quiescent states when needed, it can be used in place of cond_resched_rcu_qs(). This commit therefore documents this change. Signed-off-by: Paul E. McKenney --- Documentation/RCU/Design/Data-Structures/Data-Structures.html | 3 ++- Documentation/RCU/Design/Requirements/Requirements.html | 4 ++-- Documentation/RCU/stallwarn.txt | 10 ++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index 38d6d800761f..412466e4967a 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -1097,7 +1097,8 @@ will cause the CPU to disregard the values of its counters on its next exit from idle. Finally, the rcu_qs_ctr_snap field is used to detect cases where a given operation has resulted in a quiescent state -for all flavors of RCU, for example, cond_resched_rcu_qs(). 
+for all flavors of RCU, for example, cond_resched() +when RCU has indicated a need for quiescent states.

RCU Callback Handling
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index 62e847bcdcdd..0372e6c54eef 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -2797,7 +2797,7 @@ RCU must avoid degrading real-time response for CPU-bound threads, whether executing in usermode (which is one use case for CONFIG_NO_HZ_FULL=y) or in the kernel. That said, CPU-bound loops in the kernel must execute -cond_resched_rcu_qs() at least once per few tens of milliseconds +cond_resched() at least once per few tens of milliseconds in order to avoid receiving an IPI from RCU.

@@ -3128,7 +3128,7 @@ The solution, in the form of is to have implicit read-side critical sections that are delimited by voluntary context switches, that is, calls to schedule(), -cond_resched_rcu_qs(), and +cond_resched(), and synchronize_rcu_tasks(). In addition, transitions to and from userspace execution also delimit tasks-RCU read-side critical sections. diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index a08f928c8557..4259f95c3261 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -23,12 +23,10 @@ o A CPU looping with preemption disabled. This condition can o A CPU looping with bottom halves disabled. This condition can result in RCU-sched and RCU-bh stalls. -o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the - kernel without invoking schedule(). Note that cond_resched() - does not necessarily prevent RCU CPU stall warnings. Therefore, - if the looping in the kernel is really expected and desirable - behavior, you might need to replace some of the cond_resched() - calls with calls to cond_resched_rcu_qs(). +o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel + without invoking schedule(). If the looping in the kernel is + really expected and desirable behavior, you might need to add + some calls to cond_resched(). o Booting Linux using a console connection that is too slow to keep up with the boot-time console-message rate. For example, -- cgit v1.2.3 From dc259accec0845ddf56e87337c0b211026eca0ae Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 5 Nov 2017 05:51:43 -0800 Subject: rcu: Account for rcu_all_qs() in cond_resched() If cond_resched() returns false, then it has already invoked rcu_all_qs(). This is also invoked (now redundantly) by rcu_note_voluntary_context_switch(). 
This commit therefore changes cond_resched_rcu_qs() to invoke rcu_note_voluntary_context_switch_lite() instead of rcu_note_voluntary_context_switch() to avoid the redundant invocation of rcu_all_qs(). Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a6ddc42f87a5..7bd8b5a6db10 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -197,7 +197,7 @@ static inline void exit_tasks_rcu_finish(void) { } #define cond_resched_rcu_qs() \ do { \ if (!cond_resched()) \ - rcu_note_voluntary_context_switch(current); \ + rcu_note_voluntary_context_switch_lite(current); \ } while (0) /* -- cgit v1.2.3 From eb733366f5f7f416a7d9215a40e00d57aa193361 Mon Sep 17 00:00:00 2001 From: Katsuhiro Suzuki Date: Wed, 22 Nov 2017 20:43:14 +0900 Subject: ASoC: spdif: Add S32_LE support for S/PDIF dummy codec drivers AIO on UniPhier can output S/PDIF where no codec is needed. This patch adds S32_LE support for dummy codec drivers. If one S/PDIF controller has its own limitation, its CPU DAI driver should set the supported format by its own circumstance, since the soc-pcm driver will use the intersection of cpu_dai's formats and codec_dai's formats. 
Signed-off-by: Katsuhiro Suzuki Signed-off-by: Mark Brown --- sound/soc/codecs/spdif_receiver.c | 5 +++-- sound/soc/codecs/spdif_transmitter.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/spdif_receiver.c b/sound/soc/codecs/spdif_receiver.c index 7acd05140a81..c8fd6367f6c0 100644 --- a/sound/soc/codecs/spdif_receiver.c +++ b/sound/soc/codecs/spdif_receiver.c @@ -34,10 +34,11 @@ static const struct snd_soc_dapm_route dir_routes[] = { #define STUB_RATES SNDRV_PCM_RATE_8000_192000 #define STUB_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ SNDRV_PCM_FMTBIT_S20_3LE | \ - SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE | \ SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE) -static const struct snd_soc_codec_driver soc_codec_spdif_dir = { +static struct snd_soc_codec_driver soc_codec_spdif_dir = { .component_driver = { .dapm_widgets = dir_widgets, .num_dapm_widgets = ARRAY_SIZE(dir_widgets), diff --git a/sound/soc/codecs/spdif_transmitter.c b/sound/soc/codecs/spdif_transmitter.c index 063a64ff82d3..037aa1d45559 100644 --- a/sound/soc/codecs/spdif_transmitter.c +++ b/sound/soc/codecs/spdif_transmitter.c @@ -27,7 +27,8 @@ #define STUB_RATES SNDRV_PCM_RATE_8000_192000 #define STUB_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ SNDRV_PCM_FMTBIT_S20_3LE | \ - SNDRV_PCM_FMTBIT_S24_LE) + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE) static const struct snd_soc_dapm_widget dit_widgets[] = { SND_SOC_DAPM_OUTPUT("spdif-out"), @@ -37,7 +38,7 @@ static const struct snd_soc_dapm_route dit_routes[] = { { "spdif-out", NULL, "Playback" }, }; -static const struct snd_soc_codec_driver soc_codec_spdif_dit = { +static struct snd_soc_codec_driver soc_codec_spdif_dit = { .component_driver = { .dapm_widgets = dit_widgets, .num_dapm_widgets = ARRAY_SIZE(dit_widgets), -- cgit v1.2.3 From e85c8d3e25c09fd9b21ba74e14078ab4c1d977ef Mon Sep 17 00:00:00 2001 From: Katsuhiro Suzuki Date: Wed, 22 Nov 2017 20:43:15 +0900 Subject: ASoC: uniphier: add 
DT bindings documentation for UniPhier EVEA This patch adds DT binding documentation for UniPhier EVEA that is SoC inner sound codec of UniPhier series. Signed-off-by: Katsuhiro Suzuki Acked-by: Rob Herring Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/uniphier,evea.txt | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/uniphier,evea.txt diff --git a/Documentation/devicetree/bindings/sound/uniphier,evea.txt b/Documentation/devicetree/bindings/sound/uniphier,evea.txt new file mode 100644 index 000000000000..3f31b235f18b --- /dev/null +++ b/Documentation/devicetree/bindings/sound/uniphier,evea.txt @@ -0,0 +1,26 @@ +Socionext EVEA - UniPhier SoC internal codec driver + +Required properties: +- compatible : should be "socionext,uniphier-evea". +- reg : offset and length of the register set for the device. +- clock-names : should include following entries: + "evea", "exiv" +- clocks : a list of phandle, should contain an entry for each + entries in clock-names. +- reset-names : should include following entries: + "evea", "exiv", "adamv" +- resets : a list of phandle, should contain reset entries of + reset-names. +- #sound-dai-cells: should be 1. + +Example: + + codec { + compatible = "socionext,uniphier-evea"; + reg = <0x57900000 0x1000>; + clock-names = "evea", "exiv"; + clocks = <&sys_clk 41>, <&sys_clk 42>; + reset-names = "evea", "exiv", "adamv"; + resets = <&sys_rst 41>, <&sys_rst 42>, <&adamv_rst 0>; + #sound-dai-cells = <1>; + }; -- cgit v1.2.3 From 3a47b1dfa2913038623cec3164adfb2448269fa6 Mon Sep 17 00:00:00 2001 From: Katsuhiro Suzuki Date: Wed, 22 Nov 2017 20:43:17 +0900 Subject: ASoC: uniphier: add support for UniPhier EVEA codec This patch adds EVEA codec driver. This codec core is in inside of UniPhier SoC. 
Signed-off-by: Katsuhiro Suzuki Signed-off-by: Mark Brown --- sound/soc/Kconfig | 1 + sound/soc/Makefile | 1 + sound/soc/uniphier/Kconfig | 19 ++ sound/soc/uniphier/Makefile | 3 + sound/soc/uniphier/evea.c | 567 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 591 insertions(+) create mode 100644 sound/soc/uniphier/Kconfig create mode 100644 sound/soc/uniphier/Makefile create mode 100644 sound/soc/uniphier/evea.c diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index d22758165496..84c3582f3982 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -71,6 +71,7 @@ source "sound/soc/stm/Kconfig" source "sound/soc/sunxi/Kconfig" source "sound/soc/tegra/Kconfig" source "sound/soc/txx9/Kconfig" +source "sound/soc/uniphier/Kconfig" source "sound/soc/ux500/Kconfig" source "sound/soc/xtensa/Kconfig" source "sound/soc/zte/Kconfig" diff --git a/sound/soc/Makefile b/sound/soc/Makefile index 5327f4d6c668..74cd1858d38b 100644 --- a/sound/soc/Makefile +++ b/sound/soc/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_SND_SOC) += stm/ obj-$(CONFIG_SND_SOC) += sunxi/ obj-$(CONFIG_SND_SOC) += tegra/ obj-$(CONFIG_SND_SOC) += txx9/ +obj-$(CONFIG_SND_SOC) += uniphier/ obj-$(CONFIG_SND_SOC) += ux500/ obj-$(CONFIG_SND_SOC) += xtensa/ obj-$(CONFIG_SND_SOC) += zte/ diff --git a/sound/soc/uniphier/Kconfig b/sound/soc/uniphier/Kconfig new file mode 100644 index 000000000000..02886a457eaf --- /dev/null +++ b/sound/soc/uniphier/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +config SND_SOC_UNIPHIER + tristate "ASoC support for UniPhier" + depends on (ARCH_UNIPHIER || COMPILE_TEST) + help + Say Y or M if you want to add support for the Socionext + UniPhier SoC audio interfaces. You will also need to select the + audio interfaces to support below. + If unsure select "N". 
+ +config SND_SOC_UNIPHIER_EVEA_CODEC + tristate "UniPhier SoC internal audio codec" + depends on SND_SOC_UNIPHIER + select REGMAP_MMIO + help + This adds Codec driver for Socionext UniPhier LD11/20 SoC + internal DAC. This driver supports Line In / Out and HeadPhone. + Select Y if you use such device. + If unsure select "N". diff --git a/sound/soc/uniphier/Makefile b/sound/soc/uniphier/Makefile new file mode 100644 index 000000000000..3be00d72f5e5 --- /dev/null +++ b/sound/soc/uniphier/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +snd-soc-uniphier-evea-objs := evea.o +obj-$(CONFIG_SND_SOC_UNIPHIER_EVEA_CODEC) += snd-soc-uniphier-evea.o diff --git a/sound/soc/uniphier/evea.c b/sound/soc/uniphier/evea.c new file mode 100644 index 000000000000..0cc9efff1d9a --- /dev/null +++ b/sound/soc/uniphier/evea.c @@ -0,0 +1,567 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Socionext UniPhier EVEA ADC/DAC codec driver. + * + * Copyright (c) 2016-2017 Socionext Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "evea" +#define EVEA_RATES SNDRV_PCM_RATE_48000 +#define EVEA_FORMATS SNDRV_PCM_FMTBIT_S32_LE + +#define AADCPOW(n) (0x0078 + 0x04 * (n)) +#define AADCPOW_AADC_POWD BIT(0) +#define AHPOUTPOW 0x0098 +#define AHPOUTPOW_HP_ON BIT(4) +#define ALINEPOW 0x009c +#define ALINEPOW_LIN2_POWD BIT(3) +#define ALINEPOW_LIN1_POWD BIT(4) +#define ALO1OUTPOW 0x00a8 +#define ALO1OUTPOW_LO1_ON BIT(4) +#define ALO2OUTPOW 0x00ac +#define ALO2OUTPOW_ADAC2_MUTE BIT(0) +#define ALO2OUTPOW_LO2_ON BIT(4) +#define AANAPOW 0x00b8 +#define AANAPOW_A_POWD BIT(4) +#define ADACSEQ1(n) (0x0144 + 0x40 * (n)) +#define ADACSEQ1_MMUTE BIT(1) +#define ADACSEQ2(n) (0x0160 + 0x40 * (n)) +#define ADACSEQ2_ADACIN_FIX BIT(0) +#define ADAC1ODC 0x0200 +#define ADAC1ODC_HP_DIS_RES_MASK GENMASK(2, 1) +#define ADAC1ODC_HP_DIS_RES_OFF (0x0 << 1) +#define ADAC1ODC_HP_DIS_RES_ON (0x3 << 1) +#define ADAC1ODC_ADAC_RAMPCLT_MASK GENMASK(8, 7) +#define ADAC1ODC_ADAC_RAMPCLT_NORMAL (0x0 << 7) +#define ADAC1ODC_ADAC_RAMPCLT_REDUCE (0x1 << 7) + +struct evea_priv { + struct clk *clk, *clk_exiv; + struct reset_control *rst, *rst_exiv, *rst_adamv; + struct regmap *regmap; + + int switch_lin; + int switch_lo; + int switch_hp; +}; + +static const struct snd_soc_dapm_widget evea_widgets[] = { + SND_SOC_DAPM_ADC("ADC", "Capture", SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_INPUT("LIN1_LP"), + SND_SOC_DAPM_INPUT("LIN1_RP"), + SND_SOC_DAPM_INPUT("LIN2_LP"), + SND_SOC_DAPM_INPUT("LIN2_RP"), + SND_SOC_DAPM_INPUT("LIN3_LP"), + SND_SOC_DAPM_INPUT("LIN3_RP"), + + SND_SOC_DAPM_DAC("DAC", "Playback", SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_OUTPUT("HP1_L"), + SND_SOC_DAPM_OUTPUT("HP1_R"), + SND_SOC_DAPM_OUTPUT("LO2_L"), + SND_SOC_DAPM_OUTPUT("LO2_R"), +}; + +static const struct snd_soc_dapm_route evea_routes[] = { + { "ADC", NULL, "LIN1_LP" }, + { "ADC", NULL, "LIN1_RP" }, + { "ADC", NULL, "LIN2_LP" }, + { "ADC", NULL, "LIN2_RP" }, + { "ADC", NULL, 
"LIN3_LP" }, + { "ADC", NULL, "LIN3_RP" }, + + { "HP1_L", NULL, "DAC" }, + { "HP1_R", NULL, "DAC" }, + { "LO2_L", NULL, "DAC" }, + { "LO2_R", NULL, "DAC" }, +}; + +static void evea_set_power_state_on(struct evea_priv *evea) +{ + struct regmap *map = evea->regmap; + + regmap_update_bits(map, AANAPOW, AANAPOW_A_POWD, + AANAPOW_A_POWD); + + regmap_update_bits(map, ADAC1ODC, ADAC1ODC_HP_DIS_RES_MASK, + ADAC1ODC_HP_DIS_RES_ON); + + regmap_update_bits(map, ADAC1ODC, ADAC1ODC_ADAC_RAMPCLT_MASK, + ADAC1ODC_ADAC_RAMPCLT_REDUCE); + + regmap_update_bits(map, ADACSEQ2(0), ADACSEQ2_ADACIN_FIX, 0); + regmap_update_bits(map, ADACSEQ2(1), ADACSEQ2_ADACIN_FIX, 0); + regmap_update_bits(map, ADACSEQ2(2), ADACSEQ2_ADACIN_FIX, 0); +} + +static void evea_set_power_state_off(struct evea_priv *evea) +{ + struct regmap *map = evea->regmap; + + regmap_update_bits(map, ADAC1ODC, ADAC1ODC_HP_DIS_RES_MASK, + ADAC1ODC_HP_DIS_RES_ON); + + regmap_update_bits(map, ADACSEQ1(0), ADACSEQ1_MMUTE, + ADACSEQ1_MMUTE); + regmap_update_bits(map, ADACSEQ1(1), ADACSEQ1_MMUTE, + ADACSEQ1_MMUTE); + regmap_update_bits(map, ADACSEQ1(2), ADACSEQ1_MMUTE, + ADACSEQ1_MMUTE); + + regmap_update_bits(map, ALO1OUTPOW, ALO1OUTPOW_LO1_ON, 0); + regmap_update_bits(map, ALO2OUTPOW, ALO2OUTPOW_LO2_ON, 0); + regmap_update_bits(map, AHPOUTPOW, AHPOUTPOW_HP_ON, 0); +} + +static int evea_update_switch_lin(struct evea_priv *evea) +{ + struct regmap *map = evea->regmap; + + if (evea->switch_lin) { + regmap_update_bits(map, ALINEPOW, + ALINEPOW_LIN2_POWD | ALINEPOW_LIN1_POWD, + ALINEPOW_LIN2_POWD | ALINEPOW_LIN1_POWD); + + regmap_update_bits(map, AADCPOW(0), AADCPOW_AADC_POWD, + AADCPOW_AADC_POWD); + regmap_update_bits(map, AADCPOW(1), AADCPOW_AADC_POWD, + AADCPOW_AADC_POWD); + } else { + regmap_update_bits(map, AADCPOW(0), AADCPOW_AADC_POWD, 0); + regmap_update_bits(map, AADCPOW(1), AADCPOW_AADC_POWD, 0); + + regmap_update_bits(map, ALINEPOW, + ALINEPOW_LIN2_POWD | ALINEPOW_LIN1_POWD, 0); + } + + return 0; +} + +static int 
evea_update_switch_lo(struct evea_priv *evea) +{ + struct regmap *map = evea->regmap; + + if (evea->switch_lo) { + regmap_update_bits(map, ADACSEQ1(0), ADACSEQ1_MMUTE, 0); + regmap_update_bits(map, ADACSEQ1(2), ADACSEQ1_MMUTE, 0); + + regmap_update_bits(map, ALO1OUTPOW, ALO1OUTPOW_LO1_ON, + ALO1OUTPOW_LO1_ON); + regmap_update_bits(map, ALO2OUTPOW, + ALO2OUTPOW_ADAC2_MUTE | ALO2OUTPOW_LO2_ON, + ALO2OUTPOW_ADAC2_MUTE | ALO2OUTPOW_LO2_ON); + } else { + regmap_update_bits(map, ADACSEQ1(0), ADACSEQ1_MMUTE, + ADACSEQ1_MMUTE); + regmap_update_bits(map, ADACSEQ1(2), ADACSEQ1_MMUTE, + ADACSEQ1_MMUTE); + + regmap_update_bits(map, ALO1OUTPOW, ALO1OUTPOW_LO1_ON, 0); + regmap_update_bits(map, ALO2OUTPOW, + ALO2OUTPOW_ADAC2_MUTE | ALO2OUTPOW_LO2_ON, + 0); + } + + return 0; +} + +static int evea_update_switch_hp(struct evea_priv *evea) +{ + struct regmap *map = evea->regmap; + + if (evea->switch_hp) { + regmap_update_bits(map, ADACSEQ1(1), ADACSEQ1_MMUTE, 0); + + regmap_update_bits(map, AHPOUTPOW, AHPOUTPOW_HP_ON, + AHPOUTPOW_HP_ON); + + regmap_update_bits(map, ADAC1ODC, ADAC1ODC_HP_DIS_RES_MASK, + ADAC1ODC_HP_DIS_RES_OFF); + } else { + regmap_update_bits(map, ADAC1ODC, ADAC1ODC_HP_DIS_RES_MASK, + ADAC1ODC_HP_DIS_RES_ON); + + regmap_update_bits(map, ADACSEQ1(1), ADACSEQ1_MMUTE, + ADACSEQ1_MMUTE); + + regmap_update_bits(map, AHPOUTPOW, AHPOUTPOW_HP_ON, 0); + } + + return 0; +} + +static void evea_update_switch_all(struct evea_priv *evea) +{ + evea_update_switch_lin(evea); + evea_update_switch_lo(evea); + evea_update_switch_hp(evea); +} + +static int evea_get_switch_lin(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.integer.value[0] = evea->switch_lin; + + return 0; +} + +static int evea_set_switch_lin(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = 
snd_soc_kcontrol_codec(kcontrol); + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + + if (evea->switch_lin == ucontrol->value.integer.value[0]) + return 0; + + evea->switch_lin = ucontrol->value.integer.value[0]; + + return evea_update_switch_lin(evea); +} + +static int evea_get_switch_lo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.integer.value[0] = evea->switch_lo; + + return 0; +} + +static int evea_set_switch_lo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + + if (evea->switch_lo == ucontrol->value.integer.value[0]) + return 0; + + evea->switch_lo = ucontrol->value.integer.value[0]; + + return evea_update_switch_lo(evea); +} + +static int evea_get_switch_hp(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + + ucontrol->value.integer.value[0] = evea->switch_hp; + + return 0; +} + +static int evea_set_switch_hp(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + + if (evea->switch_hp == ucontrol->value.integer.value[0]) + return 0; + + evea->switch_hp = ucontrol->value.integer.value[0]; + + return evea_update_switch_hp(evea); +} + +static const struct snd_kcontrol_new eva_controls[] = { + SOC_SINGLE_BOOL_EXT("Line Capture Switch", 0, + evea_get_switch_lin, evea_set_switch_lin), + SOC_SINGLE_BOOL_EXT("Line Playback Switch", 0, + evea_get_switch_lo, evea_set_switch_lo), + SOC_SINGLE_BOOL_EXT("Headphone Playback Switch", 0, + 
evea_get_switch_hp, evea_set_switch_hp), +}; + +static int evea_codec_probe(struct snd_soc_codec *codec) +{ + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + + evea->switch_lin = 1; + evea->switch_lo = 1; + evea->switch_hp = 1; + + evea_set_power_state_on(evea); + evea_update_switch_all(evea); + + return 0; +} + +static int evea_codec_suspend(struct snd_soc_codec *codec) +{ + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + + evea_set_power_state_off(evea); + + reset_control_assert(evea->rst_adamv); + reset_control_assert(evea->rst_exiv); + reset_control_assert(evea->rst); + + clk_disable_unprepare(evea->clk_exiv); + clk_disable_unprepare(evea->clk); + + return 0; +} + +static int evea_codec_resume(struct snd_soc_codec *codec) +{ + struct evea_priv *evea = snd_soc_codec_get_drvdata(codec); + int ret; + + ret = clk_prepare_enable(evea->clk); + if (ret) + return ret; + + ret = clk_prepare_enable(evea->clk_exiv); + if (ret) + goto err_out_clock; + + ret = reset_control_deassert(evea->rst); + if (ret) + goto err_out_clock_exiv; + + ret = reset_control_deassert(evea->rst_exiv); + if (ret) + goto err_out_reset; + + ret = reset_control_deassert(evea->rst_adamv); + if (ret) + goto err_out_reset_exiv; + + evea_set_power_state_on(evea); + evea_update_switch_all(evea); + + return 0; + +err_out_reset_exiv: + reset_control_assert(evea->rst_exiv); + +err_out_reset: + reset_control_assert(evea->rst); + +err_out_clock_exiv: + clk_disable_unprepare(evea->clk_exiv); + +err_out_clock: + clk_disable_unprepare(evea->clk); + + return ret; +} + +static struct snd_soc_codec_driver soc_codec_evea = { + .probe = evea_codec_probe, + .suspend = evea_codec_suspend, + .resume = evea_codec_resume, + + .component_driver = { + .dapm_widgets = evea_widgets, + .num_dapm_widgets = ARRAY_SIZE(evea_widgets), + .dapm_routes = evea_routes, + .num_dapm_routes = ARRAY_SIZE(evea_routes), + .controls = eva_controls, + .num_controls = ARRAY_SIZE(eva_controls), + }, +}; + +static 
struct snd_soc_dai_driver soc_dai_evea[] = { + { + .name = DRV_NAME "-line1", + .playback = { + .stream_name = "Line Out 1", + .formats = EVEA_FORMATS, + .rates = EVEA_RATES, + .channels_min = 2, + .channels_max = 2, + }, + .capture = { + .stream_name = "Line In 1", + .formats = EVEA_FORMATS, + .rates = EVEA_RATES, + .channels_min = 2, + .channels_max = 2, + }, + }, + { + .name = DRV_NAME "-hp1", + .playback = { + .stream_name = "Headphone 1", + .formats = EVEA_FORMATS, + .rates = EVEA_RATES, + .channels_min = 2, + .channels_max = 2, + }, + }, + { + .name = DRV_NAME "-lo2", + .playback = { + .stream_name = "Line Out 2", + .formats = EVEA_FORMATS, + .rates = EVEA_RATES, + .channels_min = 2, + .channels_max = 2, + }, + }, +}; + +static const struct regmap_config evea_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0xffc, + .cache_type = REGCACHE_NONE, +}; + +static int evea_probe(struct platform_device *pdev) +{ + struct evea_priv *evea; + struct resource *res; + void __iomem *preg; + int ret; + + evea = devm_kzalloc(&pdev->dev, sizeof(struct evea_priv), GFP_KERNEL); + if (!evea) + return -ENOMEM; + + evea->clk = devm_clk_get(&pdev->dev, "evea"); + if (IS_ERR(evea->clk)) + return PTR_ERR(evea->clk); + + evea->clk_exiv = devm_clk_get(&pdev->dev, "exiv"); + if (IS_ERR(evea->clk_exiv)) + return PTR_ERR(evea->clk_exiv); + + evea->rst = devm_reset_control_get_shared(&pdev->dev, "evea"); + if (IS_ERR(evea->rst)) + return PTR_ERR(evea->rst); + + evea->rst_exiv = devm_reset_control_get_shared(&pdev->dev, "exiv"); + if (IS_ERR(evea->rst_exiv)) + return PTR_ERR(evea->rst_exiv); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + preg = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(preg)) + return PTR_ERR(preg); + + evea->regmap = devm_regmap_init_mmio(&pdev->dev, preg, + &evea_regmap_config); + if (IS_ERR(evea->regmap)) + return PTR_ERR(evea->regmap); + + ret = clk_prepare_enable(evea->clk); + if (ret) + return ret; + + 
ret = clk_prepare_enable(evea->clk_exiv); + if (ret) + goto err_out_clock; + + ret = reset_control_deassert(evea->rst); + if (ret) + goto err_out_clock_exiv; + + ret = reset_control_deassert(evea->rst_exiv); + if (ret) + goto err_out_reset; + + /* ADAMV will hangup if EXIV reset is asserted */ + evea->rst_adamv = devm_reset_control_get_shared(&pdev->dev, "adamv"); + if (IS_ERR(evea->rst_adamv)) { + ret = PTR_ERR(evea->rst_adamv); + goto err_out_reset_exiv; + } + + ret = reset_control_deassert(evea->rst_adamv); + if (ret) + goto err_out_reset_exiv; + + platform_set_drvdata(pdev, evea); + + ret = snd_soc_register_codec(&pdev->dev, &soc_codec_evea, + soc_dai_evea, ARRAY_SIZE(soc_dai_evea)); + if (ret) + goto err_out_reset_adamv; + + return 0; + +err_out_reset_adamv: + reset_control_assert(evea->rst_adamv); + +err_out_reset_exiv: + reset_control_assert(evea->rst_exiv); + +err_out_reset: + reset_control_assert(evea->rst); + +err_out_clock_exiv: + clk_disable_unprepare(evea->clk_exiv); + +err_out_clock: + clk_disable_unprepare(evea->clk); + + return ret; +} + +static int evea_remove(struct platform_device *pdev) +{ + struct evea_priv *evea = platform_get_drvdata(pdev); + + snd_soc_unregister_codec(&pdev->dev); + + reset_control_assert(evea->rst_adamv); + reset_control_assert(evea->rst_exiv); + reset_control_assert(evea->rst); + + clk_disable_unprepare(evea->clk_exiv); + clk_disable_unprepare(evea->clk); + + return 0; +} + +static const struct of_device_id evea_of_match[] = { + { .compatible = "socionext,uniphier-evea", }, + {} +}; +MODULE_DEVICE_TABLE(of, evea_of_match); + +static struct platform_driver evea_codec_driver = { + .driver = { + .name = DRV_NAME, + .of_match_table = of_match_ptr(evea_of_match), + }, + .probe = evea_probe, + .remove = evea_remove, +}; +module_platform_driver(evea_codec_driver); + +MODULE_AUTHOR("Katsuhiro Suzuki "); +MODULE_DESCRIPTION("UniPhier EVEA codec driver"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 
576f8f46e7c923f830dfa61924ad547447399b05 Mon Sep 17 00:00:00 2001 From: Katsuhiro Suzuki Date: Wed, 22 Nov 2017 20:43:20 +0900 Subject: MAINTAINERS: add entries for UniPhier ASoC sound drivers Signed-off-by: Katsuhiro Suzuki Signed-off-by: Mark Brown --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..55ae8ea8722a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12581,6 +12581,12 @@ F: include/media/soc* F: drivers/media/i2c/soc_camera/ F: drivers/media/platform/soc_camera/ +SOCIONEXT UNIPHIER SOUND DRIVER +M: Katsuhiro Suzuki +L: alsa-devel@alsa-project.org (moderated for non-subscribers) +S: Maintained +F: sound/soc/uniphier/ + SOEKRIS NET48XX LED SUPPORT M: Chris Boot S: Maintained -- cgit v1.2.3 From 40555946447a394889243e4393e312f65d847e1e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 09:15:21 -0700 Subject: doc: READ_ONCE() now implies smp_read_barrier_depends() This commit updates an example in memory-barriers.txt to account for the fact that READ_ONCE() now implies smp_read_barrier_depends(). Signed-off-by: Paul E. McKenney [ paulmck: Added MEMORY_BARRIER instructions from DEC Alpha from READ_ONCE(), per David Howells's feedback. ] --- Documentation/memory-barriers.txt | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 479ecec80593..13fd35b6a597 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -227,17 +227,20 @@ There are some minimal guarantees that may be expected of a CPU: (*) On any given CPU, dependent memory accesses will be issued in order, with respect to itself. This means that for: - Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q); + Q = READ_ONCE(P); D = READ_ONCE(*Q); the CPU will issue the following memory operations: Q = LOAD P, D = LOAD *Q - and always in that order.
On most systems, smp_read_barrier_depends() - does nothing, but it is required for DEC Alpha. The READ_ONCE() - is required to prevent compiler mischief. Please note that you - should normally use something like rcu_dereference() instead of - open-coding smp_read_barrier_depends(). + and always in that order. However, on DEC Alpha, READ_ONCE() also + emits a memory-barrier instruction, so that a DEC Alpha CPU will + instead issue the following memory operations: + + Q = LOAD P, MEMORY_BARRIER, D = LOAD *Q, MEMORY_BARRIER + + Whether on DEC Alpha or not, the READ_ONCE() also prevents compiler + mischief. (*) Overlapping loads and stores within a particular CPU will appear to be ordered within that CPU. This means that for: -- cgit v1.2.3 From a4bd78ed215873a68869e41fd59543be8ca38e7f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 09:17:49 -0700 Subject: mn10300: READ_ONCE() now implies smp_read_barrier_depends() Given that READ_ONCE() now implies smp_read_barrier_depends(), there is no need for the open-coded smp_read_barrier_depends() in mn10300_serial_receive_interrupt() and mn10300_serial_poll_get_char(). This commit therefore removes them, but replaces them with comments calling out that carrying dependencies through non-pointers is quite dangerous. Compilers simply know too much about integers. Signed-off-by: Paul E. McKenney Cc: David Howells Cc: Mark Rutland Cc: --- arch/mn10300/kernel/mn10300-serial.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index d7ef1232a82a..4994b570dfd9 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -550,7 +550,7 @@ try_again: return; } - smp_read_barrier_depends(); + /* READ_ONCE() enforces dependency, but dangerous through integer!!! 
*/ ch = port->rx_buffer[ix++]; st = port->rx_buffer[ix++]; smp_mb(); @@ -1728,7 +1728,10 @@ static int mn10300_serial_poll_get_char(struct uart_port *_port) if (CIRC_CNT(port->rx_inp, ix, MNSC_BUFFER_SIZE) == 0) return NO_POLL_CHAR; - smp_read_barrier_depends(); + /* + * READ_ONCE() enforces dependency, but dangerous + * through integer!!! + */ ch = port->rx_buffer[ix++]; st = port->rx_buffer[ix++]; smp_mb(); -- cgit v1.2.3 From cb7e125e03274cffa97d74433c876765efffaf6a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 09:26:25 -0700 Subject: drivers/net/ethernet/qlogic/qed: Fix __qed_spq_block() ordering The __qed_spq_block() function expects an smp_read_barrier_depends() to order a prior READ_ONCE() against a later load that does not depend on the prior READ_ONCE(), an expectation that can fail to be met. This commit therefore replaces the READ_ONCE() with smp_load_acquire() and removes the smp_read_barrier_depends(). Signed-off-by: Paul E. McKenney Cc: Ariel Elior Cc: Cc: --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index be48d9abd001..c1237ec58b6c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -97,9 +97,7 @@ static int __qed_spq_block(struct qed_hwfn *p_hwfn, while (iter_cnt--) { /* Validate we receive completion update */ - if (READ_ONCE(comp_done->done) == 1) { - /* Read updated FW return value */ - smp_read_barrier_depends(); + if (smp_load_acquire(&comp_done->done) == 1) { /* ^^^ */ if (p_fw_ret) *p_fw_ret = comp_done->fw_return_code; return 0; -- cgit v1.2.3 From 7088efa9137a15d7d21e3abce73e40c9c8a18d68 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 9 Oct 2017 10:04:27 -0700 Subject: fs/dcache: Use release-acquire for name/length update The code in __d_alloc() carefully orders filling in the NUL character of the name (and the length, hash, and the name itself) with assigning of the name itself. However, prepend_name() does not order the accesses to the ->name and ->len fields, other than on TSO systems. This commit therefore replaces prepend_name()'s READ_ONCE() of ->name with an smp_load_acquire(), which orders against the subsequent READ_ONCE() of ->len. Because READ_ONCE() now incorporates smp_read_barrier_depends(), prepend_name()'s smp_read_barrier_depends() is removed. Finally, to save a line, the smp_wmb()/store pair in __d_alloc() is replaced by smp_store_release(). Signed-off-by: Paul E. McKenney Cc: Alexander Viro Cc: --- fs/dcache.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 5c7df1df81ff..379dce86f001 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1636,8 +1636,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dname[name->len] = 0; /* Make sure we always see the terminating NUL character */ - smp_wmb(); - dentry->d_name.name = dname; + smp_store_release(&dentry->d_name.name, dname); /* ^^^ */ dentry->d_lockref.count = 1; dentry->d_flags = 0; @@ -3047,17 +3046,14 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) * retry it again when a d_move() does happen. So any garbage in the buffer * due to mismatched pointer and length will be discarded. * - * Data dependency barrier is needed to make sure that we see that terminating - * NUL. Alpha strikes again, film at 11... + * Load acquire is needed to make sure that we see that terminating NUL. 
*/ static int prepend_name(char **buffer, int *buflen, const struct qstr *name) { - const char *dname = READ_ONCE(name->name); + const char *dname = smp_load_acquire(&name->name); /* ^^^ */ u32 dlen = READ_ONCE(name->len); char *p; - smp_read_barrier_depends(); - *buflen -= dlen + 1; if (*buflen < 0) return -ENAMETOOLONG; -- cgit v1.2.3 From b393e8b33efd2ee08576ceddc10c2b4bfb3b5435 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 10:20:44 -0700 Subject: percpu: READ_ONCE() now implies smp_read_barrier_depends() Because READ_ONCE() now implies smp_read_barrier_depends(), this commit removes the now-redundant smp_read_barrier_depends() following the READ_ONCE() in __ref_is_percpu(). Signed-off-by: Paul E. McKenney Acked-by: Tejun Heo Cc: Christoph Lameter --- include/linux/percpu-refcount.h | 6 +++--- lib/percpu-refcount.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 6658d9ee5257..864d167a1073 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -139,12 +139,12 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in * between contaminating the pointer value, meaning that * READ_ONCE() is required when fetching it. + * + * The smp_read_barrier_depends() implied by READ_ONCE() pairs + * with smp_store_release() in __percpu_ref_switch_to_percpu(). 
*/ percpu_ptr = READ_ONCE(ref->percpu_count_ptr); - /* paired with smp_store_release() in __percpu_ref_switch_to_percpu() */ - smp_read_barrier_depends(); - /* * Theoretically, the following could test just ATOMIC; however, * then we'd have to mask off DEAD separately as DEAD may be diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe03c6d52761..30e7dd88148b 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -197,10 +197,10 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); /* - * Restore per-cpu operation. smp_store_release() is paired with - * smp_read_barrier_depends() in __ref_is_percpu() and guarantees - * that the zeroing is visible to all percpu accesses which can see - * the following __PERCPU_REF_ATOMIC clearing. + * Restore per-cpu operation. smp_store_release() is paired + * with READ_ONCE() in __ref_is_percpu() and guarantees that the + * zeroing is visible to all percpu accesses which can see the + * following __PERCPU_REF_ATOMIC clearing. */ for_each_possible_cpu(cpu) *per_cpu_ptr(percpu_count, cpu) = 0; -- cgit v1.2.3 From 137f61f6528f2bd552a75c59567d29db2857af97 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 10:30:58 -0700 Subject: rcu: Adjust read-side accessor comments for READ_ONCE() Now that READ_ONCE() implies smp_read_barrier_depends(), this commit updates now-misleading comments to account for this change. Signed-off-by: Paul E.
McKenney --- include/linux/rcupdate.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a6ddc42f87a5..000432b87e5a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -433,12 +433,12 @@ static inline void rcu_preempt_sleep_check(void) { } * @p: The pointer to read * * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the READ_ONCE(). This is useful - * when the value of this pointer is accessed, but the pointer is not - * dereferenced, for example, when testing an RCU-protected pointer against - * NULL. Although rcu_access_pointer() may also be used in cases where - * update-side locks prevent the value of the pointer from changing, you - * should instead use rcu_dereference_protected() for this use case. + * lockdep checks for being in an RCU read-side critical section. This is + * useful when the value of this pointer is accessed, but the pointer is + * not dereferenced, for example, when testing an RCU-protected pointer + * against NULL. Although rcu_access_pointer() may also be used in cases + * where update-side locks prevent the value of the pointer from changing, + * you should instead use rcu_dereference_protected() for this use case. * * It is also permissible to use rcu_access_pointer() when read-side * access to the pointer was removed at least one grace period ago, as @@ -521,12 +521,11 @@ static inline void rcu_preempt_sleep_check(void) { } * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the READ_ONCE(). This - * is useful in cases where update-side locks prevent the value of the - * pointer from changing. 
Please note that this primitive does *not* - * prevent the compiler from repeating this reference or combining it - * with other references, so it should not be used without protection - * of appropriate locks. + * the READ_ONCE(). This is useful in cases where update-side locks + * prevent the value of the pointer from changing. Please note that this + * primitive does *not* prevent the compiler from repeating this reference + * or combining it with other references, so it should not be used without + * protection of appropriate locks. * * This function is only for update-side use. Using this function * when protected only by rcu_read_lock() will result in infrequent -- cgit v1.2.3 From 1ba9c5e6c615e8aca9041e27c40f25569704ae72 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 10:37:22 -0700 Subject: rtnetlink: Update now-misleading smp_read_barrier_depends() comment Now that READ_ONCE() implies smp_read_barrier_depends(), update the rtnl_dereference() header comment accordingly. Signed-off-by: Paul E. McKenney Cc: "David S. Miller" Cc: Vladislav Yasevich Cc: Mark Rutland Cc: David Ahern Cc: Vlad Yasevich --- include/linux/rtnetlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2032ce2eb20b..1eadec3fc228 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -70,8 +70,7 @@ static inline bool lockdep_rtnl_is_held(void) * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the READ_ONCE(), because - * caller holds RTNL. + * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) -- cgit v1.2.3 From 98b22737847cc015a797567e32d0a4826003afbf Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 9 Oct 2017 11:00:32 -0700 Subject: seqlock: Remove now-redundant smp_read_barrier_depends() READ_ONCE() now implies smp_read_barrier_depends(), so this patch removes the now-redundant smp_read_barrier_depends() from raw_read_seqcount_latch(). Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar --- include/linux/seqlock.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f189a8a3bbb8..bcf4cf26b8c8 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -278,9 +278,8 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) static inline int raw_read_seqcount_latch(seqcount_t *s) { - int seq = READ_ONCE(s->sequence); /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ - smp_read_barrier_depends(); + int seq = READ_ONCE(s->sequence); /* ^^^ */ return seq; } -- cgit v1.2.3 From 5c6338b4877038d28148fcfe1e7f862970ebaad1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 11:08:53 -0700 Subject: uprobes: Remove now-redundant smp_read_barrier_depends() Now that READ_ONCE() implies smp_read_barrier_depends(), the get_xol_area() and get_trampoline_vaddr() no longer need their smp_read_barrier_depends() calls, which this commit removes. While we are here, convert the corresponding smp_wmb() to an smp_store_release(). Signed-off-by: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin --- kernel/events/uprobes.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 267f6ef91d97..ce6848e46e94 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1167,8 +1167,8 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) } ret = 0; - smp_wmb(); /* pairs with get_xol_area() */ - mm->uprobes_state.xol_area = area; + /* pairs with get_xol_area() */ + smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */ fail: up_write(&mm->mmap_sem); @@ -1230,8 +1230,8 @@ static struct xol_area *get_xol_area(void) if (!mm->uprobes_state.xol_area) __create_xol_area(0); - area = mm->uprobes_state.xol_area; - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + /* Pairs with xol_add_vma() smp_store_release() */ + area = READ_ONCE(mm->uprobes_state.xol_area); /* ^^^ */ return area; } @@ -1528,8 +1528,8 @@ static unsigned long get_trampoline_vaddr(void) struct xol_area *area; unsigned long trampoline_vaddr = -1; - area = current->mm->uprobes_state.xol_area; - smp_read_barrier_depends(); + /* Pairs with xol_add_vma() smp_store_release() */ + area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */ if (area) trampoline_vaddr = area->vaddr; -- cgit v1.2.3 From 548095dea63ffc016d39c35b32c628d033638aca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 11:22:50 -0700 Subject: locking: Remove smp_read_barrier_depends() from queued_spin_lock_slowpath() Queued spinlocks are not used by DEC Alpha, and furthermore operations such as READ_ONCE() and release/relaxed RMW atomics are being changed to imply smp_read_barrier_depends(). This commit therefore removes the now-redundant smp_read_barrier_depends() from queued_spin_lock_slowpath(), and adjusts the comments accordingly. Signed-off-by: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Ingo Molnar --- kernel/locking/qspinlock.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 294294c71ba4..38ece035039e 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -170,7 +170,7 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) * @tail : The new queue tail code word * Return: The previous queue tail code word * - * xchg(lock, tail) + * xchg(lock, tail), which heads an address dependency * * p,*,* -> n,*,* ; prev = xchg(lock, node) */ @@ -409,13 +409,11 @@ queue: if (old & _Q_TAIL_MASK) { prev = decode_tail(old); /* - * The above xchg_tail() is also a load of @lock which generates, - * through decode_tail(), a pointer. - * - * The address dependency matches the RELEASE of xchg_tail() - * such that the access to @prev must happen after. + * The above xchg_tail() is also a load of @lock which + * generates, through decode_tail(), a pointer. The address + * dependency matches the RELEASE of xchg_tail() such that + * the subsequent access to @prev happens after. */ - smp_read_barrier_depends(); WRITE_ONCE(prev->next, node); -- cgit v1.2.3 From 243d1a7977ae0814aa1ccb8bb87f8a4e0822ca8f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 11:30:11 -0700 Subject: tracepoint: Remove smp_read_barrier_depends() from comment The comment in tracepoint_add_func() mentions smp_read_barrier_depends(), whose use should be quite restricted. This commit updates the comment to instead mention the smp_store_release() and rcu_dereference_sched() that the current code actually uses. Signed-off-by: Paul E. 
McKenney Cc: Ingo Molnar Acked-by: Steven Rostedt (VMware) Acked-by: Mathieu Desnoyers --- kernel/tracepoint.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 685c50ae6300..671b13457387 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -212,11 +212,10 @@ static int tracepoint_add_func(struct tracepoint *tp, } /* - * rcu_assign_pointer has a smp_wmb() which makes sure that the new - * probe callbacks array is consistent before setting a pointer to it. - * This array is referenced by __DO_TRACE from - * include/linux/tracepoints.h. A matching smp_read_barrier_depends() - * is used. + * rcu_assign_pointer has an smp_store_release() which makes sure + * that the new probe callbacks array is consistent before setting + * a pointer to it. This array is referenced by __DO_TRACE from + * include/linux/tracepoint.h using rcu_dereference_sched(). */ rcu_assign_pointer(tp->funcs, tp_funcs); if (!static_key_enabled(&tp->key)) -- cgit v1.2.3 From 516df050615e4b0fd2dd0448cb5a807208a3837a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 11:39:57 -0700 Subject: lib/assoc_array: Remove smp_read_barrier_depends() Now that smp_read_barrier_depends() is implied by READ_ONCE(), the several smp_read_barrier_depends() calls may be removed from lib/assoc_array.c. This commit makes this change and marks the READ_ONCE() calls that head address dependencies. Signed-off-by: Paul E.
McKenney Cc: Jonathan Corbet Cc: Mark Rutland Cc: Alexander Kuleshov Cc: David Howells --- lib/assoc_array.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index b77d51da8c73..c6659cb37033 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -38,12 +38,10 @@ begin_node: if (assoc_array_ptr_is_shortcut(cursor)) { /* Descend through a shortcut */ shortcut = assoc_array_ptr_to_shortcut(cursor); - smp_read_barrier_depends(); - cursor = READ_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ } node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); slot = 0; /* We perform two passes of each node. @@ -55,15 +53,12 @@ begin_node: */ has_meta = 0; for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ has_meta |= (unsigned long)ptr; if (ptr && assoc_array_ptr_is_leaf(ptr)) { - /* We need a barrier between the read of the pointer - * and dereferencing the pointer - but only if we are - * actually going to dereference it. + /* We need a barrier between the read of the pointer, + * which is supplied by the above READ_ONCE(). */ - smp_read_barrier_depends(); - /* Invoke the callback */ ret = iterator(assoc_array_ptr_to_leaf(ptr), iterator_data); @@ -86,10 +81,8 @@ begin_node: continue_node: node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); - for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (assoc_array_ptr_is_meta(ptr)) { cursor = ptr; goto begin_node; @@ -98,16 +91,15 @@ continue_node: finished_node: /* Move up to the parent (may need to skip back over a shortcut) */ - parent = READ_ONCE(node->back_pointer); + parent = READ_ONCE(node->back_pointer); /* Address dependency. 
*/ slot = node->parent_slot; if (parent == stop) return 0; if (assoc_array_ptr_is_shortcut(parent)) { shortcut = assoc_array_ptr_to_shortcut(parent); - smp_read_barrier_depends(); cursor = parent; - parent = READ_ONCE(shortcut->back_pointer); + parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */ slot = shortcut->parent_slot; if (parent == stop) return 0; @@ -147,7 +139,7 @@ int assoc_array_iterate(const struct assoc_array *array, void *iterator_data), void *iterator_data) { - struct assoc_array_ptr *root = READ_ONCE(array->root); + struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ if (!root) return 0; @@ -194,7 +186,7 @@ assoc_array_walk(const struct assoc_array *array, pr_devel("-->%s()\n", __func__); - cursor = READ_ONCE(array->root); + cursor = READ_ONCE(array->root); /* Address dependency. */ if (!cursor) return assoc_array_walk_tree_empty; @@ -216,11 +208,9 @@ jumped: consider_node: node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); - slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); slot &= ASSOC_ARRAY_FAN_MASK; - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ pr_devel("consider slot %x [ix=%d type=%lu]\n", slot, level, (unsigned long)ptr & 3); @@ -254,7 +244,6 @@ consider_node: cursor = ptr; follow_shortcut: shortcut = assoc_array_ptr_to_shortcut(cursor); - smp_read_barrier_depends(); pr_devel("shortcut to %d\n", shortcut->skip_to_level); sc_level = level + ASSOC_ARRAY_LEVEL_STEP; BUG_ON(sc_level > shortcut->skip_to_level); @@ -294,7 +283,7 @@ follow_shortcut: } while (sc_level < shortcut->skip_to_level); /* The shortcut matches the leaf's index to this point. */ - cursor = READ_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); /* Address dependency. 
*/ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { level = sc_level; goto jumped; @@ -331,20 +320,18 @@ void *assoc_array_find(const struct assoc_array *array, return NULL; node = result.terminal_node.node; - smp_read_barrier_depends(); /* If the target key is available to us, it's has to be pointed to by * the terminal node. */ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer * and dereferencing the pointer - but only if we are * actually going to dereference it. */ leaf = assoc_array_ptr_to_leaf(ptr); - smp_read_barrier_depends(); if (ops->compare_object(leaf, index_key)) return (void *)leaf; } -- cgit v1.2.3 From 08df477434754629303c9e2bfa8d67ecb44f9c20 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 11:51:45 -0700 Subject: mm/ksm: Remove now-redundant smp_read_barrier_depends() Because READ_ONCE() now implies smp_read_barrier_depends(), the smp_read_barrier_depends() in get_ksm_page() is now redundant. This commit removes it and updates the comments. Signed-off-by: Paul E. McKenney Cc: Andrew Morton Cc: Andrea Arcangeli Cc: Minchan Kim Cc: Michal Hocko Cc: "Kirill A. Shutemov" Cc: Ingo Molnar Cc: "Aneesh Kumar K.V" Cc: Claudio Imbrenda Cc: --- mm/ksm.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index be8f4576f842..c406f75957ad 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -675,15 +675,8 @@ static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it) expected_mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM); again: - kpfn = READ_ONCE(stable_node->kpfn); + kpfn = READ_ONCE(stable_node->kpfn); /* Address dependency. 
*/ page = pfn_to_page(kpfn); - - /* - * page is computed from kpfn, so on most architectures reading - * page->mapping is naturally ordered after reading node->kpfn, - * but on Alpha we need to be more careful. - */ - smp_read_barrier_depends(); if (READ_ONCE(page->mapping) != expected_mapping) goto stale; -- cgit v1.2.3 From 4be2b04e43fd3d8164d7aeb1808e47fbeb0c0de0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 12:09:04 -0700 Subject: netfilter: Remove now-redundant smp_read_barrier_depends() READ_ONCE() now implies smp_read_barrier_depends(), which means that the instances in arpt_do_table(), ipt_do_table(), and ip6t_do_table() are now redundant. This commit removes them and adjusts the comments. Signed-off-by: Paul E. McKenney Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: "David S. Miller" Cc: Cc: Cc: --- net/ipv4/netfilter/arp_tables.c | 7 +------ net/ipv4/netfilter/ip_tables.c | 7 +------ net/ipv6/netfilter/ip6_tables.c | 7 +------ 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index f88221aebc9d..d242c2d29161 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -202,13 +202,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = table->private; + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); - /* - * Ensure we load private-> members after we've fetched the base - * pointer. 
- */ - smp_read_barrier_depends(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4cbe5e80f3bf..46866cc24a84 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -260,13 +260,8 @@ ipt_do_table(struct sk_buff *skb, WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); - private = table->private; + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); - /* - * Ensure we load private-> members after we've fetched the base - * pointer. - */ - smp_read_barrier_depends(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index f06e25065a34..ac1db84722a7 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -282,12 +282,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = table->private; - /* - * Ensure we load private-> members after we've fetched the base - * pointer. - */ - smp_read_barrier_depends(); + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; -- cgit v1.2.3 From d963007c7210deebef48c5e57aa4ca4cf9c059cd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 9 Oct 2017 12:19:27 -0700 Subject: keyring: Remove now-redundant smp_read_barrier_depends() Now that the associative-array library properly heads dependency chains, the various smp_read_barrier_depends() calls in security/keys/keyring.c are no longer needed. This commit therefore removes them. Signed-off-by: Paul E. McKenney Cc: David Howells Cc: "Serge E. 
Hallyn" Cc: Cc: Reviewed-by: James Morris --- security/keys/keyring.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index d0bccebbd3b5..41bcf57e96f2 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -713,7 +713,6 @@ descend_to_keyring: * doesn't contain any keyring pointers. */ shortcut = assoc_array_ptr_to_shortcut(ptr); - smp_read_barrier_depends(); if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0) goto not_this_keyring; @@ -723,8 +722,6 @@ descend_to_keyring: } node = assoc_array_ptr_to_node(ptr); - smp_read_barrier_depends(); - ptr = node->slots[0]; if (!assoc_array_ptr_is_meta(ptr)) goto begin_node; @@ -736,7 +733,6 @@ descend_to_node: kdebug("descend"); if (assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); - smp_read_barrier_depends(); ptr = READ_ONCE(shortcut->next_node); BUG_ON(!assoc_array_ptr_is_node(ptr)); } @@ -744,7 +740,6 @@ descend_to_node: begin_node: kdebug("begin_node"); - smp_read_barrier_depends(); slot = 0; ascend_to_node: /* Go through the slots in a node */ @@ -792,14 +787,12 @@ ascend_to_node: if (ptr && assoc_array_ptr_is_shortcut(ptr)) { shortcut = assoc_array_ptr_to_shortcut(ptr); - smp_read_barrier_depends(); ptr = READ_ONCE(shortcut->back_pointer); slot = shortcut->parent_slot; } if (!ptr) goto not_this_keyring; node = assoc_array_ptr_to_node(ptr); - smp_read_barrier_depends(); slot++; /* If we've ascended to the root (zero backpointer), we must have just -- cgit v1.2.3 From bc53e3aa88e8240823c1c440e6bab3c3a5ba5f59 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 27 Nov 2017 17:31:01 +0100 Subject: ARM: dts: at91: disable the nxp,se97b SMBUS timeout on the TSE-850 The I2C adapter driver is sometimes slow, causing the SCL line to be stuck low for more than the stipulated SMBUS timeout of 25-35 ms. This causes the client device to give up which in turn causes silent corruption of data. 
So, disable the SMBUS timeout in the client device. Signed-off-by: Peter Rosin Acked-by: Guenter Roeck Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/at91-tse850-3.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts index 5f29010cdbd8..9b82cc8843e1 100644 --- a/arch/arm/boot/dts/at91-tse850-3.dts +++ b/arch/arm/boot/dts/at91-tse850-3.dts @@ -221,6 +221,7 @@ jc42@18 { compatible = "nxp,se97b", "jedec,jc-42.4-temp"; reg = <0x18>; + smbus-timeout-disable; }; dpot: mcp4651-104@28 { -- cgit v1.2.3 From e085ac7a6ddbd746966083c5e13aa290c3e9a253 Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Mon, 4 Dec 2017 17:54:37 +0100 Subject: x86/MCE: Extend table to report action optional errors through CMCI too According to the Intel SDM Volume 3B (253669-063US, July 2017), action optional (SRAO) errors can be reported either via MCE or CMC: In cases when SRAO is signaled via CMCI the error signature is indicated via UC=1, PCC=0, S=0. Type(*1) UC EN PCC S AR Signaling --------------------------------------------------------------- UC 1 1 1 x x MCE SRAR 1 1 0 1 1 MCE SRAO 1 x(*2) 0 x(*2) 0 MCE/CMC UCNA 1 x 0 0 0 CMC CE 0 x x x x CMC NOTES: 1. SRAR, SRAO and UCNA errors are supported by the processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set. 2. EN=1, S=1 when signaled via MCE. EN=x, S=0 when signaled via CMC. And there is a description in 15.6.2 UCR Error Reporting and Logging, for bit S: S (Signaling) flag, bit 56 - Indicates (when set) that a machine check exception was generated for the UCR error reported in this MC bank... When the S flag in the IA32_MCi_STATUS register is clear, this UCR error was not signaled via a machine check exception and instead was reported as a corrected machine check (CMC). So merge the two cases and just remove the S=0 check for SRAO in mce_severity(). [ Borislav: Massage commit message.] 
Signed-off-by: Xie XiuQi Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Tony Luck Tested-by: Chen Wei Cc: linux-edac Link: http://lkml.kernel.org/r/1511575548-41992-1-git-send-email-xiexiuqi@huawei.com --- arch/x86/kernel/cpu/mcheck/mce-severity.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 4ca632a06e0b..5bbd06f38ff6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -59,6 +59,7 @@ static struct severity { #define MCGMASK(x, y) .mcgmask = x, .mcgres = y #define MASK(x, y) .mask = x, .result = y #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) +#define MCI_UC_AR (MCI_STATUS_UC|MCI_STATUS_AR) #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) #define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV) @@ -101,6 +102,22 @@ static struct severity { NOSER, BITCLR(MCI_STATUS_UC) ), + /* + * known AO MCACODs reported via MCE or CMC: + * + * SRAO could be signaled either via a machine check exception or + * CMCI with the corresponding bit S 1 or 0. So we don't need to + * check bit S for SRAO. 
+ */ + MCESEV( + AO, "Action optional: memory scrubbing error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB) + ), + MCESEV( + AO, "Action optional: last level cache writeback error", + SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB) + ), + /* ignore OVER for UCNA */ MCESEV( UCNA, "Uncorrected no action required", @@ -149,15 +166,6 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR) ), - /* known AO MCACODs: */ - MCESEV( - AO, "Action optional: memory scrubbing error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB) - ), - MCESEV( - AO, "Action optional: last level cache writeback error", - SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB) - ), MCESEV( SOME, "Action optional: unknown MCACOD", SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S) -- cgit v1.2.3 From c8a4364c33ac7ed63278267b8f6d8c15810d5fd1 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 4 Dec 2017 17:54:38 +0100 Subject: x86/mce/AMD: Don't set DEF_INT_TYPE in MSR_CU_DEF_ERR on SMCA systems The McaIntrCfg register (MSRC000_0410), previously known as CU_DEFER_ERR, is used on SMCA systems to set the LVT offset for the Threshold and Deferred error interrupts. This register was used on non-SMCA systems to also set the Deferred interrupt type in bits 2:1. However, these bits are reserved on SMCA systems. Only set MSRC000_0410[2:1] on non-SMCA systems. 
Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/20171120162646.5210-1-Yazen.Ghannam@amd.com --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 486f640b02ef..a38ab1fa53a2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -407,7 +407,9 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) (deferred_error_int_vector != amd_deferred_error_interrupt)) deferred_error_int_vector = amd_deferred_error_interrupt; - low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; + if (!mce_flags.smca) + low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; + wrmsr(MSR_CU_DEF_ERR, low, high); } -- cgit v1.2.3 From ce179cbdedf2f54306177e591664be7b18cf386a Mon Sep 17 00:00:00 2001 From: Yuantian Tang Date: Mon, 4 Dec 2017 17:01:20 +0800 Subject: ahci: qoriq: refine port register configuration These PP2C and PP3C registers control the configuration of the PHY control OOB timing for the COMINIT/COMWAKE parameters respectively for sata port. Overwrite default values with calculated ones to get better OOB timing. 
Signed-off-by: Tang Yuantian Signed-off-by: Tejun Heo --- drivers/ata/ahci_qoriq.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index b6b0bf76dfc7..2685f28160f7 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -35,6 +35,8 @@ /* port register default value */ #define AHCI_PORT_PHY_1_CFG 0xa003fffe +#define AHCI_PORT_PHY2_CFG 0x28184d1f +#define AHCI_PORT_PHY3_CFG 0x0e081509 #define AHCI_PORT_TRANS_CFG 0x08000029 #define AHCI_PORT_AXICC_CFG 0x3fffffff @@ -183,6 +185,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); @@ -190,6 +194,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) case AHCI_LS2080A: writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); @@ -201,6 +207,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); @@ -212,6 +220,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) writel(readl(qpriv->ecc_addr) | ECC_DIS_LS1088A, qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + 
PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); @@ -219,6 +229,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) case AHCI_LS2088A: writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); -- cgit v1.2.3 From 2467c0451ce5574738e223b93e3253c9a7015be1 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Fri, 1 Dec 2017 11:47:22 +0100 Subject: ahci: mtk: Change driver name to ahci-mtk The driver name "ahci" is already used by the ahci platform driver. This leads to the following error: Error: Driver 'ahci' is already registered, aborting... Change the name to ahci-mtk to fix this. Signed-off-by: Matthias Brugger Signed-off-by: Tejun Heo --- drivers/ata/ahci_mtk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci_mtk.c b/drivers/ata/ahci_mtk.c index 489452a64303..0ae6971c2a4c 100644 --- a/drivers/ata/ahci_mtk.c +++ b/drivers/ata/ahci_mtk.c @@ -25,7 +25,7 @@ #include #include "ahci.h" -#define DRV_NAME "ahci" +#define DRV_NAME "ahci-mtk" #define SYS_CFG 0x14 #define SYS_CFG_SATA_MSK GENMASK(31, 30) -- cgit v1.2.3 From 2dc0b46b5ea30f169b0b272253ea846a5a281731 Mon Sep 17 00:00:00 2001 From: David Milburn Date: Tue, 14 Nov 2017 16:17:25 -0600 Subject: libata: sata_down_spd_limit should return if driver has not recorded sstatus speed During hotplug, it is possible for 6Gbps link speed to be limited all the way down to 1.5 Gbps which may lead to a slower link speed when drive is re-connected. 
This behavior has been seen on an Intel Lewisburg SATA controller (8086:a1d2) with HGST HUH728080ALE600 drive where SATA link speed was limited to 1.5 Gbps and when re-connected the link came up 3.0 Gbps. This patch was retested on the above configuration and showed the hotplugged link to come back online at max speed (6Gbps). I did not see the downgrade when testing on Intel C600/X79, but retested patched linux-4.14-rc5 kernel and didn't see any side effects from this change. Also, successfully retested hotplug on port multiplier 3Gbps link. tj: Minor comment updates. Signed-off-by: David Milburn Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 2a882929de4a..8193b38a1cae 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3082,13 +3082,19 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit) bit = fls(mask) - 1; mask &= ~(1 << bit); - /* Mask off all speeds higher than or equal to the current - * one. Force 1.5Gbps if current SPD is not available. + /* + * Mask off all speeds higher than or equal to the current one. At + * this point, if current SPD is not available and we previously + * recorded the link speed from SStatus, the driver has already + * masked off the highest bit so mask should already be 1 or 0. + * Otherwise, we should not force 1.5Gbps on a link where we have + * not previously recorded speed from SStatus. Just return in this + * case. */ if (spd > 1) mask &= (1 << (spd - 1)) - 1; else - mask &= 1; + return -EINVAL; /* were we already at the bottom? */ if (!mask) -- cgit v1.2.3 From 11db855c3d06e82f432cb1bafd73296586d5ceec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Dec 2017 14:41:11 -0800 Subject: Revert "cpuset: Make cpuset hotplug synchronous" This reverts commit 1599a185f0e6113be185b9fb809c621c73865829.
This and the previous commit led to another circular locking scenario and the scenario which is fixed by this commit no longer exists after e8b3f8db7aad ("workqueue/hotplug: simplify workqueue_offline_cpu()") which removes work item flushing from hotplug path. Revert it for now. Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 6 ++++++ kernel/cgroup/cpuset.c | 41 +++++++++++++++++++++-------------------- kernel/power/process.c | 2 ++ kernel/sched/core.c | 1 + 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 2ab910f85154..1b8e41597ef5 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -52,7 +52,9 @@ static inline void cpuset_dec(void) extern int cpuset_init(void); extern void cpuset_init_smp(void); +extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); +extern void cpuset_wait_for_hotplug(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -165,11 +167,15 @@ static inline bool cpusets_enabled(void) { return false; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} +static inline void cpuset_force_rebuild(void) { } + static inline void cpuset_update_active_cpus(void) { partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_wait_for_hotplug(void) { } + static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) { diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 227bc25d951d..cab5fd1ee767 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2277,8 +2277,15 @@ retry: mutex_unlock(&cpuset_mutex); } +static bool force_rebuild; + +void cpuset_force_rebuild(void) +{ + force_rebuild = true; +} + /** - * cpuset_hotplug - handle CPU/memory hotunplug for a cpuset + * 
cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset * * This function is called after either CPU or memory configuration has * changed and updates cpuset accordingly. The top_cpuset is always @@ -2293,7 +2300,7 @@ retry: * Note that CPU offlining during suspend is ignored. We don't modify * cpusets across suspend/resume cycles at all. */ -static void cpuset_hotplug(bool use_cpu_hp_lock) +static void cpuset_hotplug_workfn(struct work_struct *work) { static cpumask_t new_cpus; static nodemask_t new_mems; @@ -2351,31 +2358,25 @@ static void cpuset_hotplug(bool use_cpu_hp_lock) } /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated) { - if (use_cpu_hp_lock) - rebuild_sched_domains(); - else { - /* Acquiring cpu_hotplug_lock is not required. - * When cpuset_hotplug() is called in hotplug path, - * cpu_hotplug_lock is held by the hotplug context - * which is waiting for cpuhp_thread_fun to indicate - * completion of callback. - */ - mutex_lock(&cpuset_mutex); - rebuild_sched_domains_cpuslocked(); - mutex_unlock(&cpuset_mutex); - } + if (cpus_updated || force_rebuild) { + force_rebuild = false; + rebuild_sched_domains(); } } -static void cpuset_hotplug_workfn(struct work_struct *work) +void cpuset_update_active_cpus(void) { - cpuset_hotplug(true); + /* + * We're inside cpu hotplug critical region which usually nests + * inside cgroup synchronization. Bounce actual hotplug processing + * to a work item to avoid reverse locking order. 
+ */ + schedule_work(&cpuset_hotplug_work); } -void cpuset_update_active_cpus(void) +void cpuset_wait_for_hotplug(void) { - cpuset_hotplug(false); + flush_work(&cpuset_hotplug_work); } /* diff --git a/kernel/power/process.c b/kernel/power/process.c index c326d7235c5f..7381d49a44db 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -204,6 +204,8 @@ void thaw_processes(void) __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); + cpuset_wait_for_hotplug(); + read_lock(&tasklist_lock); for_each_process_thread(g, p) { /* No other threads should have PF_SUSPEND_TASK set */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 88b3450b29ab..75554f366fd3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5624,6 +5624,7 @@ static void cpuset_cpu_active(void) * restore the original sched domains by considering the * cpuset configurations. */ + cpuset_force_rebuild(); } cpuset_update_active_cpus(); } -- cgit v1.2.3 From e8b3f8db7aad99fcc5234fc5b89984ff6620de3d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 1 Dec 2017 22:20:36 +0800 Subject: workqueue/hotplug: simplify workqueue_offline_cpu() Since the recent cpu/hotplug refactoring, workqueue_offline_cpu() is guaranteed to run on the local cpu which is going offline. This also fixes the following deadlock by removing work item scheduling and flushing from CPU hotplug path. http://lkml.kernel.org/r/1504764252-29091-1-git-send-email-prsood@codeaurora.org tj: Description update. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6a5658cb46da..48a4d00f55dc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1635,7 +1635,7 @@ static void worker_enter_idle(struct worker *worker) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); /* - * Sanity check nr_running. 
Because wq_unbind_fn() releases + * Sanity check nr_running. Because unbind_workers() releases * pool->lock between setting %WORKER_UNBOUND and zapping * nr_running, the warning may trigger spuriously. Check iff * unbind is not in progress. @@ -4511,9 +4511,8 @@ void show_workqueue_state(void) * cpu comes back online. */ -static void wq_unbind_fn(struct work_struct *work) +static void unbind_workers(int cpu) { - int cpu = smp_processor_id(); struct worker_pool *pool; struct worker *worker; @@ -4710,12 +4709,13 @@ int workqueue_online_cpu(unsigned int cpu) int workqueue_offline_cpu(unsigned int cpu) { - struct work_struct unbind_work; struct workqueue_struct *wq; /* unbinding per-cpu workers should happen on the local CPU */ - INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); - queue_work_on(cpu, system_highpri_wq, &unbind_work); + if (WARN_ON(cpu != smp_processor_id())) + return -1; + + unbind_workers(cpu); /* update NUMA affinity of unbound workqueues */ mutex_lock(&wq_pool_mutex); @@ -4723,9 +4723,6 @@ int workqueue_offline_cpu(unsigned int cpu) wq_update_unbound_numa(wq, cpu, false); mutex_unlock(&wq_pool_mutex); - /* wait for per-cpu unbinding to finish */ - flush_work(&unbind_work); - destroy_work_on_stack(&unbind_work); return 0; } -- cgit v1.2.3 From 62408c1ef00784e8bcfc4848ade76480fb8aed21 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 1 Dec 2017 22:23:07 +0800 Subject: workqueue/hotplug: remove the workaround in rebind_workers() Since the cpu/hotplug refactoring, DOWN_FAILED is never called without preceding DOWN_PREPARE making the workaround unnecessary. Remove it. 
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 48a4d00f55dc..45ce93f3dd1f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4589,16 +4589,6 @@ static void rebind_workers(struct worker_pool *pool) spin_lock_irq(&pool->lock); - /* - * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED - * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is - * being reworked and this can go away in time. - */ - if (!(pool->flags & POOL_DISASSOCIATED)) { - spin_unlock_irq(&pool->lock); - return; - } - pool->flags &= ~POOL_DISASSOCIATED; for_each_pool_worker(worker, pool) { -- cgit v1.2.3 From bdfbbda90aeb75ce0951413fd7f495d4d377bd5e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 4 Dec 2017 14:55:59 -0800 Subject: Revert "cgroup/cpuset: remove circular dependency deadlock" This reverts commit aa24163b2ee5c92120e32e99b5a93143a0f4258e. This and the following commit led to another circular locking scenario and the scenario which is fixed by this commit no longer exists after e8b3f8db7aad ("workqueue/hotplug: simplify workqueue_offline_cpu()") which removes work item flushing from hotplug path. Revert it for now. Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 53 ++++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index cab5fd1ee767..f7efa7b4d825 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -812,18 +812,6 @@ done: return ndoms; } -static void cpuset_sched_change_begin(void) -{ - cpus_read_lock(); - mutex_lock(&cpuset_mutex); -} - -static void cpuset_sched_change_end(void) -{ - mutex_unlock(&cpuset_mutex); - cpus_read_unlock(); -} - /* * Rebuild scheduler domains. 
* @@ -833,14 +821,16 @@ static void cpuset_sched_change_end(void) * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * + * Call with cpuset_mutex held. Takes get_online_cpus(). */ -static void rebuild_sched_domains_cpuslocked(void) +static void rebuild_sched_domains_locked(void) { struct sched_domain_attr *attr; cpumask_var_t *doms; int ndoms; lockdep_assert_held(&cpuset_mutex); + get_online_cpus(); /* * We have raced with CPU hotplug. Don't do anything to avoid @@ -848,25 +838,27 @@ static void rebuild_sched_domains_cpuslocked(void) * Anyways, hotplug work item will rebuild sched domains. */ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) - return; + goto out; /* Generate domain masks and attrs */ ndoms = generate_sched_domains(&doms, &attr); /* Have scheduler rebuild the domains */ partition_sched_domains(ndoms, doms, attr); +out: + put_online_cpus(); } #else /* !CONFIG_SMP */ -static void rebuild_sched_domains_cpuslocked(void) +static void rebuild_sched_domains_locked(void) { } #endif /* CONFIG_SMP */ void rebuild_sched_domains(void) { - cpuset_sched_change_begin(); - rebuild_sched_domains_cpuslocked(); - cpuset_sched_change_end(); + mutex_lock(&cpuset_mutex); + rebuild_sched_domains_locked(); + mutex_unlock(&cpuset_mutex); } /** @@ -952,7 +944,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) rcu_read_unlock(); if (need_rebuild_sched_domains) - rebuild_sched_domains_cpuslocked(); + rebuild_sched_domains_locked(); } /** @@ -1284,7 +1276,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) cs->relax_domain_level = val; if (!cpumask_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) - rebuild_sched_domains_cpuslocked(); + rebuild_sched_domains_locked(); } return 0; @@ -1317,6 +1309,7 @@ static void update_tasks_flags(struct cpuset *cs) * * Call with cpuset_mutex held. 
*/ + static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on) { @@ -1349,7 +1342,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spin_unlock_irq(&callback_lock); if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) - rebuild_sched_domains_cpuslocked(); + rebuild_sched_domains_locked(); if (spread_flag_changed) update_tasks_flags(cs); @@ -1617,7 +1610,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = 0; - cpuset_sched_change_begin(); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; @@ -1653,7 +1646,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - cpuset_sched_change_end(); + mutex_unlock(&cpuset_mutex); return retval; } @@ -1664,7 +1657,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, cpuset_filetype_t type = cft->private; int retval = -ENODEV; - cpuset_sched_change_begin(); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1677,7 +1670,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - cpuset_sched_change_end(); + mutex_unlock(&cpuset_mutex); return retval; } @@ -1716,7 +1709,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, kernfs_break_active_protection(of->kn); flush_work(&cpuset_hotplug_work); - cpuset_sched_change_begin(); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1740,7 +1733,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_trial_cpuset(trialcs); out_unlock: - cpuset_sched_change_end(); + mutex_unlock(&cpuset_mutex); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); flush_workqueue(cpuset_migrate_mm_wq); @@ -2041,14 +2034,14 @@ out_unlock: /* * If the cpuset being removed has its flag 
'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains_cpuslocked(). + * will call rebuild_sched_domains_locked(). */ static void cpuset_css_offline(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); - cpuset_sched_change_begin(); + mutex_lock(&cpuset_mutex); if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); @@ -2056,7 +2049,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); - cpuset_sched_change_end(); + mutex_unlock(&cpuset_mutex); } static void cpuset_css_free(struct cgroup_subsys_state *css) -- cgit v1.2.3 From 325c4b3b81027068914854adcba4e97200c809df Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Nov 2017 20:28:07 +0200 Subject: PM / sysfs: Convert to use sysfs_streq() ...instead of custom approach. Signed-off-by: Andy Shevchenko Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/sysfs.c | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index e153e28b1857..662632ac5e0e 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -108,16 +108,10 @@ static ssize_t control_show(struct device *dev, struct device_attribute *attr, static ssize_t control_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) { - char *cp; - int len = n; - - cp = memchr(buf, '\n', n); - if (cp) - len = cp - buf; device_lock(dev); - if (len == sizeof ctrl_auto - 1 && strncmp(buf, ctrl_auto, len) == 0) + if (sysfs_streq(buf, ctrl_auto)) pm_runtime_allow(dev); - else if (len == sizeof ctrl_on - 1 && strncmp(buf, ctrl_on, len) == 0) + else if (sysfs_streq(buf, ctrl_on)) pm_runtime_forbid(dev); else n = -EINVAL; @@ -245,7 +239,7 @@ static ssize_t pm_qos_resume_latency_store(struct device *dev, if (value == 0) value = 
PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; - } else if (!strcmp(buf, "n/a") || !strcmp(buf, "n/a\n")) { + } else if (sysfs_streq(buf, "n/a")) { value = 0; } else { return -EINVAL; @@ -285,9 +279,9 @@ static ssize_t pm_qos_latency_tolerance_store(struct device *dev, if (value < 0) return -EINVAL; } else { - if (!strcmp(buf, "auto") || !strcmp(buf, "auto\n")) + if (sysfs_streq(buf, "auto")) value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; - else if (!strcmp(buf, "any") || !strcmp(buf, "any\n")) + else if (sysfs_streq(buf, "any")) value = PM_QOS_LATENCY_ANY; else return -EINVAL; @@ -342,20 +336,12 @@ static ssize_t wake_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) { - char *cp; - int len = n; - if (!device_can_wakeup(dev)) return -EINVAL; - cp = memchr(buf, '\n', n); - if (cp) - len = cp - buf; - if (len == sizeof _enabled - 1 - && strncmp(buf, _enabled, sizeof _enabled - 1) == 0) + if (sysfs_streq(buf, _enabled)) device_set_wakeup_enable(dev, 1); - else if (len == sizeof _disabled - 1 - && strncmp(buf, _disabled, sizeof _disabled - 1) == 0) + else if (sysfs_streq(buf, _disabled)) device_set_wakeup_enable(dev, 0); else return -EINVAL; @@ -566,16 +552,9 @@ static ssize_t async_show(struct device *dev, struct device_attribute *attr, static ssize_t async_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { - char *cp; - int len = n; - - cp = memchr(buf, '\n', n); - if (cp) - len = cp - buf; - if (len == sizeof _enabled - 1 && strncmp(buf, _enabled, len) == 0) + if (sysfs_streq(buf, _enabled)) device_enable_async_suspend(dev); - else if (len == sizeof _disabled - 1 && - strncmp(buf, _disabled, len) == 0) + else if (sysfs_streq(buf, _disabled)) device_disable_async_suspend(dev); else return -EINVAL; -- cgit v1.2.3 From f0e6d9f164c2269df69b6d2fe05c285392a6a0d4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Nov 2017 20:28:08 +0200 Subject: PM / sysfs: Remove redundant 'else' keyword. 
There is no need to use 'else' if in main branch 'return' is present. No functional change intended. Signed-off-by: Andy Shevchenko Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/sysfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 662632ac5e0e..1bf5e163ef1f 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -216,7 +216,7 @@ static ssize_t pm_qos_resume_latency_show(struct device *dev, if (value == 0) return sprintf(buf, "n/a\n"); - else if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) value = 0; return sprintf(buf, "%d\n", value); @@ -261,7 +261,7 @@ static ssize_t pm_qos_latency_tolerance_show(struct device *dev, if (value < 0) return sprintf(buf, "auto\n"); - else if (value == PM_QOS_LATENCY_ANY) + if (value == PM_QOS_LATENCY_ANY) return sprintf(buf, "any\n"); return sprintf(buf, "%d\n", value); @@ -527,11 +527,11 @@ static ssize_t rtpm_children_show(struct device *dev, static ssize_t rtpm_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { - if ((dev->power.disable_depth) && (dev->power.runtime_auto == false)) + if (dev->power.disable_depth && (dev->power.runtime_auto == false)) return sprintf(buf, "disabled & forbidden\n"); - else if (dev->power.disable_depth) + if (dev->power.disable_depth) return sprintf(buf, "disabled\n"); - else if (dev->power.runtime_auto == false) + if (dev->power.runtime_auto == false) return sprintf(buf, "forbidden\n"); return sprintf(buf, "enabled\n"); } -- cgit v1.2.3 From 47acbd77e6e481abf2f41d3a99cb3762f296b2e6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 10 Nov 2017 20:28:09 +0200 Subject: PM / sysfs: Convert to use DEVICE_ATTR_RO / DEVICE_ATTR_RW Use DEVICE_ATTR_RO() and DEVICE_ATTR_RW() macros instead of open coding them. No functional change intended. 
Signed-off-by: Andy Shevchenko Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/power/sysfs.c | 133 ++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 68 deletions(-) diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 1bf5e163ef1f..0f651efc58a1 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -119,9 +119,9 @@ static ssize_t control_store(struct device * dev, struct device_attribute *attr, return n; } -static DEVICE_ATTR(control, 0644, control_show, control_store); +static DEVICE_ATTR_RW(control); -static ssize_t rtpm_active_time_show(struct device *dev, +static ssize_t runtime_active_time_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; @@ -132,9 +132,9 @@ static ssize_t rtpm_active_time_show(struct device *dev, return ret; } -static DEVICE_ATTR(runtime_active_time, 0444, rtpm_active_time_show, NULL); +static DEVICE_ATTR_RO(runtime_active_time); -static ssize_t rtpm_suspended_time_show(struct device *dev, +static ssize_t runtime_suspended_time_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; @@ -146,9 +146,9 @@ static ssize_t rtpm_suspended_time_show(struct device *dev, return ret; } -static DEVICE_ATTR(runtime_suspended_time, 0444, rtpm_suspended_time_show, NULL); +static DEVICE_ATTR_RO(runtime_suspended_time); -static ssize_t rtpm_status_show(struct device *dev, +static ssize_t runtime_status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *p; @@ -178,7 +178,7 @@ static ssize_t rtpm_status_show(struct device *dev, return sprintf(buf, p); } -static DEVICE_ATTR(runtime_status, 0444, rtpm_status_show, NULL); +static DEVICE_ATTR_RO(runtime_status); static ssize_t autosuspend_delay_ms_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -205,12 +205,11 @@ static ssize_t autosuspend_delay_ms_store(struct device *dev, return n; } -static 
DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show, - autosuspend_delay_ms_store); +static DEVICE_ATTR_RW(autosuspend_delay_ms); -static ssize_t pm_qos_resume_latency_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t pm_qos_resume_latency_us_show(struct device *dev, + struct device_attribute *attr, + char *buf) { s32 value = dev_pm_qos_requested_resume_latency(dev); @@ -222,9 +221,9 @@ static ssize_t pm_qos_resume_latency_show(struct device *dev, return sprintf(buf, "%d\n", value); } -static ssize_t pm_qos_resume_latency_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t n) +static ssize_t pm_qos_resume_latency_us_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t n) { s32 value; int ret; @@ -250,12 +249,11 @@ static ssize_t pm_qos_resume_latency_store(struct device *dev, return ret < 0 ? ret : n; } -static DEVICE_ATTR(pm_qos_resume_latency_us, 0644, - pm_qos_resume_latency_show, pm_qos_resume_latency_store); +static DEVICE_ATTR_RW(pm_qos_resume_latency_us); -static ssize_t pm_qos_latency_tolerance_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev, + struct device_attribute *attr, + char *buf) { s32 value = dev_pm_qos_get_user_latency_tolerance(dev); @@ -267,9 +265,9 @@ static ssize_t pm_qos_latency_tolerance_show(struct device *dev, return sprintf(buf, "%d\n", value); } -static ssize_t pm_qos_latency_tolerance_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t n) +static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t n) { s32 value; int ret; @@ -290,8 +288,7 @@ static ssize_t pm_qos_latency_tolerance_store(struct device *dev, return ret < 0 ? 
ret : n; } -static DEVICE_ATTR(pm_qos_latency_tolerance_us, 0644, - pm_qos_latency_tolerance_show, pm_qos_latency_tolerance_store); +static DEVICE_ATTR_RW(pm_qos_latency_tolerance_us); static ssize_t pm_qos_no_power_off_show(struct device *dev, struct device_attribute *attr, @@ -317,24 +314,22 @@ static ssize_t pm_qos_no_power_off_store(struct device *dev, return ret < 0 ? ret : n; } -static DEVICE_ATTR(pm_qos_no_power_off, 0644, - pm_qos_no_power_off_show, pm_qos_no_power_off_store); +static DEVICE_ATTR_RW(pm_qos_no_power_off); #ifdef CONFIG_PM_SLEEP static const char _enabled[] = "enabled"; static const char _disabled[] = "disabled"; -static ssize_t -wake_show(struct device * dev, struct device_attribute *attr, char * buf) +static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr, + char *buf) { return sprintf(buf, "%s\n", device_can_wakeup(dev) ? (device_may_wakeup(dev) ? _enabled : _disabled) : ""); } -static ssize_t -wake_store(struct device * dev, struct device_attribute *attr, - const char * buf, size_t n) +static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t n) { if (!device_can_wakeup(dev)) return -EINVAL; @@ -348,10 +343,10 @@ wake_store(struct device * dev, struct device_attribute *attr, return n; } -static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); +static DEVICE_ATTR_RW(wakeup); static ssize_t wakeup_count_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { unsigned long count = 0; bool enabled = false; @@ -365,10 +360,11 @@ static ssize_t wakeup_count_show(struct device *dev, return enabled ? 
sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL); +static DEVICE_ATTR_RO(wakeup_count); static ssize_t wakeup_active_count_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { unsigned long count = 0; bool enabled = false; @@ -382,11 +378,11 @@ static ssize_t wakeup_active_count_show(struct device *dev, return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL); +static DEVICE_ATTR_RO(wakeup_active_count); static ssize_t wakeup_abort_count_show(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { unsigned long count = 0; bool enabled = false; @@ -400,7 +396,7 @@ static ssize_t wakeup_abort_count_show(struct device *dev, return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL); +static DEVICE_ATTR_RO(wakeup_abort_count); static ssize_t wakeup_expire_count_show(struct device *dev, struct device_attribute *attr, @@ -418,10 +414,10 @@ static ssize_t wakeup_expire_count_show(struct device *dev, return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL); +static DEVICE_ATTR_RO(wakeup_expire_count); static ssize_t wakeup_active_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { unsigned int active = 0; bool enabled = false; @@ -435,10 +431,11 @@ static ssize_t wakeup_active_show(struct device *dev, return enabled ? 
sprintf(buf, "%u\n", active) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_active, 0444, wakeup_active_show, NULL); +static DEVICE_ATTR_RO(wakeup_active); -static ssize_t wakeup_total_time_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wakeup_total_time_ms_show(struct device *dev, + struct device_attribute *attr, + char *buf) { s64 msec = 0; bool enabled = false; @@ -452,10 +449,10 @@ static ssize_t wakeup_total_time_show(struct device *dev, return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_total_time_ms, 0444, wakeup_total_time_show, NULL); +static DEVICE_ATTR_RO(wakeup_total_time_ms); -static ssize_t wakeup_max_time_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wakeup_max_time_ms_show(struct device *dev, + struct device_attribute *attr, char *buf) { s64 msec = 0; bool enabled = false; @@ -469,10 +466,11 @@ static ssize_t wakeup_max_time_show(struct device *dev, return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_max_time_ms, 0444, wakeup_max_time_show, NULL); +static DEVICE_ATTR_RO(wakeup_max_time_ms); -static ssize_t wakeup_last_time_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wakeup_last_time_ms_show(struct device *dev, + struct device_attribute *attr, + char *buf) { s64 msec = 0; bool enabled = false; @@ -486,12 +484,12 @@ static ssize_t wakeup_last_time_show(struct device *dev, return enabled ? 
sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL); +static DEVICE_ATTR_RO(wakeup_last_time_ms); #ifdef CONFIG_PM_AUTOSLEEP -static ssize_t wakeup_prevent_sleep_time_show(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t wakeup_prevent_sleep_time_ms_show(struct device *dev, + struct device_attribute *attr, + char *buf) { s64 msec = 0; bool enabled = false; @@ -505,27 +503,29 @@ static ssize_t wakeup_prevent_sleep_time_show(struct device *dev, return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444, - wakeup_prevent_sleep_time_show, NULL); +static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); #endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_ADVANCED_DEBUG -static ssize_t rtpm_usagecount_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t runtime_usage_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", atomic_read(&dev->power.usage_count)); } +static DEVICE_ATTR_RO(runtime_usage); -static ssize_t rtpm_children_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t runtime_active_kids_show(struct device *dev, + struct device_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", dev->power.ignore_children ? 
0 : atomic_read(&dev->power.child_count)); } +static DEVICE_ATTR_RO(runtime_active_kids); -static ssize_t rtpm_enabled_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t runtime_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) { if (dev->power.disable_depth && (dev->power.runtime_auto == false)) return sprintf(buf, "disabled & forbidden\n"); @@ -535,10 +535,7 @@ static ssize_t rtpm_enabled_show(struct device *dev, return sprintf(buf, "forbidden\n"); return sprintf(buf, "enabled\n"); } - -static DEVICE_ATTR(runtime_usage, 0444, rtpm_usagecount_show, NULL); -static DEVICE_ATTR(runtime_active_kids, 0444, rtpm_children_show, NULL); -static DEVICE_ATTR(runtime_enabled, 0444, rtpm_enabled_show, NULL); +static DEVICE_ATTR_RO(runtime_enabled); #ifdef CONFIG_PM_SLEEP static ssize_t async_show(struct device *dev, struct device_attribute *attr, @@ -561,7 +558,7 @@ static ssize_t async_store(struct device *dev, struct device_attribute *attr, return n; } -static DEVICE_ATTR(async, 0644, async_show, async_store); +static DEVICE_ATTR_RW(async); #endif /* CONFIG_PM_SLEEP */ #endif /* CONFIG_PM_ADVANCED_DEBUG */ -- cgit v1.2.3 From 1172ee31259b51a9b2d83b05f01161fd5938b15d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 13 Nov 2017 16:46:41 +0100 Subject: PM / core: Re-factor some code dealing with parents in __device_suspend() Let's make the code a bit more readable by moving some of the code, which deals with adjustments for parent devices in __device_suspend(), into its own function. Signed-off-by: Ulf Hansson Reviewed-by: Geert Uytterhoeven Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 73ec6796d9e1..c0d5f4a3611d 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1479,6 +1479,22 @@ static int legacy_suspend(struct device *dev, pm_message_t state, return error; } +static void dpm_propagate_to_parent(struct device *dev) +{ + struct device *parent = dev->parent; + + if (!parent) + return; + + spin_lock_irq(&parent->power.lock); + + parent->power.direct_complete = false; + if (dev->power.wakeup_path && !parent->power.ignore_children) + parent->power.wakeup_path = true; + + spin_unlock_irq(&parent->power.lock); +} + static void dpm_clear_suppliers_direct_complete(struct device *dev) { struct device_link *link; @@ -1590,19 +1606,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) End: if (!error) { - struct device *parent = dev->parent; - dev->power.is_suspended = true; - if (parent) { - spin_lock_irq(&parent->power.lock); - - dev->parent->power.direct_complete = false; - if (dev->power.wakeup_path - && !dev->parent->power.ignore_children) - dev->parent->power.wakeup_path = true; - - spin_unlock_irq(&parent->power.lock); - } + dpm_propagate_to_parent(dev); dpm_clear_suppliers_direct_complete(dev); } -- cgit v1.2.3 From 3c5b977f06b754b00a49ee7bf1595491afab7de6 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Tue, 28 Nov 2017 16:48:08 -0500 Subject: ACPI: APEI: handle PCIe AER errors in separate function Move PCIe AER error handling code into a separate function. Signed-off-by: Tyler Baicar Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/apei/ghes.c | 64 +++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 6402f7fad3bb..f67eb763e950 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -414,6 +414,39 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int #endif } +static void ghes_handle_aer(struct acpi_hest_generic_data *gdata, int sev, int sec_sev) +{ +#ifdef CONFIG_ACPI_APEI_PCIEAER + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); + + if (sev == GHES_SEV_RECOVERABLE && + sec_sev == GHES_SEV_RECOVERABLE && + pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && + pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { + unsigned int devfn; + int aer_severity; + + devfn = PCI_DEVFN(pcie_err->device_id.device, + pcie_err->device_id.function); + aer_severity = cper_severity_to_aer(gdata->error_severity); + + /* + * If firmware reset the component to contain + * the error, we must reinitialize it before + * use, so treat it as a fatal AER error. 
+ */ + if (gdata->flags & CPER_SEC_RESET) + aer_severity = AER_FATAL; + + aer_recover_queue(pcie_err->device_id.segment, + pcie_err->device_id.bus, + devfn, aer_severity, + (struct aer_capability_regs *) + pcie_err->aer_info); + } +#endif +} + static void ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -441,38 +474,9 @@ static void ghes_do_proc(struct ghes *ghes, arch_apei_report_mem_error(sev, mem_err); ghes_handle_memory_failure(gdata, sev); } -#ifdef CONFIG_ACPI_APEI_PCIEAER else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { - struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); - - if (sev == GHES_SEV_RECOVERABLE && - sec_sev == GHES_SEV_RECOVERABLE && - pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && - pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { - unsigned int devfn; - int aer_severity; - - devfn = PCI_DEVFN(pcie_err->device_id.device, - pcie_err->device_id.function); - aer_severity = cper_severity_to_aer(gdata->error_severity); - - /* - * If firmware reset the component to contain - * the error, we must reinitialize it before - * use, so treat it as a fatal AER error. - */ - if (gdata->flags & CPER_SEC_RESET) - aer_severity = AER_FATAL; - - aer_recover_queue(pcie_err->device_id.segment, - pcie_err->device_id.bus, - devfn, aer_severity, - (struct aer_capability_regs *) - pcie_err->aer_info); - } - + ghes_handle_aer(gdata, sev, sec_sev); } -#endif else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); -- cgit v1.2.3 From 9852ce9ae213d39a98f161db84b90b047fbdc436 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Tue, 28 Nov 2017 16:48:09 -0500 Subject: ACPI: APEI: call into AER handling regardless of severity Currently the GHES code only calls into the AER driver for recoverable type errors. This is incorrect because errors of other severities do not get logged by the AER driver and do not get exposed to user space via the AER trace event. 
So, call into the AER driver for PCIe errors regardless of the severity Signed-off-by: Tyler Baicar Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f67eb763e950..cc65d1992635 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -414,14 +414,26 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int #endif } -static void ghes_handle_aer(struct acpi_hest_generic_data *gdata, int sev, int sec_sev) +/* + * PCIe AER errors need to be sent to the AER driver for reporting and + * recovery. The GHES severities map to the following AER severities and + * require the following handling: + * + * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE + * These need to be reported by the AER driver but no recovery is + * necessary. + * GHES_SEV_RECOVERABLE -> AER_NONFATAL + * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL + * These both need to be reported and recovered from by the AER driver. + * GHES_SEV_PANIC does not make it to this handling since the kernel must + * panic. 
+ */ +static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) { #ifdef CONFIG_ACPI_APEI_PCIEAER struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); - if (sev == GHES_SEV_RECOVERABLE && - sec_sev == GHES_SEV_RECOVERABLE && - pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && + if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { unsigned int devfn; int aer_severity; @@ -475,7 +487,7 @@ static void ghes_do_proc(struct ghes *ghes, ghes_handle_memory_failure(gdata, sev); } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { - ghes_handle_aer(gdata, sev, sec_sev); + ghes_handle_aer(gdata); } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); -- cgit v1.2.3 From 24bc8f03be22eaf17cc0f75c139a863c67fdfc85 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 15 Oct 2017 22:24:54 +0100 Subject: ACPI / APEI: remove redundant variables len and node_len Variables len and node_len are redundant and can be removed. Cleans up clang warning: node_len = GHES_ESTATUS_NODE_LEN(len); Signed-off-by: Colin Ian King Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/apei/ghes.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index cc65d1992635..16c4a10b7506 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -886,7 +886,6 @@ static void ghes_print_queued_estatus(void) struct ghes_estatus_node *estatus_node; struct acpi_hest_generic *generic; struct acpi_hest_generic_status *estatus; - u32 len, node_len; llnode = llist_del_all(&ghes_estatus_llist); /* @@ -898,8 +897,6 @@ static void ghes_print_queued_estatus(void) estatus_node = llist_entry(llnode, struct ghes_estatus_node, llnode); estatus = GHES_ESTATUS_FROM_NODE(estatus_node); - len = cper_estatus_len(estatus); - node_len = GHES_ESTATUS_NODE_LEN(len); generic = estatus_node->generic; ghes_print_estatus(NULL, generic, estatus); llnode = llnode->next; -- cgit v1.2.3 From ba69ead9e9e9bb3cec5faf03526c36764ac8942a Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 27 Nov 2017 23:47:34 +0100 Subject: scsi: scsi_devinfo: handle non-terminated strings devinfo->vendor and devinfo->model aren't necessarily zero-terminated. Fixes: b8018b973c7c "scsi_devinfo: fixup string compare" Signed-off-by: Martin Wilck Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_devinfo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 78d4aa8df675..b256d4cbd3ad 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -458,7 +458,8 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor, /* * vendor strings must be an exact match */ - if (vmax != strlen(devinfo->vendor) || + if (vmax != strnlen(devinfo->vendor, + sizeof(devinfo->vendor)) || memcmp(devinfo->vendor, vskip, vmax)) continue; @@ -466,7 +467,7 @@ static struct scsi_dev_info_list *scsi_dev_info_list_find(const char *vendor, * @model specifies the full string, and * must be larger or equal to devinfo->model */ - mlen = strlen(devinfo->model); + mlen = strnlen(devinfo->model, sizeof(devinfo->model)); if (mmax < mlen || memcmp(devinfo->model, mskip, mlen)) continue; return devinfo; -- cgit v1.2.3 From 81df022b688d43d2a3667518b2f755d384397910 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 27 Nov 2017 23:47:35 +0100 Subject: scsi: scsi_devinfo: cleanly zero-pad devinfo strings Cleanly fill memory for "vendor" and "model" with 0-bytes for the "compatible" case rather than adding only a single 0 byte. This simplifies the devinfo code a bit, and avoids mistakes in other places of the code (not in current upstream, but we had one such mistake in the SUSE kernel). [mkp: applied by hand and added braces] Signed-off-by: Martin Wilck Reviewed-by: Bart Van Assche Signed-off-by: Martin K.
Petersen --- drivers/scsi/scsi_devinfo.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index b256d4cbd3ad..449ef5adbb2b 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -34,7 +34,6 @@ struct scsi_dev_info_list_table { }; -static const char spaces[] = " "; /* 16 of them */ static blist_flags_t scsi_default_dev_flags; static LIST_HEAD(scsi_dev_info_list); static char scsi_dev_flags[256]; @@ -298,20 +297,13 @@ static void scsi_strcpy_devinfo(char *name, char *to, size_t to_length, size_t from_length; from_length = strlen(from); - strncpy(to, from, min(to_length, from_length)); - if (from_length < to_length) { - if (compatible) { - /* - * NUL terminate the string if it is short. - */ - to[from_length] = '\0'; - } else { - /* - * space pad the string if it is short. - */ - strncpy(&to[from_length], spaces, - to_length - from_length); - } + /* This zero-pads the destination */ + strncpy(to, from, to_length); + if (from_length < to_length && !compatible) { + /* + * space pad the string if it is short. + */ + memset(&to[from_length], ' ', to_length - from_length); } if (from_length > to_length) printk(KERN_WARNING "%s: %s string '%s' is too long\n", -- cgit v1.2.3 From e2bf801ecd4e62222a46d1ba9e57e710171d29c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Mon, 27 Nov 2017 20:05:34 +0100 Subject: sunxi-rsb: Include OF based modalias in device uevent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include the OF-based modalias in the uevent sent when registering devices on the sunxi RSB bus, so that user space has a chance to autoload the kernel module for the device. Fixes a regression caused by commit 3f241bfa60bd ("arm64: allwinner: a64: pine64: Use dcdc1 regulator for mmc0"). 
When the axp20x-rsb module for the AXP803 PMIC is built as a module, it is not loaded and the system ends up with a dysfunctional MMC controller. Fixes: d787dcdb9c8f ("bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus") Cc: stable # 4.4.x 7a3b7cd332db of: device: Export of_device_{get_modalias, uvent_modalias} to modules Acked-by: Chen-Yu Tsai Signed-off-by: Stefan Brüns Signed-off-by: Maxime Ripard --- drivers/bus/sunxi-rsb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 328ca93781cf..1b76d9585902 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = { .match = sunxi_rsb_device_match, .probe = sunxi_rsb_device_probe, .remove = sunxi_rsb_device_remove, + .uevent = of_device_uevent_modalias, }; static void sunxi_rsb_dev_release(struct device *dev) -- cgit v1.2.3 From e17e237cd69f9f6ecaa0e875f889ad401a625148 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 4 Dec 2017 16:44:01 +0800 Subject: ARM: dts: sunxi: Convert to CCU index macros for HDMI controller When the HDMI controller device node was added, the needed PLL clock macros were not exported. A separate patch addresses that, but it is merged through a different tree. Now that both patches are in mainline proper, we can convert the raw numbers to proper macros.
Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun4i-a10.dtsi | 4 ++-- arch/arm/boot/dts/sun5i-a10s.dtsi | 4 ++-- arch/arm/boot/dts/sun6i-a31.dtsi | 4 ++-- arch/arm/boot/dts/sun7i-a20.dtsi | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index b91300d49a31..5840f5c75c3b 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -502,8 +502,8 @@ reg = <0x01c16000 0x1000>; interrupts = <58>; clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 18>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 6ae4d95e230e..316cb8b2945b 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -82,8 +82,8 @@ reg = <0x01c16000 0x1000>; interrupts = <58>; clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 16>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 8bfa12b548e0..72d3fe44ecaf 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -429,8 +429,8 @@ interrupts = ; clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>, <&ccu CLK_HDMI_DDC>, - <&ccu 7>, - <&ccu 13>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1"; resets = <&ccu RST_AHB1_HDMI>; reset-names = "ahb"; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 68dfa82544fc..59655e42e4b0 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -581,8 +581,8 @@ reg = 
<0x01c16000 0x1000>; interrupts = ; clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 18>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, -- cgit v1.2.3 From 7d556bfc49adddf2beb0d16c91945c3b8b783282 Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Mon, 4 Dec 2017 10:23:07 +0530 Subject: arm64: allwinner: a64-sopine: Fix to use dcdc1 regulator instead of vcc3v3 Since the current tree supports AXP803 regulators, replace the fixed regulator vcc3v3 with the AXP803 dcdc1 regulator wherever it is used. Tested mmc0 on sopine baseboard. Signed-off-by: Jagan Teki Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 2 +- arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index a053a6ac5267..abe179de35d7 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -96,7 +96,7 @@ &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; - vmmc-supply = <&reg_vcc3v3>; + vmmc-supply = <&reg_dcdc1>; vqmmc-supply = <&reg_vcc1v8>; bus-width = <8>; non-removable; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi index a5da18a6f286..43418bd881d8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi @@ -45,19 +45,10 @@ #include "sun50i-a64.dtsi" -/ { - reg_vcc3v3: vcc3v3 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; -
vmmc-supply = <&reg_vcc3v3>; + vmmc-supply = <&reg_dcdc1>; non-removable; disable-wp; bus-width = <4>; -- cgit v1.2.3 From f88e9301948173dd35afad4a6939092c7f269aed Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Fri, 3 Nov 2017 22:58:54 +0300 Subject: arm64: dts: orange-pi-zero-plus2: fix sdcard detect The sdcard detect pin on orange-pi-zero-plus2 is pulled up. Fix cd-gpio description to enable sdcard detect. Signed-off-by: Sergey Matyukevich Signed-off-by: Maxime Ripard --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts index b6b7a561df8c..a42fd79a62a3 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts @@ -71,7 +71,7 @@ pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; vmmc-supply = <&reg_vcc3v3>; bus-width = <4>; - cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; + cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; status = "okay"; }; -- cgit v1.2.3 From 5149b685b122d3bee78bd3403997f9ddb1223f4a Mon Sep 17 00:00:00 2001 From: Erick Chen Date: Tue, 5 Dec 2017 14:35:45 +0800 Subject: regulator: Add Spreadtrum SC2731 regulator documentation This patch adds support for the Spreadtrum SC2731 voltage regulator device.
Signed-off-by: Erick Chen Signed-off-by: Mark Brown --- .../bindings/regulator/sprd,sc2731-regulator.txt | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt diff --git a/Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt b/Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt new file mode 100644 index 000000000000..63dc07877cd6 --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/sprd,sc2731-regulator.txt @@ -0,0 +1,43 @@ +Spreadtrum SC2731 Voltage regulators + +The SC2731 integrates low-voltage and low quiescent current DCDC/LDO. +14 LDO and 3 DCDCs are designed for external use. All DCDCs/LDOs have +their own bypass (power-down) control signals. External tantalum or MLCC +ceramic capacitors are recommended to use with these LDOs. + +Required properties: + - compatible: should be "sprd,sc27xx-regulator". + +List of regulators provided by this controller. It is named according to +its regulator type, BUCK_ and LDO_. The definition for each +of these nodes is defined using the standard binding for regulators at +Documentation/devicetree/bindings/regulator/regulator.txt. + +The valid names for regulators are: +BUCK: + BUCK_CPU0, BUCK_CPU1, BUCK_RF +LDO: + LDO_CAMA0, LDO_CAMA1, LDO_CAMMOT, LDO_VLDO, LDO_EMMCCORE, LDO_SDCORE, + LDO_SDIO, LDO_WIFIPA, LDO_USB33, LDO_CAMD0, LDO_CAMD1, LDO_CON, + LDO_CAMIO, LDO_SRAM + +Example: + regulators { + compatible = "sprd,sc27xx-regulator"; + + vddarm0: BUCK_CPU0 { + regulator-name = "vddarm0"; + regulator-min-microvolt = <400000>; + regulator-max-microvolt = <1996875>; + regulator-ramp-delay = <25000>; + regulator-always-on; + }; + + vddcama0: LDO_CAMA0 { + regulator-name = "vddcama0"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3750000>; + regulator-enable-ramp-delay = <100>; + }; + ... 
+ }; -- cgit v1.2.3 From 433c9bb77b8131ef340148565996b3818fbf2f23 Mon Sep 17 00:00:00 2001 From: Erick Chen Date: Tue, 5 Dec 2017 14:35:46 +0800 Subject: regulator: sc2731: Add regulator driver to support Spreadtrum SC2731 PMIC Add regulator driver for Spreadtrum SC2731 device. It has 17 general purpose LDOs, BUCKs generator and digital output to control regulators. Signed-off-by: Erick Chen Reviewed-by: Baolin Wang Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 7 + drivers/regulator/Makefile | 1 + drivers/regulator/sc2731-regulator.c | 256 +++++++++++++++++++++++++++++++++++ 3 files changed, 264 insertions(+) create mode 100644 drivers/regulator/sc2731-regulator.c diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 96cd55f9e3c5..b27417ca188a 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -744,6 +744,13 @@ config REGULATOR_S5M8767 via I2C bus. S5M8767A have 9 Bucks and 28 LDOs output and supports DVS mode with 8bits of output voltage control. +config REGULATOR_SC2731 + tristate "Spreadtrum SC2731 power regulator driver" + depends on MFD_SC27XX_PMIC || COMPILE_TEST + help + This driver provides support for the voltage regulators on the + SC2731 PMIC. 
+ config REGULATOR_SKY81452 tristate "Skyworks Solutions SKY81452 voltage regulator" depends on MFD_SKY81452 diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 80ffc57a9ca3..19fea09ba10a 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -95,6 +95,7 @@ obj-$(CONFIG_REGULATOR_RT5033) += rt5033-regulator.o obj-$(CONFIG_REGULATOR_S2MPA01) += s2mpa01.o obj-$(CONFIG_REGULATOR_S2MPS11) += s2mps11.o obj-$(CONFIG_REGULATOR_S5M8767) += s5m8767.o +obj-$(CONFIG_REGULATOR_SC2731) += sc2731-regulator.o obj-$(CONFIG_REGULATOR_SKY81452) += sky81452-regulator.o obj-$(CONFIG_REGULATOR_STM32_VREFBUF) += stm32-vrefbuf.o obj-$(CONFIG_REGULATOR_STW481X_VMMC) += stw481x-vmmc.o diff --git a/drivers/regulator/sc2731-regulator.c b/drivers/regulator/sc2731-regulator.c new file mode 100644 index 000000000000..794fcd504b3d --- /dev/null +++ b/drivers/regulator/sc2731-regulator.c @@ -0,0 +1,256 @@ + //SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Spreadtrum Communications Inc. 
+ */ + +#include +#include +#include +#include +#include +#include + +/* + * SC2731 regulator lock register + */ +#define SC2731_PWR_WR_PROT_VALUE 0xf0c +#define SC2731_WR_UNLOCK 0x6e7f + +/* + * SC2731 enable register + */ +#define SC2731_POWER_PD_SW 0xc28 +#define SC2731_LDO_CAMA0_PD 0xcfc +#define SC2731_LDO_CAMA1_PD 0xd04 +#define SC2731_LDO_CAMMOT_PD 0xd0c +#define SC2731_LDO_VLDO_PD 0xd6c +#define SC2731_LDO_EMMCCORE_PD 0xd2c +#define SC2731_LDO_SDCORE_PD 0xd74 +#define SC2731_LDO_SDIO_PD 0xd70 +#define SC2731_LDO_WIFIPA_PD 0xd4c +#define SC2731_LDO_USB33_PD 0xd5c +#define SC2731_LDO_CAMD0_PD 0xd7c +#define SC2731_LDO_CAMD1_PD 0xd84 +#define SC2731_LDO_CON_PD 0xd8c +#define SC2731_LDO_CAMIO_PD 0xd94 +#define SC2731_LDO_SRAM_PD 0xd78 + +/* + * SC2731 enable mask + */ +#define SC2731_DCDC_CPU0_PD_MASK BIT(4) +#define SC2731_DCDC_CPU1_PD_MASK BIT(3) +#define SC2731_DCDC_RF_PD_MASK BIT(11) +#define SC2731_LDO_CAMA0_PD_MASK BIT(0) +#define SC2731_LDO_CAMA1_PD_MASK BIT(0) +#define SC2731_LDO_CAMMOT_PD_MASK BIT(0) +#define SC2731_LDO_VLDO_PD_MASK BIT(0) +#define SC2731_LDO_EMMCCORE_PD_MASK BIT(0) +#define SC2731_LDO_SDCORE_PD_MASK BIT(0) +#define SC2731_LDO_SDIO_PD_MASK BIT(0) +#define SC2731_LDO_WIFIPA_PD_MASK BIT(0) +#define SC2731_LDO_USB33_PD_MASK BIT(0) +#define SC2731_LDO_CAMD0_PD_MASK BIT(0) +#define SC2731_LDO_CAMD1_PD_MASK BIT(0) +#define SC2731_LDO_CON_PD_MASK BIT(0) +#define SC2731_LDO_CAMIO_PD_MASK BIT(0) +#define SC2731_LDO_SRAM_PD_MASK BIT(0) + +/* + * SC2731 vsel register + */ +#define SC2731_DCDC_CPU0_VOL 0xc54 +#define SC2731_DCDC_CPU1_VOL 0xc64 +#define SC2731_DCDC_RF_VOL 0xcb8 +#define SC2731_LDO_CAMA0_VOL 0xd00 +#define SC2731_LDO_CAMA1_VOL 0xd08 +#define SC2731_LDO_CAMMOT_VOL 0xd10 +#define SC2731_LDO_VLDO_VOL 0xd28 +#define SC2731_LDO_EMMCCORE_VOL 0xd30 +#define SC2731_LDO_SDCORE_VOL 0xd38 +#define SC2731_LDO_SDIO_VOL 0xd40 +#define SC2731_LDO_WIFIPA_VOL 0xd50 +#define SC2731_LDO_USB33_VOL 0xd60 +#define SC2731_LDO_CAMD0_VOL 0xd80 +#define 
SC2731_LDO_CAMD1_VOL 0xd88 +#define SC2731_LDO_CON_VOL 0xd90 +#define SC2731_LDO_CAMIO_VOL 0xd98 +#define SC2731_LDO_SRAM_VOL 0xdB0 + +/* + * SC2731 vsel register mask + */ +#define SC2731_DCDC_CPU0_VOL_MASK GENMASK(8, 0) +#define SC2731_DCDC_CPU1_VOL_MASK GENMASK(8, 0) +#define SC2731_DCDC_RF_VOL_MASK GENMASK(8, 0) +#define SC2731_LDO_CAMA0_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_CAMA1_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_CAMMOT_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_VLDO_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_EMMCCORE_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_SDCORE_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_SDIO_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_WIFIPA_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_USB33_VOL_MASK GENMASK(7, 0) +#define SC2731_LDO_CAMD0_VOL_MASK GENMASK(6, 0) +#define SC2731_LDO_CAMD1_VOL_MASK GENMASK(6, 0) +#define SC2731_LDO_CON_VOL_MASK GENMASK(6, 0) +#define SC2731_LDO_CAMIO_VOL_MASK GENMASK(6, 0) +#define SC2731_LDO_SRAM_VOL_MASK GENMASK(6, 0) + +enum sc2731_regulator_id { + SC2731_BUCK_CPU0, + SC2731_BUCK_CPU1, + SC2731_BUCK_RF, + SC2731_LDO_CAMA0, + SC2731_LDO_CAMA1, + SC2731_LDO_CAMMOT, + SC2731_LDO_VLDO, + SC2731_LDO_EMMCCORE, + SC2731_LDO_SDCORE, + SC2731_LDO_SDIO, + SC2731_LDO_WIFIPA, + SC2731_LDO_USB33, + SC2731_LDO_CAMD0, + SC2731_LDO_CAMD1, + SC2731_LDO_CON, + SC2731_LDO_CAMIO, + SC2731_LDO_SRAM, +}; + +static const struct regulator_ops sc2731_regu_linear_ops = { + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .list_voltage = regulator_list_voltage_linear, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, +}; + +#define SC2731_REGU_LINEAR(_id, en_reg, en_mask, vreg, vmask, \ + vstep, vmin, vmax) { \ + .name = #_id, \ + .of_match = of_match_ptr(#_id), \ + .ops = &sc2731_regu_linear_ops, \ + .type = REGULATOR_VOLTAGE, \ + .id = SC2731_##_id, \ + .owner = THIS_MODULE, \ + .min_uV 
= vmin, \ + .n_voltages = ((vmax) - (vmin)) / (vstep) + 1, \ + .uV_step = vstep, \ + .enable_is_inverted = true, \ + .enable_val = 0, \ + .enable_reg = en_reg, \ + .enable_mask = en_mask, \ + .vsel_reg = vreg, \ + .vsel_mask = vmask, \ +} + +static struct regulator_desc regulators[] = { + SC2731_REGU_LINEAR(BUCK_CPU0, SC2731_POWER_PD_SW, + SC2731_DCDC_CPU0_PD_MASK, SC2731_DCDC_CPU0_VOL, + SC2731_DCDC_CPU0_VOL_MASK, 3125, 400000, 1996875), + SC2731_REGU_LINEAR(BUCK_CPU1, SC2731_POWER_PD_SW, + SC2731_DCDC_CPU1_PD_MASK, SC2731_DCDC_CPU1_VOL, + SC2731_DCDC_CPU1_VOL_MASK, 3125, 400000, 1996875), + SC2731_REGU_LINEAR(BUCK_RF, SC2731_POWER_PD_SW, SC2731_DCDC_RF_PD_MASK, + SC2731_DCDC_RF_VOL, SC2731_DCDC_RF_VOL_MASK, + 3125, 600000, 2196875), + SC2731_REGU_LINEAR(LDO_CAMA0, SC2731_LDO_CAMA0_PD, + SC2731_LDO_CAMA0_PD_MASK, SC2731_LDO_CAMA0_VOL, + SC2731_LDO_CAMA0_VOL_MASK, 10000, 1200000, 3750000), + SC2731_REGU_LINEAR(LDO_CAMA1, SC2731_LDO_CAMA1_PD, + SC2731_LDO_CAMA1_PD_MASK, SC2731_LDO_CAMA1_VOL, + SC2731_LDO_CAMA1_VOL_MASK, 10000, 1200000, 3750000), + SC2731_REGU_LINEAR(LDO_CAMMOT, SC2731_LDO_CAMMOT_PD, + SC2731_LDO_CAMMOT_PD_MASK, SC2731_LDO_CAMMOT_VOL, + SC2731_LDO_CAMMOT_VOL_MASK, 10000, 1200000, 3750000), + SC2731_REGU_LINEAR(LDO_VLDO, SC2731_LDO_VLDO_PD, + SC2731_LDO_VLDO_PD_MASK, SC2731_LDO_VLDO_VOL, + SC2731_LDO_VLDO_VOL_MASK, 10000, 1200000, 3750000), + SC2731_REGU_LINEAR(LDO_EMMCCORE, SC2731_LDO_EMMCCORE_PD, + SC2731_LDO_EMMCCORE_PD_MASK, SC2731_LDO_EMMCCORE_VOL, + SC2731_LDO_EMMCCORE_VOL_MASK, 10000, 1200000, + 3750000), + SC2731_REGU_LINEAR(LDO_SDCORE, SC2731_LDO_SDCORE_PD, + SC2731_LDO_SDCORE_PD_MASK, SC2731_LDO_SDCORE_VOL, + SC2731_LDO_SDCORE_VOL_MASK, 10000, 1200000, 3750000), + SC2731_REGU_LINEAR(LDO_SDIO, SC2731_LDO_SDIO_PD, + SC2731_LDO_SDIO_PD_MASK, SC2731_LDO_SDIO_VOL, + SC2731_LDO_SDIO_VOL_MASK, 10000, 1200000, 3750000), + SC2731_REGU_LINEAR(LDO_WIFIPA, SC2731_LDO_WIFIPA_PD, + SC2731_LDO_WIFIPA_PD_MASK, SC2731_LDO_WIFIPA_VOL, + 
SC2731_LDO_WIFIPA_VOL_MASK, 10000, 1200000, 3750000), + SC2731_REGU_LINEAR(LDO_USB33, SC2731_LDO_USB33_PD, + SC2731_LDO_USB33_PD_MASK, SC2731_LDO_USB33_VOL, + SC2731_LDO_USB33_VOL_MASK, 10000, 1200000, 3750000), + SC2731_REGU_LINEAR(LDO_CAMD0, SC2731_LDO_CAMD0_PD, + SC2731_LDO_CAMD0_PD_MASK, SC2731_LDO_CAMD0_VOL, + SC2731_LDO_CAMD0_VOL_MASK, 6250, 1000000, 1793750), + SC2731_REGU_LINEAR(LDO_CAMD1, SC2731_LDO_CAMD1_PD, + SC2731_LDO_CAMD1_PD_MASK, SC2731_LDO_CAMD1_VOL, + SC2731_LDO_CAMD1_VOL_MASK, 6250, 1000000, 1793750), + SC2731_REGU_LINEAR(LDO_CON, SC2731_LDO_CON_PD, + SC2731_LDO_CON_PD_MASK, SC2731_LDO_CON_VOL, + SC2731_LDO_CON_VOL_MASK, 6250, 1000000, 1793750), + SC2731_REGU_LINEAR(LDO_CAMIO, SC2731_LDO_CAMIO_PD, + SC2731_LDO_CAMIO_PD_MASK, SC2731_LDO_CAMIO_VOL, + SC2731_LDO_CAMIO_VOL_MASK, 6250, 1000000, 1793750), + SC2731_REGU_LINEAR(LDO_SRAM, SC2731_LDO_SRAM_PD, + SC2731_LDO_SRAM_PD_MASK, SC2731_LDO_SRAM_VOL, + SC2731_LDO_SRAM_VOL_MASK, 6250, 1000000, 1793750), +}; + +static int sc2731_regulator_unlock(struct regmap *regmap) +{ + return regmap_write(regmap, SC2731_PWR_WR_PROT_VALUE, + SC2731_WR_UNLOCK); +} + +static int sc2731_regulator_probe(struct platform_device *pdev) +{ + int i, ret; + struct regmap *regmap; + struct regulator_config config = { }; + struct regulator_dev *rdev; + + regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!regmap) { + dev_err(&pdev->dev, "failed to get regmap.\n"); + return -ENODEV; + } + + ret = sc2731_regulator_unlock(regmap); + if (ret) { + dev_err(&pdev->dev, "failed to release regulator lock\n"); + return ret; + } + + config.dev = &pdev->dev; + config.regmap = regmap; + + for (i = 0; i < ARRAY_SIZE(regulators); i++) { + rdev = devm_regulator_register(&pdev->dev, &regulators[i], + &config); + if (IS_ERR(rdev)) { + dev_err(&pdev->dev, "failed to register regulator %s\n", + regulators[i].name); + return PTR_ERR(rdev); + } + } + + return 0; +} + +static struct platform_driver sc2731_regulator_driver = { + .driver = { + .name =
"sc27xx-regulator", + }, + .probe = sc2731_regulator_probe, +}; + +module_platform_driver(sc2731_regulator_driver); + +MODULE_AUTHOR("Chen Junhui "); +MODULE_DESCRIPTION("Spreadtrum SC2731 regulator driver"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 3b6eed8deb37c349bffc7b3b4d722ce4023f8b11 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Dec 2017 04:20:42 +0000 Subject: ASoC: don't use rtd->codec on soc_dev_attr_is_visible() rtd->codec will be removed soon. checking rtd->num_codecs is enough Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c0edac80df34..90f1122d91c4 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -213,7 +213,7 @@ static umode_t soc_dev_attr_is_visible(struct kobject *kobj, if (attr == &dev_attr_pmdown_time.attr) return attr->mode; /* always visible */ - return rtd->codec ? attr->mode : 0; /* enabled only with codec */ + return rtd->num_codecs ? attr->mode : 0; /* enabled only with codec */ } static const struct attribute_group soc_dapm_dev_group = { -- cgit v1.2.3 From e5acfc7d3562ae251cb786b5b52d4345dd16a02c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Dec 2017 04:23:05 +0000 Subject: ASoC: don't use rtd->codec on snd_soc_new_compress() rtd->codec will be removed soon. rtd->codec = rtd->codec_dai->codec, thus, we can use rtd->codec_dai->component instead of it. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/soc-compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index d9b1e6417fb9..81232f4ab614 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -1096,7 +1096,6 @@ static struct snd_compr_ops soc_compr_dyn_ops = { */ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) { - struct snd_soc_codec *codec = rtd->codec; struct snd_soc_platform *platform = rtd->platform; struct snd_soc_component *component; struct snd_soc_rtdcom_list *rtdcom; @@ -1199,8 +1198,9 @@ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) ret = snd_compress_new(rtd->card->snd_card, num, direction, new_name, compr); if (ret < 0) { + component = rtd->codec_dai->component; pr_err("compress asoc: can't create compress for codec %s\n", - codec->component.name); + component->name); goto compr_err; } -- cgit v1.2.3 From 845f80cb401c2ff6b9b8d75ebfc04b83b70268ef Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Dec 2017 04:23:21 +0000 Subject: ASoC: don't use rtd->codec on fsl-asoc-card rtd->codec will be removed soon. rtd->codec = rtd->codec_dai->codec, thus, we can use rtd->codec_dai->component instead of it. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/fsl/fsl-asoc-card.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 1225e0399de8..989be518c4ed 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -442,8 +442,8 @@ static int fsl_asoc_card_late_probe(struct snd_soc_card *card) if (fsl_asoc_card_is_ac97(priv)) { #if IS_ENABLED(CONFIG_SND_AC97_CODEC) - struct snd_soc_codec *codec = rtd->codec; - struct snd_ac97 *ac97 = snd_soc_codec_get_drvdata(codec); + struct snd_soc_component *component = rtd->codec_dai->component; + struct snd_ac97 *ac97 = snd_soc_component_get_drvdata(component); /* * Use slots 3/4 for S/PDIF so SSI won't try to enable -- cgit v1.2.3 From 356a383bd978e58b5324284dc21210467968b4ff Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Dec 2017 04:23:35 +0000 Subject: ASoC: don't use rtd->codec on intel/skylake rtd->codec will be removed soon. rtd->codec = rtd->codec_dai->codec, thus, we can use rtd->codec_dai->component instead of it. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 4380e40c6af0..18138dc872d9 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -536,7 +536,7 @@ static int skl_link_hw_params(struct snd_pcm_substream *substream, snd_soc_dai_set_dma_data(dai, substream, (void *)link_dev); - link = snd_hdac_ext_bus_get_link(ebus, rtd->codec->component.name); + link = snd_hdac_ext_bus_get_link(ebus, codec_dai->component->name); if (!link) return -EINVAL; @@ -619,7 +619,7 @@ static int skl_link_hw_free(struct snd_pcm_substream *substream, link_dev->link_prepared = 0; - link = snd_hdac_ext_bus_get_link(ebus, rtd->codec->component.name); + link = snd_hdac_ext_bus_get_link(ebus, rtd->codec_dai->component->name); if (!link) return -EINVAL; -- cgit v1.2.3 From 187c43df88196c0c4b231771a39e4a46f20a4f7a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Dec 2017 04:23:52 +0000 Subject: ASoC: don't use rtd->codec on Intel/haswell rtd->codec will be removed soon. rtd->codec = rtd->codec_dai->codec, thus, we can use rtd->codec_dai->component instead of it. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/intel/boards/haswell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c index 5e1ea0371c90..3c5160779204 100644 --- a/sound/soc/intel/boards/haswell.c +++ b/sound/soc/intel/boards/haswell.c @@ -76,7 +76,7 @@ static int haswell_rt5640_hw_params(struct snd_pcm_substream *substream, } /* set correct codec filter for DAI format and clock config */ - snd_soc_update_bits(rtd->codec, 0x83, 0xffff, 0x8000); + snd_soc_component_update_bits(codec_dai->component, 0x83, 0xffff, 0x8000); return ret; } -- cgit v1.2.3 From 96e1b9eef4e53a1fea2b889881ca293d153fe0d1 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Dec 2017 04:24:13 +0000 Subject: ASoC: don't use rtd->codec on qcom/apq8016_sbc rtd->codec will be removed soon. rtd->codec = rtd->codec_dai->codec, thus, we can use rtd->codec_dai->component instead of it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/qcom/apq8016_sbc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c index 03851fedd1e2..704428735e3c 100644 --- a/sound/soc/qcom/apq8016_sbc.c +++ b/sound/soc/qcom/apq8016_sbc.c @@ -43,7 +43,7 @@ struct apq8016_sbc_data { static int apq8016_sbc_dai_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_dai *cpu_dai = rtd->cpu_dai; - struct snd_soc_codec *codec; + struct snd_soc_component *component; struct snd_soc_dai_link *dai_link = rtd->dai_link; struct snd_soc_card *card = rtd->card; struct apq8016_sbc_data *pdata = snd_soc_card_get_drvdata(card); @@ -102,15 +102,15 @@ static int apq8016_sbc_dai_init(struct snd_soc_pcm_runtime *rtd) for (i = 0 ; i < dai_link->num_codecs; i++) { struct snd_soc_dai *dai = rtd->codec_dais[i]; - codec = dai->codec; + component = dai->component; /* Set default mclk for internal codec */ - rval = 
snd_soc_codec_set_sysclk(codec, 0, 0, DEFAULT_MCLK_RATE, + rval = snd_soc_component_set_sysclk(component, 0, 0, DEFAULT_MCLK_RATE, SND_SOC_CLOCK_IN); if (rval != 0 && rval != -ENOTSUPP) { dev_warn(card->dev, "Failed to set mclk: %d\n", rval); return rval; } - rval = snd_soc_codec_set_jack(codec, &pdata->jack, NULL); + rval = snd_soc_component_set_jack(component, &pdata->jack, NULL); if (rval != 0 && rval != -ENOTSUPP) { dev_warn(card->dev, "Failed to set jack: %d\n", rval); return rval; -- cgit v1.2.3 From f4a2be1c559e53e31545bdea2c246dbce6b70e1c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 5 Dec 2017 04:24:28 +0000 Subject: ASoC: don't use rtd->codec on samsung/bells rtd->codec will be removed soon. rtd->codec = rtd->codec_dai->codec, thus, we can use rtd->codec_dai->component instead of it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/samsung/bells.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/sound/soc/samsung/bells.c b/sound/soc/samsung/bells.c index 34deba461ae1..0e66cd8ef2f9 100644 --- a/sound/soc/samsung/bells.c +++ b/sound/soc/samsung/bells.c @@ -60,13 +60,13 @@ static int bells_set_bias_level(struct snd_soc_card *card, { struct snd_soc_pcm_runtime *rtd; struct snd_soc_dai *codec_dai; - struct snd_soc_codec *codec; + struct snd_soc_component *component; struct bells_drvdata *bells = card->drvdata; int ret; rtd = snd_soc_get_pcm_runtime(card, card->dai_link[DAI_DSP_CODEC].name); codec_dai = rtd->codec_dai; - codec = codec_dai->codec; + component = codec_dai->component; if (dapm->dev != codec_dai->dev) return 0; @@ -76,7 +76,7 @@ static int bells_set_bias_level(struct snd_soc_card *card, if (dapm->bias_level != SND_SOC_BIAS_STANDBY) break; - ret = snd_soc_codec_set_pll(codec, WM5102_FLL1, + ret = snd_soc_component_set_pll(component, WM5102_FLL1, ARIZONA_FLL_SRC_MCLK1, MCLK_RATE, bells->sysclk_rate); @@ -84,7 +84,7 @@ static int 
bells_set_bias_level(struct snd_soc_card *card, pr_err("Failed to start FLL: %d\n", ret); if (bells->asyncclk_rate) { - ret = snd_soc_codec_set_pll(codec, WM5102_FLL2, + ret = snd_soc_component_set_pll(component, WM5102_FLL2, ARIZONA_FLL_SRC_AIF2BCLK, BCLK2_RATE, bells->asyncclk_rate); @@ -106,27 +106,27 @@ static int bells_set_bias_level_post(struct snd_soc_card *card, { struct snd_soc_pcm_runtime *rtd; struct snd_soc_dai *codec_dai; - struct snd_soc_codec *codec; + struct snd_soc_component *component; struct bells_drvdata *bells = card->drvdata; int ret; rtd = snd_soc_get_pcm_runtime(card, card->dai_link[DAI_DSP_CODEC].name); codec_dai = rtd->codec_dai; - codec = codec_dai->codec; + component = codec_dai->component; if (dapm->dev != codec_dai->dev) return 0; switch (level) { case SND_SOC_BIAS_STANDBY: - ret = snd_soc_codec_set_pll(codec, WM5102_FLL1, 0, 0, 0); + ret = snd_soc_component_set_pll(component, WM5102_FLL1, 0, 0, 0); if (ret < 0) { pr_err("Failed to stop FLL: %d\n", ret); return ret; } if (bells->asyncclk_rate) { - ret = snd_soc_codec_set_pll(codec, WM5102_FLL2, + ret = snd_soc_component_set_pll(component, WM5102_FLL2, 0, 0, 0); if (ret < 0) { pr_err("Failed to stop FLL: %d\n", ret); @@ -148,8 +148,8 @@ static int bells_late_probe(struct snd_soc_card *card) { struct bells_drvdata *bells = card->drvdata; struct snd_soc_pcm_runtime *rtd; - struct snd_soc_codec *wm0010; - struct snd_soc_codec *codec; + struct snd_soc_component *wm0010; + struct snd_soc_component *component; struct snd_soc_dai *aif1_dai; struct snd_soc_dai *aif2_dai; struct snd_soc_dai *aif3_dai; @@ -157,22 +157,22 @@ static int bells_late_probe(struct snd_soc_card *card) int ret; rtd = snd_soc_get_pcm_runtime(card, card->dai_link[DAI_AP_DSP].name); - wm0010 = rtd->codec; + wm0010 = rtd->codec_dai->component; rtd = snd_soc_get_pcm_runtime(card, card->dai_link[DAI_DSP_CODEC].name); - codec = rtd->codec; + component = rtd->codec_dai->component; aif1_dai = rtd->codec_dai; - ret = 
snd_soc_codec_set_sysclk(codec, ARIZONA_CLK_SYSCLK, + ret = snd_soc_component_set_sysclk(component, ARIZONA_CLK_SYSCLK, ARIZONA_CLK_SRC_FLL1, bells->sysclk_rate, SND_SOC_CLOCK_IN); if (ret != 0) { - dev_err(codec->dev, "Failed to set SYSCLK: %d\n", ret); + dev_err(component->dev, "Failed to set SYSCLK: %d\n", ret); return ret; } - ret = snd_soc_codec_set_sysclk(wm0010, 0, 0, SYS_MCLK_RATE, 0); + ret = snd_soc_component_set_sysclk(wm0010, 0, 0, SYS_MCLK_RATE, 0); if (ret != 0) { dev_err(wm0010->dev, "Failed to set WM0010 clock: %d\n", ret); return ret; @@ -182,20 +182,20 @@ static int bells_late_probe(struct snd_soc_card *card) if (ret != 0) dev_err(aif1_dai->dev, "Failed to set AIF1 clock: %d\n", ret); - ret = snd_soc_codec_set_sysclk(codec, ARIZONA_CLK_OPCLK, 0, + ret = snd_soc_component_set_sysclk(component, ARIZONA_CLK_OPCLK, 0, SYS_MCLK_RATE, SND_SOC_CLOCK_OUT); if (ret != 0) - dev_err(codec->dev, "Failed to set OPCLK: %d\n", ret); + dev_err(component->dev, "Failed to set OPCLK: %d\n", ret); if (card->num_rtd == DAI_CODEC_CP) return 0; - ret = snd_soc_codec_set_sysclk(codec, ARIZONA_CLK_ASYNCCLK, + ret = snd_soc_component_set_sysclk(component, ARIZONA_CLK_ASYNCCLK, ARIZONA_CLK_SRC_FLL2, bells->asyncclk_rate, SND_SOC_CLOCK_IN); if (ret != 0) { - dev_err(codec->dev, "Failed to set ASYNCCLK: %d\n", ret); + dev_err(component->dev, "Failed to set ASYNCCLK: %d\n", ret); return ret; } @@ -221,7 +221,7 @@ static int bells_late_probe(struct snd_soc_card *card) return ret; } - ret = snd_soc_codec_set_sysclk(wm9081_dai->codec, WM9081_SYSCLK_MCLK, + ret = snd_soc_component_set_sysclk(wm9081_dai->component, WM9081_SYSCLK_MCLK, 0, SYS_MCLK_RATE, 0); if (ret != 0) { dev_err(wm9081_dai->dev, "Failed to set MCLK: %d\n", ret); -- cgit v1.2.3 From 25ab5abf5b141d7fd13eed506c7458aa04749c29 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Dec 2017 10:14:42 -0300 Subject: tools build feature: Check if pthread_barrier_t is available As 'perf bench futex 
wake-parallel' will use this, which is not available in older systems such as versions of the Android NDK used in my container build tests (r12b and r15c at the moment). Cc: Adrian Hunter Cc: David Ahern Cc: Davidlohr Bueso Cc: James Yang Cc: Kim Phillips Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-1i7iv54in4wj08lwo55b0pzv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-pthread-barrier.c | 12 ++++++++++++ tools/perf/Makefile.config | 4 ++++ 5 files changed, 26 insertions(+) create mode 100644 tools/build/feature/test-pthread-barrier.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index c71a05b9c984..e52fcefee379 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -56,6 +56,7 @@ FEATURE_TESTS_BASIC := \ libunwind-arm \ libunwind-aarch64 \ pthread-attr-setaffinity-np \ + pthread-barrier \ stackprotector-all \ timerfd \ libdw-dwarf-unwind \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 96982640fbf8..cff38f342283 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -37,6 +37,7 @@ FILES= \ test-libunwind-debug-frame-arm.bin \ test-libunwind-debug-frame-aarch64.bin \ test-pthread-attr-setaffinity-np.bin \ + test-pthread-barrier.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ @@ -79,6 +80,9 @@ $(OUTPUT)test-hello.bin: $(OUTPUT)test-pthread-attr-setaffinity-np.bin: $(BUILD) -D_GNU_SOURCE -lpthread +$(OUTPUT)test-pthread-barrier.bin: + $(BUILD) -lpthread + $(OUTPUT)test-stackprotector-all.bin: $(BUILD) -fstack-protector-all diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 4112702e4aed..6fdf83263ab7 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -118,6 +118,10 @@ # include
"test-pthread-attr-setaffinity-np.c" #undef main +#define main main_test_pthread_barrier +# include "test-pthread-barrier.c" +#undef main + #define main main_test_sched_getcpu # include "test-sched_getcpu.c" #undef main @@ -187,6 +191,7 @@ int main(int argc, char *argv[]) main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); + main_test_pthread_barrier(); main_test_lzma(); main_test_get_cpuid(); main_test_bpf(); diff --git a/tools/build/feature/test-pthread-barrier.c b/tools/build/feature/test-pthread-barrier.c new file mode 100644 index 000000000000..0558d9334d97 --- /dev/null +++ b/tools/build/feature/test-pthread-barrier.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +int main(void) +{ + pthread_barrier_t barrier; + + pthread_barrier_init(&barrier, NULL, 1); + pthread_barrier_wait(&barrier); + return pthread_barrier_destroy(&barrier); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f6786fa2419f..2c437baf8364 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -263,6 +263,10 @@ ifeq ($(feature-pthread-attr-setaffinity-np), 1) CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP endif +ifeq ($(feature-pthread-barrier), 1) + CFLAGS += -DHAVE_PTHREAD_BARRIER +endif + ifndef NO_BIONIC $(call feature_check,bionic) ifeq ($(feature-bionic), 1) -- cgit v1.2.3 From 8085e5ab41dadce558808b4186a6ea9d0862d3c0 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sun, 26 Nov 2017 20:21:01 -0800 Subject: perf bench futex: Sync waker threads Waker threads in the futex wake-parallel benchmark are started by a loop using pthread_create(). However, there is no synchronization for when the waker threads wake the waiting threads. Comparison of the waker threads' measurement timestamps show they are not all running concurrently because older waker threads finish their task before newer waker threads even start. 
This patch uses a barrier to better synchronize the waker threads. Signed-off-by: James Yang Link: http://lkml.kernel.org/r/20171127042101.3659-4-dave@stgolabs.net Signed-off-by: Davidlohr Bueso [ Disable the wake-parallel test for systems without pthread_barrier_t ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-wake-parallel.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 4488c27e8a43..69d8fdc87315 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -7,7 +7,17 @@ * for each individual thread to service its share of work. Ultimately * it can be used to measure futex_wake() changes. */ +#include "bench.h" +#include +#include "../util/debug.h" +#ifndef HAVE_PTHREAD_BARRIER +int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused) +{ + pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__); + return 0; +} +#else /* HAVE_PTHREAD_BARRIER */ /* For the CLR_() macros */ #include #include @@ -15,11 +25,9 @@ #include #include "../util/stat.h" #include -#include #include #include #include -#include "bench.h" #include "futex.h" #include "cpumap.h" @@ -43,6 +51,7 @@ static bool done = false, silent = false, fshared = false; static unsigned int nblocked_threads = 0, nwaking_threads = 0; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; +static pthread_barrier_t barrier; static struct stats waketime_stats, wakeup_stats; static unsigned int threads_starting; static int futex_flag = 0; @@ -65,6 +74,8 @@ static void *waking_workerfn(void *arg) struct thread_data *waker = (struct thread_data *) arg; struct timeval start, end; + pthread_barrier_wait(&barrier); + gettimeofday(&start, NULL); waker->nwoken = futex_wake(&futex, nwakes, futex_flag); @@ -85,6 +96,8 @@ static void wakeup_threads(struct 
thread_data *td, pthread_attr_t thread_attr) pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); + pthread_barrier_init(&barrier, NULL, nwaking_threads + 1); + /* create and block all threads */ for (i = 0; i < nwaking_threads; i++) { /* @@ -97,9 +110,13 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) err(EXIT_FAILURE, "pthread_create"); } + pthread_barrier_wait(&barrier); + for (i = 0; i < nwaking_threads; i++) if (pthread_join(td[i].worker, NULL)) err(EXIT_FAILURE, "pthread_join"); + + pthread_barrier_destroy(&barrier); } static void *blocked_workerfn(void *arg __maybe_unused) @@ -303,3 +320,4 @@ int bench_futex_wake_parallel(int argc, const char **argv) free(blocked_worker); return ret; } +#endif /* HAVE_PTHREAD_BARRIER */ -- cgit v1.2.3 From 36c263607d36c6a3788c09301d9f5fe35404048a Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 24 Nov 2017 10:46:37 +0100 Subject: perf annotate: Fix unnecessary memory allocation for s390x This patch fixes a bug introduced with commit d9f8dfa9baf9 ("perf annotate s390: Implement jump types for perf annotate"). 'perf annotate' displays annotated assembler output by reading output of command objdump and parsing the disassembled lines. For each shown mnemonic this function sequence is executed: disasm_line__new() | +--> disasm_line__init_ins() | +--> ins__find() | +--> arch->associate_instruction_ops() The s390x specific function assigned to function pointer associate_instruction_ops refers to function s390__associate_ins_ops(). This function checks for supported mnemonics and assigns a NULL pointer to unsupported mnemonics. However even the NULL pointer is added to the architecture dependent instruction array. This leads to an extremely large architecture instruction array (due to array resize logic in function arch__grow_instructions()). Depending on the objdump output being parsed the array can end up with several ten-thousand elements.
This patch checks if a mnemonic is supported and only adds supported ones into the architecture instruction array. The array does not contain elements with NULL pointers anymore. Before the patch (With some debug printf output): [root@s35lp76 perf]# time ./perf annotate --stdio > /tmp/xxxbb real 8m49.679s user 7m13.008s sys 0m1.649s [root@s35lp76 perf]# fgrep '__ins__find sorted:1 nr_instructions:' /tmp/xxxbb | tail -1 __ins__find sorted:1 nr_instructions:87433 ins:0x341583c0 [root@s35lp76 perf]# The number of different s390x branch/jump/call/return instructions entered into the array is 87433. After the patch (With some printf debug output:) [root@s35lp76 perf]# time ./perf annotate --stdio > /tmp/xxxaa real 1m24.553s user 0m0.587s sys 0m1.530s [root@s35lp76 perf]# fgrep '__ins__find sorted:1 nr_instructions:' /tmp/xxxaa | tail -1 __ins__find sorted:1 nr_instructions:56 ins:0x3f406570 [root@s35lp76 perf]# The number of different s390x branch/jump/call/return instructions entered into the array is 56 which is sensible. 
Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Acked-by: Ravi Bangoria Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20171124094637.55558-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/annotate/instructions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index e0e466c650df..8c72b44444cb 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -18,7 +18,8 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na if (!strcmp(name, "br")) ops = &ret_ops; - arch__associate_ins_ops(arch, name, ops); + if (ops) + arch__associate_ins_ops(arch, name, ops); return ops; } -- cgit v1.2.3 From 35a8a148d8c1ee9e5ae18f9565a880490f816f89 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 28 Nov 2017 08:56:32 +0100 Subject: perf annotate: Fix objdump comment parsing for Intel mov disassembly The command 'perf annotate' parses the output of objdump and also investigates the comments produced by objdump. For example the output of objdump produces (on x86): 23eee: 4c 8b 3d 13 01 21 00 mov 0x210113(%rip),%r15 # 234008 and the function mov__parse() is called to investigate the complete line. Mov__parse() breaks this line into several parts and finally calls function comment__symbol() to parse the data after the comment character '#'. Comment__symbol() expects a hexadecimal address followed by a symbol in '<' and '>' brackets. However the 2nd parameter given to function comment__symbol() always points to the comment character '#'. The address parsing always returns 0 because the character '#' is not a digit and strtoull() fails without being noticed.
Fix this by advancing the second parameter to function comment__symbol() by one byte before invocation and add an error check after strtoull() has been called. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Acked-by: Ravi Bangoria Cc: Heiko Carstens Cc: Martin Schwidefsky Fixes: 6de783b6f50f ("perf annotate: Resolve symbols using objdump comment") Link: http://lkml.kernel.org/r/20171128075632.72182-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 22ea7936d92f..facad1e279a8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -322,6 +322,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; *addrp = strtoull(comment, &endptr, 16); + if (endptr == comment) + return 0; name = strchr(endptr, '<'); if (name == NULL) return -1; @@ -435,8 +437,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m return 0; comment = ltrim(comment); - comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; @@ -480,7 +482,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops return 0; comment = ltrim(comment); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; } -- cgit v1.2.3 From 33fec3e393dc1c55737cfb9c876b5c0da0d6f380 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 1 Dec 2017 18:57:25 +0800 Subject: perf rblist: Create rblist__exit() function 
Currently we have a rblist__delete() which is used to delete a rblist. While rblist__delete() will free the pointer of rblist at the end. It's an inconvenience for the user to delete a rblist which is not allocated by something like malloc(). For example, the rblist is embedded in a larger data structure. This patch creates a new function rblist__exit() which is similar to rblist__delete() but it will not free the pointer of rblist. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512125856-22056-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/rblist.c | 19 ++++++++++++------- tools/perf/util/rblist.h | 1 + 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0dfe27d99458..0efc3258c648 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -101,16 +101,21 @@ void rblist__init(struct rblist *rblist) return; } +void rblist__exit(struct rblist *rblist) +{ + struct rb_node *pos, *next = rb_first(&rblist->entries); + + while (next) { + pos = next; + next = rb_next(pos); + rblist__remove_node(rblist, pos); + } +} + void rblist__delete(struct rblist *rblist) { if (rblist != NULL) { - struct rb_node *pos, *next = rb_first(&rblist->entries); - - while (next) { - pos = next; - next = rb_next(pos); - rblist__remove_node(rblist, pos); - } + rblist__exit(rblist); free(rblist); } } diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 4c8638a22571..76df15c27f5f 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -29,6 +29,7 @@ struct rblist { }; void rblist__init(struct rblist *rblist); +void rblist__exit(struct rblist *rblist); void rblist__delete(struct rblist *rblist); int rblist__add_node(struct rblist *rblist, const void *new_entry); void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node); 
-- cgit v1.2.3 From b984aff7811bbac75b3f05931643d815067cf45c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 1 Dec 2017 18:57:28 +0800 Subject: perf stat: Add rbtree node_delete op In current stat-shadow.c, the rbtree deleting is ignored. The patch adds the implementation to node_delete method of rblist. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512125856-22056-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 855e35cbb1dc..57ec22513971 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -87,6 +87,16 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, return &nd->rb_node; } +static void saved_value_delete(struct rblist *rblist __maybe_unused, + struct rb_node *rb_node) +{ + struct saved_value *v; + + BUG_ON(!rb_node); + v = container_of(rb_node, struct saved_value, rb_node); + free(v); +} + static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, int cpu, bool create) @@ -114,7 +124,7 @@ void perf_stat__init_shadow_stats(void) rblist__init(&runtime_saved_values); runtime_saved_values.node_cmp = saved_value_cmp; runtime_saved_values.node_new = saved_value_new; - /* No delete for now */ + runtime_saved_values.node_delete = saved_value_delete; } static int evsel_context(struct perf_evsel *evsel) -- cgit v1.2.3 From 8d3cd4c3d3ab5f4f9edd5c593b7743f7fbd3526d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 1 Dec 2017 11:44:30 -0300 Subject: perf thread_map: Add method to map all threads in the system Reusing the thread_map__new_by_uid() proc scanning already in place to return a map with all threads in the system. 
Based-on-a-patch-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: https://lkml.kernel.org/n/tip-khh28q0wwqbqtrk32bfe07hd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread_map.c | 22 ++++++++++++++++------ tools/perf/util/thread_map.h | 1 + 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index be0d5a736dea..2b653853eec2 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -struct thread_map *thread_map__new_by_uid(uid_t uid) +static struct thread_map *__thread_map__new_all_cpus(uid_t uid) { DIR *proc; int max_threads = 32, items, i; @@ -113,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) while ((dirent = readdir(proc)) != NULL) { char *end; bool grow = false; - struct stat st; pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ @@ -121,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); - if (stat(path, &st) != 0) - continue; + if (uid != UINT_MAX) { + struct stat st; - if (st.st_uid != uid) - continue; + if (stat(path, &st) != 0 || st.st_uid != uid) + continue; + } snprintf(path, sizeof(path), "/proc/%d/task", pid); items = scandir(path, &namelist, filter, NULL); @@ -178,6 +178,16 @@ out_free_closedir: goto out_closedir; } +struct thread_map *thread_map__new_all_cpus(void) +{ + return __thread_map__new_all_cpus(UINT_MAX); +} + +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ + return __thread_map__new_all_cpus(uid); +} + struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) { if (pid != -1) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index f15803985435..07a765fb22bb 100644 --- 
a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -23,6 +23,7 @@ struct thread_map *thread_map__new_dummy(void); struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new_all_cpus(void); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); struct thread_map *thread_map__new_event(struct thread_map_event *event); -- cgit v1.2.3 From 1dc4ddf112a408e607a073d951b962b6c6e2bd6c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 30 Nov 2017 09:49:25 +0100 Subject: perf s390: Always build with -fPIC On s390, object files must be compiled with position-independent code in order to be incrementally linked or linked to shared libraries. Therefore, add -fPIC to the CFLAGS for s390 to ensure each object file is built properly. Reported-by: Jonathan Hermann Signed-off-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Thomas Richter Cc: linux s390 list LPU-Reference: 1512031765-9382-1-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-a8wga8hrl0d0r84cal96fmgv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 2c437baf8364..bf86c09ca889 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -41,6 +41,7 @@ ifeq ($(SRCARCH),x86) LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind endif NO_PERF_REGS := 0 + CFLAGS += -fPIC endif ifeq ($(SRCARCH),arm) -- cgit v1.2.3 From 54e32dc0f89ec7fcb87df6c45b096e57f050f22b Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 17 Oct 2017 00:02:18 +0530 Subject: perf pmu: Pass pmu as a parameter to get_cpuid_str() The cpuid string will not be same on all CPUs on heterogeneous platforms like ARM's big.LITTLE, adding provision(using pmu->cpus) to find cpuid
string from associated CPUs of PMU CORE device. Also optimise arguments to function pmu_add_cpu_aliases. Signed-off-by: Ganapatrao Kulkarni Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Catalin Marinas Cc: Jayachandran C Cc: Jonathan Cameron Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Shaokun Zhang Link: http://lkml.kernel.org/r/20171016183222.25750-2-ganapatrao.kulkarni@cavium.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/header.c | 2 +- tools/perf/arch/x86/util/header.c | 2 +- tools/perf/util/header.h | 3 ++- tools/perf/util/metricgroup.c | 4 ++-- tools/perf/util/pmu.c | 22 +++++++++++----------- tools/perf/util/pmu.h | 2 +- 6 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 7a4cf80c207a..0b242664f5ea 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -35,7 +35,7 @@ get_cpuid(char *buffer, size_t sz) } char * -get_cpuid_str(void) +get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { char *bufp; diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 33027c5e6f92..b626d2bad9f1 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -66,7 +66,7 @@ get_cpuid(char *buffer, size_t sz) } char * -get_cpuid_str(void) +get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { char *buf = malloc(128); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 91befc3b550d..317fb901e47f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -9,6 +9,7 @@ #include #include "event.h" #include "env.h" +#include "pmu.h" enum { HEADER_RESERVED = 0, /* always cleared */ @@ -171,5 +172,5 @@ int write_padded(struct feat_fd *fd, const void *bf, */ int get_cpuid(char *buffer, size_t sz); -char *get_cpuid_str(void); +char *get_cpuid_str(struct perf_pmu *pmu 
__maybe_unused); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 6fd709017bbc..e48410c99b39 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -274,7 +274,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw) { - struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; int i; struct rblist groups; @@ -372,7 +372,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { - struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; int ret = -EINVAL; int i, j; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 80fb1593913a..4e7dd3a0f123 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -542,12 +542,12 @@ static bool pmu_is_uncore(const char *name) * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". 
*/ -char * __weak get_cpuid_str(void) +char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { return NULL; } -static char *perf_pmu__getcpuid(void) +static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; static bool printed; @@ -556,7 +556,7 @@ static char *perf_pmu__getcpuid(void) if (cpuid) cpuid = strdup(cpuid); if (!cpuid) - cpuid = get_cpuid_str(); + cpuid = get_cpuid_str(pmu); if (!cpuid) return NULL; @@ -567,10 +567,10 @@ static char *perf_pmu__getcpuid(void) return cpuid; } -struct pmu_events_map *perf_pmu__find_map(void) +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) { struct pmu_events_map *map; - char *cpuid = perf_pmu__getcpuid(); + char *cpuid = perf_pmu__getcpuid(pmu); int i; i = 0; @@ -593,13 +593,14 @@ struct pmu_events_map *perf_pmu__find_map(void) * to the current running CPU. Then, add all PMU events from that table * as aliases. */ -static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { int i; struct pmu_events_map *map; struct pmu_event *pe; + const char *name = pmu->name; - map = perf_pmu__find_map(); + map = perf_pmu__find_map(pmu); if (!map) return; @@ -661,21 +662,20 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, &aliases)) return NULL; - pmu_add_cpu_aliases(&aliases, name); pmu = zalloc(sizeof(*pmu)); if (!pmu) return NULL; pmu->cpus = pmu_cpumask(name); - + pmu->name = strdup(name); + pmu->type = type; pmu->is_uncore = pmu_is_uncore(name); + pmu_add_cpu_aliases(&aliases, pmu); INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); - pmu->name = strdup(name); - pmu->type = type; list_add_tail(&pmu->list, &pmus); pmu->default_config = perf_pmu__get_default_config(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 27c75e635866..76fecec7b3f9 100644 --- 
a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -92,6 +92,6 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); -struct pmu_events_map *perf_pmu__find_map(void); +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); #endif /* __PMU_H */ -- cgit v1.2.3 From b57df28893543db3466172088786fae39b7fc3ad Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 17 Oct 2017 00:02:19 +0530 Subject: perf tools arm64: Add support for get_cpuid_str function. The get_cpuid_str function returns the MIDR string of the first online cpu from the range of cpus associated with the PMU CORE device. Signed-off-by: Ganapatrao Kulkarni Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Catalin Marinas Cc: Ganapatrao Kulkarni Cc: Jayachandran C Cc: Jonathan Cameron Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Shaokun Zhang Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171016183222.25750-3-ganapatrao.kulkarni@cavium.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/header.c | 65 +++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 tools/perf/arch/arm64/util/header.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index cef6fb38d17e..b1ab72d2a42e 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,3 +1,4 @@ +libperf-y += header.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c new file mode 100644 index 000000000000..534cd2507d83 --- /dev/null +++ b/tools/perf/arch/arm64/util/header.c @@ -0,0 +1,65 @@ +#include +#include +#include +#include "header.h" + +#define MIDR "/regs/identification/midr_el1" +#define MIDR_SIZE 19 +#define MIDR_REVISION_MASK 0xf +#define 
MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) + +char *get_cpuid_str(struct perf_pmu *pmu) +{ + char *buf = NULL; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + int cpu; + u64 midr = 0; + struct cpu_map *cpus; + FILE *file; + + if (!sysfs || !pmu || !pmu->cpus) + return NULL; + + buf = malloc(MIDR_SIZE); + if (!buf) + return NULL; + + /* read midr from list of cpus mapped to this pmu */ + cpus = cpu_map__get(pmu->cpus); + for (cpu = 0; cpu < cpus->nr; cpu++) { + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, + sysfs, cpus->map[cpu]); + + file = fopen(path, "r"); + if (!file) { + pr_debug("fopen failed for file %s\n", path); + continue; + } + + if (!fgets(buf, MIDR_SIZE, file)) { + fclose(file); + continue; + } + fclose(file); + + /* Ignore/clear Variant[23:20] and + * Revision[3:0] of MIDR + */ + midr = strtoul(buf, NULL, 16); + midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK)); + scnprintf(buf, MIDR_SIZE, "0x%016lx", midr); + /* got midr break loop */ + break; + } + + if (!midr) { + pr_err("failed to get cpuid string for PMU %s\n", pmu->name); + free(buf); + buf = NULL; + } + + cpu_map__put(cpus); + return buf; +} -- cgit v1.2.3 From ca0168e8a77cf833f8c9ac1d26a3a4012bab4f72 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Dec 2017 09:32:25 -0500 Subject: alloc_super(): do ->s_umount initialization earlier ... so that failure exits could count on it having been done. 
Signed-off-by: Al Viro --- fs/super.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/super.c b/fs/super.c index d4e33e8f1e6f..7ff1349609e4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -191,6 +191,24 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, INIT_LIST_HEAD(&s->s_mounts); s->s_user_ns = get_user_ns(user_ns); + init_rwsem(&s->s_umount); + lockdep_set_class(&s->s_umount, &type->s_umount_key); + /* + * sget() can have s_umount recursion. + * + * When it cannot find a suitable sb, it allocates a new + * one (this one), and tries again to find a suitable old + * one. + * + * In case that succeeds, it will acquire the s_umount + * lock of the old one. Since these are clearly distrinct + * locks, and this object isn't exposed yet, there's no + * risk of deadlocks. + * + * Annotate this by putting this lock in a different + * subclass. + */ + down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); if (security_sb_alloc(s)) goto fail; @@ -218,25 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, goto fail; if (list_lru_init_memcg(&s->s_inode_lru)) goto fail; - - init_rwsem(&s->s_umount); - lockdep_set_class(&s->s_umount, &type->s_umount_key); - /* - * sget() can have s_umount recursion. - * - * When it cannot find a suitable sb, it allocates a new - * one (this one), and tries again to find a suitable old - * one. - * - * In case that succeeds, it will acquire the s_umount - * lock of the old one. Since these are clearly distrinct - * locks, and this object isn't exposed yet, there's no - * risk of deadlocks. - * - * Annotate this by putting this lock in a different - * subclass. 
- */ - down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); s->s_count = 1; atomic_set(&s->s_active, 1); mutex_init(&s->s_vfs_rename_mutex); -- cgit v1.2.3 From be17f1ce8572d6e15559897421fb7041360bb64a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 30 Nov 2017 15:49:10 +0100 Subject: mmc: core: properly init drv_type When the latest version of parsing the new eMMC bindings was moved from core.c to mmc.c, it was overlooked that drv_type could be used uninitialized. Fix it! Fixes: 6186d06c519e21 ("mmc: parse new binding for eMMC fixed driver type") Reported-by: Colin Ian King Reported-by: Dan Carpenter Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index d209fb466979..208a762b87ef 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1290,7 +1290,7 @@ out_err: static void mmc_select_driver_type(struct mmc_card *card) { - int card_drv_type, drive_strength, drv_type; + int card_drv_type, drive_strength, drv_type = 0; int fixed_drv_type = card->host->fixed_drv_type; card_drv_type = card->ext_csd.raw_driver_strength | -- cgit v1.2.3 From 14b22ae028de56cca980171db625d1e9925c8fba Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Thu, 24 Aug 2017 16:30:58 +0530 Subject: perf pmu: Add helper function is_pmu_core to detect PMU CORE devices On some platforms, PMU core devices sysfs name is not cpu. Adding function is_pmu_core to detect PMU core devices using core device specific hints in sysfs. For arm64 platforms, all core devices have file "cpus" in sysfs. 
Signed-off-by: Ganapatrao Kulkarni Tested-by: Shaokun Zhang Tested-by: Jin Yao Acked-by: Will Deacon Link: https://lkml.kernel.org/n/tip-y1woxt1k2pqqwpprhonnft2s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 4e7dd3a0f123..732ff579ec65 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -536,6 +536,34 @@ static bool pmu_is_uncore(const char *name) return !!cpus; } +/* + * PMU CORE devices have different name other than cpu in sysfs on some + * platforms. looking for possible sysfs files to identify as core device. + */ +static int is_pmu_core(const char *name) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + /* Look for cpu sysfs (x86 and others) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs); + if ((stat(path, &st) == 0) && + (strncmp(name, "cpu", strlen("cpu")) == 0)) + return 1; + + /* Look for cpu sysfs (specific to arm) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", + sysfs, name); + if (stat(path, &st) == 0) + return 1; + + return 0; +} + /* * Return the CPU id as a raw string. * @@ -609,7 +637,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) */ i = 0; while (1) { - const char *pname; pe = &map->table[i++]; if (!pe->name) { @@ -618,9 +645,13 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) break; } - pname = pe->pmu ? 
pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) - continue; + if (!is_pmu_core(name)) { + /* check for uncore devices */ + if (pe->pmu == NULL) + continue; + if (strncmp(pe->pmu, name, strlen(pe->pmu))) + continue; + } /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, -- cgit v1.2.3 From d3964221ea14690fe51cb57331b88b5c69e4d2cb Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 17 Oct 2017 00:02:21 +0530 Subject: perf vendor events arm64: Add ThunderX2 implementation defined pmu core events This is not a full event list, but a short list of useful events. Signed-off-by: Ganapatrao Kulkarni Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Catalin Marinas Cc: Ganapatrao Kulkarni Cc: Jayachandran C Cc: Jonathan Cameron Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Shaokun Zhang Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171016183222.25750-5-ganapatrao.kulkarni@cavium.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/cavium/thunderx2-imp-def.json | 62 ++++++++++++++++++++++ tools/perf/pmu-events/arch/arm64/mapfile.csv | 15 ++++++ 2 files changed, 77 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json create mode 100644 tools/perf/pmu-events/arch/arm64/mapfile.csv diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json new file mode 100644 index 000000000000..2db45c40ebc7 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json @@ -0,0 +1,62 @@ +[ + { + "PublicDescription": "Attributable Level 1 data cache access, read", + "EventCode": "0x40", + "EventName": "l1d_cache_rd", + "BriefDescription": "L1D cache read", + }, + { + "PublicDescription": "Attributable Level 1 data cache access, write ", + "EventCode": "0x41", + "EventName": "l1d_cache_wr", + "BriefDescription": "L1D cache write", + }, + { 
+ "PublicDescription": "Attributable Level 1 data cache refill, read", + "EventCode": "0x42", + "EventName": "l1d_cache_refill_rd", + "BriefDescription": "L1D cache refill read", + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, write", + "EventCode": "0x43", + "EventName": "l1d_cache_refill_wr", + "BriefDescription": "L1D refill write", + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill, read", + "EventCode": "0x4C", + "EventName": "l1d_tlb_refill_rd", + "BriefDescription": "L1D tlb refill read", + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill, write", + "EventCode": "0x4D", + "EventName": "l1d_tlb_refill_wr", + "BriefDescription": "L1D tlb refill write", + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access, read", + "EventCode": "0x4E", + "EventName": "l1d_tlb_rd", + "BriefDescription": "L1D tlb read", + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access, write", + "EventCode": "0x4F", + "EventName": "l1d_tlb_wr", + "BriefDescription": "L1D tlb write", + }, + { + "PublicDescription": "Bus access read", + "EventCode": "0x60", + "EventName": "bus_access_rd", + "BriefDescription": "Bus access read", + }, + { + "PublicDescription": "Bus access write", + "EventCode": "0x61", + "EventName": "bus_access_wr", + "BriefDescription": "Bus access write", + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv new file mode 100644 index 000000000000..219d6756134e --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -0,0 +1,15 @@ +# Format: +# MIDR,Version,JSON/file/pathname,Type +# +# where +# MIDR Processor version +# Variant[23:20] and Revision [3:0] should be zero. +# Version could be used to track version of of JSON file +# but currently unused. +# JSON/file/pathname is the path to JSON file, relative +# to tools/perf/pmu-events/arch/arm64/. 
+# Type is core, uncore etc +# +# +#Family-model,Version,Filename,EventType +0x00000000420f5160,v1,cavium,core -- cgit v1.2.3 From de3d0f12be476271d03f1ddb5a7c241c2f07f126 Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 17 Oct 2017 00:02:22 +0530 Subject: perf pmu: Add check for valid cpuid in perf_pmu__find_map() On some platforms (arm/arm64) which use a cpus map to get the corresponding cpuid string, cpuid can be NULL for PMUs other than CORE PMUs. Add a check for NULL cpuid in the function perf_pmu__find_map to avoid a segmentation fault. Signed-off-by: Ganapatrao Kulkarni Cc: Alexander Shishkin Cc: Catalin Marinas Cc: Ganapatrao Kulkarni Cc: Jayachandran C Cc: Jonathan Cameron Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171016183222.25750-6-ganapatrao.kulkarni@cavium.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 732ff579ec65..8b7c151579c0 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -601,6 +601,12 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) char *cpuid = perf_pmu__getcpuid(pmu); int i; + /* on some platforms which uses cpus map, cpuid can be NULL for + * PMUs other than CORE PMUs. + */ + if (!cpuid) + return NULL; + i = 0; for (;;) { map = &pmu_events_map[i++]; -- cgit v1.2.3 From c6707fdef7e2c1eb5458988b49c33497affdebbf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Dec 2017 12:23:08 -0300 Subject: perf tools: Fix up build in hardened environments On Fedora systems the perl and python CFLAGS/LDFLAGS include the hardened specs from redhat-rpm-config package. 
We apply them only for perl/python objects, which makes them not compatible with the rest of the objects and the build fails with: /usr/bin/ld: perf-in.o: relocation R_X86_64_32 against `.rodata.str1.1' can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: libperf.a(libperf-in.o): relocation R_X86_64_32S against `.text' can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: final link failed: Nonrepresentable section on output collect2: error: ld returned 1 exit status make[2]: *** [Makefile.perf:507: perf] Error 1 make[1]: *** [Makefile.perf:210: sub-make] Error 2 make: *** [Makefile:69: all] Error 2 Mainly it's caused by perl/python objects being compiled with: -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 which makes the final link impossible, because it will check for 'proper' objects with the following option: -specs=/usr/lib/rpm/redhat/redhat-hardened-ld Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20171204082437.GC30564@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index bf86c09ca889..808066c823f7 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -185,9 +185,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC_NO_CLANG), 1) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) - endif + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif @@ -577,7 +575,6 @@ ifndef NO_GTK2 endif endif - ifdef NO_LIBPERL CFLAGS += -DNO_LIBPERL else @@ -585,6 +582,8 @@ 
else PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) -- cgit v1.2.3 From f74b9d3a1ac2b9c3ae1475f474ca0e6644746fbf Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:37 +0000 Subject: perf evlist: Remove 'overwrite' parameter from perf_evlist__mmap Now none of perf_evlist__mmap's users set 'overwrite'. Remove it from the argument list. Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-2-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 5 ++--- tools/perf/util/evlist.h | 3 +-- tools/perf/util/python.c | 2 +- 17 files changed, 18 insertions(+), 20 deletions(-) diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index b59678e8c1e2..06abe8108b33 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -84,7 +84,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe CHECK__(perf_evlist__open(evlist)); - 
CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false)); + CHECK__(perf_evlist__mmap(evlist, UINT_MAX)); pc = evlist->mmap[0].base; ret = perf_read_tsc_conversion(pc, &tc); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 597c7de9bec9..98853162eae9 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1044,7 +1044,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) goto out; } - if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) { + if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) { ui__error("Failed to mmap the events: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); perf_evlist__close(evlist); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0077724fb24f..540461f5e345 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -907,7 +907,7 @@ try_again: } } - if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { + if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) { ui__error("Failed to mmap with %d (%s)\n", errno, str_error_r(errno, msg, sizeof(msg))); goto out_err; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 84debdbad327..7c57898095ea 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2437,7 +2437,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_apply_filters; - err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); if (err < 0) goto out_error_mmap; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 43a8c6ac4070..cf37e43c42f3 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -59,7 +59,7 @@ static int do_test(struct perf_evlist *evlist, int mmap_pages, int err; char sbuf[STRERR_BUFSIZE]; - err = perf_evlist__mmap(evlist, mmap_pages, false); + err = 
perf_evlist__mmap(evlist, mmap_pages); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 34c22cdf4d5d..c433dd30975a 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -167,7 +167,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, opts.mmap_pages, false); + err = perf_evlist__mmap(evlist, opts.mmap_pages); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index fcc8984bc329..3bf7b145b826 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -639,7 +639,7 @@ static int do_test_code_reading(bool try_kcore) break; } - ret = perf_evlist__mmap(evlist, UINT_MAX, false); + ret = perf_evlist__mmap(evlist, UINT_MAX); if (ret < 0) { pr_debug("perf_evlist__mmap failed\n"); goto out_put; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 842d33637a18..c46530918938 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -95,7 +95,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un goto out_err; } - CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false)); + CHECK__(perf_evlist__mmap(evlist, UINT_MAX)); /* * First, test that a 'comm' event can be found when the event is diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 91f10d6d9ae2..c0e971da965c 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -94,7 +94,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[i] = 1 + rand() % 127; } - if (perf_evlist__mmap(evlist, 128, false) < 0) { + if (perf_evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", 
errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index d9619d265314..97c9407d02a0 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -64,7 +64,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, UINT_MAX, false); + err = perf_evlist__mmap(evlist, UINT_MAX); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index c34904d37705..0afafab85238 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -141,7 +141,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * fds in the same CPU to be injected in the same mmap ring buffer * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)). 
*/ - err = perf_evlist__mmap(evlist, opts.mmap_pages, false); + err = perf_evlist__mmap(evlist, opts.mmap_pages); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index c6937ed12e6b..f6c72f915d48 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, 128, false); + err = perf_evlist__mmap(evlist, 128); if (err < 0) { pr_debug("failed to mmap event: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 7d3f4bf9534f..33e00295a972 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -449,7 +449,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out; } - err = perf_evlist__mmap(evlist, UINT_MAX, false); + err = perf_evlist__mmap(evlist, UINT_MAX); if (err) { pr_debug("perf_evlist__mmap failed!\n"); goto out_err; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 5d06ac81f7f1..01b62b81751b 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -101,7 +101,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, 128, false) < 0) { + if (perf_evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 199bb82efbcd..3c1778b500e0 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1091,10 +1091,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_per_cpu(evlist, &mp); } 
-int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite) +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); + return perf_evlist__mmap_ex(evlist, pages, false, 0, false); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4e8131dacbd7..f0f2c8b2504b 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -171,8 +171,7 @@ unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, bool overwrite, unsigned int auxtrace_pages, bool auxtrace_overwrite); -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite); +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); size_t perf_evlist__mmap_size(unsigned long pages); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8e49d9cafcfc..b1e999bd21ef 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -864,7 +864,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, &pages, &overwrite)) return NULL; - if (perf_evlist__mmap(evlist, pages, overwrite) < 0) { + if (perf_evlist__mmap(evlist, pages) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } -- cgit v1.2.3 From 7a276ff6c3202697c3c15cad757dec3bb07d14bf Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:38 +0000 Subject: perf evlist: Remove 'overwrite' parameter from perf_evlist__mmap_ex All users of perf_evlist__mmap_ex set !overwrite. Remove it from its arguments list. 
Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-3-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/evlist.c | 8 ++++---- tools/perf/util/evlist.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e304bc47fe9b..08070f87d489 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -301,7 +301,7 @@ static int record__mmap_evlist(struct record *rec, struct record_opts *opts = &rec->opts; char msg[512]; - if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, + if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode) < 0) { if (errno == EPERM) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3c1778b500e0..93272d932407 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1052,14 +1052,14 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * Return: %0 on success, negative error code otherwise. */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; struct mmap_params mp = { - .prot = PROT_READ | (overwrite ? 
0 : PROT_WRITE), + .prot = PROT_READ | PROT_WRITE, }; if (!evlist->mmap) @@ -1070,7 +1070,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->overwrite = overwrite; + evlist->overwrite = false; evlist->mmap_len = perf_evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; @@ -1093,7 +1093,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, false, 0, false); + return perf_evlist__mmap_ex(evlist, pages, 0, false); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f0f2c8b2504b..424a3d6015af 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -169,7 +169,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); -- cgit v1.2.3 From 144b9a4fc53039c09007b71a06640560a6e62140 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:39 +0000 Subject: perf evlist: Remove evlist->overwrite evlist->overwrite is set to false in all users. It can be removed. 
Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-4-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/evlist.c | 5 ++--- tools/perf/util/evlist.h | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 08070f87d489..3bc6ceeae1f9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -500,7 +500,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; if (maps[i].base) { - if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) { + if (perf_mmap__push(&maps[i], false, backward, rec, record__pushfn) != 0) { rc = -1; goto out; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 93272d932407..a59134fb141f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -711,7 +711,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int * No need for read-write ring buffer: kernel stop outputting when * it hit md->prev (perf_mmap__consume()). 
*/ - return perf_mmap__read_forward(md, evlist->overwrite); + return perf_mmap__read_forward(md, false); } union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) @@ -738,7 +738,7 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { - perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); + perf_mmap__consume(&evlist->mmap[idx], false); } static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) @@ -1070,7 +1070,6 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->overwrite = false; evlist->mmap_len = perf_evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 424a3d6015af..eec33770b8e6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -31,7 +31,6 @@ struct perf_evlist { int nr_entries; int nr_groups; int nr_mmaps; - bool overwrite; bool enabled; bool has_user_cpus; size_t mmap_len; -- cgit v1.2.3 From ca6a9a05391960be5e8161a59a9854b32325d901 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:40 +0000 Subject: perf mmap: Remove overwrite from arguments list of perf_mmap__push 'overwrite' argument is always 'false'. Remove it from arguments list of perf_mmap__push(). 
Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-5-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/mmap.c | 6 +++--- tools/perf/util/mmap.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3bc6ceeae1f9..26b8571d0fdb 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -500,7 +500,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; if (maps[i].base) { - if (perf_mmap__push(&maps[i], false, backward, rec, record__pushfn) != 0) { + if (perf_mmap__push(&maps[i], backward, rec, record__pushfn) != 0) { rc = -1; goto out; } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 9fe5f9c7d577..703ed41a9269 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -299,7 +299,7 @@ static int rb_find_range(void *data, int mask, u64 head, u64 old, return backward_rb_find_range(data, mask, head, start, end); } -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); @@ -321,7 +321,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, WARN_ONCE(1, "failed to keep up with mmap data. 
(warn only once)\n"); md->prev = head; - perf_mmap__consume(md, overwrite || backward); + perf_mmap__consume(md, backward); return 0; } @@ -346,7 +346,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, } md->prev = head; - perf_mmap__consume(md, overwrite || backward); + perf_mmap__consume(md, backward); out: return rc; } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index efd78b827b05..2c3d291785de 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -89,7 +89,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); union perf_event *perf_mmap__read_backward(struct perf_mmap *map); -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)); size_t perf_mmap__mmap_len(struct perf_mmap *map); -- cgit v1.2.3 From 8eb7a1fe31612fd3e8ae8042dd2ebaf7575504cb Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:41 +0000 Subject: perf mmap: Remove overwrite and check_messup from mmap read All perf_mmap__read_forward() calls read from a read-write ring buffer, so there is no need for check_messup. Reading from a backward ring buffer doesn't require check_messup because it never messes up. Clean up the argument lists. 
Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-6-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 28 ++++------------------------ tools/perf/util/mmap.h | 2 +- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a59134fb141f..68c1f9546650 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -711,7 +711,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int * No need for read-write ring buffer: kernel stop outputting when * it hit md->prev (perf_mmap__consume()). */ - return perf_mmap__read_forward(md, false); + return perf_mmap__read_forward(md); } union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 703ed41a9269..3f262e707a41 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -21,33 +21,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } /* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup, +static union perf_event *perf_mmap__read(struct perf_mmap *map, u64 start, u64 end, u64 *prev) { unsigned char *data = map->base + page_size; union perf_event *event = NULL; int diff = end - start; - if (check_messup) { - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the 'end', we got messed up. - * - * In either case, truncate and restart at 'end'. - */ - if (diff > map->mask / 2 || diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); - - /* - * 'end' points to a known good entry, start there. 
- */ - start = end; - diff = 0; - } - } - if (diff >= (int)sizeof(event->header)) { size_t size; @@ -89,7 +69,7 @@ broken_event: return event; } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup) +union perf_event *perf_mmap__read_forward(struct perf_mmap *map) { u64 head; u64 old = map->prev; @@ -102,7 +82,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_mess head = perf_mmap__read_head(map); - return perf_mmap__read(map, check_messup, old, head, &map->prev); + return perf_mmap__read(map, old, head, &map->prev); } union perf_event *perf_mmap__read_backward(struct perf_mmap *map) @@ -138,7 +118,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map) else end = head + map->mask + 1; - return perf_mmap__read(map, false, start, end, &map->prev); + return perf_mmap__read(map, start, end, &map->prev); } void perf_mmap__read_catchup(struct perf_mmap *map) diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 2c3d291785de..d640273b7762 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -86,7 +86,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) pc->data_tail = tail; } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); +union perf_event *perf_mmap__read_forward(struct perf_mmap *map); union perf_event *perf_mmap__read_backward(struct perf_mmap *map); int perf_mmap__push(struct perf_mmap *md, bool backward, -- cgit v1.2.3 From 0125195268a0f9886b582c7e73da98f89029796f Mon Sep 17 00:00:00 2001 From: Sangwon Hong Date: Sat, 2 Dec 2017 13:16:41 +0900 Subject: perf c2c: Add a tip about cacheline events Signed-off-by: Sangwon Hong Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1512188201-14109-1-git-send-email-qpakzk@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/tips.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/Documentation/tips.txt 
b/tools/perf/Documentation/tips.txt index 3dd1dbe28407..849599f39c5e 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -33,3 +33,4 @@ System-wide collection from all CPUs: perf record -a Show current config key-value pairs: perf config --list Show user configuration overrides: perf config --user --list To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node` +To report cacheline events from previous recording: perf c2c report -- cgit v1.2.3 From fbc2844e84038ce3687d203ac80b66194e9f21e6 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Mon, 4 Dec 2017 09:57:28 -0500 Subject: perf vendor events: Use more flexible pattern matching for CPU identification for mapfile.csv The powerpc cpuid information includes chip revision information. Changes between chip revisions are usually minor bug fixes and usually do not affect the operation of the performance monitoring hardware. The original mapfile.csv matching requires enumerating every possible cpuid string. When a new minor chip revision is produced a new entry has to be added to the mapfile.csv and the code recompiled to allow perf to have the implementation specific perf events for this new minor revision. For users of various distributions of Linux having to wait for a new release of the kernel's perf tool to be built with these trivial patches is inconvenient. Using regular expressions rather than exact string matching of the entire cpuid string allows developers to write mapfile.csv files that do not require patches and recompiles for each of these minor version changes. If special cases need to be made for some particular versions, they can be placed earlier in the mapfile.csv file before the more general matches. 
Signed-off-by: William Cohen Tested-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shriya Link: http://lkml.kernel.org/r/20171204145728.16792-1-wcohen@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/powerpc/mapfile.csv | 12 ++------ tools/perf/pmu-events/arch/x86/mapfile.csv | 5 +--- tools/perf/pmu-events/jevents.c | 39 +++++++++++++++++++++++++- tools/perf/util/pmu.c | 20 ++++++++++++- 4 files changed, 60 insertions(+), 16 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index a0f3a11ca19f..229150e7ab7d 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -13,13 +13,5 @@ # # Power8 entries -004b0000,1,power8,core -004b0201,1,power8,core -004c0000,1,power8,core -004d0000,1,power8,core -004d0100,1,power8,core -004d0200,1,power8,core -004c0100,1,power8,core -004e0100,1,power9,core -004e0200,1,power9,core -004e1200,1,power9,core +004[bcd][[:xdigit:]]{4},1,power8,core +004e[[:xdigit:]]{4},1,power9,core diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index fe1a2c47cabf..93656f2fd53a 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -23,10 +23,7 @@ GenuineIntel-6-1E,v2,nehalemep,core GenuineIntel-6-1F,v2,nehalemep,core GenuineIntel-6-1A,v2,nehalemep,core GenuineIntel-6-2E,v2,nehalemex,core -GenuineIntel-6-4E,v24,skylake,core -GenuineIntel-6-5E,v24,skylake,core -GenuineIntel-6-8E,v24,skylake,core -GenuineIntel-6-9E,v24,skylake,core +GenuineIntel-6-[4589]E,v24,skylake,core GenuineIntel-6-37,v13,silvermont,core GenuineIntel-6-4D,v13,silvermont,core GenuineIntel-6-4C,v13,silvermont,core diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 9eb7047bafe4..b578aa26e375 100644 --- 
a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -116,6 +116,43 @@ static void fixdesc(char *s) *e = 0; } +/* Add escapes for '\' so they are proper C strings. */ +static char *fixregex(char *s) +{ + int len = 0; + int esc_count = 0; + char *fixed = NULL; + char *p, *q; + + /* Count the number of '\' in string */ + for (p = s; *p; p++) { + ++len; + if (*p == '\\') + ++esc_count; + } + + if (esc_count == 0) + return s; + + /* allocate space for a new string */ + fixed = (char *) malloc(len + 1); + if (!fixed) + return NULL; + + /* copy over the characters */ + q = fixed; + for (p = s; *p; p++) { + if (*p == '\\') { + *q = '\\'; + ++q; + } + *q = *p; + ++q; + } + *q = '\0'; + return fixed; +} + static struct msrmap { const char *num; const char *pname; @@ -648,7 +685,7 @@ static int process_mapfile(FILE *outfp, char *fpath) } line[strlen(line)-1] = '\0'; - cpuid = strtok_r(p, ",", &save); + cpuid = fixregex(strtok_r(p, ",", &save)); version = strtok_r(NULL, ",", &save); fname = strtok_r(NULL, ",", &save); type = strtok_r(NULL, ",", &save); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8b7c151579c0..57e38fdf0b34 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" #include "pmu.h" #include "parse-events.h" @@ -609,14 +610,31 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) i = 0; for (;;) { + regex_t re; + regmatch_t pmatch[1]; + int match; + map = &pmu_events_map[i++]; if (!map->table) { map = NULL; break; } - if (!strcmp(map->cpuid, cpuid)) + if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", map->cpuid); break; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. 
*/ + if (match_len == strlen(cpuid)) + break; + } } free(cpuid); return map; -- cgit v1.2.3 From c343bade301dfe608e86b034cbabed3c0d5a50f5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Dec 2017 13:08:47 -0300 Subject: x86/asm: Allow again using asm.h when building for the 'bpf' clang target Up to f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") we were able to use x86 headers to build to the 'bpf' clang target, as done by the BPF code in tools/perf/. With that commit, we ended up with following failure for 'perf test LLVM', this is because "clang ... -target bpf ..." fails since 4.0 does not have bpf inline asm support and 6.0 does not recognize the register 'esp', fix it by guarding that part with an #ifndef __BPF__, that is defined by clang when building to the "bpf" target. # perf test -v LLVM 37: LLVM search and compile : 37.1: Basic BPF llvm compile : --- start --- test child forked, pid 25526 Kernel build dir is set to /lib/modules/4.14.0+/build set env: KBUILD_DIR=/lib/modules/4.14.0+/build unset env: KBUILD_OPTS include option is set to -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: NR_CPUS=4 set env: LINUX_VERSION_CODE=0x40e00 set env: CLANG_EXEC=/usr/local/bin/clang set env: CLANG_OPTIONS=-xc set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h set env: WORKING_DIR=/lib/modules/4.14.0+/build set env: CLANG_SOURCE=- llvm compiling command template: echo '/* * bpf-script-example.c * Test basic LLVM building */ #ifndef LINUX_VERSION_CODE # error Need LINUX_VERSION_CODE # error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' #endif #define BPF_ANY 0 #define BPF_MAP_TYPE_ARRAY 2 #define BPF_FUNC_map_lookup_elem 1 #define BPF_FUNC_map_update_elem 2 static void *(*bpf_map_lookup_elem)(void *map, void *key) = (void *) BPF_FUNC_map_lookup_elem; static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = (void *) BPF_FUNC_map_update_elem; struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def SEC("maps") flip_table = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=SyS_epoll_wait") int bpf_func__SyS_epoll_wait(void *ctx) { int ind =0; int *flag = bpf_map_lookup_elem(&flip_table, &ind); int new_flag; if (!flag) return 0; /* flip flag and store back */ new_flag = !*flag; bpf_map_update_elem(&flip_table, &ind, &new_flag, BPF_ANY); return new_flag; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; ' | $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o - test child finished with 0 ---- end ---- LLVM search and compile subtest 0: Ok 37.2: kbuild searching : --- start --- test child forked, pid 25950 Kernel build dir is set to /lib/modules/4.14.0+/build set env: KBUILD_DIR=/lib/modules/4.14.0+/build unset env: KBUILD_OPTS include option is set to -nostdinc -isystem 
/usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: NR_CPUS=4 set env: LINUX_VERSION_CODE=0x40e00 set env: CLANG_EXEC=/usr/local/bin/clang set env: CLANG_OPTIONS=-xc set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: WORKING_DIR=/lib/modules/4.14.0+/build set env: CLANG_SOURCE=- llvm compiling command template: echo '/* * bpf-script-test-kbuild.c * Test include from kernel header */ #ifndef LINUX_VERSION_CODE # error Need LINUX_VERSION_CODE # error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' #endif #define SEC(NAME) __attribute__((section(NAME), used)) #include #include SEC("func=vfs_llseek") int bpf_func__vfs_llseek(void *ctx) { return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; ' | $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o - In file included from :12: In file included from /home/acme/git/linux/arch/x86/include/uapi/asm/ptrace.h:5: In file included from /home/acme/git/linux/include/linux/compiler.h:242: In file included from /home/acme/git/linux/arch/x86/include/asm/barrier.h:5: In file included from 
/home/acme/git/linux/arch/x86/include/asm/alternative.h:10: /home/acme/git/linux/arch/x86/include/asm/asm.h:145:50: error: unknown register name 'esp' in asm register unsigned long current_stack_pointer asm(_ASM_SP); ^ /home/acme/git/linux/arch/x86/include/asm/asm.h:44:18: note: expanded from macro '_ASM_SP' #define _ASM_SP __ASM_REG(sp) ^ /home/acme/git/linux/arch/x86/include/asm/asm.h:27:32: note: expanded from macro '__ASM_REG' #define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg) ^ /home/acme/git/linux/arch/x86/include/asm/asm.h:18:29: note: expanded from macro '__ASM_SEL_RAW' # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) ^ /home/acme/git/linux/arch/x86/include/asm/asm.h:11:32: note: expanded from macro '__ASM_FORM_RAW' # define __ASM_FORM_RAW(x) #x ^ :4:1: note: expanded from here "esp" ^ 1 error generated. ERROR: unable to compile - Hint: Check error message shown above. Hint: You can also pre-compile it into .o using: clang -target bpf -O2 -c - with proper -I and -D options. Failed to compile test case: 'kbuild searching' test child finished with -1 ---- end ---- LLVM search and compile subtest 1: FAILED! 
Cc: Adrian Hunter Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Daniel Borkmann Cc: David Ahern Cc: Dmitriy Vyukov Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang Nan Cc: Yonghong Song Link: https://lkml.kernel.org/r/20171128175948.GL3298@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/asm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 219faaec51df..386a6900e206 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -136,6 +136,7 @@ #endif #ifndef __ASSEMBLY__ +#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -145,5 +146,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif +#endif #endif /* _ASM_X86_ASM_H */ -- cgit v1.2.3 From 712d36db5a5c57eb79e962a0f9b85964640e8415 Mon Sep 17 00:00:00 2001 From: Seokho Song <0xdevssh@gmail.com> Date: Tue, 5 Dec 2017 01:02:44 +0900 Subject: perf report: Set browser mode right before setup_browser() There is code that prints messages to the screen between the assignment of the use_browser variable and setup_browser(). But since the GUI browser is not initialized during that period, all messages fail to show if the user passed the --gtk option to perf as GTK is not initialized yet. Reorder the code to assign the use_browser variable right before setup_browser() is called.
Signed-off-by: Seokho Song <0xdevssh@gmail.com> Acked-by: Namhyung Kim Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20171204160244.6332-1-0xdevssh@gmail.com Signed-off-by: Park Ju Hyung Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index af5dd038195e..eb9ce6327e71 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -921,13 +921,6 @@ int cmd_report(int argc, const char **argv) return -EINVAL; } - if (report.use_stdio) - use_browser = 0; - else if (report.use_tui) - use_browser = 1; - else if (report.use_gtk) - use_browser = 2; - if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; if (symbol_conf.cumulate_callchain && !callchain_param.order_set) @@ -1014,6 +1007,13 @@ repeat: perf_hpp_list.need_collapse = true; } + if (report.use_stdio) + use_browser = 0; + else if (report.use_tui) + use_browser = 1; + else if (report.use_gtk) + use_browser = 2; + /* Force tty output for header output and per-thread stat. */ if (report.header || report.header_only || report.show_threads) use_browser = 0; -- cgit v1.2.3 From 71f566a34986f4a86a8c546c7a36f70f0132b8a9 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 4 Dec 2017 16:51:05 +0000 Subject: perf mmap: Fix perf backward recording 'perf record' backward recording doesn't work as we expected: it never overwrites when ring buffer gets full. 
Test: Run a busy python printing task background like this: while True: print 123 send SIGUSR2 to perf to capture snapshot, then: # ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101520743 ] [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101521251 ] [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101521692 ] ^C[ perf record: Woken up 1 times to write data ] [ perf record: Dump perf.data.2017110101521936 ] [ perf record: Captured and wrote 0.826 MB perf.data. ] # ./perf script -i ./perf.data.2017110101520743 | head -n3 perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0) perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0) python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4 # ./perf script -i ./perf.data.2017110101521251 | head -n3 perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0) perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0) python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4 # ./perf script -i ./perf.data.2017110101521692 | head -n3 perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0) perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0) python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4 Timestamps never change, but my background task is a dead loop, can easily overwhelm the ring buffer. This patch fixes it by forcing unsetting PROT_WRITE for a backward ring buffer, so all backward ring buffers become overwrite ring buffers. 
Test result: # ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101285323 ] [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101290053 ] [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101290446 ] ^C[ perf record: Woken up 1 times to write data ] [ perf record: Dump perf.data.2017110101290837 ] [ perf record: Captured and wrote 0.826 MB perf.data. ] # ./perf script -i ./perf.data.2017110101285323 | head -n3 python 2545 [000] 11064.268083: raw_syscalls:sys_exit: NR 1 = 4 python 2545 [000] 11064.268084: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0) python 2545 [000] 11064.268086: raw_syscalls:sys_exit: NR 1 = 4 # ./perf script -i ./perf.data.2017110101290 | head -n3 failed to open ./perf.data.2017110101290: No such file or directory # ./perf script -i ./perf.data.2017110101290053 | head -n3 python 2545 [000] 11071.564062: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0) python 2545 [000] 11071.564064: raw_syscalls:sys_exit: NR 1 = 4 python 2545 [000] 11071.564066: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0) # ./perf script -i ./perf.data.2017110101290 | head -n3 perf.data.2017110101290053 perf.data.2017110101290446 perf.data.2017110101290837 # ./perf script -i ./perf.data.2017110101290446 | head -n3 sshd 1321 [000] 11075.499473: raw_syscalls:sys_exit: NR 14 = 0 sshd 1321 [000] 11075.499474: raw_syscalls:sys_enter: NR 14 (2, 7ffe98899490, 0, 8, 0, 3000) sshd 1321 [000] 11075.499474: raw_syscalls:sys_exit: NR 14 = 0 # ./perf script -i ./perf.data.2017110101290837 | head -n3 python 2545 [000] 11079.280844: raw_syscalls:sys_exit: NR 1 = 4 python 2545 [000] 11079.280847: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0) python 2545 
[000] 11079.280850: raw_syscalls:sys_exit: NR 1 = 4 Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Cc: Mengting Zhang Link: http://lkml.kernel.org/r/20171204165107.95327-2-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 68c1f9546650..b1cea711232b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -812,6 +812,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, int fd; int cpu; + mp->prot = PROT_READ | PROT_WRITE; if (evsel->attr.write_backward) { output = _output_backward; maps = evlist->backward_mmap; @@ -824,6 +825,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } + mp->prot &= ~PROT_WRITE; } if (evsel->system_wide && thread) @@ -1058,9 +1060,12 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; - struct mmap_params mp = { - .prot = PROT_READ | PROT_WRITE, - }; + /* + * Delay setting mp.prot: set it before calling perf_mmap__mmap. + * Its value is decided by evsel's write_backward. + * So &mp should not be passed through const pointer. + */ + struct mmap_params mp; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist); -- cgit v1.2.3 From 7fb4b407a1242dbc85ea3ed1be065dca8f9a6f5b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 4 Dec 2017 16:51:06 +0000 Subject: perf mmap: Don't discard prev in backward mode 'perf record' can switch its output data file. The new output should only store the data after switching. However, in overwrite backward mode, the new output still can have data from before switching. 
That also brings extra overhead. At the end of mmap_read(), the position of the processed ring buffer is saved in md->prev. The next mmap_read() should end at md->prev if it has not been overwritten. That avoids processing duplicate data. However, md->prev is discarded. So the next mmap_read() has to process the whole valid ring buffer, which probably includes old processed data. Avoid calling backward_rb_find_range() when md->prev is still available. Signed-off-by: Wang Nan Tested-by: Kan Liang Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Mengting Zhang Link: http://lkml.kernel.org/r/20171204165107.95327-3-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 3f262e707a41..5f8cb1583e53 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -267,18 +267,6 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 return -1; } -static int rb_find_range(void *data, int mask, u64 head, u64 old, - u64 *start, u64 *end, bool backward) -{ - if (!backward) { - *start = old; - *end = head; - return 0; - } - - return backward_rb_find_range(data, mask, head, start, end); -} - int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)) { @@ -290,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, void *buf; int rc = 0; - if (rb_find_range(data, md->mask, head, old, &start, &end, backward)) - return -1; + start = backward ? head : old; + end = backward ? old : head; if (start == end) return 0; size = end - start; if (size > (unsigned long)(md->mask) + 1) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + if (!backward) { + WARN_ONCE(1, "failed to keep up with mmap data.
(warn only once)\n"); - md->prev = head; - perf_mmap__consume(md, backward); - return 0; + md->prev = head; + perf_mmap__consume(md, backward); + return 0; + } + + /* + * Backward ring buffer is full. We still have a chance to read + * most of data from it. + */ + if (backward_rb_find_range(data, md->mask, head, &start, &end)) + return -1; } if ((start & md->mask) + size != (end & md->mask)) { -- cgit v1.2.3 From 0b72d69a542873ee098867deeb37d27ad4629c64 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 4 Dec 2017 16:51:07 +0000 Subject: perf tools: Rename 'backward' to 'overwrite' in evlist, mmap and record Remove the backward/forward concept to make it uniform with user interface (the '--overwrite' option). Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Cc: Mengting Zhang Link: http://lkml.kernel.org/r/20171204165107.95327-4-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 14 +++++++------- tools/perf/tests/backward-ring-buffer.c | 4 ++-- tools/perf/util/evlist.c | 30 +++++++++++++++--------------- tools/perf/util/evlist.h | 2 +- tools/perf/util/mmap.c | 22 +++++++++++----------- 5 files changed, 36 insertions(+), 36 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 26b8571d0fdb..0a5749ef8b94 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -479,7 +479,7 @@ static struct perf_event_header finished_round_event = { }; static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, - bool backward) + bool overwrite) { u64 bytes_written = rec->bytes_written; int i; @@ -489,18 +489,18 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli if (!evlist) return 0; - maps = backward ? evlist->backward_mmap : evlist->mmap; + maps = overwrite ? 
evlist->overwrite_mmap : evlist->mmap; if (!maps) return 0; - if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) + if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) return 0; for (i = 0; i < evlist->nr_mmaps; i++) { struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; if (maps[i].base) { - if (perf_mmap__push(&maps[i], backward, rec, record__pushfn) != 0) { + if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) { rc = -1; goto out; } @@ -520,7 +520,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli if (bytes_written != rec->bytes_written) rc = record__write(rec, &finished_round_event, sizeof(finished_round_event)); - if (backward) + if (overwrite) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); out: return rc; @@ -692,8 +692,8 @@ perf_evlist__pick_pc(struct perf_evlist *evlist) if (evlist) { if (evlist->mmap && evlist->mmap[0].base) return evlist->mmap[0].base; - if (evlist->backward_mmap && evlist->backward_mmap[0].base) - return evlist->backward_mmap[0].base; + if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base) + return evlist->overwrite_mmap[0].base; } return NULL; } diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index cf37e43c42f3..4035d43523c3 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -33,8 +33,8 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count, for (i = 0; i < evlist->nr_mmaps; i++) { union perf_event *event; - perf_mmap__read_catchup(&evlist->backward_mmap[i]); - while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) { + perf_mmap__read_catchup(&evlist->overwrite_mmap[i]); + while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) { const u32 type = event->header.type; switch (type) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b1cea711232b..3570355bcf39 100644 
--- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -125,7 +125,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); fdarray__exit(&evlist->pollfd); } @@ -675,11 +675,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) { int i; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return 0; for (i = 0; i < evlist->nr_mmaps; i++) { - int fd = evlist->backward_mmap[i].fd; + int fd = evlist->overwrite_mmap[i].fd; int err; if (fd < 0) @@ -749,16 +749,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) for (i = 0; i < evlist->nr_mmaps; i++) perf_mmap__munmap(&evlist->mmap[i]); - if (evlist->backward_mmap) + if (evlist->overwrite_mmap) for (i = 0; i < evlist->nr_mmaps; i++) - perf_mmap__munmap(&evlist->backward_mmap[i]); + perf_mmap__munmap(&evlist->overwrite_mmap[i]); } void perf_evlist__munmap(struct perf_evlist *evlist) { perf_evlist__munmap_nofree(evlist); zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); } static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) @@ -800,7 +800,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu_idx, - int thread, int *_output, int *_output_backward) + int thread, int *_output, int *_output_overwrite) { struct perf_evsel *evsel; int revent; @@ -814,14 +814,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, mp->prot = PROT_READ | PROT_WRITE; if (evsel->attr.write_backward) { - output = _output_backward; - maps = evlist->backward_mmap; + output = _output_overwrite; + maps = evlist->overwrite_mmap; if (!maps) { maps = perf_evlist__alloc_mmap(evlist); if (!maps) return -1; - evlist->backward_mmap = maps; 
+ evlist->overwrite_mmap = maps; if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } @@ -886,14 +886,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per cpu\n"); for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, true); for (thread = 0; thread < nr_threads; thread++) { if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output, &output_backward)) + thread, &output, &output_overwrite)) goto out_unmap; } } @@ -914,13 +914,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per thread\n"); for (thread = 0; thread < nr_threads; thread++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, false); if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output, &output_backward)) + &output, &output_overwrite)) goto out_unmap; } @@ -1753,7 +1753,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, RESUME, } action = NONE; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return; switch (old_state) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index eec33770b8e6..75160666d305 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -44,7 +44,7 @@ struct perf_evlist { } workload; struct fdarray pollfd; struct perf_mmap *mmap; - struct perf_mmap *backward_mmap; + struct perf_mmap *overwrite_mmap; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 5f8cb1583e53..05076e683938 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -234,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct 
mmap_params *mp, int fd) return 0; } -static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) { struct perf_event_header *pheader; u64 evt_head = head; int size = mask + 1; - pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); + pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); pheader = (struct perf_event_header *)(buf + (head & mask)); *start = head; while (true) { if (evt_head - head >= (unsigned int)size) { - pr_debug("Finished reading backward ring buffer: rewind\n"); + pr_debug("Finished reading overwrite ring buffer: rewind\n"); if (evt_head - head > (unsigned int)size) evt_head -= pheader->size; *end = evt_head; @@ -255,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 pheader = (struct perf_event_header *)(buf + (evt_head & mask)); if (pheader->size == 0) { - pr_debug("Finished reading backward ring buffer: get start\n"); + pr_debug("Finished reading overwrite ring buffer: get start\n"); *end = evt_head; return 0; } @@ -267,7 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 return -1; } -int perf_mmap__push(struct perf_mmap *md, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool overwrite, void *to, int push(void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); @@ -278,19 +278,19 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, void *buf; int rc = 0; - start = backward ? head : old; - end = backward ? old : head; + start = overwrite ? head : old; + end = overwrite ? old : head; if (start == end) return 0; size = end - start; if (size > (unsigned long)(md->mask) + 1) { - if (!backward) { + if (!overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. 
(warn only once)\n"); md->prev = head; - perf_mmap__consume(md, backward); + perf_mmap__consume(md, overwrite); return 0; } @@ -298,7 +298,7 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (backward_rb_find_range(data, md->mask, head, &start, &end)) + if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) return -1; } @@ -323,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, } md->prev = head; - perf_mmap__consume(md, backward); + perf_mmap__consume(md, overwrite); out: return rc; } -- cgit v1.2.3 From c2f31b79d510ec1a27138bdcf2d0ece1080be85e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Dec 2017 09:10:17 -0800 Subject: cgroup: add warning about RT not being supported on cgroup2 We haven't yet figured out what to do with RT threads on cgroup2. Document the limitation. v2: Included the warning about system management software behavior as suggested by Michael. Signed-off-by: Tejun Heo Reported-by: "Michael Kerrisk (man-pages)" --- Documentation/cgroup-v2.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 779211fbb69f..2cddab7efb20 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -898,6 +898,13 @@ controller implements weight and absolute bandwidth limit models for normal scheduling policy and absolute bandwidth allocation model for realtime scheduling policy. +WARNING: cgroup2 doesn't yet support control of realtime processes and +the cpu controller can only be enabled when all RT processes are in +the root cgroup. Be aware that system management software may already +have placed RT processes into nonroot cgroups during the system boot +process, and these processes may need to be moved to the root cgroup +before the cpu controller can be enabled. 
+ CPU Interface Files ~~~~~~~~~~~~~~~~~~~ -- cgit v1.2.3 From adf90eb49055636fc35aede54174456ac3520f27 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Nov 2017 09:04:22 -0800 Subject: drivers/infiniband: Remove now-redundant smp_read_barrier_depends() The smp_read_barrier_depends() does nothing at all except on DEC Alpha, and no current DEC Alpha systems use Infiniband: lkml.kernel.org/r/20171023085921.jwbntptn6ictbnvj@tower This commit therefore makes Infiniband depend on !ALPHA and removes the now-ineffective invocations of smp_read_barrier_depends() from the InfiniBand driver. Please note that this patch should not be construed as my saying that InfiniBand's memory ordering is correct, but rather that this patch does not in any way affect InfiniBand's correctness. In other words, the result of applying this patch is bug-for-bug compatible with the original. Signed-off-by: Paul E. McKenney Cc: Doug Ledford Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Michael Cree Cc: Andrea Parri Cc: Cc: [ paulmck: Removed drivers/dma/ioat/dma.c per Jason Gunthorpe's feedback. ] Acked-by: Jason Gunthorpe --- drivers/infiniband/Kconfig | 1 + drivers/infiniband/hw/hfi1/rc.c | 4 ---- drivers/infiniband/hw/hfi1/ruc.c | 1 - drivers/infiniband/hw/hfi1/sdma.c | 1 - drivers/infiniband/hw/hfi1/uc.c | 2 -- drivers/infiniband/hw/hfi1/ud.c | 2 -- drivers/infiniband/hw/qib/qib_rc.c | 3 --- drivers/infiniband/hw/qib/qib_ruc.c | 1 - drivers/infiniband/hw/qib/qib_uc.c | 2 -- drivers/infiniband/hw/qib/qib_ud.c | 2 -- drivers/infiniband/sw/rdmavt/qp.c | 1 - 11 files changed, 1 insertion(+), 19 deletions(-) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 98ac46ed7214..3bb6e35b0bbf 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -4,6 +4,7 @@ menuconfig INFINIBAND depends on NET depends on INET depends on m || IPV6 != m + depends on !ALPHA select IRQ_POLL ---help--- Core support for InfiniBand (IB). 
Make sure to also select diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index fd01a760259f..f527bcda4650 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -302,7 +302,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - smp_read_barrier_depends(); /* see post_one_send() */ if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ @@ -346,7 +345,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) newreq = 0; if (qp->s_cur == qp->s_tail) { /* Check if send work queue is empty. */ - smp_read_barrier_depends(); /* see post_one_send() */ if (qp->s_tail == READ_ONCE(qp->s_head)) { clear_ahg(qp); goto bail; @@ -900,7 +898,6 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, } /* Ensure s_rdma_ack_cnt changes are committed */ - smp_read_barrier_depends(); if (qp->s_rdma_ack_cnt) { hfi1_queue_rc_ack(qp, is_fecn); return; @@ -1562,7 +1559,6 @@ static void rc_rcv_resp(struct hfi1_packet *packet) trace_hfi1_ack(qp, psn); /* Ignore invalid responses. 
*/ - smp_read_barrier_depends(); /* see post_one_send */ if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0) goto ack_done; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 2c7fc6e331ea..13b994738f41 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -362,7 +362,6 @@ static void ruc_loopback(struct rvt_qp *sqp) sqp->s_flags |= RVT_S_BUSY; again: - smp_read_barrier_depends(); /* see post_one_send() */ if (sqp->s_last == READ_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 31c8f89b5fc8..61c130dbed10 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -553,7 +553,6 @@ static void sdma_hw_clean_up_task(unsigned long opaque) static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde) { - smp_read_barrier_depends(); /* see sdma_update_tail() */ return sde->tx_ring[sde->tx_head & sde->sdma_mask]; } diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 991bbee04821..132b63e787d1 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -79,7 +79,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - smp_read_barrier_depends(); /* see post_one_send() */ if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ @@ -119,7 +118,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. 
*/ - smp_read_barrier_depends(); /* see post_one_send() */ if (qp->s_cur == READ_ONCE(qp->s_head)) { clear_ahg(qp); goto bail; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index beb5091eccca..deb184574395 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -486,7 +486,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - smp_read_barrier_depends(); /* see post_one_send */ if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ @@ -500,7 +499,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } /* see post_one_send() */ - smp_read_barrier_depends(); if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 8f5754fb8579..1a785c37ad0a 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -246,7 +246,6 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - smp_read_barrier_depends(); /* see post_one_send() */ if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ @@ -293,7 +292,6 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) newreq = 0; if (qp->s_cur == qp->s_tail) { /* Check if send work queue is empty. */ - smp_read_barrier_depends(); /* see post_one_send() */ if (qp->s_tail == READ_ONCE(qp->s_head)) goto bail; /* @@ -1340,7 +1338,6 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, goto ack_done; /* Ignore invalid responses. 
*/ - smp_read_barrier_depends(); /* see post_one_send */ if (qib_cmp24(psn, READ_ONCE(qp->s_next_psn)) >= 0) goto ack_done; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 9a37e844d4c8..4662cc7bde92 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -367,7 +367,6 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) sqp->s_flags |= RVT_S_BUSY; again: - smp_read_barrier_depends(); /* see post_one_send() */ if (sqp->s_last == READ_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index bddcc37ace44..70c58b88192c 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -60,7 +60,6 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - smp_read_barrier_depends(); /* see post_one_send() */ if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ @@ -90,7 +89,6 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. */ - smp_read_barrier_depends(); /* see post_one_send() */ if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; /* diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 15962ed193ce..386c3c4da0c7 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -252,7 +252,6 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. 
*/ - smp_read_barrier_depends(); /* see post_one_send */ if (qp->s_last == READ_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ @@ -266,7 +265,6 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) } /* see post_one_send() */ - smp_read_barrier_depends(); if (qp->s_cur == READ_ONCE(qp->s_head)) goto bail; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 9177df60742a..eae84c216e2f 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1684,7 +1684,6 @@ static inline int rvt_qp_is_avail( /* non-reserved operations */ if (likely(qp->s_avail)) return 0; - smp_read_barrier_depends(); /* see rc.c */ slast = READ_ONCE(qp->s_last); if (qp->s_head >= slast) avail = qp->s_size - (qp->s_head - slast); -- cgit v1.2.3 From 98c1ec7cefaadbf65680d116c3d8612b93a841a0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 1 Dec 2017 17:04:39 -0800 Subject: drivers/dma/ioat: Remove now-redundant smp_read_barrier_depends() Now that READ_ONCE() implies smp_read_barrier_depends(), the __cleanup() and ioat_abort_descs() functions no longer need their smp_read_barrier_depends() calls, which this commit removes. It is actually not entirely clear why this driver ever included smp_read_barrier_depends() given that it appears to be x86-only and given that smp_read_barrier_depends() has no effect whatsoever except on DEC Alpha. Signed-off-by: Paul E. 
McKenney Cc: Vinod Koul Cc: Dan Williams Cc: --- drivers/dma/ioat/dma.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 58d4ccd33672..8b5b23a8ace9 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -597,7 +597,6 @@ static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) for (i = 0; i < active && !seen_current; i++) { struct dma_async_tx_descriptor *tx; - smp_read_barrier_depends(); prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1)); desc = ioat_get_ring_ent(ioat_chan, idx + i); dump_desc_dbg(ioat_chan, desc); @@ -715,7 +714,6 @@ static void ioat_abort_descs(struct ioatdma_chan *ioat_chan) for (i = 1; i < active; i++) { struct dma_async_tx_descriptor *tx; - smp_read_barrier_depends(); prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1)); desc = ioat_get_ring_ent(ioat_chan, idx + i); -- cgit v1.2.3 From 9ad3c143d7d6942c66f27bc6c18f5df638f70aff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Nov 2017 09:20:40 -0800 Subject: doc: De-emphasize smp_read_barrier_depends This commit keeps only the historical and low-level discussion of smp_read_barrier_depends(). Signed-off-by: Paul E. McKenney [ paulmck: Adjusted to allow for David Howells feedback on prior commit. ] --- Documentation/RCU/Design/Requirements/Requirements.html | 3 ++- Documentation/RCU/rcu_dereference.txt | 6 +----- Documentation/RCU/whatisRCU.txt | 3 +-- Documentation/circular-buffers.txt | 3 +-- Documentation/memory-barriers.txt | 7 +++++-- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index 62e847bcdcdd..571c3d75510f 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -581,7 +581,8 @@ This guarantee was only partially premeditated. 
DYNIX/ptx used an explicit memory barrier for publication, but had nothing resembling rcu_dereference() for subscription, nor did it have anything resembling the smp_read_barrier_depends() -that was later subsumed into rcu_dereference(). +that was later subsumed into rcu_dereference() and later +still into READ_ONCE(). The need for these operations made itself known quite suddenly at a late-1990s meeting with the DEC Alpha architects, back in the days when DEC was still a free-standing company. diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt index 1acb26b09b48..ab96227bad42 100644 --- a/Documentation/RCU/rcu_dereference.txt +++ b/Documentation/RCU/rcu_dereference.txt @@ -122,11 +122,7 @@ o Be very careful about comparing pointers obtained from Note that if checks for being within an RCU read-side critical section are not required and the pointer is never dereferenced, rcu_access_pointer() should be used in place - of rcu_dereference(). The rcu_access_pointer() primitive - does not require an enclosing read-side critical section, - and also omits the smp_read_barrier_depends() included in - rcu_dereference(), which in turn should provide a small - performance gain in some CPUs (e.g., the DEC Alpha). + of rcu_dereference(). o The comparison is against a pointer that references memory that was initialized "a long time ago." The reason diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index df62466da4e0..a27fbfb0efb8 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -600,8 +600,7 @@ don't forget about them when submitting patches making use of RCU!] 
#define rcu_dereference(p) \ ({ \ - typeof(p) _________p1 = p; \ - smp_read_barrier_depends(); \ + typeof(p) _________p1 = READ_ONCE(p); \ (_________p1); \ }) diff --git a/Documentation/circular-buffers.txt b/Documentation/circular-buffers.txt index d4628174b7c5..53e51caa3347 100644 --- a/Documentation/circular-buffers.txt +++ b/Documentation/circular-buffers.txt @@ -220,8 +220,7 @@ before it writes the new tail pointer, which will erase the item. Note the use of READ_ONCE() and smp_load_acquire() to read the opposition index. This prevents the compiler from discarding and -reloading its cached value - which some compilers will do across -smp_read_barrier_depends(). This isn't strictly needed if you can +reloading its cached value. This isn't strictly needed if you can be sure that the opposition index will _only_ be used the once. The smp_load_acquire() additionally forces the CPU to order against subsequent memory references. Similarly, smp_store_release() is used diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 13fd35b6a597..a863009849a3 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1818,7 +1818,7 @@ The Linux kernel has eight basic CPU memory barriers: GENERAL mb() smp_mb() WRITE wmb() smp_wmb() READ rmb() smp_rmb() - DATA DEPENDENCY read_barrier_depends() smp_read_barrier_depends() + DATA DEPENDENCY READ_ONCE() All memory barriers except the data dependency barriers imply a compiler @@ -2867,7 +2867,10 @@ access depends on a read, not all do, so it may not be relied on. Other CPUs may also have split caches, but must coordinate between the various cachelets for normal memory accesses. The semantics of the Alpha removes the -need for coordination in the absence of memory barriers. +need for hardware coordination in the absence of memory barriers, which +permitted Alpha to sport higher CPU clock rates back in the day. 
However, +please note that smp_read_barrier_depends() should not be used except in +Alpha arch-specific code and within the READ_ONCE() macro. CACHE COHERENCY VS DMA -- cgit v1.2.3 From 632a5c1c8fd281b82b1d70bdb1d692cba3b9ffd3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Nov 2017 09:23:24 -0800 Subject: genetlink: Remove smp_read_barrier_depends() from comment Now that smp_read_barrier_depends() has been de-emphasized, the less said about it, the better. Signed-off-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Mark Rutland Cc: Kate Stewart Cc: Ingo Molnar Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman --- include/linux/genetlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index ecc2928e8046..bc738504ab4a 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -31,8 +31,7 @@ extern wait_queue_head_t genl_sk_destructing_waitq; * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the READ_ONCE(), because - * caller holds genl mutex. + * the READ_ONCE(), because caller holds genl mutex. */ #define genl_dereference(p) \ rcu_dereference_protected(p, lockdep_genl_is_held()) -- cgit v1.2.3 From dfe1b4427835f995aed593d063e6fcdf78b00823 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Nov 2017 09:26:06 -0800 Subject: netlink: Remove smp_read_barrier_depends() from comment Now that smp_read_barrier_depends() has been de-emphasized, the less said about it, the better. Signed-off-by: Paul E. 
McKenney Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: Cc: --- include/linux/netfilter/nfnetlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 495ba4dd9da5..34551f8aaf9d 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -67,8 +67,7 @@ static inline bool lockdep_nfnl_is_held(__u8 subsys_id) * @ss: The nfnetlink subsystem ID * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the READ_ONCE(), because - * caller holds the NFNL subsystem mutex. + * the READ_ONCE(), because caller holds the NFNL subsystem mutex. */ #define nfnl_dereference(p, ss) \ rcu_dereference_protected(p, lockdep_nfnl_is_held(ss)) -- cgit v1.2.3 From 91db2592e463157d8f4755f56230fb04d0308c4e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Nov 2017 09:37:35 -0800 Subject: checkpatch: Add warnings for {smp_,}read_barrier_depends() Now that both smp_read_barrier_depends() and read_barrier_depends() are being de-emphasized, warn if any are added. Signed-off-by: Paul E. McKenney Cc: Andy Whitcroft Cc: Joe Perches [ paulmck: Skipped checking files and handled whitespace per Joe Perches. ] --- scripts/checkpatch.pl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 95cda3ecc66b..9a384bfe2bd5 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5586,6 +5586,12 @@ sub process { } } +# check for smp_read_barrier_depends and read_barrier_depends + if (!$file && $line =~ /\b(smp_|)read_barrier_depends\s*\(/) { + WARN("READ_BARRIER_DEPENDS", + "$1read_barrier_depends should only be used in READ_ONCE or DEC Alpha code\n" . 
$herecurr); + } + # check of hardware specific defines if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) { CHK("ARCH_DEFINES", -- cgit v1.2.3 From 3a5db0b108e0a40f08c2bcff6a675dbf632b91e0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Nov 2017 09:45:10 -0800 Subject: drivers/vhost: Remove now-redundant read_barrier_depends() Because READ_ONCE() now implies read_barrier_depends(), the read_barrier_depends() in next_desc() is now redundant. This commit therefore removes it and the related comments. Signed-off-by: Paul E. McKenney Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Cc: Cc: --- drivers/vhost/vhost.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 33ac2b186b85..78b5940a415a 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1877,12 +1877,7 @@ static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) return -1U; /* Check they're not leading us off end of descriptors. */ - next = vhost16_to_cpu(vq, desc->next); - /* Make sure compiler knows to grab that: we don't want it changing! */ - /* We will use the result as an index in an array, so most - * architectures only need a compiler barrier here. */ - read_barrier_depends(); - + next = vhost16_to_cpu(vq, READ_ONCE(desc->next)); return next; } -- cgit v1.2.3 From 588fb54b0cc5be5fd2e12bb04810534ffc3d49cc Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 30 Nov 2017 13:14:51 +0100 Subject: clk: Manage proper runtime PM state in clk_change_rate() clk_change_rate() propagates rate change down to all its children. Such operation requires managing proper runtime PM state of each child, what was missing. Add needed calls to clk_pm_runtime*() to ensure that set_rate() clock callback is called on runtime active clock. 
This fixes following issue found on Exynos5433 TM2 board with devfreq enabled: Synchronous External Abort: synchronous external abort (0x96000210) at 0xffffff80093f5600 Internal error: : 96000210 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 5 Comm: kworker/u16:0 Not tainted 4.15.0-rc1-next-20171129+ #4 Hardware name: Samsung TM2 board (DT) Workqueue: devfreq_wq devfreq_monitor task: ffffffc0ca96b600 task.stack: ffffff80093a8000 pstate: a0000085 (NzCv daIf -PAN -UAO) pc : clk_divider_set_rate+0x54/0x118 lr : clk_divider_set_rate+0x44/0x118 ... Process kworker/u16:0 (pid: 5, stack limit = 0xffffff80093a8000) Call trace: clk_divider_set_rate+0x54/0x118 clk_change_rate+0xfc/0x4e0 clk_change_rate+0x1f0/0x4e0 clk_change_rate+0x1f0/0x4e0 clk_change_rate+0x1f0/0x4e0 clk_core_set_rate_nolock+0x138/0x148 clk_set_rate+0x28/0x50 exynos_bus_passive_target+0x6c/0x11c update_devfreq_passive+0x58/0xb4 devfreq_passive_notifier_call+0x50/0x5c notifier_call_chain+0x4c/0x88 __srcu_notifier_call_chain+0x54/0x80 srcu_notifier_call_chain+0x14/0x1c update_devfreq+0x100/0x1b4 devfreq_monitor+0x2c/0x88 process_one_work+0x148/0x3d8 worker_thread+0x13c/0x3f8 kthread+0x100/0x12c ret_from_fork+0x10/0x18 Reported-by: Chanwoo Choi Fixes: 9a34b45397e5 ("clk: Add support for runtime PM") Signed-off-by: Marek Szyprowski Reviewed-by: Ulf Hansson Tested-by: Chanwoo Choi Reviewed-by: Chanwoo Choi Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 647d056df88c..8a1860a36c77 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1564,6 +1564,9 @@ static void clk_change_rate(struct clk_core *core) best_parent_rate = core->parent->rate; } + if (clk_pm_runtime_get(core)) + return; + if (core->flags & CLK_SET_RATE_UNGATE) { unsigned long flags; @@ -1634,6 +1637,8 @@ static void clk_change_rate(struct clk_core *core) /* handle the new child who might not be in core->children yet */ if (core->new_child) 
clk_change_rate(core->new_child); + + clk_pm_runtime_put(core); } static int clk_core_set_rate_nolock(struct clk_core *core, -- cgit v1.2.3 From 975b820b6836b6b6c42fb84cd2e772e2b41bca67 Mon Sep 17 00:00:00 2001 From: Cai Li Date: Tue, 21 Nov 2017 17:24:38 +0800 Subject: clk: fix a panic error caused by accessing NULL pointer In some cases the clock parent would be set to NULL when doing a re-parent, which causes a NULL pointer dereference if the clk_set trace event is enabled. This patch sets the parent as "none" if the input parameter is NULL. Fixes: dfc202ead312 (clk: Add tracepoints for hardware operations) Signed-off-by: Cai Li Signed-off-by: Chunyan Zhang Signed-off-by: Stephen Boyd --- include/trace/events/clk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h index 758607226bfd..2cd449328aee 100644 --- a/include/trace/events/clk.h +++ b/include/trace/events/clk.h @@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent, TP_STRUCT__entry( __string( name, core->name ) - __string( pname, parent->name ) + __string( pname, parent ? parent->name : "none" ) ), TP_fast_assign( __assign_str(name, core->name); - __assign_str(pname, parent->name); + __assign_str(pname, parent ? parent->name : "none"); ), TP_printk("%s %s", __get_str(name), __get_str(pname)) -- cgit v1.2.3 From 87eba0716011e528f7841026f2cc65683219d0ad Mon Sep 17 00:00:00 2001 From: Klaus Goger Date: Tue, 5 Dec 2017 08:11:58 +0100 Subject: arm64: dts: rockchip: remove vdd_log from rk3399-puma vdd_log has no consumer and therefore will not be set to a specific voltage. Still the PWM output pin gets configured and hence the vdd_log output voltage will be changed from its default. Depending on the idle state of the PWM this will slightly over or undervoltage the logic supply of the RK3399 and cause instability with GbE (undervoltage) and PCIe (overvoltage).
Since the default value set by a voltage divider is the correct supply voltage and we don't need to change it during runtime we remove the rail from the devicetree completely so the PWM pin will not be configured. Signed-off-by: Klaus Goger Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 910628d18add..1fc5060d7027 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -155,17 +155,6 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; }; - - vdd_log: vdd-log { - compatible = "pwm-regulator"; - pwms = <&pwm2 0 25000 0>; - regulator-name = "vdd_log"; - regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; - regulator-always-on; - regulator-boot-on; - status = "okay"; - }; }; &cpu_b0 { -- cgit v1.2.3 From bc631943faba6fc3f755748091ada31798fb7d50 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 6 Dec 2017 01:10:05 +0100 Subject: arm64: dts: rockchip: limit rk3328-rock64 gmac speed to 100MBit for now It looks like either the current kernel or the hardware has reliability issues when the gmac is actually running at 1GBit. In my test-case it is not able to boot on a nfsroot at this speed, as the system will always lose the connection to the nfs-server during boot, before reaching any login prompt and not recover from this. So until this is solved, limit the speed to 100MBit as with this the nfsroot survives stress tests like an apt-get upgrade without problems. 
Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index d4f80786e7c2..3890468678ce 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -132,6 +132,8 @@ assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>; clock_in_out = "input"; + /* shows instability at 1GBit right now */ + max-speed = <100>; phy-supply = <&vcc_io>; phy-mode = "rgmii"; pinctrl-names = "default"; -- cgit v1.2.3 From 3073774e638ef18d222465fe92bfc8fccb90d288 Mon Sep 17 00:00:00 2001 From: Serhii Popovych Date: Mon, 4 Dec 2017 09:36:41 -0500 Subject: KVM: PPC: Book3S HV: Drop prepare_done from struct kvm_resize_hpt Currently the kvm_resize_hpt structure has two fields relevant to the state of an ongoing resize: 'prepare_done', which indicates whether the worker thread has completed or not, and 'error' which indicates whether it was successful or not. Since the success/failure isn't known until completion, this is confusingly redundant. This patch consolidates the information into just the 'error' value: -EBUSY indicates the worker is still in progress, other negative values indicate (completed) failure, 0 indicates successful completion. As a bonus this reduces the size of struct kvm_resize_hpt by __alignof__(struct kvm_hpt_info) and saves a few bytes of code. While there, correct the comment in struct kvm_resize_hpt which references a non-existent semaphore (leftover from an early draft). Assert with WARN_ON() in case the HPT allocation thread work runs more than once for a resize request or resize_hpt_allocate() returns -EBUSY, which is treated specially. Change comparison against zero to make checkpatch.pl happy.
Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Serhii Popovych [dwg: Changed BUG_ON()s to WARN_ON()s and altered commit message for clarity] Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 44 +++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 966097232d21..f5f2c6bf5856 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -65,11 +65,17 @@ struct kvm_resize_hpt { u32 order; /* These fields protected by kvm->lock */ + + /* Possible values and their usage: + * <0 an error occurred during allocation, + * -EBUSY allocation is in the progress, + * 0 allocation made successfuly. + */ int error; - bool prepare_done; - /* Private to the work thread, until prepare_done is true, - * then protected by kvm->resize_hpt_sem */ + /* Private to the work thread, until error != -EBUSY, + * then protected by kvm->lock. + */ struct kvm_hpt_info hpt; }; @@ -1433,15 +1439,23 @@ static void resize_hpt_prepare_work(struct work_struct *work) struct kvm *kvm = resize->kvm; int err; + if (WARN_ON(resize->error != -EBUSY)) + return; + resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", resize->order); err = resize_hpt_allocate(resize); + /* We have strict assumption about -EBUSY + * when preparing for HPT resize. + */ + if (WARN_ON(err == -EBUSY)) + err = -EINPROGRESS; + mutex_lock(&kvm->lock); resize->error = err; - resize->prepare_done = true; mutex_unlock(&kvm->lock); } @@ -1466,14 +1480,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, if (resize) { if (resize->order == shift) { - /* Suitable resize in progress */ - if (resize->prepare_done) { - ret = resize->error; - if (ret != 0) - resize_hpt_release(kvm, resize); - } else { + /* Suitable resize in progress? 
*/ + ret = resize->error; + if (ret == -EBUSY) ret = 100; /* estimated time in ms */ - } + else if (ret) + resize_hpt_release(kvm, resize); goto out; } @@ -1493,6 +1505,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, ret = -ENOMEM; goto out; } + + resize->error = -EBUSY; resize->order = shift; resize->kvm = kvm; INIT_WORK(&resize->work, resize_hpt_prepare_work); @@ -1547,16 +1561,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, if (!resize || (resize->order != shift)) goto out; - ret = -EBUSY; - if (!resize->prepare_done) - goto out; - ret = resize->error; - if (ret != 0) + if (ret) goto out; ret = resize_hpt_rehash(resize); - if (ret != 0) + if (ret) goto out; resize_hpt_pivot(resize); -- cgit v1.2.3 From 4ed11aeefda439c76ddae3ceebcfa4fad111f149 Mon Sep 17 00:00:00 2001 From: Serhii Popovych Date: Mon, 4 Dec 2017 09:36:42 -0500 Subject: KVM: PPC: Book3S HV: Fix use after free in case of multiple resize requests When serving multiple resize requests following could happen: CPU0 CPU1 ---- ---- kvm_vm_ioctl_resize_hpt_prepare(1); -> schedule_work() /* system_rq might be busy: delay */ kvm_vm_ioctl_resize_hpt_prepare(2); mutex_lock(); if (resize) { ... release_hpt_resize(); } ... resize_hpt_prepare_work() -> schedule_work() { mutex_unlock() /* resize->kvm could be wrong */ struct kvm *kvm = resize->kvm; mutex_lock(&kvm->lock); <<<< UAF ... } i.e. a second resize request with different order could be started by kvm_vm_ioctl_resize_hpt_prepare(), causing the previous request to be free()d when there's still an active worker thread which will try to access it. This leads to a use after free in point marked with UAF on the diagram above. To prevent this from happening, instead of unconditionally releasing a pre-existing resize structure from the prepare ioctl(), we check if the existing structure has an in-progress worker. 
We do that by checking if the resize->error == -EBUSY, which is safe because the resize->error field is protected by the kvm->lock. If there is an active worker, instead of releasing, we mark the structure as stale by unlinking it from kvm_struct. In the worker thread we check for a stale structure (with kvm->lock held), and in that case abort, releasing the stale structure ourselves. We make the check both before and after the actual allocation. Strictly, only the check afterwards is needed, the check before is an optimization: if the structure happens to become stale before the worker thread is dispatched, rather than during the allocation, it means we can avoid allocating then immediately freeing a potentially substantial amount of memory. This fixes the following or similar host kernel crash message: [ 635.277361] Unable to handle kernel paging request for data at address 0x00000000 [ 635.277438] Faulting instruction address: 0xc00000000052f568 [ 635.277446] Oops: Kernel access of bad area, sig: 11 [#1] [ 635.277451] SMP NR_CPUS=2048 NUMA PowerNV [ 635.277470] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter nfsv3 nfs_acl nfs lockd grace fscache kvm_hv kvm rpcrdma sunrpc ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ext4 ib_srp scsi_transport_srp ib_ipoib mbcache jbd2 rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ocrdma(T) ib_core ses enclosure scsi_transport_sas sg shpchp leds_powernv ibmpowernv i2c_opal i2c_core powernv_rng ipmi_powernv ipmi_devintf ipmi_msghandler ip_tables xfs libcrc32c sr_mod sd_mod cdrom lpfc nvme_fc(T) nvme_fabrics nvme_core ipr nvmet_fc(T) tg3 nvmet libata be2net crc_t10dif crct10dif_generic scsi_transport_fc ptp scsi_tgt pps_core crct10dif_common dm_mirror
dm_region_hash dm_log dm_mod [ 635.278687] CPU: 40 PID: 749 Comm: kworker/40:1 Tainted: G ------------ T 3.10.0.bz1510771+ #1 [ 635.278782] Workqueue: events resize_hpt_prepare_work [kvm_hv] [ 635.278851] task: c0000007e6840000 ti: c0000007e9180000 task.ti: c0000007e9180000 [ 635.278919] NIP: c00000000052f568 LR: c0000000009ea310 CTR: c0000000009ea4f0 [ 635.278988] REGS: c0000007e91837f0 TRAP: 0300 Tainted: G ------------ T (3.10.0.bz1510771+) [ 635.279077] MSR: 9000000100009033 CR: 24002022 XER: 00000000 [ 635.279248] CFAR: c000000000009368 DAR: 0000000000000000 DSISR: 40000000 SOFTE: 1 GPR00: c0000000009ea310 c0000007e9183a70 c000000001250b00 c0000007e9183b10 GPR04: 0000000000000000 0000000000000000 c0000007e9183650 0000000000000000 GPR08: c0000007ffff7b80 00000000ffffffff 0000000080000028 d00000000d2529a0 GPR12: 0000000000002200 c000000007b56800 c000000000120028 c0000007f135bb40 GPR16: 0000000000000000 c000000005c1e018 c000000005c1e018 0000000000000000 GPR20: 0000000000000001 c0000000011bf778 0000000000000001 fffffffffffffef7 GPR24: 0000000000000000 c000000f1e262e50 0000000000000002 c0000007e9180000 GPR28: c000000f1e262e4c c000000f1e262e50 0000000000000000 c0000007e9183b10 [ 635.280149] NIP [c00000000052f568] __list_add+0x38/0x110 [ 635.280197] LR [c0000000009ea310] __mutex_lock_slowpath+0xe0/0x2c0 [ 635.280253] Call Trace: [ 635.280277] [c0000007e9183af0] [c0000000009ea310] __mutex_lock_slowpath+0xe0/0x2c0 [ 635.280356] [c0000007e9183b70] [c0000000009ea554] mutex_lock+0x64/0x70 [ 635.280426] [c0000007e9183ba0] [d00000000d24da04] resize_hpt_prepare_work+0xe4/0x1c0 [kvm_hv] [ 635.280507] [c0000007e9183c40] [c000000000113c0c] process_one_work+0x1dc/0x680 [ 635.280587] [c0000007e9183ce0] [c000000000114250] worker_thread+0x1a0/0x520 [ 635.280655] [c0000007e9183d80] [c00000000012010c] kthread+0xec/0x100 [ 635.280724] [c0000007e9183e30] [c00000000000a4b8] ret_from_kernel_thread+0x5c/0xa4 [ 635.280814] Instruction dump: [ 635.280880] 7c0802a6 fba1ffe8 fbc1fff0 7cbd2b78 
fbe1fff8 7c9e2378 7c7f1b78 f8010010 [ 635.281099] f821ff81 e8a50008 7fa52040 40de00b8 7fbd2840 40de008c 7fbff040 [ 635.281324] ---[ end trace b628b73449719b9d ]--- Cc: stable@vger.kernel.org # v4.10+ Fixes: b5baa6877315 ("KVM: PPC: Book3S HV: KVM-HV HPT resizing implementation") Signed-off-by: Serhii Popovych [dwg: Replaced BUG_ON()s with WARN_ONs() and reworded commit message for clarity] Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 50 ++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f5f2c6bf5856..8355398f0bb6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1419,16 +1419,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize) static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) { - BUG_ON(kvm->arch.resize_hpt != resize); + if (WARN_ON(!mutex_is_locked(&kvm->lock))) + return; if (!resize) return; - if (resize->hpt.virt) - kvmppc_free_hpt(&resize->hpt); + if (resize->error != -EBUSY) { + if (resize->hpt.virt) + kvmppc_free_hpt(&resize->hpt); + kfree(resize); + } - kvm->arch.resize_hpt = NULL; - kfree(resize); + if (kvm->arch.resize_hpt == resize) + kvm->arch.resize_hpt = NULL; } static void resize_hpt_prepare_work(struct work_struct *work) @@ -1437,26 +1441,42 @@ static void resize_hpt_prepare_work(struct work_struct *work) struct kvm_resize_hpt, work); struct kvm *kvm = resize->kvm; - int err; + int err = 0; if (WARN_ON(resize->error != -EBUSY)) return; - resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", - resize->order); + mutex_lock(&kvm->lock); - err = resize_hpt_allocate(resize); + /* Request is still current? */ + if (kvm->arch.resize_hpt == resize) { + /* We may request large allocations here: + * do not sleep with kvm->lock held for a while. 
+ */ + mutex_unlock(&kvm->lock); - /* We have strict assumption about -EBUSY - * when preparing for HPT resize. - */ - if (WARN_ON(err == -EBUSY)) - err = -EINPROGRESS; + resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", + resize->order); - mutex_lock(&kvm->lock); + err = resize_hpt_allocate(resize); + + /* We have strict assumption about -EBUSY + * when preparing for HPT resize. + */ + if (WARN_ON(err == -EBUSY)) + err = -EINPROGRESS; + + mutex_lock(&kvm->lock); + /* It is possible that kvm->arch.resize_hpt != resize + * after we grab kvm->lock again. + */ + } resize->error = err; + if (kvm->arch.resize_hpt != resize) + resize_hpt_release(kvm, resize); + mutex_unlock(&kvm->lock); } -- cgit v1.2.3 From 71334963d01ed7ec61a958a5a6585172793f5a24 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Dec 2017 11:27:59 +0100 Subject: wireless: replace usage of hexdump with od/sed Since od/sed are in posix, hopefully there's a better chance people will have them, over hexdump. 
Fixes: 90a53e4432b1 ("cfg80211: implement regdb signature checking") Signed-off-by: Johannes Berg --- net/wireless/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 278d979c211a..63cbb6432b2d 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -27,7 +27,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) @$(kecho) " GEN $@" @echo '#include "reg.h"' > $@ @echo 'const u8 shipped_regdb_certs[] = {' >> $@ - @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done + @for f in $^ ; do od -An -v -tx1 < $$f | sed -e 's/ /\n/g' | sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | sed -e 's/^/0x/;s/$$/,/' >> $@ ; done @echo '};' >> $@ @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ @@ -36,6 +36,6 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ @$(kecho) " GEN $@" @echo '#include "reg.h"' > $@ @echo 'const u8 extra_regdb_certs[] = {' >> $@ - @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done + @for f in $^ ; do test -f $$f && od -An -v -tx1 < $$f | sed -e 's/ /\n/g' | sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | sed -e 's/^/0x/;s/$$/,/' >> $@ ; done @echo '};' >> $@ @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ -- cgit v1.2.3 From 715a12334764657bafb3ab964fb25f4e6115c770 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Dec 2017 11:59:33 +0100 Subject: wireless: don't write C files on failures Change the scripting inside the shipped/extra certs C code generation to not write the file when there are any failures. That way, if the build aborts due to failures, we don't get into a situation where a dummy file has been created and the next build succeeds, but not with the desired output. 
Fixes: 90a53e4432b1 ("cfg80211: implement regdb signature checking") Signed-off-by: Johannes Berg --- net/wireless/Makefile | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 63cbb6432b2d..d7d6cb00c47b 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -25,17 +25,45 @@ endif $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) @$(kecho) " GEN $@" - @echo '#include "reg.h"' > $@ - @echo 'const u8 shipped_regdb_certs[] = {' >> $@ - @for f in $^ ; do od -An -v -tx1 < $$f | sed -e 's/ /\n/g' | sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | sed -e 's/^/0x/;s/$$/,/' >> $@ ; done - @echo '};' >> $@ - @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ + @(set -e; \ + allf=""; \ + for f in $^ ; do \ + # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ + thisf=$$(od -An -v -tx1 < $$f | \ + sed -e 's/ /\n/g' | \ + sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \ + sed -e 's/^/0x/;s/$$/,/'); \ + # file should not be empty - maybe command substitution failed? \ + test ! 
-z "$$thisf";\ + allf=$$allf$$thisf;\ + done; \ + ( \ + echo '#include "reg.h"'; \ + echo 'const u8 shipped_regdb_certs[] = {'; \ + echo "$$allf"; \ + echo '};'; \ + echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ + ) >> $@) $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) @$(kecho) " GEN $@" - @echo '#include "reg.h"' > $@ - @echo 'const u8 extra_regdb_certs[] = {' >> $@ - @for f in $^ ; do test -f $$f && od -An -v -tx1 < $$f | sed -e 's/ /\n/g' | sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | sed -e 's/^/0x/;s/$$/,/' >> $@ ; done - @echo '};' >> $@ - @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ + @(set -e; \ + allf=""; \ + for f in $^ ; do \ + # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ + thisf=$$(od -An -v -tx1 < $$f | \ + sed -e 's/ /\n/g' | \ + sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \ + sed -e 's/^/0x/;s/$$/,/'); \ + # file should not be empty - maybe command substitution failed? \ + test ! -z "$$thisf";\ + allf=$$allf$$thisf;\ + done; \ + ( \ + echo '#include "reg.h"'; \ + echo 'const u8 extra_regdb_certs[] = {'; \ + echo "$$allf"; \ + echo '};'; \ + echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \ + ) >> $@) -- cgit v1.2.3 From 916a27901de01446bcf57ecca4783f6cff493309 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Tue, 5 Dec 2017 15:42:41 -0800 Subject: netfilter: xt_osf: Add missing permission checks The capability check in nfnetlink_rcv() verifies that the caller has CAP_NET_ADMIN in the namespace that "owns" the netlink socket. However, xt_osf_fingers is shared by all net namespaces on the system. An unprivileged user can create user and net namespaces in which he holds CAP_NET_ADMIN to bypass the netlink_net_capable() check: vpnns -- nfnl_osf -f /tmp/pf.os vpnns -- nfnl_osf -f /tmp/pf.os -d These non-root operations successfully modify the systemwide OS fingerprint list. 
Add new capable() checks so that they can't. Signed-off-by: Kevin Cernekee Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_osf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 36e14b1f061d..a34f314a8c23 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -70,6 +71,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl, struct xt_osf_finger *kf = NULL, *sf; int err = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; @@ -115,6 +119,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl, struct xt_osf_finger *sf; int err = -ENOENT; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; -- cgit v1.2.3 From 7f6d2ecd3d7acaf205ea7b3e96f9ffc55b92298b Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sun, 3 Dec 2017 19:54:41 -0600 Subject: eeprom: at24: change nvmem stride to 1 Trying to read the MAC address from an eeprom that has an offset that is not a multiple of 4 causes an error currently. Fix it by changing the nvmem stride to 1. 
Cc: stable@vger.kernel.org Signed-off-by: David Lechner [Bartosz: tweaked the commit message] Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 20b4f26d30d7..4d63ac8a82e0 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -876,7 +876,7 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) at24->nvmem_config.reg_read = at24_read; at24->nvmem_config.reg_write = at24_write; at24->nvmem_config.priv = at24; - at24->nvmem_config.stride = 4; + at24->nvmem_config.stride = 1; at24->nvmem_config.word_size = 1; at24->nvmem_config.size = chip.byte_len; -- cgit v1.2.3 From 2e662342962863bb6044ad581d7cc03795da4e9d Mon Sep 17 00:00:00 2001 From: Michael Stecklein Date: Tue, 5 Dec 2017 09:54:11 -0600 Subject: ASoC: tas6424: add bindings for TAS6424 Add the bindings for the TAS6424 digital amplifier. Signed-off-by: Michael Stecklein Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/ti,tas6424.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/ti,tas6424.txt diff --git a/Documentation/devicetree/bindings/sound/ti,tas6424.txt b/Documentation/devicetree/bindings/sound/ti,tas6424.txt new file mode 100644 index 000000000000..1c4ada0eef4e --- /dev/null +++ b/Documentation/devicetree/bindings/sound/ti,tas6424.txt @@ -0,0 +1,20 @@ +Texas Instruments TAS6424 Quad-Channel Audio amplifier + +The TAS6424 serial control bus communicates through I2C protocols. 
+ +Required properties: + - compatible: "ti,tas6424" - TAS6424 + - reg: I2C slave address + - sound-dai-cells: must be equal to 0 + +Example: + +tas6424: tas6424@6a { + compatible = "ti,tas6424"; + reg = <0x6a>; + + #sound-dai-cells = <0>; +}; + +For more product information please see the link below: +http://www.ti.com/product/TAS6424-Q1 -- cgit v1.2.3 From 157b68babe3281222e08c9c58456ca22544f06bc Mon Sep 17 00:00:00 2001 From: Andreas Dannenberg Date: Tue, 5 Dec 2017 09:54:12 -0600 Subject: ASoC: tas6424: Add support for TAS6424 digital amplifier The Texas Instruments TAS6424 device is a high-efficiency quad-channel Class-D audio power amplifier. Its digital time division multiplexed (TDM) interface enables up to 2 devices to share the same bus, supporting a total of eight channels from one audio serial port. Signed-off-by: Andreas Dannenberg Signed-off-by: Michael Stecklein Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 8 + sound/soc/codecs/Makefile | 2 + sound/soc/codecs/tas6424.c | 707 +++++++++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/tas6424.h | 144 +++++++++ 4 files changed, 861 insertions(+) create mode 100644 sound/soc/codecs/tas6424.c create mode 100644 sound/soc/codecs/tas6424.h diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index a42ddbc93f3d..6c2e0d5426f7 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -148,6 +148,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_TAS5086 if I2C select SND_SOC_TAS571X if I2C select SND_SOC_TAS5720 if I2C + select SND_SOC_TAS6424 if I2C select SND_SOC_TFA9879 if I2C select SND_SOC_TLV320AIC23_I2C if I2C select SND_SOC_TLV320AIC23_SPI if SPI_MASTER @@ -883,6 +884,13 @@ config SND_SOC_TAS5720 Enable support for Texas Instruments TAS5720L/M high-efficiency mono Class-D audio power amplifiers. 
+config SND_SOC_TAS6424 + tristate "Texas Instruments TAS6424 Quad-Channel Audio amplifier" + depends on I2C + help + Enable support for Texas Instruments TAS6424 high-efficiency + digital input quad-channel Class-D audio power amplifiers. + config SND_SOC_TFA9879 tristate "NXP Semiconductors TFA9879 amplifier" depends on I2C diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 0001069ce2a7..154abd758c30 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -156,6 +156,7 @@ snd-soc-sti-sas-objs := sti-sas.o snd-soc-tas5086-objs := tas5086.o snd-soc-tas571x-objs := tas571x.o snd-soc-tas5720-objs := tas5720.o +snd-soc-tas6424-objs := tas6424.o snd-soc-tfa9879-objs := tfa9879.o snd-soc-tlv320aic23-objs := tlv320aic23.o snd-soc-tlv320aic23-i2c-objs := tlv320aic23-i2c.o @@ -395,6 +396,7 @@ obj-$(CONFIG_SND_SOC_TAS2552) += snd-soc-tas2552.o obj-$(CONFIG_SND_SOC_TAS5086) += snd-soc-tas5086.o obj-$(CONFIG_SND_SOC_TAS571X) += snd-soc-tas571x.o obj-$(CONFIG_SND_SOC_TAS5720) += snd-soc-tas5720.o +obj-$(CONFIG_SND_SOC_TAS6424) += snd-soc-tas6424.o obj-$(CONFIG_SND_SOC_TFA9879) += snd-soc-tfa9879.o obj-$(CONFIG_SND_SOC_TLV320AIC23) += snd-soc-tlv320aic23.o obj-$(CONFIG_SND_SOC_TLV320AIC23_I2C) += snd-soc-tlv320aic23-i2c.o diff --git a/sound/soc/codecs/tas6424.c b/sound/soc/codecs/tas6424.c new file mode 100644 index 000000000000..49b87f6e85bf --- /dev/null +++ b/sound/soc/codecs/tas6424.c @@ -0,0 +1,707 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ALSA SoC Texas Instruments TAS6424 Quad-Channel Audio Amplifier + * + * Copyright (C) 2016-2017 Texas Instruments Incorporated - http://www.ti.com/ + * Author: Andreas Dannenberg + * Andrew F. 
Davis + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "tas6424.h" + +/* Define how often to check (and clear) the fault status register (in ms) */ +#define TAS6424_FAULT_CHECK_INTERVAL 200 + +static const char * const tas6424_supply_names[] = { + "dvdd", /* Digital power supply. Connect to 3.3-V supply. */ + "vbat", /* Supply used for higher voltage analog circuits. */ + "pvdd", /* Class-D amp output FETs supply. */ +}; +#define TAS6424_NUM_SUPPLIES ARRAY_SIZE(tas6424_supply_names) + +struct tas6424_data { + struct device *dev; + struct regmap *regmap; + struct regulator_bulk_data supplies[TAS6424_NUM_SUPPLIES]; + struct delayed_work fault_check_work; + unsigned int last_fault1; + unsigned int last_fault2; + unsigned int last_warn; +}; + +/* + * DAC digital volumes. From -103.5 to 24 dB in 0.5 dB steps. Note that + * setting the gain below -100 dB (register value <0x7) is effectively a MUTE + * as per device datasheet. 
+ */ +static DECLARE_TLV_DB_SCALE(dac_tlv, -10350, 50, 0); + +static const struct snd_kcontrol_new tas6424_snd_controls[] = { + SOC_SINGLE_TLV("Speaker Driver CH1 Playback Volume", + TAS6424_CH1_VOL_CTRL, 0, 0xff, 0, dac_tlv), + SOC_SINGLE_TLV("Speaker Driver CH2 Playback Volume", + TAS6424_CH2_VOL_CTRL, 0, 0xff, 0, dac_tlv), + SOC_SINGLE_TLV("Speaker Driver CH3 Playback Volume", + TAS6424_CH3_VOL_CTRL, 0, 0xff, 0, dac_tlv), + SOC_SINGLE_TLV("Speaker Driver CH4 Playback Volume", + TAS6424_CH4_VOL_CTRL, 0, 0xff, 0, dac_tlv), +}; + +static int tas6424_dac_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct tas6424_data *tas6424 = snd_soc_codec_get_drvdata(codec); + + dev_dbg(codec->dev, "%s() event=0x%0x\n", __func__, event); + + if (event & SND_SOC_DAPM_POST_PMU) { + /* Observe codec shutdown-to-active time */ + msleep(12); + + /* Turn on TAS6424 periodic fault checking/handling */ + tas6424->last_fault1 = 0; + tas6424->last_fault2 = 0; + tas6424->last_warn = 0; + schedule_delayed_work(&tas6424->fault_check_work, + msecs_to_jiffies(TAS6424_FAULT_CHECK_INTERVAL)); + } else if (event & SND_SOC_DAPM_PRE_PMD) { + /* Disable TAS6424 periodic fault checking/handling */ + cancel_delayed_work_sync(&tas6424->fault_check_work); + } + + return 0; +} + +static const struct snd_soc_dapm_widget tas6424_dapm_widgets[] = { + SND_SOC_DAPM_AIF_IN("DAC IN", "Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_DAC_E("DAC", NULL, SND_SOC_NOPM, 0, 0, tas6424_dac_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_OUTPUT("OUT") +}; + +static const struct snd_soc_dapm_route tas6424_audio_map[] = { + { "DAC", NULL, "DAC IN" }, + { "OUT", NULL, "DAC" }, +}; + +static int tas6424_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + unsigned int rate = 
params_rate(params); + unsigned int width = params_width(params); + u8 sap_ctrl = 0; + + dev_dbg(codec->dev, "%s() rate=%u width=%u\n", __func__, rate, width); + + switch (rate) { + case 44100: + sap_ctrl |= TAS6424_SAP_RATE_44100; + break; + case 48000: + sap_ctrl |= TAS6424_SAP_RATE_48000; + break; + case 96000: + sap_ctrl |= TAS6424_SAP_RATE_96000; + break; + default: + dev_err(codec->dev, "unsupported sample rate: %u\n", rate); + return -EINVAL; + } + + switch (width) { + case 16: + sap_ctrl |= TAS6424_SAP_TDM_SLOT_SZ_16; + break; + case 24: + break; + default: + dev_err(codec->dev, "unsupported sample width: %u\n", width); + return -EINVAL; + } + + snd_soc_update_bits(codec, TAS6424_SAP_CTRL, + TAS6424_SAP_RATE_MASK | + TAS6424_SAP_TDM_SLOT_SZ_16, + sap_ctrl); + + return 0; +} + +static int tas6424_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) +{ + struct snd_soc_codec *codec = dai->codec; + u8 serial_format = 0; + + dev_dbg(codec->dev, "%s() fmt=0x%0x\n", __func__, fmt); + + /* clock masters */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + dev_err(codec->dev, "Invalid DAI master/slave interface\n"); + return -EINVAL; + } + + /* signal polarity */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + default: + dev_err(codec->dev, "Invalid DAI clock signal polarity\n"); + return -EINVAL; + } + + /* interface format */ + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + serial_format |= TAS6424_SAP_I2S; + break; + case SND_SOC_DAIFMT_DSP_A: + serial_format |= TAS6424_SAP_DSP; + break; + case SND_SOC_DAIFMT_DSP_B: + /* + * We can use the fact that the TAS6424 does not care about the + * LRCLK duty cycle during TDM to receive DSP_B formatted data + * in LEFTJ mode (no delaying of the 1st data bit). 
+ */ + serial_format |= TAS6424_SAP_LEFTJ; + break; + case SND_SOC_DAIFMT_LEFT_J: + serial_format |= TAS6424_SAP_LEFTJ; + break; + default: + dev_err(codec->dev, "Invalid DAI interface format\n"); + return -EINVAL; + } + + snd_soc_update_bits(codec, TAS6424_SAP_CTRL, + TAS6424_SAP_FMT_MASK, serial_format); + + return 0; +} + +static int tas6424_set_dai_tdm_slot(struct snd_soc_dai *dai, + unsigned int tx_mask, unsigned int rx_mask, + int slots, int slot_width) +{ + struct snd_soc_codec *codec = dai->codec; + unsigned int first_slot, last_slot; + bool sap_tdm_slot_last; + + dev_dbg(codec->dev, "%s() tx_mask=%d rx_mask=%d\n", __func__, + tx_mask, rx_mask); + + if (!tx_mask || !rx_mask) + return 0; /* nothing needed to disable TDM mode */ + + /* + * Determine the first slot and last slot that is being requested so + * we'll be able to more easily enforce certain constraints as the + * TAS6424's TDM interface is not fully configurable. + */ + first_slot = __ffs(tx_mask); + last_slot = __fls(rx_mask); + + if (last_slot - first_slot != 4) { + dev_err(codec->dev, "tdm mask must cover 4 contiguous slots\n"); + return -EINVAL; + } + + switch (first_slot) { + case 0: + sap_tdm_slot_last = false; + break; + case 4: + sap_tdm_slot_last = true; + break; + default: + dev_err(codec->dev, "tdm mask must start at slot 0 or 4\n"); + return -EINVAL; + } + + snd_soc_update_bits(codec, TAS6424_SAP_CTRL, TAS6424_SAP_TDM_SLOT_LAST, + sap_tdm_slot_last ? 
TAS6424_SAP_TDM_SLOT_LAST : 0); + + return 0; +} + +static int tas6424_mute(struct snd_soc_dai *dai, int mute) +{ + struct snd_soc_codec *codec = dai->codec; + unsigned int val; + + dev_dbg(codec->dev, "%s() mute=%d\n", __func__, mute); + + if (mute) + val = TAS6424_ALL_STATE_MUTE; + else + val = TAS6424_ALL_STATE_PLAY; + + snd_soc_write(codec, TAS6424_CH_STATE_CTRL, val); + + return 0; +} + +static int tas6424_power_off(struct snd_soc_codec *codec) +{ + struct tas6424_data *tas6424 = snd_soc_codec_get_drvdata(codec); + int ret; + + snd_soc_write(codec, TAS6424_CH_STATE_CTRL, TAS6424_ALL_STATE_HIZ); + + regcache_cache_only(tas6424->regmap, true); + regcache_mark_dirty(tas6424->regmap); + + ret = regulator_bulk_disable(ARRAY_SIZE(tas6424->supplies), + tas6424->supplies); + if (ret < 0) { + dev_err(codec->dev, "failed to disable supplies: %d\n", ret); + return ret; + } + + return 0; +} + +static int tas6424_power_on(struct snd_soc_codec *codec) +{ + struct tas6424_data *tas6424 = snd_soc_codec_get_drvdata(codec); + int ret; + + ret = regulator_bulk_enable(ARRAY_SIZE(tas6424->supplies), + tas6424->supplies); + if (ret < 0) { + dev_err(codec->dev, "failed to enable supplies: %d\n", ret); + return ret; + } + + regcache_cache_only(tas6424->regmap, false); + + ret = regcache_sync(tas6424->regmap); + if (ret < 0) { + dev_err(codec->dev, "failed to sync regcache: %d\n", ret); + return ret; + } + + snd_soc_write(codec, TAS6424_CH_STATE_CTRL, TAS6424_ALL_STATE_MUTE); + + /* any time we come out of HIZ, the output channels automatically run DC + * load diagnostics, wait here until this completes + */ + msleep(230); + + return 0; +} + +static int tas6424_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + dev_dbg(codec->dev, "%s() level=%d\n", __func__, level); + + switch (level) { + case SND_SOC_BIAS_ON: + case SND_SOC_BIAS_PREPARE: + break; + case SND_SOC_BIAS_STANDBY: + if (snd_soc_codec_get_bias_level(codec) == SND_SOC_BIAS_OFF) + 
tas6424_power_on(codec); + break; + case SND_SOC_BIAS_OFF: + tas6424_power_off(codec); + break; + } + + return 0; +} + +static struct snd_soc_codec_driver soc_codec_dev_tas6424 = { + .set_bias_level = tas6424_set_bias_level, + .idle_bias_off = true, + + .component_driver = { + .controls = tas6424_snd_controls, + .num_controls = ARRAY_SIZE(tas6424_snd_controls), + .dapm_widgets = tas6424_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(tas6424_dapm_widgets), + .dapm_routes = tas6424_audio_map, + .num_dapm_routes = ARRAY_SIZE(tas6424_audio_map), + }, +}; + +static struct snd_soc_dai_ops tas6424_speaker_dai_ops = { + .hw_params = tas6424_hw_params, + .set_fmt = tas6424_set_dai_fmt, + .set_tdm_slot = tas6424_set_dai_tdm_slot, + .digital_mute = tas6424_mute, +}; + +static struct snd_soc_dai_driver tas6424_dai[] = { + { + .name = "tas6424-amplifier", + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 4, + .rates = TAS6424_RATES, + .formats = TAS6424_FORMATS, + }, + .ops = &tas6424_speaker_dai_ops, + }, +}; + +static void tas6424_fault_check_work(struct work_struct *work) +{ + struct tas6424_data *tas6424 = container_of(work, struct tas6424_data, + fault_check_work.work); + struct device *dev = tas6424->dev; + unsigned int reg; + int ret; + + ret = regmap_read(tas6424->regmap, TAS6424_GLOB_FAULT1, &reg); + if (ret < 0) { + dev_err(dev, "failed to read FAULT1 register: %d\n", ret); + goto out; + } + + /* + * Ignore any clock faults as there is no clean way to check for them. + * We would need to start checking for those faults *after* the SAIF + * stream has been setup, and stop checking *before* the stream is + * stopped to avoid any false-positives. However there are no + * appropriate hooks to monitor these events. + */ + reg &= TAS6424_FAULT_PVDD_OV | + TAS6424_FAULT_VBAT_OV | + TAS6424_FAULT_PVDD_UV | + TAS6424_FAULT_VBAT_UV; + + if (reg) + goto check_global_fault2_reg; + + /* + * Only flag errors once for a given occurrence. 
This is needed as + * the TAS6424 will take time clearing the fault condition internally + * during which we don't want to bombard the system with the same + * error message over and over. + */ + if ((reg & TAS6424_FAULT_PVDD_OV) && !(tas6424->last_fault1 & TAS6424_FAULT_PVDD_OV)) + dev_crit(dev, "experienced a PVDD overvoltage fault\n"); + + if ((reg & TAS6424_FAULT_VBAT_OV) && !(tas6424->last_fault1 & TAS6424_FAULT_VBAT_OV)) + dev_crit(dev, "experienced a VBAT overvoltage fault\n"); + + if ((reg & TAS6424_FAULT_PVDD_UV) && !(tas6424->last_fault1 & TAS6424_FAULT_PVDD_UV)) + dev_crit(dev, "experienced a PVDD undervoltage fault\n"); + + if ((reg & TAS6424_FAULT_VBAT_UV) && !(tas6424->last_fault1 & TAS6424_FAULT_VBAT_UV)) + dev_crit(dev, "experienced a VBAT undervoltage fault\n"); + + /* Store current fault1 value so we can detect any changes next time */ + tas6424->last_fault1 = reg; + +check_global_fault2_reg: + ret = regmap_read(tas6424->regmap, TAS6424_GLOB_FAULT2, &reg); + if (ret < 0) { + dev_err(dev, "failed to read FAULT2 register: %d\n", ret); + goto out; + } + + reg &= TAS6424_FAULT_OTSD | + TAS6424_FAULT_OTSD_CH1 | + TAS6424_FAULT_OTSD_CH2 | + TAS6424_FAULT_OTSD_CH3 | + TAS6424_FAULT_OTSD_CH4; + + if (!reg) + goto check_warn_reg; + + if ((reg & TAS6424_FAULT_OTSD) && !(tas6424->last_fault2 & TAS6424_FAULT_OTSD)) + dev_crit(dev, "experienced a global overtemp shutdown\n"); + + if ((reg & TAS6424_FAULT_OTSD_CH1) && !(tas6424->last_fault2 & TAS6424_FAULT_OTSD_CH1)) + dev_crit(dev, "experienced an overtemp shutdown on CH1\n"); + + if ((reg & TAS6424_FAULT_OTSD_CH2) && !(tas6424->last_fault2 & TAS6424_FAULT_OTSD_CH2)) + dev_crit(dev, "experienced an overtemp shutdown on CH2\n"); + + if ((reg & TAS6424_FAULT_OTSD_CH3) && !(tas6424->last_fault2 & TAS6424_FAULT_OTSD_CH3)) + dev_crit(dev, "experienced an overtemp shutdown on CH3\n"); + + if ((reg & TAS6424_FAULT_OTSD_CH4) && !(tas6424->last_fault2 & TAS6424_FAULT_OTSD_CH4)) + dev_crit(dev, "experienced an overtemp 
shutdown on CH4\n"); + + /* Store current fault2 value so we can detect any changes next time */ + tas6424->last_fault2 = reg; + +check_warn_reg: + ret = regmap_read(tas6424->regmap, TAS6424_WARN, &reg); + if (ret < 0) { + dev_err(dev, "failed to read WARN register: %d\n", ret); + goto out; + } + + reg &= TAS6424_WARN_VDD_UV | + TAS6424_WARN_VDD_POR | + TAS6424_WARN_VDD_OTW | + TAS6424_WARN_VDD_OTW_CH1 | + TAS6424_WARN_VDD_OTW_CH2 | + TAS6424_WARN_VDD_OTW_CH3 | + TAS6424_WARN_VDD_OTW_CH4; + + if (!reg) + goto out; + + if ((reg & TAS6424_WARN_VDD_UV) && !(tas6424->last_warn & TAS6424_WARN_VDD_UV)) + dev_warn(dev, "experienced a VDD under voltage condition\n"); + + if ((reg & TAS6424_WARN_VDD_POR) && !(tas6424->last_warn & TAS6424_WARN_VDD_POR)) + dev_warn(dev, "experienced a VDD POR condition\n"); + + if ((reg & TAS6424_WARN_VDD_OTW) && !(tas6424->last_warn & TAS6424_WARN_VDD_OTW)) + dev_warn(dev, "experienced a global overtemp warning\n"); + + if ((reg & TAS6424_WARN_VDD_OTW_CH1) && !(tas6424->last_warn & TAS6424_WARN_VDD_OTW_CH1)) + dev_warn(dev, "experienced an overtemp warning on CH1\n"); + + if ((reg & TAS6424_WARN_VDD_OTW_CH2) && !(tas6424->last_warn & TAS6424_WARN_VDD_OTW_CH2)) + dev_warn(dev, "experienced an overtemp warning on CH2\n"); + + if ((reg & TAS6424_WARN_VDD_OTW_CH3) && !(tas6424->last_warn & TAS6424_WARN_VDD_OTW_CH3)) + dev_warn(dev, "experienced an overtemp warning on CH3\n"); + + if ((reg & TAS6424_WARN_VDD_OTW_CH4) && !(tas6424->last_warn & TAS6424_WARN_VDD_OTW_CH4)) + dev_warn(dev, "experienced an overtemp warning on CH4\n"); + + /* Store current warn value so we can detect any changes next time */ + tas6424->last_warn = reg; + + /* Clear any faults by toggling the CLEAR_FAULT control bit */ + ret = regmap_write_bits(tas6424->regmap, TAS6424_MISC_CTRL3, + TAS6424_CLEAR_FAULT, TAS6424_CLEAR_FAULT); + if (ret < 0) + dev_err(dev, "failed to write MISC_CTRL3 register: %d\n", ret); + + ret = regmap_write_bits(tas6424->regmap, TAS6424_MISC_CTRL3, + 
TAS6424_CLEAR_FAULT, 0); + if (ret < 0) + dev_err(dev, "failed to write MISC_CTRL3 register: %d\n", ret); + +out: + /* Schedule the next fault check at the specified interval */ + schedule_delayed_work(&tas6424->fault_check_work, + msecs_to_jiffies(TAS6424_FAULT_CHECK_INTERVAL)); +} + +static const struct reg_default tas6424_reg_defaults[] = { + { TAS6424_MODE_CTRL, 0x00 }, + { TAS6424_MISC_CTRL1, 0x32 }, + { TAS6424_MISC_CTRL2, 0x62 }, + { TAS6424_SAP_CTRL, 0x04 }, + { TAS6424_CH_STATE_CTRL, 0x55 }, + { TAS6424_CH1_VOL_CTRL, 0xcf }, + { TAS6424_CH2_VOL_CTRL, 0xcf }, + { TAS6424_CH3_VOL_CTRL, 0xcf }, + { TAS6424_CH4_VOL_CTRL, 0xcf }, + { TAS6424_DC_DIAG_CTRL1, 0x00 }, + { TAS6424_DC_DIAG_CTRL2, 0x11 }, + { TAS6424_DC_DIAG_CTRL3, 0x11 }, + { TAS6424_PIN_CTRL, 0xff }, + { TAS6424_AC_DIAG_CTRL1, 0x00 }, + { TAS6424_MISC_CTRL3, 0x00 }, + { TAS6424_CLIP_CTRL, 0x01 }, + { TAS6424_CLIP_WINDOW, 0x14 }, + { TAS6424_CLIP_WARN, 0x00 }, + { TAS6424_CBC_STAT, 0x00 }, + { TAS6424_MISC_CTRL4, 0x40 }, +}; + +static bool tas6424_is_writable_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case TAS6424_MODE_CTRL: + case TAS6424_MISC_CTRL1: + case TAS6424_MISC_CTRL2: + case TAS6424_SAP_CTRL: + case TAS6424_CH_STATE_CTRL: + case TAS6424_CH1_VOL_CTRL: + case TAS6424_CH2_VOL_CTRL: + case TAS6424_CH3_VOL_CTRL: + case TAS6424_CH4_VOL_CTRL: + case TAS6424_DC_DIAG_CTRL1: + case TAS6424_DC_DIAG_CTRL2: + case TAS6424_DC_DIAG_CTRL3: + case TAS6424_PIN_CTRL: + case TAS6424_AC_DIAG_CTRL1: + case TAS6424_MISC_CTRL3: + case TAS6424_CLIP_CTRL: + case TAS6424_CLIP_WINDOW: + case TAS6424_CLIP_WARN: + case TAS6424_CBC_STAT: + case TAS6424_MISC_CTRL4: + return true; + default: + return false; + } +} + +static bool tas6424_is_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case TAS6424_DC_LOAD_DIAG_REP12: + case TAS6424_DC_LOAD_DIAG_REP34: + case TAS6424_DC_LOAD_DIAG_REPLO: + case TAS6424_CHANNEL_STATE: + case TAS6424_CHANNEL_FAULT: + case TAS6424_GLOB_FAULT1: + 
case TAS6424_GLOB_FAULT2: + case TAS6424_WARN: + case TAS6424_AC_LOAD_DIAG_REP1: + case TAS6424_AC_LOAD_DIAG_REP2: + case TAS6424_AC_LOAD_DIAG_REP3: + case TAS6424_AC_LOAD_DIAG_REP4: + return true; + default: + return false; + } +} + +static const struct regmap_config tas6424_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + + .writeable_reg = tas6424_is_writable_reg, + .volatile_reg = tas6424_is_volatile_reg, + + .max_register = TAS6424_MAX, + .reg_defaults = tas6424_reg_defaults, + .num_reg_defaults = ARRAY_SIZE(tas6424_reg_defaults), + .cache_type = REGCACHE_RBTREE, +}; + +#if IS_ENABLED(CONFIG_OF) +static const struct of_device_id tas6424_of_ids[] = { + { .compatible = "ti,tas6424", }, + { }, +}; +MODULE_DEVICE_TABLE(of, tas6424_of_ids); +#endif + +static int tas6424_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct tas6424_data *tas6424; + int ret; + int i; + + tas6424 = devm_kzalloc(dev, sizeof(*tas6424), GFP_KERNEL); + if (!tas6424) + return -ENOMEM; + dev_set_drvdata(dev, tas6424); + + tas6424->dev = dev; + + tas6424->regmap = devm_regmap_init_i2c(client, &tas6424_regmap_config); + if (IS_ERR(tas6424->regmap)) { + ret = PTR_ERR(tas6424->regmap); + dev_err(dev, "unable to allocate register map: %d\n", ret); + return ret; + } + + for (i = 0; i < ARRAY_SIZE(tas6424->supplies); i++) + tas6424->supplies[i].supply = tas6424_supply_names[i]; + ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(tas6424->supplies), + tas6424->supplies); + if (ret) { + dev_err(dev, "unable to request supplies: %d\n", ret); + return ret; + } + + ret = regulator_bulk_enable(ARRAY_SIZE(tas6424->supplies), + tas6424->supplies); + if (ret) { + dev_err(dev, "unable to enable supplies: %d\n", ret); + return ret; + } + + /* Reset device to establish well-defined startup state */ + ret = regmap_update_bits(tas6424->regmap, TAS6424_MODE_CTRL, + TAS6424_RESET, TAS6424_RESET); + if (ret) { + dev_err(dev, "unable to reset 
device: %d\n", ret); + return ret; + } + + INIT_DELAYED_WORK(&tas6424->fault_check_work, tas6424_fault_check_work); + + ret = snd_soc_register_codec(dev, &soc_codec_dev_tas6424, + tas6424_dai, ARRAY_SIZE(tas6424_dai)); + if (ret < 0) { + dev_err(dev, "unable to register codec: %d\n", ret); + return ret; + } + + return 0; +} + +static int tas6424_i2c_remove(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct tas6424_data *tas6424 = dev_get_drvdata(dev); + int ret; + + snd_soc_unregister_codec(dev); + + cancel_delayed_work_sync(&tas6424->fault_check_work); + + ret = regulator_bulk_disable(ARRAY_SIZE(tas6424->supplies), + tas6424->supplies); + if (ret < 0) { + dev_err(dev, "unable to disable supplies: %d\n", ret); + return ret; + } + + return 0; +} + +static const struct i2c_device_id tas6424_i2c_ids[] = { + { "tas6424", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, tas6424_i2c_ids); + +static struct i2c_driver tas6424_i2c_driver = { + .driver = { + .name = "tas6424", + .of_match_table = of_match_ptr(tas6424_of_ids), + }, + .probe = tas6424_i2c_probe, + .remove = tas6424_i2c_remove, + .id_table = tas6424_i2c_ids, +}; +module_i2c_driver(tas6424_i2c_driver); + +MODULE_AUTHOR("Andreas Dannenberg "); +MODULE_AUTHOR("Andrew F. Davis "); +MODULE_DESCRIPTION("TAS6424 Audio amplifier driver"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/tas6424.h b/sound/soc/codecs/tas6424.h new file mode 100644 index 000000000000..430588328a06 --- /dev/null +++ b/sound/soc/codecs/tas6424.h @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ALSA SoC Texas Instruments TAS6424 Quad-Channel Audio Amplifier + * + * Copyright (C) 2016-2017 Texas Instruments Incorporated - http://www.ti.com/ + * Author: Andreas Dannenberg + * Andrew F. 
Davis + */ + +#ifndef __TAS6424_H__ +#define __TAS6424_H__ + +#define TAS6424_RATES (SNDRV_PCM_RATE_44100 | \ + SNDRV_PCM_RATE_48000 | \ + SNDRV_PCM_RATE_96000) + +#define TAS6424_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE) + +/* Register Address Map */ +#define TAS6424_MODE_CTRL 0x00 +#define TAS6424_MISC_CTRL1 0x01 +#define TAS6424_MISC_CTRL2 0x02 +#define TAS6424_SAP_CTRL 0x03 +#define TAS6424_CH_STATE_CTRL 0x04 +#define TAS6424_CH1_VOL_CTRL 0x05 +#define TAS6424_CH2_VOL_CTRL 0x06 +#define TAS6424_CH3_VOL_CTRL 0x07 +#define TAS6424_CH4_VOL_CTRL 0x08 +#define TAS6424_DC_DIAG_CTRL1 0x09 +#define TAS6424_DC_DIAG_CTRL2 0x0a +#define TAS6424_DC_DIAG_CTRL3 0x0b +#define TAS6424_DC_LOAD_DIAG_REP12 0x0c +#define TAS6424_DC_LOAD_DIAG_REP34 0x0d +#define TAS6424_DC_LOAD_DIAG_REPLO 0x0e +#define TAS6424_CHANNEL_STATE 0x0f +#define TAS6424_CHANNEL_FAULT 0x10 +#define TAS6424_GLOB_FAULT1 0x11 +#define TAS6424_GLOB_FAULT2 0x12 +#define TAS6424_WARN 0x13 +#define TAS6424_PIN_CTRL 0x14 +#define TAS6424_AC_DIAG_CTRL1 0x15 +#define TAS6424_AC_DIAG_CTRL2 0x16 +#define TAS6424_AC_LOAD_DIAG_REP1 0x17 +#define TAS6424_AC_LOAD_DIAG_REP2 0x18 +#define TAS6424_AC_LOAD_DIAG_REP3 0x19 +#define TAS6424_AC_LOAD_DIAG_REP4 0x1a +#define TAS6424_MISC_CTRL3 0x21 +#define TAS6424_CLIP_CTRL 0x22 +#define TAS6424_CLIP_WINDOW 0x23 +#define TAS6424_CLIP_WARN 0x24 +#define TAS6424_CBC_STAT 0x25 +#define TAS6424_MISC_CTRL4 0x26 +#define TAS6424_MAX TAS6424_MISC_CTRL4 + +/* TAS6424_MODE_CTRL_REG */ +#define TAS6424_RESET BIT(7) + +/* TAS6424_SAP_CTRL_REG */ +#define TAS6424_SAP_RATE_MASK GENMASK(7, 6) +#define TAS6424_SAP_RATE_44100 (0x00 << 6) +#define TAS6424_SAP_RATE_48000 (0x01 << 6) +#define TAS6424_SAP_RATE_96000 (0x02 << 6) +#define TAS6424_SAP_TDM_SLOT_LAST BIT(5) +#define TAS6424_SAP_TDM_SLOT_SZ_16 BIT(4) +#define TAS6424_SAP_TDM_SLOT_SWAP BIT(3) +#define TAS6424_SAP_FMT_MASK GENMASK(2, 0) +#define TAS6424_SAP_RIGHTJ_24 (0x00 << 0) +#define TAS6424_SAP_RIGHTJ_20 (0x01 << 0) 
+#define TAS6424_SAP_RIGHTJ_18 (0x02 << 0) +#define TAS6424_SAP_RIGHTJ_16 (0x03 << 0) +#define TAS6424_SAP_I2S (0x04 << 0) +#define TAS6424_SAP_LEFTJ (0x05 << 0) +#define TAS6424_SAP_DSP (0x06 << 0) + +/* TAS6424_CH_STATE_CTRL_REG */ +#define TAS6424_CH1_STATE_MASK GENMASK(7, 6) +#define TAS6424_CH1_STATE_PLAY (0x00 << 6) +#define TAS6424_CH1_STATE_HIZ (0x01 << 6) +#define TAS6424_CH1_STATE_MUTE (0x02 << 6) +#define TAS6424_CH1_STATE_DIAG (0x03 << 6) +#define TAS6424_CH2_STATE_MASK GENMASK(5, 4) +#define TAS6424_CH2_STATE_PLAY (0x00 << 4) +#define TAS6424_CH2_STATE_HIZ (0x01 << 4) +#define TAS6424_CH2_STATE_MUTE (0x02 << 4) +#define TAS6424_CH2_STATE_DIAG (0x03 << 4) +#define TAS6424_CH3_STATE_MASK GENMASK(3, 2) +#define TAS6424_CH3_STATE_PLAY (0x00 << 2) +#define TAS6424_CH3_STATE_HIZ (0x01 << 2) +#define TAS6424_CH3_STATE_MUTE (0x02 << 2) +#define TAS6424_CH3_STATE_DIAG (0x03 << 2) +#define TAS6424_CH4_STATE_MASK GENMASK(1, 0) +#define TAS6424_CH4_STATE_PLAY (0x00 << 0) +#define TAS6424_CH4_STATE_HIZ (0x01 << 0) +#define TAS6424_CH4_STATE_MUTE (0x02 << 0) +#define TAS6424_CH4_STATE_DIAG (0x03 << 0) +#define TAS6424_ALL_STATE_PLAY (TAS6424_CH1_STATE_PLAY | \ + TAS6424_CH2_STATE_PLAY | \ + TAS6424_CH3_STATE_PLAY | \ + TAS6424_CH4_STATE_PLAY) +#define TAS6424_ALL_STATE_HIZ (TAS6424_CH1_STATE_HIZ | \ + TAS6424_CH2_STATE_HIZ | \ + TAS6424_CH3_STATE_HIZ | \ + TAS6424_CH4_STATE_HIZ) +#define TAS6424_ALL_STATE_MUTE (TAS6424_CH1_STATE_MUTE | \ + TAS6424_CH2_STATE_MUTE | \ + TAS6424_CH3_STATE_MUTE | \ + TAS6424_CH4_STATE_MUTE) +#define TAS6424_ALL_STATE_DIAG (TAS6424_CH1_STATE_DIAG | \ + TAS6424_CH2_STATE_DIAG | \ + TAS6424_CH3_STATE_DIAG | \ + TAS6424_CH4_STATE_DIAG) + +/* TAS6424_GLOB_FAULT1_REG */ +#define TAS6424_FAULT_CLOCK BIT(4) +#define TAS6424_FAULT_PVDD_OV BIT(3) +#define TAS6424_FAULT_VBAT_OV BIT(2) +#define TAS6424_FAULT_PVDD_UV BIT(1) +#define TAS6424_FAULT_VBAT_UV BIT(0) + +/* TAS6424_GLOB_FAULT2_REG */ +#define TAS6424_FAULT_OTSD BIT(4) +#define 
TAS6424_FAULT_OTSD_CH1 BIT(3) +#define TAS6424_FAULT_OTSD_CH2 BIT(2) +#define TAS6424_FAULT_OTSD_CH3 BIT(1) +#define TAS6424_FAULT_OTSD_CH4 BIT(0) + +/* TAS6424_WARN_REG */ +#define TAS6424_WARN_VDD_UV BIT(6) +#define TAS6424_WARN_VDD_POR BIT(5) +#define TAS6424_WARN_VDD_OTW BIT(4) +#define TAS6424_WARN_VDD_OTW_CH1 BIT(3) +#define TAS6424_WARN_VDD_OTW_CH2 BIT(2) +#define TAS6424_WARN_VDD_OTW_CH3 BIT(1) +#define TAS6424_WARN_VDD_OTW_CH4 BIT(0) + +/* TAS6424_MISC_CTRL3_REG */ +#define TAS6424_CLEAR_FAULT BIT(7) +#define TAS6424_PBTL_CH_SEL BIT(6) +#define TAS6424_MASK_CBC_WARN BIT(5) +#define TAS6424_MASK_VDD_UV BIT(4) +#define TAS6424_OTSD_AUTO_RECOVERY BIT(3) + +#endif /* __TAS6424_H__ */ -- cgit v1.2.3 From cfe17c9bbe6a673fdafdab179c32b355ed447f66 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Nov 2017 21:15:13 +0900 Subject: kbuild: move cc-option and cc-disable-warning after incl. arch Makefile Geert reported commit ae6b289a3789 ("kbuild: Set KBUILD_CFLAGS before incl. arch Makefile") broke cross-compilation using a cross-compiler that supports less compiler options than the host compiler. For example, cc1: error: unrecognized command line option "-Wno-unused-but-set-variable" This problem happens on architectures that setup CROSS_COMPILE in their arch/*/Makefile. Move the cc-option and cc-disable-warning back to the original position, but keep the Clang target options untouched. Fixes: ae6b289a3789 ("kbuild: Set KBUILD_CFLAGS before incl. 
arch Makefile") Reported-by: Geert Uytterhoeven Signed-off-by: Masahiro Yamada Tested-by: Geert Uytterhoeven --- Makefile | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index c988e46a53cd..477c4cf01cae 100644 --- a/Makefile +++ b/Makefile @@ -484,26 +484,6 @@ CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) -KBUILD_CFLAGS += $(call cc-disable-warning, gnu) -KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) -# Quiet clang warning: comparison of unsigned expression < 0 is always false -KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) -# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the -# source of a reference will be _MergedGlobals and not on of the whitelisted names. -# See modpost pattern 2 -KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) -KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) -else - -# These warnings generated too much noise in a regular build. 
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) endif ifeq ($(config-targets),1) @@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR endif KBUILD_CFLAGS += $(stackp-flag) +ifeq ($(cc-name),clang) +KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) +KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +# Quiet clang warning: comparison of unsigned expression < 0 is always false +KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) +# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the +# source of a reference will be _MergedGlobals and not on of the whitelisted names. +# See modpost pattern 2 +KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) +KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) +else + +# These warnings generated too much noise in a regular build. +# Use make W=1 to enable them (see scripts/Makefile.extrawarn) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) +endif + ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else -- cgit v1.2.3 From 993a3450712b2a723689b6b6b1a7fe6fe053708e Mon Sep 17 00:00:00 2001 From: Andreas Dannenberg Date: Tue, 5 Dec 2017 14:52:56 -0600 Subject: ASoC: pcm186x: Add initial PCM1862/63/64/65 universal ADC driver This is an initial version of the PCM186x codec driver supporting both 2-channel and 4-channel device variants. 
Not all device features are supported yet such as master/slave mode PLL configuration for which the codec driver currently relies on the PCM186x built-in clock auto-detection feature or the connection of digital microphones. However here is what's here and what should work: - Support for SPI and I2C low-level interfaces - Regmap support and basic register definitions - Input Mixer and Mux selection - I2C, LJ, and TDM DAI format support Signed-off-by: Andreas Dannenberg Signed-off-by: Michael Stecklein Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 17 + sound/soc/codecs/Makefile | 6 + sound/soc/codecs/pcm186x-i2c.c | 69 ++++ sound/soc/codecs/pcm186x-spi.c | 69 ++++ sound/soc/codecs/pcm186x.c | 719 +++++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/pcm186x.h | 220 +++++++++++++ 6 files changed, 1100 insertions(+) create mode 100644 sound/soc/codecs/pcm186x-i2c.c create mode 100644 sound/soc/codecs/pcm186x-spi.c create mode 100644 sound/soc/codecs/pcm186x.c create mode 100644 sound/soc/codecs/pcm186x.h diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index a42ddbc93f3d..dda8c01170b3 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -109,6 +109,8 @@ config SND_SOC_ALL_CODECS select SND_SOC_PCM1681 if I2C select SND_SOC_PCM179X_I2C if I2C select SND_SOC_PCM179X_SPI if SPI_MASTER + select SND_SOC_PCM186X_I2C if I2C + select SND_SOC_PCM186X_SPI if SPI_MASTER select SND_SOC_PCM3008 select SND_SOC_PCM3168A_I2C if I2C select SND_SOC_PCM3168A_SPI if SPI_MASTER @@ -661,6 +663,21 @@ config SND_SOC_PCM179X_SPI Enable support for Texas Instruments PCM179x CODEC. Select this if your PCM179x is connected via an SPI bus. 
+config SND_SOC_PCM186X + tristate + +config SND_SOC_PCM186X_I2C + tristate "Texas Instruments PCM186x CODECs - I2C" + depends on I2C + select SND_SOC_PCM186X + select REGMAP_I2C + +config SND_SOC_PCM186X_SPI + tristate "Texas Instruments PCM186x CODECs - SPI" + depends on SPI_MASTER + select SND_SOC_PCM186X + select REGMAP_SPI + config SND_SOC_PCM3008 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 0001069ce2a7..146e48a60098 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -105,6 +105,9 @@ snd-soc-pcm1681-objs := pcm1681.o snd-soc-pcm179x-codec-objs := pcm179x.o snd-soc-pcm179x-i2c-objs := pcm179x-i2c.o snd-soc-pcm179x-spi-objs := pcm179x-spi.o +snd-soc-pcm186x-objs := pcm186x.o +snd-soc-pcm186x-i2c-objs := pcm186x-i2c.o +snd-soc-pcm186x-spi-objs := pcm186x-spi.o snd-soc-pcm3008-objs := pcm3008.o snd-soc-pcm3168a-objs := pcm3168a.o snd-soc-pcm3168a-i2c-objs := pcm3168a-i2c.o @@ -345,6 +348,9 @@ obj-$(CONFIG_SND_SOC_PCM1681) += snd-soc-pcm1681.o obj-$(CONFIG_SND_SOC_PCM179X) += snd-soc-pcm179x-codec.o obj-$(CONFIG_SND_SOC_PCM179X_I2C) += snd-soc-pcm179x-i2c.o obj-$(CONFIG_SND_SOC_PCM179X_SPI) += snd-soc-pcm179x-spi.o +obj-$(CONFIG_SND_SOC_PCM186X) += snd-soc-pcm186x.o +obj-$(CONFIG_SND_SOC_PCM186X_I2C) += snd-soc-pcm186x-i2c.o +obj-$(CONFIG_SND_SOC_PCM186X_SPI) += snd-soc-pcm186x-spi.o obj-$(CONFIG_SND_SOC_PCM3008) += snd-soc-pcm3008.o obj-$(CONFIG_SND_SOC_PCM3168A) += snd-soc-pcm3168a.o obj-$(CONFIG_SND_SOC_PCM3168A_I2C) += snd-soc-pcm3168a-i2c.o diff --git a/sound/soc/codecs/pcm186x-i2c.c b/sound/soc/codecs/pcm186x-i2c.c new file mode 100644 index 000000000000..543621232d60 --- /dev/null +++ b/sound/soc/codecs/pcm186x-i2c.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Texas Instruments PCM186x Universal Audio ADC - I2C + * + * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com + * Andreas Dannenberg + * Andrew F. 
Davis + */ + +#include +#include +#include + +#include "pcm186x.h" + +static const struct of_device_id pcm186x_of_match[] = { + { .compatible = "ti,pcm1862", .data = (void *)PCM1862 }, + { .compatible = "ti,pcm1863", .data = (void *)PCM1863 }, + { .compatible = "ti,pcm1864", .data = (void *)PCM1864 }, + { .compatible = "ti,pcm1865", .data = (void *)PCM1865 }, + { } +}; +MODULE_DEVICE_TABLE(of, pcm186x_of_match); + +static int pcm186x_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + const enum pcm186x_type type = (enum pcm186x_type)id->driver_data; + int irq = i2c->irq; + struct regmap *regmap; + + regmap = devm_regmap_init_i2c(i2c, &pcm186x_regmap); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + return pcm186x_probe(&i2c->dev, type, irq, regmap); +} + +static int pcm186x_i2c_remove(struct i2c_client *i2c) +{ + pcm186x_remove(&i2c->dev); + + return 0; +} + +static const struct i2c_device_id pcm186x_i2c_id[] = { + { "pcm1862", PCM1862 }, + { "pcm1863", PCM1863 }, + { "pcm1864", PCM1864 }, + { "pcm1865", PCM1865 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, pcm186x_i2c_id); + +static struct i2c_driver pcm186x_i2c_driver = { + .probe = pcm186x_i2c_probe, + .remove = pcm186x_i2c_remove, + .id_table = pcm186x_i2c_id, + .driver = { + .name = "pcm186x", + .of_match_table = pcm186x_of_match, + }, +}; +module_i2c_driver(pcm186x_i2c_driver); + +MODULE_AUTHOR("Andreas Dannenberg "); +MODULE_AUTHOR("Andrew F. Davis "); +MODULE_DESCRIPTION("PCM186x Universal Audio ADC I2C Interface Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/pcm186x-spi.c b/sound/soc/codecs/pcm186x-spi.c new file mode 100644 index 000000000000..2366f8e4d4d4 --- /dev/null +++ b/sound/soc/codecs/pcm186x-spi.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Texas Instruments PCM186x Universal Audio ADC - SPI + * + * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com + * Andreas Dannenberg + * Andrew F. 
Davis + */ + +#include +#include +#include + +#include "pcm186x.h" + +static const struct of_device_id pcm186x_of_match[] = { + { .compatible = "ti,pcm1862", .data = (void *)PCM1862 }, + { .compatible = "ti,pcm1863", .data = (void *)PCM1863 }, + { .compatible = "ti,pcm1864", .data = (void *)PCM1864 }, + { .compatible = "ti,pcm1865", .data = (void *)PCM1865 }, + { } +}; +MODULE_DEVICE_TABLE(of, pcm186x_of_match); + +static int pcm186x_spi_probe(struct spi_device *spi) +{ + const enum pcm186x_type type = + (enum pcm186x_type)spi_get_device_id(spi)->driver_data; + int irq = spi->irq; + struct regmap *regmap; + + regmap = devm_regmap_init_spi(spi, &pcm186x_regmap); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + return pcm186x_probe(&spi->dev, type, irq, regmap); +} + +static int pcm186x_spi_remove(struct spi_device *spi) +{ + pcm186x_remove(&spi->dev); + + return 0; +} + +static const struct spi_device_id pcm186x_spi_id[] = { + { "pcm1862", PCM1862 }, + { "pcm1863", PCM1863 }, + { "pcm1864", PCM1864 }, + { "pcm1865", PCM1865 }, + { } +}; +MODULE_DEVICE_TABLE(spi, pcm186x_spi_id); + +static struct spi_driver pcm186x_spi_driver = { + .probe = pcm186x_spi_probe, + .remove = pcm186x_spi_remove, + .id_table = pcm186x_spi_id, + .driver = { + .name = "pcm186x", + .of_match_table = pcm186x_of_match, + }, +}; +module_spi_driver(pcm186x_spi_driver); + +MODULE_AUTHOR("Andreas Dannenberg "); +MODULE_AUTHOR("Andrew F. Davis "); +MODULE_DESCRIPTION("PCM186x Universal Audio ADC SPI Interface Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/pcm186x.c b/sound/soc/codecs/pcm186x.c new file mode 100644 index 000000000000..f7aa56e20169 --- /dev/null +++ b/sound/soc/codecs/pcm186x.c @@ -0,0 +1,719 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Texas Instruments PCM186x Universal Audio ADC + * + * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com + * Andreas Dannenberg + * Andrew F. 
Davis + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pcm186x.h" + +static const char * const pcm186x_supply_names[] = { + "avdd", /* Analog power supply. Connect to 3.3-V supply. */ + "dvdd", /* Digital power supply. Connect to 3.3-V supply. */ + "iovdd", /* I/O power supply. Connect to 3.3-V or 1.8-V. */ +}; +#define PCM186x_NUM_SUPPLIES ARRAY_SIZE(pcm186x_supply_names) + +struct pcm186x_priv { + struct regmap *regmap; + struct regulator_bulk_data supplies[PCM186x_NUM_SUPPLIES]; + unsigned int sysclk; + unsigned int tdm_offset; + bool is_tdm_mode; + bool is_master_mode; +}; + +static const DECLARE_TLV_DB_SCALE(pcm186x_pga_tlv, -1200, 4000, 50); + +static const struct snd_kcontrol_new pcm1863_snd_controls[] = { + SOC_DOUBLE_R_S_TLV("ADC Capture Volume", PCM186X_PGA_VAL_CH1_L, + PCM186X_PGA_VAL_CH1_R, 0, -24, 80, 7, 0, + pcm186x_pga_tlv), +}; + +static const struct snd_kcontrol_new pcm1865_snd_controls[] = { + SOC_DOUBLE_R_S_TLV("ADC1 Capture Volume", PCM186X_PGA_VAL_CH1_L, + PCM186X_PGA_VAL_CH1_R, 0, -24, 80, 7, 0, + pcm186x_pga_tlv), + SOC_DOUBLE_R_S_TLV("ADC2 Capture Volume", PCM186X_PGA_VAL_CH2_L, + PCM186X_PGA_VAL_CH2_R, 0, -24, 80, 7, 0, + pcm186x_pga_tlv), +}; + +const unsigned int pcm186x_adc_input_channel_sel_value[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x20, 0x30 +}; + +static const char * const pcm186x_adcl_input_channel_sel_text[] = { + "No Select", + "VINL1[SE]", /* Default for ADC1L */ + "VINL2[SE]", /* Default for ADC2L */ + "VINL2[SE] + VINL1[SE]", + "VINL3[SE]", + "VINL3[SE] + VINL1[SE]", + "VINL3[SE] + VINL2[SE]", + "VINL3[SE] + VINL2[SE] + VINL1[SE]", + "VINL4[SE]", + "VINL4[SE] + VINL1[SE]", + "VINL4[SE] + VINL2[SE]", + "VINL4[SE] + VINL2[SE] + VINL1[SE]", + "VINL4[SE] + VINL3[SE]", + "VINL4[SE] + VINL3[SE] + VINL1[SE]", + "VINL4[SE] + 
VINL3[SE] + VINL2[SE]", + "VINL4[SE] + VINL3[SE] + VINL2[SE] + VINL1[SE]", + "{VIN1P, VIN1M}[DIFF]", + "{VIN4P, VIN4M}[DIFF]", + "{VIN1P, VIN1M}[DIFF] + {VIN4P, VIN4M}[DIFF]" +}; + +static const char * const pcm186x_adcr_input_channel_sel_text[] = { + "No Select", + "VINR1[SE]", /* Default for ADC1R */ + "VINR2[SE]", /* Default for ADC2R */ + "VINR2[SE] + VINR1[SE]", + "VINR3[SE]", + "VINR3[SE] + VINR1[SE]", + "VINR3[SE] + VINR2[SE]", + "VINR3[SE] + VINR2[SE] + VINR1[SE]", + "VINR4[SE]", + "VINR4[SE] + VINR1[SE]", + "VINR4[SE] + VINR2[SE]", + "VINR4[SE] + VINR2[SE] + VINR1[SE]", + "VINR4[SE] + VINR3[SE]", + "VINR4[SE] + VINR3[SE] + VINR1[SE]", + "VINR4[SE] + VINR3[SE] + VINR2[SE]", + "VINR4[SE] + VINR3[SE] + VINR2[SE] + VINR1[SE]", + "{VIN2P, VIN2M}[DIFF]", + "{VIN3P, VIN3M}[DIFF]", + "{VIN2P, VIN2M}[DIFF] + {VIN3P, VIN3M}[DIFF]" +}; + +static const struct soc_enum pcm186x_adc_input_channel_sel[] = { + SOC_VALUE_ENUM_SINGLE(PCM186X_ADC1_INPUT_SEL_L, 0, + PCM186X_ADC_INPUT_SEL_MASK, + ARRAY_SIZE(pcm186x_adcl_input_channel_sel_text), + pcm186x_adcl_input_channel_sel_text, + pcm186x_adc_input_channel_sel_value), + SOC_VALUE_ENUM_SINGLE(PCM186X_ADC1_INPUT_SEL_R, 0, + PCM186X_ADC_INPUT_SEL_MASK, + ARRAY_SIZE(pcm186x_adcr_input_channel_sel_text), + pcm186x_adcr_input_channel_sel_text, + pcm186x_adc_input_channel_sel_value), + SOC_VALUE_ENUM_SINGLE(PCM186X_ADC2_INPUT_SEL_L, 0, + PCM186X_ADC_INPUT_SEL_MASK, + ARRAY_SIZE(pcm186x_adcl_input_channel_sel_text), + pcm186x_adcl_input_channel_sel_text, + pcm186x_adc_input_channel_sel_value), + SOC_VALUE_ENUM_SINGLE(PCM186X_ADC2_INPUT_SEL_R, 0, + PCM186X_ADC_INPUT_SEL_MASK, + ARRAY_SIZE(pcm186x_adcr_input_channel_sel_text), + pcm186x_adcr_input_channel_sel_text, + pcm186x_adc_input_channel_sel_value), +}; + +static const struct snd_kcontrol_new pcm186x_adc_mux_controls[] = { + SOC_DAPM_ENUM("ADC1 Left Input", pcm186x_adc_input_channel_sel[0]), + SOC_DAPM_ENUM("ADC1 Right Input", pcm186x_adc_input_channel_sel[1]), + 
SOC_DAPM_ENUM("ADC2 Left Input", pcm186x_adc_input_channel_sel[2]), + SOC_DAPM_ENUM("ADC2 Right Input", pcm186x_adc_input_channel_sel[3]), +}; + +static const struct snd_soc_dapm_widget pcm1863_dapm_widgets[] = { + SND_SOC_DAPM_INPUT("VINL1"), + SND_SOC_DAPM_INPUT("VINR1"), + SND_SOC_DAPM_INPUT("VINL2"), + SND_SOC_DAPM_INPUT("VINR2"), + SND_SOC_DAPM_INPUT("VINL3"), + SND_SOC_DAPM_INPUT("VINR3"), + SND_SOC_DAPM_INPUT("VINL4"), + SND_SOC_DAPM_INPUT("VINR4"), + + SND_SOC_DAPM_MUX("ADC Left Capture Source", SND_SOC_NOPM, 0, 0, + &pcm186x_adc_mux_controls[0]), + SND_SOC_DAPM_MUX("ADC Right Capture Source", SND_SOC_NOPM, 0, 0, + &pcm186x_adc_mux_controls[1]), + + /* + * Put the codec into SLEEP mode when not in use, allowing the + * Energysense mechanism to operate. + */ + SND_SOC_DAPM_ADC("ADC", "HiFi Capture", PCM186X_POWER_CTRL, 1, 0), +}; + +static const struct snd_soc_dapm_widget pcm1865_dapm_widgets[] = { + SND_SOC_DAPM_INPUT("VINL1"), + SND_SOC_DAPM_INPUT("VINR1"), + SND_SOC_DAPM_INPUT("VINL2"), + SND_SOC_DAPM_INPUT("VINR2"), + SND_SOC_DAPM_INPUT("VINL3"), + SND_SOC_DAPM_INPUT("VINR3"), + SND_SOC_DAPM_INPUT("VINL4"), + SND_SOC_DAPM_INPUT("VINR4"), + + SND_SOC_DAPM_MUX("ADC1 Left Capture Source", SND_SOC_NOPM, 0, 0, + &pcm186x_adc_mux_controls[0]), + SND_SOC_DAPM_MUX("ADC1 Right Capture Source", SND_SOC_NOPM, 0, 0, + &pcm186x_adc_mux_controls[1]), + SND_SOC_DAPM_MUX("ADC2 Left Capture Source", SND_SOC_NOPM, 0, 0, + &pcm186x_adc_mux_controls[2]), + SND_SOC_DAPM_MUX("ADC2 Right Capture Source", SND_SOC_NOPM, 0, 0, + &pcm186x_adc_mux_controls[3]), + + /* + * Put the codec into SLEEP mode when not in use, allowing the + * Energysense mechanism to operate. 
+ */ + SND_SOC_DAPM_ADC("ADC1", "HiFi Capture 1", PCM186X_POWER_CTRL, 1, 0), + SND_SOC_DAPM_ADC("ADC2", "HiFi Capture 2", PCM186X_POWER_CTRL, 1, 0), +}; + +static const struct snd_soc_dapm_route pcm1863_dapm_routes[] = { + { "ADC Left Capture Source", NULL, "VINL1" }, + { "ADC Left Capture Source", NULL, "VINR1" }, + { "ADC Left Capture Source", NULL, "VINL2" }, + { "ADC Left Capture Source", NULL, "VINR2" }, + { "ADC Left Capture Source", NULL, "VINL3" }, + { "ADC Left Capture Source", NULL, "VINR3" }, + { "ADC Left Capture Source", NULL, "VINL4" }, + { "ADC Left Capture Source", NULL, "VINR4" }, + + { "ADC", NULL, "ADC Left Capture Source" }, + + { "ADC Right Capture Source", NULL, "VINL1" }, + { "ADC Right Capture Source", NULL, "VINR1" }, + { "ADC Right Capture Source", NULL, "VINL2" }, + { "ADC Right Capture Source", NULL, "VINR2" }, + { "ADC Right Capture Source", NULL, "VINL3" }, + { "ADC Right Capture Source", NULL, "VINR3" }, + { "ADC Right Capture Source", NULL, "VINL4" }, + { "ADC Right Capture Source", NULL, "VINR4" }, + + { "ADC", NULL, "ADC Right Capture Source" }, +}; + +static const struct snd_soc_dapm_route pcm1865_dapm_routes[] = { + { "ADC1 Left Capture Source", NULL, "VINL1" }, + { "ADC1 Left Capture Source", NULL, "VINR1" }, + { "ADC1 Left Capture Source", NULL, "VINL2" }, + { "ADC1 Left Capture Source", NULL, "VINR2" }, + { "ADC1 Left Capture Source", NULL, "VINL3" }, + { "ADC1 Left Capture Source", NULL, "VINR3" }, + { "ADC1 Left Capture Source", NULL, "VINL4" }, + { "ADC1 Left Capture Source", NULL, "VINR4" }, + + { "ADC1", NULL, "ADC1 Left Capture Source" }, + + { "ADC1 Right Capture Source", NULL, "VINL1" }, + { "ADC1 Right Capture Source", NULL, "VINR1" }, + { "ADC1 Right Capture Source", NULL, "VINL2" }, + { "ADC1 Right Capture Source", NULL, "VINR2" }, + { "ADC1 Right Capture Source", NULL, "VINL3" }, + { "ADC1 Right Capture Source", NULL, "VINR3" }, + { "ADC1 Right Capture Source", NULL, "VINL4" }, + { "ADC1 Right Capture Source", 
NULL, "VINR4" }, + + { "ADC1", NULL, "ADC1 Right Capture Source" }, + + { "ADC2 Left Capture Source", NULL, "VINL1" }, + { "ADC2 Left Capture Source", NULL, "VINR1" }, + { "ADC2 Left Capture Source", NULL, "VINL2" }, + { "ADC2 Left Capture Source", NULL, "VINR2" }, + { "ADC2 Left Capture Source", NULL, "VINL3" }, + { "ADC2 Left Capture Source", NULL, "VINR3" }, + { "ADC2 Left Capture Source", NULL, "VINL4" }, + { "ADC2 Left Capture Source", NULL, "VINR4" }, + + { "ADC2", NULL, "ADC2 Left Capture Source" }, + + { "ADC2 Right Capture Source", NULL, "VINL1" }, + { "ADC2 Right Capture Source", NULL, "VINR1" }, + { "ADC2 Right Capture Source", NULL, "VINL2" }, + { "ADC2 Right Capture Source", NULL, "VINR2" }, + { "ADC2 Right Capture Source", NULL, "VINL3" }, + { "ADC2 Right Capture Source", NULL, "VINR3" }, + { "ADC2 Right Capture Source", NULL, "VINL4" }, + { "ADC2 Right Capture Source", NULL, "VINR4" }, + + { "ADC2", NULL, "ADC2 Right Capture Source" }, +}; + +static int pcm186x_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + + struct pcm186x_priv *priv = snd_soc_codec_get_drvdata(codec); + unsigned int rate = params_rate(params); + unsigned int format = params_format(params); + unsigned int width = params_width(params); + unsigned int channels = params_channels(params); + unsigned int div_lrck; + unsigned int div_bck; + u8 tdm_tx_sel = 0; + u8 pcm_cfg = 0; + + dev_dbg(codec->dev, "%s() rate=%u format=0x%x width=%u channels=%u\n", + __func__, rate, format, width, channels); + + switch (width) { + case 16: + pcm_cfg = PCM186X_PCM_CFG_RX_WLEN_16 << + PCM186X_PCM_CFG_RX_WLEN_SHIFT | + PCM186X_PCM_CFG_TX_WLEN_16 << + PCM186X_PCM_CFG_TX_WLEN_SHIFT; + break; + case 20: + pcm_cfg = PCM186X_PCM_CFG_RX_WLEN_20 << + PCM186X_PCM_CFG_RX_WLEN_SHIFT | + PCM186X_PCM_CFG_TX_WLEN_20 << + PCM186X_PCM_CFG_TX_WLEN_SHIFT; + break; + case 24: + pcm_cfg = PCM186X_PCM_CFG_RX_WLEN_24 
<< + PCM186X_PCM_CFG_RX_WLEN_SHIFT | + PCM186X_PCM_CFG_TX_WLEN_24 << + PCM186X_PCM_CFG_TX_WLEN_SHIFT; + break; + case 32: + pcm_cfg = PCM186X_PCM_CFG_RX_WLEN_32 << + PCM186X_PCM_CFG_RX_WLEN_SHIFT | + PCM186X_PCM_CFG_TX_WLEN_32 << + PCM186X_PCM_CFG_TX_WLEN_SHIFT; + break; + default: + return -EINVAL; + } + + snd_soc_update_bits(codec, PCM186X_PCM_CFG, + PCM186X_PCM_CFG_RX_WLEN_MASK | + PCM186X_PCM_CFG_TX_WLEN_MASK, + pcm_cfg); + + div_lrck = width * channels; + + if (priv->is_tdm_mode) { + /* Select TDM transmission data */ + switch (channels) { + case 2: + tdm_tx_sel = PCM186X_TDM_TX_SEL_2CH; + break; + case 4: + tdm_tx_sel = PCM186X_TDM_TX_SEL_4CH; + break; + case 6: + tdm_tx_sel = PCM186X_TDM_TX_SEL_6CH; + break; + default: + return -EINVAL; + } + + snd_soc_update_bits(codec, PCM186X_TDM_TX_SEL, + PCM186X_TDM_TX_SEL_MASK, tdm_tx_sel); + + /* In DSP/TDM mode, the LRCLK divider must be 256 */ + div_lrck = 256; + + /* Configure 1/256 duty cycle for LRCK */ + snd_soc_update_bits(codec, PCM186X_PCM_CFG, + PCM186X_PCM_CFG_TDM_LRCK_MODE, + PCM186X_PCM_CFG_TDM_LRCK_MODE); + } + + /* Only configure clock dividers in master mode. 
*/ + if (priv->is_master_mode) { + div_bck = priv->sysclk / (div_lrck * rate); + + dev_dbg(codec->dev, + "%s() master_clk=%u div_bck=%u div_lrck=%u\n", + __func__, priv->sysclk, div_bck, div_lrck); + + snd_soc_write(codec, PCM186X_BCK_DIV, div_bck - 1); + snd_soc_write(codec, PCM186X_LRK_DIV, div_lrck - 1); + } + + return 0; +} + +static int pcm186x_set_fmt(struct snd_soc_dai *dai, unsigned int format) +{ + struct snd_soc_codec *codec = dai->codec; + struct pcm186x_priv *priv = snd_soc_codec_get_drvdata(codec); + u8 clk_ctrl = 0; + u8 pcm_cfg = 0; + + dev_dbg(codec->dev, "%s() format=0x%x\n", __func__, format); + + /* set master/slave audio interface */ + switch (format & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + if (!priv->sysclk) { + dev_err(codec->dev, "operating in master mode requires sysclock to be configured\n"); + return -EINVAL; + } + clk_ctrl |= PCM186X_CLK_CTRL_MST_MODE; + priv->is_master_mode = true; + break; + case SND_SOC_DAIFMT_CBS_CFS: + priv->is_master_mode = false; + break; + default: + dev_err(codec->dev, "Invalid DAI master/slave interface\n"); + return -EINVAL; + } + + /* set interface polarity */ + switch (format & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + default: + dev_err(codec->dev, "Inverted DAI clocks not supported\n"); + return -EINVAL; + } + + /* set interface format */ + switch (format & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + pcm_cfg = PCM186X_PCM_CFG_FMT_I2S; + break; + case SND_SOC_DAIFMT_LEFT_J: + pcm_cfg = PCM186X_PCM_CFG_FMT_LEFTJ; + break; + case SND_SOC_DAIFMT_DSP_A: + priv->tdm_offset += 1; + /* Fall through... 
DSP_A uses the same basic config as DSP_B + * except we need to shift the TDM output by one BCK cycle + */ + case SND_SOC_DAIFMT_DSP_B: + priv->is_tdm_mode = true; + pcm_cfg = PCM186X_PCM_CFG_FMT_TDM; + break; + default: + dev_err(codec->dev, "Invalid DAI format\n"); + return -EINVAL; + } + + snd_soc_update_bits(codec, PCM186X_CLK_CTRL, + PCM186X_CLK_CTRL_MST_MODE, clk_ctrl); + + snd_soc_write(codec, PCM186X_TDM_TX_OFFSET, priv->tdm_offset); + + snd_soc_update_bits(codec, PCM186X_PCM_CFG, + PCM186X_PCM_CFG_FMT_MASK, pcm_cfg); + + return 0; +} + +static int pcm186x_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, + unsigned int rx_mask, int slots, int slot_width) +{ + struct snd_soc_codec *codec = dai->codec; + struct pcm186x_priv *priv = snd_soc_codec_get_drvdata(codec); + unsigned int first_slot, last_slot, tdm_offset; + + dev_dbg(codec->dev, + "%s() tx_mask=0x%x rx_mask=0x%x slots=%d slot_width=%d\n", + __func__, tx_mask, rx_mask, slots, slot_width); + + if (!tx_mask) { + dev_err(codec->dev, "tdm tx mask must not be 0\n"); + return -EINVAL; + } + + first_slot = __ffs(tx_mask); + last_slot = __fls(tx_mask); + + if (last_slot - first_slot != hweight32(tx_mask) - 1) { + dev_err(codec->dev, "tdm tx mask must be contiguous\n"); + return -EINVAL; + } + + tdm_offset = first_slot * slot_width; + + if (tdm_offset > 255) { + dev_err(codec->dev, "tdm tx slot selection out of bounds\n"); + return -EINVAL; + } + + priv->tdm_offset = tdm_offset; + + return 0; +} + +static int pcm186x_set_dai_sysclk(struct snd_soc_dai *dai, int clk_id, + unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = dai->codec; + struct pcm186x_priv *priv = snd_soc_codec_get_drvdata(codec); + + dev_dbg(codec->dev, "%s() clk_id=%d freq=%u dir=%d\n", + __func__, clk_id, freq, dir); + + priv->sysclk = freq; + + return 0; +} + +const struct snd_soc_dai_ops pcm186x_dai_ops = { + .set_sysclk = pcm186x_set_dai_sysclk, + .set_tdm_slot = pcm186x_set_tdm_slot, + .set_fmt = pcm186x_set_fmt, + 
.hw_params = pcm186x_hw_params, +}; + +static struct snd_soc_dai_driver pcm1863_dai = { + .name = "pcm1863-aif", + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = PCM186X_RATES, + .formats = PCM186X_FORMATS, + }, + .ops = &pcm186x_dai_ops, +}; + +static struct snd_soc_dai_driver pcm1865_dai = { + .name = "pcm1865-aif", + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 4, + .rates = PCM186X_RATES, + .formats = PCM186X_FORMATS, + }, + .ops = &pcm186x_dai_ops, +}; + +static int pcm186x_power_on(struct snd_soc_codec *codec) +{ + struct pcm186x_priv *priv = snd_soc_codec_get_drvdata(codec); + int ret = 0; + + ret = regulator_bulk_enable(ARRAY_SIZE(priv->supplies), + priv->supplies); + if (ret) + return ret; + + regcache_cache_only(priv->regmap, false); + ret = regcache_sync(priv->regmap); + if (ret) { + dev_err(codec->dev, "Failed to restore cache\n"); + regcache_cache_only(priv->regmap, true); + regulator_bulk_disable(ARRAY_SIZE(priv->supplies), + priv->supplies); + return ret; + } + + snd_soc_update_bits(codec, PCM186X_POWER_CTRL, + PCM186X_PWR_CTRL_PWRDN, 0); + + return 0; +} + +static int pcm186x_power_off(struct snd_soc_codec *codec) +{ + struct pcm186x_priv *priv = snd_soc_codec_get_drvdata(codec); + int ret; + + snd_soc_update_bits(codec, PCM186X_POWER_CTRL, + PCM186X_PWR_CTRL_PWRDN, PCM186X_PWR_CTRL_PWRDN); + + regcache_cache_only(priv->regmap, true); + + ret = regulator_bulk_disable(ARRAY_SIZE(priv->supplies), + priv->supplies); + if (ret) + return ret; + + return 0; +} + +static int pcm186x_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + dev_dbg(codec->dev, "## %s: %d -> %d\n", __func__, + snd_soc_codec_get_bias_level(codec), level); + + switch (level) { + case SND_SOC_BIAS_ON: + break; + case SND_SOC_BIAS_PREPARE: + break; + case SND_SOC_BIAS_STANDBY: + if (snd_soc_codec_get_bias_level(codec) == SND_SOC_BIAS_OFF) + pcm186x_power_on(codec); + break; 
+ case SND_SOC_BIAS_OFF: + pcm186x_power_off(codec); + break; + } + + return 0; +} + +static struct snd_soc_codec_driver soc_codec_dev_pcm1863 = { + .set_bias_level = pcm186x_set_bias_level, + + .component_driver = { + .controls = pcm1863_snd_controls, + .num_controls = ARRAY_SIZE(pcm1863_snd_controls), + .dapm_widgets = pcm1863_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(pcm1863_dapm_widgets), + .dapm_routes = pcm1863_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(pcm1863_dapm_routes), + }, +}; + +static struct snd_soc_codec_driver soc_codec_dev_pcm1865 = { + .set_bias_level = pcm186x_set_bias_level, + .suspend_bias_off = true, + + .component_driver = { + .controls = pcm1865_snd_controls, + .num_controls = ARRAY_SIZE(pcm1865_snd_controls), + .dapm_widgets = pcm1865_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(pcm1865_dapm_widgets), + .dapm_routes = pcm1865_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(pcm1865_dapm_routes), + }, +}; + +static bool pcm186x_volatile(struct device *dev, unsigned int reg) +{ + switch (reg) { + case PCM186X_PAGE: + case PCM186X_DEVICE_STATUS: + case PCM186X_FSAMPLE_STATUS: + case PCM186X_DIV_STATUS: + case PCM186X_CLK_STATUS: + case PCM186X_SUPPLY_STATUS: + case PCM186X_MMAP_STAT_CTRL: + case PCM186X_MMAP_ADDRESS: + return true; + } + + return false; +} + +static const struct regmap_range_cfg pcm186x_range = { + .name = "Pages", + .range_max = PCM186X_MAX_REGISTER, + .selector_reg = PCM186X_PAGE, + .selector_mask = 0xff, + .window_len = PCM186X_PAGE_LEN, +}; + +const struct regmap_config pcm186x_regmap = { + .reg_bits = 8, + .val_bits = 8, + + .volatile_reg = pcm186x_volatile, + + .ranges = &pcm186x_range, + .num_ranges = 1, + + .max_register = PCM186X_MAX_REGISTER, + + .cache_type = REGCACHE_RBTREE, +}; +EXPORT_SYMBOL_GPL(pcm186x_regmap); + +int pcm186x_probe(struct device *dev, enum pcm186x_type type, int irq, + struct regmap *regmap) +{ + struct pcm186x_priv *priv; + int i, ret; + + priv = devm_kzalloc(dev, sizeof(struct 
pcm186x_priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + dev_set_drvdata(dev, priv); + priv->regmap = regmap; + + for (i = 0; i < ARRAY_SIZE(priv->supplies); i++) + priv->supplies[i].supply = pcm186x_supply_names[i]; + + ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(priv->supplies), + priv->supplies); + if (ret) { + dev_err(dev, "failed to request supplies: %d\n", ret); + return ret; + } + + ret = regulator_bulk_enable(ARRAY_SIZE(priv->supplies), + priv->supplies); + if (ret) { + dev_err(dev, "failed enable supplies: %d\n", ret); + return ret; + } + + /* Reset device registers for a consistent power-on like state */ + ret = regmap_write(regmap, PCM186X_PAGE, PCM186X_RESET); + if (ret) { + dev_err(dev, "failed to write device: %d\n", ret); + return ret; + } + + ret = regulator_bulk_disable(ARRAY_SIZE(priv->supplies), + priv->supplies); + if (ret) { + dev_err(dev, "failed disable supplies: %d\n", ret); + return ret; + } + + switch (type) { + case PCM1865: + case PCM1864: + ret = snd_soc_register_codec(dev, &soc_codec_dev_pcm1865, + &pcm1865_dai, 1); + break; + case PCM1863: + case PCM1862: + default: + ret = snd_soc_register_codec(dev, &soc_codec_dev_pcm1863, + &pcm1863_dai, 1); + } + if (ret) { + dev_err(dev, "failed to register CODEC: %d\n", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(pcm186x_probe); + +int pcm186x_remove(struct device *dev) +{ + snd_soc_unregister_codec(dev); + + return 0; +} +EXPORT_SYMBOL_GPL(pcm186x_remove); + +MODULE_AUTHOR("Andreas Dannenberg "); +MODULE_AUTHOR("Andrew F. 
Davis "); +MODULE_DESCRIPTION("PCM186x Universal Audio ADC driver"); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/codecs/pcm186x.h b/sound/soc/codecs/pcm186x.h new file mode 100644 index 000000000000..b630111bb3c4 --- /dev/null +++ b/sound/soc/codecs/pcm186x.h @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Texas Instruments PCM186x Universal Audio ADC + * + * Copyright (C) 2015-2017 Texas Instruments Incorporated - http://www.ti.com + * Andreas Dannenberg + * Andrew F. Davis + */ + +#ifndef _PCM186X_H_ +#define _PCM186X_H_ + +#include +#include + +enum pcm186x_type { + PCM1862, + PCM1863, + PCM1864, + PCM1865, +}; + +#define PCM186X_RATES SNDRV_PCM_RATE_8000_192000 +#define PCM186X_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S20_3LE |\ + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE) + +#define PCM186X_PAGE_LEN 0x0100 +#define PCM186X_PAGE_BASE(n) (PCM186X_PAGE_LEN * n) + +/* The page selection register address is the same on all pages */ +#define PCM186X_PAGE 0 + +/* Register Definitions - Page 0 */ +#define PCM186X_PGA_VAL_CH1_L (PCM186X_PAGE_BASE(0) + 1) +#define PCM186X_PGA_VAL_CH1_R (PCM186X_PAGE_BASE(0) + 2) +#define PCM186X_PGA_VAL_CH2_L (PCM186X_PAGE_BASE(0) + 3) +#define PCM186X_PGA_VAL_CH2_R (PCM186X_PAGE_BASE(0) + 4) +#define PCM186X_PGA_CTRL (PCM186X_PAGE_BASE(0) + 5) +#define PCM186X_ADC1_INPUT_SEL_L (PCM186X_PAGE_BASE(0) + 6) +#define PCM186X_ADC1_INPUT_SEL_R (PCM186X_PAGE_BASE(0) + 7) +#define PCM186X_ADC2_INPUT_SEL_L (PCM186X_PAGE_BASE(0) + 8) +#define PCM186X_ADC2_INPUT_SEL_R (PCM186X_PAGE_BASE(0) + 9) +#define PCM186X_AUXADC_INPUT_SEL (PCM186X_PAGE_BASE(0) + 10) +#define PCM186X_PCM_CFG (PCM186X_PAGE_BASE(0) + 11) +#define PCM186X_TDM_TX_SEL (PCM186X_PAGE_BASE(0) + 12) +#define PCM186X_TDM_TX_OFFSET (PCM186X_PAGE_BASE(0) + 13) +#define PCM186X_TDM_RX_OFFSET (PCM186X_PAGE_BASE(0) + 14) +#define PCM186X_DPGA_VAL_CH1_L (PCM186X_PAGE_BASE(0) + 15) +#define PCM186X_GPIO1_0_CTRL (PCM186X_PAGE_BASE(0) + 16) 
+#define PCM186X_GPIO3_2_CTRL (PCM186X_PAGE_BASE(0) + 17) +#define PCM186X_GPIO1_0_DIR_CTRL (PCM186X_PAGE_BASE(0) + 18) +#define PCM186X_GPIO3_2_DIR_CTRL (PCM186X_PAGE_BASE(0) + 19) +#define PCM186X_GPIO_IN_OUT (PCM186X_PAGE_BASE(0) + 20) +#define PCM186X_GPIO_PULL_CTRL (PCM186X_PAGE_BASE(0) + 21) +#define PCM186X_DPGA_VAL_CH1_R (PCM186X_PAGE_BASE(0) + 22) +#define PCM186X_DPGA_VAL_CH2_L (PCM186X_PAGE_BASE(0) + 23) +#define PCM186X_DPGA_VAL_CH2_R (PCM186X_PAGE_BASE(0) + 24) +#define PCM186X_DPGA_GAIN_CTRL (PCM186X_PAGE_BASE(0) + 25) +#define PCM186X_DPGA_MIC_CTRL (PCM186X_PAGE_BASE(0) + 26) +#define PCM186X_DIN_RESAMP_CTRL (PCM186X_PAGE_BASE(0) + 27) +#define PCM186X_CLK_CTRL (PCM186X_PAGE_BASE(0) + 32) +#define PCM186X_DSP1_CLK_DIV (PCM186X_PAGE_BASE(0) + 33) +#define PCM186X_DSP2_CLK_DIV (PCM186X_PAGE_BASE(0) + 34) +#define PCM186X_ADC_CLK_DIV (PCM186X_PAGE_BASE(0) + 35) +#define PCM186X_PLL_SCK_DIV (PCM186X_PAGE_BASE(0) + 37) +#define PCM186X_BCK_DIV (PCM186X_PAGE_BASE(0) + 38) +#define PCM186X_LRK_DIV (PCM186X_PAGE_BASE(0) + 39) +#define PCM186X_PLL_CTRL (PCM186X_PAGE_BASE(0) + 40) +#define PCM186X_PLL_P_DIV (PCM186X_PAGE_BASE(0) + 41) +#define PCM186X_PLL_R_DIV (PCM186X_PAGE_BASE(0) + 42) +#define PCM186X_PLL_J_DIV (PCM186X_PAGE_BASE(0) + 43) +#define PCM186X_PLL_D_DIV_LSB (PCM186X_PAGE_BASE(0) + 44) +#define PCM186X_PLL_D_DIV_MSB (PCM186X_PAGE_BASE(0) + 45) +#define PCM186X_SIGDET_MODE (PCM186X_PAGE_BASE(0) + 48) +#define PCM186X_SIGDET_MASK (PCM186X_PAGE_BASE(0) + 49) +#define PCM186X_SIGDET_STAT (PCM186X_PAGE_BASE(0) + 50) +#define PCM186X_SIGDET_LOSS_TIME (PCM186X_PAGE_BASE(0) + 52) +#define PCM186X_SIGDET_SCAN_TIME (PCM186X_PAGE_BASE(0) + 53) +#define PCM186X_SIGDET_INT_INTVL (PCM186X_PAGE_BASE(0) + 54) +#define PCM186X_SIGDET_DC_REF_CH1_L (PCM186X_PAGE_BASE(0) + 64) +#define PCM186X_SIGDET_DC_DIFF_CH1_L (PCM186X_PAGE_BASE(0) + 65) +#define PCM186X_SIGDET_DC_LEV_CH1_L (PCM186X_PAGE_BASE(0) + 66) +#define PCM186X_SIGDET_DC_REF_CH1_R (PCM186X_PAGE_BASE(0) + 
67) +#define PCM186X_SIGDET_DC_DIFF_CH1_R (PCM186X_PAGE_BASE(0) + 68) +#define PCM186X_SIGDET_DC_LEV_CH1_R (PCM186X_PAGE_BASE(0) + 69) +#define PCM186X_SIGDET_DC_REF_CH2_L (PCM186X_PAGE_BASE(0) + 70) +#define PCM186X_SIGDET_DC_DIFF_CH2_L (PCM186X_PAGE_BASE(0) + 71) +#define PCM186X_SIGDET_DC_LEV_CH2_L (PCM186X_PAGE_BASE(0) + 72) +#define PCM186X_SIGDET_DC_REF_CH2_R (PCM186X_PAGE_BASE(0) + 73) +#define PCM186X_SIGDET_DC_DIFF_CH2_R (PCM186X_PAGE_BASE(0) + 74) +#define PCM186X_SIGDET_DC_LEV_CH2_R (PCM186X_PAGE_BASE(0) + 75) +#define PCM186X_SIGDET_DC_REF_CH3_L (PCM186X_PAGE_BASE(0) + 76) +#define PCM186X_SIGDET_DC_DIFF_CH3_L (PCM186X_PAGE_BASE(0) + 77) +#define PCM186X_SIGDET_DC_LEV_CH3_L (PCM186X_PAGE_BASE(0) + 78) +#define PCM186X_SIGDET_DC_REF_CH3_R (PCM186X_PAGE_BASE(0) + 79) +#define PCM186X_SIGDET_DC_DIFF_CH3_R (PCM186X_PAGE_BASE(0) + 80) +#define PCM186X_SIGDET_DC_LEV_CH3_R (PCM186X_PAGE_BASE(0) + 81) +#define PCM186X_SIGDET_DC_REF_CH4_L (PCM186X_PAGE_BASE(0) + 82) +#define PCM186X_SIGDET_DC_DIFF_CH4_L (PCM186X_PAGE_BASE(0) + 83) +#define PCM186X_SIGDET_DC_LEV_CH4_L (PCM186X_PAGE_BASE(0) + 84) +#define PCM186X_SIGDET_DC_REF_CH4_R (PCM186X_PAGE_BASE(0) + 85) +#define PCM186X_SIGDET_DC_DIFF_CH4_R (PCM186X_PAGE_BASE(0) + 86) +#define PCM186X_SIGDET_DC_LEV_CH4_R (PCM186X_PAGE_BASE(0) + 87) +#define PCM186X_AUXADC_DATA_CTRL (PCM186X_PAGE_BASE(0) + 88) +#define PCM186X_AUXADC_DATA_LSB (PCM186X_PAGE_BASE(0) + 89) +#define PCM186X_AUXADC_DATA_MSB (PCM186X_PAGE_BASE(0) + 90) +#define PCM186X_INT_ENABLE (PCM186X_PAGE_BASE(0) + 96) +#define PCM186X_INT_FLAG (PCM186X_PAGE_BASE(0) + 97) +#define PCM186X_INT_POL_WIDTH (PCM186X_PAGE_BASE(0) + 98) +#define PCM186X_POWER_CTRL (PCM186X_PAGE_BASE(0) + 112) +#define PCM186X_FILTER_MUTE_CTRL (PCM186X_PAGE_BASE(0) + 113) +#define PCM186X_DEVICE_STATUS (PCM186X_PAGE_BASE(0) + 114) +#define PCM186X_FSAMPLE_STATUS (PCM186X_PAGE_BASE(0) + 115) +#define PCM186X_DIV_STATUS (PCM186X_PAGE_BASE(0) + 116) +#define PCM186X_CLK_STATUS 
(PCM186X_PAGE_BASE(0) + 117) +#define PCM186X_SUPPLY_STATUS (PCM186X_PAGE_BASE(0) + 120) + +/* Register Definitions - Page 1 */ +#define PCM186X_MMAP_STAT_CTRL (PCM186X_PAGE_BASE(1) + 1) +#define PCM186X_MMAP_ADDRESS (PCM186X_PAGE_BASE(1) + 2) +#define PCM186X_MEM_WDATA0 (PCM186X_PAGE_BASE(1) + 4) +#define PCM186X_MEM_WDATA1 (PCM186X_PAGE_BASE(1) + 5) +#define PCM186X_MEM_WDATA2 (PCM186X_PAGE_BASE(1) + 6) +#define PCM186X_MEM_WDATA3 (PCM186X_PAGE_BASE(1) + 7) +#define PCM186X_MEM_RDATA0 (PCM186X_PAGE_BASE(1) + 8) +#define PCM186X_MEM_RDATA1 (PCM186X_PAGE_BASE(1) + 9) +#define PCM186X_MEM_RDATA2 (PCM186X_PAGE_BASE(1) + 10) +#define PCM186X_MEM_RDATA3 (PCM186X_PAGE_BASE(1) + 11) + +/* Register Definitions - Page 3 */ +#define PCM186X_OSC_PWR_DOWN_CTRL (PCM186X_PAGE_BASE(3) + 18) +#define PCM186X_MIC_BIAS_CTRL (PCM186X_PAGE_BASE(3) + 21) + +/* Register Definitions - Page 253 */ +#define PCM186X_CURR_TRIM_CTRL (PCM186X_PAGE_BASE(253) + 20) + +#define PCM186X_MAX_REGISTER PCM186X_CURR_TRIM_CTRL + +/* PCM186X_PAGE */ +#define PCM186X_RESET 0xff + +/* PCM186X_ADCX_INPUT_SEL_X */ +#define PCM186X_ADC_INPUT_SEL_POL BIT(7) +#define PCM186X_ADC_INPUT_SEL_MASK GENMASK(5, 0) + +/* PCM186X_PCM_CFG */ +#define PCM186X_PCM_CFG_RX_WLEN_MASK GENMASK(7, 6) +#define PCM186X_PCM_CFG_RX_WLEN_SHIFT 6 +#define PCM186X_PCM_CFG_RX_WLEN_32 0x00 +#define PCM186X_PCM_CFG_RX_WLEN_24 0x01 +#define PCM186X_PCM_CFG_RX_WLEN_20 0x02 +#define PCM186X_PCM_CFG_RX_WLEN_16 0x03 +#define PCM186X_PCM_CFG_TDM_LRCK_MODE BIT(4) +#define PCM186X_PCM_CFG_TX_WLEN_MASK GENMASK(3, 2) +#define PCM186X_PCM_CFG_TX_WLEN_SHIFT 2 +#define PCM186X_PCM_CFG_TX_WLEN_32 0x00 +#define PCM186X_PCM_CFG_TX_WLEN_24 0x01 +#define PCM186X_PCM_CFG_TX_WLEN_20 0x02 +#define PCM186X_PCM_CFG_TX_WLEN_16 0x03 +#define PCM186X_PCM_CFG_FMT_MASK GENMASK(1, 0) +#define PCM186X_PCM_CFG_FMT_SHIFT 0 +#define PCM186X_PCM_CFG_FMT_I2S 0x00 +#define PCM186X_PCM_CFG_FMT_LEFTJ 0x01 +#define PCM186X_PCM_CFG_FMT_RIGHTJ 0x02 +#define 
PCM186X_PCM_CFG_FMT_TDM 0x03 + +/* PCM186X_TDM_TX_SEL */ +#define PCM186X_TDM_TX_SEL_2CH 0x00 +#define PCM186X_TDM_TX_SEL_4CH 0x01 +#define PCM186X_TDM_TX_SEL_6CH 0x02 +#define PCM186X_TDM_TX_SEL_MASK 0x03 + +/* PCM186X_CLK_CTRL */ +#define PCM186X_CLK_CTRL_SCK_XI_SEL1 BIT(7) +#define PCM186X_CLK_CTRL_SCK_XI_SEL0 BIT(6) +#define PCM186X_CLK_CTRL_SCK_SRC_PLL BIT(5) +#define PCM186X_CLK_CTRL_MST_MODE BIT(4) +#define PCM186X_CLK_CTRL_ADC_SRC_PLL BIT(3) +#define PCM186X_CLK_CTRL_DSP2_SRC_PLL BIT(2) +#define PCM186X_CLK_CTRL_DSP1_SRC_PLL BIT(1) +#define PCM186X_CLK_CTRL_CLKDET_EN BIT(0) + +/* PCM186X_PLL_CTRL */ +#define PCM186X_PLL_CTRL_LOCK BIT(4) +#define PCM186X_PLL_CTRL_REF_SEL BIT(1) +#define PCM186X_PLL_CTRL_EN BIT(0) + +/* PCM186X_POWER_CTRL */ +#define PCM186X_PWR_CTRL_PWRDN BIT(2) +#define PCM186X_PWR_CTRL_SLEEP BIT(1) +#define PCM186X_PWR_CTRL_STBY BIT(0) + +/* PCM186X_CLK_STATUS */ +#define PCM186X_CLK_STATUS_LRCKHLT BIT(6) +#define PCM186X_CLK_STATUS_BCKHLT BIT(5) +#define PCM186X_CLK_STATUS_SCKHLT BIT(4) +#define PCM186X_CLK_STATUS_LRCKERR BIT(2) +#define PCM186X_CLK_STATUS_BCKERR BIT(1) +#define PCM186X_CLK_STATUS_SCKERR BIT(0) + +/* PCM186X_SUPPLY_STATUS */ +#define PCM186X_SUPPLY_STATUS_DVDD BIT(2) +#define PCM186X_SUPPLY_STATUS_AVDD BIT(1) +#define PCM186X_SUPPLY_STATUS_LDO BIT(0) + +/* PCM186X_MMAP_STAT_CTRL */ +#define PCM186X_MMAP_STAT_DONE BIT(4) +#define PCM186X_MMAP_STAT_BUSY BIT(2) +#define PCM186X_MMAP_STAT_R_REQ BIT(1) +#define PCM186X_MMAP_STAT_W_REQ BIT(0) + +extern const struct regmap_config pcm186x_regmap; + +int pcm186x_probe(struct device *dev, enum pcm186x_type type, int irq, + struct regmap *regmap); +int pcm186x_remove(struct device *dev); + +#endif /* _PCM186X_H_ */ -- cgit v1.2.3 From eb2a8168b9fd69f66199d9d7e86d23fecfab4e33 Mon Sep 17 00:00:00 2001 From: "Andrew F. 
Davis" Date: Tue, 5 Dec 2017 14:52:55 -0600 Subject: ASoC: pcm186x: Add PCM186x binding documentation Add the dt-binding documentation for the TI PCM186x 2ch and 4ch Audio ADCs With Universal Front End. Signed-off-by: Andrew F. Davis Acked-by: Rob Herring Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/pcm186x.txt | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/pcm186x.txt diff --git a/Documentation/devicetree/bindings/sound/pcm186x.txt b/Documentation/devicetree/bindings/sound/pcm186x.txt new file mode 100644 index 000000000000..1087f4855980 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/pcm186x.txt @@ -0,0 +1,42 @@ +Texas Instruments PCM186x Universal Audio ADC + +These devices support both I2C and SPI (configured with pin strapping +on the board). + +Required properties: + + - compatible : "ti,pcm1862", + "ti,pcm1863", + "ti,pcm1864", + "ti,pcm1865" + + - reg : The I2C address of the device for I2C, the chip select + number for SPI. + + - avdd-supply: Analog core power supply (3.3v) + - dvdd-supply: Digital core power supply + - iovdd-supply: Digital IO power supply + See regulator/regulator.txt for more information + +CODEC input pins: + * VINL1 + * VINR1 + * VINL2 + * VINR2 + * VINL3 + * VINR3 + * VINL4 + * VINR4 + +The pins can be used in referring sound node's audio-routing property. + +Example: + + pcm186x: audio-codec@4a { + compatible = "ti,pcm1865"; + reg = <0x4a>; + + avdd-supply = <&reg_3v3_analog>; + dvdd-supply = <&reg_3v3>; + iovdd-supply = <&reg_1v8>; + }; -- cgit v1.2.3 From 4f7f5551a760eb0124267be65763008169db7087 Mon Sep 17 00:00:00 2001 From: Masamitsu Yamazaki Date: Wed, 15 Nov 2017 07:33:14 +0000 Subject: ipmi: Stop timers before cleaning up the module System may crash after unloading ipmi_si.ko module because a timer may remain and fire after the module cleaned up resources. cleanup_one_si() contains the following processing.
/* * Make sure that interrupts, the timer and the thread are * stopped and will not run again. */ if (to_clean->irq_cleanup) to_clean->irq_cleanup(to_clean); wait_for_timer_and_thread(to_clean); /* * Timeouts are stopped, now make sure the interrupts are off * in the BMC. Note that timers and CPU interrupts are off, * so no need for locks. */ while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); } si_state changes as following in the while loop calling poll(to_clean). SI_GETTING_MESSAGES => SI_CHECKING_ENABLES => SI_SETTING_ENABLES => SI_GETTING_EVENTS => SI_NORMAL As written in the code comments above, timers are expected to stop before the polling loop and not to run again. But the timer is set again in the following process when si_state becomes SI_SETTING_ENABLES. => poll => smi_event_handler => handle_transaction_done // smi_info->si_state == SI_SETTING_ENABLES => start_getting_events => start_new_msg => smi_mod_timer => mod_timer As a result, before the timer set in start_new_msg() expires, the polling loop may see si_state becoming SI_NORMAL and the module clean-up finishes. For example, hard LOCKUP and panic occurred as following. smi_timeout was called after smi_event_handler, kcs_event and hangs at port_inb() trying to access I/O port after release. [exception RIP: port_inb+19] RIP: ffffffffc0473053 RSP: ffff88069fdc3d80 RFLAGS: 00000006 RAX: ffff8806800f8e00 RBX: ffff880682bd9400 RCX: 0000000000000000 RDX: 0000000000000ca3 RSI: 0000000000000ca3 RDI: ffff8806800f8e40 RBP: ffff88069fdc3d80 R8: ffffffff81d86dfc R9: ffffffff81e36426 R10: 00000000000509f0 R11: 0000000000100000 R12: 0000000000]:000000 R13: 0000000000000000 R14: 0000000000000246 R15: ffff8806800f8e00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000 --- --- To fix the problem I defined a flag, timer_can_start, as member of struct smi_info. 
The flag is enabled immediately after initializing the timer and disabled immediately before waiting for timer deletion. Fixes: 0cfec916e86d ("ipmi: Start the timer and thread on internal msgs") Signed-off-by: Yamazaki Masamitsu [Adjusted for recent changes in the driver.] Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_intf.c | 44 +++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 71d33a1807e4..99b0513bb55b 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -199,6 +199,9 @@ struct smi_info { /* The timer for this si. */ struct timer_list si_timer; + /* This flag is set, if the timer can be set */ + bool timer_can_start; + /* This flag is set, if the timer is running (timer_pending() isn't enough) */ bool timer_running; @@ -355,6 +358,8 @@ out: static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) { + if (!smi_info->timer_can_start) + return; smi_info->last_timeout_jiffies = jiffies; mod_timer(&smi_info->si_timer, new_val); smi_info->timer_running = true; @@ -374,21 +379,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg, smi_info->handlers->start_transaction(smi_info->si_sm, msg, size); } -static void start_check_enables(struct smi_info *smi_info, bool start_timer) +static void start_check_enables(struct smi_info *smi_info) { unsigned char msg[2]; msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; - if (start_timer) - start_new_msg(smi_info, msg, 2); - else - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + start_new_msg(smi_info, msg, 2); smi_info->si_state = SI_CHECKING_ENABLES; } -static void start_clear_flags(struct smi_info *smi_info, bool start_timer) +static void start_clear_flags(struct smi_info *smi_info) { unsigned char msg[3]; @@ -397,10 +399,7 @@ static void start_clear_flags(struct 
smi_info *smi_info, bool start_timer) msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD; msg[2] = WDT_PRE_TIMEOUT_INT; - if (start_timer) - start_new_msg(smi_info, msg, 3); - else - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + start_new_msg(smi_info, msg, 3); smi_info->si_state = SI_CLEARING_FLAGS; } @@ -435,11 +434,11 @@ static void start_getting_events(struct smi_info *smi_info) * Note that we cannot just use disable_irq(), since the interrupt may * be shared. */ -static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer) +static inline bool disable_si_irq(struct smi_info *smi_info) { if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = true; - start_check_enables(smi_info, start_timer); + start_check_enables(smi_info); return true; } return false; @@ -449,7 +448,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info) { if ((smi_info->io.irq) && (smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = false; - start_check_enables(smi_info, true); + start_check_enables(smi_info); return true; } return false; @@ -467,7 +466,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info) msg = ipmi_alloc_smi_msg(); if (!msg) { - if (!disable_si_irq(smi_info, true)) + if (!disable_si_irq(smi_info)) smi_info->si_state = SI_NORMAL; } else if (enable_si_irq(smi_info)) { ipmi_free_smi_msg(msg); @@ -483,7 +482,7 @@ retry: /* Watchdog pre-timeout */ smi_inc_stat(smi_info, watchdog_pretimeouts); - start_clear_flags(smi_info, true); + start_clear_flags(smi_info); smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; if (smi_info->intf) ipmi_smi_watchdog_pretimeout(smi_info->intf); @@ -866,7 +865,7 @@ restart: * disable and messages disabled. 
*/ if (smi_info->supports_event_msg_buff || smi_info->io.irq) { - start_check_enables(smi_info, true); + start_check_enables(smi_info); } else { smi_info->curr_msg = alloc_msg_handle_irq(smi_info); if (!smi_info->curr_msg) @@ -1167,6 +1166,7 @@ static int smi_start_processing(void *send_info, /* Set up the timer that drives the interface. */ setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi); + new_smi->timer_can_start = true; smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES); /* Try to claim any interrupts. */ @@ -1936,10 +1936,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info) check_set_rcv_irq(smi_info); } -static inline void wait_for_timer_and_thread(struct smi_info *smi_info) +static inline void stop_timer_and_thread(struct smi_info *smi_info) { if (smi_info->thread != NULL) kthread_stop(smi_info->thread); + + smi_info->timer_can_start = false; if (smi_info->timer_running) del_timer_sync(&smi_info->si_timer); } @@ -2152,7 +2154,7 @@ static int try_smi_init(struct smi_info *new_smi) * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. */ - start_clear_flags(new_smi, false); + start_clear_flags(new_smi); /* * IRQ is defined to be set when non-zero. 
req_events will @@ -2238,7 +2240,7 @@ out_err_remove_attrs: dev_set_drvdata(new_smi->io.dev, NULL); out_err_stop_timer: - wait_for_timer_and_thread(new_smi); + stop_timer_and_thread(new_smi); out_err: new_smi->interrupt_disabled = true; @@ -2388,7 +2390,7 @@ static void cleanup_one_si(struct smi_info *to_clean) */ if (to_clean->io.irq_cleanup) to_clean->io.irq_cleanup(&to_clean->io); - wait_for_timer_and_thread(to_clean); + stop_timer_and_thread(to_clean); /* * Timeouts are stopped, now make sure the interrupts are off @@ -2400,7 +2402,7 @@ static void cleanup_one_si(struct smi_info *to_clean) schedule_timeout_uninterruptible(1); } if (to_clean->handlers) - disable_si_irq(to_clean, false); + disable_si_irq(to_clean); while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); -- cgit v1.2.3 From aece09024414b54158e03aa45f4a4436e7cb996c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Dec 2017 17:37:17 +0300 Subject: staging: ccree: Uninitialized return in ssi_ahash_import() The return value isn't initialized on some success paths. 
Fixes: c5f39d07860c ("staging: ccree: fix leak of import() after init()") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/ccree/ssi_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/ccree/ssi_hash.c b/drivers/staging/ccree/ssi_hash.c index 1799d3f26a9e..2035835b62dc 100644 --- a/drivers/staging/ccree/ssi_hash.c +++ b/drivers/staging/ccree/ssi_hash.c @@ -1769,7 +1769,7 @@ static int ssi_ahash_import(struct ahash_request *req, const void *in) struct device *dev = drvdata_to_dev(ctx->drvdata); struct ahash_req_ctx *state = ahash_request_ctx(req); u32 tmp; - int rc; + int rc = 0; memcpy(&tmp, in, sizeof(u32)); if (tmp != CC_EXPORT_MAGIC) { -- cgit v1.2.3 From 202fc673c626e4ffe6b888c469b248ecc6d50265 Mon Sep 17 00:00:00 2001 From: Marcus Wolf Date: Wed, 8 Nov 2017 19:13:56 +0200 Subject: staging: pi433: Fixes issue with bit shift in rf69_get_modulation Fixes issue with bit shift in rf69_get_modulation Signed-off-by: Marcus Wolf Signed-off-by: Greg Kroah-Hartman --- drivers/staging/pi433/rf69.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/pi433/rf69.c b/drivers/staging/pi433/rf69.c index e69a2153c999..12c9df9cddde 100644 --- a/drivers/staging/pi433/rf69.c +++ b/drivers/staging/pi433/rf69.c @@ -102,7 +102,7 @@ enum modulation rf69_get_modulation(struct spi_device *spi) currentValue = READ_REG(REG_DATAMODUL); - switch (currentValue & MASK_DATAMODUL_MODULATION_TYPE >> 3) { // TODO improvement: change 3 to define + switch (currentValue & MASK_DATAMODUL_MODULATION_TYPE) { case DATAMODUL_MODULATION_TYPE_OOK: return OOK; case DATAMODUL_MODULATION_TYPE_FSK: return FSK; default: return undefined; -- cgit v1.2.3 From d1b726a9018e3f684ce190a1cbe012cb64f363d8 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 6 Dec 2017 16:17:27 +0100 Subject: ASoC: fsl_asrc: protect macro argument Protect macro argument with parentheses to avoid ambiguity. 
This fixes a warning seen with clang: warning: logical not is only applied to the left hand side of this comparison Signed-off-by: Stefan Agner Acked-by: Nicolin Chen Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_asrc.h b/sound/soc/fsl/fsl_asrc.h index 0f163abe4ba3..ec33dab4b909 100644 --- a/sound/soc/fsl/fsl_asrc.h +++ b/sound/soc/fsl/fsl_asrc.h @@ -57,7 +57,7 @@ #define REG_ASRDOC 0x74 #define REG_ASRDI(i) (REG_ASRDIA + (i << 3)) #define REG_ASRDO(i) (REG_ASRDOA + (i << 3)) -#define REG_ASRDx(x, i) (x == IN ? REG_ASRDI(i) : REG_ASRDO(i)) +#define REG_ASRDx(x, i) ((x) == IN ? REG_ASRDI(i) : REG_ASRDO(i)) #define REG_ASRIDRHA 0x80 #define REG_ASRIDRLA 0x84 -- cgit v1.2.3 From c7b92172a61b91936be985cb9bc499a4ebc6489b Mon Sep 17 00:00:00 2001 From: Stefan Potyra Date: Wed, 6 Dec 2017 16:03:24 +0100 Subject: ASoC: rockchip: disable clock on error Disable the clocks in rk_spdif_probe when an error occurs after one of the clocks has been enabled previously. Found by Linux Driver Verification project (linuxtesting.org). 
Fixes: f874b80e1571 ASoC: rockchip: Add rockchip SPDIF transceiver driver Signed-off-by: Stefan Potyra Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_spdif.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c index ee5055d47d13..a89fe9b6463b 100644 --- a/sound/soc/rockchip/rockchip_spdif.c +++ b/sound/soc/rockchip/rockchip_spdif.c @@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev) spdif->mclk = devm_clk_get(&pdev->dev, "mclk"); if (IS_ERR(spdif->mclk)) { dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n"); - return PTR_ERR(spdif->mclk); + ret = PTR_ERR(spdif->mclk); + goto err_disable_hclk; } ret = clk_prepare_enable(spdif->mclk); if (ret) { dev_err(spdif->dev, "clock enable failed %d\n", ret); - return ret; + goto err_disable_clocks; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(regs)) - return PTR_ERR(regs); + if (IS_ERR(regs)) { + ret = PTR_ERR(regs); + goto err_disable_clocks; + } spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs, &rk_spdif_regmap_config); if (IS_ERR(spdif->regmap)) { dev_err(&pdev->dev, "Failed to initialise managed register map\n"); - return PTR_ERR(spdif->regmap); + ret = PTR_ERR(spdif->regmap); + goto err_disable_clocks; } spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR; @@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev) err_pm_runtime: pm_runtime_disable(&pdev->dev); +err_disable_clocks: + clk_disable_unprepare(spdif->mclk); +err_disable_hclk: + clk_disable_unprepare(spdif->hclk); return ret; } -- cgit v1.2.3 From c9b41fcf272b4926b373d21c2b83dfe374313780 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 6 Dec 2017 15:26:21 +0100 Subject: regmap: allow to disable all locking mechanisms We have a use case in the at24 EEPROM driver (recently converted to 
using regmap instead of raw i2c/smbus calls) where we read from/write to the regmap in a loop, while protecting the entire loop with a mutex. Currently this implicitly makes us use two mutexes - one in the driver and one in regmap. While browsing the code for similar use cases I noticed a significant number of places where locking *seems* redundant. Allow users to completely disable any locking mechanisms in regmap config. Signed-off-by: Bartosz Golaszewski Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 9 ++++++++- include/linux/regmap.h | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 8d516a9bfc01..72917b2fc10e 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -459,6 +459,11 @@ static void regmap_unlock_hwlock_irqrestore(void *__map) } #endif +static void regmap_lock_unlock_empty(void *__map) +{ + +} + static void regmap_lock_mutex(void *__map) { struct regmap *map = __map; @@ -669,7 +674,9 @@ struct regmap *__regmap_init(struct device *dev, goto err; } - if (config->lock && config->unlock) { + if (config->disable_locking) { + map->lock = map->unlock = regmap_lock_unlock_empty; + } else if (config->lock && config->unlock) { map->lock = config->lock; map->unlock = config->unlock; map->lock_arg = config->lock_arg; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 15eddc1353ba..072a90229e34 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -264,6 +264,9 @@ typedef void (*regmap_unlock)(void *); * field is NULL but precious_table (see below) is not, the * check is performed on such table (a register is precious if * it belongs to one of the ranges specified by precious_table). + * @disable_locking: This regmap is either protected by external means or + * is guaranteed not be be accessed from multiple threads. + * Don't use any locking mechanisms. 
* @lock: Optional lock callback (overrides regmap's default lock * function, based on spinlock or mutex). * @unlock: As above for unlocking. @@ -333,6 +336,8 @@ struct regmap_config { bool (*readable_reg)(struct device *dev, unsigned int reg); bool (*volatile_reg)(struct device *dev, unsigned int reg); bool (*precious_reg)(struct device *dev, unsigned int reg); + + bool disable_locking; regmap_lock lock; regmap_unlock unlock; void *lock_arg; -- cgit v1.2.3 From b2ca3bdd07f68ca63fdb8e45f1fe039ba6af54a2 Mon Sep 17 00:00:00 2001 From: "Subhransu S. Prusty" Date: Wed, 6 Dec 2017 16:34:01 +0530 Subject: ASoC: Intel: Skylake: Remove second shim read in register_poll No need to read the register again if the value read has already matched the target during the loop. So remove the second shim read. Signed-off-by: Subhransu S. Prusty Signed-off-by: Guneshwor Singh Acked-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/common/sst-dsp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/intel/common/sst-dsp.c b/sound/soc/intel/common/sst-dsp.c index 11c0805393ff..fd82f4b1d4a0 100644 --- a/sound/soc/intel/common/sst-dsp.c +++ b/sound/soc/intel/common/sst-dsp.c @@ -269,7 +269,7 @@ int sst_dsp_register_poll(struct sst_dsp *ctx, u32 offset, u32 mask, */ timeout = jiffies + msecs_to_jiffies(time); - while (((sst_dsp_shim_read_unlocked(ctx, offset) & mask) != target) + while ((((reg = sst_dsp_shim_read_unlocked(ctx, offset)) & mask) != target) && time_before(jiffies, timeout)) { k++; if (k > 10) @@ -278,8 +278,6 @@ int sst_dsp_register_poll(struct sst_dsp *ctx, u32 offset, u32 mask, usleep_range(s, 2*s); } - reg = sst_dsp_shim_read_unlocked(ctx, offset); - if ((reg & mask) == target) { dev_dbg(ctx->dev, "FW Poll Status: reg=%#x %s successful\n", reg, operation); -- cgit v1.2.3 From 437623554e89f388648a31c35e1e5e4c7cb09004 Mon Sep 17 00:00:00 2001 From: Pradeep Tewani Date: Wed, 6 Dec 2017 16:34:02 +0530 Subject: ASoC: Intel: Skylake: Parse 
vendor tokens to build A-State table A-State table is a power management table which allows the driver to configure the DSP clock source corresponding to various load thresholds. The table contains up to 3 A-State entries. The patch adds and parses the corresponding A-State tokens to build the table. Signed-off-by: Pradeep Tewani Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- include/uapi/sound/snd_sst_tokens.h | 17 ++++++++++++- sound/soc/intel/skylake/skl-topology.c | 44 +++++++++++++++++++++++++++++++++- sound/soc/intel/skylake/skl.h | 17 +++++++++++++ 3 files changed, 76 insertions(+), 2 deletions(-) diff --git a/include/uapi/sound/snd_sst_tokens.h b/include/uapi/sound/snd_sst_tokens.h index f691e421f5e8..9e38fea11b2b 100644 --- a/include/uapi/sound/snd_sst_tokens.h +++ b/include/uapi/sound/snd_sst_tokens.h @@ -221,6 +221,17 @@ * %SKL_TKN_MM_U32_NUM_IN_FMT: Number of input formats * %SKL_TKN_MM_U32_NUM_OUT_FMT: Number of output formats * + * %SKL_TKN_U32_ASTATE_IDX: Table Index for the A-State entry to be filled + * with kcps and clock source + * + * %SKL_TKN_U32_ASTATE_COUNT: Number of valid entries in A-State table + * + * %SKL_TKN_U32_ASTATE_KCPS: Specifies the core load threshold (in kilo + * cycles per second) below which DSP is clocked + * from source specified by clock source.
+ * + * %SKL_TKN_U32_ASTATE_CLK_SRC: Clock source for A-State entry + * * module_id and loadable flags dont have tokens as these values will be * read from the DSP FW manifest * @@ -308,7 +319,11 @@ enum SKL_TKNS { SKL_TKN_MM_U32_NUM_IN_FMT, SKL_TKN_MM_U32_NUM_OUT_FMT, - SKL_TKN_MAX = SKL_TKN_MM_U32_NUM_OUT_FMT, + SKL_TKN_U32_ASTATE_IDX, + SKL_TKN_U32_ASTATE_COUNT, + SKL_TKN_U32_ASTATE_KCPS, + SKL_TKN_U32_ASTATE_CLK_SRC, + SKL_TKN_MAX = SKL_TKN_U32_ASTATE_CLK_SRC, }; #endif diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 1200b7c6af56..d8d110b3be01 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -3037,11 +3037,13 @@ static int skl_tplg_get_int_tkn(struct device *dev, struct snd_soc_tplg_vendor_value_elem *tkn_elem, struct skl *skl) { - int tkn_count = 0, ret; + int tkn_count = 0, ret, size; static int mod_idx, res_val_idx, intf_val_idx, dir, pin_idx; struct skl_module_res *res = NULL; struct skl_module_iface *fmt = NULL; struct skl_module *mod = NULL; + static struct skl_astate_param *astate_table; + static int astate_cfg_idx, count; int i; if (skl->modules) { @@ -3074,6 +3076,46 @@ static int skl_tplg_get_int_tkn(struct device *dev, mod_idx = tkn_elem->value; break; + case SKL_TKN_U32_ASTATE_COUNT: + if (astate_table != NULL) { + dev_err(dev, "More than one entry for A-State count"); + return -EINVAL; + } + + if (tkn_elem->value > SKL_MAX_ASTATE_CFG) { + dev_err(dev, "Invalid A-State count %d\n", + tkn_elem->value); + return -EINVAL; + } + + size = tkn_elem->value * sizeof(struct skl_astate_param) + + sizeof(count); + skl->cfg.astate_cfg = devm_kzalloc(dev, size, GFP_KERNEL); + if (!skl->cfg.astate_cfg) + return -ENOMEM; + + astate_table = skl->cfg.astate_cfg->astate_table; + count = skl->cfg.astate_cfg->count = tkn_elem->value; + break; + + case SKL_TKN_U32_ASTATE_IDX: + if (tkn_elem->value >= count) { + dev_err(dev, "Invalid A-State index %d\n", + 
tkn_elem->value); + return -EINVAL; + } + + astate_cfg_idx = tkn_elem->value; + break; + + case SKL_TKN_U32_ASTATE_KCPS: + astate_table[astate_cfg_idx].kcps = tkn_elem->value; + break; + + case SKL_TKN_U32_ASTATE_CLK_SRC: + astate_table[astate_cfg_idx].clk_src = tkn_elem->value; + break; + case SKL_TKN_U8_IN_PIN_TYPE: case SKL_TKN_U8_OUT_PIN_TYPE: case SKL_TKN_U8_IN_QUEUE_COUNT: diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h index 554ad6b5a823..46dda88ba139 100644 --- a/sound/soc/intel/skylake/skl.h +++ b/sound/soc/intel/skylake/skl.h @@ -29,6 +29,8 @@ #define SKL_SUSPEND_DELAY 2000 +#define SKL_MAX_ASTATE_CFG 3 + #define AZX_PCIREG_PGCTL 0x44 #define AZX_PGCTL_LSRMD_MASK (1 << 4) #define AZX_PCIREG_CGCTL 0x48 @@ -46,6 +48,20 @@ struct skl_dsp_resource { struct skl_debug; +struct skl_astate_param { + u32 kcps; + u32 clk_src; +}; + +struct skl_astate_config { + u32 count; + struct skl_astate_param astate_table[0]; +}; + +struct skl_fw_config { + struct skl_astate_config *astate_cfg; +}; + struct skl { struct hdac_ext_bus ebus; struct pci_dev *pci; @@ -77,6 +93,7 @@ struct skl { u8 nr_modules; struct skl_module **modules; bool use_tplg_pcm; + struct skl_fw_config cfg; }; #define skl_to_ebus(s) (&(s)->ebus) -- cgit v1.2.3 From 9452314d92d600e8702533b10f10ec440aad5db9 Mon Sep 17 00:00:00 2001 From: Pradeep Tewani Date: Wed, 6 Dec 2017 16:34:03 +0530 Subject: ASoC: Intel: Skylake: Configure DSP clock source DSP clock source is configured by sending the A-State table to the FW. 
Add the large config set IPC to configure the desired clock source Signed-off-by: Pradeep Tewani Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-messages.c | 18 ++++++++++++++++++ sound/soc/intel/skylake/skl-pcm.c | 6 ++++++ sound/soc/intel/skylake/skl-sst-dsp.h | 3 +++ 3 files changed, 27 insertions(+) diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c index f637829833e6..4e63213a8d55 100644 --- a/sound/soc/intel/skylake/skl-messages.c +++ b/sound/soc/intel/skylake/skl-messages.c @@ -55,6 +55,19 @@ static int skl_free_dma_buf(struct device *dev, struct snd_dma_buffer *dmab) return 0; } +#define SKL_ASTATE_PARAM_ID 4 + +void skl_dsp_set_astate_cfg(struct skl_sst *ctx, u32 cnt, void *data) +{ + struct skl_ipc_large_config_msg msg = {0}; + + msg.large_param_id = SKL_ASTATE_PARAM_ID; + msg.param_data_size = (cnt * sizeof(struct skl_astate_param) + + sizeof(cnt)); + + skl_ipc_set_large_config(&ctx->ipc, &msg, data); +} + #define NOTIFICATION_PARAM_ID 3 #define NOTIFICATION_MASK 0xf @@ -409,6 +422,11 @@ int skl_resume_dsp(struct skl *skl) return ret; skl_dsp_enable_notification(skl->skl_sst, false); + + if (skl->cfg.astate_cfg != NULL) { + skl_dsp_set_astate_cfg(skl->skl_sst, skl->cfg.astate_cfg->count, + skl->cfg.astate_cfg); + } return ret; } diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index 18138dc872d9..cc6535ab84d1 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -1350,6 +1350,12 @@ static int skl_platform_soc_probe(struct snd_soc_platform *platform) skl_populate_modules(skl); skl->skl_sst->update_d0i3c = skl_update_d0i3c; skl_dsp_enable_notification(skl->skl_sst, false); + + if (skl->cfg.astate_cfg != NULL) { + skl_dsp_set_astate_cfg(skl->skl_sst, + skl->cfg.astate_cfg->count, + skl->cfg.astate_cfg); + } } pm_runtime_mark_last_busy(platform->dev); 
pm_runtime_put_autosuspend(platform->dev); diff --git a/sound/soc/intel/skylake/skl-sst-dsp.h b/sound/soc/intel/skylake/skl-sst-dsp.h index eba20d37ba8c..b8e799ed65ef 100644 --- a/sound/soc/intel/skylake/skl-sst-dsp.h +++ b/sound/soc/intel/skylake/skl-sst-dsp.h @@ -251,6 +251,9 @@ void skl_freeup_uuid_list(struct skl_sst *ctx); int skl_dsp_strip_extended_manifest(struct firmware *fw); void skl_dsp_enable_notification(struct skl_sst *ctx, bool enable); + +void skl_dsp_set_astate_cfg(struct skl_sst *ctx, u32 cnt, void *data); + int skl_sst_ctx_init(struct device *dev, int irq, const char *fw_name, struct skl_dsp_loader_ops dsp_ops, struct skl_sst **dsp, struct sst_dsp_device *skl_dev); -- cgit v1.2.3 From e02b03303f13b6a571f01b4d84b69440696d2dde Mon Sep 17 00:00:00 2001 From: Guneshwor Singh Date: Wed, 6 Dec 2017 16:34:04 +0530 Subject: ASoC: Intel: Skylake: Do not check dev_type for dmic link type Some BIOS have inconsistent dev_type value for DMIC link type. Since there is only one device type for DMIC link type, remove device type check if link type is NHLT_LINK_DMIC. 
Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-nhlt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index d14c50a60289..3eaac41090ca 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt, if ((epnt->virtual_bus_id == instance_id) && (epnt->linktype == link_type) && - (epnt->direction == dirn) && - (epnt->device_type == dev_type)) - return true; - else - return false; + (epnt->direction == dirn)) { + /* do not check dev_type for DMIC link type */ + if (epnt->linktype == NHLT_LINK_DMIC) + return true; + + if (epnt->device_type == dev_type) + return true; + } + + return false; } struct nhlt_specific_cfg -- cgit v1.2.3 From 0fb02ba36d01a04dab03c0a424607844ef4dadbf Mon Sep 17 00:00:00 2001 From: Puneeth Prabhu Date: Wed, 6 Dec 2017 16:34:05 +0530 Subject: ASoC: hdac_hdmi: Refresh sysfs during hdmi device probe All nodes of hdmi codec widgets are not updated in sysfs interface (/sys/bus/hdaudio/devices//widgets/) as the vendor widget is not programmed to enable all the converters and pins during init. So, refresh the sysfs widget interface after enabling all pins and converters. Signed-off-by: Puneeth Prabhu Signed-off-by: Subhransu S. 
Prusty Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 3a35ede7027d..b706547c46d5 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -2042,6 +2042,7 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) "Failed in parse and map nid with err: %d\n", ret); return ret; } + snd_hdac_refresh_widgets(hdev, true); /* ASoC specific initialization */ ret = snd_soc_register_codec(&hdev->dev, &hdmi_hda_codec, -- cgit v1.2.3 From 45a6008bfcdc3e620dcf1b2330766345097afe9c Mon Sep 17 00:00:00 2001 From: Puneeth Prabhu Date: Wed, 6 Dec 2017 16:34:06 +0530 Subject: ASoC: hdac_hdmi: Remove redundant assignments Assignments for start_nid, end_nid and num_nodes of hdac_device structure are already done in init. So, remove the redundant assignments. Signed-off-by: Puneeth Prabhu Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index b706547c46d5..68a4a6b4e68e 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1465,10 +1465,7 @@ static int hdac_hdmi_parse_and_map_nid(struct hdac_ext_device *edev, return -EINVAL; } - hdev->num_nodes = num_nodes; - hdev->start_nid = nid; - - for (i = 0; i < hdev->num_nodes; i++, nid++) { + for (i = 0; i < num_nodes; i++, nid++) { unsigned int caps; unsigned int type; @@ -1494,8 +1491,6 @@ static int hdac_hdmi_parse_and_map_nid(struct hdac_ext_device *edev, } } - hdev->end_nid = nid; - if (!hdmi->num_pin || !hdmi->num_cvt) { ret = -EIO; goto free_widgets; -- cgit v1.2.3 From 769e40f0c17df53bac8999939993a280c1ea3dbd Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 23:09:00 +0530 Subject: ASoC: hisilicon: 
remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. Signed-off-by: Pravin Shedge Signed-off-by: Mark Brown --- sound/soc/hisilicon/hi6210-i2s.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/hisilicon/hi6210-i2s.c b/sound/soc/hisilicon/hi6210-i2s.c index 0c8f86d4020e..07a57209e055 100644 --- a/sound/soc/hisilicon/hi6210-i2s.c +++ b/sound/soc/hisilicon/hi6210-i2s.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "hi6210-i2s.h" -- cgit v1.2.3 From a821df3f1af72aa6a0d573eea94a7dd2613e9f4e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 21 Nov 2017 09:36:33 +1100 Subject: cifs: fix NULL deref in SMB2_read Signed-off-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky CC: Stable Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5331631386a2..01346b8b6edb 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2678,27 +2678,27 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, cifs_small_buf_release(req); rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; - shdr = get_sync_hdr(rsp); - if (shdr->Status == STATUS_END_OF_FILE) { + if (rc) { + if (rc != -ENODATA) { + cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); + cifs_dbg(VFS, "Send error in read = %d\n", rc); + } free_rsp_buf(resp_buftype, rsp_iov.iov_base); - return 0; + return rc == -ENODATA ? 
0 : rc; } - if (rc) { - cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); - cifs_dbg(VFS, "Send error in read = %d\n", rc); - } else { - *nbytes = le32_to_cpu(rsp->DataLength); - if ((*nbytes > CIFS_MAX_MSGSIZE) || - (*nbytes > io_parms->length)) { - cifs_dbg(FYI, "bad length %d for count %d\n", - *nbytes, io_parms->length); - rc = -EIO; - *nbytes = 0; - } + *nbytes = le32_to_cpu(rsp->DataLength); + if ((*nbytes > CIFS_MAX_MSGSIZE) || + (*nbytes > io_parms->length)) { + cifs_dbg(FYI, "bad length %d for count %d\n", + *nbytes, io_parms->length); + rc = -EIO; + *nbytes = 0; } + shdr = get_sync_hdr(rsp); + if (*buf) { memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes); free_rsp_buf(resp_buftype, rsp_iov.iov_base); -- cgit v1.2.3 From 5702591fc6a3f409f460def104ee149330dac82d Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Tue, 21 Nov 2017 14:47:56 +0100 Subject: CIFS: don't log STATUS_NOT_FOUND errors for DFS cifs.ko makes DFS queries regardless of the type of the server and non-DFS servers are common. This often results in superfluous logging of non-critical errors. 
Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/smb2ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e06740436b92..ed88ab8a4774 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1406,7 +1406,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, } while (rc == -EAGAIN); if (rc) { - cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc); + if (rc != -ENOENT) + cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc); goto out; } -- cgit v1.2.3 From fcf38cdf332a81b20a59e3ebaea81f6b316bbe0c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 5 Dec 2017 22:57:43 -0800 Subject: kyber: fix another domain token wait queue hang Commit 8cf466602028 ("kyber: fix hang on domain token wait queue") fixed a hang caused by leaving wait entries on the domain token wait queue after the __sbitmap_queue_get() retry succeeded, making that wait entry a "dud" which won't in turn wake more entries up. However, we can also get a dud entry if kyber_get_domain_token() fails once but is then called again and succeeds. This can happen if the hardware queue is rerun for some other reason, or, more likely, kyber_dispatch_request() tries the same domain twice. The fix is to remove our entry from the wait queue whenever we successfully get a token. The only complication is that we might be on one of many wait queues in the struct sbitmap_queue, but that's easily fixed by remembering which wait queue we were put on. While we're here, only initialize the wait queue entry once instead of on every wait, and use spin_lock_irq() instead of spin_lock_irqsave(), since this is always called from process context with irqs enabled. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index b4df317c2916..f95c60774ce8 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -100,9 +100,13 @@ struct kyber_hctx_data { unsigned int cur_domain; unsigned int batching; wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; + struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS]; atomic_t wait_index[KYBER_NUM_DOMAINS]; }; +static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, + void *key); + static int rq_sched_domain(const struct request *rq) { unsigned int op = rq->cmd_flags; @@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) for (i = 0; i < KYBER_NUM_DOMAINS; i++) { INIT_LIST_HEAD(&khd->rqs[i]); + init_waitqueue_func_entry(&khd->domain_wait[i], + kyber_domain_wake); + khd->domain_wait[i].private = hctx; INIT_LIST_HEAD(&khd->domain_wait[i].entry); atomic_set(&khd->wait_index[i], 0); } @@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd, int nr; nr = __sbitmap_queue_get(domain_tokens); - if (nr >= 0) - return nr; /* * If we failed to get a domain token, make sure the hardware queue is * run when one becomes available. Note that this is serialized on * khd->lock, but we still need to be careful about the waker. */ - if (list_empty_careful(&wait->entry)) { - init_waitqueue_func_entry(wait, kyber_domain_wake); - wait->private = hctx; + if (nr < 0 && list_empty_careful(&wait->entry)) { ws = sbq_wait_ptr(domain_tokens, &khd->wait_index[sched_domain]); + khd->domain_ws[sched_domain] = ws; add_wait_queue(&ws->wait, wait); /* * Try again in case a token was freed before we got on the wait - * queue. The waker may have already removed the entry from the - * wait queue, but list_del_init() is okay with that. + * queue. 
*/ nr = __sbitmap_queue_get(domain_tokens); - if (nr >= 0) { - unsigned long flags; + } - spin_lock_irqsave(&ws->wait.lock, flags); - list_del_init(&wait->entry); - spin_unlock_irqrestore(&ws->wait.lock, flags); - } + /* + * If we got a token while we were on the wait queue, remove ourselves + * from the wait queue to ensure that all wake ups make forward + * progress. It's possible that the waker already deleted the entry + * between the !list_empty_careful() check and us grabbing the lock, but + * list_del_init() is okay with that. + */ + if (nr >= 0 && !list_empty_careful(&wait->entry)) { + ws = khd->domain_ws[sched_domain]; + spin_lock_irq(&ws->wait.lock); + list_del_init(&wait->entry); + spin_unlock_irq(&ws->wait.lock); } + return nr; } -- cgit v1.2.3 From 00ef0ef2cccb0350eae368e565c98453a9305b05 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 5 Dec 2017 11:44:15 +0100 Subject: sched/headers: Constify object_is_on_stack() object_is_on_stack() doesn't modify its argument and should never do it. Make it const. 
Signed-off-by: Sascha Hauer Cc: FUJITA Tomonori Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel@pengutronix.de Link: http://lkml.kernel.org/r/20171205104415.17147-1-s.hauer@pengutronix.de Signed-off-by: Ingo Molnar --- include/linux/sched/task_stack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index cb4828aaa34f..6a841929073f 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -78,7 +78,7 @@ static inline void put_task_stack(struct task_struct *tsk) {} #define task_stack_end_corrupted(task) \ (*(end_of_stack(task)) != STACK_END_MAGIC) -static inline int object_is_on_stack(void *obj) +static inline int object_is_on_stack(const void *obj) { void *stack = task_stack_page(current); -- cgit v1.2.3 From 7912af5c835bd86f2b0347a480e0f40e2fab30d0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Dec 2017 14:55:05 -0600 Subject: PCI: Add pci_get_domain_bus_and_slot() stub The coretemp driver build fails when CONFIG_PCI is not enabled because it uses a function that does not have a stub for that config case, so add the function stub. 
../drivers/hwmon/coretemp.c: In function 'adjust_tjmax': ../drivers/hwmon/coretemp.c:250:9: error: implicit declaration of function 'pci_get_domain_bus_and_slot' [-Werror=implicit-function-declaration] struct pci_dev *host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn); ../drivers/hwmon/coretemp.c:250:32: warning: initialization makes pointer from integer without a cast [enabled by default] struct pci_dev *host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn); Signed-off-by: Randy Dunlap [bhelgaas: identical patch also by Arnd Bergmann ] Signed-off-by: Bjorn Helgaas Acked-by: Guenter Roeck --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index 0403894147a3..c170c9250c8b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1674,6 +1674,9 @@ static inline struct pci_dev *pci_get_slot(struct pci_bus *bus, static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) { return NULL; } +static inline struct pci_dev *pci_get_domain_bus_and_slot(int domain, + unsigned int bus, unsigned int devfn) +{ return NULL; } static inline int pci_domain_nr(struct pci_bus *bus) { return 0; } static inline struct pci_dev *pci_dev_get(struct pci_dev *dev) { return NULL; } -- cgit v1.2.3 From 470195f82e4ea550b7c37736a12bf3fa565295ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 29 Nov 2017 15:12:27 +0100 Subject: x86/PCI: Fix infinite loop in search for 64bit BAR placement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Break the loop if we can't find some address space for a 64bit BAR. 
Signed-off-by: Christian König Signed-off-by: Bjorn Helgaas --- arch/x86/pci/fixup.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 1e996df687a3..5328e86f73eb 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -696,8 +696,13 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) res->end = 0xfd00000000ull - 1; /* Just grab the free area behind system memory for this */ - while ((conflict = request_resource_conflict(&iomem_resource, res))) + while ((conflict = request_resource_conflict(&iomem_resource, res))) { + if (conflict->end >= res->end) { + kfree(res); + return; + } res->start = conflict->end + 1; + } dev_info(&dev->dev, "adding root bus resource %pR\n", res); -- cgit v1.2.3 From a19e2696135efb471981c1ae1ec3cb2b70c41a2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 29 Nov 2017 15:12:28 +0100 Subject: x86/PCI: Only enable a 64bit BAR on single-socket AMD Family 15h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we have a multi-socket system, each CPU core needs the same setup. Since this is tricky to do in the fixup code, don't enable a 64bit BAR on multi-socket systems for now. 
Signed-off-by: Christian König Signed-off-by: Bjorn Helgaas --- arch/x86/pci/fixup.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 5328e86f73eb..e663d6bf1328 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -665,6 +665,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) unsigned i; u32 base, limit, high; struct resource *res, *conflict; + struct pci_dev *other; + + /* Check that we are the only device of that type */ + other = pci_get_device(dev->vendor, dev->device, NULL); + if (other != dev || + (other = pci_get_device(dev->vendor, dev->device, other))) { + /* This is a multi-socket system, don't touch it for now */ + pci_dev_put(other); + return; + } for (i = 0; i < 8; i++) { pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base); @@ -719,10 +729,10 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) pci_bus_add_resource(dev->bus, res, 0); } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); #endif -- cgit v1.2.3 From b638823a7bbd251d442042b0e9522100bdaa5b66 Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Tue, 5 Dec 2017 12:34:56 +0000 Subject: ARM: davinci: Use platform_device_register_full() to create 
pdev for dm365's eDMA Convert the DM365 EDMA platform device creation to use struct platform_device_info XXXXXX __initconst and platform_device_register_full() This will allow us to specify the dma_mask for the device in an upcoming patch. Without this, EDMA on DM365 refuses to probe. Fixes: 7ab388e85faa ("ARM: davinci: Use platform_device_register_full() to create pdev for eDMA") Reviewed-by: Peter Ujfalusi Signed-off-by: Alejandro Mery Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm365.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8be04ec95adf..9bd17bc77b5c 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -925,12 +925,13 @@ static struct resource edma_resources[] = { /* not using TC*_ERR */ }; -static struct platform_device dm365_edma_device = { - .name = "edma", - .id = 0, - .dev.platform_data = &dm365_edma_pdata, - .num_resources = ARRAY_SIZE(edma_resources), - .resource = edma_resources, +static const struct platform_device_info dm365_edma_device __initconst = { + .name = "edma", + .id = 0, + .res = edma_resources, + .num_res = ARRAY_SIZE(edma_resources), + .data = &dm365_edma_pdata, + .size_data = sizeof(dm365_edma_pdata), }; static struct resource dm365_asp_resources[] = { @@ -1428,13 +1429,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg, static int __init dm365_init_devices(void) { + struct platform_device *edma_pdev; int ret = 0; if (!cpu_is_davinci_dm365()) return 0; davinci_cfg_reg(DM365_INT_EDMA_CC); - platform_device_register(&dm365_edma_device); + edma_pdev = platform_device_register_full(&dm365_edma_device); + if (IS_ERR(edma_pdev)) { + pr_warn("%s: Failed to register eDMA\n", __func__); + return PTR_ERR(edma_pdev); + } platform_device_register(&dm365_mdio_device); platform_device_register(&dm365_emac_device); -- cgit v1.2.3 From 621f96bcb49412010876a1e6e006f748b91d9e75 Mon Sep 17 
00:00:00 2001 From: Alejandro Mery Date: Tue, 5 Dec 2017 12:34:57 +0000 Subject: ARM: davinci: Add dma_mask to dm365's eDMA device Add dma_mask to dm365's EDMA device. Without a valid dma_mask, EDMA on DM365 refuses to probe. Fixes: cef5b0da4019 ("ARM: davinci: Add dma_mask to eDMA devices") Reviewed-by: Peter Ujfalusi Signed-off-by: Alejandro Mery Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm365.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 9bd17bc77b5c..103316f01a22 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -928,6 +928,7 @@ static struct resource edma_resources[] = { static const struct platform_device_info dm365_edma_device __initconst = { .name = "edma", .id = 0, + .dma_mask = DMA_BIT_MASK(32), .res = edma_resources, .num_res = ARRAY_SIZE(edma_resources), .data = &dm365_edma_pdata, -- cgit v1.2.3 From c5a88cd2e1c508868922bafa0a5c3365986b98e5 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sun, 3 Dec 2017 16:04:53 -0600 Subject: ARM: dts: da850-lego-ev3: Fix battery voltage gpio This fixes the battery voltage monitoring gpio-hog settings. When the gpio is low, it turns off the battery voltage to the ADC chip. However, this needs to be on all of the time so that we can monitor battery voltage. Also, there was a typo that prevented pinmuxing from working correctly. 
Signed-off-by: David Lechner Signed-off-by: Sekhar Nori --- arch/arm/boot/dts/da850-lego-ev3.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/da850-lego-ev3.dts b/arch/arm/boot/dts/da850-lego-ev3.dts index 413dbd5d9f64..81942ae83e1f 100644 --- a/arch/arm/boot/dts/da850-lego-ev3.dts +++ b/arch/arm/boot/dts/da850-lego-ev3.dts @@ -178,7 +178,7 @@ */ battery { pinctrl-names = "default"; - pintctrl-0 = <&battery_pins>; + pinctrl-0 = <&battery_pins>; compatible = "lego,ev3-battery"; io-channels = <&adc 4>, <&adc 3>; io-channel-names = "voltage", "current"; @@ -392,7 +392,7 @@ batt_volt_en { gpio-hog; gpios = <6 GPIO_ACTIVE_HIGH>; - output-low; + output-high; }; }; -- cgit v1.2.3 From 947134d9b00f342415af7eddd42a5fce7262a1b9 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 4 Dec 2017 11:45:21 -0500 Subject: x86/smpboot: Do not use smp_num_siblings in __max_logical_packages calculation Documentation/x86/topology.txt defines smp_num_siblings as "The number of threads in a core". Since commit bbb65d2d365e ("x86: use cpuid vector 0xb when available for detecting cpu topology") smp_num_siblings is the maximum number of threads in a core. If Simultaneous MultiThreading (SMT) is disabled on a system, smp_num_siblings is 2 and not 1 as expected. Use topology_max_smt_threads(), which contains the active number of threads, in the __max_logical_packages calculation. On a single socket, single core, single thread system __max_smt_threads has not been updated when the __max_logical_packages calculation happens, so it's zero which makes the package estimate fail. Initialize it to one, which is the minimum number of threads on a core.
[ tglx: Folded the __max_smt_threads fix in ] Fixes: b4c0a7326f5d ("x86/smpboot: Fix __max_logical_packages estimate") Reported-by: Jakub Kicinski Signed-off-by: Prarit Bhargava Tested-by: Jakub Kicinski Cc: netdev@vger.kernel.org Cc: "netdev@vger.kernel.org" Cc: Clark Williams Link: https://lkml.kernel.org/r/20171204164521.17870-1-prarit@redhat.com --- arch/x86/kernel/smpboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 05a97d5fe298..35cb20994e32 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; /* Maximum number of SMT threads on any online core */ -int __max_smt_threads __read_mostly; +int __read_mostly __max_smt_threads = 1; /* Flag to indicate if a complete sched domain rebuild is required */ bool x86_topology_update; @@ -1304,7 +1304,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) * Today neither Intel nor AMD support heterogenous systems so * extrapolate the boot cpu's data to all packages. */ - ncpus = cpu_data(0).booted_cores * smp_num_siblings; + ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); pr_info("Max logical packages: %u\n", __max_logical_packages); -- cgit v1.2.3 From 08529078d8d9adf689bf39cc38d53979a0869970 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2017 15:40:55 +0300 Subject: x86/boot/compressed/64: Detect and handle 5-level paging at boot-time Prerequisite for fixing the current problem of instantaneous reboots when a 5-level paging kernel is booted on 4-level paging hardware. At the same time this change prepares the decompression code to boot-time switching between 4- and 5-level paging. [ tglx: Folded the GCC < 5 fix. 
] Fixes: 77ef56e4f0fb ("x86: Enable 5-level paging support via CONFIG_X86_5LEVEL=y") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: linux-mm@kvack.org Cc: Cyrill Gorcunov Cc: Borislav Petkov Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20171204124059.63515-2-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/Makefile | 1 + arch/x86/boot/compressed/head_64.S | 16 ++++++++++++---- arch/x86/boot/compressed/pgtable_64.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 arch/x86/boot/compressed/pgtable_64.c diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 1e9c322e973a..f25e1530e064 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o vmlinux-objs-y += $(obj)/mem_encrypt.o + vmlinux-objs-y += $(obj)/pgtable_64.o endif $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 20919b4f3133..fc313e29fe2c 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -305,10 +305,18 @@ ENTRY(startup_64) leaq boot_stack_end(%rbx), %rsp #ifdef CONFIG_X86_5LEVEL - /* Check if 5-level paging has already enabled */ - movq %cr4, %rax - testl $X86_CR4_LA57, %eax - jnz lvl5 + /* + * Check if we need to enable 5-level paging. + * RSI holds real mode data and need to be preserved across + * a function call. + */ + pushq %rsi + call l5_paging_required + popq %rsi + + /* If l5_paging_required() returned zero, we're done here. 
*/ + cmpq $0, %rax + je lvl5 /* * At this point we are in long mode with 4-level paging enabled, diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c new file mode 100644 index 000000000000..b4469a37e9a1 --- /dev/null +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -0,0 +1,28 @@ +#include + +/* + * __force_order is used by special_insns.h asm code to force instruction + * serialization. + * + * It is not referenced from the code, but GCC < 5 with -fPIE would fail + * due to an undefined symbol. Define it to make these ancient GCCs work. + */ +unsigned long __force_order; + +int l5_paging_required(void) +{ + /* Check if leaf 7 is supported. */ + + if (native_cpuid_eax(0) < 7) + return 0; + + /* Check if la57 is supported. */ + if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return 0; + + /* Check if 5-level paging has already been enabled. */ + if (native_read_cr4() & X86_CR4_LA57) + return 0; + + return 1; +} -- cgit v1.2.3 From 6d7e0ba2d2be9e50cccba213baf07e0e183c1b24 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2017 15:40:56 +0300 Subject: x86/boot/compressed/64: Print error if 5-level paging is not supported If the machine does not support the paging mode for which the kernel was compiled, the boot process cannot continue. It's not possible to let the kernel detect the mismatch as it does not even reach the point where cpu features can be evaluated due to a triple fault in the KASLR setup. Instead of instantaneous silent reboot, emit an error message which gives the user the information why the boot fails. Fixes: 77ef56e4f0fb ("x86: Enable 5-level paging support via CONFIG_X86_5LEVEL=y") Reported-by: Borislav Petkov Signed-off-by: Kirill A.
Shutemov Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Cc: Andi Kleen Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: linux-mm@kvack.org Cc: Cyrill Gorcunov Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20171204124059.63515-3-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/misc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b50c42455e25..98761a1576ce 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -169,6 +169,16 @@ void __puthex(unsigned long value) } } +static bool l5_supported(void) +{ + /* Check if leaf 7 is supported. */ + if (native_cpuid_eax(0) < 7) + return 0; + + /* Check if la57 is supported. */ + return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)); +} + #if CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len, unsigned long virt_addr) @@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, console_init(); debug_putstr("early console in extract_kernel\n"); + if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) { + error("This linux kernel as configured requires 5-level paging\n" + "This CPU does not support the required 'cr4.la57' feature\n" + "Unable to boot - please use a kernel appropriate for your CPU\n"); + } + free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; -- cgit v1.2.3 From 7cb4774e2d3282d29edd00762167876a27cc7d2a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 17:54:38 +0100 Subject: HID: core: lower log level for unknown main item tags to warnings Given all the effort distros have done with splash-screens to give users a nice clean boot experience, we really want dmesg --level=err to not print anything unless there is a real problem with either the hardware or the kernel. 
Buggy HID descriptors unfortunately happen all too often, so lower the log level to warning to keep the console clear of error messages such as: [ 441.079664] apple 0005:05AC:0239.0003: unknown main item tag 0x0 Signed-off-by: Hans de Goede Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index f3fcb836a1f9..0c3f608131cf 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -551,7 +551,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item) ret = hid_add_field(parser, HID_FEATURE_REPORT, data); break; default: - hid_err(parser->device, "unknown main item tag 0x%x\n", item->tag); + hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag); ret = 0; } -- cgit v1.2.3 From b860b419d970f286294fbfb2b21a4028fd8ee442 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 12:21:35 +0100 Subject: mfd: Fix RTS5227 (and others) powermanagement Commit 8275b77a1513 ("mfd: rts5249: Add support for RTS5250S power saving") adds powersaving support for device-ids 5249 524a and 525a. But as a side effect it breaks ASPM support for all the other device-ids, causing e.g. the Haswell CPU on a Lenovo T440s to not go into a higher c-state than PC3, while previously it would go to PC7, causing the machine to idle at 7.4W instead of 6.6W! The problem here is the new option.dev_aspm_mode field, which only gets explicitly initialized in the new code for the device-ids 5249 524a and 525a. Leaving the dev_aspm_mode 0 for the other device-ids. The default dev_aspm_mode 0 is mapped to DEV_ASPM_DISABLE, but the old behavior of calling rtsx_pci_enable_aspm() when idle and rtsx_pci_disable_aspm() when busy happens when dev_aspm_mode == DEV_ASPM_DYNAMIC. This commit changes the enum so that 0 = DEV_ASPM_DYNAMIC matching the old default behavior, fixing the pm regression with the other device-ids.
Fixes: 8275b77a1513 ("mfd: rts5249: Add support for RTS5250S power saving") Signed-off-by: Hans de Goede Acked-by: Rui Feng Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index a2a1318a3d0c..c3d3f04d8cc6 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -915,10 +915,10 @@ enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN}; #define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) enum dev_aspm_mode { - DEV_ASPM_DISABLE = 0, DEV_ASPM_DYNAMIC, DEV_ASPM_BACKDOOR, DEV_ASPM_STATIC, + DEV_ASPM_DISABLE, }; /* -- cgit v1.2.3 From 04271ce9601f1686db480ea11ea1848394d9e6a2 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 28 Nov 2017 15:55:07 +0530 Subject: i2c-cht-wc: constify platform_device_id platform_device_id are not supposed to change at runtime. All functions working with platform_device_id provided by work with const platform_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Reviewed-by: Hans de Goede Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cht-wc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c index 0d05dadb2dc5..44cffad43701 100644 --- a/drivers/i2c/busses/i2c-cht-wc.c +++ b/drivers/i2c/busses/i2c-cht-wc.c @@ -379,7 +379,7 @@ static int cht_wc_i2c_adap_i2c_remove(struct platform_device *pdev) return 0; } -static struct platform_device_id cht_wc_i2c_adap_id_table[] = { +static const struct platform_device_id cht_wc_i2c_adap_id_table[] = { { .name = "cht_wcove_ext_chgr" }, {}, }; -- cgit v1.2.3 From b458a3490e46dddd5b63f59b458c9b6d2284a63f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 Dec 2017 11:09:21 +0100 Subject: spi: rspi: Do not set SPCR_SPE in qspi_set_config_register() The R-Car Gen2 Hardware User Manual Rev. 
2.00 states: If the master/slave mode select bit (MSTR) is modified while the SPI function enable bit (SPE) is set to 1 (that is, this module is enabled), the subsequent operation cannot be guaranteed. Hence do not set SPCR_SPE when setting SPCR_MSTR, just like the .set_config_register() implementations for other RSPI variants do. Note that when booted from QSPI, the boot loader will have set SPCR_MSTR already, hence usually the bit is never modified by the Linux driver. Reported-by: Yoshihiro Shimoda Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/spi/spi-rspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c index 2ce875764ca6..0835a8d88fb8 100644 --- a/drivers/spi/spi-rspi.c +++ b/drivers/spi/spi-rspi.c @@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size) /* Sets SPCMD */ rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0); - /* Enables SPI function in master mode */ - rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR); + /* Sets RSPI mode */ + rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR); return 0; } -- cgit v1.2.3 From c810daba0ab5226084a56893a789af427a801146 Mon Sep 17 00:00:00 2001 From: Takuo Koguchi Date: Thu, 7 Dec 2017 16:20:14 +0900 Subject: spi: sun4i: disable clocks in the remove function mclk and hclk need to be disabled. Since pm_runtime_disable does not disable the clocks, use pm_runtime_force_suspend instead. Found by Linux Driver Verification project (linuxtesting.org). 
Signed-off-by: Takuo Koguchi Acked-by: Maxime Ripard Signed-off-by: Mark Brown --- drivers/spi/spi-sun4i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index c5cd635c28f3..41410031f8e9 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -525,7 +525,7 @@ err_free_master: static int sun4i_spi_remove(struct platform_device *pdev) { - pm_runtime_disable(&pdev->dev); + pm_runtime_force_suspend(&pdev->dev); return 0; } -- cgit v1.2.3 From 866f7ed7d67936dcdbcddc111c8af878c918fe7c Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Thu, 7 Dec 2017 12:58:33 +0200 Subject: ALSA: usb-audio: Add native DSD support for Esoteric D-05X Adds VID:PID of Esoteric D-05X to the TEAC device id's. Renames the is_teac_50X_dac() function to is_teac_dsd_dac() to cover broader device family from the same corporation sharing the same USB audio implementation. Signed-off-by: Jussi Laako Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 77eecaa4db1f..a66ef5777887 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id) /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch * between PCM/DOP and native DSD mode */ -static bool is_teac_50X_dac(unsigned int id) +static bool is_teac_dsd_dac(unsigned int id) { switch (id) { case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */ + case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */ return true; } return false; @@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs, break; } mdelay(20); - } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) { + } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) { /* Vendor mode switch cmd is required. 
*/ switch (fmt->altsetting) { case 3: /* DSD mode (DSD_U32) requested */ @@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, } /* TEAC devices with USB DAC functionality */ - if (is_teac_50X_dac(chip->usb_id)) { + if (is_teac_dsd_dac(chip->usb_id)) { if (fp->altsetting == 3) return SNDRV_PCM_FMTBIT_DSD_U32_BE; } -- cgit v1.2.3 From 2b4584d00a6bc02b63ab3c7213060d41a74bdff1 Mon Sep 17 00:00:00 2001 From: Guneshwor Singh Date: Thu, 7 Dec 2017 18:06:20 +0530 Subject: ALSA: hda - Add vendor id for Cannonlake HDMI codec Cannonlake HDMI codec has the same nid as Geminilake. This adds the codec entry for it. Signed-off-by: Guneshwor Singh Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index c19c81d230bd..b4f1b6e88305 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b) #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \ ((codec)->core.vendor_id == 0x80862800)) +#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c) #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ || is_skylake(codec) || is_broxton(codec) \ - || is_kabylake(codec)) || is_geminilake(codec) - + || is_kabylake(codec)) || is_geminilake(codec) \ + || is_cannonlake(codec) #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec)) @@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", 
patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi), +HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), -- cgit v1.2.3 From 0d5c8633b173dd64f0005bba83501c8462463e65 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Dec 2017 13:30:09 +0000 Subject: regulator: fix incorrect indentation of two assignment statements Remove extraneous space to fix indentation on a couple of assignment statements. Signed-off-by: Colin Ian King Signed-off-by: Mark Brown --- drivers/regulator/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b64b7916507f..365b32e3f505 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2605,8 +2605,8 @@ int regulator_get_hardware_vsel_register(struct regulator *regulator, if (ops->set_voltage_sel != regulator_set_voltage_sel_regmap) return -EOPNOTSUPP; - *vsel_reg = rdev->desc->vsel_reg; - *vsel_mask = rdev->desc->vsel_mask; + *vsel_reg = rdev->desc->vsel_reg; + *vsel_mask = rdev->desc->vsel_mask; return 0; } -- cgit v1.2.3 From dce231a484f72983cd7d5832aed4da04870a4d47 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Dec 2017 14:11:32 +0000 Subject: ASoC: pcm186x: make pcm186x_dai_ops and pcm186x_adc_input_channel_sel_value static pcm186x_dai_ops and pcm186x_adc_input_channel_sel_value are local to the source and do not need to be in global scope, so make them static. Cleans up sparse warnings: symbol 'pcm186x_dai_ops' was not declared. Should it be static? symbol 'pcm186x_adc_input_channel_sel_value' was not declared. Should it be static? 
Signed-off-by: Colin Ian King Signed-off-by: Mark Brown --- sound/soc/codecs/pcm186x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/pcm186x.c b/sound/soc/codecs/pcm186x.c index f7aa56e20169..cdb51427facc 100644 --- a/sound/soc/codecs/pcm186x.c +++ b/sound/soc/codecs/pcm186x.c @@ -59,7 +59,7 @@ static const struct snd_kcontrol_new pcm1865_snd_controls[] = { pcm186x_pga_tlv), }; -const unsigned int pcm186x_adc_input_channel_sel_value[] = { +static const unsigned int pcm186x_adc_input_channel_sel_value[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x20, 0x30 @@ -475,7 +475,7 @@ static int pcm186x_set_dai_sysclk(struct snd_soc_dai *dai, int clk_id, return 0; } -const struct snd_soc_dai_ops pcm186x_dai_ops = { +static const struct snd_soc_dai_ops pcm186x_dai_ops = { .set_sysclk = pcm186x_set_dai_sysclk, .set_tdm_slot = pcm186x_set_tdm_slot, .set_fmt = pcm186x_set_fmt, -- cgit v1.2.3 From 2d9bbd02c54094ceffa555143b0d68cd06504d63 Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Thu, 7 Dec 2017 15:04:53 +0100 Subject: spi: sun6i: disable/unprepare clocks on remove sun6i_spi_probe() uses sun6i_spi_runtime_resume() to prepare/enable clocks, so sun6i_spi_remove() should use sun6i_spi_runtime_suspend() to disable/unprepare them if we're not suspended. Replacing pm_runtime_disable() by pm_runtime_force_suspend() will ensure that sun6i_spi_runtime_suspend() is called if needed. Found by Linux Driver Verification project (linuxtesting.org). 
Fixes: 3558fe900e8af (spi: sunxi: Add Allwinner A31 SPI controller driver) Signed-off-by: Tobias Jordan Acked-by: Maxime Ripard Signed-off-by: Mark Brown --- drivers/spi/spi-sun6i.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c index fb38234249a8..8533f4edd00a 100644 --- a/drivers/spi/spi-sun6i.c +++ b/drivers/spi/spi-sun6i.c @@ -541,7 +541,7 @@ err_free_master: static int sun6i_spi_remove(struct platform_device *pdev) { - pm_runtime_disable(&pdev->dev); + pm_runtime_force_suspend(&pdev->dev); return 0; } -- cgit v1.2.3 From 283c35062f778fc40f8eb5dc004ca37a7208ea66 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Thu, 7 Dec 2017 09:38:51 -0600 Subject: ASoC: tlv320aic31xx: Fix GPIO header includes Use of gpiod_* needs , add this here. Fixes: b6b247cd5e37 ("ASoC: tlv320aic31xx: Switch GPIO handling to use gpiod_* API") Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 38fd6ea275fb..13471a900085 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From cd15da88c358e9987a3b16bc821c980dd2377776 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Thu, 7 Dec 2017 09:38:52 -0600 Subject: ASoC: tlv320aic31xx: Use fwnode APIs over raw OF calls Use fwnode_* API instead of of_*, the results are the same but fwnode_* is cleaner and we get ACPI support. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 77 +++++++++++++++------------------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 13471a900085..655c99db2426 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -157,7 +157,9 @@ struct aic31xx_priv { u8 i2c_regs_status; struct device *dev; struct regmap *regmap; + enum aic31xx_type codec_type; struct gpio_desc *gpio_reset; + int micbias_vg; struct aic31xx_pdata pdata; struct regulator_bulk_data supplies[AIC31XX_NUM_SUPPLIES]; struct aic31xx_disable_nb disable_nb[AIC31XX_NUM_SUPPLIES]; @@ -450,7 +452,7 @@ static int mic_bias_event(struct snd_soc_dapm_widget *w, /* change mic bias voltage to user defined */ snd_soc_update_bits(codec, AIC31XX_MICBIAS, AIC31XX_MICBIAS_MASK, - aic31xx->pdata.micbias_vg << + aic31xx->micbias_vg << AIC31XX_MICBIAS_SHIFT); dev_dbg(codec->dev, "%s: turned on\n", __func__); break; @@ -673,14 +675,14 @@ static int aic31xx_add_controls(struct snd_soc_codec *codec) int ret = 0; struct aic31xx_priv *aic31xx = snd_soc_codec_get_drvdata(codec); - if (!(aic31xx->pdata.codec_type & DAC31XX_BIT)) + if (!(aic31xx->codec_type & DAC31XX_BIT)) ret = snd_soc_add_codec_controls( codec, aic31xx_snd_controls, ARRAY_SIZE(aic31xx_snd_controls)); if (ret) return ret; - if (aic31xx->pdata.codec_type & AIC31XX_STEREO_CLASS_D_BIT) + if (aic31xx->codec_type & AIC31XX_STEREO_CLASS_D_BIT) ret = snd_soc_add_codec_controls( codec, aic311x_snd_controls, ARRAY_SIZE(aic311x_snd_controls)); @@ -698,7 +700,7 @@ static int aic31xx_add_widgets(struct snd_soc_codec *codec) struct aic31xx_priv *aic31xx = snd_soc_codec_get_drvdata(codec); int ret = 0; - if (aic31xx->pdata.codec_type & DAC31XX_BIT) { + if (aic31xx->codec_type & DAC31XX_BIT) { ret = snd_soc_dapm_new_controls( dapm, dac31xx_dapm_widgets, ARRAY_SIZE(dac31xx_dapm_widgets)); @@ -722,7 +724,7 @@ static int 
aic31xx_add_widgets(struct snd_soc_codec *codec) return ret; } - if (aic31xx->pdata.codec_type & AIC31XX_STEREO_CLASS_D_BIT) { + if (aic31xx->codec_type & AIC31XX_STEREO_CLASS_D_BIT) { ret = snd_soc_dapm_new_controls( dapm, aic311x_dapm_widgets, ARRAY_SIZE(aic311x_dapm_widgets)); @@ -1279,42 +1281,6 @@ static const struct of_device_id tlv320aic31xx_of_match[] = { {}, }; MODULE_DEVICE_TABLE(of, tlv320aic31xx_of_match); - -static void aic31xx_pdata_from_of(struct aic31xx_priv *aic31xx) -{ - struct device_node *np = aic31xx->dev->of_node; - unsigned int value = MICBIAS_2_0V; - int ret; - - of_property_read_u32(np, "ai31xx-micbias-vg", &value); - switch (value) { - case MICBIAS_2_0V: - case MICBIAS_2_5V: - case MICBIAS_AVDDV: - aic31xx->pdata.micbias_vg = value; - break; - default: - dev_err(aic31xx->dev, - "Bad ai31xx-micbias-vg value %d DT\n", - value); - aic31xx->pdata.micbias_vg = MICBIAS_2_0V; - } - - ret = of_get_named_gpio(np, "reset-gpios", 0); - if (ret > 0) { - aic31xx->pdata.gpio_reset = ret; - } else { - ret = of_get_named_gpio(np, "gpio-reset", 0); - if (ret > 0) { - dev_warn(aic31xx->dev, "Using deprecated property \"gpio-reset\", please update your DT"); - aic31xx->pdata.gpio_reset = ret; - } - } -} -#else /* CONFIG_OF */ -static void aic31xx_pdata_from_of(struct aic31xx_priv *aic31xx) -{ -} #endif /* CONFIG_OF */ #ifdef CONFIG_ACPI @@ -1329,6 +1295,7 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { struct aic31xx_priv *aic31xx; + unsigned int micbias_value = MICBIAS_2_0V; int i, ret; dev_dbg(&i2c->dev, "## %s: %s codec_type = %d\n", __func__, @@ -1347,15 +1314,29 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, } aic31xx->dev = &i2c->dev; - aic31xx->pdata.codec_type = id->driver_data; + aic31xx->codec_type = id->driver_data; dev_set_drvdata(aic31xx->dev, aic31xx); - if (dev_get_platdata(aic31xx->dev)) - memcpy(&aic31xx->pdata, dev_get_platdata(aic31xx->dev), - sizeof(aic31xx->pdata)); - else if 
(aic31xx->dev->of_node) - aic31xx_pdata_from_of(aic31xx); + fwnode_property_read_u32(aic31xx->dev->fwnode, "ai31xx-micbias-vg", + &micbias_value); + switch (micbias_value) { + case MICBIAS_2_0V: + case MICBIAS_2_5V: + case MICBIAS_AVDDV: + aic31xx->micbias_vg = micbias_value; + break; + default: + dev_err(aic31xx->dev, "Bad ai31xx-micbias-vg value %d\n", + micbias_value); + aic31xx->micbias_vg = MICBIAS_2_0V; + } + + if (dev_get_platdata(aic31xx->dev)) { + memcpy(&aic31xx->pdata, dev_get_platdata(aic31xx->dev), sizeof(aic31xx->pdata)); + aic31xx->codec_type = aic31xx->pdata.codec_type; + aic31xx->micbias_vg = aic31xx->pdata.micbias_vg; + } aic31xx->gpio_reset = devm_gpiod_get_optional(aic31xx->dev, "reset", GPIOD_OUT_LOW); @@ -1375,7 +1356,7 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, return ret; } - if (aic31xx->pdata.codec_type & DAC31XX_BIT) + if (aic31xx->codec_type & DAC31XX_BIT) return snd_soc_register_codec(&i2c->dev, &soc_codec_driver_aic31xx, dac31xx_dai_driver, -- cgit v1.2.3 From 0ce918c9e070bf4fd17af0d76096ad184815bd79 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Thu, 7 Dec 2017 09:38:56 -0600 Subject: ASoC: tlv320aic31xx: Reset registers during power up Add a reset function that toggles the reset line if available or uses the software reset command otherwise. Use this in power up to ensure the registers are in a sane state. This is useful when the driver module is reloaded, or after Kexec, warm-reboots, etc.. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic31xx.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 655c99db2426..858cb8be445f 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1055,6 +1055,22 @@ static int aic31xx_regulator_event(struct notifier_block *nb, return 0; } +static int aic31xx_reset(struct aic31xx_priv *aic31xx) +{ + int ret = 0; + + if (aic31xx->gpio_reset) { + gpiod_set_value(aic31xx->gpio_reset, 1); + ndelay(10); /* At least 10ns */ + gpiod_set_value(aic31xx->gpio_reset, 0); + } else { + ret = regmap_write(aic31xx->regmap, AIC31XX_RESET, 1); + } + mdelay(1); /* At least 1ms */ + + return ret; +} + static void aic31xx_clk_on(struct snd_soc_codec *codec) { struct aic31xx_priv *aic31xx = snd_soc_codec_get_drvdata(codec); @@ -1098,11 +1114,13 @@ static int aic31xx_power_on(struct snd_soc_codec *codec) if (ret) return ret; - if (aic31xx->gpio_reset) { - gpiod_set_value(aic31xx->gpio_reset, 0); - udelay(100); - } regcache_cache_only(aic31xx->regmap, false); + + /* Reset device registers for a consistent power-on like state */ + ret = aic31xx_reset(aic31xx); + if (ret < 0) + dev_err(aic31xx->dev, "Could not reset device: %d\n", ret); + ret = regcache_sync(aic31xx->regmap); if (ret) { dev_err(codec->dev, -- cgit v1.2.3 From 4cae8ff136782d77b108cb3a5ba53e60597ba3a6 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 5 Dec 2017 22:30:01 +0200 Subject: IB/core: Bound check alternate path port number The alternate port number is used as an array index in the IB security implementation, invalid values can result in a kernel panic. 
Cc: # v4.12 Fixes: d291f1a65232 ("IB/core: Enforce PKey security on QPs") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 16d55710b116..d0202bb176a4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1971,6 +1971,12 @@ static int modify_qp(struct ib_uverbs_file *file, goto release_qp; } + if ((cmd->base.attr_mask & IB_QP_ALT_PATH) && + !rdma_is_port_valid(qp->device, cmd->base.alt_port_num)) { + ret = -EINVAL; + goto release_qp; + } + attr->qp_state = cmd->base.qp_state; attr->cur_qp_state = cmd->base.cur_qp_state; attr->path_mtu = cmd->base.path_mtu; -- cgit v1.2.3 From 0fbe8f575b15585eec3326e43708fbbc024e8486 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 5 Dec 2017 22:30:02 +0200 Subject: IB/core: Don't enforce PKey security on SMI MADs Per the infiniband spec an SMI MAD can have any PKey. Checking the pkey on SMI MADs is not necessary, and it seems that some older adapters using the mthca driver don't follow the convention of using the default PKey, resulting in false denials, or errors querying the PKey cache. SMI MAD security is still enforced, only agents allowed to manage the subnet are able to receive or send SMI MADs. 
Reported-by: Chris Blake Cc: # v4.12 Fixes: 47a2b338fe63 ("IB/core: Enforce security on management datagrams") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/security.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index a337386652b0..feafdb961c48 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -739,8 +739,11 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index) if (!rdma_protocol_ib(map->agent.device, map->agent.port_num)) return 0; - if (map->agent.qp->qp_type == IB_QPT_SMI && !map->agent.smp_allowed) - return -EACCES; + if (map->agent.qp->qp_type == IB_QPT_SMI) { + if (!map->agent.smp_allowed) + return -EACCES; + return 0; + } return ib_security_pkey_access(map->agent.device, map->agent.port_num, -- cgit v1.2.3 From 4d02ebd9bbbdde1d524e62b540b0402cee7bbcdf Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 5 Dec 2017 22:30:03 +0200 Subject: IB/mlx4: Fix RSS hash fields restrictions Mistakenly the driver didn't allow RSS hash fields combinations which involve both IPv4 and IPv6 protocols. This bug caused failures in users' RSS use cases. Consequently, this patch fixes this bug and allows any combination that the HW can support. Additionally, the patch fixes the driver to return an error in case the user provides an unsupported mask for RSS hash fields.
Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 013049bcdb53..caf490ab24c8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -666,6 +666,19 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, return (-EOPNOTSUPP); } + if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4 | + MLX4_IB_RX_HASH_DST_IPV4 | + MLX4_IB_RX_HASH_SRC_IPV6 | + MLX4_IB_RX_HASH_DST_IPV6 | + MLX4_IB_RX_HASH_SRC_PORT_TCP | + MLX4_IB_RX_HASH_DST_PORT_TCP | + MLX4_IB_RX_HASH_SRC_PORT_UDP | + MLX4_IB_RX_HASH_DST_PORT_UDP)) { + pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n", + ucmd->rx_hash_fields_mask); + return (-EOPNOTSUPP); + } + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) && (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) { rss_ctx->flags = MLX4_RSS_IPV4; @@ -691,11 +704,11 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, return (-EOPNOTSUPP); } - if (rss_ctx->flags & MLX4_RSS_IPV4) { + if (rss_ctx->flags & MLX4_RSS_IPV4) rss_ctx->flags |= MLX4_RSS_UDP_IPV4; - } else if (rss_ctx->flags & MLX4_RSS_IPV6) { + if (rss_ctx->flags & MLX4_RSS_IPV6) rss_ctx->flags |= MLX4_RSS_UDP_IPV6; - } else { + if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) { pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n"); return (-EOPNOTSUPP); } @@ -707,15 +720,14 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) && (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { - if (rss_ctx->flags & MLX4_RSS_IPV4) { + if (rss_ctx->flags & 
MLX4_RSS_IPV4) rss_ctx->flags |= MLX4_RSS_TCP_IPV4; - } else if (rss_ctx->flags & MLX4_RSS_IPV6) { + if (rss_ctx->flags & MLX4_RSS_IPV6) rss_ctx->flags |= MLX4_RSS_TCP_IPV6; - } else { + if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) { pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n"); return (-EOPNOTSUPP); } - } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) || (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n"); -- cgit v1.2.3 From d0e312fe3d34c1bc014a7f8ec6540d05e8077483 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 5 Dec 2017 22:30:04 +0200 Subject: RDMA/netlink: Fix general protection fault The RDMA netlink core code checks validity of messages by ensuring that type and operand are in range. It works well for almost all clients except NLDEV, which has a cb_table smaller than the number of operands. A request to access such an operand will trigger the following kernel panic. This patch updates all places where cb_table is declared for consistency, but only NLDEV actually needs it.
general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Modules linked in: CPU: 0 PID: 522 Comm: syz-executor6 Not tainted 4.13.0+ #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 task: ffff8800657799c0 task.stack: ffff8800695d000 RIP: 0010:rdma_nl_rcv_msg+0x13a/0x4c0 RSP: 0018:ffff8800695d7838 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: 1ffff1000d2baf0b RCX: 00000000704ff4d7 RDX: 0000000000000000 RSI: ffffffff81ddb03c RDI: 00000003827fa6bc RBP: ffff8800695d7900 R08: ffffffff82ec0578 R09: 0000000000000000 R10: ffff8800695d7900 R11: 0000000000000001 R12: 000000000000001c R13: ffff880069d31e00 R14: 00000000ffffffff R15: ffff880069d357c0 FS: 00007fee6acb8700(0000) GS:ffff88006ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000201a9000 CR3: 0000000059766000 CR4: 00000000000006b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? rdma_nl_multicast+0x80/0x80 rdma_nl_rcv+0x36b/0x4d0 ? ibnl_put_attr+0xc0/0xc0 netlink_unicast+0x4bd/0x6d0 ? netlink_sendskb+0x50/0x50 ? drop_futex_key_refs.isra.4+0x68/0xb0 netlink_sendmsg+0x9ab/0xbd0 ? nlmsg_notify+0x140/0x140 ? wake_up_q+0xa1/0xf0 ? drop_futex_key_refs.isra.4+0x68/0xb0 sock_sendmsg+0x88/0xd0 sock_write_iter+0x228/0x3c0 ? sock_sendmsg+0xd0/0xd0 ? do_futex+0x3e5/0xb20 ? iov_iter_init+0xaf/0x1d0 __vfs_write+0x46e/0x640 ? sched_clock_cpu+0x1b/0x190 ? __vfs_read+0x620/0x620 ? __fget+0x23a/0x390 ? rw_verify_area+0xca/0x290 vfs_write+0x192/0x490 SyS_write+0xde/0x1c0 ? SyS_read+0x1c0/0x1c0 ? 
trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0xad RIP: 0033:0x7fee6a74a219 RSP: 002b:00007fee6acb7d58 EFLAGS: 00000212 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000638000 RCX: 00007fee6a74a219 RDX: 0000000000000078 RSI: 0000000020141000 RDI: 0000000000000006 RBP: 0000000000000046 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000212 R12: ffff8800695d7f98 R13: 0000000020141000 R14: 0000000000000006 R15: 00000000ffffffff Code: d6 48 b8 00 00 00 00 00 fc ff df 66 41 81 e4 ff 03 44 8d 72 ff 4a 8d 3c b5 c0 a6 7f 82 44 89 b5 4c ff ff ff 48 89 f9 48 c1 e9 03 <0f> b6 0c 01 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RIP: rdma_nl_rcv_msg+0x13a/0x4c0 RSP: ffff8800695d7838 ---[ end trace ba085d123959c8ec ]--- Kernel panic - not syncing: Fatal exception Cc: syzkaller Fixes: b4c598a67ea1 ("RDMA/netlink: Implement nldev device dumpit calback") Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 +- drivers/infiniband/core/device.c | 2 +- drivers/infiniband/core/iwcm.c | 2 +- drivers/infiniband/core/nldev.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f6983357145d..6294a7001d33 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4458,7 +4458,7 @@ out: return skb->len; } -static const struct rdma_nl_cbs cma_cb_table[] = { +static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = { [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats}, }; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5e1be4949d5f..30914f3baa5f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1146,7 +1146,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); -static const struct rdma_nl_cbs 
ibnl_ls_cb_table[] = { +static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = { [RDMA_NL_LS_OP_RESOLVE] = { .doit = ib_nl_handle_resolve_resp, .flags = RDMA_NL_ADMIN_PERM, diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index e9e189ec7502..5d676cff41f4 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -80,7 +80,7 @@ const char *__attribute_const__ iwcm_reject_msg(int reason) } EXPORT_SYMBOL(iwcm_reject_msg); -static struct rdma_nl_cbs iwcm_nl_cb_table[] = { +static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 2fae850a3eff..9a05245a1acf 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -303,7 +303,7 @@ out: cb->args[0] = idx; return skb->len; } -static const struct rdma_nl_cbs nldev_cb_table[] = { +static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, .dump = nldev_get_dumpit, -- cgit v1.2.3 From 335ebf6fa35ca1c59b73f76fad19b249d3550e86 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 30 Nov 2017 09:41:56 -0800 Subject: iw_cxgb4: only clear the ARMED bit if a notification is needed In __flush_qp(), the CQ ARMED bit was being cleared regardless of whether any notification is actually needed. This resulted in the iser termination logic getting stuck in ib_drain_sq() because the CQ was not marked ARMED and thus the drain CQE notification wasn't triggered. 
This new bug was exposed when this commit was merged: commit cbb40fadd31c ("iw_cxgb4: only call the cq comp_handler when the cq is armed") Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 5ee7fe433136..355e288ec969 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1285,21 +1285,21 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, spin_unlock_irqrestore(&rchp->lock, flag); if (schp == rchp) { - if (t4_clear_cq_armed(&rchp->cq) && - (rq_flushed || sq_flushed)) { + if ((rq_flushed || sq_flushed) && + t4_clear_cq_armed(&rchp->cq)) { spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); } } else { - if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) { + if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) { spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); } - if (t4_clear_cq_armed(&schp->cq) && sq_flushed) { + if (sq_flushed && t4_clear_cq_armed(&schp->cq)) { spin_lock_irqsave(&schp->comp_handler_lock, flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); -- cgit v1.2.3 From f5f263fed66f75a4482d7ad49392b4283a05885a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 5 Dec 2017 11:02:43 +0530 Subject: cpu_cooling: Make of_cpufreq_power_cooling_register() parse DT All the callers of of_cpufreq_power_cooling_register() have almost identical code and it makes more sense to move that code into the helper as it's all about reading DT properties. This gets rid of a lot of redundant code. Acked-by: Eduardo Valentin Signed-off-by: Viresh Kumar Signed-off-by: Rafael J.
Wysocki --- Documentation/thermal/cpu-cooling-api.txt | 7 ++--- drivers/cpufreq/arm_big_little.c | 23 +-------------- drivers/cpufreq/cpufreq-dt.c | 27 +---------------- drivers/cpufreq/mediatek-cpufreq.c | 22 +------------- drivers/cpufreq/qoriq-cpufreq.c | 14 +-------- drivers/thermal/cpu_cooling.c | 49 +++++++++++++++++++------------ include/linux/cpu_cooling.h | 15 ++-------- 7 files changed, 41 insertions(+), 116 deletions(-) diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt index 71653584cd03..4f6f5e9bb4d6 100644 --- a/Documentation/thermal/cpu-cooling-api.txt +++ b/Documentation/thermal/cpu-cooling-api.txt @@ -51,8 +51,7 @@ Dynamic power). "plat_static_func" is a function to calculate the static power consumed by these cpus (See 2.2 Static power). 1.1.4 struct thermal_cooling_device *of_cpufreq_power_cooling_register( - struct device_node *np, const struct cpumask *clip_cpus, u32 capacitance, - get_static_t plat_static_func) + struct cpufreq_policy *policy) Similar to cpufreq_power_cooling_register, this function register a cpufreq cooling device with power extensions using the device tree @@ -76,8 +75,8 @@ cpu. If you are using CONFIG_CPUFREQ_DT then the device. The `plat_static_func` parameter of `cpufreq_power_cooling_register()` -and `of_cpufreq_power_cooling_register()` is optional. If you don't -provide it, only dynamic power will be considered. +is optional. If you don't provide it, only dynamic power will be +considered. 
2.1 Dynamic power diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index 65ec5f01aa8d..3d5ed4ef3927 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -526,34 +526,13 @@ static int bL_cpufreq_exit(struct cpufreq_policy *policy) static void bL_cpufreq_ready(struct cpufreq_policy *policy) { - struct device *cpu_dev = get_cpu_device(policy->cpu); int cur_cluster = cpu_to_cluster(policy->cpu); - struct device_node *np; /* Do not register a cpu_cooling device if we are in IKS mode */ if (cur_cluster >= MAX_CLUSTERS) return; - np = of_node_get(cpu_dev->of_node); - if (WARN_ON(!np)) - return; - - if (of_find_property(np, "#cooling-cells", NULL)) { - u32 power_coefficient = 0; - - of_property_read_u32(np, "dynamic-power-coefficient", - &power_coefficient); - - cdev[cur_cluster] = of_cpufreq_power_cooling_register(np, - policy, power_coefficient, NULL); - if (IS_ERR(cdev[cur_cluster])) { - dev_err(cpu_dev, - "running cpufreq without cooling device: %ld\n", - PTR_ERR(cdev[cur_cluster])); - cdev[cur_cluster] = NULL; - } - } - of_node_put(np); + cdev[cur_cluster] = of_cpufreq_power_cooling_register(policy); } static struct cpufreq_driver bL_cpufreq_driver = { diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 545946ad0752..1e7bec7694ab 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -319,33 +319,8 @@ static int cpufreq_exit(struct cpufreq_policy *policy) static void cpufreq_ready(struct cpufreq_policy *policy) { struct private_data *priv = policy->driver_data; - struct device_node *np = of_node_get(priv->cpu_dev->of_node); - if (WARN_ON(!np)) - return; - - /* - * For now, just loading the cooling device; - * thermal DT code takes care of matching them. 
- */ - if (of_find_property(np, "#cooling-cells", NULL)) { - u32 power_coefficient = 0; - - of_property_read_u32(np, "dynamic-power-coefficient", - &power_coefficient); - - priv->cdev = of_cpufreq_power_cooling_register(np, - policy, power_coefficient, NULL); - if (IS_ERR(priv->cdev)) { - dev_err(priv->cpu_dev, - "running cpufreq without cooling device: %ld\n", - PTR_ERR(priv->cdev)); - - priv->cdev = NULL; - } - } - - of_node_put(np); + priv->cdev = of_cpufreq_power_cooling_register(policy); } static struct cpufreq_driver dt_cpufreq_driver = { diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index e0d5090b303d..6ff783e1b18a 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -310,28 +310,8 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, static void mtk_cpufreq_ready(struct cpufreq_policy *policy) { struct mtk_cpu_dvfs_info *info = policy->driver_data; - struct device_node *np = of_node_get(info->cpu_dev->of_node); - u32 capacitance = 0; - if (WARN_ON(!np)) - return; - - if (of_find_property(np, "#cooling-cells", NULL)) { - of_property_read_u32(np, DYNAMIC_POWER, &capacitance); - - info->cdev = of_cpufreq_power_cooling_register(np, - policy, capacitance, NULL); - - if (IS_ERR(info->cdev)) { - dev_err(info->cpu_dev, - "running cpufreq without cooling device: %ld\n", - PTR_ERR(info->cdev)); - - info->cdev = NULL; - } - } - - of_node_put(np); + info->cdev = of_cpufreq_power_cooling_register(policy); } static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 4ada55b8856e..3a665c18e14e 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -275,20 +275,8 @@ static int qoriq_cpufreq_target(struct cpufreq_policy *policy, static void qoriq_cpufreq_ready(struct cpufreq_policy *policy) { struct cpu_data *cpud = policy->driver_data; - struct 
device_node *np = of_get_cpu_node(policy->cpu, NULL); - if (of_find_property(np, "#cooling-cells", NULL)) { - cpud->cdev = of_cpufreq_cooling_register(np, policy); - - if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) { - pr_err("cpu%d is not running as cooling device: %ld\n", - policy->cpu, PTR_ERR(cpud->cdev)); - - cpud->cdev = NULL; - } - } - - of_node_put(np); + cpud->cdev = of_cpufreq_power_cooling_register(policy); } static struct cpufreq_driver qoriq_cpufreq_driver = { diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index dc63aba092e4..a31eb03c788e 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -873,38 +873,51 @@ EXPORT_SYMBOL(cpufreq_power_cooling_register); /** * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions - * @np: a valid struct device_node to the cooling device device tree node - * @policy: cpufreq policy - * @capacitance: dynamic power coefficient for these cpus - * @plat_static_func: function to calculate the static power consumed by these - * cpus (optional) + * @policy: CPUFreq policy. * * This interface function registers the cpufreq cooling device with * the name "thermal-cpufreq-%x". This api can support multiple * instances of cpufreq cooling devices. Using this API, the cpufreq - * cooling device will be linked to the device tree node provided. + * cooling device will be linked to the device tree node of the provided + * policy's CPU. * Using this function, the cooling device will implement the power * extensions by using a simple cpu power model. The cpus must have * registered their OPPs using the OPP library. * - * An optional @plat_static_func may be provided to calculate the - * static power consumed by these cpus. If the platform's static - * power consumption is unknown or negligible, make it NULL. + * It also takes into account, if property present in policy CPU node, the + * static power consumed by the cpu. 
* * Return: a valid struct thermal_cooling_device pointer on success, - * on failure, it returns a corresponding ERR_PTR(). + * and NULL on failure. */ struct thermal_cooling_device * -of_cpufreq_power_cooling_register(struct device_node *np, - struct cpufreq_policy *policy, - u32 capacitance, - get_static_t plat_static_func) +of_cpufreq_power_cooling_register(struct cpufreq_policy *policy) { - if (!np) - return ERR_PTR(-EINVAL); + struct device_node *np = of_get_cpu_node(policy->cpu, NULL); + struct thermal_cooling_device *cdev = NULL; + u32 capacitance = 0; + + if (!np) { + pr_err("cpu_cooling: OF node not available for cpu%d\n", + policy->cpu); + return NULL; + } - return __cpufreq_cooling_register(np, policy, capacitance, - plat_static_func); + if (of_find_property(np, "#cooling-cells", NULL)) { + of_property_read_u32(np, "dynamic-power-coefficient", + &capacitance); + + cdev = __cpufreq_cooling_register(np, policy, capacitance, + NULL); + if (IS_ERR(cdev)) { + pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n", + policy->cpu, PTR_ERR(cdev)); + cdev = NULL; + } + } + + of_node_put(np); + return cdev; } EXPORT_SYMBOL(of_cpufreq_power_cooling_register); diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index d4292ebc5c8b..f09d4feb34f4 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -56,10 +56,7 @@ of_cpufreq_cooling_register(struct device_node *np, struct cpufreq_policy *policy); struct thermal_cooling_device * -of_cpufreq_power_cooling_register(struct device_node *np, - struct cpufreq_policy *policy, - u32 capacitance, - get_static_t plat_static_func); +of_cpufreq_power_cooling_register(struct cpufreq_policy *policy); #else static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, @@ -69,10 +66,7 @@ of_cpufreq_cooling_register(struct device_node *np, } static inline struct thermal_cooling_device * -of_cpufreq_power_cooling_register(struct device_node *np, 
- struct cpufreq_policy *policy, - u32 capacitance, - get_static_t plat_static_func) +of_cpufreq_power_cooling_register(struct cpufreq_policy *policy) { return NULL; } @@ -105,10 +99,7 @@ of_cpufreq_cooling_register(struct device_node *np, } static inline struct thermal_cooling_device * -of_cpufreq_power_cooling_register(struct device_node *np, - struct cpufreq_policy *policy, - u32 capacitance, - get_static_t plat_static_func) +of_cpufreq_power_cooling_register(struct cpufreq_policy *policy) { return NULL; } -- cgit v1.2.3 From ba0966da208ef0793486502a0e6b929fbd6d4223 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 5 Dec 2017 11:02:44 +0530 Subject: cpu_cooling: Remove unused cpufreq_power_cooling_register() It isn't used by anyone, drop it. Acked-by: Eduardo Valentin Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/thermal/cpu-cooling-api.txt | 24 +++--------------------- drivers/thermal/cpu_cooling.c | 30 ------------------------------ include/linux/cpu_cooling.h | 10 ---------- 3 files changed, 3 insertions(+), 61 deletions(-) diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt index 4f6f5e9bb4d6..ea61e8bf7e2b 100644 --- a/Documentation/thermal/cpu-cooling-api.txt +++ b/Documentation/thermal/cpu-cooling-api.txt @@ -36,28 +36,14 @@ the user. The registration APIs returns the cooling device pointer. np: pointer to the cooling device device tree node clip_cpus: cpumask of cpus where the frequency constraints will happen. -1.1.3 struct thermal_cooling_device *cpufreq_power_cooling_register( - const struct cpumask *clip_cpus, u32 capacitance, - get_static_t plat_static_func) - -Similar to cpufreq_cooling_register, this function registers a cpufreq -cooling device. Using this function, the cooling device will -implement the power extensions by using a simple cpu power model. The -cpus must have registered their OPPs using the OPP library. 
- -The additional parameters are needed for the power model (See 2. Power -models). "capacitance" is the dynamic power coefficient (See 2.1 -Dynamic power). "plat_static_func" is a function to calculate the -static power consumed by these cpus (See 2.2 Static power). - -1.1.4 struct thermal_cooling_device *of_cpufreq_power_cooling_register( +1.1.3 struct thermal_cooling_device *of_cpufreq_power_cooling_register( struct cpufreq_policy *policy) -Similar to cpufreq_power_cooling_register, this function register a +Similar to cpufreq_cooling_register, this function register a cpufreq cooling device with power extensions using the device tree information supplied by the np parameter. -1.1.5 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) +1.1.4 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) This interface function unregisters the "thermal-cpufreq-%x" cooling device. @@ -74,10 +60,6 @@ cpu. If you are using CONFIG_CPUFREQ_DT then the `cpufreq_frequency_table` should already be assigned to the cpu device. -The `plat_static_func` parameter of `cpufreq_power_cooling_register()` -is optional. If you don't provide it, only dynamic power will be -considered. - 2.1 Dynamic power The dynamic power consumption of a processor depends on many factors. diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index a31eb03c788e..10199f7e1196 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -841,36 +841,6 @@ of_cpufreq_cooling_register(struct device_node *np, } EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); -/** - * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions - * @policy: cpufreq policy - * @capacitance: dynamic power coefficient for these cpus - * @plat_static_func: function to calculate the static power consumed by these - * cpus (optional) - * - * This interface function registers the cpufreq cooling device with - * the name "thermal-cpufreq-%x". 
This api can support multiple - * instances of cpufreq cooling devices. Using this function, the - * cooling device will implement the power extensions by using a - * simple cpu power model. The cpus must have registered their OPPs - * using the OPP library. - * - * An optional @plat_static_func may be provided to calculate the - * static power consumed by these cpus. If the platform's static - * power consumption is unknown or negligible, make it NULL. - * - * Return: a valid struct thermal_cooling_device pointer on success, - * on failure, it returns a corresponding ERR_PTR(). - */ -struct thermal_cooling_device * -cpufreq_power_cooling_register(struct cpufreq_policy *policy, u32 capacitance, - get_static_t plat_static_func) -{ - return __cpufreq_cooling_register(NULL, policy, capacitance, - plat_static_func); -} -EXPORT_SYMBOL(cpufreq_power_cooling_register); - /** * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions * @policy: CPUFreq policy. diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index f09d4feb34f4..c35778960a9c 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -41,10 +41,6 @@ typedef int (*get_static_t)(cpumask_t *cpumask, int interval, struct thermal_cooling_device * cpufreq_cooling_register(struct cpufreq_policy *policy); -struct thermal_cooling_device * -cpufreq_power_cooling_register(struct cpufreq_policy *policy, - u32 capacitance, get_static_t plat_static_func); - /** * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. * @np: a valid struct device_node to the cooling device device tree node. 
@@ -84,12 +80,6 @@ cpufreq_cooling_register(struct cpufreq_policy *policy) { return ERR_PTR(-ENOSYS); } -static inline struct thermal_cooling_device * -cpufreq_power_cooling_register(struct cpufreq_policy *policy, - u32 capacitance, get_static_t plat_static_func) -{ - return NULL; -} static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, -- cgit v1.2.3 From 3ebb62ffc4e4817c3288cdf7ed67ccaf453385e3 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 5 Dec 2017 11:02:45 +0530 Subject: cpu_cooling: Keep only one of_cpufreq*cooling_register() helper of_cpufreq_cooling_register() isn't used by anyone and so can be removed, but then we would be left with two routines: cpufreq_cooling_register() and of_cpufreq_power_cooling_register() that would look odd. Remove current implementation of of_cpufreq_cooling_register() and rename of_cpufreq_power_cooling_register() as of_cpufreq_cooling_register(). This simplifies lots of stuff. Acked-by: Eduardo Valentin Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/thermal/cpu-cooling-api.txt | 14 ++------ drivers/cpufreq/arm_big_little.c | 2 +- drivers/cpufreq/cpufreq-dt.c | 2 +- drivers/cpufreq/mediatek-cpufreq.c | 2 +- drivers/cpufreq/qoriq-cpufreq.c | 2 +- drivers/thermal/cpu_cooling.c | 28 ++-------------- include/linux/cpu_cooling.h | 53 ++++++++----------------------- 7 files changed, 23 insertions(+), 80 deletions(-) diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt index ea61e8bf7e2b..7a1c89db0419 100644 --- a/Documentation/thermal/cpu-cooling-api.txt +++ b/Documentation/thermal/cpu-cooling-api.txt @@ -26,24 +26,16 @@ the user. The registration APIs returns the cooling device pointer. clip_cpus: cpumask of cpus where the frequency constraints will happen. 
1.1.2 struct thermal_cooling_device *of_cpufreq_cooling_register( - struct device_node *np, const struct cpumask *clip_cpus) + struct cpufreq_policy *policy) This interface function registers the cpufreq cooling device with the name "thermal-cpufreq-%x" linking it with a device tree node, in order to bind it via the thermal DT code. This api can support multiple instances of cpufreq cooling devices. - np: pointer to the cooling device device tree node - clip_cpus: cpumask of cpus where the frequency constraints will happen. - -1.1.3 struct thermal_cooling_device *of_cpufreq_power_cooling_register( - struct cpufreq_policy *policy) - -Similar to cpufreq_cooling_register, this function register a -cpufreq cooling device with power extensions using the device tree -information supplied by the np parameter. + policy: CPUFreq policy. -1.1.4 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) +1.1.3 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) This interface function unregisters the "thermal-cpufreq-%x" cooling device. 
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index 3d5ed4ef3927..c56b57dcfda5 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -532,7 +532,7 @@ static void bL_cpufreq_ready(struct cpufreq_policy *policy) if (cur_cluster >= MAX_CLUSTERS) return; - cdev[cur_cluster] = of_cpufreq_power_cooling_register(policy); + cdev[cur_cluster] = of_cpufreq_cooling_register(policy); } static struct cpufreq_driver bL_cpufreq_driver = { diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 1e7bec7694ab..de3d104c25d7 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -320,7 +320,7 @@ static void cpufreq_ready(struct cpufreq_policy *policy) { struct private_data *priv = policy->driver_data; - priv->cdev = of_cpufreq_power_cooling_register(policy); + priv->cdev = of_cpufreq_cooling_register(policy); } static struct cpufreq_driver dt_cpufreq_driver = { diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 6ff783e1b18a..f95975b76d98 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -311,7 +311,7 @@ static void mtk_cpufreq_ready(struct cpufreq_policy *policy) { struct mtk_cpu_dvfs_info *info = policy->driver_data; - info->cdev = of_cpufreq_power_cooling_register(policy); + info->cdev = of_cpufreq_cooling_register(policy); } static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 3a665c18e14e..0562761a3dec 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -276,7 +276,7 @@ static void qoriq_cpufreq_ready(struct cpufreq_policy *policy) { struct cpu_data *cpud = policy->driver_data; - cpud->cdev = of_cpufreq_power_cooling_register(policy); + cpud->cdev = of_cpufreq_cooling_register(policy); } static struct cpufreq_driver qoriq_cpufreq_driver = { diff 
--git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 10199f7e1196..3371caf3095c 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -819,7 +819,6 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register); /** * of_cpufreq_cooling_register - function to create cpufreq cooling device. - * @np: a valid struct device_node to the cooling device device tree node * @policy: cpufreq policy * * This interface function registers the cpufreq cooling device with the name @@ -827,29 +826,6 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register); * cooling devices. Using this API, the cpufreq cooling device will be * linked to the device tree node provided. * - * Return: a valid struct thermal_cooling_device pointer on success, - * on failure, it returns a corresponding ERR_PTR(). - */ -struct thermal_cooling_device * -of_cpufreq_cooling_register(struct device_node *np, - struct cpufreq_policy *policy) -{ - if (!np) - return ERR_PTR(-EINVAL); - - return __cpufreq_cooling_register(np, policy, 0, NULL); -} -EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); - -/** - * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions - * @policy: CPUFreq policy. - * - * This interface function registers the cpufreq cooling device with - * the name "thermal-cpufreq-%x". This api can support multiple - * instances of cpufreq cooling devices. Using this API, the cpufreq - * cooling device will be linked to the device tree node of the provided - * policy's CPU. * Using this function, the cooling device will implement the power * extensions by using a simple cpu power model. The cpus must have * registered their OPPs using the OPP library. @@ -861,7 +837,7 @@ EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); * and NULL on failure. 
*/ struct thermal_cooling_device * -of_cpufreq_power_cooling_register(struct cpufreq_policy *policy) +of_cpufreq_cooling_register(struct cpufreq_policy *policy) { struct device_node *np = of_get_cpu_node(policy->cpu, NULL); struct thermal_cooling_device *cdev = NULL; @@ -889,7 +865,7 @@ of_cpufreq_power_cooling_register(struct cpufreq_policy *policy) of_node_put(np); return cdev; } -EXPORT_SYMBOL(of_cpufreq_power_cooling_register); +EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); /** * cpufreq_cooling_unregister - function to remove cpufreq cooling device. diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index c35778960a9c..fd0ea8ddca93 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -41,33 +41,6 @@ typedef int (*get_static_t)(cpumask_t *cpumask, int interval, struct thermal_cooling_device * cpufreq_cooling_register(struct cpufreq_policy *policy); -/** - * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. - * @np: a valid struct device_node to the cooling device device tree node. - * @policy: cpufreq policy. - */ -#ifdef CONFIG_THERMAL_OF -struct thermal_cooling_device * -of_cpufreq_cooling_register(struct device_node *np, - struct cpufreq_policy *policy); - -struct thermal_cooling_device * -of_cpufreq_power_cooling_register(struct cpufreq_policy *policy); -#else -static inline struct thermal_cooling_device * -of_cpufreq_cooling_register(struct device_node *np, - struct cpufreq_policy *policy) -{ - return ERR_PTR(-ENOSYS); -} - -static inline struct thermal_cooling_device * -of_cpufreq_power_cooling_register(struct cpufreq_policy *policy) -{ - return NULL; -} -#endif - /** * cpufreq_cooling_unregister - function to remove cpufreq cooling device. * @cdev: thermal cooling device pointer. 
@@ -81,24 +54,26 @@ cpufreq_cooling_register(struct cpufreq_policy *policy) return ERR_PTR(-ENOSYS); } -static inline struct thermal_cooling_device * -of_cpufreq_cooling_register(struct device_node *np, - struct cpufreq_policy *policy) +static inline +void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { - return ERR_PTR(-ENOSYS); + return; } +#endif /* CONFIG_CPU_THERMAL */ +#if defined(CONFIG_THERMAL_OF) && defined(CONFIG_CPU_THERMAL) +/** + * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. + * @policy: cpufreq policy. + */ +struct thermal_cooling_device * +of_cpufreq_cooling_register(struct cpufreq_policy *policy); +#else static inline struct thermal_cooling_device * -of_cpufreq_power_cooling_register(struct cpufreq_policy *policy) +of_cpufreq_cooling_register(struct cpufreq_policy *policy) { return NULL; } - -static inline -void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) -{ - return; -} -#endif /* CONFIG_CPU_THERMAL */ +#endif /* defined(CONFIG_THERMAL_OF) && defined(CONFIG_CPU_THERMAL) */ #endif /* __CPU_COOLING_H__ */ -- cgit v1.2.3 From 84fe2cab48590e4373978e4ef2031c977de98995 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 5 Dec 2017 11:02:46 +0530 Subject: cpu_cooling: Drop static-power related stuff No one has used it for the last two and half years (since it was introduced by commit c36cf0717631 (thermal: cpu_cooling: implement the power cooling device API), get rid of it. Acked-by: Eduardo Valentin Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/thermal/cpu_cooling.c | 106 +++++------------------------------------ include/linux/cpu_cooling.h | 3 -- include/trace/events/thermal.h | 10 ++-- 3 files changed, 16 insertions(+), 103 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 3371caf3095c..dfd23245f778 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -88,7 +88,6 @@ struct time_in_idle { * @policy: cpufreq policy. * @node: list_head to link all cpufreq_cooling_device together. * @idle_time: idle time stats - * @plat_get_static_power: callback to calculate the static power * * This structure is required for keeping information of each registered * cpufreq_cooling_device. @@ -104,7 +103,6 @@ struct cpufreq_cooling_device { struct cpufreq_policy *policy; struct list_head node; struct time_in_idle *idle_time; - get_static_t plat_get_static_power; }; static DEFINE_IDA(cpufreq_ida); @@ -318,60 +316,6 @@ static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu, return load; } -/** - * get_static_power() - calculate the static power consumed by the cpus - * @cpufreq_cdev: struct &cpufreq_cooling_device for this cpu cdev - * @tz: thermal zone device in which we're operating - * @freq: frequency in KHz - * @power: pointer in which to store the calculated static power - * - * Calculate the static power consumed by the cpus described by - * @cpu_actor running at frequency @freq. This function relies on a - * platform specific function that should have been provided when the - * actor was registered. If it wasn't, the static power is assumed to - * be negligible. The calculated static power is stored in @power. - * - * Return: 0 on success, -E* on failure. 
- */ -static int get_static_power(struct cpufreq_cooling_device *cpufreq_cdev, - struct thermal_zone_device *tz, unsigned long freq, - u32 *power) -{ - struct dev_pm_opp *opp; - unsigned long voltage; - struct cpufreq_policy *policy = cpufreq_cdev->policy; - struct cpumask *cpumask = policy->related_cpus; - unsigned long freq_hz = freq * 1000; - struct device *dev; - - if (!cpufreq_cdev->plat_get_static_power) { - *power = 0; - return 0; - } - - dev = get_cpu_device(policy->cpu); - WARN_ON(!dev); - - opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true); - if (IS_ERR(opp)) { - dev_warn_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", - freq_hz, PTR_ERR(opp)); - return -EINVAL; - } - - voltage = dev_pm_opp_get_voltage(opp); - dev_pm_opp_put(opp); - - if (voltage == 0) { - dev_err_ratelimited(dev, "Failed to get voltage for frequency %lu\n", - freq_hz); - return -EINVAL; - } - - return cpufreq_cdev->plat_get_static_power(cpumask, tz->passive_delay, - voltage, power); -} - /** * get_dynamic_power() - calculate the dynamic power * @cpufreq_cdev: &cpufreq_cooling_device for this cdev @@ -491,8 +435,8 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, u32 *power) { unsigned long freq; - int i = 0, cpu, ret; - u32 static_power, dynamic_power, total_load = 0; + int i = 0, cpu; + u32 total_load = 0; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; struct cpufreq_policy *policy = cpufreq_cdev->policy; u32 *load_cpu = NULL; @@ -522,22 +466,15 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, cpufreq_cdev->last_load = total_load; - dynamic_power = get_dynamic_power(cpufreq_cdev, freq); - ret = get_static_power(cpufreq_cdev, tz, freq, &static_power); - if (ret) { - kfree(load_cpu); - return ret; - } + *power = get_dynamic_power(cpufreq_cdev, freq); if (load_cpu) { trace_thermal_power_cpu_get_power(policy->related_cpus, freq, - load_cpu, i, dynamic_power, - static_power); + load_cpu, i, *power); 
kfree(load_cpu); } - *power = static_power + dynamic_power; return 0; } @@ -561,8 +498,6 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, unsigned long state, u32 *power) { unsigned int freq, num_cpus; - u32 static_power, dynamic_power; - int ret; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; /* Request state should be less than max_level */ @@ -572,13 +507,9 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus); freq = cpufreq_cdev->freq_table[state].frequency; - dynamic_power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus; - ret = get_static_power(cpufreq_cdev, tz, freq, &static_power); - if (ret) - return ret; + *power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus; - *power = static_power + dynamic_power; - return ret; + return 0; } /** @@ -606,21 +537,14 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, unsigned long *state) { unsigned int cur_freq, target_freq; - int ret; - s32 dyn_power; - u32 last_load, normalised_power, static_power; + u32 last_load, normalised_power; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; struct cpufreq_policy *policy = cpufreq_cdev->policy; cur_freq = cpufreq_quick_get(policy->cpu); - ret = get_static_power(cpufreq_cdev, tz, cur_freq, &static_power); - if (ret) - return ret; - - dyn_power = power - static_power; - dyn_power = dyn_power > 0 ? dyn_power : 0; + power = power > 0 ? power : 0; last_load = cpufreq_cdev->last_load ?: 1; - normalised_power = (dyn_power * 100) / last_load; + normalised_power = (power * 100) / last_load; target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power); *state = get_level(cpufreq_cdev, target_freq); @@ -671,8 +595,6 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table, * @policy: cpufreq policy * Normally this should be same as cpufreq policy->related_cpus. 
* @capacitance: dynamic power coefficient for these cpus - * @plat_static_func: function to calculate the static power consumed by these - * cpus (optional) * * This interface function registers the cpufreq cooling device with the name * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq @@ -684,8 +606,7 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table, */ static struct thermal_cooling_device * __cpufreq_cooling_register(struct device_node *np, - struct cpufreq_policy *policy, u32 capacitance, - get_static_t plat_static_func) + struct cpufreq_policy *policy, u32 capacitance) { struct thermal_cooling_device *cdev; struct cpufreq_cooling_device *cpufreq_cdev; @@ -755,8 +676,6 @@ __cpufreq_cooling_register(struct device_node *np, } if (capacitance) { - cpufreq_cdev->plat_get_static_power = plat_static_func; - ret = update_freq_table(cpufreq_cdev, capacitance); if (ret) { cdev = ERR_PTR(ret); @@ -813,7 +732,7 @@ free_cdev: struct thermal_cooling_device * cpufreq_cooling_register(struct cpufreq_policy *policy) { - return __cpufreq_cooling_register(NULL, policy, 0, NULL); + return __cpufreq_cooling_register(NULL, policy, 0); } EXPORT_SYMBOL_GPL(cpufreq_cooling_register); @@ -853,8 +772,7 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy) of_property_read_u32(np, "dynamic-power-coefficient", &capacitance); - cdev = __cpufreq_cooling_register(np, policy, capacitance, - NULL); + cdev = __cpufreq_cooling_register(np, policy, capacitance); if (IS_ERR(cdev)) { pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n", policy->cpu, PTR_ERR(cdev)); diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index fd0ea8ddca93..de0dafb9399d 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -30,9 +30,6 @@ struct cpufreq_policy; -typedef int (*get_static_t)(cpumask_t *cpumask, int interval, - unsigned long voltage, u32 *power); - #ifdef CONFIG_CPU_THERMAL /** * 
cpufreq_cooling_register - function to create cpufreq cooling device. diff --git a/include/trace/events/thermal.h b/include/trace/events/thermal.h index 78946640fe03..135e5421f003 100644 --- a/include/trace/events/thermal.h +++ b/include/trace/events/thermal.h @@ -94,9 +94,9 @@ TRACE_EVENT(thermal_zone_trip, #ifdef CONFIG_CPU_THERMAL TRACE_EVENT(thermal_power_cpu_get_power, TP_PROTO(const struct cpumask *cpus, unsigned long freq, u32 *load, - size_t load_len, u32 dynamic_power, u32 static_power), + size_t load_len, u32 dynamic_power), - TP_ARGS(cpus, freq, load, load_len, dynamic_power, static_power), + TP_ARGS(cpus, freq, load, load_len, dynamic_power), TP_STRUCT__entry( __bitmask(cpumask, num_possible_cpus()) @@ -104,7 +104,6 @@ TRACE_EVENT(thermal_power_cpu_get_power, __dynamic_array(u32, load, load_len) __field(size_t, load_len ) __field(u32, dynamic_power ) - __field(u32, static_power ) ), TP_fast_assign( @@ -115,13 +114,12 @@ TRACE_EVENT(thermal_power_cpu_get_power, load_len * sizeof(*load)); __entry->load_len = load_len; __entry->dynamic_power = dynamic_power; - __entry->static_power = static_power; ), - TP_printk("cpus=%s freq=%lu load={%s} dynamic_power=%d static_power=%d", + TP_printk("cpus=%s freq=%lu load={%s} dynamic_power=%d", __get_bitmask(cpumask), __entry->freq, __print_array(__get_dynamic_array(load), __entry->load_len, 4), - __entry->dynamic_power, __entry->static_power) + __entry->dynamic_power) ); TRACE_EVENT(thermal_power_cpu_limit, -- cgit v1.2.3 From 96307a0a75d8f1847debefd6a402339aac43e224 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Dec 2017 14:26:09 +0100 Subject: netfilter: ipt_CLUSTERIP: fix clusterip_net_exit build regression The added check produces a build error when CONFIG_PROC_FS is disabled: net/ipv4/netfilter/ipt_CLUSTERIP.c: In function 'clusterip_net_exit': net/ipv4/netfilter/ipt_CLUSTERIP.c:822:28: error: 'cn' undeclared (first use in this function) This moves the variable declaration out of the #ifdef to make it 
available to the WARN_ON_ONCE(). Fixes: 613d0776d3fe ("netfilter: exit_net cleanup check added") Signed-off-by: Arnd Bergmann Reviewed-by: Vasily Averin Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index e35b8d074f06..69060e3abe85 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -813,8 +813,8 @@ static int clusterip_net_init(struct net *net) static void clusterip_net_exit(struct net *net) { -#ifdef CONFIG_PROC_FS struct clusterip_net *cn = net_generic(net, clusterip_net_id); +#ifdef CONFIG_PROC_FS proc_remove(cn->procdir); cn->procdir = NULL; #endif -- cgit v1.2.3 From 7f6344896d3e5be3ccfef1c5c98ef24940e5f229 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sat, 11 Nov 2017 23:17:26 +0530 Subject: powercap: Simplify powercap_init() Simplify powercap_init() by reducing the number of redundant assignments in it. Signed-off-by: Arvind Yadav [ rjw: Subject+changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/powercap_sys.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 5b10b50f8686..64b2b2501a79 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -673,15 +673,13 @@ EXPORT_SYMBOL_GPL(powercap_unregister_control_type); static int __init powercap_init(void) { - int result = 0; + int result; result = seed_constraint_attributes(); if (result) return result; - result = class_register(&powercap_class); - - return result; + return class_register(&powercap_class); } device_initcall(powercap_init); -- cgit v1.2.3 From 7e70aa789d4a0c89dbfbd2c8a974a4df717475ec Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 5 Dec 2017 15:52:56 +0800 Subject: scsi: core: run queue if SCSI device queue isn't ready and queue is idle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before commit 0df21c86bdbf ("scsi: implement .get_budget and .put_budget for blk-mq"), we run queue after 3ms if queue is idle and SCSI device queue isn't ready, which is done in handling BLK_STS_RESOURCE. After commit 0df21c86bdbf is introduced, queue won't be run any more under this situation. IO hang is observed when timeout happened, and this patch fixes the IO hang issue by running queue after delay in scsi_dev_queue_ready, just like non-mq. This issue can be triggered by the following script[1]. There is another issue which can be covered by running idle queue: when .get_budget() is called on request coming from hctx->dispatch_list, if one request just completes during .get_budget(), we can't depend on SCSI's restart to make progress any more. This patch fixes the race too. With this patch, we basically recover to previous behaviour (before commit 0df21c86bdbf) of handling idle queue when running out of resource. 
[1] script for test/verify SCSI timeout rmmod scsi_debug modprobe scsi_debug max_queue=1 DEVICE=`ls -d /sys/bus/pseudo/drivers/scsi_debug/adapter*/host*/target*/*/block/* | head -1 | xargs basename` DISK_DIR=`ls -d /sys/block/$DEVICE/device/scsi_disk/*` echo "using scsi device $DEVICE" echo "-1" >/sys/bus/pseudo/drivers/scsi_debug/every_nth echo "temporary write through" >$DISK_DIR/cache_type echo "128" >/sys/bus/pseudo/drivers/scsi_debug/opts echo none > /sys/block/$DEVICE/queue/scheduler dd if=/dev/$DEVICE of=/dev/null bs=1M iflag=direct count=1 & sleep 5 echo "0" >/sys/bus/pseudo/drivers/scsi_debug/opts wait echo "SUCCESS" Fixes: 0df21c86bdbf ("scsi: implement .get_budget and .put_budget for blk-mq") Signed-off-by: Ming Lei Tested-by: Holger Hoffstätte Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 00742c50cd44..d9ca1dfab154 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1967,6 +1967,8 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx) out_put_device: put_device(&sdev->sdev_gendev); out: + if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev)) + blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY); return false; } -- cgit v1.2.3 From 48d83282db077f93b2cf40de120f4d6f29eb293b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Dec 2017 15:14:18 +0100 Subject: scsi: bfa: fix type conversion warning A regression fix introduced a harmless type mismatch warning: drivers/scsi/bfa/bfad_bsg.c: In function 'bfad_im_bsg_vendor_request': drivers/scsi/bfa/bfad_bsg.c:3137:35: error: initialization of 'struct bfad_im_port_s *' from 'long unsigned int' makes pointer from integer without a cast [-Werror=int-conversion] struct bfad_im_port_s *im_port = shost->hostdata[0]; ^~~~~ drivers/scsi/bfa/bfad_bsg.c: In function 'bfad_im_bsg_els_ct_request': 
drivers/scsi/bfa/bfad_bsg.c:3353:35: error: initialization of 'struct bfad_im_port_s *' from 'long unsigned int' makes pointer from integer without a cast [-Werror=int-conversion] struct bfad_im_port_s *im_port = shost->hostdata[0]; This changes the code back to shost_priv() once more, but encapsulates it in an inline function to document the rather unusual way of using the private data only as a pointer to the previously allocated structure. I did not try to get rid of the extra indirection level entirely, which would have been rather invasive and required reworking the entire initialization sequence. Fixes: 45349821ab3a ("scsi: bfa: fix access to bfad_im_port_s") Signed-off-by: Arnd Bergmann Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/bfa/bfad_bsg.c | 4 ++-- drivers/scsi/bfa/bfad_im.c | 6 ++++-- drivers/scsi/bfa/bfad_im.h | 10 ++++++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c index 09ef68c8225f..b2fa195adc7a 100644 --- a/drivers/scsi/bfa/bfad_bsg.c +++ b/drivers/scsi/bfa/bfad_bsg.c @@ -3136,7 +3136,7 @@ bfad_im_bsg_vendor_request(struct bsg_job *job) struct fc_bsg_reply *bsg_reply = job->reply; uint32_t vendor_cmd = bsg_request->rqst_data.h_vendor.vendor_cmd[0]; struct Scsi_Host *shost = fc_bsg_to_shost(job); - struct bfad_im_port_s *im_port = shost->hostdata[0]; + struct bfad_im_port_s *im_port = bfad_get_im_port(shost); struct bfad_s *bfad = im_port->bfad; void *payload_kbuf; int rc = -EINVAL; @@ -3352,7 +3352,7 @@ bfad_im_bsg_els_ct_request(struct bsg_job *job) { struct bfa_bsg_data *bsg_data; struct Scsi_Host *shost = fc_bsg_to_shost(job); - struct bfad_im_port_s *im_port = shost->hostdata[0]; + struct bfad_im_port_s *im_port = bfad_get_im_port(shost); struct bfad_s *bfad = im_port->bfad; bfa_bsg_fcpt_t *bsg_fcpt; struct bfad_fcxp *drv_fcxp; diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c 
index 24e657a4ec80..c05d6e91e4bd 100644 --- a/drivers/scsi/bfa/bfad_im.c +++ b/drivers/scsi/bfa/bfad_im.c @@ -546,6 +546,7 @@ int bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port, struct device *dev) { + struct bfad_im_port_pointer *im_portp; int error = 1; mutex_lock(&bfad_mutex); @@ -564,7 +565,8 @@ bfad_im_scsi_host_alloc(struct bfad_s *bfad, struct bfad_im_port_s *im_port, goto out_free_idr; } - im_port->shost->hostdata[0] = (unsigned long)im_port; + im_portp = shost_priv(im_port->shost); + im_portp->p = im_port; im_port->shost->unique_id = im_port->idr_id; im_port->shost->this_id = -1; im_port->shost->max_id = MAX_FCP_TARGET; @@ -748,7 +750,7 @@ bfad_scsi_host_alloc(struct bfad_im_port_s *im_port, struct bfad_s *bfad) sht->sg_tablesize = bfad->cfg_data.io_max_sge; - return scsi_host_alloc(sht, sizeof(unsigned long)); + return scsi_host_alloc(sht, sizeof(struct bfad_im_port_pointer)); } void diff --git a/drivers/scsi/bfa/bfad_im.h b/drivers/scsi/bfa/bfad_im.h index c81ec2a77ef5..06ce4ba2b7bc 100644 --- a/drivers/scsi/bfa/bfad_im.h +++ b/drivers/scsi/bfa/bfad_im.h @@ -69,6 +69,16 @@ struct bfad_im_port_s { struct fc_vport *fc_vport; }; +struct bfad_im_port_pointer { + struct bfad_im_port_s *p; +}; + +static inline struct bfad_im_port_s *bfad_get_im_port(struct Scsi_Host *host) +{ + struct bfad_im_port_pointer *im_portp = shost_priv(host); + return im_portp->p; +} + enum bfad_itnim_state { ITNIM_STATE_NONE, ITNIM_STATE_ONLINE, -- cgit v1.2.3 From 75bf50f4aaa1c78d769d854ab3d975884909e4fb Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Thu, 7 Dec 2017 21:54:27 +0100 Subject: xfrm: fix xfrm_do_migrate() with AEAD e.g(AES-GCM) copy geniv when cloning the xfrm state. x->geniv was not copied to the new state and migration would fail. xfrm_do_migrate .. xfrm_state_clone() .. .. 
esp_init_aead() crypto_alloc_aead() crypto_alloc_tfm() crypto_find_alg() return EAGAIN and failed Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1f5cee2269af..88d0a563e141 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1344,6 +1344,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, if (orig->aead) { x->aead = xfrm_algo_aead_clone(orig->aead); + x->geniv = orig->geniv; if (!x->aead) goto error; } -- cgit v1.2.3 From a555e9d86ee384d9d3cb3310a57aed33f7e053d4 Mon Sep 17 00:00:00 2001 From: Cheng Jian Date: Thu, 7 Dec 2017 21:30:43 +0800 Subject: sched/fair: Remove unused 'curr' parameter from wakeup_gran The first parameter of wakeup_gran(), 'curr', is unnecessary now. Signed-off-by: Cheng Jian Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: huawei.libin@huawei.com Cc: xiexiuqi@huawei.com Link: http://lkml.kernel.org/r/1512653443-179848-1-git-send-email-cj.chengjian@huawei.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2fe3aa853e4d..2915c0d95107 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6449,8 +6449,7 @@ static void task_dead_fair(struct task_struct *p) } #endif /* CONFIG_SMP */ -static unsigned long -wakeup_gran(struct sched_entity *curr, struct sched_entity *se) +static unsigned long wakeup_gran(struct sched_entity *se) { unsigned long gran = sysctl_sched_wakeup_granularity; @@ -6492,7 +6491,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) if (vdiff <= 0) return -1; - gran = wakeup_gran(curr, se); + gran = wakeup_gran(se); if (vdiff > gran) return 1; -- cgit v1.2.3 From 732706afe1cc46ef48493b3d2b69c98f36314ae4 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 8 Dec 2017 08:07:25 +0100 
Subject: xfrm: Fix stack-out-of-bounds with misconfigured transport mode policies. On policies with a transport mode template, we pass the addresses from the flowi to xfrm_state_find(), assuming that the IP addresses (and address family) don't change during transformation. Unfortunately our policy template validation is not strict enough. It is possible to configure policies with transport mode template where the address family of the template does not match the selector's address family. This leads to stack-out-of-bound reads because we compare addresses of the wrong family. Fix this by refusing such a configuration, address family can not change on transport mode. We use the assumption that, on transport mode, the first template's address family must match the address family of the policy selector. Subsequent transport mode templates must match the address family of the previous template. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ff58c37469d6..bdb48e5dba04 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) { + u16 prev_family; int i; if (nr > XFRM_MAX_DEPTH) return -EINVAL; + prev_family = family; + for (i = 0; i < nr; i++) { /* We never validated the ut->family value, so many * applications simply leave it at zero.
The check was @@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family; + if ((ut[i].mode == XFRM_MODE_TRANSPORT) && + (ut[i].family != prev_family)) + return -EINVAL; + + prev_family = ut[i].family; + switch (ut[i].family) { case AF_INET: break; -- cgit v1.2.3 From 040d786032bf59002d374b86d75b04d97624005c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 30 Nov 2017 11:59:22 +0800 Subject: ceph: drop negative child dentries before try pruning inode's alias Negative child dentry holds reference on inode's alias, it makes d_prune_aliases() do nothing. Cc: stable@vger.kernel.org Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ab69dcb70e8a..1b468250e947 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1440,6 +1440,29 @@ static int __close_session(struct ceph_mds_client *mdsc, return request_close_session(mdsc, session); } +static bool drop_negative_children(struct dentry *dentry) +{ + struct dentry *child; + bool all_negative = true; + + if (!d_is_dir(dentry)) + goto out; + + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_child) { + if (d_really_is_positive(child)) { + all_negative = false; + break; + } + } + spin_unlock(&dentry->d_lock); + + if (all_negative) + shrink_dcache_parent(dentry); +out: + return all_negative; +} + /* * Trim old(er) caps. * @@ -1490,16 +1513,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) if ((used | wanted) & ~oissued & mine) goto out; /* we need these caps */ - session->s_trim_caps--; if (oissued) { /* we aren't the only cap.. 
just remove us */ __ceph_remove_cap(cap, true); + session->s_trim_caps--; } else { + struct dentry *dentry; /* try dropping referring dentries */ spin_unlock(&ci->i_ceph_lock); - d_prune_aliases(inode); - dout("trim_caps_cb %p cap %p pruned, count now %d\n", - inode, cap, atomic_read(&inode->i_count)); + dentry = d_find_any_alias(inode); + if (dentry && drop_negative_children(dentry)) { + int count; + dput(dentry); + d_prune_aliases(inode); + count = atomic_read(&inode->i_count); + if (count == 1) + session->s_trim_caps--; + dout("trim_caps_cb %p cap %p pruned, count now %d\n", + inode, cap, count); + } else { + dput(dentry); + } return 0; } -- cgit v1.2.3 From 451df7d110b82998c04a80d0de0f1e79aaa7792a Mon Sep 17 00:00:00 2001 From: Alejandro Mery Date: Fri, 8 Dec 2017 10:35:58 +0000 Subject: ARM: davinci: fix mmc entries in dm365's dma_slave_map fix mmc entries in dm365's dma_slave_map to match the actual device names Fixes: 0c750e1fe481 ("ARM: davinci: dm365: Add dma_slave_map to edma") Signed-off-by: Alejandro Mery Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm365.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 103316f01a22..5ace9380626a 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = { { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) }, { "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) }, { "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) }, - { "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, - { "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, - { "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, - { "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) }, + { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, + { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, + { "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, + { "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 
31) }, }; static struct edma_soc_info dm365_edma_pdata = { -- cgit v1.2.3 From 33cd3c07a976e11c3c4cc6b0b3db6760ad1590c5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Dec 2017 12:16:22 +0000 Subject: drm/armada: fix leak of crtc structure Fix the leak of the CRTC structure in the failure paths of armada_drm_crtc_create(). Signed-off-by: Russell King --- drivers/gpu/drm/armada/armada_crtc.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 2e065facdce7..50a3a97b4289 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -1225,17 +1225,13 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc", dcrtc); - if (ret < 0) { - kfree(dcrtc); - return ret; - } + if (ret < 0) + goto err_crtc; if (dcrtc->variant->init) { ret = dcrtc->variant->init(dcrtc, dev); - if (ret) { - kfree(dcrtc); - return ret; - } + if (ret) + goto err_crtc; } /* Ensure AXI pipeline is enabled */ @@ -1246,13 +1242,15 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, dcrtc->crtc.port = port; primary = kzalloc(sizeof(*primary), GFP_KERNEL); - if (!primary) - return -ENOMEM; + if (!primary) { + ret = -ENOMEM; + goto err_crtc; + } ret = armada_drm_plane_init(primary); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_universal_plane_init(drm, &primary->base, 0, @@ -1263,7 +1261,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL, @@ -1282,6 +1280,9 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, err_crtc_init: primary->base.funcs->destroy(&primary->base); +err_crtc: 
+ kfree(dcrtc); + return ret; } -- cgit v1.2.3 From 2bf57436d52b241044133fb0e2c7fd8320c6b02e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Dec 2017 12:16:22 +0000 Subject: drm/armada: fix SRAM powerdown Avoid powering down the overlay SRAM banks when disabling the primary plane, thereby masking any overlay video. This feature is supposed to allow us to cut the bandwidth required while displaying full-frame overlay video. Signed-off-by: Russell King --- drivers/gpu/drm/armada/armada_crtc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 50a3a97b4289..400a133c0576 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -744,15 +744,14 @@ void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, if (plane->fb) drm_framebuffer_put(plane->fb); - /* Power down the Y/U/V FIFOs */ - sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; - /* Power down most RAMs and FIFOs if this is the primary plane */ if (plane->type == DRM_PLANE_TYPE_PRIMARY) { - sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | - CFG_PDWN32x32 | CFG_PDWN64x66; + sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | + CFG_PDWN32x32 | CFG_PDWN64x66; dma_ctrl0_mask = CFG_GRA_ENA; } else { + /* Power down the Y/U/V FIFOs */ + sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; dma_ctrl0_mask = CFG_DMA_ENA; } -- cgit v1.2.3 From 9c898c495490b129bd4445630e3c6641e8389fc8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Dec 2017 12:16:22 +0000 Subject: drm/armada: fix UV swap code The UV swap code was not always programming things correctly when the source origin box has been offset. Fix this. 
Signed-off-by: Russell King --- drivers/gpu/drm/armada/armada_crtc.h | 2 ++ drivers/gpu/drm/armada/armada_overlay.c | 38 ++++++++++++++++----------------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_crtc.h b/drivers/gpu/drm/armada/armada_crtc.h index bab11f483575..bfd3514fbe9b 100644 --- a/drivers/gpu/drm/armada/armada_crtc.h +++ b/drivers/gpu/drm/armada/armada_crtc.h @@ -42,6 +42,8 @@ struct armada_plane_work { }; struct armada_plane_state { + u16 src_x; + u16 src_y; u32 src_hw; u32 dst_hw; u32 dst_yx; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index b411b608821a..aba947696178 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -99,6 +99,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, { struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane); struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + const struct drm_format_info *format; struct drm_rect src = { .x1 = src_x, .y1 = src_y, @@ -117,7 +118,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, }; uint32_t val, ctrl0; unsigned idx = 0; - bool visible; + bool visible, fb_changed; int ret; trace_armada_ovl_plane_update(plane, crtc, fb, @@ -138,6 +139,18 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (!visible) ctrl0 &= ~CFG_DMA_ENA; + /* + * Shifting a YUV packed format image by one pixel causes the U/V + * planes to swap. Compensate for it by also toggling the UV swap. 
+ */ + format = fb->format; + if (format->num_planes == 1 && src.x1 >> 16 & (format->hsub - 1)) + ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); + + fb_changed = plane->fb != fb || + dplane->base.state.src_x != src.x1 >> 16 || + dplane->base.state.src_y != src.y1 >> 16; + if (!dcrtc->plane) { dcrtc->plane = plane; armada_ovl_update_attr(&dplane->prop, dcrtc); @@ -145,7 +158,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, /* FIXME: overlay on an interlaced display */ /* Just updating the position/size? */ - if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) { + if (!fb_changed && dplane->base.state.ctrl0 == ctrl0) { val = (drm_rect_height(&src) & 0xffff0000) | drm_rect_width(&src) >> 16; dplane->base.state.src_hw = val; @@ -169,9 +182,8 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0) armada_drm_plane_work_cancel(dcrtc, &dplane->base); - if (plane->fb != fb) { - u32 addrs[3], pixel_format; - int num_planes, hsub; + if (fb_changed) { + u32 addrs[3]; /* * Take a reference on the new framebuffer - we want to @@ -182,23 +194,11 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, if (plane->fb) armada_ovl_retire_fb(dplane, plane->fb); - src_y = src.y1 >> 16; - src_x = src.x1 >> 16; + dplane->base.state.src_y = src_y = src.y1 >> 16; + dplane->base.state.src_x = src_x = src.x1 >> 16; armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y); - pixel_format = fb->format->format; - hsub = drm_format_horz_chroma_subsampling(pixel_format); - num_planes = fb->format->num_planes; - - /* - * Annoyingly, shifting a YUYV-format image by one pixel - * causes the U/V planes to toggle. Toggle the UV swap. - * (Unfortunately, this causes momentary colour flickering.) 
- */ - if (src_x & (hsub - 1) && num_planes == 1) - ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0], LCD_SPU_DMA_START_ADDR_Y0); armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1], -- cgit v1.2.3 From d6a48965db3d5f9b524ebfdd8c1fe3a4175d8e35 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Dec 2017 12:16:22 +0000 Subject: drm/armada: improve efficiency of armada_drm_plane_calc_addrs() Lookup the drm_format_info structure once when computing all the framebuffer plane addresses by using drm_format_info(), rather than repetitive lookups via drm_format_plane_cpp(). Signed-off-by: Russell King --- drivers/gpu/drm/armada/armada_crtc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 400a133c0576..7f7b3e738679 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -168,8 +168,9 @@ static void armada_drm_crtc_update(struct armada_crtc *dcrtc) void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, int x, int y) { + const struct drm_format_info *format = fb->format; + unsigned int num_planes = format->num_planes; u32 addr = drm_fb_obj(fb)->dev_addr; - int num_planes = fb->format->num_planes; int i; if (num_planes > 3) @@ -177,7 +178,7 @@ void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, for (i = 0; i < num_planes; i++) addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] + - x * fb->format->cpp[i]; + x * format->cpp[i]; for (; i < 3; i++) addrs[i] = 0; } -- cgit v1.2.3 From de0ea9ad2f548dd9e555cac27cf7ade1db5b26ea Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 8 Dec 2017 12:16:22 +0000 Subject: drm/armada: fix YUV planar format framebuffer offsets We weren't calculating the YUV planar offsets for subsampled chroma planes correctly - fix up the coordinates for planes 1 and 2.
Signed-off-by: Russell King --- drivers/gpu/drm/armada/armada_crtc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 7f7b3e738679..a0f4d2a2a481 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -176,7 +176,13 @@ void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, if (num_planes > 3) num_planes = 3; - for (i = 0; i < num_planes; i++) + addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] + + x * format->cpp[0]; + + y /= format->vsub; + x /= format->hsub; + + for (i = 1; i < num_planes; i++) addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] + x * format->cpp[i]; for (; i < 3; i++) -- cgit v1.2.3 From 64e279d6ccae6eecd94222464342cb5909c716dc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Dec 2017 15:23:40 +0100 Subject: ACPICA: Update information in MAINTAINERS Update the ACPICA information in MAINTAINERS to reflect recent maintainership changes. Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..328da252f018 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -321,7 +321,7 @@ F: drivers/acpi/apei/ ACPI COMPONENT ARCHITECTURE (ACPICA) M: Robert Moore -M: Lv Zheng +M: Erik Schmauss M: "Rafael J. Wysocki" L: linux-acpi@vger.kernel.org L: devel@acpica.org -- cgit v1.2.3 From 5790eabc6e7c3ce2d6ca2e3bbf4de467ce2b64b3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 8 Dec 2017 17:31:37 +0200 Subject: ptr_ring: fix up after recent ptr_ring changes Add more stubs to make it build. Fixes: 81fbfe8a ("ptr_ring: use kmalloc_array()") Signed-off-by: Michael S. 
Tsirkin --- tools/virtio/ringtest/ptr_ring.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index 38bb171aceba..e6e81305ef46 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -16,24 +16,41 @@ #define unlikely(x) (__builtin_expect(!!(x), 0)) #define likely(x) (__builtin_expect(!!(x), 1)) #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a)) +#define SIZE_MAX (~(size_t)0) + typedef pthread_spinlock_t spinlock_t; typedef int gfp_t; -static void *kmalloc(unsigned size, gfp_t gfp) -{ - return memalign(64, size); -} +#define __GFP_ZERO 0x1 -static void *kzalloc(unsigned size, gfp_t gfp) +static void *kmalloc(unsigned size, gfp_t gfp) { void *p = memalign(64, size); if (!p) return p; - memset(p, 0, size); + if (gfp & __GFP_ZERO) + memset(p, 0, size); return p; } +static inline void *kzalloc(unsigned size, gfp_t flags) +{ + return kmalloc(size, flags | __GFP_ZERO); +} + +static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) +{ + if (size != 0 && n > SIZE_MAX / size) + return NULL; + return kmalloc(n * size, flags); +} + +static inline void *kcalloc(size_t n, size_t size, gfp_t flags) +{ + return kmalloc_array(n, size, flags | __GFP_ZERO); +} + static void kfree(void *p) { if (p) -- cgit v1.2.3 From c1fd0abee0d52eb7e2871194b6c79d54792f515f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 7 Dec 2017 22:42:27 -0500 Subject: dm mpath: fix bio-based multipath queue_if_no_path handling Commit ca5beb76 ("dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH") caused bio-based DM-multipath to fail mptest's "test_02_sdev_delete". Restoring the logic that existed prior to commit ca5beb76 fixes this bio-based DM-multipath regression. Also verified all mptest tests pass with request-based DM-multipath. 
This commit effectively reverts commit ca5beb76 -- but it does so without reintroducing the need to take the m->lock spinlock in must_push_back_{rq,bio}. Fixes: ca5beb76 ("dm mpath: micro-optimize the hot path relative to MPATHF_QUEUE_IF_NO_PATH") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 49 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 35a2a2fa477f..f7810cc869ac 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -457,6 +457,38 @@ do { \ dm_noflush_suspending((m)->ti)); \ } while (0) +/* + * Check whether bios must be queued in the device-mapper core rather + * than here in the target. + * + * If MPATHF_QUEUE_IF_NO_PATH and MPATHF_SAVED_QUEUE_IF_NO_PATH hold + * the same value then we are not between multipath_presuspend() + * and multipath_resume() calls and we have no need to check + * for the DMF_NOFLUSH_SUSPENDING flag. 
+ */ +static bool __must_push_back(struct multipath *m, unsigned long flags) +{ + return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) != + test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &flags)) && + dm_noflush_suspending(m->ti)); +} + +/* + * Following functions use READ_ONCE to get atomic access to + * all m->flags to avoid taking spinlock + */ +static bool must_push_back_rq(struct multipath *m) +{ + unsigned long flags = READ_ONCE(m->flags); + return test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) || __must_push_back(m, flags); +} + +static bool must_push_back_bio(struct multipath *m) +{ + unsigned long flags = READ_ONCE(m->flags); + return __must_push_back(m, flags); +} + /* * Map cloned requests (request-based multipath) */ @@ -478,7 +510,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, pgpath = choose_pgpath(m, nr_bytes); if (!pgpath) { - if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) + if (must_push_back_rq(m)) return DM_MAPIO_DELAY_REQUEUE; dm_report_EIO(m); /* Failed */ return DM_MAPIO_KILL; @@ -553,7 +585,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m } if (!pgpath) { - if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) + if (must_push_back_bio(m)) return DM_MAPIO_REQUEUE; dm_report_EIO(m); return DM_MAPIO_KILL; @@ -651,8 +683,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) || (!save_old_value && queue_if_no_path)); - assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, - queue_if_no_path || dm_noflush_suspending(m->ti)); + assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path); spin_unlock_irqrestore(&m->lock, flags); if (!queue_if_no_path) { @@ -1486,7 +1517,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, fail_path(pgpath); if (atomic_read(&m->nr_valid_paths) == 0 && - !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 
+ !must_push_back_rq(m)) { if (error == BLK_STS_IOERR) dm_report_EIO(m); /* complete with the original error */ @@ -1521,8 +1552,12 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - dm_report_EIO(m); - *error = BLK_STS_IOERR; + if (must_push_back_bio(m)) { + r = DM_ENDIO_REQUEUE; + } else { + dm_report_EIO(m); + *error = BLK_STS_IOERR; + } goto done; } -- cgit v1.2.3 From fbc7c07ec23c040179384a1f16b62b6030eb6bdd Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 6 Dec 2017 09:27:30 -0800 Subject: dm bufio: fix shrinker scans when (nr_to_scan < retain_target) When system is under memory pressure it is observed that dm bufio shrinker often reclaims only one buffer per scan. This change fixes the following two issues in dm bufio shrinker that cause this behavior: 1. ((nr_to_scan - freed) <= retain_target) condition is used to terminate slab scan process. This assumes that nr_to_scan is equal to the LRU size, which might not be correct because do_shrink_slab() in vmscan.c calculates nr_to_scan using multiple inputs. As a result when nr_to_scan is less than retain_target (64) the scan will terminate after the first iteration, effectively reclaiming one buffer per scan and making scans very inefficient. This hurts vmscan performance especially because mutex is acquired/released every time dm_bufio_shrink_scan() is called. New implementation uses ((LRU size - freed) <= retain_target) condition for scan termination. LRU size can be safely determined inside __scan() because this function is called after dm_bufio_lock(). 2. do_shrink_slab() uses value returned by dm_bufio_shrink_count() to determine number of freeable objects in the slab. However dm_bufio always retains retain_target buffers in its LRU and will terminate a scan when this mark is reached. 
Therefore returning the entire LRU size from dm_bufio_shrink_count() is misleading because that does not represent the number of freeable objects that slab will reclaim during a scan. Returning (LRU size - retain_target) better represents the number of freeable objects in the slab. This way do_shrink_slab() returns 0 when (LRU size < retain_target) and vmscan will not try to scan this shrinker avoiding scans that will not reclaim any memory. Test: tested using Android device running /system/extras/alloc-stress that generates memory pressure and causes intensive shrinker scans Signed-off-by: Suren Baghdasaryan Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index b8ac591aaaa7..c546b567f3b5 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1611,7 +1611,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, int l; struct dm_buffer *b, *tmp; unsigned long freed = 0; - unsigned long count = nr_to_scan; + unsigned long count = c->n_buffers[LIST_CLEAN] + + c->n_buffers[LIST_DIRTY]; unsigned long retain_target = get_retain_buffers(c); for (l = 0; l < LIST_SIZE; l++) { @@ -1647,8 +1648,11 @@ static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker); + unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) + + READ_ONCE(c->n_buffers[LIST_DIRTY]); + unsigned long retain_target = get_retain_buffers(c); - return READ_ONCE(c->n_buffers[LIST_CLEAN]) + READ_ONCE(c->n_buffers[LIST_DIRTY]); + return (count < retain_target) ? 
0 : (count - retain_target); } /* -- cgit v1.2.3 From bd3486ded7a0c313a6575343e6c2b21d14476645 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 5 Dec 2017 08:45:30 -0600 Subject: usb: musb: da8xx: fix babble condition handling When babble condition happens, the musb controller might automatically turn off VBUS. On DA8xx platform, the controller generates drvvbus interrupt for turning off VBUS along with the babble interrupt. In this case, we should handle the babble interrupt first and recover from the babble condition. This change ignores the drvvbus interrupt if babble interrupt is also generated at the same time, so the babble recovery routine works properly. Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/da8xx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c index 0397606a211b..6c036de63272 100644 --- a/drivers/usb/musb/da8xx.c +++ b/drivers/usb/musb/da8xx.c @@ -284,7 +284,15 @@ static irqreturn_t da8xx_musb_interrupt(int irq, void *hci) musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE; portstate(musb->port1_status |= USB_PORT_STAT_POWER); del_timer(&musb->dev_timer); - } else { + } else if (!(musb->int_usb & MUSB_INTR_BABBLE)) { + /* + * When babble condition happens, drvvbus interrupt + * is also generated. Ignore this drvvbus interrupt + * and let babble interrupt handler recovers the + * controller; otherwise, the host-mode flag is lost + * due to the MUSB_DEV_MODE() call below and babble + * recovery logic will not be called.
+ */ musb->is_active = 0; MUSB_DEV_MODE(musb); otg->default_a = 0; -- cgit v1.2.3 From 62354454625741f0569c2cbe45b2d192f8fd258e Mon Sep 17 00:00:00 2001 From: David Kozub Date: Tue, 5 Dec 2017 22:40:04 +0100 Subject: USB: uas and storage: Add US_FL_BROKEN_FUA for another JMicron JMS567 ID There is another JMS567-based USB3 UAS enclosure (152d:0578) that fails with the following error: [sda] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE [sda] tag#0 Sense Key : Illegal Request [current] [sda] tag#0 Add. Sense: Invalid field in cdb The issue occurs both with UAS (occasionally) and mass storage (immediately after mounting a FS on a disk in the enclosure). Enabling US_FL_BROKEN_FUA quirk solves this issue. This patch adds an UNUSUAL_DEV with US_FL_BROKEN_FUA for the enclosure for both UAS and mass storage. Signed-off-by: David Kozub Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_devs.h | 7 +++++++ drivers/usb/storage/unusual_uas.h | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 2968046e7c05..f72d045ee9ef 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2100,6 +2100,13 @@ UNUSUAL_DEV( 0x152d, 0x0567, 0x0114, 0x0116, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA ), +/* Reported by David Kozub */ +UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, + "JMicron", + "JMS567", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BROKEN_FUA), + /* * Reported by Alexandre Oliva * JMicron responds to USN and several other SCSI ioctls with a diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index d520374a824e..e6127fb21c12 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -129,6 +129,13 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES), +/* 
Reported-by: David Kozub */ +UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, + "JMicron", + "JMS567", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_BROKEN_FUA), + /* Reported-by: Hans de Goede */ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, "VIA", -- cgit v1.2.3 From 82a2b827c96883d8b39a58bba23d222d6b0de7ff Mon Sep 17 00:00:00 2001 From: Julien BOIBESSOT Date: Tue, 5 Dec 2017 16:09:04 +0100 Subject: tools/usbip: fixes potential (minor) "buffer overflow" (detected on recent gcc with -Werror) Fixes following build error: vhci_driver.c: In function 'refresh_imported_device_list': vhci_driver.c:118:37: error: 'snprintf' output may be truncated before the last format character [-Werror=format-truncation=] snprintf(status, sizeof(status), "status.%d", i); ^~~~~~~~~~~ vhci_driver.c:118:4: note: 'snprintf' output between 9 and 18 bytes into a destination of size 17 snprintf(status, sizeof(status), "status.%d", i); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Signed-off-by: Julien BOIBESSOT Acked-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/vhci_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index 8a1cd1616de4..627d1dfc332b 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -106,7 +106,7 @@ static int parse_status(const char *value) return 0; } -#define MAX_STATUS_NAME 16 +#define MAX_STATUS_NAME 18 static int refresh_imported_device_list(void) { -- cgit v1.2.3 From 635f545a7e8be7596b9b2b6a43cab6bbd5a88e43 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:47 -0700 Subject: usbip: fix stub_rx: get_pipe() to validate endpoint number get_pipe() routine doesn't validate the input endpoint number and uses to reference ep_in and ep_out arrays. Invalid endpoint number can trigger BUG(). 
Range check the epnum and return an error instead of calling BUG(). Change caller stub_recv_cmd_submit() to handle the get_pipe() error return. Reported-by: Secunia Research Cc: stable Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 536e037f541f..4d61063c259d 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -328,15 +328,15 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) struct usb_host_endpoint *ep; struct usb_endpoint_descriptor *epd = NULL; + if (epnum < 0 || epnum > 15) + goto err_ret; + if (dir == USBIP_DIR_IN) ep = udev->ep_in[epnum & 0x7f]; else ep = udev->ep_out[epnum & 0x7f]; - if (!ep) { - dev_err(&sdev->udev->dev, "no such endpoint?, %d\n", - epnum); - BUG(); - } + if (!ep) + goto err_ret; epd = &ep->desc; if (usb_endpoint_xfer_control(epd)) { @@ -367,9 +367,10 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) return usb_rcvisocpipe(udev, epnum); } +err_ret: /* NOT REACHED */ - dev_err(&sdev->udev->dev, "get pipe, epnum %d\n", epnum); - return 0; + dev_err(&sdev->udev->dev, "get pipe() invalid epnum %d\n", epnum); + return -1; } static void masking_bogus_flags(struct urb *urb) @@ -435,6 +436,9 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, struct usb_device *udev = sdev->udev; int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction); + if (pipe == -1) + return; + priv = stub_priv_alloc(sdev, pdu); if (!priv) return; -- cgit v1.2.3 From c6688ef9f29762e65bce325ef4acd6c675806366 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:48 -0700 Subject: usbip: fix stub_rx: harden CMD_SUBMIT path to handle malicious input Harden CMD_SUBMIT path to handle malicious input that could trigger large memory allocations.
Add checks to validate transfer_buffer_length and number_of_packets to protect against bad input requesting for unbounded memory allocations. Validate early in get_pipe() and return failure. Reported-by: Secunia Research Cc: stable Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 4d61063c259d..493ac2928391 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -322,11 +322,13 @@ static struct stub_priv *stub_priv_alloc(struct stub_device *sdev, return priv; } -static int get_pipe(struct stub_device *sdev, int epnum, int dir) +static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) { struct usb_device *udev = sdev->udev; struct usb_host_endpoint *ep; struct usb_endpoint_descriptor *epd = NULL; + int epnum = pdu->base.ep; + int dir = pdu->base.direction; if (epnum < 0 || epnum > 15) goto err_ret; @@ -339,6 +341,15 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) goto err_ret; epd = &ep->desc; + + /* validate transfer_buffer_length */ + if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) { + dev_err(&sdev->udev->dev, + "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n", + pdu->u.cmd_submit.transfer_buffer_length); + return -1; + } + if (usb_endpoint_xfer_control(epd)) { if (dir == USBIP_DIR_OUT) return usb_sndctrlpipe(udev, epnum); @@ -361,6 +372,21 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) } if (usb_endpoint_xfer_isoc(epd)) { + /* validate packet size and number of packets */ + unsigned int maxp, packets, bytes; + + maxp = usb_endpoint_maxp(epd); + maxp *= usb_endpoint_maxp_mult(epd); + bytes = pdu->u.cmd_submit.transfer_buffer_length; + packets = DIV_ROUND_UP(bytes, maxp); + + if (pdu->u.cmd_submit.number_of_packets < 0 || + pdu->u.cmd_submit.number_of_packets > packets) 
{ + dev_err(&sdev->udev->dev, + "CMD_SUBMIT: isoc invalid num packets %d\n", + pdu->u.cmd_submit.number_of_packets); + return -1; + } if (dir == USBIP_DIR_OUT) return usb_sndisocpipe(udev, epnum); else @@ -369,7 +395,7 @@ static int get_pipe(struct stub_device *sdev, int epnum, int dir) err_ret: /* NOT REACHED */ - dev_err(&sdev->udev->dev, "get pipe() invalid epnum %d\n", epnum); + dev_err(&sdev->udev->dev, "CMD_SUBMIT: invalid epnum %d\n", epnum); return -1; } @@ -434,7 +460,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, struct stub_priv *priv; struct usbip_device *ud = &sdev->ud; struct usb_device *udev = sdev->udev; - int pipe = get_pipe(sdev, pdu->base.ep, pdu->base.direction); + int pipe = get_pipe(sdev, pdu); if (pipe == -1) return; @@ -456,7 +482,8 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, } /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0) { + if (pdu->u.cmd_submit.transfer_buffer_length > 0 && + pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) { priv->urb->transfer_buffer = kzalloc(pdu->u.cmd_submit.transfer_buffer_length, GFP_KERNEL); -- cgit v1.2.3 From 2f2d0088eb93db5c649d2a5e34a3800a8a935fc5 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:49 -0700 Subject: usbip: prevent vhci_hcd driver from leaking a socket pointer address When a client has a USB device attached over IP, the vhci_hcd driver is locally leaking a socket pointer address via the /sys/devices/platform/vhci_hcd/status file (world-readable) and in debug output when "usbip --debug port" is run. Fix it to not leak. The socket pointer address is not used at the moment and it was made visible as a convenient way to find IP address from socket pointer address by looking up /proc/net/{tcp,tcp6}. As this opens a security hole, the fix replaces socket pointer address with sockfd. 
Reported-by: Secunia Research Cc: stable Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.h | 1 + drivers/usb/usbip/vhci_sysfs.c | 25 ++++++++++++++++--------- tools/usb/usbip/libsrc/vhci_driver.c | 8 ++++---- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index e5de35c8c505..473fb8a87289 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -256,6 +256,7 @@ struct usbip_device { /* lock for status */ spinlock_t lock; + int sockfd; struct socket *tcp_socket; struct task_struct *tcp_rx; diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index e78f7472cac4..091f76b7196d 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -17,15 +17,20 @@ /* * output example: - * hub port sta spd dev socket local_busid - * hs 0000 004 000 00000000 c5a7bb80 1-2.3 + * hub port sta spd dev sockfd local_busid + * hs 0000 004 000 00000000 3 1-2.3 * ................................................ - * ss 0008 004 000 00000000 d8cee980 2-3.4 + * ss 0008 004 000 00000000 4 2-3.4 * ................................................ * - * IP address can be retrieved from a socket pointer address by looking - * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a - * port number and its peer IP address. + * Output includes socket fd instead of socket pointer address to avoid + * leaking kernel memory address in: + * /sys/devices/platform/vhci_hcd.0/status and in debug output. + * The socket pointer address is not used at the moment and it was made + * visible as a convenient way to find IP address from socket pointer + * address by looking up /proc/net/{tcp,tcp6}. As this opens a security + * hole, the change is made to use sockfd instead. 
+ * */ static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vdev) { @@ -39,8 +44,8 @@ static void port_show_vhci(char **out, int hub, int port, struct vhci_device *vd if (vdev->ud.status == VDEV_ST_USED) { *out += sprintf(*out, "%03u %08x ", vdev->speed, vdev->devid); - *out += sprintf(*out, "%16p %s", - vdev->ud.tcp_socket, + *out += sprintf(*out, "%u %s", + vdev->ud.sockfd, dev_name(&vdev->udev->dev)); } else { @@ -160,7 +165,8 @@ static ssize_t nports_show(struct device *dev, struct device_attribute *attr, char *s = out; /* - * Half the ports are for SPEED_HIGH and half for SPEED_SUPER, thus the * 2. + * Half the ports are for SPEED_HIGH and half for SPEED_SUPER, + * thus the * 2. */ out += sprintf(out, "%d\n", VHCI_PORTS * vhci_num_controllers); return out - s; @@ -366,6 +372,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr, vdev->devid = devid; vdev->speed = speed; + vdev->ud.sockfd = sockfd; vdev->ud.tcp_socket = socket; vdev->ud.status = VDEV_ST_NOTASSIGNED; diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index 627d1dfc332b..c9c81614a66a 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -50,14 +50,14 @@ static int parse_status(const char *value) while (*c != '\0') { int port, status, speed, devid; - unsigned long socket; + int sockfd; char lbusid[SYSFS_BUS_ID_SIZE]; struct usbip_imported_device *idev; char hub[3]; - ret = sscanf(c, "%2s %d %d %d %x %lx %31s\n", + ret = sscanf(c, "%2s %d %d %d %x %u %31s\n", hub, &port, &status, &speed, - &devid, &socket, lbusid); + &devid, &sockfd, lbusid); if (ret < 5) { dbg("sscanf failed: %d", ret); @@ -66,7 +66,7 @@ static int parse_status(const char *value) dbg("hub %s port %d status %d speed %d devid %x", hub, port, status, speed, devid); - dbg("socket %lx lbusid %s", socket, lbusid); + dbg("sockfd %u lbusid %s", sockfd, lbusid); /* if a device is connected, look at it */ 
idev = &vhci_driver->idev[port]; -- cgit v1.2.3 From be6123df1ea8f01ee2f896a16c2b7be3e4557a5a Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 7 Dec 2017 14:16:50 -0700 Subject: usbip: fix stub_send_ret_submit() vulnerability to null transfer_buffer stub_send_ret_submit() handles urb with a potential null transfer_buffer, when it replays a packet with potential malicious data that could contain a null buffer. Add a check for the condition when actual_length > 0 and transfer_buffer is null. Reported-by: Secunia Research Cc: stable Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_tx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index b18bce96c212..53172b1f6257 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -167,6 +167,13 @@ static int stub_send_ret_submit(struct stub_device *sdev) memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); + if (urb->actual_length > 0 && !urb->transfer_buffer) { + dev_err(&sdev->udev->dev, + "urb: actual_length %d transfer_buffer null\n", + urb->actual_length); + return -1; + } + if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) iovnum = 2 + urb->number_of_packets; else -- cgit v1.2.3 From 50dd2ea8ef67a1617e0c0658bcbec4b9fb03b936 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 8 Dec 2017 16:15:20 +0000 Subject: ASoC: wm_adsp: Fix validation of firmware and coeff lengths The checks for whether another region/block header could be present are subtracting the size from the current offset. Obviously we should instead subtract the offset from the size. The checks for whether the region/block data fit in the file are adding the data size to the current offset and header size, without checking for integer overflow. Rearrange these so that overflow is impossible. 
Signed-off-by: Ben Hutchings Acked-by: Charles Keepax Tested-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/wm_adsp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 65c059b5ffd7..66e32f5d2917 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) le64_to_cpu(footer->timestamp)); while (pos < firmware->size && - pos - firmware->size > sizeof(*region)) { + sizeof(*region) < firmware->size - pos) { region = (void *)&(firmware->data[pos]); region_name = "Unknown"; reg = 0; @@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp) regions, le32_to_cpu(region->len), offset, region_name); - if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > - firmware->size) { + if (le32_to_cpu(region->len) > + firmware->size - pos - sizeof(*region)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, regions, region_name, @@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) blocks = 0; while (pos < firmware->size && - pos - firmware->size > sizeof(*blk)) { + sizeof(*blk) < firmware->size - pos) { blk = (void *)(&firmware->data[pos]); type = le16_to_cpu(blk->type); @@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) } if (reg) { - if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > - firmware->size) { + if (le32_to_cpu(blk->len) > + firmware->size - pos - sizeof(*blk)) { adsp_err(dsp, "%s.%d: %s region len %d bytes exceeds file length %zu\n", file, blocks, region_name, -- cgit v1.2.3 From 5d9b70f7d52eb14bb37861c663bae44de9521c35 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 8 Dec 2017 18:10:05 +0200 Subject: xhci: Don't add a virt_dev to the devs array before it's fully allocated Avoid null pointer dereference if some function is walking through the devs array accessing 
members of a new virt_dev that is mid allocation. Add the virt_dev to xhci->devs[i] _after_ the virt_device and all its members are properly allocated. issue found by KASAN: null-ptr-deref in xhci_find_slot_id_by_port "Quick analysis suggests that xhci_alloc_virt_device() is not mutex protected. If so, there is a time frame where xhci->devs[slot_id] is set but not fully initialized. Specifically, xhci->devs[i]->udev can be NULL." Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 15f7d422885f..3a29b32a3bd0 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -971,10 +971,9 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, return 0; } - xhci->devs[slot_id] = kzalloc(sizeof(*xhci->devs[slot_id]), flags); - if (!xhci->devs[slot_id]) + dev = kzalloc(sizeof(*dev), flags); + if (!dev) return 0; - dev = xhci->devs[slot_id]; /* Allocate the (output) device context that will be used in the HC. */ dev->out_ctx = xhci_alloc_container_ctx(xhci, XHCI_CTX_TYPE_DEVICE, flags); @@ -1015,9 +1014,17 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id, trace_xhci_alloc_virt_device(dev); + xhci->devs[slot_id] = dev; + return 1; fail: - xhci_free_virt_device(xhci, slot_id); + + if (dev->in_ctx) + xhci_free_container_ctx(xhci, dev->in_ctx); + if (dev->out_ctx) + xhci_free_container_ctx(xhci, dev->out_ctx); + kfree(dev); + return 0; } -- cgit v1.2.3 From 72b663a99c074a8d073e7ecdae446cfb024ef551 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 8 Dec 2017 18:10:06 +0200 Subject: usb: xhci: fix TDS for MTK xHCI1.1 For MTK's xHCI 1.0 or later, TD size is the number of max packet sized packets remaining in the TD, not including this TRB (following spec).
For MTK's xHCI 0.96 and older, TD size is the number of max packet sized packets remaining in the TD, including this TRB (not following spec). Cc: stable Signed-off-by: Chunfeng Yun Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 6eb87c6e4d24..c5cbc685c691 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3112,7 +3112,7 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, { u32 maxp, total_packet_count; - /* MTK xHCI is mostly 0.97 but contains some features from 1.0 */ + /* MTK xHCI 0.96 contains some features from 1.0 */ if (xhci->hci_version < 0x100 && !(xhci->quirks & XHCI_MTK_HOST)) return ((td_total_len - transferred) >> 10); @@ -3121,8 +3121,8 @@ static u32 xhci_td_remainder(struct xhci_hcd *xhci, int transferred, trb_buff_len == td_total_len) return 0; - /* for MTK xHCI, TD size doesn't include this TRB */ - if (xhci->quirks & XHCI_MTK_HOST) + /* for MTK xHCI 0.96, TD size include this TRB, but not in 1.x */ + if ((xhci->quirks & XHCI_MTK_HOST) && (xhci->hci_version < 0x100)) trb_buff_len = 0; maxp = usb_endpoint_maxp(&urb->ep->desc); -- cgit v1.2.3 From 0f0be40ba59c2d5fdfea48e3ff93f6165d616440 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 8 Dec 2017 15:18:53 +0100 Subject: ASoC: atmel-classd: select correct Kconfig symbol SND_ATMEL_SOC_CLASSD selects SND_ATMEL_SOC_DMA but the driver itself handles its own DMA operations and doesn't need anything from atmel-pcm-dma.c or atmel_ssc_dai.c. Replace SND_ATMEL_SOC_DMA by SND_SOC_GENERIC_DMAENGINE_PCM which is the only one actually required. 
This may end up in a configuration leading to a link error: sound/soc/atmel/atmel_ssc_dai.o: In function `atmel_ssc_set_audio': atmel_ssc_dai.c:(.text+0x79c): undefined reference to `atmel_pcm_dma_platform_register' atmel_ssc_dai.c:(.text+0x79c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `atmel_pcm_dma_platform_register' sound/soc/atmel/atmel_ssc_dai.o: In function `atmel_ssc_put_audio': atmel_ssc_dai.c:(.text+0xf24): undefined reference to `atmel_pcm_dma_platform_unregister' atmel_ssc_dai.c:(.text+0xf24): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `atmel_pcm_dma_platform_unregister' Tested on sama5d2 xplained with the following configuration where nothing selects SND_ATMEL_SOC_DMA: CONFIG_SND_ATMEL_SOC=y CONFIG_SND_ATMEL_SOC_CLASSD=y Reported-by: Arnd Bergmann Tested-by: Arnd Bergmann Fixes: e0a25b6d1862 ("ASoC: atmel-classd: add the Audio Class D Amplifier") Signed-off-by: Alexandre Belloni Signed-off-by: Mark Brown --- sound/soc/atmel/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 4a56f3dfba51..dcee145dd179 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731 config SND_ATMEL_SOC_CLASSD tristate "Atmel ASoC driver for boards using CLASSD" depends on ARCH_AT91 || COMPILE_TEST - select SND_ATMEL_SOC_DMA + select SND_SOC_GENERIC_DMAENGINE_PCM select REGMAP_MMIO help Say Y if you want to add support for Atmel ASoC driver for boards using -- cgit v1.2.3 From 4362934a75ff2a399fd0bcd75937907115770020 Mon Sep 17 00:00:00 2001 From: Naveen Manohar Date: Fri, 8 Dec 2017 09:30:18 +0530 Subject: ASoC: Intel: Change kern log level to avoid unwanted messages patch suppresses the warning message "control load not supported" as this is a debug information to help debug issues in topology. 
Signed-off-by: Naveen Manohar Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index a072bcf209d2..81923da18ac2 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt, break; default: - dev_warn(bus->dev, "Control load not supported %d:%d:%d\n", + dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n", hdr->ops.get, hdr->ops.put, hdr->ops.info); break; } -- cgit v1.2.3 From 33f801366bdf3f8b67dfe325b84f4051a090d01e Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Thu, 7 Dec 2017 22:15:38 -0800 Subject: ASoC: rsnd: ssi: fix race condition in rsnd_ssi_pointer_update Currently there is race condition between set of byte_pos and wrap it around when new buffer starts. If .pointer is called in-between it will result in inconsistent pointer position be returned from .pointer callback. This patch increments buffer pointer atomically to avoid this issue. 
Signed-off-by: Jiada Wang Reviewed-by: Takashi Sakamoto Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssi.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index fece1e5f582f..cbf3bf312d23 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod, int byte) { struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + bool ret = false; + int byte_pos; - ssi->byte_pos += byte; + byte_pos = ssi->byte_pos + byte; - if (ssi->byte_pos >= ssi->next_period_byte) { + if (byte_pos >= ssi->next_period_byte) { struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); ssi->period_pos++; ssi->next_period_byte += ssi->byte_per_period; if (ssi->period_pos >= runtime->periods) { - ssi->byte_pos = 0; + byte_pos = 0; ssi->period_pos = 0; ssi->next_period_byte = ssi->byte_per_period; } - return true; + ret = true; } - return false; + WRITE_ONCE(ssi->byte_pos, byte_pos); + + return ret; } /* @@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod, struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - *pointer = bytes_to_frames(runtime, ssi->byte_pos); + *pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos)); return 0; } -- cgit v1.2.3 From 2e2d53da81af6b2222c6b4e025a5d01b37b4449b Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Thu, 7 Dec 2017 22:15:39 -0800 Subject: ASoC: rsnd: ssi: remove unnecessary period_pos period_pos can always be calculated from byte_pos and byte_per_period, so there is no reason to maintain this variable in rsnd_dai_stream. This patch removes period_pos from rsnd_ssi and calculates next_period_byte with consideration of actual byte_pos value.
Signed-off-by: Jiada Wang Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssi.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index cbf3bf312d23..f21202429000 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -80,7 +80,6 @@ struct rsnd_ssi { unsigned int usrcnt; int byte_pos; - int period_pos; int byte_per_period; int next_period_byte; }; @@ -421,7 +420,6 @@ static void rsnd_ssi_pointer_init(struct rsnd_mod *mod, struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); ssi->byte_pos = 0; - ssi->period_pos = 0; ssi->byte_per_period = runtime->period_size * runtime->channels * samples_to_bytes(runtime, 1); @@ -453,13 +451,12 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod, if (byte_pos >= ssi->next_period_byte) { struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + int period_pos = byte_pos / ssi->byte_per_period; - ssi->period_pos++; - ssi->next_period_byte += ssi->byte_per_period; + ssi->next_period_byte = (period_pos + 1) * ssi->byte_per_period; - if (ssi->period_pos >= runtime->periods) { + if (period_pos >= runtime->periods) { byte_pos = 0; - ssi->period_pos = 0; ssi->next_period_byte = ssi->byte_per_period; } -- cgit v1.2.3 From a914e44693d41ba43604afa8c435c98a6d2c7cb1 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 8 Dec 2017 06:23:11 +0000 Subject: ASoC: rsnd: more clear rsnd_get_dalign() for DALIGN On Renesas sound device, DALIGN which exchanges channel position is needed because SW and HW are using different data order if 16bit data. It is not needed when 24bit data. rsnd_get_dalign() returns necessary value, but the code was confusing. This patch makes it simpler.
Tested-by: Hiroyuki Yokoyama Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index d76ad46a6fd9..8e50b284230d 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -294,11 +294,12 @@ u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io) struct rsnd_mod *ssiu = rsnd_io_to_mod_ssiu(io); struct rsnd_mod *target; struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - u32 val = 0x76543210; - u32 mask = ~0; /* - * *Hardware* L/R and *Software* L/R are inverted. + * *Hardware* L/R and *Software* L/R are inverted for 16bit data. + * 31..16 15...0 + * HW: [L ch] [R ch] + * SW: [R ch] [L ch] * We need to care about inversion timing to control * Playback/Capture correctly. * The point is [DVC] needs *Hardware* L/R, [MEM] needs *Software* L/R @@ -325,27 +326,13 @@ u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io) target = cmd ? 
cmd : ssiu; } - mask <<= runtime->channels * 4; - val = val & mask; - - switch (runtime->sample_bits) { - case 16: - val |= 0x67452301 & ~mask; - break; - case 32: - val |= 0x76543210 & ~mask; - break; - } - - /* - * exchange channeles on SRC if possible, - * otherwise, R/L volume settings on DVC - * changes inverted channels - */ - if (mod == target) - return val; - else + /* Non target mod or 24bit data needs normal DALIGN */ + if ((runtime->sample_bits != 16) || + (mod != target)) return 0x76543210; + /* Target mod needs inverted DALIGN when 16bit */ + else + return 0x67452301; } u32 rsnd_get_busif_shift(struct rsnd_dai_stream *io, struct rsnd_mod *mod) -- cgit v1.2.3 From babc8110057cb9ca542c3c1666cbda4e8ccf9250 Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Sat, 2 Dec 2017 18:40:39 +0100 Subject: drm/vc4: Release fence after signalling We were never releasing the initial fence reference that is obtained through dma_fence_init. Link: https://github.com/anholt/linux/issues/122 Fixes: cdec4d361323 ("drm/vc4: Expose dma-buf fences for V3D rendering.") Signed-off-by: Stefan Schake Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1512236444-301-1-git-send-email-stschake@gmail.com --- drivers/gpu/drm/vc4/vc4_gem.c | 4 +++- drivers/gpu/drm/vc4/vc4_irq.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 6c32c89a83a9..638540943c61 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -888,8 +888,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) /* If we got force-completed because of GPU reset rather than * through our IRQ handler, signal the fence now. 
*/ - if (exec->fence) + if (exec->fence) { dma_fence_signal(exec->fence); + dma_fence_put(exec->fence); + } if (exec->bo) { for (i = 0; i < exec->bo_count; i++) { diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 61b2e5377993..26eddbb62893 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -139,6 +139,7 @@ vc4_irq_finish_render_job(struct drm_device *dev) list_move_tail(&exec->head, &vc4->job_done_list); if (exec->fence) { dma_fence_signal_locked(exec->fence); + dma_fence_put(exec->fence); exec->fence = NULL; } vc4_submit_next_render_job(dev); -- cgit v1.2.3 From eaf0ec303bd73f6b2c18f48542974a710fadfeb9 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 10:16:15 -0800 Subject: fs: xfs: remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. Signed-off-by: Pravin Shedge Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/scrub/scrub.c | 1 - fs/xfs/scrub/trace.c | 1 - fs/xfs/xfs_reflink.c | 2 -- fs/xfs/xfs_trace.c | 1 - 4 files changed, 5 deletions(-) diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 9c42c4efd01e..ab3aef2ae823 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -46,7 +46,6 @@ #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" -#include "scrub/scrub.h" #include "scrub/btree.h" /* diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 472080e75788..86daed0e3a45 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -26,7 +26,6 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_da_format.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_trans.h" diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cc041a29eb70..cf7c8f81bebb 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -49,8 +49,6 @@ #include "xfs_alloc.h" #include "xfs_quota_defs.h" #include "xfs_quota.h" -#include "xfs_btree.h" -#include "xfs_bmap_btree.h" #include "xfs_reflink.h" #include "xfs_iomap.h" #include "xfs_rmap_btree.h" diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 5d95fe348294..35f3546b6af5 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -24,7 +24,6 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_da_format.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_da_btree.h" -- cgit v1.2.3 From f59cf5c29919d17b61913c3360a7bd29b72975c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 4 Dec 2017 17:32:55 -0800 Subject: xfs: remove "no-allocation" reservations for file creations If we create a new file we will need an inode, and usually some metadata in the parent directory. Aiming for everything to go well despite the lack of a reservation leads to dirty transactions cancelled under a heavy create/delete load.
This patch removes those nospace transactions, which will lead to slightly earlier ENOSPC on some workloads, but instead prevent file system shutdowns due to cancelling dirty transactions for others. A customer could observe assertion failures and shutdowns due to cancellation of dirty transactions during heavy NFS workloads as shown below: 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728125] XFS: Assertion failed: error != -ENOSPC, file: fs/xfs/xfs_inode.c, line: 1262 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728222] Call Trace: 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728246] [] dump_stack+0x63/0x81 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728262] [] warn_slowpath_common+0x8a/0xc0 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728264] [] warn_slowpath_null+0x1a/0x20 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728285] [] asswarn+0x33/0x40 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728308] [] xfs_create+0x7be/0x7d0 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728329] [] xfs_generic_create+0x1fb/0x2e0 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728348] [] xfs_vn_mknod+0x14/0x20 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728366] [] xfs_vn_create+0x13/0x20 [xfs] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728380] [] vfs_create+0xd5/0x140 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728390] [] do_nfsd_create+0x499/0x610 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728396] [] nfsd3_proc_create+0x135/0x210 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728401] [] nfsd_dispatch+0xc3/0x210 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728416] [] svc_process_common+0x453/0x6f0 [sunrpc] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728423] [] svc_process+0x113/0x1f0 [sunrpc] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728427] [] nfsd+0x10f/0x180 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728432] [] ?
nfsd_destroy+0x80/0x80 [nfsd] 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728438] [] kthread+0xd8/0xf0 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728441] [] ? kthread_create_on_node+0x1b0/0x1b0 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728451] [] ret_from_fork+0x42/0x70 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728453] [] ? kthread_create_on_node+0x1b0/0x1b0 2017-05-30 21:17:06 kernel: WARNING: [ 2670.728454] ---[ end trace f9822c842fec81d4 ]--- 2017-05-30 21:17:06 kernel: ALERT: [ 2670.728477] XFS (sdb): Internal error xfs_trans_cancel at line 983 of file fs/xfs/xfs_trans.c. Caller xfs_create+0x4ee/0x7d0 [xfs] 2017-05-30 21:17:06 kernel: ALERT: [ 2670.728684] XFS (sdb): Corruption of in-memory data detected. Shutting down filesystem 2017-05-30 21:17:06 kernel: ALERT: [ 2670.728685] XFS (sdb): Please umount the filesystem and rectify the problem(s) Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_ialloc.c | 10 +++------- fs/xfs/libxfs/xfs_ialloc.h | 1 - fs/xfs/xfs_inode.c | 33 +++++++-------------------------- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_qm.c | 4 ++-- fs/xfs/xfs_symlink.c | 15 +-------------- 6 files changed, 14 insertions(+), 51 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index de3f04a98656..3b57ef0f2f76 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -920,8 +920,7 @@ STATIC xfs_agnumber_t xfs_ialloc_ag_select( xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t parent, /* parent directory inode number */ - umode_t mode, /* bits set to indicate file type */ - int okalloc) /* ok to allocate more space */ + umode_t mode) /* bits set to indicate file type */ { xfs_agnumber_t agcount; /* number of ag's in the filesystem */ xfs_agnumber_t agno; /* current ag number */ @@ -978,9 +977,6 @@ xfs_ialloc_ag_select( return agno; } - if (!okalloc) - goto nextag; - if (!pag->pagf_init) { error = xfs_alloc_pagf_init(mp, tp, agno, 
flags); if (error) @@ -1680,7 +1676,6 @@ xfs_dialloc( struct xfs_trans *tp, xfs_ino_t parent, umode_t mode, - int okalloc, struct xfs_buf **IO_agbp, xfs_ino_t *inop) { @@ -1692,6 +1687,7 @@ xfs_dialloc( int noroom = 0; xfs_agnumber_t start_agno; struct xfs_perag *pag; + int okalloc = 1; if (*IO_agbp) { /* @@ -1707,7 +1703,7 @@ xfs_dialloc( * We do not have an agbp, so select an initial allocation * group for inode allocation. */ - start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + start_agno = xfs_ialloc_ag_select(tp, parent, mode); if (start_agno == NULLAGNUMBER) { *inop = NULLFSINO; return 0; diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index d2bdcd5e7312..66a8de0b1caa 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -81,7 +81,6 @@ xfs_dialloc( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t parent, /* parent inode (directory) */ umode_t mode, /* mode bits for new inode */ - int okalloc, /* ok to allocate more space */ struct xfs_buf **agbp, /* buf for a.g. inode header */ xfs_ino_t *inop); /* inode number allocated */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 801274126648..b41952a4ddd8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -749,7 +749,6 @@ xfs_ialloc( xfs_nlink_t nlink, dev_t rdev, prid_t prid, - int okalloc, xfs_buf_t **ialloc_context, xfs_inode_t **ipp) { @@ -765,7 +764,7 @@ xfs_ialloc( * Call the space management code to pick * the on-disk inode to be allocated. */ - error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, + error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, ialloc_context, &ino); if (error) return error; @@ -957,7 +956,6 @@ xfs_dir_ialloc( xfs_nlink_t nlink, dev_t rdev, prid_t prid, /* project id */ - int okalloc, /* ok to allocate new space */ xfs_inode_t **ipp, /* pointer to inode; it will be locked. 
*/ int *committed) @@ -988,8 +986,8 @@ xfs_dir_ialloc( * transaction commit so that no other process can steal * the inode(s) that we've just allocated. */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, - &ialloc_context, &ip); + code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context, + &ip); /* * Return an error if we were unable to allocate a new inode. @@ -1061,7 +1059,7 @@ xfs_dir_ialloc( * this call should always succeed. */ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, - okalloc, &ialloc_context, &ip); + &ialloc_context, &ip); /* * If we get an error at this point, return to the caller @@ -1182,11 +1180,6 @@ xfs_create( xfs_flush_inodes(mp); error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); } - if (error == -ENOSPC) { - /* No space at all so try a "no-allocation" reservation */ - resblks = 0; - error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp); - } if (error) goto out_release_inode; @@ -1203,19 +1196,13 @@ xfs_create( if (error) goto out_trans_cancel; - if (!resblks) { - error = xfs_dir_canenter(tp, dp, name); - if (error) - goto out_trans_cancel; - } - /* * A newly created regular or special file just has one directory * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, - prid, resblks > 0, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 
2 : 1, rdev, prid, &ip, + NULL); if (error) goto out_trans_cancel; @@ -1340,11 +1327,6 @@ xfs_create_tmpfile( tres = &M_RES(mp)->tr_create_tmpfile; error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); - if (error == -ENOSPC) { - /* No space at all so try a "no-allocation" reservation */ - resblks = 0; - error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp); - } if (error) goto out_release_inode; @@ -1353,8 +1335,7 @@ xfs_create_tmpfile( if (error) goto out_trans_cancel; - error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, - prid, resblks > 0, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL); if (error) goto out_trans_cancel; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index cc13c3763721..b2136af9289f 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -428,7 +428,7 @@ xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, - xfs_nlink_t, dev_t, prid_t, int, + xfs_nlink_t, dev_t, prid_t, struct xfs_inode **, int *); /* from xfs_file.c */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 010a13a201aa..ec952dfad359 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -793,8 +793,8 @@ xfs_qm_qino_alloc( return error; if (need_alloc) { - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, - &committed); + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip, + &committed); if (error) { xfs_trans_cancel(tp); return error; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 68d3ca2c4968..2e9e793a8f9d 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -232,11 +232,6 @@ xfs_symlink( resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp); - if (error == -ENOSPC && fs_blocks == 0) { - resblks = 0; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0, - &tp); - } if (error) goto out_release_inode; 
@@ -259,14 +254,6 @@ xfs_symlink( if (error) goto out_trans_cancel; - /* - * Check for ability to enter directory entry, if no space reserved. - */ - if (!resblks) { - error = xfs_dir_canenter(tp, dp, link_name); - if (error) - goto out_trans_cancel; - } /* * Initialize the bmap freelist prior to calling either * bmapi or the directory create code. @@ -277,7 +264,7 @@ xfs_symlink( * Allocate an inode for the symlink. */ error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, - prid, resblks > 0, &ip, NULL); + prid, &ip, NULL); if (error) goto out_trans_cancel; -- cgit v1.2.3 From b7e0b6ff54dd92febbb1914ab93cd6a21622e169 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 6 Dec 2017 16:13:35 -0800 Subject: xfs: make iomap_begin functions trim iomaps consistently Historically, the XFS iomap_begin function only returned mappings for exactly the range queried, i.e. it doesn't do XFS_BMAPI_ENTIRE lookups. The current vfs iomap consumers are only set up to deal with trimmed mappings. xfs_xattr_iomap_begin does BMAPI_ENTIRE lookups, which is inconsistent with the current iomap usage. Remove the flag so that both iomap_begin functions behave the same way. FWIW this also fixes a behavioral regression in xattr FIEMAP that was introduced in 4.8 wherein attr fork extents are no longer trimmed like they used to be. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_iomap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 33eb4fb2e3fd..7ab52a8bc0a9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1213,7 +1213,7 @@ xfs_xattr_iomap_begin( ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, - &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK); + &nimaps, XFS_BMAPI_ATTRFORK); out_unlock: xfs_iunlock(ip, lockmode); -- cgit v1.2.3 From d7ee946942bdd12394809305e3df05aa4c8b7b8f Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Wed, 11 Oct 2017 07:01:31 +0200 Subject: VFS: Handle lazytime in do_mount() Since commit e462ec50cb5fa ("VFS: Differentiate mount flags (MS_*) from internal superblock flags") the lazytime mount option doesn't get passed on anymore. Fix the issue by handling the option in do_mount(). Reviewed-by: Lukas Czerner Signed-off-by: Markus Trippelsdorf Signed-off-by: Al Viro --- fs/namespace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/namespace.c b/fs/namespace.c index e158ec6b527b..9d1374ab6e06 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, SB_DIRSYNC | SB_SILENT | SB_POSIXACL | + SB_LAZYTIME | SB_I_VERSION); if (flags & MS_REMOUNT) -- cgit v1.2.3 From 7bf5234db7cce45fa9ff237ce0f45da2bd277cad Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 28 Apr 2017 09:40:00 -0700 Subject: xtensa: add -mno-serialize-volatile to CFLAGS By default xtensa gcc inserts memw for all volatile object accesses. This is too pessimistic for the kernel: there should be no "normal" volatile objects, and all special objects, like MMIO or objects shared between CPUs should have explicit barriers. 
Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 7ee02fe4a63d..a206598b5d95 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -46,6 +46,7 @@ KBUILD_CFLAGS += -ffreestanding -D__linux__ KBUILD_CFLAGS += -pipe -mlongcalls KBUILD_CFLAGS += $(call cc-option,-mforce-no-pic,) +KBUILD_CFLAGS += $(call cc-option,-mno-serialize-volatile,) ifneq ($(CONFIG_LD_NO_RELAX),) LDFLAGS := --no-relax -- cgit v1.2.3 From f8f02ca73cd8d1e2ac61ea1e5f0574a8c1f472fa Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 3 Dec 2017 20:55:35 -0800 Subject: xtensa: build kernel with text-section-literals vmlinux.lds.S doesn't do anything special with literals, so instead of keeping them separate put them into the corresponding text sections. Drop explicit .literal sections from the vmlinux.lds.S, use standard section macros. Mark literal pool locations in the assembly sources. Unfortunately assembler doesn't put literals into .init sections and external libgcc may still have .literal sections, so sed transformation to the linker script is still needed. 
Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 6 +-- arch/xtensa/boot/boot-redboot/bootstrap.S | 1 + arch/xtensa/kernel/Makefile | 3 -- arch/xtensa/kernel/align.S | 2 +- arch/xtensa/kernel/entry.S | 8 +++ arch/xtensa/kernel/setup.c | 16 +++--- arch/xtensa/kernel/vectors.S | 14 ++--- arch/xtensa/kernel/vmlinux.lds.S | 90 +++++++++---------------------- 8 files changed, 52 insertions(+), 88 deletions(-) diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index a206598b5d95..3a934b72a272 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -42,12 +42,12 @@ export PLATFORM # temporarily until string.h is fixed KBUILD_CFLAGS += -ffreestanding -D__linux__ - -KBUILD_CFLAGS += -pipe -mlongcalls - +KBUILD_CFLAGS += -pipe -mlongcalls -mtext-section-literals KBUILD_CFLAGS += $(call cc-option,-mforce-no-pic,) KBUILD_CFLAGS += $(call cc-option,-mno-serialize-volatile,) +KBUILD_AFLAGS += -mlongcalls -mtext-section-literals + ifneq ($(CONFIG_LD_NO_RELAX),) LDFLAGS := --no-relax endif diff --git a/arch/xtensa/boot/boot-redboot/bootstrap.S b/arch/xtensa/boot/boot-redboot/bootstrap.S index bf7fabe6310d..bbf3b4b080cd 100644 --- a/arch/xtensa/boot/boot-redboot/bootstrap.S +++ b/arch/xtensa/boot/boot-redboot/bootstrap.S @@ -42,6 +42,7 @@ __start_a0: .align 4 .section .text, "ax" + .literal_position .begin literal_prefix .text /* put literals in here! */ diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index bb8d55775a97..91907590d183 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -17,9 +17,6 @@ obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_S32C1I_SELFTEST) += s32c1i_selftest.o -AFLAGS_head.o += -mtext-section-literals -AFLAGS_mxhead.o += -mtext-section-literals - # In the Xtensa architecture, assembly generates literals which must always # precede the L32R instruction with a relative offset less than 256 kB. 
# Therefore, the .text and .literal section must be combined in parenthesis diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S index 890004af03a9..24b3189d7841 100644 --- a/arch/xtensa/kernel/align.S +++ b/arch/xtensa/kernel/align.S @@ -155,7 +155,7 @@ * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception */ - + .literal_position ENTRY(fast_unaligned) /* Note: We don't expect the address to be aligned on a word diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 37a239556889..5d5707831626 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -125,6 +125,7 @@ * * Note: _user_exception might be at an odd address. Don't use call0..call12 */ + .literal_position ENTRY(user_exception) @@ -777,6 +778,8 @@ ENDPROC(kernel_exception) * When we get here, a0 is trashed and saved to excsave[debuglevel] */ + .literal_position + ENTRY(debug_exception) rsr a0, SREG_EPS + XCHAL_DEBUGLEVEL @@ -916,6 +919,8 @@ ENDPROC(debug_exception) unrecoverable_text: .ascii "Unrecoverable error in exception handler\0" + .literal_position + ENTRY(unrecoverable_exception) movi a0, 1 @@ -1117,6 +1122,8 @@ ENDPROC(fast_syscall_unrecoverable) * j done */ + .literal_position + #ifdef CONFIG_FAST_SYSCALL_XTENSA #define TRY \ @@ -1887,6 +1894,7 @@ ENDPROC(fast_store_prohibited) * void system_call (struct pt_regs* regs, int exccause) * a2 a3 */ + .literal_position ENTRY(system_call) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 08175df7a69e..253a0178f1bd 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -277,13 +277,13 @@ extern char _end[]; extern char _stext[]; extern char _WindowVectors_text_start; extern char _WindowVectors_text_end; -extern char _DebugInterruptVector_literal_start; +extern char _DebugInterruptVector_text_start; extern char _DebugInterruptVector_text_end; -extern char _KernelExceptionVector_literal_start; +extern char _KernelExceptionVector_text_start; extern char 
_KernelExceptionVector_text_end; -extern char _UserExceptionVector_literal_start; +extern char _UserExceptionVector_text_start; extern char _UserExceptionVector_text_end; -extern char _DoubleExceptionVector_literal_start; +extern char _DoubleExceptionVector_text_start; extern char _DoubleExceptionVector_text_end; #if XCHAL_EXCM_LEVEL >= 2 extern char _Level2InterruptVector_text_start; @@ -339,16 +339,16 @@ void __init setup_arch(char **cmdline_p) mem_reserve(__pa(&_WindowVectors_text_start), __pa(&_WindowVectors_text_end)); - mem_reserve(__pa(&_DebugInterruptVector_literal_start), + mem_reserve(__pa(&_DebugInterruptVector_text_start), __pa(&_DebugInterruptVector_text_end)); - mem_reserve(__pa(&_KernelExceptionVector_literal_start), + mem_reserve(__pa(&_KernelExceptionVector_text_start), __pa(&_KernelExceptionVector_text_end)); - mem_reserve(__pa(&_UserExceptionVector_literal_start), + mem_reserve(__pa(&_UserExceptionVector_text_start), __pa(&_UserExceptionVector_text_end)); - mem_reserve(__pa(&_DoubleExceptionVector_literal_start), + mem_reserve(__pa(&_DoubleExceptionVector_text_start), __pa(&_DoubleExceptionVector_text_end)); #if XCHAL_EXCM_LEVEL >= 2 diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index 332e9d635fb6..2bc85051c680 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -205,9 +205,6 @@ ENDPROC(_KernelExceptionVector) */ .section .DoubleExceptionVector.text, "ax" - .begin literal_prefix .DoubleExceptionVector - .globl _DoubleExceptionVector_WindowUnderflow - .globl _DoubleExceptionVector_WindowOverflow ENTRY(_DoubleExceptionVector) @@ -217,8 +214,12 @@ ENTRY(_DoubleExceptionVector) /* Check for kernel double exception (usually fatal). */ rsr a2, ps - _bbci.l a2, PS_UM_BIT, .Lksp + _bbsi.l a2, PS_UM_BIT, 1f + j .Lksp + .align 4 + .literal_position +1: /* Check if we are currently handling a window exception. */ /* Note: We don't need to indicate that we enter a critical section. 
*/ @@ -475,11 +476,8 @@ _DoubleExceptionVector_handle_exception: rotw -3 j 1b - ENDPROC(_DoubleExceptionVector) - .end literal_prefix - .text /* * Fixup handler for TLB miss in double exception handler for window owerflow. @@ -508,6 +506,8 @@ ENDPROC(_DoubleExceptionVector) * a3: exctable, original value in excsave1 */ + .literal_position + ENTRY(window_overflow_restore_a0_fixup) rsr a0, ps diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 162c77e53ca8..70b731edc7b8 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -45,24 +45,16 @@ jiffies = jiffies_64; LONG(sym ## _end); \ LONG(LOADADDR(section)) -/* Macro to define a section for a vector. - * - * Use of the MIN function catches the types of errors illustrated in - * the following example: - * - * Assume the section .DoubleExceptionVector.literal is completely - * full. Then a programmer adds code to .DoubleExceptionVector.text - * that produces another literal. The final literal position will - * overlay onto the first word of the adjacent code section - * .DoubleExceptionVector.text. (In practice, the literals will - * overwrite the code, and the first few instructions will be - * garbage.) +/* + * Macro to define a section for a vector. When CONFIG_VECTORS_OFFSET is + * defined code for every vector is located with other init data. At startup + * time head.S copies code for every vector to its final position according + * to description recorded in the corresponding RELOCATE_ENTRY. */ #ifdef CONFIG_VECTORS_OFFSET -#define SECTION_VECTOR(sym, section, addr, max_prevsec_size, prevsec) \ - section addr : AT((MIN(LOADADDR(prevsec) + max_prevsec_size, \ - LOADADDR(prevsec) + SIZEOF(prevsec)) + 3) & ~ 3) \ +#define SECTION_VECTOR(sym, section, addr, prevsec) \ + section addr : AT(((LOADADDR(prevsec) + SIZEOF(prevsec)) + 3) & ~ 3) \ { \ . 
= ALIGN(4); \ sym ## _start = ABSOLUTE(.); \ @@ -112,26 +104,19 @@ SECTIONS #if XCHAL_EXCM_LEVEL >= 6 SECTION_VECTOR (.Level6InterruptVector.text, INTLEVEL6_VECTOR_VADDR) #endif - SECTION_VECTOR (.DebugInterruptVector.literal, DEBUG_VECTOR_VADDR - 4) SECTION_VECTOR (.DebugInterruptVector.text, DEBUG_VECTOR_VADDR) - SECTION_VECTOR (.KernelExceptionVector.literal, KERNEL_VECTOR_VADDR - 4) SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR) - SECTION_VECTOR (.UserExceptionVector.literal, USER_VECTOR_VADDR - 4) SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR) - SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 20) SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR) #endif + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + ENTRY_TEXT TEXT_TEXT - VMLINUX_SYMBOL(__sched_text_start) = .; - *(.sched.literal .sched.text) - VMLINUX_SYMBOL(__sched_text_end) = .; - VMLINUX_SYMBOL(__cpuidle_text_start) = .; - *(.cpuidle.literal .cpuidle.text) - VMLINUX_SYMBOL(__cpuidle_text_end) = .; - VMLINUX_SYMBOL(__lock_text_start) = .; - *(.spinlock.literal .spinlock.text) - VMLINUX_SYMBOL(__lock_text_end) = .; + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT } _etext = .; @@ -196,8 +181,6 @@ SECTIONS .KernelExceptionVector.text); RELOCATE_ENTRY(_UserExceptionVector_text, .UserExceptionVector.text); - RELOCATE_ENTRY(_DoubleExceptionVector_literal, - .DoubleExceptionVector.literal); RELOCATE_ENTRY(_DoubleExceptionVector_text, .DoubleExceptionVector.text); RELOCATE_ENTRY(_DebugInterruptVector_text, @@ -230,25 +213,19 @@ SECTIONS SECTION_VECTOR (_WindowVectors_text, .WindowVectors.text, - WINDOW_VECTORS_VADDR, 4, + WINDOW_VECTORS_VADDR, .dummy) - SECTION_VECTOR (_DebugInterruptVector_literal, - .DebugInterruptVector.literal, - DEBUG_VECTOR_VADDR - 4, - SIZEOF(.WindowVectors.text), - .WindowVectors.text) SECTION_VECTOR (_DebugInterruptVector_text, .DebugInterruptVector.text, DEBUG_VECTOR_VADDR, - 4, - .DebugInterruptVector.literal) + 
.WindowVectors.text) #undef LAST #define LAST .DebugInterruptVector.text #if XCHAL_EXCM_LEVEL >= 2 SECTION_VECTOR (_Level2InterruptVector_text, .Level2InterruptVector.text, INTLEVEL2_VECTOR_VADDR, - SIZEOF(LAST), LAST) + LAST) # undef LAST # define LAST .Level2InterruptVector.text #endif @@ -256,7 +233,7 @@ SECTIONS SECTION_VECTOR (_Level3InterruptVector_text, .Level3InterruptVector.text, INTLEVEL3_VECTOR_VADDR, - SIZEOF(LAST), LAST) + LAST) # undef LAST # define LAST .Level3InterruptVector.text #endif @@ -264,7 +241,7 @@ SECTIONS SECTION_VECTOR (_Level4InterruptVector_text, .Level4InterruptVector.text, INTLEVEL4_VECTOR_VADDR, - SIZEOF(LAST), LAST) + LAST) # undef LAST # define LAST .Level4InterruptVector.text #endif @@ -272,7 +249,7 @@ SECTIONS SECTION_VECTOR (_Level5InterruptVector_text, .Level5InterruptVector.text, INTLEVEL5_VECTOR_VADDR, - SIZEOF(LAST), LAST) + LAST) # undef LAST # define LAST .Level5InterruptVector.text #endif @@ -280,40 +257,23 @@ SECTIONS SECTION_VECTOR (_Level6InterruptVector_text, .Level6InterruptVector.text, INTLEVEL6_VECTOR_VADDR, - SIZEOF(LAST), LAST) + LAST) # undef LAST # define LAST .Level6InterruptVector.text #endif - SECTION_VECTOR (_KernelExceptionVector_literal, - .KernelExceptionVector.literal, - KERNEL_VECTOR_VADDR - 4, - SIZEOF(LAST), LAST) -#undef LAST SECTION_VECTOR (_KernelExceptionVector_text, .KernelExceptionVector.text, KERNEL_VECTOR_VADDR, - 4, - .KernelExceptionVector.literal) - SECTION_VECTOR (_UserExceptionVector_literal, - .UserExceptionVector.literal, - USER_VECTOR_VADDR - 4, - SIZEOF(.KernelExceptionVector.text), - .KernelExceptionVector.text) + LAST) +#undef LAST SECTION_VECTOR (_UserExceptionVector_text, .UserExceptionVector.text, USER_VECTOR_VADDR, - 4, - .UserExceptionVector.literal) - SECTION_VECTOR (_DoubleExceptionVector_literal, - .DoubleExceptionVector.literal, - DOUBLEEXC_VECTOR_VADDR - 20, - SIZEOF(.UserExceptionVector.text), - .UserExceptionVector.text) + .KernelExceptionVector.text) SECTION_VECTOR 
(_DoubleExceptionVector_text, .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, - 20, - .DoubleExceptionVector.literal) + .UserExceptionVector.text) . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; @@ -323,7 +283,6 @@ SECTIONS SECTION_VECTOR (_SecondaryResetVector_text, .SecondaryResetVector.text, RESET_VECTOR1_VADDR, - SIZEOF(.DoubleExceptionVector.text), .DoubleExceptionVector.text) . = LOADADDR(.SecondaryResetVector.text)+SIZEOF(.SecondaryResetVector.text); @@ -373,5 +332,4 @@ SECTIONS /* Sections to be discarded */ DISCARDS - /DISCARD/ : { *(.exit.literal) } } -- cgit v1.2.3 From 2da03d4114b2587f0e8e45f4862074e34daee64e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 9 Dec 2017 18:44:11 -0800 Subject: xtensa: use call instead of callx in assembly code Now that xtensa assembly sources are compiled with -mlongcalls let the assembler and linker relax call instructions into l32r + callx where needed. This change makes the code cleaner and potentially a bit faster. 
Signed-off-by: Max Filippov --- arch/xtensa/kernel/coprocessor.S | 3 +-- arch/xtensa/kernel/entry.S | 56 ++++++++++++++-------------------------- arch/xtensa/kernel/head.S | 10 +++---- arch/xtensa/kernel/vectors.S | 3 +-- 4 files changed, 24 insertions(+), 48 deletions(-) diff --git a/arch/xtensa/kernel/coprocessor.S b/arch/xtensa/kernel/coprocessor.S index 3a98503ad11a..4f8b52d575a2 100644 --- a/arch/xtensa/kernel/coprocessor.S +++ b/arch/xtensa/kernel/coprocessor.S @@ -212,8 +212,7 @@ ENDPROC(coprocessor_restore) ENTRY(fast_coprocessor_double) wsr a0, excsave1 - movi a0, unrecoverable_exception - callx0 a0 + call0 unrecoverable_exception ENDPROC(fast_coprocessor_double) diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 5d5707831626..5a2110bb5902 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -476,8 +476,7 @@ common_exception_return: 1: irq_save a2, a3 #ifdef CONFIG_TRACE_IRQFLAGS - movi a4, trace_hardirqs_off - callx4 a4 + call4 trace_hardirqs_off #endif /* Jump if we are returning from kernel exceptions. 
*/ @@ -504,24 +503,20 @@ common_exception_return: /* Call do_signal() */ #ifdef CONFIG_TRACE_IRQFLAGS - movi a4, trace_hardirqs_on - callx4 a4 + call4 trace_hardirqs_on #endif rsil a2, 0 - movi a4, do_notify_resume # int do_notify_resume(struct pt_regs*) mov a6, a1 - callx4 a4 + call4 do_notify_resume # int do_notify_resume(struct pt_regs*) j 1b 3: /* Reschedule */ #ifdef CONFIG_TRACE_IRQFLAGS - movi a4, trace_hardirqs_on - callx4 a4 + call4 trace_hardirqs_on #endif rsil a2, 0 - movi a4, schedule # void schedule (void) - callx4 a4 + call4 schedule # void schedule (void) j 1b #ifdef CONFIG_PREEMPT @@ -532,8 +527,7 @@ common_exception_return: l32i a4, a2, TI_PRE_COUNT bnez a4, 4f - movi a4, preempt_schedule_irq - callx4 a4 + call4 preempt_schedule_irq j 1b #endif @@ -546,23 +540,20 @@ common_exception_return: 5: #ifdef CONFIG_HAVE_HW_BREAKPOINT _bbci.l a4, TIF_DB_DISABLED, 7f - movi a4, restore_dbreak - callx4 a4 + call4 restore_dbreak 7: #endif #ifdef CONFIG_DEBUG_TLB_SANITY l32i a4, a1, PT_DEPC bgeui a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 4f - movi a4, check_tlb_sanity - callx4 a4 + call4 check_tlb_sanity #endif 6: 4: #ifdef CONFIG_TRACE_IRQFLAGS extui a4, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH bgei a4, LOCKLEVEL, 1f - movi a4, trace_hardirqs_on - callx4 a4 + call4 trace_hardirqs_on 1: #endif /* Restore optional registers. 
*/ @@ -938,10 +929,8 @@ ENTRY(unrecoverable_exception) movi a0, 0 addi a1, a1, PT_REGS_OFFSET - movi a4, panic movi a6, unrecoverable_text - - callx4 a4 + call4 panic 1: j 1b @@ -1078,8 +1067,7 @@ ENTRY(fast_syscall_unrecoverable) xsr a2, depc # restore a2, depc wsr a0, excsave1 - movi a0, unrecoverable_exception - callx0 a0 + call0 unrecoverable_exception ENDPROC(fast_syscall_unrecoverable) @@ -1418,14 +1406,12 @@ ENTRY(fast_syscall_spill_registers) rsync movi a6, SIGSEGV - movi a4, do_exit - callx4 a4 + call4 do_exit /* shouldn't return, so panic */ wsr a0, excsave1 - movi a0, unrecoverable_exception - callx0 a0 # should not return + call0 unrecoverable_exception # should not return 1: j 1b @@ -1571,8 +1557,8 @@ ENDPROC(fast_syscall_spill_registers) ENTRY(fast_second_level_miss_double_kernel) -1: movi a0, unrecoverable_exception - callx0 a0 # should not return +1: + call0 unrecoverable_exception # should not return 1: j 1b ENDPROC(fast_second_level_miss_double_kernel) @@ -1904,9 +1890,8 @@ ENTRY(system_call) l32i a3, a2, PT_AREG2 mov a6, a2 - movi a4, do_syscall_trace_enter s32i a3, a2, PT_SYSCALL - callx4 a4 + call4 do_syscall_trace_enter mov a3, a6 /* syscall = sys_call_table[syscall_nr] */ @@ -1938,9 +1923,8 @@ ENTRY(system_call) 1: /* regs->areg[2] = return_value */ s32i a6, a2, PT_AREG2 - movi a4, do_syscall_trace_leave mov a6, a2 - callx4 a4 + call4 do_syscall_trace_leave retw ENDPROC(system_call) @@ -2056,12 +2040,10 @@ ENTRY(ret_from_fork) /* void schedule_tail (struct task_struct *prev) * Note: prev is still in a6 (return value from fake call4 frame) */ - movi a4, schedule_tail - callx4 a4 + call4 schedule_tail - movi a4, do_syscall_trace_leave mov a6, a1 - callx4 a4 + call4 do_syscall_trace_leave j common_exception_return diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 23ce62e60435..9c4e9433e536 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -264,11 +264,8 @@ ENTRY(_startup) /* init_arch kick-starts 
the linux kernel */ - movi a4, init_arch - callx4 a4 - - movi a4, start_kernel - callx4 a4 + call4 init_arch + call4 start_kernel should_never_return: j should_never_return @@ -294,8 +291,7 @@ should_never_return: movi a6, 0 wsr a6, excsave1 - movi a4, secondary_start_kernel - callx4 a4 + call4 secondary_start_kernel j should_never_return #endif /* CONFIG_SMP */ diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index 2bc85051c680..841503d3307c 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -305,8 +305,7 @@ _DoubleExceptionVector_WindowUnderflow: .Lunrecoverable: rsr a3, excsave1 wsr a0, excsave1 - movi a0, unrecoverable_exception - callx0 a0 + call0 unrecoverable_exception .Lfixup:/* Check for a fixup handler or if we were in a critical section. */ -- cgit v1.2.3 From 0013aceb307482ba83a5b6a29f6ba1791be0d32b Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 9 Dec 2017 21:18:47 -0800 Subject: xtensa: clean up fixups in assembly code Remove duplicate definitions of EX() and similar TRY/CATCH and SRC/DST macros from assembly sources and put single definition into asm/asmmacro.h Signed-off-by: Max Filippov --- arch/xtensa/include/asm/asmmacro.h | 7 +++ arch/xtensa/kernel/entry.S | 33 ++---------- arch/xtensa/lib/checksum.S | 74 +++++++++++--------------- arch/xtensa/lib/memset.S | 36 +++++-------- arch/xtensa/lib/strncpy_user.S | 52 ++++++++---------- arch/xtensa/lib/strnlen_user.S | 19 +++---- arch/xtensa/lib/usercopy.S | 106 +++++++++++++++++-------------------- 7 files changed, 133 insertions(+), 194 deletions(-) diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index 746dcc8b5abc..d2a4415a4c08 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -150,5 +150,12 @@ __endl \ar \as .endm +/* Load or store instructions that may cause exceptions use the EX macro. 
*/ + +#define EX(handler) \ + .section __ex_table, "a"; \ + .word 97f, handler; \ + .previous \ +97: #endif /* _XTENSA_ASMMACRO_H */ diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 5a2110bb5902..a27a9a65635b 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -1094,35 +1095,12 @@ ENDPROC(fast_syscall_unrecoverable) * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception * * Note: we don't have to save a2; a2 holds the return value - * - * We use the two macros TRY and CATCH: - * - * TRY adds an entry to the __ex_table fixup table for the immediately - * following instruction. - * - * CATCH catches any exception that occurred at one of the preceding TRY - * statements and continues from there - * - * Usage TRY l32i a0, a1, 0 - * - * done: rfe - * CATCH - * j done */ .literal_position #ifdef CONFIG_FAST_SYSCALL_XTENSA -#define TRY \ - .section __ex_table, "a"; \ - .word 66f, 67f; \ - .text; \ -66: - -#define CATCH \ -67: - ENTRY(fast_syscall_xtensa) s32i a7, a2, PT_AREG7 # we need an additional register @@ -1136,9 +1114,9 @@ ENTRY(fast_syscall_xtensa) .Lswp: /* Atomic compare and swap */ -TRY l32i a0, a3, 0 # read old value +EX(.Leac) l32i a0, a3, 0 # read old value bne a0, a4, 1f # same as old value? jump -TRY s32i a5, a3, 0 # different, modify value +EX(.Leac) s32i a5, a3, 0 # different, modify value l32i a7, a2, PT_AREG7 # restore a7 l32i a0, a2, PT_AREG0 # restore a0 movi a2, 1 # and return 1 @@ -1151,12 +1129,12 @@ TRY s32i a5, a3, 0 # different, modify value .Lnswp: /* Atomic set, add, and exg_add. 
*/ -TRY l32i a7, a3, 0 # orig +EX(.Leac) l32i a7, a3, 0 # orig addi a6, a6, -SYS_XTENSA_ATOMIC_SET add a0, a4, a7 # + arg moveqz a0, a4, a6 # set addi a6, a6, SYS_XTENSA_ATOMIC_SET -TRY s32i a0, a3, 0 # write new value +EX(.Leac) s32i a0, a3, 0 # write new value mov a0, a2 mov a2, a7 @@ -1164,7 +1142,6 @@ TRY s32i a0, a3, 0 # write new value l32i a0, a0, PT_AREG0 # restore a0 rfe -CATCH .Leac: l32i a7, a2, PT_AREG7 # restore a7 l32i a0, a2, PT_AREG0 # restore a0 movi a2, -EFAULT diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S index 4eb573d2720e..528fe0dd9339 100644 --- a/arch/xtensa/lib/checksum.S +++ b/arch/xtensa/lib/checksum.S @@ -14,9 +14,10 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include +#include /* * computes a partial checksum, e.g. for TCP/UDP fragments @@ -175,23 +176,8 @@ ENDPROC(csum_partial) /* * Copy from ds while checksumming, otherwise like csum_partial - * - * The macros SRC and DST specify the type of access for the instruction. - * thus we can call a custom exception handler for each access type. */ -#define SRC(y...) \ - 9999: y; \ - .section __ex_table, "a"; \ - .long 9999b, 6001f ; \ - .previous - -#define DST(y...) 
\ - 9999: y; \ - .section __ex_table, "a"; \ - .long 9999b, 6002f ; \ - .previous - /* unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, int sum, int *src_err_ptr, int *dst_err_ptr) @@ -244,28 +230,28 @@ ENTRY(csum_partial_copy_generic) add a10, a10, a2 /* a10 = end of last 32-byte src chunk */ .Loop5: #endif -SRC( l32i a9, a2, 0 ) -SRC( l32i a8, a2, 4 ) -DST( s32i a9, a3, 0 ) -DST( s32i a8, a3, 4 ) +EX(10f) l32i a9, a2, 0 +EX(10f) l32i a8, a2, 4 +EX(11f) s32i a9, a3, 0 +EX(11f) s32i a8, a3, 4 ONES_ADD(a5, a9) ONES_ADD(a5, a8) -SRC( l32i a9, a2, 8 ) -SRC( l32i a8, a2, 12 ) -DST( s32i a9, a3, 8 ) -DST( s32i a8, a3, 12 ) +EX(10f) l32i a9, a2, 8 +EX(10f) l32i a8, a2, 12 +EX(11f) s32i a9, a3, 8 +EX(11f) s32i a8, a3, 12 ONES_ADD(a5, a9) ONES_ADD(a5, a8) -SRC( l32i a9, a2, 16 ) -SRC( l32i a8, a2, 20 ) -DST( s32i a9, a3, 16 ) -DST( s32i a8, a3, 20 ) +EX(10f) l32i a9, a2, 16 +EX(10f) l32i a8, a2, 20 +EX(11f) s32i a9, a3, 16 +EX(11f) s32i a8, a3, 20 ONES_ADD(a5, a9) ONES_ADD(a5, a8) -SRC( l32i a9, a2, 24 ) -SRC( l32i a8, a2, 28 ) -DST( s32i a9, a3, 24 ) -DST( s32i a8, a3, 28 ) +EX(10f) l32i a9, a2, 24 +EX(10f) l32i a8, a2, 28 +EX(11f) s32i a9, a3, 24 +EX(11f) s32i a8, a3, 28 ONES_ADD(a5, a9) ONES_ADD(a5, a8) addi a2, a2, 32 @@ -284,8 +270,8 @@ DST( s32i a8, a3, 28 ) add a10, a10, a2 /* a10 = end of last 4-byte src chunk */ .Loop6: #endif -SRC( l32i a9, a2, 0 ) -DST( s32i a9, a3, 0 ) +EX(10f) l32i a9, a2, 0 +EX(11f) s32i a9, a3, 0 ONES_ADD(a5, a9) addi a2, a2, 4 addi a3, a3, 4 @@ -315,8 +301,8 @@ DST( s32i a9, a3, 0 ) add a10, a10, a2 /* a10 = end of last 2-byte src chunk */ .Loop7: #endif -SRC( l16ui a9, a2, 0 ) -DST( s16i a9, a3, 0 ) +EX(10f) l16ui a9, a2, 0 +EX(11f) s16i a9, a3, 0 ONES_ADD(a5, a9) addi a2, a2, 2 addi a3, a3, 2 @@ -326,8 +312,8 @@ DST( s16i a9, a3, 0 ) 4: /* This section processes a possible trailing odd byte. 
*/ _bbci.l a4, 0, 8f /* 1-byte chunk */ -SRC( l8ui a9, a2, 0 ) -DST( s8i a9, a3, 0 ) +EX(10f) l8ui a9, a2, 0 +EX(11f) s8i a9, a3, 0 #ifdef __XTENSA_EB__ slli a9, a9, 8 /* shift byte to bits 8..15 */ #endif @@ -350,10 +336,10 @@ DST( s8i a9, a3, 0 ) add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */ .Loop8: #endif -SRC( l8ui a9, a2, 0 ) -SRC( l8ui a8, a2, 1 ) -DST( s8i a9, a3, 0 ) -DST( s8i a8, a3, 1 ) +EX(10f) l8ui a9, a2, 0 +EX(10f) l8ui a8, a2, 1 +EX(11f) s8i a9, a3, 0 +EX(11f) s8i a8, a3, 1 #ifdef __XTENSA_EB__ slli a9, a9, 8 /* combine into a single 16-bit value */ #else /* for checksum computation */ @@ -381,7 +367,7 @@ ENDPROC(csum_partial_copy_generic) a12 = original dst for exception handling */ -6001: +10: _movi a2, -EFAULT s32i a2, a6, 0 /* src_err_ptr */ @@ -403,7 +389,7 @@ ENDPROC(csum_partial_copy_generic) 2: retw -6002: +11: movi a2, -EFAULT s32i a2, a7, 0 /* dst_err_ptr */ movi a2, 0 diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S index 10b8c400f175..7a724edaf4f1 100644 --- a/arch/xtensa/lib/memset.S +++ b/arch/xtensa/lib/memset.S @@ -12,6 +12,7 @@ */ #include +#include /* * void *memset(void *dst, int c, size_t length) @@ -28,15 +29,6 @@ * the alignment labels). */ -/* Load or store instructions that may cause exceptions use the EX macro. 
*/ - -#define EX(insn,reg1,reg2,offset,handler) \ -9: insn reg1, reg2, offset; \ - .section __ex_table, "a"; \ - .word 9b, handler; \ - .previous - - .text .align 4 .global memset @@ -73,10 +65,10 @@ memset: add a6, a6, a5 # a6 = end of last 16B chunk #endif /* !XCHAL_HAVE_LOOPS */ .Loop1: - EX(s32i, a3, a5, 0, memset_fixup) - EX(s32i, a3, a5, 4, memset_fixup) - EX(s32i, a3, a5, 8, memset_fixup) - EX(s32i, a3, a5, 12, memset_fixup) +EX(10f) s32i a3, a5, 0 +EX(10f) s32i a3, a5, 4 +EX(10f) s32i a3, a5, 8 +EX(10f) s32i a3, a5, 12 addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS blt a5, a6, .Loop1 @@ -84,23 +76,23 @@ memset: .Loop1done: bbci.l a4, 3, .L2 # set 8 bytes - EX(s32i, a3, a5, 0, memset_fixup) - EX(s32i, a3, a5, 4, memset_fixup) +EX(10f) s32i a3, a5, 0 +EX(10f) s32i a3, a5, 4 addi a5, a5, 8 .L2: bbci.l a4, 2, .L3 # set 4 bytes - EX(s32i, a3, a5, 0, memset_fixup) +EX(10f) s32i a3, a5, 0 addi a5, a5, 4 .L3: bbci.l a4, 1, .L4 # set 2 bytes - EX(s16i, a3, a5, 0, memset_fixup) +EX(10f) s16i a3, a5, 0 addi a5, a5, 2 .L4: bbci.l a4, 0, .L5 # set 1 byte - EX(s8i, a3, a5, 0, memset_fixup) +EX(10f) s8i a3, a5, 0 .L5: .Lret1: retw @@ -114,7 +106,7 @@ memset: bbci.l a5, 0, .L20 # branch if dst alignment half-aligned # dst is only byte aligned # set 1 byte - EX(s8i, a3, a5, 0, memset_fixup) +EX(10f) s8i a3, a5, 0 addi a5, a5, 1 addi a4, a4, -1 # now retest if dst aligned @@ -122,7 +114,7 @@ memset: .L20: # dst half-aligned # set 2 bytes - EX(s16i, a3, a5, 0, memset_fixup) +EX(10f) s16i a3, a5, 0 addi a5, a5, 2 addi a4, a4, -2 j .L0 # dst is now aligned, return to main algorithm @@ -141,7 +133,7 @@ memset: add a6, a5, a4 # a6 = ending address #endif /* !XCHAL_HAVE_LOOPS */ .Lbyteloop: - EX(s8i, a3, a5, 0, memset_fixup) +EX(10f) s8i a3, a5, 0 addi a5, a5, 1 #if !XCHAL_HAVE_LOOPS blt a5, a6, .Lbyteloop @@ -155,6 +147,6 @@ memset: /* We return zero if a failure occurred. 
*/ -memset_fixup: +10: movi a2, 0 retw diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S index 1ad0ecf45368..827e1b393f3f 100644 --- a/arch/xtensa/lib/strncpy_user.S +++ b/arch/xtensa/lib/strncpy_user.S @@ -11,16 +11,9 @@ * Copyright (C) 2002 Tensilica Inc. */ -#include #include - -/* Load or store instructions that may cause exceptions use the EX macro. */ - -#define EX(insn,reg1,reg2,offset,handler) \ -9: insn reg1, reg2, offset; \ - .section __ex_table, "a"; \ - .word 9b, handler; \ - .previous +#include +#include /* * char *__strncpy_user(char *dst, const char *src, size_t len) @@ -75,9 +68,9 @@ __strncpy_user: j .Ldstunaligned .Lsrc1mod2: # src address is odd - EX(l8ui, a9, a3, 0, fixup_l) # get byte 0 +EX(11f) l8ui a9, a3, 0 # get byte 0 addi a3, a3, 1 # advance src pointer - EX(s8i, a9, a11, 0, fixup_s) # store byte 0 +EX(10f) s8i a9, a11, 0 # store byte 0 beqz a9, .Lret # if byte 0 is zero addi a11, a11, 1 # advance dst pointer addi a4, a4, -1 # decrement len @@ -85,16 +78,16 @@ __strncpy_user: bbci.l a3, 1, .Lsrcaligned # if src is now word-aligned .Lsrc2mod4: # src address is 2 mod 4 - EX(l8ui, a9, a3, 0, fixup_l) # get byte 0 +EX(11f) l8ui a9, a3, 0 # get byte 0 /* 1-cycle interlock */ - EX(s8i, a9, a11, 0, fixup_s) # store byte 0 +EX(10f) s8i a9, a11, 0 # store byte 0 beqz a9, .Lret # if byte 0 is zero addi a11, a11, 1 # advance dst pointer addi a4, a4, -1 # decrement len beqz a4, .Lret # if len is zero - EX(l8ui, a9, a3, 1, fixup_l) # get byte 0 +EX(11f) l8ui a9, a3, 1 # get byte 0 addi a3, a3, 2 # advance src pointer - EX(s8i, a9, a11, 0, fixup_s) # store byte 0 +EX(10f) s8i a9, a11, 0 # store byte 0 beqz a9, .Lret # if byte 0 is zero addi a11, a11, 1 # advance dst pointer addi a4, a4, -1 # decrement len @@ -117,12 +110,12 @@ __strncpy_user: add a12, a12, a11 # a12 = end of last 4B chunck #endif .Loop1: - EX(l32i, a9, a3, 0, fixup_l) # get word from src +EX(11f) l32i a9, a3, 0 # get word from src addi a3, a3, 4 # advance src 
pointer bnone a9, a5, .Lz0 # if byte 0 is zero bnone a9, a6, .Lz1 # if byte 1 is zero bnone a9, a7, .Lz2 # if byte 2 is zero - EX(s32i, a9, a11, 0, fixup_s) # store word to dst +EX(10f) s32i a9, a11, 0 # store word to dst bnone a9, a8, .Lz3 # if byte 3 is zero addi a11, a11, 4 # advance dst pointer #if !XCHAL_HAVE_LOOPS @@ -132,7 +125,7 @@ __strncpy_user: .Loop1done: bbci.l a4, 1, .L100 # copy 2 bytes - EX(l16ui, a9, a3, 0, fixup_l) +EX(11f) l16ui a9, a3, 0 addi a3, a3, 2 # advance src pointer #ifdef __XTENSA_EB__ bnone a9, a7, .Lz0 # if byte 2 is zero @@ -141,13 +134,13 @@ __strncpy_user: bnone a9, a5, .Lz0 # if byte 0 is zero bnone a9, a6, .Lz1 # if byte 1 is zero #endif - EX(s16i, a9, a11, 0, fixup_s) +EX(10f) s16i a9, a11, 0 addi a11, a11, 2 # advance dst pointer .L100: bbci.l a4, 0, .Lret - EX(l8ui, a9, a3, 0, fixup_l) +EX(11f) l8ui a9, a3, 0 /* slot */ - EX(s8i, a9, a11, 0, fixup_s) +EX(10f) s8i a9, a11, 0 beqz a9, .Lret # if byte is zero addi a11, a11, 1-3 # advance dst ptr 1, but also cancel # the effect of adding 3 in .Lz3 code @@ -161,14 +154,14 @@ __strncpy_user: #ifdef __XTENSA_EB__ movi a9, 0 #endif /* __XTENSA_EB__ */ - EX(s8i, a9, a11, 0, fixup_s) +EX(10f) s8i a9, a11, 0 sub a2, a11, a2 # compute strlen retw .Lz1: # byte 1 is zero #ifdef __XTENSA_EB__ extui a9, a9, 16, 16 #endif /* __XTENSA_EB__ */ - EX(s16i, a9, a11, 0, fixup_s) +EX(10f) s16i a9, a11, 0 addi a11, a11, 1 # advance dst pointer sub a2, a11, a2 # compute strlen retw @@ -176,9 +169,9 @@ __strncpy_user: #ifdef __XTENSA_EB__ extui a9, a9, 16, 16 #endif /* __XTENSA_EB__ */ - EX(s16i, a9, a11, 0, fixup_s) +EX(10f) s16i a9, a11, 0 movi a9, 0 - EX(s8i, a9, a11, 2, fixup_s) +EX(10f) s8i a9, a11, 2 addi a11, a11, 2 # advance dst pointer sub a2, a11, a2 # compute strlen retw @@ -196,9 +189,9 @@ __strncpy_user: add a12, a11, a4 # a12 = ending address #endif /* XCHAL_HAVE_LOOPS */ .Lnextbyte: - EX(l8ui, a9, a3, 0, fixup_l) +EX(11f) l8ui a9, a3, 0 addi a3, a3, 1 - EX(s8i, a9, a11, 0, fixup_s) 
+EX(10f) s8i a9, a11, 0 beqz a9, .Lunalignedend addi a11, a11, 1 #if !XCHAL_HAVE_LOOPS @@ -218,8 +211,7 @@ __strncpy_user: * implementation in memset(). Thus, we differentiate between * load/store fixups. */ -fixup_s: -fixup_l: +10: +11: movi a2, -EFAULT retw - diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S index 4c03b1e581e9..9404ac46ce4c 100644 --- a/arch/xtensa/lib/strnlen_user.S +++ b/arch/xtensa/lib/strnlen_user.S @@ -12,14 +12,7 @@ */ #include - -/* Load or store instructions that may cause exceptions use the EX macro. */ - -#define EX(insn,reg1,reg2,offset,handler) \ -9: insn reg1, reg2, offset; \ - .section __ex_table, "a"; \ - .word 9b, handler; \ - .previous +#include /* * size_t __strnlen_user(const char *s, size_t len) @@ -77,7 +70,7 @@ __strnlen_user: add a10, a10, a4 # a10 = end of last 4B chunk #endif /* XCHAL_HAVE_LOOPS */ .Loop: - EX(l32i, a9, a4, 4, lenfixup) # get next word of string +EX(10f) l32i a9, a4, 4 # get next word of string addi a4, a4, 4 # advance string pointer bnone a9, a5, .Lz0 # if byte 0 is zero bnone a9, a6, .Lz1 # if byte 1 is zero @@ -88,7 +81,7 @@ __strnlen_user: #endif .Ldone: - EX(l32i, a9, a4, 4, lenfixup) # load 4 bytes for remaining checks +EX(10f) l32i a9, a4, 4 # load 4 bytes for remaining checks bbci.l a3, 1, .L100 # check two more bytes (bytes 0, 1 of word) @@ -125,14 +118,14 @@ __strnlen_user: retw .L1mod2: # address is odd - EX(l8ui, a9, a4, 4, lenfixup) # get byte 0 +EX(10f) l8ui a9, a4, 4 # get byte 0 addi a4, a4, 1 # advance string pointer beqz a9, .Lz3 # if byte 0 is zero bbci.l a4, 1, .Laligned # if string pointer is now word-aligned .L2mod4: # address is 2 mod 4 addi a4, a4, 2 # advance ptr for aligned access - EX(l32i, a9, a4, 0, lenfixup) # get word with first two bytes of string +EX(10f) l32i a9, a4, 0 # get word with first two bytes of string bnone a9, a7, .Lz2 # if byte 2 (of word, not string) is zero bany a9, a8, .Laligned # if byte 3 (of word, not string) is nonzero # byte 3 
is zero @@ -142,6 +135,6 @@ __strnlen_user: .section .fixup, "ax" .align 4 -lenfixup: +10: movi a2, 0 retw diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index d9cd766bde3e..4172b73b0364 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -54,6 +54,7 @@ */ #include +#include #ifdef __XTENSA_EB__ #define ALIGN(R, W0, W1) src R, W0, W1 @@ -63,15 +64,6 @@ #define SSA8(R) ssa8l R #endif -/* Load or store instructions that may cause exceptions use the EX macro. */ - -#define EX(insn,reg1,reg2,offset,handler) \ -9: insn reg1, reg2, offset; \ - .section __ex_table, "a"; \ - .word 9b, handler; \ - .previous - - .text .align 4 .global __xtensa_copy_user @@ -102,9 +94,9 @@ __xtensa_copy_user: bltui a4, 7, .Lbytecopy # do short copies byte by byte # copy 1 byte - EX(l8ui, a6, a3, 0, fixup) +EX(10f) l8ui a6, a3, 0 addi a3, a3, 1 - EX(s8i, a6, a5, 0, fixup) +EX(10f) s8i a6, a5, 0 addi a5, a5, 1 addi a4, a4, -1 bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then @@ -112,11 +104,11 @@ __xtensa_copy_user: .Ldst2mod4: # dst 16-bit aligned # copy 2 bytes bltui a4, 6, .Lbytecopy # do short copies byte by byte - EX(l8ui, a6, a3, 0, fixup) - EX(l8ui, a7, a3, 1, fixup) +EX(10f) l8ui a6, a3, 0 +EX(10f) l8ui a7, a3, 1 addi a3, a3, 2 - EX(s8i, a6, a5, 0, fixup) - EX(s8i, a7, a5, 1, fixup) +EX(10f) s8i a6, a5, 0 +EX(10f) s8i a7, a5, 1 addi a5, a5, 2 addi a4, a4, -2 j .Ldstaligned # dst is now aligned, return to main algorithm @@ -135,9 +127,9 @@ __xtensa_copy_user: add a7, a3, a4 # a7 = end address for source #endif /* !XCHAL_HAVE_LOOPS */ .Lnextbyte: - EX(l8ui, a6, a3, 0, fixup) +EX(10f) l8ui a6, a3, 0 addi a3, a3, 1 - EX(s8i, a6, a5, 0, fixup) +EX(10f) s8i a6, a5, 0 addi a5, a5, 1 #if !XCHAL_HAVE_LOOPS blt a3, a7, .Lnextbyte @@ -161,15 +153,15 @@ __xtensa_copy_user: add a8, a8, a3 # a8 = end of last 16B source chunk #endif /* !XCHAL_HAVE_LOOPS */ .Loop1: - EX(l32i, a6, a3, 0, fixup) - EX(l32i, a7, a3, 4, fixup) - EX(s32i, a6, a5, 0, 
fixup) - EX(l32i, a6, a3, 8, fixup) - EX(s32i, a7, a5, 4, fixup) - EX(l32i, a7, a3, 12, fixup) - EX(s32i, a6, a5, 8, fixup) +EX(10f) l32i a6, a3, 0 +EX(10f) l32i a7, a3, 4 +EX(10f) s32i a6, a5, 0 +EX(10f) l32i a6, a3, 8 +EX(10f) s32i a7, a5, 4 +EX(10f) l32i a7, a3, 12 +EX(10f) s32i a6, a5, 8 addi a3, a3, 16 - EX(s32i, a7, a5, 12, fixup) +EX(10f) s32i a7, a5, 12 addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS blt a3, a8, .Loop1 @@ -177,31 +169,31 @@ __xtensa_copy_user: .Loop1done: bbci.l a4, 3, .L2 # copy 8 bytes - EX(l32i, a6, a3, 0, fixup) - EX(l32i, a7, a3, 4, fixup) +EX(10f) l32i a6, a3, 0 +EX(10f) l32i a7, a3, 4 addi a3, a3, 8 - EX(s32i, a6, a5, 0, fixup) - EX(s32i, a7, a5, 4, fixup) +EX(10f) s32i a6, a5, 0 +EX(10f) s32i a7, a5, 4 addi a5, a5, 8 .L2: bbci.l a4, 2, .L3 # copy 4 bytes - EX(l32i, a6, a3, 0, fixup) +EX(10f) l32i a6, a3, 0 addi a3, a3, 4 - EX(s32i, a6, a5, 0, fixup) +EX(10f) s32i a6, a5, 0 addi a5, a5, 4 .L3: bbci.l a4, 1, .L4 # copy 2 bytes - EX(l16ui, a6, a3, 0, fixup) +EX(10f) l16ui a6, a3, 0 addi a3, a3, 2 - EX(s16i, a6, a5, 0, fixup) +EX(10f) s16i a6, a5, 0 addi a5, a5, 2 .L4: bbci.l a4, 0, .L5 # copy 1 byte - EX(l8ui, a6, a3, 0, fixup) - EX(s8i, a6, a5, 0, fixup) +EX(10f) l8ui a6, a3, 0 +EX(10f) s8i a6, a5, 0 .L5: movi a2, 0 # return success for len bytes copied retw @@ -217,7 +209,7 @@ __xtensa_copy_user: # copy 16 bytes per iteration for word-aligned dst and unaligned src and a10, a3, a8 # save unalignment offset for below sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware) - EX(l32i, a6, a3, 0, fixup) # load first word +EX(10f) l32i a6, a3, 0 # load first word #if XCHAL_HAVE_LOOPS loopnez a7, .Loop2done #else /* !XCHAL_HAVE_LOOPS */ @@ -226,19 +218,19 @@ __xtensa_copy_user: add a12, a12, a3 # a12 = end of last 16B source chunk #endif /* !XCHAL_HAVE_LOOPS */ .Loop2: - EX(l32i, a7, a3, 4, fixup) - EX(l32i, a8, a3, 8, fixup) +EX(10f) l32i a7, a3, 4 +EX(10f) l32i a8, a3, 8 ALIGN( a6, a6, a7) - EX(s32i, a6, a5, 0, fixup) - 
EX(l32i, a9, a3, 12, fixup) +EX(10f) s32i a6, a5, 0 +EX(10f) l32i a9, a3, 12 ALIGN( a7, a7, a8) - EX(s32i, a7, a5, 4, fixup) - EX(l32i, a6, a3, 16, fixup) +EX(10f) s32i a7, a5, 4 +EX(10f) l32i a6, a3, 16 ALIGN( a8, a8, a9) - EX(s32i, a8, a5, 8, fixup) +EX(10f) s32i a8, a5, 8 addi a3, a3, 16 ALIGN( a9, a9, a6) - EX(s32i, a9, a5, 12, fixup) +EX(10f) s32i a9, a5, 12 addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS blt a3, a12, .Loop2 @@ -246,39 +238,39 @@ __xtensa_copy_user: .Loop2done: bbci.l a4, 3, .L12 # copy 8 bytes - EX(l32i, a7, a3, 4, fixup) - EX(l32i, a8, a3, 8, fixup) +EX(10f) l32i a7, a3, 4 +EX(10f) l32i a8, a3, 8 ALIGN( a6, a6, a7) - EX(s32i, a6, a5, 0, fixup) +EX(10f) s32i a6, a5, 0 addi a3, a3, 8 ALIGN( a7, a7, a8) - EX(s32i, a7, a5, 4, fixup) +EX(10f) s32i a7, a5, 4 addi a5, a5, 8 mov a6, a8 .L12: bbci.l a4, 2, .L13 # copy 4 bytes - EX(l32i, a7, a3, 4, fixup) +EX(10f) l32i a7, a3, 4 addi a3, a3, 4 ALIGN( a6, a6, a7) - EX(s32i, a6, a5, 0, fixup) +EX(10f) s32i a6, a5, 0 addi a5, a5, 4 mov a6, a7 .L13: add a3, a3, a10 # readjust a3 with correct misalignment bbci.l a4, 1, .L14 # copy 2 bytes - EX(l8ui, a6, a3, 0, fixup) - EX(l8ui, a7, a3, 1, fixup) +EX(10f) l8ui a6, a3, 0 +EX(10f) l8ui a7, a3, 1 addi a3, a3, 2 - EX(s8i, a6, a5, 0, fixup) - EX(s8i, a7, a5, 1, fixup) +EX(10f) s8i a6, a5, 0 +EX(10f) s8i a7, a5, 1 addi a5, a5, 2 .L14: bbci.l a4, 0, .L15 # copy 1 byte - EX(l8ui, a6, a3, 0, fixup) - EX(s8i, a6, a5, 0, fixup) +EX(10f) l8ui a6, a3, 0 +EX(10f) s8i a6, a5, 0 .L15: movi a2, 0 # return success for len bytes copied retw @@ -294,7 +286,7 @@ __xtensa_copy_user: */ -fixup: +10: sub a2, a5, a2 /* a2 <-- bytes copied */ sub a2, a11, a2 /* a2 <-- bytes not copied */ retw -- cgit v1.2.3 From fbb871e220672a8e9e4e7870da5b206fe05904b2 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 9 Dec 2017 21:21:35 -0800 Subject: xtensa: clean up word alignment macros in assembly code Remove duplicate definitions of ALIGN/src_b/__src_b and SSA8/ssa8/__ssa8 from assembly sources and 
put single definition into asm/asmmacro.h Signed-off-by: Max Filippov --- arch/xtensa/include/asm/asmmacro.h | 33 +++++++++++++++++++++++++ arch/xtensa/kernel/align.S | 5 +--- arch/xtensa/lib/memcopy.S | 49 +++++++++++++------------------------- arch/xtensa/lib/usercopy.S | 24 +++++++------------ 4 files changed, 59 insertions(+), 52 deletions(-) diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index d2a4415a4c08..7f2ae5872151 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -158,4 +158,37 @@ .previous \ 97: + +/* + * Extract unaligned word that is split between two registers w0 and w1 + * into r regardless of machine endianness. SAR must be loaded with the + * starting bit of the word (see __ssa8). + */ + + .macro __src_b r, w0, w1 +#ifdef __XTENSA_EB__ + src \r, \w0, \w1 +#else + src \r, \w1, \w0 +#endif + .endm + +/* + * Load 2 lowest address bits of r into SAR for __src_b to extract unaligned + * word starting at r from two registers loaded from consecutive aligned + * addresses covering r regardless of machine endianness. 
+ * + * r 0 1 2 3 + * LE SAR 0 8 16 24 + * BE SAR 32 24 16 8 + */ + + .macro __ssa8 r +#ifdef __XTENSA_EB__ + ssa8b \r +#else + ssa8l \r +#endif + .endm + #endif /* _XTENSA_ASMMACRO_H */ diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S index 24b3189d7841..9301452e521e 100644 --- a/arch/xtensa/kernel/align.S +++ b/arch/xtensa/kernel/align.S @@ -19,6 +19,7 @@ #include #include #include +#include #include #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION @@ -66,8 +67,6 @@ #define INSN_T 24 #define INSN_OP1 16 -.macro __src_b r, w0, w1; src \r, \w0, \w1; .endm -.macro __ssa8 r; ssa8b \r; .endm .macro __ssa8r r; ssa8l \r; .endm .macro __sh r, s; srl \r, \s; .endm .macro __sl r, s; sll \r, \s; .endm @@ -81,8 +80,6 @@ #define INSN_T 4 #define INSN_OP1 12 -.macro __src_b r, w0, w1; src \r, \w1, \w0; .endm -.macro __ssa8 r; ssa8l \r; .endm .macro __ssa8r r; ssa8b \r; .endm .macro __sh r, s; sll \r, \s; .endm .macro __sl r, s; srl \r, \s; .endm diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S index b1c219acabe7..9bda748a1e3e 100644 --- a/arch/xtensa/lib/memcopy.S +++ b/arch/xtensa/lib/memcopy.S @@ -10,22 +10,7 @@ */ #include - - .macro src_b r, w0, w1 -#ifdef __XTENSA_EB__ - src \r, \w0, \w1 -#else - src \r, \w1, \w0 -#endif - .endm - - .macro ssa8 r -#ifdef __XTENSA_EB__ - ssa8b \r -#else - ssa8l \r -#endif - .endm +#include /* * void *memcpy(void *dst, const void *src, size_t len); @@ -209,7 +194,7 @@ memcpy: .Lsrcunaligned: _beqz a4, .Ldone # avoid loading anything for zero-length copies # copy 16 bytes per iteration for word-aligned dst and unaligned src - ssa8 a3 # set shift amount from byte offset + __ssa8 a3 # set shift amount from byte offset /* set to 1 when running on ISS (simulator) with the lint or ferret client, or 0 to save a few cycles */ @@ -229,16 +214,16 @@ memcpy: .Loop2: l32i a7, a3, 4 l32i a8, a3, 8 - src_b a6, a6, a7 + __src_b a6, a6, a7 s32i a6, a5, 0 l32i a9, a3, 12 - src_b a7, a7, a8 + __src_b 
a7, a7, a8 s32i a7, a5, 4 l32i a6, a3, 16 - src_b a8, a8, a9 + __src_b a8, a8, a9 s32i a8, a5, 8 addi a3, a3, 16 - src_b a9, a9, a6 + __src_b a9, a9, a6 s32i a9, a5, 12 addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS @@ -249,10 +234,10 @@ memcpy: # copy 8 bytes l32i a7, a3, 4 l32i a8, a3, 8 - src_b a6, a6, a7 + __src_b a6, a6, a7 s32i a6, a5, 0 addi a3, a3, 8 - src_b a7, a7, a8 + __src_b a7, a7, a8 s32i a7, a5, 4 addi a5, a5, 8 mov a6, a8 @@ -261,7 +246,7 @@ memcpy: # copy 4 bytes l32i a7, a3, 4 addi a3, a3, 4 - src_b a6, a6, a7 + __src_b a6, a6, a7 s32i a6, a5, 0 addi a5, a5, 4 mov a6, a7 @@ -485,7 +470,7 @@ memmove: .Lbacksrcunaligned: _beqz a4, .Lbackdone # avoid loading anything for zero-length copies # copy 16 bytes per iteration for word-aligned dst and unaligned src - ssa8 a3 # set shift amount from byte offset + __ssa8 a3 # set shift amount from byte offset #define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS with * the lint or ferret client, or 0 * to save a few cycles */ @@ -506,15 +491,15 @@ memmove: l32i a7, a3, 12 l32i a8, a3, 8 addi a5, a5, -16 - src_b a6, a7, a6 + __src_b a6, a7, a6 s32i a6, a5, 12 l32i a9, a3, 4 - src_b a7, a8, a7 + __src_b a7, a8, a7 s32i a7, a5, 8 l32i a6, a3, 0 - src_b a8, a9, a8 + __src_b a8, a9, a8 s32i a8, a5, 4 - src_b a9, a6, a9 + __src_b a9, a6, a9 s32i a9, a5, 0 #if !XCHAL_HAVE_LOOPS bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start @@ -526,9 +511,9 @@ memmove: l32i a7, a3, 4 l32i a8, a3, 0 addi a5, a5, -8 - src_b a6, a7, a6 + __src_b a6, a7, a6 s32i a6, a5, 4 - src_b a7, a8, a7 + __src_b a7, a8, a7 s32i a7, a5, 0 mov a6, a8 .Lback12: @@ -537,7 +522,7 @@ memmove: addi a3, a3, -4 l32i a7, a3, 0 addi a5, a5, -4 - src_b a6, a7, a6 + __src_b a6, a7, a6 s32i a6, a5, 0 mov a6, a7 .Lback13: diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index 4172b73b0364..0959b6e71f11 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -56,14 +56,6 @@ #include #include -#ifdef 
__XTENSA_EB__ -#define ALIGN(R, W0, W1) src R, W0, W1 -#define SSA8(R) ssa8b R -#else -#define ALIGN(R, W0, W1) src R, W1, W0 -#define SSA8(R) ssa8l R -#endif - .text .align 4 .global __xtensa_copy_user @@ -81,7 +73,7 @@ __xtensa_copy_user: # per iteration movi a8, 3 # if source is also aligned, bnone a3, a8, .Laligned # then use word copy - SSA8( a3) # set shift amount from byte offset + __ssa8 a3 # set shift amount from byte offset bnez a4, .Lsrcunaligned movi a2, 0 # return success for len==0 retw @@ -220,16 +212,16 @@ EX(10f) l32i a6, a3, 0 # load first word .Loop2: EX(10f) l32i a7, a3, 4 EX(10f) l32i a8, a3, 8 - ALIGN( a6, a6, a7) + __src_b a6, a6, a7 EX(10f) s32i a6, a5, 0 EX(10f) l32i a9, a3, 12 - ALIGN( a7, a7, a8) + __src_b a7, a7, a8 EX(10f) s32i a7, a5, 4 EX(10f) l32i a6, a3, 16 - ALIGN( a8, a8, a9) + __src_b a8, a8, a9 EX(10f) s32i a8, a5, 8 addi a3, a3, 16 - ALIGN( a9, a9, a6) + __src_b a9, a9, a6 EX(10f) s32i a9, a5, 12 addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS @@ -240,10 +232,10 @@ EX(10f) s32i a9, a5, 12 # copy 8 bytes EX(10f) l32i a7, a3, 4 EX(10f) l32i a8, a3, 8 - ALIGN( a6, a6, a7) + __src_b a6, a6, a7 EX(10f) s32i a6, a5, 0 addi a3, a3, 8 - ALIGN( a7, a7, a8) + __src_b a7, a7, a8 EX(10f) s32i a7, a5, 4 addi a5, a5, 8 mov a6, a8 @@ -252,7 +244,7 @@ EX(10f) s32i a7, a5, 4 # copy 4 bytes EX(10f) l32i a7, a3, 4 addi a3, a3, 4 - ALIGN( a6, a6, a7) + __src_b a6, a6, a7 EX(10f) s32i a6, a5, 0 addi a5, a5, 4 mov a6, a7 -- cgit v1.2.3 From 5cf97ebd8b40e2b1791136fc1476d17365864b18 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 9 Dec 2017 21:22:37 -0800 Subject: xtensa: clean up functions in assembly code Use ENTRY and ENDPROC throughout arch/xtensa/lib assembly sources. Introduce asm/linkage.h and define xtensa-specific __ALIGN macro there. 
Signed-off-by: Max Filippov --- arch/xtensa/include/asm/linkage.h | 9 +++++++++ arch/xtensa/lib/memcopy.S | 30 ++++++++++-------------------- arch/xtensa/lib/memset.S | 8 ++++---- arch/xtensa/lib/strncpy_user.S | 8 ++++---- arch/xtensa/lib/strnlen_user.S | 9 +++++---- arch/xtensa/lib/usercopy.S | 8 ++++---- 6 files changed, 36 insertions(+), 36 deletions(-) create mode 100644 arch/xtensa/include/asm/linkage.h diff --git a/arch/xtensa/include/asm/linkage.h b/arch/xtensa/include/asm/linkage.h new file mode 100644 index 000000000000..0ba9973235d9 --- /dev/null +++ b/arch/xtensa/include/asm/linkage.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#define __ALIGN .align 4 +#define __ALIGN_STR ".align 4" + +#endif diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S index 9bda748a1e3e..24d650864c3a 100644 --- a/arch/xtensa/lib/memcopy.S +++ b/arch/xtensa/lib/memcopy.S @@ -9,6 +9,7 @@ * Copyright (C) 2002 - 2012 Tensilica Inc. 
*/ +#include #include #include @@ -108,10 +109,7 @@ addi a5, a5, 2 j .Ldstaligned # dst is now aligned, return to main algorithm - .align 4 - .global memcpy - .type memcpy,@function -memcpy: +ENTRY(memcpy) entry sp, 16 # minimal stack frame # a2/ dst, a3/ src, a4/ len @@ -273,14 +271,14 @@ memcpy: s8i a6, a5, 0 retw +ENDPROC(memcpy) /* * void bcopy(const void *src, void *dest, size_t n); */ - .align 4 - .global bcopy - .type bcopy,@function -bcopy: + +ENTRY(bcopy) + entry sp, 16 # minimal stack frame # a2=src, a3=dst, a4=len mov a5, a3 @@ -288,6 +286,8 @@ bcopy: mov a2, a5 j .Lmovecommon # go to common code for memmove+bcopy +ENDPROC(bcopy) + /* * void *memmove(void *dst, const void *src, size_t len); * @@ -376,10 +376,7 @@ bcopy: j .Lbackdstaligned # dst is now aligned, # return to main algorithm - .align 4 - .global memmove - .type memmove,@function -memmove: +ENTRY(memmove) entry sp, 16 # minimal stack frame # a2/ dst, a3/ src, a4/ len @@ -551,11 +548,4 @@ memmove: s8i a6, a5, 0 retw - -/* - * Local Variables: - * mode:fundamental - * comment-start: "# " - * comment-start-skip: "# *" - * End: - */ +ENDPROC(memmove) diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S index 7a724edaf4f1..a6cd04ba966f 100644 --- a/arch/xtensa/lib/memset.S +++ b/arch/xtensa/lib/memset.S @@ -11,6 +11,7 @@ * Copyright (C) 2002 Tensilica Inc. 
*/ +#include #include #include @@ -30,10 +31,8 @@ */ .text -.align 4 -.global memset -.type memset,@function -memset: +ENTRY(memset) + entry sp, 16 # minimal stack frame # a2/ dst, a3/ c, a4/ length extui a3, a3, 0, 8 # mask to just 8 bits @@ -141,6 +140,7 @@ EX(10f) s8i a3, a5, 0 .Lbytesetdone: retw +ENDPROC(memset) .section .fixup, "ax" .align 4 diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S index 827e1b393f3f..5fce16b67dca 100644 --- a/arch/xtensa/lib/strncpy_user.S +++ b/arch/xtensa/lib/strncpy_user.S @@ -12,6 +12,7 @@ */ #include +#include #include #include @@ -47,10 +48,8 @@ # a12/ tmp .text -.align 4 -.global __strncpy_user -.type __strncpy_user,@function -__strncpy_user: +ENTRY(__strncpy_user) + entry sp, 16 # minimal stack frame # a2/ dst, a3/ src, a4/ len mov a11, a2 # leave dst in return value register @@ -202,6 +201,7 @@ EX(10f) s8i a9, a11, 0 sub a2, a11, a2 # compute strlen retw +ENDPROC(__strncpy_user) .section .fixup, "ax" .align 4 diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S index 9404ac46ce4c..0b956ce7f386 100644 --- a/arch/xtensa/lib/strnlen_user.S +++ b/arch/xtensa/lib/strnlen_user.S @@ -11,6 +11,7 @@ * Copyright (C) 2002 Tensilica Inc. 
*/ +#include #include #include @@ -42,10 +43,8 @@ # a10/ tmp .text -.align 4 -.global __strnlen_user -.type __strnlen_user,@function -__strnlen_user: +ENTRY(__strnlen_user) + entry sp, 16 # minimal stack frame # a2/ s, a3/ len addi a4, a2, -4 # because we overincrement at the end; @@ -133,6 +132,8 @@ EX(10f) l32i a9, a4, 0 # get word with first two bytes of string sub a2, a4, a2 # subtract to get length retw +ENDPROC(__strnlen_user) + .section .fixup, "ax" .align 4 10: diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index 0959b6e71f11..64ab1971324f 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -53,14 +53,13 @@ * a11/ original length */ +#include #include #include .text - .align 4 - .global __xtensa_copy_user - .type __xtensa_copy_user,@function -__xtensa_copy_user: +ENTRY(__xtensa_copy_user) + entry sp, 16 # minimal stack frame # a2/ dst, a3/ src, a4/ len mov a5, a2 # copy dst so that a2 is return value @@ -267,6 +266,7 @@ EX(10f) s8i a6, a5, 0 movi a2, 0 # return success for len bytes copied retw +ENDPROC(__xtensa_copy_user) .section .fixup, "ax" .align 4 -- cgit v1.2.3 From f4431396be5b26a9960daf502d129b1b5d126f5e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 4 Dec 2017 10:47:43 -0800 Subject: xtensa: consolidate kernel stack size related definitions Define kernel stack size in kmem_layout and use it in current_thread_info, GET_THREAD_INFO, THREAD_SIZE and THREAD_SIZE_ORDER definitions.
Signed-off-by: Max Filippov --- arch/xtensa/include/asm/current.h | 4 ++-- arch/xtensa/include/asm/kmem_layout.h | 3 +++ arch/xtensa/include/asm/ptrace.h | 3 +-- arch/xtensa/include/asm/thread_info.h | 13 +++++++------ 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h index 47e46dcf5d49..5d98a7ad4251 100644 --- a/arch/xtensa/include/asm/current.h +++ b/arch/xtensa/include/asm/current.h @@ -11,6 +11,8 @@ #ifndef _XTENSA_CURRENT_H #define _XTENSA_CURRENT_H +#include + #ifndef __ASSEMBLY__ #include @@ -26,8 +28,6 @@ static inline struct task_struct *get_current(void) #else -#define CURRENT_SHIFT 13 - #define GET_CURRENT(reg,sp) \ GET_THREAD_INFO(reg,sp); \ l32i reg, reg, TI_TASK \ diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h index 561f8729bcde..28f9260a766c 100644 --- a/arch/xtensa/include/asm/kmem_layout.h +++ b/arch/xtensa/include/asm/kmem_layout.h @@ -71,4 +71,7 @@ #endif +#define KERNEL_STACK_SHIFT 13 +#define KERNEL_STACK_SIZE (1 << KERNEL_STACK_SHIFT) + #endif diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index e2d9c5eb10bd..05beae3c6376 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -10,6 +10,7 @@ #ifndef _XTENSA_PTRACE_H #define _XTENSA_PTRACE_H +#include #include /* @@ -38,8 +39,6 @@ * +-----------------------+ -------- */ -#define KERNEL_STACK_SIZE (2 * PAGE_SIZE) - /* Offsets for exception_handlers[] (3 x 64-entries x 4-byte tables). 
*/ #define EXC_TABLE_KSTK 0x004 /* Kernel Stack */ diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 7be2400f745a..71c9865218ce 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -11,7 +11,9 @@ #ifndef _XTENSA_THREAD_INFO_H #define _XTENSA_THREAD_INFO_H -#ifdef __KERNEL__ +#include + +#define CURRENT_SHIFT KERNEL_STACK_SHIFT #ifndef __ASSEMBLY__ # include @@ -84,7 +86,7 @@ struct thread_info { static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - __asm__("extui %0,a1,0,13\n\t" + __asm__("extui %0, a1, 0, "__stringify(CURRENT_SHIFT)"\n\t" "xor %0, a1, %0" : "=&r" (ti) : ); return ti; } @@ -93,7 +95,7 @@ static inline struct thread_info *current_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg,sp) \ - extui reg, sp, 0, 13; \ + extui reg, sp, 0, CURRENT_SHIFT; \ xor reg, sp, reg #endif @@ -130,8 +132,7 @@ static inline struct thread_info *current_thread_info(void) */ #define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */ -#define THREAD_SIZE 8192 //(2*PAGE_SIZE) -#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE KERNEL_STACK_SIZE +#define THREAD_SIZE_ORDER (KERNEL_STACK_SHIFT - PAGE_SHIFT) -#endif /* __KERNEL__ */ #endif /* _XTENSA_THREAD_INFO */ -- cgit v1.2.3 From aa6476f76c1678d5d1087b39d3047601f0139ef0 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 8 Aug 2017 14:06:14 -0700 Subject: xtensa: print hardware config ID on startup Print hardware config ID on startup and config ID recorded in the configuration if it doesn't match one read from the hardware. 
Signed-off-by: Max Filippov --- arch/xtensa/kernel/setup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 253a0178f1bd..3732c91b7200 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -317,6 +317,13 @@ static inline int mem_reserve(unsigned long start, unsigned long end) void __init setup_arch(char **cmdline_p) { + pr_info("config ID: %08x:%08x\n", + get_sr(SREG_EPC), get_sr(SREG_EXCSAVE)); + if (get_sr(SREG_EPC) != XCHAL_HW_CONFIGID0 || + get_sr(SREG_EXCSAVE) != XCHAL_HW_CONFIGID1) + pr_info("built for config ID: %08x:%08x\n", + XCHAL_HW_CONFIGID0, XCHAL_HW_CONFIGID1); + *cmdline_p = command_line; platform_setup(cmdline_p); strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); @@ -582,12 +589,14 @@ c_show(struct seq_file *f, void *slot) "model\t\t: Xtensa " XCHAL_HW_VERSION_NAME "\n" "core ID\t\t: " XCHAL_CORE_ID "\n" "build ID\t: 0x%x\n" + "config ID\t: %08x:%08x\n" "byte order\t: %s\n" "cpu MHz\t\t: %lu.%02lu\n" "bogomips\t: %lu.%02lu\n", num_online_cpus(), cpumask_pr_args(cpu_online_mask), XCHAL_BUILD_UNIQUE_ID, + get_sr(SREG_EPC), get_sr(SREG_EXCSAVE), XCHAL_HAVE_BE ? "big" : "little", ccount_freq/1000000, (ccount_freq/10000) % 100, -- cgit v1.2.3 From 6f6a23a213be51728502b88741ba6a10cda2441d Mon Sep 17 00:00:00 2001 From: Adam Wallis Date: Mon, 27 Nov 2017 10:45:01 -0500 Subject: dmaengine: dmatest: move callback wait queue to thread context Commit adfa543e7314 ("dmatest: don't use set_freezable_with_signal()") introduced a bug (that is in fact documented by the patch commit text) that leaves behind a dangling pointer. Since the done_wait structure is allocated on the stack, future invocations to the DMATEST can produce undesirable results (e.g., corrupted spinlocks). Commit a9df21e34b42 ("dmaengine: dmatest: warn user when dma test times out") attempted to WARN the user that the stack was likely corrupted but did not fix the actual issue. 
This patch fixes the issue by pushing the wait queue and callback structs into the thread structure. If a failure occurs due to time, dmaengine_terminate_all will force the callback to safely call wake_up_all() without possibility of using a freed pointer. Cc: stable@vger.kernel.org Bug: https://bugzilla.kernel.org/show_bug.cgi?id=197605 Fixes: adfa543e7314 ("dmatest: don't use set_freezable_with_signal()") Reviewed-by: Sinan Kaya Suggested-by: Shunyong Yang Signed-off-by: Adam Wallis Signed-off-by: Vinod Koul --- drivers/dma/dmatest.c | 55 +++++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 47edc7fbf91f..ec5f9d2bc820 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -155,6 +155,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)"); #define PATTERN_COUNT_MASK 0x1f #define PATTERN_MEMSET_IDX 0x01 +/* poor man's completion - we want to use wait_event_freezable() on it */ +struct dmatest_done { + bool done; + wait_queue_head_t *wait; +}; + struct dmatest_thread { struct list_head node; struct dmatest_info *info; @@ -165,6 +171,8 @@ struct dmatest_thread { u8 **dsts; u8 **udsts; enum dma_transaction_type type; + wait_queue_head_t done_wait; + struct dmatest_done test_done; bool done; }; @@ -342,18 +350,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start, return error_count; } -/* poor man's completion - we want to use wait_event_freezable() on it */ -struct dmatest_done { - bool done; - wait_queue_head_t *wait; -}; static void dmatest_callback(void *arg) { struct dmatest_done *done = arg; - - done->done = true; - wake_up_all(done->wait); + struct dmatest_thread *thread = + container_of(arg, struct dmatest_thread, done_wait); + if (!thread->done) { + done->done = true; + wake_up_all(done->wait); + } else { + /* + * If thread->done, it means that this callback occurred + * after the parent thread has cleaned up.
This can + * happen in the case that driver doesn't implement + * the terminate_all() functionality and a dma operation + * did not occur within the timeout period + */ + WARN(1, "dmatest: Kernel memory may be corrupted!!\n"); + } } static unsigned int min_odd(unsigned int x, unsigned int y) @@ -424,9 +439,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len) */ static int dmatest_func(void *data) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait); struct dmatest_thread *thread = data; - struct dmatest_done done = { .wait = &done_wait }; + struct dmatest_done *done = &thread->test_done; struct dmatest_info *info; struct dmatest_params *params; struct dma_chan *chan; @@ -673,9 +687,9 @@ static int dmatest_func(void *data) continue; } - done.done = false; + done->done = false; tx->callback = dmatest_callback; - tx->callback_param = &done; + tx->callback_param = done; cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { @@ -688,21 +702,12 @@ static int dmatest_func(void *data) } dma_async_issue_pending(chan); - wait_event_freezable_timeout(done_wait, done.done, + wait_event_freezable_timeout(thread->done_wait, done->done, msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); - if (!done.done) { - /* - * We're leaving the timed out dma operation with - * dangling pointer to done_wait. To make this - * correct, we'll need to allocate wait_done for - * each test iteration and perform "who's gonna - * free it this time?" dancing. For now, just - * leave it dangling. 
- */ - WARN(1, "dmatest: Kernel stack may be corrupted!!\n"); + if (!done->done) { dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); @@ -789,7 +794,7 @@ err_thread_type: dmatest_KBs(runtime, total_len), ret); /* terminate all transfers on specified channels */ - if (ret) + if (ret || failed_tests) dmaengine_terminate_all(chan); thread->done = true; @@ -849,6 +854,8 @@ static int dmatest_add_threads(struct dmatest_info *info, thread->info = info; thread->chan = dtc->chan; thread->type = type; + thread->test_done.wait = &thread->done_wait; + init_waitqueue_head(&thread->done_wait); smp_wmb(); thread->task = kthread_create(dmatest_func, thread, "%s-%s%u", dma_chan_name(chan), op, i); -- cgit v1.2.3 From eb9436966fdc84cebdf222952a99898ab46d9bb0 Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Wed, 6 Dec 2017 14:28:27 +0100 Subject: dmaengine: jz4740: disable/unprepare clk if probe fails in error path of jz4740_dma_probe(), call clk_disable_unprepare() to clean up. Found by Linux Driver Verification project (linuxtesting.org). 
Fixes: 25ce6c35fea0 MIPS: jz4740: Remove custom DMA API Signed-off-by: Tobias Jordan Signed-off-by: Vinod Koul --- drivers/dma/dma-jz4740.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c index d50273fed715..afd5e10f8927 100644 --- a/drivers/dma/dma-jz4740.c +++ b/drivers/dma/dma-jz4740.c @@ -555,7 +555,7 @@ static int jz4740_dma_probe(struct platform_device *pdev) ret = dma_async_device_register(dd); if (ret) - return ret; + goto err_clk; irq = platform_get_irq(pdev, 0); ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev); @@ -568,6 +568,8 @@ static int jz4740_dma_probe(struct platform_device *pdev) err_unregister: dma_async_device_unregister(dd); +err_clk: + clk_disable_unprepare(dmadev->clk); return ret; } -- cgit v1.2.3 From 996fc4477a0ea28226b30d175f053fb6f9a4fa36 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 10 Dec 2017 23:44:11 -0500 Subject: ext4: add missing error check in __ext4_new_inode() It's possible for ext4_get_acl() to return an ERR_PTR. So we need to add a check for this case in __ext4_new_inode(). Otherwise on an error we can end up oopsing the kernel. This was getting triggered by xfstests generic/388, which is a test that exercises the shutdown code path.
Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/ialloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b4267d72f249..b32cf263750d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -816,6 +816,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, #ifdef CONFIG_EXT4_FS_POSIX_ACL struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(p)) + return ERR_CAST(p); if (p) { int acl_size = p->a_count * sizeof(ext4_acl_entry); -- cgit v1.2.3 From f5f00e7dcc4161f07b76ff1a854e8b1ea7a1ed41 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Tue, 5 Dec 2017 14:45:32 +0800 Subject: drm/i915/gvt: Fix pipe A enable as default for vgpu Observed an igt drv_module_reload test case failure on the 4.15.0-rc2 kernel, with a panic due to no active pipe being available. The GPU is reset during unload/load, which loses the pipe config register and can cause a kernel panic. This patch moves pipe enabling to emulate_monitor_status_change() to handle the vgpu reset case as well. Fixes: 7e6059020894 ("drm/i915/gvt: enabled pipe A default on creating vgpu") Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 355120865efd..309f3fa6794a 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -266,6 +266,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) /* Clear host CRT status, so guest couldn't detect this host CRT.
*/ if (IS_BROADWELL(dev_priv)) vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + + vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) @@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, int type, unsigned int resolution) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); if (WARN_ON(resolution >= GVT_EDID_NUM)) @@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); - vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; + return 0; } -- cgit v1.2.3 From 0afe9d4ab9d40c281bdcdd118661fe8e4bdcef18 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 9 Dec 2017 21:10:10 +0100 Subject: mac80211: fix locking in ieee80211_sta_tear_down_BA_sessions Due to overlap between commit 1281103770e9 ("mac80211: Simplify locking in ieee80211_sta_tear_down_BA_sessions()") and the way that Luca modified commit 72e2c3438ba3 ("mac80211: tear down RX aggregations first") when sending it upstream from Intel's internal tree, we get the following warning: WARNING: CPU: 0 PID: 5472 at net/mac80211/agg-tx.c:315 ___ieee80211_stop_tx_ba_session+0x158/0x1f0 since there's no appropriate locking around the call to ___ieee80211_stop_tx_ba_session; Sara's original just had a call to the locked __ieee80211_stop_tx_ba_session (one less underscore) but it looks like Luca modified both of the calls when fixing it up for upstream, leading to the problem at hand. Move the locking appropriately to fix this problem. 
Reported-by: Kalle Valo Reported-by: Pavel Machek Tested-by: Pavel Machek Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 167f83b853e6..1621b6ab17ba 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -291,16 +291,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, int i; mutex_lock(&sta->ampdu_mlme.mtx); - for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + for (i = 0; i < IEEE80211_NUM_TIDS; i++) ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_LEAVE_QBSS, reason != AGG_STOP_DESTROY_STA && reason != AGG_STOP_PEER_REQUEST); - } - mutex_unlock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) ___ieee80211_stop_tx_ba_session(sta, i, reason); + mutex_unlock(&sta->ampdu_mlme.mtx); /* stopping might queue the work again - so cancel only afterwards */ cancel_work_sync(&sta->ampdu_mlme.work); -- cgit v1.2.3 From 438c84c2f0c794f75ab55ce65c505b01bfce4480 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 11 Dec 2017 11:28:10 +0100 Subject: ovl: don't follow redirects if redirect_dir=off Overlayfs is following redirects even when redirects are disabled. If this is unintentional (probably the majority of cases) then this can be a problem. E.g. upper layer comes from untrusted USB drive, and attacker crafts a redirect to enable read access to otherwise unreadable directories. If "redirect_dir=off", then turn off following as well as creation of redirects. If "redirect_dir=follow", then turn on following, but turn off creation of redirects (which is what "redirect_dir=off" does now). This is a backward incompatible change, so make it dependent on a config option. 
Reported-by: David Howells Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.txt | 34 +++++++++++++++++ fs/overlayfs/Kconfig | 10 +++++ fs/overlayfs/namei.c | 16 ++++++++ fs/overlayfs/ovl_entry.h | 2 + fs/overlayfs/super.c | 68 ++++++++++++++++++++++++--------- 5 files changed, 113 insertions(+), 17 deletions(-) diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 8caa60734647..e6a5f4912b6d 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -156,6 +156,40 @@ handle it in two different ways: root of the overlay. Finally the directory is moved to the new location. +There are several ways to tune the "redirect_dir" feature. + +Kernel config options: + +- OVERLAY_FS_REDIRECT_DIR: + If this is enabled, then redirect_dir is turned on by default. +- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW: + If this is enabled, then redirects are always followed by default. Enabling + this results in a less secure configuration. Enable this option only when + worried about backward compatibility with kernels that have the redirect_dir + feature and follow redirects even if turned off. + +Module options (can also be changed through /sys/module/overlay/parameters/*): + +- "redirect_dir=BOOL": + See OVERLAY_FS_REDIRECT_DIR kernel config option above. +- "redirect_always_follow=BOOL": + See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above. +- "redirect_max=NUM": + The maximum number of bytes in an absolute redirect (default is 256). + +Mount options: + +- "redirect_dir=on": + Redirects are enabled. +- "redirect_dir=follow": + Redirects are not created, but followed. +- "redirect_dir=off": + Redirects are not created and only followed if "redirect_always_follow" + feature is enabled in the kernel/module config. +- "redirect_dir=nofollow": + Redirects are not created and not followed (equivalent to "redirect_dir=off" + if "redirect_always_follow" feature is not enabled). 
+ Non-directories --------------- diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index cbfc196e5dc5..5ac415466861 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -24,6 +24,16 @@ config OVERLAY_FS_REDIRECT_DIR an overlay which has redirects on a kernel that doesn't support this feature will have unexpected results. +config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW + bool "Overlayfs: follow redirects even if redirects are turned off" + default y + depends on OVERLAY_FS + help + Disable this to get a possibly more secure configuration, but that + might not be backward compatible with previous kernels. + + For more information, see Documentation/filesystems/overlayfs.txt + config OVERLAY_FS_INDEX bool "Overlayfs: turn on inodes index feature by default" depends on OVERLAY_FS diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 625ed8066570..2a12dc2e9840 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -681,6 +681,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, if (d.stop) break; + /* + * Following redirects can have security consequences: it's like + * a symlink into the lower layer without the permission checks. + * This is only a problem if the upper layer is untrusted (e.g + * comes from an USB drive). This can allow a non-readable file + * or directory to become readable. + * + * Only following redirects when redirects are enabled disables + * this attack vector when not necessary. 
+ */ + err = -EPERM; + if (d.redirect && !ofs->config.redirect_follow) { + pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry); + goto out_put; + } + if (d.redirect && d.redirect[0] == '/' && poe != roe) { poe = roe; diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 752bab645879..9d0bc03bf6e4 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -14,6 +14,8 @@ struct ovl_config { char *workdir; bool default_permissions; bool redirect_dir; + bool redirect_follow; + const char *redirect_mode; bool index; }; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 288d20f9a55a..13a8a8617e44 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -33,6 +33,13 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); MODULE_PARM_DESC(ovl_redirect_dir_def, "Default to on or off for the redirect_dir feature"); +static bool ovl_redirect_always_follow = + IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); +module_param_named(redirect_always_follow, ovl_redirect_always_follow, + bool, 0644); +MODULE_PARM_DESC(ovl_redirect_always_follow, + "Follow redirects even if redirect_dir feature is turned off"); + static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); module_param_named(index, ovl_index_def, bool, 0644); MODULE_PARM_DESC(ovl_index_def, @@ -232,6 +239,7 @@ static void ovl_free_fs(struct ovl_fs *ofs) kfree(ofs->config.lowerdir); kfree(ofs->config.upperdir); kfree(ofs->config.workdir); + kfree(ofs->config.redirect_mode); if (ofs->creator_cred) put_cred(ofs->creator_cred); kfree(ofs); @@ -295,6 +303,11 @@ static bool ovl_force_readonly(struct ovl_fs *ofs) return (!ofs->upper_mnt || !ofs->workdir); } +static const char *ovl_redirect_mode_def(void) +{ + return ovl_redirect_dir_def ? 
"on" : "off"; +} + /** * ovl_show_options * @@ -313,12 +326,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) } if (ofs->config.default_permissions) seq_puts(m, ",default_permissions"); - if (ofs->config.redirect_dir != ovl_redirect_dir_def) - seq_printf(m, ",redirect_dir=%s", - ofs->config.redirect_dir ? "on" : "off"); + if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0) + seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); if (ofs->config.index != ovl_index_def) - seq_printf(m, ",index=%s", - ofs->config.index ? "on" : "off"); + seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); return 0; } @@ -348,8 +359,7 @@ enum { OPT_UPPERDIR, OPT_WORKDIR, OPT_DEFAULT_PERMISSIONS, - OPT_REDIRECT_DIR_ON, - OPT_REDIRECT_DIR_OFF, + OPT_REDIRECT_DIR, OPT_INDEX_ON, OPT_INDEX_OFF, OPT_ERR, @@ -360,8 +370,7 @@ static const match_table_t ovl_tokens = { {OPT_UPPERDIR, "upperdir=%s"}, {OPT_WORKDIR, "workdir=%s"}, {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, - {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, - {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, + {OPT_REDIRECT_DIR, "redirect_dir=%s"}, {OPT_INDEX_ON, "index=on"}, {OPT_INDEX_OFF, "index=off"}, {OPT_ERR, NULL} @@ -390,10 +399,37 @@ static char *ovl_next_opt(char **s) return sbegin; } +static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) +{ + if (strcmp(mode, "on") == 0) { + config->redirect_dir = true; + /* + * Does not make sense to have redirect creation without + * redirect following. 
+ */ + config->redirect_follow = true; + } else if (strcmp(mode, "follow") == 0) { + config->redirect_follow = true; + } else if (strcmp(mode, "off") == 0) { + if (ovl_redirect_always_follow) + config->redirect_follow = true; + } else if (strcmp(mode, "nofollow") != 0) { + pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n", + mode); + return -EINVAL; + } + + return 0; +} + static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; + config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); + if (!config->redirect_mode) + return -ENOMEM; + while ((p = ovl_next_opt(&opt)) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; @@ -428,12 +464,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->default_permissions = true; break; - case OPT_REDIRECT_DIR_ON: - config->redirect_dir = true; - break; - - case OPT_REDIRECT_DIR_OFF: - config->redirect_dir = false; + case OPT_REDIRECT_DIR: + kfree(config->redirect_mode); + config->redirect_mode = match_strdup(&args[0]); + if (!config->redirect_mode) + return -ENOMEM; break; case OPT_INDEX_ON: @@ -458,7 +493,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) config->workdir = NULL; } - return 0; + return ovl_parse_redirect_mode(config, config->redirect_mode); } #define OVL_WORKDIR_NAME "work" @@ -1160,7 +1195,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!cred) goto out_err; - ofs->config.redirect_dir = ovl_redirect_dir_def; ofs->config.index = ovl_index_def; err = ovl_parse_opt((char *) data, &ofs->config); if (err) -- cgit v1.2.3 From 08d8f8a5b094b66b29936e8751b4a818b8db1207 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 27 Nov 2017 10:12:44 -0500 Subject: ovl: Pass ovl_get_nlink() parameters in right order Right now we seem to be passing index as "lowerdentry" and origin.dentry as "upperdentry". IIUC, we should pass these parameters in reversed order and this looks like a bug. 
Signed-off-by: Vivek Goyal Acked-by: Amir Goldstein Fixes: caf70cb2ba5d ("ovl: cleanup orphan index entries") Cc: #v4.13 Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 2a12dc2e9840..beb945e1963c 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -435,7 +435,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower, /* Check if index is orphan and don't warn before cleaning it */ if (d_inode(index)->i_nlink == 1 && - ovl_get_nlink(index, origin.dentry, 0) == 0) + ovl_get_nlink(origin.dentry, index, 0) == 0) err = -ENOENT; dput(origin.dentry); -- cgit v1.2.3 From b02a16e6413a2f782e542ef60bad9ff6bf212f8a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 29 Nov 2017 07:35:21 +0200 Subject: ovl: update ctx->pos on impure dir iteration This fixes a regression with readdir of impure dir in overlayfs that is shared to VM via 9p fs. Reported-by: Miguel Bernal Marin Fixes: 4edb83bb1041 ("ovl: constant d_ino for non-merge dirs") Cc: #4.14 Signed-off-by: Amir Goldstein Tested-by: Miguel Bernal Marin Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0daa4354fec4..51088849ce97 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -663,7 +663,10 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx) return PTR_ERR(rdt.cache); } - return iterate_dir(od->realfile, &rdt.ctx); + err = iterate_dir(od->realfile, &rdt.ctx); + ctx->pos = rdt.ctx.pos; + + return err; } -- cgit v1.2.3 From e8d4bfe3a71537284a90561f77c85dea6c154369 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 29 Nov 2017 10:01:32 +0800 Subject: ovl: Sync upper dirty data when syncing overlayfs When executing filesystem sync or umount on overlayfs, dirty data does not get synced as expected on upper 
filesystem. This patch fixes sync filesystem method to keep data consistency for overlayfs. Signed-off-by: Chengguang Xu Fixes: e593b2bf513d ("ovl: properly implement sync_filesystem()") Cc: #4.11 Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 13a8a8617e44..76440feb79f6 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -252,6 +252,7 @@ static void ovl_put_super(struct super_block *sb) ovl_free_fs(ofs); } +/* Sync real dirty inodes in upper filesystem (if it exists) */ static int ovl_sync_fs(struct super_block *sb, int wait) { struct ovl_fs *ofs = sb->s_fs_info; @@ -260,14 +261,24 @@ static int ovl_sync_fs(struct super_block *sb, int wait) if (!ofs->upper_mnt) return 0; - upper_sb = ofs->upper_mnt->mnt_sb; - if (!upper_sb->s_op->sync_fs) + + /* + * If this is a sync(2) call or an emergency sync, all the super blocks + * will be iterated, including upper_sb, so no need to do anything. + * + * If this is a syncfs(2) call, then we do need to call + * sync_filesystem() on upper_sb, but enough if we do it when being + * called with wait == 1. 
+ */ + if (!wait) return 0; - /* real inodes have already been synced by sync_filesystem(ovl_sb) */ + upper_sb = ofs->upper_mnt->mnt_sb; + down_read(&upper_sb->s_umount); - ret = upper_sb->s_op->sync_fs(upper_sb, wait); + ret = sync_filesystem(upper_sb); up_read(&upper_sb->s_umount); + return ret; } -- cgit v1.2.3 From 7879cb43f9a75710af439c6bd81c94de1aa3d740 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Tue, 28 Nov 2017 00:09:23 +0100 Subject: ovl: Use PTR_ERR_OR_ZERO() Fix ptr_ret.cocci warnings: fs/overlayfs/overlayfs.h:179:11-17: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 13eab09a6b6f..b489099ccd49 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -180,7 +180,7 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) { struct dentry *ret = vfs_tmpfile(dentry, mode, 0); - int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; + int err = PTR_ERR_OR_ZERO(ret); pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); return ret; -- cgit v1.2.3 From 8722e095f5a44d0e409e45c5ddc2ee9cf589c777 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Thu, 30 Nov 2017 15:31:06 +0000 Subject: usb: dwc3: gadget: Wait longer for controller to end command processing DWC3_DEPCMD_ENDTRANSFER has been witnessed to require around 600 iterations before controller would become idle again after unplugging the USB cable with AIO reads submitted. 
Bump timeout from 500 iterations to 1000 so dwc3_stop_active_transfer does not receive -ETIMEDOUT and does not WARN: [ 81.326273] ------------[ cut here ]------------ [ 81.335341] WARNING: CPU: 0 PID: 1874 at drivers/usb/dwc3/gadget.c:2627 dwc3_stop_active_transfer.constprop.23+0x69/0xc0 [dwc3] [ 81.347094] Modules linked in: usb_f_fs libcomposite configfs bnep btsdio bluetooth ecdh_generic brcmfmac brcmutil dwc3 intel_powerclamp coretemp ulpi kvm_intel udc_core kvm irqbypass crc32_pclmul crc32c_intel pcbc dwc3_pci aesni_intel aes_i586 crypto_simd cryptd ehci_pci ehci_hcd basincove_gpadc industrialio gpio_keys usbcore usb_common [ 81.378142] CPU: 0 PID: 1874 Comm: irq/34-dwc3 Not tainted 4.14.0-edison+ #119 [ 81.385545] Hardware name: Intel Corporation Merrifield/BODEGA BAY, BIOS 542 2015.01.21:18.19.48 [ 81.394548] task: f5b1be00 task.stack: f420a000 [ 81.399219] EIP: dwc3_stop_active_transfer.constprop.23+0x69/0xc0 [dwc3] [ 81.406086] EFLAGS: 00010086 CPU: 0 [ 81.409672] EAX: 0000001f EBX: f5729800 ECX: c132a2a2 EDX: 00000000 [ 81.416096] ESI: f4054014 EDI: f41cf400 EBP: f420be10 ESP: f420bdf4 [ 81.422521] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 81.428061] CR0: 80050033 CR2: b7a3f000 CR3: 01d94000 CR4: 001006d0 [ 81.434483] Call Trace: [ 81.437063] __dwc3_gadget_ep_disable+0xa3/0x2b0 [dwc3] [ 81.442438] ? _raw_spin_lock_irqsave+0x32/0x40 [ 81.447135] dwc3_gadget_ep_disable+0xbf/0xe0 [dwc3] [ 81.452269] usb_ep_disable+0x1c/0xd0 [udc_core] [ 81.457048] ffs_func_eps_disable.isra.15+0x3b/0x90 [usb_f_fs] [ 81.463070] ffs_func_set_alt+0x7d/0x310 [usb_f_fs] [ 81.468132] ffs_func_disable+0x14/0x20 [usb_f_fs] [ 81.473075] reset_config+0x5b/0x90 [libcomposite] [ 81.478023] composite_disconnect+0x2b/0x50 [libcomposite] [ 81.483685] dwc3_disconnect_gadget+0x39/0x50 [dwc3] [ 81.488808] dwc3_gadget_disconnect_interrupt+0x21b/0x250 [dwc3] [ 81.495014] dwc3_thread_interrupt+0x2a8/0xf70 [dwc3] [ 81.500219] ? 
__schedule+0x78c/0x7e0 [ 81.504027] irq_thread_fn+0x18/0x30 [ 81.507715] ? irq_thread+0xb7/0x180 [ 81.511400] irq_thread+0x111/0x180 [ 81.515000] ? irq_finalize_oneshot+0xe0/0xe0 [ 81.519490] ? wake_threads_waitq+0x30/0x30 [ 81.523806] kthread+0x107/0x110 [ 81.527131] ? disable_percpu_irq+0x50/0x50 [ 81.531439] ? kthread_stop+0x150/0x150 [ 81.535397] ret_from_fork+0x19/0x24 [ 81.539136] Code: 89 d8 c7 45 ec 00 00 00 00 c7 45 f0 00 00 00 00 c7 45 f4 00 00 00 00 e8 56 ef ff ff 85 c0 74 12 50 68 b9 1c 14 f8 e8 64 0f f7 c8 <0f> ff 58 5a 8d 76 00 8b 83 98 00 00 00 c6 83 a0 00 00 00 00 83 [ 81.559295] ---[ end trace f3133eec81a473b8 ]--- Number of iterations measured on 4 consecutive unplugs: [ 1088.799777] dwc3_send_gadget_ep_cmd(cmd=331016, params={0, 0, 0}) iterated 605 times [ 1222.024986] dwc3_send_gadget_ep_cmd(cmd=331016, params={0, 0, 0}) iterated 580 times [ 1317.590452] dwc3_send_gadget_ep_cmd(cmd=331016, params={0, 0, 0}) iterated 598 times [ 1453.218314] dwc3_send_gadget_ep_cmd(cmd=331016, params={0, 0, 0}) iterated 594 times Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 981fd986cf82..01e595bb1ff1 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -259,7 +259,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd, { const struct usb_endpoint_descriptor *desc = dep->endpoint.desc; struct dwc3 *dwc = dep->dwc; - u32 timeout = 500; + u32 timeout = 1000; u32 reg; int cmd_status = 0; -- cgit v1.2.3 From ded600ea9fb51a495d2fcd21e90351df876488e8 Mon Sep 17 00:00:00 2001 From: Andreas Platschek Date: Thu, 7 Dec 2017 11:32:20 +0100 Subject: usb: dwc3: of-simple: fix missing clk_disable_unprepare If of_clk_get() fails, the clean-up of already initialized clocks should be the same as when clk_prepare_enable() fails. 
Thus a clk_disable_unprepare() for each clock should be called before the clk_put(). Found by Linux Driver Verification project (linuxtesting.org). Fixes: 16adc674d0d6 ("usb: dwc3: add generic OF glue layer") Signed-off-by: Andreas Platschek Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-of-simple.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index c4a4d7bd2766..762370dd7c75 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -51,8 +51,10 @@ static int dwc3_of_simple_clk_init(struct dwc3_of_simple *simple, int count) clk = of_clk_get(np, i); if (IS_ERR(clk)) { - while (--i >= 0) + while (--i >= 0) { + clk_disable_unprepare(simple->clks[i]); clk_put(simple->clks[i]); + } return PTR_ERR(clk); } -- cgit v1.2.3 From a0d8c4cfdf31a9576f683628e50b76714c785ef1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 7 Dec 2017 13:40:24 +0900 Subject: usb: dwc3: of-simple: set dev_pm_ops dwc3_of_simple_dev_pm_ops has never been used since the initial support by commit 16adc674d0d6 ("usb: dwc3: add generic OF glue layer"). I guess setting the .pm struct member was simply missed.
Signed-off-by: Masahiro Yamada Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-of-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index 762370dd7c75..7ae0eefc7cc7 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -205,6 +205,7 @@ static struct platform_driver dwc3_of_simple_driver = { .driver = { .name = "dwc3-of-simple", .of_match_table = of_dwc3_simple_match, + .pm = &dwc3_of_simple_dev_pm_ops, }, }; -- cgit v1.2.3 From ec5bb87e4e2a1d3a35563a7bcfac9febf67aba9d Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Wed, 6 Dec 2017 12:49:04 +0530 Subject: usb: dwc3: gadget: Fix PCM1 for ISOC EP with ep->mult less than 3 For isochronous endpoints with ep->mult less than 3, PCM1 value of trb->size is set incorrectly. For ep->mult = 2, this is set to 0/-1 and for ep->mult = 1, this is set to -2. This is because the initial mult is set to ep->mult - 1 instead of 2. Signed-off-by: Manu Gautam Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 01e595bb1ff1..639dd1b163a0 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -912,7 +912,7 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, */ if (speed == USB_SPEED_HIGH) { struct usb_ep *ep = &dep->endpoint; - unsigned int mult = ep->mult - 1; + unsigned int mult = 2; unsigned int maxp = usb_endpoint_maxp(ep->desc); if (length <= (2 * maxp)) -- cgit v1.2.3 From 9273083a1530891360e9fe4fad26ae96810db499 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Thu, 30 Nov 2017 12:16:37 +0400 Subject: usb: dwc2: Fix TxFIFOn sizes and total TxFIFO size issues In host mode, reading from DPTXSIZn returns an invalid value in the dwc2_check_param_tx_fifo_sizes() function. The total TxFIFO size calculation is unnecessarily reduced by ep_info.
hw->total_fifo_size can be fully allocated for FIFO's. Added num_dev_in_eps member in dwc2_hw_params structure to save number of IN EPs. Added g_tx_fifo_size array in dwc2_hw_params structure to store power on reset values of DPTXSIZn registers in forced device mode. Updated dwc2_hsotg_tx_fifo_count() function to get TxFIFO count from num_dev_in_eps. Updated dwc2_get_dev_hwparams() function to store DPTXFSIZn in g_tx_fifo_size array. dwc2_get_host/dev_hwparams() functions call moved after num_dev_in_eps set from hwcfg4. Modified dwc2_check_param_tx_fifo_sizes() function to check TxFIFOn sizes based on g_tx_fifo_size array. Removed ep_info subtraction during calculation of tx_addr_max in dwc2_hsotg_tx_fifo_total_depth() function. Also removed dwc2_hsotg_ep_info_size() function as no more need. Acked-by: John Youn Signed-off-by: Gevorg Sahakyan Signed-off-by: Minas Harutyunyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/core.h | 4 ++++ drivers/usb/dwc2/gadget.c | 42 ++---------------------------------------- drivers/usb/dwc2/params.c | 29 +++++++++++++++++++---------- 3 files changed, 25 insertions(+), 50 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index f66c94130cac..31749c79045f 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -537,6 +537,7 @@ struct dwc2_core_params { * 2 - Internal DMA * @power_optimized Are power optimizations enabled? 
* @num_dev_ep Number of device endpoints available + * @num_dev_in_eps Number of device IN endpoints available * @num_dev_perio_in_ep Number of device periodic IN endpoints * available * @dev_token_q_depth Device Mode IN Token Sequence Learning Queue @@ -565,6 +566,7 @@ struct dwc2_core_params { * 2 - 8 or 16 bits * @snpsid: Value from SNPSID register * @dev_ep_dirs: Direction of device endpoints (GHWCFG1) + * @g_tx_fifo_size[] Power-on values of TxFIFO sizes */ struct dwc2_hw_params { unsigned op_mode:3; @@ -586,12 +588,14 @@ struct dwc2_hw_params { unsigned fs_phy_type:2; unsigned i2c_enable:1; unsigned num_dev_ep:4; + unsigned num_dev_in_eps : 4; unsigned num_dev_perio_in_ep:4; unsigned total_fifo_size:16; unsigned power_optimized:1; unsigned utmi_phy_data_width:2; u32 snpsid; u32 dev_ep_dirs; + u32 g_tx_fifo_size[MAX_EPS_CHANNELS]; }; /* Size of control and EP0 buffers */ diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 88529d092503..e4c3ce0de5de 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -195,55 +195,18 @@ int dwc2_hsotg_tx_fifo_count(struct dwc2_hsotg *hsotg) { if (hsotg->hw_params.en_multiple_tx_fifo) /* In dedicated FIFO mode we need count of IN EPs */ - return (dwc2_readl(hsotg->regs + GHWCFG4) & - GHWCFG4_NUM_IN_EPS_MASK) >> GHWCFG4_NUM_IN_EPS_SHIFT; + return hsotg->hw_params.num_dev_in_eps; else /* In shared FIFO mode we need count of Periodic IN EPs */ return hsotg->hw_params.num_dev_perio_in_ep; } -/** - * dwc2_hsotg_ep_info_size - return Endpoint Info Control block size in DWORDs - */ -static int dwc2_hsotg_ep_info_size(struct dwc2_hsotg *hsotg) -{ - int val = 0; - int i; - u32 ep_dirs; - - /* - * Don't need additional space for ep info control registers in - * slave mode. 
- */ - if (!using_dma(hsotg)) { - dev_dbg(hsotg->dev, "Buffer DMA ep info size 0\n"); - return 0; - } - - /* - * Buffer DMA mode - 1 location per endpoit - * Descriptor DMA mode - 4 locations per endpoint - */ - ep_dirs = hsotg->hw_params.dev_ep_dirs; - - for (i = 0; i <= hsotg->hw_params.num_dev_ep; i++) { - val += ep_dirs & 3 ? 1 : 2; - ep_dirs >>= 2; - } - - if (using_desc_dma(hsotg)) - val = val * 4; - - return val; -} - /** * dwc2_hsotg_tx_fifo_total_depth - return total FIFO depth available for * device mode TX FIFOs */ int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg) { - int ep_info_size; int addr; int tx_addr_max; u32 np_tx_fifo_size; @@ -252,8 +215,7 @@ int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg) hsotg->params.g_np_tx_fifo_size); /* Get Endpoint Info Control block size in DWORDs. */ - ep_info_size = dwc2_hsotg_ep_info_size(hsotg); - tx_addr_max = hsotg->hw_params.total_fifo_size - ep_info_size; + tx_addr_max = hsotg->hw_params.total_fifo_size; addr = hsotg->params.g_rx_fifo_size + np_tx_fifo_size; if (tx_addr_max <= addr) diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index ef73af6e03a9..03fd20f0b496 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -484,8 +484,7 @@ static void dwc2_check_param_tx_fifo_sizes(struct dwc2_hsotg *hsotg) } for (fifo = 1; fifo <= fifo_count; fifo++) { - dptxfszn = (dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)) & - FIFOSIZE_DEPTH_MASK) >> FIFOSIZE_DEPTH_SHIFT; + dptxfszn = hsotg->hw_params.g_tx_fifo_size[fifo]; if (hsotg->params.g_tx_fifo_size[fifo] < min || hsotg->params.g_tx_fifo_size[fifo] > dptxfszn) { @@ -609,6 +608,7 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg) struct dwc2_hw_params *hw = &hsotg->hw_params; bool forced; u32 gnptxfsiz; + int fifo, fifo_count; if (hsotg->dr_mode == USB_DR_MODE_HOST) return; @@ -617,6 +617,14 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg) gnptxfsiz = dwc2_readl(hsotg->regs + GNPTXFSIZ); + 
fifo_count = dwc2_hsotg_tx_fifo_count(hsotg); + + for (fifo = 1; fifo <= fifo_count; fifo++) { + hw->g_tx_fifo_size[fifo] = + (dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)) & + FIFOSIZE_DEPTH_MASK) >> FIFOSIZE_DEPTH_SHIFT; + } + if (forced) dwc2_clear_force_mode(hsotg); @@ -661,14 +669,6 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg) hwcfg4 = dwc2_readl(hsotg->regs + GHWCFG4); grxfsiz = dwc2_readl(hsotg->regs + GRXFSIZ); - /* - * Host specific hardware parameters. Reading these parameters - * requires the controller to be in host mode. The mode will - * be forced, if necessary, to read these values. - */ - dwc2_get_host_hwparams(hsotg); - dwc2_get_dev_hwparams(hsotg); - /* hwcfg1 */ hw->dev_ep_dirs = hwcfg1; @@ -711,6 +711,8 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg) hw->en_multiple_tx_fifo = !!(hwcfg4 & GHWCFG4_DED_FIFO_EN); hw->num_dev_perio_in_ep = (hwcfg4 & GHWCFG4_NUM_DEV_PERIO_IN_EP_MASK) >> GHWCFG4_NUM_DEV_PERIO_IN_EP_SHIFT; + hw->num_dev_in_eps = (hwcfg4 & GHWCFG4_NUM_IN_EPS_MASK) >> + GHWCFG4_NUM_IN_EPS_SHIFT; hw->dma_desc_enable = !!(hwcfg4 & GHWCFG4_DESC_DMA); hw->power_optimized = !!(hwcfg4 & GHWCFG4_POWER_OPTIMIZ); hw->utmi_phy_data_width = (hwcfg4 & GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK) >> @@ -719,6 +721,13 @@ int dwc2_get_hwparams(struct dwc2_hsotg *hsotg) /* fifo sizes */ hw->rx_fifo_size = (grxfsiz & GRXFSIZ_DEPTH_MASK) >> GRXFSIZ_DEPTH_SHIFT; + /* + * Host specific hardware parameters. Reading these parameters + * requires the controller to be in host mode. The mode will + * be forced, if necessary, to read these values. 
+ */ + dwc2_get_host_hwparams(hsotg); + dwc2_get_dev_hwparams(hsotg); return 0; } -- cgit v1.2.3 From 2b4f27c36bcd46e820ddb9a8e6fe6a63fa4250b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 01:18:57 -0800 Subject: crypto: skcipher - set walk.iv for zero-length inputs All the ChaCha20 algorithms as well as the ARM bit-sliced AES-XTS algorithms call skcipher_walk_virt(), then access the IV (walk.iv) before checking whether any bytes need to be processed (walk.nbytes). But if the input is empty, then skcipher_walk_virt() doesn't set the IV, and the algorithms crash trying to use the uninitialized IV pointer. Fix it by setting the IV earlier in skcipher_walk_virt(). Also fix it for the AEAD walk functions. This isn't a perfect solution because we can't actually align the IV to ->cra_alignmask unless there are bytes to process, for one because the temporary buffer for the aligned IV is freed by skcipher_walk_done(), which is only called when there are bytes to process. Thus, algorithms that require aligned IVs will still need to avoid accessing the IV when walk.nbytes == 0. Still, many algorithms/architectures are fine with IVs having any alignment, and even for those that aren't, a misaligned pointer bug is much less severe than an uninitialized pointer bug. This change also matches the behavior of the older blkcipher_walk API. 
Fixes: 0cabf2af6f5a ("crypto: skcipher - Fix crash on zero-length input") Reported-by: syzbot Cc: # v4.14+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/skcipher.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 778e0ff42bfa..11af5fd6a443 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, walk->total = req->cryptlen; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); - walk->iv = req->iv; - walk->oiv = req->iv; - walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? SKCIPHER_WALK_SLEEP : 0; @@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, int err; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, scatterwalk_done(&walk->in, 0, walk->total); scatterwalk_done(&walk->out, 0, walk->total); - walk->iv = req->iv; - walk->oiv = req->iv; - if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) walk->flags |= SKCIPHER_WALK_SLEEP; else -- cgit v1.2.3 From 11edb555966ed2c66c533d17c604f9d7e580a829 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 29 Nov 2017 12:02:23 +0100 Subject: crypto: af_alg - wait for data at beginning of recvmsg The wait for data is a non-atomic operation that can sleep and therefore potentially release the socket lock. The release of the socket lock allows another thread to modify the context data structure. The waiting operation for new data therefore must be called at the beginning of recvmsg. 
This prevents a race condition where checks of the members of the context data structure are performed by recvmsg while there is a potential for modification of these values. Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management") Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Reported-by: syzbot Cc: # v4.14+ Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/af_alg.c | 6 ------ crypto/algif_aead.c | 6 ++++++ crypto/algif_skcipher.c | 6 ++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 358749c38894..f1a2caf1b59b 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1137,12 +1137,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, if (!af_alg_readable(sk)) break; - if (!ctx->used) { - err = af_alg_wait_for_data(sk, flags); - if (err) - return err; - } - seglen = min_t(size_t, (maxsize - len), msg_data_left(msg)); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 805f485ddf1b..c8a32bef208a 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t usedpages = 0; /* [in] RX bufs to be used from user */ size_t processed = 0; /* [in] TX bufs to be consumed */ + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* * Data length provided by caller via sendmsg/sendpage that has not * yet been processed. diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 30cff827dd8f..6fb595cd63ac 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, int err = 0; size_t len = 0; + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* Allocate cipher request for current operation. 
*/ areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + crypto_skcipher_reqsize(tfm)); -- cgit v1.2.3 From 9abffc6f2efe46c3564c04312e52e07622d40e51 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 30 Nov 2017 13:39:27 +0100 Subject: crypto: mcryptd - protect the per-CPU queue with a lock mcryptd_enqueue_request() grabs the per-CPU queue struct and protects access to it with disabled preemption. Then it schedules a worker on the same CPU. The worker in mcryptd_queue_worker() guards access to the same per-CPU variable with disabled preemption. If we take CPU-hotplug into account then it is possible that between queue_work_on() and the actual invocation of the worker the CPU goes down and the worker will be scheduled on _another_ CPU. And here the preempt_disable() protection does not work anymore. The easiest thing is to add a spin_lock() to guard access to the list. Another detail: mcryptd_queue_worker() is not processing more than MCRYPTD_BATCH invocations in a row. If there are still items left, then it will invoke queue_work() to proceed with more later. *I* would suggest to simply drop that check because it does not use a system workqueue and the workqueue is already marked as "CPU_INTENSIVE". And if preemption is required then the scheduler should do it. However if queue_work() is used then the work item is marked as CPU unbound. That means it will try to run on the local CPU but it may run on another CPU as well. Especially with CONFIG_DEBUG_WQ_FORCE_RR_CPU=y. Again, the preempt_disable() won't work here but the lock which was introduced will help. In order to keep the work item on the local CPU (and avoid RR) I changed it to queue_work_on().
Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Herbert Xu --- crypto/mcryptd.c | 23 ++++++++++------------- include/crypto/mcryptd.h | 1 + 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index 4e6472658852..eca04d3729b3 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue, pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + spin_lock_init(&cpu_queue->q_lock); } return 0; } @@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue, int cpu, err; struct mcryptd_cpu_queue *cpu_queue; - cpu = get_cpu(); - cpu_queue = this_cpu_ptr(queue->cpu_queue); - rctx->tag.cpu = cpu; + cpu_queue = raw_cpu_ptr(queue->cpu_queue); + spin_lock(&cpu_queue->q_lock); + cpu = smp_processor_id(); + rctx->tag.cpu = smp_processor_id(); err = crypto_enqueue_request(&cpu_queue->queue, request); pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", cpu, cpu_queue, request); + spin_unlock(&cpu_queue->q_lock); queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); - put_cpu(); return err; } @@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); i = 0; while (i < MCRYPTD_BATCH || single_task_running()) { - /* - * preempt_disable/enable is used to prevent - * being preempted by mcryptd_enqueue_request() - */ - local_bh_disable(); - preempt_disable(); + + spin_lock_bh(&cpu_queue->q_lock); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); - local_bh_enable(); + spin_unlock_bh(&cpu_queue->q_lock); if (!req) { mcryptd_opportunistic_flush(); @@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work) ++i; } if (cpu_queue->queue.qlen) 
- queue_work(kcrypto_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); } void mcryptd_flusher(struct work_struct *__work) diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h index cceafa01f907..b67404fc4b34 100644 --- a/include/crypto/mcryptd.h +++ b/include/crypto/mcryptd.h @@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast( struct mcryptd_cpu_queue { struct crypto_queue queue; + spinlock_t q_lock; struct work_struct work; }; -- cgit v1.2.3 From d53c5135792319e095bb126bc43b2ee98586f7fe Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 8 Dec 2017 11:50:37 +0100 Subject: crypto: af_alg - fix race accessing cipher request When invoking an asynchronous cipher operation, the invocation of the callback may be performed before the subsequent operations in the initial code path are invoked. The callback deletes the cipher request data structure which implies that after the invocation of the asynchronous cipher operation, this data structure must not be accessed any more. The setting of the return code size with the request data structure must therefore be moved before the invocation of the asynchronous cipher operation. Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory management") Fixes: d887c52d6ae4 ("crypto: algif_aead - overhaul memory management") Reported-by: syzbot Cc: # v4.14+ Signed-off-by: Stephan Mueller Acked-by: Jonathan Cameron Signed-off-by: Herbert Xu --- crypto/algif_aead.c | 10 +++++----- crypto/algif_skcipher.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index c8a32bef208a..b73db2b27656 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -291,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. 
*/ + areq->outlen = outlen; + aead_request_set_callback(&areq->cra_u.aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_async_cb, areq); @@ -298,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, crypto_aead_decrypt(&areq->cra_u.aead_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = outlen; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 6fb595cd63ac..baef9bfccdda 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -125,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = len; + skcipher_request_set_callback(&areq->cra_u.skcipher_req, CRYPTO_TFM_REQ_MAY_SLEEP, af_alg_async_cb, areq); @@ -133,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = len; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { -- cgit v1.2.3 From 4564b187c16327045d87596e8980c65ba7b84c50 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 11 Dec 2017 12:33:47 +0100 Subject: nl80211: fix nl80211_send_iface() error paths Evidently I introduced a locking bug in my change here, the nla_put_failure sometimes needs to unlock. Fix it. 
Fixes: 44905265bc15 ("nl80211: don't expose wdev->ssid for most interfaces") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b1ac23ca20c8..213d0c498c97 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag case NL80211_IFTYPE_AP: if (wdev->ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) - goto nla_put_failure; + goto nla_put_failure_locked; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: @@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (!ssid_ie) break; if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) - goto nla_put_failure; + goto nla_put_failure_locked; break; } default: @@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag genlmsg_end(msg, hdr); return 0; + nla_put_failure_locked: + wdev_unlock(wdev); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; -- cgit v1.2.3 From cd430a244cd5d3ca0f4053718eabdf42bc12c517 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 9 Dec 2017 14:52:03 +0300 Subject: ASoC: nuc900: Fix platform_get_irq() error checking some more The error handling doesn't work here because "nuc900_audio->irq_num" is unsigned. Also we should be checking for < 0 and not <= 0 but I believe that's harmless. The platform_get_irq() comments don't talk about the return values... 
Fixes: fa8cc38165c2 ("ASoC: nuc900: Fix platform_get_irq's error checking") Signed-off-by: Dan Carpenter Signed-off-by: Mark Brown --- sound/soc/nuc900/nuc900-ac97.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/nuc900/nuc900-ac97.c b/sound/soc/nuc900/nuc900-ac97.c index 5e4fbd2d3479..71fce7c85c93 100644 --- a/sound/soc/nuc900/nuc900-ac97.c +++ b/sound/soc/nuc900/nuc900-ac97.c @@ -345,11 +345,10 @@ static int nuc900_ac97_drvprobe(struct platform_device *pdev) goto out; } - nuc900_audio->irq_num = platform_get_irq(pdev, 0); - if (nuc900_audio->irq_num <= 0) { - ret = nuc900_audio->irq_num < 0 ? nuc900_audio->irq_num : -EBUSY; + ret = platform_get_irq(pdev, 0); + if (ret < 0) goto out; - } + nuc900_audio->irq_num = ret; nuc900_ac97_data = nuc900_audio; -- cgit v1.2.3 From 65a12b3aafed5fc59f4ce41b22b752b1729e6701 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 9 Dec 2017 14:52:28 +0300 Subject: ASoC: nuc900: Fix a loop timeout test We should be finishing the loop with timeout set to zero but because this is a post-op we finish with timeout == -1. 
Fixes: 1082e2703a2d ("ASoC: NUC900/audio: add nuc900 audio driver support") Signed-off-by: Dan Carpenter Signed-off-by: Mark Brown --- sound/soc/nuc900/nuc900-ac97.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/nuc900/nuc900-ac97.c b/sound/soc/nuc900/nuc900-ac97.c index 71fce7c85c93..81b09d740ed9 100644 --- a/sound/soc/nuc900/nuc900-ac97.c +++ b/sound/soc/nuc900/nuc900-ac97.c @@ -67,7 +67,7 @@ static unsigned short nuc900_ac97_read(struct snd_ac97 *ac97, /* polling the AC_R_FINISH */ while (!(AUDIO_READ(nuc900_audio->mmio + ACTL_ACCON) & AC_R_FINISH) - && timeout--) + && --timeout) mdelay(1); if (!timeout) { @@ -121,7 +121,7 @@ static void nuc900_ac97_write(struct snd_ac97 *ac97, unsigned short reg, /* polling the AC_W_FINISH */ while ((AUDIO_READ(nuc900_audio->mmio + ACTL_ACCON) & AC_W_FINISH) - && timeout--) + && --timeout) mdelay(1); if (!timeout) -- cgit v1.2.3 From 87684d338a22d15e47b16ee68f569d74ad1d076e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Dec 2017 14:54:25 +0300 Subject: ASoC: Intel: Skylake: Re-order some code to silence a warning I get a Smatch warning here: sound/soc/intel/skylake/skl-nhlt.c:335 skl_get_ssp_clks() error: testing array offset 'j' after use. The code is harmless, but the checker is right that we should swap these two conditions so we verify that the offset is within bounds before we use it. 
Signed-off-by: Dan Carpenter Reviewed-by: Sriram Periyasamy Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-nhlt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index ca5dc2be7b68..bde7f40f29f5 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -322,8 +322,8 @@ static void skl_get_ssp_clks(struct skl *skl, struct skl_ssp_clk *ssp_clks, rate = channels * bps * fs; /* check if the rate is added already to the given SSP's sclk */ - for (j = 0; (sclk[id].rate_cfg[j].rate != 0) && - (j < SKL_MAX_CLK_RATES); j++) { + for (j = 0; (j < SKL_MAX_CLK_RATES) && + (sclk[id].rate_cfg[j].rate != 0); j++) { if (sclk[id].rate_cfg[j].rate == rate) { present = true; break; -- cgit v1.2.3 From 5f3a86014eadbcf559ab64cf26ce29510319228b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 8 Nov 2017 15:34:54 +0900 Subject: mmc: slot-gpio: call gpiod_to_irq() only when MMC_CAP_NEEDS_POLL is unset It is not efficient to call gpiod_to_irq() regardless of the flag and then ignore the returned irq if MMC_CAP_NEEDS_POLL is set. Move gpiod_to_irq() after the MMC_CAP_NEEDS_POLL check.
Signed-off-by: Masahiro Yamada Signed-off-by: Ulf Hansson --- drivers/mmc/core/slot-gpio.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 863f1dbbfc1b..f7c6e0542de7 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -121,20 +121,18 @@ EXPORT_SYMBOL(mmc_gpio_request_ro); void mmc_gpiod_request_cd_irq(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; - int ret, irq; + int irq = -EINVAL; + int ret; if (host->slot.cd_irq >= 0 || !ctx || !ctx->cd_gpio) return; - irq = gpiod_to_irq(ctx->cd_gpio); - /* - * Even if gpiod_to_irq() returns a valid IRQ number, the platform might - * still prefer to poll, e.g., because that IRQ number is already used - * by another unit and cannot be shared. + * Do not use IRQ if the platform prefers to poll, e.g., because that + * IRQ number is already used by another unit and cannot be shared. */ - if (irq >= 0 && host->caps & MMC_CAP_NEEDS_POLL) - irq = -EINVAL; + if (!(host->caps & MMC_CAP_NEEDS_POLL)) + irq = gpiod_to_irq(ctx->cd_gpio); if (irq >= 0) { if (!ctx->cd_gpio_isr) -- cgit v1.2.3 From 97618aca1440b5addc5c3d78659d3e176be23b80 Mon Sep 17 00:00:00 2001 From: "yinbo.zhu" Date: Wed, 8 Nov 2017 17:09:50 +0800 Subject: mmc: sdhci-of-esdhc: fix eMMC couldn't work after kexec The bit eSDHC_TBCTL[TB_EN] couldn't be reset by eSDHC_SYSCTL[RSTA] which is used to reset for all. The driver should make sure it's cleared before card initialization, otherwise the initialization would fail. 
Signed-off-by: yinbo.zhu Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-esdhc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 1f424374bbbb..d74030f3bd12 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -785,6 +785,10 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host) pltfm_host = sdhci_priv(host); esdhc = sdhci_pltfm_priv(pltfm_host); + val = sdhci_readl(host, ESDHC_TBCTL); + val &= ~ESDHC_TB_EN; + sdhci_writel(host, val, ESDHC_TBCTL); + host_ver = sdhci_readw(host, SDHCI_HOST_VERSION); esdhc->vendor_ver = (host_ver & SDHCI_VENDOR_VER_MASK) >> SDHCI_VENDOR_VER_SHIFT; -- cgit v1.2.3 From 96455380ece1c786f0a8822ea8b312e6445a3d93 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 14 Nov 2017 23:55:20 +0100 Subject: mmc: core: use usleep_range rather than HZ magic in mmc_delay() Documentation/timers/timers-howto.txt recommends to use usleep_range for delays 1-20ms. Let's adhere to it. No need for messing with HZ and still do busy looping these days. 
Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 71e6c6d7ceb7..b2877e2d740f 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -62,12 +62,10 @@ void mmc_set_initial_state(struct mmc_host *host); static inline void mmc_delay(unsigned int ms) { - if (ms < 1000 / HZ) { - cond_resched(); - mdelay(ms); - } else { + if (ms <= 20) + usleep_range(ms * 1000, ms * 1250); + else msleep(ms); - } } void mmc_rescan(struct work_struct *work); -- cgit v1.2.3 From 754febcc6b749bb05ebb06b0b9cfdda6157e8cfd Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 14 Nov 2017 23:51:04 +0100 Subject: mmc: tmio: use usleep_range consistently There are a few udelay() left which are in a range that they should be usleep_range() these days. Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 583bf3262df5..d6ca57be16c2 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -806,7 +806,7 @@ static int tmio_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode) if (ret == 0) set_bit(i, host->taps); - mdelay(1); + usleep_range(1000, 1200); } ret = host->select_tuning(host); @@ -958,7 +958,7 @@ static void tmio_mmc_power_on(struct tmio_mmc_host *host, unsigned short vdd) * 100us were not enough. Is this the same 140us delay, as in * tmio_mmc_set_ios()? 
*/ - udelay(200); + usleep_range(200, 300); } /* * It seems, VccQ should be switched on after Vcc, this is also what the @@ -966,7 +966,7 @@ static void tmio_mmc_power_on(struct tmio_mmc_host *host, unsigned short vdd) */ if (!IS_ERR(mmc->supply.vqmmc) && !ret) { ret = regulator_enable(mmc->supply.vqmmc); - udelay(200); + usleep_range(200, 300); } if (ret < 0) @@ -1059,7 +1059,7 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } /* Let things settle. delay taken from winCE driver */ - udelay(140); + usleep_range(140, 200); if (PTR_ERR(host->mrq) == -EINTR) dev_dbg(&host->pdev->dev, "%s.%d: IOS interrupted: clk %u, mode %u", -- cgit v1.2.3 From 448f2f8775d1e8a62a14506b6da38bcedce5eb22 Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Wed, 15 Nov 2017 16:25:49 +0100 Subject: dt-bindings: mmc: renesas_sdhi: Add r8a77995 support Adds bindings for the R-Car D3 SoC's SDHI IP. Signed-off-by: Ulrich Hecht Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/tmio_mmc.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt index 3c6762430fd9..d8685cb83325 100644 --- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt +++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt @@ -26,6 +26,7 @@ Required properties: "renesas,sdhi-r8a7794" - SDHI IP on R8A7794 SoC "renesas,sdhi-r8a7795" - SDHI IP on R8A7795 SoC "renesas,sdhi-r8a7796" - SDHI IP on R8A7796 SoC + "renesas,sdhi-r8a77995" - SDHI IP on R8A77995 SoC "renesas,sdhi-shmobile" - a generic sh-mobile SDHI controller "renesas,rcar-gen1-sdhi" - a generic R-Car Gen1 SDHI controller "renesas,rcar-gen2-sdhi" - a generic R-Car Gen2 or RZ/G1 -- cgit v1.2.3 From 1907e38680af8f492f3da20cb36e3f33cfd971bf Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Tue, 28 Nov 2017 04:44:55 +0100 Subject: mmc: sunxi: fix mojibake in module metadata 
It had an U+FFFD: not a corrupted character but a literal well-formed replacement marker. Signed-off-by: Adam Borowski Acked-by: Maxime Ripard Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index cc98355dbdb9..8fef5c17696e 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -3,7 +3,7 @@ * (C) Copyright 2007-2011 Reuuimlla Technology Co., Ltd. * (C) Copyright 2007-2011 Aaron Maoye * (C) Copyright 2013-2014 O2S GmbH - * (C) Copyright 2013-2014 David Lanzend�rfer + * (C) Copyright 2013-2014 David Lanzendörfer * (C) Copyright 2013-2014 Hans de Goede * (C) Copyright 2017 Sootech SA * @@ -1393,5 +1393,5 @@ module_platform_driver(sunxi_mmc_driver); MODULE_DESCRIPTION("Allwinner's SD/MMC Card Controller Driver"); MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("David Lanzend�rfer "); +MODULE_AUTHOR("David Lanzendörfer "); MODULE_ALIAS("platform:sunxi-mmc"); -- cgit v1.2.3 From 4512bd370b111dd7ffc437ddd3179391df68fe1b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:40:58 +0200 Subject: mmc: block: No need to export mmc_cleanup_queue() mmc_cleanup_queue() is not used by a different module. Do not export it. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/queue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 4f33d277b125..26f8da30ebe5 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -270,7 +270,6 @@ void mmc_cleanup_queue(struct mmc_queue *mq) mq->card = NULL; } -EXPORT_SYMBOL(mmc_cleanup_queue); /** * mmc_queue_suspend - suspend a MMC request queue -- cgit v1.2.3 From 41e3efd07d5a02c80f503e29d755aa1bbb4245de Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:40:59 +0200 Subject: mmc: block: Simplify cleaning up the queue Use blk_cleanup_queue() to shutdown the queue when the driver is removed, and instead get an extra reference to the queue to prevent the queue being freed before the final mmc_blk_put(). Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 17 ++++++++++++----- drivers/mmc/core/queue.c | 2 ++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ccfa98af1dd3..e44f6d90aeb4 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -189,7 +189,7 @@ static void mmc_blk_put(struct mmc_blk_data *md) md->usage--; if (md->usage == 0) { int devidx = mmc_get_devidx(md->disk); - blk_cleanup_queue(md->queue.queue); + blk_put_queue(md->queue.queue); ida_simple_remove(&mmc_blk_ida, devidx); put_disk(md->disk); kfree(md); @@ -2156,6 +2156,17 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, md->queue.blkdata = md; + /* + * Keep an extra reference to the queue so that we can shutdown the + * queue (i.e. call blk_cleanup_queue()) while there are still + * references to the 'md'. The corresponding blk_put_queue() is in + * mmc_blk_put(). 
+ */ + if (!blk_get_queue(md->queue.queue)) { + mmc_cleanup_queue(&md->queue); + goto err_putdisk; + } + md->disk->major = MMC_BLOCK_MAJOR; md->disk->first_minor = devidx * perdev_minors; md->disk->fops = &mmc_bdops; @@ -2471,10 +2482,6 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md) * from being accepted. */ card = md->queue.card; - spin_lock_irq(md->queue.queue->queue_lock); - queue_flag_set(QUEUE_FLAG_BYPASS, md->queue.queue); - spin_unlock_irq(md->queue.queue->queue_lock); - blk_set_queue_dying(md->queue.queue); mmc_cleanup_queue(&md->queue); if (md->disk->flags & GENHD_FL_UP) { device_remove_file(disk_to_dev(md->disk), &md->force_ro); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 26f8da30ebe5..ae6d9da68735 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -268,6 +268,8 @@ void mmc_cleanup_queue(struct mmc_queue *mq) blk_start_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); + blk_cleanup_queue(q); + mq->card = NULL; } -- cgit v1.2.3 From afab1bb8b40c61458e009fdc323c9740f95fcd5b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:00 +0200 Subject: mmc: core: Make mmc_pre_req() and mmc_post_req() available Make mmc_pre_req() and mmc_post_req() available to the card drivers. Later patches will make use of this. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/core.c | 31 ------------------------------- drivers/mmc/core/core.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 1f0f44f4dd5f..7ca6e4866a8b 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -657,37 +657,6 @@ bool mmc_is_req_done(struct mmc_host *host, struct mmc_request *mrq) } EXPORT_SYMBOL(mmc_is_req_done); -/** - * mmc_pre_req - Prepare for a new request - * @host: MMC host to prepare command - * @mrq: MMC request to prepare for - * - * mmc_pre_req() is called in prior to mmc_start_req() to let - * host prepare for the new request. Preparation of a request may be - * performed while another request is running on the host. - */ -static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq) -{ - if (host->ops->pre_req) - host->ops->pre_req(host, mrq); -} - -/** - * mmc_post_req - Post process a completed request - * @host: MMC host to post process command - * @mrq: MMC request to post process for - * @err: Error, if non zero, clean up any resources made in pre_req - * - * Let the host post process a completed request. Post processing of - * a request may be performed while another reuqest is running. 
- */ -static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq, - int err) -{ - if (host->ops->post_req) - host->ops->post_req(host, mrq, err); -} - /** * mmc_finalize_areq() - finalize an asynchronous request * @host: MMC host to finalize any ongoing request on diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index b2877e2d740f..3e3d21304e5f 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -150,4 +150,35 @@ int mmc_cqe_start_req(struct mmc_host *host, struct mmc_request *mrq); void mmc_cqe_post_req(struct mmc_host *host, struct mmc_request *mrq); int mmc_cqe_recovery(struct mmc_host *host); +/** + * mmc_pre_req - Prepare for a new request + * @host: MMC host to prepare command + * @mrq: MMC request to prepare for + * + * mmc_pre_req() is called in prior to mmc_start_req() to let + * host prepare for the new request. Preparation of a request may be + * performed while another request is running on the host. + */ +static inline void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq) +{ + if (host->ops->pre_req) + host->ops->pre_req(host, mrq); +} + +/** + * mmc_post_req - Post process a completed request + * @host: MMC host to post process command + * @mrq: MMC request to post process for + * @err: Error, if non zero, clean up any resources made in pre_req + * + * Let the host post process a completed request. Post processing of + * a request may be performed while another request is running. + */ +static inline void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq, + int err) +{ + if (host->ops->post_req) + host->ops->post_req(host, mrq, err); +} + #endif -- cgit v1.2.3 From 6d3898a6a517d0effa1d1e337c03b16bafb6fc96 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:01 +0200 Subject: mmc: block: Add error-handling comments Add error-handling comments to explain what would also be done for blk-mq if it used the legacy error-handling. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index e44f6d90aeb4..7dcd5d5b203b 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1911,7 +1911,11 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) case MMC_BLK_SUCCESS: case MMC_BLK_PARTIAL: /* - * A block was successfully transferred. + * Reset success, and accept bytes_xfered. For + * MMC_BLK_PARTIAL re-submit the remaining request. For + * MMC_BLK_SUCCESS error out the remaining request (it + * could not be re-submitted anyway if a next request + * had already begun). */ mmc_blk_reset_success(md, type); @@ -1931,6 +1935,14 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) } break; case MMC_BLK_CMD_ERR: + /* + * For SD cards, get bytes written, but do not accept + * bytes_xfered if that fails. For MMC cards accept + * bytes_xfered. Then try to reset. If reset fails then + * error out the remaining request, otherwise retry + * once (N.B mmc_blk_reset() will not succeed twice in a + * row). + */ req_pending = mmc_blk_rw_cmd_err(md, card, brq, old_req, req_pending); if (mmc_blk_reset(md, card->host, type)) { if (req_pending) @@ -1947,11 +1959,20 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) } break; case MMC_BLK_RETRY: + /* + * Do not accept bytes_xfered, but retry up to 5 times, + * otherwise same as abort. + */ retune_retry_done = brq->retune_retry_done; if (retry++ < 5) break; /* Fall through */ case MMC_BLK_ABORT: + /* + * Do not accept bytes_xfered, but try to reset. If + * reset succeeds, try once more, otherwise error out + * the request. 
+ */ if (!mmc_blk_reset(md, card->host, type)) break; mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); @@ -1960,6 +1981,13 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) case MMC_BLK_DATA_ERR: { int err; + /* + * Do not accept bytes_xfered, but try to reset. If + * reset succeeds, try once more. If reset fails with + * ENODEV which means the partition is wrong, then error + * out the request. Otherwise attempt to read one sector + * at a time. + */ err = mmc_blk_reset(md, card->host, type); if (!err) break; @@ -1971,6 +1999,10 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) /* Fall through */ } case MMC_BLK_ECC_ERR: + /* + * Do not accept bytes_xfered. If reading more than one + * sector, try reading one sector at a time. + */ if (brq->data.blocks > 1) { /* Redo read one sector at a time */ pr_warn("%s: retrying using single block read\n", @@ -1992,10 +2024,12 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) } break; case MMC_BLK_NOMEDIUM: + /* Do not accept bytes_xfered. Error out the request */ mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); mmc_blk_rw_try_restart(mq, new_req, mqrq_cur); return; default: + /* Do not accept bytes_xfered. Error out the request */ pr_err("%s: Unhandled return value (%d)", old_req->rq_disk->disk_name, status); mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); -- cgit v1.2.3 From c3d53d0da69d127f488dc85638e9440220b268e8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:02 +0200 Subject: mmc: core: Add parameter use_blk_mq Until mmc has blk-mq support fully implemented and tested, add a parameter use_blk_mq, set to true if config option MMC_MQ_DEFAULT is selected, which it is by default. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/Kconfig | 10 ++++++++++ drivers/mmc/core/core.c | 7 +++++++ drivers/mmc/core/core.h | 2 ++ drivers/mmc/core/host.c | 2 ++ drivers/mmc/core/host.h | 4 ++++ include/linux/mmc/host.h | 1 + 6 files changed, 26 insertions(+) diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig index ec21388311db..42565562577c 100644 --- a/drivers/mmc/Kconfig +++ b/drivers/mmc/Kconfig @@ -12,6 +12,16 @@ menuconfig MMC If you want MMC/SD/SDIO support, you should say Y here and also to your specific host controller driver. +config MMC_MQ_DEFAULT + bool "MMC: use blk-mq I/O path by default" + depends on MMC && BLOCK + default y + ---help--- + This option enables the new blk-mq based I/O path for MMC block + devices by default. With the option the mmc_core.use_blk_mq + module/boot option defaults to Y, without it to N, but it can + still be overridden either way. + if MMC source "drivers/mmc/core/Kconfig" diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 7ca6e4866a8b..617802f45386 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -66,6 +66,13 @@ static const unsigned freqs[] = { 400000, 300000, 200000, 100000 }; bool use_spi_crc = 1; module_param(use_spi_crc, bool, 0); +#ifdef CONFIG_MMC_MQ_DEFAULT +bool mmc_use_blk_mq = true; +#else +bool mmc_use_blk_mq = false; +#endif +module_param_named(use_blk_mq, mmc_use_blk_mq, bool, S_IWUSR | S_IRUGO); + static int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay) { diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 3e3d21304e5f..136617d2f971 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -35,6 +35,8 @@ struct mmc_bus_ops { int (*reset)(struct mmc_host *); }; +extern bool mmc_use_blk_mq; + void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops); void mmc_detach_bus(struct mmc_host *host); diff --git 
a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 64b03d6eaf18..409a68a96a0a 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -404,6 +404,8 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) host->fixed_drv_type = -EINVAL; + host->use_blk_mq = mmc_use_blk_mq; + return host; } diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index fb689a1065ed..6eaf558e62d6 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -74,6 +74,10 @@ static inline bool mmc_card_hs400es(struct mmc_card *card) return card->host->ios.enhanced_strobe; } +static inline bool mmc_host_use_blk_mq(struct mmc_host *host) +{ + return host->use_blk_mq; +} #endif diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index e7743eca1021..ce2075d6f429 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -380,6 +380,7 @@ struct mmc_host { unsigned int doing_retune:1; /* re-tuning in progress */ unsigned int retune_now:1; /* do re-tuning at next req */ unsigned int retune_paused:1; /* re-tuning is temporarily disabled */ + unsigned int use_blk_mq:1; /* use blk-mq */ int rescan_disable; /* disable card detection */ int rescan_entered; /* used with nonremovable devices */ -- cgit v1.2.3 From 81196976ed946cbf36bb41ddda402853c7df7cfa Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:03 +0200 Subject: mmc: block: Add blk-mq support Define and use a blk-mq queue. Discards and flushes are processed synchronously, but reads and writes asynchronously. In order to support slow DMA unmapping, DMA unmapping is not done until after the next request is started. That means the request is not completed until then. If there is no next request then the completion is done by queued work. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 502 ++++++++++++++++++++++++++++++++++++++++++++++- drivers/mmc/core/block.h | 9 + drivers/mmc/core/queue.c | 296 +++++++++++++++++++++++++--- drivers/mmc/core/queue.h | 32 +++ 4 files changed, 808 insertions(+), 31 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 7dcd5d5b203b..7874c3bbf6b5 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1220,6 +1220,14 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) md->reset_done &= ~type; } +static void mmc_blk_end_request(struct request *req, blk_status_t error) +{ + if (req->mq_ctx) + blk_mq_end_request(req, error); + else + blk_end_request_all(req, error); +} + /* * The non-block commands come back from the block layer after it queued it and * processed it with all other requests and then they get issued in this @@ -1281,7 +1289,7 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) break; } mq_rq->drv_op_result = ret; - blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK); + mmc_blk_end_request(req, ret ? 
BLK_STS_IOERR : BLK_STS_OK); } static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) @@ -1324,7 +1332,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) else mmc_blk_reset_success(md, type); fail: - blk_end_request(req, status, blk_rq_bytes(req)); + mmc_blk_end_request(req, status); } static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, @@ -1394,7 +1402,7 @@ out_retry: if (!err) mmc_blk_reset_success(md, type); out: - blk_end_request(req, status, blk_rq_bytes(req)); + mmc_blk_end_request(req, status); } static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) @@ -1404,7 +1412,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) int ret = 0; ret = mmc_flush_cache(card); - blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK); + mmc_blk_end_request(req, ret ? BLK_STS_IOERR : BLK_STS_OK); } /* @@ -1481,11 +1489,9 @@ static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) } } -static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, - struct mmc_async_req *areq) +static enum mmc_blk_status __mmc_blk_err_check(struct mmc_card *card, + struct mmc_queue_req *mq_mrq) { - struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req, - areq); struct mmc_blk_request *brq = &mq_mrq->brq; struct request *req = mmc_queue_req_to_req(mq_mrq); int need_retune = card->host->need_retune; @@ -1591,6 +1597,15 @@ static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, return MMC_BLK_SUCCESS; } +static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, + struct mmc_async_req *areq) +{ + struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req, + areq); + + return __mmc_blk_err_check(card, mq_mrq); +} + static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, int disable_multi, bool *do_rel_wr_p, bool *do_data_tag_p) @@ -1783,6 +1798,477 @@ static void mmc_blk_rw_rq_prep(struct 
mmc_queue_req *mqrq, mqrq->areq.err_check = mmc_blk_err_check; } +#define MMC_MAX_RETRIES 5 +#define MMC_NO_RETRIES (MMC_MAX_RETRIES + 1) + +#define MMC_READ_SINGLE_RETRIES 2 + +/* Single sector read during recovery */ +static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_request *mrq = &mqrq->brq.mrq; + struct mmc_card *card = mq->card; + struct mmc_host *host = card->host; + blk_status_t error = BLK_STS_OK; + int retries = 0; + + do { + u32 status; + int err; + + mmc_blk_rw_rq_prep(mqrq, card, 1, mq); + + mmc_wait_for_req(host, mrq); + + err = mmc_send_status(card, &status); + if (err) + goto error_exit; + + if (!mmc_host_is_spi(host) && + R1_CURRENT_STATE(status) != R1_STATE_TRAN) { + u32 stop_status = 0; + bool gen_err = false; + + err = send_stop(card, + DIV_ROUND_UP(mrq->data->timeout_ns, + 1000000), + req, &gen_err, &stop_status); + if (err) + goto error_exit; + } + + if (mrq->cmd->error && retries++ < MMC_READ_SINGLE_RETRIES) + continue; + + retries = 0; + + if (mrq->cmd->error || + mrq->data->error || + (!mmc_host_is_spi(host) && + (mrq->cmd->resp[0] & CMD_ERRORS || status & CMD_ERRORS))) + error = BLK_STS_IOERR; + else + error = BLK_STS_OK; + + } while (blk_update_request(req, error, 512)); + + return; + +error_exit: + mrq->data->bytes_xfered = 0; + blk_update_request(req, BLK_STS_IOERR, 512); + /* Let it try the remaining request again */ + if (mqrq->retries > MMC_MAX_RETRIES - 1) + mqrq->retries = MMC_MAX_RETRIES - 1; +} + +static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req) +{ + int type = rq_data_dir(req) == READ ? 
MMC_BLK_READ : MMC_BLK_WRITE; + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_blk_request *brq = &mqrq->brq; + struct mmc_blk_data *md = mq->blkdata; + struct mmc_card *card = mq->card; + static enum mmc_blk_status status; + + brq->retune_retry_done = mqrq->retries; + + status = __mmc_blk_err_check(card, mqrq); + + mmc_retune_release(card->host); + + /* + * Requests are completed by mmc_blk_mq_complete_rq() which sets simple + * policy: + * 1. A request that has transferred at least some data is considered + * successful and will be requeued if there is remaining data to + * transfer. + * 2. Otherwise the number of retries is incremented and the request + * will be requeued if there are remaining retries. + * 3. Otherwise the request will be errored out. + * That means mmc_blk_mq_complete_rq() is controlled by bytes_xfered and + * mqrq->retries. So there are only 4 possible actions here: + * 1. do not accept the bytes_xfered value i.e. set it to zero + * 2. change mqrq->retries to determine the number of retries + * 3. try to reset the card + * 4. read one sector at a time + */ + switch (status) { + case MMC_BLK_SUCCESS: + case MMC_BLK_PARTIAL: + /* Reset success, and accept bytes_xfered */ + mmc_blk_reset_success(md, type); + break; + case MMC_BLK_CMD_ERR: + /* + * For SD cards, get bytes written, but do not accept + * bytes_xfered if that fails. For MMC cards accept + * bytes_xfered. Then try to reset. If reset fails then + * error out the remaining request, otherwise retry + * once (N.B mmc_blk_reset() will not succeed twice in a + * row). 
+ */ + if (mmc_card_sd(card)) { + u32 blocks; + int err; + + err = mmc_sd_num_wr_blocks(card, &blocks); + if (err) + brq->data.bytes_xfered = 0; + else + brq->data.bytes_xfered = blocks << 9; + } + if (mmc_blk_reset(md, card->host, type)) + mqrq->retries = MMC_NO_RETRIES; + else + mqrq->retries = MMC_MAX_RETRIES - 1; + break; + case MMC_BLK_RETRY: + /* + * Do not accept bytes_xfered, but retry up to 5 times, + * otherwise same as abort. + */ + brq->data.bytes_xfered = 0; + if (mqrq->retries < MMC_MAX_RETRIES) + break; + /* Fall through */ + case MMC_BLK_ABORT: + /* + * Do not accept bytes_xfered, but try to reset. If + * reset succeeds, try once more, otherwise error out + * the request. + */ + brq->data.bytes_xfered = 0; + if (mmc_blk_reset(md, card->host, type)) + mqrq->retries = MMC_NO_RETRIES; + else + mqrq->retries = MMC_MAX_RETRIES - 1; + break; + case MMC_BLK_DATA_ERR: { + int err; + + /* + * Do not accept bytes_xfered, but try to reset. If + * reset succeeds, try once more. If reset fails with + * ENODEV which means the partition is wrong, then error + * out the request. Otherwise attempt to read one sector + * at a time. + */ + brq->data.bytes_xfered = 0; + err = mmc_blk_reset(md, card->host, type); + if (!err) { + mqrq->retries = MMC_MAX_RETRIES - 1; + break; + } + if (err == -ENODEV) { + mqrq->retries = MMC_NO_RETRIES; + break; + } + /* Fall through */ + } + case MMC_BLK_ECC_ERR: + /* + * Do not accept bytes_xfered. If reading more than one + * sector, try reading one sector at a time. + */ + brq->data.bytes_xfered = 0; + /* FIXME: Missing single sector read for large sector size */ + if (brq->data.blocks > 1 && !mmc_large_sector(card)) { + /* Redo read one sector at a time */ + pr_warn("%s: retrying using single block read\n", + req->rq_disk->disk_name); + mmc_blk_read_single(mq, req); + } else { + mqrq->retries = MMC_NO_RETRIES; + } + break; + case MMC_BLK_NOMEDIUM: + /* Do not accept bytes_xfered. 
Error out the request */ + brq->data.bytes_xfered = 0; + mqrq->retries = MMC_NO_RETRIES; + break; + default: + /* Do not accept bytes_xfered. Error out the request */ + brq->data.bytes_xfered = 0; + mqrq->retries = MMC_NO_RETRIES; + pr_err("%s: Unhandled return value (%d)", + req->rq_disk->disk_name, status); + break; + } +} + +static void mmc_blk_mq_complete_rq(struct mmc_queue *mq, struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + unsigned int nr_bytes = mqrq->brq.data.bytes_xfered; + + if (nr_bytes) { + if (blk_update_request(req, BLK_STS_OK, nr_bytes)) + blk_mq_requeue_request(req, true); + else + __blk_mq_end_request(req, BLK_STS_OK); + } else if (!blk_rq_bytes(req)) { + __blk_mq_end_request(req, BLK_STS_IOERR); + } else if (mqrq->retries++ < MMC_MAX_RETRIES) { + blk_mq_requeue_request(req, true); + } else { + if (mmc_card_removed(mq->card)) + req->rq_flags |= RQF_QUIET; + blk_mq_end_request(req, BLK_STS_IOERR); + } +} + +static bool mmc_blk_urgent_bkops_needed(struct mmc_queue *mq, + struct mmc_queue_req *mqrq) +{ + return mmc_card_mmc(mq->card) && !mmc_host_is_spi(mq->card->host) && + (mqrq->brq.cmd.resp[0] & R1_EXCEPTION_EVENT || + mqrq->brq.stop.resp[0] & R1_EXCEPTION_EVENT); +} + +static void mmc_blk_urgent_bkops(struct mmc_queue *mq, + struct mmc_queue_req *mqrq) +{ + if (mmc_blk_urgent_bkops_needed(mq, mqrq)) + mmc_start_bkops(mq->card, true); +} + +void mmc_blk_mq_complete(struct request *req) +{ + struct mmc_queue *mq = req->q->queuedata; + + mmc_blk_mq_complete_rq(mq, req); +} + +static void mmc_blk_mq_poll_completion(struct mmc_queue *mq, + struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + + mmc_blk_mq_rw_recovery(mq, req); + + mmc_blk_urgent_bkops(mq, mqrq); +} + +static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req) +{ + struct request_queue *q = req->q; + unsigned long flags; + bool put_card; + + spin_lock_irqsave(q->queue_lock, flags); + + 
mq->in_flight[mmc_issue_type(mq, req)] -= 1; + + put_card = (mmc_tot_in_flight(mq) == 0); + + spin_unlock_irqrestore(q->queue_lock, flags); + + if (put_card) + mmc_put_card(mq->card, &mq->ctx); +} + +static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_request *mrq = &mqrq->brq.mrq; + struct mmc_host *host = mq->card->host; + + mmc_post_req(host, mrq, 0); + + blk_mq_complete_request(req); + + mmc_blk_mq_dec_in_flight(mq, req); +} + +static void mmc_blk_mq_complete_prev_req(struct mmc_queue *mq, + struct request **prev_req) +{ + mutex_lock(&mq->complete_lock); + + if (!mq->complete_req) + goto out_unlock; + + mmc_blk_mq_poll_completion(mq, mq->complete_req); + + if (prev_req) + *prev_req = mq->complete_req; + else + mmc_blk_mq_post_req(mq, mq->complete_req); + + mq->complete_req = NULL; + +out_unlock: + mutex_unlock(&mq->complete_lock); +} + +void mmc_blk_mq_complete_work(struct work_struct *work) +{ + struct mmc_queue *mq = container_of(work, struct mmc_queue, + complete_work); + + mmc_blk_mq_complete_prev_req(mq, NULL); +} + +static void mmc_blk_mq_req_done(struct mmc_request *mrq) +{ + struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req, + brq.mrq); + struct request *req = mmc_queue_req_to_req(mqrq); + struct request_queue *q = req->q; + struct mmc_queue *mq = q->queuedata; + unsigned long flags; + bool waiting; + + /* + * We cannot complete the request in this context, so record that there + * is a request to complete, and that a following request does not need + * to wait (although it does need to complete complete_req first). + */ + spin_lock_irqsave(q->queue_lock, flags); + mq->complete_req = req; + mq->rw_wait = false; + waiting = mq->waiting; + spin_unlock_irqrestore(q->queue_lock, flags); + + /* + * If 'waiting' then the waiting task will complete this request, + * otherwise queue a work to do it. 
Note that complete_work may still + * race with the dispatch of a following request. + */ + if (waiting) + wake_up(&mq->wait); + else + kblockd_schedule_work(&mq->complete_work); +} + +static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) +{ + struct request_queue *q = mq->queue; + unsigned long flags; + bool done; + + /* + * Wait while there is another request in progress. Also indicate that + * there is a request waiting to start. + */ + spin_lock_irqsave(q->queue_lock, flags); + done = !mq->rw_wait; + mq->waiting = !done; + spin_unlock_irqrestore(q->queue_lock, flags); + + return done; +} + +static int mmc_blk_rw_wait(struct mmc_queue *mq, struct request **prev_req) +{ + int err = 0; + + wait_event(mq->wait, mmc_blk_rw_wait_cond(mq, &err)); + + /* Always complete the previous request if there is one */ + mmc_blk_mq_complete_prev_req(mq, prev_req); + + return err; +} + +static int mmc_blk_mq_issue_rw_rq(struct mmc_queue *mq, + struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_host *host = mq->card->host; + struct request *prev_req = NULL; + int err = 0; + + mmc_blk_rw_rq_prep(mqrq, mq->card, 0, mq); + + mqrq->brq.mrq.done = mmc_blk_mq_req_done; + + mmc_pre_req(host, &mqrq->brq.mrq); + + err = mmc_blk_rw_wait(mq, &prev_req); + if (err) + goto out_post_req; + + mq->rw_wait = true; + + err = mmc_start_request(host, &mqrq->brq.mrq); + + if (prev_req) + mmc_blk_mq_post_req(mq, prev_req); + + if (err) { + mq->rw_wait = false; + mmc_retune_release(host); + } + +out_post_req: + if (err) + mmc_post_req(host, &mqrq->brq.mrq, err); + + return err; +} + +static int mmc_blk_wait_for_idle(struct mmc_queue *mq, struct mmc_host *host) +{ + return mmc_blk_rw_wait(mq, NULL); +} + +enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req) +{ + struct mmc_blk_data *md = mq->blkdata; + struct mmc_card *card = md->queue.card; + struct mmc_host *host = card->host; + int ret; + + ret = mmc_blk_part_switch(card, 
md->part_type); + if (ret) + return MMC_REQ_FAILED_TO_START; + + switch (mmc_issue_type(mq, req)) { + case MMC_ISSUE_SYNC: + ret = mmc_blk_wait_for_idle(mq, host); + if (ret) + return MMC_REQ_BUSY; + switch (req_op(req)) { + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: + mmc_blk_issue_drv_op(mq, req); + break; + case REQ_OP_DISCARD: + mmc_blk_issue_discard_rq(mq, req); + break; + case REQ_OP_SECURE_ERASE: + mmc_blk_issue_secdiscard_rq(mq, req); + break; + case REQ_OP_FLUSH: + mmc_blk_issue_flush(mq, req); + break; + default: + WARN_ON_ONCE(1); + return MMC_REQ_FAILED_TO_START; + } + return MMC_REQ_FINISHED; + case MMC_ISSUE_ASYNC: + switch (req_op(req)) { + case REQ_OP_READ: + case REQ_OP_WRITE: + ret = mmc_blk_mq_issue_rw_rq(mq, req); + break; + default: + WARN_ON_ONCE(1); + ret = -EINVAL; + } + if (!ret) + return MMC_REQ_STARTED; + return ret == -EBUSY ? MMC_REQ_BUSY : MMC_REQ_FAILED_TO_START; + default: + WARN_ON_ONCE(1); + return MMC_REQ_FAILED_TO_START; + } +} + static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card, struct mmc_blk_request *brq, struct request *req, bool old_req_pending) diff --git a/drivers/mmc/core/block.h b/drivers/mmc/core/block.h index 5946636101ef..6d34e87b18f6 100644 --- a/drivers/mmc/core/block.h +++ b/drivers/mmc/core/block.h @@ -7,4 +7,13 @@ struct request; void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req); +enum mmc_issued; + +enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req); +void mmc_blk_mq_complete(struct request *req); + +struct work_struct; + +void mmc_blk_mq_complete_work(struct work_struct *work); + #endif diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index ae6d9da68735..54bec4c6c9bd 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -22,6 +22,7 @@ #include "block.h" #include "core.h" #include "card.h" +#include "host.h" /* * Prepare a MMC request. This just filters out odd stuff. 
@@ -34,10 +35,25 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) return BLKPREP_KILL; req->rq_flags |= RQF_DONTPREP; + req_to_mmc_queue_req(req)->retries = 0; return BLKPREP_OK; } +enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req) +{ + if (req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_WRITE) + return MMC_ISSUE_ASYNC; + + return MMC_ISSUE_SYNC; +} + +static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req, + bool reserved) +{ + return BLK_EH_RESET_TIMER; +} + static int mmc_queue_thread(void *d) { struct mmc_queue *mq = d; @@ -154,11 +170,10 @@ static void mmc_queue_setup_discard(struct request_queue *q, * @req: the request * @gfp: memory allocation policy */ -static int mmc_init_request(struct request_queue *q, struct request *req, - gfp_t gfp) +static int __mmc_init_request(struct mmc_queue *mq, struct request *req, + gfp_t gfp) { struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req); - struct mmc_queue *mq = q->queuedata; struct mmc_card *card = mq->card; struct mmc_host *host = card->host; @@ -169,6 +184,12 @@ static int mmc_init_request(struct request_queue *q, struct request *req, return 0; } +static int mmc_init_request(struct request_queue *q, struct request *req, + gfp_t gfp) +{ + return __mmc_init_request(q->queuedata, req, gfp); +} + static void mmc_exit_request(struct request_queue *q, struct request *req) { struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req); @@ -177,6 +198,112 @@ static void mmc_exit_request(struct request_queue *q, struct request *req) mq_rq->sg = NULL; } +static int mmc_mq_init_request(struct blk_mq_tag_set *set, struct request *req, + unsigned int hctx_idx, unsigned int numa_node) +{ + return __mmc_init_request(set->driver_data, req, GFP_KERNEL); +} + +static void mmc_mq_exit_request(struct blk_mq_tag_set *set, struct request *req, + unsigned int hctx_idx) +{ + struct mmc_queue *mq = set->driver_data; + + mmc_exit_request(mq->queue, req); +} + +/* + * 
We use BLK_MQ_F_BLOCKING and have only 1 hardware queue, which means requests + * will not be dispatched in parallel. + */ +static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *req = bd->rq; + struct request_queue *q = req->q; + struct mmc_queue *mq = q->queuedata; + struct mmc_card *card = mq->card; + enum mmc_issue_type issue_type; + enum mmc_issued issued; + bool get_card; + int ret; + + if (mmc_card_removed(mq->card)) { + req->rq_flags |= RQF_QUIET; + return BLK_STS_IOERR; + } + + issue_type = mmc_issue_type(mq, req); + + spin_lock_irq(q->queue_lock); + + switch (issue_type) { + case MMC_ISSUE_ASYNC: + break; + default: + /* + * Timeouts are handled by mmc core, and we don't have a host + * API to abort requests, so we can't handle the timeout anyway. + * However, when the timeout happens, blk_mq_complete_request() + * no longer works (to stop the request disappearing under us). + * To avoid racing with that, set a large timeout. 
+ */ + req->timeout = 600 * HZ; + break; + } + + mq->in_flight[issue_type] += 1; + get_card = (mmc_tot_in_flight(mq) == 1); + + spin_unlock_irq(q->queue_lock); + + if (!(req->rq_flags & RQF_DONTPREP)) { + req_to_mmc_queue_req(req)->retries = 0; + req->rq_flags |= RQF_DONTPREP; + } + + if (get_card) + mmc_get_card(card, &mq->ctx); + + blk_mq_start_request(req); + + issued = mmc_blk_mq_issue_rq(mq, req); + + switch (issued) { + case MMC_REQ_BUSY: + ret = BLK_STS_RESOURCE; + break; + case MMC_REQ_FAILED_TO_START: + ret = BLK_STS_IOERR; + break; + default: + ret = BLK_STS_OK; + break; + } + + if (issued != MMC_REQ_STARTED) { + bool put_card = false; + + spin_lock_irq(q->queue_lock); + mq->in_flight[issue_type] -= 1; + if (mmc_tot_in_flight(mq) == 0) + put_card = true; + spin_unlock_irq(q->queue_lock); + if (put_card) + mmc_put_card(card, &mq->ctx); + } + + return ret; +} + +static const struct blk_mq_ops mmc_mq_ops = { + .queue_rq = mmc_mq_queue_rq, + .init_request = mmc_mq_init_request, + .exit_request = mmc_mq_exit_request, + .complete = mmc_blk_mq_complete, + .timeout = mmc_mq_timed_out, +}; + static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) { struct mmc_host *host = card->host; @@ -198,6 +325,70 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) /* Initialize thread_sem even if it is not used */ sema_init(&mq->thread_sem, 1); + + INIT_WORK(&mq->complete_work, mmc_blk_mq_complete_work); + + mutex_init(&mq->complete_lock); + + init_waitqueue_head(&mq->wait); +} + +static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth, + const struct blk_mq_ops *mq_ops, spinlock_t *lock) +{ + int ret; + + memset(&mq->tag_set, 0, sizeof(mq->tag_set)); + mq->tag_set.ops = mq_ops; + mq->tag_set.queue_depth = q_depth; + mq->tag_set.numa_node = NUMA_NO_NODE; + mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE | + BLK_MQ_F_BLOCKING; + mq->tag_set.nr_hw_queues = 1; + mq->tag_set.cmd_size = sizeof(struct mmc_queue_req); + 
mq->tag_set.driver_data = mq; + + ret = blk_mq_alloc_tag_set(&mq->tag_set); + if (ret) + return ret; + + mq->queue = blk_mq_init_queue(&mq->tag_set); + if (IS_ERR(mq->queue)) { + ret = PTR_ERR(mq->queue); + goto free_tag_set; + } + + mq->queue->queue_lock = lock; + mq->queue->queuedata = mq; + + return 0; + +free_tag_set: + blk_mq_free_tag_set(&mq->tag_set); + + return ret; +} + +/* Set queue depth to get a reasonable value for q->nr_requests */ +#define MMC_QUEUE_DEPTH 64 + +static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card, + spinlock_t *lock) +{ + int q_depth; + int ret; + + q_depth = MMC_QUEUE_DEPTH; + + ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock); + if (ret) + return ret; + + blk_queue_rq_timeout(mq->queue, 60 * HZ); + + mmc_setup_queue(mq, card); + + return 0; } /** @@ -216,6 +407,10 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, int ret = -ENOMEM; mq->card = card; + + if (mmc_host_use_blk_mq(host)) + return mmc_mq_init(mq, card, lock); + mq->queue = blk_alloc_queue(GFP_KERNEL); if (!mq->queue) return -ENOMEM; @@ -251,11 +446,70 @@ cleanup_queue: return ret; } +static void mmc_mq_queue_suspend(struct mmc_queue *mq) +{ + blk_mq_quiesce_queue(mq->queue); + + /* + * The host remains claimed while there are outstanding requests, so + * simply claiming and releasing here ensures there are none. 
+ */ + mmc_claim_host(mq->card->host); + mmc_release_host(mq->card->host); +} + +static void mmc_mq_queue_resume(struct mmc_queue *mq) +{ + blk_mq_unquiesce_queue(mq->queue); +} + +static void __mmc_queue_suspend(struct mmc_queue *mq) +{ + struct request_queue *q = mq->queue; + unsigned long flags; + + if (!mq->suspended) { + mq->suspended |= true; + + spin_lock_irqsave(q->queue_lock, flags); + blk_stop_queue(q); + spin_unlock_irqrestore(q->queue_lock, flags); + + down(&mq->thread_sem); + } +} + +static void __mmc_queue_resume(struct mmc_queue *mq) +{ + struct request_queue *q = mq->queue; + unsigned long flags; + + if (mq->suspended) { + mq->suspended = false; + + up(&mq->thread_sem); + + spin_lock_irqsave(q->queue_lock, flags); + blk_start_queue(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } +} + void mmc_cleanup_queue(struct mmc_queue *mq) { struct request_queue *q = mq->queue; unsigned long flags; + if (q->mq_ops) { + /* + * The legacy code handled the possibility of being suspended, + * so do that here too. + */ + if (blk_queue_quiesced(q)) + blk_mq_unquiesce_queue(q); + goto out_cleanup; + } + /* Make sure the queue isn't suspended, as that will deadlock */ mmc_queue_resume(mq); @@ -268,8 +522,16 @@ void mmc_cleanup_queue(struct mmc_queue *mq) blk_start_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); +out_cleanup: blk_cleanup_queue(q); + /* + * A request can be completed before the next request, potentially + * leaving a complete_work with nothing to do. Such a work item might + * still be queued at this point. Flush it. 
+ */ + flush_work(&mq->complete_work); + mq->card = NULL; } @@ -284,17 +546,11 @@ void mmc_cleanup_queue(struct mmc_queue *mq) void mmc_queue_suspend(struct mmc_queue *mq) { struct request_queue *q = mq->queue; - unsigned long flags; - - if (!mq->suspended) { - mq->suspended |= true; - - spin_lock_irqsave(q->queue_lock, flags); - blk_stop_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - down(&mq->thread_sem); - } + if (q->mq_ops) + mmc_mq_queue_suspend(mq); + else + __mmc_queue_suspend(mq); } /** @@ -304,17 +560,11 @@ void mmc_queue_suspend(struct mmc_queue *mq) void mmc_queue_resume(struct mmc_queue *mq) { struct request_queue *q = mq->queue; - unsigned long flags; - if (mq->suspended) { - mq->suspended = false; - - up(&mq->thread_sem); - - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } + if (q->mq_ops) + mmc_mq_queue_resume(mq); + else + __mmc_queue_resume(mq); } /* diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 547b457c4251..ce9249852f26 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -8,6 +8,19 @@ #include #include +enum mmc_issued { + MMC_REQ_STARTED, + MMC_REQ_BUSY, + MMC_REQ_FAILED_TO_START, + MMC_REQ_FINISHED, +}; + +enum mmc_issue_type { + MMC_ISSUE_SYNC, + MMC_ISSUE_ASYNC, + MMC_ISSUE_MAX, +}; + static inline struct mmc_queue_req *req_to_mmc_queue_req(struct request *rq) { return blk_mq_rq_to_pdu(rq); @@ -57,12 +70,15 @@ struct mmc_queue_req { int drv_op_result; void *drv_op_data; unsigned int ioc_count; + int retries; }; struct mmc_queue { struct mmc_card *card; struct task_struct *thread; struct semaphore thread_sem; + struct mmc_ctx ctx; + struct blk_mq_tag_set tag_set; bool suspended; bool asleep; struct mmc_blk_data *blkdata; @@ -74,6 +90,14 @@ struct mmc_queue { * associated mmc_queue_req data. 
*/ int qcnt; + + int in_flight[MMC_ISSUE_MAX]; + bool rw_wait; + bool waiting; + wait_queue_head_t wait; + struct request *complete_req; + struct mutex complete_lock; + struct work_struct complete_work; }; extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *, @@ -84,4 +108,12 @@ extern void mmc_queue_resume(struct mmc_queue *); extern unsigned int mmc_queue_map_sg(struct mmc_queue *, struct mmc_queue_req *); +enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req); + +static inline int mmc_tot_in_flight(struct mmc_queue *mq) +{ + return mq->in_flight[MMC_ISSUE_SYNC] + + mq->in_flight[MMC_ISSUE_ASYNC]; +} + #endif -- cgit v1.2.3 From 1e8e55b67030c6a2fef893d428bdcd611f73705c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:04 +0200 Subject: mmc: block: Add CQE support Add CQE support to the block driver, including: - optionally using DCMD for flush requests - "manually" issuing discard requests - issuing read / write requests to the CQE - supporting block-layer timeouts - handling recovery - supporting re-tuning CQE offers 25% - 50% better random multi-threaded I/O. There is a slight (e.g. 2%) drop in sequential read speed but no observable change to sequential write. CQE automatically sends the commands to complete requests. However it only supports reads / writes and so-called "direct commands" (DCMD). Furthermore DCMD is limited to one command at a time, but discards require 3 commands. That makes issuing discards through CQE very awkward, but some CQEs don't support DCMD anyway. So for discards, the existing non-CQE approach is taken, where the mmc core code issues the 3 commands one at a time, i.e. mmc_erase(). DCMD is used for issuing flushes.
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 150 ++++++++++++++++++++++++++++++++++++++++++- drivers/mmc/core/block.h | 2 + drivers/mmc/core/queue.c | 162 +++++++++++++++++++++++++++++++++++++++++++++-- drivers/mmc/core/queue.h | 18 ++++++ 4 files changed, 326 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 7874c3bbf6b5..7275ac5d6799 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -112,6 +112,7 @@ struct mmc_blk_data { #define MMC_BLK_WRITE BIT(1) #define MMC_BLK_DISCARD BIT(2) #define MMC_BLK_SECDISCARD BIT(3) +#define MMC_BLK_CQE_RECOVERY BIT(4) /* * Only set in main mmc_blk_data associated @@ -1730,6 +1731,138 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, *do_data_tag_p = do_data_tag; } +#define MMC_CQE_RETRIES 2 + +static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_request *mrq = &mqrq->brq.mrq; + struct request_queue *q = req->q; + struct mmc_host *host = mq->card->host; + unsigned long flags; + bool put_card; + int err; + + mmc_cqe_post_req(host, mrq); + + if (mrq->cmd && mrq->cmd->error) + err = mrq->cmd->error; + else if (mrq->data && mrq->data->error) + err = mrq->data->error; + else + err = 0; + + if (err) { + if (mqrq->retries++ < MMC_CQE_RETRIES) + blk_mq_requeue_request(req, true); + else + blk_mq_end_request(req, BLK_STS_IOERR); + } else if (mrq->data) { + if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered)) + blk_mq_requeue_request(req, true); + else + __blk_mq_end_request(req, BLK_STS_OK); + } else { + blk_mq_end_request(req, BLK_STS_OK); + } + + spin_lock_irqsave(q->queue_lock, flags); + + mq->in_flight[mmc_issue_type(mq, req)] -= 1; + + put_card = (mmc_tot_in_flight(mq) == 0); + + mmc_cqe_check_busy(mq); + + 
spin_unlock_irqrestore(q->queue_lock, flags); + + if (!mq->cqe_busy) + blk_mq_run_hw_queues(q, true); + + if (put_card) + mmc_put_card(mq->card, &mq->ctx); +} + +void mmc_blk_cqe_recovery(struct mmc_queue *mq) +{ + struct mmc_card *card = mq->card; + struct mmc_host *host = card->host; + int err; + + pr_debug("%s: CQE recovery start\n", mmc_hostname(host)); + + err = mmc_cqe_recovery(host); + if (err) + mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY); + else + mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY); + + pr_debug("%s: CQE recovery done\n", mmc_hostname(host)); +} + +static void mmc_blk_cqe_req_done(struct mmc_request *mrq) +{ + struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req, + brq.mrq); + struct request *req = mmc_queue_req_to_req(mqrq); + struct request_queue *q = req->q; + struct mmc_queue *mq = q->queuedata; + + /* + * Block layer timeouts race with completions which means the normal + * completion path cannot be used during recovery. + */ + if (mq->in_recovery) + mmc_blk_cqe_complete_rq(mq, req); + else + blk_mq_complete_request(req); +} + +static int mmc_blk_cqe_start_req(struct mmc_host *host, struct mmc_request *mrq) +{ + mrq->done = mmc_blk_cqe_req_done; + mrq->recovery_notifier = mmc_cqe_recovery_notifier; + + return mmc_cqe_start_req(host, mrq); +} + +static struct mmc_request *mmc_blk_cqe_prep_dcmd(struct mmc_queue_req *mqrq, + struct request *req) +{ + struct mmc_blk_request *brq = &mqrq->brq; + + memset(brq, 0, sizeof(*brq)); + + brq->mrq.cmd = &brq->cmd; + brq->mrq.tag = req->tag; + + return &brq->mrq; +} + +static int mmc_blk_cqe_issue_flush(struct mmc_queue *mq, struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_request *mrq = mmc_blk_cqe_prep_dcmd(mqrq, req); + + mrq->cmd->opcode = MMC_SWITCH; + mrq->cmd->arg = (MMC_SWITCH_MODE_WRITE_BYTE << 24) | + (EXT_CSD_FLUSH_CACHE << 16) | + (1 << 8) | + EXT_CSD_CMD_SET_NORMAL; + mrq->cmd->flags = MMC_CMD_AC | 
MMC_RSP_R1B; + + return mmc_blk_cqe_start_req(mq->card->host, mrq); +} + +static int mmc_blk_cqe_issue_rw_rq(struct mmc_queue *mq, struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + + mmc_blk_data_prep(mq, mqrq, 0, NULL, NULL); + + return mmc_blk_cqe_start_req(mq->card->host, &mqrq->brq.mrq); +} + static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_card *card, int disable_multi, @@ -2038,7 +2171,10 @@ void mmc_blk_mq_complete(struct request *req) { struct mmc_queue *mq = req->q->queuedata; - mmc_blk_mq_complete_rq(mq, req); + if (mq->use_cqe) + mmc_blk_cqe_complete_rq(mq, req); + else + mmc_blk_mq_complete_rq(mq, req); } static void mmc_blk_mq_poll_completion(struct mmc_queue *mq, @@ -2212,6 +2348,9 @@ out_post_req: static int mmc_blk_wait_for_idle(struct mmc_queue *mq, struct mmc_host *host) { + if (mq->use_cqe) + return host->cqe_ops->cqe_wait_for_idle(host); + return mmc_blk_rw_wait(mq, NULL); } @@ -2250,11 +2389,18 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req) return MMC_REQ_FAILED_TO_START; } return MMC_REQ_FINISHED; + case MMC_ISSUE_DCMD: case MMC_ISSUE_ASYNC: switch (req_op(req)) { + case REQ_OP_FLUSH: + ret = mmc_blk_cqe_issue_flush(mq, req); + break; case REQ_OP_READ: case REQ_OP_WRITE: - ret = mmc_blk_mq_issue_rw_rq(mq, req); + if (mq->use_cqe) + ret = mmc_blk_cqe_issue_rw_rq(mq, req); + else + ret = mmc_blk_mq_issue_rw_rq(mq, req); break; default: WARN_ON_ONCE(1); diff --git a/drivers/mmc/core/block.h b/drivers/mmc/core/block.h index 6d34e87b18f6..f472ce5d5647 100644 --- a/drivers/mmc/core/block.h +++ b/drivers/mmc/core/block.h @@ -7,6 +7,8 @@ struct request; void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req); +void mmc_blk_cqe_recovery(struct mmc_queue *mq); + enum mmc_issued; enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 54bec4c6c9bd..8d632d2f5199 
100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -40,18 +40,142 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) return BLKPREP_OK; } +static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq) +{ + /* Allow only 1 DCMD at a time */ + return mq->in_flight[MMC_ISSUE_DCMD]; +} + +void mmc_cqe_check_busy(struct mmc_queue *mq) +{ + if ((mq->cqe_busy & MMC_CQE_DCMD_BUSY) && !mmc_cqe_dcmd_busy(mq)) + mq->cqe_busy &= ~MMC_CQE_DCMD_BUSY; + + mq->cqe_busy &= ~MMC_CQE_QUEUE_FULL; +} + +static inline bool mmc_cqe_can_dcmd(struct mmc_host *host) +{ + return host->caps2 & MMC_CAP2_CQE_DCMD; +} + +enum mmc_issue_type mmc_cqe_issue_type(struct mmc_host *host, + struct request *req) +{ + switch (req_op(req)) { + case REQ_OP_DRV_IN: + case REQ_OP_DRV_OUT: + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: + return MMC_ISSUE_SYNC; + case REQ_OP_FLUSH: + return mmc_cqe_can_dcmd(host) ? MMC_ISSUE_DCMD : MMC_ISSUE_SYNC; + default: + return MMC_ISSUE_ASYNC; + } +} + enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req) { + struct mmc_host *host = mq->card->host; + + if (mq->use_cqe) + return mmc_cqe_issue_type(host, req); + if (req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_WRITE) return MMC_ISSUE_ASYNC; return MMC_ISSUE_SYNC; } +static void __mmc_cqe_recovery_notifier(struct mmc_queue *mq) +{ + if (!mq->recovery_needed) { + mq->recovery_needed = true; + schedule_work(&mq->recovery_work); + } +} + +void mmc_cqe_recovery_notifier(struct mmc_request *mrq) +{ + struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req, + brq.mrq); + struct request *req = mmc_queue_req_to_req(mqrq); + struct request_queue *q = req->q; + struct mmc_queue *mq = q->queuedata; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + __mmc_cqe_recovery_notifier(mq); + spin_unlock_irqrestore(q->queue_lock, flags); +} + +static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req) +{ + struct 
mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_request *mrq = &mqrq->brq.mrq; + struct mmc_queue *mq = req->q->queuedata; + struct mmc_host *host = mq->card->host; + enum mmc_issue_type issue_type = mmc_issue_type(mq, req); + bool recovery_needed = false; + + switch (issue_type) { + case MMC_ISSUE_ASYNC: + case MMC_ISSUE_DCMD: + if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) { + if (recovery_needed) + __mmc_cqe_recovery_notifier(mq); + return BLK_EH_RESET_TIMER; + } + /* No timeout */ + return BLK_EH_HANDLED; + default: + /* Timeout is handled by mmc core */ + return BLK_EH_RESET_TIMER; + } +} + static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req, bool reserved) { - return BLK_EH_RESET_TIMER; + struct request_queue *q = req->q; + struct mmc_queue *mq = q->queuedata; + unsigned long flags; + int ret; + + spin_lock_irqsave(q->queue_lock, flags); + + if (mq->recovery_needed || !mq->use_cqe) + ret = BLK_EH_RESET_TIMER; + else + ret = mmc_cqe_timed_out(req); + + spin_unlock_irqrestore(q->queue_lock, flags); + + return ret; +} + +static void mmc_mq_recovery_handler(struct work_struct *work) +{ + struct mmc_queue *mq = container_of(work, struct mmc_queue, + recovery_work); + struct request_queue *q = mq->queue; + + mmc_get_card(mq->card, &mq->ctx); + + mq->in_recovery = true; + + mmc_blk_cqe_recovery(mq); + + mq->in_recovery = false; + + spin_lock_irq(q->queue_lock); + mq->recovery_needed = false; + spin_unlock_irq(q->queue_lock); + + mmc_put_card(mq->card, &mq->ctx); + + blk_mq_run_hw_queues(q, true); } static int mmc_queue_thread(void *d) @@ -223,9 +347,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, struct request_queue *q = req->q; struct mmc_queue *mq = q->queuedata; struct mmc_card *card = mq->card; + struct mmc_host *host = card->host; enum mmc_issue_type issue_type; enum mmc_issued issued; - bool get_card; + bool get_card, cqe_retune_ok; int ret; if (mmc_card_removed(mq->card)) { @@ -237,7 +362,19 
@@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(q->queue_lock); + if (mq->recovery_needed) { + spin_unlock_irq(q->queue_lock); + return BLK_STS_RESOURCE; + } + switch (issue_type) { + case MMC_ISSUE_DCMD: + if (mmc_cqe_dcmd_busy(mq)) { + mq->cqe_busy |= MMC_CQE_DCMD_BUSY; + spin_unlock_irq(q->queue_lock); + return BLK_STS_RESOURCE; + } + break; case MMC_ISSUE_ASYNC: break; default: @@ -254,6 +391,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, mq->in_flight[issue_type] += 1; get_card = (mmc_tot_in_flight(mq) == 1); + cqe_retune_ok = (mmc_cqe_qcnt(mq) == 1); spin_unlock_irq(q->queue_lock); @@ -265,6 +403,11 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx, if (get_card) mmc_get_card(card, &mq->ctx); + if (mq->use_cqe) { + host->retune_now = host->need_retune && cqe_retune_ok && + !host->hold_retune; + } + blk_mq_start_request(req); issued = mmc_blk_mq_issue_rq(mq, req); @@ -326,6 +469,7 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) /* Initialize thread_sem even if it is not used */ sema_init(&mq->thread_sem, 1); + INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler); INIT_WORK(&mq->complete_work, mmc_blk_mq_complete_work); mutex_init(&mq->complete_lock); @@ -375,10 +519,18 @@ free_tag_set: static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock) { + struct mmc_host *host = card->host; int q_depth; int ret; - q_depth = MMC_QUEUE_DEPTH; + /* + * The queue depth for CQE must match the hardware because the request + * tag is used to index the hardware queue. 
+ */ + if (mq->use_cqe) + q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth); + else + q_depth = MMC_QUEUE_DEPTH; ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock); if (ret) @@ -408,7 +560,9 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, mq->card = card; - if (mmc_host_use_blk_mq(host)) + mq->use_cqe = host->cqe_enabled; + + if (mq->use_cqe || mmc_host_use_blk_mq(host)) return mmc_mq_init(mq, card, lock); mq->queue = blk_alloc_queue(GFP_KERNEL); diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index ce9249852f26..1d7d3b0afff8 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -17,6 +17,7 @@ enum mmc_issued { enum mmc_issue_type { MMC_ISSUE_SYNC, + MMC_ISSUE_DCMD, MMC_ISSUE_ASYNC, MMC_ISSUE_MAX, }; @@ -92,8 +93,15 @@ struct mmc_queue { int qcnt; int in_flight[MMC_ISSUE_MAX]; + unsigned int cqe_busy; +#define MMC_CQE_DCMD_BUSY BIT(0) +#define MMC_CQE_QUEUE_FULL BIT(1) + bool use_cqe; + bool recovery_needed; + bool in_recovery; bool rw_wait; bool waiting; + struct work_struct recovery_work; wait_queue_head_t wait; struct request *complete_req; struct mutex complete_lock; @@ -108,11 +116,21 @@ extern void mmc_queue_resume(struct mmc_queue *); extern unsigned int mmc_queue_map_sg(struct mmc_queue *, struct mmc_queue_req *); +void mmc_cqe_check_busy(struct mmc_queue *mq); +void mmc_cqe_recovery_notifier(struct mmc_request *mrq); + enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req); static inline int mmc_tot_in_flight(struct mmc_queue *mq) { return mq->in_flight[MMC_ISSUE_SYNC] + + mq->in_flight[MMC_ISSUE_DCMD] + + mq->in_flight[MMC_ISSUE_ASYNC]; +} + +static inline int mmc_cqe_qcnt(struct mmc_queue *mq) +{ + return mq->in_flight[MMC_ISSUE_DCMD] + mq->in_flight[MMC_ISSUE_ASYNC]; } -- cgit v1.2.3 From a4080225f51dcea129d26185a35acfbb3770a32d Mon Sep 17 00:00:00 2001 From: Venkat Gopalakrishnan Date: Wed, 29 Nov 2017 15:41:05 +0200 Subject: mmc: cqhci: support for command 
queue enabled host This patch adds CMDQ support for command-queue compatible hosts. Command queue was added in the eMMC-5.1 specification. This enables the controller to process up to 32 requests at a time. Adrian Hunter contributed renaming to cqhci, recovery, suspend and resume, cqhci_off, cqhci_wait_for_idle, and external timeout handling. Signed-off-by: Asutosh Das Signed-off-by: Sujit Reddy Thumma Signed-off-by: Konstantin Dorfman Signed-off-by: Venkat Gopalakrishnan Signed-off-by: Subhash Jadavani Signed-off-by: Ritesh Harjani Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/host/Kconfig | 13 + drivers/mmc/host/Makefile | 1 + drivers/mmc/host/cqhci.c | 1150 +++++++++++++++++++++++++++++++++++++++++++++ drivers/mmc/host/cqhci.h | 240 ++++++++++ 4 files changed, 1404 insertions(+) create mode 100644 drivers/mmc/host/cqhci.c create mode 100644 drivers/mmc/host/cqhci.h diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 567028c9219a..3092b7085cb5 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -857,6 +857,19 @@ config MMC_SUNXI This selects support for the SD/MMC Host Controller on Allwinner sunxi SoCs. +config MMC_CQHCI + tristate "Command Queue Host Controller Interface support" + depends on HAS_DMA + help + This selects the Command Queue Host Controller Interface (CQHCI) + support present in host controllers of Qualcomm Technologies, Inc + amongst others. + This controller supports eMMC devices with command queue support. + + If you have a controller with this interface, say Y or M here. + + If unsure, say N.
+ config MMC_TOSHIBA_PCI tristate "Toshiba Type A SD/MMC Card Interface Driver" depends on PCI diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index a43cf0d5a5d3..407a011026cd 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_MMC_SDHCI_ST) += sdhci-st.o obj-$(CONFIG_MMC_SDHCI_MICROCHIP_PIC32) += sdhci-pic32.o obj-$(CONFIG_MMC_SDHCI_BRCMSTB) += sdhci-brcmstb.o obj-$(CONFIG_MMC_SDHCI_OMAP) += sdhci-omap.o +obj-$(CONFIG_MMC_CQHCI) += cqhci.o ifeq ($(CONFIG_CB710_DEBUG),y) CFLAGS-cb710-mmc += -DDEBUG diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c new file mode 100644 index 000000000000..159270e947cf --- /dev/null +++ b/drivers/mmc/host/cqhci.c @@ -0,0 +1,1150 @@ +/* Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "cqhci.h" + +#define DCMD_SLOT 31 +#define NUM_SLOTS 32 + +struct cqhci_slot { + struct mmc_request *mrq; + unsigned int flags; +#define CQHCI_EXTERNAL_TIMEOUT BIT(0) +#define CQHCI_COMPLETED BIT(1) +#define CQHCI_HOST_CRC BIT(2) +#define CQHCI_HOST_TIMEOUT BIT(3) +#define CQHCI_HOST_OTHER BIT(4) +}; + +static inline u8 *get_desc(struct cqhci_host *cq_host, u8 tag) +{ + return cq_host->desc_base + (tag * cq_host->slot_sz); +} + +static inline u8 *get_link_desc(struct cqhci_host *cq_host, u8 tag) +{ + u8 *desc = get_desc(cq_host, tag); + + return desc + cq_host->task_desc_len; +} + +static inline dma_addr_t get_trans_desc_dma(struct cqhci_host *cq_host, u8 tag) +{ + return cq_host->trans_desc_dma_base + + (cq_host->mmc->max_segs * tag * + cq_host->trans_desc_len); +} + +static inline u8 *get_trans_desc(struct cqhci_host *cq_host, u8 tag) +{ + return cq_host->trans_desc_base + + (cq_host->trans_desc_len * cq_host->mmc->max_segs * tag); +} + +static void setup_trans_desc(struct cqhci_host *cq_host, u8 tag) +{ + u8 *link_temp; + dma_addr_t trans_temp; + + link_temp = get_link_desc(cq_host, tag); + trans_temp = get_trans_desc_dma(cq_host, tag); + + memset(link_temp, 0, cq_host->link_desc_len); + if (cq_host->link_desc_len > 8) + *(link_temp + 8) = 0; + + if (tag == DCMD_SLOT && (cq_host->mmc->caps2 & MMC_CAP2_CQE_DCMD)) { + *link_temp = CQHCI_VALID(0) | CQHCI_ACT(0) | CQHCI_END(1); + return; + } + + *link_temp = CQHCI_VALID(1) | CQHCI_ACT(0x6) | CQHCI_END(0); + + if (cq_host->dma64) { + __le64 *data_addr = (__le64 __force *)(link_temp + 4); + + data_addr[0] = cpu_to_le64(trans_temp); + } else { + __le32 *data_addr = (__le32 __force *)(link_temp + 4); + + data_addr[0] = cpu_to_le32(trans_temp); + } +} + +static void cqhci_set_irqs(struct cqhci_host *cq_host, u32 set) +{ + cqhci_writel(cq_host, set, CQHCI_ISTE); + 
cqhci_writel(cq_host, set, CQHCI_ISGE); +} + +#define DRV_NAME "cqhci" + +#define CQHCI_DUMP(f, x...) \ + pr_err("%s: " DRV_NAME ": " f, mmc_hostname(mmc), ## x) + +static void cqhci_dumpregs(struct cqhci_host *cq_host) +{ + struct mmc_host *mmc = cq_host->mmc; + + CQHCI_DUMP("============ CQHCI REGISTER DUMP ===========\n"); + + CQHCI_DUMP("Caps: 0x%08x | Version: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_CAP), + cqhci_readl(cq_host, CQHCI_VER)); + CQHCI_DUMP("Config: 0x%08x | Control: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_CFG), + cqhci_readl(cq_host, CQHCI_CTL)); + CQHCI_DUMP("Int stat: 0x%08x | Int enab: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_IS), + cqhci_readl(cq_host, CQHCI_ISTE)); + CQHCI_DUMP("Int sig: 0x%08x | Int Coal: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_ISGE), + cqhci_readl(cq_host, CQHCI_IC)); + CQHCI_DUMP("TDL base: 0x%08x | TDL up32: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_TDLBA), + cqhci_readl(cq_host, CQHCI_TDLBAU)); + CQHCI_DUMP("Doorbell: 0x%08x | TCN: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_TDBR), + cqhci_readl(cq_host, CQHCI_TCN)); + CQHCI_DUMP("Dev queue: 0x%08x | Dev Pend: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_DQS), + cqhci_readl(cq_host, CQHCI_DPT)); + CQHCI_DUMP("Task clr: 0x%08x | SSC1: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_TCLR), + cqhci_readl(cq_host, CQHCI_SSC1)); + CQHCI_DUMP("SSC2: 0x%08x | DCMD rsp: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_SSC2), + cqhci_readl(cq_host, CQHCI_CRDCT)); + CQHCI_DUMP("RED mask: 0x%08x | TERRI: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_RMEM), + cqhci_readl(cq_host, CQHCI_TERRI)); + CQHCI_DUMP("Resp idx: 0x%08x | Resp arg: 0x%08x\n", + cqhci_readl(cq_host, CQHCI_CRI), + cqhci_readl(cq_host, CQHCI_CRA)); + + if (cq_host->ops->dumpregs) + cq_host->ops->dumpregs(mmc); + else + CQHCI_DUMP(": ===========================================\n"); +} + +/** + * The allocated descriptor table for task, link & transfer descritors + * looks like: + * |----------| + * |task desc | |->|----------| + * |----------| | 
|trans desc| + * |link desc-|->| |----------| + * |----------| . + * . . + * no. of slots max-segs + * . |----------| + * |----------| + * The idea here is to create the [task+trans] table and mark & point the + * link desc to the transfer desc table on a per slot basis. + */ +static int cqhci_host_alloc_tdl(struct cqhci_host *cq_host) +{ + int i = 0; + + /* task descriptor can be 64/128 bit irrespective of arch */ + if (cq_host->caps & CQHCI_TASK_DESC_SZ_128) { + cqhci_writel(cq_host, cqhci_readl(cq_host, CQHCI_CFG) | + CQHCI_TASK_DESC_SZ, CQHCI_CFG); + cq_host->task_desc_len = 16; + } else { + cq_host->task_desc_len = 8; + } + + /* + * 96 bits length of transfer desc instead of 128 bits which means + * ADMA would expect next valid descriptor at the 96th bit + * or 128th bit + */ + if (cq_host->dma64) { + if (cq_host->quirks & CQHCI_QUIRK_SHORT_TXFR_DESC_SZ) + cq_host->trans_desc_len = 12; + else + cq_host->trans_desc_len = 16; + cq_host->link_desc_len = 16; + } else { + cq_host->trans_desc_len = 8; + cq_host->link_desc_len = 8; + } + + /* total size of a slot: 1 task & 1 transfer (link) */ + cq_host->slot_sz = cq_host->task_desc_len + cq_host->link_desc_len; + + cq_host->desc_size = cq_host->slot_sz * cq_host->num_slots; + + cq_host->data_size = cq_host->trans_desc_len * cq_host->mmc->max_segs * + (cq_host->num_slots - 1); + + pr_debug("%s: cqhci: desc_size: %zu data_sz: %zu slot-sz: %d\n", + mmc_hostname(cq_host->mmc), cq_host->desc_size, cq_host->data_size, + cq_host->slot_sz); + + /* + * allocate a dma-mapped chunk of memory for the descriptors + * allocate a dma-mapped chunk of memory for link descriptors + * setup each link-desc memory offset per slot-number to + * the descriptor table. 
+ */ + cq_host->desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc), + cq_host->desc_size, + &cq_host->desc_dma_base, + GFP_KERNEL); + cq_host->trans_desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc), + cq_host->data_size, + &cq_host->trans_desc_dma_base, + GFP_KERNEL); + if (!cq_host->desc_base || !cq_host->trans_desc_base) + return -ENOMEM; + + pr_debug("%s: cqhci: desc-base: 0x%p trans-base: 0x%p\n desc_dma 0x%llx trans_dma: 0x%llx\n", + mmc_hostname(cq_host->mmc), cq_host->desc_base, cq_host->trans_desc_base, + (unsigned long long)cq_host->desc_dma_base, + (unsigned long long)cq_host->trans_desc_dma_base); + + for (; i < (cq_host->num_slots); i++) + setup_trans_desc(cq_host, i); + + return 0; +} + +static void __cqhci_enable(struct cqhci_host *cq_host) +{ + struct mmc_host *mmc = cq_host->mmc; + u32 cqcfg; + + cqcfg = cqhci_readl(cq_host, CQHCI_CFG); + + /* Configuration must not be changed while enabled */ + if (cqcfg & CQHCI_ENABLE) { + cqcfg &= ~CQHCI_ENABLE; + cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + } + + cqcfg &= ~(CQHCI_DCMD | CQHCI_TASK_DESC_SZ); + + if (mmc->caps2 & MMC_CAP2_CQE_DCMD) + cqcfg |= CQHCI_DCMD; + + if (cq_host->caps & CQHCI_TASK_DESC_SZ_128) + cqcfg |= CQHCI_TASK_DESC_SZ; + + cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + + cqhci_writel(cq_host, lower_32_bits(cq_host->desc_dma_base), + CQHCI_TDLBA); + cqhci_writel(cq_host, upper_32_bits(cq_host->desc_dma_base), + CQHCI_TDLBAU); + + cqhci_writel(cq_host, cq_host->rca, CQHCI_SSC2); + + cqhci_set_irqs(cq_host, 0); + + cqcfg |= CQHCI_ENABLE; + + cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + + mmc->cqe_on = true; + + if (cq_host->ops->enable) + cq_host->ops->enable(mmc); + + /* Ensure all writes are done before interrupts are enabled */ + wmb(); + + cqhci_set_irqs(cq_host, CQHCI_IS_MASK); + + cq_host->activated = true; +} + +static void __cqhci_disable(struct cqhci_host *cq_host) +{ + u32 cqcfg; + + cqcfg = cqhci_readl(cq_host, CQHCI_CFG); + cqcfg &= ~CQHCI_ENABLE; + cqhci_writel(cq_host, 
cqcfg, CQHCI_CFG); + + cq_host->mmc->cqe_on = false; + + cq_host->activated = false; +} + +int cqhci_suspend(struct mmc_host *mmc) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + + if (cq_host->enabled) + __cqhci_disable(cq_host); + + return 0; +} +EXPORT_SYMBOL(cqhci_suspend); + +int cqhci_resume(struct mmc_host *mmc) +{ + /* Re-enable is done upon first request */ + return 0; +} +EXPORT_SYMBOL(cqhci_resume); + +static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + int err; + + if (cq_host->enabled) + return 0; + + cq_host->rca = card->rca; + + err = cqhci_host_alloc_tdl(cq_host); + if (err) + return err; + + __cqhci_enable(cq_host); + + cq_host->enabled = true; + +#ifdef DEBUG + cqhci_dumpregs(cq_host); +#endif + return 0; +} + +/* CQHCI is idle and should halt immediately, so set a small timeout */ +#define CQHCI_OFF_TIMEOUT 100 + +static void cqhci_off(struct mmc_host *mmc) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + ktime_t timeout; + bool timed_out; + u32 reg; + + if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt) + return; + + if (cq_host->ops->disable) + cq_host->ops->disable(mmc, false); + + cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL); + + timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT); + while (1) { + timed_out = ktime_compare(ktime_get(), timeout) > 0; + reg = cqhci_readl(cq_host, CQHCI_CTL); + if ((reg & CQHCI_HALT) || timed_out) + break; + } + + if (timed_out) + pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc)); + else + pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc)); + + mmc->cqe_on = false; +} + +static void cqhci_disable(struct mmc_host *mmc) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + + if (!cq_host->enabled) + return; + + cqhci_off(mmc); + + __cqhci_disable(cq_host); + + dmam_free_coherent(mmc_dev(mmc), cq_host->data_size, + cq_host->trans_desc_base, + cq_host->trans_desc_dma_base); + + dmam_free_coherent(mmc_dev(mmc), 
cq_host->desc_size, + cq_host->desc_base, + cq_host->desc_dma_base); + + cq_host->trans_desc_base = NULL; + cq_host->desc_base = NULL; + + cq_host->enabled = false; +} + +/* + * Build the 64-bit task descriptor for a data request in *data from the + * request's data flags, block count and block address. + */ +static void cqhci_prep_task_desc(struct mmc_request *mrq, + u64 *data, bool intr) +{ + u32 req_flags = mrq->data->flags; + + *data = CQHCI_VALID(1) | + CQHCI_END(1) | + CQHCI_INT(intr) | + CQHCI_ACT(0x5) | + CQHCI_FORCED_PROG(!!(req_flags & MMC_DATA_FORCED_PRG)) | + CQHCI_DATA_TAG(!!(req_flags & MMC_DATA_DAT_TAG)) | + CQHCI_DATA_DIR(!!(req_flags & MMC_DATA_READ)) | + CQHCI_PRIORITY(!!(req_flags & MMC_DATA_PRIO)) | + CQHCI_QBAR(!!(req_flags & MMC_DATA_QBR)) | + CQHCI_REL_WRITE(!!(req_flags & MMC_DATA_REL_WR)) | + CQHCI_BLK_COUNT(mrq->data->blocks) | + CQHCI_BLK_ADDR((u64)mrq->data->blk_addr); + + /* Print the descriptor zero-padded to 16 hex digits */ + pr_debug("%s: cqhci: tag %d task descriptor 0x%016llx\n", + mmc_hostname(mrq->host), mrq->tag, (unsigned long long)*data); +} + +static int cqhci_dma_map(struct mmc_host *host, struct mmc_request *mrq) +{ + int sg_count; + struct mmc_data *data = mrq->data; + + if (!data) + return -EINVAL; + + sg_count = dma_map_sg(mmc_dev(host), data->sg, + data->sg_len, + (data->flags & MMC_DATA_WRITE) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + if (!sg_count) { + pr_err("%s: sg-len: %d\n", __func__, data->sg_len); + return -ENOMEM; + } + + return sg_count; +} + +static void cqhci_set_tran_desc(u8 *desc, dma_addr_t addr, int len, bool end, + bool dma64) +{ + __le32 *attr = (__le32 __force *)desc; + + *attr = (CQHCI_VALID(1) | + CQHCI_END(end ?
1 : 0) | + CQHCI_INT(0) | + CQHCI_ACT(0x4) | + CQHCI_DAT_LENGTH(len)); + + if (dma64) { + __le64 *dataddr = (__le64 __force *)(desc + 4); + + dataddr[0] = cpu_to_le64(addr); + } else { + __le32 *dataddr = (__le32 __force *)(desc + 4); + + dataddr[0] = cpu_to_le32(addr); + } +} + +static int cqhci_prep_tran_desc(struct mmc_request *mrq, + struct cqhci_host *cq_host, int tag) +{ + struct mmc_data *data = mrq->data; + int i, sg_count, len; + bool end = false; + bool dma64 = cq_host->dma64; + dma_addr_t addr; + u8 *desc; + struct scatterlist *sg; + + sg_count = cqhci_dma_map(mrq->host, mrq); + if (sg_count < 0) { + pr_err("%s: %s: unable to map sg lists, %d\n", + mmc_hostname(mrq->host), __func__, sg_count); + return sg_count; + } + + desc = get_trans_desc(cq_host, tag); + + for_each_sg(data->sg, sg, sg_count, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + + if ((i+1) == sg_count) + end = true; + cqhci_set_tran_desc(desc, addr, len, end, dma64); + desc += cq_host->trans_desc_len; + } + + return 0; +} + +static void cqhci_prep_dcmd_desc(struct mmc_host *mmc, + struct mmc_request *mrq) +{ + u64 *task_desc = NULL; + u64 data = 0; + u8 resp_type; + u8 *desc; + __le64 *dataddr; + struct cqhci_host *cq_host = mmc->cqe_private; + u8 timing; + + if (!(mrq->cmd->flags & MMC_RSP_PRESENT)) { + resp_type = 0x0; + timing = 0x1; + } else { + if (mrq->cmd->flags & MMC_RSP_R1B) { + resp_type = 0x3; + timing = 0x0; + } else { + resp_type = 0x2; + timing = 0x1; + } + } + + task_desc = (__le64 __force *)get_desc(cq_host, cq_host->dcmd_slot); + memset(task_desc, 0, cq_host->task_desc_len); + data |= (CQHCI_VALID(1) | + CQHCI_END(1) | + CQHCI_INT(1) | + CQHCI_QBAR(1) | + CQHCI_ACT(0x5) | + CQHCI_CMD_INDEX(mrq->cmd->opcode) | + CQHCI_CMD_TIMING(timing) | CQHCI_RESP_TYPE(resp_type)); + *task_desc |= data; + desc = (u8 *)task_desc; + pr_debug("%s: cqhci: dcmd: cmd: %d timing: %d resp: %d\n", + mmc_hostname(mmc), mrq->cmd->opcode, timing, resp_type); + dataddr = (__le64 __force 
*)(desc + 4); + dataddr[0] = cpu_to_le64((u64)mrq->cmd->arg); + +} + +static void cqhci_post_req(struct mmc_host *host, struct mmc_request *mrq) +{ + struct mmc_data *data = mrq->data; + + if (data) { + dma_unmap_sg(mmc_dev(host), data->sg, data->sg_len, + (data->flags & MMC_DATA_READ) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } +} + +static inline int cqhci_tag(struct mmc_request *mrq) +{ + return mrq->cmd ? DCMD_SLOT : mrq->tag; +} + +static int cqhci_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + int err = 0; + u64 data = 0; + u64 *task_desc = NULL; + int tag = cqhci_tag(mrq); + struct cqhci_host *cq_host = mmc->cqe_private; + unsigned long flags; + + if (!cq_host->enabled) { + pr_err("%s: cqhci: not enabled\n", mmc_hostname(mmc)); + return -EINVAL; + } + + /* First request after resume has to re-enable */ + if (!cq_host->activated) + __cqhci_enable(cq_host); + + if (!mmc->cqe_on) { + cqhci_writel(cq_host, 0, CQHCI_CTL); + mmc->cqe_on = true; + pr_debug("%s: cqhci: CQE on\n", mmc_hostname(mmc)); + if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) { + pr_err("%s: cqhci: CQE failed to exit halt state\n", + mmc_hostname(mmc)); + } + if (cq_host->ops->enable) + cq_host->ops->enable(mmc); + } + + if (mrq->data) { + task_desc = (__le64 __force *)get_desc(cq_host, tag); + cqhci_prep_task_desc(mrq, &data, 1); + *task_desc = cpu_to_le64(data); + err = cqhci_prep_tran_desc(mrq, cq_host, tag); + if (err) { + pr_err("%s: cqhci: failed to setup tx desc: %d\n", + mmc_hostname(mmc), err); + return err; + } + } else { + cqhci_prep_dcmd_desc(mmc, mrq); + } + + spin_lock_irqsave(&cq_host->lock, flags); + + if (cq_host->recovery_halt) { + err = -EBUSY; + goto out_unlock; + } + + cq_host->slot[tag].mrq = mrq; + cq_host->slot[tag].flags = 0; + + cq_host->qcnt += 1; + + cqhci_writel(cq_host, 1 << tag, CQHCI_TDBR); + if (!(cqhci_readl(cq_host, CQHCI_TDBR) & (1 << tag))) + pr_debug("%s: cqhci: doorbell not set for tag %d\n", + mmc_hostname(mmc), tag); +out_unlock: + 
spin_unlock_irqrestore(&cq_host->lock, flags); + + if (err) + cqhci_post_req(mmc, mrq); + + return err; +} + +static void cqhci_recovery_needed(struct mmc_host *mmc, struct mmc_request *mrq, + bool notify) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + + if (!cq_host->recovery_halt) { + cq_host->recovery_halt = true; + pr_debug("%s: cqhci: recovery needed\n", mmc_hostname(mmc)); + wake_up(&cq_host->wait_queue); + if (notify && mrq->recovery_notifier) + mrq->recovery_notifier(mrq); + } +} + +static unsigned int cqhci_error_flags(int error1, int error2) +{ + int error = error1 ? error1 : error2; + + switch (error) { + case -EILSEQ: + return CQHCI_HOST_CRC; + case -ETIMEDOUT: + return CQHCI_HOST_TIMEOUT; + default: + return CQHCI_HOST_OTHER; + } +} + +static void cqhci_error_irq(struct mmc_host *mmc, u32 status, int cmd_error, + int data_error) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + struct cqhci_slot *slot; + u32 terri; + int tag; + + spin_lock(&cq_host->lock); + + terri = cqhci_readl(cq_host, CQHCI_TERRI); + + pr_debug("%s: cqhci: error IRQ status: 0x%08x cmd error %d data error %d TERRI: 0x%08x\n", + mmc_hostname(mmc), status, cmd_error, data_error, terri); + + /* Forget about errors when recovery has already been triggered */ + if (cq_host->recovery_halt) + goto out_unlock; + + if (!cq_host->qcnt) { + WARN_ONCE(1, "%s: cqhci: error when idle. 
IRQ status: 0x%08x cmd error %d data error %d TERRI: 0x%08x\n", + mmc_hostname(mmc), status, cmd_error, data_error, + terri); + goto out_unlock; + } + + if (CQHCI_TERRI_C_VALID(terri)) { + tag = CQHCI_TERRI_C_TASK(terri); + slot = &cq_host->slot[tag]; + if (slot->mrq) { + slot->flags = cqhci_error_flags(cmd_error, data_error); + cqhci_recovery_needed(mmc, slot->mrq, true); + } + } + + if (CQHCI_TERRI_D_VALID(terri)) { + tag = CQHCI_TERRI_D_TASK(terri); + slot = &cq_host->slot[tag]; + if (slot->mrq) { + slot->flags = cqhci_error_flags(data_error, cmd_error); + cqhci_recovery_needed(mmc, slot->mrq, true); + } + } + + if (!cq_host->recovery_halt) { + /* + * The only way to guarantee forward progress is to mark at + * least one task in error, so if none is indicated, pick one. + */ + for (tag = 0; tag < NUM_SLOTS; tag++) { + slot = &cq_host->slot[tag]; + if (!slot->mrq) + continue; + slot->flags = cqhci_error_flags(data_error, cmd_error); + cqhci_recovery_needed(mmc, slot->mrq, true); + break; + } + } + +out_unlock: + spin_unlock(&cq_host->lock); +} + +static void cqhci_finish_mrq(struct mmc_host *mmc, unsigned int tag) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + struct cqhci_slot *slot = &cq_host->slot[tag]; + struct mmc_request *mrq = slot->mrq; + struct mmc_data *data; + + if (!mrq) { + WARN_ONCE(1, "%s: cqhci: spurious TCN for tag %d\n", + mmc_hostname(mmc), tag); + return; + } + + /* No completions allowed during recovery */ + if (cq_host->recovery_halt) { + slot->flags |= CQHCI_COMPLETED; + return; + } + + slot->mrq = NULL; + + cq_host->qcnt -= 1; + + data = mrq->data; + if (data) { + if (data->error) + data->bytes_xfered = 0; + else + data->bytes_xfered = data->blksz * data->blocks; + } + + mmc_cqe_request_done(mmc, mrq); +} + +irqreturn_t cqhci_irq(struct mmc_host *mmc, u32 intmask, int cmd_error, + int data_error) +{ + u32 status; + unsigned long tag = 0, comp_status; + struct cqhci_host *cq_host = mmc->cqe_private; + + status = cqhci_readl(cq_host, 
CQHCI_IS); + cqhci_writel(cq_host, status, CQHCI_IS); + + pr_debug("%s: cqhci: IRQ status: 0x%08x\n", mmc_hostname(mmc), status); + + if ((status & CQHCI_IS_RED) || cmd_error || data_error) + cqhci_error_irq(mmc, status, cmd_error, data_error); + + if (status & CQHCI_IS_TCC) { + /* read TCN and complete the request */ + comp_status = cqhci_readl(cq_host, CQHCI_TCN); + cqhci_writel(cq_host, comp_status, CQHCI_TCN); + pr_debug("%s: cqhci: TCN: 0x%08lx\n", + mmc_hostname(mmc), comp_status); + + spin_lock(&cq_host->lock); + + for_each_set_bit(tag, &comp_status, cq_host->num_slots) { + /* complete the corresponding mrq */ + pr_debug("%s: cqhci: completing tag %lu\n", + mmc_hostname(mmc), tag); + cqhci_finish_mrq(mmc, tag); + } + + if (cq_host->waiting_for_idle && !cq_host->qcnt) { + cq_host->waiting_for_idle = false; + wake_up(&cq_host->wait_queue); + } + + spin_unlock(&cq_host->lock); + } + + if (status & CQHCI_IS_TCL) + wake_up(&cq_host->wait_queue); + + if (status & CQHCI_IS_HAC) + wake_up(&cq_host->wait_queue); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL(cqhci_irq); + +static bool cqhci_is_idle(struct cqhci_host *cq_host, int *ret) +{ + unsigned long flags; + bool is_idle; + + spin_lock_irqsave(&cq_host->lock, flags); + is_idle = !cq_host->qcnt || cq_host->recovery_halt; + *ret = cq_host->recovery_halt ? 
-EBUSY : 0; + cq_host->waiting_for_idle = !is_idle; + spin_unlock_irqrestore(&cq_host->lock, flags); + + return is_idle; +} + +static int cqhci_wait_for_idle(struct mmc_host *mmc) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + int ret; + + wait_event(cq_host->wait_queue, cqhci_is_idle(cq_host, &ret)); + + return ret; +} + +static bool cqhci_timeout(struct mmc_host *mmc, struct mmc_request *mrq, + bool *recovery_needed) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + int tag = cqhci_tag(mrq); + struct cqhci_slot *slot = &cq_host->slot[tag]; + unsigned long flags; + bool timed_out; + + spin_lock_irqsave(&cq_host->lock, flags); + timed_out = slot->mrq == mrq; + if (timed_out) { + slot->flags |= CQHCI_EXTERNAL_TIMEOUT; + cqhci_recovery_needed(mmc, mrq, false); + *recovery_needed = cq_host->recovery_halt; + } + spin_unlock_irqrestore(&cq_host->lock, flags); + + if (timed_out) { + pr_err("%s: cqhci: timeout for tag %d\n", + mmc_hostname(mmc), tag); + cqhci_dumpregs(cq_host); + } + + return timed_out; +} + +static bool cqhci_tasks_cleared(struct cqhci_host *cq_host) +{ + return !(cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_CLEAR_ALL_TASKS); +} + +static bool cqhci_clear_all_tasks(struct mmc_host *mmc, unsigned int timeout) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + bool ret; + u32 ctl; + + cqhci_set_irqs(cq_host, CQHCI_IS_TCL); + + ctl = cqhci_readl(cq_host, CQHCI_CTL); + ctl |= CQHCI_CLEAR_ALL_TASKS; + cqhci_writel(cq_host, ctl, CQHCI_CTL); + + wait_event_timeout(cq_host->wait_queue, cqhci_tasks_cleared(cq_host), + msecs_to_jiffies(timeout) + 1); + + cqhci_set_irqs(cq_host, 0); + + ret = cqhci_tasks_cleared(cq_host); + + if (!ret) + pr_debug("%s: cqhci: Failed to clear tasks\n", + mmc_hostname(mmc)); + + return ret; +} + +static bool cqhci_halted(struct cqhci_host *cq_host) +{ + return cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT; +} + +static bool cqhci_halt(struct mmc_host *mmc, unsigned int timeout) +{ + struct cqhci_host *cq_host = 
mmc->cqe_private; + bool ret; + u32 ctl; + + if (cqhci_halted(cq_host)) + return true; + + cqhci_set_irqs(cq_host, CQHCI_IS_HAC); + + ctl = cqhci_readl(cq_host, CQHCI_CTL); + ctl |= CQHCI_HALT; + cqhci_writel(cq_host, ctl, CQHCI_CTL); + + wait_event_timeout(cq_host->wait_queue, cqhci_halted(cq_host), + msecs_to_jiffies(timeout) + 1); + + cqhci_set_irqs(cq_host, 0); + + ret = cqhci_halted(cq_host); + + if (!ret) + pr_debug("%s: cqhci: Failed to halt\n", mmc_hostname(mmc)); + + return ret; +} + +/* + * After halting we expect to be able to use the command line. We interpret the + * failure to halt to mean the data lines might still be in use (and the upper + * layers will need to send a STOP command), so we set the timeout based on a + * generous command timeout. + */ +#define CQHCI_START_HALT_TIMEOUT 5 + +static void cqhci_recovery_start(struct mmc_host *mmc) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + + pr_debug("%s: cqhci: %s\n", mmc_hostname(mmc), __func__); + + WARN_ON(!cq_host->recovery_halt); + + cqhci_halt(mmc, CQHCI_START_HALT_TIMEOUT); + + if (cq_host->ops->disable) + cq_host->ops->disable(mmc, true); + + mmc->cqe_on = false; +} + +static int cqhci_error_from_flags(unsigned int flags) +{ + if (!flags) + return 0; + + /* CRC errors might indicate re-tuning so prefer to report that */ + if (flags & CQHCI_HOST_CRC) + return -EILSEQ; + + if (flags & (CQHCI_EXTERNAL_TIMEOUT | CQHCI_HOST_TIMEOUT)) + return -ETIMEDOUT; + + return -EIO; +} + +static void cqhci_recover_mrq(struct cqhci_host *cq_host, unsigned int tag) +{ + struct cqhci_slot *slot = &cq_host->slot[tag]; + struct mmc_request *mrq = slot->mrq; + struct mmc_data *data; + + if (!mrq) + return; + + slot->mrq = NULL; + + cq_host->qcnt -= 1; + + data = mrq->data; + if (data) { + data->bytes_xfered = 0; + data->error = cqhci_error_from_flags(slot->flags); + } else { + mrq->cmd->error = cqhci_error_from_flags(slot->flags); + } + + mmc_cqe_request_done(cq_host->mmc, mrq); +} + +static void 
cqhci_recover_mrqs(struct cqhci_host *cq_host) +{ + int i; + + for (i = 0; i < cq_host->num_slots; i++) + cqhci_recover_mrq(cq_host, i); +} + +/* + * By now the command and data lines should be unused so there is no reason for + * CQHCI to take a long time to halt, but if it doesn't halt there could be + * problems clearing tasks, so be generous. + */ +#define CQHCI_FINISH_HALT_TIMEOUT 20 + +/* CQHCI could be expected to clear its internal state pretty quickly */ +#define CQHCI_CLEAR_TIMEOUT 20 + +static void cqhci_recovery_finish(struct mmc_host *mmc) +{ + struct cqhci_host *cq_host = mmc->cqe_private; + unsigned long flags; + u32 cqcfg; + bool ok; + + pr_debug("%s: cqhci: %s\n", mmc_hostname(mmc), __func__); + + WARN_ON(!cq_host->recovery_halt); + + ok = cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT); + + if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT)) + ok = false; + + /* + * The specification contradicts itself, by saying that tasks cannot be + * cleared if CQHCI does not halt, but if CQHCI does not halt, it should + * be disabled/re-enabled, but not to disable before clearing tasks. + * Have a go anyway. 
+ */ + if (!ok) { + pr_debug("%s: cqhci: disable / re-enable\n", mmc_hostname(mmc)); + cqcfg = cqhci_readl(cq_host, CQHCI_CFG); + cqcfg &= ~CQHCI_ENABLE; + cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + cqcfg |= CQHCI_ENABLE; + cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + /* Be sure that there are no tasks */ + ok = cqhci_halt(mmc, CQHCI_FINISH_HALT_TIMEOUT); + if (!cqhci_clear_all_tasks(mmc, CQHCI_CLEAR_TIMEOUT)) + ok = false; + WARN_ON(!ok); + } + + cqhci_recover_mrqs(cq_host); + + WARN_ON(cq_host->qcnt); + + spin_lock_irqsave(&cq_host->lock, flags); + cq_host->qcnt = 0; + cq_host->recovery_halt = false; + mmc->cqe_on = false; + spin_unlock_irqrestore(&cq_host->lock, flags); + + /* Ensure all writes are done before interrupts are re-enabled */ + wmb(); + + cqhci_writel(cq_host, CQHCI_IS_HAC | CQHCI_IS_TCL, CQHCI_IS); + + cqhci_set_irqs(cq_host, CQHCI_IS_MASK); + + pr_debug("%s: cqhci: recovery done\n", mmc_hostname(mmc)); +} + +static const struct mmc_cqe_ops cqhci_cqe_ops = { + .cqe_enable = cqhci_enable, + .cqe_disable = cqhci_disable, + .cqe_request = cqhci_request, + .cqe_post_req = cqhci_post_req, + .cqe_off = cqhci_off, + .cqe_wait_for_idle = cqhci_wait_for_idle, + .cqe_timeout = cqhci_timeout, + .cqe_recovery_start = cqhci_recovery_start, + .cqe_recovery_finish = cqhci_recovery_finish, +}; + +struct cqhci_host *cqhci_pltfm_init(struct platform_device *pdev) +{ + struct cqhci_host *cq_host; + struct resource *cqhci_memres = NULL; + + /* check and setup CMDQ interface */ + cqhci_memres = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "cqhci_mem"); + if (!cqhci_memres) { + dev_dbg(&pdev->dev, "CMDQ not supported\n"); + return ERR_PTR(-EINVAL); + } + + cq_host = devm_kzalloc(&pdev->dev, sizeof(*cq_host), GFP_KERNEL); + if (!cq_host) + return ERR_PTR(-ENOMEM); + cq_host->mmio = devm_ioremap(&pdev->dev, + cqhci_memres->start, + resource_size(cqhci_memres)); + if (!cq_host->mmio) { + dev_err(&pdev->dev, "failed to remap cqhci regs\n"); + return ERR_PTR(-EBUSY); + 
} + dev_dbg(&pdev->dev, "CMDQ ioremap: done\n"); + + return cq_host; +} +EXPORT_SYMBOL(cqhci_pltfm_init); + +static unsigned int cqhci_ver_major(struct cqhci_host *cq_host) +{ + return CQHCI_VER_MAJOR(cqhci_readl(cq_host, CQHCI_VER)); +} + +static unsigned int cqhci_ver_minor(struct cqhci_host *cq_host) +{ + u32 ver = cqhci_readl(cq_host, CQHCI_VER); + + return CQHCI_VER_MINOR1(ver) * 10 + CQHCI_VER_MINOR2(ver); +} + +int cqhci_init(struct cqhci_host *cq_host, struct mmc_host *mmc, + bool dma64) +{ + int err; + + cq_host->dma64 = dma64; + cq_host->mmc = mmc; + cq_host->mmc->cqe_private = cq_host; + + cq_host->num_slots = NUM_SLOTS; + cq_host->dcmd_slot = DCMD_SLOT; + + mmc->cqe_ops = &cqhci_cqe_ops; + + mmc->cqe_qdepth = NUM_SLOTS; + if (mmc->caps2 & MMC_CAP2_CQE_DCMD) + mmc->cqe_qdepth -= 1; + + cq_host->slot = devm_kcalloc(mmc_dev(mmc), cq_host->num_slots, + sizeof(*cq_host->slot), GFP_KERNEL); + if (!cq_host->slot) { + err = -ENOMEM; + goto out_err; + } + + spin_lock_init(&cq_host->lock); + + init_completion(&cq_host->halt_comp); + init_waitqueue_head(&cq_host->wait_queue); + + pr_info("%s: CQHCI version %u.%02u\n", + mmc_hostname(mmc), cqhci_ver_major(cq_host), + cqhci_ver_minor(cq_host)); + + return 0; + +out_err: + pr_err("%s: CQHCI version %u.%02u failed to initialize, error %d\n", + mmc_hostname(mmc), cqhci_ver_major(cq_host), + cqhci_ver_minor(cq_host), err); + return err; +} +EXPORT_SYMBOL(cqhci_init); + +MODULE_AUTHOR("Venkat Gopalakrishnan "); +MODULE_DESCRIPTION("Command Queue Host Controller Interface driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mmc/host/cqhci.h b/drivers/mmc/host/cqhci.h new file mode 100644 index 000000000000..2d39d361b322 --- /dev/null +++ b/drivers/mmc/host/cqhci.h @@ -0,0 +1,240 @@ +/* Copyright (c) 2015, The Linux Foundation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef LINUX_MMC_CQHCI_H +#define LINUX_MMC_CQHCI_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/* registers */ +/* version */ +#define CQHCI_VER 0x00 +#define CQHCI_VER_MAJOR(x) (((x) & GENMASK(11, 8)) >> 8) +#define CQHCI_VER_MINOR1(x) (((x) & GENMASK(7, 4)) >> 4) +#define CQHCI_VER_MINOR2(x) ((x) & GENMASK(3, 0)) + +/* capabilities */ +#define CQHCI_CAP 0x04 +/* configuration */ +#define CQHCI_CFG 0x08 +#define CQHCI_DCMD 0x00001000 +#define CQHCI_TASK_DESC_SZ 0x00000100 +#define CQHCI_ENABLE 0x00000001 + +/* control */ +#define CQHCI_CTL 0x0C +#define CQHCI_CLEAR_ALL_TASKS 0x00000100 +#define CQHCI_HALT 0x00000001 + +/* interrupt status */ +#define CQHCI_IS 0x10 +#define CQHCI_IS_HAC BIT(0) +#define CQHCI_IS_TCC BIT(1) +#define CQHCI_IS_RED BIT(2) +#define CQHCI_IS_TCL BIT(3) + +#define CQHCI_IS_MASK (CQHCI_IS_TCC | CQHCI_IS_RED) + +/* interrupt status enable */ +#define CQHCI_ISTE 0x14 + +/* interrupt signal enable */ +#define CQHCI_ISGE 0x18 + +/* interrupt coalescing */ +#define CQHCI_IC 0x1C +#define CQHCI_IC_ENABLE BIT(31) +#define CQHCI_IC_RESET BIT(16) +#define CQHCI_IC_ICCTHWEN BIT(15) +#define CQHCI_IC_ICCTH(x) ((x & 0x1F) << 8) +#define CQHCI_IC_ICTOVALWEN BIT(7) +#define CQHCI_IC_ICTOVAL(x) (x & 0x7F) + +/* task list base address */ +#define CQHCI_TDLBA 0x20 + +/* task list base address upper */ +#define CQHCI_TDLBAU 0x24 + +/* door-bell */ +#define CQHCI_TDBR 0x28 + +/* task completion notification */ +#define CQHCI_TCN 0x2C + +/* device queue 
status */ +#define CQHCI_DQS 0x30 + +/* device pending tasks */ +#define CQHCI_DPT 0x34 + +/* task clear */ +#define CQHCI_TCLR 0x38 + +/* send status config 1 */ +#define CQHCI_SSC1 0x40 + +/* send status config 2 */ +#define CQHCI_SSC2 0x44 + +/* response for dcmd */ +#define CQHCI_CRDCT 0x48 + +/* response mode error mask */ +#define CQHCI_RMEM 0x50 + +/* task error info */ +#define CQHCI_TERRI 0x54 + +#define CQHCI_TERRI_C_INDEX(x) ((x) & GENMASK(5, 0)) +#define CQHCI_TERRI_C_TASK(x) (((x) & GENMASK(12, 8)) >> 8) +#define CQHCI_TERRI_C_VALID(x) ((x) & BIT(15)) +#define CQHCI_TERRI_D_INDEX(x) (((x) & GENMASK(21, 16)) >> 16) +#define CQHCI_TERRI_D_TASK(x) (((x) & GENMASK(28, 24)) >> 24) +#define CQHCI_TERRI_D_VALID(x) ((x) & BIT(31)) + +/* command response index */ +#define CQHCI_CRI 0x58 + +/* command response argument */ +#define CQHCI_CRA 0x5C + +#define CQHCI_INT_ALL 0xF +#define CQHCI_IC_DEFAULT_ICCTH 31 +#define CQHCI_IC_DEFAULT_ICTOVAL 1 + +/* attribute fields */ +#define CQHCI_VALID(x) ((x & 1) << 0) +#define CQHCI_END(x) ((x & 1) << 1) +#define CQHCI_INT(x) ((x & 1) << 2) +#define CQHCI_ACT(x) ((x & 0x7) << 3) + +/* data command task descriptor fields */ +#define CQHCI_FORCED_PROG(x) ((x & 1) << 6) +#define CQHCI_CONTEXT(x) ((x & 0xF) << 7) +#define CQHCI_DATA_TAG(x) ((x & 1) << 11) +#define CQHCI_DATA_DIR(x) ((x & 1) << 12) +#define CQHCI_PRIORITY(x) ((x & 1) << 13) +#define CQHCI_QBAR(x) ((x & 1) << 14) +#define CQHCI_REL_WRITE(x) ((x & 1) << 15) +#define CQHCI_BLK_COUNT(x) ((x & 0xFFFF) << 16) +#define CQHCI_BLK_ADDR(x) ((x & 0xFFFFFFFF) << 32) + +/* direct command task descriptor fields */ +#define CQHCI_CMD_INDEX(x) ((x & 0x3F) << 16) +#define CQHCI_CMD_TIMING(x) ((x & 1) << 22) +#define CQHCI_RESP_TYPE(x) ((x & 0x3) << 23) + +/* transfer descriptor fields */ +#define CQHCI_DAT_LENGTH(x) ((x & 0xFFFF) << 16) +#define CQHCI_DAT_ADDR_LO(x) ((x & 0xFFFFFFFF) << 32) +#define CQHCI_DAT_ADDR_HI(x) ((x & 0xFFFFFFFF) << 0) + +struct cqhci_host_ops; +struct 
mmc_host; +struct cqhci_slot; + +struct cqhci_host { + const struct cqhci_host_ops *ops; + void __iomem *mmio; + struct mmc_host *mmc; + + spinlock_t lock; + + /* relative card address of device */ + unsigned int rca; + + /* 64 bit DMA */ + bool dma64; + int num_slots; + int qcnt; + + u32 dcmd_slot; + u32 caps; +#define CQHCI_TASK_DESC_SZ_128 0x1 + + u32 quirks; +#define CQHCI_QUIRK_SHORT_TXFR_DESC_SZ 0x1 + + bool enabled; + bool halted; + bool init_done; + bool activated; + bool waiting_for_idle; + bool recovery_halt; + + size_t desc_size; + size_t data_size; + + u8 *desc_base; + + /* total descriptor size */ + u8 slot_sz; + + /* 64/128 bit depends on CQHCI_CFG */ + u8 task_desc_len; + + /* 64 bit on 32-bit arch, 128 bit on 64-bit */ + u8 link_desc_len; + + u8 *trans_desc_base; + /* same length as transfer descriptor */ + u8 trans_desc_len; + + dma_addr_t desc_dma_base; + dma_addr_t trans_desc_dma_base; + + struct completion halt_comp; + wait_queue_head_t wait_queue; + struct cqhci_slot *slot; +}; + +struct cqhci_host_ops { + void (*dumpregs)(struct mmc_host *mmc); + void (*write_l)(struct cqhci_host *host, u32 val, int reg); + u32 (*read_l)(struct cqhci_host *host, int reg); + void (*enable)(struct mmc_host *mmc); + void (*disable)(struct mmc_host *mmc, bool recovery); +}; + +static inline void cqhci_writel(struct cqhci_host *host, u32 val, int reg) +{ + if (unlikely(host->ops->write_l)) + host->ops->write_l(host, val, reg); + else + writel_relaxed(val, host->mmio + reg); +} + +static inline u32 cqhci_readl(struct cqhci_host *host, int reg) +{ + if (unlikely(host->ops->read_l)) + return host->ops->read_l(host, reg); + else + return readl_relaxed(host->mmio + reg); +} + +struct platform_device; + +irqreturn_t cqhci_irq(struct mmc_host *mmc, u32 intmask, int cmd_error, + int data_error); +int cqhci_init(struct cqhci_host *cq_host, struct mmc_host *mmc, bool dma64); +struct cqhci_host *cqhci_pltfm_init(struct platform_device *pdev); +int cqhci_suspend(struct 
mmc_host *mmc); +int cqhci_resume(struct mmc_host *mmc); + +#endif -- cgit v1.2.3 From 8ee82bda230fc972c7ee3bb15ce1260eefb4721c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:06 +0200 Subject: mmc: sdhci-pci: Add CQHCI support for Intel GLK Add CQHCI initialization and implement CQHCI operations for Intel GLK. Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/host/Kconfig | 1 + drivers/mmc/host/sdhci-pci-core.c | 155 +++++++++++++++++++++++++++++++++++++- 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 3092b7085cb5..2b02a9788bb6 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -81,6 +81,7 @@ config MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER config MMC_SDHCI_PCI tristate "SDHCI support on PCI bus" depends on MMC_SDHCI && PCI + select MMC_CQHCI help This selects the PCI Secure Digital Host Controller Interface. Most controllers found today are PCI devices. 
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 3e4f04fd5175..110c634cfb43 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -30,6 +30,8 @@ #include #include +#include "cqhci.h" + #include "sdhci.h" #include "sdhci-pci.h" @@ -116,6 +118,28 @@ int sdhci_pci_resume_host(struct sdhci_pci_chip *chip) return 0; } + +static int sdhci_cqhci_suspend(struct sdhci_pci_chip *chip) +{ + int ret; + + ret = cqhci_suspend(chip->slots[0]->host->mmc); + if (ret) + return ret; + + return sdhci_pci_suspend_host(chip); +} + +static int sdhci_cqhci_resume(struct sdhci_pci_chip *chip) +{ + int ret; + + ret = sdhci_pci_resume_host(chip); + if (ret) + return ret; + + return cqhci_resume(chip->slots[0]->host->mmc); +} #endif #ifdef CONFIG_PM @@ -166,8 +190,48 @@ static int sdhci_pci_runtime_resume_host(struct sdhci_pci_chip *chip) return 0; } + +static int sdhci_cqhci_runtime_suspend(struct sdhci_pci_chip *chip) +{ + int ret; + + ret = cqhci_suspend(chip->slots[0]->host->mmc); + if (ret) + return ret; + + return sdhci_pci_runtime_suspend_host(chip); +} + +static int sdhci_cqhci_runtime_resume(struct sdhci_pci_chip *chip) +{ + int ret; + + ret = sdhci_pci_runtime_resume_host(chip); + if (ret) + return ret; + + return cqhci_resume(chip->slots[0]->host->mmc); +} #endif +static u32 sdhci_cqhci_irq(struct sdhci_host *host, u32 intmask) +{ + int cmd_error = 0; + int data_error = 0; + + if (!sdhci_cqe_irq(host, intmask, &cmd_error, &data_error)) + return intmask; + + cqhci_irq(host->mmc, intmask, cmd_error, data_error); + + return 0; +} + +static void sdhci_pci_dumpregs(struct mmc_host *mmc) +{ + sdhci_dumpregs(mmc_priv(mmc)); +} + /*****************************************************************************\ * * * Hardware specific quirk handling * @@ -583,6 +647,18 @@ static const struct sdhci_ops sdhci_intel_byt_ops = { .voltage_switch = sdhci_intel_voltage_switch, }; +static const struct sdhci_ops 
sdhci_intel_glk_ops = { + .set_clock = sdhci_set_clock, + .set_power = sdhci_intel_set_power, + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, + .hw_reset = sdhci_pci_hw_reset, + .voltage_switch = sdhci_intel_voltage_switch, + .irq = sdhci_cqhci_irq, +}; + static void byt_read_dsm(struct sdhci_pci_slot *slot) { struct intel_host *intel_host = sdhci_pci_priv(slot); @@ -612,12 +688,80 @@ static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot) { int ret = byt_emmc_probe_slot(slot); + slot->host->mmc->caps2 |= MMC_CAP2_CQE; + if (slot->chip->pdev->device != PCI_DEVICE_ID_INTEL_GLK_EMMC) { slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES, slot->host->mmc_host_ops.hs400_enhanced_strobe = intel_hs400_enhanced_strobe; + slot->host->mmc->caps2 |= MMC_CAP2_CQE_DCMD; + } + + return ret; +} + +static void glk_cqe_enable(struct mmc_host *mmc) +{ + struct sdhci_host *host = mmc_priv(mmc); + u32 reg; + + /* + * CQE gets stuck if it sees Buffer Read Enable bit set, which can be + * the case after tuning, so ensure the buffer is drained. 
+ */ + reg = sdhci_readl(host, SDHCI_PRESENT_STATE); + while (reg & SDHCI_DATA_AVAILABLE) { + sdhci_readl(host, SDHCI_BUFFER); + reg = sdhci_readl(host, SDHCI_PRESENT_STATE); + } + + sdhci_cqe_enable(mmc); +} + +static const struct cqhci_host_ops glk_cqhci_ops = { + .enable = glk_cqe_enable, + .disable = sdhci_cqe_disable, + .dumpregs = sdhci_pci_dumpregs, +}; + +static int glk_emmc_add_host(struct sdhci_pci_slot *slot) +{ + struct device *dev = &slot->chip->pdev->dev; + struct sdhci_host *host = slot->host; + struct cqhci_host *cq_host; + bool dma64; + int ret; + + ret = sdhci_setup_host(host); + if (ret) + return ret; + + cq_host = devm_kzalloc(dev, sizeof(*cq_host), GFP_KERNEL); + if (!cq_host) { + ret = -ENOMEM; + goto cleanup; } + cq_host->mmio = host->ioaddr + 0x200; + cq_host->quirks |= CQHCI_QUIRK_SHORT_TXFR_DESC_SZ; + cq_host->ops = &glk_cqhci_ops; + + dma64 = host->flags & SDHCI_USE_64_BIT_DMA; + if (dma64) + cq_host->caps |= CQHCI_TASK_DESC_SZ_128; + + ret = cqhci_init(cq_host, host->mmc, dma64); + if (ret) + goto cleanup; + + ret = __sdhci_add_host(host); + if (ret) + goto cleanup; + + return 0; + +cleanup: + sdhci_cleanup_host(host); return ret; } @@ -699,11 +843,20 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = { static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = { .allow_runtime_pm = true, .probe_slot = glk_emmc_probe_slot, + .add_host = glk_emmc_add_host, +#ifdef CONFIG_PM_SLEEP + .suspend = sdhci_cqhci_suspend, + .resume = sdhci_cqhci_resume, +#endif +#ifdef CONFIG_PM + .runtime_suspend = sdhci_cqhci_runtime_suspend, + .runtime_resume = sdhci_cqhci_runtime_resume, +#endif .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 | SDHCI_QUIRK2_STOP_WITH_TC, - .ops = &sdhci_intel_byt_ops, + .ops = &sdhci_intel_glk_ops, .priv_size = sizeof(struct intel_host), }; -- cgit v1.2.3 From 10f21df4a23540b5da8e88d1030ff8c37818e04f Mon Sep 17 00:00:00 2001 From: Adrian 
Hunter Date: Wed, 29 Nov 2017 15:41:07 +0200 Subject: mmc: block: blk-mq: Add support for direct completion For blk-mq, add support for completing requests directly in the ->done callback. That means that error handling and urgent background operations must be handled by recovery_work in that case. Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 129 ++++++++++++++++++++++++++++++++++++++--------- drivers/mmc/core/block.h | 1 + drivers/mmc/core/host.h | 5 ++ drivers/mmc/core/queue.c | 5 +- drivers/mmc/core/queue.h | 1 + include/linux/mmc/host.h | 1 + 6 files changed, 116 insertions(+), 26 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 7275ac5d6799..a710a6e95307 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2131,6 +2131,22 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req) } } +static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) +{ + mmc_blk_eval_resp_error(brq); + + return brq->sbc.error || brq->cmd.error || brq->stop.error || + brq->data.error || brq->cmd.resp[0] & CMD_ERRORS; +} + +static inline void mmc_blk_rw_reset_success(struct mmc_queue *mq, + struct request *req) +{ + int type = rq_data_dir(req) == READ ? MMC_BLK_READ : MMC_BLK_WRITE; + + mmc_blk_reset_success(mq->blkdata, type); +} + static void mmc_blk_mq_complete_rq(struct mmc_queue *mq, struct request *req) { struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); @@ -2213,14 +2229,43 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req) mmc_post_req(host, mrq, 0); - blk_mq_complete_request(req); + /* + * Block layer timeouts race with completions which means the normal + * completion path cannot be used during recovery. 
+ */ + if (mq->in_recovery) + mmc_blk_mq_complete_rq(mq, req); + else + blk_mq_complete_request(req); mmc_blk_mq_dec_in_flight(mq, req); } +void mmc_blk_mq_recovery(struct mmc_queue *mq) +{ + struct request *req = mq->recovery_req; + struct mmc_host *host = mq->card->host; + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + + mq->recovery_req = NULL; + mq->rw_wait = false; + + if (mmc_blk_rq_error(&mqrq->brq)) { + mmc_retune_hold_now(host); + mmc_blk_mq_rw_recovery(mq, req); + } + + mmc_blk_urgent_bkops(mq, mqrq); + + mmc_blk_mq_post_req(mq, req); +} + static void mmc_blk_mq_complete_prev_req(struct mmc_queue *mq, struct request **prev_req) { + if (mmc_host_done_complete(mq->card->host)) + return; + mutex_lock(&mq->complete_lock); if (!mq->complete_req) @@ -2254,29 +2299,56 @@ static void mmc_blk_mq_req_done(struct mmc_request *mrq) struct request *req = mmc_queue_req_to_req(mqrq); struct request_queue *q = req->q; struct mmc_queue *mq = q->queuedata; + struct mmc_host *host = mq->card->host; unsigned long flags; - bool waiting; - /* - * We cannot complete the request in this context, so record that there - * is a request to complete, and that a following request does not need - * to wait (although it does need to complete complete_req first). - */ - spin_lock_irqsave(q->queue_lock, flags); - mq->complete_req = req; - mq->rw_wait = false; - waiting = mq->waiting; - spin_unlock_irqrestore(q->queue_lock, flags); + if (!mmc_host_done_complete(host)) { + bool waiting; - /* - * If 'waiting' then the waiting task will complete this request, - * otherwise queue a work to do it. Note that complete_work may still - * race with the dispatch of a following request. - */ - if (waiting) + /* + * We cannot complete the request in this context, so record + * that there is a request to complete, and that a following + * request does not need to wait (although it does need to + * complete complete_req first). 
+ */ + spin_lock_irqsave(q->queue_lock, flags); + mq->complete_req = req; + mq->rw_wait = false; + waiting = mq->waiting; + spin_unlock_irqrestore(q->queue_lock, flags); + + /* + * If 'waiting' then the waiting task will complete this + * request, otherwise queue a work to do it. Note that + * complete_work may still race with the dispatch of a following + * request. + */ + if (waiting) + wake_up(&mq->wait); + else + kblockd_schedule_work(&mq->complete_work); + + return; + } + + /* Take the recovery path for errors or urgent background operations */ + if (mmc_blk_rq_error(&mqrq->brq) || + mmc_blk_urgent_bkops_needed(mq, mqrq)) { + spin_lock_irqsave(q->queue_lock, flags); + mq->recovery_needed = true; + mq->recovery_req = req; + spin_unlock_irqrestore(q->queue_lock, flags); wake_up(&mq->wait); - else - kblockd_schedule_work(&mq->complete_work); + schedule_work(&mq->recovery_work); + return; + } + + mmc_blk_rw_reset_success(mq, req); + + mq->rw_wait = false; + wake_up(&mq->wait); + + mmc_blk_mq_post_req(mq, req); } static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) @@ -2286,11 +2358,16 @@ static bool mmc_blk_rw_wait_cond(struct mmc_queue *mq, int *err) bool done; /* - * Wait while there is another request in progress. Also indicate that - * there is a request waiting to start. + * Wait while there is another request in progress, but not if recovery + * is needed. Also indicate whether there is a request waiting to start. 
*/ spin_lock_irqsave(q->queue_lock, flags); - done = !mq->rw_wait; + if (mq->recovery_needed) { + *err = -EBUSY; + done = true; + } else { + done = !mq->rw_wait; + } mq->waiting = !done; spin_unlock_irqrestore(q->queue_lock, flags); @@ -2334,10 +2411,12 @@ static int mmc_blk_mq_issue_rw_rq(struct mmc_queue *mq, if (prev_req) mmc_blk_mq_post_req(mq, prev_req); - if (err) { + if (err) mq->rw_wait = false; + + /* Release re-tuning here where there is no synchronization required */ + if (err || mmc_host_done_complete(host)) mmc_retune_release(host); - } out_post_req: if (err) diff --git a/drivers/mmc/core/block.h b/drivers/mmc/core/block.h index f472ce5d5647..b126418fd163 100644 --- a/drivers/mmc/core/block.h +++ b/drivers/mmc/core/block.h @@ -13,6 +13,7 @@ enum mmc_issued; enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req); void mmc_blk_mq_complete(struct request *req); +void mmc_blk_mq_recovery(struct mmc_queue *mq); struct work_struct; diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index 6eaf558e62d6..8ca284e079e3 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -41,6 +41,11 @@ static inline int mmc_host_cmd23(struct mmc_host *host) return host->caps & MMC_CAP_CMD23; } +static inline bool mmc_host_done_complete(struct mmc_host *host) +{ + return host->caps & MMC_CAP_DONE_COMPLETE; +} + static inline int mmc_boot_partition_access(struct mmc_host *host) { return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 8d632d2f5199..d8394007bc99 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -165,7 +165,10 @@ static void mmc_mq_recovery_handler(struct work_struct *work) mq->in_recovery = true; - mmc_blk_cqe_recovery(mq); + if (mq->use_cqe) + mmc_blk_cqe_recovery(mq); + else + mmc_blk_mq_recovery(mq); mq->in_recovery = false; diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 1d7d3b0afff8..34f601c6dd39 
100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -103,6 +103,7 @@ struct mmc_queue { bool waiting; struct work_struct recovery_work; wait_queue_head_t wait; + struct request *recovery_req; struct request *complete_req; struct mutex complete_lock; struct work_struct complete_work; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ce2075d6f429..f3e13c50f6b0 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -324,6 +324,7 @@ struct mmc_host { #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ +#define MMC_CAP_DONE_COMPLETE (1 << 27) /* RW reqs can be completed within mmc_request_done() */ #define MMC_CAP_CD_WAKE (1 << 28) /* Enable card detect wake */ #define MMC_CAP_CMD_DURING_TFR (1 << 29) /* Commands during data transfer */ #define MMC_CAP_CMD23 (1 << 30) /* CMD23 supported. */ -- cgit v1.2.3 From 88a516461ee07a994c0e7016faf85f3466de1d09 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:08 +0200 Subject: mmc: block: blk-mq: Separate card polling from recovery Recovery is simpler to understand if it is only used for errors. Create a separate function for card polling. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index a710a6e95307..6d2c42c1c33a 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2139,6 +2139,26 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) brq->data.error || brq->cmd.resp[0] & CMD_ERRORS; } +static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + bool gen_err = false; + int err; + + if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) + return 0; + + err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, false, req, &gen_err); + + /* Copy the general error bit so it will be seen later on */ + if (gen_err) { + mqrq->brq.stop.resp[0] |= R1_ERROR; + err = err ? err : -EIO; + } + + return err; +} + static inline void mmc_blk_rw_reset_success(struct mmc_queue *mq, struct request *req) { @@ -2197,8 +2217,15 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq, struct request *req) { struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_host *host = mq->card->host; - mmc_blk_mq_rw_recovery(mq, req); + if (mmc_blk_rq_error(&mqrq->brq) || + mmc_blk_card_busy(mq->card, req)) { + mmc_blk_mq_rw_recovery(mq, req); + } else { + mmc_blk_rw_reset_success(mq, req); + mmc_retune_release(host); + } mmc_blk_urgent_bkops(mq, mqrq); } -- cgit v1.2.3 From c89b4851c67fb7354862850ae181de883269487d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:09 +0200 Subject: mmc: block: Make card_busy_detect() accumulate all response error bits Make card_busy_detect() accumulate all response error bits. Later patches will make use of this. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 6d2c42c1c33a..30fc012353ae 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -923,7 +923,8 @@ static int mmc_sd_num_wr_blocks(struct mmc_card *card, u32 *written_blocks) } static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, - bool hw_busy_detect, struct request *req, bool *gen_err) + bool hw_busy_detect, struct request *req, + u32 *resp_errs) { unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); int err = 0; @@ -937,11 +938,9 @@ static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, return err; } - if (status & R1_ERROR) { - pr_err("%s: %s: error sending status cmd, status %#x\n", - req->rq_disk->disk_name, __func__, status); - *gen_err = true; - } + /* Accumulate any response error bits seen */ + if (resp_errs) + *resp_errs |= status; /* We may rely on the host hw to handle busy detection.*/ if ((card->host->caps & MMC_CAP_WAIT_WHILE_BUSY) && @@ -970,6 +969,24 @@ static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, return err; } +static int card_busy_detect_err(struct mmc_card *card, unsigned int timeout_ms, + bool hw_busy_detect, struct request *req, + bool *gen_err) +{ + u32 resp_errs = 0; + int err; + + err = card_busy_detect(card, timeout_ms, hw_busy_detect, req, + &resp_errs); + if (resp_errs & R1_ERROR) { + pr_err("%s: %s: error sending status cmd, status %#x\n", + req->rq_disk->disk_name, __func__, resp_errs); + *gen_err = true; + } + + return err; +} + static int send_stop(struct mmc_card *card, unsigned int timeout_ms, struct request *req, bool *gen_err, u32 *stop_status) { @@ -1012,7 +1029,8 @@ static int send_stop(struct mmc_card *card, unsigned int 
timeout_ms, *gen_err = true; } - return card_busy_detect(card, timeout_ms, use_r1b_resp, req, gen_err); + return card_busy_detect_err(card, timeout_ms, use_r1b_resp, req, + gen_err); } #define ERR_NOMEDIUM 3 @@ -1553,8 +1571,8 @@ static enum mmc_blk_status __mmc_blk_err_check(struct mmc_card *card, gen_err = true; } - err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, false, req, - &gen_err); + err = card_busy_detect_err(card, MMC_BLK_TIMEOUT_MS, false, req, + &gen_err); if (err) return MMC_BLK_CMD_ERR; } @@ -2148,7 +2166,8 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) return 0; - err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, false, req, &gen_err); + err = card_busy_detect_err(card, MMC_BLK_TIMEOUT_MS, false, req, + &gen_err); /* Copy the general error bit so it will be seen later on */ if (gen_err) { -- cgit v1.2.3 From f47a1fe346b1568df0e9b158574b2939432313df Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:10 +0200 Subject: mmc: block: blk-mq: Check error bits and save the exception bit when polling card busy Check error bits and save the exception bit when polling card busy. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 30fc012353ae..c446d17b48c4 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1457,15 +1457,18 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq, } } -#define CMD_ERRORS \ - (R1_OUT_OF_RANGE | /* Command argument out of range */ \ - R1_ADDRESS_ERROR | /* Misaligned address */ \ +#define CMD_ERRORS_EXCL_OOR \ + (R1_ADDRESS_ERROR | /* Misaligned address */ \ R1_BLOCK_LEN_ERROR | /* Transferred block length incorrect */\ R1_WP_VIOLATION | /* Tried to write to protected block */ \ R1_CARD_ECC_FAILED | /* Card ECC failed */ \ R1_CC_ERROR | /* Card controller error */ \ R1_ERROR) /* General/unknown error */ +#define CMD_ERRORS \ + (CMD_ERRORS_EXCL_OOR | \ + R1_OUT_OF_RANGE) /* Command argument out of range */ \ + static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) { u32 val; @@ -2157,24 +2160,40 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) brq->data.error || brq->cmd.resp[0] & CMD_ERRORS; } +static inline bool mmc_blk_oor_valid(struct mmc_blk_request *brq) +{ + return !!brq->mrq.sbc; +} + +static inline u32 mmc_blk_stop_err_bits(struct mmc_blk_request *brq) +{ + return mmc_blk_oor_valid(brq) ? 
CMD_ERRORS : CMD_ERRORS_EXCL_OOR; +} + static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) { struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); - bool gen_err = false; + u32 status = 0; int err; if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) return 0; - err = card_busy_detect_err(card, MMC_BLK_TIMEOUT_MS, false, req, - &gen_err); + err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, false, req, &status); - /* Copy the general error bit so it will be seen later on */ - if (gen_err) { - mqrq->brq.stop.resp[0] |= R1_ERROR; + /* + * Do not assume data transferred correctly if there are any error bits + * set. + */ + if (status & mmc_blk_stop_err_bits(&mqrq->brq)) { + mqrq->brq.data.bytes_xfered = 0; err = err ? err : -EIO; } + /* Copy the exception bit so it will be seen later on */ + if (mmc_card_mmc(card) && status & R1_EXCEPTION_EVENT) + mqrq->brq.cmd.resp[0] |= R1_EXCEPTION_EVENT; + return err; } -- cgit v1.2.3 From 7701885e56cee3de4447c0653f9059b62844983b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:11 +0200 Subject: mmc: block: Check the timeout correctly in card_busy_detect() Pedantically, ensure the status is checked for the last time after the full timeout has passed. 
Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Acked-by: Linus Walleij Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index c446d17b48c4..f7c387c27ac0 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -931,6 +931,8 @@ static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, u32 status; do { + bool done = time_after(jiffies, timeout); + err = __mmc_send_status(card, &status, 5); if (err) { pr_err("%s: error %d requesting status\n", @@ -951,7 +953,7 @@ static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, * Timeout if the device never becomes ready for data and never * leaves the program state. */ - if (time_after(jiffies, timeout)) { + if (done) { pr_err("%s: Card stuck in programming state! %s %s\n", mmc_hostname(card->host), req->rq_disk->disk_name, __func__); -- cgit v1.2.3 From 0987c6b046e199b9b922a585c62e9503486fe0bc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:12 +0200 Subject: mmc: block: Check for transfer state in card_busy_detect() The card is required to return to transfer state. Since that is the state required to start another transfer, check for that state instead of programming state. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f7c387c27ac0..0b40fc2ebf77 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -922,6 +922,16 @@ static int mmc_sd_num_wr_blocks(struct mmc_card *card, u32 *written_blocks) return 0; } +static inline bool mmc_blk_in_tran_state(u32 status) +{ + /* + * Some cards mishandle the status bits, so make sure to check both the + * busy indication and the card state. + */ + return status & R1_READY_FOR_DATA && + (R1_CURRENT_STATE(status) == R1_STATE_TRAN); +} + static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, bool hw_busy_detect, struct request *req, u32 *resp_errs) @@ -954,9 +964,9 @@ static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, * leaves the program state. */ if (done) { - pr_err("%s: Card stuck in programming state! %s %s\n", + pr_err("%s: Card stuck in wrong state! %s %s status: %#x\n", mmc_hostname(card->host), - req->rq_disk->disk_name, __func__); + req->rq_disk->disk_name, __func__, status); return -ETIMEDOUT; } @@ -965,8 +975,7 @@ static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, * so make sure to check both the busy * indication and the card state. */ - } while (!(status & R1_READY_FOR_DATA) || - (R1_CURRENT_STATE(status) == R1_STATE_PRG)); + } while (!mmc_blk_in_tran_state(status)); return err; } -- cgit v1.2.3 From 92c0a0cc9483c6b9cc1b61273d30a0a601cb5e15 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:13 +0200 Subject: mmc: block: Add timeout_clks when calculating timeout According to the specification, total access time is derived from both TAAC and NSAC, which means the timeout should add both timeout_ns and timeout_clks. 
Host drivers do that, so make the block driver do that too. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Acked-by: Linus Walleij Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 0b40fc2ebf77..46e63aec1fcb 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -922,6 +922,34 @@ static int mmc_sd_num_wr_blocks(struct mmc_card *card, u32 *written_blocks) return 0; } +static unsigned int mmc_blk_clock_khz(struct mmc_host *host) +{ + if (host->actual_clock) + return host->actual_clock / 1000; + + /* Clock may be subject to a divisor, fudge it by a factor of 2. */ + if (host->ios.clock) + return host->ios.clock / 2000; + + /* How can there be no clock */ + WARN_ON_ONCE(1); + return 100; /* 100 kHz is minimum possible value */ +} + +static unsigned int mmc_blk_data_timeout_ms(struct mmc_host *host, + struct mmc_data *data) +{ + unsigned int ms = DIV_ROUND_UP(data->timeout_ns, 1000000); + unsigned int khz; + + if (data->timeout_clks) { + khz = mmc_blk_clock_khz(host); + ms += DIV_ROUND_UP(data->timeout_clks, khz); + } + + return ms; +} + static inline bool mmc_blk_in_tran_state(u32 status) { /* @@ -1169,9 +1197,10 @@ static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req, */ if (R1_CURRENT_STATE(status) == R1_STATE_DATA || R1_CURRENT_STATE(status) == R1_STATE_RCV) { - err = send_stop(card, - DIV_ROUND_UP(brq->data.timeout_ns, 1000000), - req, gen_err, &stop_status); + unsigned int timeout; + + timeout = mmc_blk_data_timeout_ms(card->host, &brq->data); + err = send_stop(card, timeout, req, gen_err, &stop_status); if (err) { pr_err("%s: error %d sending stop command\n", req->rq_disk->disk_name, err); @@ -1977,6 +2006,7 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) struct mmc_host *host = card->host; blk_status_t error = 
BLK_STS_OK; int retries = 0; + unsigned int timeout = mmc_blk_data_timeout_ms(host, mrq->data); do { u32 status; @@ -1995,10 +2025,8 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) u32 stop_status = 0; bool gen_err = false; - err = send_stop(card, - DIV_ROUND_UP(mrq->data->timeout_ns, - 1000000), - req, &gen_err, &stop_status); + err = send_stop(card, timeout, req, &gen_err, + &stop_status); if (err) goto error_exit; } -- cgit v1.2.3 From 6b7a363d2ce83e3940dc0c3628e478fe95f23985 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:14 +0200 Subject: mmc: block: Reduce polling timeout from 10 minutes to 10 seconds Set a 10 second timeout for polling write request busy state. Note, mmc core is setting a 3 second timeout for SD cards, and SDHCI has long had a 10 second software timer to timeout the whole request, so 10 seconds should be ample. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Acked-by: Linus Walleij Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 46e63aec1fcb..9d323ed34f82 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -63,7 +63,13 @@ MODULE_ALIAS("mmc:block"); #endif #define MODULE_PARAM_PREFIX "mmcblk." -#define MMC_BLK_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */ +/* + * Set a 10 second timeout for polling write request busy state. Note, mmc core + * is setting a 3 second timeout for SD cards, and SDHCI has long had a 10 + * second software timer to timeout the whole request, so 10 seconds should be + * ample. 
+ */ +#define MMC_BLK_TIMEOUT_MS (10 * 1000) #define MMC_SANITIZE_REQ_TIMEOUT 240000 #define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16) -- cgit v1.2.3 From 7eb43d537166c7d767af450901acd0ecbf94625c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:15 +0200 Subject: mmc: block: blk-mq: Stop using legacy recovery There are only a few things the recovery needs to do. Primarily, it just needs to: Determine the number of bytes transferred Get the card back to transfer state Determine whether to retry There are also a couple of additional features: Reset the card before the last retry Read one sector at a time The legacy code spent much effort analyzing command errors, but commands fail fast, so it is simpler just to give all command errors the same number of retries. Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/block.c | 304 +++++++++++++++++++++++++---------------------- 1 file changed, 161 insertions(+), 143 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 9d323ed34f82..bd7ead343500 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1557,9 +1557,11 @@ static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) } } -static enum mmc_blk_status __mmc_blk_err_check(struct mmc_card *card, - struct mmc_queue_req *mq_mrq) +static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, + struct mmc_async_req *areq) { + struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req, + areq); struct mmc_blk_request *brq = &mq_mrq->brq; struct request *req = mmc_queue_req_to_req(mq_mrq); int need_retune = card->host->need_retune; @@ -1665,15 +1667,6 @@ static enum mmc_blk_status __mmc_blk_err_check(struct mmc_card *card, return MMC_BLK_SUCCESS; } -static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, - struct mmc_async_req *areq) -{ - struct mmc_queue_req *mq_mrq = 
container_of(areq, struct mmc_queue_req, - areq); - - return __mmc_blk_err_check(card, mq_mrq); -} - static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, int disable_multi, bool *do_rel_wr_p, bool *do_data_tag_p) @@ -1999,8 +1992,39 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, } #define MMC_MAX_RETRIES 5 +#define MMC_DATA_RETRIES 2 #define MMC_NO_RETRIES (MMC_MAX_RETRIES + 1) +static int mmc_blk_send_stop(struct mmc_card *card, unsigned int timeout) +{ + struct mmc_command cmd = { + .opcode = MMC_STOP_TRANSMISSION, + .flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC, + /* Some hosts wait for busy anyway, so provide a busy timeout */ + .busy_timeout = timeout, + }; + + return mmc_wait_for_cmd(card->host, &cmd, 5); +} + +static int mmc_blk_fix_state(struct mmc_card *card, struct request *req) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_blk_request *brq = &mqrq->brq; + unsigned int timeout = mmc_blk_data_timeout_ms(card->host, &brq->data); + int err; + + mmc_retune_hold_now(card->host); + + mmc_blk_send_stop(card, timeout); + + err = card_busy_detect(card, timeout, false, req, NULL); + + mmc_retune_release(card->host); + + return err; +} + #define MMC_READ_SINGLE_RETRIES 2 /* Single sector read during recovery */ @@ -2012,7 +2036,6 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) struct mmc_host *host = card->host; blk_status_t error = BLK_STS_OK; int retries = 0; - unsigned int timeout = mmc_blk_data_timeout_ms(host, mrq->data); do { u32 status; @@ -2027,12 +2050,8 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) goto error_exit; if (!mmc_host_is_spi(host) && - R1_CURRENT_STATE(status) != R1_STATE_TRAN) { - u32 stop_status = 0; - bool gen_err = false; - - err = send_stop(card, timeout, req, &gen_err, - &stop_status); + !mmc_blk_in_tran_state(status)) { + err = mmc_blk_fix_state(card, req); if (err) goto error_exit; } @@ -2062,6 +2081,60 
@@ error_exit: mqrq->retries = MMC_MAX_RETRIES - 1; } +static inline bool mmc_blk_oor_valid(struct mmc_blk_request *brq) +{ + return !!brq->mrq.sbc; +} + +static inline u32 mmc_blk_stop_err_bits(struct mmc_blk_request *brq) +{ + return mmc_blk_oor_valid(brq) ? CMD_ERRORS : CMD_ERRORS_EXCL_OOR; +} + +/* + * Check for errors the host controller driver might not have seen such as + * response mode errors or invalid card state. + */ +static bool mmc_blk_status_error(struct request *req, u32 status) +{ + struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); + struct mmc_blk_request *brq = &mqrq->brq; + struct mmc_queue *mq = req->q->queuedata; + u32 stop_err_bits; + + if (mmc_host_is_spi(mq->card->host)) + return 0; + + stop_err_bits = mmc_blk_stop_err_bits(brq); + + return brq->cmd.resp[0] & CMD_ERRORS || + brq->stop.resp[0] & stop_err_bits || + status & stop_err_bits || + (rq_data_dir(req) == WRITE && !mmc_blk_in_tran_state(status)); +} + +static inline bool mmc_blk_cmd_started(struct mmc_blk_request *brq) +{ + return !brq->sbc.error && !brq->cmd.error && + !(brq->cmd.resp[0] & CMD_ERRORS); +} + +/* + * Requests are completed by mmc_blk_mq_complete_rq() which sets simple + * policy: + * 1. A request that has transferred at least some data is considered + * successful and will be requeued if there is remaining data to + * transfer. + * 2. Otherwise the number of retries is incremented and the request + * will be requeued if there are remaining retries. + * 3. Otherwise the request will be errored out. + * That means mmc_blk_mq_complete_rq() is controlled by bytes_xfered and + * mqrq->retries. So there are only 4 possible actions here: + * 1. do not accept the bytes_xfered value i.e. set it to zero + * 2. change mqrq->retries to determine the number of retries + * 3. try to reset the card + * 4. read one sector at a time + */ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req) { int type = rq_data_dir(req) == READ ? 
MMC_BLK_READ : MMC_BLK_WRITE; @@ -2069,131 +2142,86 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req) struct mmc_blk_request *brq = &mqrq->brq; struct mmc_blk_data *md = mq->blkdata; struct mmc_card *card = mq->card; - static enum mmc_blk_status status; - - brq->retune_retry_done = mqrq->retries; + u32 status; + u32 blocks; + int err; - status = __mmc_blk_err_check(card, mqrq); + /* + * Some errors the host driver might not have seen. Set the number of + * bytes transferred to zero in that case. + */ + err = __mmc_send_status(card, &status, 0); + if (err || mmc_blk_status_error(req, status)) + brq->data.bytes_xfered = 0; mmc_retune_release(card->host); /* - * Requests are completed by mmc_blk_mq_complete_rq() which sets simple - * policy: - * 1. A request that has transferred at least some data is considered - * successful and will be requeued if there is remaining data to - * transfer. - * 2. Otherwise the number of retries is incremented and the request - * will be requeued if there are remaining retries. - * 3. Otherwise the request will be errored out. - * That means mmc_blk_mq_complete_rq() is controlled by bytes_xfered and - * mqrq->retries. So there are only 4 possible actions here: - * 1. do not accept the bytes_xfered value i.e. set it to zero - * 2. change mqrq->retries to determine the number of retries - * 3. try to reset the card - * 4. read one sector at a time + * Try again to get the status. This also provides an opportunity for + * re-tuning. */ - switch (status) { - case MMC_BLK_SUCCESS: - case MMC_BLK_PARTIAL: - /* Reset success, and accept bytes_xfered */ - mmc_blk_reset_success(md, type); - break; - case MMC_BLK_CMD_ERR: - /* - * For SD cards, get bytes written, but do not accept - * bytes_xfered if that fails. For MMC cards accept - * bytes_xfered. Then try to reset. If reset fails then - * error out the remaining request, otherwise retry - * once (N.B mmc_blk_reset() will not succeed twice in a - * row). 
- */ - if (mmc_card_sd(card)) { - u32 blocks; - int err; + if (err) + err = __mmc_send_status(card, &status, 0); - err = mmc_sd_num_wr_blocks(card, &blocks); - if (err) - brq->data.bytes_xfered = 0; - else - brq->data.bytes_xfered = blocks << 9; - } - if (mmc_blk_reset(md, card->host, type)) - mqrq->retries = MMC_NO_RETRIES; - else - mqrq->retries = MMC_MAX_RETRIES - 1; - break; - case MMC_BLK_RETRY: - /* - * Do not accept bytes_xfered, but retry up to 5 times, - * otherwise same as abort. - */ - brq->data.bytes_xfered = 0; - if (mqrq->retries < MMC_MAX_RETRIES) - break; - /* Fall through */ - case MMC_BLK_ABORT: - /* - * Do not accept bytes_xfered, but try to reset. If - * reset succeeds, try once more, otherwise error out - * the request. - */ - brq->data.bytes_xfered = 0; - if (mmc_blk_reset(md, card->host, type)) - mqrq->retries = MMC_NO_RETRIES; - else - mqrq->retries = MMC_MAX_RETRIES - 1; - break; - case MMC_BLK_DATA_ERR: { - int err; + /* + * Nothing more to do after the number of bytes transferred has been + * updated and there is no card. + */ + if (err && mmc_detect_card_removed(card->host)) + return; - /* - * Do not accept bytes_xfered, but try to reset. If - * reset succeeds, try once more. If reset fails with - * ENODEV which means the partition is wrong, then error - * out the request. Otherwise attempt to read one sector - * at a time. - */ - brq->data.bytes_xfered = 0; - err = mmc_blk_reset(md, card->host, type); - if (!err) { - mqrq->retries = MMC_MAX_RETRIES - 1; - break; - } - if (err == -ENODEV) { - mqrq->retries = MMC_NO_RETRIES; - break; - } - /* Fall through */ + /* Try to get back to "tran" state */ + if (!mmc_host_is_spi(mq->card->host) && + (err || !mmc_blk_in_tran_state(status))) + err = mmc_blk_fix_state(mq->card, req); + + /* + * Special case for SD cards where the card might record the number of + * blocks written. 
+ */ + if (!err && mmc_blk_cmd_started(brq) && mmc_card_sd(card) && + rq_data_dir(req) == WRITE) { + if (mmc_sd_num_wr_blocks(card, &blocks)) + brq->data.bytes_xfered = 0; + else + brq->data.bytes_xfered = blocks << 9; } - case MMC_BLK_ECC_ERR: - /* - * Do not accept bytes_xfered. If reading more than one - * sector, try reading one sector at a time. - */ - brq->data.bytes_xfered = 0; - /* FIXME: Missing single sector read for large sector size */ - if (brq->data.blocks > 1 && !mmc_large_sector(card)) { - /* Redo read one sector at a time */ - pr_warn("%s: retrying using single block read\n", - req->rq_disk->disk_name); - mmc_blk_read_single(mq, req); - } else { - mqrq->retries = MMC_NO_RETRIES; - } - break; - case MMC_BLK_NOMEDIUM: - /* Do not accept bytes_xfered. Error out the request */ - brq->data.bytes_xfered = 0; - mqrq->retries = MMC_NO_RETRIES; - break; - default: - /* Do not accept bytes_xfered. Error out the request */ - brq->data.bytes_xfered = 0; + + /* Reset if the card is in a bad state */ + if (!mmc_host_is_spi(mq->card->host) && + err && mmc_blk_reset(md, card->host, type)) { + pr_err("%s: recovery failed!\n", req->rq_disk->disk_name); mqrq->retries = MMC_NO_RETRIES; - pr_err("%s: Unhandled return value (%d)", - req->rq_disk->disk_name, status); - break; + return; + } + + /* + * If anything was done, just return and if there is anything remaining + * on the request it will get requeued. 
+ */ + if (brq->data.bytes_xfered) + return; + + /* Reset before last retry */ + if (mqrq->retries + 1 == MMC_MAX_RETRIES) + mmc_blk_reset(md, card->host, type); + + /* Command errors fail fast, so use all MMC_MAX_RETRIES */ + if (brq->sbc.error || brq->cmd.error) + return; + + /* Reduce the remaining retries for data errors */ + if (mqrq->retries < MMC_MAX_RETRIES - MMC_DATA_RETRIES) { + mqrq->retries = MMC_MAX_RETRIES - MMC_DATA_RETRIES; + return; + } + + /* FIXME: Missing single sector read for large sector size */ + if (!mmc_large_sector(card) && rq_data_dir(req) == READ && + brq->data.blocks > 1) { + /* Read one sector at a time */ + mmc_blk_read_single(mq, req); + return; } } @@ -2205,16 +2233,6 @@ static inline bool mmc_blk_rq_error(struct mmc_blk_request *brq) brq->data.error || brq->cmd.resp[0] & CMD_ERRORS; } -static inline bool mmc_blk_oor_valid(struct mmc_blk_request *brq) -{ - return !!brq->mrq.sbc; -} - -static inline u32 mmc_blk_stop_err_bits(struct mmc_blk_request *brq) -{ - return mmc_blk_oor_valid(brq) ? CMD_ERRORS : CMD_ERRORS_EXCL_OOR; -} - static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) { struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); -- cgit v1.2.3 From 42f532da3a44843668dbacc1838a028b0a9b7373 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:16 +0200 Subject: mmc: mmc_test: Do not use mmc_start_areq() anymore The block driver's blk-mq paths do not use mmc_start_areq(). In order to remove mmc_start_areq() entirely, start by removing it from mmc_test. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson Tested-by: Linus Walleij --- drivers/mmc/core/mmc_test.c | 122 ++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 68 deletions(-) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 478869805b96..9311c8de2061 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -171,11 +171,6 @@ struct mmc_test_multiple_rw { enum mmc_test_prep_media prepare; }; -struct mmc_test_async_req { - struct mmc_async_req areq; - struct mmc_test_card *test; -}; - /*******************************************************************/ /* General helper functions */ /*******************************************************************/ @@ -741,30 +736,6 @@ static int mmc_test_check_result(struct mmc_test_card *test, return ret; } -static enum mmc_blk_status mmc_test_check_result_async(struct mmc_card *card, - struct mmc_async_req *areq) -{ - struct mmc_test_async_req *test_async = - container_of(areq, struct mmc_test_async_req, areq); - int ret; - - mmc_test_wait_busy(test_async->test); - - /* - * FIXME: this would earlier just casts a regular error code, - * either of the kernel type -ERRORCODE or the local test framework - * RESULT_* errorcode, into an enum mmc_blk_status and return as - * result check. Instead, convert it to some reasonable type by just - * returning either MMC_BLK_SUCCESS or MMC_BLK_CMD_ERR. - * If possible, a reasonable error code should be returned. 
- */ - ret = mmc_test_check_result(test_async->test, areq->mrq); - if (ret) - return MMC_BLK_CMD_ERR; - - return MMC_BLK_SUCCESS; -} - /* * Checks that a "short transfer" behaved as expected */ @@ -831,6 +802,45 @@ static struct mmc_test_req *mmc_test_req_alloc(void) return rq; } +static void mmc_test_wait_done(struct mmc_request *mrq) +{ + complete(&mrq->completion); +} + +static int mmc_test_start_areq(struct mmc_test_card *test, + struct mmc_request *mrq, + struct mmc_request *prev_mrq) +{ + struct mmc_host *host = test->card->host; + int err = 0; + + if (mrq) { + init_completion(&mrq->completion); + mrq->done = mmc_test_wait_done; + mmc_pre_req(host, mrq); + } + + if (prev_mrq) { + wait_for_completion(&prev_mrq->completion); + err = mmc_test_wait_busy(test); + if (!err) + err = mmc_test_check_result(test, prev_mrq); + } + + if (!err && mrq) { + err = mmc_start_request(host, mrq); + if (err) + mmc_retune_release(host); + } + + if (prev_mrq) + mmc_post_req(host, prev_mrq, 0); + + if (err && mrq) + mmc_post_req(host, mrq, err); + + return err; +} static int mmc_test_nonblock_transfer(struct mmc_test_card *test, struct scatterlist *sg, unsigned sg_len, @@ -838,17 +848,10 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test, unsigned blksz, int write, int count) { struct mmc_test_req *rq1, *rq2; - struct mmc_test_async_req test_areq[2]; - struct mmc_async_req *done_areq; - struct mmc_async_req *cur_areq = &test_areq[0].areq; - struct mmc_async_req *other_areq = &test_areq[1].areq; - enum mmc_blk_status status; + struct mmc_request *mrq, *prev_mrq; int i; int ret = RESULT_OK; - test_areq[0].test = test; - test_areq[1].test = test; - rq1 = mmc_test_req_alloc(); rq2 = mmc_test_req_alloc(); if (!rq1 || !rq2) { @@ -856,33 +859,25 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test, goto err; } - cur_areq->mrq = &rq1->mrq; - cur_areq->err_check = mmc_test_check_result_async; - other_areq->mrq = &rq2->mrq; - other_areq->err_check = 
mmc_test_check_result_async; + mrq = &rq1->mrq; + prev_mrq = NULL; for (i = 0; i < count; i++) { - mmc_test_prepare_mrq(test, cur_areq->mrq, sg, sg_len, dev_addr, - blocks, blksz, write); - done_areq = mmc_start_areq(test->card->host, cur_areq, &status); - - if (status != MMC_BLK_SUCCESS || (!done_areq && i > 0)) { - ret = RESULT_FAIL; + mmc_test_req_reset(container_of(mrq, struct mmc_test_req, mrq)); + mmc_test_prepare_mrq(test, mrq, sg, sg_len, dev_addr, blocks, + blksz, write); + ret = mmc_test_start_areq(test, mrq, prev_mrq); + if (ret) goto err; - } - if (done_areq) - mmc_test_req_reset(container_of(done_areq->mrq, - struct mmc_test_req, mrq)); + if (!prev_mrq) + prev_mrq = &rq2->mrq; - swap(cur_areq, other_areq); + swap(mrq, prev_mrq); dev_addr += blocks; } - done_areq = mmc_start_areq(test->card->host, NULL, &status); - if (status != MMC_BLK_SUCCESS) - ret = RESULT_FAIL; - + ret = mmc_test_start_areq(test, NULL, prev_mrq); err: kfree(rq1); kfree(rq2); @@ -2356,11 +2351,9 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test, struct mmc_test_req *rq = mmc_test_req_alloc(); struct mmc_host *host = test->card->host; struct mmc_test_area *t = &test->area; - struct mmc_test_async_req test_areq = { .test = test }; struct mmc_request *mrq; unsigned long timeout; bool expired = false; - enum mmc_blk_status blkstat = MMC_BLK_SUCCESS; int ret = 0, cmd_ret; u32 status = 0; int count = 0; @@ -2373,9 +2366,6 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test, mrq->sbc = &rq->sbc; mrq->cap_cmd_during_tfr = true; - test_areq.areq.mrq = mrq; - test_areq.areq.err_check = mmc_test_check_result_async; - mmc_test_prepare_mrq(test, mrq, t->sg, t->sg_len, dev_addr, t->blocks, 512, write); @@ -2388,11 +2378,9 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test, /* Start ongoing data request */ if (use_areq) { - mmc_start_areq(host, &test_areq.areq, &blkstat); - if (blkstat != MMC_BLK_SUCCESS) { - ret = RESULT_FAIL; + ret = 
mmc_test_start_areq(test, mrq, NULL); + if (ret) goto out_free; - } } else { mmc_wait_for_req(host, mrq); } @@ -2426,9 +2414,7 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test, /* Wait for data request to complete */ if (use_areq) { - mmc_start_areq(host, NULL, &blkstat); - if (blkstat != MMC_BLK_SUCCESS) - ret = RESULT_FAIL; + ret = mmc_test_start_areq(test, NULL, mrq); } else { mmc_wait_for_req_done(test->card->host, mrq); } -- cgit v1.2.3 From aa95014445769f3ac204f85ff85efe11bbd0bc8c Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 30 Nov 2017 05:54:24 +0800 Subject: mmc: block: blk-mq: fix boolreturn.cocci warnings drivers/mmc/core/block.c:2106:9-10: WARNING: return of 0/1 in function 'mmc_blk_status_error' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. Generated by: scripts/coccinelle/misc/boolreturn.cocci Fixes:7eb43d537166 ("mmc: block: blk-mq: Stop using legacy recovery") CC: Adrian Hunter Signed-off-by: Fengguang Wu Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index bd7ead343500..ab384ba6cb37 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2103,7 +2103,7 @@ static bool mmc_blk_status_error(struct request *req, u32 status) u32 stop_err_bits; if (mmc_host_is_spi(mq->card->host)) - return 0; + return false; stop_err_bits = mmc_blk_stop_err_bits(brq); -- cgit v1.2.3 From 15ff2946b3c9661b14fc5123902dad28e1f13f3e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 30 Nov 2017 11:37:38 +0000 Subject: mmc: block: make function mmc_cqe_issue_type static The function mmc_cqe_issue_type is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: drivers/mmc/core/queue.c:62:21: warning: symbol 'mmc_cqe_issue_type' was not declared. Should it be static? 
Signed-off-by: Colin Ian King Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/core/queue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index d8394007bc99..5db388081789 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -59,8 +59,8 @@ static inline bool mmc_cqe_can_dcmd(struct mmc_host *host) return host->caps2 & MMC_CAP2_CQE_DCMD; } -enum mmc_issue_type mmc_cqe_issue_type(struct mmc_host *host, - struct request *req) +static enum mmc_issue_type mmc_cqe_issue_type(struct mmc_host *host, + struct request *req) { switch (req_op(req)) { case REQ_OP_DRV_IN: -- cgit v1.2.3 From c14e60963ec1e0595250955271abfe4d5e96b3cb Mon Sep 17 00:00:00 2001 From: Ulrich Hecht Date: Wed, 29 Nov 2017 17:06:45 +0100 Subject: mmc: renesas_sdhi: enable R-Car D3 (r8a77995) support Whitelists for internal DMAC implementation. Signed-off-by: Ulrich Hecht Reviewed-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson Acked-by: Wolfram Sang --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 41cbe84c1d18..396ae8a1c250 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -255,6 +255,7 @@ static const struct soc_device_attribute gen3_soc_whitelist[] = { { .soc_id = "r8a7795", .revision = "ES1.*" }, { .soc_id = "r8a7795", .revision = "ES2.0" }, { .soc_id = "r8a7796", .revision = "ES1.0" }, + { .soc_id = "r8a77995", .revision = "ES1.0" }, { /* sentinel */ } }; -- cgit v1.2.3 From 043f2dca367c752bf8a570130f4f0ace4b4be4a8 Mon Sep 17 00:00:00 2001 From: Milan Stevanovic Date: Tue, 28 Nov 2017 01:02:57 +0100 Subject: mmc: sdhci-of-arasan: Add sdhci_arasan_set_power The power register needs to have a valid voltage set even when the power supply is managed by an external regulator. 
Signed-off-by: Milan Stevanovic Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 0720ea717011..fb572066a88b 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -262,6 +262,17 @@ static int sdhci_arasan_voltage_switch(struct mmc_host *mmc, return -EINVAL; } +static void sdhci_arasan_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + if (!IS_ERR(host->mmc->supply.vmmc)) { + struct mmc_host *mmc = host->mmc; + + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + } + sdhci_set_power_noreg(host, mode, vdd); +} + static const struct sdhci_ops sdhci_arasan_ops = { .set_clock = sdhci_arasan_set_clock, .get_max_clock = sdhci_pltfm_clk_get_max_clock, @@ -269,6 +280,7 @@ static const struct sdhci_ops sdhci_arasan_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_arasan_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_arasan_set_power, }; static const struct sdhci_pltfm_data sdhci_arasan_pdata = { -- cgit v1.2.3 From a5b97be2a7bbfc20d75f51f0969f102015edab6d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Nov 2017 12:53:22 +0100 Subject: mmc_test: use ktime_get_ts64 for timestamps Calling getnstimeofday() can suffer from time jumps and from the y2038 overflow, so it is not appropriate here. Using ktime_get_ts64() solves both problems. Using ktime_get() with ktime_t timestamps would also work, but it seems that we mainly want to print the times as seconds+nanoseconds, so it would require an extra division in the output. 
Signed-off-by: Arnd Bergmann Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_test.c | 88 ++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 9311c8de2061..f96bbb8014e1 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -101,7 +101,7 @@ struct mmc_test_transfer_result { struct list_head link; unsigned int count; unsigned int sectors; - struct timespec ts; + struct timespec64 ts; unsigned int rate; unsigned int iops; }; @@ -510,14 +510,11 @@ static int mmc_test_map_sg_max_scatter(struct mmc_test_mem *mem, /* * Calculate transfer rate in bytes per second. */ -static unsigned int mmc_test_rate(uint64_t bytes, struct timespec *ts) +static unsigned int mmc_test_rate(uint64_t bytes, struct timespec64 *ts) { uint64_t ns; - ns = ts->tv_sec; - ns *= 1000000000; - ns += ts->tv_nsec; - + ns = timespec64_to_ns(ts); bytes *= 1000000000; while (ns > UINT_MAX) { @@ -537,7 +534,7 @@ static unsigned int mmc_test_rate(uint64_t bytes, struct timespec *ts) * Save transfer results for future usage */ static void mmc_test_save_transfer_result(struct mmc_test_card *test, - unsigned int count, unsigned int sectors, struct timespec ts, + unsigned int count, unsigned int sectors, struct timespec64 ts, unsigned int rate, unsigned int iops) { struct mmc_test_transfer_result *tr; @@ -562,21 +559,21 @@ static void mmc_test_save_transfer_result(struct mmc_test_card *test, * Print the transfer rate. 
*/ static void mmc_test_print_rate(struct mmc_test_card *test, uint64_t bytes, - struct timespec *ts1, struct timespec *ts2) + struct timespec64 *ts1, struct timespec64 *ts2) { unsigned int rate, iops, sectors = bytes >> 9; - struct timespec ts; + struct timespec64 ts; - ts = timespec_sub(*ts2, *ts1); + ts = timespec64_sub(*ts2, *ts1); rate = mmc_test_rate(bytes, &ts); iops = mmc_test_rate(100, &ts); /* I/O ops per sec x 100 */ - pr_info("%s: Transfer of %u sectors (%u%s KiB) took %lu.%09lu " + pr_info("%s: Transfer of %u sectors (%u%s KiB) took %llu.%09u " "seconds (%u kB/s, %u KiB/s, %u.%02u IOPS)\n", mmc_hostname(test->card->host), sectors, sectors >> 1, - (sectors & 1 ? ".5" : ""), (unsigned long)ts.tv_sec, - (unsigned long)ts.tv_nsec, rate / 1000, rate / 1024, + (sectors & 1 ? ".5" : ""), (u64)ts.tv_sec, + (u32)ts.tv_nsec, rate / 1000, rate / 1024, iops / 100, iops % 100); mmc_test_save_transfer_result(test, 1, sectors, ts, rate, iops); @@ -586,24 +583,24 @@ static void mmc_test_print_rate(struct mmc_test_card *test, uint64_t bytes, * Print the average transfer rate. */ static void mmc_test_print_avg_rate(struct mmc_test_card *test, uint64_t bytes, - unsigned int count, struct timespec *ts1, - struct timespec *ts2) + unsigned int count, struct timespec64 *ts1, + struct timespec64 *ts2) { unsigned int rate, iops, sectors = bytes >> 9; uint64_t tot = bytes * count; - struct timespec ts; + struct timespec64 ts; - ts = timespec_sub(*ts2, *ts1); + ts = timespec64_sub(*ts2, *ts1); rate = mmc_test_rate(tot, &ts); iops = mmc_test_rate(count * 100, &ts); /* I/O ops per sec x 100 */ pr_info("%s: Transfer of %u x %u sectors (%u x %u%s KiB) took " - "%lu.%09lu seconds (%u kB/s, %u KiB/s, " + "%llu.%09u seconds (%u kB/s, %u KiB/s, " "%u.%02u IOPS, sg_len %d)\n", mmc_hostname(test->card->host), count, sectors, count, sectors >> 1, (sectors & 1 ? 
".5" : ""), - (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec, + (u64)ts.tv_sec, (u32)ts.tv_nsec, rate / 1000, rate / 1024, iops / 100, iops % 100, test->area.sg_len); @@ -1444,7 +1441,7 @@ static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz, int max_scatter, int timed, int count, bool nonblock, int min_sg_len) { - struct timespec ts1, ts2; + struct timespec64 ts1, ts2; int ret = 0; int i; struct mmc_test_area *t = &test->area; @@ -1470,7 +1467,7 @@ static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz, return ret; if (timed) - getnstimeofday(&ts1); + ktime_get_ts64(&ts1); if (nonblock) ret = mmc_test_nonblock_transfer(test, t->sg, t->sg_len, dev_addr, t->blocks, 512, write, count); @@ -1484,7 +1481,7 @@ static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz, return ret; if (timed) - getnstimeofday(&ts2); + ktime_get_ts64(&ts2); if (timed) mmc_test_print_avg_rate(test, sz, count, &ts1, &ts2); @@ -1742,7 +1739,7 @@ static int mmc_test_profile_trim_perf(struct mmc_test_card *test) struct mmc_test_area *t = &test->area; unsigned long sz; unsigned int dev_addr; - struct timespec ts1, ts2; + struct timespec64 ts1, ts2; int ret; if (!mmc_can_trim(test->card)) @@ -1753,19 +1750,19 @@ static int mmc_test_profile_trim_perf(struct mmc_test_card *test) for (sz = 512; sz < t->max_sz; sz <<= 1) { dev_addr = t->dev_addr + (sz >> 9); - getnstimeofday(&ts1); + ktime_get_ts64(&ts1); ret = mmc_erase(test->card, dev_addr, sz >> 9, MMC_TRIM_ARG); if (ret) return ret; - getnstimeofday(&ts2); + ktime_get_ts64(&ts2); mmc_test_print_rate(test, sz, &ts1, &ts2); } dev_addr = t->dev_addr; - getnstimeofday(&ts1); + ktime_get_ts64(&ts1); ret = mmc_erase(test->card, dev_addr, sz >> 9, MMC_TRIM_ARG); if (ret) return ret; - getnstimeofday(&ts2); + ktime_get_ts64(&ts2); mmc_test_print_rate(test, sz, &ts1, &ts2); return 0; } @@ -1774,19 +1771,19 @@ static int mmc_test_seq_read_perf(struct mmc_test_card *test, unsigned long 
sz) { struct mmc_test_area *t = &test->area; unsigned int dev_addr, i, cnt; - struct timespec ts1, ts2; + struct timespec64 ts1, ts2; int ret; cnt = t->max_sz / sz; dev_addr = t->dev_addr; - getnstimeofday(&ts1); + ktime_get_ts64(&ts1); for (i = 0; i < cnt; i++) { ret = mmc_test_area_io(test, sz, dev_addr, 0, 0, 0); if (ret) return ret; dev_addr += (sz >> 9); } - getnstimeofday(&ts2); + ktime_get_ts64(&ts2); mmc_test_print_avg_rate(test, sz, cnt, &ts1, &ts2); return 0; } @@ -1813,7 +1810,7 @@ static int mmc_test_seq_write_perf(struct mmc_test_card *test, unsigned long sz) { struct mmc_test_area *t = &test->area; unsigned int dev_addr, i, cnt; - struct timespec ts1, ts2; + struct timespec64 ts1, ts2; int ret; ret = mmc_test_area_erase(test); @@ -1821,14 +1818,14 @@ static int mmc_test_seq_write_perf(struct mmc_test_card *test, unsigned long sz) return ret; cnt = t->max_sz / sz; dev_addr = t->dev_addr; - getnstimeofday(&ts1); + ktime_get_ts64(&ts1); for (i = 0; i < cnt; i++) { ret = mmc_test_area_io(test, sz, dev_addr, 1, 0, 0); if (ret) return ret; dev_addr += (sz >> 9); } - getnstimeofday(&ts2); + ktime_get_ts64(&ts2); mmc_test_print_avg_rate(test, sz, cnt, &ts1, &ts2); return 0; } @@ -1859,7 +1856,7 @@ static int mmc_test_profile_seq_trim_perf(struct mmc_test_card *test) struct mmc_test_area *t = &test->area; unsigned long sz; unsigned int dev_addr, i, cnt; - struct timespec ts1, ts2; + struct timespec64 ts1, ts2; int ret; if (!mmc_can_trim(test->card)) @@ -1877,7 +1874,7 @@ static int mmc_test_profile_seq_trim_perf(struct mmc_test_card *test) return ret; cnt = t->max_sz / sz; dev_addr = t->dev_addr; - getnstimeofday(&ts1); + ktime_get_ts64(&ts1); for (i = 0; i < cnt; i++) { ret = mmc_erase(test->card, dev_addr, sz >> 9, MMC_TRIM_ARG); @@ -1885,7 +1882,7 @@ static int mmc_test_profile_seq_trim_perf(struct mmc_test_card *test) return ret; dev_addr += (sz >> 9); } - getnstimeofday(&ts2); + ktime_get_ts64(&ts2); mmc_test_print_avg_rate(test, sz, cnt, &ts1, &ts2); } 
return 0; @@ -1907,7 +1904,7 @@ static int mmc_test_rnd_perf(struct mmc_test_card *test, int write, int print, { unsigned int dev_addr, cnt, rnd_addr, range1, range2, last_ea = 0, ea; unsigned int ssz; - struct timespec ts1, ts2, ts; + struct timespec64 ts1, ts2, ts; int ret; ssz = sz >> 9; @@ -1916,10 +1913,10 @@ static int mmc_test_rnd_perf(struct mmc_test_card *test, int write, int print, range1 = rnd_addr / test->card->pref_erase; range2 = range1 / ssz; - getnstimeofday(&ts1); + ktime_get_ts64(&ts1); for (cnt = 0; cnt < UINT_MAX; cnt++) { - getnstimeofday(&ts2); - ts = timespec_sub(ts2, ts1); + ktime_get_ts64(&ts2); + ts = timespec64_sub(ts2, ts1); if (ts.tv_sec >= 10) break; ea = mmc_test_rnd_num(range1); @@ -1993,7 +1990,7 @@ static int mmc_test_seq_perf(struct mmc_test_card *test, int write, { struct mmc_test_area *t = &test->area; unsigned int dev_addr, i, cnt, sz, ssz; - struct timespec ts1, ts2; + struct timespec64 ts1, ts2; int ret; sz = t->max_tfr; @@ -2020,7 +2017,7 @@ static int mmc_test_seq_perf(struct mmc_test_card *test, int write, cnt = tot_sz / sz; dev_addr &= 0xffff0000; /* Round to 64MiB boundary */ - getnstimeofday(&ts1); + ktime_get_ts64(&ts1); for (i = 0; i < cnt; i++) { ret = mmc_test_area_io(test, sz, dev_addr, write, max_scatter, 0); @@ -2028,7 +2025,7 @@ static int mmc_test_seq_perf(struct mmc_test_card *test, int write, return ret; dev_addr += ssz; } - getnstimeofday(&ts2); + ktime_get_ts64(&ts2); mmc_test_print_avg_rate(test, sz, cnt, &ts1, &ts2); @@ -3052,10 +3049,9 @@ static int mtf_test_show(struct seq_file *sf, void *data) seq_printf(sf, "Test %d: %d\n", gr->testcase + 1, gr->result); list_for_each_entry(tr, &gr->tr_lst, link) { - seq_printf(sf, "%u %d %lu.%09lu %u %u.%02u\n", + seq_printf(sf, "%u %d %llu.%09u %u %u.%02u\n", tr->count, tr->sectors, - (unsigned long)tr->ts.tv_sec, - (unsigned long)tr->ts.tv_nsec, + (u64)tr->ts.tv_sec, (u32)tr->ts.tv_nsec, tr->rate, tr->iops / 100, tr->iops % 100); } } -- cgit v1.2.3 From 
34597a3f60b1639ec8da440ec12afbfd057fb885 Mon Sep 17 00:00:00 2001 From: Shah Nehal-Bakulchandra Date: Fri, 1 Dec 2017 15:38:52 +0530 Subject: mmc: sdhci-acpi: Add support for ACPI HID of AMD Controller with HS400 This patch supports HS400 for AMD upcoming emmc 5.0 controller.The HS400 and HS200 mode requires hardware work around also. This patch adds the quirks for the same. Signed-off-by: Nehal-bakulchandra Shah Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 79 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index b988997a1e80..1b1ce804d2d7 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -446,6 +446,83 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_qcom_sd = { .caps = MMC_CAP_NONREMOVABLE, }; +/* AMD sdhci reset dll register. */ +#define SDHCI_AMD_RESET_DLL_REGISTER 0x908 + +static int amd_select_drive_strength(struct mmc_card *card, + unsigned int max_dtr, int host_drv, + int card_drv, int *drv_type) +{ + return MMC_SET_DRIVER_TYPE_A; +} + +static void sdhci_acpi_amd_hs400_dll(struct sdhci_host *host) +{ + /* AMD Platform requires dll setting */ + sdhci_writel(host, 0x40003210, SDHCI_AMD_RESET_DLL_REGISTER); + usleep_range(10, 20); + sdhci_writel(host, 0x40033210, SDHCI_AMD_RESET_DLL_REGISTER); +} + +/* + * For AMD Platform it is required to disable the tuning + * bit first controller to bring to HS Mode from HS200 + * mode, later enable to tune to HS400 mode. 
+ */ +static void amd_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + unsigned int old_timing = host->timing; + + sdhci_set_ios(mmc, ios); + if (old_timing == MMC_TIMING_MMC_HS200 && + ios->timing == MMC_TIMING_MMC_HS) + sdhci_writew(host, 0x9, SDHCI_HOST_CONTROL2); + if (old_timing != MMC_TIMING_MMC_HS400 && + ios->timing == MMC_TIMING_MMC_HS400) { + sdhci_writew(host, 0x80, SDHCI_HOST_CONTROL2); + sdhci_acpi_amd_hs400_dll(host); + } +} + +static const struct sdhci_ops sdhci_acpi_ops_amd = { + .set_clock = sdhci_set_clock, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, +}; + +static const struct sdhci_acpi_chip sdhci_acpi_chip_amd = { + .ops = &sdhci_acpi_ops_amd, +}; + +static int sdhci_acpi_emmc_amd_probe_slot(struct platform_device *pdev, + const char *hid, const char *uid) +{ + struct sdhci_acpi_host *c = platform_get_drvdata(pdev); + struct sdhci_host *host = c->host; + + sdhci_read_caps(host); + if (host->caps1 & SDHCI_SUPPORT_DDR50) + host->mmc->caps = MMC_CAP_1_8V_DDR; + + if ((host->caps1 & SDHCI_SUPPORT_SDR104) && + (host->mmc->caps & MMC_CAP_1_8V_DDR)) + host->mmc->caps2 = MMC_CAP2_HS400_1_8V; + + host->mmc_host_ops.select_drive_strength = amd_select_drive_strength; + host->mmc_host_ops.set_ios = amd_set_ios; + return 0; +} + +static const struct sdhci_acpi_slot sdhci_acpi_slot_amd_emmc = { + .chip = &sdhci_acpi_chip_amd, + .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE, + .quirks = SDHCI_QUIRK_32BIT_DMA_ADDR | SDHCI_QUIRK_32BIT_DMA_SIZE | + SDHCI_QUIRK_32BIT_ADMA_SIZE, + .probe_slot = sdhci_acpi_emmc_amd_probe_slot, +}; + struct sdhci_acpi_uid_slot { const char *hid; const char *uid; @@ -469,6 +546,7 @@ static const struct sdhci_acpi_uid_slot sdhci_acpi_uids[] = { { "PNP0D40" }, { "QCOM8051", NULL, &sdhci_acpi_slot_qcom_sd_3v }, { "QCOM8052", NULL, &sdhci_acpi_slot_qcom_sd }, + { "AMDI0040", NULL, &sdhci_acpi_slot_amd_emmc }, { }, }; 
@@ -485,6 +563,7 @@ static const struct acpi_device_id sdhci_acpi_ids[] = { { "PNP0D40" }, { "QCOM8051" }, { "QCOM8052" }, + { "AMDI0040" }, { }, }; MODULE_DEVICE_TABLE(acpi, sdhci_acpi_ids); -- cgit v1.2.3 From d2383318c5a626312d166217e3788e54b1650c56 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Dec 2017 14:55:30 +0200 Subject: mmc: core: Ensure cmd_completion is initialized mmc_test now uses mmc_start_request() to test sending commands during "ongoing" asynchronous transfers, i.e. tests: Commands during non-blocking read - use Set Block Count (CMD23) Commands during non-blocking write - use Set Block Count (CMD23) mmc_start_request() was not initializing cmd_completion, but cmd_completion is used by "ongoing" transfers, so move the initialization of cmd_completion into mmc_start_request(). Fixes: cb39f61e9b1e ("mmc: core: Export a few functions needed for blkmq support") Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 617802f45386..455abbf4f41e 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -348,6 +348,8 @@ int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) { int err; + init_completion(&mrq->cmd_completion); + mmc_retune_hold(host); if (mmc_card_removed(host->card)) @@ -418,8 +420,6 @@ static int __mmc_start_data_req(struct mmc_host *host, struct mmc_request *mrq) mrq->done = mmc_wait_data_done; mrq->host = host; - init_completion(&mrq->cmd_completion); - err = mmc_start_request(host, mrq); if (err) { mrq->cmd->error = err; @@ -439,8 +439,6 @@ static int __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq) init_completion(&mrq->completion); mrq->done = mmc_wait_done; - init_completion(&mrq->cmd_completion); - err = mmc_start_request(host, mrq); if (err) { mrq->cmd->error = err; -- cgit v1.2.3 From
23a185254ace8e63dc4ca36e0315aed9440ae749 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Dec 2017 14:55:31 +0200 Subject: mmc: mmc_test: Ensure command queue is disabled for testing mmc_test disables the command queue because none of the tests use the command queue. However the Reset Test will re-enable it, so disable it in that case too. Fixes: 9d4579a85c84 ("mmc: mmc_test: Disable Command Queue while mmc_test is used") Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_test.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index f96bbb8014e1..ef18daeaa4cc 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -2320,10 +2320,17 @@ static int mmc_test_reset(struct mmc_test_card *test) int err; err = mmc_hw_reset(host); - if (!err) + if (!err) { + /* + * Reset will re-enable the card's command queue, but tests + * expect it to be disabled. + */ + if (card->ext_csd.cmdq_en) + mmc_cmdq_disable(card); return RESULT_OK; - else if (err == -EOPNOTSUPP) + } else if (err == -EOPNOTSUPP) { return RESULT_UNSUP_HOST; + } return RESULT_FAIL; } -- cgit v1.2.3 From f2bc600008bd6f7f5d0b6b56238d14f95cd454d2 Mon Sep 17 00:00:00 2001 From: "yinbo.zhu" Date: Fri, 1 Dec 2017 15:09:34 +0800 Subject: mmc: sdhci-of-esdhc: fix the mmc error after sleep on ls1046ardb When the system wakes up from sleep on ls1046ardb, the SD operation fails with mmc error messages since the ESDHC_TB_EN bit couldn't be cleared by eSDHC_SYSCTL[RSTA]. It's proper to clear this bit in esdhc_reset() rather than in probe.
Signed-off-by: yinbo.zhu Acked-by: Yangbo Lu Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-esdhc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index d74030f3bd12..4ffa6b173a21 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -589,10 +589,18 @@ static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width) static void esdhc_reset(struct sdhci_host *host, u8 mask) { + u32 val; + sdhci_reset(host, mask); sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); + + if (mask & SDHCI_RESET_ALL) { + val = sdhci_readl(host, ESDHC_TBCTL); + val &= ~ESDHC_TB_EN; + sdhci_writel(host, val, ESDHC_TBCTL); + } } /* The SCFG, Supplemental Configuration Unit, provides SoC specific @@ -785,10 +793,6 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host) pltfm_host = sdhci_priv(host); esdhc = sdhci_pltfm_priv(pltfm_host); - val = sdhci_readl(host, ESDHC_TBCTL); - val &= ~ESDHC_TB_EN; - sdhci_writel(host, val, ESDHC_TBCTL); - host_ver = sdhci_readw(host, SDHCI_HOST_VERSION); esdhc->vendor_ver = (host_ver & SDHCI_VENDOR_VER_MASK) >> SDHCI_VENDOR_VER_SHIFT; -- cgit v1.2.3 From 1bec43a3b181baebdf8a4cd739b480a9132601d7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:17 +0200 Subject: mmc: core: Remove option not to use blk-mq Remove config option MMC_MQ_DEFAULT and parameter mmc_use_blk_mq, so that blk-mq must be used always. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/Kconfig | 10 ---------- drivers/mmc/core/core.c | 7 ------- drivers/mmc/core/core.h | 2 -- drivers/mmc/core/host.c | 2 -- drivers/mmc/core/host.h | 2 +- 5 files changed, 1 insertion(+), 22 deletions(-) diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig index 42565562577c..ec21388311db 100644 --- a/drivers/mmc/Kconfig +++ b/drivers/mmc/Kconfig @@ -12,16 +12,6 @@ menuconfig MMC If you want MMC/SD/SDIO support, you should say Y here and also to your specific host controller driver. -config MMC_MQ_DEFAULT - bool "MMC: use blk-mq I/O path by default" - depends on MMC && BLOCK - default y - ---help--- - This option enables the new blk-mq based I/O path for MMC block - devices by default. With the option the mmc_core.use_blk_mq - module/boot option defaults to Y, without it to N, but it can - still be overridden either way. - if MMC source "drivers/mmc/core/Kconfig" diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 455abbf4f41e..2a137976107f 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -66,13 +66,6 @@ static const unsigned freqs[] = { 400000, 300000, 200000, 100000 }; bool use_spi_crc = 1; module_param(use_spi_crc, bool, 0); -#ifdef CONFIG_MMC_MQ_DEFAULT -bool mmc_use_blk_mq = true; -#else -bool mmc_use_blk_mq = false; -#endif -module_param_named(use_blk_mq, mmc_use_blk_mq, bool, S_IWUSR | S_IRUGO); - static int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay) { diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 136617d2f971..3e3d21304e5f 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -35,8 +35,6 @@ struct mmc_bus_ops { int (*reset)(struct mmc_host *); }; -extern bool mmc_use_blk_mq; - void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops); void mmc_detach_bus(struct mmc_host *host); diff --git a/drivers/mmc/core/host.c 
b/drivers/mmc/core/host.c index 409a68a96a0a..64b03d6eaf18 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -404,8 +404,6 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) host->fixed_drv_type = -EINVAL; - host->use_blk_mq = mmc_use_blk_mq; - return host; } diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index 8ca284e079e3..6d896869e5c6 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -81,7 +81,7 @@ static inline bool mmc_card_hs400es(struct mmc_card *card) static inline bool mmc_host_use_blk_mq(struct mmc_host *host) { - return host->use_blk_mq; + return true; } #endif -- cgit v1.2.3 From 0fbfd12518303e9b32ac9fd231439459eac848f9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:18 +0200 Subject: mmc: block: Remove code no longer needed after the switch to blk-mq Remove code no longer needed after the switch to blk-mq. Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 723 +---------------------------------------------- drivers/mmc/core/block.h | 2 - drivers/mmc/core/queue.c | 240 +--------------- drivers/mmc/core/queue.h | 15 - 4 files changed, 16 insertions(+), 964 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ab384ba6cb37..579fc0bd722f 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -967,8 +967,7 @@ static inline bool mmc_blk_in_tran_state(u32 status) } static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, - bool hw_busy_detect, struct request *req, - u32 *resp_errs) + struct request *req, u32 *resp_errs) { unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms); int err = 0; @@ -988,11 +987,6 @@ static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, if (resp_errs) *resp_errs |= status; - /* We may rely on the host hw to handle busy detection.*/ - if ((card->host->caps & 
MMC_CAP_WAIT_WHILE_BUSY) && - hw_busy_detect) - break; - /* * Timeout if the device never becomes ready for data and never * leaves the program state. @@ -1014,243 +1008,6 @@ static int card_busy_detect(struct mmc_card *card, unsigned int timeout_ms, return err; } -static int card_busy_detect_err(struct mmc_card *card, unsigned int timeout_ms, - bool hw_busy_detect, struct request *req, - bool *gen_err) -{ - u32 resp_errs = 0; - int err; - - err = card_busy_detect(card, timeout_ms, hw_busy_detect, req, - &resp_errs); - if (resp_errs & R1_ERROR) { - pr_err("%s: %s: error sending status cmd, status %#x\n", - req->rq_disk->disk_name, __func__, resp_errs); - *gen_err = true; - } - - return err; -} - -static int send_stop(struct mmc_card *card, unsigned int timeout_ms, - struct request *req, bool *gen_err, u32 *stop_status) -{ - struct mmc_host *host = card->host; - struct mmc_command cmd = {}; - int err; - bool use_r1b_resp = rq_data_dir(req) == WRITE; - - /* - * Normally we use R1B responses for WRITE, but in cases where the host - * has specified a max_busy_timeout we need to validate it. A failure - * means we need to prevent the host from doing hw busy detection, which - * is done by converting to a R1 response instead. - */ - if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout)) - use_r1b_resp = false; - - cmd.opcode = MMC_STOP_TRANSMISSION; - if (use_r1b_resp) { - cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC; - cmd.busy_timeout = timeout_ms; - } else { - cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC; - } - - err = mmc_wait_for_cmd(host, &cmd, 5); - if (err) - return err; - - *stop_status = cmd.resp[0]; - - /* No need to check card status in case of READ. 
*/ - if (rq_data_dir(req) == READ) - return 0; - - if (!mmc_host_is_spi(host) && - (*stop_status & R1_ERROR)) { - pr_err("%s: %s: general error sending stop command, resp %#x\n", - req->rq_disk->disk_name, __func__, *stop_status); - *gen_err = true; - } - - return card_busy_detect_err(card, timeout_ms, use_r1b_resp, req, - gen_err); -} - -#define ERR_NOMEDIUM 3 -#define ERR_RETRY 2 -#define ERR_ABORT 1 -#define ERR_CONTINUE 0 - -static int mmc_blk_cmd_error(struct request *req, const char *name, int error, - bool status_valid, u32 status) -{ - switch (error) { - case -EILSEQ: - /* response crc error, retry the r/w cmd */ - pr_err("%s: %s sending %s command, card status %#x\n", - req->rq_disk->disk_name, "response CRC error", - name, status); - return ERR_RETRY; - - case -ETIMEDOUT: - pr_err("%s: %s sending %s command, card status %#x\n", - req->rq_disk->disk_name, "timed out", name, status); - - /* If the status cmd initially failed, retry the r/w cmd */ - if (!status_valid) { - pr_err("%s: status not valid, retrying timeout\n", - req->rq_disk->disk_name); - return ERR_RETRY; - } - - /* - * If it was a r/w cmd crc error, or illegal command - * (eg, issued in wrong state) then retry - we should - * have corrected the state problem above. - */ - if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND)) { - pr_err("%s: command error, retrying timeout\n", - req->rq_disk->disk_name); - return ERR_RETRY; - } - - /* Otherwise abort the command */ - return ERR_ABORT; - - default: - /* We don't understand the error code the driver gave us */ - pr_err("%s: unknown error %d sending read/write command, card status %#x\n", - req->rq_disk->disk_name, error, status); - return ERR_ABORT; - } -} - -/* - * Initial r/w and stop cmd error recovery. - * We don't know whether the card received the r/w cmd or not, so try to - * restore things back to a sane state. Essentially, we do this as follows: - * - Obtain card status. 
If the first attempt to obtain card status fails, - * the status word will reflect the failed status cmd, not the failed - * r/w cmd. If we fail to obtain card status, it suggests we can no - * longer communicate with the card. - * - Check the card state. If the card received the cmd but there was a - * transient problem with the response, it might still be in a data transfer - * mode. Try to send it a stop command. If this fails, we can't recover. - * - If the r/w cmd failed due to a response CRC error, it was probably - * transient, so retry the cmd. - * - If the r/w cmd timed out, but we didn't get the r/w cmd status, retry. - * - If the r/w cmd timed out, and the r/w cmd failed due to CRC error or - * illegal cmd, retry. - * Otherwise we don't understand what happened, so abort. - */ -static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req, - struct mmc_blk_request *brq, bool *ecc_err, bool *gen_err) -{ - bool prev_cmd_status_valid = true; - u32 status, stop_status = 0; - int err, retry; - - if (mmc_card_removed(card)) - return ERR_NOMEDIUM; - - /* - * Try to get card status which indicates both the card state - * and why there was no response. If the first attempt fails, - * we can't be sure the returned status is for the r/w command. - */ - for (retry = 2; retry >= 0; retry--) { - err = __mmc_send_status(card, &status, 0); - if (!err) - break; - - /* Re-tune if needed */ - mmc_retune_recheck(card->host); - - prev_cmd_status_valid = false; - pr_err("%s: error %d sending status command, %sing\n", - req->rq_disk->disk_name, err, retry ? "retry" : "abort"); - } - - /* We couldn't get a response from the card. Give up. 
*/ - if (err) { - /* Check if the card is removed */ - if (mmc_detect_card_removed(card->host)) - return ERR_NOMEDIUM; - return ERR_ABORT; - } - - /* Flag ECC errors */ - if ((status & R1_CARD_ECC_FAILED) || - (brq->stop.resp[0] & R1_CARD_ECC_FAILED) || - (brq->cmd.resp[0] & R1_CARD_ECC_FAILED)) - *ecc_err = true; - - /* Flag General errors */ - if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) - if ((status & R1_ERROR) || - (brq->stop.resp[0] & R1_ERROR)) { - pr_err("%s: %s: general error sending stop or status command, stop cmd response %#x, card status %#x\n", - req->rq_disk->disk_name, __func__, - brq->stop.resp[0], status); - *gen_err = true; - } - - /* - * Check the current card state. If it is in some data transfer - * mode, tell it to stop (and hopefully transition back to TRAN.) - */ - if (R1_CURRENT_STATE(status) == R1_STATE_DATA || - R1_CURRENT_STATE(status) == R1_STATE_RCV) { - unsigned int timeout; - - timeout = mmc_blk_data_timeout_ms(card->host, &brq->data); - err = send_stop(card, timeout, req, gen_err, &stop_status); - if (err) { - pr_err("%s: error %d sending stop command\n", - req->rq_disk->disk_name, err); - /* - * If the stop cmd also timed out, the card is probably - * not present, so abort. Other errors are bad news too. - */ - return ERR_ABORT; - } - - if (stop_status & R1_CARD_ECC_FAILED) - *ecc_err = true; - } - - /* Check for set block count errors */ - if (brq->sbc.error) - return mmc_blk_cmd_error(req, "SET_BLOCK_COUNT", brq->sbc.error, - prev_cmd_status_valid, status); - - /* Check for r/w command errors */ - if (brq->cmd.error) - return mmc_blk_cmd_error(req, "r/w cmd", brq->cmd.error, - prev_cmd_status_valid, status); - - /* Data errors */ - if (!brq->stop.error) - return ERR_CONTINUE; - - /* Now for stop errors. These aren't fatal to the transfer. 
*/ - pr_info("%s: error %d sending stop command, original cmd response %#x, card status %#x\n", - req->rq_disk->disk_name, brq->stop.error, - brq->cmd.resp[0], status); - - /* - * Subsitute in our own stop status as this will give the error - * state which happened during the execution of the r/w command. - */ - if (stop_status) { - brq->stop.resp[0] = stop_status; - brq->stop.error = 0; - } - return ERR_CONTINUE; -} - static int mmc_blk_reset(struct mmc_blk_data *md, struct mmc_host *host, int type) { @@ -1285,14 +1042,6 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) md->reset_done &= ~type; } -static void mmc_blk_end_request(struct request *req, blk_status_t error) -{ - if (req->mq_ctx) - blk_mq_end_request(req, error); - else - blk_end_request_all(req, error); -} - /* * The non-block commands come back from the block layer after it queued it and * processed it with all other requests and then they get issued in this @@ -1354,7 +1103,7 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) break; } mq_rq->drv_op_result = ret; - mmc_blk_end_request(req, ret ? BLK_STS_IOERR : BLK_STS_OK); + blk_mq_end_request(req, ret ? BLK_STS_IOERR : BLK_STS_OK); } static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) @@ -1397,7 +1146,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req) else mmc_blk_reset_success(md, type); fail: - mmc_blk_end_request(req, status); + blk_mq_end_request(req, status); } static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, @@ -1467,7 +1216,7 @@ out_retry: if (!err) mmc_blk_reset_success(md, type); out: - mmc_blk_end_request(req, status); + blk_mq_end_request(req, status); } static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) @@ -1477,7 +1226,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) int ret = 0; ret = mmc_flush_cache(card); - mmc_blk_end_request(req, ret ? 
BLK_STS_IOERR : BLK_STS_OK); + blk_mq_end_request(req, ret ? BLK_STS_IOERR : BLK_STS_OK); } /* @@ -1557,116 +1306,6 @@ static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) } } -static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, - struct mmc_async_req *areq) -{ - struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req, - areq); - struct mmc_blk_request *brq = &mq_mrq->brq; - struct request *req = mmc_queue_req_to_req(mq_mrq); - int need_retune = card->host->need_retune; - bool ecc_err = false; - bool gen_err = false; - - /* - * sbc.error indicates a problem with the set block count - * command. No data will have been transferred. - * - * cmd.error indicates a problem with the r/w command. No - * data will have been transferred. - * - * stop.error indicates a problem with the stop command. Data - * may have been transferred, or may still be transferring. - */ - - mmc_blk_eval_resp_error(brq); - - if (brq->sbc.error || brq->cmd.error || - brq->stop.error || brq->data.error) { - switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) { - case ERR_RETRY: - return MMC_BLK_RETRY; - case ERR_ABORT: - return MMC_BLK_ABORT; - case ERR_NOMEDIUM: - return MMC_BLK_NOMEDIUM; - case ERR_CONTINUE: - break; - } - } - - /* - * Check for errors relating to the execution of the - * initial command - such as address errors. No data - * has been transferred. - */ - if (brq->cmd.resp[0] & CMD_ERRORS) { - pr_err("%s: r/w command failed, status = %#x\n", - req->rq_disk->disk_name, brq->cmd.resp[0]); - return MMC_BLK_ABORT; - } - - /* - * Everything else is either success, or a data error of some - * kind. If it was a write, we may have transitioned to - * program mode, which we have to wait for it to complete. 
- */ - if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) { - int err; - - /* Check stop command response */ - if (brq->stop.resp[0] & R1_ERROR) { - pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n", - req->rq_disk->disk_name, __func__, - brq->stop.resp[0]); - gen_err = true; - } - - err = card_busy_detect_err(card, MMC_BLK_TIMEOUT_MS, false, req, - &gen_err); - if (err) - return MMC_BLK_CMD_ERR; - } - - /* if general error occurs, retry the write operation. */ - if (gen_err) { - pr_warn("%s: retrying write for general error\n", - req->rq_disk->disk_name); - return MMC_BLK_RETRY; - } - - /* Some errors (ECC) are flagged on the next commmand, so check stop, too */ - if (brq->data.error || brq->stop.error) { - if (need_retune && !brq->retune_retry_done) { - pr_debug("%s: retrying because a re-tune was needed\n", - req->rq_disk->disk_name); - brq->retune_retry_done = 1; - return MMC_BLK_RETRY; - } - pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n", - req->rq_disk->disk_name, brq->data.error ?: brq->stop.error, - (unsigned)blk_rq_pos(req), - (unsigned)blk_rq_sectors(req), - brq->cmd.resp[0], brq->stop.resp[0]); - - if (rq_data_dir(req) == READ) { - if (ecc_err) - return MMC_BLK_ECC_ERR; - return MMC_BLK_DATA_ERR; - } else { - return MMC_BLK_CMD_ERR; - } - } - - if (!brq->data.bytes_xfered) - return MMC_BLK_RETRY; - - if (blk_rq_bytes(req) != brq->data.bytes_xfered) - return MMC_BLK_PARTIAL; - - return MMC_BLK_SUCCESS; -} - static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, int disable_multi, bool *do_rel_wr_p, bool *do_data_tag_p) @@ -1782,8 +1421,6 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, brq->data.sg_len = i; } - mqrq->areq.mrq = &brq->mrq; - if (do_rel_wr_p) *do_rel_wr_p = do_rel_wr; @@ -1987,8 +1624,6 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC; 
brq->mrq.sbc = &brq->sbc; } - - mqrq->areq.err_check = mmc_blk_err_check; } #define MMC_MAX_RETRIES 5 @@ -2018,7 +1653,7 @@ static int mmc_blk_fix_state(struct mmc_card *card, struct request *req) mmc_blk_send_stop(card, timeout); - err = card_busy_detect(card, timeout, false, req, NULL); + err = card_busy_detect(card, timeout, req, NULL); mmc_retune_release(card->host); @@ -2242,7 +1877,7 @@ static int mmc_blk_card_busy(struct mmc_card *card, struct request *req) if (mmc_host_is_spi(card->host) || rq_data_dir(req) == READ) return 0; - err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, false, req, &status); + err = card_busy_detect(card, MMC_BLK_TIMEOUT_MS, req, &status); /* * Do not assume data transferred correctly if there are any error bits @@ -2622,350 +2257,6 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req) } } -static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card, - struct mmc_blk_request *brq, struct request *req, - bool old_req_pending) -{ - bool req_pending; - - /* - * If this is an SD card and we're writing, we can first - * mark the known good sectors as ok. - * - * If the card is not SD, we can still ok written sectors - * as reported by the controller (which might be less than - * the real number of written sectors, but never more). 
- */ - if (mmc_card_sd(card)) { - u32 blocks; - int err; - - err = mmc_sd_num_wr_blocks(card, &blocks); - if (err) - req_pending = old_req_pending; - else - req_pending = blk_end_request(req, BLK_STS_OK, blocks << 9); - } else { - req_pending = blk_end_request(req, BLK_STS_OK, brq->data.bytes_xfered); - } - return req_pending; -} - -static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card, - struct request *req, - struct mmc_queue_req *mqrq) -{ - if (mmc_card_removed(card)) - req->rq_flags |= RQF_QUIET; - while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req))); - mq->qcnt--; -} - -/** - * mmc_blk_rw_try_restart() - tries to restart the current async request - * @mq: the queue with the card and host to restart - * @req: a new request that want to be started after the current one - */ -static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req, - struct mmc_queue_req *mqrq) -{ - if (!req) - return; - - /* - * If the card was removed, just cancel everything and return. - */ - if (mmc_card_removed(mq->card)) { - req->rq_flags |= RQF_QUIET; - blk_end_request_all(req, BLK_STS_IOERR); - mq->qcnt--; /* FIXME: just set to 0? 
*/ - return; - } - /* Else proceed and try to restart the current async request */ - mmc_blk_rw_rq_prep(mqrq, mq->card, 0, mq); - mmc_start_areq(mq->card->host, &mqrq->areq, NULL); -} - -static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) -{ - struct mmc_blk_data *md = mq->blkdata; - struct mmc_card *card = md->queue.card; - struct mmc_blk_request *brq; - int disable_multi = 0, retry = 0, type, retune_retry_done = 0; - enum mmc_blk_status status; - struct mmc_queue_req *mqrq_cur = NULL; - struct mmc_queue_req *mq_rq; - struct request *old_req; - struct mmc_async_req *new_areq; - struct mmc_async_req *old_areq; - bool req_pending = true; - - if (new_req) { - mqrq_cur = req_to_mmc_queue_req(new_req); - mq->qcnt++; - } - - if (!mq->qcnt) - return; - - do { - if (new_req) { - /* - * When 4KB native sector is enabled, only 8 blocks - * multiple read or write is allowed - */ - if (mmc_large_sector(card) && - !IS_ALIGNED(blk_rq_sectors(new_req), 8)) { - pr_err("%s: Transfer size is not 4KB sector size aligned\n", - new_req->rq_disk->disk_name); - mmc_blk_rw_cmd_abort(mq, card, new_req, mqrq_cur); - return; - } - - mmc_blk_rw_rq_prep(mqrq_cur, card, 0, mq); - new_areq = &mqrq_cur->areq; - } else - new_areq = NULL; - - old_areq = mmc_start_areq(card->host, new_areq, &status); - if (!old_areq) { - /* - * We have just put the first request into the pipeline - * and there is nothing more to do until it is - * complete. - */ - return; - } - - /* - * An asynchronous request has been completed and we proceed - * to handle the result of it. - */ - mq_rq = container_of(old_areq, struct mmc_queue_req, areq); - brq = &mq_rq->brq; - old_req = mmc_queue_req_to_req(mq_rq); - type = rq_data_dir(old_req) == READ ? MMC_BLK_READ : MMC_BLK_WRITE; - - switch (status) { - case MMC_BLK_SUCCESS: - case MMC_BLK_PARTIAL: - /* - * Reset success, and accept bytes_xfered. For - * MMC_BLK_PARTIAL re-submit the remaining request. 
For - * MMC_BLK_SUCCESS error out the remaining request (it - * could not be re-submitted anyway if a next request - * had already begun). - */ - mmc_blk_reset_success(md, type); - - req_pending = blk_end_request(old_req, BLK_STS_OK, - brq->data.bytes_xfered); - /* - * If the blk_end_request function returns non-zero even - * though all data has been transferred and no errors - * were returned by the host controller, it's a bug. - */ - if (status == MMC_BLK_SUCCESS && req_pending) { - pr_err("%s BUG rq_tot %d d_xfer %d\n", - __func__, blk_rq_bytes(old_req), - brq->data.bytes_xfered); - mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); - return; - } - break; - case MMC_BLK_CMD_ERR: - /* - * For SD cards, get bytes written, but do not accept - * bytes_xfered if that fails. For MMC cards accept - * bytes_xfered. Then try to reset. If reset fails then - * error out the remaining request, otherwise retry - * once (N.B mmc_blk_reset() will not succeed twice in a - * row). - */ - req_pending = mmc_blk_rw_cmd_err(md, card, brq, old_req, req_pending); - if (mmc_blk_reset(md, card->host, type)) { - if (req_pending) - mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); - else - mq->qcnt--; - mmc_blk_rw_try_restart(mq, new_req, mqrq_cur); - return; - } - if (!req_pending) { - mq->qcnt--; - mmc_blk_rw_try_restart(mq, new_req, mqrq_cur); - return; - } - break; - case MMC_BLK_RETRY: - /* - * Do not accept bytes_xfered, but retry up to 5 times, - * otherwise same as abort. - */ - retune_retry_done = brq->retune_retry_done; - if (retry++ < 5) - break; - /* Fall through */ - case MMC_BLK_ABORT: - /* - * Do not accept bytes_xfered, but try to reset. If - * reset succeeds, try once more, otherwise error out - * the request. - */ - if (!mmc_blk_reset(md, card->host, type)) - break; - mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); - mmc_blk_rw_try_restart(mq, new_req, mqrq_cur); - return; - case MMC_BLK_DATA_ERR: { - int err; - - /* - * Do not accept bytes_xfered, but try to reset. 
If - * reset succeeds, try once more. If reset fails with - * ENODEV which means the partition is wrong, then error - * out the request. Otherwise attempt to read one sector - * at a time. - */ - err = mmc_blk_reset(md, card->host, type); - if (!err) - break; - if (err == -ENODEV) { - mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); - mmc_blk_rw_try_restart(mq, new_req, mqrq_cur); - return; - } - /* Fall through */ - } - case MMC_BLK_ECC_ERR: - /* - * Do not accept bytes_xfered. If reading more than one - * sector, try reading one sector at a time. - */ - if (brq->data.blocks > 1) { - /* Redo read one sector at a time */ - pr_warn("%s: retrying using single block read\n", - old_req->rq_disk->disk_name); - disable_multi = 1; - break; - } - /* - * After an error, we redo I/O one sector at a - * time, so we only reach here after trying to - * read a single sector. - */ - req_pending = blk_end_request(old_req, BLK_STS_IOERR, - brq->data.blksz); - if (!req_pending) { - mq->qcnt--; - mmc_blk_rw_try_restart(mq, new_req, mqrq_cur); - return; - } - break; - case MMC_BLK_NOMEDIUM: - /* Do not accept bytes_xfered. Error out the request */ - mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); - mmc_blk_rw_try_restart(mq, new_req, mqrq_cur); - return; - default: - /* Do not accept bytes_xfered. Error out the request */ - pr_err("%s: Unhandled return value (%d)", - old_req->rq_disk->disk_name, status); - mmc_blk_rw_cmd_abort(mq, card, old_req, mq_rq); - mmc_blk_rw_try_restart(mq, new_req, mqrq_cur); - return; - } - - if (req_pending) { - /* - * In case of a incomplete request - * prepare it again and resend. 
- */ - mmc_blk_rw_rq_prep(mq_rq, card, - disable_multi, mq); - mmc_start_areq(card->host, - &mq_rq->areq, NULL); - mq_rq->brq.retune_retry_done = retune_retry_done; - } - } while (req_pending); - - mq->qcnt--; -} - -void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) -{ - int ret; - struct mmc_blk_data *md = mq->blkdata; - struct mmc_card *card = md->queue.card; - - if (req && !mq->qcnt) - /* claim host only for the first request */ - mmc_get_card(card, NULL); - - ret = mmc_blk_part_switch(card, md->part_type); - if (ret) { - if (req) { - blk_end_request_all(req, BLK_STS_IOERR); - } - goto out; - } - - if (req) { - switch (req_op(req)) { - case REQ_OP_DRV_IN: - case REQ_OP_DRV_OUT: - /* - * Complete ongoing async transfer before issuing - * ioctl()s - */ - if (mq->qcnt) - mmc_blk_issue_rw_rq(mq, NULL); - mmc_blk_issue_drv_op(mq, req); - break; - case REQ_OP_DISCARD: - /* - * Complete ongoing async transfer before issuing - * discard. - */ - if (mq->qcnt) - mmc_blk_issue_rw_rq(mq, NULL); - mmc_blk_issue_discard_rq(mq, req); - break; - case REQ_OP_SECURE_ERASE: - /* - * Complete ongoing async transfer before issuing - * secure erase. - */ - if (mq->qcnt) - mmc_blk_issue_rw_rq(mq, NULL); - mmc_blk_issue_secdiscard_rq(mq, req); - break; - case REQ_OP_FLUSH: - /* - * Complete ongoing async transfer before issuing - * flush. 
- */ - if (mq->qcnt) - mmc_blk_issue_rw_rq(mq, NULL); - mmc_blk_issue_flush(mq, req); - break; - default: - /* Normal request, just issue it */ - mmc_blk_issue_rw_rq(mq, req); - card->host->context_info.is_waiting_last_req = false; - break; - } - } else { - /* No request, flushing the pipeline with NULL */ - mmc_blk_issue_rw_rq(mq, NULL); - card->host->context_info.is_waiting_last_req = false; - } - -out: - if (!mq->qcnt) - mmc_put_card(card, NULL); -} - static inline int mmc_blk_readonly(struct mmc_card *card) { return mmc_card_readonly(card) || diff --git a/drivers/mmc/core/block.h b/drivers/mmc/core/block.h index b126418fd163..31153f656f41 100644 --- a/drivers/mmc/core/block.h +++ b/drivers/mmc/core/block.h @@ -5,8 +5,6 @@ struct mmc_queue; struct request; -void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req); - void mmc_blk_cqe_recovery(struct mmc_queue *mq); enum mmc_issued; diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 5db388081789..421fab7250ac 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -24,22 +24,6 @@ #include "card.h" #include "host.h" -/* - * Prepare a MMC request. This just filters out odd stuff. 
- */ -static int mmc_prep_request(struct request_queue *q, struct request *req) -{ - struct mmc_queue *mq = q->queuedata; - - if (mq && mmc_card_removed(mq->card)) - return BLKPREP_KILL; - - req->rq_flags |= RQF_DONTPREP; - req_to_mmc_queue_req(req)->retries = 0; - - return BLKPREP_OK; -} - static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq) { /* Allow only 1 DCMD at a time */ @@ -181,86 +165,6 @@ static void mmc_mq_recovery_handler(struct work_struct *work) blk_mq_run_hw_queues(q, true); } -static int mmc_queue_thread(void *d) -{ - struct mmc_queue *mq = d; - struct request_queue *q = mq->queue; - struct mmc_context_info *cntx = &mq->card->host->context_info; - - current->flags |= PF_MEMALLOC; - - down(&mq->thread_sem); - do { - struct request *req; - - spin_lock_irq(q->queue_lock); - set_current_state(TASK_INTERRUPTIBLE); - req = blk_fetch_request(q); - mq->asleep = false; - cntx->is_waiting_last_req = false; - cntx->is_new_req = false; - if (!req) { - /* - * Dispatch queue is empty so set flags for - * mmc_request_fn() to wake us up. - */ - if (mq->qcnt) - cntx->is_waiting_last_req = true; - else - mq->asleep = true; - } - spin_unlock_irq(q->queue_lock); - - if (req || mq->qcnt) { - set_current_state(TASK_RUNNING); - mmc_blk_issue_rq(mq, req); - cond_resched(); - } else { - if (kthread_should_stop()) { - set_current_state(TASK_RUNNING); - break; - } - up(&mq->thread_sem); - schedule(); - down(&mq->thread_sem); - } - } while (1); - up(&mq->thread_sem); - - return 0; -} - -/* - * Generic MMC request handler. This is called for any queue on a - * particular host. When the host is not busy, we look for a request - * on any queue on this host, and attempt to issue it. This may - * not be the queue we were asked to process. 
- */ -static void mmc_request_fn(struct request_queue *q) -{ - struct mmc_queue *mq = q->queuedata; - struct request *req; - struct mmc_context_info *cntx; - - if (!mq) { - while ((req = blk_fetch_request(q)) != NULL) { - req->rq_flags |= RQF_QUIET; - __blk_end_request_all(req, BLK_STS_IOERR); - } - return; - } - - cntx = &mq->card->host->context_info; - - if (cntx->is_waiting_last_req) { - cntx->is_new_req = true; - wake_up_interruptible(&cntx->wait); - } - - if (mq->asleep) - wake_up_process(mq->thread); -} - static struct scatterlist *mmc_alloc_sg(int sg_len, gfp_t gfp) { struct scatterlist *sg; @@ -311,12 +215,6 @@ static int __mmc_init_request(struct mmc_queue *mq, struct request *req, return 0; } -static int mmc_init_request(struct request_queue *q, struct request *req, - gfp_t gfp) -{ - return __mmc_init_request(q->queuedata, req, gfp); -} - static void mmc_exit_request(struct request_queue *q, struct request *req) { struct mmc_queue_req *mq_rq = req_to_mmc_queue_req(req); @@ -469,9 +367,6 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) blk_queue_max_segments(mq->queue, host->max_segs); blk_queue_max_segment_size(mq->queue, host->max_seg_size); - /* Initialize thread_sem even if it is not used */ - sema_init(&mq->thread_sem, 1); - INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler); INIT_WORK(&mq->complete_work, mmc_blk_mq_complete_work); @@ -559,51 +454,15 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock, const char *subname) { struct mmc_host *host = card->host; - int ret = -ENOMEM; mq->card = card; mq->use_cqe = host->cqe_enabled; - if (mq->use_cqe || mmc_host_use_blk_mq(host)) - return mmc_mq_init(mq, card, lock); - - mq->queue = blk_alloc_queue(GFP_KERNEL); - if (!mq->queue) - return -ENOMEM; - mq->queue->queue_lock = lock; - mq->queue->request_fn = mmc_request_fn; - mq->queue->init_rq_fn = mmc_init_request; - mq->queue->exit_rq_fn = mmc_exit_request; - mq->queue->cmd_size = sizeof(struct 
mmc_queue_req); - mq->queue->queuedata = mq; - mq->qcnt = 0; - ret = blk_init_allocated_queue(mq->queue); - if (ret) { - blk_cleanup_queue(mq->queue); - return ret; - } - - blk_queue_prep_rq(mq->queue, mmc_prep_request); - - mmc_setup_queue(mq, card); - - mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd/%d%s", - host->index, subname ? subname : ""); - - if (IS_ERR(mq->thread)) { - ret = PTR_ERR(mq->thread); - goto cleanup_queue; - } - - return 0; - -cleanup_queue: - blk_cleanup_queue(mq->queue); - return ret; + return mmc_mq_init(mq, card, lock); } -static void mmc_mq_queue_suspend(struct mmc_queue *mq) +void mmc_queue_suspend(struct mmc_queue *mq) { blk_mq_quiesce_queue(mq->queue); @@ -615,71 +474,22 @@ static void mmc_mq_queue_suspend(struct mmc_queue *mq) mmc_release_host(mq->card->host); } -static void mmc_mq_queue_resume(struct mmc_queue *mq) +void mmc_queue_resume(struct mmc_queue *mq) { blk_mq_unquiesce_queue(mq->queue); } -static void __mmc_queue_suspend(struct mmc_queue *mq) -{ - struct request_queue *q = mq->queue; - unsigned long flags; - - if (!mq->suspended) { - mq->suspended |= true; - - spin_lock_irqsave(q->queue_lock, flags); - blk_stop_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - - down(&mq->thread_sem); - } -} - -static void __mmc_queue_resume(struct mmc_queue *mq) -{ - struct request_queue *q = mq->queue; - unsigned long flags; - - if (mq->suspended) { - mq->suspended = false; - - up(&mq->thread_sem); - - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } -} - void mmc_cleanup_queue(struct mmc_queue *mq) { struct request_queue *q = mq->queue; - unsigned long flags; - if (q->mq_ops) { - /* - * The legacy code handled the possibility of being suspended, - * so do that here too. 
- */ - if (blk_queue_quiesced(q)) - blk_mq_unquiesce_queue(q); - goto out_cleanup; - } - - /* Make sure the queue isn't suspended, as that will deadlock */ - mmc_queue_resume(mq); - - /* Then terminate our worker thread */ - kthread_stop(mq->thread); - - /* Empty the queue */ - spin_lock_irqsave(q->queue_lock, flags); - q->queuedata = NULL; - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); + /* + * The legacy code handled the possibility of being suspended, + * so do that here too. + */ + if (blk_queue_quiesced(q)) + blk_mq_unquiesce_queue(q); -out_cleanup: blk_cleanup_queue(q); /* @@ -692,38 +502,6 @@ out_cleanup: mq->card = NULL; } -/** - * mmc_queue_suspend - suspend a MMC request queue - * @mq: MMC queue to suspend - * - * Stop the block request queue, and wait for our thread to - * complete any outstanding requests. This ensures that we - * won't suspend while a request is being processed. - */ -void mmc_queue_suspend(struct mmc_queue *mq) -{ - struct request_queue *q = mq->queue; - - if (q->mq_ops) - mmc_mq_queue_suspend(mq); - else - __mmc_queue_suspend(mq); -} - -/** - * mmc_queue_resume - resume a previously suspended MMC request queue - * @mq: MMC queue to resume - */ -void mmc_queue_resume(struct mmc_queue *mq) -{ - struct request_queue *q = mq->queue; - - if (q->mq_ops) - mmc_mq_queue_resume(mq); - else - __mmc_queue_resume(mq); -} - /* * Prepare the sg list(s) to be handed of to the host driver */ diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index 34f601c6dd39..17e59d50b496 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -34,7 +34,6 @@ static inline struct request *mmc_queue_req_to_req(struct mmc_queue_req *mqr) return blk_mq_rq_from_pdu(mqr); } -struct task_struct; struct mmc_blk_data; struct mmc_blk_ioc_data; @@ -44,7 +43,6 @@ struct mmc_blk_request { struct mmc_command cmd; struct mmc_command stop; struct mmc_data data; - int retune_retry_done; }; /** @@ -66,7 +64,6 @@ enum mmc_drv_op { 
struct mmc_queue_req { struct mmc_blk_request brq; struct scatterlist *sg; - struct mmc_async_req areq; enum mmc_drv_op drv_op; int drv_op_result; void *drv_op_data; @@ -76,22 +73,10 @@ struct mmc_queue_req { struct mmc_queue { struct mmc_card *card; - struct task_struct *thread; - struct semaphore thread_sem; struct mmc_ctx ctx; struct blk_mq_tag_set tag_set; - bool suspended; - bool asleep; struct mmc_blk_data *blkdata; struct request_queue *queue; - /* - * FIXME: this counter is not a very reliable way of keeping - * track of how many requests that are ongoing. Switch to just - * letting the block core keep track of requests and per-request - * associated mmc_queue_req data. - */ - int qcnt; - int in_flight[MMC_ISSUE_MAX]; unsigned int cqe_busy; #define MMC_CQE_DCMD_BUSY BIT(0) -- cgit v1.2.3 From 41acc8ec04f32abb16e035ca1c9fe4d52819601e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 11 Dec 2017 02:24:23 +0000 Subject: ASoC: rsnd: don't use runtime->sample_bits Current rsnd driver is judging 16bit/24bit data by using runtime->sample_bits, but it is indicating physical size, not format size. This is confusable code. This patch uses snd_pcm_format_width() to be more correct code. 
Tested-by: Hiroyuki Yokoyama Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 12 ++++-------- sound/soc/sh/rcar/ssi.c | 9 +++------ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 8e50b284230d..a96ebebd96de 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -274,10 +274,10 @@ u32 rsnd_get_adinr_bit(struct rsnd_mod *mod, struct rsnd_dai_stream *io) struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); struct device *dev = rsnd_priv_to_dev(priv); - switch (runtime->sample_bits) { + switch (snd_pcm_format_width(runtime->format)) { case 16: return 8 << 16; - case 32: + case 24: return 0 << 16; } @@ -327,7 +327,7 @@ u32 rsnd_get_dalign(struct rsnd_mod *mod, struct rsnd_dai_stream *io) } /* Non target mod or 24bit data needs normal DALIGN */ - if ((runtime->sample_bits != 16) || + if ((snd_pcm_format_width(runtime->format) != 16) || (mod != target)) return 0x76543210; /* Target mod needs inverted DALIGN when 16bit */ @@ -362,12 +362,8 @@ u32 rsnd_get_busif_shift(struct rsnd_dai_stream *io, struct rsnd_mod *mod) * HW 24bit data is located as 0x******00 * */ - switch (runtime->sample_bits) { - case 16: + if (snd_pcm_format_width(runtime->format) == 16) return 0; - case 32: - break; - } for (i = 0; i < ARRAY_SIZE(playback_mods); i++) { tmod = rsnd_io_to_mod(io, mods[i]); diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index f21202429000..5a70fdc3c680 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -370,11 +370,11 @@ static void rsnd_ssi_config_init(struct rsnd_mod *mod, if (rsnd_io_is_play(io)) cr_own |= TRMD; - switch (runtime->sample_bits) { + switch (snd_pcm_format_width(runtime->format)) { case 16: cr_own |= DWL_16; break; - case 32: + case 24: cr_own |= DWL_24; break; } @@ -677,11 +677,8 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod, rsnd_ssi_pointer_offset(mod, io, 0)); int shift = 0; 
- switch (runtime->sample_bits) { - case 32: + if (snd_pcm_format_width(runtime->format) == 24) shift = 8; - break; - } /* * 8/16/32 data can be assesse to TDR/RDR register -- cgit v1.2.3 From d8d9b9730cd62c9c7d24d5277542da98c09ea728 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 11 Dec 2017 02:40:22 +0000 Subject: ASoC: rsnd: PIO related function cleanup SSI had shared counting pointer position method between PIO/DMA mode before. But now DMA mode is using DMAEngine feature to get it. Thus, this counting pointer position method is needed for only PIO mode. We don't need to share code anymore. This patch names PIO related functions as rsnd_ssi_pio_xxx(), and merged/cleanuped each feature. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssi.c | 153 +++++++++++++++++++++++------------------------- 1 file changed, 72 insertions(+), 81 deletions(-) diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 5a70fdc3c680..97a9db892a8f 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -79,6 +79,7 @@ struct rsnd_ssi { int irq; unsigned int usrcnt; + /* for PIO */ int byte_pos; int byte_per_period; int next_period_byte; @@ -413,61 +414,6 @@ static void rsnd_ssi_register_setup(struct rsnd_mod *mod) ssi->cr_en); } -static void rsnd_ssi_pointer_init(struct rsnd_mod *mod, - struct rsnd_dai_stream *io) -{ - struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); - struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - - ssi->byte_pos = 0; - ssi->byte_per_period = runtime->period_size * - runtime->channels * - samples_to_bytes(runtime, 1); - ssi->next_period_byte = ssi->byte_per_period; -} - -static int rsnd_ssi_pointer_offset(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - int additional) -{ - struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); - struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - int pos = ssi->byte_pos + additional; - - pos %= (runtime->periods * ssi->byte_per_period); - - return pos; 
-} - -static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod, - struct rsnd_dai_stream *io, - int byte) -{ - struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); - bool ret = false; - int byte_pos; - - byte_pos = ssi->byte_pos + byte; - - if (byte_pos >= ssi->next_period_byte) { - struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - int period_pos = byte_pos / ssi->byte_per_period; - - ssi->next_period_byte = (period_pos + 1) * ssi->byte_per_period; - - if (period_pos >= runtime->periods) { - byte_pos = 0; - ssi->next_period_byte = ssi->byte_per_period; - } - - ret = true; - } - - WRITE_ONCE(ssi->byte_pos, byte_pos); - - return ret; -} - /* * SSI mod common functions */ @@ -481,8 +427,6 @@ static int rsnd_ssi_init(struct rsnd_mod *mod, if (!rsnd_ssi_is_run_mods(mod, io)) return 0; - rsnd_ssi_pointer_init(mod, io); - ssi->usrcnt++; rsnd_mod_power_on(mod); @@ -653,6 +597,8 @@ static int rsnd_ssi_irq(struct rsnd_mod *mod, return 0; } +static bool rsnd_ssi_pio_interrupt(struct rsnd_mod *mod, + struct rsnd_dai_stream *io); static void __rsnd_ssi_interrupt(struct rsnd_mod *mod, struct rsnd_dai_stream *io) { @@ -671,27 +617,8 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod, status = rsnd_ssi_status_get(mod); /* PIO only */ - if (!is_dma && (status & DIRQ)) { - struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - u32 *buf = (u32 *)(runtime->dma_area + - rsnd_ssi_pointer_offset(mod, io, 0)); - int shift = 0; - - if (snd_pcm_format_width(runtime->format) == 24) - shift = 8; - - /* - * 8/16/32 data can be assesse to TDR/RDR register - * directly as 32bit data - * see rsnd_ssi_init() - */ - if (rsnd_io_is_play(io)) - rsnd_mod_write(mod, SSITDR, (*buf) << shift); - else - *buf = (rsnd_mod_read(mod, SSIRDR) >> shift); - - elapsed = rsnd_ssi_pointer_update(mod, io, sizeof(*buf)); - } + if (!is_dma && (status & DIRQ)) + elapsed = rsnd_ssi_pio_interrupt(mod, io); /* DMA only */ if (is_dma && (status & (UIRQ | OIRQ))) @@ -829,7 +756,71 @@ static int 
rsnd_ssi_common_remove(struct rsnd_mod *mod, return 0; } -static int rsnd_ssi_pointer(struct rsnd_mod *mod, +/* + * SSI PIO functions + */ +static bool rsnd_ssi_pio_interrupt(struct rsnd_mod *mod, + struct rsnd_dai_stream *io) +{ + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + u32 *buf = (u32 *)(runtime->dma_area + ssi->byte_pos); + int shift = 0; + int byte_pos; + bool elapsed = false; + + if (snd_pcm_format_width(runtime->format) == 24) + shift = 8; + + /* + * 8/16/32 data can be assesse to TDR/RDR register + * directly as 32bit data + * see rsnd_ssi_init() + */ + if (rsnd_io_is_play(io)) + rsnd_mod_write(mod, SSITDR, (*buf) << shift); + else + *buf = (rsnd_mod_read(mod, SSIRDR) >> shift); + + byte_pos = ssi->byte_pos + sizeof(*buf); + + if (byte_pos >= ssi->next_period_byte) { + int period_pos = byte_pos / ssi->byte_per_period; + + if (period_pos >= runtime->periods) { + byte_pos = 0; + period_pos = 0; + } + + ssi->next_period_byte = (period_pos + 1) * ssi->byte_per_period; + + elapsed = true; + } + + WRITE_ONCE(ssi->byte_pos, byte_pos); + + return elapsed; +} + +static int rsnd_ssi_pio_init(struct rsnd_mod *mod, + struct rsnd_dai_stream *io, + struct rsnd_priv *priv) +{ + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); + + if (!rsnd_ssi_is_parent(mod, io)) { + ssi->byte_pos = 0; + ssi->byte_per_period = runtime->period_size * + runtime->channels * + samples_to_bytes(runtime, 1); + ssi->next_period_byte = ssi->byte_per_period; + } + + return rsnd_ssi_init(mod, io, priv); +} + +static int rsnd_ssi_pio_pointer(struct rsnd_mod *mod, struct rsnd_dai_stream *io, snd_pcm_uframes_t *pointer) { @@ -845,12 +836,12 @@ static struct rsnd_mod_ops rsnd_ssi_pio_ops = { .name = SSI_NAME, .probe = rsnd_ssi_common_probe, .remove = rsnd_ssi_common_remove, - .init = rsnd_ssi_init, + .init = rsnd_ssi_pio_init, .quit = rsnd_ssi_quit, .start = rsnd_ssi_start, .stop = 
rsnd_ssi_stop, .irq = rsnd_ssi_irq, - .pointer= rsnd_ssi_pointer, + .pointer = rsnd_ssi_pio_pointer, .pcm_new = rsnd_ssi_pcm_new, .hw_params = rsnd_ssi_hw_params, }; -- cgit v1.2.3 From 9f761183947b91aacc4ed5c2a1a39ac08b938b6c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 11 Dec 2017 02:43:41 +0000 Subject: ASoC: rsnd: remove unneeded "is_graph" from __rsnd_dai_probe() Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index a96ebebd96de..64d5ecb86528 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1017,7 +1017,7 @@ of_node_compatible: static void __rsnd_dai_probe(struct rsnd_priv *priv, struct device_node *dai_np, - int dai_i, int is_graph) + int dai_i) { struct device_node *playback, *capture; struct rsnd_dai_stream *io_playback; @@ -1116,13 +1116,13 @@ static int rsnd_dai_probe(struct rsnd_priv *priv) dai_i = 0; if (is_graph) { for_each_endpoint_of_node(dai_node, dai_np) { - __rsnd_dai_probe(priv, dai_np, dai_i, is_graph); + __rsnd_dai_probe(priv, dai_np, dai_i); rsnd_ssi_parse_hdmi_connection(priv, dai_np, dai_i); dai_i++; } } else { for_each_child_of_node(dai_node, dai_np) - __rsnd_dai_probe(priv, dai_np, dai_i++, is_graph); + __rsnd_dai_probe(priv, dai_np, dai_i++); } return 0; -- cgit v1.2.3 From 126b62700386da782f83579e9b0431ea76c2da3d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Nov 2017 15:41:19 +0200 Subject: mmc: core: Remove code no longer needed after the switch to blk-mq Remove code no longer needed after the switch to blk-mq. 
Signed-off-by: Adrian Hunter Acked-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/bus.c | 2 - drivers/mmc/core/core.c | 183 +---------------------------------------------- drivers/mmc/core/core.h | 8 --- drivers/mmc/core/host.h | 5 -- include/linux/mmc/host.h | 3 - 5 files changed, 1 insertion(+), 200 deletions(-) diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index 7586ff2ad1f1..fc92c6c1c9a4 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -351,8 +351,6 @@ int mmc_add_card(struct mmc_card *card) #ifdef CONFIG_DEBUG_FS mmc_add_card_debugfs(card); #endif - mmc_init_context_info(card->host); - card->dev.of_node = mmc_of_find_child_device(card->host, 0); device_enable_async_suspend(&card->dev); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 2a137976107f..fd64e6d425e5 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -363,20 +363,6 @@ int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq) } EXPORT_SYMBOL(mmc_start_request); -/* - * mmc_wait_data_done() - done callback for data request - * @mrq: done data request - * - * Wakes up mmc context, passed as a callback to host controller driver - */ -static void mmc_wait_data_done(struct mmc_request *mrq) -{ - struct mmc_context_info *context_info = &mrq->host->context_info; - - context_info->is_done_rcv = true; - wake_up_interruptible(&context_info->wait); -} - static void mmc_wait_done(struct mmc_request *mrq) { complete(&mrq->completion); @@ -394,35 +380,6 @@ static inline void mmc_wait_ongoing_tfr_cmd(struct mmc_host *host) wait_for_completion(&ongoing_mrq->cmd_completion); } -/* - *__mmc_start_data_req() - starts data request - * @host: MMC host to start the request - * @mrq: data request to start - * - * Sets the done callback to be called when request is completed by the card. 
- * Starts data mmc request execution - * If an ongoing transfer is already in progress, wait for the command line - * to become available before sending another command. - */ -static int __mmc_start_data_req(struct mmc_host *host, struct mmc_request *mrq) -{ - int err; - - mmc_wait_ongoing_tfr_cmd(host); - - mrq->done = mmc_wait_data_done; - mrq->host = host; - - err = mmc_start_request(host, mrq); - if (err) { - mrq->cmd->error = err; - mmc_complete_cmd(mrq); - mmc_wait_data_done(mrq); - } - - return err; -} - static int __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq) { int err; @@ -648,132 +605,10 @@ EXPORT_SYMBOL(mmc_cqe_recovery); */ bool mmc_is_req_done(struct mmc_host *host, struct mmc_request *mrq) { - if (host->areq) - return host->context_info.is_done_rcv; - else - return completion_done(&mrq->completion); + return completion_done(&mrq->completion); } EXPORT_SYMBOL(mmc_is_req_done); -/** - * mmc_finalize_areq() - finalize an asynchronous request - * @host: MMC host to finalize any ongoing request on - * - * Returns the status of the ongoing asynchronous request, but - * MMC_BLK_SUCCESS if no request was going on. 
- */ -static enum mmc_blk_status mmc_finalize_areq(struct mmc_host *host) -{ - struct mmc_context_info *context_info = &host->context_info; - enum mmc_blk_status status; - - if (!host->areq) - return MMC_BLK_SUCCESS; - - while (1) { - wait_event_interruptible(context_info->wait, - (context_info->is_done_rcv || - context_info->is_new_req)); - - if (context_info->is_done_rcv) { - struct mmc_command *cmd; - - context_info->is_done_rcv = false; - cmd = host->areq->mrq->cmd; - - if (!cmd->error || !cmd->retries || - mmc_card_removed(host->card)) { - status = host->areq->err_check(host->card, - host->areq); - break; /* return status */ - } else { - mmc_retune_recheck(host); - pr_info("%s: req failed (CMD%u): %d, retrying...\n", - mmc_hostname(host), - cmd->opcode, cmd->error); - cmd->retries--; - cmd->error = 0; - __mmc_start_request(host, host->areq->mrq); - continue; /* wait for done/new event again */ - } - } - - return MMC_BLK_NEW_REQUEST; - } - - mmc_retune_release(host); - - /* - * Check BKOPS urgency for each R1 response - */ - if (host->card && mmc_card_mmc(host->card) && - ((mmc_resp_type(host->areq->mrq->cmd) == MMC_RSP_R1) || - (mmc_resp_type(host->areq->mrq->cmd) == MMC_RSP_R1B)) && - (host->areq->mrq->cmd->resp[0] & R1_EXCEPTION_EVENT)) { - mmc_start_bkops(host->card, true); - } - - return status; -} - -/** - * mmc_start_areq - start an asynchronous request - * @host: MMC host to start command - * @areq: asynchronous request to start - * @ret_stat: out parameter for status - * - * Start a new MMC custom command request for a host. - * If there is on ongoing async request wait for completion - * of that request and start the new one and return. - * Does not wait for the new request to complete. - * - * Returns the completed request, NULL in case of none completed. - * Wait for the an ongoing request (previoulsy started) to complete and - * return the completed request. If there is no ongoing request, NULL - * is returned without waiting. 
NULL is not an error condition. - */ -struct mmc_async_req *mmc_start_areq(struct mmc_host *host, - struct mmc_async_req *areq, - enum mmc_blk_status *ret_stat) -{ - enum mmc_blk_status status; - int start_err = 0; - struct mmc_async_req *previous = host->areq; - - /* Prepare a new request */ - if (areq) - mmc_pre_req(host, areq->mrq); - - /* Finalize previous request */ - status = mmc_finalize_areq(host); - if (ret_stat) - *ret_stat = status; - - /* The previous request is still going on... */ - if (status == MMC_BLK_NEW_REQUEST) - return NULL; - - /* Fine so far, start the new request! */ - if (status == MMC_BLK_SUCCESS && areq) - start_err = __mmc_start_data_req(host, areq->mrq); - - /* Postprocess the old request at this point */ - if (host->areq) - mmc_post_req(host, host->areq->mrq, 0); - - /* Cancel a prepared request if it was not started. */ - if ((status != MMC_BLK_SUCCESS || start_err) && areq) - mmc_post_req(host, areq->mrq, -EINVAL); - - if (status != MMC_BLK_SUCCESS) - host->areq = NULL; - else - host->areq = areq; - - return previous; -} -EXPORT_SYMBOL(mmc_start_areq); - /** * mmc_wait_for_req - start a request and wait for completion * @host: MMC host to start command @@ -2961,22 +2796,6 @@ void mmc_unregister_pm_notifier(struct mmc_host *host) } #endif -/** - * mmc_init_context_info() - init synchronization context - * @host: mmc host - * - * Init struct context_info needed to implement asynchronous - * request mechanism, used by mmc core, host driver and mmc requests - * supplier. 
- */ -void mmc_init_context_info(struct mmc_host *host) -{ - host->context_info.is_new_req = false; - host->context_info.is_done_rcv = false; - host->context_info.is_waiting_last_req = false; - init_waitqueue_head(&host->context_info.wait); -} - static int __init mmc_init(void) { int ret; diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 3e3d21304e5f..d6303d69071b 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -89,8 +89,6 @@ void mmc_remove_host_debugfs(struct mmc_host *host); void mmc_add_card_debugfs(struct mmc_card *card); void mmc_remove_card_debugfs(struct mmc_card *card); -void mmc_init_context_info(struct mmc_host *host); - int mmc_execute_tuning(struct mmc_card *card); int mmc_hs200_to_hs400(struct mmc_card *card); int mmc_hs400_to_hs200(struct mmc_card *card); @@ -108,12 +106,6 @@ bool mmc_is_req_done(struct mmc_host *host, struct mmc_request *mrq); int mmc_start_request(struct mmc_host *host, struct mmc_request *mrq); -struct mmc_async_req; - -struct mmc_async_req *mmc_start_areq(struct mmc_host *host, - struct mmc_async_req *areq, - enum mmc_blk_status *ret_stat); - int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, unsigned int arg); int mmc_can_erase(struct mmc_card *card); diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h index 6d896869e5c6..06ec19b5bf9f 100644 --- a/drivers/mmc/core/host.h +++ b/drivers/mmc/core/host.h @@ -79,10 +79,5 @@ static inline bool mmc_card_hs400es(struct mmc_card *card) return card->host->ios.enhanced_strobe; } -static inline bool mmc_host_use_blk_mq(struct mmc_host *host) -{ - return true; -} - #endif diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f3e13c50f6b0..85146235231e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -424,9 +424,6 @@ struct mmc_host { struct dentry *debugfs_root; - struct mmc_async_req *areq; /* active async req */ - struct mmc_context_info context_info; /* async synchronization 
info */ - /* Ongoing data transfer that allows commands during transfer */ struct mmc_request *ongoing_mrq; -- cgit v1.2.3 From 0562315b86372d2cdd9cc8924b92cfab37049fbc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 8 Dec 2017 09:31:06 +0200 Subject: mmc: cqhci: Ensure macro parameters are wrapped in parentheses Absence of parentheses is not affecting current code, but ensure macro parameters are wrapped in parentheses. Reported-by: Dan Carpenter Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/cqhci.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/mmc/host/cqhci.h b/drivers/mmc/host/cqhci.h index 2d39d361b322..9e68286a07b4 100644 --- a/drivers/mmc/host/cqhci.h +++ b/drivers/mmc/host/cqhci.h @@ -61,9 +61,9 @@ #define CQHCI_IC_ENABLE BIT(31) #define CQHCI_IC_RESET BIT(16) #define CQHCI_IC_ICCTHWEN BIT(15) -#define CQHCI_IC_ICCTH(x) ((x & 0x1F) << 8) +#define CQHCI_IC_ICCTH(x) (((x) & 0x1F) << 8) #define CQHCI_IC_ICTOVALWEN BIT(7) -#define CQHCI_IC_ICTOVAL(x) (x & 0x7F) +#define CQHCI_IC_ICTOVAL(x) ((x) & 0x7F) /* task list base address */ #define CQHCI_TDLBA 0x20 @@ -119,31 +119,31 @@ #define CQHCI_IC_DEFAULT_ICTOVAL 1 /* attribute fields */ -#define CQHCI_VALID(x) ((x & 1) << 0) -#define CQHCI_END(x) ((x & 1) << 1) -#define CQHCI_INT(x) ((x & 1) << 2) -#define CQHCI_ACT(x) ((x & 0x7) << 3) +#define CQHCI_VALID(x) (((x) & 1) << 0) +#define CQHCI_END(x) (((x) & 1) << 1) +#define CQHCI_INT(x) (((x) & 1) << 2) +#define CQHCI_ACT(x) (((x) & 0x7) << 3) /* data command task descriptor fields */ -#define CQHCI_FORCED_PROG(x) ((x & 1) << 6) -#define CQHCI_CONTEXT(x) ((x & 0xF) << 7) -#define CQHCI_DATA_TAG(x) ((x & 1) << 11) -#define CQHCI_DATA_DIR(x) ((x & 1) << 12) -#define CQHCI_PRIORITY(x) ((x & 1) << 13) -#define CQHCI_QBAR(x) ((x & 1) << 14) -#define CQHCI_REL_WRITE(x) ((x & 1) << 15) 
-#define CQHCI_BLK_COUNT(x) ((x & 0xFFFF) << 16) -#define CQHCI_BLK_ADDR(x) ((x & 0xFFFFFFFF) << 32) +#define CQHCI_FORCED_PROG(x) (((x) & 1) << 6) +#define CQHCI_CONTEXT(x) (((x) & 0xF) << 7) +#define CQHCI_DATA_TAG(x) (((x) & 1) << 11) +#define CQHCI_DATA_DIR(x) (((x) & 1) << 12) +#define CQHCI_PRIORITY(x) (((x) & 1) << 13) +#define CQHCI_QBAR(x) (((x) & 1) << 14) +#define CQHCI_REL_WRITE(x) (((x) & 1) << 15) +#define CQHCI_BLK_COUNT(x) (((x) & 0xFFFF) << 16) +#define CQHCI_BLK_ADDR(x) (((x) & 0xFFFFFFFF) << 32) /* direct command task descriptor fields */ -#define CQHCI_CMD_INDEX(x) ((x & 0x3F) << 16) -#define CQHCI_CMD_TIMING(x) ((x & 1) << 22) -#define CQHCI_RESP_TYPE(x) ((x & 0x3) << 23) +#define CQHCI_CMD_INDEX(x) (((x) & 0x3F) << 16) +#define CQHCI_CMD_TIMING(x) (((x) & 1) << 22) +#define CQHCI_RESP_TYPE(x) (((x) & 0x3) << 23) /* transfer descriptor fields */ -#define CQHCI_DAT_LENGTH(x) ((x & 0xFFFF) << 16) -#define CQHCI_DAT_ADDR_LO(x) ((x & 0xFFFFFFFF) << 32) -#define CQHCI_DAT_ADDR_HI(x) ((x & 0xFFFFFFFF) << 0) +#define CQHCI_DAT_LENGTH(x) (((x) & 0xFFFF) << 16) +#define CQHCI_DAT_ADDR_LO(x) (((x) & 0xFFFFFFFF) << 32) +#define CQHCI_DAT_ADDR_HI(x) (((x) & 0xFFFFFFFF) << 0) struct cqhci_host_ops; struct mmc_host; -- cgit v1.2.3 From 2361bfb055f948eac6583fa3c75a014da84fe554 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Dec 2017 14:55:16 +0300 Subject: mmc: block: blk-mq: Potential NULL deref on mmc_blk_alloc_req() failure mmc_blk_alloc_req() is supposed to return error pointers but there is one path where we forget to set the error code and accidentally return NULL. The callers are not expecting that and will have a NULL pointer dereference. 
Fixes: 41e3efd07d5a ("mmc: block: Simplify cleaning up the queue") Signed-off-by: Dan Carpenter Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 579fc0bd722f..654fc1ebd675 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2328,6 +2328,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, */ if (!blk_get_queue(md->queue.queue)) { mmc_cleanup_queue(&md->queue); + ret = -ENODEV; goto err_putdisk; } -- cgit v1.2.3 From 0cc1a0f4519e26d6498bd85c6e648b21a6cdd3ea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 8 Dec 2017 15:04:58 +0200 Subject: mmc: sdhci-acpi: Add setup_host() callback Add a ->setup_host() callback so that device-specific changes can be made to the mmc host controller before it is added. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 1b1ce804d2d7..f7445cf8f7dd 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -76,6 +76,7 @@ struct sdhci_acpi_slot { size_t priv_size; int (*probe_slot)(struct platform_device *, const char *, const char *); int (*remove_slot)(struct platform_device *); + int (*setup_host)(struct platform_device *pdev); }; struct sdhci_acpi_host { @@ -688,10 +689,20 @@ static int sdhci_acpi_probe(struct platform_device *pdev) } } - err = sdhci_add_host(host); + err = sdhci_setup_host(host); if (err) goto err_free; + if (c->slot && c->slot->setup_host) { + err = c->slot->setup_host(pdev); + if (err) + goto err_cleanup; + } + + err = __sdhci_add_host(host); + if (err) + goto err_cleanup; + if (c->use_runtime_pm) { pm_runtime_set_active(dev); pm_suspend_ignore_children(dev, 1); @@ -704,6 +715,8 @@ static int sdhci_acpi_probe(struct 
platform_device *pdev) return 0; +err_cleanup: + sdhci_cleanup_host(c->host); err_free: sdhci_free_host(c->host); return err; -- cgit v1.2.3 From 0acccf4141a1ac37edab8ed905e97bf7c4be3bce Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 8 Dec 2017 15:08:18 +0200 Subject: mmc: sdhci-acpi: Avoid broken UHS transfer modes on Intel CHT Intel DSM function 8 has been used to identify transfer modes that are not working on some CHT boards. Add support for that. Signed-off-by: Adrian Hunter Tested-by: Carlo Caione Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index f7445cf8f7dd..264f10327bf9 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -97,14 +97,21 @@ static inline bool sdhci_acpi_flag(struct sdhci_acpi_host *c, unsigned int flag) return c->slot && (c->slot->flags & flag); } +#define INTEL_DSM_HS_CAPS_SDR25 BIT(0) +#define INTEL_DSM_HS_CAPS_DDR50 BIT(1) +#define INTEL_DSM_HS_CAPS_SDR50 BIT(2) +#define INTEL_DSM_HS_CAPS_SDR104 BIT(3) + enum { INTEL_DSM_FNS = 0, INTEL_DSM_V18_SWITCH = 3, INTEL_DSM_V33_SWITCH = 4, + INTEL_DSM_HS_CAPS = 8, }; struct intel_host { u32 dsm_fns; + u32 hs_caps; }; static const guid_t intel_dsm_guid = @@ -153,6 +160,8 @@ static void intel_dsm_init(struct intel_host *intel_host, struct device *dev, { int err; + intel_host->hs_caps = ~0; + err = __intel_dsm(intel_host, dev, INTEL_DSM_FNS, &intel_host->dsm_fns); if (err) { pr_debug("%s: DSM not supported, error %d\n", @@ -162,6 +171,8 @@ static void intel_dsm_init(struct intel_host *intel_host, struct device *dev, pr_debug("%s: DSM function mask %#x\n", mmc_hostname(mmc), intel_host->dsm_fns); + + intel_dsm(intel_host, dev, INTEL_DSM_HS_CAPS, &intel_host->hs_caps); } static int intel_start_signal_voltage_switch(struct mmc_host *mmc, @@ -399,6 +410,26 @@ static int intel_probe_slot(struct 
platform_device *pdev, const char *hid, return 0; } +static int intel_setup_host(struct platform_device *pdev) +{ + struct sdhci_acpi_host *c = platform_get_drvdata(pdev); + struct intel_host *intel_host = sdhci_acpi_priv(c); + + if (!(intel_host->hs_caps & INTEL_DSM_HS_CAPS_SDR25)) + c->host->mmc->caps &= ~MMC_CAP_UHS_SDR25; + + if (!(intel_host->hs_caps & INTEL_DSM_HS_CAPS_SDR50)) + c->host->mmc->caps &= ~MMC_CAP_UHS_SDR50; + + if (!(intel_host->hs_caps & INTEL_DSM_HS_CAPS_DDR50)) + c->host->mmc->caps &= ~MMC_CAP_UHS_DDR50; + + if (!(intel_host->hs_caps & INTEL_DSM_HS_CAPS_SDR104)) + c->host->mmc->caps &= ~MMC_CAP_UHS_SDR104; + + return 0; +} + static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = { .chip = &sdhci_acpi_chip_int, .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | @@ -410,6 +441,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = { SDHCI_QUIRK2_STOP_WITH_TC | SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400, .probe_slot = intel_probe_slot, + .setup_host = intel_setup_host, .priv_size = sizeof(struct intel_host), }; @@ -422,6 +454,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = { .flags = SDHCI_ACPI_RUNTIME_PM, .pm_caps = MMC_PM_KEEP_POWER, .probe_slot = intel_probe_slot, + .setup_host = intel_setup_host, .priv_size = sizeof(struct intel_host), }; @@ -433,6 +466,7 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sd = { SDHCI_QUIRK2_STOP_WITH_TC, .caps = MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_AGGRESSIVE_PM, .probe_slot = intel_probe_slot, + .setup_host = intel_setup_host, .priv_size = sizeof(struct intel_host), }; -- cgit v1.2.3 From 91516a2a4734614d62ee3ed921f8f88acc67c000 Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Sat, 9 Dec 2017 23:47:55 +0100 Subject: mmc: core: apply NO_CMD23 quirk to some specific cards To get an usdhc Apacer and some ATP SD cards work reliable, CMD23 needs to be disabled. This has been tested on i.MX6 (sdhci-esdhc) and rk3288 (dw_mmc-rockchip). 
Without this patch on i.MX6 (sdhci-esdhc): $ dd if=/dev/urandom of=/mnt/test bs=1M count=10 conv=fsync | | mmc0: starting CMD25 arg 00a71f00 flags 000000b5 | mmc0: blksz 512 blocks 1024 flags 00000100 tsac 3000 ms nsac 0 | mmc0: CMD12 arg 00000000 flags 0000049d | sdhci [sdhci_irq()]: *** mmc0 got interrupt: 0x00000001 | mmc0: Timeout waiting for hardware interrupt. Without this patch on rk3288 (dw_mmc-rockchip): | mmc1: Card stuck in programming state! mmcblk1 card_busy_detect | dwmmc_rockchip ff0c0000.dwmmc: Busy; trying anyway | mmc_host mmc1: Bus speed (slot 0) = 400000Hz (slot req 400000Hz, | actual 400000HZ div = 0) | mmc1: card never left busy state | mmc1: tried to reset card, got error -110 | blk_update_request: I/O error, dev mmcblk1, sector 139778 | Buffer I/O error on dev mmcblk1p1, logical block 131586, lost async | page write Signed-off-by: Christoph Fritz Cc: # v4.14+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/card.h | 2 ++ drivers/mmc/core/quirks.h | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h index f06cd91964ce..79a5b985ccf5 100644 --- a/drivers/mmc/core/card.h +++ b/drivers/mmc/core/card.h @@ -75,9 +75,11 @@ struct mmc_fixup { #define EXT_CSD_REV_ANY (-1u) #define CID_MANFID_SANDISK 0x2 +#define CID_MANFID_ATP 0x9 #define CID_MANFID_TOSHIBA 0x11 #define CID_MANFID_MICRON 0x13 #define CID_MANFID_SAMSUNG 0x15 +#define CID_MANFID_APACER 0x27 #define CID_MANFID_KINGSTON 0x70 #define CID_MANFID_HYNIX 0x90 diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index f664e9cbc9f8..75d317623852 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -52,6 +52,14 @@ static const struct mmc_fixup mmc_blk_fixups[] = { MMC_FIXUP("MMC32G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc, MMC_QUIRK_BLK_NO_CMD23), + /* + * Some SD cards lockup while using CMD23 multiblock transfers. 
+ */ + MMC_FIXUP("AF SD", CID_MANFID_ATP, CID_OEMID_ANY, add_quirk_sd, + MMC_QUIRK_BLK_NO_CMD23), + MMC_FIXUP("APUSD", CID_MANFID_APACER, 0x5048, add_quirk_sd, + MMC_QUIRK_BLK_NO_CMD23), + /* * Some MMC cards need longer data read timeout than indicated in CSD. */ -- cgit v1.2.3 From f5b5702ac55b11113a94d6228d191c7f827b7a3b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Dec 2017 10:14:27 +0100 Subject: netfilter: exthdr: add missing attributes to policy Add missing netlink attribute policy. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index a0a93d987a3b..47ec1046ad11 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -214,6 +214,8 @@ static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, [NFTA_EXTHDR_LEN] = { .type = NLA_U32 }, [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, + [NFTA_EXTHDR_OP] = { .type = NLA_U32 }, + [NFTA_EXTHDR_SREG] = { .type = NLA_U32 }, }; static int nft_exthdr_init(const struct nft_ctx *ctx, -- cgit v1.2.3 From 3487972d7fa6c5143951436ada5933dcf0ec659d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Dec 2017 02:41:18 +0100 Subject: PM / sleep: Avoid excess pm_runtime_enable() calls in device_resume() Middle-layer code doing suspend-time optimizations for devices with the DPM_FLAG_SMART_SUSPEND flag set (currently, the PCI bus type and the ACPI PM domain) needs to make the core skip ->thaw_early and ->thaw callbacks for those devices in some cases and it sets the power.direct_complete flag for them for this purpose. However, it turns out that setting power.direct_complete outside of the PM core is a bad idea as it triggers an excess invocation of pm_runtime_enable() in device_resume().
For this reason, provide a helper to clear power.is_late_suspended and power.is_suspended to be invoked by the middle-layer code in question instead of setting power.direct_complete and make that code call the new helper. Fixes: c4b65157aeef (PCI / PM: Take SMART_SUSPEND driver flag into account) Fixes: 05087360fd7a (ACPI / PM: Take SMART_SUSPEND driver flag into account) Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Acked-by: Bjorn Helgaas --- drivers/acpi/device_pm.c | 2 +- drivers/base/power/main.c | 15 +++++++++++++++ drivers/pci/pci-driver.c | 2 +- include/linux/pm.h | 1 + 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index e4ffaeec9ec2..a4c8ad98560d 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1138,7 +1138,7 @@ int acpi_subsys_thaw_noirq(struct device *dev) * skip all of the subsequent "thaw" callbacks for the device. */ if (dev_pm_smart_suspend_and_suspended(dev)) { - dev->power.direct_complete = true; + dev_pm_skip_next_resume_phases(dev); return 0; } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index db2f04415927..08744b572af6 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -525,6 +525,21 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) /*------------------------- Resume routines -------------------------*/ +/** + * dev_pm_skip_next_resume_phases - Skip next system resume phases for device. + * @dev: Target device. + * + * Make the core skip the "early resume" and "resume" phases for @dev. + * + * This function can be called by middle-layer code during the "noirq" phase of + * system resume if necessary, but not by device drivers. + */ +void dev_pm_skip_next_resume_phases(struct device *dev) +{ + dev->power.is_late_suspended = false; + dev->power.is_suspended = false; +} + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 7f47bb72bf30..945099d49f8f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -999,7 +999,7 @@ static int pci_pm_thaw_noirq(struct device *dev) * the subsequent "thaw" callbacks for the device. */ if (dev_pm_smart_suspend_and_suspended(dev)) { - dev->power.direct_complete = true; + dev_pm_skip_next_resume_phases(dev); return 0; } diff --git a/include/linux/pm.h b/include/linux/pm.h index 65d39115f06d..492ed473ba7e 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -765,6 +765,7 @@ extern int pm_generic_poweroff_late(struct device *dev); extern int pm_generic_poweroff(struct device *dev); extern void pm_generic_complete(struct device *dev); +extern void dev_pm_skip_next_resume_phases(struct device *dev); extern bool dev_pm_smart_suspend_and_suspended(struct device *dev); #else /* !CONFIG_PM_SLEEP */ -- cgit v1.2.3 From 1ac8aa8d0568606485451ea860a6c6c3fad0d42d Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 30 Nov 2017 11:06:15 -0600 Subject: ipmi_si: Fix oops with PCI devices When the IPMI PCI code was split out, some code was consolidated for setting the io_setup field in the io structure. The PCI code needed this set before registration to probe register spacing, though, so restore the old code for that function. 
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=197999 Signed-off-by: Corey Minyard Tested-by: Meelis Roos --- drivers/char/ipmi/ipmi_si_pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_pci.c b/drivers/char/ipmi/ipmi_si_pci.c index 99771f5cad07..27dd11c49d21 100644 --- a/drivers/char/ipmi/ipmi_si_pci.c +++ b/drivers/char/ipmi/ipmi_si_pci.c @@ -103,10 +103,13 @@ static int ipmi_pci_probe(struct pci_dev *pdev, io.addr_source_cleanup = ipmi_pci_cleanup; io.addr_source_data = pdev; - if (pci_resource_flags(pdev, 0) & IORESOURCE_IO) + if (pci_resource_flags(pdev, 0) & IORESOURCE_IO) { io.addr_type = IPMI_IO_ADDR_SPACE; - else + io.io_setup = ipmi_si_port_setup; + } else { io.addr_type = IPMI_MEM_ADDR_SPACE; + io.io_setup = ipmi_si_mem_setup; + } io.addr_data = pci_resource_start(pdev, 0); io.regspacing = ipmi_pci_probe_regspacing(&io); -- cgit v1.2.3 From 51614b26a029515dd3bc43a8c0e16a9ee51bbf52 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 6 Dec 2017 04:25:44 -0500 Subject: ipmi_si: fix crash on parisc This patch fixes ipmi crash on parisc introduced in the kernel 4.15-rc. The pointer io.io_setup is not initialized and thus it causes crash in try_smi_init when attempting to call new_smi->io.io_setup. 
Signed-off-by: Mikulas Patocka Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_si_parisc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/ipmi/ipmi_si_parisc.c b/drivers/char/ipmi/ipmi_si_parisc.c index 090b073ab441..6b10f0e18a95 100644 --- a/drivers/char/ipmi/ipmi_si_parisc.c +++ b/drivers/char/ipmi/ipmi_si_parisc.c @@ -10,6 +10,8 @@ static int __init ipmi_parisc_probe(struct parisc_device *dev) { struct si_sm_io io; + memset(&io, 0, sizeof(io)); + io.si_type = SI_KCS; io.addr_source = SI_DEVICETREE; io.addr_type = IPMI_MEM_ADDR_SPACE; -- cgit v1.2.3 From 03dd604e1d515ca1ab02aaae12162e0a077858e9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 11 Dec 2017 12:54:44 +0100 Subject: x86/apic: Remove local var in flat_send_IPI_allbutself() No code changed: # arch/x86/kernel/apic/apic_flat_64.o: text data bss dec hex filename 1838 624 0 2462 99e apic_flat_64.o.before 1838 624 0 2462 99e apic_flat_64.o.after md5: aa2ae687d94bc4534f86ae6865dabd6a apic_flat_64.o.before.asm 42148da76ba8f9a236c33f8803bd2a6b apic_flat_64.o.after.asm md5 sum is different due to asm output offsets changing. 
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171211115444.26577-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic_flat_64.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index aa85690e9b64..f58a49769bc6 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -84,12 +84,8 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) static void flat_send_IPI_allbutself(int vector) { int cpu = smp_processor_id(); -#ifdef CONFIG_HOTPLUG_CPU - int hotplug = 1; -#else - int hotplug = 0; -#endif - if (hotplug || vector == NMI_VECTOR) { + + if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR) { if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { unsigned long mask = cpumask_bits(cpu_online_mask)[0]; -- cgit v1.2.3 From 7f6f60a1ba52538c16f26930bfbcfe193d9d746a Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sat, 9 Dec 2017 12:16:10 +0800 Subject: mm/early_ioremap: Fix boot hang with earlyprintk=efi,keep earlyprintk=efi,keep does not work any more with a warning in mm/early_ioremap.c: WARN_ON(system_state != SYSTEM_BOOTING): Boot just hangs because of the earlyprintk within the earlyprintk implementation code itself. This is caused by a new introduced middle state in: 69a78ff226fe ("init: Introduce SYSTEM_SCHEDULING state") early_ioremap() is fine in both SYSTEM_BOOTING and SYSTEM_SCHEDULING states, original condition should be updated accordingly. 
Signed-off-by: Dave Young Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: bp@suse.de Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20171209041610.GA3249@dhcp-128-65.nay.redhat.com Signed-off-by: Ingo Molnar --- mm/early_ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index d04ac1ec0559..1826f191e72c 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -111,7 +111,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) enum fixed_addresses idx; int i, slot; - WARN_ON(system_state != SYSTEM_BOOTING); + WARN_ON(system_state >= SYSTEM_RUNNING); slot = -1; for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { -- cgit v1.2.3 From 6d60ce384d1d5ca32b595244db4077a419acc687 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 27 Nov 2017 08:51:39 +0100 Subject: x86/mm/kmmio: Fix mmiotrace for page unaligned addresses If something calls ioremap() with an address not aligned to PAGE_SIZE, the returned address might not be aligned as well. This led to a probe registered on exactly the returned address, but the entire page was armed for mmiotracing. On calling iounmap() the address passed to unregister_kmmio_probe() was PAGE_SIZE aligned by the caller leading to a complete freeze of the machine. We should always page align addresses while (un)registering mappings, because the mmiotracer works on top of pages, not mappings. We still keep track of the probes based on their real addresses and lengths though, because the mmiotrace still needs to know what are mapped memory regions. Also move the call to mmiotrace_iounmap() prior to page aligning the address, so that all probes are unregistered properly, otherwise the kernel ends up failing memory allocations randomly after disabling the mmiotracer.
Tested-by: Lyude Signed-off-by: Karol Herbst Acked-by: Pekka Paalanen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: nouveau@lists.freedesktop.org Link: http://lkml.kernel.org/r/20171127075139.4928-1-kherbst@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 4 ++-- arch/x86/mm/kmmio.c | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 6e4573b1da34..c45b6ec5357b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr) return; } + mmiotrace_iounmap(addr); + addr = (volatile void __iomem *) (PAGE_MASK & (unsigned long __force)addr); - mmiotrace_iounmap(addr); - /* Use the vm area unlocked, assuming the caller ensures there isn't another iounmap for the same address in parallel. Reuse of the virtual address is prevented by diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index c21c2ed04612..58477ec3d66d 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p) unsigned long flags; int ret = 0; unsigned long size = 0; + unsigned long addr = p->addr & PAGE_MASK; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); unsigned int l; pte_t *pte; spin_lock_irqsave(&kmmio_lock, flags); - if (get_kmmio_probe(p->addr)) { + if (get_kmmio_probe(addr)) { ret = -EEXIST; goto out; } - pte = lookup_address(p->addr, &l); + pte = lookup_address(addr, &l); if (!pte) { ret = -EINVAL; goto out; @@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p) kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { - if (add_kmmio_fault_page(p->addr + size)) + if (add_kmmio_fault_page(addr + size)) pr_err("Unable to set page fault.\n"); size += page_level_size(l); } @@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p) { unsigned long flags; unsigned long size = 
0; + unsigned long addr = p->addr & PAGE_MASK; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; unsigned int l; pte_t *pte; - pte = lookup_address(p->addr, &l); + pte = lookup_address(addr, &l); if (!pte) return; spin_lock_irqsave(&kmmio_lock, flags); while (size < size_lim) { - release_kmmio_fault_page(p->addr + size, &release_list); + release_kmmio_fault_page(addr + size, &release_list); size += page_level_size(l); } list_del_rcu(&p->list); -- cgit v1.2.3 From 2064a5ab04707c55003e099e5abbf19a0826bbac Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 3 Dec 2017 13:19:00 -0800 Subject: sched/core: Fix kernel-doc warnings after code movement Fix the following kernel-doc warnings after code restructuring: ../kernel/sched/core.c:5113: warning: No description found for parameter 't' ../kernel/sched/core.c:5113: warning: Excess function parameter 'interval' description in 'sched_rr_get_interval' get rid of set_fs()") Signed-off-by: Randy Dunlap Cc: Al Viro Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: abca5fc535a3e ("sched_rr_get_interval(): move compat to native, Link: http://lkml.kernel.org/r/995c6ded-b32e-bbe4-d9f5-4d42d121aff1@infradead.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 75554f366fd3..644fa2e3d993 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5097,17 +5097,6 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) return ret; } -/** - * sys_sched_rr_get_interval - return the default timeslice of a process. - * @pid: pid of the process. - * @interval: userspace pointer to the timeslice value. - * - * this syscall writes the default timeslice value of a given process - * into the user-space timespec buffer. A value of '0' means infinity. 
- * - * Return: On success, 0 and the timeslice is in @interval. Otherwise, - * an error code. - */ static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) { struct task_struct *p; @@ -5144,6 +5133,17 @@ out_unlock: return retval; } +/** + * sys_sched_rr_get_interval - return the default timeslice of a process. + * @pid: pid of the process. + * @interval: userspace pointer to the timeslice value. + * + * this syscall writes the default timeslice value of a given process + * into the user-space timespec buffer. A value of '0' means infinity. + * + * Return: On success, 0 and the timeslice is in @interval. Otherwise, + * an error code. + */ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, struct timespec __user *, interval) { -- cgit v1.2.3 From 01dfee9582d9b4403c4902df096ed8b43d55181c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 8 Dec 2017 11:56:14 +0900 Subject: workqueue: remove unneeded kallsyms include The file was converted from print_symbol() to %pf some time ago (044c782ce3a901fb "workqueue: fix checkpatch issues"). kallsyms does not seem to be needed anymore. Signed-off-by: Sergey Senozhatsky Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 45ce93f3dd1f..43d18cb46308 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 86ad5c97ce5ccdda1459d35370fd5e105721bb8d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 9 Dec 2017 14:49:14 +0300 Subject: RISC-V: Logical vs Bitwise typo In the current code, there is a ! logical NOT where a bitwise ~ NOT was intended. It means that we never return -EINVAL.
Signed-off-by: Dan Carpenter Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index a2ae936a093e..79c78668258e 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; /* Check the reserved flags. */ - if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL)) + if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL)) return -EINVAL; flush_icache_mm(mm, local); -- cgit v1.2.3 From 3cfa5008081db845c6c53d531ec34e9c84a9fd99 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 5 Dec 2017 17:48:11 -0800 Subject: RISC-V: Resurrect smp_mb__after_spinlock() I removed this last week because of an incorrect comment: smp_mb__after_spinlock() is actually still used, and is necessary on RISC-V. It's been resurrected, with a comment that describes what it actually does this time. Thanks to Andrea for finding the bug! Fixes: 3343eb6806f3 ("RISC-V: Remove smb_mb__{before,after}_spinlock()") CC: Andrea Parri Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/barrier.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 773c4e039cd7..c0319cbf1eec 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,6 +38,25 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) +/* + * This is a very specific barrier: it's currently only used in two places in + * the kernel, both in the scheduler. See include/linux/spinlock.h for the two + * orderings it guarantees, but the "critical section is RCsc" guarantee + * mandates a barrier on RISC-V. 
The sequence looks like: + * + * lr.aq lock + * sc lock <= LOCKED + * smp_mb__after_spinlock() + * // critical section + * lr lock + * sc.rl lock <= UNLOCKED + * + * The AQ/RL pair provides a RCpc critical section, but there's not really any + * way we can take advantage of that here because the ordering is only enforced + * on that one lock. Thus, we're just doing a full fence. + */ +#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw) + #include #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 27b0174525325bf18919597016483a709f3372f8 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 8 Dec 2017 11:23:23 -0800 Subject: RISC-V: Remove unused CONFIG_HVC_RISCV_SBI code This is code that probably should never have made it into the kernel in the first place: it depends on a driver that hadn't been reviewed yet. During the HVC_SBI_RISCV review process a better way of doing this was suggested, but that means this code is defunct. It's compile-time disabled in 4.15 because the driver isn't in, so I think it's safe to just remove this for now. 
CC: Greg KH Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 8fbb6749910d..cb7b0c63014e 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -38,10 +38,6 @@ #include #include -#ifdef CONFIG_HVC_RISCV_SBI -#include -#endif - #ifdef CONFIG_DUMMY_CONSOLE struct screen_info screen_info = { .orig_video_lines = 30, @@ -212,13 +208,6 @@ static void __init setup_bootmem(void) void __init setup_arch(char **cmdline_p) { -#if defined(CONFIG_HVC_RISCV_SBI) - if (likely(early_console == NULL)) { - early_console = &riscv_sbi_early_console_dev; - register_console(early_console); - } -#endif - #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -- cgit v1.2.3 From a8ceb5dbfde1092b466936bca0ff3be127ecf38e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 5 Dec 2017 21:29:37 +0200 Subject: ptr_ring: add barriers Users of ptr_ring expect that it's safe to give the data structure a pointer and have it be available to consumers, but that actually requires an smp_wmb or a stronger barrier. In absence of such barriers and on architectures that reorder writes, consumer might read an uninitialized value from an skb pointer stored in the skb array. This was observed causing crashes. To fix, add memory barriers. The barrier we use is a wmb, the assumption being that producers do not need to read the value so we do not need to order these reads. Reported-by: George Cherian Suggested-by: Jason Wang Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S.
Miller --- include/linux/ptr_ring.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 37b4bb2545b3..6866df4f31b5 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -101,12 +101,18 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. + * Callers are responsible for making sure pointer that is being queued + * points to a valid data. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; + /* Make sure the pointer we are storing points to a valid data. */ + /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ + smp_wmb(); + r->queue[r->producer++] = ptr; if (unlikely(r->producer >= r->size)) r->producer = 0; @@ -275,6 +281,9 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) if (ptr) __ptr_ring_discard_one(r); + /* Make sure anyone accessing data through the pointer is up to date. */ + /* Pairs with smp_wmb in __ptr_ring_produce. */ + smp_read_barrier_depends(); return ptr; } -- cgit v1.2.3 From 23715275e4fb6f64358a499d20928a9e93819f2f Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 11 Dec 2017 18:19:33 +0300 Subject: netfilter: ip6t_MASQUERADE: add dependency on conntrack module After commit 4d3a57f23dec ("netfilter: conntrack: do not enable connection tracking unless needed") conntrack is disabled by default unless some module explicitly declares dependency in particular network namespace. 
Fixes: a357b3f80bc8 ("netfilter: nat: add dependencies on conntrack module") Signed-off-by: Konstantin Khlebnikov Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_MASQUERADE.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 2b1a15846f9a..92c0047e7e33 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -33,13 +33,19 @@ static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) if (range->flags & NF_NAT_RANGE_MAP_IPS) return -EINVAL; - return 0; + return nf_ct_netns_get(par->net, par->family); +} + +static void masquerade_tg6_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } static struct xt_target masquerade_tg6_reg __read_mostly = { .name = "MASQUERADE", .family = NFPROTO_IPV6, .checkentry = masquerade_tg6_checkentry, + .destroy = masquerade_tg6_destroy, .target = masquerade_tg6, .targetsize = sizeof(struct nf_nat_range), .table = "nat", -- cgit v1.2.3 From f24e5834a2c3f6c5f814a417f858226f0a010ade Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Mon, 4 Dec 2017 14:13:05 +0000 Subject: arm64: Initialise high_memory global variable earlier The high_memory global variable is used by cma_declare_contiguous(.) before it is defined. We don't notice this as we compute __pa(high_memory - 1), and it looks like we're processing a VA from the direct linear map. This problem becomes apparent when we flip the kernel virtual address space and the linear map is moved to the bottom of the kernel VA space. This patch moves the initialisation of high_memory before it is used.
Cc: Fixes: f7426b983a6a ("mm: cma: adjust address limit to avoid hitting low/high memory boundary") Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 5960bef0170d..00e7b900ca41 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -476,6 +476,8 @@ void __init arm64_memblock_init(void) reserve_elfcorehdr(); + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; + dma_contiguous_reserve(arm64_dma_phys_limit); memblock_allow_resize(); @@ -502,7 +504,6 @@ void __init bootmem_init(void) sparse_init(); zone_sizes_init(min, max); - high_memory = __va((max << PAGE_SHIFT) - 1) + 1; memblock_dump_all(); } -- cgit v1.2.3 From 8781bcbc5e69d7da69e84c7044ca0284848d5d01 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 1 Dec 2017 17:22:14 +0000 Subject: arm64: mm: Fix pte_mkclean, pte_mkdirty semantics On systems with hardware dirty bit management, the ltp madvise09 unit test fails due to dirty bit information being lost and pages being incorrectly freed. This was bisected to: arm64: Ignore hardware dirty bit updates in ptep_set_wrprotect() Reverting this commit leads to a separate problem, that the unit test retains pages that should have been dropped due to the function madvise_free_pte_range(.) not cleaning pte's properly. Currently pte_mkclean only clears the software dirty bit, thus the following code sequence can appear: pte = pte_mkclean(pte); if (pte_dirty(pte)) // this condition can return true with HW DBM! This patch also adjusts pte_mkclean to set PTE_RDONLY thus effectively clearing both the SW and HW dirty information. In order for this to function on systems without HW DBM, we need to also adjust pte_mkdirty to remove the read only bit from writable pte's to avoid infinite fault loops. 
Cc: Fixes: 64c26841b349 ("arm64: Ignore hardware dirty bit updates in ptep_set_wrprotect()") Reported-by: Bhupinder Thakur Tested-by: Bhupinder Thakur Reviewed-by: Catalin Marinas Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 149d05fb9421..3ff03a755c32 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -149,12 +149,20 @@ static inline pte_t pte_mkwrite(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { - return clear_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + + return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + + if (pte_write(pte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + + return pte; } static inline pte_t pte_mkold(pte_t pte) @@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * ptep_set_wrprotect - mark read-only while preserving the hardware update of - * the Access Flag. + * ptep_set_wrprotect - mark read-only while trasferring potential hardware + * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. */ #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t old_pte, pte; - /* - * ptep_set_wrprotect() is only called on CoW mappings which are - * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE && - * PTE_RDONLY) or writable and software-dirty (PTE_WRITE && - * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and - * protection_map[]. 
There is no race with the hardware update of the - * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM) - * is set. - */ - VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep), - "%s: potential race with hardware DBM", __func__); pte = READ_ONCE(*ptep); do { old_pte = pte; + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); -- cgit v1.2.3 From f1e2400a80ff55eb7c5f4fd9d7eb163fd0de9a2c Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 8 Dec 2017 12:08:11 +0100 Subject: net: phy: meson-gxl: detect LPA corruption The purpose of this change is to fix the incorrect detection of the link partner (LP) advertised capabilities which sometimes happens with this PHY (roughly 1 time in a dozen) This issue may cause the link to be negotiated at 10Mbps/Full or 10Mbps/Half when 100MBps/Full is actually possible. In some case, the link is even completely broken and no communication is possible. To detect the corruption, we must look for a magic undocumented bit in the WOL bank (hint given by the SoC vendor kernel) but this is not enough to cover all cases. We also have to look at the LPA ack. If the LP supports Aneg but did not ack our base code when aneg is completed, we assume something went wrong. The detection of a corrupted LPA triggers a restart of the aneg process. This solves the problem but may take up to 6 retries to complete. Fixes: 7334b3e47aee ("net: phy: Add Meson GXL Internal PHY driver") Signed-off-by: Jerome Brunet Signed-off-by: David S. 
Miller --- drivers/net/phy/meson-gxl.c | 74 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c index 1ea69b7585d9..700007dd4be5 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -22,6 +22,7 @@ #include #include #include +#include static int meson_gxl_config_init(struct phy_device *phydev) { @@ -50,6 +51,77 @@ static int meson_gxl_config_init(struct phy_device *phydev) return 0; } +/* This function is provided to cope with the possible failures of this phy + * during aneg process. When aneg fails, the PHY reports that aneg is done + * but the value found in MII_LPA is wrong: + * - Early failures: MII_LPA is just 0x0001. if MII_EXPANSION reports that + * the link partner (LP) supports aneg but the LP never acked our base + * code word, it is likely that we never sent it to begin with. + * - Late failures: MII_LPA is filled with a value which seems to make sense + * but it actually is not what the LP is advertising. It seems that we + * can detect this using a magic bit in the WOL bank (reg 12 - bit 12). + * If this particular bit is not set when aneg is reported being done, + * it means MII_LPA is likely to be wrong. + * + * In both case, forcing a restart of the aneg process solve the problem. 
+ * When this failure happens, the first retry is usually successful but, + * in some cases, it may take up to 6 retries to get a decent result + */ +int meson_gxl_read_status(struct phy_device *phydev) +{ + int ret, wol, lpa, exp; + + if (phydev->autoneg == AUTONEG_ENABLE) { + ret = genphy_aneg_done(phydev); + if (ret < 0) + return ret; + else if (!ret) + goto read_status_continue; + + /* Need to access WOL bank, make sure the access is open */ + ret = phy_write(phydev, 0x14, 0x0000); + if (ret) + return ret; + ret = phy_write(phydev, 0x14, 0x0400); + if (ret) + return ret; + ret = phy_write(phydev, 0x14, 0x0000); + if (ret) + return ret; + ret = phy_write(phydev, 0x14, 0x0400); + if (ret) + return ret; + + /* Request LPI_STATUS WOL register */ + ret = phy_write(phydev, 0x14, 0x8D80); + if (ret) + return ret; + + /* Read LPI_STATUS value */ + wol = phy_read(phydev, 0x15); + if (wol < 0) + return wol; + + lpa = phy_read(phydev, MII_LPA); + if (lpa < 0) + return lpa; + + exp = phy_read(phydev, MII_EXPANSION); + if (exp < 0) + return exp; + + if (!(wol & BIT(12)) || + ((exp & EXPANSION_NWAY) && !(lpa & LPA_LPACK))) { + /* Looks like aneg failed after all */ + phydev_dbg(phydev, "LPA corruption - aneg restart\n"); + return genphy_restart_aneg(phydev); + } + } + +read_status_continue: + return genphy_read_status(phydev); +} + static struct phy_driver meson_gxl_phy[] = { { .phy_id = 0x01814400, @@ -60,7 +132,7 @@ static struct phy_driver meson_gxl_phy[] = { .config_init = meson_gxl_config_init, .config_aneg = genphy_config_aneg, .aneg_done = genphy_aneg_done, - .read_status = genphy_read_status, + .read_status = meson_gxl_read_status, .suspend = genphy_suspend, .resume = genphy_resume, }, -- cgit v1.2.3 From 2aab6b40b03154a263463a5d992ddd7d122a016a Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 8 Dec 2017 16:35:40 +0100 Subject: net: sh_eth: do not advertise Gigabit capabilities when not available Not all variants of the sh_eth hardware have Gigabit support. 
Unfortunately, the current driver doesn't tell the PHY about the limited MAC capabilities. Due to this, if you have a Gigabit capable PHY, the PHY will advertise its Gigabit capability and establish a link at 1Gbit/s, even though the MAC doesn't support it. In order to avoid this, we use the recently introduced phy_set_max_speed() to tell the PHY to not advertise speed higher than 100 MBit/s. Tested on a SH7786 platform, with a Gigabit PHY. Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index db72d13cebb9..75323000c364 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1892,6 +1892,16 @@ static int sh_eth_phy_init(struct net_device *ndev) return PTR_ERR(phydev); } + /* mask with MAC supported features */ + if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) { + int err = phy_set_max_speed(phydev, SPEED_100); + if (err) { + netdev_err(ndev, "failed to limit PHY to 100 Mbit/s\n"); + phy_disconnect(phydev); + return err; + } + } + phy_attached_info(phydev); return 0; -- cgit v1.2.3 From 93c647643b48f0131f02e45da3bd367d80443291 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Wed, 6 Dec 2017 12:12:27 -0800 Subject: netlink: Add netns check on taps Currently, a nlmon link inside a child namespace can observe systemwide netlink activity. Filter the traffic so that nlmon can only sniff netlink messages from its own netns. Test case: vpnns -- bash -c "ip link add nlmon0 type nlmon; \ ip link set nlmon0 up; \ tcpdump -i nlmon0 -q -w /tmp/nlmon.pcap -U" & sudo ip xfrm state add src 10.1.1.1 dst 10.1.1.2 proto esp \ spi 0x1 mode transport \ auth sha1 0x6162633132330000000000000000000000000000 \ enc aes 0x00000000000000000000000000000000 grep --binary abc123 /tmp/nlmon.pcap Signed-off-by: Kevin Cernekee Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b9e0ee4e22f5..79cc1bf36e4a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -253,6 +253,9 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, struct sock *sk = skb->sk; int ret = -ENOMEM; + if (!net_eq(dev_net(dev), sock_net(sk))) + return 0; + dev_hold(dev); if (is_vmalloc_addr(skb->head)) -- cgit v1.2.3 From 9122caf99b85c0f16938419547d5a9a84ae287a4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 12 Oct 2017 18:16:41 -0400 Subject: tracing, rcu: Hide trace event rcu_nocb_wake when not used The trace event rcu_nocb_wake is only used when CONFIG_RCU_NOCB_CPU is defined. But the trace event is defined regardless. As defined trace events take up memory, it is a waste to have it defined when not used. Surround the trace event with an #ifdef to have it only defined when it is used. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 59d40c454aa0..dbca79ea0677 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -243,6 +243,7 @@ TRACE_EVENT(rcu_exp_funnel_lock, __entry->grphi, __entry->gpevent) ); +#ifdef CONFIG_RCU_NOCB_CPU /* * Tracepoint for RCU no-CBs CPU callback handoffs. This event is intended * to assist debugging of these handoffs. @@ -285,6 +286,7 @@ TRACE_EVENT(rcu_nocb_wake, TP_printk("%s %d %s", __entry->rcuname, __entry->cpu, __entry->reason) ); +#endif /* * Tracepoint for tasks blocking within preemptible-RCU read-side -- cgit v1.2.3 From efd88b02bb9e6b8b73a20ea611e5d07ed6d4af34 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 19 Oct 2017 14:52:41 -0700 Subject: rcu: Add comment giving debug strategy for double call_rcu() The following statement has for some reason proven non-intuitive: WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0)); This commit therefore adds a comment that states that this warning usually triggers in response to a double call_rcu(), which is sort of like a double free. The comment also suggests building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y to track down the double call_rcu(). Reported-by: David Howells Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f9c0ca2ccf0c..1bdc0481aaf1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2789,6 +2789,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) rdp->n_force_qs_snap = rsp->n_force_qs; } else if (count < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = count; + + /* + * The following usually indicates a double call_rcu(). To track + * this down, try building with CONFIG_DEBUG_OBJECTS_RCU_HEAD=y. + */ WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0)); local_irq_restore(flags); -- cgit v1.2.3 From 84b12b752f41cd3d25d75692c2145d816e42926c Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Fri, 17 Nov 2017 21:40:15 +0600 Subject: rcu: Remove have_rcu_nocb_mask from tree_plugin.h Currently have_rcu_nocb_mask is used to avoid double allocation of rcu_nocb_mask during boot up. Due to different representation of cpumask_var_t on different kernel config CPUMASK=y(or n) it was okay. But now we have a helper cpumask_available(), which can be utilized to check whether rcu_nocb_mask has been allocated or not without using a variable. Removing the variable also reduces vmlinux size. 
Unpatched version: text data bss dec hex filename 13050393 7852470 14543408 35446271 21cddff vmlinux Patched version: text data bss dec hex filename 13050390 7852438 14543408 35446236 21cdddc vmlinux Signed-off-by: Rakib Mullick Cc: "Paul E. McKenney" Cc: Josh Triplett Cc: Steven Rostedt Cc: Mathieu Desnoyers Cc: Lai Jiangshan Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index db85ca3975f1..13a8e08f1998 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -61,7 +61,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ -static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ @@ -1752,7 +1751,6 @@ static void increment_cpu_stall_ticks(void) static int __init rcu_nocb_setup(char *str) { alloc_bootmem_cpumask_var(&rcu_nocb_mask); - have_rcu_nocb_mask = true; cpulist_parse(str, rcu_nocb_mask); return 1; } @@ -1801,7 +1799,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) /* Is the specified CPU a no-CBs CPU? 
*/ bool rcu_is_nocb_cpu(int cpu) { - if (have_rcu_nocb_mask) + if (cpumask_available(rcu_nocb_mask)) return cpumask_test_cpu(cpu, rcu_nocb_mask); return false; } @@ -2295,14 +2293,13 @@ void __init rcu_init_nohz(void) need_rcu_nocb_mask = true; #endif /* #if defined(CONFIG_NO_HZ_FULL) */ - if (!have_rcu_nocb_mask && need_rcu_nocb_mask) { + if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) { if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n"); return; } - have_rcu_nocb_mask = true; } - if (!have_rcu_nocb_mask) + if (!cpumask_available(rcu_nocb_mask)) return; #if defined(CONFIG_NO_HZ_FULL) @@ -2428,7 +2425,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp) struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */ struct rcu_data *rdp_prev = NULL; - if (!have_rcu_nocb_mask) + if (!cpumask_available(rcu_nocb_mask)) return; if (ls == -1) { ls = int_sqrt(nr_cpu_ids); -- cgit v1.2.3 From cc1321c96f855525fbd847fec130f000daa1bb1f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Oct 2017 11:05:03 -0700 Subject: torture: Reduce #ifdefs for preempt_schedule() This commit adds a torture_preempt_schedule() that is nothingness in !PREEMPT builds and is preempt_schedule() otherwise. Then torture_preempt_schedule() is used to eliminate several ugly #ifdefs, both in rcutorture and in locktorture. Signed-off-by: Paul E. 
McKenney --- include/linux/torture.h | 6 ++++++ kernel/locking/locktorture.c | 24 ++++++------------------ kernel/rcu/rcutorture.c | 4 +--- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/include/linux/torture.h b/include/linux/torture.h index a45702eb3e7b..907d266aaddc 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -96,4 +96,10 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #define torture_stop_kthread(n, tp) \ _torture_stop_kthread("Stopping " #n " task", &(tp)) +#ifdef CONFIG_PREEMPT +#define torture_preempt_schedule() preempt_schedule() +#else +#define torture_preempt_schedule() +#endif + #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index f24582d4dad3..617cea2520b3 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -130,10 +130,8 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_lock_busted_write_unlock(void) @@ -179,10 +177,8 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. 
*/ } static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock) @@ -352,10 +348,8 @@ static void torture_mutex_delay(struct torture_random_state *trsp) mdelay(longdelay_ms * 5); else mdelay(longdelay_ms / 5); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_mutex_unlock(void) __releases(torture_mutex) @@ -507,10 +501,8 @@ static void torture_rtmutex_delay(struct torture_random_state *trsp) if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_rtmutex_unlock(void) __releases(torture_rtmutex) @@ -547,10 +539,8 @@ static void torture_rwsem_write_delay(struct torture_random_state *trsp) mdelay(longdelay_ms * 10); else mdelay(longdelay_ms / 10); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. */ } static void torture_rwsem_up_write(void) __releases(torture_rwsem) @@ -574,10 +564,8 @@ static void torture_rwsem_read_delay(struct torture_random_state *trsp) mdelay(longdelay_ms * 2); else mdelay(longdelay_ms / 2); -#ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000))) - preempt_schedule(); /* Allow test to be preempted. */ -#endif + torture_preempt_schedule(); /* Allow test to be preempted. 
*/ } static void torture_rwsem_up_read(void) __releases(torture_rwsem) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 74f6b0146b98..e7d3cce84214 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -315,11 +315,9 @@ static void rcu_read_delay(struct torture_random_state *rrsp) } if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT if (!preempt_count() && !(torture_random(rrsp) % (nrealreaders * 20000))) - preempt_schedule(); /* No QS if preempt_disable() in effect */ -#endif + torture_preempt_schedule(); /* QS only if preemptible. */ } static void rcu_torture_read_unlock(int idx) __releases(RCU) -- cgit v1.2.3 From e8302739aa2204d52dacf9e9619cb6e755fa997a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Oct 2017 11:23:42 -0700 Subject: rcutorture: Preempt RCU-preempt readers more vigorously This commit attempts to make a very rare rcutorture failure happen more often by increasing the fraction of RCU-preempt read-side critical sections that are preempted. Signed-off-by: Paul E. McKenney --- kernel/rcu/rcutorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index e7d3cce84214..1074ecc3f72f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -316,7 +316,7 @@ static void rcu_read_delay(struct torture_random_state *rrsp) if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) udelay(shortdelay_us); if (!preempt_count() && - !(torture_random(rrsp) % (nrealreaders * 20000))) + !(torture_random(rrsp) % (nrealreaders * 500))) torture_preempt_schedule(); /* QS only if preemptible. 
*/ } -- cgit v1.2.3 From 2adfa4210f8f35cdfb4e08318cc06b99752964c2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:20 +0900 Subject: rcutorture/configinit: Fix build directory error message The 'configinit.sh' script checks the format of optional argument for the build directory, printing an error message if the format is not valid. However, the error message uses the wrong variable, indicating an empty string even though the user entered a non-empty (but erroneous) string. This commit fixes the script to use the correct variable. Fixes: c87b9c601ac8 ("rcutorture: Add KVM-based test framework") Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/configinit.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index 51f66a7ce876..c15f270e121d 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -51,7 +51,7 @@ then mkdir $builddir fi else - echo Bad build directory: \"$builddir\" + echo Bad build directory: \"$buildloc\" exit 2 fi fi -- cgit v1.2.3 From 3a0b3bbbff0f69c59e753dddf97e4e334b7fa997 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:21 +0900 Subject: rcutorture: Remove unused script, config2frag.sh The 'config2frag.sh' script is not used, so this commit removes it. Fixes: c87b9c601ac8 ("rcutorture: Add KVM-based test framework") Signed-off-by: SeongJae Park Signed-off-by: Paul E. 
McKenney --- .../selftests/rcutorture/bin/config2frag.sh | 25 ---------------------- 1 file changed, 25 deletions(-) delete mode 100755 tools/testing/selftests/rcutorture/bin/config2frag.sh diff --git a/tools/testing/selftests/rcutorture/bin/config2frag.sh b/tools/testing/selftests/rcutorture/bin/config2frag.sh deleted file mode 100755 index 56f51ae13d73..000000000000 --- a/tools/testing/selftests/rcutorture/bin/config2frag.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# Usage: config2frag.sh < .config > configfrag -# -# Converts the "# CONFIG_XXX is not set" to "CONFIG_XXX=n" so that the -# resulting file becomes a legitimate Kconfig fragment. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2013 -# -# Authors: Paul E. McKenney - -LANG=C sed -e 's/^# CONFIG_\([a-zA-Z0-9_]*\) is not set$/CONFIG_\1=n/' -- cgit v1.2.3 From e5ed531dca4f569397ee5df60cd8ea2684c9aeff Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:22 +0900 Subject: rcutorture/kvm.sh: Remove unused variable, `alldone` The variable `alldone` is defined but not used within an awk script. This commit therefore removes it. Fixes: 53954671033d ("rcutorture: Do better bin packing") Signed-off-by: SeongJae Park Signed-off-by: Paul E.
McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index ccd49e958fd2..7eb8d14e2aab 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -238,7 +238,6 @@ BEGIN { } END { - alldone = 0; batch = 0; nc = -1; -- cgit v1.2.3 From 8dcd6f3fe206c0bb8996e59386a04027b1c2fb9b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:23 +0900 Subject: rcutorture/kvm.sh: Use consistent help text for --qemu-args The '--qemu-args' option's help text is wrongly copied from '--qemu-cmd' option and its argument type description message format is inconsistent with other arguments. This commit fixes the usage and type messages to be consistent with others. Fixes: e9ce640001c6 ("rcutorture: Add --qemu-args argument to kvm.sh") Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 7eb8d14e2aab..64d96fc3dd62 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -70,7 +70,7 @@ usage () { echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" echo " --no-initrd" - echo " --qemu-args qemu-system-..." + echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." 
echo " --results absolute-pathname" echo " --torture rcu" @@ -150,7 +150,7 @@ do TORTURE_INITRD=""; export TORTURE_INITRD ;; --qemu-args|--qemu-arg) - checkarg --qemu-args "-qemu args" $# "$2" '^-' '^error' + checkarg --qemu-args "(qemu arguments)" $# "$2" '^-' '^error' TORTURE_QEMU_ARG="$2" shift ;; -- cgit v1.2.3 From 512e3bd0b554eb25d8816ab3954e0f39c98e8183 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:24 +0900 Subject: rcutorture/kvm.sh: Support execution from any directory The 'kvm.sh' rcutorture script requires that it be invoked from the top of Linux-kernel source tree. It is just a subtle restriction, but users using it for the first time could forget the restriction and be confused. Moreover, it makes commands a little longer, which can be frustrating. This commit therefore lets users invoke the script from any location. Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 64d96fc3dd62..d2a4fd94de6a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -1,8 +1,7 @@ #!/bin/bash # # Run a series of 14 tests under KVM. These are not particularly -# well-selected or well-tuned, but are the current set. Run from the -# top level of the source tree. +# well-selected or well-tuned, but are the current set. # # Edit the definitions below to set the locations of the various directories, # as well as the test duration. 
@@ -34,6 +33,8 @@ T=${TMPDIR-/tmp}/kvm.sh.$$ trap 'rm -rf $T' 0 mkdir $T +cd `dirname $scriptname`/../../../../../ + dur=$((30*60)) dryrun="" KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM -- cgit v1.2.3 From 81394e3f6df8f72895354fe29a1ef60cb0765a78 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:25 +0900 Subject: rcutorture/kvm-recheck-*: Improve result directory readability check The kvm-recheck-(lock|rcu|rcuperf).sh scripts check whether the user-specified results directory exists. If not, they print an error message saying that the specified directory is unreadable. To make the message more precise, this commit adds a readability check. Fixes: 2193e1604eac ("rcutorture: Abstract kvm-recheck.sh") Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh | 2 +- tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 2 +- tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh index 43f764098e50..2de92f43ee8c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh @@ -23,7 +23,7 @@ # Authors: Paul E. McKenney i="$1" -if test -d $i +if test -d "$i" -a -r "$i" then : else diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 559e01ac86be..9e34656bf659 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -23,7 +23,7 @@ # Authors: Paul E.
McKenney i="$1" -if test -d $i +if test -d "$i" -a -r "$i" then : else diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh index 8f3121afc716..6138fd94abfe 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh @@ -23,7 +23,7 @@ # Authors: Paul E. McKenney i="$1" -if test -d $i +if test -d "$i" -a -r "$i" then : else -- cgit v1.2.3 From fa48beb5f485a82a15f777198c770feb6d01c794 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:27 +0900 Subject: rcutorture: Simplify logging Both the 'kvm.sh' and 'kvm-test-1-run.sh' scripts log messages by printing the message to 'stdout' and then also printing it into the log file. Generation of the message thus occurs twice, once for 'stdout' and once for the log file. Moreover, many of the messages contain 'date' output, which results in date being invoked twice (once for stdout print, once for log file write). As a result, the date information in stdout and log file can differ, which could cause confusion. This commit therefore simplifies the logging procedure by using 'tee'. Signed-off-by: SeongJae Park Signed-off-by: Paul E. 
McKenney --- .../selftests/rcutorture/bin/kvm-test-1-run.sh | 4 +-- tools/testing/selftests/rcutorture/bin/kvm.sh | 32 ++++++++-------------- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index ab14b97c942c..0406c67378cb 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -154,9 +154,7 @@ cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` vcpus=`identify_qemu_vcpus` if test $cpu_count -gt $vcpus then - echo CPU count limited from $cpu_count to $vcpus - touch $resdir/Warnings - echo CPU count limited from $cpu_count to $vcpus >> $resdir/Warnings + echo CPU count limited from $cpu_count to $vcpus | tee -a $resdir/Warnings cpu_count=$vcpus fi qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index d2a4fd94de6a..7d1f607f0f76 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -331,8 +331,7 @@ awk < $T/cfgcpu.pack \ # Dump out the scripting required to run one test batch. function dump(first, pastlast, batchnum) { - print "echo ----Start batch " batchnum ": `date`"; - print "echo ----Start batch " batchnum ": `date` >> " rd "/log"; + print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log"; print "needqemurun=" jn=1 for (j = first; j < pastlast; j++) { @@ -349,21 +348,18 @@ function dump(first, pastlast, batchnum) ovf = "-ovf"; else ovf = ""; - print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; - print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` >> " rd "/log"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. 
`date` | tee -a " rd "log"; print "rm -f " builddir ".*"; print "touch " builddir ".wait"; print "mkdir " builddir " > /dev/null 2>&1 || :"; print "mkdir " rd cfr[jn] " || :"; print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" - print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date`"; - print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` >> " rd "/log"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log"; print "while test -f " builddir ".wait" print "do" print "\tsleep 1" print "done" - print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date`"; - print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` >> " rd "/log"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log"; jn++; } for (j = 1; j < jn; j++) { @@ -371,8 +367,7 @@ function dump(first, pastlast, batchnum) print "rm -f " builddir ".ready" print "if test -f \"" rd cfr[j] "/builtkernel\"" print "then" - print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date`"; - print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` >> " rd "/log"; + print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log"; print "\tneedqemurun=1" print "fi" } @@ -386,31 +381,26 @@ function dump(first, pastlast, batchnum) njitter = ja[1]; if (TORTURE_BUILDONLY && njitter != 0) { njitter = 0; - print "echo Build-only run, so suppressing jitter >> " rd "/log" + print "echo Build-only run, so suppressing jitter | tee -a " rd "log" } if (TORTURE_BUILDONLY) { print "needqemurun=" } print "if test -n \"$needqemurun\"" print "then" - print "\techo ---- Starting kernels. `date`"; - print "\techo ---- Starting kernels. `date` >> " rd "/log"; + print "\techo ---- Starting kernels. 
`date` | tee -a " rd "log"; for (j = 0; j < njitter; j++) print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&" print "\twait" - print "\techo ---- All kernel runs complete. `date`"; - print "\techo ---- All kernel runs complete. `date` >> " rd "/log"; + print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log"; print "else" print "\twait" - print "\techo ---- No kernel runs. `date`"; - print "\techo ---- No kernel runs. `date` >> " rd "/log"; + print "\techo ---- No kernel runs. `date` | tee -a " rd "log"; print "fi" for (j = 1; j < jn; j++) { builddir=KVM "/b" j - print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results:"; - print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: >> " rd "/log"; - print "cat " rd cfr[j] "/kvm-test-1-run.sh.out"; - print "cat " rd cfr[j] "/kvm-test-1-run.sh.out >> " rd "/log"; + print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log"; + print "cat " rd cfr[j] "/kvm-test-1-run.sh.out | tee -a " rd "log"; } } -- cgit v1.2.3 From feef2d286a098c3510322d5c1348432899489214 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:28 +0900 Subject: rcutorture: Simplify functions.sh include path Inclusions of 'functions.sh' from 'kvm-test-1-run.sh' and 'kvm-recheck*.sh' use its absolute path. Because the directory containing 'functions.sh' is already in PATH, the full path is unnecessary. This commit therefore simplifies the inclusions to use the short relative path. Signed-off-by: SeongJae Park Signed-off-by: Paul E. 
McKenney --- tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 2 +- tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh | 2 +- tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh | 2 +- tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 2 +- tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 9e34656bf659..c2e1bb6d0cba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -30,7 +30,7 @@ else echo Unreadable results directory: $i exit 1 fi -. tools/testing/selftests/rcutorture/bin/functions.sh +. functions.sh configfile=`echo $i | sed -e 's/^.*\///'` ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh index f79b0e9e84fc..963f71289d22 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh @@ -26,7 +26,7 @@ # Authors: Paul E. McKenney i="$1" -. tools/testing/selftests/rcutorture/bin/functions.sh +. functions.sh if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100 then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh index 6138fd94abfe..ccebf772fa1e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh @@ -31,7 +31,7 @@ else exit 1 fi PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH -. tools/testing/selftests/rcutorture/bin/functions.sh +. 
functions.sh if kvm-recheck-rcuperf-ftrace.sh $i then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index f659346d3358..f7e988f369dd 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -25,7 +25,7 @@ # Authors: Paul E. McKenney PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH -. tools/testing/selftests/rcutorture/bin/functions.sh +. functions.sh for rd in "$@" do firsttime=1 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 0406c67378cb..1b78a12740e5 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -42,7 +42,7 @@ T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$ trap 'rm -rf $T' 0 mkdir $T -. $KVM/bin/functions.sh +. functions.sh . $CONFIGFRAG/ver_functions.sh config_template=${1} -- cgit v1.2.3 From af0695d3fcbf8ac387eb48d1356d1956c6af7fd9 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Nov 2017 19:17:26 +0900 Subject: rcutorture/kvm-build.sh: Skip build directory check Checks for build-directory existence and write permissions are provided in both 'kvm-test-1-run.sh' and 'kvm-build.sh'. Because 'kvm-build.sh' is dependent on 'kvm-test-1-run.sh' ('kvm-build.sh' uses variables that are defined by its caller), these checks are unnecessarily duplicated. This commit therefore removes the check from the 'kvm-build.sh' script. Signed-off-by: SeongJae Park Signed-off-by: Paul E.
McKenney --- tools/testing/selftests/rcutorture/bin/kvm-build.sh | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index fb66d0173638..34d126734cde 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -29,11 +29,6 @@ then exit 1 fi builddir=${2} -if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir" -then - echo "kvm-build.sh :$builddir: Not a writable directory, cannot build into it" - exit 1 -fi T=${TMPDIR-/tmp}/test-linux.sh.$$ trap 'rm -rf $T' 0 -- cgit v1.2.3 From 8f9dd8317386b0bcb20cf0bfc832ba2ea67f44d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 7 Nov 2017 14:10:03 -0800 Subject: torture: Place all torture-test modules in one MAINTAINERS group There is some confusion about where patches to kernel/torture.c and kernel/locking/locktorture.c should be sent. This commit therefore updates MAINTAINERS appropriately. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) --- MAINTAINERS | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..bdeb64f8bf54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8194,6 +8194,7 @@ F: arch/*/include/asm/rwsem.h F: include/linux/seqlock.h F: lib/locking*.[ch] F: kernel/locking/ +X: kernel/locking/locktorture.c LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks) M: "Richard Russon (FlatCap)" @@ -11451,15 +11452,6 @@ L: linux-wireless@vger.kernel.org S: Orphan F: drivers/net/wireless/ray* -RCUTORTURE MODULE -M: Josh Triplett -M: "Paul E. McKenney" -L: linux-kernel@vger.kernel.org -S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git -F: Documentation/RCU/torture.txt -F: kernel/rcu/rcutorture.c - RCUTORTURE TEST FRAMEWORK M: "Paul E. 
McKenney" M: Josh Triplett @@ -13748,6 +13740,18 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/topstar-laptop.c +TORTURE-TEST MODULES +M: Davidlohr Bueso +M: "Paul E. McKenney" +M: Josh Triplett +L: linux-kernel@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git +F: Documentation/RCU/torture.txt +F: kernel/torture.c +F: kernel/rcu/rcutorture.c +F: kernel/locking/locktorture.c + TOSHIBA ACPI EXTRAS DRIVER M: Azael Avalos L: platform-driver-x86@vger.kernel.org -- cgit v1.2.3 From f2f762608f45353b0b8c37507824f95bb716c3d5 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 15 May 2017 02:07:22 -0700 Subject: locking/locktorture: Fix rwsem reader_delay We should account for nreader threads, not writers in this callback. Could even trigger a div by 0 if the user explicitly disables writers. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 617cea2520b3..a307a79e6b0b 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -560,7 +560,7 @@ static void torture_rwsem_read_delay(struct torture_random_state *trsp) /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + (cxt.nrealreaders_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms * 2); else mdelay(longdelay_ms / 2); -- cgit v1.2.3 From 2ce77d16db4240dd2e422fc0a5c26d3e2ec03446 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 15 May 2017 02:07:23 -0700 Subject: locking/locktorture: Fix num reader/writer corner cases Things can explode for locktorture if the user does combinations of nwriters_stress=0 nreaders_stress=0. Fix this by not assuming we always want to torture writer threads. 
Reported-by: Jeremy Linton Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney Reviewed-by: Jeremy Linton Tested-by: Jeremy Linton --- kernel/locking/locktorture.c | 76 +++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index a307a79e6b0b..2a1fc2a58910 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -703,8 +703,7 @@ static void __torture_print_stats(char *page, { bool fail = 0; int i, n_stress; - long max = 0; - long min = statp[0].n_lock_acquired; + long max = 0, min = statp ? statp[0].n_lock_acquired : 0; long long sum = 0; n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress; @@ -811,7 +810,7 @@ static void lock_torture_cleanup(void) * such, only perform the underlying torture-specific cleanups, * and avoid anything related to locktorture. */ - if (!cxt.lwsa) + if (!cxt.lwsa && !cxt.lrsa) goto end; if (writer_tasks) { @@ -886,6 +885,13 @@ static int __init lock_torture_init(void) firsterr = -EINVAL; goto unwind; } + + if (nwriters_stress == 0 && nreaders_stress == 0) { + pr_alert("lock-torture: must run at least one locking thread\n"); + firsterr = -EINVAL; + goto unwind; + } + if (cxt.cur_ops->init) cxt.cur_ops->init(); @@ -909,17 +915,19 @@ static int __init lock_torture_init(void) #endif /* Initialize the statistics so that each run gets its own numbers. 
*/ + if (nwriters_stress) { + lock_is_write_held = 0; + cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL); + if (cxt.lwsa == NULL) { + VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } - lock_is_write_held = 0; - cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL); - if (cxt.lwsa == NULL) { - VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory"); - firsterr = -ENOMEM; - goto unwind; - } - for (i = 0; i < cxt.nrealwriters_stress; i++) { - cxt.lwsa[i].n_lock_fail = 0; - cxt.lwsa[i].n_lock_acquired = 0; + for (i = 0; i < cxt.nrealwriters_stress; i++) { + cxt.lwsa[i].n_lock_fail = 0; + cxt.lwsa[i].n_lock_acquired = 0; + } } if (cxt.cur_ops->readlock) { @@ -936,19 +944,21 @@ static int __init lock_torture_init(void) cxt.nrealreaders_stress = cxt.nrealwriters_stress; } - lock_is_read_held = 0; - cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL); - if (cxt.lrsa == NULL) { - VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory"); - firsterr = -ENOMEM; - kfree(cxt.lwsa); - cxt.lwsa = NULL; - goto unwind; - } - - for (i = 0; i < cxt.nrealreaders_stress; i++) { - cxt.lrsa[i].n_lock_fail = 0; - cxt.lrsa[i].n_lock_acquired = 0; + if (nreaders_stress) { + lock_is_read_held = 0; + cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL); + if (cxt.lrsa == NULL) { + VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory"); + firsterr = -ENOMEM; + kfree(cxt.lwsa); + cxt.lwsa = NULL; + goto unwind; + } + + for (i = 0; i < cxt.nrealreaders_stress; i++) { + cxt.lrsa[i].n_lock_fail = 0; + cxt.lrsa[i].n_lock_acquired = 0; + } } } @@ -978,12 +988,14 @@ static int __init lock_torture_init(void) goto unwind; } - writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]), - GFP_KERNEL); - if (writer_tasks == NULL) { - VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); - firsterr = -ENOMEM; - goto unwind; + if (nwriters_stress) { + writer_tasks = 
kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]), + GFP_KERNEL); + if (writer_tasks == NULL) { + VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } } if (cxt.cur_ops->readlock) { -- cgit v1.2.3 From 4ced3314fd3a73dabac4e8a41747883eff36c3e8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Nov 2017 15:01:02 -0800 Subject: torture: Make stutter less vulnerable to compilers and races The stutter_wait() function repeatedly fetched stutter_pause_test, and should really just fetch it once on each pass. The races should be harmless, but why have the races? Also, the whole point of the value "2" for stutter_pause_test is to get everyone to start at very nearly the same time, but the value "2" was the first jiffy of the stutter rather than the last jiffy of the stutter. This commit rearranges the code to be more sensible. Signed-off-by: Paul E. McKenney --- kernel/torture.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/kernel/torture.c b/kernel/torture.c index 52781e838541..3bcbd4fbfe18 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -573,18 +573,21 @@ static int stutter; */ void stutter_wait(const char *title) { + int spt; + cond_resched_rcu_qs(); - while (READ_ONCE(stutter_pause_test) || - (torture_runnable && !READ_ONCE(*torture_runnable))) { - if (stutter_pause_test) - if (READ_ONCE(stutter_pause_test) == 1) - schedule_timeout_interruptible(1); - else - while (READ_ONCE(stutter_pause_test)) - cond_resched(); - else + spt = READ_ONCE(stutter_pause_test); + while (spt || (torture_runnable && !READ_ONCE(*torture_runnable))) { + if (spt == 1) { + schedule_timeout_interruptible(1); + } else if (spt == 2) { + while (READ_ONCE(stutter_pause_test)) + cond_resched(); + } else { schedule_timeout_interruptible(round_jiffies_relative(HZ)); + } torture_shutdown_absorb(title); + spt = READ_ONCE(stutter_pause_test); } } EXPORT_SYMBOL_GPL(stutter_wait); 
@@ -597,17 +600,15 @@ static int torture_stutter(void *arg) { VERBOSE_TOROUT_STRING("torture_stutter task started"); do { - if (!torture_must_stop()) { - if (stutter > 1) { - schedule_timeout_interruptible(stutter - 1); - WRITE_ONCE(stutter_pause_test, 2); - } - schedule_timeout_interruptible(1); + if (!torture_must_stop() && stutter > 1) { WRITE_ONCE(stutter_pause_test, 1); + schedule_timeout_interruptible(stutter - 1); + WRITE_ONCE(stutter_pause_test, 2); + schedule_timeout_interruptible(1); } + WRITE_ONCE(stutter_pause_test, 0); if (!torture_must_stop()) schedule_timeout_interruptible(stutter); - WRITE_ONCE(stutter_pause_test, 0); torture_shutdown_absorb("torture_stutter"); } while (!torture_must_stop()); torture_kthread_stopping("torture_stutter"); -- cgit v1.2.3 From a2f2577d96ad060b65eb909dd39b57d676754119 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Nov 2017 20:19:17 -0800 Subject: torture: Eliminate torture_runnable and perf_runnable The purpose of torture_runnable is to allow rcutorture and locktorture to be started and stopped via sysfs when they are built into the kernel (as in not compiled as loadable modules). However, the 0444 permissions for both instances of torture_runnable prevent this use case from ever being put into practice. Given that there have been no complaints about this deficiency, it is reasonable to conclude that no one actually makes use of this sysfs capability. The perf_runnable module parameter for rcuperf is in the same situation. This commit therefore removes both torture_runnable instances as well as perf_runnable. Reported-by: Thomas Gleixner Signed-off-by: Paul E. 
McKenney --- Documentation/admin-guide/kernel-parameters.txt | 9 --------- Documentation/locking/locktorture.txt | 5 ----- include/linux/torture.h | 2 +- kernel/locking/locktorture.c | 6 +----- kernel/rcu/rcuperf.c | 6 +----- kernel/rcu/rcutorture.c | 6 +----- kernel/torture.c | 6 ++---- tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh | 1 - tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh | 1 - .../selftests/rcutorture/configs/rcuperf/ver_functions.sh | 1 - 10 files changed, 6 insertions(+), 37 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6571fbfdb2a1..66d471f0b92e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2049,9 +2049,6 @@ This tests the locking primitive's ability to transition abruptly to and from idle. - locktorture.torture_runnable= [BOOT] - Start locktorture running at boot time. - locktorture.torture_type= [KNL] Specify the locking implementation to test. @@ -3459,9 +3456,6 @@ the same as for rcuperf.nreaders. N, where N is the number of CPUs - rcuperf.perf_runnable= [BOOT] - Start rcuperf running at boot time. - rcuperf.perf_type= [KNL] Specify the RCU implementation to test. @@ -3595,9 +3589,6 @@ Test RCU's dyntick-idle handling. See also the rcutorture.shuffle_interval parameter. - rcutorture.torture_runnable= [BOOT] - Start rcutorture running at boot time. - rcutorture.torture_type= [KNL] Specify the RCU implementation to test. diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.txt index a2ef3a929bf1..6a8df4cd19bf 100644 --- a/Documentation/locking/locktorture.txt +++ b/Documentation/locking/locktorture.txt @@ -57,11 +57,6 @@ torture_type Type of lock to torture. By default, only spinlocks will o "rwsem_lock": read/write down() and up() semaphore pairs. 
-torture_runnable Start locktorture at boot time in the case where the - module is built into the kernel, otherwise wait for - torture_runnable to be set via sysfs before starting. - By default it will begin once the module is loaded. - ** Torture-framework (RCU + locking) ** diff --git a/include/linux/torture.h b/include/linux/torture.h index 907d266aaddc..66272862070b 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -79,7 +79,7 @@ void stutter_wait(const char *title); int torture_stutter_init(int s); /* Initialization and cleanup. */ -bool torture_init_begin(char *ttype, bool v, int *runnable); +bool torture_init_begin(char *ttype, bool v); void torture_init_end(void); bool torture_cleanup_begin(void); void torture_cleanup_end(void); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 2a1fc2a58910..6850ffd69125 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -77,10 +77,6 @@ struct lock_stress_stats { long n_lock_acquired; }; -int torture_runnable = IS_ENABLED(MODULE); -module_param(torture_runnable, int, 0444); -MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init"); - /* Forward reference. */ static void lock_torture_cleanup(void); @@ -866,7 +862,7 @@ static int __init lock_torture_init(void) &percpu_rwsem_lock_ops, }; - if (!torture_init_begin(torture_type, verbose, &torture_runnable)) + if (!torture_init_begin(torture_type, verbose)) return -EBUSY; /* Process args and tell the world that the torturer is on the job. 
*/ diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 1f87a02c3399..d1ebdf9868bb 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -106,10 +106,6 @@ static int rcu_perf_writer_state; #define MAX_MEAS 10000 #define MIN_MEAS 100 -static int perf_runnable = IS_ENABLED(MODULE); -module_param(perf_runnable, int, 0444); -MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot"); - /* * Operations vector for selecting different types of tests. */ @@ -646,7 +642,7 @@ rcu_perf_init(void) &tasks_ops, }; - if (!torture_init_begin(perf_type, verbose, &perf_runnable)) + if (!torture_init_begin(perf_type, verbose)) return -EBUSY; /* Process args and tell the world that the perf'er is on the job. */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 1074ecc3f72f..308e6fdbced8 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -187,10 +187,6 @@ static const char *rcu_torture_writer_state_getname(void) return rcu_torture_writer_state_names[i]; } -static int torture_runnable = IS_ENABLED(MODULE); -module_param(torture_runnable, int, 0444); -MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot"); - #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) #define rcu_can_boost() 1 #else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ @@ -1729,7 +1725,7 @@ rcu_torture_init(void) &sched_ops, &tasks_ops, }; - if (!torture_init_begin(torture_type, verbose, &torture_runnable)) + if (!torture_init_begin(torture_type, verbose)) return -EBUSY; /* Process args and tell the world that the torturer is on the job. */ diff --git a/kernel/torture.c b/kernel/torture.c index 3bcbd4fbfe18..572576ad9f58 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -61,7 +61,6 @@ static bool verbose; #define FULLSTOP_RMMOD 2 /* Normal rmmod of torture. 
*/ static int fullstop = FULLSTOP_RMMOD; static DEFINE_MUTEX(fullstop_mutex); -static int *torture_runnable; #ifdef CONFIG_HOTPLUG_CPU @@ -577,7 +576,7 @@ void stutter_wait(const char *title) cond_resched_rcu_qs(); spt = READ_ONCE(stutter_pause_test); - while (spt || (torture_runnable && !READ_ONCE(*torture_runnable))) { + while (spt) { if (spt == 1) { schedule_timeout_interruptible(1); } else if (spt == 2) { @@ -649,7 +648,7 @@ static void torture_stutter_cleanup(void) * The runnable parameter points to a flag that controls whether or not * the test is currently runnable. If there is no such flag, pass in NULL. */ -bool torture_init_begin(char *ttype, bool v, int *runnable) +bool torture_init_begin(char *ttype, bool v) { mutex_lock(&fullstop_mutex); if (torture_type != NULL) { @@ -661,7 +660,6 @@ bool torture_init_begin(char *ttype, bool v, int *runnable) } torture_type = ttype; verbose = v; - torture_runnable = runnable; fullstop = FULLSTOP_DONTSTOP; return true; } diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh index 252aae618984..80eb646e1319 100644 --- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh @@ -38,6 +38,5 @@ per_version_boot_params () { echo $1 `locktorture_param_onoff "$1" "$2"` \ locktorture.stat_interval=15 \ locktorture.shutdown_secs=$3 \ - locktorture.torture_runnable=1 \ locktorture.verbose=1 } diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh index ffb85ed786fa..24ec91041957 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh @@ -51,7 +51,6 @@ per_version_boot_params () { `rcutorture_param_n_barrier_cbs "$1"` \ rcutorture.stat_interval=15 \ rcutorture.shutdown_secs=$3 \ - 
rcutorture.torture_runnable=1 \ rcutorture.test_no_idle_hz=1 \ rcutorture.verbose=1 } diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh index 34f2a1b35ee5..b9603115d7c7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh @@ -46,7 +46,6 @@ rcuperf_param_nwriters () { per_version_boot_params () { echo $1 `rcuperf_param_nreaders "$1"` \ `rcuperf_param_nwriters "$1"` \ - rcuperf.perf_runnable=1 \ rcuperf.shutdown=1 \ rcuperf.verbose=1 } -- cgit v1.2.3 From 29d3939084583b26a5487be64b9523e61468f1be Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Nov 2017 22:07:59 -0800 Subject: torture: Save a line in stutter_wait(): while -> for Signed-off-by: Paul E. McKenney --- kernel/torture.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/torture.c b/kernel/torture.c index 572576ad9f58..37b94012a3f8 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -576,7 +576,7 @@ void stutter_wait(const char *title) cond_resched_rcu_qs(); spt = READ_ONCE(stutter_pause_test); - while (spt) { + for (; spt; spt = READ_ONCE(stutter_pause_test)) { if (spt == 1) { schedule_timeout_interruptible(1); } else if (spt == 2) { @@ -586,7 +586,6 @@ void stutter_wait(const char *title) schedule_timeout_interruptible(round_jiffies_relative(HZ)); } torture_shutdown_absorb(title); - spt = READ_ONCE(stutter_pause_test); } } EXPORT_SYMBOL_GPL(stutter_wait); -- cgit v1.2.3 From 0f7cda2b824bb2afe0d75716a8664117fa03f5e0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 1 Dec 2017 12:10:00 -0800 Subject: Kconfig: Make STRICT_DEVMEM default-y on x86 and arm64 Distros have been shipping with CONFIG_STRICT_DEVMEM=y for years now. It is probably time to flip this default for x86 and arm64. 
Signed-off-by: Kees Cook Acked-by: Laura Abbott Cc: Andrew Morton Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/20171201201000.GA44539@beast Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 947d3e2ed5c2..39b123d04a36 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1985,7 +1985,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED - default y if TILE || PPC + default y if TILE || PPC || X86 || ARM64 ---help--- If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental -- cgit v1.2.3 From e7ed9d9bd0375c74fe6e27d8bc73d3c6f4c8c3bc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 30 Nov 2017 16:12:02 -0800 Subject: uprobes/x86: Emulate push insns for uprobe on x86 Uprobe is a tracing mechanism for userspace programs. Typical uprobe will incur overhead of two traps. First trap is caused by replaced trap insn, and the second trap is to execute the original displaced insn in user space. To reduce the overhead, kernel provides hooks for architectures to emulate the original insn and skip the second trap. In x86, emulation is done for certain branch insns. This patch extends the emulation to "push " insns. These insns are typical in the beginning of the function. For example, bcc in https://github.com/iovisor/bcc repo provides tools to measure funclatency, detect memleak, etc. The tools will place uprobes in the beginning of function and possibly uretprobes at the end of function. This patch is able to reduce the trap overhead for uprobe from 2 to 1. Without this patch, uretprobe will typically incur three traps.
With this patch, if the function starts with "push" insn, the number of traps can be reduced from 3 to 2. An experiment was conducted on two local VMs, fedora 26 64-bit VM and 32-bit VM, both 4 processors and 4GB memory, booted with latest tip repo (and this patch). The host is MacBook with intel i7 processor. The test program looks like: #include #include #include #include static void test() __attribute__((noinline)); void test() {} int main() { struct timeval start, end; gettimeofday(&start, NULL); for (int i = 0; i < 1000000; i++) { test(); } gettimeofday(&end, NULL); printf("%ld\n", ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec))); return 0; } The program is compiled without optimization, and the first insn for function "test" is "push %rbp". The host is relatively idle. Before the test run, the uprobe is inserted as below for uprobe: echo 'p :' > /sys/kernel/debug/tracing/uprobe_events echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable and for uretprobe: echo 'r :' > /sys/kernel/debug/tracing/uprobe_events echo 1 > /sys/kernel/debug/tracing/events/uprobes/enable Unit: microsecond(usec) per loop iteration x86_64 W/ this patch W/O this patch uprobe 1.55 3.1 uretprobe 2.0 3.6 x86_32 W/ this patch W/O this patch uprobe 1.41 3.5 uretprobe 1.75 4.0 You can see that this patch significantly reduced the overhead, 50% for uprobe and 44% for uretprobe on x86_64, and even more on x86_32. 
Signed-off-by: Yonghong Song Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20171201001202.3706564-1-yhs@fb.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uprobes.h | 4 ++ arch/x86/kernel/uprobes.c | 107 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 107 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 74f4c2ff6427..d8bfa98fca98 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -53,6 +53,10 @@ struct arch_uprobe { u8 fixups; u8 ilen; } defparam; + struct { + u8 reg_offset; /* to the start of pt_regs */ + u8 ilen; + } push; }; }; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index a3755d293a48..85c7ef23d99f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -528,11 +528,11 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) return 0; } -static int push_ret_address(struct pt_regs *regs, unsigned long ip) +static int emulate_push_stack(struct pt_regs *regs, unsigned long val) { unsigned long new_sp = regs->sp - sizeof_long(); - if (copy_to_user((void __user *)new_sp, &ip, sizeof_long())) + if (copy_to_user((void __user *)new_sp, &val, sizeof_long())) return -EFAULT; regs->sp = new_sp; @@ -566,7 +566,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs regs->ip += correction; } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { regs->sp += sizeof_long(); /* Pop incorrect return address */ - if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen)) + if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen)) return -ERESTART; } /* popf; tell the caller to not touch TF */ @@ -655,7 +655,7 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) * * But there is corner case, see the comment in ->post_xol(). 
*/ - if (push_ret_address(regs, new_ip)) + if (emulate_push_stack(regs, new_ip)) return false; } else if (!check_jmp_cond(auprobe, regs)) { offs = 0; @@ -665,6 +665,16 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) return true; } +static bool push_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + unsigned long *src_ptr = (void *)regs + auprobe->push.reg_offset; + + if (emulate_push_stack(regs, *src_ptr)) + return false; + regs->ip += auprobe->push.ilen; + return true; +} + static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { BUG_ON(!branch_is_call(auprobe)); @@ -703,6 +713,10 @@ static const struct uprobe_xol_ops branch_xol_ops = { .post_xol = branch_post_xol_op, }; +static const struct uprobe_xol_ops push_xol_ops = { + .emulate = push_emulate_op, +}; + /* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { @@ -750,6 +764,87 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) return 0; } +/* Returns -ENOSYS if push_xol_ops doesn't handle this insn */ +static int push_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) +{ + u8 opc1 = OPCODE1(insn), reg_offset = 0; + + if (opc1 < 0x50 || opc1 > 0x57) + return -ENOSYS; + + if (insn->length > 2) + return -ENOSYS; + if (insn->length == 2) { + /* only support rex_prefix 0x41 (x64 only) */ +#ifdef CONFIG_X86_64 + if (insn->rex_prefix.nbytes != 1 || + insn->rex_prefix.bytes[0] != 0x41) + return -ENOSYS; + + switch (opc1) { + case 0x50: + reg_offset = offsetof(struct pt_regs, r8); + break; + case 0x51: + reg_offset = offsetof(struct pt_regs, r9); + break; + case 0x52: + reg_offset = offsetof(struct pt_regs, r10); + break; + case 0x53: + reg_offset = offsetof(struct pt_regs, r11); + break; + case 0x54: + reg_offset = offsetof(struct pt_regs, r12); + break; + case 0x55: + reg_offset = offsetof(struct pt_regs, 
r13); + break; + case 0x56: + reg_offset = offsetof(struct pt_regs, r14); + break; + case 0x57: + reg_offset = offsetof(struct pt_regs, r15); + break; + } +#else + return -ENOSYS; +#endif + } else { + switch (opc1) { + case 0x50: + reg_offset = offsetof(struct pt_regs, ax); + break; + case 0x51: + reg_offset = offsetof(struct pt_regs, cx); + break; + case 0x52: + reg_offset = offsetof(struct pt_regs, dx); + break; + case 0x53: + reg_offset = offsetof(struct pt_regs, bx); + break; + case 0x54: + reg_offset = offsetof(struct pt_regs, sp); + break; + case 0x55: + reg_offset = offsetof(struct pt_regs, bp); + break; + case 0x56: + reg_offset = offsetof(struct pt_regs, si); + break; + case 0x57: + reg_offset = offsetof(struct pt_regs, di); + break; + } + } + + auprobe->push.reg_offset = reg_offset; + auprobe->push.ilen = insn->length; + auprobe->ops = &push_xol_ops; + return 0; +} + /** * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. * @mm: the probed address space. @@ -771,6 +866,10 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, if (ret != -ENOSYS) return ret; + ret = push_setup_xol_ops(auprobe, &insn); + if (ret != -ENOSYS) + return ret; + /* * Figure out which fixups default_post_xol_op() will need to perform, * and annotate defparam->fixups accordingly. -- cgit v1.2.3 From f79ce87fa49da778a1ad54c7d3c6755e13cf8489 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 30 Nov 2017 22:51:20 +0800 Subject: x86/build: Don't verify mtools configuration file for isoimage If mtools.conf is not generated before, 'make isoimage' could complain: Kernel: arch/x86/boot/bzImage is ready (#597) GENIMAGE arch/x86/boot/image.iso *** Missing file: arch/x86/boot/mtools.conf arch/x86/boot/Makefile:144: recipe for target 'isoimage' failed mtools.conf is not used for isoimage generation, so do not check it. 
Signed-off-by: Changbin Du Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 4366d57af1 ("x86/build: Factor out fdimage/isoimage generation commands to standalone script") Link: http://lkml.kernel.org/r/1512053480-8083-1-git-send-email-changbin.du@intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/genimage.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh index 49f4970f693b..c9e8499fbfe7 100644 --- a/arch/x86/boot/genimage.sh +++ b/arch/x86/boot/genimage.sh @@ -44,9 +44,9 @@ FDINITRD=$6 # Make sure the files actually exist verify "$FBZIMAGE" -verify "$MTOOLSRC" genbzdisk() { + verify "$MTOOLSRC" mformat a: syslinux $FIMAGE echo "$KCMDLINE" | mcopy - a:syslinux.cfg @@ -57,6 +57,7 @@ genbzdisk() { } genfdimage144() { + verify "$MTOOLSRC" dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null mformat v: syslinux $FIMAGE @@ -68,6 +69,7 @@ genfdimage144() { } genfdimage288() { + verify "$MTOOLSRC" dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null mformat w: syslinux $FIMAGE -- cgit v1.2.3 From 0a373d4fc248cb707821d7dad54ce6d5bcb0cdfe Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 30 Nov 2017 15:35:54 +0300 Subject: x86/unwinder/guess: Prevent using CONFIG_UNWINDER_GUESS=y with CONFIG_STACKDEPOT=y Stackdepot doesn't work well with CONFIG_UNWINDER_GUESS=y. The 'guess' unwinder generate awfully large and inaccurate stacktraces, thus stackdepot can't deduplicate stacktraces because they all look like unique. Eventually stackdepot reaches its capacity limit: WARNING: CPU: 0 PID: 545 at lib/stackdepot.c:119 depot_save_stack+0x28e/0x550 Call Trace: ? kasan_kmalloc+0x144/0x160 ? depot_save_stack+0x1f5/0x550 ? do_raw_spin_unlock+0xda/0xf0 ? preempt_count_sub+0x13/0xc0 <...90 lines...> ? do_raw_spin_unlock+0xda/0xf0 Add a STACKDEPOT=n dependency to UNWINDER_GUESS to avoid the problem. 
Reported-by: kernel test robot Reported-by: Fengguang Wu Signed-off-by: Andrey Ryabinin Acked-by: Dmitry Vyukov Acked-by: Josh Poimboeuf Cc: Alexander Potapenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171130123554.4330-1-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 6293a8768a91..672441c008c7 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER config UNWINDER_GUESS bool "Guess unwinder" depends on EXPERT + depends on !STACKDEPOT ---help--- This option enables the "guess" unwinder for unwinding kernel stack traces. It scans the stack and reports every kernel text address it -- cgit v1.2.3 From 8f659a03a0ba9289b9aeb9b4470e6fb263d6f483 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Sun, 10 Dec 2017 03:50:58 +0000 Subject: net: ipv4: fix for a race condition in raw_sendmsg inet->hdrincl is racy, and could lead to uninitialized stack pointer usage, so its value should be read only once. Fixes: c008ba5bdc9f ("ipv4: Avoid reading user iov twice after raw_probe_proto_opt") Signed-off-by: Mohamed Ghannam Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 33b70bfd1122..125c1eab3eaa 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -513,11 +513,16 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; + int hdrincl; err = -EMSGSIZE; if (len > 0xFFFF) goto out; + /* hdrincl should be READ_ONCE(inet->hdrincl) + * but READ_ONCE() doesn't work with bit fields + */ + hdrincl = inet->hdrincl; /* * Check the flags. 
*/ @@ -593,7 +598,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ - if (inet->hdrincl) + if (hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) @@ -615,12 +620,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | - (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), + (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sk->sk_uid); - if (!inet->hdrincl) { + if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; @@ -645,7 +650,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto do_confirm; back_from_confirm: - if (inet->hdrincl) + if (hdrincl) err = raw_send_hdrinc(sk, &fl4, msg, len, &rt, msg->msg_flags, &ipc.sockc); -- cgit v1.2.3 From 2342b8d95bcae5946e1b9b8d58645f37500ef2e7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 10 Dec 2017 15:40:51 +0800 Subject: sctp: make sure stream nums can match optlen in sctp_setsockopt_reset_streams Now in sctp_setsockopt_reset_streams, it only does the check optlen < sizeof(*params) for optlen. But it's not enough, as params->srs_number_streams should also match optlen. If the streams in params->srs_stream_list are less than stream nums in params->srs_number_streams, later when dereferencing the stream list, it could cause a slab-out-of-bounds crash, as reported by syzbot. This patch is to fix it by also checking the stream numbers in sctp_setsockopt_reset_streams to make sure at least it's not greater than the streams in the list. Fixes: 7f9d68ac944e ("sctp: implement sender-side procedures for SSN Reset Request Parameter") Reported-by: Dmitry Vyukov Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index eb17a911aa29..3253f724a995 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3891,13 +3891,17 @@ static int sctp_setsockopt_reset_streams(struct sock *sk, struct sctp_association *asoc; int retval = -EINVAL; - if (optlen < sizeof(struct sctp_reset_streams)) + if (optlen < sizeof(*params)) return -EINVAL; params = memdup_user(optval, optlen); if (IS_ERR(params)) return PTR_ERR(params); + if (params->srs_number_streams * sizeof(__u16) > + optlen - sizeof(*params)) + goto out; + asoc = sctp_id2assoc(sk, params->srs_assoc_id); if (!asoc) goto out; -- cgit v1.2.3 From 200809716aed1cac586fcac4c0551a688439be1f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 10 Dec 2017 16:56:00 +0800 Subject: fou: fix some member types in guehdr guehdr struct is used to build or parse gue packets, which are always in big endian. It's better to define all guehdr members as __beXX types. Also, in validate_gue_flags it's not good to use a __be32 variable for both Standard flags(__be16) and Private flags (__be32), and pass it to other functions. This patch could fix a bunch of sparse warnings from fou. Fixes: 5024c33ac354 ("gue: Add infrastructure for flags and options") Signed-off-by: Xin Long Signed-off-by: David S.
Miller --- include/net/gue.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/gue.h b/include/net/gue.h index 2fdb29ca74c2..fdad41469b65 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -44,10 +44,10 @@ struct guehdr { #else #error "Please fix " #endif - __u8 proto_ctype; - __u16 flags; + __u8 proto_ctype; + __be16 flags; }; - __u32 word; + __be32 word; }; }; @@ -84,11 +84,10 @@ static inline size_t guehdr_priv_flags_len(__be32 flags) * if there is an unknown standard or private flags, or the options length for * the flags exceeds the options length specific in hlen of the GUE header. */ -static inline int validate_gue_flags(struct guehdr *guehdr, - size_t optlen) +static inline int validate_gue_flags(struct guehdr *guehdr, size_t optlen) { + __be16 flags = guehdr->flags; size_t len; - __be32 flags = guehdr->flags; if (flags & ~GUE_FLAGS_ALL) return 1; @@ -101,12 +100,13 @@ static inline int validate_gue_flags(struct guehdr *guehdr, /* Private flags are last four bytes accounted in * guehdr_flags_len */ - flags = *(__be32 *)((void *)&guehdr[1] + len - GUE_LEN_PRIV); + __be32 pflags = *(__be32 *)((void *)&guehdr[1] + + len - GUE_LEN_PRIV); - if (flags & ~GUE_PFLAGS_ALL) + if (pflags & ~GUE_PFLAGS_ALL) return 1; - len += guehdr_priv_flags_len(flags); + len += guehdr_priv_flags_len(pflags); if (len > optlen) return 1; } -- cgit v1.2.3 From 9d5afec6b8bd46d6ed821aa1579634437f58ef1f Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Mon, 11 Dec 2017 15:00:57 -0500 Subject: ext4: fix crash when a directory's i_size is too small On a ppc64 machine, when mounting a fuzzed ext2 image (generated by fsfuzzer) the following call trace is seen, VFS: brelse: Trying to free free buffer WARNING: CPU: 1 PID: 6913 at /root/repos/linux/fs/buffer.c:1165 .__brelse.part.6+0x24/0x40 .__brelse.part.6+0x20/0x40 (unreliable) .ext4_find_entry+0x384/0x4f0 .ext4_lookup+0x84/0x250 .lookup_slow+0xdc/0x230 .walk_component+0x268/0x400 
.path_lookupat+0xec/0x2d0 .filename_lookup+0x9c/0x1d0 .vfs_statx+0x98/0x140 .SyS_newfstatat+0x48/0x80 system_call+0x58/0x6c This happens because the directory that ext4_find_entry() looks up has inode->i_size that is less than the block size of the filesystem. This causes 'nblocks' to have a value of zero. ext4_bread_batch() ends up not reading any of the directory file's blocks. This renders the entries in bh_use[] array to continue to have garbage data. buffer_uptodate() on bh_use[0] can then return a zero value upon which brelse() function is invoked. This commit fixes the bug by returning -ENOENT when the directory file has no associated blocks. Reported-by: Abdul Haleem Signed-off-by: Chandan Rajendra Cc: stable@vger.kernel.org --- fs/ext4/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 798b3ac680db..e750d68fbcb5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1399,6 +1399,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, "falling back\n")); } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); + if (!nblocks) { + ret = NULL; + goto cleanup_and_exit; + } start = EXT4_I(dir)->i_dir_start_lookup; if (start >= nblocks) start = 0; -- cgit v1.2.3 From c058ecf6e455fac7346d46197a02398ead90851f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 27 Nov 2017 13:16:32 -0800 Subject: iw_cxgb4: only insert drain cqes if wq is flushed Only insert our special drain CQEs to support ib_drain_sq/rq() after the wq is flushed. Otherwise, existing but not yet polled CQEs can be returned out of order to the user application. This can happen when the QP has exited RTS but not yet flushed the QP, which can happen during a normal close (vs abortive close). In addition never count the drain CQEs when determining how many CQEs need to be synthesized during the flush operation. 
This latter issue should never happen if the QP is properly flushed before inserting the drain CQE, but I wanted to avoid corrupting the CQ state. So we handle it and log a warning once. Fixes: 4fe7c2962e11 ("iw_cxgb4: refactor sq/rq drain logic") Signed-off-by: Steve Wise Cc: stable@vger.kernel.org Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 5 +++++ drivers/infiniband/hw/cxgb4/qp.c | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index ea55e95cd2c5..b7bfc536e00f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -395,6 +395,11 @@ next_cqe: static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) { + if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) { + WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid); + return 0; + } + if (CQE_OPCODE(cqe) == FW_RI_TERMINATE) return 0; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 355e288ec969..38bddd02a943 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -868,7 +868,12 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qhp = to_c4iw_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); - if (t4_wq_in_error(&qhp->wq)) { + + /* + * If the qp has been flushed, then just insert a special + * drain cqe. + */ + if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); complete_sq_drain_wr(qhp, wr); return err; @@ -1011,7 +1016,12 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, qhp = to_c4iw_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); - if (t4_wq_in_error(&qhp->wq)) { + + /* + * If the qp has been flushed, then just insert a special + * drain cqe. 
+ */ + if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); complete_rq_drain_wr(qhp, wr); return err; -- cgit v1.2.3 From 68a213d325c23d39f109f4c7c824b906a7d209de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 2 Nov 2017 21:25:24 +0100 Subject: platform/x86: dell-laptop: Fix keyboard max lighting for Dell Latitude E6410 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This machine reports number of keyboard backlight led levels, instead of value of the last led level index. Therefore max_brightness properly needs to be subtracted by 1 to match led max_brightness API. Signed-off-by: Pali Rohár Reported-by: Gabriel M. Elder Link: https://bugzilla.kernel.org/show_bug.cgi?id=196913 Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/dell-laptop.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index bf897b1832b1..cd4725e7e0b5 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -37,6 +37,7 @@ struct quirk_entry { u8 touchpad_led; + u8 kbd_led_levels_off_1; int needs_kbd_timeouts; /* @@ -67,6 +68,10 @@ static struct quirk_entry quirk_dell_xps13_9333 = { .kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 }, }; +static struct quirk_entry quirk_dell_latitude_e6410 = { + .kbd_led_levels_off_1 = 1, +}; + static struct platform_driver platform_driver = { .driver = { .name = "dell-laptop", @@ -269,6 +274,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = { }, .driver_data = &quirk_dell_xps13_9333, }, + { + .callback = dmi_matched, + .ident = "Dell Latitude E6410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6410"), + }, + .driver_data = &quirk_dell_latitude_e6410, + }, { } }; @@ -1149,6 +1163,9 @@ static int kbd_get_info(struct kbd_info *info) units = (buffer->output[2] >> 8) & 0xFF; 
info->levels = (buffer->output[2] >> 16) & 0xFF; + if (quirks && quirks->kbd_led_levels_off_1 && info->levels) + info->levels--; + if (units & BIT(0)) info->seconds = (buffer->output[3] >> 0) & 0xFF; if (units & BIT(1)) -- cgit v1.2.3 From bff5bf9db1c9453ffd0a78abed3e2d040c092fd9 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Mon, 4 Dec 2017 10:26:17 +1000 Subject: platform/x86: asus-wireless: send an EV_SYN/SYN_REPORT between state changes Sending the switch state change twice within the same frame is invalid evdev protocol and only works if the client handles keys immediately as well. Processing events immediately is incorrect, it forces a fake order of events that does not exist on the device. Recent versions of libinput changed to only process the device state and SYN_REPORT time, so now the key event is lost. https://bugs.freedesktop.org/show_bug.cgi?id=104041 Signed-off-by: Peter Hutterer Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/asus-wireless.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c index f3796164329e..d4aeac3477f5 100644 --- a/drivers/platform/x86/asus-wireless.c +++ b/drivers/platform/x86/asus-wireless.c @@ -118,6 +118,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event) return; } input_report_key(data->idev, KEY_RFKILL, 1); + input_sync(data->idev); input_report_key(data->idev, KEY_RFKILL, 0); input_sync(data->idev); } -- cgit v1.2.3 From 532298b95075144bcccf56d792f3fb3fbef2d5d0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 11 Dec 2017 13:54:27 +0300 Subject: platform/x86: dell-wmi: check for kmalloc() errors This allocation won't fail in the current kernel because it's small but not checking for kmalloc() failures introduces static checker warnings so let's fix it. 
Signed-off-by: Dan Carpenter Reviewed-by: Mario Limonciello Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/dell-wmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 39d2f4518483..fb25b20df316 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -639,6 +639,8 @@ static int dell_wmi_events_set_enabled(bool enable) int ret; buffer = kzalloc(sizeof(struct calling_interface_buffer), GFP_KERNEL); + if (!buffer) + return -ENOMEM; buffer->cmd_class = CLASS_INFO; buffer->cmd_select = SELECT_APP_REGISTRATION; buffer->input[0] = 0x10000; -- cgit v1.2.3 From 621f6401fdeefe96dfe9eab4b167c7c39f552bb0 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 11 Dec 2017 15:03:33 +0800 Subject: scsi: libsas: fix length error in sas_smp_handler() The return value of smp_execute_task_sg() is the untransferred residual, but bsg_job_done() requires the length of payload received. This makes SMP passthrough commands from userland by sg ioctl to libsas get a wrong response. The userland tools such as smp_utils failed because of these wrong responses: ~#smp_discover /dev/bsg/expander-2\:13 response too short, len=0 ~#smp_discover /dev/bsg/expander-2\:134 response too short, len=0 Fix this by passing the actual received length to bsg_job_done(). And if smp_execute_task_sg() returns 0, this means received length is exactly the buffer length. [mkp: typo] Fixes: 651a01364994 ("scsi: scsi_transport_sas: switch to bsg-lib for SMP passthrough") Cc: # v4.14+ Signed-off-by: Jason Yan Reported-by: chenqilin Tested-by: chenqilin CC: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_expander.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 174e5eff6155..c7f21661b3cd 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2145,7 +2145,7 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, struct sas_rphy *rphy) { struct domain_device *dev; - unsigned int reslen = 0; + unsigned int rcvlen = 0; int ret = -EINVAL; /* no rphy means no smp target support (ie aic94xx host) */ @@ -2179,12 +2179,12 @@ void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, ret = smp_execute_task_sg(dev, job->request_payload.sg_list, job->reply_payload.sg_list); - if (ret > 0) { - /* positive number is the untransferred residual */ - reslen = ret; + if (ret >= 0) { + /* bsg_job_done() requires the length received */ + rcvlen = job->reply_payload.payload_len - ret; ret = 0; } out: - bsg_job_done(job, ret, reslen); + bsg_job_done(job, ret, rcvlen); } -- cgit v1.2.3 From 3e5c63565aca5fbd1cc150cb2ca77154fc50fa0c Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 11 Dec 2017 10:09:30 +0100 Subject: scsi: MAINTAINERS: change FCoE list to linux-scsi fcoe-devel@open-fcoe.org is defunct and all patches are routed via the SCSI tree anyways. So update MAINTAINERS accordingly. Signed-off-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cd7e12dc6af4..37841b52a5b6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5352,7 +5352,7 @@ F: drivers/media/tuners/fc2580* FCOE SUBSYSTEM (libfc, libfcoe, fcoe) M: Johannes Thumshirn -L: fcoe-devel@open-fcoe.org +L: linux-scsi@vger.kernel.org W: www.Open-FCoE.org S: Supported F: drivers/scsi/libfc/ -- cgit v1.2.3 From 14e3062fb18532175af4d1c4073597999f7a2248 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 5 Dec 2017 16:57:51 -0800 Subject: scsi: core: Fix a scsi_show_rq() NULL pointer dereference Avoid that scsi_show_rq() triggers a NULL pointer dereference if called after sd_uninit_command(). Swap the NULL pointer assignment and the mempool_free() call in sd_uninit_command() to make it less likely that scsi_show_rq() triggers a use-after-free. Note: even with these changes scsi_show_rq() can trigger a use-after-free but that's a lesser evil than e.g. suppressing debug information for T10 PI Type 2 commands completely. This patch fixes the following oops: BUG: unable to handle kernel NULL pointer dereference at (null) IP: scsi_format_opcode_name+0x1a/0x1c0 CPU: 1 PID: 1881 Comm: cat Not tainted 4.14.0-rc2.blk_mq_io_hang+ #516 Call Trace: __scsi_format_command+0x27/0xc0 scsi_show_rq+0x5c/0xc0 __blk_mq_debugfs_rq_show+0x116/0x130 blk_mq_debugfs_rq_show+0xe/0x10 seq_read+0xfe/0x3b0 full_proxy_read+0x54/0x90 __vfs_read+0x37/0x160 vfs_read+0x96/0x130 SyS_read+0x55/0xc0 entry_SYSCALL_64_fastpath+0x1a/0xa5 [mkp: added Type 2] Fixes: 0eebd005dd07 ("scsi: Implement blk_mq_ops.show_rq()") Reported-by: Ming Lei Signed-off-by: Bart Van Assche Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: Ming Lei Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: stable@vger.kernel.org Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_debugfs.c | 6 ++++-- drivers/scsi/sd.c | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c index 01f08c03f2c1..c3765d29fd3f 100644 --- a/drivers/scsi/scsi_debugfs.c +++ b/drivers/scsi/scsi_debugfs.c @@ -8,9 +8,11 @@ void scsi_show_rq(struct seq_file *m, struct request *rq) { struct scsi_cmnd *cmd = container_of(scsi_req(rq), typeof(*cmd), req); int msecs = jiffies_to_msecs(jiffies - cmd->jiffies_at_alloc); - char buf[80]; + const u8 *const cdb = READ_ONCE(cmd->cmnd); + char buf[80] = "(?)"; - __scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len); + if (cdb) + __scsi_format_command(buf, sizeof(buf), cdb, cmd->cmd_len); seq_printf(m, ", .cmd=%s, .retries=%d, allocated %d.%03d s ago", buf, cmd->retries, msecs / 1000, msecs % 1000); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 24fe68522716..a028ab3322a9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1312,6 +1312,7 @@ static int sd_init_command(struct scsi_cmnd *cmd) static void sd_uninit_command(struct scsi_cmnd *SCpnt) { struct request *rq = SCpnt->request; + u8 *cmnd; if (SCpnt->flags & SCMD_ZONE_WRITE_LOCK) sd_zbc_write_unlock_zone(SCpnt); @@ -1320,9 +1321,10 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) __free_page(rq->special_vec.bv_page); if (SCpnt->cmnd != scsi_req(rq)->cmd) { - mempool_free(SCpnt->cmnd, sd_cdb_pool); + cmnd = SCpnt->cmnd; SCpnt->cmnd = NULL; SCpnt->cmd_len = 0; + mempool_free(cmnd, sd_cdb_pool); } } -- cgit v1.2.3 From d2950278d2d04ff5314abeb38d9c59c4e7c0ee53 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 Dec 2017 18:23:09 +0100 Subject: xfrm: put policies when reusing pcpu xdst entry We need to put the policies when re-using the pcpu xdst entry, else this leaks the reference. 
Fixes: ec30d78c14a813db39a647b6a348b428 ("xfrm: add xdst pcpu cache") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 038ec68f6901..70aa5cb0c659 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1839,6 +1839,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, sizeof(struct xfrm_policy *) * num_pols) == 0 && xfrm_xdst_can_reuse(xdst, xfrm, err)) { dst_hold(&xdst->u.dst); + xfrm_pols_put(pols, num_pols); while (err > 0) xfrm_state_put(xfrm[--err]); return xdst; -- cgit v1.2.3 From d2b3c353595a855794f8b9df5b5bdbe8deb0c413 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 4 Dec 2017 12:11:02 +0300 Subject: pinctrl: cherryview: Mask all interrupts on Intel_Strago based systems Guenter Roeck reported an interrupt storm on a prototype system which is based on Cyan Chromebook. The root cause turned out to be an incorrectly configured pin that triggers spurious interrupts. This will be fixed in coreboot but currently we need to prevent the interrupt storm from happening by masking all interrupts (but not GPEs) on those systems.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=197953 Fixes: bcb48cca23ec ("pinctrl: cherryview: Do not mask all interrupts in probe") Reported-and-tested-by: Guenter Roeck Reported-by: Dmitry Torokhov Signed-off-by: Mika Westerberg Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index bdedb6325c72..4471fd94e1fe 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) clear_bit(i, chip->irq.valid_mask); } + /* + * The same set of machines in chv_no_valid_mask[] have incorrectly + * configured GPIOs that generate spurious interrupts so we use + * this same list to apply another quirk for them. + * + * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953. + */ + if (!need_valid_mask) { + /* + * Mask all interrupts the community is able to generate + * but leave the ones that can only generate GPEs unmasked. + */ + chv_writel(GENMASK(31, pctrl->community->nirqs), + pctrl->regs + CHV_INTMASK); + } + /* Clear all interrupts */ chv_writel(0xffff, pctrl->regs + CHV_INTSTAT); -- cgit v1.2.3 From f87f3a328dbbb3e79dd53e7e889ced9222512649 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Nov 2017 18:42:18 +0000 Subject: locking/core: Fix deadlock during boot on systems with GENERIC_LOCKBREAK Commit: a8a217c22116 ("locking/core: Remove {read,spin,write}_can_lock()") removed the definition of raw_spin_can_lock(), causing the GENERIC_LOCKBREAK spin_lock() routines to poll the ->break_lock field when waiting on a lock. 
This has been reported to cause a deadlock during boot on s390, because the ->break_lock field is also set by the waiters, and can potentially remain set indefinitely if no other CPUs come in to take the lock after it has been released. This patch removes the explicit spinning on ->break_lock from the waiters, instead relying on the outer trylock() operation to determine when the lock is available. Reported-by: Sebastian Ott Tested-by: Sebastian Ott Signed-off-by: Will Deacon Acked-by: Peter Zijlstra Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Thomas Gleixner Fixes: a8a217c22116 ("locking/core: Remove {read,spin,write}_can_lock()") Link: http://lkml.kernel.org/r/1511894539-7988-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- kernel/locking/spinlock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 1fd1a7543cdd..0ebb253e2199 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -68,8 +68,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ \ if (!(lock)->break_lock) \ (lock)->break_lock = 1; \ - while ((lock)->break_lock) \ - arch_##op##_relax(&lock->raw_lock); \ + \ + arch_##op##_relax(&lock->raw_lock); \ } \ (lock)->break_lock = 0; \ } \ @@ -88,8 +88,8 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ \ if (!(lock)->break_lock) \ (lock)->break_lock = 1; \ - while ((lock)->break_lock) \ - arch_##op##_relax(&lock->raw_lock); \ + \ + arch_##op##_relax(&lock->raw_lock); \ } \ (lock)->break_lock = 0; \ return flags; \ -- cgit v1.2.3 From d89c70356acf11b7cf47ca5cfcafae5062a85451 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Nov 2017 18:42:19 +0000 Subject: locking/core: Remove break_lock field when CONFIG_GENERIC_LOCKBREAK=y When CONFIG_GENERIC_LOCKBREAK=y, locking structures grow an extra int ->break_lock field which is used to implement raw_spin_is_contended() by setting the field
to 1 when waiting on a lock and clearing it to zero when holding a lock. However, there are a few problems with this approach: - There is a write-write race between a CPU successfully taking the lock (and subsequently writing break_lock = 0) and a waiter waiting on the lock (and subsequently writing break_lock = 1). This could result in a contended lock being reported as uncontended and vice-versa. - On machines with store buffers, nothing guarantees that the writes to break_lock are visible to other CPUs at any particular time. - READ_ONCE/WRITE_ONCE are not used, so the field is potentially susceptible to harmful compiler optimisations, Consequently, the usefulness of this field is unclear and we'd be better off removing it and allowing architectures to implement raw_spin_is_contended() by providing a definition of arch_spin_is_contended(), as they can when CONFIG_GENERIC_LOCKBREAK=n. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Sebastian Ott Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1511894539-7988-3-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/rwlock_types.h | 3 --- include/linux/spinlock.h | 5 ----- include/linux/spinlock_types.h | 3 --- kernel/locking/spinlock.c | 9 +-------- 4 files changed, 1 insertion(+), 19 deletions(-) diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h index cc0072e93e36..857a72ceb794 100644 --- a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -10,9 +10,6 @@ */ typedef struct { arch_rwlock_t raw_lock; -#ifdef CONFIG_GENERIC_LOCKBREAK - unsigned int break_lock; -#endif #ifdef CONFIG_DEBUG_SPINLOCK unsigned int magic, owner_cpu; void *owner; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index a39186194cd6..3bf273538840 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -107,16 +107,11 @@ do { \ #define raw_spin_is_locked(lock) 
arch_spin_is_locked(&(lock)->raw_lock) -#ifdef CONFIG_GENERIC_LOCKBREAK -#define raw_spin_is_contended(lock) ((lock)->break_lock) -#else - #ifdef arch_spin_is_contended #define raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock) #else #define raw_spin_is_contended(lock) (((void)(lock), 0)) #endif /*arch_spin_is_contended*/ -#endif /* * This barrier must provide two things: diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 73548eb13a5d..24b4e6f2c1a2 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -19,9 +19,6 @@ typedef struct raw_spinlock { arch_spinlock_t raw_lock; -#ifdef CONFIG_GENERIC_LOCKBREAK - unsigned int break_lock; -#endif #ifdef CONFIG_DEBUG_SPINLOCK unsigned int magic, owner_cpu; void *owner; diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 0ebb253e2199..936f3d14dd6b 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -66,12 +66,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ break; \ preempt_enable(); \ \ - if (!(lock)->break_lock) \ - (lock)->break_lock = 1; \ - \ arch_##op##_relax(&lock->raw_lock); \ } \ - (lock)->break_lock = 0; \ } \ \ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ @@ -86,12 +82,9 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ local_irq_restore(flags); \ preempt_enable(); \ \ - if (!(lock)->break_lock) \ - (lock)->break_lock = 1; \ - \ arch_##op##_relax(&lock->raw_lock); \ } \ - (lock)->break_lock = 0; \ + \ return flags; \ } \ \ -- cgit v1.2.3 From 81bf665d00baf1aef01118c6c9e51520e57c0757 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Tue, 12 Dec 2017 02:12:31 +0530 Subject: x86/headers: Remove duplicate #includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. 
Signed-off-by: Pravin Shedge Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: ard.biesheuvel@linaro.org Cc: boris.ostrovsky@oracle.com Cc: geert@linux-m68k.org Cc: jgross@suse.com Cc: linux-efi@vger.kernel.org Cc: luto@kernel.org Cc: matt@codeblueprint.co.uk Cc: thomas.lendacky@amd.com Cc: tim.c.chen@linux.intel.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1513024951-9221-1-git-send-email-pravin.shedge4linux@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/itmt.c | 1 - arch/x86/kernel/process.c | 1 - arch/x86/kernel/setup.c | 1 - arch/x86/kernel/smpboot.c | 1 - arch/x86/platform/efi/efi_64.c | 1 - arch/x86/xen/spinlock.c | 2 -- 6 files changed, 7 deletions(-) diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index f73f475d0573..d177940aa090 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bb988a24db92..d6321855f9da 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8af2e8d0c0a1..c8e04472a141 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -114,7 +114,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 05a97d5fe298..d44b64d571b4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -75,7 +75,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6a151ce70e86..1e5184d7ce7a 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/xen/spinlock.c 
b/arch/x86/xen/spinlock.c index 02f3445a2b5f..cd97a62394e7 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -23,8 +23,6 @@ static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); static bool xen_pvspin = true; -#include - static void xen_qlock_kick(int cpu) { int irq = per_cpu(lock_kicker_irq, cpu); -- cgit v1.2.3 From 21faaea1343f2f8dc6539302c92231afc6d999a5 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 12 Dec 2017 16:11:45 +0800 Subject: ASoC: sun4i-i2s: Add support for A83T The I2S controller in the A83T is mostly compatible with the one found in earlier SoCs such as the A20 and A31. While the documents publicly available for the A83T do not cover this hardware, the officially released BSP kernel does have register definitions for it. These were matched against the A20 user manual. The only difference is the TX FIFO and interrupt status registers have been swapped around, like what we have seen with the SPDIF controller. This patch adds support for this hardware. Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/sun4i-i2s.txt | 2 ++ sound/soc/sunxi/sun4i-i2s.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/sun4i-i2s.txt b/Documentation/devicetree/bindings/sound/sun4i-i2s.txt index 05d7135a8d2f..b9d50d6cdef3 100644 --- a/Documentation/devicetree/bindings/sound/sun4i-i2s.txt +++ b/Documentation/devicetree/bindings/sound/sun4i-i2s.txt @@ -8,6 +8,7 @@ Required properties: - compatible: should be one of the following: - "allwinner,sun4i-a10-i2s" - "allwinner,sun6i-a31-i2s" + - "allwinner,sun8i-a83t-i2s" - "allwinner,sun8i-h3-i2s" - reg: physical base address of the controller and length of memory mapped region. 
@@ -23,6 +24,7 @@ Required properties: Required properties for the following compatibles: - "allwinner,sun6i-a31-i2s" + - "allwinner,sun8i-a83t-i2s" - "allwinner,sun8i-h3-i2s" - resets: phandle to the reset line for this codec diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index 04f92583a969..13d7ecabe1b6 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -897,6 +897,23 @@ static const struct sun4i_i2s_quirks sun6i_a31_i2s_quirks = { .field_rxchansel = REG_FIELD(SUN4I_I2S_RX_CHAN_SEL_REG, 0, 2), }; +static const struct sun4i_i2s_quirks sun8i_a83t_i2s_quirks = { + .has_reset = true, + .reg_offset_txdata = SUN8I_I2S_FIFO_TX_REG, + .sun4i_i2s_regmap = &sun4i_i2s_regmap_config, + .field_clkdiv_mclk_en = REG_FIELD(SUN4I_I2S_CLK_DIV_REG, 7, 7), + .field_fmt_wss = REG_FIELD(SUN4I_I2S_FMT0_REG, 2, 3), + .field_fmt_sr = REG_FIELD(SUN4I_I2S_FMT0_REG, 4, 5), + .field_fmt_bclk = REG_FIELD(SUN4I_I2S_FMT0_REG, 6, 6), + .field_fmt_lrclk = REG_FIELD(SUN4I_I2S_FMT0_REG, 7, 7), + .has_slave_select_bit = true, + .field_fmt_mode = REG_FIELD(SUN4I_I2S_FMT0_REG, 0, 1), + .field_txchanmap = REG_FIELD(SUN4I_I2S_TX_CHAN_MAP_REG, 0, 31), + .field_rxchanmap = REG_FIELD(SUN4I_I2S_RX_CHAN_MAP_REG, 0, 31), + .field_txchansel = REG_FIELD(SUN4I_I2S_TX_CHAN_SEL_REG, 0, 2), + .field_rxchansel = REG_FIELD(SUN4I_I2S_RX_CHAN_SEL_REG, 0, 2), +}; + static const struct sun4i_i2s_quirks sun8i_h3_i2s_quirks = { .has_reset = true, .reg_offset_txdata = SUN8I_I2S_FIFO_TX_REG, @@ -1120,6 +1137,10 @@ static const struct of_device_id sun4i_i2s_match[] = { .compatible = "allwinner,sun6i-a31-i2s", .data = &sun6i_a31_i2s_quirks, }, + { + .compatible = "allwinner,sun8i-a83t-i2s", + .data = &sun8i_a83t_i2s_quirks, + }, { .compatible = "allwinner,sun8i-h3-i2s", .data = &sun8i_h3_i2s_quirks, -- cgit v1.2.3 From 5a0cf02465a0510c48ab34e7ba88b0f8c20b9ea1 Mon Sep 17 00:00:00 2001 From: Andrea Bondavalli Date: Tue, 12 Dec 2017 10:14:50 +0100 Subject: ASoC: sun4i-codec: 
enable 12Khz and 24Khz audio sample rates H3 ASoC supports 12Khz and 24Khz audio sample rates but the current driver doesn't advertise these rates properly and they cannot be used. For example, an attempt to capture at 12Khz uses 11Khz (same applies to audio playback): Recording raw data '/tmp/testS16_LE.raw' : Signed 16 bit Little Endian, Rate 12000 Hz, Stereo Warning: rate is not accurate (requested = 12000Hz, got = 11025Hz) This patch fixes the audio sample rates declared and supported by the driver according to the H3 data sheet. Specifically for audio playback: 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 96000, 192000 and for audio capture: 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000 Signed-off-by: Andrea Bondavalli Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun4i-codec.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/sound/soc/sunxi/sun4i-codec.c b/sound/soc/sunxi/sun4i-codec.c index 5da4efe7a550..886281673972 100644 --- a/sound/soc/sunxi/sun4i-codec.c +++ b/sound/soc/sunxi/sun4i-codec.c @@ -590,12 +590,28 @@ static int sun4i_codec_hw_params(struct snd_pcm_substream *substream, hwrate); } + +static unsigned int sun4i_codec_src_rates[] = { + 8000, 11025, 12000, 16000, 22050, 24000, 32000, + 44100, 48000, 96000, 192000 +}; + + +static struct snd_pcm_hw_constraint_list sun4i_codec_constraints = { + .count = ARRAY_SIZE(sun4i_codec_src_rates), + .list = sun4i_codec_src_rates, +}; + + static int sun4i_codec_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct sun4i_codec *scodec = snd_soc_card_get_drvdata(rtd->card); + snd_pcm_hw_constraint_list(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, &sun4i_codec_constraints); + /* * Stop issuing DRQ when we have room for less than 16 samples * in our TX FIFO @@ -633,9 +649,7 @@ static struct snd_soc_dai_driver sun4i_codec_dai = {
.channels_max = 2, .rate_min = 8000, .rate_max = 192000, - .rates = SNDRV_PCM_RATE_8000_48000 | - SNDRV_PCM_RATE_96000 | - SNDRV_PCM_RATE_192000, + .rates = SNDRV_PCM_RATE_CONTINUOUS, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE, .sig_bits = 24, @@ -645,11 +659,8 @@ static struct snd_soc_dai_driver sun4i_codec_dai = { .channels_min = 1, .channels_max = 2, .rate_min = 8000, - .rate_max = 192000, - .rates = SNDRV_PCM_RATE_8000_48000 | - SNDRV_PCM_RATE_96000 | - SNDRV_PCM_RATE_192000 | - SNDRV_PCM_RATE_KNOT, + .rate_max = 48000, + .rates = SNDRV_PCM_RATE_CONTINUOUS, .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE, .sig_bits = 24, @@ -1128,7 +1139,7 @@ static const struct snd_soc_component_driver sun4i_codec_component = { .name = "sun4i-codec", }; -#define SUN4I_CODEC_RATES SNDRV_PCM_RATE_8000_192000 +#define SUN4I_CODEC_RATES SNDRV_PCM_RATE_CONTINUOUS #define SUN4I_CODEC_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ SNDRV_PCM_FMTBIT_S32_LE) -- cgit v1.2.3 From d61f9982c2a395407a75a4f7057c4a3f55bda462 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 11 Dec 2017 15:26:14 +0100 Subject: ASoC: tfa9879: clean up bindings Fix a couple of nitpicks: - list #sound-dai-cells as a required property. - The chip supports full speed I2C; don't indicate standard mode only. - status = "okay" is just noise. - The chip is an amplifier, not a codec. - consistently indent with tabs. Signed-off-by: Peter Rosin Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/tfa9879.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/tfa9879.txt b/Documentation/devicetree/bindings/sound/tfa9879.txt index 23ba522d9e2b..1620e6848436 100644 --- a/Documentation/devicetree/bindings/sound/tfa9879.txt +++ b/Documentation/devicetree/bindings/sound/tfa9879.txt @@ -6,18 +6,18 @@ Required properties: - reg : the I2C address of the device +- #sound-dai-cells : must be 0. 
+ Example: &i2c1 { - clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; - status = "okay"; - codec: tfa9879@6c { + amp: amp@6c { #sound-dai-cells = <0>; compatible = "nxp,tfa9879"; reg = <0x6c>; - }; + }; }; -- cgit v1.2.3 From a73be94364b80388c0a600715117923669f165f8 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Mon, 11 Dec 2017 15:26:15 +0100 Subject: ASoC: tfa9879: add DT bindings to MAINTAINERS Let's keep maintenance of the driver and the bindings in one place. Signed-off-by: Peter Rosin Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..a04aaa270ad5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9805,6 +9805,7 @@ NXP TFA9879 DRIVER M: Peter Rosin L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained +F: Documentation/devicetree/bindings/sound/tfa9879.txt F: sound/soc/codecs/tfa9879* NXP-NCI NFC DRIVER -- cgit v1.2.3 From aa9c387c2dc25597b730cd8386cac8ccfe75de07 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 11 Dec 2017 22:26:40 +0100 Subject: ASoC: rt5645: Set card long_name for GPD win / pocket The GPD win and pocket devices both use the same codec setup and both have too generic dmi strings making snd_soc_set_dmi_name() not work. As these devices have only a single speaker we want a separate ucm file for them, which requires a unique long_name, use the existing GPD quirk handling to also provide a unique long_name. 
Signed-off-by: Hans de Goede Signed-off-by: Mark Brown --- include/sound/rt5645.h | 3 +++ sound/soc/codecs/rt5645.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/include/sound/rt5645.h b/include/sound/rt5645.h index d0c33a9972b9..f218c742f08e 100644 --- a/include/sound/rt5645.h +++ b/include/sound/rt5645.h @@ -25,6 +25,9 @@ struct rt5645_platform_data { bool level_trigger_irq; /* Invert JD1_1 status polarity */ bool inv_jd1_1; + + /* Value to asign to snd_soc_card.long_name */ + const char *long_name; }; #endif diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index fcd02c2c76f1..a1a7bb770745 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3394,6 +3394,9 @@ static int rt5645_probe(struct snd_soc_codec *codec) snd_soc_dapm_sync(dapm); } + if (rt5645->pdata.long_name) + codec->component.card->long_name = rt5645->pdata.long_name; + rt5645->eq_param = devm_kzalloc(codec->dev, RT5645_HWEQ_NUM * sizeof(struct rt5645_eq_param_s), GFP_KERNEL); @@ -3624,6 +3627,7 @@ static const struct dmi_system_id dmi_platform_intel_broadwell[] = { static const struct rt5645_platform_data gpd_win_platform_data = { .jd_mode = 3, .inv_jd1_1 = true, + .long_name = "gpd-win-pocket-rt5645", }; static const struct dmi_system_id dmi_platform_gpd_win[] = { -- cgit v1.2.3 From 54eed78c5c831ba696259f7fa69966d699a173b1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Dec 2017 12:30:14 +0100 Subject: usb: gadget: webcam: fix V4L2 Kconfig dependency Configuring the USB_G_WEBCAM driver as built-in leads to a link error when CONFIG_VIDEO_V4L2 is a loadable module: drivers/usb/gadget/function/f_uvc.o: In function `uvc_function_setup': f_uvc.c:(.text+0xfe): undefined reference to `v4l2_event_queue' drivers/usb/gadget/function/f_uvc.o: In function `uvc_function_ep0_complete': f_uvc.c:(.text+0x188): undefined reference to `v4l2_event_queue' This changes the Kconfig dependency to disallow that configuration, and force it to be a module in that 
case as well. This is apparently a rather old bug, but very hard to trigger even in thousands of randconfig builds. Signed-off-by: Arnd Bergmann Signed-off-by: Felipe Balbi --- drivers/usb/gadget/legacy/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/Kconfig b/drivers/usb/gadget/legacy/Kconfig index 9570bbeced4f..487568f2c729 100644 --- a/drivers/usb/gadget/legacy/Kconfig +++ b/drivers/usb/gadget/legacy/Kconfig @@ -487,7 +487,7 @@ endif # or video class gadget drivers), or specific hardware, here. config USB_G_WEBCAM tristate "USB Webcam Gadget" - depends on VIDEO_DEV + depends on VIDEO_V4L2 select USB_LIBCOMPOSITE select VIDEOBUF2_VMALLOC select USB_F_UVC -- cgit v1.2.3 From 9dbe416b656bb015fc49fc17961000ffa418838a Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 12 Dec 2017 12:44:40 +0200 Subject: Revert "usb: gadget: allow to enable legacy drivers without USB_ETH" This reverts commit 7a9618a22aadffb55027d665491adf466bced61a. Romain Izard recently reported that commit 7a9618a22aad ended up allowing every legacy gadget driver to be statically linked into the kernel. However, that doesn't work, since only one legacy gadget can be bound to a controller. Because of that, let's revert the original commit and fix the problem. Reported-by: Romain Izard Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 4 ++-- drivers/usb/gadget/legacy/Kconfig | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 0a19a76645ad..31cce7805eb2 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -508,8 +508,8 @@ choice controller, and the relevant drivers for each function declared by the device.
-endchoice - source "drivers/usb/gadget/legacy/Kconfig" +endchoice + endif # USB_GADGET diff --git a/drivers/usb/gadget/legacy/Kconfig b/drivers/usb/gadget/legacy/Kconfig index 487568f2c729..784bf86dad4f 100644 --- a/drivers/usb/gadget/legacy/Kconfig +++ b/drivers/usb/gadget/legacy/Kconfig @@ -13,14 +13,6 @@ # both kinds of controller can also support "USB On-the-Go" (CONFIG_USB_OTG). # -menuconfig USB_GADGET_LEGACY - bool "Legacy USB Gadget Support" - help - Legacy USB gadgets are USB gadgets that do not use the USB gadget - configfs interface. - -if USB_GADGET_LEGACY - config USB_ZERO tristate "Gadget Zero (DEVELOPMENT)" select USB_LIBCOMPOSITE @@ -498,5 +490,3 @@ config USB_G_WEBCAM Say "y" to link the driver statically, or "m" to build a dynamically linked module called "g_webcam". - -endif -- cgit v1.2.3 From 87248dc79236575908568810a61e0953f738516f Mon Sep 17 00:00:00 2001 From: Michele Dionisio Date: Tue, 12 Dec 2017 11:36:59 +0100 Subject: spi: davinci: Initialize dspi->done before any possible use of it On SoCs with multiple CPUs (like the OMAP-L138) it is possible that the SPI peripheral is already initialized when this module is loaded, and so it is possible to receive an interrupt when the module is not fully initialized.
This patch initializes dspi->done before registering the interrupt handler that uses it. Signed-off-by: Michele Dionisio Signed-off-by: Mark Brown --- drivers/spi/spi-davinci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index 6ddb6ef1fda4..60d59b003aa4 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -945,6 +945,8 @@ static int davinci_spi_probe(struct platform_device *pdev) goto free_master; } + init_completion(&dspi->done); + ret = platform_get_irq(pdev, 0); if (ret == 0) ret = -EINVAL; @@ -1021,8 +1023,6 @@ static int davinci_spi_probe(struct platform_device *pdev) dspi->get_rx = davinci_spi_rx_buf_u8; dspi->get_tx = davinci_spi_tx_buf_u8; - init_completion(&dspi->done); - /* Reset In/OUT SPI module */ iowrite32(0, dspi->base + SPIGCR0); udelay(100); -- cgit v1.2.3 From e966eaeeb623f09975ef362c2866fae6f86844f9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 12 Dec 2017 12:31:16 +0100 Subject: locking/lockdep: Remove the cross-release locking checks This code (CONFIG_LOCKDEP_CROSSRELEASE=y and CONFIG_LOCKDEP_COMPLETIONS=y), while it found a number of old bugs initially, was also causing too many false positives that caused people to disable lockdep - which is arguably a worse overall outcome. If we disable cross-release by default but keep the code upstream then in practice the most likely outcome is that we'll allow the situation to degrade gradually, by allowing entropy to introduce more and more false positives, until it overwhelms maintenance capacity. Another bad side effect was that people were trying to work around the false positives by uglifying/complicating unrelated code. There's a marked difference between annotating locking operations and uglifying good code just due to bad lock debugging code ...
This gradual decrease in quality happened to a number of debugging facilities in the kernel, and lockdep is pretty complex already, so we cannot risk this outcome. Either cross-release checking can be done right with no false positives, or it should not be included in the upstream kernel. ( Note that it might make sense to maintain it out of tree and go through the false positives every now and then and see whether new bugs were introduced. ) Cc: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/locking/crossrelease.txt | 874 --------------------------------- include/linux/completion.h | 45 -- include/linux/lockdep.h | 125 ----- include/linux/sched.h | 11 - kernel/locking/lockdep.c | 652 ++---------------------- lib/Kconfig.debug | 33 -- 6 files changed, 35 insertions(+), 1705 deletions(-) delete mode 100644 Documentation/locking/crossrelease.txt diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt deleted file mode 100644 index bdf1423d5f99..000000000000 --- a/Documentation/locking/crossrelease.txt +++ /dev/null @@ -1,874 +0,0 @@ -Crossrelease -============ - -Started by Byungchul Park - -Contents: - - (*) Background - - - What causes deadlock - - How lockdep works - - (*) Limitation - - - Limit lockdep - - Pros from the limitation - - Cons from the limitation - - Relax the limitation - - (*) Crossrelease - - - Introduce crossrelease - - Introduce commit - - (*) Implementation - - - Data structures - - How crossrelease works - - (*) Optimizations - - - Avoid duplication - - Lockless for hot paths - - (*) APPENDIX A: What lockdep does to work aggresively - - (*) APPENDIX B: How to avoid adding false dependencies - - -========== -Background -========== - -What causes deadlock --------------------- - -A deadlock occurs when a context is waiting for an event to happen, -which is impossible because another (or the) context who can 
trigger the -event is also waiting for another (or the) event to happen, which is -also impossible due to the same reason. - -For example: - - A context going to trigger event C is waiting for event A to happen. - A context going to trigger event A is waiting for event B to happen. - A context going to trigger event B is waiting for event C to happen. - -A deadlock occurs when these three wait operations run at the same time, -because event C cannot be triggered if event A does not happen, which in -turn cannot be triggered if event B does not happen, which in turn -cannot be triggered if event C does not happen. After all, no event can -be triggered since any of them never meets its condition to wake up. - -A dependency might exist between two waiters and a deadlock might happen -due to an incorrect releationship between dependencies. Thus, we must -define what a dependency is first. A dependency exists between them if: - - 1. There are two waiters waiting for each event at a given time. - 2. The only way to wake up each waiter is to trigger its event. - 3. Whether one can be woken up depends on whether the other can. - -Each wait in the example creates its dependency like: - - Event C depends on event A. - Event A depends on event B. - Event B depends on event C. - - NOTE: Precisely speaking, a dependency is one between whether a - waiter for an event can be woken up and whether another waiter for - another event can be woken up. However from now on, we will describe - a dependency as if it's one between an event and another event for - simplicity. - -And they form circular dependencies like: - - -> C -> A -> B - - / \ - \ / - ---------------- - - where 'A -> B' means that event A depends on event B. - -Such circular dependencies lead to a deadlock since no waiter can meet -its condition to wake up as described. - -CONCLUSION - -Circular dependencies cause a deadlock. 
- - -How lockdep works ------------------ - -Lockdep tries to detect a deadlock by checking dependencies created by -lock operations, acquire and release. Waiting for a lock corresponds to -waiting for an event, and releasing a lock corresponds to triggering an -event in the previous section. - -In short, lockdep does: - - 1. Detect a new dependency. - 2. Add the dependency into a global graph. - 3. Check if that makes dependencies circular. - 4. Report a deadlock or its possibility if so. - -For example, consider a graph built by lockdep that looks like: - - A -> B - - \ - -> E - / - C -> D - - - where A, B,..., E are different lock classes. - -Lockdep will add a dependency into the graph on detection of a new -dependency. For example, it will add a dependency 'E -> C' when a new -dependency between lock E and lock C is detected. Then the graph will be: - - A -> B - - \ - -> E - - / \ - -> C -> D - \ - / / - \ / - ------------------ - - where A, B,..., E are different lock classes. - -This graph contains a subgraph which demonstrates circular dependencies: - - -> E - - / \ - -> C -> D - \ - / / - \ / - ------------------ - - where C, D and E are different lock classes. - -This is the condition under which a deadlock might occur. Lockdep -reports it on detection after adding a new dependency. This is the way -how lockdep works. - -CONCLUSION - -Lockdep detects a deadlock or its possibility by checking if circular -dependencies were created after adding each new dependency. - - -========== -Limitation -========== - -Limit lockdep -------------- - -Limiting lockdep to work on only typical locks e.g. spin locks and -mutexes, which are released within the acquire context, the -implementation becomes simple but its capacity for detection becomes -limited. Let's check pros and cons in next section. 
- - -Pros from the limitation ------------------------- - -Given the limitation, when acquiring a lock, locks in a held_locks -cannot be released if the context cannot acquire it so has to wait to -acquire it, which means all waiters for the locks in the held_locks are -stuck. It's an exact case to create dependencies between each lock in -the held_locks and the lock to acquire. - -For example: - - CONTEXT X - --------- - acquire A - acquire B /* Add a dependency 'A -> B' */ - release B - release A - - where A and B are different lock classes. - -When acquiring lock A, the held_locks of CONTEXT X is empty thus no -dependency is added. But when acquiring lock B, lockdep detects and adds -a new dependency 'A -> B' between lock A in the held_locks and lock B. -They can be simply added whenever acquiring each lock. - -And data required by lockdep exists in a local structure, held_locks -embedded in task_struct. Forcing to access the data within the context, -lockdep can avoid racy problems without explicit locks while handling -the local data. - -Lastly, lockdep only needs to keep locks currently being held, to build -a dependency graph. However, relaxing the limitation, it needs to keep -even locks already released, because a decision whether they created -dependencies might be long-deferred. - -To sum up, we can expect several advantages from the limitation: - - 1. Lockdep can easily identify a dependency when acquiring a lock. - 2. Races are avoidable while accessing local locks in a held_locks. - 3. Lockdep only needs to keep locks currently being held. - -CONCLUSION - -Given the limitation, the implementation becomes simple and efficient. - - -Cons from the limitation ------------------------- - -Given the limitation, lockdep is applicable only to typical locks. For -example, page locks for page access or completions for synchronization -cannot work with lockdep. - -Can we detect deadlocks below, under the limitation? 
- -Example 1: - - CONTEXT X CONTEXT Y CONTEXT Z - --------- --------- ---------- - mutex_lock A - lock_page B - lock_page B - mutex_lock A /* DEADLOCK */ - unlock_page B held by X - unlock_page B - mutex_unlock A - mutex_unlock A - - where A and B are different lock classes. - -No, we cannot. - -Example 2: - - CONTEXT X CONTEXT Y - --------- --------- - mutex_lock A - mutex_lock A - wait_for_complete B /* DEADLOCK */ - complete B - mutex_unlock A - mutex_unlock A - - where A is a lock class and B is a completion variable. - -No, we cannot. - -CONCLUSION - -Given the limitation, lockdep cannot detect a deadlock or its -possibility caused by page locks or completions. - - -Relax the limitation --------------------- - -Under the limitation, things to create dependencies are limited to -typical locks. However, synchronization primitives like page locks and -completions, which are allowed to be released in any context, also -create dependencies and can cause a deadlock. So lockdep should track -these locks to do a better job. We have to relax the limitation for -these locks to work with lockdep. - -Detecting dependencies is very important for lockdep to work because -adding a dependency means adding an opportunity to check whether it -causes a deadlock. The more lockdep adds dependencies, the more it -thoroughly works. Thus Lockdep has to do its best to detect and add as -many true dependencies into a graph as possible. - -For example, considering only typical locks, lockdep builds a graph like: - - A -> B - - \ - -> E - / - C -> D - - - where A, B,..., E are different lock classes. - -On the other hand, under the relaxation, additional dependencies might -be created and added. Assuming additional 'FX -> C' and 'E -> GX' are -added thanks to the relaxation, the graph will be: - - A -> B - - \ - -> E -> GX - / - FX -> C -> D - - - where A, B,..., E, FX and GX are different lock classes, and a suffix - 'X' is added on non-typical locks. 
- -The latter graph gives us more chances to check circular dependencies -than the former. However, it might suffer performance degradation since -relaxing the limitation, with which design and implementation of lockdep -can be efficient, might introduce inefficiency inevitably. So lockdep -should provide two options, strong detection and efficient detection. - -Choosing efficient detection: - - Lockdep works with only locks restricted to be released within the - acquire context. However, lockdep works efficiently. - -Choosing strong detection: - - Lockdep works with all synchronization primitives. However, lockdep - suffers performance degradation. - -CONCLUSION - -Relaxing the limitation, lockdep can add additional dependencies giving -additional opportunities to check circular dependencies. - - -============ -Crossrelease -============ - -Introduce crossrelease ----------------------- - -In order to allow lockdep to handle additional dependencies by what -might be released in any context, namely 'crosslock', we have to be able -to identify those created by crosslocks. The proposed 'crossrelease' -feature provoides a way to do that. - -Crossrelease feature has to do: - - 1. Identify dependencies created by crosslocks. - 2. Add the dependencies into a dependency graph. - -That's all. Once a meaningful dependency is added into graph, then -lockdep would work with the graph as it did. The most important thing -crossrelease feature has to do is to correctly identify and add true -dependencies into the global graph. - -A dependency e.g. 'A -> B' can be identified only in the A's release -context because a decision required to identify the dependency can be -made only in the release context. That is to decide whether A can be -released so that a waiter for A can be woken up. It cannot be made in -other than the A's release context. 
- -It's no matter for typical locks because each acquire context is same as -its release context, thus lockdep can decide whether a lock can be -released in the acquire context. However for crosslocks, lockdep cannot -make the decision in the acquire context but has to wait until the -release context is identified. - -Therefore, deadlocks by crosslocks cannot be detected just when it -happens, because those cannot be identified until the crosslocks are -released. However, deadlock possibilities can be detected and it's very -worth. See 'APPENDIX A' section to check why. - -CONCLUSION - -Using crossrelease feature, lockdep can work with what might be released -in any context, namely crosslock. - - -Introduce commit ----------------- - -Since crossrelease defers the work adding true dependencies of -crosslocks until they are actually released, crossrelease has to queue -all acquisitions which might create dependencies with the crosslocks. -Then it identifies dependencies using the queued data in batches at a -proper time. We call it 'commit'. - -There are four types of dependencies: - -1. TT type: 'typical lock A -> typical lock B' - - Just when acquiring B, lockdep can see it's in the A's release - context. So the dependency between A and B can be identified - immediately. Commit is unnecessary. - -2. TC type: 'typical lock A -> crosslock BX' - - Just when acquiring BX, lockdep can see it's in the A's release - context. So the dependency between A and BX can be identified - immediately. Commit is unnecessary, too. - -3. CT type: 'crosslock AX -> typical lock B' - - When acquiring B, lockdep cannot identify the dependency because - there's no way to know if it's in the AX's release context. It has - to wait until the decision can be made. Commit is necessary. - -4. CC type: 'crosslock AX -> crosslock BX' - - When acquiring BX, lockdep cannot identify the dependency because - there's no way to know if it's in the AX's release context. 
It has - to wait until the decision can be made. Commit is necessary. - But, handling CC type is not implemented yet. It's a future work. - -Lockdep can work without commit for typical locks, but commit step is -necessary once crosslocks are involved. Introducing commit, lockdep -performs three steps. What lockdep does in each step is: - -1. Acquisition: For typical locks, lockdep does what it originally did - and queues the lock so that CT type dependencies can be checked using - it at the commit step. For crosslocks, it saves data which will be - used at the commit step and increases a reference count for it. - -2. Commit: No action is reauired for typical locks. For crosslocks, - lockdep adds CT type dependencies using the data saved at the - acquisition step. - -3. Release: No changes are required for typical locks. When a crosslock - is released, it decreases a reference count for it. - -CONCLUSION - -Crossrelease introduces commit step to handle dependencies of crosslocks -in batches at a proper time. - - -============== -Implementation -============== - -Data structures ---------------- - -Crossrelease introduces two main data structures. - -1. hist_lock - - This is an array embedded in task_struct, for keeping lock history so - that dependencies can be added using them at the commit step. Since - it's local data, it can be accessed locklessly in the owner context. - The array is filled at the acquisition step and consumed at the - commit step. And it's managed in circular manner. - -2. cross_lock - - One per lockdep_map exists. This is for keeping data of crosslocks - and used at the commit step. - - -How crossrelease works ----------------------- - -It's the key of how crossrelease works, to defer necessary works to an -appropriate point in time and perform in at once at the commit step. -Let's take a look with examples step by step, starting from how lockdep -works without crossrelease for typical locks. 
- - acquire A /* Push A onto held_locks */ - acquire B /* Push B onto held_locks and add 'A -> B' */ - acquire C /* Push C onto held_locks and add 'B -> C' */ - release C /* Pop C from held_locks */ - release B /* Pop B from held_locks */ - release A /* Pop A from held_locks */ - - where A, B and C are different lock classes. - - NOTE: This document assumes that readers already understand how - lockdep works without crossrelease thus omits details. But there's - one thing to note. Lockdep pretends to pop a lock from held_locks - when releasing it. But it's subtly different from the original pop - operation because lockdep allows other than the top to be poped. - -In this case, lockdep adds 'the top of held_locks -> the lock to acquire' -dependency every time acquiring a lock. - -After adding 'A -> B', a dependency graph will be: - - A -> B - - where A and B are different lock classes. - -And after adding 'B -> C', the graph will be: - - A -> B -> C - - where A, B and C are different lock classes. - -Let's performs commit step even for typical locks to add dependencies. -Of course, commit step is not necessary for them, however, it would work -well because this is a more general way. - - acquire A - /* - * Queue A into hist_locks - * - * In hist_locks: A - * In graph: Empty - */ - - acquire B - /* - * Queue B into hist_locks - * - * In hist_locks: A, B - * In graph: Empty - */ - - acquire C - /* - * Queue C into hist_locks - * - * In hist_locks: A, B, C - * In graph: Empty - */ - - commit C - /* - * Add 'C -> ?' - * Answer the following to decide '?' - * What has been queued since acquire C: Nothing - * - * In hist_locks: A, B, C - * In graph: Empty - */ - - release C - - commit B - /* - * Add 'B -> ?' - * Answer the following to decide '?' - * What has been queued since acquire B: C - * - * In hist_locks: A, B, C - * In graph: 'B -> C' - */ - - release B - - commit A - /* - * Add 'A -> ?' - * Answer the following to decide '?' 
- * What has been queued since acquire A: B, C - * - * In hist_locks: A, B, C - * In graph: 'B -> C', 'A -> B', 'A -> C' - */ - - release A - - where A, B and C are different lock classes. - -In this case, dependencies are added at the commit step as described. - -After commits for A, B and C, the graph will be: - - A -> B -> C - - where A, B and C are different lock classes. - - NOTE: A dependency 'A -> C' is optimized out. - -We can see the former graph built without commit step is same as the -latter graph built using commit steps. Of course the former way leads to -earlier finish for building the graph, which means we can detect a -deadlock or its possibility sooner. So the former way would be prefered -when possible. But we cannot avoid using the latter way for crosslocks. - -Let's look at how commit steps work for crosslocks. In this case, the -commit step is performed only on crosslock AX as real. And it assumes -that the AX release context is different from the AX acquire context. - - BX RELEASE CONTEXT BX ACQUIRE CONTEXT - ------------------ ------------------ - acquire A - /* - * Push A onto held_locks - * Queue A into hist_locks - * - * In held_locks: A - * In hist_locks: A - * In graph: Empty - */ - - acquire BX - /* - * Add 'the top of held_locks -> BX' - * - * In held_locks: A - * In hist_locks: A - * In graph: 'A -> BX' - */ - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - It must be guaranteed that the following operations are seen after - acquiring BX globally. It can be done by things like barrier. 
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - acquire C - /* - * Push C onto held_locks - * Queue C into hist_locks - * - * In held_locks: C - * In hist_locks: C - * In graph: 'A -> BX' - */ - - release C - /* - * Pop C from held_locks - * - * In held_locks: Empty - * In hist_locks: C - * In graph: 'A -> BX' - */ - acquire D - /* - * Push D onto held_locks - * Queue D into hist_locks - * Add 'the top of held_locks -> D' - * - * In held_locks: A, D - * In hist_locks: A, D - * In graph: 'A -> BX', 'A -> D' - */ - acquire E - /* - * Push E onto held_locks - * Queue E into hist_locks - * - * In held_locks: E - * In hist_locks: C, E - * In graph: 'A -> BX', 'A -> D' - */ - - release E - /* - * Pop E from held_locks - * - * In held_locks: Empty - * In hist_locks: D, E - * In graph: 'A -> BX', 'A -> D' - */ - release D - /* - * Pop D from held_locks - * - * In held_locks: A - * In hist_locks: A, D - * In graph: 'A -> BX', 'A -> D' - */ - commit BX - /* - * Add 'BX -> ?' - * What has been queued since acquire BX: C, E - * - * In held_locks: Empty - * In hist_locks: D, E - * In graph: 'A -> BX', 'A -> D', - * 'BX -> C', 'BX -> E' - */ - - release BX - /* - * In held_locks: Empty - * In hist_locks: D, E - * In graph: 'A -> BX', 'A -> D', - * 'BX -> C', 'BX -> E' - */ - release A - /* - * Pop A from held_locks - * - * In held_locks: Empty - * In hist_locks: A, D - * In graph: 'A -> BX', 'A -> D', - * 'BX -> C', 'BX -> E' - */ - - where A, BX, C,..., E are different lock classes, and a suffix 'X' is - added on crosslocks. - -Crossrelease considers all acquisitions after acqiuring BX are -candidates which might create dependencies with BX. True dependencies -will be determined when identifying the release context of BX. Meanwhile, -all typical locks are queued so that they can be used at the commit step. -And then two dependencies 'BX -> C' and 'BX -> E' are added at the -commit step when identifying the release context. 
- -The final graph will be, with crossrelease: - - -> C - / - -> BX - - / \ - A - -> E - \ - -> D - - where A, BX, C,..., E are different lock classes, and a suffix 'X' is - added on crosslocks. - -However, the final graph will be, without crossrelease: - - A -> D - - where A and D are different lock classes. - -The former graph has three more dependencies, 'A -> BX', 'BX -> C' and -'BX -> E' giving additional opportunities to check if they cause -deadlocks. This way lockdep can detect a deadlock or its possibility -caused by crosslocks. - -CONCLUSION - -We checked how crossrelease works with several examples. - - -============= -Optimizations -============= - -Avoid duplication ------------------ - -Crossrelease feature uses a cache like what lockdep already uses for -dependency chains, but this time it's for caching CT type dependencies. -Once that dependency is cached, the same will never be added again. - - -Lockless for hot paths ----------------------- - -To keep all locks for later use at the commit step, crossrelease adopts -a local array embedded in task_struct, which makes access to the data -lockless by forcing it to happen only within the owner context. It's -like how lockdep handles held_locks. Lockless implmentation is important -since typical locks are very frequently acquired and released. - - -================================================= -APPENDIX A: What lockdep does to work aggresively -================================================= - -A deadlock actually occurs when all wait operations creating circular -dependencies run at the same time. Even though they don't, a potential -deadlock exists if the problematic dependencies exist. Thus it's -meaningful to detect not only an actual deadlock but also its potential -possibility. The latter is rather valuable. When a deadlock occurs -actually, we can identify what happens in the system by some means or -other even without lockdep. 
However, there's no way to detect possiblity -without lockdep unless the whole code is parsed in head. It's terrible. -Lockdep does the both, and crossrelease only focuses on the latter. - -Whether or not a deadlock actually occurs depends on several factors. -For example, what order contexts are switched in is a factor. Assuming -circular dependencies exist, a deadlock would occur when contexts are -switched so that all wait operations creating the dependencies run -simultaneously. Thus to detect a deadlock possibility even in the case -that it has not occured yet, lockdep should consider all possible -combinations of dependencies, trying to: - -1. Use a global dependency graph. - - Lockdep combines all dependencies into one global graph and uses them, - regardless of which context generates them or what order contexts are - switched in. Aggregated dependencies are only considered so they are - prone to be circular if a problem exists. - -2. Check dependencies between classes instead of instances. - - What actually causes a deadlock are instances of lock. However, - lockdep checks dependencies between classes instead of instances. - This way lockdep can detect a deadlock which has not happened but - might happen in future by others but the same class. - -3. Assume all acquisitions lead to waiting. - - Although locks might be acquired without waiting which is essential - to create dependencies, lockdep assumes all acquisitions lead to - waiting since it might be true some time or another. - -CONCLUSION - -Lockdep detects not only an actual deadlock but also its possibility, -and the latter is more valuable. - - -================================================== -APPENDIX B: How to avoid adding false dependencies -================================================== - -Remind what a dependency is. A dependency exists if: - - 1. There are two waiters waiting for each event at a given time. - 2. The only way to wake up each waiter is to trigger its event. - 3. 
Whether one can be woken up depends on whether the other can. - -For example: - - acquire A - acquire B /* A dependency 'A -> B' exists */ - release B - release A - - where A and B are different lock classes. - -A depedency 'A -> B' exists since: - - 1. A waiter for A and a waiter for B might exist when acquiring B. - 2. Only way to wake up each is to release what it waits for. - 3. Whether the waiter for A can be woken up depends on whether the - other can. IOW, TASK X cannot release A if it fails to acquire B. - -For another example: - - TASK X TASK Y - ------ ------ - acquire AX - acquire B /* A dependency 'AX -> B' exists */ - release B - release AX held by Y - - where AX and B are different lock classes, and a suffix 'X' is added - on crosslocks. - -Even in this case involving crosslocks, the same rule can be applied. A -depedency 'AX -> B' exists since: - - 1. A waiter for AX and a waiter for B might exist when acquiring B. - 2. Only way to wake up each is to release what it waits for. - 3. Whether the waiter for AX can be woken up depends on whether the - other can. IOW, TASK X cannot release AX if it fails to acquire B. - -Let's take a look at more complicated example: - - TASK X TASK Y - ------ ------ - acquire B - release B - fork Y - acquire AX - acquire C /* A dependency 'AX -> C' exists */ - release C - release AX held by Y - - where AX, B and C are different lock classes, and a suffix 'X' is - added on crosslocks. - -Does a dependency 'AX -> B' exist? Nope. - -Two waiters are essential to create a dependency. However, waiters for -AX and B to create 'AX -> B' cannot exist at the same time in this -example. Thus the dependency 'AX -> B' cannot be created. - -It would be ideal if the full set of true ones can be considered. But -we can ensure nothing but what actually happened. Relying on what -actually happens at runtime, we can anyway add only true ones, though -they might be a subset of true ones. It's similar to how lockdep works -for typical locks. 
There might be more true dependencies than what -lockdep has detected in runtime. Lockdep has no choice but to rely on -what actually happens. Crossrelease also relies on it. - -CONCLUSION - -Relying on what actually happens, lockdep can avoid adding false -dependencies. diff --git a/include/linux/completion.h b/include/linux/completion.h index 0662a417febe..94a59ba7d422 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -10,9 +10,6 @@ */ #include -#ifdef CONFIG_LOCKDEP_COMPLETIONS -#include -#endif /* * struct completion - structure used to maintain state for a "completion" @@ -29,58 +26,16 @@ struct completion { unsigned int done; wait_queue_head_t wait; -#ifdef CONFIG_LOCKDEP_COMPLETIONS - struct lockdep_map_cross map; -#endif }; -#ifdef CONFIG_LOCKDEP_COMPLETIONS -static inline void complete_acquire(struct completion *x) -{ - lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_); -} - -static inline void complete_release(struct completion *x) -{ - lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_); -} - -static inline void complete_release_commit(struct completion *x) -{ - lock_commit_crosslock((struct lockdep_map *)&x->map); -} - -#define init_completion_map(x, m) \ -do { \ - lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ - (m)->name, (m)->key, 0); \ - __init_completion(x); \ -} while (0) - -#define init_completion(x) \ -do { \ - static struct lock_class_key __key; \ - lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ - "(completion)" #x, \ - &__key, 0); \ - __init_completion(x); \ -} while (0) -#else #define init_completion_map(x, m) __init_completion(x) #define init_completion(x) __init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} static inline void complete_release_commit(struct completion *x) {} -#endif -#ifdef CONFIG_LOCKDEP_COMPLETIONS -#define COMPLETION_INITIALIZER(work) \ - { 0, 
__WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ - STATIC_CROSS_LOCKDEP_MAP_INIT("(completion)" #work, &(work)) } -#else #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } -#endif #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \ (*({ init_completion_map(&(work), &(map)); &(work); })) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index a842551fe044..2e75dc34bff5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -158,12 +158,6 @@ struct lockdep_map { int cpu; unsigned long ip; #endif -#ifdef CONFIG_LOCKDEP_CROSSRELEASE - /* - * Whether it's a crosslock. - */ - int cross; -#endif }; static inline void lockdep_copy_map(struct lockdep_map *to, @@ -267,95 +261,8 @@ struct held_lock { unsigned int hardirqs_off:1; unsigned int references:12; /* 32 bits */ unsigned int pin_count; -#ifdef CONFIG_LOCKDEP_CROSSRELEASE - /* - * Generation id. - * - * A value of cross_gen_id will be stored when holding this, - * which is globally increased whenever each crosslock is held. - */ - unsigned int gen_id; -#endif -}; - -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -#define MAX_XHLOCK_TRACE_ENTRIES 5 - -/* - * This is for keeping locks waiting for commit so that true dependencies - * can be added at commit step. - */ -struct hist_lock { - /* - * Id for each entry in the ring buffer. This is used to - * decide whether the ring buffer was overwritten or not. - * - * For example, - * - * |<----------- hist_lock ring buffer size ------->| - * pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii - * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii....................... - * - * where 'p' represents an acquisition in process - * context, 'i' represents an acquisition in irq - * context. - * - * In this example, the ring buffer was overwritten by - * acquisitions in irq context, that should be detected on - * rollback or commit. - */ - unsigned int hist_id; - - /* - * Seperate stack_trace data. This will be used at commit step. 
- */ - struct stack_trace trace; - unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES]; - - /* - * Seperate hlock instance. This will be used at commit step. - * - * TODO: Use a smaller data structure containing only necessary - * data. However, we should make lockdep code able to handle the - * smaller one first. - */ - struct held_lock hlock; }; -/* - * To initialize a lock as crosslock, lockdep_init_map_crosslock() should - * be called instead of lockdep_init_map(). - */ -struct cross_lock { - /* - * When more than one acquisition of crosslocks are overlapped, - * we have to perform commit for them based on cross_gen_id of - * the first acquisition, which allows us to add more true - * dependencies. - * - * Moreover, when no acquisition of a crosslock is in progress, - * we should not perform commit because the lock might not exist - * any more, which might cause incorrect memory access. So we - * have to track the number of acquisitions of a crosslock. - */ - int nr_acquire; - - /* - * Seperate hlock instance. This will be used at commit step. - * - * TODO: Use a smaller data structure containing only necessary - * data. However, we should make lockdep code able to handle the - * smaller one first. - */ - struct held_lock hlock; -}; - -struct lockdep_map_cross { - struct lockdep_map map; - struct cross_lock xlock; -}; -#endif - /* * Initialization, self-test and debugging-output methods: */ @@ -560,37 +467,6 @@ enum xhlock_context_t { XHLOCK_CTX_NR, }; -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -extern void lockdep_init_map_crosslock(struct lockdep_map *lock, - const char *name, - struct lock_class_key *key, - int subclass); -extern void lock_commit_crosslock(struct lockdep_map *lock); - -/* - * What we essencially have to initialize is 'nr_acquire'. Other members - * will be initialized in add_xlock(). 
- */ -#define STATIC_CROSS_LOCK_INIT() \ - { .nr_acquire = 0,} - -#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \ - { .map.name = (_name), .map.key = (void *)(_key), \ - .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), } - -/* - * To initialize a lockdep_map statically use this macro. - * Note that _name must not be NULL. - */ -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), .cross = 0, } - -extern void crossrelease_hist_start(enum xhlock_context_t c); -extern void crossrelease_hist_end(enum xhlock_context_t c); -extern void lockdep_invariant_state(bool force); -extern void lockdep_init_task(struct task_struct *task); -extern void lockdep_free_task(struct task_struct *task); -#else /* !CROSSRELEASE */ #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) /* * To initialize a lockdep_map statically use this macro. @@ -604,7 +480,6 @@ static inline void crossrelease_hist_end(enum xhlock_context_t c) {} static inline void lockdep_invariant_state(bool force) {} static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} -#endif /* CROSSRELEASE */ #ifdef CONFIG_LOCK_STAT diff --git a/include/linux/sched.h b/include/linux/sched.h index 21991d668d35..9ce6c3001e9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -849,17 +849,6 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -#define MAX_XHLOCKS_NR 64UL - struct hist_lock *xhlocks; /* Crossrelease history locks */ - unsigned int xhlock_idx; - /* For restoring at history boundaries */ - unsigned int xhlock_idx_hist[XHLOCK_CTX_NR]; - unsigned int hist_id; - /* For overwrite check at each context exit */ - unsigned int hist_id_save[XHLOCK_CTX_NR]; -#endif - #ifdef CONFIG_UBSAN unsigned int in_ubsan; #endif diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 670d8d7d8087..5fa1324a4f29 100644 --- 
a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -57,10 +57,6 @@ #define CREATE_TRACE_POINTS #include -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -#include -#endif - #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; module_param(prove_locking, int, 0644); @@ -75,19 +71,6 @@ module_param(lock_stat, int, 0644); #define lock_stat 0 #endif -#ifdef CONFIG_BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK -static int crossrelease_fullstack = 1; -#else -static int crossrelease_fullstack; -#endif -static int __init allow_crossrelease_fullstack(char *str) -{ - crossrelease_fullstack = 1; - return 0; -} - -early_param("crossrelease_fullstack", allow_crossrelease_fullstack); - /* * lockdep_lock: protects the lockdep graph, the hashes and the * class/list/hash allocators. @@ -740,18 +723,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); } -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -static void cross_init(struct lockdep_map *lock, int cross); -static int cross_lock(struct lockdep_map *lock); -static int lock_acquire_crosslock(struct held_lock *hlock); -static int lock_release_crosslock(struct lockdep_map *lock); -#else -static inline void cross_init(struct lockdep_map *lock, int cross) {} -static inline int cross_lock(struct lockdep_map *lock) { return 0; } -static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; } -static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; } -#endif - /* * Register a lock's class in the hash-table, if the class is not present * yet. Otherwise we look it up. 
We cache the result in the lock object @@ -1151,41 +1122,22 @@ print_circular_lock_scenario(struct held_lock *src, printk(KERN_CONT "\n\n"); } - if (cross_lock(tgt->instance)) { - printk(" Possible unsafe locking scenario by crosslock:\n\n"); - printk(" CPU0 CPU1\n"); - printk(" ---- ----\n"); - printk(" lock("); - __print_lock_name(parent); - printk(KERN_CONT ");\n"); - printk(" lock("); - __print_lock_name(target); - printk(KERN_CONT ");\n"); - printk(" lock("); - __print_lock_name(source); - printk(KERN_CONT ");\n"); - printk(" unlock("); - __print_lock_name(target); - printk(KERN_CONT ");\n"); - printk("\n *** DEADLOCK ***\n\n"); - } else { - printk(" Possible unsafe locking scenario:\n\n"); - printk(" CPU0 CPU1\n"); - printk(" ---- ----\n"); - printk(" lock("); - __print_lock_name(target); - printk(KERN_CONT ");\n"); - printk(" lock("); - __print_lock_name(parent); - printk(KERN_CONT ");\n"); - printk(" lock("); - __print_lock_name(target); - printk(KERN_CONT ");\n"); - printk(" lock("); - __print_lock_name(source); - printk(KERN_CONT ");\n"); - printk("\n *** DEADLOCK ***\n\n"); - } + printk(" Possible unsafe locking scenario:\n\n"); + printk(" CPU0 CPU1\n"); + printk(" ---- ----\n"); + printk(" lock("); + __print_lock_name(target); + printk(KERN_CONT ");\n"); + printk(" lock("); + __print_lock_name(parent); + printk(KERN_CONT ");\n"); + printk(" lock("); + __print_lock_name(target); + printk(KERN_CONT ");\n"); + printk(" lock("); + __print_lock_name(source); + printk(KERN_CONT ");\n"); + printk("\n *** DEADLOCK ***\n\n"); } /* @@ -1211,10 +1163,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, curr->comm, task_pid_nr(curr)); print_lock(check_src); - if (cross_lock(check_tgt->instance)) - pr_warn("\nbut now in release context of a crosslock acquired at the following:\n"); - else - pr_warn("\nbut task is already holding lock:\n"); + pr_warn("\nbut task is already holding lock:\n"); print_lock(check_tgt); pr_warn("\nwhich lock already 
depends on the new lock.\n\n"); @@ -1244,9 +1193,7 @@ static noinline int print_circular_bug(struct lock_list *this, if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; - if (cross_lock(check_tgt->instance)) - this->trace = *trace; - else if (!save_trace(&this->trace)) + if (!save_trace(&this->trace)) return 0; depth = get_lock_depth(target); @@ -1850,9 +1797,6 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, if (nest) return 2; - if (cross_lock(prev->instance)) - continue; - return print_deadlock_bug(curr, prev, next); } return 1; @@ -2018,31 +1962,26 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) for (;;) { int distance = curr->lockdep_depth - depth + 1; hlock = curr->held_locks + depth - 1; + /* - * Only non-crosslock entries get new dependencies added. - * Crosslock entries will be added by commit later: + * Only non-recursive-read entries get new dependencies + * added: */ - if (!cross_lock(hlock->instance)) { + if (hlock->read != 2 && hlock->check) { + int ret = check_prev_add(curr, hlock, next, distance, &trace, save_trace); + if (!ret) + return 0; + /* - * Only non-recursive-read entries get new dependencies - * added: + * Stop after the first non-trylock entry, + * as non-trylock entries have added their + * own direct dependencies already, so this + * lock is connected to them indirectly: */ - if (hlock->read != 2 && hlock->check) { - int ret = check_prev_add(curr, hlock, next, - distance, &trace, save_trace); - if (!ret) - return 0; - - /* - * Stop after the first non-trylock entry, - * as non-trylock entries have added their - * own direct dependencies already, so this - * lock is connected to them indirectly: - */ - if (!hlock->trylock) - break; - } + if (!hlock->trylock) + break; } + depth--; /* * End of lock-stack? 
@@ -3292,21 +3231,10 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name, void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { - cross_init(lock, 0); __lockdep_init_map(lock, name, key, subclass); } EXPORT_SYMBOL_GPL(lockdep_init_map); -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name, - struct lock_class_key *key, int subclass) -{ - cross_init(lock, 1); - __lockdep_init_map(lock, name, key, subclass); -} -EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock); -#endif - struct lock_class_key __lockdep_no_validate__; EXPORT_SYMBOL_GPL(__lockdep_no_validate__); @@ -3362,7 +3290,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int chain_head = 0; int class_idx; u64 chain_key; - int ret; if (unlikely(!debug_locks)) return 0; @@ -3411,8 +3338,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, class_idx = class - lock_classes + 1; - /* TODO: nest_lock is not implemented for crosslock yet. */ - if (depth && !cross_lock(lock)) { + if (depth) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { if (hlock->references) { @@ -3500,14 +3426,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) return 0; - ret = lock_acquire_crosslock(hlock); - /* - * 2 means normal acquire operations are needed. Otherwise, it's - * ok just to return with '0:fail, 1:success'. 
- */ - if (ret != 2) - return ret; - curr->curr_chain_key = chain_key; curr->lockdep_depth++; check_chain_key(curr); @@ -3745,19 +3663,11 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) struct task_struct *curr = current; struct held_lock *hlock; unsigned int depth; - int ret, i; + int i; if (unlikely(!debug_locks)) return 0; - ret = lock_release_crosslock(lock); - /* - * 2 means normal release operations are needed. Otherwise, it's - * ok just to return with '0:fail, 1:success'. - */ - if (ret != 2) - return ret; - depth = curr->lockdep_depth; /* * So we're all set to release this lock.. wait what lock? We don't @@ -4675,495 +4585,3 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) dump_stack(); } EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); - -#ifdef CONFIG_LOCKDEP_CROSSRELEASE - -/* - * Crossrelease works by recording a lock history for each thread and - * connecting those historic locks that were taken after the - * wait_for_completion() in the complete() context. - * - * Task-A Task-B - * - * mutex_lock(&A); - * mutex_unlock(&A); - * - * wait_for_completion(&C); - * lock_acquire_crosslock(); - * atomic_inc_return(&cross_gen_id); - * | - * | mutex_lock(&B); - * | mutex_unlock(&B); - * | - * | complete(&C); - * `-- lock_commit_crosslock(); - * - * Which will then add a dependency between B and C. - */ - -#define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR]) - -/* - * Whenever a crosslock is held, cross_gen_id will be increased. - */ -static atomic_t cross_gen_id; /* Can be wrapped */ - -/* - * Make an entry of the ring buffer invalid. - */ -static inline void invalidate_xhlock(struct hist_lock *xhlock) -{ - /* - * Normally, xhlock->hlock.instance must be !NULL. 
- */ - xhlock->hlock.instance = NULL; -} - -/* - * Lock history stacks; we have 2 nested lock history stacks: - * - * HARD(IRQ) - * SOFT(IRQ) - * - * The thing is that once we complete a HARD/SOFT IRQ the future task locks - * should not depend on any of the locks observed while running the IRQ. So - * what we do is rewind the history buffer and erase all our knowledge of that - * temporal event. - */ - -void crossrelease_hist_start(enum xhlock_context_t c) -{ - struct task_struct *cur = current; - - if (!cur->xhlocks) - return; - - cur->xhlock_idx_hist[c] = cur->xhlock_idx; - cur->hist_id_save[c] = cur->hist_id; -} - -void crossrelease_hist_end(enum xhlock_context_t c) -{ - struct task_struct *cur = current; - - if (cur->xhlocks) { - unsigned int idx = cur->xhlock_idx_hist[c]; - struct hist_lock *h = &xhlock(idx); - - cur->xhlock_idx = idx; - - /* Check if the ring was overwritten. */ - if (h->hist_id != cur->hist_id_save[c]) - invalidate_xhlock(h); - } -} - -/* - * lockdep_invariant_state() is used to annotate independence inside a task, to - * make one task look like multiple independent 'tasks'. - * - * Take for instance workqueues; each work is independent of the last. The - * completion of a future work does not depend on the completion of a past work - * (in general). Therefore we must not carry that (lock) dependency across - * works. - * - * This is true for many things; pretty much all kthreads fall into this - * pattern, where they have an invariant state and future completions do not - * depend on past completions. Its just that since they all have the 'same' - * form -- the kthread does the same over and over -- it doesn't typically - * matter. - * - * The same is true for system-calls, once a system call is completed (we've - * returned to userspace) the next system call does not depend on the lock - * history of the previous system call. 
- * - * They key property for independence, this invariant state, is that it must be - * a point where we hold no locks and have no history. Because if we were to - * hold locks, the restore at _end() would not necessarily recover it's history - * entry. Similarly, independence per-definition means it does not depend on - * prior state. - */ -void lockdep_invariant_state(bool force) -{ - /* - * We call this at an invariant point, no current state, no history. - * Verify the former, enforce the latter. - */ - WARN_ON_ONCE(!force && current->lockdep_depth); - if (current->xhlocks) - invalidate_xhlock(&xhlock(current->xhlock_idx)); -} - -static int cross_lock(struct lockdep_map *lock) -{ - return lock ? lock->cross : 0; -} - -/* - * This is needed to decide the relationship between wrapable variables. - */ -static inline int before(unsigned int a, unsigned int b) -{ - return (int)(a - b) < 0; -} - -static inline struct lock_class *xhlock_class(struct hist_lock *xhlock) -{ - return hlock_class(&xhlock->hlock); -} - -static inline struct lock_class *xlock_class(struct cross_lock *xlock) -{ - return hlock_class(&xlock->hlock); -} - -/* - * Should we check a dependency with previous one? - */ -static inline int depend_before(struct held_lock *hlock) -{ - return hlock->read != 2 && hlock->check && !hlock->trylock; -} - -/* - * Should we check a dependency with next one? - */ -static inline int depend_after(struct held_lock *hlock) -{ - return hlock->read != 2 && hlock->check; -} - -/* - * Check if the xhlock is valid, which would be false if, - * - * 1. Has not used after initializaion yet. - * 2. Got invalidated. - * - * Remind hist_lock is implemented as a ring buffer. - */ -static inline int xhlock_valid(struct hist_lock *xhlock) -{ - /* - * xhlock->hlock.instance must be !NULL. - */ - return !!xhlock->hlock.instance; -} - -/* - * Record a hist_lock entry. - * - * Irq disable is only required. 
- */ -static void add_xhlock(struct held_lock *hlock) -{ - unsigned int idx = ++current->xhlock_idx; - struct hist_lock *xhlock = &xhlock(idx); - -#ifdef CONFIG_DEBUG_LOCKDEP - /* - * This can be done locklessly because they are all task-local - * state, we must however ensure IRQs are disabled. - */ - WARN_ON_ONCE(!irqs_disabled()); -#endif - - /* Initialize hist_lock's members */ - xhlock->hlock = *hlock; - xhlock->hist_id = ++current->hist_id; - - xhlock->trace.nr_entries = 0; - xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES; - xhlock->trace.entries = xhlock->trace_entries; - - if (crossrelease_fullstack) { - xhlock->trace.skip = 3; - save_stack_trace(&xhlock->trace); - } else { - xhlock->trace.nr_entries = 1; - xhlock->trace.entries[0] = hlock->acquire_ip; - } -} - -static inline int same_context_xhlock(struct hist_lock *xhlock) -{ - return xhlock->hlock.irq_context == task_irq_context(current); -} - -/* - * This should be lockless as far as possible because this would be - * called very frequently. - */ -static void check_add_xhlock(struct held_lock *hlock) -{ - /* - * Record a hist_lock, only in case that acquisitions ahead - * could depend on the held_lock. For example, if the held_lock - * is trylock then acquisitions ahead never depends on that. - * In that case, we don't need to record it. Just return. - */ - if (!current->xhlocks || !depend_before(hlock)) - return; - - add_xhlock(hlock); -} - -/* - * For crosslock. - */ -static int add_xlock(struct held_lock *hlock) -{ - struct cross_lock *xlock; - unsigned int gen_id; - - if (!graph_lock()) - return 0; - - xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock; - - /* - * When acquisitions for a crosslock are overlapped, we use - * nr_acquire to perform commit for them, based on cross_gen_id - * of the first acquisition, which allows to add additional - * dependencies. 
- * - * Moreover, when no acquisition of a crosslock is in progress, - * we should not perform commit because the lock might not exist - * any more, which might cause incorrect memory access. So we - * have to track the number of acquisitions of a crosslock. - * - * depend_after() is necessary to initialize only the first - * valid xlock so that the xlock can be used on its commit. - */ - if (xlock->nr_acquire++ && depend_after(&xlock->hlock)) - goto unlock; - - gen_id = (unsigned int)atomic_inc_return(&cross_gen_id); - xlock->hlock = *hlock; - xlock->hlock.gen_id = gen_id; -unlock: - graph_unlock(); - return 1; -} - -/* - * Called for both normal and crosslock acquires. Normal locks will be - * pushed on the hist_lock queue. Cross locks will record state and - * stop regular lock_acquire() to avoid being placed on the held_lock - * stack. - * - * Return: 0 - failure; - * 1 - crosslock, done; - * 2 - normal lock, continue to held_lock[] ops. - */ -static int lock_acquire_crosslock(struct held_lock *hlock) -{ - /* - * CONTEXT 1 CONTEXT 2 - * --------- --------- - * lock A (cross) - * X = atomic_inc_return(&cross_gen_id) - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * Y = atomic_read_acquire(&cross_gen_id) - * lock B - * - * atomic_read_acquire() is for ordering between A and B, - * IOW, A happens before B, when CONTEXT 2 see Y >= X. - * - * Pairs with atomic_inc_return() in add_xlock(). 
- */ - hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id); - - if (cross_lock(hlock->instance)) - return add_xlock(hlock); - - check_add_xhlock(hlock); - return 2; -} - -static int copy_trace(struct stack_trace *trace) -{ - unsigned long *buf = stack_trace + nr_stack_trace_entries; - unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; - unsigned int nr = min(max_nr, trace->nr_entries); - - trace->nr_entries = nr; - memcpy(buf, trace->entries, nr * sizeof(trace->entries[0])); - trace->entries = buf; - nr_stack_trace_entries += nr; - - if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { - if (!debug_locks_off_graph_unlock()) - return 0; - - print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); - dump_stack(); - - return 0; - } - - return 1; -} - -static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock) -{ - unsigned int xid, pid; - u64 chain_key; - - xid = xlock_class(xlock) - lock_classes; - chain_key = iterate_chain_key((u64)0, xid); - pid = xhlock_class(xhlock) - lock_classes; - chain_key = iterate_chain_key(chain_key, pid); - - if (lookup_chain_cache(chain_key)) - return 1; - - if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context, - chain_key)) - return 0; - - if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1, - &xhlock->trace, copy_trace)) - return 0; - - return 1; -} - -static void commit_xhlocks(struct cross_lock *xlock) -{ - unsigned int cur = current->xhlock_idx; - unsigned int prev_hist_id = xhlock(cur).hist_id; - unsigned int i; - - if (!graph_lock()) - return; - - if (xlock->nr_acquire) { - for (i = 0; i < MAX_XHLOCKS_NR; i++) { - struct hist_lock *xhlock = &xhlock(cur - i); - - if (!xhlock_valid(xhlock)) - break; - - if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id)) - break; - - if (!same_context_xhlock(xhlock)) - break; - - /* - * Filter out the cases where the ring buffer was - * overwritten and the current entry has a bigger - * hist_id than the previous 
one, which is impossible - * otherwise: - */ - if (unlikely(before(prev_hist_id, xhlock->hist_id))) - break; - - prev_hist_id = xhlock->hist_id; - - /* - * commit_xhlock() returns 0 with graph_lock already - * released if fail. - */ - if (!commit_xhlock(xlock, xhlock)) - return; - } - } - - graph_unlock(); -} - -void lock_commit_crosslock(struct lockdep_map *lock) -{ - struct cross_lock *xlock; - unsigned long flags; - - if (unlikely(!debug_locks || current->lockdep_recursion)) - return; - - if (!current->xhlocks) - return; - - /* - * Do commit hist_locks with the cross_lock, only in case that - * the cross_lock could depend on acquisitions after that. - * - * For example, if the cross_lock does not have the 'check' flag - * then we don't need to check dependencies and commit for that. - * Just skip it. In that case, of course, the cross_lock does - * not depend on acquisitions ahead, either. - * - * WARNING: Don't do that in add_xlock() in advance. When an - * acquisition context is different from the commit context, - * invalid(skipped) cross_lock might be accessed. - */ - if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock)) - return; - - raw_local_irq_save(flags); - check_flags(flags); - current->lockdep_recursion = 1; - xlock = &((struct lockdep_map_cross *)lock)->xlock; - commit_xhlocks(xlock); - current->lockdep_recursion = 0; - raw_local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(lock_commit_crosslock); - -/* - * Return: 0 - failure; - * 1 - crosslock, done; - * 2 - normal lock, continue to held_lock[] ops. 
- */ -static int lock_release_crosslock(struct lockdep_map *lock) -{ - if (cross_lock(lock)) { - if (!graph_lock()) - return 0; - ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--; - graph_unlock(); - return 1; - } - return 2; -} - -static void cross_init(struct lockdep_map *lock, int cross) -{ - if (cross) - ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0; - - lock->cross = cross; - - /* - * Crossrelease assumes that the ring buffer size of xhlocks - * is aligned with power of 2. So force it on build. - */ - BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1)); -} - -void lockdep_init_task(struct task_struct *task) -{ - int i; - - task->xhlock_idx = UINT_MAX; - task->hist_id = 0; - - for (i = 0; i < XHLOCK_CTX_NR; i++) { - task->xhlock_idx_hist[i] = UINT_MAX; - task->hist_id_save[i] = 0; - } - - task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR, - GFP_KERNEL); -} - -void lockdep_free_task(struct task_struct *task) -{ - if (task->xhlocks) { - void *tmp = task->xhlocks; - /* Diable crossrelease for current */ - task->xhlocks = NULL; - kfree(tmp); - } -} -#endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 947d3e2ed5c2..9d5b78aad4c5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1099,8 +1099,6 @@ config PROVE_LOCKING select DEBUG_MUTEXES select DEBUG_RT_MUTEXES if RT_MUTEXES select DEBUG_LOCK_ALLOC - select LOCKDEP_CROSSRELEASE - select LOCKDEP_COMPLETIONS select TRACE_IRQFLAGS default n help @@ -1170,37 +1168,6 @@ config LOCK_STAT CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. (CONFIG_LOCKDEP defines "acquire" and "release" events.) -config LOCKDEP_CROSSRELEASE - bool - help - This makes lockdep work for crosslock which is a lock allowed to - be released in a different context from the acquisition context. - Normally a lock must be released in the context acquiring the lock. 
- However, relexing this constraint helps synchronization primitives - such as page locks or completions can use the lock correctness - detector, lockdep. - -config LOCKDEP_COMPLETIONS - bool - help - A deadlock caused by wait_for_completion() and complete() can be - detected by lockdep using crossrelease feature. - -config BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK - bool "Enable the boot parameter, crossrelease_fullstack" - depends on LOCKDEP_CROSSRELEASE - default n - help - The lockdep "cross-release" feature needs to record stack traces - (of calling functions) for all acquisitions, for eventual later - use during analysis. By default only a single caller is recorded, - because the unwind operation can be very expensive with deeper - stack chains. - - However a boot parameter, crossrelease_fullstack, was - introduced since sometimes deeper traces are required for full - analysis. This option turns on the boot parameter. - config DEBUG_LOCKDEP bool "Lock dependency engine debugging" depends on DEBUG_KERNEL && LOCKDEP -- cgit v1.2.3 From 86c9e8126e9fbcbf06c36e285168b880369a537c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Dec 2017 10:48:54 +0000 Subject: arm64: mm: Fix false positives in set_pte_at access/dirty race detection Jiankang reports that our race detection in set_pte_at is firing when copying the page tables in dup_mmap as a result of a fork(). In this situation, the page table isn't actually live and so there is no way that we can race with a concurrent update from the hardware page table walker. This patch reworks the race detection so that we require either the mm to match the current active_mm (i.e. currently installed in our TTBR0) or the mm_users count to be greater than 1, implying that the page table could be live in another CPU. The mm_users check might still be racy, but we'll avoid false positives and it's not realistic to validate that all the necessary locks are held as part of this assertion. 
Cc: Yisheng Xie Reported-by: Jiankang Chen Tested-by: Jiankang Chen Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3ff03a755c32..bdcc7f1c9d06 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -42,6 +42,8 @@ #include #include #include +#include +#include extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); @@ -215,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } } -struct mm_struct; -struct vm_area_struct; - extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); /* @@ -246,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * hardware updates of the pte (ptep_set_access_flags safely changes * valid ptes without going through an invalid entry). */ - if (pte_valid(*ptep) && pte_valid(pte)) { + if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) && + (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { VM_WARN_ONCE(!pte_young(pte), "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", __func__, pte_val(*ptep), pte_val(pte)); -- cgit v1.2.3 From c622cc013cece073722592cff1ac6643a33b1622 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 11 Dec 2017 16:42:31 -0600 Subject: arm64: Define cputype macros for Falkor CPU Add cputype definition macros for Qualcomm Datacenter Technologies Falkor CPU in cputype.h. It's unfortunate that the first revision of the Falkor CPU used the wrong part number 0x800; this was fixed in the v2 chip with part number 0xC00, and the same value will be used for future revisions.
Signed-off-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 235e77d98261..cbf08d7cbf30 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -91,6 +91,7 @@ #define BRCM_CPU_PART_VULCAN 0x516 #define QCOM_CPU_PART_FALKOR_V1 0x800 +#define QCOM_CPU_PART_FALKOR 0xC00 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -99,6 +100,7 @@ #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) +#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 932b50c7c1c65e6f23002e075b97ee083c4a9e71 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 11 Dec 2017 16:42:32 -0600 Subject: arm64: Add software workaround for Falkor erratum 1041 The ARM architecture defines the memory locations that are permitted to be accessed as the result of a speculative instruction fetch from an exception level for which all stages of translation are disabled. Specifically, the core is permitted to speculatively fetch from the 4KB region containing the current program counter and from the next 4KB region. When translation is changed from enabled to disabled for the running exception level (SCTLR_ELn[M] changed from a value of 1 to 0), the Falkor core may errantly speculatively access memory locations outside of the 4KB region permitted by the architecture. The errant memory access may lead to one of the following unexpected behaviors.
1) A System Error Interrupt (SEI) being raised by the Falkor core due to the errant memory access attempting to access a region of memory that is protected by a slave-side memory protection unit. 2) Unpredictable device behavior due to a speculative read from device memory. This behavior may only occur if the instruction cache is disabled prior to or coincident with translation being changed from enabled to disabled. The conditions leading to this erratum will not occur when either of the following occur: 1) A higher exception level disables translation of a lower exception level (e.g. EL2 changing SCTLR_EL1[M] from a value of 1 to 0). 2) An exception level disabling its stage-1 translation if its stage-2 translation is enabled (e.g. EL1 changing SCTLR_EL1[M] from a value of 1 to 0 when HCR_EL2[VM] has a value of 1). To avoid the errant behavior, software must execute an ISB immediately prior to executing the MSR that will change SCTLR_ELn[M] from 1 to 0. Signed-off-by: Shanker Donthineni Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 12 +++++++++++- arch/arm64/include/asm/assembler.h | 10 ++++++++++ arch/arm64/kernel/cpu-reset.S | 1 + arch/arm64/kernel/efi-entry.S | 2 ++ arch/arm64/kernel/head.S | 1 + arch/arm64/kernel/relocate_kernel.S | 1 + arch/arm64/kvm/hyp-init.S | 1 + 8 files changed, 28 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 304bf22bb83c..fc1c884fea10 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -75,3 +75,4 @@ stable kernels. | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | | Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | +| Qualcomm Tech. 
| Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_E1041 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a93339f5178f..c9a7e9e1414f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065 If unsure, say Y. - config SOCIONEXT_SYNQUACER_PREITS bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" default y @@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802 a 128kB offset to be applied to the target address in this commands. If unsure, say Y. + +config QCOM_FALKOR_ERRATUM_E1041 + bool "Falkor E1041: Speculative instruction fetches might cause errant memory access" + default y + help + Falkor CPU may speculatively fetch instructions from an improper + memory location when MMU translation is changed from SCTLR_ELn[M]=1 + to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index aef72d886677..8b168280976f 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -512,4 +512,14 @@ alternative_else_nop_endif #endif .endm +/** + * Errata workaround prior to disable MMU. Insert an ISB immediately prior + * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */ + .macro pre_disable_mmu_workaround +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041 + isb +#endif + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 65f42d257414..2a752cb2a0f3 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart) mrs x12, sctlr_el1 ldr x13, =SCTLR_ELx_FLAGS bic x12, x12, x13 + pre_disable_mmu_workaround msr sctlr_el1, x12 isb diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 4e6ad355bd05..6b9736c3fb56 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -96,6 +96,7 @@ ENTRY(entry) mrs x0, sctlr_el2 bic x0, x0, #1 << 0 // clear SCTLR.M bic x0, x0, #1 << 2 // clear SCTLR.C + pre_disable_mmu_workaround msr sctlr_el2, x0 isb b 2f @@ -103,6 +104,7 @@ ENTRY(entry) mrs x0, sctlr_el1 bic x0, x0, #1 << 0 // clear SCTLR.M bic x0, x0, #1 << 2 // clear SCTLR.C + pre_disable_mmu_workaround msr sctlr_el1, x0 isb 2: diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 67e86a0f57ac..e3cb9fbf96b6 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -750,6 +750,7 @@ __primary_switch: * to take into account by discarding the current kernel mapping and * creating a new one. 
*/ + pre_disable_mmu_workaround msr sctlr_el1, x20 // disable the MMU isb bl __create_page_tables // recreate kernel mapping diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index ce704a4aeadd..f407e422a720 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel) mrs x0, sctlr_el2 ldr x1, =SCTLR_ELx_FLAGS bic x0, x0, x1 + pre_disable_mmu_workaround msr sctlr_el2, x0 isb 1: diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 3f9615582377..870828c364c5 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -151,6 +151,7 @@ reset: mrs x5, sctlr_el2 ldr x6, =SCTLR_ELx_FLAGS bic x5, x5, x6 // Clear SCTL_M and etc + pre_disable_mmu_workaround msr sctlr_el2, x5 isb -- cgit v1.2.3 From 0e17cada2a5b4dc847082e1db0e3f84599ffd436 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Dec 2017 11:53:26 +0000 Subject: arm64: hw_breakpoint: Use linux/uaccess.h instead of asm/uaccess.h The only inclusion of asm/uaccess.h should be by linux/uaccess.h. All other headers should use the latter. Reported-by: Al Viro Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 749f81779420..74bb56f656ef 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include /* Breakpoint currently in use for each BRP. 
*/ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); -- cgit v1.2.3 From 872bcad246e30f0ae8009a0f8c13874009601445 Mon Sep 17 00:00:00 2001 From: Andreas Dannenberg Date: Mon, 11 Dec 2017 13:01:54 -0600 Subject: ASoC: tas5720: add basic support for TAS5722 devices The TI TAS5722 digital amplifier is very similar to the TAS5720 from an overall and register map perspective. Therefore the existing driver can be extended easily to support this additional device. This commit allows TAS5722 devices to be used in a "subset" type of fashion, without exposing any of the additional features they offer. Signed-off-by: Andreas Dannenberg Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/tas5720.txt | 4 ++- sound/soc/codecs/tas5720.c | 38 ++++++++++++++++++---- sound/soc/codecs/tas5720.h | 1 + 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/tas5720.txt b/Documentation/devicetree/bindings/sound/tas5720.txt index 40d94f82beb3..7481653fe8e3 100644 --- a/Documentation/devicetree/bindings/sound/tas5720.txt +++ b/Documentation/devicetree/bindings/sound/tas5720.txt @@ -6,10 +6,12 @@ audio playback. 
For more product information please see the links below: http://www.ti.com/product/TAS5720L http://www.ti.com/product/TAS5720M +http://www.ti.com/product/TAS5722L Required properties: -- compatible : "ti,tas5720" +- compatible : "ti,tas5720", + "ti,tas5722" - reg : I2C slave address - dvdd-supply : phandle to a 3.3-V supply for the digital circuitry - pvdd-supply : phandle to a supply used for the Class-D amp and the analog diff --git a/sound/soc/codecs/tas5720.c b/sound/soc/codecs/tas5720.c index a736a2a6976c..5def54d1336d 100644 --- a/sound/soc/codecs/tas5720.c +++ b/sound/soc/codecs/tas5720.c @@ -36,6 +36,11 @@ /* Define how often to check (and clear) the fault status register (in ms) */ #define TAS5720_FAULT_CHECK_INTERVAL 200 +enum tas572x_type { + TAS5720, + TAS5722, +}; + static const char * const tas5720_supply_names[] = { "dvdd", /* Digital power supply. Connect to 3.3-V supply. */ "pvdd", /* Class-D amp and analog power supply (connected). */ @@ -47,6 +52,7 @@ struct tas5720_data { struct snd_soc_codec *codec; struct regmap *regmap; struct i2c_client *tas5720_client; + enum tas572x_type devtype; struct regulator_bulk_data supplies[TAS5720_NUM_SUPPLIES]; struct delayed_work fault_check_work; unsigned int last_fault; @@ -264,7 +270,7 @@ out: static int tas5720_codec_probe(struct snd_soc_codec *codec) { struct tas5720_data *tas5720 = snd_soc_codec_get_drvdata(codec); - unsigned int device_id; + unsigned int device_id, expected_device_id; int ret; tas5720->codec = codec; @@ -276,6 +282,11 @@ static int tas5720_codec_probe(struct snd_soc_codec *codec) return ret; } + /* + * Take a liberal approach to checking the device ID to allow the + * driver to be used even if the device ID does not match, however + * issue a warning if there is a mismatch. 
+ */ ret = regmap_read(tas5720->regmap, TAS5720_DEVICE_ID_REG, &device_id); if (ret < 0) { dev_err(codec->dev, "failed to read device ID register: %d\n", @@ -283,13 +294,22 @@ static int tas5720_codec_probe(struct snd_soc_codec *codec) goto probe_fail; } - if (device_id != TAS5720_DEVICE_ID) { - dev_err(codec->dev, "wrong device ID. expected: %u read: %u\n", - TAS5720_DEVICE_ID, device_id); - ret = -ENODEV; - goto probe_fail; + switch (tas5720->devtype) { + case TAS5720: + expected_device_id = TAS5720_DEVICE_ID; + break; + case TAS5722: + expected_device_id = TAS5722_DEVICE_ID; + break; + default: + dev_err(codec->dev, "unexpected private driver data\n"); + return -EINVAL; } + if (device_id != expected_device_id) + dev_warn(codec->dev, "wrong device ID. expected: %u read: %u\n", + expected_device_id, device_id); + /* Set device to mute */ ret = snd_soc_update_bits(codec, TAS5720_DIGITAL_CTRL2_REG, TAS5720_MUTE, TAS5720_MUTE); @@ -552,6 +572,8 @@ static int tas5720_probe(struct i2c_client *client, return -ENOMEM; data->tas5720_client = client; + data->devtype = id->driver_data; + data->regmap = devm_regmap_init_i2c(client, &tas5720_regmap_config); if (IS_ERR(data->regmap)) { ret = PTR_ERR(data->regmap); @@ -592,7 +614,8 @@ static int tas5720_remove(struct i2c_client *client) } static const struct i2c_device_id tas5720_id[] = { - { "tas5720", 0 }, + { "tas5720", TAS5720 }, + { "tas5722", TAS5722 }, { } }; MODULE_DEVICE_TABLE(i2c, tas5720_id); @@ -600,6 +623,7 @@ MODULE_DEVICE_TABLE(i2c, tas5720_id); #if IS_ENABLED(CONFIG_OF) static const struct of_device_id tas5720_of_match[] = { { .compatible = "ti,tas5720", }, + { .compatible = "ti,tas5722", }, { }, }; MODULE_DEVICE_TABLE(of, tas5720_of_match); diff --git a/sound/soc/codecs/tas5720.h b/sound/soc/codecs/tas5720.h index 3d077c779b12..bef802afcc69 100644 --- a/sound/soc/codecs/tas5720.h +++ b/sound/soc/codecs/tas5720.h @@ -32,6 +32,7 @@ /* TAS5720_DEVICE_ID_REG */ #define TAS5720_DEVICE_ID 0x01 +#define 
TAS5722_DEVICE_ID 0x12 /* TAS5720_POWER_CTRL_REG */ #define TAS5720_DIG_CLIP_MASK GENMASK(7, 2) -- cgit v1.2.3 From d5eb436acc8104b5c789359aa8c923ff5fafcd62 Mon Sep 17 00:00:00 2001 From: Andreas Dannenberg Date: Mon, 11 Dec 2017 13:01:55 -0600 Subject: ASoC: tas5720: add TAS5722 register support Introduce a custom super-set register map and associated bit definitions to allow driver access to all TAS5722 device functionality. Signed-off-by: Andreas Dannenberg Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tas5720.c | 23 ++++++++++++++++++++++- sound/soc/codecs/tas5720.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas5720.c b/sound/soc/codecs/tas5720.c index 5def54d1336d..f3006f301fe8 100644 --- a/sound/soc/codecs/tas5720.c +++ b/sound/soc/codecs/tas5720.c @@ -466,6 +466,15 @@ static const struct regmap_config tas5720_regmap_config = { .volatile_reg = tas5720_is_volatile_reg, }; +static const struct regmap_config tas5722_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = TAS5722_MAX_REG, + .cache_type = REGCACHE_RBTREE, + .volatile_reg = tas5720_is_volatile_reg, +}; + /* * DAC analog gain. There are four discrete values to select from, ranging * from 19.2 dB to 26.3dB. 
@@ -564,6 +573,7 @@ static int tas5720_probe(struct i2c_client *client, { struct device *dev = &client->dev; struct tas5720_data *data; + const struct regmap_config *regmap_config; int ret; int i; @@ -574,7 +584,18 @@ static int tas5720_probe(struct i2c_client *client, data->tas5720_client = client; data->devtype = id->driver_data; - data->regmap = devm_regmap_init_i2c(client, &tas5720_regmap_config); + switch (id->driver_data) { + case TAS5720: + regmap_config = &tas5720_regmap_config; + break; + case TAS5722: + regmap_config = &tas5722_regmap_config; + break; + default: + dev_err(dev, "unexpected private driver data\n"); + return -EINVAL; + } + data->regmap = devm_regmap_init_i2c(client, regmap_config); if (IS_ERR(data->regmap)) { ret = PTR_ERR(data->regmap); dev_err(dev, "failed to allocate register map: %d\n", ret); diff --git a/sound/soc/codecs/tas5720.h b/sound/soc/codecs/tas5720.h index bef802afcc69..1dda3095961d 100644 --- a/sound/soc/codecs/tas5720.h +++ b/sound/soc/codecs/tas5720.h @@ -30,6 +30,11 @@ #define TAS5720_DIGITAL_CLIP1_REG 0x11 #define TAS5720_MAX_REG TAS5720_DIGITAL_CLIP1_REG +/* Additional TAS5722-specific Registers */ +#define TAS5722_DIGITAL_CTRL2_REG 0x13 +#define TAS5722_ANALOG_CTRL2_REG 0x14 +#define TAS5722_MAX_REG TAS5722_ANALOG_CTRL2_REG + /* TAS5720_DEVICE_ID_REG */ #define TAS5720_DEVICE_ID 0x01 #define TAS5722_DEVICE_ID 0x12 @@ -52,6 +57,7 @@ #define TAS5720_SAIF_FORMAT_MASK GENMASK(2, 0) /* TAS5720_DIGITAL_CTRL2_REG */ +#define TAS5722_VOL_RAMP_RATE BIT(6) #define TAS5720_MUTE BIT(4) #define TAS5720_TDM_SLOT_SEL_MASK GENMASK(2, 0) @@ -88,4 +94,28 @@ #define TAS5720_CLIP1_MASK GENMASK(7, 2) #define TAS5720_CLIP1_SHIFT (0x2) +/* TAS5722_DIGITAL_CTRL2_REG */ +#define TAS5722_HPF_3_7HZ (0x0 << 5) +#define TAS5722_HPF_7_4HZ (0x1 << 5) +#define TAS5722_HPF_14_9HZ (0x2 << 5) +#define TAS5722_HPF_29_7HZ (0x3 << 5) +#define TAS5722_HPF_59_4HZ (0x4 << 5) +#define TAS5722_HPF_118_4HZ (0x5 << 5) +#define TAS5722_HPF_235_0HZ (0x6 << 5) 
+#define TAS5722_HPF_463_2HZ (0x7 << 5) +#define TAS5722_HPF_MASK GENMASK(7, 5) +#define TAS5722_AUTO_SLEEP_OFF (0x0 << 3) +#define TAS5722_AUTO_SLEEP_1024LR (0x1 << 3) +#define TAS5722_AUTO_SLEEP_65536LR (0x2 << 3) +#define TAS5722_AUTO_SLEEP_262144LR (0x3 << 3) +#define TAS5722_AUTO_SLEEP_MASK GENMASK(4, 3) +#define TAS5722_TDM_SLOT_16B BIT(2) +#define TAS5722_MCLK_PIN_CFG BIT(1) +#define TAS5722_VOL_CONTROL_LSB BIT(0) + +/* TAS5722_ANALOG_CTRL2_REG */ +#define TAS5722_FAULTZ_PU BIT(3) +#define TAS5722_VREG_LVL BIT(2) +#define TAS5722_PWR_TUNE BIT(0) + #endif /* __TAS5720_H__ */ -- cgit v1.2.3 From 32fd87b3bbf5f7a045546401dfe2894dbbf4d8c3 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 11 Dec 2017 22:48:41 +0100 Subject: USB: core: only clean up what we allocated When cleaning up the configurations, make sure we only free the number of configurations and interfaces that we could have allocated. Reported-by: Andrey Konovalov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 55b198ba629b..93b38471754e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -764,18 +764,21 @@ void usb_destroy_configuration(struct usb_device *dev) return; if (dev->rawdescriptors) { - for (i = 0; i < dev->descriptor.bNumConfigurations; i++) + for (i = 0; i < dev->descriptor.bNumConfigurations && + i < USB_MAXCONFIG; i++) kfree(dev->rawdescriptors[i]); kfree(dev->rawdescriptors); dev->rawdescriptors = NULL; } - for (c = 0; c < dev->descriptor.bNumConfigurations; c++) { + for (c = 0; c < dev->descriptor.bNumConfigurations && + c < USB_MAXCONFIG; c++) { struct usb_host_config *cf = &dev->config[c]; kfree(cf->string); - for (i = 0; i < cf->desc.bNumInterfaces; i++) { + for (i = 0; i < cf->desc.bNumInterfaces && + i < USB_MAXINTERFACES; i++) { if (cf->intf_cache[i]) kref_put(&cf->intf_cache[i]->ref, 
usb_release_interface_cache); -- cgit v1.2.3 From f971e511cb7d6f1b3730248cf2967d3ccdd8874c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Nov 2017 10:38:21 +0000 Subject: tools/perf: Convert ACCESS_ONCE() to READ_ONCE() Recently there was a treewide conversion of ACCESS_ONCE() to {READ,WRITE}_ONCE(), but a new use was introduced concurrently by commit: 1695849735752d2a ("perf mmap: Move perf_mmap and methods to separate mmap.[ch] files") Let's convert this over to READ_ONCE() so that we can remove the ACCESS_ONCE() definitions in subsequent patches. Tested-by: Paul E. McKenney Signed-off-by: Mark Rutland Reviewed-by: Paul E. McKenney Cc: Arnaldo Carvalho de Melo Cc: Joe Perches Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: apw@canonical.com Link: http://lkml.kernel.org/r/20171127103824.36526-2-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- tools/perf/util/mmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index efd78b827b05..3a5cb5a6e94a 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -70,7 +70,7 @@ void perf_mmap__read_catchup(struct perf_mmap *md); static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; - u64 head = ACCESS_ONCE(pc->data_head); + u64 head = READ_ONCE(pc->data_head); rmb(); return head; } -- cgit v1.2.3 From 2a22f692bbe0a7933acbd50045479ffc0fdf11f7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Nov 2017 10:38:22 +0000 Subject: tools/include: Remove ACCESS_ONCE() There are no longer any userspace uses of ACCESS_ONCE(), so we can remove the definition from our userspace <linux/compiler.h>, which is only used by tools in the kernel directory (i.e. it isn't a uapi header). This patch removes the ACCESS_ONCE() definition, and updates comments which referred to it. At the same time, some inconsistent and redundant whitespace is removed from comments. Tested-by: Paul E.
McKenney Signed-off-by: Mark Rutland Cc: Arnaldo Carvalho de Melo Cc: Joe Perches Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: apw@canonical.com Link: http://lkml.kernel.org/r/20171127103824.36526-3-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- tools/include/linux/compiler.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 07fd03c74a77..04e32f965ad7 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -84,8 +84,6 @@ #define uninitialized_var(x) x = *(&(x)) -#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) - #include /* @@ -135,20 +133,19 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s /* * Prevent the compiler from merging or refetching reads or writes. The * compiler is also forbidden from reordering successive instances of - * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the - * compiler is aware of some particular ordering. One way to make the - * compiler aware of ordering is to put the two invocations of READ_ONCE, - * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some + * particular ordering. One way to make the compiler aware of ordering is to + * put the two invocations of READ_ONCE or WRITE_ONCE in different C + * statements. * - * In contrast to ACCESS_ONCE these two macros will also work on aggregate - * data types like structs or unions. If the size of the accessed data - * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) - * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a - * compile-time warning. + * These two macros will also work on aggregate data types like structs or + * unions. 
If the size of the accessed data type exceeds the word size of + * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will + * fall back to memcpy and print a compile-time warning. * * Their two major use cases are: (1) Mediating communication between * process-level code and irq/NMI handlers, all running on the same CPU, - * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise * mutilate accesses that either do not require ordering or that interact * with an explicit memory barrier or atomic instruction that provides the * required ordering. -- cgit v1.2.3 From b899a850431e2dd0943205a63a68573f3e312d0d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Nov 2017 10:38:23 +0000 Subject: compiler.h: Remove ACCESS_ONCE() There are no longer any kernelspace uses of ACCESS_ONCE(), so we can remove the definition from <linux/compiler.h>. This patch removes the ACCESS_ONCE() definition, and updates comments which referred to it. At the same time, some inconsistent and redundant whitespace is removed from comments. Tested-by: Paul E. McKenney Signed-off-by: Mark Rutland Cc: Arnaldo Carvalho de Melo Cc: Joe Perches Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: apw@canonical.com Link: http://lkml.kernel.org/r/20171127103824.36526-4-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 47 +++++++++++------------------------------------ 1 file changed, 11 insertions(+), 36 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 188ed9f65517..52e611ab9a6c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -220,21 +220,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s /* * Prevent the compiler from merging or refetching reads or writes.
The * compiler is also forbidden from reordering successive instances of - * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the - * compiler is aware of some particular ordering. One way to make the - * compiler aware of ordering is to put the two invocations of READ_ONCE, - * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some + * particular ordering. One way to make the compiler aware of ordering is to + * put the two invocations of READ_ONCE or WRITE_ONCE in different C + * statements. * - * In contrast to ACCESS_ONCE these two macros will also work on aggregate - * data types like structs or unions. If the size of the accessed data - * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) - * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at - * least two memcpy()s: one for the __builtin_memcpy() and then one for - * the macro doing the copy of variable - '__u' allocated on the stack. + * These two macros will also work on aggregate data types like structs or + * unions. If the size of the accessed data type exceeds the word size of + * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will + * fall back to memcpy(). There's at least two memcpy()s: one for the + * __builtin_memcpy() and then one for the macro doing the copy of variable + * - '__u' allocated on the stack. * * Their two major use cases are: (1) Mediating communication between * process-level code and irq/NMI handlers, all running on the same CPU, - * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise * mutilate accesses that either do not require ordering or that interact * with an explicit memory barrier or atomic instruction that provides the * required ordering. 
@@ -327,29 +327,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s compiletime_assert(__native_word(t), \ "Need native word sized stores/loads for atomicity.") -/* - * Prevent the compiler from merging or refetching accesses. The compiler - * is also forbidden from reordering successive instances of ACCESS_ONCE(), - * but only when the compiler is aware of some particular ordering. One way - * to make the compiler aware of ordering is to put the two invocations of - * ACCESS_ONCE() in different C statements. - * - * ACCESS_ONCE will only work on scalar types. For union types, ACCESS_ONCE - * on a union member will work as long as the size of the member matches the - * size of the union and the size is smaller than word size. - * - * The major use cases of ACCESS_ONCE used to be (1) Mediating communication - * between process-level code and irq/NMI handlers, all running on the same CPU, - * and (2) Ensuring that the compiler does not fold, spindle, or otherwise - * mutilate accesses that either do not require ordering or that interact - * with an explicit memory barrier or atomic instruction that provides the - * required ordering. - * - * If possible use READ_ONCE()/WRITE_ONCE() instead. - */ -#define __ACCESS_ONCE(x) ({ \ - __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \ - (volatile typeof(x) *)&(x); }) -#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) - #endif /* __LINUX_COMPILER_H */ -- cgit v1.2.3 From 8cb562b1d56fad42cbee44bdc9bc64cea41a0a8c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Nov 2017 10:38:24 +0000 Subject: checkpatch: Remove ACCESS_ONCE() warning Now that ACCESS_ONCE() has been excised from the kernel, any uses will result in a build error, and we no longer need to whine about it in checkpatch. This patch removes the newly redundant warning. Tested-by: Paul E. 
McKenney Signed-off-by: Mark Rutland Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Link: http://lkml.kernel.org/r/20171127103824.36526-5-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- scripts/checkpatch.pl | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 040aa79e1d9d..31031f10fe56 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6233,28 +6233,6 @@ sub process { } } -# whine about ACCESS_ONCE - if ($^V && $^V ge 5.10.0 && - $line =~ /\bACCESS_ONCE\s*$balanced_parens\s*(=(?!=))?\s*($FuncArg)?/) { - my $par = $1; - my $eq = $2; - my $fun = $3; - $par =~ s/^\(\s*(.*)\s*\)$/$1/; - if (defined($eq)) { - if (WARN("PREFER_WRITE_ONCE", - "Prefer WRITE_ONCE(, ) over ACCESS_ONCE() = \n" . $herecurr) && - $fix) { - $fixed[$fixlinenr] =~ s/\bACCESS_ONCE\s*\(\s*\Q$par\E\s*\)\s*$eq\s*\Q$fun\E/WRITE_ONCE($par, $fun)/; - } - } else { - if (WARN("PREFER_READ_ONCE", - "Prefer READ_ONCE() over ACCESS_ONCE()\n" . $herecurr) && - $fix) { - $fixed[$fixlinenr] =~ s/\bACCESS_ONCE\s*\(\s*\Q$par\E\s*\)/READ_ONCE($par)/; - } - } - } - # check for mutex_trylock_recursive usage if ($line =~ /mutex_trylock_recursive/) { ERROR("LOCKING", -- cgit v1.2.3 From 6b63dd119eb4eee44733ca435168ce05487b8644 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Nov 2017 00:10:25 +0900 Subject: x86/tools: Rename test_get_len to insn_decoder_test Rename test_get_len test command to insn_decoder_test as it is a more meaningful name. This also changes some comments in related files. Note that this also removes the paragraph about writing to the Free Software Foundation's mailing address.
Signed-off-by: Masami Hiramatsu Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151153622537.22827.14928774603980883278.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/tools/Makefile | 10 +-- arch/x86/tools/distill.awk | 2 +- arch/x86/tools/insn_decoder_test.c | 169 ++++++++++++++++++++++++++++++++++++ arch/x86/tools/test_get_len.c | 173 ------------------------------------- 4 files changed, 175 insertions(+), 179 deletions(-) create mode 100644 arch/x86/tools/insn_decoder_test.c delete mode 100644 arch/x86/tools/test_get_len.c diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 972b8e8d939c..b0d75684d313 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -17,24 +17,24 @@ distill_awk = $(srctree)/arch/x86/tools/distill.awk chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk quiet_cmd_posttest = TEST $@ - cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) + cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/insn_decoder_test $(posttest_64bit) $(posttest_verbose) quiet_cmd_sanitytest = TEST $@ cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000 -posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity +posttest: $(obj)/insn_decoder_test vmlinux $(obj)/insn_sanity $(call cmd,posttest) $(call cmd,sanitytest) -hostprogs-y += test_get_len insn_sanity +hostprogs-y += insn_decoder_test insn_sanity # -I needed for generated C source and C source which in the kernel tree. 
-HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/ +HOSTCFLAGS_insn_decoder_test.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ # Dependencies are also needed. -$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c +$(obj)/insn_decoder_test.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk index e0edeccc1429..80cd7d53bd07 100644 --- a/arch/x86/tools/distill.awk +++ b/arch/x86/tools/distill.awk @@ -1,6 +1,6 @@ #!/bin/awk -f # SPDX-License-Identifier: GPL-2.0 -# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len +# Usage: objdump -d a.out | awk -f distill.awk | ./insn_decoder_test # Distills the disassembly as follows: # - Removes all lines except the disassembled instructions. 
# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c new file mode 100644 index 000000000000..8be7264cb723 --- /dev/null +++ b/arch/x86/tools/insn_decoder_test.c @@ -0,0 +1,169 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include +#include +#include +#include +#include + +#define unlikely(cond) (cond) + +#include +#include +#include + +/* + * Test of instruction analysis in general and insn_get_length() in + * particular. See if insn_get_length() and the disassembler agree + * on the length of each instruction in an elf disassembly. 
+ * + * Usage: objdump -d a.out | awk -f distill.awk | ./insn_decoder_test + */ + +const char *prog; +static int verbose; +static int x86_64; + +static void usage(void) +{ + fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" + " %s [-y|-n] [-v]\n", prog); + fprintf(stderr, "\t-y 64bit mode\n"); + fprintf(stderr, "\t-n 32bit mode\n"); + fprintf(stderr, "\t-v verbose mode\n"); + exit(1); +} + +static void malformed_line(const char *line, int line_nr) +{ + fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + exit(3); +} + +static void dump_field(FILE *fp, const char *name, const char *indent, + struct insn_field *field) +{ + fprintf(fp, "%s.%s = {\n", indent, name); + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", + indent, field->value, field->bytes[0], field->bytes[1], + field->bytes[2], field->bytes[3]); + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, + field->got, field->nbytes); +} + +static void dump_insn(FILE *fp, struct insn *insn) +{ + fprintf(fp, "Instruction = {\n"); + dump_field(fp, "prefixes", "\t", &insn->prefixes); + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); + dump_field(fp, "opcode", "\t", &insn->opcode); + dump_field(fp, "modrm", "\t", &insn->modrm); + dump_field(fp, "sib", "\t", &insn->sib); + dump_field(fp, "displacement", "\t", &insn->displacement); + dump_field(fp, "immediate1", "\t", &insn->immediate1); + dump_field(fp, "immediate2", "\t", &insn->immediate2); + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", + insn->attr, insn->opnd_bytes, insn->addr_bytes); + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", + insn->length, insn->x86_64, insn->kaddr); +} + +static void parse_args(int argc, char **argv) +{ + int c; + prog = argv[0]; + while ((c = getopt(argc, argv, "ynv")) != -1) { + switch (c) { + case 'y': + x86_64 = 1; + break; + case 'n': + x86_64 = 0; + break; + case 'v': + verbose = 1; + 
break; + default: + usage(); + } + } +} + +#define BUFSIZE 256 + +int main(int argc, char **argv) +{ + char line[BUFSIZE], sym[BUFSIZE] = ""; + unsigned char insn_buf[16]; + struct insn insn; + int insns = 0; + int warnings = 0; + + parse_args(argc, argv); + + while (fgets(line, BUFSIZE, stdin)) { + char copy[BUFSIZE], *s, *tab1, *tab2; + int nb = 0; + unsigned int b; + + if (line[0] == '<') { + /* Symbol line */ + strcpy(sym, line); + continue; + } + + insns++; + memset(insn_buf, 0, 16); + strcpy(copy, line); + tab1 = strchr(copy, '\t'); + if (!tab1) + malformed_line(line, insns); + s = tab1 + 1; + s += strspn(s, " "); + tab2 = strchr(s, '\t'); + if (!tab2) + malformed_line(line, insns); + *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ + while (s < tab2) { + if (sscanf(s, "%x", &b) == 1) { + insn_buf[nb++] = (unsigned char) b; + s += 3; + } else + break; + } + /* Decode an instruction */ + insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64); + insn_get_length(&insn); + if (insn.length != nb) { + warnings++; + fprintf(stderr, "Warning: %s found difference at %s\n", + prog, sym); + fprintf(stderr, "Warning: %s", line); + fprintf(stderr, "Warning: objdump says %d bytes, but " + "insn_get_length() says %d\n", nb, + insn.length); + if (verbose) + dump_insn(stderr, &insn); + } + } + if (warnings) + fprintf(stderr, "Warning: decoded and checked %d" + " instructions with %d warnings\n", insns, warnings); + else + fprintf(stdout, "Success: decoded and checked %d" + " instructions\n", insns); + return 0; +} diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c deleted file mode 100644 index ecf31e0358c8..000000000000 --- a/arch/x86/tools/test_get_len.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2009 - */ - -#include -#include -#include -#include -#include - -#define unlikely(cond) (cond) - -#include -#include -#include - -/* - * Test of instruction analysis in general and insn_get_length() in - * particular. See if insn_get_length() and the disassembler agree - * on the length of each instruction in an elf disassembly. - * - * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len - */ - -const char *prog; -static int verbose; -static int x86_64; - -static void usage(void) -{ - fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" - " %s [-y|-n] [-v]\n", prog); - fprintf(stderr, "\t-y 64bit mode\n"); - fprintf(stderr, "\t-n 32bit mode\n"); - fprintf(stderr, "\t-v verbose mode\n"); - exit(1); -} - -static void malformed_line(const char *line, int line_nr) -{ - fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); - exit(3); -} - -static void dump_field(FILE *fp, const char *name, const char *indent, - struct insn_field *field) -{ - fprintf(fp, "%s.%s = {\n", indent, name); - fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", - indent, field->value, field->bytes[0], field->bytes[1], - field->bytes[2], field->bytes[3]); - fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, - field->got, field->nbytes); -} - -static void dump_insn(FILE *fp, struct insn *insn) -{ - fprintf(fp, "Instruction = {\n"); - dump_field(fp, "prefixes", "\t", &insn->prefixes); - dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); - 
dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); - dump_field(fp, "opcode", "\t", &insn->opcode); - dump_field(fp, "modrm", "\t", &insn->modrm); - dump_field(fp, "sib", "\t", &insn->sib); - dump_field(fp, "displacement", "\t", &insn->displacement); - dump_field(fp, "immediate1", "\t", &insn->immediate1); - dump_field(fp, "immediate2", "\t", &insn->immediate2); - fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", - insn->attr, insn->opnd_bytes, insn->addr_bytes); - fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", - insn->length, insn->x86_64, insn->kaddr); -} - -static void parse_args(int argc, char **argv) -{ - int c; - prog = argv[0]; - while ((c = getopt(argc, argv, "ynv")) != -1) { - switch (c) { - case 'y': - x86_64 = 1; - break; - case 'n': - x86_64 = 0; - break; - case 'v': - verbose = 1; - break; - default: - usage(); - } - } -} - -#define BUFSIZE 256 - -int main(int argc, char **argv) -{ - char line[BUFSIZE], sym[BUFSIZE] = ""; - unsigned char insn_buf[16]; - struct insn insn; - int insns = 0; - int warnings = 0; - - parse_args(argc, argv); - - while (fgets(line, BUFSIZE, stdin)) { - char copy[BUFSIZE], *s, *tab1, *tab2; - int nb = 0; - unsigned int b; - - if (line[0] == '<') { - /* Symbol line */ - strcpy(sym, line); - continue; - } - - insns++; - memset(insn_buf, 0, 16); - strcpy(copy, line); - tab1 = strchr(copy, '\t'); - if (!tab1) - malformed_line(line, insns); - s = tab1 + 1; - s += strspn(s, " "); - tab2 = strchr(s, '\t'); - if (!tab2) - malformed_line(line, insns); - *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ - while (s < tab2) { - if (sscanf(s, "%x", &b) == 1) { - insn_buf[nb++] = (unsigned char) b; - s += 3; - } else - break; - } - /* Decode an instruction */ - insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64); - insn_get_length(&insn); - if (insn.length != nb) { - warnings++; - fprintf(stderr, "Warning: %s found difference at %s\n", - prog, sym); - fprintf(stderr, "Warning: %s", line); - 
fprintf(stderr, "Warning: objdump says %d bytes, but " - "insn_get_length() says %d\n", nb, - insn.length); - if (verbose) - dump_insn(stderr, &insn); - } - } - if (warnings) - fprintf(stderr, "Warning: decoded and checked %d" - " instructions with %d warnings\n", insns, warnings); - else - fprintf(stdout, "Success: decoded and checked %d" - " instructions\n", insns); - return 0; -} -- cgit v1.2.3 From 98fe07fccc3e25889186277a5158c0a658d528a4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Nov 2017 00:10:54 +0900 Subject: x86/tools: Rename distill.awk to objdump_reformat.awk Rename distill.awk to objdump_reformat.awk because it more clearly expresses its purpose of re-formatting the output of objdump so that insn_decoder_test can read it. Signed-off-by: Masami Hiramatsu Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151153625409.22827.10470603625519700259.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/tools/Makefile | 4 ++-- arch/x86/tools/distill.awk | 48 ------------------------------------- arch/x86/tools/insn_decoder_test.c | 6 ++--- arch/x86/tools/objdump_reformat.awk | 48 +++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 53 deletions(-) delete mode 100644 arch/x86/tools/distill.awk create mode 100644 arch/x86/tools/objdump_reformat.awk diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index b0d75684d313..09af7ff53044 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -13,11 +13,11 @@ else posttest_64bit = -n endif -distill_awk = $(srctree)/arch/x86/tools/distill.awk +reformatter = $(srctree)/arch/x86/tools/objdump_reformat.awk chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk quiet_cmd_posttest = TEST $@ - cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/insn_decoder_test $(posttest_64bit) $(posttest_verbose) + cmd_posttest = 
($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(reformatter) | $(obj)/insn_decoder_test $(posttest_64bit) $(posttest_verbose) quiet_cmd_sanitytest = TEST $@ cmd_sanitytest = $(obj)/insn_sanity $(posttest_64bit) -m 1000000 diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk deleted file mode 100644 index 80cd7d53bd07..000000000000 --- a/arch/x86/tools/distill.awk +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/awk -f -# SPDX-License-Identifier: GPL-2.0 -# Usage: objdump -d a.out | awk -f distill.awk | ./insn_decoder_test -# Distills the disassembly as follows: -# - Removes all lines except the disassembled instructions. -# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes -# into a single line. -# - Remove bad(or prefix only) instructions - -BEGIN { - prev_addr = "" - prev_hex = "" - prev_mnemonic = "" - bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" - fwait_expr = "^9b " - fwait_str="9b\tfwait" -} - -/^ *[0-9a-f]+ <[^>]*>:/ { - # Symbol entry - printf("%s%s\n", $2, $1) -} - -/^ *[0-9a-f]+:/ { - if (split($0, field, "\t") < 3) { - # This is a continuation of the same insn. 
- prev_hex = prev_hex field[2] - } else { - # Skip bad instructions - if (match(prev_mnemonic, bad_expr)) - prev_addr = "" - # Split fwait from other f* instructions - if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { - printf "%s\t%s\n", prev_addr, fwait_str - sub(fwait_expr, "", prev_hex) - } - if (prev_addr != "") - printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic - prev_addr = field[1] - prev_hex = field[2] - prev_mnemonic = field[3] - } -} - -END { - if (prev_addr != "") - printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic -} diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c index 8be7264cb723..286d2e3b9d57 100644 --- a/arch/x86/tools/insn_decoder_test.c +++ b/arch/x86/tools/insn_decoder_test.c @@ -29,7 +29,7 @@ * particular. See if insn_get_length() and the disassembler agree * on the length of each instruction in an elf disassembly. * - * Usage: objdump -d a.out | awk -f distill.awk | ./insn_decoder_test + * Usage: objdump -d a.out | awk -f objdump_reformat.awk | ./insn_decoder_test */ const char *prog; @@ -38,8 +38,8 @@ static int x86_64; static void usage(void) { - fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" - " %s [-y|-n] [-v]\n", prog); + fprintf(stderr, "Usage: objdump -d a.out | awk -f objdump_reformat.awk" + " | %s [-y|-n] [-v]\n", prog); fprintf(stderr, "\t-y 64bit mode\n"); fprintf(stderr, "\t-n 32bit mode\n"); fprintf(stderr, "\t-v verbose mode\n"); diff --git a/arch/x86/tools/objdump_reformat.awk b/arch/x86/tools/objdump_reformat.awk new file mode 100644 index 000000000000..f418c91b71f0 --- /dev/null +++ b/arch/x86/tools/objdump_reformat.awk @@ -0,0 +1,48 @@ +#!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# Usage: objdump -d a.out | awk -f objdump_reformat.awk | ./insn_decoder_test +# Reformats the disassembly as follows: +# - Removes all lines except the disassembled instructions. 
+# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes +# into a single line. +# - Remove bad(or prefix only) instructions + +BEGIN { + prev_addr = "" + prev_hex = "" + prev_mnemonic = "" + bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" + fwait_expr = "^9b " + fwait_str="9b\tfwait" +} + +/^ *[0-9a-f]+ <[^>]*>:/ { + # Symbol entry + printf("%s%s\n", $2, $1) +} + +/^ *[0-9a-f]+:/ { + if (split($0, field, "\t") < 3) { + # This is a continuation of the same insn. + prev_hex = prev_hex field[2] + } else { + # Skip bad instructions + if (match(prev_mnemonic, bad_expr)) + prev_addr = "" + # Split fwait from other f* instructions + if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { + printf "%s\t%s\n", prev_addr, fwait_str + sub(fwait_expr, "", prev_hex) + } + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic + prev_addr = field[1] + prev_hex = field[2] + prev_mnemonic = field[3] + } +} + +END { + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic +} -- cgit v1.2.3 From 10c91577d5e631773a6394e14cf60125389b71ae Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 25 Nov 2017 00:11:22 +0900 Subject: x86/tools: Standardize output format of insn_decode_test Standardize warning, error, and success printout format of insn_decoder_test so that the user can easily understand which test tool caused the messages.
Signed-off-by: Masami Hiramatsu Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/151153628279.22827.4869104298276788693.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/tools/insn_decoder_test.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c index 286d2e3b9d57..a3b4fd954931 100644 --- a/arch/x86/tools/insn_decoder_test.c +++ b/arch/x86/tools/insn_decoder_test.c @@ -17,6 +17,7 @@ #include #include #include +#include #define unlikely(cond) (cond) @@ -48,10 +49,21 @@ static void usage(void) static void malformed_line(const char *line, int line_nr) { - fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + fprintf(stderr, "%s: error: malformed line %d:\n%s", + prog, line_nr, line); exit(3); } +static void pr_warn(const char *fmt, ...) +{ + va_list ap; + + fprintf(stderr, "%s: warning: ", prog); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + static void dump_field(FILE *fp, const char *name, const char *indent, struct insn_field *field) { @@ -149,21 +161,20 @@ int main(int argc, char **argv) insn_get_length(&insn); if (insn.length != nb) { warnings++; - fprintf(stderr, "Warning: %s found difference at %s\n", - prog, sym); - fprintf(stderr, "Warning: %s", line); - fprintf(stderr, "Warning: objdump says %d bytes, but " - "insn_get_length() says %d\n", nb, - insn.length); + pr_warn("Found an x86 instruction decoder bug, " + "please report this.\n", sym); + pr_warn("%s", line); + pr_warn("objdump says %d bytes, but insn_get_length() " + "says %d\n", nb, insn.length); if (verbose) dump_insn(stderr, &insn); } } if (warnings) - fprintf(stderr, "Warning: decoded and checked %d" - " instructions with %d warnings\n", insns, warnings); + pr_warn("Decoded and checked %d instructions with %d " + "failures\n", insns, warnings); else - fprintf(stdout, 
"Success: decoded and checked %d" - " instructions\n", insns); + fprintf(stdout, "%s: success: Decoded and checked %d" + " instructions\n", prog, insns); return 0; } -- cgit v1.2.3 From 0f3922a9b99eca76c6578cd84191573378f2c988 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 8 Dec 2017 04:17:28 -0700 Subject: x86/Xen: don't report ancient LAPIC version Unconditionally reporting a value seen on the P4 or older invokes functionality like io_apic_get_unique_id() on 32-bit builds, resulting in a panic() with sufficiently many CPUs and/or IO-APICs. Doing what that function does would be the hypervisor's responsibility anyway, so makes no sense to be used when running on Xen. Uniformly report a more modern version; this shouldn't matter much as both LAPIC and IO-APIC are being managed entirely / mostly by the hypervisor. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/xen/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index b5e48da7fbff..c14048553c18 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -56,7 +56,7 @@ static u32 xen_apic_read(u32 reg) return 0; if (reg == APIC_LVR) - return 0x10; + return 0x14; #ifdef CONFIG_X86_32 if (reg == APIC_LDR) return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); -- cgit v1.2.3 From c4f9d9cb2c29ff04c6b4bb09b72802d8aedfc7cb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 12 Dec 2017 03:18:11 -0700 Subject: xen: XEN_ACPI_PROCESSOR is Dom0-only Add a respective dependency. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index d8dd54678ab7..e5d0c28372ea 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -269,7 +269,7 @@ config XEN_ACPI_HOTPLUG_CPU config XEN_ACPI_PROCESSOR tristate "Xen ACPI processor" - depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ + depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ default m help This ACPI processor uploads Power Management information to the Xen -- cgit v1.2.3 From e7fd37ba12170cc414be8b639dfc2c5f7172fac2 Mon Sep 17 00:00:00 2001 From: Ma Shimiao Date: Tue, 12 Dec 2017 09:43:49 +0800 Subject: cgroup: avoid copying strings longer than the buffers cgroup root name and file name have max length limit, we should avoid copying longer name than that to the name. tj: minor update to $SUBJ. Signed-off-by: Ma Shimiao Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0b1ffe147f24..18d71fbd3923 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1397,7 +1397,7 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, cgroup_on_dfl(cgrp) ? 
ss->name : ss->legacy_name, cft->name); else - strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX); + strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX); return buf; } @@ -1864,9 +1864,9 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) root->flags = opts->flags; if (opts->release_agent) - strcpy(root->release_agent_path, opts->release_agent); + strscpy(root->release_agent_path, opts->release_agent, PATH_MAX); if (opts->name) - strcpy(root->name, opts->name); + strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); if (opts->cpuset_clone_children) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -- cgit v1.2.3 From 30791ac41927ebd3e75486f9504b6d2280463bf0 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 11 Dec 2017 00:05:46 -0800 Subject: tcp md5sig: Use skb's saddr when replying to an incoming segment The MD5-key that belongs to a connection is identified by the peer's IP-address. When we are in tcp_v4(6)_reqsk_send_ack(), we are replying to an incoming segment from tcp_check_req() that failed the seq-number checks. Thus, to find the correct key, we need to use the skb's saddr and not the daddr. This bug seems to have been there for quite a while, but probably went unnoticed because the consequences are not catastrophic. We will call tcp_v4_reqsk_send_ack only to send a challenge-ACK back to the peer, thus the connection doesn't really fail. Fixes: 9501f9722922 ("tcp md5sig: Let the caller pass appropriate key for tcp_v{4,6}_do_calc_md5_hash().") Signed-off-by: Christoph Paasch Reviewed-by: Eric Dumazet Signed-off-by: David S.
Miller --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 77ea45da0fe9..94e28350f420 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -848,7 +848,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, 0, - tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, + tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr, AF_INET), inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1f04ec0e4a7a..7178476b3d2f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -994,7 +994,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr), 0, 0); } -- cgit v1.2.3 From 17278a91e04f858155d54bee5528ba4fbcec6f87 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 14 Nov 2017 12:01:20 +0000 Subject: MIPS: CPS: Fix r1 .set mt assembler warning MIPS CPS has a build warning on kernels configured for MIPS32R1 or MIPS64R1, due to the use of .set mt without a prior .set mips{32,64}r2: arch/mips/kernel/cps-vec.S Assembler messages: arch/mips/kernel/cps-vec.S:238: Warning: the `mt' extension requires MIPS32 revision 2 or greater Add .set MIPS_ISA_LEVEL_RAW before .set mt to silence the warning. 
Fixes: 245a7868d2f2 ("MIPS: smp-cps: rework core/VPE initialisation") Signed-off-by: James Hogan Cc: Paul Burton Cc: James Hogan Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17699/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index c7ed26029cbb..e68e6e04063a 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -235,6 +235,7 @@ LEAF(mips_cps_core_init) has_mt t0, 3f .set push + .set MIPS_ISA_LEVEL_RAW .set mt /* Only allow 1 TC per VPE to execute... */ @@ -388,6 +389,7 @@ LEAF(mips_cps_boot_vpes) #elif defined(CONFIG_MIPS_MT) .set push + .set MIPS_ISA_LEVEL_RAW .set mt /* If the core doesn't support MT then return */ -- cgit v1.2.3 From 0c31f1d7be1b5c4858b1d714dcefa25f41428cab Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 7 Dec 2017 11:15:19 +0100 Subject: PCI: rcar: Fix use-after-free in probe error path If CONFIG_DEBUG_SLAB=y, and no PCIe card is inserted, the kernel crashes during probe on r8a7791/koelsch: rcar-pcie fe000000.pcie: PCIe link down Unable to handle kernel paging request at virtual address 6b6b6b6b (seeing this message requires earlycon and keep_bootcon). Indeed, pci_free_host_bridge() frees the PCI host bridge, including the embedded rcar_pcie object, so pci_free_resource_list() must not be called afterwards. To fix this, move the call to pci_free_resource_list() up, and update the label name accordingly. 
Fixes: ddd535f1ea3eb27e ("PCI: rcar: Fix memory leak when no PCIe card is inserted") Signed-off-by: Geert Uytterhoeven Signed-off-by: Bjorn Helgaas Acked-by: Simon Horman Acked-by: Lorenzo Pieralisi --- drivers/pci/host/pcie-rcar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/host/pcie-rcar.c b/drivers/pci/host/pcie-rcar.c index 12796eccb2be..52ab3cb0a0bf 100644 --- a/drivers/pci/host/pcie-rcar.c +++ b/drivers/pci/host/pcie-rcar.c @@ -1128,12 +1128,12 @@ static int rcar_pcie_probe(struct platform_device *pdev) err = rcar_pcie_get_resources(pcie); if (err < 0) { dev_err(dev, "failed to request resources: %d\n", err); - goto err_free_bridge; + goto err_free_resource_list; } err = rcar_pcie_parse_map_dma_ranges(pcie, dev->of_node); if (err) - goto err_free_bridge; + goto err_free_resource_list; pm_runtime_enable(dev); err = pm_runtime_get_sync(dev); @@ -1176,9 +1176,9 @@ err_pm_put: err_pm_disable: pm_runtime_disable(dev); -err_free_bridge: - pci_free_host_bridge(bridge); +err_free_resource_list: pci_free_resource_list(&pcie->resources); + pci_free_host_bridge(bridge); return err; } -- cgit v1.2.3 From 283ca526a9bd75aed7350220d7b1f8027d99c3fd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Dec 2017 02:25:30 +0100 Subject: bpf: fix corruption on concurrent perf_event_output calls When tracing and networking programs are both attached in the system and both use event-output helpers that eventually call into perf_event_output(), then we could end up in a situation where the tracing attached program runs in user context while a cls_bpf program is triggered on that same CPU out of softirq context. Since both rely on the same per-cpu perf_sample_data, we could potentially corrupt it. This can only ever happen in a combination of the two types; all tracing programs use a bpf_prog_active counter to bail out in case a program is already running on that CPU out of a different context. 
XDP and cls_bpf programs by themselves don't have this issue as they run in the same context only. Therefore, split both perf_sample_data so they cannot be accessed from each other. Fixes: 20b9d7ac4852 ("bpf: avoid excessive stack usage for perf_sample_data") Reported-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Tested-by: Song Liu Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- kernel/trace/bpf_trace.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0ce99c379c30..40207c2a4113 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -343,14 +343,13 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { .arg4_type = ARG_CONST_SIZE, }; -static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); +static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, - u64 flags, struct perf_raw_record *raw) + u64 flags, struct perf_sample_data *sd) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; struct bpf_event_entry *ee; @@ -373,8 +372,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; - perf_sample_data_init(sd, 0, 0); - sd->raw = raw; perf_event_output(event, sd, regs); return 0; } @@ -382,6 +379,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, u64, flags, void *, data, u64, size) { + struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); struct perf_raw_record raw = { .frag = { .size = size, @@ -392,7 +390,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, if 
(unlikely(flags & ~(BPF_F_INDEX_MASK))) return -EINVAL; - return __bpf_perf_event_output(regs, map, flags, &raw); + perf_sample_data_init(sd, 0, 0); + sd->raw = &raw; + + return __bpf_perf_event_output(regs, map, flags, sd); } static const struct bpf_func_proto bpf_perf_event_output_proto = { @@ -407,10 +408,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { }; static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); +static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd); u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) { + struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd); struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); struct perf_raw_frag frag = { .copy = ctx_copy, @@ -428,8 +431,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, }; perf_fetch_caller_regs(regs); + perf_sample_data_init(sd, 0, 0); + sd->raw = &raw; - return __bpf_perf_event_output(regs, map, flags, &raw); + return __bpf_perf_event_output(regs, map, flags, sd); } BPF_CALL_0(bpf_get_current_task) -- cgit v1.2.3 From a23f06f06dbe54696e8d4f156b317e8c9961c345 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Dec 2017 02:25:31 +0100 Subject: bpf: fix build issues on um due to mising bpf_perf_event.h Since c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type") um (uml) won't build on i386 or x86_64: [...] CC init/main.o In file included from ../include/linux/perf_event.h:18:0, from ../include/linux/trace_events.h:10, from ../include/trace/syscall.h:7, from ../include/linux/syscalls.h:82, from ../init/main.c:20: ../include/uapi/linux/bpf_perf_event.h:11:32: fatal error: asm/bpf_perf_event.h: No such file or directory #include <asm/bpf_perf_event.h> [...] Let's add the missing bpf_perf_event.h also to um arch. This seems to be the only one still missing.
Fixes: c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type") Reported-by: Randy Dunlap Suggested-by: Richard Weinberger Signed-off-by: Daniel Borkmann Tested-by: Randy Dunlap Cc: Hendrik Brueckner Cc: Richard Weinberger Acked-by: Alexei Starovoitov Acked-by: Richard Weinberger Signed-off-by: Alexei Starovoitov --- arch/um/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 50a32c33d729..73c57f614c9e 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,4 +1,5 @@ generic-y += barrier.h +generic-y += bpf_perf_event.h generic-y += bug.h generic-y += clkdev.h generic-y += current.h -- cgit v1.2.3 From 720f228e8d3128b7ab1d39f51fdd8da07a7640c9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Dec 2017 02:25:32 +0100 Subject: bpf: fix broken BPF selftest build At least on x86_64, the kernel's BPF selftests seem to have stopped building due to 618e165b2a8e ("selftests/bpf: sync kernel headers and introduce arch support in Makefile"): [...] In file included from test_verifier.c:29:0: ../../../include/uapi/linux/bpf_perf_event.h:11:32: fatal error: asm/bpf_perf_event.h: No such file or directory #include <asm/bpf_perf_event.h> ^ compilation terminated. [...] While pulling in tools/arch/*/include/uapi/asm/bpf_perf_event.h seems to work fine, there's no automated fall-back logic right now that would do the same out of tools/include/uapi/asm-generic/bpf_perf_event.h. The usual convention today is to add an include/[uapi/]asm/ equivalent that would pull in the correct arch header or generic one as fall-back, all ifdef'ed based on compiler target definition. It's similarly done also in other cases such as tools/include/asm/barrier.h, thus adapt the same here.
Fixes: 618e165b2a8e ("selftests/bpf: sync kernel headers and introduce arch support in Makefile") Signed-off-by: Daniel Borkmann Cc: Hendrik Brueckner Cc: Arnaldo Carvalho de Melo Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- tools/include/uapi/asm/bpf_perf_event.h | 7 +++++++ tools/testing/selftests/bpf/Makefile | 13 +------------ 2 files changed, 8 insertions(+), 12 deletions(-) create mode 100644 tools/include/uapi/asm/bpf_perf_event.h diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..13a58531e6fa --- /dev/null +++ b/tools/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,7 @@ +#if defined(__aarch64__) +#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" +#elif defined(__s390__) +#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h" +#else +#include <uapi/asm-generic/bpf_perf_event.h> +#endif diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 21a2d76b67dc..792af7c3b74f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,19 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -endif -include $(srctree)/tools/scripts/Makefile.arch - -$(call detected_var,SRCARCH) - LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf APIDIR := ../../../include/uapi -ASMDIR:= ../../../arch/$(ARCH)/include/uapi GENDIR := ../../../../include/generated GENHDR := $(GENDIR)/autoconf.h @@ -21,7 +10,7 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(ASMDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf TEST_GEN_PROGS = test_verifier test_tag test_maps 
test_lru_map test_lpm_map test_progs \ -- cgit v1.2.3 From a03fe72572c12e98f4173f8a535f32468e48b6ec Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:51:35 +0000 Subject: MIPS: Factor out NT_PRFPREG regset access helpers In preparation to fix a commit 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") FCSR access regression factor out NT_PRFPREG regset access helpers for the non-MSA and the MSA variants respectively, to avoid having to deal with excessive indentation in the actual fix. No functional change, however use `target->thread.fpu.fpr[0]' rather than `target->thread.fpu.fpr[i]' for FGR holding type size determination as there's no `i' variable to refer to anymore, and for the factored out `i' variable declaration use `unsigned int' rather than `unsigned' as its type, following the common style. Signed-off-by: Maciej W. Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.15+ Patchwork: https://patchwork.linux-mips.org/patch/17925/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/ptrace.c | 108 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 25 deletions(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index efbd8df8b665..62e8ffd9370a 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -419,25 +419,36 @@ static int gpr64_set(struct task_struct *target, #endif /* CONFIG_64BIT */ -static int fpr_get(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots + * correspond 1:1 to buffer slots. 
+ */ +static int fpr_get_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) { - unsigned i; - int err; - u64 fpr_val; - - /* XXX fcr31 */ + return user_regset_copyout(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, sizeof(elf_fpregset_t)); +} - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's + * general register slots are copied to buffer slots. + */ +static int fpr_get_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; for (i = 0; i < NUM_FPU_REGS; i++) { fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + err = user_regset_copyout(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); if (err) @@ -447,27 +458,54 @@ static int fpr_get(struct task_struct *target, return 0; } -static int fpr_set(struct task_struct *target, +/* Copy the floating-point context to the supplied NT_PRFPREG buffer. 
*/ +static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) + void *kbuf, void __user *ubuf) { - unsigned i; int err; - u64 fpr_val; /* XXX fcr31 */ - init_fp_ctx(target); + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); + + return err; +} - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP + * context's general register slots. + */ +static int fpr_set_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + return user_regset_copyin(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, sizeof(elf_fpregset_t)); +} + +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64 + * bits only of FP context's general register slots. 
+ */ +static int fpr_set_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { - err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + for (i = 0; i < NUM_FPU_REGS && *count >= sizeof(elf_fpreg_t); i++) { + err = user_regset_copyin(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); if (err) @@ -478,6 +516,26 @@ static int fpr_set(struct task_struct *target, return 0; } +/* Copy the supplied NT_PRFPREG buffer to the floating-point context. */ +static int fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int err; + + /* XXX fcr31 */ + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf); + + return err; +} + enum mips_regset { REGSET_GPR, REGSET_FPR, -- cgit v1.2.3 From dc24d0edf33c3e15099688b6bbdf7bdc24bf6e91 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:52:15 +0000 Subject: MIPS: Guard against any partial write attempt with PTRACE_SETREGSET Complement commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") and ensure that no partial register write attempt is made with PTRACE_SETREGSET, as we do not preinitialize any temporaries used to hold incoming register data and consequently random data could be written. It is the responsibility of the caller, such as `ptrace_regset', to arrange for writes to span whole registers only, so here we only assert that it has indeed happened. Signed-off-by: Maciej W. 
Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.15+ Patchwork: https://patchwork.linux-mips.org/patch/17926/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/ptrace.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 62e8ffd9370a..7fcadaaf330f 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -516,7 +516,15 @@ static int fpr_set_msa(struct task_struct *target, return 0; } -/* Copy the supplied NT_PRFPREG buffer to the floating-point context. */ +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context. + * + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', + * which is supposed to have been guaranteed by the kernel before + * calling us, e.g. in `ptrace_regset'. We enforce that requirement, + * so that we can safely avoid preinitializing temporaries for + * partial register writes. + */ static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, @@ -524,6 +532,8 @@ static int fpr_set(struct task_struct *target, { int err; + BUG_ON(count % sizeof(elf_fpreg_t)); + /* XXX fcr31 */ init_fp_ctx(target); -- cgit v1.2.3 From 80b3ffce0196ea50068885d085ff981e4b8396f4 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:53:14 +0000 Subject: MIPS: Consistently handle buffer counter with PTRACE_SETREGSET Update commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") bug and consistently consume all data supplied to `fpr_set_msa' with the ptrace(2) PTRACE_SETREGSET request, such that a zero data buffer counter is returned where insufficient data has been given to fill a whole number of FP general registers. 
In reality this is not going to happen, as the caller is supposed to only supply data covering a whole number of registers and it is verified in `ptrace_regset' and again asserted in `fpr_set', however structuring code such that the presence of trailing partial FP general register data causes `fpr_set_msa' to return with a non-zero data buffer counter makes it appear that this trailing data will be used if there are subsequent writes made to FP registers, which is going to be the case with the FCSR once the missing write to that register has been fixed. Fixes: d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") Signed-off-by: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.11+ Patchwork: https://patchwork.linux-mips.org/patch/17927/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 7fcadaaf330f..47a01d5f26ea 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -504,7 +504,7 @@ static int fpr_set_msa(struct task_struct *target, int err; BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS && *count >= sizeof(elf_fpreg_t); i++) { + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) { err = user_regset_copyin(pos, count, kbuf, ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); -- cgit v1.2.3 From be07a6a1188372b6d19a3307ec33211fc9c9439d Mon Sep 17 00:00:00 2001 From: "Maciej W. 
Rozycki" Date: Mon, 11 Dec 2017 22:54:33 +0000 Subject: MIPS: Fix an FCSR access API regression with NT_PRFPREG and MSA Fix a commit 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") public API regression, then activated by commit 1db1af84d6df ("MIPS: Basic MSA context switching support"), that caused the FCSR register not to be read or written for CONFIG_CPU_HAS_MSA kernel configurations (regardless of actual presence or absence of the MSA feature in a given processor) with ptrace(2) PTRACE_GETREGSET and PTRACE_SETREGSET requests nor recorded in core dumps. This is because with !CONFIG_CPU_HAS_MSA configurations the whole of `elf_fpregset_t' array is bulk-copied as it is, which includes the FCSR in one half of the last, 33rd slot, whereas with CONFIG_CPU_HAS_MSA configurations array elements are copied individually, and then only the leading 32 FGR slots while the remaining slot is ignored. Correct the code then such that only FGR slots are copied in the respective !MSA and MSA helpers and then the FCSR slot is handled separately in common code. Use `ptrace_setfcr31' to update the FCSR too, so that the read-only mask is respected. Retrieving a correct value of FCSR is important in debugging not only for the human to be able to get the right interpretation of the situation, but for correct operation of GDB as well. This is because the condition code bits in FCSR are used by GDB to determine the location to place a breakpoint at when single-stepping through an FPU branch instruction. If such a breakpoint is placed incorrectly (i.e. with the condition reversed), then it will be missed, likely causing the debuggee to run away from the control of GDB and consequently breaking the process of investigation.
Fortunately GDB continues using the older PTRACE_GETFPREGS ptrace(2) request which is unaffected, so the regression only really hits with post-mortem debug sessions using a core dump file, in which case execution, and consequently single-stepping through branches is not possible. Of course core files created by buggy kernels out there will have the value of FCSR recorded clobbered, but such core files cannot be corrected and the person using them simply will have to be aware that the value of FCSR retrieved is not reliable. Which also means we can likely get away without defining a replacement API which would ensure a correct value of FCSR to be retrieved, or none at all. This is based on previous work by Alex Smith, extensively rewritten. Signed-off-by: Alex Smith Signed-off-by: James Hogan Signed-off-by: Maciej W. Rozycki Fixes: 72b22bbad1e7 ("MIPS: Don't assume 64-bit FP registers for FP regset") Cc: Paul Burton Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.15+ Patchwork: https://patchwork.linux-mips.org/patch/17928/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/ptrace.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 47a01d5f26ea..0a939593ccb7 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -422,7 +422,7 @@ static int gpr64_set(struct task_struct *target, /* * Copy the floating-point context to the supplied NT_PRFPREG buffer, * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots - * correspond 1:1 to buffer slots. + * correspond 1:1 to buffer slots. Only general registers are copied.
*/ static int fpr_get_fpa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -430,13 +430,14 @@ static int fpr_get_fpa(struct task_struct *target, { return user_regset_copyout(pos, count, kbuf, ubuf, &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); } /* * Copy the floating-point context to the supplied NT_PRFPREG buffer, * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's - * general register slots are copied to buffer slots. + * general register slots are copied to buffer slots. Only general + * registers are copied. */ static int fpr_get_msa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -458,20 +459,29 @@ static int fpr_get_msa(struct task_struct *target, return 0; } -/* Copy the floating-point context to the supplied NT_PRFPREG buffer. */ +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. + */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); int err; - /* XXX fcr31 */ - if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); else err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; + + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); return err; } @@ -479,7 +489,7 @@ static int fpr_get(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context, * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP - * context's general register slots. + * context's general register slots. Only general registers are copied. 
*/ static int fpr_set_fpa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -487,13 +497,14 @@ static int fpr_set_fpa(struct task_struct *target, { return user_regset_copyin(pos, count, kbuf, ubuf, &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); } /* * Copy the supplied NT_PRFPREG buffer to the floating-point context, * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64 - * bits only of FP context's general register slots. + * bits only of FP context's general register slots. Only general + * registers are copied. */ static int fpr_set_msa(struct task_struct *target, unsigned int *pos, unsigned int *count, @@ -518,6 +529,8 @@ static int fpr_set_msa(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. * * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', * which is supposed to have been guaranteed by the kernel before @@ -530,18 +543,30 @@ static int fpr_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + u32 fcr31; int err; BUG_ON(count % sizeof(elf_fpreg_t)); - /* XXX fcr31 */ - init_fp_ctx(target); if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf); else err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; + + if (count > 0) { + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); + if (err) + return err; + + ptrace_setfcr31(target, fcr31); + } return err; } -- cgit v1.2.3 From 006501e039eec411842bb3150c41358867d320c2 Mon Sep 17 00:00:00 2001 From: "Maciej W. 
Rozycki" Date: Mon, 11 Dec 2017 22:55:40 +0000 Subject: MIPS: Also verify sizeof `elf_fpreg_t' with PTRACE_SETREGSET Complement commit d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") and like with the PTRACE_GETREGSET ptrace(2) request also apply a BUILD_BUG_ON check for the size of the `elf_fpreg_t' type in the PTRACE_SETREGSET request handler. Signed-off-by: Maciej W. Rozycki Fixes: d614fd58a283 ("mips/ptrace: Preserve previous registers for short regset write") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.11+ Patchwork: https://patchwork.linux-mips.org/patch/17929/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 0a939593ccb7..256908951a7c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -447,6 +447,7 @@ static int fpr_get_msa(struct task_struct *target, u64 fpr_val; int err; + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); for (i = 0; i < NUM_FPU_REGS; i++) { fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); err = user_regset_copyout(pos, count, kbuf, ubuf, -- cgit v1.2.3 From c8c5a3a24d395b14447a9a89d61586a913840a3b Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 11 Dec 2017 22:56:54 +0000 Subject: MIPS: Disallow outsized PTRACE_SETREGSET NT_PRFPREG regset accesses Complement commit c23b3d1a5311 ("MIPS: ptrace: Change GP regset to use correct core dump register layout") and also reject outsized PTRACE_SETREGSET requests to the NT_PRFPREG regset, like with the NT_PRSTATUS regset. Signed-off-by: Maciej W. 
Rozycki Fixes: c23b3d1a5311 ("MIPS: ptrace: Change GP regset to use correct core dump register layout") Cc: James Hogan Cc: Paul Burton Cc: Alex Smith Cc: Dave Martin Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.17+ Patchwork: https://patchwork.linux-mips.org/patch/17930/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/ptrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 256908951a7c..0b23b1ad99e6 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -550,6 +550,9 @@ static int fpr_set(struct task_struct *target, BUG_ON(count % sizeof(elf_fpreg_t)); + if (pos + count > sizeof(elf_fpregset_t)) + return -EIO; + init_fp_ctx(target); if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) -- cgit v1.2.3 From 10a6a6975691775bbcc677a04c6fd3120b5c1160 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Tue, 12 Dec 2017 14:40:12 +0100 Subject: Revert "dt-bindings: mtd: add sst25wf040b and en25s64 to sip-nor list" This reverts commit b07815d4eaf658b683c345d6e643895a20d92f29. The reverted commit was merged into v4-15-rc1 by mistake: it was taken from the IMX tree but the patch has never been sent to linux-mtd nor reviewed by any spi-nor maintainers. Actually, it would have been rejected since we add new values for the 'compatible' DT property only for SPI NOR memories that don't support the JEDEC READ ID op code (0x9F). Both en25s64 and sst25wf040b support the JEDEC READ ID op code, hence should use the "jedec,spi-nor" string alone as 'compatible' value. 
See the following link for more details: http://lists.infradead.org/pipermail/linux-mtd/2017-November/077425.html Signed-off-by: Cyrille Pitchen Acked-by: Marek Vasut --- Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt index 376fa2f50e6b..956bb046e599 100644 --- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt +++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt @@ -13,7 +13,6 @@ Required properties: at25df321a at25df641 at26df081a - en25s64 mr25h128 mr25h256 mr25h10 @@ -33,7 +32,6 @@ Required properties: s25fl008k s25fl064k sst25vf040b - sst25wf040b m25p40 m25p80 m25p16 -- cgit v1.2.3 From a782fc8cc6bf6909daf3b65630079e2afec316ef Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 1 Dec 2017 18:21:34 +0800 Subject: drm/ttm: fix incorrect calculation of shrink_pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shrink_pages is in units of the pool order after ttm_page_pool_free, but it is used for nr_free in the next round, so it needs to be converted to native page units. Signed-off-by: Monk Liu Reviewed-by: Roger He Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 44343a2bf55c..71945ccaf012 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) freed += (nr_free_pool - shrink_pages) << pool->order; if (freed >= sc->nr_to_scan) break; + shrink_pages <<= pool->order; } mutex_unlock(&lock); return freed; -- cgit v1.2.3 From 13d3fc69a03721d972460fe2bff9b479f7999221 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 1 Dec 2017 18:23:56 +0800 Subject: drm/ttm: 
max_cpages is in unit of native page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix calculation. Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 71945ccaf012..b5ba6441489f 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -544,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, int r = 0; unsigned i, j, cpages; unsigned npages = 1 << order; - unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC); + unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC); /* allocate array for page caching change */ caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL); -- cgit v1.2.3 From 0507f438ea19d4280006467ba02956f6a693deca Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 23 Nov 2017 18:38:59 +0800 Subject: drm/amdgpu: fix MAP_QUEUES parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should be 0. Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index da43813d67a4..5aeb5f8816f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) PACKET3_MAP_QUEUES_PIPE(ring->pipe) | PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); -- cgit v1.2.3 From 9c41e452188339989c2c9ca5fc54f10935207968 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 30 Nov 2017 09:43:57 +0100 Subject: i2c: stm32: Fix copyrights Uniformize STMicroelectronics copyrights headers and add SPDX identifier. Signed-off-by: Benjamin Gaignard Acked-by: Alexandre TORGUE Acked-by: Pierre-Yves MORDRET Acked-by: M'boumba Cedric Madianga Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stm32.h | 3 ++- drivers/i2c/busses/i2c-stm32f4.c | 3 ++- drivers/i2c/busses/i2c-stm32f7.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-stm32.h b/drivers/i2c/busses/i2c-stm32.h index dab51761f8c5..d4f9cef251ac 100644 --- a/drivers/i2c/busses/i2c-stm32.h +++ b/drivers/i2c/busses/i2c-stm32.h @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 /* * i2c-stm32.h * * Copyright (C) M'boumba Cedric Madianga 2017 + * Copyright (C) STMicroelectronics 2017 * Author: M'boumba Cedric Madianga * - * License terms: GNU General Public License (GPL), version 2 */ #ifndef _I2C_STM32_H diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c index 4ec108496f15..47c8d00de53f 100644 --- a/drivers/i2c/busses/i2c-stm32f4.c +++ b/drivers/i2c/busses/i2c-stm32f4.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for STMicroelectronics STM32 I2C controller * @@ -6,11 +7,11 @@ * http://www.st.com/resource/en/reference_manual/DM00031020.pdf * * Copyright (C) M'boumba Cedric Madianga 2016 + * Copyright (C) STMicroelectronics 2017 * Author: 
M'boumba Cedric Madianga * * This driver is based on i2c-st.c * - * License terms: GNU General Public License (GPL), version 2 */ #include diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index d4a6e9c2e9aa..b445b3bb0bb1 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for STMicroelectronics STM32F7 I2C controller * @@ -7,11 +8,11 @@ * http://www.st.com/resource/en/reference_manual/dm00124865.pdf * * Copyright (C) M'boumba Cedric Madianga 2017 + * Copyright (C) STMicroelectronics 2017 * Author: M'boumba Cedric Madianga * * This driver is based on i2c-stm32f4.c * - * License terms: GNU General Public License (GPL), version 2 */ #include #include -- cgit v1.2.3 From 45fd4470ba86e9ca2837b666a52cc65dc69f0fa3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 7 Dec 2017 12:25:45 +0100 Subject: i2c: piix4: Fix port number check on release The port number shift is still hard-coded to 1 while it now depends on the hardware. Thankfully 0 is always 0 no matter how you shift it, so this was a bug without consequences. 
Signed-off-by: Jean Delvare Fixes: 0fe16195f891 ("i2c: piix4: Fix SMBus port selection for AMD Family 17h chips") Reviewed-by: Guenter Roeck Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-piix4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 174579d32e5f..462948e2c535 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -983,7 +983,7 @@ static void piix4_adap_remove(struct i2c_adapter *adap) if (adapdata->smba) { i2c_del_adapter(adap); - if (adapdata->port == (0 << 1)) { + if (adapdata->port == (0 << piix4_port_shift_sb800)) { release_region(adapdata->smba, SMBIOSIZE); if (adapdata->sb800_main) release_region(SB800_PIIX4_SMB_IDX, 2); -- cgit v1.2.3 From 9147efcbe0b7cc96b18eb64b1a3f0d4bba81443c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Dec 2017 14:22:39 -0800 Subject: bpf: add schedule points to map alloc/free While using large percpu maps, htab_map_alloc() can hold cpu for hundreds of ms. This patch adds cond_resched() calls to percpu alloc/free call sites, all running in process context. 
Signed-off-by: Eric Dumazet Signed-off-by: Alexei Starovoitov --- kernel/bpf/hashtab.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index e469e05c8e83..3905d4bc5b80 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -114,6 +114,7 @@ static void htab_free_elems(struct bpf_htab *htab) pptr = htab_elem_get_ptr(get_htab_elem(htab, i), htab->map.key_size); free_percpu(pptr); + cond_resched(); } free_elems: bpf_map_area_free(htab->elems); @@ -159,6 +160,7 @@ static int prealloc_init(struct bpf_htab *htab) goto free_elems; htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, pptr); + cond_resched(); } skip_percpu_elems: -- cgit v1.2.3 From 34354d4bf845c85f9795a9f39239ca3aa46c3a94 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Sun, 19 Nov 2017 00:48:23 +0100 Subject: mtd: spi-nor: add support for ISSI is25lp128 Add support for ISSI is25lp128 spi nor flash. Signed-off-by: Angelo Dureghello Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/spi-nor.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index bc266f70a15b..7139ad6ada4e 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1021,6 +1021,8 @@ static const struct flash_info spi_nor_ids[] = { /* ISSI */ { "is25cd512", INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, + { "is25lp128", INFO(0x9d6018, 0, 64 * 1024, 256, + SECT_4K | SPI_NOR_DUAL_READ) }, /* Macronix */ { "mx25l512e", INFO(0xc22010, 0, 64 * 1024, 1, SECT_4K) }, -- cgit v1.2.3 From 6d17969c8eb454116d906005a6c4752f4f560b26 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 3 Dec 2017 20:36:24 -0200 Subject: dt-bindings: mtd: fsl-quadspi: Pass the qspi clock names In order to improve the bindings documentation, explicitly pass the name of the clocks: "qspi_en" and "qspi", which are mandatory. 
Signed-off-by: Fabio Estevam Reviewed-by: Rob Herring Signed-off-by: Cyrille Pitchen --- Documentation/devicetree/bindings/mtd/fsl-quadspi.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt index c34aa6f8a424..63d4d626fbd5 100644 --- a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt +++ b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt @@ -12,7 +12,7 @@ Required properties: - reg-names: Should contain the reg names "QuadSPI" and "QuadSPI-memory" - interrupts : Should contain the interrupt for the device - clocks : The clocks needed by the QuadSPI controller - - clock-names : the name of the clocks + - clock-names : Should contain the name of the clocks: "qspi_en" and "qspi". Optional properties: - fsl,qspi-has-second-chip: The controller has two buses, bus A and bus B. -- cgit v1.2.3 From 20ccb993f29bd6ad17699dd0b349db086e3ca719 Mon Sep 17 00:00:00 2001 From: "Bean Huo (beanhuo)" Date: Mon, 4 Dec 2017 12:34:47 +0000 Subject: mtd: spi-nor: check FSR error bits for Micron memories For Micron spi nor device, when erase/program operation fails, especially the failure results from intending to modify protected space, spi-nor upper layers still get the return which shows the operation succeeds. This is because current spi_nor_fsr_ready() only uses FSR bit.7 (flag status register) to check device whether ready. This patch fixes this issue by checking relevant error bits in FSR. The FSR is a powerful tool to investigate the status of device, checking information regarding what the memory is actually doing and detecting possible error conditions. 
Signed-off-by: beanhuo Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/spi-nor.c | 18 ++++++++++++++++-- include/linux/mtd/spi-nor.h | 6 +++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 7139ad6ada4e..07d040ff574b 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -330,8 +330,22 @@ static inline int spi_nor_fsr_ready(struct spi_nor *nor) int fsr = read_fsr(nor); if (fsr < 0) return fsr; - else - return fsr & FSR_READY; + + if (fsr & (FSR_E_ERR | FSR_P_ERR)) { + if (fsr & FSR_E_ERR) + dev_err(nor->dev, "Erase operation failed.\n"); + else + dev_err(nor->dev, "Program operation failed.\n"); + + if (fsr & FSR_PT_ERR) + dev_err(nor->dev, + "Attempted to modify a protected sector.\n"); + + nor->write_reg(nor, SPINOR_OP_CLFSR, NULL, 0); + return -EIO; + } + + return fsr & FSR_READY; } static int spi_nor_ready(struct spi_nor *nor) diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index d0c66a0975cf..c0836cca5280 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -61,6 +61,7 @@ #define SPINOR_OP_RDSFDP 0x5a /* Read SFDP */ #define SPINOR_OP_RDCR 0x35 /* Read configuration register */ #define SPINOR_OP_RDFSR 0x70 /* Read flag status register */ +#define SPINOR_OP_CLFSR 0x50 /* Clear flag status register */ /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */ #define SPINOR_OP_READ_4B 0x13 /* Read data bytes (low frequency) */ @@ -130,7 +131,10 @@ #define EVCR_QUAD_EN_MICRON BIT(7) /* Micron Quad I/O */ /* Flag Status Register bits */ -#define FSR_READY BIT(7) +#define FSR_READY BIT(7) /* Device status, 0 = Busy, 1 = Ready */ +#define FSR_E_ERR BIT(5) /* Erase operation status */ +#define FSR_P_ERR BIT(4) /* Program operation status */ +#define FSR_PT_ERR BIT(1) /* Protection error bit */ /* Configuration Register bits. 
*/ #define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */ -- cgit v1.2.3 From 2666067fdba26a0a87cf50bb38f5a73aabd0f517 Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Tue, 5 Dec 2017 12:13:44 -0600 Subject: mtd: spi-nor: Check that BP bits are set properly Previously, the lock and unlock functions returned success even if the BP bits were not actually updated in the status register due to hardware write protection. Introduce write_sr_and_check() to write and read back the status register to ensure the desired BP bits are actually set as requested. Signed-off-by: Joe Schultz Signed-off-by: Aaron Sierra Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/spi-nor.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 07d040ff574b..1e4b4dfe26b5 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -566,6 +566,27 @@ erase_err: return ret; } +/* Write status register and ensure bits in mask match written values */ +static int write_sr_and_check(struct spi_nor *nor, u8 status_new, u8 mask) +{ + int ret; + + write_enable(nor); + ret = write_sr(nor, status_new); + if (ret) + return ret; + + ret = spi_nor_wait_till_ready(nor); + if (ret) + return ret; + + ret = read_sr(nor); + if (ret < 0) + return ret; + + return ((ret & mask) != (status_new & mask)) ? 
-EIO : 0; +} + static void stm_get_locked_range(struct spi_nor *nor, u8 sr, loff_t *ofs, uint64_t *len) { @@ -664,7 +685,6 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len) loff_t lock_len; bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB; bool use_top; - int ret; status_old = read_sr(nor); if (status_old < 0) @@ -728,11 +748,7 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len) if ((status_new & mask) < (status_old & mask)) return -EINVAL; - write_enable(nor); - ret = write_sr(nor, status_new); - if (ret) - return ret; - return spi_nor_wait_till_ready(nor); + return write_sr_and_check(nor, status_new, mask); } /* @@ -749,7 +765,6 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) loff_t lock_len; bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB; bool use_top; - int ret; status_old = read_sr(nor); if (status_old < 0) @@ -816,11 +831,7 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len) if ((status_new & mask) > (status_old & mask)) return -EINVAL; - write_enable(nor); - ret = write_sr(nor, status_new); - if (ret) - return ret; - return spi_nor_wait_till_ready(nor); + return write_sr_and_check(nor, status_new, mask); } /* -- cgit v1.2.3 From 8dee1d971af9af2f7b5f54c2eac4ebd04c5c237c Mon Sep 17 00:00:00 2001 From: Hou Zhiqiang Date: Wed, 6 Dec 2017 10:53:41 +0800 Subject: mtd: spi-nor: add an API to restore the status of SPI flash chip Add this API to restore the status of SPI flash chip to the default such as addressing mode, whenever detach the driver from device or reboot the system. 
Signed-off-by: Hou Zhiqiang Signed-off-by: Cyrille Pitchen --- Documentation/mtd/spi-nor.txt | 3 +++ drivers/mtd/spi-nor/spi-nor.c | 10 ++++++++++ include/linux/mtd/spi-nor.h | 6 ++++++ 3 files changed, 19 insertions(+) diff --git a/Documentation/mtd/spi-nor.txt b/Documentation/mtd/spi-nor.txt index 548d6306ebca..da1fbff5a24c 100644 --- a/Documentation/mtd/spi-nor.txt +++ b/Documentation/mtd/spi-nor.txt @@ -60,3 +60,6 @@ The main API is spi_nor_scan(). Before you call the hook, a driver should initialize the necessary fields for spi_nor{}. Please see drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to fsl-quadspi.c when you want to write a new driver for a SPI NOR controller. +Another API is spi_nor_restore(), this is used to restore the status of SPI +flash chip such as addressing mode. Call it whenever detach the driver from +device or reboot the system. diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 1e4b4dfe26b5..9178139a39d0 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -2740,6 +2740,16 @@ static void spi_nor_resume(struct mtd_info *mtd) dev_err(dev, "resume() failed\n"); } +void spi_nor_restore(struct spi_nor *nor) +{ + /* restore the addressing mode */ + if ((nor->addr_width == 4) && + (JEDEC_MFR(nor->info) != SNOR_MFR_SPANSION) && + !(nor->info->flags & SPI_NOR_4B_OPCODES)) + set_4byte(nor, nor->info, 0); +} +EXPORT_SYMBOL_GPL(spi_nor_restore); + int spi_nor_scan(struct spi_nor *nor, const char *name, const struct spi_nor_hwcaps *hwcaps) { diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index c0836cca5280..de36969eb359 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -403,4 +403,10 @@ struct spi_nor_hwcaps { int spi_nor_scan(struct spi_nor *nor, const char *name, const struct spi_nor_hwcaps *hwcaps); +/** + * spi_nor_restore_addr_mode() - restore the status of SPI NOR + * @nor: the spi_nor structure + */ +void 
spi_nor_restore(struct spi_nor *nor); + #endif -- cgit v1.2.3 From 59b356ffd0b00ed986c0aa1b401dd9b466ee619d Mon Sep 17 00:00:00 2001 From: Hou Zhiqiang Date: Wed, 6 Dec 2017 10:53:42 +0800 Subject: mtd: m25p80: restore the status of SPI flash when exiting Restore the status to be compatible with legacy devices. Take Freescale eSPI boot for example, it copies (in 3 Byte addressing mode) the RCW and bootloader images from SPI flash without firing a reset signal previously, so the reboot command will fail without resetting the addressing mode of SPI flash. This patch implements .shutdown function to restore the status in reboot process, and add the same operation to the .remove function. Signed-off-by: Hou Zhiqiang Signed-off-by: Cyrille Pitchen --- drivers/mtd/devices/m25p80.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index dbe6a1de2bb8..a4e18f6aaa33 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -307,10 +307,18 @@ static int m25p_remove(struct spi_device *spi) { struct m25p *flash = spi_get_drvdata(spi); + spi_nor_restore(&flash->spi_nor); + /* Clean up MTD stuff. */ return mtd_device_unregister(&flash->spi_nor.mtd); } +static void m25p_shutdown(struct spi_device *spi) +{ + struct m25p *flash = spi_get_drvdata(spi); + + spi_nor_restore(&flash->spi_nor); +} /* * Do NOT add to this array without reading the following: * @@ -386,6 +394,7 @@ static struct spi_driver m25p80_driver = { .id_table = m25p_ids, .probe = m25p_probe, .remove = m25p_remove, + .shutdown = m25p_shutdown, /* REVISIT: many of these chips have deep power-down modes, which * should clearly be entered on suspend() to minimize power use. 
-- cgit v1.2.3 From ae35d656d796fa203787455ce59874c6682dc0cf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Nov 2017 16:06:11 +0100 Subject: ACPI: button: Add a debug message when we're sending a LID event I've been debugging some spurious suspend issues on various devices, at least on some devices these spurious suspends are caused by spurious LID closed events being sent to userspace. Running e.g. evemu-record after noticing a spurious suspend is too late to detect that a LID closed event is the (probable) cause of this. This commit adds an acpi_handle_debug call to help debugging this. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index bf8e4d371fa7..aac81f40e28e 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -210,6 +210,8 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state) } /* Send the platform triggered reliable event */ if (do_update) { + acpi_handle_debug(device->handle, "ACPI LID %s\n", + state ? "open" : "closed"); input_report_switch(button->input, SW_LID, !state); input_sync(button->input); button->last_state = !!state; -- cgit v1.2.3 From 9e811e19a7aca16d40e1ce0c68e9bbffea1b9810 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Nov 2017 16:06:12 +0100 Subject: ACPI: button: Add a LID switch blacklist and add 1 model to it The GP-electronic T701 tablet does not have a LID switch, but it does define a LID device in its DSDT. The _LID method points to the "\\_SB.GPO2" pin 0x18 GPIO with a pull setting of "PullDefault", which leaves the pin floating. This causes the ACPI button driver to cause spurious LID closed events, causing the device to suspend while the user is using it. There is nothing the ACPI button driver (or the gpio code) can do to fix this, so the only solution is to add a DMI based blacklist and ignore the LID device on these tablets.
Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index aac81f40e28e..e1eee7a60fad 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #define PREFIX "ACPI: " @@ -76,6 +77,22 @@ static const struct acpi_device_id button_device_ids[] = { }; MODULE_DEVICE_TABLE(acpi, button_device_ids); +/* + * Some devices which don't even have a lid in anyway have a broken _LID + * method (e.g. pointing to a floating gpio pin) causing spurious LID events. + */ +static const struct dmi_system_id lid_blacklst[] = { + { + /* GP-electronic T701 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "T701"), + DMI_MATCH(DMI_BIOS_VERSION, "BYT70A.YNCHENG.WIN.007"), + }, + }, + {} +}; + static int acpi_button_add(struct acpi_device *device); static int acpi_button_remove(struct acpi_device *device); static void acpi_button_notify(struct acpi_device *device, u32 event); @@ -475,6 +492,9 @@ static int acpi_button_add(struct acpi_device *device) char *name, *class; int error; + if (!strcmp(hid, ACPI_BUTTON_HID_LID) && dmi_check_system(lid_blacklst)) + return -ENODEV; + button = kzalloc(sizeof(struct acpi_button), GFP_KERNEL); if (!button) return -ENOMEM; -- cgit v1.2.3 From e8894f5578d62f7137a3c4139945e61148728471 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 29 Nov 2017 10:05:02 +0530 Subject: ACPI / PMIC: constify platform_device_id platform_device_id are not supposed to change at runtime. All functions working with platform_device_id provided by work with const platform_device_id. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Acked-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/pmic/intel_pmic_bxtwc.c | 2 +- drivers/acpi/pmic/intel_pmic_chtwc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/pmic/intel_pmic_bxtwc.c b/drivers/acpi/pmic/intel_pmic_bxtwc.c index 90011aad4d20..2012d1d87dc3 100644 --- a/drivers/acpi/pmic/intel_pmic_bxtwc.c +++ b/drivers/acpi/pmic/intel_pmic_bxtwc.c @@ -400,7 +400,7 @@ static int intel_bxtwc_pmic_opregion_probe(struct platform_device *pdev) &intel_bxtwc_pmic_opregion_data); } -static struct platform_device_id bxt_wc_opregion_id_table[] = { +static const struct platform_device_id bxt_wc_opregion_id_table[] = { { .name = "bxt_wcove_region" }, {}, }; diff --git a/drivers/acpi/pmic/intel_pmic_chtwc.c b/drivers/acpi/pmic/intel_pmic_chtwc.c index 85636d7a9d39..813b829e1c24 100644 --- a/drivers/acpi/pmic/intel_pmic_chtwc.c +++ b/drivers/acpi/pmic/intel_pmic_chtwc.c @@ -260,7 +260,7 @@ static int intel_cht_wc_pmic_opregion_probe(struct platform_device *pdev) &intel_cht_wc_pmic_opregion_data); } -static struct platform_device_id cht_wc_opregion_id_table[] = { +static const struct platform_device_id cht_wc_opregion_id_table[] = { { .name = "cht_wcove_region" }, {}, }; -- cgit v1.2.3 From 0f27cff8597d86f881ea8274b49b63b678c14a3c Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 30 Nov 2017 15:05:59 -0500 Subject: ACPI: sysfs: Make ACPI GPE mask kernel parameter cover all GPEs The acpi_mask_gpe= kernel parameter documentation states that the range of mask is 128 GPEs (0x00 to 0x7F). The acpi_masked_gpes mask is a u64 so only 64 GPEs (0x00 to 0x3F) can really be masked. Use a bitmap of size 0xFF instead of a u64 for the GPE mask so 256 GPEs can be masked. Fixes: 9c4aa1eecb48 (ACPI / sysfs: Provide quirk mechanism to prevent GPE flooding) Signed-off-by: Prarit Bharava Signed-off-by: Rafael J. 
Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 1 - drivers/acpi/sysfs.c | 26 ++++++++----------------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6571fbfdb2a1..89ba74761180 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -114,7 +114,6 @@ This facility can be used to prevent such uncontrolled GPE floodings. Format: - Support masking of GPEs numbered from 0x00 to 0x7f. acpi_no_auto_serialize [HW,ACPI] Disable auto-serialization of AML methods diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 06a150bb35bf..4fc59c3bc673 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -816,14 +816,8 @@ end: * interface: * echo unmask > /sys/firmware/acpi/interrupts/gpe00 */ - -/* - * Currently, the GPE flooding prevention only supports to mask the GPEs - * numbered from 00 to 7f. - */ -#define ACPI_MASKABLE_GPE_MAX 0x80 - -static u64 __initdata acpi_masked_gpes; +#define ACPI_MASKABLE_GPE_MAX 0xFF +static DECLARE_BITMAP(acpi_masked_gpes_map, ACPI_MASKABLE_GPE_MAX) __initdata; static int __init acpi_gpe_set_masked_gpes(char *val) { @@ -831,7 +825,7 @@ static int __init acpi_gpe_set_masked_gpes(char *val) if (kstrtou8(val, 0, &gpe) || gpe > ACPI_MASKABLE_GPE_MAX) return -EINVAL; - acpi_masked_gpes |= ((u64)1< Date: Tue, 5 Dec 2017 18:46:39 +0100 Subject: ACPI: battery: Drop redundant test for failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merging the two adjacent conditionally built blocks makes the code a lot more readable. And as a bonus, we drop a duplicate test when CONFIG_ACPI_PROCFS_POWER is undefined. Signed-off-by: Bjørn Mork Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/battery.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 13e7b56e33ae..f2eb6c37ea0a 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -1237,13 +1237,11 @@ static int acpi_battery_add(struct acpi_device *device) #ifdef CONFIG_ACPI_PROCFS_POWER result = acpi_battery_add_fs(device); -#endif if (result) { -#ifdef CONFIG_ACPI_PROCFS_POWER acpi_battery_remove_fs(device); -#endif goto fail; } +#endif printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n", ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device), -- cgit v1.2.3 From 001d50c9a14f1fab94b329161a1db9235b4e60da Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Nov 2017 12:54:28 +0100 Subject: PM / Domains: Remove obsolete "samsung,power-domain" check Currently the generic PM Domain code checks for the presence of both (generic) "power-domains" and (Samsung Exynos legacy) "samsung,power-domain" properties in all device tree nodes representing devices. There are two issues with this: 1. This imposes a small boot-time penalty on all platforms using DT, 2. Platform-specific checks do not really belong in core framework code. Remove the platform-specific check, as the last user of "samsung,power-domain" was removed in commit 46dcf0ff0de35da8 ("ARM: dts: exynos: Remove exynos4415.dtsi"). All other users were converted before in commit 0da6587041363033 ("ARM: dts: convert to generic power domain bindings for exynos DT"). Signed-off-by: Geert Uytterhoeven Acked-by: Ulf Hansson Acked-by: Krzysztof Kozlowski Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/domain.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 0c80bea05bcb..f9dcc981b6b9 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2199,20 +2199,8 @@ int genpd_dev_pm_attach(struct device *dev) ret = of_parse_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells", 0, &pd_args); - if (ret < 0) { - if (ret != -ENOENT) - return ret; - - /* - * Try legacy Samsung-specific bindings - * (for backwards compatibility of DT ABI) - */ - pd_args.args_count = 0; - pd_args.np = of_parse_phandle(dev->of_node, - "samsung,power-domain", 0); - if (!pd_args.np) - return -ENOENT; - } + if (ret < 0) + return ret; mutex_lock(&gpd_list_lock); pd = genpd_get_from_provider(&pd_args); -- cgit v1.2.3 From 1d0d064307cbfd8546841f6e9d94d02c55e45e1e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Dec 2017 15:15:19 +0530 Subject: cpufreq: longhaul: Revert transition_delay_us to 200 ms The commit e948bc8fbee0 ("cpufreq: Cap the default transition delay value to 10 ms") caused a regression on EPIA-M mini-ITX computer where shutdown or reboot hangs occasionally with a print message like: longhaul: Warning: Timeout while waiting for idle PCI bus cpufreq: __target_index: Failed to change cpu frequency: -16 This probably happens because the cpufreq governor tries to change the frequency of the CPU faster than allowed by the hardware. Before the above commit, the default transition delay was set to 200 ms for a transition_latency of 200000 ns. Let's revert back to that transition delay value to fix it. Note that several other transition delay values were tested like 20 ms and 30 ms and none of them have resolved system hang issue completely. Fixes: e948bc8fbee0 (cpufreq: Cap the default transition delay value to 10 ms) Reported-by: Meelis Roos Suggested-by: Rafael J.
Wysocki Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/longhaul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c index c46a12df40dd..5faa37c5b091 100644 --- a/drivers/cpufreq/longhaul.c +++ b/drivers/cpufreq/longhaul.c @@ -894,7 +894,7 @@ static int longhaul_cpu_init(struct cpufreq_policy *policy) if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) longhaul_setup_voltagescaling(); - policy->cpuinfo.transition_latency = 200000; /* nsec */ + policy->transition_delay_us = 200000; /* usec */ return cpufreq_table_validate_and_show(policy, longhaul_table); } -- cgit v1.2.3 From 7e6a70a57800014743ecfae7023c379388eff121 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 8 Dec 2017 11:56:10 +0900 Subject: PM / core: remove unneeded kallsyms include The file was converted from print_fn_descriptor_symbol() to %pF some time ago (c80cfb0406c01bb "vsprintf: use new vsprintf symbolic function pointer format"). kallsyms does not seem to be needed anymore. Signed-off-by: Sergey Senozhatsky Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c0d5f4a3611d..d8aa88baf9c1 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -18,7 +18,6 @@ */ #include -#include #include #include #include -- cgit v1.2.3 From 245fe15e4773d62fe528d5876e73c8ed31c32873 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 8 Dec 2017 11:56:12 +0900 Subject: PNP: remove unneeded kallsyms include The file was converted from print_fn_descriptor_symbol() to %pF some time ago (2e532d68a2b3e2aa {pci,pnp} quirks.c: don't use deprecated print_fn_descriptor_symbol()). kallsyms does not seem to be needed anymore. Signed-off-by: Sergey Senozhatsky Signed-off-by: Rafael J. 
Wysocki --- drivers/pnp/quirks.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index f054cdddfef8..803666ae3635 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "base.h" static void quirk_awe32_add_ports(struct pnp_dev *dev, -- cgit v1.2.3 From a9596dbc3582c19da0958df25b3852696a0f5ae5 Mon Sep 17 00:00:00 2001 From: Andrew-sh Cheng Date: Fri, 8 Dec 2017 14:07:55 +0800 Subject: cpufreq: mediatek: add mt2712 into compatible list Support mt2712 in mediatek-cpufreq.c Signed-off-by: Andrew-sh Cheng Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/mediatek-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index e0d5090b303d..b783919f063d 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -574,6 +574,7 @@ static struct platform_driver mtk_cpufreq_platdrv = { /* List of machines supported by this driver */ static const struct of_device_id mtk_cpufreq_machines[] __initconst = { { .compatible = "mediatek,mt2701", }, + { .compatible = "mediatek,mt2712", }, { .compatible = "mediatek,mt7622", }, { .compatible = "mediatek,mt7623", }, { .compatible = "mediatek,mt817x", }, -- cgit v1.2.3 From 6066998cbd2b1012a8d5bc9a2957cfd0ad53150e Mon Sep 17 00:00:00 2001 From: Andrew-sh Cheng Date: Fri, 8 Dec 2017 14:07:56 +0800 Subject: cpufreq: mediatek: add mediatek related projects into blacklist mediatek projects will use mediatek-cpufreq.c as cpufreq driver, instead of using cpufreq-dt.c. Add mediatek related projects into cpufreq-dt blacklist. Signed-off-by: Andrew-sh Cheng Acked-by: Viresh Kumar Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/cpufreq-dt-platdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index ecc56e26f8f6..3b585e4bfac5 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -108,6 +108,14 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "marvell,armadaxp", }, + { .compatible = "mediatek,mt2701", }, + { .compatible = "mediatek,mt2712", }, + { .compatible = "mediatek,mt7622", }, + { .compatible = "mediatek,mt7623", }, + { .compatible = "mediatek,mt817x", }, + { .compatible = "mediatek,mt8173", }, + { .compatible = "mediatek,mt8176", }, + { .compatible = "nvidia,tegra124", }, { .compatible = "st,stih407", }, -- cgit v1.2.3 From 34fb8f0ba9ceea88e116688f9f53e3802c38aafb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 Dec 2017 00:56:50 +0100 Subject: PM / core: Use dev_pm_skip_next_resume_phases() internally Make the PM core call dev_pm_skip_next_resume_phases() to skip the "early resume" and "resume" phases of system-wide transitions to the working state for a given device instead of clearing the relevant status bits for it directly. No intentional changes in functionality. Signed-off-by: Rafael J. Wysocki Reviewed-by: Geert Uytterhoeven Reviewed-by: Ulf Hansson --- drivers/base/power/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d8aa88baf9c1..cd48b1c69167 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -593,8 +593,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn * device again. 
*/ pm_runtime_set_suspended(dev); - dev->power.is_late_suspended = false; - dev->power.is_suspended = false; + dev_pm_skip_next_resume_phases(dev); } Out: -- cgit v1.2.3 From a248efb3d634854dfcf43a165003597542e90d11 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 30 Nov 2017 12:57:00 +0100 Subject: bus: simple-pm-bus: convert bool SIMPLE_PM_BUS to tristate The Kconfig currently controlling compilation of this code is: config SIMPLE_PM_BUS bool "Simple Power-Managed Bus Driver" ...meaning that it currently is not being built as a module by anyone. In removing the orphaned modular support in a previous patch set, Geert indicated he'd rather see this code converted to tristate. I normally don't do that because it extends functionality that I can't easily run time test or even know if the use case makes sense, but since in this case the author has nominated it as such, we do the conversion here. Note that doesn't change the lack of run time testing; this change is only tested for successful compile and modpost. [geert: Ethernet is probed successfully on sh73a0/kzm9g after insmodding simple-pm-bus.ko] Signed-off-by: Paul Gortmaker Tested-by: Geert Uytterhoeven Signed-off-by: Geert Uytterhoeven Acked-by: Simon Horman Signed-off-by: Rafael J. Wysocki --- drivers/bus/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index dc7b3c7b7d42..57e011d36a79 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -120,7 +120,7 @@ config QCOM_EBI2 SRAM, ethernet adapters, FPGAs and LCD displays.
config SIMPLE_PM_BUS - bool "Simple Power-Managed Bus Driver" + tristate "Simple Power-Managed Bus Driver" depends on OF && PM help Driver for transparent busses that don't need a real driver, but -- cgit v1.2.3 From 964728f9f407eca0b417fdf8e784b7a76979490c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Nov 2017 11:12:58 +0100 Subject: USB: chipidea: msm: fix ulpi-node lookup Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at the parent rather than just matching on its children. Note that the original premature free of the parent node has already been fixed separately, but that fix was apparently never backported to stable. Fixes: 47654a162081 ("usb: chipidea: msm: Restore wrapper settings after reset") Fixes: b74c43156c0c ("usb: chipidea: msm: ci_hdrc_msm_probe() missing of_node_get()") Cc: stable # 4.10: b74c43156c0c Cc: Stephen Boyd Cc: Frank Rowand Signed-off-by: Johan Hovold Signed-off-by: Peter Chen --- drivers/usb/chipidea/ci_hdrc_msm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c b/drivers/usb/chipidea/ci_hdrc_msm.c index 3593ce0ec641..880009987460 100644 --- a/drivers/usb/chipidea/ci_hdrc_msm.c +++ b/drivers/usb/chipidea/ci_hdrc_msm.c @@ -247,7 +247,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev) if (ret) goto err_mux; - ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi"); + ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi"); if (ulpi_node) { phy_node = of_get_next_available_child(ulpi_node, NULL); ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy"); -- cgit v1.2.3 From f41d84dddc66b164ac16acf3f584c276146f1c48 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 12 Dec 2017 17:59:15 +0530 Subject: powerpc/perf: Dereference BHRB entries safely It's theoretically possible that branch instructions recorded in BHRB (Branch History Rolling Buffer) entries have already been 
unmapped before they are processed by the kernel. Hence, trying to dereference such a memory location will result in a crash. eg: Unable to handle kernel paging request for data at address 0xd000000019c41764 Faulting instruction address: 0xc000000000084a14 NIP [c000000000084a14] branch_target+0x4/0x70 LR [c0000000000eb828] record_and_restart+0x568/0x5c0 Call Trace: [c0000000000eb3b4] record_and_restart+0xf4/0x5c0 (unreliable) [c0000000000ec378] perf_event_interrupt+0x298/0x460 [c000000000027964] performance_monitor_exception+0x54/0x70 [c000000000009ba4] performance_monitor_common+0x114/0x120 Fix it by dereferencing the addresses safely. Fixes: 691231846ceb ("powerpc/perf: Fix setting of "to" addresses for BHRB") Cc: stable@vger.kernel.org # v3.10+ Suggested-by: Naveen N. Rao Signed-off-by: Ravi Bangoria Reviewed-by: Naveen N. Rao [mpe: Use probe_kernel_read() which is clearer, tweak change log] Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 153812966365..fce545774d50 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); -- cgit v1.2.3 From ad2b6e01024ef23bddc3ce0bcb115ecd8c520b7e Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Tue, 5 Dec 2017 11:00:38 +0530 Subject: powerpc/perf/imc: Fix nest-imc cpuhotplug callback failure Oops is observed during boot: Faulting instruction address: 0xc000000000248340 cpu 0x0: Vector: 380 (Data Access Out of Range) at [c000000ff66fb850] pc: c000000000248340:
event_function_call+0x50/0x1f0 lr: c00000000024878c: perf_remove_from_context+0x3c/0x100 sp: c000000ff66fbad0 msr: 9000000000009033 dar: 7d20e2a6f92d03c0 pid = 14, comm = cpuhp/0 While registering the cpuhotplug callbacks for nest-imc, if we fail in the cpuhotplug online path for any random node in a multi node system (because the opal call to stop nest-imc counters fails for that node), ppc_nest_imc_cpu_offline() will get invoked for other nodes who successfully returned from cpuhotplug online path. This call trace is generated since in the ppc_nest_imc_cpu_offline() path we are trying to migrate the event context, when nest-imc counters are not even initialized. Patch to add a check to ensure that nest-imc is registered before migrating the event context. Fixes: 885dcd709ba9 ("powerpc/perf: Add nest IMC PMU support") Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa..f1b940714d65 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask)) return 0; + /* + * Check whether nest_imc is registered. We could end up here if the + * cpuhotplug callback registration fails. i.e, callback invokes the + * offline path for all successfully registered nodes. At this stage, + * nest_imc pmu will not be registered and we should return here. + * + * We return with a zero since this is not an offline failure. And + * cpuhp_setup_state() returns the actual failure reason to the caller, + * which in turn will call the cleanup routine. + */ + if (!nest_pmus) + return 0; + /* * Now that this cpu is one of the designated, * find a next cpu a) which is online and b) in same chip. 
-- cgit v1.2.3 From 110df8bd3e418b3476cae80babe8add48a8ea523 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Thu, 7 Dec 2017 22:53:27 +0530 Subject: powerpc/perf: Fix kfree memory allocated for nest pmus imc_common_cpuhp_mem_free() is the common function for all IMC (In-memory Collection counters) domains to unregister cpuhotplug callback and free memory. Since kfree of memory allocated for nest-imc (per_nest_pmu_arr) is in the common code, all domains (core/nest/thread) can do the kfree in the failure case. This could potentially create a call trace as shown below, where core(/thread/nest) imc pmu initialization fails and in the failure path imc_common_cpuhp_mem_free() frees the memory (per_nest_pmu_arr), which is allocated by successfully registered nest units. The call trace is generated in a scenario where core-imc initialization is made to fail and a cpuhotplug is performed in a p9 system. During cpuhotplug ppc_nest_imc_cpu_offline() tries to access per_nest_pmu_arr, which is already freed by core-imc. NIP [c000000000cb6a94] mutex_lock+0x34/0x90 LR [c000000000cb6a88] mutex_lock+0x28/0x90 Call Trace: mutex_lock+0x28/0x90 (unreliable) perf_pmu_migrate_context+0x90/0x3a0 ppc_nest_imc_cpu_offline+0x190/0x1f0 cpuhp_invoke_callback+0x160/0x820 cpuhp_thread_fun+0x1bc/0x270 smpboot_thread_fn+0x250/0x290 kthread+0x1a8/0x1b0 ret_from_kernel_thread+0x5c/0x74 To address this scenario do the kfree(per_nest_pmu_arr) only in case of nest-imc initialization failure, and when there are no other nest units registered.
Fixes: 73ce9aec65b1 ("powerpc/perf: Fix IMC_MAX_PMU macro") Signed-off-by: Anju T Sudhakar Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/imc-pmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index f1b940714d65..be4e7f84f70a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1184,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1208,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); - kfree(per_nest_pmu_arr); return; } @@ -1322,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } -- cgit v1.2.3 From a5f1005517534aeb1fac20180badfbf0896c183c Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 1 Dec 2017 18:47:32 +0100 Subject: s390/pci: handle insufficient resources during dma tlb flush In a virtualized setup lazy flushing can lead to the hypervisor running out of resources when lots of guest pages need to be pinned. In this situation simply trigger a global flush to give the hypervisor a chance to free some of these resources. 
Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Reviewed-by: Pierre Morel Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 21 +++++++++++++++++++-- arch/s390/pci/pci_insn.c | 3 +++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f7aa5a77827e..2d15d84c20ed 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -181,6 +181,9 @@ out_unlock: static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, size_t size, int flags) { + unsigned long irqflags; + int ret; + /* * With zdev->tlb_refresh == 0, rpcit is not required to establish new * translations when previously invalid translation-table entries are @@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, return 0; } - return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, - PAGE_ALIGN(size)); + ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, + PAGE_ALIGN(size)); + if (ret == -ENOMEM && !s390_iommu_strict) { + /* enable the hypervisor to free some resources */ + if (zpci_refresh_global(zdev)) + goto out; + + spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); + bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, + zdev->lazy_bitmap, zdev->iommu_pages); + bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); + ret = 0; + } +out: + return ret; } static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 19bcb3b45a70..f069929e8211 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range) if (cc) zpci_err_insn(cc, status, addr, range); + if (cc == 1 && (status == 4 || status == 16)) + return -ENOMEM; + return (cc) ? 
-EIO : 0; } -- cgit v1.2.3 From faa75e147b583417273902552c61cf3250a44308 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Wed, 13 Dec 2017 18:36:47 +0800 Subject: arm64: fault: avoid send SIGBUS two times do_sea() calls arm64_notify_die() which will always signal user-space. It also returns whether APEI claimed the external abort as a RAS notification. If it returns failure do_mem_abort() will signal user-space too. do_mem_abort() wants to know if we handled the error, we always call arm64_notify_die() so can always return success. Signed-off-by: Dongjiu Geng Reviewed-by: James Morse Reviewed-by: Xie XiuQi Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 22168cd0dde7..9b7f89df49db 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct siginfo info; const struct fault_info *inf; - int ret = 0; inf = esr_to_fault_info(esr); pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", @@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) if (interrupts_enabled(regs)) nmi_enter(); - ret = ghes_notify_sea(); + ghes_notify_sea(); if (interrupts_enabled(regs)) nmi_exit(); @@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) info.si_addr = (void __user *)addr; arm64_notify_die("", regs, &info, esr); - return ret; + return 0; } static const struct fault_info fault_info[] = { -- cgit v1.2.3 From 92ccc262e485781ff4c0fb3b7c77a619282df49a Mon Sep 17 00:00:00 2001 From: Mengting Zhang Date: Tue, 12 Dec 2017 18:16:57 +0000 Subject: tools/lib/lockdep: Add missing declaration of 'pr_cont()' Commit: 681fbec881de ("lockdep: Use consistent printing primitives") has moved lockdep away from using printk() for printing. 
The commit added usage of pr_cont() which wasn't wrapped in the userspace headers, causing the following warning for the liblockdep build: ../../../kernel/locking/lockdep.c:3544:2: warning: implicit declaration of function 'pr_cont' [-Wimplicit-function-declaration] Adding an empty declaration of 'pr_cont' fixes the problem. Signed-off-by: Mengting Zhang Signed-off-by: Sasha Levin Reviewed-by: Alexander Sverdlin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Link: http://lkml.kernel.org/r/20171212181644.11913-2-alexander.levin@verizon.com Signed-off-by: Ingo Molnar --- tools/include/linux/lockdep.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h index 940c1b075659..6b0c36a58fcb 100644 --- a/tools/include/linux/lockdep.h +++ b/tools/include/linux/lockdep.h @@ -48,6 +48,7 @@ static inline int debug_locks_off(void) #define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define pr_warn pr_err +#define pr_cont pr_err #define list_del_rcu list_del -- cgit v1.2.3 From cf4df407e0d7cde60a45369c2a3414d18e2d4fdd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 13 Dec 2017 11:59:39 +0100 Subject: Revert "USB: core: only clean up what we allocated" This reverts commit 32fd87b3bbf5f7a045546401dfe2894dbbf4d8c3. Alan wrote a better fix for this... 
Cc: Andrey Konovalov Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 93b38471754e..55b198ba629b 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -764,21 +764,18 @@ void usb_destroy_configuration(struct usb_device *dev) return; if (dev->rawdescriptors) { - for (i = 0; i < dev->descriptor.bNumConfigurations && - i < USB_MAXCONFIG; i++) + for (i = 0; i < dev->descriptor.bNumConfigurations; i++) kfree(dev->rawdescriptors[i]); kfree(dev->rawdescriptors); dev->rawdescriptors = NULL; } - for (c = 0; c < dev->descriptor.bNumConfigurations && - c < USB_MAXCONFIG; c++) { + for (c = 0; c < dev->descriptor.bNumConfigurations; c++) { struct usb_host_config *cf = &dev->config[c]; kfree(cf->string); - for (i = 0; i < cf->desc.bNumInterfaces && - i < USB_MAXINTERFACES; i++) { + for (i = 0; i < cf->desc.bNumInterfaces; i++) { if (cf->intf_cache[i]) kref_put(&cf->intf_cache[i]->ref, usb_release_interface_cache); -- cgit v1.2.3 From 48a4ff1c7bb5a32d2e396b03132d20d552c0eca7 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 12 Dec 2017 14:25:13 -0500 Subject: USB: core: prevent malicious bNumInterfaces overflow A malicious USB device with crafted descriptors can cause the kernel to access unallocated memory by setting the bNumInterfaces value too high in a configuration descriptor. Although the value is adjusted during parsing, this adjustment is skipped in one of the error return paths. This patch prevents the problem by setting bNumInterfaces to 0 initially. The existing code already sets it to the proper value after parsing is complete. 
Signed-off-by: Alan Stern Reported-by: Andrey Konovalov CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 55b198ba629b..78e92d29f8d9 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -555,6 +555,9 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, unsigned iad_num = 0; memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE); + nintf = nintf_orig = config->desc.bNumInterfaces; + config->desc.bNumInterfaces = 0; // Adjusted later + if (config->desc.bDescriptorType != USB_DT_CONFIG || config->desc.bLength < USB_DT_CONFIG_SIZE || config->desc.bLength > size) { @@ -568,7 +571,6 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx, buffer += config->desc.bLength; size -= config->desc.bLength; - nintf = nintf_orig = config->desc.bNumInterfaces; if (nintf > USB_MAXINTERFACES) { dev_warn(ddev, "config %d has too many interfaces: %d, " "using maximum allowed: %d\n", -- cgit v1.2.3 From 81e30b189f593afbf10a7bf47f18f030f8aea3b5 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 13 Dec 2017 10:28:10 +0100 Subject: regmap: rename regmap_lock_unlock_empty() to regmap_lock_unlock_none() Minor naming convention tweak. 
Suggested-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 72917b2fc10e..54b1aa371c61 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -459,7 +459,7 @@ static void regmap_unlock_hwlock_irqrestore(void *__map) } #endif -static void regmap_lock_unlock_empty(void *__map) +static void regmap_lock_unlock_none(void *__map) { } @@ -675,7 +675,7 @@ struct regmap *__regmap_init(struct device *dev, } if (config->disable_locking) { - map->lock = map->unlock = regmap_lock_unlock_empty; + map->lock = map->unlock = regmap_lock_unlock_none; } else if (config->lock && config->unlock) { map->lock = config->lock; map->unlock = config->unlock; -- cgit v1.2.3 From b0a858a47a7889757dbc9ac9872685955eaa5cc0 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 12 Dec 2017 18:09:15 +0000 Subject: ASoC: rsnd: Add device tree support for r8a774[35] Document r8a774[35] specific compatible strings. The Renesas RZ/G1[ME] (r8a774[35]) sound modules are identical to the R-Car Gen2 family. No driver change is needed as the fallback compatible string "renesas,rcar_sound-gen2" activates the right code in the driver. 
Signed-off-by: Biju Das Reviewed-by: Fabrizio Castro Reviewed-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/renesas,rsnd.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt index 085bec364caf..b3c28bdcc268 100644 --- a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt +++ b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt @@ -4,7 +4,7 @@ Renesas R-Car sound * Modules ============================================= -Renesas R-Car sound is constructed from below modules +Renesas R-Car and RZ/G sound is constructed from below modules (for Gen2 or later) SCU : Sampling Rate Converter Unit @@ -334,9 +334,11 @@ Required properties: - compatible : "renesas,rcar_sound-", fallbacks "renesas,rcar_sound-gen1" if generation1, and - "renesas,rcar_sound-gen2" if generation2 + "renesas,rcar_sound-gen2" if generation2 (or RZ/G1) "renesas,rcar_sound-gen3" if generation3 Examples with soctypes are: + - "renesas,rcar_sound-r8a7743" (RZ/G1M) + - "renesas,rcar_sound-r8a7745" (RZ/G1E) - "renesas,rcar_sound-r8a7778" (R-Car M1A) - "renesas,rcar_sound-r8a7779" (R-Car H1) - "renesas,rcar_sound-r8a7790" (R-Car H2) -- cgit v1.2.3 From ac0bf025d2c0e88097f0ab247e7460053fef7e9d Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 11 Dec 2017 06:35:20 -0500 Subject: ima: Use i_version only when filesystem supports it i_version is only supported by a filesystem when the SB_I_VERSION flag is set. This patch tests for the SB_I_VERSION flag before using i_version. If we can't use i_version to detect a file change then we must assume the file has changed in the last_writer path and remeasure it. On filesystems without i_version support IMA used to measure a file only once and didn't detect any changes to a file. With this patch IMA now works properly on these filesystems. 
Signed-off-by: Sascha Hauer Reviewed-by: Jeff Layton Signed-off-by: Jeff Layton --- security/integrity/ima/ima_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 770654694efc..50b82599994d 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -127,7 +127,8 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint, inode_lock(inode); if (atomic_read(&inode->i_writecount) == 1) { - if ((iint->version != inode->i_version) || + if (!IS_I_VERSION(inode) || + (iint->version != inode->i_version) || (iint->flags & IMA_NEW_FILE)) { iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); iint->measured_pcrs = 0; -- cgit v1.2.3 From 1714196c7ec540292324d9022c43b5f281dbf74c Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:03 -0600 Subject: ASoC: tlv320aic32x4: Use AIC32X4_REG macro for all register definitions All register definitions should use the AIC32X4_REG macro, even the ones in page 0. This makes datasheet lookup more consistent and helps with alignment both in this file and across other tlv320aic* drivers. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.h | 152 ++++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 75 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.h b/sound/soc/codecs/tlv320aic32x4.h index da7cec482bcb..936bb7a1b5c8 100644 --- a/sound/soc/codecs/tlv320aic32x4.h +++ b/sound/soc/codecs/tlv320aic32x4.h @@ -19,81 +19,83 @@ int aic32x4_remove(struct device *dev); /* tlv320aic32x4 register space (in decimal to match datasheet) */ -#define AIC32X4_PAGE1 128 - -#define AIC32X4_PSEL 0 -#define AIC32X4_RESET 1 -#define AIC32X4_CLKMUX 4 -#define AIC32X4_PLLPR 5 -#define AIC32X4_PLLJ 6 -#define AIC32X4_PLLDMSB 7 -#define AIC32X4_PLLDLSB 8 -#define AIC32X4_NDAC 11 -#define AIC32X4_MDAC 12 -#define AIC32X4_DOSRMSB 13 -#define AIC32X4_DOSRLSB 14 -#define AIC32X4_NADC 18 -#define AIC32X4_MADC 19 -#define AIC32X4_AOSR 20 -#define AIC32X4_CLKMUX2 25 -#define AIC32X4_CLKOUTM 26 -#define AIC32X4_IFACE1 27 -#define AIC32X4_IFACE2 28 -#define AIC32X4_IFACE3 29 -#define AIC32X4_BCLKN 30 -#define AIC32X4_IFACE4 31 -#define AIC32X4_IFACE5 32 -#define AIC32X4_IFACE6 33 -#define AIC32X4_GPIOCTL 52 -#define AIC32X4_DOUTCTL 53 -#define AIC32X4_DINCTL 54 -#define AIC32X4_MISOCTL 55 -#define AIC32X4_SCLKCTL 56 -#define AIC32X4_DACSPB 60 -#define AIC32X4_ADCSPB 61 -#define AIC32X4_DACSETUP 63 -#define AIC32X4_DACMUTE 64 -#define AIC32X4_LDACVOL 65 -#define AIC32X4_RDACVOL 66 -#define AIC32X4_ADCSETUP 81 -#define AIC32X4_ADCFGA 82 -#define AIC32X4_LADCVOL 83 -#define AIC32X4_RADCVOL 84 -#define AIC32X4_LAGC1 86 -#define AIC32X4_LAGC2 87 -#define AIC32X4_LAGC3 88 -#define AIC32X4_LAGC4 89 -#define AIC32X4_LAGC5 90 -#define AIC32X4_LAGC6 91 -#define AIC32X4_LAGC7 92 -#define AIC32X4_RAGC1 94 -#define AIC32X4_RAGC2 95 -#define AIC32X4_RAGC3 96 -#define AIC32X4_RAGC4 97 -#define AIC32X4_RAGC5 98 -#define AIC32X4_RAGC6 99 -#define AIC32X4_RAGC7 100 -#define AIC32X4_PWRCFG (AIC32X4_PAGE1 + 1) -#define AIC32X4_LDOCTL (AIC32X4_PAGE1 + 2) 
-#define AIC32X4_OUTPWRCTL (AIC32X4_PAGE1 + 9) -#define AIC32X4_CMMODE (AIC32X4_PAGE1 + 10) -#define AIC32X4_HPLROUTE (AIC32X4_PAGE1 + 12) -#define AIC32X4_HPRROUTE (AIC32X4_PAGE1 + 13) -#define AIC32X4_LOLROUTE (AIC32X4_PAGE1 + 14) -#define AIC32X4_LORROUTE (AIC32X4_PAGE1 + 15) -#define AIC32X4_HPLGAIN (AIC32X4_PAGE1 + 16) -#define AIC32X4_HPRGAIN (AIC32X4_PAGE1 + 17) -#define AIC32X4_LOLGAIN (AIC32X4_PAGE1 + 18) -#define AIC32X4_LORGAIN (AIC32X4_PAGE1 + 19) -#define AIC32X4_HEADSTART (AIC32X4_PAGE1 + 20) -#define AIC32X4_MICBIAS (AIC32X4_PAGE1 + 51) -#define AIC32X4_LMICPGAPIN (AIC32X4_PAGE1 + 52) -#define AIC32X4_LMICPGANIN (AIC32X4_PAGE1 + 54) -#define AIC32X4_RMICPGAPIN (AIC32X4_PAGE1 + 55) -#define AIC32X4_RMICPGANIN (AIC32X4_PAGE1 + 57) -#define AIC32X4_FLOATINGINPUT (AIC32X4_PAGE1 + 58) -#define AIC32X4_LMICPGAVOL (AIC32X4_PAGE1 + 59) -#define AIC32X4_RMICPGAVOL (AIC32X4_PAGE1 + 60) +#define AIC32X4_REG(page, reg) ((page * 128) + reg) + +#define AIC32X4_PSEL AIC32X4_REG(0, 0) + +#define AIC32X4_RESET AIC32X4_REG(0, 1) +#define AIC32X4_CLKMUX AIC32X4_REG(0, 4) +#define AIC32X4_PLLPR AIC32X4_REG(0, 5) +#define AIC32X4_PLLJ AIC32X4_REG(0, 6) +#define AIC32X4_PLLDMSB AIC32X4_REG(0, 7) +#define AIC32X4_PLLDLSB AIC32X4_REG(0, 8) +#define AIC32X4_NDAC AIC32X4_REG(0, 11) +#define AIC32X4_MDAC AIC32X4_REG(0, 12) +#define AIC32X4_DOSRMSB AIC32X4_REG(0, 13) +#define AIC32X4_DOSRLSB AIC32X4_REG(0, 14) +#define AIC32X4_NADC AIC32X4_REG(0, 18) +#define AIC32X4_MADC AIC32X4_REG(0, 19) +#define AIC32X4_AOSR AIC32X4_REG(0, 20) +#define AIC32X4_CLKMUX2 AIC32X4_REG(0, 25) +#define AIC32X4_CLKOUTM AIC32X4_REG(0, 26) +#define AIC32X4_IFACE1 AIC32X4_REG(0, 27) +#define AIC32X4_IFACE2 AIC32X4_REG(0, 28) +#define AIC32X4_IFACE3 AIC32X4_REG(0, 29) +#define AIC32X4_BCLKN AIC32X4_REG(0, 30) +#define AIC32X4_IFACE4 AIC32X4_REG(0, 31) +#define AIC32X4_IFACE5 AIC32X4_REG(0, 32) +#define AIC32X4_IFACE6 AIC32X4_REG(0, 33) +#define AIC32X4_GPIOCTL AIC32X4_REG(0, 52) +#define 
AIC32X4_DOUTCTL AIC32X4_REG(0, 53) +#define AIC32X4_DINCTL AIC32X4_REG(0, 54) +#define AIC32X4_MISOCTL AIC32X4_REG(0, 55) +#define AIC32X4_SCLKCTL AIC32X4_REG(0, 56) +#define AIC32X4_DACSPB AIC32X4_REG(0, 60) +#define AIC32X4_ADCSPB AIC32X4_REG(0, 61) +#define AIC32X4_DACSETUP AIC32X4_REG(0, 63) +#define AIC32X4_DACMUTE AIC32X4_REG(0, 64) +#define AIC32X4_LDACVOL AIC32X4_REG(0, 65) +#define AIC32X4_RDACVOL AIC32X4_REG(0, 66) +#define AIC32X4_ADCSETUP AIC32X4_REG(0, 81) +#define AIC32X4_ADCFGA AIC32X4_REG(0, 82) +#define AIC32X4_LADCVOL AIC32X4_REG(0, 83) +#define AIC32X4_RADCVOL AIC32X4_REG(0, 84) +#define AIC32X4_LAGC1 AIC32X4_REG(0, 86) +#define AIC32X4_LAGC2 AIC32X4_REG(0, 87) +#define AIC32X4_LAGC3 AIC32X4_REG(0, 88) +#define AIC32X4_LAGC4 AIC32X4_REG(0, 89) +#define AIC32X4_LAGC5 AIC32X4_REG(0, 90) +#define AIC32X4_LAGC6 AIC32X4_REG(0, 91) +#define AIC32X4_LAGC7 AIC32X4_REG(0, 92) +#define AIC32X4_RAGC1 AIC32X4_REG(0, 94) +#define AIC32X4_RAGC2 AIC32X4_REG(0, 95) +#define AIC32X4_RAGC3 AIC32X4_REG(0, 96) +#define AIC32X4_RAGC4 AIC32X4_REG(0, 97) +#define AIC32X4_RAGC5 AIC32X4_REG(0, 98) +#define AIC32X4_RAGC6 AIC32X4_REG(0, 99) +#define AIC32X4_RAGC7 AIC32X4_REG(0, 100) + +#define AIC32X4_PWRCFG AIC32X4_REG(1, 1) +#define AIC32X4_LDOCTL AIC32X4_REG(1, 2) +#define AIC32X4_OUTPWRCTL AIC32X4_REG(1, 9) +#define AIC32X4_CMMODE AIC32X4_REG(1, 10) +#define AIC32X4_HPLROUTE AIC32X4_REG(1, 12) +#define AIC32X4_HPRROUTE AIC32X4_REG(1, 13) +#define AIC32X4_LOLROUTE AIC32X4_REG(1, 14) +#define AIC32X4_LORROUTE AIC32X4_REG(1, 15) +#define AIC32X4_HPLGAIN AIC32X4_REG(1, 16) +#define AIC32X4_HPRGAIN AIC32X4_REG(1, 17) +#define AIC32X4_LOLGAIN AIC32X4_REG(1, 18) +#define AIC32X4_LORGAIN AIC32X4_REG(1, 19) +#define AIC32X4_HEADSTART AIC32X4_REG(1, 20) +#define AIC32X4_MICBIAS AIC32X4_REG(1, 51) +#define AIC32X4_LMICPGAPIN AIC32X4_REG(1, 52) +#define AIC32X4_LMICPGANIN AIC32X4_REG(1, 54) +#define AIC32X4_RMICPGAPIN AIC32X4_REG(1, 55) +#define AIC32X4_RMICPGANIN AIC32X4_REG(1, 
57) +#define AIC32X4_FLOATINGINPUT AIC32X4_REG(1, 58) +#define AIC32X4_LMICPGAVOL AIC32X4_REG(1, 59) +#define AIC32X4_RMICPGAVOL AIC32X4_REG(1, 60) #define AIC32X4_FREQ_12000000 12000000 #define AIC32X4_FREQ_24000000 24000000 -- cgit v1.2.3 From 7e2a4dc5c1f0875646816c527cad5943cb6d5cc7 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:04 -0600 Subject: ASoC: tlv320aic32x4: Drop define mapping from number to number Drop definition of frequencies that only map from one number to the same number. This is not needed and if misused can hide bugs. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 46 ++++++++++++++++++++-------------------- sound/soc/codecs/tlv320aic32x4.h | 4 ---- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index e694f5f04eb9..d7a67bfcc6d8 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -281,34 +281,34 @@ static const struct snd_kcontrol_new aic32x4_snd_controls[] = { static const struct aic32x4_rate_divs aic32x4_divs[] = { /* 8k rate */ - {AIC32X4_FREQ_12000000, 8000, 1, 7, 6800, 768, 5, 3, 128, 5, 18, 24}, - {AIC32X4_FREQ_24000000, 8000, 2, 7, 6800, 768, 15, 1, 64, 45, 4, 24}, - {AIC32X4_FREQ_25000000, 8000, 2, 7, 3728, 768, 15, 1, 64, 45, 4, 24}, + {12000000, 8000, 1, 7, 6800, 768, 5, 3, 128, 5, 18, 24}, + {24000000, 8000, 2, 7, 6800, 768, 15, 1, 64, 45, 4, 24}, + {25000000, 8000, 2, 7, 3728, 768, 15, 1, 64, 45, 4, 24}, /* 11.025k rate */ - {AIC32X4_FREQ_12000000, 11025, 1, 7, 5264, 512, 8, 2, 128, 8, 8, 16}, - {AIC32X4_FREQ_24000000, 11025, 2, 7, 5264, 512, 16, 1, 64, 32, 4, 16}, + {12000000, 11025, 1, 7, 5264, 512, 8, 2, 128, 8, 8, 16}, + {24000000, 11025, 2, 7, 5264, 512, 16, 1, 64, 32, 4, 16}, /* 16k rate */ - {AIC32X4_FREQ_12000000, 16000, 1, 7, 6800, 384, 5, 3, 128, 5, 9, 12}, - {AIC32X4_FREQ_24000000, 16000, 2, 7, 6800, 384, 15, 1, 64, 18, 5, 
12}, - {AIC32X4_FREQ_25000000, 16000, 2, 7, 3728, 384, 15, 1, 64, 18, 5, 12}, + {12000000, 16000, 1, 7, 6800, 384, 5, 3, 128, 5, 9, 12}, + {24000000, 16000, 2, 7, 6800, 384, 15, 1, 64, 18, 5, 12}, + {25000000, 16000, 2, 7, 3728, 384, 15, 1, 64, 18, 5, 12}, /* 22.05k rate */ - {AIC32X4_FREQ_12000000, 22050, 1, 7, 5264, 256, 4, 4, 128, 4, 8, 8}, - {AIC32X4_FREQ_24000000, 22050, 2, 7, 5264, 256, 16, 1, 64, 16, 4, 8}, - {AIC32X4_FREQ_25000000, 22050, 2, 7, 2253, 256, 16, 1, 64, 16, 4, 8}, + {12000000, 22050, 1, 7, 5264, 256, 4, 4, 128, 4, 8, 8}, + {24000000, 22050, 2, 7, 5264, 256, 16, 1, 64, 16, 4, 8}, + {25000000, 22050, 2, 7, 2253, 256, 16, 1, 64, 16, 4, 8}, /* 32k rate */ - {AIC32X4_FREQ_12000000, 32000, 1, 7, 1680, 192, 2, 7, 64, 2, 21, 6}, - {AIC32X4_FREQ_24000000, 32000, 2, 7, 1680, 192, 7, 2, 64, 7, 6, 6}, + {12000000, 32000, 1, 7, 1680, 192, 2, 7, 64, 2, 21, 6}, + {24000000, 32000, 2, 7, 1680, 192, 7, 2, 64, 7, 6, 6}, /* 44.1k rate */ - {AIC32X4_FREQ_12000000, 44100, 1, 7, 5264, 128, 2, 8, 128, 2, 8, 4}, - {AIC32X4_FREQ_24000000, 44100, 2, 7, 5264, 128, 8, 2, 64, 8, 4, 4}, - {AIC32X4_FREQ_25000000, 44100, 2, 7, 2253, 128, 8, 2, 64, 8, 4, 4}, + {12000000, 44100, 1, 7, 5264, 128, 2, 8, 128, 2, 8, 4}, + {24000000, 44100, 2, 7, 5264, 128, 8, 2, 64, 8, 4, 4}, + {25000000, 44100, 2, 7, 2253, 128, 8, 2, 64, 8, 4, 4}, /* 48k rate */ - {AIC32X4_FREQ_12000000, 48000, 1, 8, 1920, 128, 2, 8, 128, 2, 8, 4}, - {AIC32X4_FREQ_24000000, 48000, 2, 8, 1920, 128, 8, 2, 64, 8, 4, 4}, - {AIC32X4_FREQ_25000000, 48000, 2, 7, 8643, 128, 8, 2, 64, 8, 4, 4}, + {12000000, 48000, 1, 8, 1920, 128, 2, 8, 128, 2, 8, 4}, + {24000000, 48000, 2, 8, 1920, 128, 8, 2, 64, 8, 4, 4}, + {25000000, 48000, 2, 7, 8643, 128, 8, 2, 64, 8, 4, 4}, /* 96k rate */ - {AIC32X4_FREQ_25000000, 96000, 2, 7, 8643, 64, 4, 4, 64, 4, 4, 1}, + {25000000, 96000, 2, 7, 8643, 64, 4, 4, 64, 4, 4, 1}, }; static const struct snd_kcontrol_new hpl_output_mixer_controls[] = { @@ -601,9 +601,9 @@ static int 
aic32x4_set_dai_sysclk(struct snd_soc_dai *codec_dai, struct aic32x4_priv *aic32x4 = snd_soc_codec_get_drvdata(codec); switch (freq) { - case AIC32X4_FREQ_12000000: - case AIC32X4_FREQ_24000000: - case AIC32X4_FREQ_25000000: + case 12000000: + case 24000000: + case 25000000: aic32x4->sysclk = freq; return 0; } diff --git a/sound/soc/codecs/tlv320aic32x4.h b/sound/soc/codecs/tlv320aic32x4.h index 936bb7a1b5c8..b017211f83eb 100644 --- a/sound/soc/codecs/tlv320aic32x4.h +++ b/sound/soc/codecs/tlv320aic32x4.h @@ -97,10 +97,6 @@ int aic32x4_remove(struct device *dev); #define AIC32X4_LMICPGAVOL AIC32X4_REG(1, 59) #define AIC32X4_RMICPGAVOL AIC32X4_REG(1, 60) -#define AIC32X4_FREQ_12000000 12000000 -#define AIC32X4_FREQ_24000000 24000000 -#define AIC32X4_FREQ_25000000 25000000 - #define AIC32X4_WORD_LEN_16BITS 0x00 #define AIC32X4_WORD_LEN_20BITS 0x01 #define AIC32X4_WORD_LEN_24BITS 0x02 -- cgit v1.2.3 From 4483521d81684764cb7f2569bf3e4b10d38ef9f7 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:05 -0600 Subject: ASoC: tlv320aic32x4: Use correct shift definition for DATATYPE bits Setting the DATATYPE bit field requires shifting our value by 6. Setting the J value of the PLL also requires a shift by 6. Currently the code abuses this fact and uses the shift for the PLL register to set the data-type register. Fix this here by using the definition meant for this register. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index d7a67bfcc6d8..8f9719e6cdfd 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -641,21 +641,23 @@ static int aic32x4_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) case SND_SOC_DAIFMT_I2S: break; case SND_SOC_DAIFMT_DSP_A: - iface_reg_1 |= (AIC32X4_DSP_MODE << AIC32X4_PLLJ_SHIFT); + iface_reg_1 |= (AIC32X4_DSP_MODE << + AIC32X4_IFACE1_DATATYPE_SHIFT); iface_reg_3 |= (1 << 3); /* invert bit clock */ iface_reg_2 = 0x01; /* add offset 1 */ break; case SND_SOC_DAIFMT_DSP_B: - iface_reg_1 |= (AIC32X4_DSP_MODE << AIC32X4_PLLJ_SHIFT); + iface_reg_1 |= (AIC32X4_DSP_MODE << + AIC32X4_IFACE1_DATATYPE_SHIFT); iface_reg_3 |= (1 << 3); /* invert bit clock */ break; case SND_SOC_DAIFMT_RIGHT_J: - iface_reg_1 |= - (AIC32X4_RIGHT_JUSTIFIED_MODE << AIC32X4_PLLJ_SHIFT); + iface_reg_1 |= (AIC32X4_RIGHT_JUSTIFIED_MODE << + AIC32X4_IFACE1_DATATYPE_SHIFT); break; case SND_SOC_DAIFMT_LEFT_J: - iface_reg_1 |= - (AIC32X4_LEFT_JUSTIFIED_MODE << AIC32X4_PLLJ_SHIFT); + iface_reg_1 |= (AIC32X4_LEFT_JUSTIFIED_MODE << + AIC32X4_IFACE1_DATATYPE_SHIFT); break; default: printk(KERN_ERR "aic32x4: invalid DAI interface format\n"); -- cgit v1.2.3 From 77bdb58795d86262e96ba37524489ba0969de253 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:06 -0600 Subject: ASoC: tlv320aic32x4: Use correct shift definition for DATALEN bits Setting the DATALEN bit field requires shifting our value by 4. Setting the OSR value of the PLL divider also requires a shift by 4. Currently the code abuses this fact and uses the shift for the divider register to set the data-length register. Fix this here by using the definition meant for this register. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index 8f9719e6cdfd..9f643199e1ba 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -738,15 +738,20 @@ static int aic32x4_hw_params(struct snd_pcm_substream *substream, data = data & ~(3 << 4); switch (params_width(params)) { case 16: + data |= (AIC32X4_WORD_LEN_16BITS << + AIC32X4_IFACE1_DATALEN_SHIFT); break; case 20: - data |= (AIC32X4_WORD_LEN_20BITS << AIC32X4_DOSRMSB_SHIFT); + data |= (AIC32X4_WORD_LEN_20BITS << + AIC32X4_IFACE1_DATALEN_SHIFT); break; case 24: - data |= (AIC32X4_WORD_LEN_24BITS << AIC32X4_DOSRMSB_SHIFT); + data |= (AIC32X4_WORD_LEN_24BITS << + AIC32X4_IFACE1_DATALEN_SHIFT); break; case 32: - data |= (AIC32X4_WORD_LEN_32BITS << AIC32X4_DOSRMSB_SHIFT); + data |= (AIC32X4_WORD_LEN_32BITS << + AIC32X4_IFACE1_DATALEN_SHIFT); break; } snd_soc_write(codec, AIC32X4_IFACE1, data); -- cgit v1.2.3 From 0fe7aa39ba0492aabdde67dc1511055c9dc7e960 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:07 -0600 Subject: ASoC: tlv320aic32x4: Use BIT and GENMASK for bit field definitions Inter-register definitions should use BIT and GENMASK definitions and also be grouped by what register they belong to. This makes it easy to cross-check with the datasheet and is consistent with other drivers. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 2 +- sound/soc/codecs/tlv320aic32x4.h | 132 ++++++++++++++++++++++++++------------- 2 files changed, 88 insertions(+), 46 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index 9f643199e1ba..e528a8495346 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -686,7 +686,7 @@ static int aic32x4_hw_params(struct snd_pcm_substream *substream, } /* Use PLL as CODEC_CLKIN and DAC_MOD_CLK as BDIV_CLKIN */ - snd_soc_write(codec, AIC32X4_CLKMUX, AIC32X4_PLLCLKIN); + snd_soc_write(codec, AIC32X4_CLKMUX, AIC32X4_CODEC_CLKIN_PLL); snd_soc_write(codec, AIC32X4_IFACE3, AIC32X4_DACMOD2BCLK); /* We will fix R value to 1 and will make P & J=K.D as varialble */ diff --git a/sound/soc/codecs/tlv320aic32x4.h b/sound/soc/codecs/tlv320aic32x4.h index b017211f83eb..67772e5585b8 100644 --- a/sound/soc/codecs/tlv320aic32x4.h +++ b/sound/soc/codecs/tlv320aic32x4.h @@ -97,61 +97,103 @@ int aic32x4_remove(struct device *dev); #define AIC32X4_LMICPGAVOL AIC32X4_REG(1, 59) #define AIC32X4_RMICPGAVOL AIC32X4_REG(1, 60) -#define AIC32X4_WORD_LEN_16BITS 0x00 -#define AIC32X4_WORD_LEN_20BITS 0x01 -#define AIC32X4_WORD_LEN_24BITS 0x02 -#define AIC32X4_WORD_LEN_32BITS 0x03 - -#define AIC32X4_LADC_EN (1 << 7) -#define AIC32X4_RADC_EN (1 << 6) - -#define AIC32X4_I2S_MODE 0x00 -#define AIC32X4_DSP_MODE 0x01 -#define AIC32X4_RIGHT_JUSTIFIED_MODE 0x02 -#define AIC32X4_LEFT_JUSTIFIED_MODE 0x03 - -#define AIC32X4_AVDDWEAKDISABLE 0x08 -#define AIC32X4_LDOCTLEN 0x01 +/* Bits, masks, and shifts */ + +/* AIC32X4_CLKMUX */ +#define AIC32X4_PLL_CLKIN_MASK GENMASK(3, 2) +#define AIC32X4_PLL_CLKIN_SHIFT (2) +#define AIC32X4_PLL_CLKIN_MCLK (0x00) +#define AIC32X4_PLL_CLKIN_BCKL (0x01) +#define AIC32X4_PLL_CLKIN_GPIO1 (0x02) +#define AIC32X4_PLL_CLKIN_DIN (0x03) +#define AIC32X4_CODEC_CLKIN_MASK GENMASK(1, 0) +#define AIC32X4_CODEC_CLKIN_SHIFT (0) +#define 
AIC32X4_CODEC_CLKIN_MCLK (0x00) +#define AIC32X4_CODEC_CLKIN_BCLK (0x01) +#define AIC32X4_CODEC_CLKIN_GPIO1 (0x02) +#define AIC32X4_CODEC_CLKIN_PLL (0x03) + +/* AIC32X4_PLLPR */ +#define AIC32X4_PLLEN BIT(7) + +/* AIC32X4_NDAC */ +#define AIC32X4_NDACEN BIT(7) + +/* AIC32X4_MDAC */ +#define AIC32X4_MDACEN BIT(7) + +/* AIC32X4_NADC */ +#define AIC32X4_NADCEN BIT(7) + +/* AIC32X4_MADC */ +#define AIC32X4_MADCEN BIT(7) + +/* AIC32X4_BCLKN */ +#define AIC32X4_BCLKEN BIT(7) + +/* AIC32X4_IFACE1 */ +#define AIC32X4_IFACE1_DATATYPE_MASK GENMASK(7, 6) +#define AIC32X4_IFACE1_DATATYPE_SHIFT (6) +#define AIC32X4_I2S_MODE (0x00) +#define AIC32X4_DSP_MODE (0x01) +#define AIC32X4_RIGHT_JUSTIFIED_MODE (0x02) +#define AIC32X4_LEFT_JUSTIFIED_MODE (0x03) +#define AIC32X4_IFACE1_DATALEN_MASK GENMASK(5, 4) +#define AIC32X4_IFACE1_DATALEN_SHIFT (4) +#define AIC32X4_WORD_LEN_16BITS (0x00) +#define AIC32X4_WORD_LEN_20BITS (0x01) +#define AIC32X4_WORD_LEN_24BITS (0x02) +#define AIC32X4_WORD_LEN_32BITS (0x03) +#define AIC32X4_IFACE1_MASTER_MASK GENMASK(3, 2) +#define AIC32X4_BCLKMASTER BIT(2) +#define AIC32X4_WCLKMASTER BIT(3) + +/* AIC32X4_IFACE2 */ +#define AIC32X4_DATA_OFFSET_MASK GENMASK(7, 0) + +/* AIC32X4_IFACE3 */ +#define AIC32X4_BCLKINV_MASK BIT(3) +#define AIC32X4_BDIVCLK_MASK GENMASK(1, 0) +#define AIC32X4_BDIVCLK_SHIFT (0) +#define AIC32X4_DAC2BCLK (0x00) +#define AIC32X4_DACMOD2BCLK (0x01) +#define AIC32X4_ADC2BCLK (0x02) +#define AIC32X4_ADCMOD2BCLK (0x03) + +/* AIC32X4_DACSETUP */ +#define AIC32X4_DAC_CHAN_MASK GENMASK(5, 2) +#define AIC32X4_LDAC2RCHN BIT(5) +#define AIC32X4_LDAC2LCHN BIT(4) +#define AIC32X4_RDAC2LCHN BIT(3) +#define AIC32X4_RDAC2RCHN BIT(2) + +/* AIC32X4_DACMUTE */ +#define AIC32X4_MUTEON 0x0C -#define AIC32X4_LDOIN_18_36 0x01 -#define AIC32X4_LDOIN2HP 0x02 +/* AIC32X4_ADCSETUP */ +#define AIC32X4_LADC_EN BIT(7) +#define AIC32X4_RADC_EN BIT(6) -#define AIC32X4_DACSPBLOCK_MASK 0x1f -#define AIC32X4_ADCSPBLOCK_MASK 0x1f +/* AIC32X4_PWRCFG */ +#define 
AIC32X4_AVDDWEAKDISABLE BIT(3) -#define AIC32X4_PLLJ_SHIFT 6 -#define AIC32X4_DOSRMSB_SHIFT 4 +/* AIC32X4_LDOCTL */ +#define AIC32X4_LDOCTLEN BIT(0) -#define AIC32X4_PLLCLKIN 0x03 +/* AIC32X4_CMMODE */ +#define AIC32X4_LDOIN_18_36 BIT(0) +#define AIC32X4_LDOIN2HP BIT(1) -#define AIC32X4_MICBIAS_LDOIN 0x08 +/* AIC32X4_MICBIAS */ +#define AIC32X4_MICBIAS_LDOIN BIT(3) #define AIC32X4_MICBIAS_2075V 0x60 +/* AIC32X4_LMICPGANIN */ #define AIC32X4_LMICPGANIN_IN2R_10K 0x10 #define AIC32X4_LMICPGANIN_CM1L_10K 0x40 + +/* AIC32X4_RMICPGANIN */ #define AIC32X4_RMICPGANIN_IN1L_10K 0x10 #define AIC32X4_RMICPGANIN_CM1R_10K 0x40 -#define AIC32X4_LMICPGAVOL_NOGAIN 0x80 -#define AIC32X4_RMICPGAVOL_NOGAIN 0x80 - -#define AIC32X4_BCLKMASTER 0x08 -#define AIC32X4_WCLKMASTER 0x04 -#define AIC32X4_PLLEN (0x01 << 7) -#define AIC32X4_NDACEN (0x01 << 7) -#define AIC32X4_MDACEN (0x01 << 7) -#define AIC32X4_NADCEN (0x01 << 7) -#define AIC32X4_MADCEN (0x01 << 7) -#define AIC32X4_BCLKEN (0x01 << 7) -#define AIC32X4_DACEN (0x03 << 6) -#define AIC32X4_RDAC2LCHN (0x02 << 2) -#define AIC32X4_LDAC2RCHN (0x02 << 4) -#define AIC32X4_LDAC2LCHN (0x01 << 4) -#define AIC32X4_RDAC2RCHN (0x01 << 2) -#define AIC32X4_DAC_CHAN_MASK 0x3c - -#define AIC32X4_SSTEP2WCLK 0x01 -#define AIC32X4_MUTEON 0x0C -#define AIC32X4_DACMOD2BCLK 0x01 - #endif /* _TLV320AIC32X4_H */ -- cgit v1.2.3 From b7ddd9cab7d1c800db83e442e881d8cb3f755633 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:08 -0600 Subject: ASoC: tlv320aic32x4: Use snd_soc_update_bits() in aic32x4_mute() Simplify mute function by using snd_soc_update_bits() over read/modify/write style code. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index e528a8495346..5deabe36ae94 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -773,13 +773,10 @@ static int aic32x4_hw_params(struct snd_pcm_substream *substream, static int aic32x4_mute(struct snd_soc_dai *dai, int mute) { struct snd_soc_codec *codec = dai->codec; - u8 dac_reg; - dac_reg = snd_soc_read(codec, AIC32X4_DACMUTE) & ~AIC32X4_MUTEON; - if (mute) - snd_soc_write(codec, AIC32X4_DACMUTE, dac_reg | AIC32X4_MUTEON); - else - snd_soc_write(codec, AIC32X4_DACMUTE, dac_reg); + snd_soc_update_bits(codec, AIC32X4_DACMUTE, + AIC32X4_MUTEON, mute ? AIC32X4_MUTEON : 0); + return 0; } -- cgit v1.2.3 From 64aab89974ebddf4cc67e4ed8996d879a9d054b9 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:09 -0600 Subject: ASoC: tlv320aic32x4: Use snd_soc_update_bits() in aic32x4_hw_params() Make the code easier to read by using snd_soc_update_bits() over read/modify/write sequences. Also use separate per-register variables instead of re-using "data". This can prevent accidental over-writing and makes it clear for which register each bit value is intended. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 90 ++++++++++++++++++++-------------------- sound/soc/codecs/tlv320aic32x4.h | 8 ++++ 2 files changed, 54 insertions(+), 44 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index 5deabe36ae94..63a52cdb7afe 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -676,7 +676,8 @@ static int aic32x4_hw_params(struct snd_pcm_substream *substream, { struct snd_soc_codec *codec = dai->codec; struct aic32x4_priv *aic32x4 = snd_soc_codec_get_drvdata(codec); - u8 data; + u8 iface1_reg = 0; + u8 dacsetup_reg = 0; int i; i = aic32x4_get_divs(aic32x4->sysclk, params_rate(params)); @@ -685,87 +686,88 @@ static int aic32x4_hw_params(struct snd_pcm_substream *substream, return i; } - /* Use PLL as CODEC_CLKIN and DAC_MOD_CLK as BDIV_CLKIN */ - snd_soc_write(codec, AIC32X4_CLKMUX, AIC32X4_CODEC_CLKIN_PLL); - snd_soc_write(codec, AIC32X4_IFACE3, AIC32X4_DACMOD2BCLK); + /* MCLK as PLL_CLKIN */ + snd_soc_update_bits(codec, AIC32X4_CLKMUX, AIC32X4_PLL_CLKIN_MASK, + AIC32X4_PLL_CLKIN_MCLK << AIC32X4_PLL_CLKIN_SHIFT); + /* PLL as CODEC_CLKIN */ + snd_soc_update_bits(codec, AIC32X4_CLKMUX, AIC32X4_CODEC_CLKIN_MASK, + AIC32X4_CODEC_CLKIN_PLL << AIC32X4_CODEC_CLKIN_SHIFT); + /* DAC_MOD_CLK as BDIV_CLKIN */ + snd_soc_update_bits(codec, AIC32X4_IFACE3, AIC32X4_BDIVCLK_MASK, + AIC32X4_DACMOD2BCLK << AIC32X4_BDIVCLK_SHIFT); - /* We will fix R value to 1 and will make P & J=K.D as varialble */ - data = snd_soc_read(codec, AIC32X4_PLLPR); - data &= ~(7 << 4); - snd_soc_write(codec, AIC32X4_PLLPR, - (data | (aic32x4_divs[i].p_val << 4) | 0x01)); + /* We will fix R value to 1 and will make P & J=K.D as variable */ + snd_soc_update_bits(codec, AIC32X4_PLLPR, AIC32X4_PLL_R_MASK, 0x01); + /* PLL P value */ + snd_soc_update_bits(codec, AIC32X4_PLLPR, AIC32X4_PLL_P_MASK, + aic32x4_divs[i].p_val << AIC32X4_PLL_P_SHIFT); + + /* PLL J value */ 
snd_soc_write(codec, AIC32X4_PLLJ, aic32x4_divs[i].pll_j); + /* PLL D value */ snd_soc_write(codec, AIC32X4_PLLDMSB, (aic32x4_divs[i].pll_d >> 8)); - snd_soc_write(codec, AIC32X4_PLLDLSB, - (aic32x4_divs[i].pll_d & 0xff)); + snd_soc_write(codec, AIC32X4_PLLDLSB, (aic32x4_divs[i].pll_d & 0xff)); /* NDAC divider value */ - data = snd_soc_read(codec, AIC32X4_NDAC); - data &= ~(0x7f); - snd_soc_write(codec, AIC32X4_NDAC, data | aic32x4_divs[i].ndac); + snd_soc_update_bits(codec, AIC32X4_NDAC, + AIC32X4_NDAC_MASK, aic32x4_divs[i].ndac); /* MDAC divider value */ - data = snd_soc_read(codec, AIC32X4_MDAC); - data &= ~(0x7f); - snd_soc_write(codec, AIC32X4_MDAC, data | aic32x4_divs[i].mdac); + snd_soc_update_bits(codec, AIC32X4_MDAC, + AIC32X4_MDAC_MASK, aic32x4_divs[i].mdac); /* DOSR MSB & LSB values */ snd_soc_write(codec, AIC32X4_DOSRMSB, aic32x4_divs[i].dosr >> 8); - snd_soc_write(codec, AIC32X4_DOSRLSB, - (aic32x4_divs[i].dosr & 0xff)); + snd_soc_write(codec, AIC32X4_DOSRLSB, (aic32x4_divs[i].dosr & 0xff)); /* NADC divider value */ - data = snd_soc_read(codec, AIC32X4_NADC); - data &= ~(0x7f); - snd_soc_write(codec, AIC32X4_NADC, data | aic32x4_divs[i].nadc); + snd_soc_update_bits(codec, AIC32X4_NADC, + AIC32X4_NADC_MASK, aic32x4_divs[i].nadc); /* MADC divider value */ - data = snd_soc_read(codec, AIC32X4_MADC); - data &= ~(0x7f); - snd_soc_write(codec, AIC32X4_MADC, data | aic32x4_divs[i].madc); + snd_soc_update_bits(codec, AIC32X4_MADC, + AIC32X4_MADC_MASK, aic32x4_divs[i].madc); /* AOSR value */ snd_soc_write(codec, AIC32X4_AOSR, aic32x4_divs[i].aosr); /* BCLK N divider */ - data = snd_soc_read(codec, AIC32X4_BCLKN); - data &= ~(0x7f); - snd_soc_write(codec, AIC32X4_BCLKN, data | aic32x4_divs[i].blck_N); + snd_soc_update_bits(codec, AIC32X4_BCLKN, + AIC32X4_BCLK_MASK, aic32x4_divs[i].blck_N); - data = snd_soc_read(codec, AIC32X4_IFACE1); - data = data & ~(3 << 4); switch (params_width(params)) { case 16: - data |= (AIC32X4_WORD_LEN_16BITS << - 
AIC32X4_IFACE1_DATALEN_SHIFT); + iface1_reg |= (AIC32X4_WORD_LEN_16BITS << + AIC32X4_IFACE1_DATALEN_SHIFT); break; case 20: - data |= (AIC32X4_WORD_LEN_20BITS << - AIC32X4_IFACE1_DATALEN_SHIFT); + iface1_reg |= (AIC32X4_WORD_LEN_20BITS << + AIC32X4_IFACE1_DATALEN_SHIFT); break; case 24: - data |= (AIC32X4_WORD_LEN_24BITS << - AIC32X4_IFACE1_DATALEN_SHIFT); + iface1_reg |= (AIC32X4_WORD_LEN_24BITS << + AIC32X4_IFACE1_DATALEN_SHIFT); break; case 32: - data |= (AIC32X4_WORD_LEN_32BITS << - AIC32X4_IFACE1_DATALEN_SHIFT); + iface1_reg |= (AIC32X4_WORD_LEN_32BITS << + AIC32X4_IFACE1_DATALEN_SHIFT); break; } - snd_soc_write(codec, AIC32X4_IFACE1, data); + snd_soc_update_bits(codec, AIC32X4_IFACE1, + AIC32X4_IFACE1_DATALEN_MASK, iface1_reg); if (params_channels(params) == 1) { - data = AIC32X4_RDAC2LCHN | AIC32X4_LDAC2LCHN; + dacsetup_reg = AIC32X4_RDAC2LCHN | AIC32X4_LDAC2LCHN; } else { if (aic32x4->swapdacs) - data = AIC32X4_RDAC2LCHN | AIC32X4_LDAC2RCHN; + dacsetup_reg = AIC32X4_RDAC2LCHN | AIC32X4_LDAC2RCHN; else - data = AIC32X4_LDAC2LCHN | AIC32X4_RDAC2RCHN; + dacsetup_reg = AIC32X4_LDAC2LCHN | AIC32X4_RDAC2RCHN; } - snd_soc_update_bits(codec, AIC32X4_DACSETUP, AIC32X4_DAC_CHAN_MASK, - data); + snd_soc_update_bits(codec, AIC32X4_DACSETUP, + AIC32X4_DAC_CHAN_MASK, dacsetup_reg); return 0; } diff --git a/sound/soc/codecs/tlv320aic32x4.h b/sound/soc/codecs/tlv320aic32x4.h index 67772e5585b8..e9df49edbf19 100644 --- a/sound/soc/codecs/tlv320aic32x4.h +++ b/sound/soc/codecs/tlv320aic32x4.h @@ -115,21 +115,29 @@ int aic32x4_remove(struct device *dev); /* AIC32X4_PLLPR */ #define AIC32X4_PLLEN BIT(7) +#define AIC32X4_PLL_P_MASK GENMASK(6, 4) +#define AIC32X4_PLL_P_SHIFT (4) +#define AIC32X4_PLL_R_MASK GENMASK(3, 0) /* AIC32X4_NDAC */ #define AIC32X4_NDACEN BIT(7) +#define AIC32X4_NDAC_MASK GENMASK(6, 0) /* AIC32X4_MDAC */ #define AIC32X4_MDACEN BIT(7) +#define AIC32X4_MDAC_MASK GENMASK(6, 0) /* AIC32X4_NADC */ #define AIC32X4_NADCEN BIT(7) +#define AIC32X4_NADC_MASK 
GENMASK(6, 0) /* AIC32X4_MADC */ #define AIC32X4_MADCEN BIT(7) +#define AIC32X4_MADC_MASK GENMASK(6, 0) /* AIC32X4_BCLKN */ #define AIC32X4_BCLKEN BIT(7) +#define AIC32X4_BCLK_MASK GENMASK(6, 0) /* AIC32X4_IFACE1 */ #define AIC32X4_IFACE1_DATATYPE_MASK GENMASK(7, 6) -- cgit v1.2.3 From 60fb4be565c9c44f6999aaa9d18808f1ac49d6ef Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:10 -0600 Subject: ASoC: tlv320aic32x4: Use snd_soc_update_bits() in aic32x4_set_dai_fmt() Make the code easier to read by using snd_soc_update_bits() over read/modify/write sequences. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index 63a52cdb7afe..fea019343c3b 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -614,16 +614,9 @@ static int aic32x4_set_dai_sysclk(struct snd_soc_dai *codec_dai, static int aic32x4_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) { struct snd_soc_codec *codec = codec_dai->codec; - u8 iface_reg_1; - u8 iface_reg_2; - u8 iface_reg_3; - - iface_reg_1 = snd_soc_read(codec, AIC32X4_IFACE1); - iface_reg_1 = iface_reg_1 & ~(3 << 6 | 3 << 2); - iface_reg_2 = snd_soc_read(codec, AIC32X4_IFACE2); - iface_reg_2 = 0; - iface_reg_3 = snd_soc_read(codec, AIC32X4_IFACE3); - iface_reg_3 = iface_reg_3 & ~(1 << 3); + u8 iface_reg_1 = 0; + u8 iface_reg_2 = 0; + u8 iface_reg_3 = 0; /* set master/slave audio interface */ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { @@ -643,13 +636,13 @@ static int aic32x4_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) case SND_SOC_DAIFMT_DSP_A: iface_reg_1 |= (AIC32X4_DSP_MODE << AIC32X4_IFACE1_DATATYPE_SHIFT); - iface_reg_3 |= (1 << 3); /* invert bit clock */ + iface_reg_3 |= AIC32X4_BCLKINV_MASK; /* invert bit clock */ iface_reg_2 = 0x01; /* add offset 
1 */ break; case SND_SOC_DAIFMT_DSP_B: iface_reg_1 |= (AIC32X4_DSP_MODE << AIC32X4_IFACE1_DATATYPE_SHIFT); - iface_reg_3 |= (1 << 3); /* invert bit clock */ + iface_reg_3 |= AIC32X4_BCLKINV_MASK; /* invert bit clock */ break; case SND_SOC_DAIFMT_RIGHT_J: iface_reg_1 |= (AIC32X4_RIGHT_JUSTIFIED_MODE << @@ -664,9 +657,14 @@ static int aic32x4_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) return -EINVAL; } - snd_soc_write(codec, AIC32X4_IFACE1, iface_reg_1); - snd_soc_write(codec, AIC32X4_IFACE2, iface_reg_2); - snd_soc_write(codec, AIC32X4_IFACE3, iface_reg_3); + snd_soc_update_bits(codec, AIC32X4_IFACE1, + AIC32X4_IFACE1_DATATYPE_MASK | + AIC32X4_IFACE1_MASTER_MASK, iface_reg_1); + snd_soc_update_bits(codec, AIC32X4_IFACE2, + AIC32X4_DATA_OFFSET_MASK, iface_reg_2); + snd_soc_update_bits(codec, AIC32X4_IFACE3, + AIC32X4_BCLKINV_MASK, iface_reg_3); + return 0; } -- cgit v1.2.3 From 9245f647fc7b82b88587b2d1de02fae66d2d314e Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 12 Dec 2017 16:43:11 -0600 Subject: ASoC: tlv320aic32x4: Make driver selectable in Kconfig Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index a42ddbc93f3d..7780dcf02f99 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -913,12 +913,12 @@ config SND_SOC_TLV320AIC32X4 tristate config SND_SOC_TLV320AIC32X4_I2C - tristate + tristate "Texas Instruments TLV320AIC32x4 audio CODECs - I2C" depends on I2C select SND_SOC_TLV320AIC32X4 config SND_SOC_TLV320AIC32X4_SPI - tristate + tristate "Texas Instruments TLV320AIC32x4 audio CODECs - SPI" depends on SPI_MASTER select SND_SOC_TLV320AIC32X4 -- cgit v1.2.3 From 4b4df570b41dbb421f52605357d5d56c872df6d9 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 13 Dec 2017 00:44:26 -0800 Subject: drm: Update edid-derived drm_display_info fields at edid property set [v2] There are a set of values in the drm_display_info structure for each connector which hold information derived from EDID. These are computed in drm_add_display_info. Before this patch, that was only called in drm_add_edid_modes. This meant that they were only set when EDID was present and never reset when EDID was not, as happened when the display was disconnected. One of these fields, non_desktop, is used from drm_mode_connector_update_edid_property, the function responsible for assigning the new edid value to the application-visible property. Various drivers call these two functions (drm_add_edid_modes and drm_mode_connector_update_edid_property) in different orders. This means that even when EDID is present, the drm_display_info fields may not have been computed at the time that drm_mode_connector_update_edid_property used the non_desktop value to set the non_desktop property. I've added a public function (drm_reset_display_info) that resets the drm_display_info field values to default values and then made the drm_add_display_info function public. 
These two functions are now called directly from drm_mode_connector_update_edid_property so that the drm_display_info fields are always computed from the current EDID information before being used in that function. This means that the drm_display_info values are often computed twice, once when the EDID property is set and a second time when EDID is used to compute modes for the device. The alternative would be to uniformly ensure that the values were computed once before being used, which would require that all drivers reliably invoke the two paths in the same order. The computation is inexpensive enough that it seems more maintainable in the long term to simply compute them in both paths. The API to drm_add_display_info has been changed so that it no longer takes the set of edid-based quirks as a parameter. Rather, it now computes those quirks itself and returns them for further use by drm_add_edid_modes. This patch also includes a number of 'const' additions caused by drm_mode_connector_update_edid_property taking a 'const struct edid *' parameter and wanting to pass that along to drm_add_display_info. v2: after review by Daniel Vetter Removed EXPORT_SYMBOL_GPL for drm_reset_display_info and drm_add_display_info. Added FIXME in drm_mode_connector_update_edid_property about potentially merging that with drm_add_edid_modes to avoid the need for two driver calls. Signed-off-by: Keith Packard Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171213084427.31199-1-keithp@keithp.com (danvet: cherry picked from commit 12a889bf4bca ("drm: rework delayed connector cleanup in connector_iter") from drm-misc-next since functional conflict with changes in -next and we need to make sure both have the right version and nothing gets lost.)
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_connector.c | 13 +++++++++++ drivers/gpu/drm/drm_edid.c | 52 ++++++++++++++++++++++++++++++----------- include/drm/drm_edid.h | 2 ++ 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 482014137953..c4dfcbc861a1 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1231,6 +1231,19 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector, if (edid) size = EDID_LENGTH * (1 + edid->extensions); + /* Set the display info, using edid if available, otherwise + * reseting the values to defaults. This duplicates the work + * done in drm_add_edid_modes, but that function is not + * consistently called before this one in all drivers and the + * computation is cheap enough that it seems better to + * duplicate it rather than attempt to ensure some arbitrary + * ordering of calls. + */ + if (edid) + drm_add_display_info(connector, edid); + else + drm_reset_display_info(connector); + drm_object_property_set_value(&connector->base, dev->mode_config.non_desktop_property, connector->display_info.non_desktop); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 5dfe14763871..cb487148359a 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1731,7 +1731,7 @@ EXPORT_SYMBOL(drm_edid_duplicate); * * Returns true if @vendor is in @edid, false otherwise */ -static bool edid_vendor(struct edid *edid, const char *vendor) +static bool edid_vendor(const struct edid *edid, const char *vendor) { char edid_vendor[3]; @@ -1749,7 +1749,7 @@ static bool edid_vendor(struct edid *edid, const char *vendor) * * This tells subsequent routines what fixes they need to apply. 
*/ -static u32 edid_get_quirks(struct edid *edid) +static u32 edid_get_quirks(const struct edid *edid) { const struct edid_quirk *quirk; int i; @@ -2813,7 +2813,7 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid, /* * Search EDID for CEA extension block. */ -static u8 *drm_find_edid_extension(struct edid *edid, int ext_id) +static u8 *drm_find_edid_extension(const struct edid *edid, int ext_id) { u8 *edid_ext = NULL; int i; @@ -2835,12 +2835,12 @@ static u8 *drm_find_edid_extension(struct edid *edid, int ext_id) return edid_ext; } -static u8 *drm_find_cea_extension(struct edid *edid) +static u8 *drm_find_cea_extension(const struct edid *edid) { return drm_find_edid_extension(edid, CEA_EXT); } -static u8 *drm_find_displayid_extension(struct edid *edid) +static u8 *drm_find_displayid_extension(const struct edid *edid) { return drm_find_edid_extension(edid, DISPLAYID_EXT); } @@ -4363,7 +4363,7 @@ drm_parse_hdmi_vsdb_video(struct drm_connector *connector, const u8 *db) } static void drm_parse_cea_ext(struct drm_connector *connector, - struct edid *edid) + const struct edid *edid) { struct drm_display_info *info = &connector->display_info; const u8 *edid_ext; @@ -4397,11 +4397,33 @@ static void drm_parse_cea_ext(struct drm_connector *connector, } } -static void drm_add_display_info(struct drm_connector *connector, - struct edid *edid, u32 quirks) +/* A connector has no EDID information, so we've got no EDID to compute quirks from. 
Reset + * all of the values which would have been set from EDID + */ +void +drm_reset_display_info(struct drm_connector *connector) { struct drm_display_info *info = &connector->display_info; + info->width_mm = 0; + info->height_mm = 0; + + info->bpc = 0; + info->color_formats = 0; + info->cea_rev = 0; + info->max_tmds_clock = 0; + info->dvi_dual = false; + + info->non_desktop = 0; +} +EXPORT_SYMBOL_GPL(drm_reset_display_info); + +u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid) +{ + struct drm_display_info *info = &connector->display_info; + + u32 quirks = edid_get_quirks(edid); + info->width_mm = edid->width_cm * 10; info->height_mm = edid->height_cm * 10; @@ -4414,11 +4436,13 @@ static void drm_add_display_info(struct drm_connector *connector, info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP); + DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop); + if (edid->revision < 3) - return; + return quirks; if (!(edid->input & DRM_EDID_INPUT_DIGITAL)) - return; + return quirks; drm_parse_cea_ext(connector, edid); @@ -4438,7 +4462,7 @@ static void drm_add_display_info(struct drm_connector *connector, /* Only defined for 1.4 with digital displays */ if (edid->revision < 4) - return; + return quirks; switch (edid->input & DRM_EDID_DIGITAL_DEPTH_MASK) { case DRM_EDID_DIGITAL_DEPTH_6: @@ -4473,7 +4497,9 @@ static void drm_add_display_info(struct drm_connector *connector, info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422) info->color_formats |= DRM_COLOR_FORMAT_YCRCB422; + return quirks; } +EXPORT_SYMBOL_GPL(drm_add_display_info); static int validate_displayid(u8 *displayid, int length, int idx) { @@ -4627,14 +4653,12 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) return 0; } - quirks = edid_get_quirks(edid); - /* * CEA-861-F adds ycbcr capability map block, for HDMI 2.0 sinks. 
* To avoid multiple parsing of same block, lets parse that map * from sink info, before parsing CEA modes. */ - drm_add_display_info(connector, edid, quirks); + quirks = drm_add_display_info(connector, edid); /* * EDID spec says modes should be preferred in this order: diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 2ec41d032e56..efe6d5a8e834 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -465,6 +465,8 @@ struct edid *drm_get_edid(struct drm_connector *connector, struct edid *drm_get_edid_switcheroo(struct drm_connector *connector, struct i2c_adapter *adapter); struct edid *drm_edid_duplicate(const struct edid *edid); +void drm_reset_display_info(struct drm_connector *connector); +u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid); int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid); u8 drm_match_cea_mode(const struct drm_display_mode *to_match); -- cgit v1.2.3 From 366d8216488319ed29308b977cd62b7964a779b7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 13 Dec 2017 09:21:59 +0100 Subject: s390/sclp: disable FORTIFY_SOURCE for early sclp code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michal Suchánek reported the following compile error with FORTIFY_SOURCE enabled: drivers/s390/char/sclp_early_core.o: In function `memcpy': include/linux/string.h:340: undefined reference to `fortify_panic' To fix this simply disable FORTIFY_SOURCE on the early sclp code as well, which I forgot on the initial commit. 
Fixes: 79962038dffa ("s390: add support for FORTIFY_SOURCE") Reported-by: Michal Suchánek Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 05ac6ba15a53..614b44e70a28 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -17,6 +17,8 @@ CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) CFLAGS_sclp_early_core.o += -march=z900 endif +CFLAGS_sclp_early_core.o += -D__NO_FORTIFY + obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \ sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \ sclp_early.o sclp_early_core.o -- cgit v1.2.3 From 6b782f43d34974c7909306fd9af06241d658a1f7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Dec 2017 09:54:09 +0100 Subject: Revert "ravb: add workaround for clock when resuming with WoL enabled" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit fbf3d034f2ff6264183cfa6845770e8cc2a986c8. As of commit 560869100b99a3da ("clk: renesas: cpg-mssr: Restore module clocks during resume"), the workaround is no longer needed. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 2b962d349f5f..009780df664b 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2308,32 +2308,9 @@ static int __maybe_unused ravb_resume(struct device *dev) struct ravb_private *priv = netdev_priv(ndev); int ret = 0; - if (priv->wol_enabled) { - /* Reduce the usecount of the clock to zero and then - * restore it to its original value. 
This is done to force - * the clock to be re-enabled which is a workaround - * for renesas-cpg-mssr driver which do not enable clocks - * when resuming from PSCI suspend/resume. - * - * Without this workaround the driver fails to communicate - * with the hardware if WoL was enabled when the system - * entered PSCI suspend. This is due to that if WoL is enabled - * we explicitly keep the clock from being turned off when - * suspending, but in PSCI sleep power is cut so the clock - * is disabled anyhow, the clock driver is not aware of this - * so the clock is not turned back on when resuming. - * - * TODO: once the renesas-cpg-mssr suspend/resume is working - * this clock dance should be removed. - */ - clk_disable(priv->clk); - clk_disable(priv->clk); - clk_enable(priv->clk); - clk_enable(priv->clk); - - /* Set reset mode to rearm the WoL logic */ + /* If WoL is enabled set reset mode to rearm the WoL logic */ + if (priv->wol_enabled) ravb_write(ndev, CCC_OPC_RESET, CCC); - } /* All register have been reset to default values. * Restore all registers which where setup at probe time and -- cgit v1.2.3 From 72465736adf2aade263a9475a1d42007fd49e703 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 12 Dec 2017 16:56:43 +0000 Subject: regmap: Disable debugfs when locking is disabled The recently added support for disabling the regmap internal locking left debugfs enabled for devices with the locking disabled. This is a problem since debugfs allows userspace to do things like initiate reads from the hardware which will use the scratch buffers protected by the regmap locking so could cause data corruption. For safety address this by just disabling debugfs for these devices. That is overly conservative since some of the debugfs files just read internal data structures but it's much simpler to implement and less likely to lead to problems with tooling that works with debugfs.
Reported-by: Lars-Peter Clausen Signed-off-by: Mark Brown --- drivers/base/regmap/internal.h | 8 ++++++++ drivers/base/regmap/regmap-debugfs.c | 3 +++ drivers/base/regmap/regmap.c | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index 8641183cac2f..53785e0e297a 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -77,6 +77,7 @@ struct regmap { int async_ret; #ifdef CONFIG_DEBUG_FS + bool debugfs_disable; struct dentry *debugfs; const char *debugfs_name; @@ -215,10 +216,17 @@ struct regmap_field { extern void regmap_debugfs_initcall(void); extern void regmap_debugfs_init(struct regmap *map, const char *name); extern void regmap_debugfs_exit(struct regmap *map); + +static inline void regmap_debugfs_disable(struct regmap *map) +{ + map->debugfs_disable = true; +} + #else static inline void regmap_debugfs_initcall(void) { } static inline void regmap_debugfs_init(struct regmap *map, const char *name) { } static inline void regmap_debugfs_exit(struct regmap *map) { } +static inline void regmap_debugfs_disable(struct regmap *map) { } #endif /* regcache core declarations */ diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 36ce3511c733..c8ecefd75d6f 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -529,6 +529,9 @@ void regmap_debugfs_init(struct regmap *map, const char *name) struct regmap_range_node *range_node; const char *devname = "dummy"; + if (map->debugfs_disable) + return; + /* If we don't have the debugfs root yet, postpone init */ if (!regmap_debugfs_root) { struct regmap_debugfs_node *node; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 54b1aa371c61..df9ca36753ff 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -676,6 +676,7 @@ struct regmap *__regmap_init(struct device *dev, if (config->disable_locking) { 
map->lock = map->unlock = regmap_lock_unlock_none; + regmap_debugfs_disable(map); } else if (config->lock && config->unlock) { map->lock = config->lock; map->unlock = config->unlock; -- cgit v1.2.3 From 8253bb3f82554cedb830a4cb65c84796df129c81 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 13 Dec 2017 17:25:31 +0100 Subject: regmap: potentially duplicate the name string stored in regmap Currently we just copy over the pointer passed to regmap_init() in the regmap config struct. To be on the safe side: duplicate the string with kstrdup_const() so that if an unaware user passes an address to a stack-allocated buffer, we won't crash. Signed-off-by: Bartosz Golaszewski Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 496da7bc5e77..84b5784e171b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -672,6 +672,14 @@ struct regmap *__regmap_init(struct device *dev, goto err; } + if (config->name) { + map->name = kstrdup_const(config->name, GFP_KERNEL); + if (!map->name) { + ret = -ENOMEM; + goto err_map; + } + } + if (config->disable_locking) { map->lock = map->unlock = regmap_lock_unlock_none; regmap_debugfs_disable(map); @@ -683,7 +691,7 @@ struct regmap *__regmap_init(struct device *dev, map->hwlock = hwspin_lock_request_specific(config->hwlock_id); if (!map->hwlock) { ret = -ENXIO; - goto err_map; + goto err_name; } switch (config->hwlock_mode) { @@ -763,7 +771,6 @@ struct regmap *__regmap_init(struct device *dev, map->volatile_reg = config->volatile_reg; map->precious_reg = config->precious_reg; map->cache_type = config->cache_type; - map->name = config->name; spin_lock_init(&map->async_lock); INIT_LIST_HEAD(&map->async_list); @@ -1119,6 +1126,8 @@ err_range: err_hwlock: if (map->hwlock) hwspin_lock_free(map->hwlock); +err_name: + kfree_const(map->name); err_map: 
kfree(map); err: @@ -1308,6 +1317,7 @@ void regmap_exit(struct regmap *map) } if (map->hwlock) hwspin_lock_free(map->hwlock); + kfree_const(map->name); kfree(map); } EXPORT_SYMBOL_GPL(regmap_exit); -- cgit v1.2.3 From 9d98e19ba08f6aa33a4a1414f3dfe8440e67530c Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 13 Dec 2017 12:25:19 +0200 Subject: IB/ipoib: Restore MM behavior in case of tx_ring allocation failure memalloc_noio_save modifies the behavior of MM, we must restore it after we are done. Fixes: d83187dda9b9 ("IB/IPoIB: Convert IPoIB to memalloc_noio_* calls") Signed-off-by: Yuval Shaia Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 87f4bd99cdf7..2c13123bfd69 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1145,6 +1145,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, noio_flag = memalloc_noio_save(); p->tx_ring = vzalloc(ipoib_sendq_size * sizeof(*p->tx_ring)); if (!p->tx_ring) { + memalloc_noio_restore(noio_flag); ret = -ENOMEM; goto err_tx; } -- cgit v1.2.3 From b9b312a7a451e9c098921856e7cfbc201120e1a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Dec 2017 07:03:38 -0800 Subject: ipv6: mcast: better catch silly mtu values syzkaller reported crashes in IPv6 stack [1] Xin Long found that lo MTU was set to silly values. IPv6 stack reacts to changes to small MTU, by disabling itself under RTNL. But there is a window where threads not using RTNL can see a wrong device mtu. This can lead to surprises, in mld code where it is assumed the mtu is suitable. Fix this by reading device mtu once and checking IPv6 minimal MTU. 
[1] skbuff: skb_over_panic: text:0000000010b86b8d len:196 put:20 head:000000003b477e60 data:000000000e85441e tail:0xd4 end:0xc0 dev:lo ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:104! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc2-mm1+ #39 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_panic+0x15c/0x1f0 net/core/skbuff.c:100 RSP: 0018:ffff8801db307508 EFLAGS: 00010286 RAX: 0000000000000082 RBX: ffff8801c517e840 RCX: 0000000000000000 RDX: 0000000000000082 RSI: 1ffff1003b660e61 RDI: ffffed003b660e95 RBP: ffff8801db307570 R08: 1ffff1003b660e23 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff85bd4020 R13: ffffffff84754ed2 R14: 0000000000000014 R15: ffff8801c4e26540 FS: 0000000000000000(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000463610 CR3: 00000001c6698000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_over_panic net/core/skbuff.c:109 [inline] skb_put+0x181/0x1c0 net/core/skbuff.c:1694 add_grhead.isra.24+0x42/0x3b0 net/ipv6/mcast.c:1695 add_grec+0xa55/0x1060 net/ipv6/mcast.c:1817 mld_send_cr net/ipv6/mcast.c:1903 [inline] mld_ifc_timer_expire+0x4d2/0x770 net/ipv6/mcast.c:2448 call_timer_fn+0x23b/0x840 kernel/time/timer.c:1320 expire_timers kernel/time/timer.c:1357 [inline] __run_timers+0x7e1/0xb60 kernel/time/timer.c:1660 run_timer_softirq+0x4c/0xb0 kernel/time/timer.c:1686 __do_softirq+0x29d/0xbb2 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1d3/0x210 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:540 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 
arch/x86/entry/entry_64.S:920 Signed-off-by: Eric Dumazet Reported-by: syzbot Tested-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index fc6d7d143f2c..844642682b83 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1682,16 +1682,16 @@ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, - int type, struct mld2_grec **ppgr) + int type, struct mld2_grec **ppgr, unsigned int mtu) { - struct net_device *dev = pmc->idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr; - if (!skb) - skb = mld_newpack(pmc->idev, dev->mtu); - if (!skb) - return NULL; + if (!skb) { + skb = mld_newpack(pmc->idev, mtu); + if (!skb) + return NULL; + } pgr = skb_put(skb, sizeof(struct mld2_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; @@ -1714,10 +1714,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; int scount, stotal, first, isquery, truncate; + unsigned int mtu; if (pmc->mca_flags & MAF_NOREPORT) return skb; + mtu = READ_ONCE(dev->mtu); + if (mtu < IPV6_MIN_MTU) + return skb; + isquery = type == MLD2_MODE_IS_INCLUDE || type == MLD2_MODE_IS_EXCLUDE; truncate = type == MLD2_MODE_IS_EXCLUDE || @@ -1738,7 +1743,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(idev, dev->mtu); + skb = mld_newpack(idev, mtu); } } first = 1; @@ -1774,12 +1779,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(idev, dev->mtu); + skb = mld_newpack(idev, mtu); first = 1; scount = 0; } 
if (first) { - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) @@ -1814,7 +1819,7 @@ empty_source: mld_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) -- cgit v1.2.3 From b5476022bbada3764609368f03329ca287528dc8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Dec 2017 07:17:39 -0800 Subject: ipv4: igmp: guard against silly MTU values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IPv4 stack reacts to changes to small MTU, by disabling itself under RTNL. But there is a window where threads not using RTNL can see a wrong device mtu. This can lead to surprises, in igmp code where it is assumed the mtu is suitable. Fix this by reading device mtu once and checking IPv4 minimal MTU. This patch adds missing IPV4_MIN_MTU define, to not abuse ETH_MIN_MTU anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/ip.h | 1 + net/ipv4/devinet.c | 2 +- net/ipv4/igmp.c | 24 +++++++++++++++--------- net/ipv4/ip_tunnel.c | 4 ++-- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 9896f46cbbf1..af8addbaa3c1 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -34,6 +34,7 @@ #include #define IPV4_MAX_PMTU 65535U /* RFC 2675, Section 5.1 */ +#define IPV4_MIN_MTU 68 /* RFC 791 */ struct sock; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a4573bccd6da..7a93359fbc72 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1428,7 +1428,7 @@ skip: static bool inetdev_valid_mtu(unsigned int mtu) { - return mtu >= 68; + return mtu >= IPV4_MIN_MTU; } static void inetdev_send_gratuitous_arp(struct net_device *dev, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d1f8f302dbf3..50448a220a1f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -404,16 +404,17 @@ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) } static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, - int type, struct igmpv3_grec **ppgr) + int type, struct igmpv3_grec **ppgr, unsigned int mtu) { struct net_device *dev = pmc->interface->dev; struct igmpv3_report *pih; struct igmpv3_grec *pgr; - if (!skb) - skb = igmpv3_newpack(dev, dev->mtu); - if (!skb) - return NULL; + if (!skb) { + skb = igmpv3_newpack(dev, mtu); + if (!skb) + return NULL; + } pgr = skb_put(skb, sizeof(struct igmpv3_grec)); pgr->grec_type = type; pgr->grec_auxwords = 0; @@ -436,12 +437,17 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, struct igmpv3_grec *pgr = NULL; struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list; int scount, stotal, first, isquery, truncate; + unsigned int mtu; if (pmc->multiaddr == IGMP_ALL_HOSTS) return skb; if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports) return skb; + mtu = READ_ONCE(dev->mtu); + if (mtu < 
IPV4_MIN_MTU) + return skb; + isquery = type == IGMPV3_MODE_IS_INCLUDE || type == IGMPV3_MODE_IS_EXCLUDE; truncate = type == IGMPV3_MODE_IS_EXCLUDE || @@ -462,7 +468,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) igmpv3_sendpack(skb); - skb = igmpv3_newpack(dev, dev->mtu); + skb = igmpv3_newpack(dev, mtu); } } first = 1; @@ -498,12 +504,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, pgr->grec_nsrcs = htons(scount); if (skb) igmpv3_sendpack(skb); - skb = igmpv3_newpack(dev, dev->mtu); + skb = igmpv3_newpack(dev, mtu); first = 1; scount = 0; } if (first) { - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); first = 0; } if (!skb) @@ -538,7 +544,7 @@ empty_source: igmpv3_sendpack(skb); skb = NULL; /* add_grhead will get a new one */ } - skb = add_grhead(skb, pmc, type, &pgr); + skb = add_grhead(skb, pmc, type, &pgr, mtu); } } if (pgr) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fe6fee728ce4..5ddb1cb52bd4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -349,8 +349,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev) dev->needed_headroom = t_hlen + hlen; mtu -= (dev->hard_header_len + t_hlen); - if (mtu < 68) - mtu = 68; + if (mtu < IPV4_MIN_MTU) + mtu = IPV4_MIN_MTU; return mtu; } -- cgit v1.2.3 From 83593010d3b87601e775f240ce46c53ddf25828d Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Mon, 11 Dec 2017 22:09:46 +0530 Subject: net: remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. Signed-off-by: Pravin Shedge Acked-by: Pablo Neira Ayuso Signed-off-by: David S. 
Miller --- net/core/netprio_cgroup.c | 1 - net/dsa/slave.c | 1 - net/netfilter/nf_conntrack_netlink.c | 1 - net/sched/act_meta_mark.c | 1 - net/sched/act_meta_skbtcindex.c | 1 - net/sched/cls_api.c | 1 - net/sched/cls_u32.c | 1 - 7 files changed, 7 deletions(-) diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 1c4810919a0a..b9057478d69c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d6e7a642493b..a95a55f79137 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59c08997bfdf..332b51870ed7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #ifdef CONFIG_NF_NAT_NEEDED #include diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c index 1e3f10e5da99..6445184b2759 100644 --- a/net/sched/act_meta_mark.c +++ b/net/sched/act_meta_mark.c @@ -22,7 +22,6 @@ #include #include #include -#include static int skbmark_encode(struct sk_buff *skb, void *skbdata, struct tcf_meta_info *e) diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c index 2ea1f26c9e96..7221437ca3a6 100644 --- a/net/sched/act_meta_skbtcindex.c +++ b/net/sched/act_meta_skbtcindex.c @@ -22,7 +22,6 @@ #include #include #include -#include static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, struct tcf_meta_info *e) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ddcf04b4ab43..f40256a3e7f0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 
ac152b4f4247..507859cdd1cb 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -45,7 +45,6 @@ #include #include #include -#include #include struct tc_u_knode { -- cgit v1.2.3 From c545a945d0d9ea2ea2c7d23d43cf0d86e32cd7cf Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 11 Dec 2017 19:11:55 +0100 Subject: tipc: eliminate potential memory leak In the function tipc_sk_mcast_rcv() we call refcount_dec(&skb->users) on received sk_buffers. Since the reference counter might hit zero at this point, we have a potential memory leak. We fix this by replacing refcount_dec() with kfree_skb(). Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5d18c0caa92b..41127d0b925e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1140,7 +1140,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, __skb_dequeue(arrvq); __skb_queue_tail(inputq, skb); } - refcount_dec(&skb->users); + kfree_skb(skb); spin_unlock_bh(&inputq->lock); continue; } -- cgit v1.2.3 From a46182b00290839fa3fa159d54fd3237bd8669f0 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Mon, 11 Dec 2017 11:13:45 -0800 Subject: net: igmp: Use correct source address on IGMPv3 reports Closing a multicast socket after the final IPv4 address is deleted from an interface can generate a membership report that uses the source IP from a different interface. 
The following test script, run from an isolated netns, reproduces the issue: #!/bin/bash ip link add dummy0 type dummy ip link add dummy1 type dummy ip link set dummy0 up ip link set dummy1 up ip addr add 10.1.1.1/24 dev dummy0 ip addr add 192.168.99.99/24 dev dummy1 tcpdump -U -i dummy0 & socat EXEC:"sleep 2" \ UDP4-DATAGRAM:239.101.1.68:8889,ip-add-membership=239.0.1.68:10.1.1.1 & sleep 1 ip addr del 10.1.1.1/24 dev dummy0 sleep 5 kill %tcpdump RFC 3376 specifies that the report must be sent with a valid IP source address from the destination subnet, or from address 0.0.0.0. Add an extra check to make sure this is the case. Signed-off-by: Kevin Cernekee Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 50448a220a1f..726f6b608274 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include @@ -321,6 +322,23 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } +/* source address selection per RFC 3376 section 4.2.13 */ +static __be32 igmpv3_get_srcaddr(struct net_device *dev, + const struct flowi4 *fl4) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (!in_dev) + return htonl(INADDR_ANY); + + for_ifa(in_dev) { + if (inet_ifa_match(fl4->saddr, ifa)) + return fl4->saddr; + } endfor_ifa(in_dev); + + return htonl(INADDR_ANY); +} + static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; @@ -368,7 +386,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) pip->frag_off = htons(IP_DF); pip->ttl = 1; pip->daddr = fl4.daddr; - pip->saddr = fl4.saddr; + pip->saddr = igmpv3_get_srcaddr(dev, &fl4); pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ ip_select_ident(net, skb, NULL); -- cgit v1.2.3 From 
aceef61ee56898cfa7b6960fb60b9326c3860441 Mon Sep 17 00:00:00 2001 From: Sebastian Sjoholm Date: Mon, 11 Dec 2017 21:51:14 +0100 Subject: net: qmi_wwan: add Sierra EM7565 1199:9091 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sierra Wireless EM7565 is a Qualcomm MDM9x50 based M.2 modem. The USB id is added to qmi_wwan.c to allow QMI communication with the EM7565. Signed-off-by: Sebastian Sjoholm Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 304ec6555cd8..d2ca5a202e8d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1204,6 +1204,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ {QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */ + {QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ -- cgit v1.2.3 From 2a9ee696c72a24d63529c76483fcd92d04b1d2b7 Mon Sep 17 00:00:00 2001 From: Branislav Radocaj Date: Tue, 12 Dec 2017 00:13:38 +0100 Subject: net: ethernet: arc: fix error handling in emac_rockchip_probe If clk_set_rate() fails, we should disable clk before return. Found by Linux Driver Verification project (linuxtesting.org). Changes since v2 [1]: * Merged with latest code changes Changes since v1: Update made thanks to David's review, much appreciated David. * Improved inconsistent failure handling of clock rate setting * For completeness of usecase, added arc_emac_probe error handling Signed-off-by: Branislav Radocaj Signed-off-by: David S.
Miller --- drivers/net/ethernet/arc/emac_rockchip.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index c6163874e4e7..16f9bee992fe 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -199,9 +199,11 @@ static int emac_rockchip_probe(struct platform_device *pdev) /* RMII interface needs always a rate of 50MHz */ err = clk_set_rate(priv->refclk, 50000000); - if (err) + if (err) { dev_err(dev, "failed to change reference clock rate (%d)\n", err); + goto out_regulator_disable; + } if (priv->soc_data->need_div_macclk) { priv->macclk = devm_clk_get(dev, "macclk"); @@ -230,12 +232,14 @@ static int emac_rockchip_probe(struct platform_device *pdev) err = arc_emac_probe(ndev, interface); if (err) { dev_err(dev, "failed to probe arc emac (%d)\n", err); - goto out_regulator_disable; + goto out_clk_disable_macclk; } return 0; + out_clk_disable_macclk: - clk_disable_unprepare(priv->macclk); + if (priv->soc_data->need_div_macclk) + clk_disable_unprepare(priv->macclk); out_regulator_disable: if (priv->regulator) regulator_disable(priv->regulator); -- cgit v1.2.3 From 16eab16ef9382704ff12c68ef52d62c68219cbb9 Mon Sep 17 00:00:00 2001 From: Takuo Koguchi Date: Thu, 7 Dec 2017 19:11:41 +0900 Subject: spi: jcore: disable clock when registering spi controller failed When probe function fails in registering the spi controller, the clock should remain disabled. Found by Linux Driver Verification project (linuxtesting.org).
Signed-off-by: Takuo Koguchi Signed-off-by: Mark Brown --- drivers/spi/spi-jcore.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-jcore.c b/drivers/spi/spi-jcore.c index cebfea5faa4b..dafed6280df3 100644 --- a/drivers/spi/spi-jcore.c +++ b/drivers/spi/spi-jcore.c @@ -198,8 +198,10 @@ static int jcore_spi_probe(struct platform_device *pdev) /* Register our spi controller */ err = devm_spi_register_master(&pdev->dev, master); - if (err) + if (err) { + clk_disable(clk); goto exit; + } return 0; -- cgit v1.2.3 From 6e266610eb6553cfb7e7eb5d11914bd01509c406 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 12 Dec 2017 16:49:52 +0800 Subject: hippi: Fix a possible sleep-in-atomic bug in rr_close The driver may sleep under a spinlock. The function call path is: rr_close (acquire the spinlock) free_irq --> may sleep To fix it, free_irq is moved to the place without holding the spinlock. This bug is found by my static analysis tool (DSAC) and checked by my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- drivers/net/hippi/rrunner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 8483f03d5a41..1ab97d99b9ba 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1379,8 +1379,8 @@ static int rr_close(struct net_device *dev) rrpriv->info_dma); rrpriv->info = NULL; - free_irq(pdev->irq, dev); spin_unlock_irqrestore(&rrpriv->lock, flags); + free_irq(pdev->irq, dev); return 0; } -- cgit v1.2.3 From 2e51a8dc7fdc9d06c52a0a0e442cc813357ea44d Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 12 Dec 2017 09:29:46 +0000 Subject: net: dsa: allow XAUI phy interface mode XGMII is a 32-bit bus plus two clock signals per direction. XAUI is four serial lanes per direction. The 88e6190 supports XAUI but not XGMII as it doesn't have enough pins. The same is true of 88e6176.
Match on PHY_INTERFACE_MODE_XAUI for the XAUI port type, but keep accepting XGMII for backwards compatibility. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/port.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index a7801f6668a5..6315774d72b3 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -338,6 +338,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port, cmode = MV88E6XXX_PORT_STS_CMODE_2500BASEX; break; case PHY_INTERFACE_MODE_XGMII: + case PHY_INTERFACE_MODE_XAUI: cmode = MV88E6XXX_PORT_STS_CMODE_XAUI; break; case PHY_INTERFACE_MODE_RXAUI: -- cgit v1.2.3 From cd8165c3d5fb07667328434835f2968a87caee67 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 12 Dec 2017 09:29:51 +0000 Subject: ARM: dts: vf610-zii-dev: use XAUI for DSA link ports Use XAUI rather than XGMII for DSA link ports, as this is the interface mode that the switches actually use. XAUI is the 4 lane bus with clock per direction, whereas XGMII is a 32 bit bus with clock. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- arch/arm/boot/dts/vf610-zii-dev-rev-c.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts index 02a6227c717c..15a685dc2aa2 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts @@ -121,7 +121,7 @@ switch0port10: port@10 { reg = <10>; label = "dsa"; - phy-mode = "xgmii"; + phy-mode = "xaui"; link = <&switch1port10>; }; }; @@ -208,7 +208,7 @@ switch1port10: port@10 { reg = <10>; label = "dsa"; - phy-mode = "xgmii"; + phy-mode = "xaui"; link = <&switch0port10>; }; }; -- cgit v1.2.3 From f5e64032a799d4f54decc7eb6aafcdffb67f9ad9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 12 Dec 2017 10:45:36 +0000 Subject: net: phy: fix resume handling When a PHY has the BMCR_PDOWN bit set, it may decide to ignore writes to other registers, or reset the registers to power-on defaults. Micrel PHYs do this for their interrupt registers. The current structure of phylib tries to enable interrupts before resuming (and releasing) the BMCR_PDOWN bit. This fails, causing Micrel PHYs to stop working after a suspend/resume sequence if they are using interrupts. Fix this by ensuring that the PHY driver resume methods do not take the phydev->lock mutex themselves, but the callers of phy_resume() take that lock. This then allows us to move the call to phy_resume() before we enable interrupts in phy_start(). Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/at803x.c | 4 ---- drivers/net/phy/phy.c | 9 +++------ drivers/net/phy/phy_device.c | 10 ++++++---- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 5f93e6add563..e911e4990b20 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -239,14 +239,10 @@ static int at803x_resume(struct phy_device *phydev) { int value; - mutex_lock(&phydev->lock); - value = phy_read(phydev, MII_BMCR); value &= ~(BMCR_PDOWN | BMCR_ISOLATE); phy_write(phydev, MII_BMCR, value); - mutex_unlock(&phydev->lock); - return 0; } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 2b1e67bc1e73..ed10d1fc8f59 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -828,7 +828,6 @@ EXPORT_SYMBOL(phy_stop); */ void phy_start(struct phy_device *phydev) { - bool do_resume = false; int err = 0; mutex_lock(&phydev->lock); @@ -841,6 +840,9 @@ void phy_start(struct phy_device *phydev) phydev->state = PHY_UP; break; case PHY_HALTED: + /* if phy was suspended, bring the physical link up again */ + phy_resume(phydev); + /* make sure interrupts are re-enabled for the PHY */ if (phydev->irq != PHY_POLL) { err = phy_enable_interrupts(phydev); @@ -849,17 +851,12 @@ void phy_start(struct phy_device *phydev) } phydev->state = PHY_RESUMING; - do_resume = true; break; default: break; } mutex_unlock(&phydev->lock); - /* if phy was suspended, bring the physical link up again */ - if (do_resume) - phy_resume(phydev); - phy_trigger_machine(phydev, true); } EXPORT_SYMBOL(phy_start); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 67f25ac29025..b15b31ca2618 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -135,7 +135,9 @@ static int mdio_bus_phy_resume(struct device *dev) if (!mdio_bus_phy_may_suspend(phydev)) goto no_resume; + mutex_lock(&phydev->lock); ret = phy_resume(phydev); + mutex_unlock(&phydev->lock); if (ret < 0) 
return ret; @@ -1026,7 +1028,9 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, if (err) goto error; + mutex_lock(&phydev->lock); phy_resume(phydev); + mutex_unlock(&phydev->lock); phy_led_triggers_register(phydev); return err; @@ -1157,6 +1161,8 @@ int phy_resume(struct phy_device *phydev) struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); int ret = 0; + WARN_ON(!mutex_is_locked(&phydev->lock)); + if (phydev->drv && phydrv->resume) ret = phydrv->resume(phydev); @@ -1639,13 +1645,9 @@ int genphy_resume(struct phy_device *phydev) { int value; - mutex_lock(&phydev->lock); - value = phy_read(phydev, MII_BMCR); phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN); - mutex_unlock(&phydev->lock); - return 0; } EXPORT_SYMBOL(genphy_resume); -- cgit v1.2.3 From 94a5ef1b77da4674a6bc1d3de3051b758859d106 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 12 Dec 2017 10:49:15 +0000 Subject: of_mdio / mdiobus: ensure mdio devices have fwnode correctly populated Ensure that all mdio devices populate the struct device fwnode pointer as well as the of_node pointer to allow drivers that wish to use fwnode APIs to work. Signed-off-by: Russell King Reviewed-by: Rob Herring Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio_bus.c | 1 + drivers/of/of_mdio.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 2df7b62c1a36..54d00a1d2bef 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -270,6 +270,7 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus, if (addr == mdiodev->addr) { dev->of_node = child; + dev->fwnode = of_fwnode_handle(child); return; } } diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 98258583abb0..3481e69738b5 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -81,6 +81,7 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio, * can be looked up later */ of_node_get(child); phy->mdio.dev.of_node = child; + phy->mdio.dev.fwnode = of_fwnode_handle(child); /* All data is now stored in the phy struct; * register it */ @@ -111,6 +112,7 @@ static int of_mdiobus_register_device(struct mii_bus *mdio, */ of_node_get(child); mdiodev->dev.of_node = child; + mdiodev->dev.fwnode = of_fwnode_handle(child); /* All data is now stored in the mdiodev struct; register it. */ rc = mdio_device_register(mdiodev); @@ -206,6 +208,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) mdio->phy_mask = ~0; mdio->dev.of_node = np; + mdio->dev.fwnode = of_fwnode_handle(np); /* Get bus level PHY reset GPIO details */ mdio->reset_delay_us = DEFAULT_GPIO_RESET_DELAY; -- cgit v1.2.3 From 3b3397e2031564db07022e99f04d4b9f3df6fced Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 12 Dec 2017 13:03:11 +0000 Subject: net: phy: meson-gxl: make function meson_gxl_read_status static The function meson_gxl_read_status is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol 'meson_gxl_read_status' was not declared. Should it be static? Signed-off-by: Colin Ian King Reviewed-by: Jerome Brunet Signed-off-by: David S. 
Miller --- drivers/net/phy/meson-gxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c index 700007dd4be5..842eb871a6e3 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -67,7 +67,7 @@ static int meson_gxl_config_init(struct phy_device *phydev) * When this failure happens, the first retry is usually successful but, * in some cases, it may take up to 6 retries to get a decent result */ -int meson_gxl_read_status(struct phy_device *phydev) +static int meson_gxl_read_status(struct phy_device *phydev) { int ret, wol, lpa, exp; -- cgit v1.2.3 From c009cb842fcc0f84536a9d2692e6f063af5ac5c6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 12 Dec 2017 10:30:29 -0800 Subject: skge: remove redundant free_irq under spinlock The code to handle multi-port SKGE boards was freeing IRQ twice. The first one was under lock and might sleep. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 6e423f098a60..31efc47c847e 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -4081,7 +4081,6 @@ static void skge_remove(struct pci_dev *pdev) if (hw->ports > 1) { skge_write32(hw, B0_IMSK, 0); skge_read32(hw, B0_IMSK); - free_irq(pdev->irq, hw); } spin_unlock_irq(&hw->hw_lock); -- cgit v1.2.3 From 9ee11bd03cb1a5c3ca33c2bb70e7ed325f68890f Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 12 Dec 2017 16:28:58 -0800 Subject: tcp: fix potential underestimation on rcv_rtt When ms timestamp is used, current logic uses 1us in tcp_rcv_rtt_update() when the real rcv_rtt is within 1 - 999us. This could cause rcv_rtt underestimation. Fix it by always using a min value of 1ms if ms timestamp is used.
Fixes: 645f4c6f2ebd ("tcp: switch rcv_rtt_est and rcvq_space to high resolution timestamps") Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9550cc42de2d..45f750e85714 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -508,9 +508,6 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) u32 new_sample = tp->rcv_rtt_est.rtt_us; long m = sample; - if (m == 0) - m = 1; - if (new_sample != 0) { /* If we sample in larger samples in the non-timestamp * case, we could grossly overestimate the RTT especially @@ -547,6 +544,8 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) return; delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time); + if (!delta_us) + delta_us = 1; tcp_rcv_rtt_update(tp, delta_us, 1); new_measure: @@ -563,8 +562,11 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) { u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; - u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + u32 delta_us; + if (!delta) + delta = 1; + delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); tcp_rcv_rtt_update(tp, delta_us, 0); } } -- cgit v1.2.3 From 4688eb7cf3ae2c2721d1dacff5c1384cba47d176 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Dec 2017 18:22:52 -0800 Subject: tcp: refresh tcp_mstamp from timers callbacks Only the retransmit timer currently refreshes tcp_mstamp We should do the same for delayed acks and keepalives. Even if RFC 7323 does not request it, this is consistent to what linux did in the past, when TS values were based on jiffies. 
Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path") Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Mike Maloney Cc: Neal Cardwell Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Acked-by: Mike Maloney Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 16df6dd44b98..968fda198376 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -264,6 +264,7 @@ void tcp_delack_timer_handler(struct sock *sk) icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.ato = TCP_ATO_MIN; } + tcp_mstamp_refresh(tcp_sk(sk)); tcp_send_ack(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } @@ -632,6 +633,7 @@ static void tcp_keepalive_timer (struct timer_list *t) goto out; } + tcp_mstamp_refresh(tp); if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { if (tp->linger2 >= 0) { const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; -- cgit v1.2.3 From 53c64870d03edfa5c554ac2f750c5d6b38e3680a Mon Sep 17 00:00:00 2001 From: Jie Deng Date: Wed, 13 Dec 2017 12:04:12 +0800 Subject: dwc-xlgmac: Add co-maintainer Jose Abreu will join to maintain dwc-xlgmac. He will help with new feature development for this driver. Thanks Jose and welcome on board! Signed-off-by: Jie Deng Signed-off-by: David S. 
Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9e0045e3ee0c..51497dc05333 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13117,6 +13117,7 @@ F: drivers/dma/dw/ SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER M: Jie Deng +M: Jose Abreu L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/synopsys/ -- cgit v1.2.3 From de9c4e06bbe872d725f306e34f3eea21155488e2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 13 Dec 2017 09:22:03 +0000 Subject: net: phy: marvell: avoid configuring fiber page for SGMII-to-Copper When in SGMII-to-Copper mode, the fiber page is used for the MAC facing link, and does not require configuration of the fiber auto-negotiation settings. Avoid trying. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 4d02b27df044..b5a8f750e433 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -637,6 +637,10 @@ static int m88e1510_config_aneg(struct phy_device *phydev) if (err < 0) goto error; + /* Do not touch the fiber page if we're in copper->sgmii mode */ + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) + return 0; + /* Then the fiber link */ err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) -- cgit v1.2.3 From 78034f5fdd622520eb843301cf35ce6c626543a7 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 13 Dec 2017 18:12:09 +0200 Subject: net/mlx4_en: Fix selftest for small MTUs Set the minimal MTU threshold for running loopback selftest. MTU should be big enough to include packet payload, NET_IP_ALIGN, Ethernet headers and preamble length. Fixes: e7c1c2c46201 ("mlx4_en: Added self diagnostics test implementation") Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_selftest.c | 2 +- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 88699b181946..946d9db7c8c2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -185,7 +185,7 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) if (priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) { buf[3] = mlx4_en_test_registers(priv); - if (priv->port_up) + if (priv->port_up && dev->mtu >= MLX4_SELFTEST_LB_MIN_MTU) buf[4] = mlx4_en_test_loopback(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 1856e279a7e0..2b72677eccd4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -153,6 +153,9 @@ #define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) #define HEADER_COPY_SIZE (128 - NET_IP_ALIGN) #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) +#define PREAMBLE_LEN 8 +#define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \ + ETH_HLEN + PREAMBLE_LEN) #define MLX4_EN_MIN_MTU 46 /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple -- cgit v1.2.3 From 0bb9fc4f5429ac970181c073aa32e521e20f7b73 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 13 Dec 2017 18:12:10 +0200 Subject: net/mlx4_core: Fix wrong calculation of free counters The field res_free indicates the total number of counters which are available for allocation (reserved and unreserved). Fixed a bug where the reserved counters were subtracted from res_free before any allocation was performed. Before this fix, free counters which were not reserved could not be allocated. 
Fixes: 9de92c60beaa ("net/mlx4_core: Adjust counter grant policy in the resource tracker") Signed-off-by: Eran Ben Elisha Reviewed-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 04304dd894c6..606a0e0beeae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -611,7 +611,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) MLX4_MAX_PORTS; else res_alloc->guaranteed[t] = 0; - res_alloc->res_free -= res_alloc->guaranteed[t]; break; default: break; -- cgit v1.2.3 From 5a1647c391ba543a77a400dddf89053ec5c2b7a4 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 13 Dec 2017 18:12:11 +0200 Subject: net/mlx4_en: Fill all counters under one call of stats lock Before this patch, the stats_lock was acquired twice. In between the two acquisitions, the driver sent commands to gather some more statistics (per priority and counter statistics). If the stats lock was acquired by the get statistics NDO in between, we would have reported out-of-sync counters. Fix this by collecting all stats from firmware in advance and then filling the software structs under one lock. Fixes: 0b131561a7d6 ("net/mlx4_en: Add Flow control statistics display via ethtool") Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/en_port.c | 57 +++++++++++++++------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index e0eb695318e6..1fa4849a6f56 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -188,7 +188,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) struct net_device *dev = mdev->pndev[port]; struct mlx4_en_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; - struct mlx4_cmd_mailbox *mailbox; + struct mlx4_cmd_mailbox *mailbox, *mailbox_priority; u64 in_mod = reset << 8 | port; int err; int i, counter_index; @@ -198,6 +198,13 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); + + mailbox_priority = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox_priority)) { + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return PTR_ERR(mailbox_priority); + } + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0, MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -206,6 +213,28 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) mlx4_en_stats = mailbox->buf; + memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats)); + counter_index = mlx4_get_default_counter_index(mdev->dev, port); + err = mlx4_get_counter_stats(mdev->dev, counter_index, + &tmp_counter_stats, reset); + + /* 0xffs indicates invalid value */ + memset(mailbox_priority->buf, 0xff, + sizeof(*flowstats) * MLX4_NUM_PRIORITIES); + + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) { + memset(mailbox_priority->buf, 0, + sizeof(*flowstats) * MLX4_NUM_PRIORITIES); + err = mlx4_cmd_box(mdev->dev, 0, mailbox_priority->dma, + in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL, + 0, MLX4_CMD_DUMP_ETH_STATS, + 
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) + goto out; + } + + flowstats = mailbox_priority->buf; + spin_lock_bh(&priv->stats_lock); mlx4_en_fold_software_stats(dev); @@ -345,31 +374,6 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->pkstats.tx_prio[8][0] = be64_to_cpu(mlx4_en_stats->TTOT_novlan); priv->pkstats.tx_prio[8][1] = be64_to_cpu(mlx4_en_stats->TOCT_novlan); - spin_unlock_bh(&priv->stats_lock); - - memset(&tmp_counter_stats, 0, sizeof(tmp_counter_stats)); - counter_index = mlx4_get_default_counter_index(mdev->dev, port); - err = mlx4_get_counter_stats(mdev->dev, counter_index, - &tmp_counter_stats, reset); - - /* 0xffs indicates invalid value */ - memset(mailbox->buf, 0xff, sizeof(*flowstats) * MLX4_NUM_PRIORITIES); - - if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN) { - memset(mailbox->buf, 0, - sizeof(*flowstats) * MLX4_NUM_PRIORITIES); - err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, - in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL, - 0, MLX4_CMD_DUMP_ETH_STATS, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); - if (err) - goto out; - } - - flowstats = mailbox->buf; - - spin_lock_bh(&priv->stats_lock); - if (tmp_counter_stats.counter_mode == 0) { priv->pf_stats.rx_bytes = be64_to_cpu(tmp_counter_stats.rx_bytes); priv->pf_stats.tx_bytes = be64_to_cpu(tmp_counter_stats.tx_bytes); @@ -410,6 +414,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); + mlx4_free_cmd_mailbox(mdev->dev, mailbox_priority); return err; } -- cgit v1.2.3 From ea497bb92064875497554ee7cdf10df7fb7393fc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 13 Dec 2017 13:49:36 +0100 Subject: drm: rework delayed connector cleanup in connector_iter PROBE_DEFER also uses system_wq to reprobe drivers, which means when that again fails, and we try to flush the overall system_wq (to get all the delayed connectore cleanup work_struct completed), we deadlock. 
Fix this by using just a single cleanup work, so that we can only flush that one and don't block on anything else. That means a free list plus locking, a standard pattern. v2: - Correctly free connectors only on last ref. Oops (Chris). - use llist_head/node (Chris). v3 - Add init_llist_head (Chris). Fixes: a703c55004e1 ("drm: safely free connectors from connector_iter") Fixes: 613051dac40d ("drm: locking&new iterators for connector_list") Cc: Ben Widawsky Cc: Dave Airlie Cc: Chris Wilson Cc: Sean Paul Cc: # v4.11+: 613051dac40d ("drm: locking&new iterators for connector_list" Cc: # v4.11+ Cc: Daniel Vetter Cc: Jani Nikula Cc: Gustavo Padovan Cc: David Airlie Cc: Javier Martinez Canillas Cc: Shuah Khan Cc: Guillaume Tucker Cc: Mark Brown Cc: Kevin Hilman Cc: Matt Hart Cc: Thierry Escande Cc: Tomeu Vizoso Cc: Enric Balletbo i Serra Tested-by: Marek Szyprowski Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171213124936.17914-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_connector.c | 50 ++++++++++++++++++++++++++----------- drivers/gpu/drm/drm_crtc_internal.h | 1 + drivers/gpu/drm/drm_mode_config.c | 5 +++- include/drm/drm_connector.h | 10 +++++--- include/drm/drm_mode_config.h | 18 ++++++++++++- 5 files changed, 63 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index c4dfcbc861a1..9ae236036e32 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -152,14 +152,23 @@ static void drm_connector_free(struct kref *kref) connector->funcs->destroy(connector); } -static void drm_connector_free_work_fn(struct work_struct *work) +void drm_connector_free_work_fn(struct work_struct *work) { - struct drm_connector *connector = - container_of(work, struct drm_connector, free_work); - struct drm_device *dev = connector->dev; + struct drm_connector *connector, *n; + struct drm_device *dev = + container_of(work, struct 
drm_device, mode_config.connector_free_work); + struct drm_mode_config *config = &dev->mode_config; + unsigned long flags; + struct llist_node *freed; - drm_mode_object_unregister(dev, &connector->base); - connector->funcs->destroy(connector); + spin_lock_irqsave(&config->connector_list_lock, flags); + freed = llist_del_all(&config->connector_free_list); + spin_unlock_irqrestore(&config->connector_list_lock, flags); + + llist_for_each_entry_safe(connector, n, freed, free_node) { + drm_mode_object_unregister(dev, &connector->base); + connector->funcs->destroy(connector); + } } /** @@ -191,8 +200,6 @@ int drm_connector_init(struct drm_device *dev, if (ret) return ret; - INIT_WORK(&connector->free_work, drm_connector_free_work_fn); - connector->base.properties = &connector->properties; connector->dev = dev; connector->funcs = funcs; @@ -547,10 +554,17 @@ EXPORT_SYMBOL(drm_connector_list_iter_begin); * actually release the connector when dropping our final reference. */ static void -drm_connector_put_safe(struct drm_connector *conn) +__drm_connector_put_safe(struct drm_connector *conn) { - if (refcount_dec_and_test(&conn->base.refcount.refcount)) - schedule_work(&conn->free_work); + struct drm_mode_config *config = &conn->dev->mode_config; + + lockdep_assert_held(&config->connector_list_lock); + + if (!refcount_dec_and_test(&conn->base.refcount.refcount)) + return; + + llist_add(&conn->free_node, &config->connector_free_list); + schedule_work(&config->connector_free_work); } /** @@ -582,10 +596,10 @@ drm_connector_list_iter_next(struct drm_connector_list_iter *iter) /* loop until it's not a zombie connector */ } while (!kref_get_unless_zero(&iter->conn->base.refcount)); - spin_unlock_irqrestore(&config->connector_list_lock, flags); if (old_conn) - drm_connector_put_safe(old_conn); + __drm_connector_put_safe(old_conn); + spin_unlock_irqrestore(&config->connector_list_lock, flags); return iter->conn; } @@ -602,9 +616,15 @@ EXPORT_SYMBOL(drm_connector_list_iter_next); */ 
void drm_connector_list_iter_end(struct drm_connector_list_iter *iter) { + struct drm_mode_config *config = &iter->dev->mode_config; + unsigned long flags; + iter->dev = NULL; - if (iter->conn) - drm_connector_put_safe(iter->conn); + if (iter->conn) { + spin_lock_irqsave(&config->connector_list_lock, flags); + __drm_connector_put_safe(iter->conn); + spin_unlock_irqrestore(&config->connector_list_lock, flags); + } lock_release(&connector_list_iter_dep_map, 0, _RET_IP_); } EXPORT_SYMBOL(drm_connector_list_iter_end); diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 9ebb8841778c..af00f42ba269 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -142,6 +142,7 @@ int drm_mode_connector_set_obj_prop(struct drm_mode_object *obj, uint64_t value); int drm_connector_create_standard_properties(struct drm_device *dev); const char *drm_get_connector_force_name(enum drm_connector_force force); +void drm_connector_free_work_fn(struct work_struct *work); /* IOCTL */ int drm_mode_connector_property_set_ioctl(struct drm_device *dev, diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index cc78b3d9e5e4..256de7313612 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -382,6 +382,9 @@ void drm_mode_config_init(struct drm_device *dev) ida_init(&dev->mode_config.connector_ida); spin_lock_init(&dev->mode_config.connector_list_lock); + init_llist_head(&dev->mode_config.connector_free_list); + INIT_WORK(&dev->mode_config.connector_free_work, drm_connector_free_work_fn); + drm_mode_create_standard_properties(dev); /* Just to be sure */ @@ -432,7 +435,7 @@ void drm_mode_config_cleanup(struct drm_device *dev) } drm_connector_list_iter_end(&conn_iter); /* connector_iter drops references in a work item. 
*/ - flush_scheduled_work(); + flush_work(&dev->mode_config.connector_free_work); if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) { drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index a4649c56ca2f..5971577016a2 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -24,6 +24,7 @@ #define __DRM_CONNECTOR_H__ #include +#include #include #include #include @@ -918,12 +919,13 @@ struct drm_connector { uint16_t tile_h_size, tile_v_size; /** - * @free_work: + * @free_node: * - * Work used only by &drm_connector_iter to be able to clean up a - * connector from any context. + * List used only by &drm_connector_iter to be able to clean up a + * connector from any context, in conjunction with + * &drm_mode_config.connector_free_work. */ - struct work_struct free_work; + struct llist_node free_node; }; #define obj_to_connector(x) container_of(x, struct drm_connector, base) diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index b21e827c5c78..b0ce26d71296 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -393,7 +394,7 @@ struct drm_mode_config { /** * @connector_list_lock: Protects @num_connector and - * @connector_list. + * @connector_list and @connector_free_list. */ spinlock_t connector_list_lock; /** @@ -413,6 +414,21 @@ struct drm_mode_config { * &struct drm_connector_list_iter to walk this list. */ struct list_head connector_list; + /** + * @connector_free_list: + * + * List of connector objects linked with &drm_connector.free_head. + * Protected by @connector_list_lock. Used by + * drm_for_each_connector_iter() and + * &struct drm_connector_list_iter to savely free connectors using + * @connector_free_work. 
+ */ + struct llist_head connector_free_list; + /** + * @connector_free_work: Work to clean up @connector_free_list. + */ + struct work_struct connector_free_work; + /** * @num_encoder: * -- cgit v1.2.3 From bd36d3bab2e3d08f80766c86487090dbceed4651 Mon Sep 17 00:00:00 2001 From: Marius Vlad Date: Wed, 13 Dec 2017 20:10:48 +0200 Subject: drm/drm_lease: Prevent deadlock in case drm_lease_create() fails This case can be seen when creating the lease with the same objects passed. [ 605.515097] 2 locks held by testapp/3337: [ 605.519027] #0: (&dev->mode_config.idr_mutex){......}, at: [] drm_mode_create_lease_ioctl+0x384/0x858 [ 605.530045] #1: (&dev->mode_config.idr_mutex){......}, at: [] drm_lease_destroy+0x2c/0x110 This was causing the process to hang: [ 605.398827] [] __switch_to+0x94/0xa8 [ 605.404030] [] __schedule+0x1b0/0x698 [ 605.409322] [] schedule+0x3c/0xa8 [ 605.414260] [] schedule_preempt_disabled+0x20/0x38 [ 605.420677] [] mutex_lock_nested+0x158/0x340 [ 605.426572] [] drm_lease_destroy+0x2c/0x110 [ 605.432389] [] drm_master_put+0xc0/0xc8 [ 605.437845] [] drm_mode_create_lease_ioctl+0x47c/0x858 [ 605.444612] [] drm_ioctl+0x198/0x448 [ 605.449811] [] do_vfs_ioctl+0xa4/0x748 [ 605.455192] [] SyS_ioctl+0x8c/0xa0 [ 605.460216] [] __sys_trace_return+0x0/0x4 drm_mode_create_lease_ioctl() calls drm_lease_create() which acquires a lock on dev->mode_config.idr_mutex. In case of failure, drm_lease_create() calls drm_master_put() which in turn tries to acquire the same lock when calling drm_lease_destroy(). v2: - Reverse the order at exit in case of failure, so that unlocking takes place before dropping the reference.
- Include detail information about deadlock (Daniel Vetter) Signed-off-by: Marius Vlad Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171213181048.32719-1-marius-cristian.vlad@nxp.com --- drivers/gpu/drm/drm_lease.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index d1eb56a1eff4..59849f02e2ad 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -254,10 +254,10 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr return lessee; out_lessee: - drm_master_put(&lessee); - mutex_unlock(&dev->mode_config.idr_mutex); + drm_master_put(&lessee); + return ERR_PTR(error); } -- cgit v1.2.3 From 2797c4a11f373b2545c2398ccb02e362ee66a142 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Dec 2017 13:25:13 +0000 Subject: drm/i915: Flush pending GTT writes before unbinding From the shrinker paths, we want to relinquish the GPU and GGTT access to the object, releasing the backing storage back to the system for swapout. As a part of that process we would unpin the pages, marking them for access by the CPU (for the swapout/swapin). However, if that process was interrupted after unbind the vma, we missed a flush of the inflight GGTT writes before we made that GTT space available again for reuse, with the prospect that we would redirect them to another page. The bug dates back to the introduction of multiple GGTT vma, but the code itself dates to commit 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()"). 
Fixes: 02bef8f98d26 ("drm/i915: Unbind closed vma for i915_gem_object_unbind()") Fixes: c5ad54cf7dd8 ("drm/i915: Use partial view in mmap fault handler") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171204132513.7303-1-chris@chris-wilson.co.uk (cherry picked from commit 5888fc9eac3c2ff96e76aeeb865fdb46ab2d711e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ad4050f7ab3b..18de6569d04a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) return ret; - i915_gem_retire_requests(to_i915(obj->base.dev)); - while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { -- cgit v1.2.3 From 2b3a2e9f400acff4a4a9a2316e3e13b36b76b0e9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 Dec 2017 22:00:25 +0000 Subject: drm/i915: Drop fb reference on load_detect_pipe failure path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When intel_modeset_setup_plane_state() fails drop the local framebuffer reference before jumping to the error, otherwise we leak the framebuffer. 
Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Daniel Vetter Fixes: edde361711ef ("drm/i915: Use atomic state to obtain load detection crtc, v3.") Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171207220025.22698-1-chris@chris-wilson.co.uk (cherry picked from commit 3e72be177cf19ab3d62b3084d424dce7e71d847f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e8ccf89cb17b..ff9397030092 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9944,11 +9944,10 @@ found: } ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); + drm_framebuffer_put(fb); if (ret) goto fail; - drm_framebuffer_put(fb); - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; -- cgit v1.2.3 From 74c7b0782b15bc2478f557cea34b3fe34d452dc6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Dec 2017 12:10:33 +0000 Subject: drm/i915: Stop listening to request resubmission from the signaler kthread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intent here was that we would be listening to i915_gem_request_unsubmit in order to cancel the signaler quickly and release the reference on the request. Cancelling the signaler is done directly via intel_engine_cancel_signaling (called from unsubmit), but that does not directly wake up the signaling thread, and neither does setting the request->global_seqno back to zero wake up listeners to the request->execute waitqueue. So the only time that listening to the request->execute waitqueue would wake up the signaling kthread would be on the request resubmission, during which time we would already receive wake ups from rejoining the global breadcrumbs wait rbtree. 
Trying to wake up to release the request remains an issue. If the signaling was cancelled and no other request required signaling, then it is possible for us to shutdown with the reference on the request still held. To ensure that we do not try to shutdown, leaking that request, we kick the signaling threads whenever we disarm the breadcrumbs, i.e. on parking the engine when idle. v2: We do need to be sure to release the last reference on stopping the kthread; asserting that it has been dropped already is insufficient. Fixes: d6a2289d9d6b ("drm/i915: Remove the preempted request from the execution queue") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20171208121033.5236-1-chris@chris-wilson.co.uk Acked-by: Daniel Vetter Reviewed-by: Tvrtko Ursulin (cherry picked from commit 776bc27fd8ab67a675cb0041d3af361af5d0e290) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5f8b9f1f40f1..bcbc7abe6693 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) struct intel_wait *wait, *n, *first; if (!b->irq_armed) - return; + goto wakeup_signaler; /* We only disarm the irq when we are idle (all requests completed), * so if the bottom-half remains asleep, it missed the request @@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) b->waiters = RB_ROOT; spin_unlock_irq(&b->rb_lock); + + /* + * The signaling thread may be asleep holding a reference to a request, + * that had its signaling cancelled prior to being preempted. We need + * to kick the signaler, just in case, to release any such reference. 
+ */ +wakeup_signaler: + wake_up_process(b->signaler); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { - DEFINE_WAIT(exec); - if (kthread_should_park()) kthread_parkme(); - if (kthread_should_stop()) { - GEM_BUG_ON(request); + if (unlikely(kthread_should_stop())) { + i915_gem_request_put(request); break; } - if (request) - add_wait_queue(&request->execute, &exec); - schedule(); - - if (request) - remove_wait_queue(&request->execute, &exec); } i915_gem_request_put(request); } while (1); -- cgit v1.2.3 From 2cf654db8d7eafb973d28eb3cddf043d353e1345 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Dec 2017 09:48:02 +0000 Subject: drm/i915/fence: Use rcu to defer freeing of irq_work It is illegal to perform an immediate free of the struct irq_work from inside the irq_work callback (as irq_work_run_list modifies work->flags after execution of the work->func()). As we use the irq_work to coordinate the freeing of the callback from two different softirq paths, we need to defer the kfree from inside our irq_work callback, for which we can use kfree_rcu. 
Fixes: 81c0ed21aa91 ("drm/i915/fence: Avoid del_timer_sync() from inside a timer") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20171213094802.28243-1-chris@chris-wilson.co.uk (cherry picked from commit 7d622351c94172a42bfe9b13bdb0fdc2be90ed3b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_sw_fence.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index e8ca67a129d2..ac236b88c99c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb { struct dma_fence *dma; struct timer_list timer; struct irq_work work; + struct rcu_head rcu; }; static void timer_i915_sw_fence_wake(struct timer_list *t) @@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk) del_timer_sync(&cb->timer); dma_fence_put(cb->dma); - kfree(cb); + kfree_rcu(cb, rcu); } int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, -- cgit v1.2.3 From da2e6b7eeda8919f677c790ef51161dd02e513a6 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 22 Nov 2017 20:27:34 +0200 Subject: ovl: fix overlay: warning prefix Conform two stray warning messages to the standard overlayfs: prefix. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 3 ++- fs/overlayfs/readdir.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index e13921824c70..f9788bc116a8 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -887,7 +887,8 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) spin_unlock(&dentry->d_lock); } else { kfree(redirect); - pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); + pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n", + err); /* Fall back to userspace copy-up */ err = -EXDEV; } diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 51088849ce97..8c98578d27a1 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -499,7 +499,7 @@ out: return err; fail: - pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n", + pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n", p->name, err); goto out; } -- cgit v1.2.3 From 1d08a044cf12aee37dfd54837558e3295287b343 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 Dec 2017 11:45:42 +0000 Subject: arm64: fix CONFIG_DEBUG_WX address reporting In ptdump_check_wx(), we pass walk_pgd() a start address of 0 (rather than VA_START) for the init_mm. This means that any reported W&X addresses are offset by VA_START, which is clearly wrong and can make them appear like userspace addresses. Fix this by telling the ptdump code that we're walking init_mm starting at VA_START. We don't need to update the addr_markers, since these are still valid bounds regardless. 
Cc: Fixes: 1404d6f13e47 ("arm64: dump: Add checking for writable and exectuable pages") Signed-off-by: Mark Rutland Cc: Kees Cook Cc: Laura Abbott Reported-by: Timur Tabi Signed-off-by: Will Deacon --- arch/arm64/mm/dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index ca74a2aace42..7b60d62ac593 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -389,7 +389,7 @@ void ptdump_check_wx(void) .check_wx = true, }; - walk_pgd(&st, &init_mm, 0); + walk_pgd(&st, &init_mm, VA_START); note_page(&st, 0, 0, 0); if (st.wx_pages || st.uxn_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", -- cgit v1.2.3 From 2ff739b9bdd3199cd52e450097cb0f7fc4e1e9e8 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 14 Dec 2017 15:29:28 +0800 Subject: ASoC: sun4i-i2s: Show detailed error when DAI configuration callbacks fail When any of the DAI hardware configuration callbacks (.hw_param, .set_fmt, .set_sysclk) fails, there is no explanation about why it failed. This is particularly confusing for .hw_param, which covers many parameters of the DAI. Telling the users what parameter isn't supported, and what the requested value was goes a long way for developers trying to combine sun4i-i2s with external codecs. This patch adds dev_err calls explaining what isn't supported or failed, and what the value was. sun4i_i2s_set_clk_rate()'s first parameter was changed to a struct snd_soc_dai *dai, so we can get the underlying device. 
Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Acked-by: Marcus Cooper Signed-off-by: Mark Brown --- sound/soc/sunxi/sun4i-i2s.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index 04f92583a969..bc147e2dcff5 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -269,10 +269,11 @@ static bool sun4i_i2s_oversample_is_valid(unsigned int oversample) return false; } -static int sun4i_i2s_set_clk_rate(struct sun4i_i2s *i2s, +static int sun4i_i2s_set_clk_rate(struct snd_soc_dai *dai, unsigned int rate, unsigned int word_size) { + struct sun4i_i2s *i2s = snd_soc_dai_get_drvdata(dai); unsigned int oversample_rate, clk_rate; int bclk_div, mclk_div; int ret; @@ -300,6 +301,7 @@ static int sun4i_i2s_set_clk_rate(struct sun4i_i2s *i2s, break; default: + dev_err(dai->dev, "Unsupported sample rate: %u\n", rate); return -EINVAL; } @@ -308,18 +310,25 @@ static int sun4i_i2s_set_clk_rate(struct sun4i_i2s *i2s, return ret; oversample_rate = i2s->mclk_freq / rate; - if (!sun4i_i2s_oversample_is_valid(oversample_rate)) + if (!sun4i_i2s_oversample_is_valid(oversample_rate)) { + dev_err(dai->dev, "Unsupported oversample rate: %d\n", + oversample_rate); return -EINVAL; + } bclk_div = sun4i_i2s_get_bclk_div(i2s, oversample_rate, word_size); - if (bclk_div < 0) + if (bclk_div < 0) { + dev_err(dai->dev, "Unsupported BCLK divider: %d\n", bclk_div); return -EINVAL; + } mclk_div = sun4i_i2s_get_mclk_div(i2s, oversample_rate, clk_rate, rate); - if (mclk_div < 0) + if (mclk_div < 0) { + dev_err(dai->dev, "Unsupported MCLK divider: %d\n", mclk_div); return -EINVAL; + } /* Adjust the clock division values if needed */ bclk_div += i2s->variant->bclk_offset; @@ -349,8 +358,11 @@ static int sun4i_i2s_hw_params(struct snd_pcm_substream *substream, u32 width; channels = params_channels(params); - if (channels != 2) + if (channels != 2) { + dev_err(dai->dev, 
"Unsupported number of channels: %d\n", + channels); return -EINVAL; + } if (i2s->variant->has_chcfg) { regmap_update_bits(i2s->regmap, SUN8I_I2S_CHAN_CFG_REG, @@ -382,6 +394,8 @@ static int sun4i_i2s_hw_params(struct snd_pcm_substream *substream, width = DMA_SLAVE_BUSWIDTH_2_BYTES; break; default: + dev_err(dai->dev, "Unsupported physical sample width: %d\n", + params_physical_width(params)); return -EINVAL; } i2s->playback_dma_data.addr_width = width; @@ -393,6 +407,8 @@ static int sun4i_i2s_hw_params(struct snd_pcm_substream *substream, break; default: + dev_err(dai->dev, "Unsupported sample width: %d\n", + params_width(params)); return -EINVAL; } @@ -401,7 +417,7 @@ static int sun4i_i2s_hw_params(struct snd_pcm_substream *substream, regmap_field_write(i2s->field_fmt_sr, sr + i2s->variant->fmt_offset); - return sun4i_i2s_set_clk_rate(i2s, params_rate(params), + return sun4i_i2s_set_clk_rate(dai, params_rate(params), params_width(params)); } @@ -426,6 +442,8 @@ static int sun4i_i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) val = SUN4I_I2S_FMT0_FMT_RIGHT_J; break; default: + dev_err(dai->dev, "Unsupported format: %d\n", + fmt & SND_SOC_DAIFMT_FORMAT_MASK); return -EINVAL; } @@ -464,6 +482,8 @@ static int sun4i_i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) case SND_SOC_DAIFMT_NB_NF: break; default: + dev_err(dai->dev, "Unsupported clock polarity: %d\n", + fmt & SND_SOC_DAIFMT_INV_MASK); return -EINVAL; } @@ -482,6 +502,8 @@ static int sun4i_i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) val = SUN4I_I2S_CTRL_MODE_SLAVE; break; default: + dev_err(dai->dev, "Unsupported slave setting: %d\n", + fmt & SND_SOC_DAIFMT_MASTER_MASK); return -EINVAL; } regmap_update_bits(i2s->regmap, SUN4I_I2S_CTRL_REG, @@ -504,6 +526,8 @@ static int sun4i_i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) val = 0; break; default: + dev_err(dai->dev, "Unsupported slave setting: %d\n", + fmt & SND_SOC_DAIFMT_MASTER_MASK); return -EINVAL; } 
regmap_update_bits(i2s->regmap, SUN4I_I2S_CTRL_REG, -- cgit v1.2.3 From 958d022e326810fd762505bd02007aced79ffcbc Mon Sep 17 00:00:00 2001 From: "oder_chiou@realtek.com" Date: Thu, 14 Dec 2017 09:54:07 +0800 Subject: ASoC: rt5663: Fix the wrong result of the first jack detection In the first jack detection while booting, the result will always show as headset, even if we insert a headphone. Signed-off-by: Oder Chiou Signed-off-by: Mark Brown --- sound/soc/codecs/rt5663.c | 4 ++++ sound/soc/codecs/rt5663.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index b036c9dc0c8c..d329bf719d80 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert) RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN); snd_soc_update_bits(codec, RT5663_IRQ_1, RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN); + snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1, + RT5663_EM_JD_MASK, RT5663_EM_JD_RST); + snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1, + RT5663_EM_JD_MASK, RT5663_EM_JD_NOR); while (true) { regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val); diff --git a/sound/soc/codecs/rt5663.h b/sound/soc/codecs/rt5663.h index c5a9b69579ad..03adc8004ba9 100644 --- a/sound/soc/codecs/rt5663.h +++ b/sound/soc/codecs/rt5663.h @@ -1029,6 +1029,10 @@ #define RT5663_POL_EXT_JD_SHIFT 10 #define RT5663_POL_EXT_JD_EN (0x1 << 10) #define RT5663_POL_EXT_JD_DIS (0x0 << 10) +#define RT5663_EM_JD_MASK (0x1 << 7) +#define RT5663_EM_JD_SHIFT 7 +#define RT5663_EM_JD_NOR (0x1 << 7) +#define RT5663_EM_JD_RST (0x0 << 7) /* DACREF LDO Control (0x0112)*/ #define RT5663_PWR_LDO_DACREFL_MASK (0x1 << 9) -- cgit v1.2.3 From 7ff0b53c4051145d1cf992d2f60987e6447eed4f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Dec 2017 20:05:10 +0100 Subject: spi: sh-msiof: Avoid writing to registers from spi_master.setup() The spi_master.setup() callback
must not change configuration registers, as that could corrupt I/O that is in progress for other SPI slaves. The only exception is the configuration of the native chip select polarity in SPI master mode, as a wrong chip select polarity will cause havoc during all future transfers to any other SPI slave. Hence stop writing to registers in sh_msiof_spi_setup(), unless it is the first call for a controller using a native chip select, or unless native chip select polarity has changed (note that you'll lose anyway if I/O is in progress). Even then, only do what is strictly necessary, instead of calling sh_msiof_spi_set_pin_regs(). Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 81a9144f5442..2704abb11ea4 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -55,6 +55,8 @@ struct sh_msiof_spi_priv { void *rx_dma_page; dma_addr_t tx_dma_addr; dma_addr_t rx_dma_addr; + bool native_cs_inited; + bool native_cs_high; bool slave_aborted; }; @@ -528,8 +530,7 @@ static int sh_msiof_spi_setup(struct spi_device *spi) { struct device_node *np = spi->master->dev.of_node; struct sh_msiof_spi_priv *p = spi_master_get_devdata(spi->master); - - pm_runtime_get_sync(&p->pdev->dev); + u32 clr, set, tmp; if (!np) { /* @@ -539,19 +540,31 @@ static int sh_msiof_spi_setup(struct spi_device *spi) spi->cs_gpio = (uintptr_t)spi->controller_data; } - /* Configure pins before deasserting CS */ - sh_msiof_spi_set_pin_regs(p, !!(spi->mode & SPI_CPOL), - !!(spi->mode & SPI_CPHA), - !!(spi->mode & SPI_3WIRE), - !!(spi->mode & SPI_LSB_FIRST), - !!(spi->mode & SPI_CS_HIGH)); - - if (spi->cs_gpio >= 0) + if (spi->cs_gpio >= 0) { gpio_set_value(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); + return 0; + } + if (spi_controller_is_slave(p->master)) + return 0; -
pm_runtime_put(&p->pdev->dev); + if (p->native_cs_inited && + (p->native_cs_high == !!(spi->mode & SPI_CS_HIGH))) + return 0; + /* Configure native chip select mode/polarity early */ + clr = MDR1_SYNCMD_MASK; + set = MDR1_TRMD | TMDR1_PCON | MDR1_SYNCMD_SPI; + if (spi->mode & SPI_CS_HIGH) + clr |= BIT(MDR1_SYNCAC_SHIFT); + else + set |= BIT(MDR1_SYNCAC_SHIFT); + pm_runtime_get_sync(&p->pdev->dev); + tmp = sh_msiof_read(p, TMDR1) & ~clr; + sh_msiof_write(p, TMDR1, tmp | set); + pm_runtime_put(&p->pdev->dev); + p->native_cs_high = spi->mode & SPI_CS_HIGH; + p->native_cs_inited = true; return 0; } -- cgit v1.2.3 From 9cce882bedd2768dc251b73f2ad86a9bfcfd9fc7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Dec 2017 20:05:11 +0100 Subject: spi: sh-msiof: Extend support to 3 native chip selects Currently only the MSIOF_SYNC signal can be used as a native chip select. Extend support to up to 3 native chipselects using the MSIOF_SS1 and MSIOF_SS2 signals. Inspired by a patch in the BSP by Hiromitsu Yamasaki. Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/sh-msiof.txt | 6 +++++- drivers/spi/spi-sh-msiof.c | 18 +++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt index bdd83959019c..bc8c16a6cfc8 100644 --- a/Documentation/devicetree/bindings/spi/sh-msiof.txt +++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt @@ -36,7 +36,11 @@ Required properties: Optional properties: - clocks : Must contain a reference to the functional clock. -- num-cs : Total number of chip-selects (default is 1) +- num-cs : Total number of chip selects (default is 1). + Up to 3 native chip selects are supported: + 0: MSIOF_SYNC + 1: MSIOF_SS1 + 2: MSIOF_SS2 - dmas : Must contain a list of two references to DMA specifiers, one for transmission, and one for reception. 
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 2704abb11ea4..9bdc292aa050 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -60,6 +60,8 @@ struct sh_msiof_spi_priv { bool slave_aborted; }; +#define MAX_SS 3 /* Maximum number of native chip selects */ + #define TMDR1 0x00 /* Transmit Mode Register 1 */ #define TMDR2 0x04 /* Transmit Mode Register 2 */ #define TMDR3 0x08 /* Transmit Mode Register 3 */ @@ -93,6 +95,8 @@ struct sh_msiof_spi_priv { #define MDR1_XXSTP 0x00000001 /* Transmission/Reception Stop on FIFO */ /* TMDR1 */ #define TMDR1_PCON 0x40000000 /* Transfer Signal Connection */ +#define TMDR1_SYNCCH_MASK 0xc000000 /* Synchronization Signal Channel Select */ +#define TMDR1_SYNCCH_SHIFT 26 /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */ /* TMDR2 and RMDR2 */ #define MDR2_BITLEN1(i) (((i) - 1) << 24) /* Data Size (8-32 bits) */ @@ -326,7 +330,7 @@ static u32 sh_msiof_spi_get_dtdl_and_syncdl(struct sh_msiof_spi_priv *p) return val; } -static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, +static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, u32 ss, u32 cpol, u32 cpha, u32 tx_hi_z, u32 lsb_first, u32 cs_high) { @@ -344,10 +348,13 @@ static void sh_msiof_spi_set_pin_regs(struct sh_msiof_spi_priv *p, tmp |= !cs_high << MDR1_SYNCAC_SHIFT; tmp |= lsb_first << MDR1_BITLSB_SHIFT; tmp |= sh_msiof_spi_get_dtdl_and_syncdl(p); - if (spi_controller_is_slave(p->master)) + if (spi_controller_is_slave(p->master)) { sh_msiof_write(p, TMDR1, tmp | TMDR1_PCON); - else - sh_msiof_write(p, TMDR1, tmp | MDR1_TRMD | TMDR1_PCON); + } else { + sh_msiof_write(p, TMDR1, + tmp | MDR1_TRMD | TMDR1_PCON | + (ss < MAX_SS ? 
ss : 0) << TMDR1_SYNCCH_SHIFT); + } if (p->master->flags & SPI_MASTER_MUST_TX) { /* These bits are reserved if RX needs TX */ tmp &= ~0x0000ffff; @@ -575,7 +582,8 @@ static int sh_msiof_prepare_message(struct spi_master *master, const struct spi_device *spi = msg->spi; /* Configure pins before asserting CS */ - sh_msiof_spi_set_pin_regs(p, !!(spi->mode & SPI_CPOL), + sh_msiof_spi_set_pin_regs(p, spi->chip_select, + !!(spi->mode & SPI_CPOL), !!(spi->mode & SPI_CPHA), !!(spi->mode & SPI_3WIRE), !!(spi->mode & SPI_LSB_FIRST), -- cgit v1.2.3 From b8761434bdec32fa46a644c26a12d16a9b0f58d8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Dec 2017 20:05:12 +0100 Subject: spi: sh-msiof: Implement cs-gpios configuration The current support for GPIO chip selects assumes the GPIOs have been configured by platform code or the boot loader. This includes pinmux setup and GPIO direction. Hence it does not work as expected when just described in DT using the "cs-gpios" property. Fix this by: 1. using devm_gpiod_get_index() to request the GPIO, and thus configure pinmux, if needed, 2. configuring the GPIO direction in the spi_master.setup() callback. Use gpio_is_valid() instead of a check on positive numbers. Note that when using GPIO chip selects, at least one native chip select must be left unused, as that native chip select will be driven anyway, and (global) native chip select polarity must be taken into account.
Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 66 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 9bdc292aa050..8aa5c7b910d9 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ struct sh_msiof_spi_priv { void *rx_dma_page; dma_addr_t tx_dma_addr; dma_addr_t rx_dma_addr; + unsigned short unused_ss; bool native_cs_inited; bool native_cs_high; bool slave_aborted; @@ -547,8 +549,8 @@ static int sh_msiof_spi_setup(struct spi_device *spi) spi->cs_gpio = (uintptr_t)spi->controller_data; } - if (spi->cs_gpio >= 0) { - gpio_set_value(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); + if (gpio_is_valid(spi->cs_gpio)) { + gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); return 0; } @@ -580,14 +582,20 @@ static int sh_msiof_prepare_message(struct spi_master *master, { struct sh_msiof_spi_priv *p = spi_master_get_devdata(master); const struct spi_device *spi = msg->spi; + u32 ss, cs_high; /* Configure pins before asserting CS */ - sh_msiof_spi_set_pin_regs(p, spi->chip_select, - !!(spi->mode & SPI_CPOL), + if (gpio_is_valid(spi->cs_gpio)) { + ss = p->unused_ss; + cs_high = p->native_cs_high; + } else { + ss = spi->chip_select; + cs_high = !!(spi->mode & SPI_CS_HIGH); + } + sh_msiof_spi_set_pin_regs(p, ss, !!(spi->mode & SPI_CPOL), !!(spi->mode & SPI_CPHA), !!(spi->mode & SPI_3WIRE), - !!(spi->mode & SPI_LSB_FIRST), - !!(spi->mode & SPI_CS_HIGH)); + !!(spi->mode & SPI_LSB_FIRST), cs_high); return 0; } @@ -1091,6 +1099,45 @@ static struct sh_msiof_spi_info *sh_msiof_spi_parse_dt(struct device *dev) } #endif +static int sh_msiof_get_cs_gpios(struct sh_msiof_spi_priv *p) +{ + struct device *dev = &p->pdev->dev; + unsigned int used_ss_mask = 0; + unsigned int cs_gpios = 0; + unsigned int 
num_cs, i; + int ret; + + ret = gpiod_count(dev, "cs"); + if (ret <= 0) + return 0; + + num_cs = max_t(unsigned int, ret, p->master->num_chipselect); + for (i = 0; i < num_cs; i++) { + struct gpio_desc *gpiod; + + gpiod = devm_gpiod_get_index(dev, "cs", i, GPIOD_ASIS); + if (!IS_ERR(gpiod)) { + cs_gpios++; + continue; + } + + if (PTR_ERR(gpiod) != -ENOENT) + return PTR_ERR(gpiod); + + if (i >= MAX_SS) { + dev_err(dev, "Invalid native chip select %d\n", i); + return -EINVAL; + } + used_ss_mask |= BIT(i); + } + p->unused_ss = ffz(used_ss_mask); + if (cs_gpios && p->unused_ss >= MAX_SS) { + dev_err(dev, "No unused native chip select available\n"); + return -EINVAL; + } + return 0; +} + static struct dma_chan *sh_msiof_request_dma_chan(struct device *dev, enum dma_transfer_direction dir, unsigned int id, dma_addr_t port_addr) { @@ -1304,13 +1351,18 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) if (p->info->rx_fifo_override) p->rx_fifo_size = p->info->rx_fifo_override; + /* Setup GPIO chip selects */ + master->num_chipselect = p->info->num_chipselect; + ret = sh_msiof_get_cs_gpios(p); + if (ret) + goto err1; + /* init master code */ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; master->mode_bits |= SPI_LSB_FIRST | SPI_3WIRE; master->flags = chipdata->master_flags; master->bus_num = pdev->id; master->dev.of_node = pdev->dev.of_node; - master->num_chipselect = p->info->num_chipselect; master->setup = sh_msiof_spi_setup; master->prepare_message = sh_msiof_prepare_message; master->slave_abort = sh_msiof_slave_abort; -- cgit v1.2.3 From c99182f73cce7926c623b5c1c0ff0b7954ac8d81 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Dec 2017 20:05:13 +0100 Subject: spi: sh-msiof: Document hardware limitations related to chip selects Guide users to maintain the proper balance between native and GPIO chip selects. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/sh-msiof.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt index bc8c16a6cfc8..80710f0f0448 100644 --- a/Documentation/devicetree/bindings/spi/sh-msiof.txt +++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt @@ -41,6 +41,16 @@ Optional properties: 0: MSIOF_SYNC 1: MSIOF_SS1 2: MSIOF_SS2 + Hardware limitations related to chip selects: + - Native chip selects are always deasserted in + between transfers that are part of the same + message. Use cs-gpios to work around this. + - All slaves using native chip selects must use the + same spi-cs-high configuration. Use cs-gpios to + work around this. + - When using GPIO chip selects, at least one native + chip select must be left unused, as it will be + driven anyway. - dmas : Must contain a list of two references to DMA specifiers, one for transmission, and one for reception. -- cgit v1.2.3 From 97d90da8a886949f09bb4754843fb0b504956ad2 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 30 Nov 2017 18:01:29 +0100 Subject: mtd: nand: provide several helpers to do common NAND operations This is part of the process of removing direct calls to ->cmdfunc() outside of the core in order to introduce a better interface to execute NAND operations. Here we provide several helpers and make use of them to remove all direct calls to ->cmdfunc(). This way, we can easily modify those helpers to make use of the new ->exec_op() interface when available. 
Signed-off-by: Boris Brezillon [miquel.raynal@free-electrons.com: rebased and fixed some conflicts] Signed-off-by: Miquel Raynal Acked-by: Masahiro Yamada --- drivers/mtd/nand/atmel/nand-controller.c | 2 +- drivers/mtd/nand/brcmnand/brcmnand.c | 9 +- drivers/mtd/nand/cafe_nand.c | 14 +- drivers/mtd/nand/denali.c | 37 +- drivers/mtd/nand/diskonchip.c | 4 +- drivers/mtd/nand/docg4.c | 2 +- drivers/mtd/nand/fsmc_nand.c | 5 +- drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 58 +- drivers/mtd/nand/hisi504_nand.c | 3 +- drivers/mtd/nand/jz4740_nand.c | 16 +- drivers/mtd/nand/lpc32xx_mlc.c | 2 +- drivers/mtd/nand/lpc32xx_slc.c | 22 +- drivers/mtd/nand/mtk_nand.c | 11 +- drivers/mtd/nand/nand_base.c | 1011 +++++++++++++++++++++++++----- drivers/mtd/nand/nand_hynix.c | 115 ++-- drivers/mtd/nand/nand_micron.c | 77 ++- drivers/mtd/nand/omap2.c | 8 +- drivers/mtd/nand/pxa3xx_nand.c | 8 +- drivers/mtd/nand/qcom_nandc.c | 16 +- drivers/mtd/nand/r852.c | 11 +- drivers/mtd/nand/sunxi_nand.c | 71 +-- drivers/mtd/nand/tango_nand.c | 26 +- drivers/mtd/nand/tmio_nand.c | 5 +- include/linux/mtd/rawnand.h | 29 + 24 files changed, 1131 insertions(+), 431 deletions(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 90a71a56bc23..e81fdd2d47b1 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -1000,7 +1000,7 @@ static int atmel_hsmc_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf, * to the non-optimized one. 
*/ if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB) { - chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page); + nand_read_page_op(chip, page, 0, NULL, 0); return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page, raw); diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index e0eb51d8c012..3f441096a14c 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -1071,7 +1071,7 @@ static void brcmnand_wp(struct mtd_info *mtd, int wp) return; brcmnand_set_wp(ctrl, wp); - chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); + nand_status_op(chip, NULL); /* NAND_STATUS_WP 0x00 = protected, 0x80 = not protected */ ret = bcmnand_ctrl_poll_status(ctrl, NAND_CTRL_RDY | @@ -1453,7 +1453,7 @@ static uint8_t brcmnand_read_byte(struct mtd_info *mtd) /* At FC_BYTES boundary, switch to next column */ if (host->last_byte > 0 && offs == 0) - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, addr, -1); + nand_change_read_column_op(chip, addr, NULL, 0, false); ret = ctrl->flash_cache[offs]; break; @@ -1689,7 +1689,7 @@ static int brcmstb_nand_verify_erased_page(struct mtd_info *mtd, sas = mtd->oobsize / chip->ecc.steps; /* read without ecc for verification */ - chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page); + nand_read_page_op(chip, page, 0, NULL, 0); ret = chip->ecc.read_page_raw(mtd, chip, buf, true, page); if (ret) return ret; @@ -2369,12 +2369,11 @@ static int brcmnand_resume(struct device *dev) list_for_each_entry(host, &ctrl->host_list, node) { struct nand_chip *chip = &host->chip; - struct mtd_info *mtd = nand_to_mtd(chip); brcmnand_save_restore_cs_config(host, 1); /* Reset the chip, required by some chips after power-up */ - chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + nand_reset_op(chip); } return 0; diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index bc558c438a57..95c2cfa68b66 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -353,23 +353,15 @@ static void 
cafe_nand_bug(struct mtd_info *mtd) static int cafe_nand_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { - int status = 0; - - chip->cmdfunc(mtd, NAND_CMD_SEQIN, mtd->writesize, page); - chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - status = chip->waitfunc(mtd, chip); - - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_op(chip, page, mtd->writesize, chip->oob_poi, + mtd->oobsize); } /* Don't use -- use nand_read_oob_std for now */ static int cafe_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize); } /** * cafe_nand_read_page_syndrome - [REPLACEABLE] hardware ecc syndrome based page read diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index 3e19861a46c6..d5c80d617854 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -645,8 +645,6 @@ static void denali_oob_xfer(struct mtd_info *mtd, struct nand_chip *chip, int page, int write) { struct denali_nand_info *denali = mtd_to_denali(mtd); - unsigned int start_cmd = write ? NAND_CMD_SEQIN : NAND_CMD_READ0; - unsigned int rnd_cmd = write ? 
NAND_CMD_RNDIN : NAND_CMD_RNDOUT; int writesize = mtd->writesize; int oobsize = mtd->oobsize; uint8_t *bufpoi = chip->oob_poi; @@ -658,11 +656,11 @@ static void denali_oob_xfer(struct mtd_info *mtd, struct nand_chip *chip, int i, pos, len; /* BBM at the beginning of the OOB area */ - chip->cmdfunc(mtd, start_cmd, writesize, page); if (write) - chip->write_buf(mtd, bufpoi, oob_skip); + nand_prog_page_begin_op(chip, page, writesize, bufpoi, + oob_skip); else - chip->read_buf(mtd, bufpoi, oob_skip); + nand_read_page_op(chip, page, writesize, bufpoi, oob_skip); bufpoi += oob_skip; /* OOB ECC */ @@ -675,30 +673,35 @@ static void denali_oob_xfer(struct mtd_info *mtd, struct nand_chip *chip, else if (pos + len > writesize) len = writesize - pos; - chip->cmdfunc(mtd, rnd_cmd, pos, -1); if (write) - chip->write_buf(mtd, bufpoi, len); + nand_change_write_column_op(chip, pos, bufpoi, len, + false); else - chip->read_buf(mtd, bufpoi, len); + nand_change_read_column_op(chip, pos, bufpoi, len, + false); bufpoi += len; if (len < ecc_bytes) { len = ecc_bytes - len; - chip->cmdfunc(mtd, rnd_cmd, writesize + oob_skip, -1); if (write) - chip->write_buf(mtd, bufpoi, len); + nand_change_write_column_op(chip, writesize + + oob_skip, bufpoi, + len, false); else - chip->read_buf(mtd, bufpoi, len); + nand_change_read_column_op(chip, writesize + + oob_skip, bufpoi, + len, false); bufpoi += len; } } /* OOB free */ len = oobsize - (bufpoi - chip->oob_poi); - chip->cmdfunc(mtd, rnd_cmd, size - len, -1); if (write) - chip->write_buf(mtd, bufpoi, len); + nand_change_write_column_op(chip, size - len, bufpoi, len, + false); else - chip->read_buf(mtd, bufpoi, len); + nand_change_read_column_op(chip, size - len, bufpoi, len, + false); } static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, @@ -788,16 +791,12 @@ static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { struct denali_nand_info *denali = mtd_to_denali(mtd); - int status; 
denali_reset_irq(denali); denali_oob_xfer(mtd, chip, page, 1); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - status = chip->waitfunc(mtd, chip); - - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_end_op(chip); } static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip, diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index 72671dc52e2e..6bc93ea66f50 100644 --- a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -448,7 +448,7 @@ static int doc200x_wait(struct mtd_info *mtd, struct nand_chip *this) int status; DoC_WaitReady(doc); - this->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); + nand_status_op(this, NULL); DoC_WaitReady(doc); status = (int)this->read_byte(mtd); @@ -595,7 +595,7 @@ static void doc2001plus_select_chip(struct mtd_info *mtd, int chip) /* Assert ChipEnable and deassert WriteProtect */ WriteDOC((DOC_FLASH_CE), docptr, Mplus_FlashSelect); - this->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + nand_reset_op(this); doc->curchip = chip; doc->curfloor = floor; diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c index 45c01b4b34c7..5a27f56dafdc 100644 --- a/drivers/mtd/nand/docg4.c +++ b/drivers/mtd/nand/docg4.c @@ -864,7 +864,7 @@ static int docg4_read_oob(struct mtd_info *mtd, struct nand_chip *nand, dev_dbg(doc->dev, "%s: page %x\n", __func__, page); - docg4_command(mtd, NAND_CMD_READ0, nand->ecc.size, page); + nand_read_page_op(nand, page, nand->ecc.size, NULL, 0); writew(DOC_ECCCONF0_READ_MODE | DOCG4_OOB_SIZE, docptr + DOC_ECCCONF0); write_nop(docptr); diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c index eac15d9bf49e..b44e5c6545e0 100644 --- a/drivers/mtd/nand/fsmc_nand.c +++ b/drivers/mtd/nand/fsmc_nand.c @@ -697,7 +697,7 @@ static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, unsigned int max_bitflips = 0; for (i = 0, s = 0; s < eccsteps; s++, i += eccbytes, p += eccsize) { - chip->cmdfunc(mtd, NAND_CMD_READ0, s 
* eccsize, page); + nand_read_page_op(chip, page, s * eccsize, NULL, 0); chip->ecc.hwctl(mtd, NAND_ECC_READ); chip->read_buf(mtd, p, eccsize); @@ -720,8 +720,7 @@ static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, if (chip->options & NAND_BUSWIDTH_16) len = roundup(len, 2); - chip->cmdfunc(mtd, NAND_CMD_READOOB, off, page); - chip->read_buf(mtd, oob + j, len); + nand_read_oob_op(chip, page, off, oob + j, len); j += len; } diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 9e365d488b6c..63a425ced4cd 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -1097,8 +1097,8 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, eccbytes = DIV_ROUND_UP(offset + eccbits, 8); offset /= 8; eccbytes -= offset; - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset, -1); - chip->read_buf(mtd, eccbuf, eccbytes); + nand_change_read_column_op(chip, offset, eccbuf, + eccbytes, false); /* * ECC data are not byte aligned and we may have @@ -1220,7 +1220,7 @@ static int gpmi_ecc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, meta = geo->metadata_size; if (first) { col = meta + (size + ecc_parity_size) * first; - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, col, -1); + nand_change_read_column_op(chip, col, NULL, 0, false); meta = 0; buf = buf + first * size; @@ -1411,7 +1411,7 @@ static int gpmi_ecc_read_oob(struct mtd_info *mtd, struct nand_chip *chip, memset(chip->oob_poi, ~0, mtd->oobsize); /* Read out the conventional OOB. */ - chip->cmdfunc(mtd, NAND_CMD_READ0, mtd->writesize, page); + nand_read_page_op(chip, page, mtd->writesize, NULL, 0); chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); /* @@ -1421,7 +1421,7 @@ static int gpmi_ecc_read_oob(struct mtd_info *mtd, struct nand_chip *chip, */ if (GPMI_IS_MX23(this)) { /* Read the block mark into the first byte of the OOB buffer. 
*/ - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); chip->oob_poi[0] = chip->read_byte(mtd); } @@ -1432,7 +1432,6 @@ static int gpmi_ecc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { struct mtd_oob_region of = { }; - int status = 0; /* Do we have available oob area? */ mtd_ooblayout_free(mtd, 0, &of); @@ -1442,12 +1441,8 @@ gpmi_ecc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) if (!nand_is_slc(chip)) return -EPERM; - chip->cmdfunc(mtd, NAND_CMD_SEQIN, mtd->writesize + of.offset, page); - chip->write_buf(mtd, chip->oob_poi + of.offset, of.length); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_op(chip, page, mtd->writesize + of.offset, + chip->oob_poi + of.offset, of.length); } /* @@ -1622,7 +1617,7 @@ static int gpmi_ecc_write_page_raw(struct mtd_info *mtd, static int gpmi_ecc_read_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); return gpmi_ecc_read_page_raw(mtd, chip, NULL, 1, page); } @@ -1630,7 +1625,7 @@ static int gpmi_ecc_read_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, static int gpmi_ecc_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page); + nand_prog_page_begin_op(chip, page, 0, NULL, 0); return gpmi_ecc_write_page_raw(mtd, chip, NULL, 1, page); } @@ -1641,7 +1636,7 @@ static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs) struct gpmi_nand_data *this = nand_get_controller_data(chip); int ret = 0; uint8_t *block_mark; - int column, page, status, chipnr; + int column, page, chipnr; chipnr = (int)(ofs >> chip->chip_shift); chip->select_chip(mtd, chipnr); @@ -1655,13 +1650,7 @@ static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs) /* Shift to get page */ page = 
(int)(ofs >> chip->page_shift); - chip->cmdfunc(mtd, NAND_CMD_SEQIN, column, page); - chip->write_buf(mtd, block_mark, 1); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - if (status & NAND_STATUS_FAIL) - ret = -EIO; + ret = nand_prog_page_op(chip, page, column, block_mark, 1); chip->select_chip(mtd, -1); @@ -1729,7 +1718,7 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this) * Read the NCB fingerprint. The fingerprint is four bytes long * and starts in the 12th byte of the page. */ - chip->cmdfunc(mtd, NAND_CMD_READ0, 12, page); + nand_read_page_op(chip, page, 12, NULL, 0); chip->read_buf(mtd, buffer, strlen(fingerprint)); /* Look for the fingerprint. */ @@ -1789,17 +1778,10 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this) dev_dbg(dev, "Erasing the search area...\n"); for (block = 0; block < search_area_size_in_blocks; block++) { - /* Compute the page address. */ - page = block * block_size_in_pages; - /* Erase this block. */ dev_dbg(dev, "\tErasing block 0x%x\n", block); - chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page); - chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1); - - /* Wait for the erase to finish. */ - status = chip->waitfunc(mtd, chip); - if (status & NAND_STATUS_FAIL) + status = nand_erase_op(chip, block); + if (status) dev_err(dev, "[%s] Erase failed.\n", __func__); } @@ -1815,13 +1797,11 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this) /* Write the first page of the current stride. */ dev_dbg(dev, "Writing an NCB fingerprint in page 0x%x\n", page); - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); - chip->ecc.write_page_raw(mtd, chip, buffer, 0, page); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - /* Wait for the write to finish. 
*/ - status = chip->waitfunc(mtd, chip); - if (status & NAND_STATUS_FAIL) + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + chip->ecc.write_page_raw(mtd, chip, buffer, 0, page); + status = nand_prog_page_end_op(chip); + if (status) dev_err(dev, "[%s] Write failed.\n", __func__); } @@ -1876,7 +1856,7 @@ static int mx23_boot_init(struct gpmi_nand_data *this) /* Send the command to read the conventional block mark. */ chip->select_chip(mtd, chipnr); - chip->cmdfunc(mtd, NAND_CMD_READ0, mtd->writesize, page); + nand_read_page_op(chip, page, mtd->writesize, NULL, 0); block_mark = chip->read_byte(mtd); chip->select_chip(mtd, -1); diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c index 0897261c3e17..184d765c8bbe 100644 --- a/drivers/mtd/nand/hisi504_nand.c +++ b/drivers/mtd/nand/hisi504_nand.c @@ -574,8 +574,7 @@ static int hisi_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip, { struct hinfc_host *host = nand_get_controller_data(chip); - chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); + nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize); if (host->irq_status & HINFC504_INTS_UE) { host->irq_status = 0; diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c index ad827d4af3e9..613b00a9604b 100644 --- a/drivers/mtd/nand/jz4740_nand.c +++ b/drivers/mtd/nand/jz4740_nand.c @@ -313,6 +313,7 @@ static int jz_nand_detect_bank(struct platform_device *pdev, uint32_t ctrl; struct nand_chip *chip = &nand->chip; struct mtd_info *mtd = nand_to_mtd(chip); + u8 id[2]; /* Request I/O resource. */ sprintf(res_name, "bank%d", bank); @@ -335,17 +336,16 @@ static int jz_nand_detect_bank(struct platform_device *pdev, /* Retrieve the IDs from the first chip. 
*/ chip->select_chip(mtd, 0); - chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); - chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); - *nand_maf_id = chip->read_byte(mtd); - *nand_dev_id = chip->read_byte(mtd); + nand_reset_op(chip); + nand_readid_op(chip, 0, id, sizeof(id)); + *nand_maf_id = id[0]; + *nand_dev_id = id[1]; } else { /* Detect additional chip. */ chip->select_chip(mtd, chipnr); - chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); - chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); - if (*nand_maf_id != chip->read_byte(mtd) - || *nand_dev_id != chip->read_byte(mtd)) { + nand_reset_op(chip); + nand_readid_op(chip, 0, id, sizeof(id)); + if (*nand_maf_id != id[0] || *nand_dev_id != id[1]) { ret = -ENODEV; goto notfound_id; } diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c index 5796468db653..31cb3b2967b9 100644 --- a/drivers/mtd/nand/lpc32xx_mlc.c +++ b/drivers/mtd/nand/lpc32xx_mlc.c @@ -461,7 +461,7 @@ static int lpc32xx_read_page(struct mtd_info *mtd, struct nand_chip *chip, } /* Writing Command and Address */ - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); /* For all sub-pages */ for (i = 0; i < host->mlcsubpages; i++) { diff --git a/drivers/mtd/nand/lpc32xx_slc.c b/drivers/mtd/nand/lpc32xx_slc.c index b61f28a1554d..2b96c281b1a2 100644 --- a/drivers/mtd/nand/lpc32xx_slc.c +++ b/drivers/mtd/nand/lpc32xx_slc.c @@ -399,10 +399,7 @@ static void lpc32xx_nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int static int lpc32xx_nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); - - return 0; + return nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize); } /* @@ -411,17 +408,8 @@ static int lpc32xx_nand_read_oob_syndrome(struct mtd_info *mtd, static int lpc32xx_nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page) { - int status; - - 
chip->cmdfunc(mtd, NAND_CMD_SEQIN, mtd->writesize, page); - chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - - /* Send command to program the OOB data */ - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_op(chip, page, mtd->writesize, chip->oob_poi, + mtd->oobsize); } /* @@ -632,7 +620,7 @@ static int lpc32xx_nand_read_page_syndrome(struct mtd_info *mtd, uint8_t *oobecc, tmpecc[LPC32XX_ECC_SAVE_SIZE]; /* Issue read command */ - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); /* Read data and oob, calculate ECC */ status = lpc32xx_xfer(mtd, buf, chip->ecc.steps, 1); @@ -675,7 +663,7 @@ static int lpc32xx_nand_read_page_raw_syndrome(struct mtd_info *mtd, int page) { /* Issue read command */ - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); /* Raw reads can just use the FIFO interface */ chip->read_buf(mtd, buf, chip->ecc.size * chip->ecc.steps); diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c index 6d0101e13ef6..9c4adaf9331b 100644 --- a/drivers/mtd/nand/mtk_nand.c +++ b/drivers/mtd/nand/mtk_nand.c @@ -834,16 +834,13 @@ static int mtk_nfc_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, { int ret; - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); + nand_prog_page_begin_op(chip, page, 0, NULL, 0); ret = mtk_nfc_write_page_raw(mtd, chip, NULL, 1, page); if (ret < 0) return -EIO; - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - ret = chip->waitfunc(mtd, chip); - - return ret & NAND_STATUS_FAIL ? 
-EIO : 0; + return nand_prog_page_end_op(chip); } static int mtk_nfc_update_ecc_stats(struct mtd_info *mtd, u8 *buf, u32 sectors) @@ -893,7 +890,7 @@ static int mtk_nfc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, buf = bufpoi + start * chip->ecc.size; if (column != 0) - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, column, -1); + nand_change_read_column_op(chip, column, NULL, 0, false); addr = dma_map_single(nfc->dev, buf, len, DMA_FROM_DEVICE); rc = dma_mapping_error(nfc->dev, addr); @@ -1016,7 +1013,7 @@ static int mtk_nfc_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, static int mtk_nfc_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); return mtk_nfc_read_page_raw(mtd, chip, NULL, 1, page); } diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index eacc3f39cafd..539132ef0095 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -561,14 +561,19 @@ static int nand_block_markbad_lowlevel(struct mtd_info *mtd, loff_t ofs) static int nand_check_wp(struct mtd_info *mtd) { struct nand_chip *chip = mtd_to_nand(mtd); + u8 status; + int ret; /* Broken xD cards report WP despite being writable */ if (chip->options & NAND_BROKEN_XD) return 0; /* Check the WP bit */ - chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); - return (chip->read_byte(mtd) & NAND_STATUS_WP) ? 0 : 1; + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + return status & NAND_STATUS_WP ? 
0 : 1; } /** @@ -667,10 +672,17 @@ EXPORT_SYMBOL_GPL(nand_wait_ready); static void nand_wait_status_ready(struct mtd_info *mtd, unsigned long timeo) { register struct nand_chip *chip = mtd_to_nand(mtd); + int ret; timeo = jiffies + msecs_to_jiffies(timeo); do { - if ((chip->read_byte(mtd) & NAND_STATUS_READY)) + u8 status; + + ret = nand_read_data_op(chip, &status, sizeof(status), true); + if (ret) + return; + + if (status & NAND_STATUS_READY) break; touch_softlockup_watchdog(); } while (time_before(jiffies, timeo)); @@ -1019,7 +1031,15 @@ static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip, if (chip->dev_ready(mtd)) break; } else { - if (chip->read_byte(mtd) & NAND_STATUS_READY) + int ret; + u8 status; + + ret = nand_read_data_op(chip, &status, sizeof(status), + true); + if (ret) + return; + + if (status & NAND_STATUS_READY) break; } mdelay(1); @@ -1036,8 +1056,9 @@ static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip, static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) { - int status; unsigned long timeo = 400; + u8 status; + int ret; /* * Apply this short delay always to ensure that we do wait tWB in any @@ -1045,7 +1066,9 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) */ ndelay(100); - chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); + ret = nand_status_op(chip, NULL); + if (ret) + return ret; if (in_interrupt() || oops_in_progress) panic_nand_wait(mtd, chip, timeo); @@ -1056,14 +1079,22 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) if (chip->dev_ready(mtd)) break; } else { - if (chip->read_byte(mtd) & NAND_STATUS_READY) + ret = nand_read_data_op(chip, &status, + sizeof(status), true); + if (ret) + return ret; + + if (status & NAND_STATUS_READY) break; } cond_resched(); } while (time_before(jiffies, timeo)); } - status = (int)chip->read_byte(mtd); + ret = nand_read_data_op(chip, &status, sizeof(status), true); + if (ret) + return ret; + /* This can happen if in 
case of timeout or buggy dev_ready */ WARN_ON(!(status & NAND_STATUS_READY)); return status; @@ -1217,6 +1248,516 @@ static void nand_release_data_interface(struct nand_chip *chip) kfree(chip->data_interface); } +/** + * nand_read_page_op - Do a READ PAGE operation + * @chip: The NAND chip + * @page: page to read + * @offset_in_page: offset within the page + * @buf: buffer used to store the data + * @len: length of the buffer + * + * This function issues a READ PAGE operation. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_read_page_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, void *buf, unsigned int len) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + if (len && !buf) + return -EINVAL; + + if (offset_in_page + len > mtd->writesize + mtd->oobsize) + return -EINVAL; + + chip->cmdfunc(mtd, NAND_CMD_READ0, offset_in_page, page); + if (len) + chip->read_buf(mtd, buf, len); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_read_page_op); + +/** + * nand_read_param_page_op - Do a READ PARAMETER PAGE operation + * @chip: The NAND chip + * @page: parameter page to read + * @buf: buffer used to store the data + * @len: length of the buffer + * + * This function issues a READ PARAMETER PAGE operation. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. 
+ */ +static int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf, + unsigned int len) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + unsigned int i; + u8 *p = buf; + + if (len && !buf) + return -EINVAL; + + chip->cmdfunc(mtd, NAND_CMD_PARAM, page, -1); + for (i = 0; i < len; i++) + p[i] = chip->read_byte(mtd); + + return 0; +} + +/** + * nand_change_read_column_op - Do a CHANGE READ COLUMN operation + * @chip: The NAND chip + * @offset_in_page: offset within the page + * @buf: buffer used to store the data + * @len: length of the buffer + * @force_8bit: force 8-bit bus access + * + * This function issues a CHANGE READ COLUMN operation. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_change_read_column_op(struct nand_chip *chip, + unsigned int offset_in_page, void *buf, + unsigned int len, bool force_8bit) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + if (len && !buf) + return -EINVAL; + + if (offset_in_page + len > mtd->writesize + mtd->oobsize) + return -EINVAL; + + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset_in_page, -1); + if (len) + chip->read_buf(mtd, buf, len); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_change_read_column_op); + +/** + * nand_read_oob_op - Do a READ OOB operation + * @chip: The NAND chip + * @page: page to read + * @offset_in_oob: offset within the OOB area + * @buf: buffer used to store the data + * @len: length of the buffer + * + * This function issues a READ OOB operation. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. 
+ */ +int nand_read_oob_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_oob, void *buf, unsigned int len) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + if (len && !buf) + return -EINVAL; + + if (offset_in_oob + len > mtd->oobsize) + return -EINVAL; + + chip->cmdfunc(mtd, NAND_CMD_READOOB, offset_in_oob, page); + if (len) + chip->read_buf(mtd, buf, len); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_read_oob_op); + +/** + * nand_prog_page_begin_op - starts a PROG PAGE operation + * @chip: The NAND chip + * @page: page to write + * @offset_in_page: offset within the page + * @buf: buffer containing the data to write to the page + * @len: length of the buffer + * + * This function issues the first half of a PROG PAGE operation. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_prog_page_begin_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, const void *buf, + unsigned int len) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + if (len && !buf) + return -EINVAL; + + if (offset_in_page + len > mtd->writesize + mtd->oobsize) + return -EINVAL; + + chip->cmdfunc(mtd, NAND_CMD_SEQIN, offset_in_page, page); + + if (buf) + chip->write_buf(mtd, buf, len); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_prog_page_begin_op); + +/** + * nand_prog_page_end_op - ends a PROG PAGE operation + * @chip: The NAND chip + * + * This function issues the second half of a PROG PAGE operation. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. 
+ */ +int nand_prog_page_end_op(struct nand_chip *chip) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + int status; + + chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); + + status = chip->waitfunc(mtd, chip); + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(nand_prog_page_end_op); + +/** + * nand_prog_page_op - Do a full PROG PAGE operation + * @chip: The NAND chip + * @page: page to write + * @offset_in_page: offset within the page + * @buf: buffer containing the data to write to the page + * @len: length of the buffer + * + * This function issues a full PROG PAGE operation. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_prog_page_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, const void *buf, + unsigned int len) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + int status; + + if (!len || !buf) + return -EINVAL; + + if (offset_in_page + len > mtd->writesize + mtd->oobsize) + return -EINVAL; + + chip->cmdfunc(mtd, NAND_CMD_SEQIN, offset_in_page, page); + chip->write_buf(mtd, buf, len); + chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); + + status = chip->waitfunc(mtd, chip); + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(nand_prog_page_op); + +/** + * nand_change_write_column_op - Do a CHANGE WRITE COLUMN operation + * @chip: The NAND chip + * @offset_in_page: offset within the page + * @buf: buffer containing the data to send to the NAND + * @len: length of the buffer + * @force_8bit: force 8-bit bus access + * + * This function issues a CHANGE WRITE COLUMN operation. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. 
+ */ +int nand_change_write_column_op(struct nand_chip *chip, + unsigned int offset_in_page, + const void *buf, unsigned int len, + bool force_8bit) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + if (len && !buf) + return -EINVAL; + + if (offset_in_page + len > mtd->writesize + mtd->oobsize) + return -EINVAL; + + chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset_in_page, -1); + if (len) + chip->write_buf(mtd, buf, len); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_change_write_column_op); + +/** + * nand_readid_op - Do a READID operation + * @chip: The NAND chip + * @addr: address cycle to pass after the READID command + * @buf: buffer used to store the ID + * @len: length of the buffer + * + * This function sends a READID command and reads back the ID returned by the + * NAND. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf, + unsigned int len) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + unsigned int i; + u8 *id = buf; + + if (len && !buf) + return -EINVAL; + + chip->cmdfunc(mtd, NAND_CMD_READID, addr, -1); + + for (i = 0; i < len; i++) + id[i] = chip->read_byte(mtd); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_readid_op); + +/** + * nand_status_op - Do a STATUS operation + * @chip: The NAND chip + * @status: out variable to store the NAND status + * + * This function sends a STATUS command and reads back the status returned by + * the NAND. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. 
+ */ +int nand_status_op(struct nand_chip *chip, u8 *status) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); + if (status) + *status = chip->read_byte(mtd); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_status_op); + +/** + * nand_exit_status_op - Exit a STATUS operation + * @chip: The NAND chip + * + * This function sends a READ0 command to cancel the effect of the STATUS + * command to avoid reading only the status until a new read command is sent. + * + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_exit_status_op(struct nand_chip *chip) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + chip->cmdfunc(mtd, NAND_CMD_READ0, -1, -1); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_exit_status_op); + +/** + * nand_erase_op - Do an erase operation + * @chip: The NAND chip + * @eraseblock: block to erase + * + * This function sends an ERASE command and waits for the NAND to be ready + * before returning. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + unsigned int page = eraseblock << + (chip->phys_erase_shift - chip->page_shift); + int status; + + chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page); + chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1); + + status = chip->waitfunc(mtd, chip); + if (status < 0) + return status; + + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(nand_erase_op); + +/** + * nand_set_features_op - Do a SET FEATURES operation + * @chip: The NAND chip + * @feature: feature id + * @data: 4 bytes of data + * + * This function sends a SET FEATURES command and waits for the NAND to be + * ready before returning. + * This function does not select/unselect the CS line. 
+ * + * Returns 0 on success, a negative error code otherwise. + */ +static int nand_set_features_op(struct nand_chip *chip, u8 feature, + const void *data) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + const u8 *params = data; + int i, status; + + chip->cmdfunc(mtd, NAND_CMD_SET_FEATURES, feature, -1); + for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) + chip->write_byte(mtd, params[i]); + + status = chip->waitfunc(mtd, chip); + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; +} + +/** + * nand_get_features_op - Do a GET FEATURES operation + * @chip: The NAND chip + * @feature: feature id + * @data: 4 bytes of data + * + * This function sends a GET FEATURES command and waits for the NAND to be + * ready before returning. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +static int nand_get_features_op(struct nand_chip *chip, u8 feature, + void *data) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + u8 *params = data; + int i; + + chip->cmdfunc(mtd, NAND_CMD_GET_FEATURES, feature, -1); + for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) + params[i] = chip->read_byte(mtd); + + return 0; +} + +/** + * nand_reset_op - Do a reset operation + * @chip: The NAND chip + * + * This function sends a RESET command and waits for the NAND to be ready + * before returning. + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_reset_op(struct nand_chip *chip) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + + return 0; +} +EXPORT_SYMBOL_GPL(nand_reset_op); + +/** + * nand_read_data_op - Read data from the NAND + * @chip: The NAND chip + * @buf: buffer used to store the data + * @len: length of the buffer + * @force_8bit: force 8-bit bus access + * + * This function does a raw data read on the bus. 
Usually used after launching + * another NAND operation like nand_read_page_op(). + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, + bool force_8bit) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + if (!len || !buf) + return -EINVAL; + + if (force_8bit) { + u8 *p = buf; + unsigned int i; + + for (i = 0; i < len; i++) + p[i] = chip->read_byte(mtd); + } else { + chip->read_buf(mtd, buf, len); + } + + return 0; +} +EXPORT_SYMBOL_GPL(nand_read_data_op); + +/** + * nand_write_data_op - Write data to the NAND + * @chip: The NAND chip + * @buf: buffer containing the data to send on the bus + * @len: length of the buffer + * @force_8bit: force 8-bit bus access + * + * This function does a raw data write on the bus. Usually used after launching + * another NAND operation like nand_prog_page_begin_op(). + * This function does not select/unselect the CS line. + * + * Returns 0 on success, a negative error code otherwise. + */ +int nand_write_data_op(struct nand_chip *chip, const void *buf, + unsigned int len, bool force_8bit) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + if (!len || !buf) + return -EINVAL; + + if (force_8bit) { + const u8 *p = buf; + unsigned int i; + + for (i = 0; i < len; i++) + chip->write_byte(mtd, p[i]); + } else { + chip->write_buf(mtd, buf, len); + } + + return 0; +} +EXPORT_SYMBOL_GPL(nand_write_data_op); + /** * nand_reset - Reset and initialize a NAND device * @chip: The NAND chip @@ -1238,8 +1779,10 @@ int nand_reset(struct nand_chip *chip, int chipnr) * interface settings, hence this weird ->select_chip() dance.
*/ chip->select_chip(mtd, chipnr); - chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + ret = nand_reset_op(chip); chip->select_chip(mtd, -1); + if (ret) + return ret; chip->select_chip(mtd, chipnr); ret = nand_setup_data_interface(chip, chipnr); @@ -1395,9 +1938,19 @@ EXPORT_SYMBOL(nand_check_erased_ecc_chunk); int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page) { - chip->read_buf(mtd, buf, mtd->writesize); - if (oob_required) - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); + int ret; + + ret = nand_read_data_op(chip, buf, mtd->writesize, false); + if (ret) + return ret; + + if (oob_required) { + ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, + false); + if (ret) + return ret; + } + return 0; } EXPORT_SYMBOL(nand_read_page_raw); @@ -1419,29 +1972,46 @@ static int nand_read_page_raw_syndrome(struct mtd_info *mtd, int eccsize = chip->ecc.size; int eccbytes = chip->ecc.bytes; uint8_t *oob = chip->oob_poi; - int steps, size; + int steps, size, ret; for (steps = chip->ecc.steps; steps > 0; steps--) { - chip->read_buf(mtd, buf, eccsize); + ret = nand_read_data_op(chip, buf, eccsize, false); + if (ret) + return ret; + buf += eccsize; if (chip->ecc.prepad) { - chip->read_buf(mtd, oob, chip->ecc.prepad); + ret = nand_read_data_op(chip, oob, chip->ecc.prepad, + false); + if (ret) + return ret; + oob += chip->ecc.prepad; } - chip->read_buf(mtd, oob, eccbytes); + ret = nand_read_data_op(chip, oob, eccbytes, false); + if (ret) + return ret; + oob += eccbytes; if (chip->ecc.postpad) { - chip->read_buf(mtd, oob, chip->ecc.postpad); + ret = nand_read_data_op(chip, oob, chip->ecc.postpad, + false); + if (ret) + return ret; + oob += chip->ecc.postpad; } } size = mtd->oobsize - (oob - chip->oob_poi); - if (size) - chip->read_buf(mtd, oob, size); + if (size) { + ret = nand_read_data_op(chip, oob, size, false); + if (ret) + return ret; + } return 0; } @@ -1530,7 +2100,9 @@ static int nand_read_subpage(struct 
mtd_info *mtd, struct nand_chip *chip, chip->cmdfunc(mtd, NAND_CMD_RNDOUT, data_col_addr, -1); p = bufpoi + data_col_addr; - chip->read_buf(mtd, p, datafrag_len); + ret = nand_read_data_op(chip, p, datafrag_len, false); + if (ret) + return ret; /* Calculate ECC */ for (i = 0; i < eccfrag_len ; i += chip->ecc.bytes, p += chip->ecc.size) @@ -1548,8 +2120,11 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, gaps = 1; if (gaps) { - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, mtd->writesize, -1); - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); + ret = nand_change_read_column_op(chip, mtd->writesize, + chip->oob_poi, mtd->oobsize, + false); + if (ret) + return ret; } else { /* * Send the command to read the particular ECC bytes take care @@ -1563,9 +2138,12 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, (busw - 1)) aligned_len++; - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, - mtd->writesize + aligned_pos, -1); - chip->read_buf(mtd, &chip->oob_poi[aligned_pos], aligned_len); + ret = nand_change_read_column_op(chip, + mtd->writesize + aligned_pos, + &chip->oob_poi[aligned_pos], + aligned_len, false); + if (ret) + return ret; } ret = mtd_ooblayout_get_eccbytes(mtd, chip->buffers->ecccode, @@ -1622,10 +2200,17 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { chip->ecc.hwctl(mtd, NAND_ECC_READ); - chip->read_buf(mtd, p, eccsize); + + ret = nand_read_data_op(chip, p, eccsize, false); + if (ret) + return ret; + chip->ecc.calculate(mtd, p, &ecc_calc[i]); } - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); + + ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, false); + if (ret) + return ret; ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0, chip->ecc.total); @@ -1684,9 +2269,13 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd, unsigned int max_bitflips = 0; /* Read the OOB area first */ - 
chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + ret = nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize); + if (ret) + return ret; + + ret = nand_read_page_op(chip, page, 0, NULL, 0); + if (ret) + return ret; ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0, chip->ecc.total); @@ -1697,7 +2286,11 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd, int stat; chip->ecc.hwctl(mtd, NAND_ECC_READ); - chip->read_buf(mtd, p, eccsize); + + ret = nand_read_data_op(chip, p, eccsize, false); + if (ret) + return ret; + chip->ecc.calculate(mtd, p, &ecc_calc[i]); stat = chip->ecc.correct(mtd, p, &ecc_code[i], NULL); @@ -1734,7 +2327,7 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd, static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page) { - int i, eccsize = chip->ecc.size; + int ret, i, eccsize = chip->ecc.size; int eccbytes = chip->ecc.bytes; int eccsteps = chip->ecc.steps; int eccpadbytes = eccbytes + chip->ecc.prepad + chip->ecc.postpad; @@ -1746,21 +2339,36 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int stat; chip->ecc.hwctl(mtd, NAND_ECC_READ); - chip->read_buf(mtd, p, eccsize); + + ret = nand_read_data_op(chip, p, eccsize, false); + if (ret) + return ret; if (chip->ecc.prepad) { - chip->read_buf(mtd, oob, chip->ecc.prepad); + ret = nand_read_data_op(chip, oob, chip->ecc.prepad, + false); + if (ret) + return ret; + oob += chip->ecc.prepad; } chip->ecc.hwctl(mtd, NAND_ECC_READSYN); - chip->read_buf(mtd, oob, eccbytes); + + ret = nand_read_data_op(chip, oob, eccbytes, false); + if (ret) + return ret; + stat = chip->ecc.correct(mtd, p, oob, NULL); oob += eccbytes; if (chip->ecc.postpad) { - chip->read_buf(mtd, oob, chip->ecc.postpad); + ret = nand_read_data_op(chip, oob, chip->ecc.postpad, + false); + if (ret) + 
return ret; + oob += chip->ecc.postpad; } @@ -1784,8 +2392,11 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, /* Calculate remaining oob bytes */ i = mtd->oobsize - (oob - chip->oob_poi); - if (i) - chip->read_buf(mtd, oob, i); + if (i) { + ret = nand_read_data_op(chip, oob, i, false); + if (ret) + return ret; + } return max_bitflips; } @@ -1906,8 +2517,11 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, __func__, buf); read_retry: - if (nand_standard_page_accessors(&chip->ecc)) - chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page); + if (nand_standard_page_accessors(&chip->ecc)) { + ret = nand_read_page_op(chip, page, 0, NULL, 0); + if (ret) + break; + } /* * Now read the page into the buffer. Absent an error, @@ -2066,9 +2680,7 @@ static int nand_read(struct mtd_info *mtd, loff_t from, size_t len, */ int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize); } EXPORT_SYMBOL(nand_read_oob_std); @@ -2086,25 +2698,43 @@ int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad; int eccsize = chip->ecc.size; uint8_t *bufpoi = chip->oob_poi; - int i, toread, sndrnd = 0, pos; + int i, toread, sndrnd = 0, pos, ret; + + ret = nand_read_page_op(chip, page, chip->ecc.size, NULL, 0); + if (ret) + return ret; - chip->cmdfunc(mtd, NAND_CMD_READ0, chip->ecc.size, page); for (i = 0; i < chip->ecc.steps; i++) { if (sndrnd) { + int ret; + pos = eccsize + i * (eccsize + chunk); if (mtd->writesize > 512) - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, pos, -1); + ret = nand_change_read_column_op(chip, pos, + NULL, 0, + false); else - chip->cmdfunc(mtd, NAND_CMD_READ0, pos, page); + ret = nand_read_page_op(chip, page, pos, NULL, + 0); + + if (ret) + return ret; } 
else sndrnd = 1; toread = min_t(int, length, chunk); - chip->read_buf(mtd, bufpoi, toread); + + ret = nand_read_data_op(chip, bufpoi, toread, false); + if (ret) + return ret; + bufpoi += toread; length -= toread; } - if (length > 0) - chip->read_buf(mtd, bufpoi, length); + if (length > 0) { + ret = nand_read_data_op(chip, bufpoi, length, false); + if (ret) + return ret; + } return 0; } @@ -2118,18 +2748,8 @@ EXPORT_SYMBOL(nand_read_oob_syndrome); */ int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page) { - int status = 0; - const uint8_t *buf = chip->oob_poi; - int length = mtd->oobsize; - - chip->cmdfunc(mtd, NAND_CMD_SEQIN, mtd->writesize, page); - chip->write_buf(mtd, buf, length); - /* Send command to program the OOB data */ - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_op(chip, page, mtd->writesize, chip->oob_poi, + mtd->oobsize); } EXPORT_SYMBOL(nand_write_oob_std); @@ -2145,7 +2765,7 @@ int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, { int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad; int eccsize = chip->ecc.size, length = mtd->oobsize; - int i, len, pos, status = 0, sndcmd = 0, steps = chip->ecc.steps; + int ret, i, len, pos, sndcmd = 0, steps = chip->ecc.steps; const uint8_t *bufpoi = chip->oob_poi; /* @@ -2159,7 +2779,10 @@ int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, } else pos = eccsize; - chip->cmdfunc(mtd, NAND_CMD_SEQIN, pos, page); + ret = nand_prog_page_begin_op(chip, page, pos, NULL, 0); + if (ret) + return ret; + for (i = 0; i < steps; i++) { if (sndcmd) { if (mtd->writesize <= 512) { @@ -2168,28 +2791,40 @@ int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, len = eccsize; while (len > 0) { int num = min_t(int, len, 4); - chip->write_buf(mtd, (uint8_t *)&fill, - num); + + ret = nand_write_data_op(chip, &fill, + 
num, false); + if (ret) + return ret; + len -= num; } } else { pos = eccsize + i * (eccsize + chunk); - chip->cmdfunc(mtd, NAND_CMD_RNDIN, pos, -1); + ret = nand_change_write_column_op(chip, pos, + NULL, 0, + false); + if (ret) + return ret; } } else sndcmd = 1; len = min_t(int, length, chunk); - chip->write_buf(mtd, bufpoi, len); + + ret = nand_write_data_op(chip, bufpoi, len, false); + if (ret) + return ret; + bufpoi += len; length -= len; } - if (length > 0) - chip->write_buf(mtd, bufpoi, length); - - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - status = chip->waitfunc(mtd, chip); + if (length > 0) { + ret = nand_write_data_op(chip, bufpoi, length, false); + if (ret) + return ret; + } - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_end_op(chip); } EXPORT_SYMBOL(nand_write_oob_syndrome); @@ -2341,9 +2976,18 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from, int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page) { - chip->write_buf(mtd, buf, mtd->writesize); - if (oob_required) - chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); + int ret; + + ret = nand_write_data_op(chip, buf, mtd->writesize, false); + if (ret) + return ret; + + if (oob_required) { + ret = nand_write_data_op(chip, chip->oob_poi, mtd->oobsize, + false); + if (ret) + return ret; + } return 0; } @@ -2367,29 +3011,46 @@ static int nand_write_page_raw_syndrome(struct mtd_info *mtd, int eccsize = chip->ecc.size; int eccbytes = chip->ecc.bytes; uint8_t *oob = chip->oob_poi; - int steps, size; + int steps, size, ret; for (steps = chip->ecc.steps; steps > 0; steps--) { - chip->write_buf(mtd, buf, eccsize); + ret = nand_write_data_op(chip, buf, eccsize, false); + if (ret) + return ret; + buf += eccsize; if (chip->ecc.prepad) { - chip->write_buf(mtd, oob, chip->ecc.prepad); + ret = nand_write_data_op(chip, oob, chip->ecc.prepad, + false); + if (ret) + return ret; + oob += chip->ecc.prepad; } - 
chip->write_buf(mtd, oob, eccbytes); + ret = nand_write_data_op(chip, oob, eccbytes, false); + if (ret) + return ret; + oob += eccbytes; if (chip->ecc.postpad) { - chip->write_buf(mtd, oob, chip->ecc.postpad); + ret = nand_write_data_op(chip, oob, chip->ecc.postpad, + false); + if (ret) + return ret; + oob += chip->ecc.postpad; } } size = mtd->oobsize - (oob - chip->oob_poi); - if (size) - chip->write_buf(mtd, oob, size); + if (size) { + ret = nand_write_data_op(chip, oob, size, false); + if (ret) + return ret; + } return 0; } @@ -2443,7 +3104,11 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { chip->ecc.hwctl(mtd, NAND_ECC_WRITE); - chip->write_buf(mtd, p, eccsize); + + ret = nand_write_data_op(chip, p, eccsize, false); + if (ret) + return ret; + chip->ecc.calculate(mtd, p, &ecc_calc[i]); } @@ -2452,7 +3117,9 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, if (ret) return ret; - chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); + ret = nand_write_data_op(chip, chip->oob_poi, mtd->oobsize, false); + if (ret) + return ret; return 0; } @@ -2488,7 +3155,9 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd, chip->ecc.hwctl(mtd, NAND_ECC_WRITE); /* write data (untouched subpages already masked by 0xFF) */ - chip->write_buf(mtd, buf, ecc_size); + ret = nand_write_data_op(chip, buf, ecc_size, false); + if (ret) + return ret; /* mask ECC of un-touched subpages by padding 0xFF */ if ((step < start_step) || (step > end_step)) @@ -2515,7 +3184,9 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd, return ret; /* write OOB buffer to NAND device */ - chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); + ret = nand_write_data_op(chip, chip->oob_poi, mtd->oobsize, false); + if (ret) + return ret; return 0; } @@ -2542,31 +3213,49 @@ static int nand_write_page_syndrome(struct mtd_info *mtd, int eccsteps = chip->ecc.steps; const uint8_t 
*p = buf; uint8_t *oob = chip->oob_poi; + int ret; for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { - chip->ecc.hwctl(mtd, NAND_ECC_WRITE); - chip->write_buf(mtd, p, eccsize); + + ret = nand_write_data_op(chip, p, eccsize, false); + if (ret) + return ret; if (chip->ecc.prepad) { - chip->write_buf(mtd, oob, chip->ecc.prepad); + ret = nand_write_data_op(chip, oob, chip->ecc.prepad, + false); + if (ret) + return ret; + oob += chip->ecc.prepad; } chip->ecc.calculate(mtd, p, oob); - chip->write_buf(mtd, oob, eccbytes); + + ret = nand_write_data_op(chip, oob, eccbytes, false); + if (ret) + return ret; + oob += eccbytes; if (chip->ecc.postpad) { - chip->write_buf(mtd, oob, chip->ecc.postpad); + ret = nand_write_data_op(chip, oob, chip->ecc.postpad, + false); + if (ret) + return ret; + oob += chip->ecc.postpad; } } /* Calculate remaining oob bytes */ i = mtd->oobsize - (oob - chip->oob_poi); - if (i) - chip->write_buf(mtd, oob, i); + if (i) { + ret = nand_write_data_op(chip, oob, i, false); + if (ret) + return ret; + } return 0; } @@ -2594,8 +3283,11 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, else subpage = 0; - if (nand_standard_page_accessors(&chip->ecc)) - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); + if (nand_standard_page_accessors(&chip->ecc)) { + status = nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (status) + return status; + } if (unlikely(raw)) status = chip->ecc.write_page_raw(mtd, chip, buf, @@ -2610,13 +3302,8 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (status < 0) return status; - if (nand_standard_page_accessors(&chip->ecc)) { - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - if (status & NAND_STATUS_FAIL) - return -EIO; - } + if (nand_standard_page_accessors(&chip->ecc)) + return nand_prog_page_end_op(chip); return 0; } @@ -2989,17 +3676,12 @@ out: static int single_erase(struct mtd_info *mtd, int page) { struct 
nand_chip *chip = mtd_to_nand(mtd); - int status; + unsigned int eraseblock; /* Send commands to erase a block */ - chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page); - chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1); + eraseblock = page >> (chip->phys_erase_shift - chip->page_shift); - status = chip->waitfunc(mtd, chip); - if (status < 0) - return status; - - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_erase_op(chip, eraseblock); } /** @@ -3226,22 +3908,12 @@ static int nand_max_bad_blocks(struct mtd_info *mtd, loff_t ofs, size_t len) static int nand_onfi_set_features(struct mtd_info *mtd, struct nand_chip *chip, int addr, uint8_t *subfeature_param) { - int status; - int i; - if (!chip->onfi_version || !(le16_to_cpu(chip->onfi_params.opt_cmd) & ONFI_OPT_CMD_SET_GET_FEATURES)) return -EINVAL; - chip->cmdfunc(mtd, NAND_CMD_SET_FEATURES, addr, -1); - for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) - chip->write_byte(mtd, subfeature_param[i]); - - status = chip->waitfunc(mtd, chip); - if (status & NAND_STATUS_FAIL) - return -EIO; - return 0; + return nand_set_features_op(chip, addr, subfeature_param); } /** @@ -3254,17 +3926,12 @@ static int nand_onfi_set_features(struct mtd_info *mtd, struct nand_chip *chip, static int nand_onfi_get_features(struct mtd_info *mtd, struct nand_chip *chip, int addr, uint8_t *subfeature_param) { - int i; - if (!chip->onfi_version || !(le16_to_cpu(chip->onfi_params.opt_cmd) & ONFI_OPT_CMD_SET_GET_FEATURES)) return -EINVAL; - chip->cmdfunc(mtd, NAND_CMD_GET_FEATURES, addr, -1); - for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) - *subfeature_param++ = chip->read_byte(mtd); - return 0; + return nand_get_features_op(chip, addr, subfeature_param); } /** @@ -3407,12 +4074,11 @@ static u16 onfi_crc16(u16 crc, u8 const *p, size_t len) static int nand_flash_detect_ext_param_page(struct nand_chip *chip, struct nand_onfi_params *p) { - struct mtd_info *mtd = nand_to_mtd(chip); struct onfi_ext_param_page *ep; struct onfi_ext_section *s; struct 
onfi_ext_ecc_info *ecc; uint8_t *cursor; - int ret = -EINVAL; + int ret; int len; int i; @@ -3422,14 +4088,18 @@ static int nand_flash_detect_ext_param_page(struct nand_chip *chip, return -ENOMEM; /* Send our own NAND_CMD_PARAM. */ - chip->cmdfunc(mtd, NAND_CMD_PARAM, 0, -1); + ret = nand_read_param_page_op(chip, 0, NULL, 0); + if (ret) + goto ext_out; /* Use the Change Read Column command to skip the ONFI param pages. */ - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, - sizeof(*p) * p->num_of_param_pages , -1); + ret = nand_change_read_column_op(chip, + sizeof(*p) * p->num_of_param_pages, + ep, len, true); + if (ret) + goto ext_out; - /* Read out the Extended Parameter Page. */ - chip->read_buf(mtd, (uint8_t *)ep, len); + ret = -EINVAL; if ((onfi_crc16(ONFI_CRC_BASE, ((uint8_t *)ep) + 2, len - 2) != le16_to_cpu(ep->crc))) { pr_debug("fail in the CRC.\n"); @@ -3482,19 +4152,23 @@ static int nand_flash_detect_onfi(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); struct nand_onfi_params *p = &chip->onfi_params; - int i, j; - int val; + char id[4]; + int i, ret, val; /* Try ONFI for unknown chip or LP */ - chip->cmdfunc(mtd, NAND_CMD_READID, 0x20, -1); - if (chip->read_byte(mtd) != 'O' || chip->read_byte(mtd) != 'N' || - chip->read_byte(mtd) != 'F' || chip->read_byte(mtd) != 'I') + ret = nand_readid_op(chip, 0x20, id, sizeof(id)); + if (ret || strncmp(id, "ONFI", 4)) + return 0; + + ret = nand_read_param_page_op(chip, 0, NULL, 0); + if (ret) return 0; - chip->cmdfunc(mtd, NAND_CMD_PARAM, 0, -1); for (i = 0; i < 3; i++) { - for (j = 0; j < sizeof(*p); j++) - ((uint8_t *)p)[j] = chip->read_byte(mtd); + ret = nand_read_data_op(chip, p, sizeof(*p), true); + if (ret) + return 0; + if (onfi_crc16(ONFI_CRC_BASE, (uint8_t *)p, 254) == le16_to_cpu(p->crc)) { break; @@ -3585,20 +4259,22 @@ static int nand_flash_detect_jedec(struct nand_chip *chip) struct mtd_info *mtd = nand_to_mtd(chip); struct nand_jedec_params *p = &chip->jedec_params; struct jedec_ecc_info *ecc; - 
int val; - int i, j; + char id[5]; + int i, val, ret; /* Try JEDEC for unknown chip or LP */ - chip->cmdfunc(mtd, NAND_CMD_READID, 0x40, -1); - if (chip->read_byte(mtd) != 'J' || chip->read_byte(mtd) != 'E' || - chip->read_byte(mtd) != 'D' || chip->read_byte(mtd) != 'E' || - chip->read_byte(mtd) != 'C') + ret = nand_readid_op(chip, 0x40, id, sizeof(id)); + if (ret || strncmp(id, "JEDEC", sizeof(id))) + return 0; + + ret = nand_read_param_page_op(chip, 0x40, NULL, 0); + if (ret) return 0; - chip->cmdfunc(mtd, NAND_CMD_PARAM, 0x40, -1); for (i = 0; i < 3; i++) { - for (j = 0; j < sizeof(*p); j++) - ((uint8_t *)p)[j] = chip->read_byte(mtd); + ret = nand_read_data_op(chip, p, sizeof(*p), true); + if (ret) + return 0; if (onfi_crc16(ONFI_CRC_BASE, (uint8_t *)p, 510) == le16_to_cpu(p->crc)) @@ -3877,8 +4553,7 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type) { const struct nand_manufacturer *manufacturer; struct mtd_info *mtd = nand_to_mtd(chip); - int busw; - int i; + int busw, ret; u8 *id_data = chip->id.data; u8 maf_id, dev_id; @@ -3886,17 +4561,21 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type) * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx) * after power-up. */ - nand_reset(chip, 0); + ret = nand_reset(chip, 0); + if (ret) + return ret; /* Select the device */ chip->select_chip(mtd, 0); /* Send the command for reading device ID */ - chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); + ret = nand_readid_op(chip, 0, id_data, 2); + if (ret) + return ret; /* Read manufacturer and device IDs */ - maf_id = chip->read_byte(mtd); - dev_id = chip->read_byte(mtd); + maf_id = id_data[0]; + dev_id = id_data[1]; /* * Try again to make sure, as some systems the bus-hold or other @@ -3905,11 +4584,10 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type) * not match, ignore the device completely. 
*/ - chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); - /* Read entire ID string */ - for (i = 0; i < ARRAY_SIZE(chip->id.data); i++) - id_data[i] = chip->read_byte(mtd); + ret = nand_readid_op(chip, 0, id_data, sizeof(chip->id.data)); + if (ret) + return ret; if (id_data[0] != maf_id || id_data[1] != dev_id) { pr_info("second ID read did not match %02x,%02x against %02x,%02x\n", @@ -4236,15 +4914,16 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, /* Check for a chip array */ for (i = 1; i < maxchips; i++) { + u8 id[2]; + /* See comment in nand_get_flash_type for reset */ nand_reset(chip, i); chip->select_chip(mtd, i); /* Send the command for reading device ID */ - chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); + nand_readid_op(chip, 0, id, sizeof(id)); /* Read manufacturer and device IDs */ - if (nand_maf_id != chip->read_byte(mtd) || - nand_dev_id != chip->read_byte(mtd)) { + if (nand_maf_id != id[0] || nand_dev_id != id[1]) { chip->select_chip(mtd, -1); break; } diff --git a/drivers/mtd/nand/nand_hynix.c b/drivers/mtd/nand/nand_hynix.c index 72d98cbff4ca..bae0da2aa2a8 100644 --- a/drivers/mtd/nand/nand_hynix.c +++ b/drivers/mtd/nand/nand_hynix.c @@ -66,16 +66,35 @@ struct hynix_read_retry_otp { }; static bool hynix_nand_has_valid_jedecid(struct nand_chip *chip) +{ + u8 jedecid[5] = { }; + int ret; + + ret = nand_readid_op(chip, 0x40, jedecid, sizeof(jedecid)); + if (ret) + return false; + + return !strncmp("JEDEC", jedecid, sizeof(jedecid)); +} + +static int hynix_nand_cmd_op(struct nand_chip *chip, u8 cmd) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + chip->cmdfunc(mtd, cmd, -1, -1); + + return 0; +} + +static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val) { struct mtd_info *mtd = nand_to_mtd(chip); - u8 jedecid[6] = { }; - int i = 0; + u16 column = ((u16)addr << 8) | addr; - chip->cmdfunc(mtd, NAND_CMD_READID, 0x40, -1); - for (i = 0; i < 5; i++) - jedecid[i] = chip->read_byte(mtd); + chip->cmdfunc(mtd, NAND_CMD_NONE, 
column, -1); + chip->write_byte(mtd, val); - return !strcmp("JEDEC", jedecid); + return 0; } static int hynix_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode) @@ -83,13 +102,15 @@ static int hynix_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode) struct nand_chip *chip = mtd_to_nand(mtd); struct hynix_nand *hynix = nand_get_manufacturer_data(chip); const u8 *values; - int i; + int i, ret; values = hynix->read_retry->values + (retry_mode * hynix->read_retry->nregs); /* Enter 'Set Hynix Parameters' mode */ - chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, -1, -1); + ret = hynix_nand_cmd_op(chip, NAND_HYNIX_CMD_SET_PARAMS); + if (ret) + return ret; /* * Configure the NAND in the requested read-retry mode. @@ -101,17 +122,14 @@ static int hynix_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode) * probably tweaked at production in this case). */ for (i = 0; i < hynix->read_retry->nregs; i++) { - int column = hynix->read_retry->regs[i]; - - column |= column << 8; - chip->cmdfunc(mtd, NAND_CMD_NONE, column, -1); - chip->write_byte(mtd, values[i]); + ret = hynix_nand_reg_write_op(chip, hynix->read_retry->regs[i], + values[i]); + if (ret) + return ret; } /* Apply the new settings. 
*/ - chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1); - - return 0; + return hynix_nand_cmd_op(chip, NAND_HYNIX_CMD_APPLY_PARAMS); } /** @@ -167,40 +185,63 @@ static int hynix_read_rr_otp(struct nand_chip *chip, const struct hynix_read_retry_otp *info, void *buf) { - struct mtd_info *mtd = nand_to_mtd(chip); - int i; + int i, ret; - chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + ret = nand_reset_op(chip); + if (ret) + return ret; - chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, -1, -1); + ret = hynix_nand_cmd_op(chip, NAND_HYNIX_CMD_SET_PARAMS); + if (ret) + return ret; for (i = 0; i < info->nregs; i++) { - int column = info->regs[i]; - - column |= column << 8; - chip->cmdfunc(mtd, NAND_CMD_NONE, column, -1); - chip->write_byte(mtd, info->values[i]); + ret = hynix_nand_reg_write_op(chip, info->regs[i], + info->values[i]); + if (ret) + return ret; } - chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1); + ret = hynix_nand_cmd_op(chip, NAND_HYNIX_CMD_APPLY_PARAMS); + if (ret) + return ret; /* Sequence to enter OTP mode? 
*/ - chip->cmdfunc(mtd, 0x17, -1, -1); - chip->cmdfunc(mtd, 0x04, -1, -1); - chip->cmdfunc(mtd, 0x19, -1, -1); + ret = hynix_nand_cmd_op(chip, 0x17); + if (ret) + return ret; + + ret = hynix_nand_cmd_op(chip, 0x4); + if (ret) + return ret; + + ret = hynix_nand_cmd_op(chip, 0x19); + if (ret) + return ret; /* Now read the page */ - chip->cmdfunc(mtd, NAND_CMD_READ0, 0x0, info->page); - chip->read_buf(mtd, buf, info->size); + ret = nand_read_page_op(chip, info->page, 0, buf, info->size); + if (ret) + return ret; /* Put everything back to normal */ - chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); - chip->cmdfunc(mtd, NAND_HYNIX_CMD_SET_PARAMS, 0x38, -1); - chip->write_byte(mtd, 0x0); - chip->cmdfunc(mtd, NAND_HYNIX_CMD_APPLY_PARAMS, -1, -1); - chip->cmdfunc(mtd, NAND_CMD_READ0, 0x0, -1); + ret = nand_reset_op(chip); + if (ret) + return ret; - return 0; + ret = hynix_nand_cmd_op(chip, NAND_HYNIX_CMD_SET_PARAMS); + if (ret) + return ret; + + ret = hynix_nand_reg_write_op(chip, 0x38, 0); + if (ret) + return ret; + + ret = hynix_nand_cmd_op(chip, NAND_HYNIX_CMD_APPLY_PARAMS); + if (ret) + return ret; + + return nand_read_page_op(chip, 0, 0, NULL, 0); } #define NAND_HYNIX_1XNM_RR_COUNT_OFFS 0 diff --git a/drivers/mtd/nand/nand_micron.c b/drivers/mtd/nand/nand_micron.c index abf6a3c376e8..bf2dc23e1c32 100644 --- a/drivers/mtd/nand/nand_micron.c +++ b/drivers/mtd/nand/nand_micron.c @@ -117,16 +117,28 @@ micron_nand_read_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page) { - int status; - int max_bitflips = 0; + u8 status; + int ret, max_bitflips = 0; - micron_nand_on_die_ecc_setup(chip, true); + ret = micron_nand_on_die_ecc_setup(chip, true); + if (ret) + return ret; + + ret = nand_read_page_op(chip, page, 0, NULL, 0); + if (ret) + goto out; + + ret = nand_status_op(chip, &status); + if (ret) + goto out; + + ret = nand_exit_status_op(chip); + if (ret) + goto out; - chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page); - 
chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); - status = chip->read_byte(mtd); if (status & NAND_STATUS_FAIL) mtd->ecc_stats.failed++; + /* * The internal ECC doesn't tell us the number of bitflips * that have been corrected, but tells us if it recommends to @@ -137,13 +149,12 @@ micron_nand_read_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip, else if (status & NAND_STATUS_WRITE_RECOMMENDED) max_bitflips = chip->ecc.strength; - chip->cmdfunc(mtd, NAND_CMD_READ0, -1, -1); - - nand_read_page_raw(mtd, chip, buf, oob_required, page); + ret = nand_read_page_raw(mtd, chip, buf, oob_required, page); +out: micron_nand_on_die_ecc_setup(chip, false); - return max_bitflips; + return ret ? ret : max_bitflips; } static int @@ -151,18 +162,26 @@ micron_nand_write_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page) { - int status; + int ret; - micron_nand_on_die_ecc_setup(chip, true); + ret = micron_nand_on_die_ecc_setup(chip, true); + if (ret) + return ret; - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); - nand_write_page_raw(mtd, chip, buf, oob_required, page); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - status = chip->waitfunc(mtd, chip); + ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (ret) + goto out; + ret = nand_write_page_raw(mtd, chip, buf, oob_required, page); + if (ret) + return ret; + + ret = nand_prog_page_end_op(chip); + +out: micron_nand_on_die_ecc_setup(chip, false); - return status & NAND_STATUS_FAIL ? 
-EIO : 0; + return ret; } static int @@ -171,10 +190,13 @@ micron_nand_read_page_raw_on_die_ecc(struct mtd_info *mtd, uint8_t *buf, int oob_required, int page) { - chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page); - nand_read_page_raw(mtd, chip, buf, oob_required, page); + int ret; - return 0; + ret = nand_read_page_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + + return nand_read_page_raw(mtd, chip, buf, oob_required, page); } static int @@ -183,14 +205,17 @@ micron_nand_write_page_raw_on_die_ecc(struct mtd_info *mtd, const uint8_t *buf, int oob_required, int page) { - int status; + int ret; - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page); - nand_write_page_raw(mtd, chip, buf, oob_required, page); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - status = chip->waitfunc(mtd, chip); + ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + + ret = nand_write_page_raw(mtd, chip, buf, oob_required, page); + if (ret) + return ret; - return status & NAND_STATUS_FAIL ? 
-EIO : 0; + return nand_prog_page_end_op(chip); } enum { diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index dad438c4906a..6e1b209cd5a7 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1647,10 +1647,10 @@ static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip, chip->read_buf(mtd, buf, mtd->writesize); /* Read oob bytes */ - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, - mtd->writesize + BADBLOCK_MARKER_LENGTH, -1); - chip->read_buf(mtd, chip->oob_poi + BADBLOCK_MARKER_LENGTH, - chip->ecc.total); + nand_change_read_column_op(chip, + mtd->writesize + BADBLOCK_MARKER_LENGTH, + chip->oob_poi + BADBLOCK_MARKER_LENGTH, + chip->ecc.total, false); /* Calculate ecc bytes */ omap_calculate_ecc_bch_multi(mtd, buf, ecc_calc); diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 90b9a9ccbe60..28bcdf64c1fc 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -520,15 +520,13 @@ static int pxa3xx_nand_init_timings_compat(struct pxa3xx_nand_host *host, struct nand_chip *chip = &host->chip; struct pxa3xx_nand_info *info = host->info_data; const struct pxa3xx_nand_flash *f = NULL; - struct mtd_info *mtd = nand_to_mtd(&host->chip); int i, id, ntypes; + u8 idbuf[2]; ntypes = ARRAY_SIZE(builtin_flash_types); - chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); - - id = chip->read_byte(mtd); - id |= chip->read_byte(mtd) << 0x8; + nand_readid_op(chip, 0, idbuf, sizeof(idbuf)); + id = idbuf[0] | (idbuf[1] << 8); for (i = 0; i < ntypes; i++) { f = &builtin_flash_types[i]; diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index 2656c1ac5646..e34313ecd903 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -1990,7 +1990,7 @@ static int qcom_nandc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, struct nand_ecc_ctrl *ecc = &chip->ecc; u8 *oob = chip->oob_poi; int data_size, oob_size; - int ret, status = 0; + int ret; 
host->use_ecc = true; @@ -2027,11 +2027,7 @@ static int qcom_nandc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, return -EIO; } - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_end_op(chip); } static int qcom_nandc_block_bad(struct mtd_info *mtd, loff_t ofs) @@ -2081,7 +2077,7 @@ static int qcom_nandc_block_markbad(struct mtd_info *mtd, loff_t ofs) struct qcom_nand_host *host = to_qcom_nand_host(chip); struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip); struct nand_ecc_ctrl *ecc = &chip->ecc; - int page, ret, status = 0; + int page, ret; clear_read_regs(nandc); clear_bam_transaction(nandc); @@ -2114,11 +2110,7 @@ static int qcom_nandc_block_markbad(struct mtd_info *mtd, loff_t ofs) return -EIO; } - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_end_op(chip); } /* diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c index fc9287af4614..595635b9e9de 100644 --- a/drivers/mtd/nand/r852.c +++ b/drivers/mtd/nand/r852.c @@ -364,7 +364,7 @@ static int r852_wait(struct mtd_info *mtd, struct nand_chip *chip) struct r852_device *dev = nand_get_controller_data(chip); unsigned long timeout; - int status; + u8 status; timeout = jiffies + (chip->state == FL_ERASING ? msecs_to_jiffies(400) : msecs_to_jiffies(20)); @@ -373,8 +373,7 @@ static int r852_wait(struct mtd_info *mtd, struct nand_chip *chip) if (chip->dev_ready(mtd)) break; - chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); - status = (int)chip->read_byte(mtd); + nand_status_op(chip, &status); /* Unfortunelly, no way to send detailed error status... 
*/ if (dev->dma_error) { @@ -522,9 +521,7 @@ exit: static int r852_read_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page); - chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize); } /* @@ -1046,7 +1043,7 @@ static int r852_resume(struct device *device) if (dev->card_registred) { r852_engine_enable(dev); dev->chip->select_chip(mtd, 0); - dev->chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + nand_reset_op(dev->chip); dev->chip->select_chip(mtd, -1); } diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 82244be3e766..da5cc36f4c30 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -958,12 +958,12 @@ static int sunxi_nfc_hw_ecc_read_chunk(struct mtd_info *mtd, int ret; if (*cur_off != data_off) - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, data_off, -1); + nand_change_read_column_op(nand, data_off, NULL, 0, false); sunxi_nfc_randomizer_read_buf(mtd, NULL, ecc->size, false, page); if (data_off + ecc->size != oob_off) - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1); + nand_change_read_column_op(nand, oob_off, NULL, 0, false); ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); if (ret) @@ -991,16 +991,15 @@ static int sunxi_nfc_hw_ecc_read_chunk(struct mtd_info *mtd, * Re-read the data with the randomizer disabled to identify * bitflips in erased pages. 
*/ - if (nand->options & NAND_NEED_SCRAMBLING) { - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, data_off, -1); - nand->read_buf(mtd, data, ecc->size); - } else { + if (nand->options & NAND_NEED_SCRAMBLING) + nand_change_read_column_op(nand, data_off, data, + ecc->size, false); + else memcpy_fromio(data, nfc->regs + NFC_RAM0_BASE, ecc->size); - } - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1); - nand->read_buf(mtd, oob, ecc->bytes + 4); + nand_change_read_column_op(nand, oob_off, oob, ecc->bytes + 4, + false); ret = nand_check_erased_ecc_chunk(data, ecc->size, oob, ecc->bytes + 4, @@ -1011,7 +1010,8 @@ static int sunxi_nfc_hw_ecc_read_chunk(struct mtd_info *mtd, memcpy_fromio(data, nfc->regs + NFC_RAM0_BASE, ecc->size); if (oob_required) { - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1); + nand_change_read_column_op(nand, oob_off, NULL, 0, + false); sunxi_nfc_randomizer_read_buf(mtd, oob, ecc->bytes + 4, true, page); @@ -1038,8 +1038,8 @@ static void sunxi_nfc_hw_ecc_read_extra_oob(struct mtd_info *mtd, return; if (!cur_off || *cur_off != offset) - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, - offset + mtd->writesize, -1); + nand_change_read_column_op(nand, mtd->writesize, NULL, 0, + false); if (!randomize) sunxi_nfc_read_buf(mtd, oob + offset, len); @@ -1116,9 +1116,9 @@ static int sunxi_nfc_hw_ecc_read_chunks_dma(struct mtd_info *mtd, uint8_t *buf, if (oob_required && !erased) { /* TODO: use DMA to retrieve OOB */ - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, - mtd->writesize + oob_off, -1); - nand->read_buf(mtd, oob, ecc->bytes + 4); + nand_change_read_column_op(nand, + mtd->writesize + oob_off, + oob, ecc->bytes + 4, false); sunxi_nfc_hw_ecc_get_prot_oob_bytes(mtd, oob, i, !i, page); @@ -1143,18 +1143,17 @@ static int sunxi_nfc_hw_ecc_read_chunks_dma(struct mtd_info *mtd, uint8_t *buf, /* * Re-read the data with the randomizer disabled to * identify bitflips in erased pages. 
+ * TODO: use DMA to read page in raw mode */ - if (randomized) { - /* TODO: use DMA to read page in raw mode */ - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, - data_off, -1); - nand->read_buf(mtd, data, ecc->size); - } + if (randomized) + nand_change_read_column_op(nand, data_off, + data, ecc->size, + false); /* TODO: use DMA to retrieve OOB */ - nand->cmdfunc(mtd, NAND_CMD_RNDOUT, - mtd->writesize + oob_off, -1); - nand->read_buf(mtd, oob, ecc->bytes + 4); + nand_change_read_column_op(nand, + mtd->writesize + oob_off, + oob, ecc->bytes + 4, false); ret = nand_check_erased_ecc_chunk(data, ecc->size, oob, ecc->bytes + 4, @@ -1187,12 +1186,12 @@ static int sunxi_nfc_hw_ecc_write_chunk(struct mtd_info *mtd, int ret; if (data_off != *cur_off) - nand->cmdfunc(mtd, NAND_CMD_RNDIN, data_off, -1); + nand_change_write_column_op(nand, data_off, NULL, 0, false); sunxi_nfc_randomizer_write_buf(mtd, data, ecc->size, false, page); if (data_off + ecc->size != oob_off) - nand->cmdfunc(mtd, NAND_CMD_RNDIN, oob_off, -1); + nand_change_write_column_op(nand, oob_off, NULL, 0, false); ret = sunxi_nfc_wait_cmd_fifo_empty(nfc); if (ret) @@ -1228,8 +1227,8 @@ static void sunxi_nfc_hw_ecc_write_extra_oob(struct mtd_info *mtd, return; if (!cur_off || *cur_off != offset) - nand->cmdfunc(mtd, NAND_CMD_RNDIN, - offset + mtd->writesize, -1); + nand_change_write_column_op(nand, offset + mtd->writesize, + NULL, 0, false); sunxi_nfc_randomizer_write_buf(mtd, oob + offset, len, false, page); @@ -1285,7 +1284,7 @@ static int sunxi_nfc_hw_ecc_read_page_dma(struct mtd_info *mtd, return ret; /* Fallback to PIO mode */ - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, 0, -1); + nand_change_read_column_op(chip, 0, NULL, 0, false); return sunxi_nfc_hw_ecc_read_page(mtd, chip, buf, oob_required, page); } @@ -1335,7 +1334,7 @@ static int sunxi_nfc_hw_ecc_read_subpage_dma(struct mtd_info *mtd, return ret; /* Fallback to PIO mode */ - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, 0, -1); + nand_change_read_column_op(chip, 0, NULL, 0, 
false); return sunxi_nfc_hw_ecc_read_subpage(mtd, chip, data_offs, readlen, buf, page); @@ -1540,7 +1539,7 @@ static int sunxi_nfc_hw_common_ecc_read_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); chip->pagebuf = -1; @@ -1551,9 +1550,9 @@ static int sunxi_nfc_hw_common_ecc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { - int ret, status; + int ret; - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page); + nand_prog_page_begin_op(chip, page, 0, NULL, 0); chip->pagebuf = -1; @@ -1563,11 +1562,7 @@ static int sunxi_nfc_hw_common_ecc_write_oob(struct mtd_info *mtd, return ret; /* Send command to program the OOB data */ - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - - return status & NAND_STATUS_FAIL ? -EIO : 0; + return nand_prog_page_end_op(chip); } static const s32 tWB_lut[] = {6, 12, 16, 20}; diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c index 766906f03943..97a300b46b1d 100644 --- a/drivers/mtd/nand/tango_nand.c +++ b/drivers/mtd/nand/tango_nand.c @@ -329,7 +329,7 @@ static void aux_read(struct nand_chip *chip, u8 **buf, int len, int *pos) if (!*buf) { /* skip over "len" bytes */ - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, *pos, -1); + nand_change_read_column_op(chip, *pos, NULL, 0, false); } else { tango_read_buf(mtd, *buf, len); *buf += len; @@ -344,7 +344,7 @@ static void aux_write(struct nand_chip *chip, const u8 **buf, int len, int *pos) if (!*buf) { /* skip over "len" bytes */ - chip->cmdfunc(mtd, NAND_CMD_RNDIN, *pos, -1); + nand_change_write_column_op(chip, *pos, NULL, 0, false); } else { tango_write_buf(mtd, *buf, len); *buf += len; @@ -427,7 +427,7 @@ static void raw_write(struct nand_chip *chip, const u8 *buf, const u8 *oob) static int tango_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, u8 *buf, int oob_required, int page) { - chip->cmdfunc(mtd, 
NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); raw_read(chip, buf, chip->oob_poi); return 0; } @@ -435,23 +435,15 @@ static int tango_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, static int tango_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const u8 *buf, int oob_required, int page) { - int status; - - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page); + nand_prog_page_begin_op(chip, page, 0, NULL, 0); raw_write(chip, buf, chip->oob_poi); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - - status = chip->waitfunc(mtd, chip); - if (status & NAND_STATUS_FAIL) - return -EIO; - - return 0; + return nand_prog_page_end_op(chip); } static int tango_read_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_READ0, 0, page); + nand_read_page_op(chip, page, 0, NULL, 0); raw_read(chip, NULL, chip->oob_poi); return 0; } @@ -459,11 +451,9 @@ static int tango_read_oob(struct mtd_info *mtd, struct nand_chip *chip, static int tango_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { - chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0, page); + nand_prog_page_begin_op(chip, page, 0, NULL, 0); raw_write(chip, NULL, chip->oob_poi); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); - chip->waitfunc(mtd, chip); - return 0; + return nand_prog_page_end_op(chip); } static int oob_ecc(struct mtd_info *mtd, int idx, struct mtd_oob_region *res) diff --git a/drivers/mtd/nand/tmio_nand.c b/drivers/mtd/nand/tmio_nand.c index 84dbf32332e1..dcaa924502de 100644 --- a/drivers/mtd/nand/tmio_nand.c +++ b/drivers/mtd/nand/tmio_nand.c @@ -192,6 +192,7 @@ tmio_nand_wait(struct mtd_info *mtd, struct nand_chip *nand_chip) { struct tmio_nand *tmio = mtd_to_tmio(mtd); long timeout; + u8 status; /* enable RDYREQ interrupt */ tmio_iowrite8(0x0f, tmio->fcr + FCR_ISR); @@ -212,8 +213,8 @@ tmio_nand_wait(struct mtd_info *mtd, struct nand_chip *nand_chip) dev_warn(&tmio->dev->dev, "timeout waiting for interrupt\n"); } - 
nand_chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); - return nand_chip->read_byte(mtd); + nand_status_op(nand_chip, &status); + return status; } /* diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 749bb08c4772..fd99d5137d71 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1316,6 +1316,35 @@ int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, /* Reset and initialize a NAND device */ int nand_reset(struct nand_chip *chip, int chipnr); +/* NAND operation helpers */ +int nand_reset_op(struct nand_chip *chip); +int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf, + unsigned int len); +int nand_status_op(struct nand_chip *chip, u8 *status); +int nand_exit_status_op(struct nand_chip *chip); +int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock); +int nand_read_page_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, void *buf, unsigned int len); +int nand_change_read_column_op(struct nand_chip *chip, + unsigned int offset_in_page, void *buf, + unsigned int len, bool force_8bit); +int nand_read_oob_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, void *buf, unsigned int len); +int nand_prog_page_begin_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, const void *buf, + unsigned int len); +int nand_prog_page_end_op(struct nand_chip *chip); +int nand_prog_page_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, const void *buf, + unsigned int len); +int nand_change_write_column_op(struct nand_chip *chip, + unsigned int offset_in_page, const void *buf, + unsigned int len, bool force_8bit); +int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, + bool force_8bit); +int nand_write_data_op(struct nand_chip *chip, const void *buf, + unsigned int len, bool force_8bit); + /* Free resources held by the NAND device */ void nand_cleanup(struct nand_chip *chip); -- 
cgit v1.2.3 From 25f815f66a141436df8a4c45e5d2765272aea2ac Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 30 Nov 2017 18:01:30 +0100 Subject: mtd: nand: force drivers to explicitly send READ/PROG commands The core currently sends the READ0 and SEQIN+PAGEPROG commands in nand_do_read/write_ops(). This is inconsistent with the behavior of the ->read/write_oob[_raw]() hooks, which are expected to send these commands themselves. There's already a flag (NAND_ECC_CUSTOM_PAGE_ACCESS) to inform the core that a specific controller wants to send the READ/SEQIN+PAGEPROG commands on its own, but it's an opt-in flag, and existing drivers are unlikely to be updated to pass it. Moreover, some controllers cannot dissociate the READ/PAGEPROG commands from the associated data transfer and ECC engine activation, and developers have to hack things in their ->cmdfunc() implementation to handle such complex cases, or have to accept the perf penalty of sending the same command twice. To address this problem we are planning on adding a new interface which is passed all information about a NAND operation (including the amount of data to transfer) and replacing all calls to ->cmdfunc() with calls to this new ->exec_op() hook. But, in order to do that, we need to have all ->cmdfunc() calls placed near their associated ->read/write_buf/byte() calls. Modify the core and relevant drivers to make NAND_ECC_CUSTOM_PAGE_ACCESS the default case, and remove this flag.
Signed-off-by: Boris Brezillon [miquel.raynal@free-electrons.com: tested, fixed and rebased on nand/next] Signed-off-by: Miquel Raynal Acked-by: Masahiro Yamada --- drivers/mtd/nand/atmel/nand-controller.c | 7 ++- drivers/mtd/nand/bf5xx_nand.c | 6 +- drivers/mtd/nand/brcmnand/brcmnand.c | 13 +++- drivers/mtd/nand/cafe_nand.c | 6 +- drivers/mtd/nand/denali.c | 1 - drivers/mtd/nand/docg4.c | 12 ++-- drivers/mtd/nand/fsl_elbc_nand.c | 10 +-- drivers/mtd/nand/fsl_ifc_nand.c | 6 +- drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 31 +++++----- drivers/mtd/nand/hisi504_nand.c | 6 +- drivers/mtd/nand/lpc32xx_mlc.c | 5 +- drivers/mtd/nand/lpc32xx_slc.c | 11 +++- drivers/mtd/nand/mtk_nand.c | 22 +++---- drivers/mtd/nand/nand_base.c | 87 +++++++++++---------------- drivers/mtd/nand/nand_micron.c | 56 ++--------------- drivers/mtd/nand/omap2.c | 10 ++- drivers/mtd/nand/pxa3xx_nand.c | 6 +- drivers/mtd/nand/qcom_nandc.c | 11 ++++ drivers/mtd/nand/sh_flctl.c | 6 +- drivers/mtd/nand/sunxi_nand.c | 34 +++++++---- drivers/mtd/nand/tango_nand.c | 1 - drivers/mtd/nand/vf610_nfc.c | 6 +- drivers/staging/mt29f_spinand/mt29f_spinand.c | 5 +- include/linux/mtd/rawnand.h | 11 ---- 24 files changed, 171 insertions(+), 198 deletions(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index e81fdd2d47b1..b2f00b398490 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -841,6 +841,8 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf, struct atmel_nand *nand = to_atmel_nand(chip); int ret; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + ret = atmel_nand_pmecc_enable(chip, NAND_ECC_WRITE, raw); if (ret) return ret; @@ -857,7 +859,7 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf, atmel_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_prog_page_end_op(chip); } static int atmel_nand_pmecc_write_page(struct mtd_info 
*mtd, @@ -881,6 +883,8 @@ static int atmel_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf, struct mtd_info *mtd = nand_to_mtd(chip); int ret; + nand_read_page_op(chip, page, 0, NULL, 0); + ret = atmel_nand_pmecc_enable(chip, NAND_ECC_READ, raw); if (ret) return ret; @@ -1178,7 +1182,6 @@ static int atmel_hsmc_nand_ecc_init(struct atmel_nand *nand) chip->ecc.write_page = atmel_hsmc_nand_pmecc_write_page; chip->ecc.read_page_raw = atmel_hsmc_nand_pmecc_read_page_raw; chip->ecc.write_page_raw = atmel_hsmc_nand_pmecc_write_page_raw; - chip->ecc.options |= NAND_ECC_CUSTOM_PAGE_ACCESS; return 0; } diff --git a/drivers/mtd/nand/bf5xx_nand.c b/drivers/mtd/nand/bf5xx_nand.c index 5655dca6ce43..87bbd177b3e5 100644 --- a/drivers/mtd/nand/bf5xx_nand.c +++ b/drivers/mtd/nand/bf5xx_nand.c @@ -572,6 +572,8 @@ static void bf5xx_nand_dma_write_buf(struct mtd_info *mtd, static int bf5xx_nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page) { + nand_read_page_op(chip, page, 0, NULL, 0); + bf5xx_nand_read_buf(mtd, buf, mtd->writesize); bf5xx_nand_read_buf(mtd, chip->oob_poi, mtd->oobsize); @@ -582,10 +584,10 @@ static int bf5xx_nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page) { - bf5xx_nand_write_buf(mtd, buf, mtd->writesize); + nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); bf5xx_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_prog_page_end_op(chip); } /* diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index 3f441096a14c..e6879d4d53ca 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -1689,7 +1689,6 @@ static int brcmstb_nand_verify_erased_page(struct mtd_info *mtd, sas = mtd->oobsize / chip->ecc.steps; /* read without ecc for verification */ - nand_read_page_op(chip, page, 0, NULL, 0); ret = chip->ecc.read_page_raw(mtd, chip, buf, true, 
page); if (ret) return ret; @@ -1793,6 +1792,8 @@ static int brcmnand_read_page(struct mtd_info *mtd, struct nand_chip *chip, struct brcmnand_host *host = nand_get_controller_data(chip); u8 *oob = oob_required ? (u8 *)chip->oob_poi : NULL; + nand_read_page_op(chip, page, 0, NULL, 0); + return brcmnand_read(mtd, chip, host->last_addr, mtd->writesize >> FC_SHIFT, (u32 *)buf, oob); } @@ -1804,6 +1805,8 @@ static int brcmnand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, u8 *oob = oob_required ? (u8 *)chip->oob_poi : NULL; int ret; + nand_read_page_op(chip, page, 0, NULL, 0); + brcmnand_set_ecc_enabled(host, 0); ret = brcmnand_read(mtd, chip, host->last_addr, mtd->writesize >> FC_SHIFT, (u32 *)buf, oob); @@ -1909,8 +1912,10 @@ static int brcmnand_write_page(struct mtd_info *mtd, struct nand_chip *chip, struct brcmnand_host *host = nand_get_controller_data(chip); void *oob = oob_required ? chip->oob_poi : NULL; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); brcmnand_write(mtd, chip, host->last_addr, (const u32 *)buf, oob); - return 0; + + return nand_prog_page_end_op(chip); } static int brcmnand_write_page_raw(struct mtd_info *mtd, @@ -1920,10 +1925,12 @@ static int brcmnand_write_page_raw(struct mtd_info *mtd, struct brcmnand_host *host = nand_get_controller_data(chip); void *oob = oob_required ? 
chip->oob_poi : NULL; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); brcmnand_set_ecc_enabled(host, 0); brcmnand_write(mtd, chip, host->last_addr, (const u32 *)buf, oob); brcmnand_set_ecc_enabled(host, 1); - return 0; + + return nand_prog_page_end_op(chip); } static int brcmnand_write_oob(struct mtd_info *mtd, struct nand_chip *chip, diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index 95c2cfa68b66..de36762e3058 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -383,7 +383,7 @@ static int cafe_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip, cafe_readl(cafe, NAND_ECC_RESULT), cafe_readl(cafe, NAND_ECC_SYN01)); - chip->read_buf(mtd, buf, mtd->writesize); + nand_read_page_op(chip, page, 0, buf, mtd->writesize); chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); if (checkecc && cafe_readl(cafe, NAND_ECC_RESULT) & (1<<18)) { @@ -541,13 +541,13 @@ static int cafe_nand_write_page_lowlevel(struct mtd_info *mtd, { struct cafe_priv *cafe = nand_get_controller_data(chip); - chip->write_buf(mtd, buf, mtd->writesize); + nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); /* Set up ECC autogeneration */ cafe->ctl2 |= (1<<30); - return 0; + return nand_prog_page_end_op(chip); } static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs) diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index d5c80d617854..47a253737bb2 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -1358,7 +1358,6 @@ int denali_init(struct denali_nand_info *denali) chip->read_buf = denali_read_buf; chip->write_buf = denali_write_buf; } - chip->ecc.options |= NAND_ECC_CUSTOM_PAGE_ACCESS; chip->ecc.read_page = denali_read_page; chip->ecc.read_page_raw = denali_read_page_raw; chip->ecc.write_page = denali_write_page; diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c index 5a27f56dafdc..72f1327c4430 100644 --- 
a/drivers/mtd/nand/docg4.c +++ b/drivers/mtd/nand/docg4.c @@ -785,6 +785,8 @@ static int read_page(struct mtd_info *mtd, struct nand_chip *nand, dev_dbg(doc->dev, "%s: page %08x\n", __func__, page); + nand_read_page_op(nand, page, 0, NULL, 0); + writew(DOC_ECCCONF0_READ_MODE | DOC_ECCCONF0_ECC_ENABLE | DOC_ECCCONF0_UNKNOWN | @@ -948,7 +950,7 @@ static int docg4_erase_block(struct mtd_info *mtd, int page) } static int write_page(struct mtd_info *mtd, struct nand_chip *nand, - const uint8_t *buf, bool use_ecc) + const uint8_t *buf, int page, bool use_ecc) { struct docg4_priv *doc = nand_get_controller_data(nand); void __iomem *docptr = doc->virtadr; @@ -956,6 +958,8 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *nand, dev_dbg(doc->dev, "%s...\n", __func__); + nand_prog_page_begin_op(nand, page, 0, NULL, 0); + writew(DOC_ECCCONF0_ECC_ENABLE | DOC_ECCCONF0_UNKNOWN | DOCG4_BCH_SIZE, @@ -1000,19 +1004,19 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *nand, writew(0, docptr + DOC_DATAEND); write_nop(docptr); - return 0; + return nand_prog_page_end_op(nand); } static int docg4_write_page_raw(struct mtd_info *mtd, struct nand_chip *nand, const uint8_t *buf, int oob_required, int page) { - return write_page(mtd, nand, buf, false); + return write_page(mtd, nand, buf, page, false); } static int docg4_write_page(struct mtd_info *mtd, struct nand_chip *nand, const uint8_t *buf, int oob_required, int page) { - return write_page(mtd, nand, buf, true); + return write_page(mtd, nand, buf, page, true); } static int docg4_write_oob(struct mtd_info *mtd, struct nand_chip *nand, diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c index 17db2f90aa2c..8b6dcd739ecb 100644 --- a/drivers/mtd/nand/fsl_elbc_nand.c +++ b/drivers/mtd/nand/fsl_elbc_nand.c @@ -713,7 +713,7 @@ static int fsl_elbc_read_page(struct mtd_info *mtd, struct nand_chip *chip, struct fsl_lbc_ctrl *ctrl = priv->ctrl; struct fsl_elbc_fcm_ctrl *elbc_fcm_ctrl = 
ctrl->nand; - fsl_elbc_read_buf(mtd, buf, mtd->writesize); + nand_read_page_op(chip, page, 0, buf, mtd->writesize); if (oob_required) fsl_elbc_read_buf(mtd, chip->oob_poi, mtd->oobsize); @@ -729,10 +729,10 @@ static int fsl_elbc_read_page(struct mtd_info *mtd, struct nand_chip *chip, static int fsl_elbc_write_page(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page) { - fsl_elbc_write_buf(mtd, buf, mtd->writesize); + nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_prog_page_end_op(chip); } /* ECC will be calculated automatically, and errors will be detected in @@ -742,10 +742,10 @@ static int fsl_elbc_write_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offset, uint32_t data_len, const uint8_t *buf, int oob_required, int page) { + nand_prog_page_begin_op(chip, page, 0, NULL, 0); fsl_elbc_write_buf(mtd, buf, mtd->writesize); fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize); - - return 0; + return nand_prog_page_end_op(chip); } static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv) diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index bbdd68a54d68..4872a7ba6503 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -688,7 +688,7 @@ static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip, struct fsl_ifc_ctrl *ctrl = priv->ctrl; struct fsl_ifc_nand_ctrl *nctrl = ifc_nand_ctrl; - fsl_ifc_read_buf(mtd, buf, mtd->writesize); + nand_read_page_op(chip, page, 0, buf, mtd->writesize); if (oob_required) fsl_ifc_read_buf(mtd, chip->oob_poi, mtd->oobsize); @@ -711,10 +711,10 @@ static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip, static int fsl_ifc_write_page(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page) { - fsl_ifc_write_buf(mtd, buf, mtd->writesize); + 
nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); fsl_ifc_write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_prog_page_end_op(chip); } static int fsl_ifc_chip_init_tail(struct mtd_info *mtd) diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 63a425ced4cd..3c3f3f58fdcb 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -1043,6 +1043,8 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, unsigned int max_bitflips = 0; int ret; + nand_read_page_op(chip, page, 0, NULL, 0); + dev_dbg(this->dev, "page number is : %d\n", page); ret = read_page_prepare(this, buf, nfc_geo->payload_size, this->payload_virt, this->payload_phys, @@ -1220,12 +1222,12 @@ static int gpmi_ecc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, meta = geo->metadata_size; if (first) { col = meta + (size + ecc_parity_size) * first; - nand_change_read_column_op(chip, col, NULL, 0, false); - meta = 0; buf = buf + first * size; } + nand_read_page_op(chip, page, col, NULL, 0); + /* Save the old environment */ r1_old = r1_new = readl(bch_regs + HW_BCH_FLASH0LAYOUT0); r2_old = r2_new = readl(bch_regs + HW_BCH_FLASH0LAYOUT1); @@ -1277,6 +1279,9 @@ static int gpmi_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip, int ret; dev_dbg(this->dev, "ecc write page.\n"); + + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (this->swap_block_mark) { /* * If control arrives here, we're doing block mark swapping. 
@@ -1338,7 +1343,10 @@ exit_auxiliary: payload_virt, payload_phys); } - return 0; + if (ret) + return ret; + + return nand_prog_page_end_op(chip); } /* @@ -1472,8 +1480,8 @@ static int gpmi_ecc_read_page_raw(struct mtd_info *mtd, uint8_t *oob = chip->oob_poi; int step; - chip->read_buf(mtd, tmp_buf, - mtd->writesize + mtd->oobsize); + nand_read_page_op(chip, page, 0, tmp_buf, + mtd->writesize + mtd->oobsize); /* * If required, swap the bad block marker and the data stored in the @@ -1609,24 +1617,19 @@ static int gpmi_ecc_write_page_raw(struct mtd_info *mtd, if (this->swap_block_mark) swap(tmp_buf[0], tmp_buf[mtd->writesize]); - chip->write_buf(mtd, tmp_buf, mtd->writesize + mtd->oobsize); - - return 0; + return nand_prog_page_op(chip, page, 0, tmp_buf, + mtd->writesize + mtd->oobsize); } static int gpmi_ecc_read_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, int page) { - nand_read_page_op(chip, page, 0, NULL, 0); - return gpmi_ecc_read_page_raw(mtd, chip, NULL, 1, page); } static int gpmi_ecc_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip, int page) { - nand_prog_page_begin_op(chip, page, 0, NULL, 0); - return gpmi_ecc_write_page_raw(mtd, chip, NULL, 1, page); } @@ -1798,9 +1801,7 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this) /* Write the first page of the current stride. 
*/ dev_dbg(dev, "Writing an NCB fingerprint in page 0x%x\n", page); - nand_prog_page_begin_op(chip, page, 0, NULL, 0); - chip->ecc.write_page_raw(mtd, chip, buffer, 0, page); - status = nand_prog_page_end_op(chip); + status = chip->ecc.write_page_raw(mtd, chip, buffer, 0, page); if (status) dev_err(dev, "[%s] Write failed.\n", __func__); } diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c index 184d765c8bbe..cb862793ab6d 100644 --- a/drivers/mtd/nand/hisi504_nand.c +++ b/drivers/mtd/nand/hisi504_nand.c @@ -544,7 +544,7 @@ static int hisi_nand_read_page_hwecc(struct mtd_info *mtd, int max_bitflips = 0, stat = 0, stat_max = 0, status_ecc; int stat_1, stat_2; - chip->read_buf(mtd, buf, mtd->writesize); + nand_read_page_op(chip, page, 0, buf, mtd->writesize); chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); /* errors which can not be corrected by ECC */ @@ -589,11 +589,11 @@ static int hisi_nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page) { - chip->write_buf(mtd, buf, mtd->writesize); + nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); if (oob_required) chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_prog_page_end_op(chip); } static void hisi_nfc_host_init(struct hinfc_host *host) diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c index 31cb3b2967b9..e357948a7505 100644 --- a/drivers/mtd/nand/lpc32xx_mlc.c +++ b/drivers/mtd/nand/lpc32xx_mlc.c @@ -522,6 +522,8 @@ static int lpc32xx_write_page_lowlevel(struct mtd_info *mtd, memcpy(dma_buf, buf, mtd->writesize); } + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + for (i = 0; i < host->mlcsubpages; i++) { /* Start Encode */ writeb(0x00, MLC_ECC_ENC_REG(host->io_base)); @@ -550,7 +552,8 @@ static int lpc32xx_write_page_lowlevel(struct mtd_info *mtd, /* Wait for Controller Ready */ lpc32xx_waitfunc_controller(mtd, chip); } - return 0; + + return 
nand_prog_page_end_op(chip); } static int lpc32xx_read_oob(struct mtd_info *mtd, struct nand_chip *chip, diff --git a/drivers/mtd/nand/lpc32xx_slc.c b/drivers/mtd/nand/lpc32xx_slc.c index 2b96c281b1a2..5f7cc6da0a7f 100644 --- a/drivers/mtd/nand/lpc32xx_slc.c +++ b/drivers/mtd/nand/lpc32xx_slc.c @@ -686,6 +686,8 @@ static int lpc32xx_nand_write_page_syndrome(struct mtd_info *mtd, uint8_t *pb; int error; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + /* Write data, calculate ECC on outbound data */ error = lpc32xx_xfer(mtd, (uint8_t *)buf, chip->ecc.steps, 0); if (error) @@ -704,7 +706,8 @@ static int lpc32xx_nand_write_page_syndrome(struct mtd_info *mtd, /* Write ECC data to device */ chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + + return nand_prog_page_end_op(chip); } /* @@ -717,9 +720,11 @@ static int lpc32xx_nand_write_page_raw_syndrome(struct mtd_info *mtd, int oob_required, int page) { /* Raw writes can just use the FIFO interface */ - chip->write_buf(mtd, buf, chip->ecc.size * chip->ecc.steps); + nand_prog_page_begin_op(chip, page, 0, buf, + chip->ecc.size * chip->ecc.steps); chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + + return nand_prog_page_end_op(chip); } static int lpc32xx_nand_dma_setup(struct lpc32xx_nand_host *host) diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c index 9c4adaf9331b..5d76be451596 100644 --- a/drivers/mtd/nand/mtk_nand.c +++ b/drivers/mtd/nand/mtk_nand.c @@ -761,6 +761,8 @@ static int mtk_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip, u32 reg; int ret; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (!raw) { /* OOB => FDM: from register, ECC: from HW */ reg = nfi_readw(nfc, NFI_CNFG) | CNFG_AUTO_FMT_EN; @@ -794,7 +796,10 @@ static int mtk_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (!raw) mtk_ecc_disable(nfc->ecc); - return ret; + if (ret) + return ret; + + return nand_prog_page_end_op(chip); } static int 
mtk_nfc_write_page_hwecc(struct mtd_info *mtd, @@ -832,15 +837,7 @@ static int mtk_nfc_write_subpage_hwecc(struct mtd_info *mtd, static int mtk_nfc_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page) { - int ret; - - nand_prog_page_begin_op(chip, page, 0, NULL, 0); - - ret = mtk_nfc_write_page_raw(mtd, chip, NULL, 1, page); - if (ret < 0) - return -EIO; - - return nand_prog_page_end_op(chip); + return mtk_nfc_write_page_raw(mtd, chip, NULL, 1, page); } static int mtk_nfc_update_ecc_stats(struct mtd_info *mtd, u8 *buf, u32 sectors) @@ -889,8 +886,7 @@ static int mtk_nfc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, len = sectors * chip->ecc.size + (raw ? sectors * spare : 0); buf = bufpoi + start * chip->ecc.size; - if (column != 0) - nand_change_read_column_op(chip, column, NULL, 0, false); + nand_read_page_op(chip, page, column, NULL, 0); addr = dma_map_single(nfc->dev, buf, len, DMA_FROM_DEVICE); rc = dma_mapping_error(nfc->dev, addr); @@ -1013,8 +1009,6 @@ static int mtk_nfc_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, static int mtk_nfc_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page) { - nand_read_page_op(chip, page, 0, NULL, 0); - return mtk_nfc_read_page_raw(mtd, chip, NULL, 1, page); } diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 539132ef0095..e3bf33bc1fb6 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1940,7 +1940,7 @@ int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip, { int ret; - ret = nand_read_data_op(chip, buf, mtd->writesize, false); + ret = nand_read_page_op(chip, page, 0, buf, mtd->writesize); if (ret) return ret; @@ -1974,6 +1974,10 @@ static int nand_read_page_raw_syndrome(struct mtd_info *mtd, uint8_t *oob = chip->oob_poi; int steps, size, ret; + ret = nand_read_page_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + for (steps = chip->ecc.steps; steps > 0; steps--) { ret = 
nand_read_data_op(chip, buf, eccsize, false); if (ret) @@ -2096,11 +2100,8 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, data_col_addr = start_step * chip->ecc.size; /* If we read not a page aligned data */ - if (data_col_addr != 0) - chip->cmdfunc(mtd, NAND_CMD_RNDOUT, data_col_addr, -1); - p = bufpoi + data_col_addr; - ret = nand_read_data_op(chip, p, datafrag_len, false); + ret = nand_read_page_op(chip, page, data_col_addr, p, datafrag_len); if (ret) return ret; @@ -2198,6 +2199,10 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *ecc_code = chip->buffers->ecccode; unsigned int max_bitflips = 0; + ret = nand_read_page_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { chip->ecc.hwctl(mtd, NAND_ECC_READ); @@ -2335,6 +2340,10 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *oob = chip->oob_poi; unsigned int max_bitflips = 0; + ret = nand_read_page_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { int stat; @@ -2517,12 +2526,6 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, __func__, buf); read_retry: - if (nand_standard_page_accessors(&chip->ecc)) { - ret = nand_read_page_op(chip, page, 0, NULL, 0); - if (ret) - break; - } - /* * Now read the page into the buffer. Absent an error, * the read methods return max bitflips per ecc step. 
@@ -2978,7 +2981,7 @@ int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, { int ret; - ret = nand_write_data_op(chip, buf, mtd->writesize, false); + ret = nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); if (ret) return ret; @@ -2989,7 +2992,7 @@ int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip, return ret; } - return 0; + return nand_prog_page_end_op(chip); } EXPORT_SYMBOL(nand_write_page_raw); @@ -3013,6 +3016,10 @@ static int nand_write_page_raw_syndrome(struct mtd_info *mtd, uint8_t *oob = chip->oob_poi; int steps, size, ret; + ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + for (steps = chip->ecc.steps; steps > 0; steps--) { ret = nand_write_data_op(chip, buf, eccsize, false); if (ret) @@ -3052,7 +3059,7 @@ static int nand_write_page_raw_syndrome(struct mtd_info *mtd, return ret; } - return 0; + return nand_prog_page_end_op(chip); } /** * nand_write_page_swecc - [REPLACEABLE] software ECC based page write function @@ -3102,6 +3109,10 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *ecc_calc = chip->buffers->ecccalc; const uint8_t *p = buf; + ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { chip->ecc.hwctl(mtd, NAND_ECC_WRITE); @@ -3121,7 +3132,7 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, if (ret) return ret; - return 0; + return nand_prog_page_end_op(chip); } @@ -3150,6 +3161,10 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd, int oob_bytes = mtd->oobsize / ecc_steps; int step, ret; + ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + for (step = 0; step < ecc_steps; step++) { /* configure controller for WRITE access */ chip->ecc.hwctl(mtd, NAND_ECC_WRITE); @@ -3188,7 +3203,7 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd, if (ret) return ret; - 
return 0; + return nand_prog_page_end_op(chip); } @@ -3215,6 +3230,10 @@ static int nand_write_page_syndrome(struct mtd_info *mtd, uint8_t *oob = chip->oob_poi; int ret; + ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { chip->ecc.hwctl(mtd, NAND_ECC_WRITE); @@ -3257,7 +3276,7 @@ static int nand_write_page_syndrome(struct mtd_info *mtd, return ret; } - return 0; + return nand_prog_page_end_op(chip); } /** @@ -3283,12 +3302,6 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, else subpage = 0; - if (nand_standard_page_accessors(&chip->ecc)) { - status = nand_prog_page_begin_op(chip, page, 0, NULL, 0); - if (status) - return status; - } - if (unlikely(raw)) status = chip->ecc.write_page_raw(mtd, chip, buf, oob_required, page); @@ -3302,9 +3315,6 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (status < 0) return status; - if (nand_standard_page_accessors(&chip->ecc)) - return nand_prog_page_end_op(chip); - return 0; } @@ -5290,26 +5300,6 @@ static bool nand_ecc_strength_good(struct mtd_info *mtd) return corr >= ds_corr && ecc->strength >= chip->ecc_strength_ds; } -static bool invalid_ecc_page_accessors(struct nand_chip *chip) -{ - struct nand_ecc_ctrl *ecc = &chip->ecc; - - if (nand_standard_page_accessors(ecc)) - return false; - - /* - * NAND_ECC_CUSTOM_PAGE_ACCESS flag is set, make sure the NAND - * controller driver implements all the page accessors because - * default helpers are not suitable when the core does not - * send the READ0/PAGEPROG commands. 
- */ - return (!ecc->read_page || !ecc->write_page || - !ecc->read_page_raw || !ecc->write_page_raw || - (NAND_HAS_SUBPAGE_READ(chip) && !ecc->read_subpage) || - (NAND_HAS_SUBPAGE_WRITE(chip) && !ecc->write_subpage && - ecc->hwctl && ecc->calculate)); -} - /** * nand_scan_tail - [NAND Interface] Scan for the NAND device * @mtd: MTD device structure @@ -5331,11 +5321,6 @@ int nand_scan_tail(struct mtd_info *mtd) return -EINVAL; } - if (invalid_ecc_page_accessors(chip)) { - pr_err("Invalid ECC page accessors setup\n"); - return -EINVAL; - } - if (!(chip->options & NAND_OWN_BUFFERS)) { nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL); if (!nbuf) diff --git a/drivers/mtd/nand/nand_micron.c b/drivers/mtd/nand/nand_micron.c index bf2dc23e1c32..02e109ae73f1 100644 --- a/drivers/mtd/nand/nand_micron.c +++ b/drivers/mtd/nand/nand_micron.c @@ -149,7 +149,10 @@ micron_nand_read_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip, else if (status & NAND_STATUS_WRITE_RECOMMENDED) max_bitflips = chip->ecc.strength; - ret = nand_read_page_raw(mtd, chip, buf, oob_required, page); + ret = nand_read_data_op(chip, buf, mtd->writesize, false); + if (!ret && oob_required) + ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, + false); out: micron_nand_on_die_ecc_setup(chip, false); @@ -168,56 +171,12 @@ micron_nand_write_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip, if (ret) return ret; - ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); - if (ret) - goto out; - ret = nand_write_page_raw(mtd, chip, buf, oob_required, page); - if (ret) - return ret; - - ret = nand_prog_page_end_op(chip); - -out: micron_nand_on_die_ecc_setup(chip, false); return ret; } -static int -micron_nand_read_page_raw_on_die_ecc(struct mtd_info *mtd, - struct nand_chip *chip, - uint8_t *buf, int oob_required, - int page) -{ - int ret; - - ret = nand_read_page_op(chip, page, 0, NULL, 0); - if (ret) - return ret; - - return nand_read_page_raw(mtd, chip, buf, oob_required, page); -} - 
-static int -micron_nand_write_page_raw_on_die_ecc(struct mtd_info *mtd, - struct nand_chip *chip, - const uint8_t *buf, int oob_required, - int page) -{ - int ret; - - ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); - if (ret) - return ret; - - ret = nand_write_page_raw(mtd, chip, buf, oob_required, page); - if (ret) - return ret; - - return nand_prog_page_end_op(chip); -} - enum { /* The NAND flash doesn't support on-die ECC */ MICRON_ON_DIE_UNSUPPORTED, @@ -310,17 +269,14 @@ static int micron_nand_init(struct nand_chip *chip) return -EINVAL; } - chip->ecc.options = NAND_ECC_CUSTOM_PAGE_ACCESS; chip->ecc.bytes = 8; chip->ecc.size = 512; chip->ecc.strength = 4; chip->ecc.algo = NAND_ECC_BCH; chip->ecc.read_page = micron_nand_read_page_on_die_ecc; chip->ecc.write_page = micron_nand_write_page_on_die_ecc; - chip->ecc.read_page_raw = - micron_nand_read_page_raw_on_die_ecc; - chip->ecc.write_page_raw = - micron_nand_write_page_raw_on_die_ecc; + chip->ecc.read_page_raw = nand_read_page_raw; + chip->ecc.write_page_raw = nand_write_page_raw; mtd_set_ooblayout(mtd, &micron_nand_on_die_ooblayout_ops); } diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 6e1b209cd5a7..5cb4db6f88e3 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1532,6 +1532,8 @@ static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip, int ret; uint8_t *ecc_calc = chip->buffers->ecccalc; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + /* Enable GPMC ecc engine */ chip->ecc.hwctl(mtd, NAND_ECC_WRITE); @@ -1548,7 +1550,8 @@ static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip, /* Write ecc vector to OOB area */ chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + + return nand_prog_page_end_op(chip); } /** @@ -1582,6 +1585,7 @@ static int omap_write_subpage_bch(struct mtd_info *mtd, * ECC is calculated for all subpages but we choose * only what we want.
*/ + nand_prog_page_begin_op(chip, page, 0, NULL, 0); /* Enable GPMC ECC engine */ chip->ecc.hwctl(mtd, NAND_ECC_WRITE); @@ -1614,7 +1618,7 @@ static int omap_write_subpage_bch(struct mtd_info *mtd, /* write OOB buffer to NAND device */ chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_prog_page_end_op(chip); } /** @@ -1640,6 +1644,8 @@ static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip, int stat, ret; unsigned int max_bitflips = 0; + nand_read_page_op(chip, page, 0, NULL, 0); + /* Enable GPMC ecc engine */ chip->ecc.hwctl(mtd, NAND_ECC_READ); diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 28bcdf64c1fc..021374fe59dc 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1348,10 +1348,10 @@ static int pxa3xx_nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page) { - chip->write_buf(mtd, buf, mtd->writesize); + nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return nand_prog_page_end_op(chip); } static int pxa3xx_nand_read_page_hwecc(struct mtd_info *mtd, @@ -1361,7 +1361,7 @@ static int pxa3xx_nand_read_page_hwecc(struct mtd_info *mtd, struct pxa3xx_nand_host *host = nand_get_controller_data(chip); struct pxa3xx_nand_info *info = host->info_data; - chip->read_buf(mtd, buf, mtd->writesize); + nand_read_page_op(chip, page, 0, buf, mtd->writesize); chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); if (info->retcode == ERR_CORERR && info->use_ecc) { diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index e34313ecd903..245d0f39e0aa 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -1725,6 +1725,7 @@ static int qcom_nandc_read_page(struct mtd_info *mtd, struct nand_chip *chip, u8 *data_buf, *oob_buf = NULL; int ret; + nand_read_page_op(chip, page, 0, NULL, 0); 
data_buf = buf; oob_buf = oob_required ? chip->oob_poi : NULL; @@ -1750,6 +1751,7 @@ static int qcom_nandc_read_page_raw(struct mtd_info *mtd, int i, ret; int read_loc; + nand_read_page_op(chip, page, 0, NULL, 0); data_buf = buf; oob_buf = chip->oob_poi; @@ -1850,6 +1852,8 @@ static int qcom_nandc_write_page(struct mtd_info *mtd, struct nand_chip *chip, u8 *data_buf, *oob_buf; int i, ret; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + clear_read_regs(nandc); clear_bam_transaction(nandc); @@ -1902,6 +1906,9 @@ static int qcom_nandc_write_page(struct mtd_info *mtd, struct nand_chip *chip, free_descs(nandc); + if (!ret) + ret = nand_prog_page_end_op(chip); + return ret; } @@ -1916,6 +1923,7 @@ static int qcom_nandc_write_page_raw(struct mtd_info *mtd, u8 *data_buf, *oob_buf; int i, ret; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); clear_read_regs(nandc); clear_bam_transaction(nandc); @@ -1970,6 +1978,9 @@ static int qcom_nandc_write_page_raw(struct mtd_info *mtd, free_descs(nandc); + if (!ret) + ret = nand_prog_page_end_op(chip); + return ret; } diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index 3c5008a4f5f3..c4e7755448e6 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -614,7 +614,7 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page) { - chip->read_buf(mtd, buf, mtd->writesize); + nand_read_page_op(chip, page, 0, buf, mtd->writesize); if (oob_required) chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); return 0; @@ -624,9 +624,9 @@ static int flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int page) { - chip->write_buf(mtd, buf, mtd->writesize); + nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); - return 0; + return 
nand_prog_page_end_op(chip); } static void execmd_read_page_sector(struct mtd_info *mtd, int page_addr) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index da5cc36f4c30..5c176dee821e 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1245,6 +1245,8 @@ static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd, int ret, i, cur_off = 0; bool raw_mode = false; + nand_read_page_op(chip, page, 0, NULL, 0); + sunxi_nfc_hw_ecc_enable(mtd); for (i = 0; i < ecc->steps; i++) { @@ -1278,14 +1280,14 @@ static int sunxi_nfc_hw_ecc_read_page_dma(struct mtd_info *mtd, { int ret; + nand_read_page_op(chip, page, 0, NULL, 0); + ret = sunxi_nfc_hw_ecc_read_chunks_dma(mtd, buf, oob_required, page, chip->ecc.steps); if (ret >= 0) return ret; /* Fallback to PIO mode */ - nand_change_read_column_op(chip, 0, NULL, 0, false); - return sunxi_nfc_hw_ecc_read_page(mtd, chip, buf, oob_required, page); } @@ -1298,6 +1300,8 @@ static int sunxi_nfc_hw_ecc_read_subpage(struct mtd_info *mtd, int ret, i, cur_off = 0; unsigned int max_bitflips = 0; + nand_read_page_op(chip, page, 0, NULL, 0); + sunxi_nfc_hw_ecc_enable(mtd); for (i = data_offs / ecc->size; @@ -1329,13 +1333,13 @@ static int sunxi_nfc_hw_ecc_read_subpage_dma(struct mtd_info *mtd, int nchunks = DIV_ROUND_UP(data_offs + readlen, chip->ecc.size); int ret; + nand_read_page_op(chip, page, 0, NULL, 0); + ret = sunxi_nfc_hw_ecc_read_chunks_dma(mtd, buf, false, page, nchunks); if (ret >= 0) return ret; /* Fallback to PIO mode */ - nand_change_read_column_op(chip, 0, NULL, 0, false); - return sunxi_nfc_hw_ecc_read_subpage(mtd, chip, data_offs, readlen, buf, page); } @@ -1348,6 +1352,8 @@ static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, struct nand_ecc_ctrl *ecc = &chip->ecc; int ret, i, cur_off = 0; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + sunxi_nfc_hw_ecc_enable(mtd); for (i = 0; i < ecc->steps; i++) { @@ -1369,7 +1375,7 @@ static int 
sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd, sunxi_nfc_hw_ecc_disable(mtd); - return 0; + return nand_prog_page_end_op(chip); } static int sunxi_nfc_hw_ecc_write_subpage(struct mtd_info *mtd, @@ -1381,6 +1387,8 @@ static int sunxi_nfc_hw_ecc_write_subpage(struct mtd_info *mtd, struct nand_ecc_ctrl *ecc = &chip->ecc; int ret, i, cur_off = 0; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + sunxi_nfc_hw_ecc_enable(mtd); for (i = data_offs / ecc->size; @@ -1399,7 +1407,7 @@ static int sunxi_nfc_hw_ecc_write_subpage(struct mtd_info *mtd, sunxi_nfc_hw_ecc_disable(mtd); - return 0; + return nand_prog_page_end_op(chip); } static int sunxi_nfc_hw_ecc_write_page_dma(struct mtd_info *mtd, @@ -1429,6 +1437,8 @@ static int sunxi_nfc_hw_ecc_write_page_dma(struct mtd_info *mtd, sunxi_nfc_hw_ecc_set_prot_oob_bytes(mtd, oob, i, !i, page); } + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + sunxi_nfc_hw_ecc_enable(mtd); sunxi_nfc_randomizer_config(mtd, page, false); sunxi_nfc_randomizer_enable(mtd); @@ -1459,7 +1469,7 @@ static int sunxi_nfc_hw_ecc_write_page_dma(struct mtd_info *mtd, sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi, NULL, page); - return 0; + return nand_prog_page_end_op(chip); pio_fallback: return sunxi_nfc_hw_ecc_write_page(mtd, chip, buf, oob_required, page); @@ -1475,6 +1485,8 @@ static int sunxi_nfc_hw_syndrome_ecc_read_page(struct mtd_info *mtd, int ret, i, cur_off = 0; bool raw_mode = false; + nand_read_page_op(chip, page, 0, NULL, 0); + sunxi_nfc_hw_ecc_enable(mtd); for (i = 0; i < ecc->steps; i++) { @@ -1511,6 +1523,8 @@ static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd, struct nand_ecc_ctrl *ecc = &chip->ecc; int ret, i, cur_off = 0; + nand_prog_page_begin_op(chip, page, 0, NULL, 0); + sunxi_nfc_hw_ecc_enable(mtd); for (i = 0; i < ecc->steps; i++) { @@ -1532,15 +1546,13 @@ static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd, sunxi_nfc_hw_ecc_disable(mtd); - return 0; + return nand_prog_page_end_op(chip); 
} static int sunxi_nfc_hw_common_ecc_read_oob(struct mtd_info *mtd, struct nand_chip *chip, int page) { - nand_read_page_op(chip, page, 0, NULL, 0); - chip->pagebuf = -1; return chip->ecc.read_page(mtd, chip, chip->buffers->databuf, 1, page); @@ -1552,8 +1564,6 @@ static int sunxi_nfc_hw_common_ecc_write_oob(struct mtd_info *mtd, { int ret; - nand_prog_page_begin_op(chip, page, 0, NULL, 0); - chip->pagebuf = -1; memset(chip->buffers->databuf, 0xff, mtd->writesize); diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c index 97a300b46b1d..c5bee00b7f5e 100644 --- a/drivers/mtd/nand/tango_nand.c +++ b/drivers/mtd/nand/tango_nand.c @@ -580,7 +580,6 @@ static int chip_init(struct device *dev, struct device_node *np) ecc->write_page = tango_write_page; ecc->read_oob = tango_read_oob; ecc->write_oob = tango_write_oob; - ecc->options = NAND_ECC_CUSTOM_PAGE_ACCESS; err = nand_scan_tail(mtd); if (err) diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c index 8037d4b48a05..80d31a58e558 100644 --- a/drivers/mtd/nand/vf610_nfc.c +++ b/drivers/mtd/nand/vf610_nfc.c @@ -560,7 +560,7 @@ static int vf610_nfc_read_page(struct mtd_info *mtd, struct nand_chip *chip, int eccsize = chip->ecc.size; int stat; - vf610_nfc_read_buf(mtd, buf, eccsize); + nand_read_page_op(chip, page, 0, buf, eccsize); if (oob_required) vf610_nfc_read_buf(mtd, chip->oob_poi, mtd->oobsize); @@ -580,7 +580,7 @@ static int vf610_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip, { struct vf610_nfc *nfc = mtd_to_nfc(mtd); - vf610_nfc_write_buf(mtd, buf, mtd->writesize); + nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize); if (oob_required) vf610_nfc_write_buf(mtd, chip->oob_poi, mtd->oobsize); @@ -588,7 +588,7 @@ static int vf610_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip, nfc->use_hw_ecc = true; nfc->write_sz = mtd->writesize + mtd->oobsize; - return 0; + return nand_prog_page_end_op(chip); } static const struct of_device_id 
vf610_nfc_dt_ids[] = { diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c index 87595c594b12..264ad362d858 100644 --- a/drivers/staging/mt29f_spinand/mt29f_spinand.c +++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c @@ -637,8 +637,7 @@ static int spinand_write_page_hwecc(struct mtd_info *mtd, int eccsteps = chip->ecc.steps; enable_hw_ecc = 1; - chip->write_buf(mtd, p, eccsize * eccsteps); - return 0; + return nand_prog_page_op(chip, page, 0, p, eccsize * eccsteps); } static int spinand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, @@ -653,7 +652,7 @@ static int spinand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, enable_read_hw_ecc = 1; - chip->read_buf(mtd, p, eccsize * eccsteps); + nand_read_page_op(chip, page, 0, p, eccsize * eccsteps); if (oob_required) chip->read_buf(mtd, chip->oob_poi, mtd->oobsize); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index fd99d5137d71..e6810f0b8f9e 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -133,12 +133,6 @@ enum nand_ecc_algo { */ #define NAND_ECC_GENERIC_ERASED_CHECK BIT(0) #define NAND_ECC_MAXIMIZE BIT(1) -/* - * If your controller already sends the required NAND commands when - * reading or writing a page, then the framework is not supposed to - * send READ0 and SEQIN/PAGEPROG respectively. - */ -#define NAND_ECC_CUSTOM_PAGE_ACCESS BIT(2) /* Bit mask for flags passed to do_nand_read_ecc */ #define NAND_GET_DEVICE 0x80 @@ -602,11 +596,6 @@ struct nand_ecc_ctrl { int page); }; -static inline int nand_standard_page_accessors(struct nand_ecc_ctrl *ecc) -{ - return !(ecc->options & NAND_ECC_CUSTOM_PAGE_ACCESS); -} - /** * struct nand_buffers - buffer structure for read/write * @ecccalc: buffer pointer for calculated ECC, size is oobsize. 
-- cgit v1.2.3 From 17fa8044188c152e8a3b9493f8b8054cacbfb9ba Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 30 Nov 2017 18:01:31 +0100 Subject: mtd: nand: provide valid ->data_interface during NAND detection Right now, the chip->data_interface field is populated in nand_scan_tail(), so after the whole NAND detection has taken place. This is fine because these timings are not yet used by the core so early in the probe process, but the situation is about to change with the introduction of ->exec_op(). Also, by convention, nand_scan_ident() is not supposed to allocate resources, only nand_scan_tail() can, so this prevents us from allocating and initializing the data_interface object in nand_scan_ident(). In order to solve this problem, directly embed a data_interface object in nand_chip so that we don't have to allocate it, and initialize it to ONFI SDR mode 0 at the very beginning of nand_scan_ident(). Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 46 ++++++++++++++++++----------------------- drivers/mtd/nand/nand_timings.c | 21 +++++-------------- include/linux/mtd/rawnand.h | 7 ++----- 3 files changed, 27 insertions(+), 47 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index e3bf33bc1fb6..215c52a3b9ad 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -816,8 +816,8 @@ static void nand_ccs_delay(struct nand_chip *chip) * Wait tCCS_min if it is correctly defined, otherwise wait 500ns * (which should be safe for all NANDs).
*/ - if (chip->data_interface && chip->data_interface->timings.sdr.tCCS_min) - ndelay(chip->data_interface->timings.sdr.tCCS_min / 1000); + if (chip->setup_data_interface) + ndelay(chip->data_interface.timings.sdr.tCCS_min / 1000); else ndelay(500); } @@ -1112,7 +1112,6 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) static int nand_reset_data_interface(struct nand_chip *chip, int chipnr) { struct mtd_info *mtd = nand_to_mtd(chip); - const struct nand_data_interface *conf; int ret; if (!chip->setup_data_interface) @@ -1132,8 +1131,8 @@ static int nand_reset_data_interface(struct nand_chip *chip, int chipnr) * timings to timing mode 0. */ - conf = nand_get_default_data_interface(); - ret = chip->setup_data_interface(mtd, chipnr, conf); + onfi_fill_data_interface(chip, NAND_SDR_IFACE, 0); + ret = chip->setup_data_interface(mtd, chipnr, &chip->data_interface); if (ret) pr_err("Failed to configure data interface to SDR timing mode 0\n"); @@ -1158,7 +1157,7 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr) struct mtd_info *mtd = nand_to_mtd(chip); int ret; - if (!chip->setup_data_interface || !chip->data_interface) + if (!chip->setup_data_interface) return 0; /* @@ -1179,7 +1178,7 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr) goto err; } - ret = chip->setup_data_interface(mtd, chipnr, chip->data_interface); + ret = chip->setup_data_interface(mtd, chipnr, &chip->data_interface); err: return ret; } @@ -1219,21 +1218,16 @@ static int nand_init_data_interface(struct nand_chip *chip) modes = GENMASK(chip->onfi_timing_mode_default, 0); } - chip->data_interface = kzalloc(sizeof(*chip->data_interface), - GFP_KERNEL); - if (!chip->data_interface) - return -ENOMEM; for (mode = fls(modes) - 1; mode >= 0; mode--) { - ret = onfi_init_data_interface(chip, chip->data_interface, - NAND_SDR_IFACE, mode); + ret = onfi_fill_data_interface(chip, NAND_SDR_IFACE, mode); if (ret) continue; /* Pass -1 to only */ ret 
= chip->setup_data_interface(mtd, NAND_DATA_IFACE_CHECK_ONLY, - chip->data_interface); + &chip->data_interface); if (!ret) { chip->onfi_timing_mode_default = mode; break; @@ -1243,11 +1237,6 @@ static int nand_init_data_interface(struct nand_chip *chip) return 0; } -static void nand_release_data_interface(struct nand_chip *chip) -{ - kfree(chip->data_interface); -} - /** * nand_read_page_op - Do a READ PAGE operation * @chip: The NAND chip @@ -1763,11 +1752,16 @@ EXPORT_SYMBOL_GPL(nand_write_data_op); * @chip: The NAND chip * @chipnr: Internal die id * - * Returns 0 for success or negative error code otherwise + * Save the timings data structure, then apply SDR timings mode 0 (see + * nand_reset_data_interface for details), do the reset operation, and + * apply back the previous timings. + * + * Returns 0 on success, a negative error code otherwise. */ int nand_reset(struct nand_chip *chip, int chipnr) { struct mtd_info *mtd = nand_to_mtd(chip); + struct nand_data_interface saved_data_intf = chip->data_interface; int ret; ret = nand_reset_data_interface(chip, chipnr); @@ -1785,6 +1779,7 @@ int nand_reset(struct nand_chip *chip, int chipnr) return ret; chip->select_chip(mtd, chipnr); + chip->data_interface = saved_data_intf; ret = nand_setup_data_interface(chip, chipnr); chip->select_chip(mtd, -1); if (ret) @@ -4889,6 +4884,9 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, struct nand_chip *chip = mtd_to_nand(mtd); int ret; + /* Enforce the right timings for reset/detection */ + onfi_fill_data_interface(chip, NAND_SDR_IFACE, 0); + ret = nand_dt_init(chip); if (ret) return ret; @@ -5629,7 +5627,7 @@ int nand_scan_tail(struct mtd_info *mtd) chip->select_chip(mtd, -1); if (ret) - goto err_nand_data_iface_cleanup; + goto err_nand_manuf_cleanup; } /* Check, if we should skip the bad block table scan */ @@ -5639,12 +5637,10 @@ int nand_scan_tail(struct mtd_info *mtd) /* Build bad block table */ ret = chip->scan_bbt(mtd); if (ret) - goto 
err_nand_data_iface_cleanup; + goto err_nand_manuf_cleanup; return 0; -err_nand_data_iface_cleanup: - nand_release_data_interface(chip); err_nand_manuf_cleanup: nand_manufacturer_cleanup(chip); @@ -5703,8 +5699,6 @@ void nand_cleanup(struct nand_chip *chip) chip->ecc.algo == NAND_ECC_BCH) nand_bch_free((struct nand_bch_control *)chip->ecc.priv); - nand_release_data_interface(chip); - /* Free bad block table memory */ kfree(chip->bbt); if (!(chip->options & NAND_OWN_BUFFERS) && chip->buffers) { diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c index 5d1533bcc5bd..9400d039ddbd 100644 --- a/drivers/mtd/nand/nand_timings.c +++ b/drivers/mtd/nand/nand_timings.c @@ -283,16 +283,16 @@ const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode) EXPORT_SYMBOL(onfi_async_timing_mode_to_sdr_timings); /** - * onfi_init_data_interface - [NAND Interface] Initialize a data interface from + * onfi_fill_data_interface - [NAND Interface] Initialize a data interface from * given ONFI mode - * @iface: The data interface to be initialized * @mode: The ONFI timing mode */ -int onfi_init_data_interface(struct nand_chip *chip, - struct nand_data_interface *iface, +int onfi_fill_data_interface(struct nand_chip *chip, enum nand_data_interface_type type, int timing_mode) { + struct nand_data_interface *iface = &chip->data_interface; + if (type != NAND_SDR_IFACE) return -EINVAL; @@ -321,15 +321,4 @@ int onfi_init_data_interface(struct nand_chip *chip, return 0; } -EXPORT_SYMBOL(onfi_init_data_interface); - -/** - * nand_get_default_data_interface - [NAND Interface] Retrieve NAND - * data interface for mode 0. This is used as default timing after - * reset. 
- */ -const struct nand_data_interface *nand_get_default_data_interface(void) -{ - return &onfi_sdr_timings[0]; -} -EXPORT_SYMBOL(nand_get_default_data_interface); +EXPORT_SYMBOL(onfi_fill_data_interface); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index e6810f0b8f9e..2a72eab286ef 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -917,7 +917,7 @@ struct nand_chip { u16 max_bb_per_die; u32 blocks_per_die; - struct nand_data_interface *data_interface; + struct nand_data_interface data_interface; int read_retries; @@ -1214,8 +1214,7 @@ static inline int onfi_get_sync_timing_mode(struct nand_chip *chip) return le16_to_cpu(chip->onfi_params.src_sync_timing_mode); } -int onfi_init_data_interface(struct nand_chip *chip, - struct nand_data_interface *iface, +int onfi_fill_data_interface(struct nand_chip *chip, enum nand_data_interface_type type, int timing_mode); @@ -1258,8 +1257,6 @@ static inline int jedec_feature(struct nand_chip *chip) /* get timing characteristics from ONFI timing mode. */ const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); -/* get data interface from ONFI timing mode 0, used after reset. */ -const struct nand_data_interface *nand_get_default_data_interface(void); int nand_check_erased_ecc_chunk(void *data, int datalen, void *ecc, int ecclen, -- cgit v1.2.3 From f880b07bf155226af8491d58558a41f2cf5245dc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 5 Dec 2017 17:47:14 +0900 Subject: mtd: nand: cafe: remove use of NAND_OWN_BUFFERS This driver is the last/only user of NAND_OWN_BUFFERS. Boris suggested to remove this flag. Taking a closer look at this driver, it calls dma_alloc_coherent() for the concatenated area for the DMA bounce buffer + struct nand_buffers, but the latter does not need to be DMA-coherent; cafe_{write,read}_buf simply do memcpy() between buffers when usedma==1. 
Let's do dma_alloc_coherent() for the DMA bounce buffer in the front, and leave the nand_buffers allocation to nand_scan_tail(), then rip off NAND_OWN_BUFFERS. The magic number, 2112, is still mysterious (hard-coded writesize + oobsize ?), but this is not our main interest. I am keeping it. Suggested-by: Boris Brezillon Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- drivers/mtd/nand/cafe_nand.c | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index de36762e3058..a438b7114053 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -605,7 +605,6 @@ static int cafe_nand_probe(struct pci_dev *pdev, uint32_t ctrl; int err = 0; int old_dma; - struct nand_buffers *nbuf; /* Very old versions shared the same PCI ident for all three functions on the chip. Verify the class too... */ @@ -653,7 +652,6 @@ static int cafe_nand_probe(struct pci_dev *pdev, /* Enable the following for a flash based bad block table */ cafe->nand.bbt_options = NAND_BBT_USE_FLASH; - cafe->nand.options = NAND_OWN_BUFFERS; if (skipbbt) { cafe->nand.options |= NAND_SKIP_BBTSCAN; @@ -723,15 +721,12 @@ static int cafe_nand_probe(struct pci_dev *pdev, if (err) goto out_irq; - cafe->dmabuf = dma_alloc_coherent(&cafe->pdev->dev, - 2112 + sizeof(struct nand_buffers) + - mtd->writesize + mtd->oobsize, - &cafe->dmaaddr, GFP_KERNEL); + cafe->dmabuf = dma_alloc_coherent(&cafe->pdev->dev, 2112, + &cafe->dmaaddr, GFP_KERNEL); if (!cafe->dmabuf) { err = -ENOMEM; goto out_irq; } - cafe->nand.buffers = nbuf = (void *)cafe->dmabuf + 2112; /* Set up DMA address */ cafe_writel(cafe, cafe->dmaaddr & 0xffffffff, NAND_DMA_ADDR0); @@ -744,11 +739,6 @@ static int cafe_nand_probe(struct pci_dev *pdev, cafe_dev_dbg(&cafe->pdev->dev, "Set DMA address to %x (virt %p)\n", cafe_readl(cafe, NAND_DMA_ADDR0), cafe->dmabuf); - /* this driver does not need the @ecccalc and @ecccode */ - 
nbuf->ecccalc = NULL; - nbuf->ecccode = NULL; - nbuf->databuf = (uint8_t *)(nbuf + 1); - /* Restore the DMA flag */ usedma = old_dma; @@ -793,10 +783,7 @@ static int cafe_nand_probe(struct pci_dev *pdev, goto out; out_free_dma: - dma_free_coherent(&cafe->pdev->dev, - 2112 + sizeof(struct nand_buffers) + - mtd->writesize + mtd->oobsize, - cafe->dmabuf, cafe->dmaaddr); + dma_free_coherent(&cafe->pdev->dev, 2112, cafe->dmabuf, cafe->dmaaddr); out_irq: /* Disable NAND IRQ in global IRQ mask register */ cafe_writel(cafe, ~1 & cafe_readl(cafe, GLOBAL_IRQ_MASK), GLOBAL_IRQ_MASK); @@ -821,10 +808,7 @@ static void cafe_nand_remove(struct pci_dev *pdev) nand_release(mtd); free_rs(cafe->rs); pci_iounmap(pdev, cafe->mmio); - dma_free_coherent(&cafe->pdev->dev, - 2112 + sizeof(struct nand_buffers) + - mtd->writesize + mtd->oobsize, - cafe->dmabuf, cafe->dmaaddr); + dma_free_coherent(&cafe->pdev->dev, 2112, cafe->dmabuf, cafe->dmaaddr); kfree(cafe); } -- cgit v1.2.3 From 8b311ead8bff9b56e512e3e544c488042ad0e7e7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 5 Dec 2017 17:47:15 +0900 Subject: mtd: nand: remove unused NAND_OWN_BUFFERS flag The last/only user of NAND_OWN_BUFFERS (cafe_nand.c) has been reworked. This flag is no longer needed. 
Suggested-by: Boris Brezillon Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 45 ++++++++++++++++++++------------------------ include/linux/mtd/rawnand.h | 5 ----- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 215c52a3b9ad..b63cc95e9179 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -5319,35 +5319,30 @@ int nand_scan_tail(struct mtd_info *mtd) return -EINVAL; } - if (!(chip->options & NAND_OWN_BUFFERS)) { - nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL); - if (!nbuf) - return -ENOMEM; - - nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL); - if (!nbuf->ecccalc) { - ret = -ENOMEM; - goto err_free_nbuf; - } + nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL); + if (!nbuf) + return -ENOMEM; - nbuf->ecccode = kmalloc(mtd->oobsize, GFP_KERNEL); - if (!nbuf->ecccode) { - ret = -ENOMEM; - goto err_free_nbuf; - } + nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL); + if (!nbuf->ecccalc) { + ret = -ENOMEM; + goto err_free_nbuf; + } - nbuf->databuf = kmalloc(mtd->writesize + mtd->oobsize, - GFP_KERNEL); - if (!nbuf->databuf) { - ret = -ENOMEM; - goto err_free_nbuf; - } + nbuf->ecccode = kmalloc(mtd->oobsize, GFP_KERNEL); + if (!nbuf->ecccode) { + ret = -ENOMEM; + goto err_free_nbuf; + } - chip->buffers = nbuf; - } else if (!chip->buffers) { - return -ENOMEM; + nbuf->databuf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); + if (!nbuf->databuf) { + ret = -ENOMEM; + goto err_free_nbuf; } + chip->buffers = nbuf; + /* * FIXME: some NAND manufacturer drivers expect the first die to be * selected when manufacturer->init() is called. 
They should be fixed @@ -5701,7 +5696,7 @@ void nand_cleanup(struct nand_chip *chip) /* Free bad block table memory */ kfree(chip->bbt); - if (!(chip->options & NAND_OWN_BUFFERS) && chip->buffers) { + if (chip->buffers) { kfree(chip->buffers->databuf); kfree(chip->buffers->ecccode); kfree(chip->buffers->ecccalc); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 2a72eab286ef..fca802ef9af3 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -185,11 +185,6 @@ enum nand_ecc_algo { /* Non chip related options */ /* This option skips the bbt scan during initialization. */ #define NAND_SKIP_BBTSCAN 0x00010000 -/* - * This option is defined if the board driver allocates its own buffers - * (e.g. because it needs them DMA-coherent). - */ -#define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 /* -- cgit v1.2.3 From c0313b966a0942fba934d34c7a76f444641d0b6e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 5 Dec 2017 17:47:16 +0900 Subject: mtd: nand: squash struct nand_buffers into struct nand_chip struct nand_buffers is malloc'ed in nand_scan_tail() just for containing three pointers. Squash this struct into nand_chip. 
Move and rename as follows: chip->buffers->ecccalc -> chip->ecc.calc_buf chip->buffers->ecccode -> chip->ecc.code_buf chip->buffers->databuf -> chip->data_buf Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- drivers/mtd/nand/brcmnand/brcmnand.c | 2 +- drivers/mtd/nand/denali.c | 2 +- drivers/mtd/nand/fsmc_nand.c | 4 +- drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 4 +- drivers/mtd/nand/nand_base.c | 91 ++++++++++++++-------------------- drivers/mtd/nand/nand_bbt.c | 2 +- drivers/mtd/nand/omap2.c | 10 ++-- drivers/mtd/nand/sunxi_nand.c | 6 +-- include/linux/mtd/rawnand.h | 23 +++------ 9 files changed, 59 insertions(+), 85 deletions(-) diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index e6879d4d53ca..54842512edb1 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -1681,7 +1681,7 @@ static int brcmstb_nand_verify_erased_page(struct mtd_info *mtd, int ret; if (!buf) { - buf = chip->buffers->databuf; + buf = chip->data_buf; /* Invalidate page cache */ chip->pagebuf = -1; } diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index 47a253737bb2..00698b33cb22 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -330,7 +330,7 @@ static int denali_check_erased_page(struct mtd_info *mtd, unsigned long uncor_ecc_flags, unsigned int max_bitflips) { - uint8_t *ecc_code = chip->buffers->ecccode; + uint8_t *ecc_code = chip->ecc.code_buf; int ecc_steps = chip->ecc.steps; int ecc_size = chip->ecc.size; int ecc_bytes = chip->ecc.bytes; diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c index b44e5c6545e0..f49ed46fa770 100644 --- a/drivers/mtd/nand/fsmc_nand.c +++ b/drivers/mtd/nand/fsmc_nand.c @@ -684,8 +684,8 @@ static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, int eccbytes = chip->ecc.bytes; int eccsteps = chip->ecc.steps; uint8_t *p = buf; - uint8_t *ecc_calc = chip->buffers->ecccalc; - uint8_t 
*ecc_code = chip->buffers->ecccode; + uint8_t *ecc_calc = chip->ecc.calc_buf; + uint8_t *ecc_code = chip->ecc.code_buf; int off, len, group = 0; /* * ecc_oob is intentionally taken as uint16_t. In 16bit devices, we diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 3c3f3f58fdcb..b51db8c85405 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -1696,7 +1696,7 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this) unsigned int search_area_size_in_strides; unsigned int stride; unsigned int page; - uint8_t *buffer = chip->buffers->databuf; + uint8_t *buffer = chip->data_buf; int saved_chip_number; int found_an_ncb_fingerprint = false; @@ -1755,7 +1755,7 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this) unsigned int block; unsigned int stride; unsigned int page; - uint8_t *buffer = chip->buffers->databuf; + uint8_t *buffer = chip->data_buf; int saved_chip_number; int status; diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index b63cc95e9179..32c0239b380a 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2030,8 +2030,8 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, int eccbytes = chip->ecc.bytes; int eccsteps = chip->ecc.steps; uint8_t *p = buf; - uint8_t *ecc_calc = chip->buffers->ecccalc; - uint8_t *ecc_code = chip->buffers->ecccode; + uint8_t *ecc_calc = chip->ecc.calc_buf; + uint8_t *ecc_code = chip->ecc.code_buf; unsigned int max_bitflips = 0; chip->ecc.read_page_raw(mtd, chip, buf, 1, page); @@ -2102,7 +2102,7 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, /* Calculate ECC */ for (i = 0; i < eccfrag_len ; i += chip->ecc.bytes, p += chip->ecc.size) - chip->ecc.calculate(mtd, p, &chip->buffers->ecccalc[i]); + chip->ecc.calculate(mtd, p, &chip->ecc.calc_buf[i]); /* * The performance is faster if we position 
offsets according to @@ -2142,7 +2142,7 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, return ret; } - ret = mtd_ooblayout_get_eccbytes(mtd, chip->buffers->ecccode, + ret = mtd_ooblayout_get_eccbytes(mtd, chip->ecc.code_buf, chip->oob_poi, index, eccfrag_len); if (ret) return ret; @@ -2151,13 +2151,13 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, for (i = 0; i < eccfrag_len ; i += chip->ecc.bytes, p += chip->ecc.size) { int stat; - stat = chip->ecc.correct(mtd, p, - &chip->buffers->ecccode[i], &chip->buffers->ecccalc[i]); + stat = chip->ecc.correct(mtd, p, &chip->ecc.code_buf[i], + &chip->ecc.calc_buf[i]); if (stat == -EBADMSG && (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) { /* check for empty pages with bitflips */ stat = nand_check_erased_ecc_chunk(p, chip->ecc.size, - &chip->buffers->ecccode[i], + &chip->ecc.code_buf[i], chip->ecc.bytes, NULL, 0, chip->ecc.strength); @@ -2190,8 +2190,8 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, int eccbytes = chip->ecc.bytes; int eccsteps = chip->ecc.steps; uint8_t *p = buf; - uint8_t *ecc_calc = chip->buffers->ecccalc; - uint8_t *ecc_code = chip->buffers->ecccode; + uint8_t *ecc_calc = chip->ecc.calc_buf; + uint8_t *ecc_code = chip->ecc.code_buf; unsigned int max_bitflips = 0; ret = nand_read_page_op(chip, page, 0, NULL, 0); @@ -2264,8 +2264,8 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd, int eccbytes = chip->ecc.bytes; int eccsteps = chip->ecc.steps; uint8_t *p = buf; - uint8_t *ecc_code = chip->buffers->ecccode; - uint8_t *ecc_calc = chip->buffers->ecccalc; + uint8_t *ecc_code = chip->ecc.code_buf; + uint8_t *ecc_calc = chip->ecc.calc_buf; unsigned int max_bitflips = 0; /* Read the OOB area first */ @@ -2514,7 +2514,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from, /* Is the current page in the buffer? */ if (realpage != chip->pagebuf || oob) { - bufpoi = use_bufpoi ? 
chip->buffers->databuf : buf; + bufpoi = use_bufpoi ? chip->data_buf : buf; if (use_bufpoi && aligned) pr_debug("%s: using read bounce buffer for buf@%p\n", @@ -2555,7 +2555,7 @@ read_retry: /* Invalidate page cache */ chip->pagebuf = -1; } - memcpy(buf, chip->buffers->databuf + col, bytes); + memcpy(buf, chip->data_buf + col, bytes); } if (unlikely(oob)) { @@ -2596,7 +2596,7 @@ read_retry: buf += bytes; max_bitflips = max_t(unsigned int, max_bitflips, ret); } else { - memcpy(buf, chip->buffers->databuf + col, bytes); + memcpy(buf, chip->data_buf + col, bytes); buf += bytes; max_bitflips = max_t(unsigned int, max_bitflips, chip->pagebuf_bitflips); @@ -3071,7 +3071,7 @@ static int nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, int i, eccsize = chip->ecc.size, ret; int eccbytes = chip->ecc.bytes; int eccsteps = chip->ecc.steps; - uint8_t *ecc_calc = chip->buffers->ecccalc; + uint8_t *ecc_calc = chip->ecc.calc_buf; const uint8_t *p = buf; /* Software ECC calculation */ @@ -3101,7 +3101,7 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip, int i, eccsize = chip->ecc.size, ret; int eccbytes = chip->ecc.bytes; int eccsteps = chip->ecc.steps; - uint8_t *ecc_calc = chip->buffers->ecccalc; + uint8_t *ecc_calc = chip->ecc.calc_buf; const uint8_t *p = buf; ret = nand_prog_page_begin_op(chip, page, 0, NULL, 0); @@ -3147,7 +3147,7 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd, int oob_required, int page) { uint8_t *oob_buf = chip->oob_poi; - uint8_t *ecc_calc = chip->buffers->ecccalc; + uint8_t *ecc_calc = chip->ecc.calc_buf; int ecc_size = chip->ecc.size; int ecc_bytes = chip->ecc.bytes; int ecc_steps = chip->ecc.steps; @@ -3187,7 +3187,7 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd, /* copy calculated ECC for whole page to chip->buffer->oob */ /* this include masked-value(0xFF) for unwritten subpages */ - ecc_calc = chip->buffers->ecccalc; + ecc_calc = chip->ecc.calc_buf; ret = 
mtd_ooblayout_set_eccbytes(mtd, ecc_calc, chip->oob_poi, 0, chip->ecc.total); if (ret) @@ -3434,9 +3434,9 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, if (part_pagewr) bytes = min_t(int, bytes - column, writelen); chip->pagebuf = -1; - memset(chip->buffers->databuf, 0xff, mtd->writesize); - memcpy(&chip->buffers->databuf[column], buf, bytes); - wbuf = chip->buffers->databuf; + memset(chip->data_buf, 0xff, mtd->writesize); + memcpy(&chip->data_buf[column], buf, bytes); + wbuf = chip->data_buf; } if (unlikely(oob)) { @@ -5310,7 +5310,6 @@ int nand_scan_tail(struct mtd_info *mtd) { struct nand_chip *chip = mtd_to_nand(mtd); struct nand_ecc_ctrl *ecc = &chip->ecc; - struct nand_buffers *nbuf = NULL; int ret, i; /* New bad blocks should be marked in OOB, flash-based BBT, or both */ @@ -5319,30 +5318,22 @@ int nand_scan_tail(struct mtd_info *mtd) return -EINVAL; } - nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL); - if (!nbuf) + ecc->calc_buf = kmalloc(mtd->oobsize, GFP_KERNEL); + if (!ecc->calc_buf) return -ENOMEM; - nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL); - if (!nbuf->ecccalc) { + ecc->code_buf = kmalloc(mtd->oobsize, GFP_KERNEL); + if (!ecc->code_buf) { ret = -ENOMEM; - goto err_free_nbuf; + goto err_free_buf; } - nbuf->ecccode = kmalloc(mtd->oobsize, GFP_KERNEL); - if (!nbuf->ecccode) { + chip->data_buf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); + if (!chip->data_buf) { ret = -ENOMEM; - goto err_free_nbuf; + goto err_free_buf; } - nbuf->databuf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); - if (!nbuf->databuf) { - ret = -ENOMEM; - goto err_free_nbuf; - } - - chip->buffers = nbuf; - /* * FIXME: some NAND manufacturer drivers expect the first die to be * selected when manufacturer->init() is called. 
They should be fixed @@ -5353,10 +5344,10 @@ int nand_scan_tail(struct mtd_info *mtd) ret = nand_manufacturer_init(chip); chip->select_chip(mtd, -1); if (ret) - goto err_free_nbuf; + goto err_free_buf; /* Set the internal oob buffer location, just after the page data */ - chip->oob_poi = chip->buffers->databuf + mtd->writesize; + chip->oob_poi = chip->data_buf + mtd->writesize; /* * If no default placement scheme is given, select an appropriate one. @@ -5640,13 +5631,10 @@ int nand_scan_tail(struct mtd_info *mtd) err_nand_manuf_cleanup: nand_manufacturer_cleanup(chip); -err_free_nbuf: - if (nbuf) { - kfree(nbuf->databuf); - kfree(nbuf->ecccode); - kfree(nbuf->ecccalc); - kfree(nbuf); - } +err_free_buf: + kfree(chip->data_buf); + kfree(ecc->code_buf); + kfree(ecc->calc_buf); return ret; } @@ -5696,12 +5684,9 @@ void nand_cleanup(struct nand_chip *chip) /* Free bad block table memory */ kfree(chip->bbt); - if (chip->buffers) { - kfree(chip->buffers->databuf); - kfree(chip->buffers->ecccode); - kfree(chip->buffers->ecccalc); - kfree(chip->buffers); - } + kfree(chip->data_buf); + kfree(chip->ecc.code_buf); + kfree(chip->ecc.calc_buf); /* Free bad block descriptor memory */ if (chip->badblock_pattern && chip->badblock_pattern->options diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c index 2915b6739bf8..36092850be2c 100644 --- a/drivers/mtd/nand/nand_bbt.c +++ b/drivers/mtd/nand/nand_bbt.c @@ -898,7 +898,7 @@ static inline int nand_memory_bbt(struct mtd_info *mtd, struct nand_bbt_descr *b { struct nand_chip *this = mtd_to_nand(mtd); - return create_bbt(mtd, this->buffers->databuf, bd, -1); + return create_bbt(mtd, this->data_buf, bd, -1); } /** diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 5cb4db6f88e3..8cdf7d3d8fa7 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1530,7 +1530,7 @@ static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip, const uint8_t *buf, int oob_required, int 
page) { int ret; - uint8_t *ecc_calc = chip->buffers->ecccalc; + uint8_t *ecc_calc = chip->ecc.calc_buf; nand_prog_page_begin_op(chip, page, 0, NULL, 0); @@ -1571,7 +1571,7 @@ static int omap_write_subpage_bch(struct mtd_info *mtd, u32 data_len, const u8 *buf, int oob_required, int page) { - u8 *ecc_calc = chip->buffers->ecccalc; + u8 *ecc_calc = chip->ecc.calc_buf; int ecc_size = chip->ecc.size; int ecc_bytes = chip->ecc.bytes; int ecc_steps = chip->ecc.steps; @@ -1609,7 +1609,7 @@ static int omap_write_subpage_bch(struct mtd_info *mtd, /* copy calculated ECC for whole page to chip->buffer->oob */ /* this include masked-value(0xFF) for unwritten subpages */ - ecc_calc = chip->buffers->ecccalc; + ecc_calc = chip->ecc.calc_buf; ret = mtd_ooblayout_set_eccbytes(mtd, ecc_calc, chip->oob_poi, 0, chip->ecc.total); if (ret) @@ -1639,8 +1639,8 @@ static int omap_write_subpage_bch(struct mtd_info *mtd, static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page) { - uint8_t *ecc_calc = chip->buffers->ecccalc; - uint8_t *ecc_code = chip->buffers->ecccode; + uint8_t *ecc_calc = chip->ecc.calc_buf; + uint8_t *ecc_code = chip->ecc.code_buf; int stat, ret; unsigned int max_bitflips = 0; diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 5c176dee821e..2275fbedfb2a 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1555,7 +1555,7 @@ static int sunxi_nfc_hw_common_ecc_read_oob(struct mtd_info *mtd, { chip->pagebuf = -1; - return chip->ecc.read_page(mtd, chip, chip->buffers->databuf, 1, page); + return chip->ecc.read_page(mtd, chip, chip->data_buf, 1, page); } static int sunxi_nfc_hw_common_ecc_write_oob(struct mtd_info *mtd, @@ -1566,8 +1566,8 @@ static int sunxi_nfc_hw_common_ecc_write_oob(struct mtd_info *mtd, chip->pagebuf = -1; - memset(chip->buffers->databuf, 0xff, mtd->writesize); - ret = chip->ecc.write_page(mtd, chip, chip->buffers->databuf, 1, page); + 
memset(chip->data_buf, 0xff, mtd->writesize); + ret = chip->ecc.write_page(mtd, chip, chip->data_buf, 1, page); if (ret) return ret; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index fca802ef9af3..f8f27c6801a6 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -514,6 +514,8 @@ static const struct nand_ecc_caps __name = { \ * @postpad: padding information for syndrome based ECC generators * @options: ECC specific options (see NAND_ECC_XXX flags defined above) * @priv: pointer to private ECC control data + * @calc_buf: buffer for calculated ECC, size is oobsize. + * @code_buf: buffer for ECC read from flash, size is oobsize. * @hwctl: function to control hardware ECC generator. Must only * be provided if an hardware ECC is available * @calculate: function for ECC calculation or readback from ECC hardware @@ -564,6 +566,8 @@ struct nand_ecc_ctrl { int postpad; unsigned int options; void *priv; + u8 *calc_buf; + u8 *code_buf; void (*hwctl)(struct mtd_info *mtd, int mode); int (*calculate)(struct mtd_info *mtd, const uint8_t *dat, uint8_t *ecc_code); @@ -591,21 +595,6 @@ struct nand_ecc_ctrl { int page); }; -/** - * struct nand_buffers - buffer structure for read/write - * @ecccalc: buffer pointer for calculated ECC, size is oobsize. - * @ecccode: buffer pointer for ECC read from flash, size is oobsize. - * @databuf: buffer pointer for data, size is (page size + oobsize). - * - * Do not change the order of buffers. databuf and oobrbuf must be in - * consecutive order. - */ -struct nand_buffers { - uint8_t *ecccalc; - uint8_t *ecccode; - uint8_t *databuf; -}; - /** * struct nand_sdr_timings - SDR NAND chip timings * @@ -774,7 +763,6 @@ struct nand_manufacturer_ops { * @setup_read_retry: [FLASHSPECIFIC] flash (vendor) specific function for * setting the read-retry mode. Mostly needed for MLC NAND. 
* @ecc: [BOARDSPECIFIC] ECC control structure - * @buffers: buffer structure for read/write * @buf_align: minimum buffer alignment required by a platform * @hwcontrol: platform-specific hardware control structure * @erase: [REPLACEABLE] erase function @@ -814,6 +802,7 @@ struct nand_manufacturer_ops { * @numchips: [INTERN] number of physical chips * @chipsize: [INTERN] the size of one chip for multichip arrays * @pagemask: [INTERN] page number mask = number of (pages / chip) - 1 + * @data_buf: [INTERN] buffer for data, size is (page size + oobsize). * @pagebuf: [INTERN] holds the pagenumber which is currently in * data_buf. * @pagebuf_bitflips: [INTERN] holds the bitflip count for the page which is @@ -892,6 +881,7 @@ struct nand_chip { int numchips; uint64_t chipsize; int pagemask; + u8 *data_buf; int pagebuf; unsigned int pagebuf_bitflips; int subpagesize; @@ -922,7 +912,6 @@ struct nand_chip { struct nand_hw_control *controller; struct nand_ecc_ctrl ecc; - struct nand_buffers *buffers; unsigned long buf_align; struct nand_hw_control hwcontrol; -- cgit v1.2.3 From 958ef111cca5e70994b806127ff15258d446ed25 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 5 Dec 2017 17:49:56 +0900 Subject: mtd: nand: cafe: clean up DMA address setup Use macros from <linux/kernel.h> to make the code readable. The compiler warning will be kept suppressed.
Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- drivers/mtd/nand/cafe_nand.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c index a438b7114053..567ff972d5fc 100644 --- a/drivers/mtd/nand/cafe_nand.c +++ b/drivers/mtd/nand/cafe_nand.c @@ -729,12 +729,8 @@ static int cafe_nand_probe(struct pci_dev *pdev, } /* Set up DMA address */ - cafe_writel(cafe, cafe->dmaaddr & 0xffffffff, NAND_DMA_ADDR0); - if (sizeof(cafe->dmaaddr) > 4) - /* Shift in two parts to shut the compiler up */ - cafe_writel(cafe, (cafe->dmaaddr >> 16) >> 16, NAND_DMA_ADDR1); - else - cafe_writel(cafe, 0, NAND_DMA_ADDR1); + cafe_writel(cafe, lower_32_bits(cafe->dmaaddr), NAND_DMA_ADDR0); + cafe_writel(cafe, upper_32_bits(cafe->dmaaddr), NAND_DMA_ADDR1); cafe_dev_dbg(&cafe->pdev->dev, "Set DMA address to %x (virt %p)\n", cafe_readl(cafe, NAND_DMA_ADDR0), cafe->dmabuf); -- cgit v1.2.3 From f170c6fb7036dca779bbcb2e0fea4b4eed3201b1 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Mon, 27 Nov 2017 23:52:56 +0100 Subject: mtd: onenand: Remove obsolete url from Kconfig help The Samsung website no longer hosts information about OneNAND, so delete the link. Signed-off-by: Ladislav Michl Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig index dcae2f6a2b11..aaeb30458139 100644 --- a/drivers/mtd/onenand/Kconfig +++ b/drivers/mtd/onenand/Kconfig @@ -4,8 +4,7 @@ menuconfig MTD_ONENAND depends on HAS_IOMEM help This enables support for accessing all type of OneNAND flash - devices. For further information see - + devices. if MTD_ONENAND -- cgit v1.2.3 From 624d5abf6c008ba00b2b868990708d5f2a5e2d08 Mon Sep 17 00:00:00 2001 From: RogerCC Lin Date: Thu, 30 Nov 2017 22:10:43 +0800 Subject: mtd: nand: mtk: update DT bindings Add MT7622 NAND Flash Controller dt bindings documentation.
Signed-off-by: RogerCC Lin Reviewed-by: Matthias Brugger Signed-off-by: Boris Brezillon --- Documentation/devicetree/bindings/mtd/mtk-nand.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/mtd/mtk-nand.txt b/Documentation/devicetree/bindings/mtd/mtk-nand.txt index dbf9e054c11c..0025bc4c94a0 100644 --- a/Documentation/devicetree/bindings/mtd/mtk-nand.txt +++ b/Documentation/devicetree/bindings/mtd/mtk-nand.txt @@ -12,8 +12,10 @@ tree nodes. The first part of NFC is NAND Controller Interface (NFI) HW. Required NFI properties: -- compatible: Should be one of "mediatek,mt2701-nfc", - "mediatek,mt2712-nfc". +- compatible: Should be one of + "mediatek,mt2701-nfc", + "mediatek,mt2712-nfc", + "mediatek,mt7622-nfc". - reg: Base physical address and size of NFI. - interrupts: Interrupts of NFI. - clocks: NFI required clocks. @@ -142,7 +144,10 @@ Example: ============== Required BCH properties: -- compatible: Should be one of "mediatek,mt2701-ecc", "mediatek,mt2712-ecc". +- compatible: Should be one of + "mediatek,mt2701-ecc", + "mediatek,mt2712-ecc", + "mediatek,mt7622-ecc". - reg: Base physical address and size of ECC. - interrupts: Interrupts of ECC. - clocks: ECC required clocks. -- cgit v1.2.3 From b45ee5501ede9a369e9bc20edca508193b848d25 Mon Sep 17 00:00:00 2001 From: RogerCC Lin Date: Thu, 30 Nov 2017 22:10:44 +0800 Subject: mtd: nand: mtk: Support different MTK NAND flash controller IP MT7622 uses an earlier MTK NAND flash controller IP which supports a different sector size, max spare size per sector and number of parity bits. Some registers' offsets and definitions have also changed in this NAND flash controller. This patch is the preparation to support the MT7622 NAND flash controller. MT7622 NFC and ECC engine are similar to MT2701's, except for the differences below: (1)MT7622 NFC's max sector size(ECC data size) is 512 bytes, and MT2701's is 1024, and MT7622's max sector number is 8.
(2)The parity bit of MT7622 is 13, MT2701 is 14. (3)MT7622 ECC supports less ECC strength, max to 16 bit ecc strength. (4)MT7622 supports less spare size per sector, max spare size per sector is 28 bytes. (5)Some register's offset are different, include ECC_ENCIRQ_EN, ECC_ENCIRQ_STA, ECC_DECDONE, ECC_DECIRQ_EN and ECC_DECIRQ_STA. (6)ENC_MODE of ECC_ENCCNFG register is moved from bit 5-6 to bit 4-5. Signed-off-by: RogerCC Lin Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mtk_ecc.c | 100 ++++++++++++++++++++++++++++++-------------- drivers/mtd/nand/mtk_ecc.h | 3 +- drivers/mtd/nand/mtk_nand.c | 27 ++++++++---- 3 files changed, 89 insertions(+), 41 deletions(-) diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c index c51d214d169e..6610eefaa92b 100644 --- a/drivers/mtd/nand/mtk_ecc.c +++ b/drivers/mtd/nand/mtk_ecc.c @@ -34,34 +34,28 @@ #define ECC_ENCCON (0x00) #define ECC_ENCCNFG (0x04) -#define ECC_MODE_SHIFT (5) #define ECC_MS_SHIFT (16) #define ECC_ENCDIADDR (0x08) #define ECC_ENCIDLE (0x0C) -#define ECC_ENCIRQ_EN (0x80) -#define ECC_ENCIRQ_STA (0x84) #define ECC_DECCON (0x100) #define ECC_DECCNFG (0x104) #define DEC_EMPTY_EN BIT(31) #define DEC_CNFG_CORRECT (0x3 << 12) #define ECC_DECIDLE (0x10C) #define ECC_DECENUM0 (0x114) -#define ECC_DECDONE (0x124) -#define ECC_DECIRQ_EN (0x200) -#define ECC_DECIRQ_STA (0x204) #define ECC_TIMEOUT (500000) #define ECC_IDLE_REG(op) ((op) == ECC_ENCODE ? ECC_ENCIDLE : ECC_DECIDLE) #define ECC_CTL_REG(op) ((op) == ECC_ENCODE ? ECC_ENCCON : ECC_DECCON) -#define ECC_IRQ_REG(op) ((op) == ECC_ENCODE ? 
\ - ECC_ENCIRQ_EN : ECC_DECIRQ_EN) struct mtk_ecc_caps { u32 err_mask; const u8 *ecc_strength; + const u32 *ecc_regs; u8 num_ecc_strength; - u32 encode_parity_reg0; + u8 ecc_mode_shift; + u32 parity_bits; int pg_irq_sel; }; @@ -89,6 +83,33 @@ static const u8 ecc_strength_mt2712[] = { 40, 44, 48, 52, 56, 60, 68, 72, 80 }; +enum mtk_ecc_regs { + ECC_ENCPAR00, + ECC_ENCIRQ_EN, + ECC_ENCIRQ_STA, + ECC_DECDONE, + ECC_DECIRQ_EN, + ECC_DECIRQ_STA, +}; + +static int mt2701_ecc_regs[] = { + [ECC_ENCPAR00] = 0x10, + [ECC_ENCIRQ_EN] = 0x80, + [ECC_ENCIRQ_STA] = 0x84, + [ECC_DECDONE] = 0x124, + [ECC_DECIRQ_EN] = 0x200, + [ECC_DECIRQ_STA] = 0x204, +}; + +static int mt2712_ecc_regs[] = { + [ECC_ENCPAR00] = 0x300, + [ECC_ENCIRQ_EN] = 0x80, + [ECC_ENCIRQ_STA] = 0x84, + [ECC_DECDONE] = 0x124, + [ECC_DECIRQ_EN] = 0x200, + [ECC_DECIRQ_STA] = 0x204, +}; + static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc, enum mtk_ecc_operation op) { @@ -107,32 +128,30 @@ static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc, static irqreturn_t mtk_ecc_irq(int irq, void *id) { struct mtk_ecc *ecc = id; - enum mtk_ecc_operation op; u32 dec, enc; - dec = readw(ecc->regs + ECC_DECIRQ_STA) & ECC_IRQ_EN; + dec = readw(ecc->regs + ecc->caps->ecc_regs[ECC_DECIRQ_STA]) + & ECC_IRQ_EN; if (dec) { - op = ECC_DECODE; - dec = readw(ecc->regs + ECC_DECDONE); + dec = readw(ecc->regs + ecc->caps->ecc_regs[ECC_DECDONE]); if (dec & ecc->sectors) { /* * Clear decode IRQ status once again to ensure that * there will be no extra IRQ. 
*/ - readw(ecc->regs + ECC_DECIRQ_STA); + readw(ecc->regs + ecc->caps->ecc_regs[ECC_DECIRQ_STA]); ecc->sectors = 0; complete(&ecc->done); } else { return IRQ_HANDLED; } } else { - enc = readl(ecc->regs + ECC_ENCIRQ_STA) & ECC_IRQ_EN; - if (enc) { - op = ECC_ENCODE; + enc = readl(ecc->regs + ecc->caps->ecc_regs[ECC_ENCIRQ_STA]) + & ECC_IRQ_EN; + if (enc) complete(&ecc->done); - } else { + else return IRQ_NONE; - } } return IRQ_HANDLED; @@ -160,7 +179,7 @@ static int mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config) /* configure ECC encoder (in bits) */ enc_sz = config->len << 3; - reg = ecc_bit | (config->mode << ECC_MODE_SHIFT); + reg = ecc_bit | (config->mode << ecc->caps->ecc_mode_shift); reg |= (enc_sz << ECC_MS_SHIFT); writel(reg, ecc->regs + ECC_ENCCNFG); @@ -171,9 +190,9 @@ static int mtk_ecc_config(struct mtk_ecc *ecc, struct mtk_ecc_config *config) } else { /* configure ECC decoder (in bits) */ dec_sz = (config->len << 3) + - config->strength * ECC_PARITY_BITS; + config->strength * ecc->caps->parity_bits; - reg = ecc_bit | (config->mode << ECC_MODE_SHIFT); + reg = ecc_bit | (config->mode << ecc->caps->ecc_mode_shift); reg |= (dec_sz << ECC_MS_SHIFT) | DEC_CNFG_CORRECT; reg |= DEC_EMPTY_EN; writel(reg, ecc->regs + ECC_DECCNFG); @@ -291,7 +310,12 @@ int mtk_ecc_enable(struct mtk_ecc *ecc, struct mtk_ecc_config *config) */ if (ecc->caps->pg_irq_sel && config->mode == ECC_NFI_MODE) reg_val |= ECC_PG_IRQ_SEL; - writew(reg_val, ecc->regs + ECC_IRQ_REG(op)); + if (op == ECC_ENCODE) + writew(reg_val, ecc->regs + + ecc->caps->ecc_regs[ECC_ENCIRQ_EN]); + else + writew(reg_val, ecc->regs + + ecc->caps->ecc_regs[ECC_DECIRQ_EN]); } writew(ECC_OP_ENABLE, ecc->regs + ECC_CTL_REG(op)); @@ -310,13 +334,17 @@ void mtk_ecc_disable(struct mtk_ecc *ecc) /* disable it */ mtk_ecc_wait_idle(ecc, op); - if (op == ECC_DECODE) + if (op == ECC_DECODE) { /* * Clear decode IRQ status in case there is a timeout to wait * decode IRQ. 
*/ - readw(ecc->regs + ECC_DECIRQ_STA); - writew(0, ecc->regs + ECC_IRQ_REG(op)); + readw(ecc->regs + ecc->caps->ecc_regs[ECC_DECDONE]); + writew(0, ecc->regs + ecc->caps->ecc_regs[ECC_DECIRQ_EN]); + } else { + writew(0, ecc->regs + ecc->caps->ecc_regs[ECC_ENCIRQ_EN]); + } + writew(ECC_OP_DISABLE, ecc->regs + ECC_CTL_REG(op)); mutex_unlock(&ecc->lock); @@ -367,11 +395,11 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config, mtk_ecc_wait_idle(ecc, ECC_ENCODE); /* Program ECC bytes to OOB: per sector oob = FDM + ECC + SPARE */ - len = (config->strength * ECC_PARITY_BITS + 7) >> 3; + len = (config->strength * ecc->caps->parity_bits + 7) >> 3; /* write the parity bytes generated by the ECC back to temp buffer */ __ioread32_copy(ecc->eccdata, - ecc->regs + ecc->caps->encode_parity_reg0, + ecc->regs + ecc->caps->ecc_regs[ECC_ENCPAR00], round_up(len, 4)); /* copy into possibly unaligned OOB region with actual length */ @@ -404,19 +432,29 @@ void mtk_ecc_adjust_strength(struct mtk_ecc *ecc, u32 *p) } EXPORT_SYMBOL(mtk_ecc_adjust_strength); +unsigned int mtk_ecc_get_parity_bits(struct mtk_ecc *ecc) +{ + return ecc->caps->parity_bits; +} +EXPORT_SYMBOL(mtk_ecc_get_parity_bits); + static const struct mtk_ecc_caps mtk_ecc_caps_mt2701 = { .err_mask = 0x3f, .ecc_strength = ecc_strength_mt2701, + .ecc_regs = mt2701_ecc_regs, .num_ecc_strength = 20, - .encode_parity_reg0 = 0x10, + .ecc_mode_shift = 5, + .parity_bits = 14, .pg_irq_sel = 0, }; static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = { .err_mask = 0x7f, .ecc_strength = ecc_strength_mt2712, + .ecc_regs = mt2712_ecc_regs, .num_ecc_strength = 23, - .encode_parity_reg0 = 0x300, + .ecc_mode_shift = 5, + .parity_bits = 14, .pg_irq_sel = 1, }; @@ -452,7 +490,7 @@ static int mtk_ecc_probe(struct platform_device *pdev) max_eccdata_size = ecc->caps->num_ecc_strength - 1; max_eccdata_size = ecc->caps->ecc_strength[max_eccdata_size]; - max_eccdata_size = (max_eccdata_size * ECC_PARITY_BITS + 7) >> 3; + 
max_eccdata_size = (max_eccdata_size * ecc->caps->parity_bits + 7) >> 3; max_eccdata_size = round_up(max_eccdata_size, 4); ecc->eccdata = devm_kzalloc(dev, max_eccdata_size, GFP_KERNEL); if (!ecc->eccdata) diff --git a/drivers/mtd/nand/mtk_ecc.h b/drivers/mtd/nand/mtk_ecc.h index d245c14f1b80..a455df080952 100644 --- a/drivers/mtd/nand/mtk_ecc.h +++ b/drivers/mtd/nand/mtk_ecc.h @@ -14,8 +14,6 @@ #include -#define ECC_PARITY_BITS (14) - enum mtk_ecc_mode {ECC_DMA_MODE = 0, ECC_NFI_MODE = 1}; enum mtk_ecc_operation {ECC_ENCODE, ECC_DECODE}; @@ -43,6 +41,7 @@ int mtk_ecc_wait_done(struct mtk_ecc *, enum mtk_ecc_operation); int mtk_ecc_enable(struct mtk_ecc *, struct mtk_ecc_config *); void mtk_ecc_disable(struct mtk_ecc *); void mtk_ecc_adjust_strength(struct mtk_ecc *ecc, u32 *p); +unsigned int mtk_ecc_get_parity_bits(struct mtk_ecc *ecc); struct mtk_ecc *of_mtk_ecc_get(struct device_node *); void mtk_ecc_release(struct mtk_ecc *); diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c index 5d76be451596..b9946ae58616 100644 --- a/drivers/mtd/nand/mtk_nand.c +++ b/drivers/mtd/nand/mtk_nand.c @@ -97,7 +97,6 @@ #define MTK_TIMEOUT (500000) #define MTK_RESET_TIMEOUT (1000000) -#define MTK_MAX_SECTOR (16) #define MTK_NAND_MAX_NSELS (2) #define MTK_NFC_MIN_SPARE (16) #define ACCTIMING(tpoecs, tprecs, tc2r, tw2r, twh, twst, trlt) \ @@ -109,6 +108,8 @@ struct mtk_nfc_caps { u8 num_spare_size; u8 pageformat_spare_shift; u8 nfi_clk_div; + u8 max_sector; + u32 max_sector_size; }; struct mtk_nfc_bad_mark_ctl { @@ -450,7 +451,7 @@ static inline u8 mtk_nfc_read_byte(struct mtd_info *mtd) * set to max sector to allow the HW to continue reading over * unaligned accesses */ - reg = (MTK_MAX_SECTOR << CON_SEC_SHIFT) | CON_BRD; + reg = (nfc->caps->max_sector << CON_SEC_SHIFT) | CON_BRD; nfi_writel(nfc, reg, NFI_CON); /* trigger to fetch data */ @@ -481,7 +482,7 @@ static void mtk_nfc_write_byte(struct mtd_info *mtd, u8 byte) reg = nfi_readw(nfc, NFI_CNFG) | 
CNFG_BYTE_RW; nfi_writew(nfc, reg, NFI_CNFG); - reg = MTK_MAX_SECTOR << CON_SEC_SHIFT | CON_BWR; + reg = nfc->caps->max_sector << CON_SEC_SHIFT | CON_BWR; nfi_writel(nfc, reg, NFI_CON); nfi_writew(nfc, STAR_EN, NFI_STRDATA); @@ -1117,9 +1118,11 @@ static void mtk_nfc_set_fdm(struct mtk_nfc_fdm *fdm, struct mtd_info *mtd) { struct nand_chip *nand = mtd_to_nand(mtd); struct mtk_nfc_nand_chip *chip = to_mtk_nand(nand); + struct mtk_nfc *nfc = nand_get_controller_data(nand); u32 ecc_bytes; - ecc_bytes = DIV_ROUND_UP(nand->ecc.strength * ECC_PARITY_BITS, 8); + ecc_bytes = DIV_ROUND_UP(nand->ecc.strength * + mtk_ecc_get_parity_bits(nfc->ecc), 8); fdm->reg_size = chip->spare_per_sector - ecc_bytes; if (fdm->reg_size > NFI_FDM_MAX_SIZE) @@ -1199,7 +1202,8 @@ static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd) * this controller only supports 512 and 1024 sizes */ if (nand->ecc.size < 1024) { - if (mtd->writesize > 512) { + if (mtd->writesize > 512 && + nfc->caps->max_sector_size > 512) { nand->ecc.size = 1024; nand->ecc.strength <<= 1; } else { @@ -1214,7 +1218,8 @@ static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd) return ret; /* calculate oob bytes except ecc parity data */ - free = ((nand->ecc.strength * ECC_PARITY_BITS) + 7) >> 3; + free = (nand->ecc.strength * mtk_ecc_get_parity_bits(nfc->ecc) + + 7) >> 3; free = spare - free; /* @@ -1224,10 +1229,12 @@ static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd) */ if (free > NFI_FDM_MAX_SIZE) { spare -= NFI_FDM_MAX_SIZE; - nand->ecc.strength = (spare << 3) / ECC_PARITY_BITS; + nand->ecc.strength = (spare << 3) / + mtk_ecc_get_parity_bits(nfc->ecc); } else if (free < 0) { spare -= NFI_FDM_MIN_SIZE; - nand->ecc.strength = (spare << 3) / ECC_PARITY_BITS; + nand->ecc.strength = (spare << 3) / + mtk_ecc_get_parity_bits(nfc->ecc); } } @@ -1380,6 +1387,8 @@ static const struct mtk_nfc_caps mtk_nfc_caps_mt2701 = { .num_spare_size = 16, .pageformat_spare_shift = 4, .nfi_clk_div = 
1, + .max_sector = 16, + .max_sector_size = 1024, }; static const struct mtk_nfc_caps mtk_nfc_caps_mt2712 = { @@ -1387,6 +1396,8 @@ static const struct mtk_nfc_caps mtk_nfc_caps_mt2712 = { .num_spare_size = 19, .pageformat_spare_shift = 16, .nfi_clk_div = 2, + .max_sector = 16, + .max_sector_size = 1024, }; static const struct of_device_id mtk_nfc_id_table[] = { -- cgit v1.2.3 From 98dea8d71931460c189e5001b0faf2180a42db42 Mon Sep 17 00:00:00 2001 From: RogerCC Lin Date: Thu, 30 Nov 2017 22:10:45 +0800 Subject: mtd: nand: mtk: Support MT7622 NAND flash controller. Add tables to support MT7622 NAND flash controller. Signed-off-by: RogerCC Lin Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mtk_ecc.c | 26 ++++++++++++++++++++++++++ drivers/mtd/nand/mtk_nand.c | 16 ++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c index 6610eefaa92b..40d86a861a70 100644 --- a/drivers/mtd/nand/mtk_ecc.c +++ b/drivers/mtd/nand/mtk_ecc.c @@ -83,6 +83,10 @@ static const u8 ecc_strength_mt2712[] = { 40, 44, 48, 52, 56, 60, 68, 72, 80 }; +static const u8 ecc_strength_mt7622[] = { + 4, 6, 8, 10, 12, 14, 16 +}; + enum mtk_ecc_regs { ECC_ENCPAR00, ECC_ENCIRQ_EN, @@ -110,6 +114,15 @@ static int mt2712_ecc_regs[] = { [ECC_DECIRQ_STA] = 0x204, }; +static int mt7622_ecc_regs[] = { + [ECC_ENCPAR00] = 0x10, + [ECC_ENCIRQ_EN] = 0x30, + [ECC_ENCIRQ_STA] = 0x34, + [ECC_DECDONE] = 0x11c, + [ECC_DECIRQ_EN] = 0x140, + [ECC_DECIRQ_STA] = 0x144, +}; + static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc, enum mtk_ecc_operation op) { @@ -458,6 +471,16 @@ static const struct mtk_ecc_caps mtk_ecc_caps_mt2712 = { .pg_irq_sel = 1, }; +static const struct mtk_ecc_caps mtk_ecc_caps_mt7622 = { + .err_mask = 0x3f, + .ecc_strength = ecc_strength_mt7622, + .ecc_regs = mt7622_ecc_regs, + .num_ecc_strength = 7, + .ecc_mode_shift = 4, + .parity_bits = 13, + .pg_irq_sel = 0, +}; + static const struct of_device_id mtk_ecc_dt_match[] = { { 
.compatible = "mediatek,mt2701-ecc", @@ -465,6 +488,9 @@ static const struct of_device_id mtk_ecc_dt_match[] = { }, { .compatible = "mediatek,mt2712-ecc", .data = &mtk_ecc_caps_mt2712, + }, { + .compatible = "mediatek,mt7622-ecc", + .data = &mtk_ecc_caps_mt7622, }, {}, }; diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c index b9946ae58616..6977da3a26aa 100644 --- a/drivers/mtd/nand/mtk_nand.c +++ b/drivers/mtd/nand/mtk_nand.c @@ -174,6 +174,10 @@ static const u8 spare_size_mt2712[] = { 74 }; +static const u8 spare_size_mt7622[] = { + 16, 26, 27, 28 +}; + static inline struct mtk_nfc_nand_chip *to_mtk_nand(struct nand_chip *nand) { return container_of(nand, struct mtk_nfc_nand_chip, nand); @@ -1400,6 +1404,15 @@ static const struct mtk_nfc_caps mtk_nfc_caps_mt2712 = { .max_sector_size = 1024, }; +static const struct mtk_nfc_caps mtk_nfc_caps_mt7622 = { + .spare_size = spare_size_mt7622, + .num_spare_size = 4, + .pageformat_spare_shift = 4, + .nfi_clk_div = 1, + .max_sector = 8, + .max_sector_size = 512, +}; + static const struct of_device_id mtk_nfc_id_table[] = { { .compatible = "mediatek,mt2701-nfc", @@ -1407,6 +1420,9 @@ static const struct of_device_id mtk_nfc_id_table[] = { }, { .compatible = "mediatek,mt2712-nfc", .data = &mtk_nfc_caps_mt2712, + }, { + .compatible = "mediatek,mt7622-nfc", + .data = &mtk_nfc_caps_mt7622, }, {} }; -- cgit v1.2.3 From 8c677541bb24871ce44b5a1e327a3e7f5792eae4 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 5 Dec 2017 12:09:28 +0100 Subject: mtd: nand: denali: Avoid using ecc->code_buf as a temporary buffer ECC bytes are contiguous in the ->oob_poi buffer, which means we don't have to copy them into ->code_buf (here used as a temporary buffer) before passing them to the nand_check_erased_ecc_chunk() function. This change will allow us to allocate ecc->{code,calc}_buf only when ecc->calculate() or ecc->correct() is specified. 
Signed-off-by: Boris Brezillon Acked-by: Masahiro Yamada --- drivers/mtd/nand/denali.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index 00698b33cb22..313c7f50621b 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -330,16 +330,12 @@ static int denali_check_erased_page(struct mtd_info *mtd, unsigned long uncor_ecc_flags, unsigned int max_bitflips) { - uint8_t *ecc_code = chip->ecc.code_buf; + struct denali_nand_info *denali = mtd_to_denali(mtd); + uint8_t *ecc_code = chip->oob_poi + denali->oob_skip_bytes; int ecc_steps = chip->ecc.steps; int ecc_size = chip->ecc.size; int ecc_bytes = chip->ecc.bytes; - int i, ret, stat; - - ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0, - chip->ecc.total); - if (ret) - return ret; + int i, stat; for (i = 0; i < ecc_steps; i++) { if (!(uncor_ecc_flags & BIT(i))) -- cgit v1.2.3 From aeb93af96d0b0f0916aa0f65fe400a1808ea9cb9 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 5 Dec 2017 12:09:29 +0100 Subject: mtd: nand: Only allocate ecc->{calc, code}_buf when actually needed The only users of the ecc->{calc,code}_buf buffers are NAND controller drivers implementing ecc->calculate() and/or ecc->correct(). Since the ->oobsize can be non-negligible, especially on modern NAND devices, we'd better allocate it only when it is actually required. Make ecc->{calc,code}_buf allocation dependent on the presence of ecc->calculate() or ecc->correct().
Signed-off-by: Boris Brezillon Reviewed-by: Masahiro Yamada --- drivers/mtd/nand/nand_base.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 32c0239b380a..84d0a5d67e33 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -5318,21 +5318,9 @@ int nand_scan_tail(struct mtd_info *mtd) return -EINVAL; } - ecc->calc_buf = kmalloc(mtd->oobsize, GFP_KERNEL); - if (!ecc->calc_buf) - return -ENOMEM; - - ecc->code_buf = kmalloc(mtd->oobsize, GFP_KERNEL); - if (!ecc->code_buf) { - ret = -ENOMEM; - goto err_free_buf; - } - chip->data_buf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); - if (!chip->data_buf) { - ret = -ENOMEM; - goto err_free_buf; - } + if (!chip->data_buf) + return -ENOMEM; /* * FIXME: some NAND manufacturer drivers expect the first die to be @@ -5495,6 +5483,15 @@ int nand_scan_tail(struct mtd_info *mtd) goto err_nand_manuf_cleanup; } + if (ecc->correct || ecc->calculate) { + ecc->calc_buf = kmalloc(mtd->oobsize, GFP_KERNEL); + ecc->code_buf = kmalloc(mtd->oobsize, GFP_KERNEL); + if (!ecc->calc_buf || !ecc->code_buf) { + ret = -ENOMEM; + goto err_nand_manuf_cleanup; + } + } + /* For many systems, the standard OOB write also works for raw */ if (!ecc->read_oob_raw) ecc->read_oob_raw = ecc->read_oob; -- cgit v1.2.3 From 707d81545dbc3d3ee4ae093fc600831eb97302e7 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 7 Dec 2017 10:33:58 +0100 Subject: mtd: nand: samsung: add ECC requirements for K9F4G08U0D Samsung NAND chip K9F4G08U0D minimum ECC strength requirement is 1 bit per 512 bytes. As the chip is not ONFI nor JEDEC and because of the lack of these values, boards using it fail to probe the NAND controller driver. Fix this by setting up the default values. 
Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_samsung.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/nand/nand_samsung.c b/drivers/mtd/nand/nand_samsung.c index d348f0129ae7..f6b0a63a068c 100644 --- a/drivers/mtd/nand/nand_samsung.c +++ b/drivers/mtd/nand/nand_samsung.c @@ -91,6 +91,12 @@ static void samsung_nand_decode_id(struct nand_chip *chip) } } else { nand_decode_ext_id(chip); + + /* Datasheet values for SLC Samsung K9F4G08U0D-S[I|C]B0(T00) */ + if (nand_is_slc(chip) && chip->id.data[1] == 0xDC) { + chip->ecc_step_ds = 512; + chip->ecc_strength_ds = 1; + } } } -- cgit v1.2.3 From 3fab39997a98b97138c886978af660c4f6c7e9e6 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 14 Dec 2017 14:03:44 +0000 Subject: arm64/sve: Report SVE to userspace via CPUID only if supported Currently, the SVE field in ID_AA64PFR0_EL1 is visible unconditionally to userspace via the CPU ID register emulation, irrespective of the kernel config. This means that if a kernel configured with CONFIG_ARM64_SVE=n is run on SVE-capable hardware, userspace will see SVE reported as present in the ID regs even though the kernel forbids execution of SVE instructions. This patch makes the exposure of the SVE field in ID_AA64PFR0_EL1 conditional on CONFIG_ARM64_SVE=y. Since future architecture features are likely to encounter a similar requirement, this patch adds a suitable helper macro for use when declaring config-conditional ID register fields.
Fixes: 43994d824e84 ("arm64/sve: Detect SVE and activate runtime support") Reviewed-by: Suzuki K Poulose Reported-by: Mark Rutland Signed-off-by: Dave Martin Cc: Suzuki Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 3 +++ arch/arm64/kernel/cpufeature.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ac67cfc2585a..060e3a4008ab 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -60,6 +60,9 @@ enum ftr_type { #define FTR_VISIBLE true /* Feature visible to the user space */ #define FTR_HIDDEN false /* Feature is hidden from the user */ +#define FTR_VISIBLE_IF_IS_ENABLED(config) \ + (IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN) + struct arm64_ftr_bits { bool sign; /* Value is signed ? */ bool visible; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c5ba0097887f..a73a5928f09b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), -- cgit v1.2.3 From c1cfd9025cc394fd137a01159d74335c5ac978ce Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Dec 2017 16:44:12 +0100 Subject: ALSA: rawmidi: Avoid racy info ioctl via ctl device The rawmidi also allows obtaining the information via ioctl of the ctl API.
It means that user can issue an ioctl to the rawmidi device even when it's being removed as long as the control device is present. Although the code has some protection via the global register_mutex, its range is limited to the search of the corresponding rawmidi object, and the mutex is already unlocked at accessing the rawmidi object. This may lead to a use-after-free. For avoiding it, this patch widens the application of register_mutex to the whole snd_rawmidi_info_select() function. We have another mutex per rawmidi object, but this operation isn't very hot path, so it shouldn't matter from the performance POV. Cc: Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index b3b353d72527..f055ca10bbc1 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream, return 0; } -int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) +static int __snd_rawmidi_info_select(struct snd_card *card, + struct snd_rawmidi_info *info) { struct snd_rawmidi *rmidi; struct snd_rawmidi_str *pstr; struct snd_rawmidi_substream *substream; - mutex_lock(&register_mutex); rmidi = snd_rawmidi_search(card, info->device); - mutex_unlock(&register_mutex); if (!rmidi) return -ENXIO; if (info->stream < 0 || info->stream > 1) @@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info } return -ENXIO; } + +int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info) +{ + int ret; + + mutex_lock(&register_mutex); + ret = __snd_rawmidi_info_select(card, info); + mutex_unlock(&register_mutex); + return ret; +} EXPORT_SYMBOL(snd_rawmidi_info_select); static int snd_rawmidi_info_select_user(struct snd_card *card, -- cgit v1.2.3 From b7b2846fe26f2c0d7f317c874a13d3ecf22670ff Mon Sep 17 00:00:00 2001 From:
"Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:02 -0800 Subject: xfs: add the ability to join a held buffer to a defer_ops In certain cases, defer_ops callers will lock a buffer and want to hold the lock across transaction rolls. Similar to ijoined inodes, we want to dirty & join the buffer with each transaction roll in defer_finish so that afterwards the caller still owns the buffer lock and we haven't inadvertently pinned the log. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_defer.c | 39 ++++++++++++++++++++++++++++++++++++--- fs/xfs/libxfs/xfs_defer.h | 5 ++++- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 072ebfe1d6ae..087fea02c389 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -249,6 +249,10 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); + /* Hold the (previously bjoin'd) buffer locked across the roll. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) + xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]); + trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); /* Roll the transaction. */ @@ -264,6 +268,12 @@ xfs_defer_trans_roll( for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) { + xfs_trans_bjoin(*tp, dop->dop_bufs[i]); + xfs_trans_bhold(*tp, dop->dop_bufs[i]); + } + return error; } @@ -295,6 +305,31 @@ xfs_defer_ijoin( } } + ASSERT(0); + return -EFSCORRUPTED; +} + +/* + * Add this buffer to the deferred op. Each joined buffer is relogged + * each time we roll the transaction. 
+ */ +int +xfs_defer_bjoin( + struct xfs_defer_ops *dop, + struct xfs_buf *bp) +{ + int i; + + for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) { + if (dop->dop_bufs[i] == bp) + return 0; + else if (dop->dop_bufs[i] == NULL) { + dop->dop_bufs[i] = bp; + return 0; + } + } + + ASSERT(0); return -EFSCORRUPTED; } @@ -493,9 +528,7 @@ xfs_defer_init( struct xfs_defer_ops *dop, xfs_fsblock_t *fbp) { - dop->dop_committed = false; - dop->dop_low = false; - memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes)); + memset(dop, 0, sizeof(struct xfs_defer_ops)); *fbp = NULLFSBLOCK; INIT_LIST_HEAD(&dop->dop_intake); INIT_LIST_HEAD(&dop->dop_pending); diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index d4f046dd44bd..045beacdd37d 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -59,6 +59,7 @@ enum xfs_defer_ops_type { }; #define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ +#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */ struct xfs_defer_ops { bool dop_committed; /* did any trans commit? */ @@ -66,8 +67,9 @@ struct xfs_defer_ops { struct list_head dop_intake; /* unlogged pending work */ struct list_head dop_pending; /* logged pending work */ - /* relog these inodes with each roll */ + /* relog these with each roll */ struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; + struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS]; }; void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, @@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop); void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); +int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp); /* Description of a deferred type. */ struct xfs_defer_op_type { -- cgit v1.2.3 From 6e643cd094de3bd0f97edcc1db0089afa24d909f Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Thu, 7 Dec 2017 19:07:02 -0800 Subject: xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute The new attribute leaf buffer is not held locked across the transaction roll between the shortform->leaf modification and the addition of the new entry. As a result, the attribute buffer modification being made is not atomic from an operational perspective. Hence the AIL push can grab it in the transient state of "just created" after the initial transaction is rolled, because the buffer has been released. This leads to xfs_attr3_leaf_verify() asserting that hdr.count is zero, treating this as in-memory corruption, and shutting down the filesystem. Darrick ported the original patch to 4.15 and reworked it to use the xfs_defer_bjoin helper and hold/join the buffer correctly across the second transaction roll. Signed-off-by: Alex Lyakas Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 20 +++++++++++++++----- fs/xfs/libxfs/xfs_attr_leaf.c | 9 ++++++--- fs/xfs/libxfs/xfs_attr_leaf.h | 3 ++- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 6249c92671de..a76914db72ef 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -212,6 +212,7 @@ xfs_attr_set( int flags) { struct xfs_mount *mp = dp->i_mount; + struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_defer_ops dfops; struct xfs_trans_res tres; @@ -327,9 +328,16 @@ xfs_attr_set( * GROT: another possible req'mt for a double-split btree op. */ xfs_defer_init(args.dfops, args.firstblock); - error = xfs_attr_shortform_to_leaf(&args); + error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out_defer_cancel; + /* + * Prevent the leaf buffer from being unlocked so that a + * concurrent AIL push cannot grab the half-baked leaf + * buffer and run into problems with the write verifier.
+ */ + xfs_trans_bhold(args.trans, leaf_bp); + xfs_defer_bjoin(args.dfops, leaf_bp); xfs_defer_ijoin(args.dfops, dp); error = xfs_defer_finish(&args.trans, args.dfops); if (error) @@ -337,13 +345,14 @@ xfs_attr_set( /* * Commit the leaf transformation. We'll need another (linked) - * transaction to add the new attribute to the leaf. + * transaction to add the new attribute to the leaf, which + * means that we have to hold & join the leaf buffer here too. */ - error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; - + xfs_trans_bjoin(args.trans, leaf_bp); + leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) @@ -374,8 +383,9 @@ xfs_attr_set( out_defer_cancel: xfs_defer_cancel(&dfops); - args.trans = NULL; out: + if (leaf_bp) + xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 53cc8b986eac..601eaa36f1ad 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args) } /* - * Convert from using the shortform to the leaf. + * Convert from using the shortform to the leaf. On success, return the + * buffer so that we can keep it locked until we're totally done with it. 
*/ int -xfs_attr_shortform_to_leaf(xfs_da_args_t *args) +xfs_attr_shortform_to_leaf( + struct xfs_da_args *args, + struct xfs_buf **leaf_bp) { xfs_inode_t *dp; xfs_attr_shortform_t *sf; @@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } error = 0; - + *leaf_bp = bp; out: kmem_free(tmpbuffer); return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index f7dda0c237b0..894124efb421 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); -int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); +int xfs_attr_shortform_to_leaf(struct xfs_da_args *args, + struct xfs_buf **leaf_bp); int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); -- cgit v1.2.3 From 8c57b88637d78a723e0854fc3d06c6d4c31a1e0c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:53 -0800 Subject: xfs: account for null transactions in bunmapi In e1a4e37cc7b665 ("xfs: try to avoid blowing out the transaction reservation when bunmaping a shared extent"), we try to constrain the amount of real extents we unmap from the data fork in a given call so that we don't blow out transaction reservations. However, not all bunmapi operations require a transaction -- if we're only removing a delalloc extent, no transaction is needed, so we have to code against that. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1210f684d3c2..1bddbba6b80c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5136,7 +5136,7 @@ __xfs_bunmapi( * blowing out the transaction with a mix of EFIs and reflink * adjustments. */ - if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) + if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); else max_len = len; -- cgit v1.2.3 From c54854a437a447a6bb1dcb11f60dd01cef3fa597 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:54 -0800 Subject: xfs: move xfs_iext_insert tracepoint to report useful information Move the tracepoint in xfs_iext_insert to after the point where we've inserted the extent because otherwise we report stale extent data in the ftrace output. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_iext_tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 89bf16b4d937..b0f31791c7e6 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -632,8 +632,6 @@ xfs_iext_insert( struct xfs_iext_leaf *new = NULL; int nr_entries, i; - trace_xfs_iext_insert(ip, cur, state, _RET_IP_); - if (ifp->if_height == 0) xfs_iext_alloc_root(ifp, cur); else if (ifp->if_height == 1) @@ -661,6 +659,8 @@ xfs_iext_insert( xfs_iext_set(cur_rec(cur), irec); ifp->if_bytes += sizeof(struct xfs_iext_rec); + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); + if (new) xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); } -- cgit v1.2.3 From 5c989a0ee06eb77a44baffd1779a5dbb9a7e873f Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Sun, 10 Dec 2017 18:03:54 -0800 Subject: xfs: remove dest file's post-eof preallocations before reflinking If we try to reflink into a file with post-eof preallocations at an offset well past the preallocations, we increase i_size as one would expect. However, those allocations do not have page cache backing them, so they won't get cleaned out on their own. This leads to asserts in the collapse/insert range code and xfs_destroy_inode when they encounter delalloc extents they weren't expecting to find. Since there are plenty of other places where we dump those post-eof blocks, do the same to the reflink destination file before we start remapping extents. This was found by adding clonerange support to fsstress and running it in write-only mode. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cf7c8f81bebb..e13f5ad57a03 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1291,6 +1291,17 @@ xfs_reflink_remap_range( trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); + /* + * Clear out post-eof preallocations because we don't have page cache + * backing the delayed allocations and they'll never get freed on + * their own. + */ + if (xfs_can_free_eofblocks(dest, true)) { + ret = xfs_free_eofblocks(dest); + if (ret) + goto out_unlock; + } + /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) -- cgit v1.2.3 From 73353f486c9b5b2407ec32be1004174dbbaf6c18 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:55 -0800 Subject: xfs: relax is_reflink_inode assert in xfs_reflink_find_cow_mapping We don't hold the ilock through the entire sequence of xfs_writepage_map -> xfs_map_cow -> xfs_reflink_find_cow_mapping. 
This means that we can race with another thread that is trying to clear the inode reflink flag, with the result that the flag is set for the xfs_map_cow check but cleared before we get to the assert in find_cow_mapping. When this happens, we blow the assert even though everything is fine. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e13f5ad57a03..99c5852f9fe7 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -490,8 +490,9 @@ xfs_reflink_find_cow_mapping( struct xfs_iext_cursor icur; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); - ASSERT(xfs_is_reflink_inode(ip)); + if (!xfs_is_reflink_inode(ip)) + return false; offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) return false; -- cgit v1.2.3 From 9d40fba8b2056773b9744a95df9ddd6cc33a4f83 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 10 Dec 2017 18:03:55 -0800 Subject: xfs: avoid infinite loop when cancelling CoW blocks after writeback failure When we're cancelling a cow range, we don't always delete each extent that we iterate, so we have to move icur backwards in the list to avoid an infinite loop. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 99c5852f9fe7..6931b0c79cac 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -611,6 +611,9 @@ xfs_reflink_cancel_cow_blocks( /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &icur, &got, &del); + } else { + /* Didn't do anything, push cursor back. */ + xfs_iext_prev(ifp, &icur); } next_extent: if (!xfs_iext_get_extent(ifp, &icur, &got)) -- cgit v1.2.3 From a192de265b26c525672884630d5376c405e83b2a Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Sun, 10 Dec 2017 18:03:56 -0800 Subject: xfs: allow CoW remap transactions to use reserve blocks Since we as yet have no way of holding on to the indlen blocks that are reserved as part of CoW fork delalloc reservations, let the CoW remap transaction dip into the reserves so that we avoid failing writes. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6931b0c79cac..e49e6db415f7 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -729,7 +729,7 @@ xfs_reflink_end_cow( (unsigned int)(end_fsb - offset_fsb), XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - resblks, 0, 0, &tp); + resblks, 0, XFS_TRANS_RESERVE, &tp); if (error) goto out; -- cgit v1.2.3 From c2e90800aef22e7ea14ea7560ba99993f11d3616 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 12 Dec 2017 13:45:50 +0000 Subject: virtio_mmio: fix devm cleanup Recent rework of the virtio_mmio probe/remove paths balanced a devm_ioremap() with an iounmap() rather than its devm variant. This ends up corrupting the devm datastructures, and results in the following boot-time splat on arm64 under QEMU 2.9.0: [ 3.450397] ------------[ cut here ]------------ [ 3.453822] Trying to vfree() nonexistent vm area (00000000c05b4844) [ 3.460534] WARNING: CPU: 1 PID: 1 at mm/vmalloc.c:1525 __vunmap+0x1b8/0x220 [ 3.475898] Kernel panic - not syncing: panic_on_warn set ... 
[ 3.475898] [ 3.493933] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc3 #1 [ 3.513109] Hardware name: linux,dummy-virt (DT) [ 3.525382] Call trace: [ 3.531683] dump_backtrace+0x0/0x368 [ 3.543921] show_stack+0x20/0x30 [ 3.547767] dump_stack+0x108/0x164 [ 3.559584] panic+0x25c/0x51c [ 3.569184] __warn+0x29c/0x31c [ 3.576023] report_bug+0x1d4/0x290 [ 3.586069] bug_handler.part.2+0x40/0x100 [ 3.597820] bug_handler+0x4c/0x88 [ 3.608400] brk_handler+0x11c/0x218 [ 3.613430] do_debug_exception+0xe8/0x318 [ 3.627370] el1_dbg+0x18/0x78 [ 3.634037] __vunmap+0x1b8/0x220 [ 3.648747] vunmap+0x6c/0xc0 [ 3.653864] __iounmap+0x44/0x58 [ 3.659771] devm_ioremap_release+0x34/0x68 [ 3.672983] release_nodes+0x404/0x880 [ 3.683543] devres_release_all+0x6c/0xe8 [ 3.695692] driver_probe_device+0x250/0x828 [ 3.706187] __driver_attach+0x190/0x210 [ 3.717645] bus_for_each_dev+0x14c/0x1f0 [ 3.728633] driver_attach+0x48/0x78 [ 3.740249] bus_add_driver+0x26c/0x5b8 [ 3.752248] driver_register+0x16c/0x398 [ 3.757211] __platform_driver_register+0xd8/0x128 [ 3.770860] virtio_mmio_init+0x1c/0x24 [ 3.782671] do_one_initcall+0xe0/0x398 [ 3.791890] kernel_init_freeable+0x594/0x660 [ 3.798514] kernel_init+0x18/0x190 [ 3.810220] ret_from_fork+0x10/0x18 To fix this, we can simply rip out the explicit cleanup that the devm infrastructure will do for us when our probe function returns an error code, or when our remove function returns. We only need to ensure that we call put_device() if a call to register_virtio_device() fails in the probe path. Signed-off-by: Mark Rutland Fixes: 7eb781b1bbb7136f ("virtio_mmio: add cleanup for virtio_mmio_probe") Fixes: 25f32223bce5c580 ("virtio_mmio: add cleanup for virtio_mmio_remove") Cc: Cornelia Huck Cc: Michael S. Tsirkin Cc: weiping zhang Cc: virtualization@lists.linux-foundation.org Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Cornelia Huck --- drivers/virtio/virtio_mmio.c | 43 +++++++++---------------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index a9192fe4f345..c92131edfaba 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -522,10 +522,8 @@ static int virtio_mmio_probe(struct platform_device *pdev) return -EBUSY; vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL); - if (!vm_dev) { - rc = -ENOMEM; - goto free_mem; - } + if (!vm_dev) + return -ENOMEM; vm_dev->vdev.dev.parent = &pdev->dev; vm_dev->vdev.dev.release = virtio_mmio_release_dev; @@ -535,17 +533,14 @@ static int virtio_mmio_probe(struct platform_device *pdev) spin_lock_init(&vm_dev->lock); vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); - if (vm_dev->base == NULL) { - rc = -EFAULT; - goto free_vmdev; - } + if (vm_dev->base == NULL) + return -EFAULT; /* Check magic value */ magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE); if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) { dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic); - rc = -ENODEV; - goto unmap; + return -ENODEV; } /* Check device version */ @@ -553,8 +548,7 @@ static int virtio_mmio_probe(struct platform_device *pdev) if (vm_dev->version < 1 || vm_dev->version > 2) { dev_err(&pdev->dev, "Version %ld not supported!\n", vm_dev->version); - rc = -ENXIO; - goto unmap; + return -ENXIO; } vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID); @@ -563,8 +557,7 @@ static int virtio_mmio_probe(struct platform_device *pdev) * virtio-mmio device with an ID 0 is a (dummy) placeholder * with no function. End probing now with no error reported. 
*/ - rc = -ENODEV; - goto unmap; + return -ENODEV; } vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID); @@ -590,33 +583,15 @@ static int virtio_mmio_probe(struct platform_device *pdev) platform_set_drvdata(pdev, vm_dev); rc = register_virtio_device(&vm_dev->vdev); - if (rc) { - iounmap(vm_dev->base); - devm_release_mem_region(&pdev->dev, mem->start, - resource_size(mem)); + if (rc) put_device(&vm_dev->vdev.dev); - } - return rc; -unmap: - iounmap(vm_dev->base); -free_mem: - devm_release_mem_region(&pdev->dev, mem->start, - resource_size(mem)); -free_vmdev: - devm_kfree(&pdev->dev, vm_dev); + return rc; } static int virtio_mmio_remove(struct platform_device *pdev) { struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev); - struct resource *mem; - - iounmap(vm_dev->base); - mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (mem) - devm_release_mem_region(&pdev->dev, mem->start, - resource_size(mem)); unregister_virtio_device(&vm_dev->vdev); return 0; -- cgit v1.2.3 From c47d7f56e914900410f65835933f9fc4374d0a2b Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 14 Dec 2017 15:32:24 -0800 Subject: include/linux/idr.h: add #include <linux/bug.h> The <linux/bug.h> was removed from radix-tree.h by commit f5bba9d11a25 ("include/linux/radix-tree.h: remove unneeded #include <linux/bug.h>"). Since that commit, tools/testing/radix-tree/ couldn't pass compilation due to tools/testing/radix-tree/idr.c:17: undefined reference to WARN_ON_ONCE. This patch adds the bug.h header to idr.h to solve the issue.
Link: http://lkml.kernel.org/r/1511963726-34070-2-git-send-email-wei.w.wang@intel.com Fixes: f5bba9d11a2 ("include/linux/radix-tree.h: remove unneeded #include <linux/bug.h>") Signed-off-by: Wei Wang Cc: Matthew Wilcox Cc: Jan Kara Cc: Eric Biggers Cc: Tejun Heo Cc: Masahiro Yamada Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/idr.h b/include/linux/idr.h index 7c3a365f7e12..fa14f834e4ed 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -15,6 +15,7 @@ #include #include #include +#include <linux/bug.h> struct idr { struct radix_tree_root idr_rt; -- cgit v1.2.3 From 338f1d9d1b829fec494d053f62820a2ee625b1ec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Dec 2017 15:32:28 -0800 Subject: lib/rbtree,drm/mm: add rbtree_replace_node_cached() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a variant of rbtree_replace_node() that maintains the leftmost cache of struct rbtree_root_cached when replacing nodes within the rbtree. As drm_mm is the only rb_replace_node() being used on an interval tree, the mistake looks fairly self-contained. Furthermore the only user of drm_mm_replace_node() is its testsuite...
Testcase: igt/drm_mm/replace Link: http://lkml.kernel.org/r/20171122100729.3742-1-chris@chris-wilson.co.uk Link: https://patchwork.freedesktop.org/patch/msgid/20171109212435.9265-1-chris@chris-wilson.co.uk Fixes: f808c13fd373 ("lib/interval_tree: fast overlap detection") Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Davidlohr Bueso Cc: Jérôme Glisse Cc: Joonas Lahtinen Cc: Daniel Vetter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_mm.c | 8 +++++--- include/linux/rbtree.h | 2 ++ lib/rbtree.c | 10 ++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 61a1c8ea74bc..c3c79ee6119e 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -575,21 +575,23 @@ EXPORT_SYMBOL(drm_mm_remove_node); */ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) { + struct drm_mm *mm = old->mm; + DRM_MM_BUG_ON(!old->allocated); *new = *old; list_replace(&old->node_list, &new->node_list); - rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree.rb_root); + rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree); if (drm_mm_hole_follows(old)) { list_replace(&old->hole_stack, &new->hole_stack); rb_replace_node(&old->rb_hole_size, &new->rb_hole_size, - &old->mm->holes_size); + &mm->holes_size); rb_replace_node(&old->rb_hole_addr, &new->rb_hole_addr, - &old->mm->holes_addr); + &mm->holes_addr); } old->allocated = false; diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index d574361943ea..fcbeed4053ef 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -99,6 +99,8 @@ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, struct rb_root *root); +extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root); static inline 
void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) diff --git a/lib/rbtree.c b/lib/rbtree.c index ba4a9d165f1b..d3ff682fd4b8 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -603,6 +603,16 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new, } EXPORT_SYMBOL(rb_replace_node); +void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root) +{ + rb_replace_node(victim, new, &root->rb_root); + + if (root->rb_leftmost == victim) + root->rb_leftmost = new; +} +EXPORT_SYMBOL(rb_replace_node_cached); + void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, struct rb_root *root) { -- cgit v1.2.3 From 13ab183d138f607d885e995d625e58d47678bf97 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Dec 2017 15:32:31 -0800 Subject: mm/kmemleak.c: make cond_resched() rate-limiting more efficient Commit bde5f6bc68db ("kmemleak: add scheduling point to kmemleak_scan()") tries to rate-limit the frequency of cond_resched() calls, but does it in a way which might incur an expensive division operation in the inner loop. Simplify this. 
Fixes: bde5f6bc68db5 ("kmemleak: add scheduling point to kmemleak_scan()") Suggested-by: Linus Torvalds Cc: Yisheng Xie Cc: Catalin Marinas Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 3d4781756d50..d73c14294f3a 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1523,7 +1523,7 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); - if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + if (!(pfn & 63)) cond_resched(); } } -- cgit v1.2.3 From 146734b091430c80d80bb96b1139a96fb4bc830e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Dec 2017 15:32:34 -0800 Subject: string.h: workaround for increased stack usage The hardened strlen() function causes rather large stack usage in at least one file in the kernel, in particular when CONFIG_KASAN is enabled: drivers/media/usb/em28xx/em28xx-dvb.c: In function 'em28xx_dvb_init': drivers/media/usb/em28xx/em28xx-dvb.c:2062:1: error: the frame size of 3256 bytes is larger than 204 bytes [-Werror=frame-larger-than=] Analyzing this problem led to the discovery that gcc fails to merge the stack slots for the i2c_board_info[] structures after we strlcpy() into them, due to the 'noreturn' attribute on the source string length check. I reported this as a gcc bug, but it is unlikely to get fixed for gcc-8, since it is relatively easy to work around, and it gets triggered rarely. An earlier workaround I did added an empty inline assembly statement before the call to fortify_panic(), which works surprisingly well, but is really ugly and unintuitive. This is a new approach to the same problem, this time addressing it by not calling the 'extern __real_strnlen()' function for string constants where __builtin_strlen() is a compile-time constant and therefore known to be safe. 
We do this by checking if the last character in the string is a compile-time constant '\0'. If it is, we can assume that strlen() of the string is also constant. As a side-effect, this should also improve the object code output for any other call of strlen() on a string constant. [akpm@linux-foundation.org: add comment] Link: http://lkml.kernel.org/r/20171205215143.3085755-1-arnd@arndb.de Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 Link: https://patchwork.kernel.org/patch/9980413/ Link: https://patchwork.kernel.org/patch/9974047/ Fixes: 6974f0c4555 ("include/linux/string.h: add the option of fortified string.h functions") Signed-off-by: Arnd Bergmann Cc: Kees Cook Cc: Mauro Carvalho Chehab Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Daniel Micay Cc: Greg Kroah-Hartman Cc: Martin Wilck Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/string.h b/include/linux/string.h index 410ecf17de3c..cfd83eb2f926 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -259,7 +259,10 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) { __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 0); - if (p_size == (size_t)-1) + + /* Work around gcc excess stack consumption issue */ + if (p_size == (size_t)-1 || + (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0')) return __builtin_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) -- cgit v1.2.3 From 302ec300ef8a545a7fc7f667e5fd743b091c2eeb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 14 Dec 2017 15:32:38 -0800 Subject: autofs: fix careless error in recent commit Commit ecc0c469f277 ("autofs: don't fail mount for transient error") was meant to replace an 'if' with a 'switch', but instead added the 'switch' leaving the case in place. 
Link: http://lkml.kernel.org/r/87zi6wstmw.fsf@notabene.neil.brown.name Fixes: ecc0c469f277 ("autofs: don't fail mount for transient error") Reported-by: Ben Hutchings Signed-off-by: NeilBrown Cc: Ian Kent Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs4/waitq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 8fc41705c7cd..961a12dc6dc8 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -170,7 +170,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mutex_unlock(&sbi->wq_mutex); - if (autofs4_write(sbi, pipe, &pkt, pktsz)) switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { case 0: break; -- cgit v1.2.3 From 3756f6401c302617c5e091081ca4d26ab604bec5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Dec 2017 15:32:41 -0800 Subject: exec: avoid gcc-8 warning for get_task_comm gcc-8 warns about using strncpy() with the source size as the limit: fs/exec.c:1223:32: error: argument to 'sizeof' in 'strncpy' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] This is indeed slightly suspicious, as it protects us from source arguments without NUL-termination, but does not guarantee that the destination is terminated. This keeps the strncpy() to ensure we have properly padded target buffer, but ensures that we use the correct length, by passing the actual length of the destination buffer as well as adding a build-time check to ensure it is exactly TASK_COMM_LEN. There are only 23 callsites which I all reviewed to ensure this is currently the case. We could get away with doing only the check or passing the right length, but it doesn't hurt to do both. 
Link: http://lkml.kernel.org/r/20171205151724.1764896-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Suggested-by: Kees Cook Acked-by: Kees Cook Acked-by: Ingo Molnar Cc: Alexander Viro Cc: Peter Zijlstra Cc: Serge Hallyn Cc: James Morris Cc: Aleksa Sarai Cc: "Eric W. Biederman" Cc: Frederic Weisbecker Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 7 +++---- include/linux/sched.h | 6 +++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 6be2aa0ab26f..156f56acfe8e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1216,15 +1216,14 @@ killed: return -EAGAIN; } -char *get_task_comm(char *buf, struct task_struct *tsk) +char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) { - /* buf must be at least sizeof(tsk->comm) in size */ task_lock(tsk); - strncpy(buf, tsk->comm, sizeof(tsk->comm)); + strncpy(buf, tsk->comm, buf_size); task_unlock(tsk); return buf; } -EXPORT_SYMBOL_GPL(get_task_comm); +EXPORT_SYMBOL_GPL(__get_task_comm); /* * These functions flushes out all traces of the currently running executable diff --git a/include/linux/sched.h b/include/linux/sched.h index 21991d668d35..5124ba709830 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1503,7 +1503,11 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from) __set_task_comm(tsk, from, false); } -extern char *get_task_comm(char *to, struct task_struct *tsk); +extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); +#define get_task_comm(buf, tsk) ({ \ + BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ + __get_task_comm(buf, sizeof(buf), tsk); \ +}) #ifdef CONFIG_SMP void scheduler_ipi(void); -- cgit v1.2.3 From 51f73fffbf30b335d036ab356b67b05e16e26585 Mon Sep 17 00:00:00 2001 From: Srividya Desireddy Date: Thu, 14 Dec 2017 15:32:45 -0800 Subject: Documentation/vm/zswap.txt: update with same-value filled page feature Update zswap document with details on 
same-value filled pages identification feature. The usage of zswap.same_filled_pages_enabled module parameter is explained. Link: http://lkml.kernel.org/r/20171206114852epcms5p6973b02a9f455d5d3c765eafda0fe2631@epcms5p6 Signed-off-by: Srividya Desireddy Acked-by: Dan Streetman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/zswap.txt | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt index 89fff7d611cc..0b3a1148f9f0 100644 --- a/Documentation/vm/zswap.txt +++ b/Documentation/vm/zswap.txt @@ -98,5 +98,25 @@ request is made for a page in an old zpool, it is uncompressed using its original compressor. Once all pages are removed from an old zpool, the zpool and its compressor are freed. +Some of the pages in zswap are same-value filled pages (i.e. contents of the +page have same value or repetitive pattern). These pages include zero-filled +pages and they are handled differently. During store operation, a page is +checked if it is a same-value filled page before compressing it. If true, the +compressed length of the page is set to zero and the pattern or same-filled +value is stored. + +Same-value filled pages identification feature is enabled by default and can be +disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0, +e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at +runtime using the sysfs "same_filled_pages_enabled" attribute, e.g. + +echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled + +When zswap same-filled page identification is disabled at runtime, it will stop +checking for the same-value filled pages during store operation. However, the +existing pages which are marked as same-value filled pages remain stored +unchanged in zswap until they are either loaded or invalidated. 
+ A debugfs interface is provided for various statistic about pool size, number -of pages stored, and various counters for the reasons pages are rejected. +of pages stored, same-value filled pages and various counters for the reasons +pages are rejected. -- cgit v1.2.3 From 4cc90b4cc3d4955f79eae4f7f9d64e67e17b468e Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Thu, 14 Dec 2017 15:32:48 -0800 Subject: scripts/faddr2line: fix CROSS_COMPILE unset error faddr2line hit var unbound error when CROSS_COMPILE isn't set since nounset option is set in bash script. Link: http://lkml.kernel.org/r/20171206013022.GA83929@sofia Fixes: 95a879825419 ("scripts/faddr2line: extend usage on generic arch") Signed-off-by: Liu Changcheng Reported-by: Richard Weinberger Reviewed-by: Richard Weinberger Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: NeilBrown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/faddr2line | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 39e07d8574dd..7721d5b2b0c0 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -44,10 +44,10 @@ set -o errexit set -o nounset -READELF="${CROSS_COMPILE}readelf" -ADDR2LINE="${CROSS_COMPILE}addr2line" -SIZE="${CROSS_COMPILE}size" -NM="${CROSS_COMPILE}nm" +READELF="${CROSS_COMPILE:-}readelf" +ADDR2LINE="${CROSS_COMPILE:-}addr2line" +SIZE="${CROSS_COMPILE:-}size" +NM="${CROSS_COMPILE:-}nm" command -v awk >/dev/null 2>&1 || die "awk isn't installed" command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" -- cgit v1.2.3 From 183f24aa5b76e37da690b2def41cc70f0792ce09 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 14 Dec 2017 15:32:52 -0800 Subject: mm/memory.c: mark wp_huge_pmd() inline to prevent build failure With gcc 4.1.2: mm/memory.o: In function `wp_huge_pmd': memory.c:(.text+0x9b4): undefined reference to `do_huge_pmd_wp_page' Interestingly, wp_huge_pmd() is emitted in 
the assembler output, but never called. Apparently replacing the call to pmd_write() in __handle_mm_fault() by a call to the more complex pmd_access_permitted() reduced the ability of the compiler to remove unused code. Fix this by marking wp_huge_pmd() inline, like was done in commit 91a90140f998 ("mm/memory.c: mark create_huge_pmd() inline to prevent build failure") for a similar problem. [akpm@linux-foundation.org: add comment] Link: http://lkml.kernel.org/r/1512335500-10889-1-git-send-email-geert@linux-m68k.org Fixes: c7da82b894e9eef6 ("mm: replace pmd_write with pmd_access_permitted in fault + gup paths") Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 5eb3d2524bdc..cfaba6287702 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3831,7 +3831,8 @@ static inline int create_huge_pmd(struct vm_fault *vmf) return VM_FAULT_FALLBACK; } -static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) +/* `inline' is required to avoid gcc 4.1.2 build error */ +static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) { if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_wp_page(vmf, orig_pmd); -- cgit v1.2.3 From c24ad77d962c31af92f2b731dad2104cbf3fbb03 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 14 Dec 2017 15:32:55 -0800 Subject: mm/page_alloc.c: avoid excessive IRQ disabled times in free_unref_page_list() Since commit 9cca35d42eb6 ("mm, page_alloc: enable/disable IRQs once when freeing a list of pages") we see excessive IRQ disabled times of up to 25ms on an embedded ARM system (tracing overhead included). This is due to graphics buffers being freed back to the system via release_pages(). Graphics buffers can be huge, so it's not hard to hit cases where the list of pages to free has 2048 entries. Disabling IRQs while freeing all those pages is clearly not a good idea. 
Introduce a batch limit, which allows IRQ servicing once every few pages. The batch count is the same as used in other parts of the MM subsystem when dealing with IRQ disabled regions. Link: http://lkml.kernel.org/r/20171207170314.4419-1-l.stach@pengutronix.de Fixes: 9cca35d42eb6 ("mm, page_alloc: enable/disable IRQs once when freeing a list of pages") Signed-off-by: Lucas Stach Acked-by: Mel Gorman Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 73f5d4556b3d..7e5e775e97f4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2684,6 +2684,7 @@ void free_unref_page_list(struct list_head *list) { struct page *page, *next; unsigned long flags, pfn; + int batch_count = 0; /* Prepare pages for freeing */ list_for_each_entry_safe(page, next, list, lru) { @@ -2700,6 +2701,16 @@ void free_unref_page_list(struct list_head *list) set_page_private(page, 0); trace_mm_page_free_batched(page); free_unref_page_commit(page, pfn); + + /* + * Guard against excessive IRQ disabled times when we get + * a large list of pages to free. + */ + if (++batch_count == SWAP_CLUSTER_MAX) { + local_irq_restore(flags); + batch_count = 0; + local_irq_save(flags); + } } local_irq_restore(flags); } -- cgit v1.2.3 From 85c3e4a5a185f22649c6bf33bdce7bb1ac890921 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 14 Dec 2017 15:32:58 -0800 Subject: mm/slab.c: do not hash pointers when debugging slab If CONFIG_DEBUG_SLAB/CONFIG_DEBUG_SLAB_LEAK are enabled, the slab code prints extra debug information when e.g. corruption is detected. This includes pointers, which are not very useful when hashed. Fix this by using %px to print unhashed pointers instead where it makes sense, and by removing the printing of a last user pointer referring to code. 
[geert+renesas@glider.be: v2] Link: http://lkml.kernel.org/r/1513179267-2509-1-git-send-email-geert+renesas@glider.be Link: http://lkml.kernel.org/r/1512641861-5113-1-git-send-email-geert+renesas@glider.be Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Signed-off-by: Geert Uytterhoeven Acked-by: Christoph Lameter Acked-by: Linus Torvalds Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: "Tobin C . Harding" Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 183e996dde5f..4e51ef954026 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1584,11 +1584,8 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) *dbg_redzone2(cachep, objp)); } - if (cachep->flags & SLAB_STORE_USER) { - pr_err("Last user: [<%p>](%pSR)\n", - *dbg_userword(cachep, objp), - *dbg_userword(cachep, objp)); - } + if (cachep->flags & SLAB_STORE_USER) + pr_err("Last user: (%pSR)\n", *dbg_userword(cachep, objp)); realobj = (char *)objp + obj_offset(cachep); size = cachep->object_size; for (i = 0; i < size && lines; i += 16, lines--) { @@ -1621,7 +1618,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) /* Mismatch ! 
*/ /* Print header */ if (lines == 0) { - pr_err("Slab corruption (%s): %s start=%p, len=%d\n", + pr_err("Slab corruption (%s): %s start=%px, len=%d\n", print_tainted(), cachep->name, realobj, size); print_objinfo(cachep, objp, 0); @@ -1650,13 +1647,13 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) if (objnr) { objp = index_to_obj(cachep, page, objnr - 1); realobj = (char *)objp + obj_offset(cachep); - pr_err("Prev obj: start=%p, len=%d\n", realobj, size); + pr_err("Prev obj: start=%px, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); } if (objnr + 1 < cachep->num) { objp = index_to_obj(cachep, page, objnr + 1); realobj = (char *)objp + obj_offset(cachep); - pr_err("Next obj: start=%p, len=%d\n", realobj, size); + pr_err("Next obj: start=%px, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); } } @@ -2608,7 +2605,7 @@ static void slab_put_obj(struct kmem_cache *cachep, /* Verify double free bug */ for (i = page->active; i < cachep->num; i++) { if (get_free_obj(page, i) == objnr) { - pr_err("slab: double free detected in cache '%s', objp %p\n", + pr_err("slab: double free detected in cache '%s', objp %px\n", cachep->name, objp); BUG(); } @@ -2772,7 +2769,7 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) else slab_error(cache, "memory outside object was overwritten"); - pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n", + pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n", obj, redzone1, redzone2); } @@ -3078,7 +3075,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { slab_error(cachep, "double free, or memory outside object was overwritten"); - pr_err("%p: redzone 1:0x%llx, redzone 2:0x%llx\n", + pr_err("%px: redzone 1:0x%llx, redzone 2:0x%llx\n", objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp)); } @@ -3091,7 +3088,7 @@ static void 
*cache_alloc_debugcheck_after(struct kmem_cache *cachep, cachep->ctor(objp); if (ARCH_SLAB_MINALIGN && ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { - pr_err("0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n", + pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n", objp, (int)ARCH_SLAB_MINALIGN); } return objp; @@ -4283,7 +4280,7 @@ static void show_symbol(struct seq_file *m, unsigned long address) return; } #endif - seq_printf(m, "%p", (void *)address); + seq_printf(m, "%px", (void *)address); } static int leaks_show(struct seq_file *m, void *p) -- cgit v1.2.3 From 689d77f001cd22da31cc943170e1f6f2e8197035 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 14 Dec 2017 15:33:02 -0800 Subject: kcov: fix comparison callback signature Fix a silly copy-paste bug. We truncated u32 args to u16. Link: http://lkml.kernel.org/r/20171207101134.107168-1-dvyukov@google.com Fixes: ded97d2c2b2c ("kcov: support comparison operands collection") Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: Alexander Potapenko Cc: Vegard Nossum Cc: Quentin Casasnovas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 15f33faf4013..7594c033d98a 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -157,7 +157,7 @@ void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) } EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); -void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) +void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2) { write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); } @@ -183,7 +183,7 @@ void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) } EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); -void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) +void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2) { write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, _RET_IP_); -- cgit 
v1.2.3 From 0b265c3b3b721dca03e82719ac0e15bc2c89aa3a Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Thu, 14 Dec 2017 15:33:05 -0800 Subject: tools/slabinfo-gnuplot: force to use bash shell On some linux distributions, the default link of sh is dash which doesn't support split array like "${var//,/ }". It's better to force to use bash shell directly. Link: http://lkml.kernel.org/r/20171208093751.GA175471@sofia Signed-off-by: Liu Changcheng Reviewed-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/slabinfo-gnuplot.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 35b039864b77..0cf28aa6f21c 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Sergey Senozhatsky, 2015 # sergey.senozhatsky.work@gmail.com -- cgit v1.2.3 From 1f704fd0d14043e76e80f6b8b2251b9b2cedcca6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 14 Dec 2017 15:33:08 -0800 Subject: mm/frame_vector.c: release a semaphore in 'get_vaddr_frames()' A semaphore is acquired before this check, so we must release it before leaving. Link: http://lkml.kernel.org/r/20171211211009.4971-1-christophe.jaillet@wanadoo.fr Fixes: b7f0554a56f2 ("mm: fail get_vaddr_frames() for filesystem-dax mappings") Signed-off-by: Christophe JAILLET Acked-by: Michal Hocko Cc: Dan Williams Cc: Christian Borntraeger Cc: David Sterba Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/frame_vector.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 297c7238f7d4..c64dca6e27c2 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -62,8 +62,10 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, * get_user_pages_longterm() and disallow it for filesystem-dax * mappings.
*/ - if (vma_is_fsdax(vma)) - return -EOPNOTSUPP; + if (vma_is_fsdax(vma)) { + ret = -EOPNOTSUPP; + goto out; + } if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) { vec->got_ref = true; -- cgit v1.2.3 From bdcf0a423ea1c40bbb40e7ee483b50fc8aa3d758 Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Thu, 14 Dec 2017 15:33:12 -0800 Subject: kernel: make groups_sort calling a responsibility group_info allocators In testing, we found that nfsd threads may call set_groups in parallel for the same entry cached in auth.unix.gid, racing in the call of groups_sort, corrupting the groups for that entry and leading to permission denials for the client. This patch: - Make groups_sort globally visible. - Move the call to groups_sort to the modifiers of group_info - Remove the call to groups_sort from set_groups Link: http://lkml.kernel.org/r/20171211151420.18655-1-thiago.becker@gmail.com Signed-off-by: Thiago Rafael Becker Reviewed-by: Matthew Wilcox Reviewed-by: NeilBrown Acked-by: "J. Bruce Fields" Cc: Al Viro Cc: Martin Schwidefsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/compat_linux.c | 1 + fs/nfsd/auth.c | 3 +++ include/linux/cred.h | 1 + kernel/groups.c | 5 +++-- kernel/uid16.c | 1 + net/sunrpc/auth_gss/gss_rpc_xdr.c | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 1 + net/sunrpc/svcauth_unix.c | 2 ++ 8 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index f04db3779b34..59eea9c65d3e 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 697f8ae7792d..f650e475d8f0 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -60,6 +60,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, 
struct svc_export *exp) gi->gid[i] = exp->ex_anon_gid; else gi->gid[i] = rqgi->gid[i]; + + /* Each thread allocates its own gi, no race */ + groups_sort(gi); } } else { gi = get_group_info(rqgi); diff --git a/include/linux/cred.h b/include/linux/cred.h index 099058e1178b..631286535d0f 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -83,6 +83,7 @@ extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); extern bool may_setgroups(void); +extern void groups_sort(struct group_info *); /* * The security context of a task diff --git a/kernel/groups.c b/kernel/groups.c index e357bc800111..daae2f2dc6d4 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -86,11 +86,12 @@ static int gid_cmp(const void *_a, const void *_b) return gid_gt(a, b) - gid_lt(a, b); } -static void groups_sort(struct group_info *group_info) +void groups_sort(struct group_info *group_info) { sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid), gid_cmp, NULL); } +EXPORT_SYMBOL(groups_sort); /* a simple bsearch */ int groups_search(const struct group_info *group_info, kgid_t grp) @@ -122,7 +123,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp) void set_groups(struct cred *new, struct group_info *group_info) { put_group_info(new->group_info); - groups_sort(group_info); get_group_info(group_info); new->group_info = group_info; } @@ -206,6 +206,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/kernel/uid16.c b/kernel/uid16.c index ce74a4901d2b..ef1da2a5f9bd 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -192,6 +192,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); 
put_group_info(group_info); diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index c4778cae58ef..444380f968f1 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, goto out_free_groups; creds->cr_group_info->gid[i] = kgid; } + groups_sort(creds->cr_group_info); return 0; out_free_groups: diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5dd4e6c9fef2..26531193fce4 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail *cd, goto out; rsci.cred.cr_group_info->gid[i] = kgid; } + groups_sort(rsci.cred.cr_group_info); /* mech name */ len = qword_get(&mesg, buf, mlen); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 740b67d5a733..af7f28fb8102 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_detail *cd, ug.gi->gid[i] = kgid; } + groups_sort(ug.gi); ugp = unix_gid_lookup(cd, uid); if (ugp) { struct cache_head *ch; @@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv)); cred->cr_group_info->gid[i] = kgid; } + groups_sort(cred->cr_group_info); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { *authp = rpc_autherr_badverf; return SVC_DENIED; -- cgit v1.2.3 From 4837fe37adff1d159904f0c013471b1ecbcb455e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 14 Dec 2017 15:33:15 -0800 Subject: mm, oom_reaper: fix memory corruption David Rientjes has reported the following memory corruption while the oom reaper tries to unmap the victims address space BUG: Bad page map in process oom_reaper pte:6353826300000000 pmd:00000000 addr:00007f50cab1d000 vm_flags:08100073 anon_vma:ffff9eea335603f0 mapping: (null) 
index:7f50cab1d file: (null) fault: (null) mmap: (null) readpage: (null) CPU: 2 PID: 1001 Comm: oom_reaper Call Trace: unmap_page_range+0x1068/0x1130 __oom_reap_task_mm+0xd5/0x16b oom_reaper+0xff/0x14c kthread+0xc1/0xe0 Tetsuo Handa has noticed that the synchronization inside exit_mmap is insufficient. We only synchronize with the oom reaper if tsk_is_oom_victim which is not true if the final __mmput is called from a different context than the oom victim exit path. This can trivially happen from context of any task which has grabbed mm reference (e.g. to read /proc/<pid>/ file which requires mm etc.). The race would look like this (columns: oom_reaper | oom_victim | task): [task] mmget_not_zero; [oom_victim] do_exit -> mmput; [oom_reaper] __oom_reap_task_mm; [task] mmput -> __mmput -> exit_mmap -> remove_vma; [oom_reaper] unmap_page_range. Fix this issue by providing a new mm_is_oom_victim() helper which operates on the mm struct rather than a task. Any context which operates on a remote mm struct should use this helper in place of tsk_is_oom_victim. The flag is set in mark_oom_victim and never cleared so it is stable in the exit_mmap path. Debugged by Tetsuo Handa. Link: http://lkml.kernel.org/r/20171210095130.17110-1-mhocko@kernel.org Fixes: 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently") Signed-off-by: Michal Hocko Reported-by: David Rientjes Acked-by: David Rientjes Cc: Tetsuo Handa Cc: Andrea Argangeli Cc: [4.14] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 9 +++++++++ include/linux/sched/coredump.h | 1 + mm/mmap.c | 10 +++++----- mm/oom_kill.c | 4 +++- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 01c91d874a57..5bad038ac012 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -66,6 +66,15 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) return tsk->signal->oom_mm; } +/* + * Use this helper if tsk->mm != mm and the victim mm needs a special + * handling.
This is guaranteed to stay true after once set. + */ +static inline bool mm_is_oom_victim(struct mm_struct *mm) +{ + return test_bit(MMF_OOM_VICTIM, &mm->flags); +} + /* * Checks whether a page fault on the given mm is still reliable. * This is no longer true if the oom reaper started to reap the diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 9c8847395b5e..ec912d01126f 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -70,6 +70,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/mm/mmap.c b/mm/mmap.c index a4d546821214..9efdc021ad22 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3019,20 +3019,20 @@ void exit_mmap(struct mm_struct *mm) /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); - set_bit(MMF_OOM_SKIP, &mm->flags); - if (unlikely(tsk_is_oom_victim(current))) { + if (unlikely(mm_is_oom_victim(mm))) { /* * Wait for oom_reap_task() to stop working on this * mm. Because MMF_OOM_SKIP is already set before * calling down_read(), oom_reap_task() will not run * on this "mm" post up_write(). * - * tsk_is_oom_victim() cannot be set from under us - * either because current->mm is already set to NULL + * mm_is_oom_victim() cannot be set from under us + * either because victim->mm is already set to NULL * under task_lock before calling mmput and oom_mm is - * set not NULL by the OOM killer only if current->mm + * set not NULL by the OOM killer only if victim->mm * is found not NULL while holding the task_lock. 
*/ + set_bit(MMF_OOM_SKIP, &mm->flags); down_write(&mm->mmap_sem); up_write(&mm->mmap_sem); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c957be32b27a..29f855551efe 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -683,8 +683,10 @@ static void mark_oom_victim(struct task_struct *tsk) return; /* oom_mm is bound to the signal struct life time. */ - if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) + if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) { mmgrab(tsk->signal->oom_mm); + set_bit(MMF_OOM_VICTIM, &mm->flags); + } /* * Make sure that the task is woken up from uninterruptible sleep -- cgit v1.2.3 From 7c2c11b208be09c156573fc0076b7b3646e05219 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 14 Dec 2017 15:33:19 -0800 Subject: arch: define weak abort() gcc toggle -fisolate-erroneous-paths-dereference (default at -O2 onwards) isolates faulty code paths such as null pointer access, divide by zero etc. If the gcc port doesn't implement __builtin_trap, an abort() is generated which causes a kernel link error. In this case, gcc is generating abort due to 'divide by zero' in lib/mpi/mpih-div.c. Currently 'frv' and 'arc' are failing. Previously other architectures were also broken; for example, m32r was fixed by commit d22e3d69ee1a ("m32r: fix build failure"). Let's define this weak function which is common for all arch and fix the problem permanently. We can even remove the arch specific 'abort' after this is done.
Link: http://lkml.kernel.org/r/1513118956-8718-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Cc: Alexey Brodkin Cc: Vineet Gupta Cc: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/exit.c b/kernel/exit.c index 6b4298a41167..df0c91d5606c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1755,3 +1755,11 @@ Efault: return -EFAULT; } #endif + +__weak void abort(void) +{ + BUG(); + + /* if that doesn't kill us, halt */ + panic("Oops failed to kill thread"); +} -- cgit v1.2.3 From b00d607bb188e187c7b60074d2fa91a6f1985029 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 5 Dec 2017 04:41:51 -0500 Subject: tracing: Have stack trace not record if RCU is not watching The stack tracer records a stack dump whenever it sees a stack usage that is more than what it ever saw before. This can happen at any function that is being traced. If it happens when the CPU is going idle (or other strange locations), RCU may not be watching, and in this case, the recording of the stack trace will trigger a warning. There's been lots of efforts to make hacks to allow stack tracing to proceed even if RCU is not watching, but this only causes more issues to appear. Simply do not trace a stack if RCU is not watching. It probably isn't a bad stack anyway. Acked-by: "Paul E. 
McKenney" Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 734accc02418..3c7bfc4bf5e9 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -209,6 +209,10 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, if (__this_cpu_read(disable_stack_tracer) != 1) goto out; + /* If rcu is not watching, then save stack trace can fail */ + if (!rcu_is_watching()) + goto out; + ip += MCOUNT_INSN_SIZE; check_stack(ip, &stack); -- cgit v1.2.3 From 093b8886f446c9351c4de512cb1d4afe30e37f6f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 12 Dec 2017 10:23:28 -0800 Subject: scsi: core: Use blist_flags_t consistently Use the type blist_flags_t for all variables that represent blacklist flags. Additionally, suppress recently introduced sparse warnings related to blacklist flags. [mkp: fixed commit id] Fixes: 5ebde4694e3b ("scsi: Use 'blist_flags_t' for scsi_devinfo flags") Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_devinfo.c | 6 ++---- drivers/scsi/scsi_scan.c | 13 +++++++------ drivers/scsi/scsi_sysfs.c | 5 +++-- drivers/scsi/scsi_transport_spi.c | 12 +++++++----- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 449ef5adbb2b..dfb8da83fa50 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -374,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model, model, compatible); if (strflags) - devinfo->flags = simple_strtoul(strflags, NULL, 0); - else - devinfo->flags = flags; - + flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0); + devinfo->flags = flags; devinfo->compatible = compatible; if (compatible) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index be5e919db0e8..0880d975eed3 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, - int *bflags, int async) + blist_flags_t *bflags, int async) { int ret; @@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq, * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ static int scsi_probe_and_add_lun(struct scsi_target *starget, - u64 lun, int *bflagsp, + u64 lun, blist_flags_t *bflagsp, struct scsi_device **sdevp, enum scsi_scan_mode rescan, void *hostdata) { struct scsi_device *sdev; unsigned char *result; - int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256; + blist_flags_t bflags; + int res = SCSI_SCAN_NO_RESPONSE, result_len = 256; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); /* @@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget, * Modifies 
sdevscan->lun. **/ static void scsi_sequential_lun_scan(struct scsi_target *starget, - int bflags, int scsi_level, + blist_flags_t bflags, int scsi_level, enum scsi_scan_mode rescan) { uint max_dev_lun; @@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget, * 0: scan completed (or no memory, so further scanning is futile) * 1: could not scan with REPORT LUN **/ -static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, +static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags, enum scsi_scan_mode rescan) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; @@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { struct Scsi_Host *shost = dev_to_shost(parent); - int bflags = 0; + blist_flags_t bflags = 0; int res; struct scsi_target *starget; diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 50e7d7e4a861..a9996c16f4ae 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL); -#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name +#define BLIST_FLAG_NAME(name) \ + [ilog2((__force unsigned int)BLIST_##name)] = #name static const char *const sdev_bflags_name[] = { #include "scsi_devinfo_tbl.c" }; @@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr, for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) { const char *name = NULL; - if (!(sdev->sdev_bflags & BIT(i))) + if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i))) continue; if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i]) name = sdev_bflags_name[i]; diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index d0219e36080c..10ebb213ddb3 100644 --- a/drivers/scsi/scsi_transport_spi.c 
+++ b/drivers/scsi/scsi_transport_spi.c @@ -50,14 +50,14 @@ /* Our blacklist flags */ enum { - SPI_BLIST_NOIUS = 0x1, + SPI_BLIST_NOIUS = (__force blist_flags_t)0x1, }; /* blacklist table, modelled on scsi_devinfo.c */ static struct { char *vendor; char *model; - unsigned flags; + blist_flags_t flags; } spi_static_device_list[] __initdata = { {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS }, {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS }, @@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc, { struct scsi_device *sdev = to_scsi_device(dev); struct scsi_target *starget = sdev->sdev_target; - unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], - &sdev->inquiry[16], - SCSI_DEVINFO_SPI); + blist_flags_t bflags; + + bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8], + &sdev->inquiry[16], + SCSI_DEVINFO_SPI); /* Populate the target capability fields with the values * gleaned from the device inquiry */ -- cgit v1.2.3 From 5771cfffdffe709ae9b403b6f80438ca40bf850e Mon Sep 17 00:00:00 2001 From: Prasad B Munirathnam Date: Tue, 12 Dec 2017 11:40:10 -0800 Subject: scsi: aacraid: Fix I/O drop during reset "FIB_CONTEXT_FLAG_TIMEDOUT" flag is set in aac_eh_abort to indicate command timeout. Using the same flag in reset handler causes the command to time out and the I/Os were dropped. Define a new flag "FIB_CONTEXT_FLAG_EH_RESET" to make sure I/O is properly handled in eh_reset handler. [mkp: tweaked commit message] Signed-off-by: Prasad B Munirathnam Reviewed-by: Raghava Aditya Renukunta Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/aacraid.h | 1 + drivers/scsi/aacraid/linit.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 6e3d81969a77..d52265416da2 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1725,6 +1725,7 @@ struct aac_dev #define FIB_CONTEXT_FLAG_NATIVE_HBA (0x00000010) #define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF (0x00000020) #define FIB_CONTEXT_FLAG_SCSI_CMD (0x00000040) +#define FIB_CONTEXT_FLAG_EH_RESET (0x00000080) /* * Define the command values diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index bdf127aaab41..d55332de08f9 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd) info = &aac->hba_map[bus][cid]; if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS || info->devtype != AAC_DEVTYPE_NATIVE_RAW) { - fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT; + fib->flags |= FIB_CONTEXT_FLAG_EH_RESET; cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER; } } -- cgit v1.2.3 From 2610acf46b9ed528ec2cacd717bc9d354e452b73 Mon Sep 17 00:00:00 2001 From: Andreas Platschek Date: Thu, 14 Dec 2017 12:50:51 +0100 Subject: dmaengine: fsl-edma: disable clks on all error paths Previously enabled clks are only disabled if clk_prepare_enable() fails. However, there are other error paths where the previously enabled clocks are not disabled. To fix the problem, fsl_disable_clocks() now takes the number of clocks that shall be disabled + unprepared. For existing calls where all clocks were already successfully prepared + enabled, DMAMUX_NR is passed to disable + unprepare all clocks. In error paths where only some clocks were successfully prepared + enabled the loop counter is passed, in order to disable + unprepare all successfully prepared + enabled clocks. Found by Linux Driver Verification project (linuxtesting.org).
Signed-off-by: Andreas Platschek Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index 6775f2c74e25..c7568869284e 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -863,11 +863,11 @@ static void fsl_edma_irq_exit( } } -static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma) +static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks) { int i; - for (i = 0; i < DMAMUX_NR; i++) + for (i = 0; i < nr_clocks; i++) clk_disable_unprepare(fsl_edma->muxclk[i]); } @@ -904,25 +904,25 @@ static int fsl_edma_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i); fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(fsl_edma->muxbase[i])) + if (IS_ERR(fsl_edma->muxbase[i])) { + /* on error: disable all previously enabled clks */ + fsl_disable_clocks(fsl_edma, i); return PTR_ERR(fsl_edma->muxbase[i]); + } sprintf(clkname, "dmamux%d", i); fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname); if (IS_ERR(fsl_edma->muxclk[i])) { dev_err(&pdev->dev, "Missing DMAMUX block clock.\n"); + /* on error: disable all previously enabled clks */ + fsl_disable_clocks(fsl_edma, i); return PTR_ERR(fsl_edma->muxclk[i]); } ret = clk_prepare_enable(fsl_edma->muxclk[i]); - if (ret) { - /* disable only clks which were enabled on error */ - for (; i >= 0; i--) - clk_disable_unprepare(fsl_edma->muxclk[i]); - - dev_err(&pdev->dev, "DMAMUX clk block failed.\n"); - return ret; - } + if (ret) + /* on error: disable all previously enabled clks */ + fsl_disable_clocks(fsl_edma, i); } @@ -976,7 +976,7 @@ static int fsl_edma_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Can't register Freescale eDMA engine. 
(%d)\n", ret); - fsl_disable_clocks(fsl_edma); + fsl_disable_clocks(fsl_edma, DMAMUX_NR); return ret; } @@ -985,7 +985,7 @@ static int fsl_edma_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Can't register Freescale eDMA of_dma. (%d)\n", ret); dma_async_device_unregister(&fsl_edma->dma_dev); - fsl_disable_clocks(fsl_edma); + fsl_disable_clocks(fsl_edma, DMAMUX_NR); return ret; } @@ -1015,7 +1015,7 @@ static int fsl_edma_remove(struct platform_device *pdev) fsl_edma_cleanup_vchan(&fsl_edma->dma_dev); of_dma_controller_free(np); dma_async_device_unregister(&fsl_edma->dma_dev); - fsl_disable_clocks(fsl_edma); + fsl_disable_clocks(fsl_edma, DMAMUX_NR); return 0; } -- cgit v1.2.3 From 6b79e77c92a3ab2417cd97dcd5ac981af967e6ad Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 7 Dec 2017 14:43:22 +0800 Subject: mmc: dt-bindings: add mmc support to MT7623 SoC Add the devicetree binding for MT7623 SoC using MT2701 as the fallback. Cc: devicetree@vger.kernel.org Signed-off-by: Sean Wang Acked-by: Rob Herring Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/mtk-sd.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.txt b/Documentation/devicetree/bindings/mmc/mtk-sd.txt index 72d2a734ab85..9b8017670870 100644 --- a/Documentation/devicetree/bindings/mmc/mtk-sd.txt +++ b/Documentation/devicetree/bindings/mmc/mtk-sd.txt @@ -12,6 +12,8 @@ Required properties: "mediatek,mt8173-mmc": for mmc host ip compatible with mt8173 "mediatek,mt2701-mmc": for mmc host ip compatible with mt2701 "mediatek,mt2712-mmc": for mmc host ip compatible with mt2712 + "mediatek,mt7623-mmc", "mediatek,mt2701-mmc": for MT7623 SoC + - reg: physical base address of the controller and length - interrupts: Should contain MSDC interrupt number - clocks: Should contain phandle for the clock feeding the MMC controller -- cgit v1.2.3 From cbcaac6d7dd209f2077480f4297e131c8b90d223 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 19 
Nov 2017 10:22:43 +0530 Subject: mmc: meson-gx-mmc: Fix platform_get_irq's error checking The platform_get_irq() function returns a negative value if an error occurs, and zero or a positive number on success. Checking the platform_get_irq() result only for zero is not correct. Signed-off-by: Arvind Yadav Acked-by: Kevin Hilman Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index e0862d3f65b3..32a6a228cd12 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -1208,7 +1208,7 @@ static int meson_mmc_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (!irq) { + if (irq <= 0) { dev_err(&pdev->dev, "failed to get interrupt resource.\n"); ret = -EINVAL; goto free_host; -- cgit v1.2.3 From 928635c114adefc58aeb1a9f9615cd9d3c24e3e4 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 19 Nov 2017 10:22:44 +0530 Subject: mmc: s3cmci: Fix platform_get_irq's error checking The platform_get_irq() function returns a negative value if an error occurs, and zero or a positive number on success. Checking the platform_get_irq() result only for zero is not correct.
Signed-off-by: Arvind Yadav Signed-off-by: Ulf Hansson --- drivers/mmc/host/s3cmci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index f7f157a62a4a..36daee1e6588 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1658,7 +1658,7 @@ static int s3cmci_probe(struct platform_device *pdev) } host->irq = platform_get_irq(pdev, 0); - if (host->irq == 0) { + if (host->irq <= 0) { dev_err(&pdev->dev, "failed to get interrupt resource.\n"); ret = -EINVAL; goto probe_iounmap; -- cgit v1.2.3 From 1b7ba57ecc864173ef42fff7f8c2e9a880b42bd2 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 19 Nov 2017 10:22:45 +0530 Subject: mmc: sdhci-acpi: Handle return value of platform_get_irq platform_get_irq() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 264f10327bf9..4065da58789d 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -680,6 +680,10 @@ static int sdhci_acpi_probe(struct platform_device *pdev) host->hw_name = "ACPI"; host->ops = &sdhci_acpi_ops_dflt; host->irq = platform_get_irq(pdev, 0); + if (host->irq <= 0) { + err = -EINVAL; + goto err_free; + } host->ioaddr = devm_ioremap_nocache(dev, iomem->start, resource_size(iomem)); -- cgit v1.2.3 From 682798a596a6f3ffe796ebbf6a9396bed6dc6de2 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 19 Nov 2017 10:22:46 +0530 Subject: mmc: sdhci-spear: Handle return value of platform_get_irq platform_get_irq() can fail here and we must check its return value. 
Signed-off-by: Arvind Yadav Acked-by: Viresh Kumar Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-spear.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index 8c0f88428556..14511526a3a8 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ b/drivers/mmc/host/sdhci-spear.c @@ -82,6 +82,10 @@ static int sdhci_probe(struct platform_device *pdev) host->hw_name = "sdhci"; host->ops = &sdhci_pltfm_ops; host->irq = platform_get_irq(pdev, 0); + if (host->irq <= 0) { + ret = -EINVAL; + goto err_host; + } host->quirks = SDHCI_QUIRK_BROKEN_ADMA; sdhci = sdhci_priv(host); -- cgit v1.2.3 From 2408a08583d2711f43716b79cb879df66ad407dc Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 19 Nov 2017 10:22:47 +0530 Subject: mmc: sunxi-mmc: Handle return value of platform_get_irq platform_get_irq() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index 8fef5c17696e..bad612d6f879 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -1255,6 +1255,11 @@ static int sunxi_mmc_resource_request(struct sunxi_mmc_host *host, goto error_assert_reset; host->irq = platform_get_irq(pdev, 0); + if (host->irq <= 0) { + ret = -EINVAL; + goto error_assert_reset; + } + return devm_request_threaded_irq(&pdev->dev, host->irq, sunxi_mmc_irq, sunxi_mmc_handle_manual_stop, 0, "sunxi-mmc", host); -- cgit v1.2.3 From c7eb47f9e45226571be31212f6efd4b307d3b59d Mon Sep 17 00:00:00 2001 From: Benson Leung Date: Wed, 13 Dec 2017 11:32:15 +0100 Subject: extcon: usbc-cros-ec: add support to notify USB type cables. Extend the driver to notify host and device type cables and the presence of power. 
Signed-off-by: Benson Leung Signed-off-by: Enric Balletbo i Serra Reviewed-by: Chanwoo Choi Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-usbc-cros-ec.c | 142 ++++++++++++++++++++++++++++++++++- include/linux/mfd/cros_ec_commands.h | 17 +++++ 2 files changed, 155 insertions(+), 4 deletions(-) diff --git a/drivers/extcon/extcon-usbc-cros-ec.c b/drivers/extcon/extcon-usbc-cros-ec.c index 6187f731b29d..6721ab01fe7d 100644 --- a/drivers/extcon/extcon-usbc-cros-ec.c +++ b/drivers/extcon/extcon-usbc-cros-ec.c @@ -34,16 +34,26 @@ struct cros_ec_extcon_info { struct notifier_block notifier; + unsigned int dr; /* data role */ + bool pr; /* power role (true if VBUS enabled) */ bool dp; /* DisplayPort enabled */ bool mux; /* SuperSpeed (usb3) enabled */ unsigned int power_type; }; static const unsigned int usb_type_c_cable[] = { + EXTCON_USB, + EXTCON_USB_HOST, EXTCON_DISP_DP, EXTCON_NONE, }; +enum usb_data_roles { + DR_NONE, + DR_HOST, + DR_DEVICE, +}; + /** * cros_ec_pd_command() - Send a command to the EC. * @info: pointer to struct cros_ec_extcon_info @@ -150,6 +160,7 @@ static int cros_ec_usb_get_role(struct cros_ec_extcon_info *info, pd_control.port = info->port_id; pd_control.role = USB_PD_CTRL_ROLE_NO_CHANGE; pd_control.mux = USB_PD_CTRL_MUX_NO_CHANGE; + pd_control.swap = USB_PD_CTRL_SWAP_NONE; ret = cros_ec_pd_command(info, EC_CMD_USB_PD_CONTROL, 1, &pd_control, sizeof(pd_control), &resp, sizeof(resp)); @@ -183,11 +194,72 @@ static int cros_ec_pd_get_num_ports(struct cros_ec_extcon_info *info) return resp.num_ports; } +static const char *cros_ec_usb_role_string(unsigned int role) +{ + return role == DR_NONE ? "DISCONNECTED" : + (role == DR_HOST ? 
"DFP" : "UFP"); +} + +static const char *cros_ec_usb_power_type_string(unsigned int type) +{ + switch (type) { + case USB_CHG_TYPE_NONE: + return "USB_CHG_TYPE_NONE"; + case USB_CHG_TYPE_PD: + return "USB_CHG_TYPE_PD"; + case USB_CHG_TYPE_PROPRIETARY: + return "USB_CHG_TYPE_PROPRIETARY"; + case USB_CHG_TYPE_C: + return "USB_CHG_TYPE_C"; + case USB_CHG_TYPE_BC12_DCP: + return "USB_CHG_TYPE_BC12_DCP"; + case USB_CHG_TYPE_BC12_CDP: + return "USB_CHG_TYPE_BC12_CDP"; + case USB_CHG_TYPE_BC12_SDP: + return "USB_CHG_TYPE_BC12_SDP"; + case USB_CHG_TYPE_OTHER: + return "USB_CHG_TYPE_OTHER"; + case USB_CHG_TYPE_VBUS: + return "USB_CHG_TYPE_VBUS"; + case USB_CHG_TYPE_UNKNOWN: + return "USB_CHG_TYPE_UNKNOWN"; + default: + return "USB_CHG_TYPE_UNKNOWN"; + } +} + +static bool cros_ec_usb_power_type_is_wall_wart(unsigned int type, + unsigned int role) +{ + switch (type) { + /* FIXME : Guppy, Donnettes, and other chargers will be miscategorized + * because they identify with USB_CHG_TYPE_C, but we can't return true + * here from that code because that breaks Suzy-Q and other kinds of + * USB Type-C cables and peripherals. + */ + case USB_CHG_TYPE_PROPRIETARY: + case USB_CHG_TYPE_BC12_DCP: + return true; + case USB_CHG_TYPE_PD: + case USB_CHG_TYPE_C: + case USB_CHG_TYPE_BC12_CDP: + case USB_CHG_TYPE_BC12_SDP: + case USB_CHG_TYPE_OTHER: + case USB_CHG_TYPE_VBUS: + case USB_CHG_TYPE_UNKNOWN: + case USB_CHG_TYPE_NONE: + default: + return false; + } +} + static int extcon_cros_ec_detect_cable(struct cros_ec_extcon_info *info, bool force) { struct device *dev = info->dev; int role, power_type; + unsigned int dr = DR_NONE; + bool pr = false; bool polarity = false; bool dp = false; bool mux = false; @@ -206,9 +278,12 @@ static int extcon_cros_ec_detect_cable(struct cros_ec_extcon_info *info, dev_err(dev, "failed getting role err = %d\n", role); return role; } + dev_dbg(dev, "disconnected\n"); } else { int pd_mux_state; + dr = (role & PD_CTRL_RESP_ROLE_DATA) ? 
DR_HOST : DR_DEVICE; + pr = (role & PD_CTRL_RESP_ROLE_POWER); pd_mux_state = cros_ec_usb_get_pd_mux_state(info); if (pd_mux_state < 0) pd_mux_state = USB_PD_MUX_USB_ENABLED; @@ -216,20 +291,62 @@ static int extcon_cros_ec_detect_cable(struct cros_ec_extcon_info *info, dp = pd_mux_state & USB_PD_MUX_DP_ENABLED; mux = pd_mux_state & USB_PD_MUX_USB_ENABLED; hpd = pd_mux_state & USB_PD_MUX_HPD_IRQ; - } - if (force || info->dp != dp || info->mux != mux || - info->power_type != power_type) { + dev_dbg(dev, + "connected role 0x%x pwr type %d dr %d pr %d pol %d mux %d dp %d hpd %d\n", + role, power_type, dr, pr, polarity, mux, dp, hpd); + } + /* + * When there is no USB host (e.g. USB PD charger), + * we are not really a UFP for the AP. + */ + if (dr == DR_DEVICE && + cros_ec_usb_power_type_is_wall_wart(power_type, role)) + dr = DR_NONE; + + if (force || info->dr != dr || info->pr != pr || info->dp != dp || + info->mux != mux || info->power_type != power_type) { + bool host_connected = false, device_connected = false; + + dev_dbg(dev, "Type/Role switch! 
type = %s role = %s\n", + cros_ec_usb_power_type_string(power_type), + cros_ec_usb_role_string(dr)); + info->dr = dr; + info->pr = pr; info->dp = dp; info->mux = mux; info->power_type = power_type; - extcon_set_state(info->edev, EXTCON_DISP_DP, dp); + if (dr == DR_DEVICE) + device_connected = true; + else if (dr == DR_HOST) + host_connected = true; + extcon_set_state(info->edev, EXTCON_USB, device_connected); + extcon_set_state(info->edev, EXTCON_USB_HOST, host_connected); + extcon_set_state(info->edev, EXTCON_DISP_DP, dp); + extcon_set_property(info->edev, EXTCON_USB, + EXTCON_PROP_USB_VBUS, + (union extcon_property_value)(int)pr); + extcon_set_property(info->edev, EXTCON_USB_HOST, + EXTCON_PROP_USB_VBUS, + (union extcon_property_value)(int)pr); + extcon_set_property(info->edev, EXTCON_USB, + EXTCON_PROP_USB_TYPEC_POLARITY, + (union extcon_property_value)(int)polarity); + extcon_set_property(info->edev, EXTCON_USB_HOST, + EXTCON_PROP_USB_TYPEC_POLARITY, + (union extcon_property_value)(int)polarity); extcon_set_property(info->edev, EXTCON_DISP_DP, EXTCON_PROP_USB_TYPEC_POLARITY, (union extcon_property_value)(int)polarity); + extcon_set_property(info->edev, EXTCON_USB, + EXTCON_PROP_USB_SS, + (union extcon_property_value)(int)mux); + extcon_set_property(info->edev, EXTCON_USB_HOST, + EXTCON_PROP_USB_SS, + (union extcon_property_value)(int)mux); extcon_set_property(info->edev, EXTCON_DISP_DP, EXTCON_PROP_USB_SS, (union extcon_property_value)(int)mux); @@ -237,6 +354,8 @@ static int extcon_cros_ec_detect_cable(struct cros_ec_extcon_info *info, EXTCON_PROP_DISP_HPD, (union extcon_property_value)(int)hpd); + extcon_sync(info->edev, EXTCON_USB); + extcon_sync(info->edev, EXTCON_USB_HOST); extcon_sync(info->edev, EXTCON_DISP_DP); } else if (hpd) { @@ -322,13 +441,28 @@ static int extcon_cros_ec_probe(struct platform_device *pdev) return ret; } + extcon_set_property_capability(info->edev, EXTCON_USB, + EXTCON_PROP_USB_VBUS); + extcon_set_property_capability(info->edev, 
EXTCON_USB_HOST, + EXTCON_PROP_USB_VBUS); + extcon_set_property_capability(info->edev, EXTCON_USB, + EXTCON_PROP_USB_TYPEC_POLARITY); + extcon_set_property_capability(info->edev, EXTCON_USB_HOST, + EXTCON_PROP_USB_TYPEC_POLARITY); extcon_set_property_capability(info->edev, EXTCON_DISP_DP, EXTCON_PROP_USB_TYPEC_POLARITY); + extcon_set_property_capability(info->edev, EXTCON_USB, + EXTCON_PROP_USB_SS); + extcon_set_property_capability(info->edev, EXTCON_USB_HOST, + EXTCON_PROP_USB_SS); extcon_set_property_capability(info->edev, EXTCON_DISP_DP, EXTCON_PROP_USB_SS); extcon_set_property_capability(info->edev, EXTCON_DISP_DP, EXTCON_PROP_DISP_HPD); + info->dr = DR_NONE; + info->pr = false; + platform_set_drvdata(pdev, info); /* Get PD events from the EC */ diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 2b16e95b9bb8..a83f6498b95e 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -2904,16 +2904,33 @@ enum usb_pd_control_mux { USB_PD_CTRL_MUX_AUTO = 5, }; +enum usb_pd_control_swap { + USB_PD_CTRL_SWAP_NONE = 0, + USB_PD_CTRL_SWAP_DATA = 1, + USB_PD_CTRL_SWAP_POWER = 2, + USB_PD_CTRL_SWAP_VCONN = 3, + USB_PD_CTRL_SWAP_COUNT +}; + struct ec_params_usb_pd_control { uint8_t port; uint8_t role; uint8_t mux; + uint8_t swap; } __packed; #define PD_CTRL_RESP_ENABLED_COMMS (1 << 0) /* Communication enabled */ #define PD_CTRL_RESP_ENABLED_CONNECTED (1 << 1) /* Device connected */ #define PD_CTRL_RESP_ENABLED_PD_CAPABLE (1 << 2) /* Partner is PD capable */ +#define PD_CTRL_RESP_ROLE_POWER BIT(0) /* 0=SNK/1=SRC */ +#define PD_CTRL_RESP_ROLE_DATA BIT(1) /* 0=UFP/1=DFP */ +#define PD_CTRL_RESP_ROLE_VCONN BIT(2) /* Vconn status */ +#define PD_CTRL_RESP_ROLE_DR_POWER BIT(3) /* Partner is dualrole power */ +#define PD_CTRL_RESP_ROLE_DR_DATA BIT(4) /* Partner is dualrole data */ +#define PD_CTRL_RESP_ROLE_USB_COMM BIT(5) /* Partner USB comm capable */ +#define PD_CTRL_RESP_ROLE_EXT_POWERED BIT(6) /* 
Partner externally powerd */ + struct ec_response_usb_pd_control_v1 { uint8_t enabled; uint8_t role; -- cgit v1.2.3 From c813e10a6bbad9ef56bc115c64d48c5a7d0a7dd5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:36 +0900 Subject: mmc: renesas_sdhi: consolidate DMAC CONFIG options The description in the Makefile is odd. Fix the CONFIG selection in a cleaner way. Signed-off-by: Masahiro Yamada Acked-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 4 ++-- drivers/mmc/host/Makefile | 8 ++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 2b02a9788bb6..d63a6ba47501 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -599,8 +599,6 @@ config MMC_SDHI depends on SUPERH || ARM || ARM64 depends on SUPERH || ARCH_RENESAS || COMPILE_TEST select MMC_TMIO_CORE - select MMC_SDHI_SYS_DMAC if (SUPERH || ARM) - select MMC_SDHI_INTERNAL_DMAC if ARM64 help This provides support for the SDHI SD/SDIO controller found in Renesas SuperH, ARM and ARM64 based SoCs @@ -608,6 +606,7 @@ config MMC_SDHI config MMC_SDHI_SYS_DMAC tristate "DMA for SDHI SD/SDIO controllers using SYS-DMAC" depends on MMC_SDHI + default MMC_SDHI if (SUPERH || ARM) help This provides DMA support for SDHI SD/SDIO controllers using SYS-DMAC via DMA Engine. This supports the controllers @@ -617,6 +616,7 @@ config MMC_SDHI_INTERNAL_DMAC tristate "DMA for SDHI SD/SDIO controllers using on-chip bus mastering" depends on ARM64 || COMPILE_TEST depends on MMC_SDHI + default MMC_SDHI if ARM64 help This provides DMA support for SDHI SD/SDIO controllers using on-chip bus mastering. 
This supports the controllers diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 407a011026cd..191a04010205 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -39,12 +39,8 @@ obj-$(CONFIG_MMC_SDRICOH_CS) += sdricoh_cs.o obj-$(CONFIG_MMC_TMIO) += tmio_mmc.o obj-$(CONFIG_MMC_TMIO_CORE) += tmio_mmc_core.o obj-$(CONFIG_MMC_SDHI) += renesas_sdhi_core.o -ifeq ($(subst m,y,$(CONFIG_MMC_SDHI_SYS_DMAC)),y) -obj-$(CONFIG_MMC_SDHI) += renesas_sdhi_sys_dmac.o -endif -ifeq ($(subst m,y,$(CONFIG_MMC_SDHI_INTERNAL_DMAC)),y) -obj-$(CONFIG_MMC_SDHI) += renesas_sdhi_internal_dmac.o -endif +obj-$(CONFIG_MMC_SDHI_SYS_DMAC) += renesas_sdhi_sys_dmac.o +obj-$(CONFIG_MMC_SDHI_INTERNAL_DMAC) += renesas_sdhi_internal_dmac.o obj-$(CONFIG_MMC_CB710) += cb710-mmc.o obj-$(CONFIG_MMC_VIA_SDMMC) += via-sdmmc.o obj-$(CONFIG_SDH_BFIN) += bfin_sdh.o -- cgit v1.2.3 From 08933099e6404f588f81c2050bfec7313e06eeaf Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 14 Dec 2017 16:54:45 +0100 Subject: USB: serial: option: add support for Telit ME910 PID 0x1101 This patch adds support for PID 0x1101 of Telit ME910. 
Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3b3513874cfd..b02fb576b856 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -280,6 +280,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 #define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_ME910 0x1100 +#define TELIT_PRODUCT_ME910_DUAL_MODEM 0x1101 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 @@ -645,6 +646,11 @@ static const struct option_blacklist_info telit_me910_blacklist = { .reserved = BIT(1) | BIT(3), }; +static const struct option_blacklist_info telit_me910_dual_modem_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(3), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -1244,6 +1250,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = (kernel_ulong_t)&telit_me910_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), + .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), -- cgit v1.2.3 From 92a18a657fb2e2ffbfa0659af32cc18fd2346516 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Fri, 15 Dec 2017 00:39:27 +0100 Subject: USB: serial: qcserial: add Sierra Wireless EM7565 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sierra Wireless EM7565 devices use the QCSERIAL_SWI layout for their serial ports T: Bus=01 Lev=03 
Prnt=29 Port=01 Cnt=02 Dev#= 31 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1199 ProdID=9091 Rev= 0.06 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Sierra Wireless EM7565 Qualcomm Snapdragon X16 LTE-A S: SerialNumber=xxxxxxxx C:* #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=qcserial E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=qcserial E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=qcserial E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms but need sendsetup = true for the NMEA port to make it work properly. Simplify the patch compared to v1 as suggested by Bjørn Mork by taking advantage of the fact that existing devices work with sendsetup = true too. Use sendsetup = true for the NMEA interface of QCSERIAL_SWI and add DEVICE_SWI entries for the EM7565 PID 0x9091 and the EM7565 QDL PID 0x9090. Tests with several MC73xx/MC74xx/MC77xx devices have been performed in order to verify backward compatibility. 
Signed-off-by: Reinhard Speyerer Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index e3892541a489..613f91add03d 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x907a)}, /* Sierra Wireless EM74xx QDL */ {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ + {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ @@ -342,6 +344,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case 2: dev_dbg(dev, "NMEA GPS interface found\n"); + sendsetup = true; break; case 3: dev_dbg(dev, "Modem port found\n"); -- cgit v1.2.3 From 967a6a07e95c58eb9c1581d22a1d9c2d1929843f Mon Sep 17 00:00:00 2001 From: Masaharu Hayakawa Date: Wed, 13 Dec 2017 11:33:00 +0900 Subject: mmc: renesas_sdhi: Add MODULE_LICENSE The following error occurs when loading renesas_sdhi_core.c module, so add MODULE_LICENSE("GPL v2"). renesas_sdhi_core: module license 'unspecified' taints kernel. 
Signed-off-by: Masaharu Hayakawa Fixes: 9d08428afb72 ("mmc: renesas-sdhi: make renesas_sdhi_sys_dmac main module file") Cc: # v4.13+ [Shimoda: Added Fixes tag and Cc to the stable ML] Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Acked-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index fcf7235d5742..157e1d9e7725 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -667,3 +668,5 @@ int renesas_sdhi_remove(struct platform_device *pdev) return 0; } EXPORT_SYMBOL_GPL(renesas_sdhi_remove); + +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From f29810335965ac1f7bcb501ee2af5f039f792416 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Thu, 14 Dec 2017 03:01:52 -0500 Subject: KVM/x86: Check input paging mode when cs.l is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: WARNING: CPU: 0 PID: 27962 at arch/x86/kvm/emulate.c:5631 x86_emulate_insn+0x557/0x15f0 [kvm] Modules linked in: kvm_intel kvm [last unloaded: kvm] CPU: 0 PID: 27962 Comm: syz-executor Tainted: G B W 4.15.0-rc2-next-20171208+ #32 Hardware name: Intel Corporation S1200SP/S1200SP, BIOS S1200SP.86B.01.03.0006.040720161253 04/07/2016 RIP: 0010:x86_emulate_insn+0x557/0x15f0 [kvm] RSP: 0018:ffff8807234476d0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff88072d0237a0 RCX: ffffffffa0065c4d RDX: 1ffff100e5a046f9 RSI: 0000000000000003 RDI: ffff88072d0237c8 RBP: ffff880723447728 R08: ffff88072d020000 R09: ffffffffa008d240 R10: 0000000000000002 R11: ffffed00e7d87db3 R12: ffff88072d0237c8 R13: ffff88072d023870 R14: ffff88072d0238c2 R15: ffffffffa008d080 FS: 00007f8a68666700(0000) GS:ffff880802200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 CR2: 000000002009506c CR3: 000000071fec4005 CR4: 00000000003626f0 Call Trace: x86_emulate_instruction+0x3bc/0xb70 [kvm] ? reexecute_instruction.part.162+0x130/0x130 [kvm] vmx_handle_exit+0x46d/0x14f0 [kvm_intel] ? trace_event_raw_event_kvm_entry+0xe7/0x150 [kvm] ? handle_vmfunc+0x2f0/0x2f0 [kvm_intel] ? wait_lapic_expire+0x25/0x270 [kvm] vcpu_enter_guest+0x720/0x1ef0 [kvm] ... When CS.L is set, vcpu should run in the 64 bit paging mode. Current kvm set_sregs function doesn't have such check when userspace inputs sreg values. This will lead unexpected behavior. This patch is to add checks for CS.L, EFER.LME, EFER.LMA and CR4.PAE when get SREG inputs from userspace in order to avoid unexpected behavior. Suggested-by: Paolo Bonzini Reported-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: Jim Mattson Signed-off-by: Tianyu Lan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 56d036b9ad75..3a82f2d4333b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7494,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); +int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) { + /* + * When EFER.LME and CR0.PG are set, the processor is in + * 64-bit mode (though maybe in a 32-bit code segment). + * CR4.PAE and EFER.LMA must be set. + */ + if (!(sregs->cr4 & X86_CR4_PAE_BIT) + || !(sregs->efer & EFER_LMA)) + return -EINVAL; + } else { + /* + * Not in 64-bit mode: EFER.LMA is clear and the code + * segment cannot be 64-bit. 
+ */ + if (sregs->efer & EFER_LMA || sregs->cs.l) + return -EINVAL; + } + + return 0; +} + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -7506,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, (sregs->cr4 & X86_CR4_OSXSAVE)) return -EINVAL; + if (kvm_valid_sregs(vcpu, sregs)) + return -EINVAL; + apic_base_msr.data = sregs->apic_base; apic_base_msr.host_initiated = true; if (kvm_set_apic_base(vcpu, &apic_base_msr)) -- cgit v1.2.3 From cef31d9af908243421258f1df35a4a644604efbe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Dec 2017 10:32:03 +0100 Subject: posix-timer: Properly check sigevent->sigev_notify timer_create() specifies via sigevent->sigev_notify the signal delivery for the new timer. The valid modes are SIGEV_NONE, SIGEV_SIGNAL, SIGEV_THREAD and (SIGEV_SIGNAL | SIGEV_THREAD_ID). The sanity check in good_sigevent() is only checking the valid combination for the SIGEV_THREAD_ID bit, i.e. SIGEV_SIGNAL, but if SIGEV_THREAD_ID is not set it accepts any random value. This has no real effects on the posix timer and signal delivery code, but it affects show_timer() which handles the output of /proc/$PID/timers. That function uses a string array to pretty print sigev_notify. The access to that array has no bound checks, so random sigev_notify values cause access beyond the array bounds. Add proper checks for the valid notify modes and remove the SIGEV_THREAD_ID masking from various code paths as SIGEV_NONE can never be set in combination with SIGEV_THREAD_ID.
Reported-by: Eric Biggers Reported-by: Dmitry Vyukov Reported-by: Alexey Dobriyan Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: stable@vger.kernel.org --- kernel/time/posix-timers.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 13d6881f908b..ec999f32c840 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -434,17 +434,22 @@ static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; - if ((event->sigev_notify & SIGEV_THREAD_ID ) && - (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || - !same_thread_group(rtn, current) || - (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) + switch (event->sigev_notify) { + case SIGEV_SIGNAL | SIGEV_THREAD_ID: + rtn = find_task_by_vpid(event->sigev_notify_thread_id); + if (!rtn || !same_thread_group(rtn, current)) + return NULL; + /* FALLTHRU */ + case SIGEV_SIGNAL: + case SIGEV_THREAD: + if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) + return NULL; + /* FALLTHRU */ + case SIGEV_NONE: + return task_pid(rtn); + default: return NULL; - - if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) - return NULL; - - return task_pid(rtn); + } } static struct k_itimer * alloc_posix_timer(void) @@ -669,7 +674,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) struct timespec64 ts64; bool sig_none; - sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; + sig_none = timr->it_sigev_notify == SIGEV_NONE; iv = timr->it_interval; /* interval timer ? 
*/ @@ -856,7 +861,7 @@ int common_timer_set(struct k_itimer *timr, int flags, timr->it_interval = timespec64_to_ktime(new_setting->it_interval); expires = timespec64_to_ktime(new_setting->it_value); - sigev_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; + sigev_none = timr->it_sigev_notify == SIGEV_NONE; kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); timr->it_active = !sigev_none; -- cgit v1.2.3 From 046046737bd35bed047460f080ea47e186be731e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 15 Nov 2017 10:43:16 +0100 Subject: phy: tegra: fix device-tree node lookups Fix child-node lookups during probe, which ended up searching the whole device tree depth-first starting at the parents rather than just matching on their children. To make things worse, some parent nodes could end up being prematurely freed (by tegra_xusb_pad_register()) as of_find_node_by_name() drops a reference to its first argument. Fixes: 53d2a715c240 ("phy: Add Tegra XUSB pad controller support") Cc: stable # 4.7 Cc: Thierry Reding Signed-off-by: Johan Hovold Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/tegra/xusb.c | 58 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 4307bf0013e1..63e916d4d069 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match); static struct device_node * tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one.
- */ - struct device_node *np = of_node_get(padctl->dev->of_node); + struct device_node *pads, *np; + + pads = of_get_child_by_name(padctl->dev->of_node, "pads"); + if (!pads) + return NULL; - np = of_find_node_by_name(np, "pads"); - if (np) - np = of_find_node_by_name(np, name); + np = of_get_child_by_name(pads, name); + of_node_put(pads); return np; } @@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name) static struct device_node * tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one. - */ - struct device_node *np = of_node_get(pad->dev.of_node); + struct device_node *np, *lanes; - np = of_find_node_by_name(np, "lanes"); - if (!np) + lanes = of_get_child_by_name(pad->dev.of_node, "lanes"); + if (!lanes) return NULL; - return of_find_node_by_name(np, pad->soc->lanes[index].name); + np = of_get_child_by_name(lanes, pad->soc->lanes[index].name); + of_node_put(lanes); + + return np; } static int @@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad, unsigned int i; int err; - children = of_find_node_by_name(pad->dev.of_node, "lanes"); + children = of_get_child_by_name(pad->dev.of_node, "lanes"); if (!children) return -ENODEV; @@ -444,21 +444,21 @@ static struct device_node * tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type, unsigned int index) { - /* - * of_find_node_by_name() drops a reference, so make sure to grab one. 
- */ - struct device_node *np = of_node_get(padctl->dev->of_node); + struct device_node *ports, *np; + char *name; - np = of_find_node_by_name(np, "ports"); - if (np) { - char *name; + ports = of_get_child_by_name(padctl->dev->of_node, "ports"); + if (!ports) + return NULL; - name = kasprintf(GFP_KERNEL, "%s-%u", type, index); - if (!name) - return ERR_PTR(-ENOMEM); - np = of_find_node_by_name(np, name); - kfree(name); + name = kasprintf(GFP_KERNEL, "%s-%u", type, index); + if (!name) { + of_node_put(ports); + return ERR_PTR(-ENOMEM); } + np = of_get_child_by_name(ports, name); + kfree(name); + of_node_put(ports); return np; } @@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl) static int tegra_xusb_padctl_probe(struct platform_device *pdev) { - struct device_node *np = of_node_get(pdev->dev.of_node); + struct device_node *np = pdev->dev.of_node; const struct tegra_xusb_padctl_soc *soc; struct tegra_xusb_padctl *padctl; const struct of_device_id *match; @@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev) int err; /* for backwards compatibility with old device trees */ - np = of_find_node_by_name(np, "pads"); + np = of_get_child_by_name(np, "pads"); if (!np) { dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n"); return tegra_xusb_padctl_legacy_probe(pdev); -- cgit v1.2.3 From e796cc6a3a9186c92092e2f5929cf8f65b56cf01 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 17 Nov 2017 16:55:35 +0530 Subject: phy: cpcap-usb: Fix platform_get_irq_byname's error checking. The platform_get_irq_byname() function returns a negative error code if an error occurs, and zero or a positive number on success. Checking the platform_get_irq_byname() return value against zero to detect errors is therefore not correct.
Fixes: 6d6ce40f63af ("phy: cpcap-usb: Add CPCAP PMIC USB support") Signed-off-by: Arvind Yadav Reviewed-by: Sebastian Reichel Acked-by: Tony Lindgren Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/motorola/phy-cpcap-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index accaaaccb662..6601ad0dfb3a 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -310,7 +310,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev, int irq, error; irq = platform_get_irq_byname(pdev, name); - if (!irq) + if (irq < 0) return -ENODEV; error = devm_request_threaded_irq(ddata->dev, irq, NULL, -- cgit v1.2.3 From 3cb0ab6e008f2a9ffe2d1be4246984003caed7e2 Mon Sep 17 00:00:00 2001 From: Chris Zhong Date: Thu, 8 Sep 2016 10:38:11 -0700 Subject: phy: rockchip-typec: add pm_runtime_disable in err case Add pm_runtime_disable in err case to make the pm_runtime_enable/disable is invoked balanced. 
Signed-off-by: Chris Zhong Reviewed-by: Brian Norris Reviewed-by: Douglas Anderson Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/rockchip/phy-rockchip-typec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c index ee85fa0ca4b0..7492c8978217 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c @@ -1137,6 +1137,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev) if (IS_ERR(phy)) { dev_err(dev, "failed to create phy: %s\n", child_np->name); + pm_runtime_disable(dev); return PTR_ERR(phy); } @@ -1146,6 +1147,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev) phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); if (IS_ERR(phy_provider)) { dev_err(dev, "Failed to register phy provider\n"); + pm_runtime_disable(dev); return PTR_ERR(phy_provider); } -- cgit v1.2.3 From 2b88212c4cc67ff33dec5bb4d690044b97a5f979 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Nov 2017 12:56:36 +0100 Subject: phy: rcar-gen3-usb2: select USB_COMMON When USB is disabled, we get a link error for this driver because of the added OTG support drivers/phy/renesas/phy-rcar-gen3-usb2.o: In function `rcar_gen3_phy_usb2_probe': phy-rcar-gen3-usb2.c:(.text+0x250): undefined reference to `of_usb_get_dr_mode_by_phy' Other phy drivers select USB_COMMON for this, so let's do the same here. 
Fixes: 7e0540f41332 ("phy: rcar-gen3-usb2: check dr_mode for otg mode") Signed-off-by: Arnd Bergmann Acked-by: Yoshihiro Shimoda Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/renesas/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig index cb09245e9b4c..c845facacb06 100644 --- a/drivers/phy/renesas/Kconfig +++ b/drivers/phy/renesas/Kconfig @@ -12,7 +12,9 @@ config PHY_RCAR_GEN3_USB2 tristate "Renesas R-Car generation 3 USB 2.0 PHY driver" depends on ARCH_RENESAS depends on EXTCON + depends on USB_SUPPORT select GENERIC_PHY + select USB_COMMON help Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs. -- cgit v1.2.3 From ea01a31b90581a94cdeef7fda9e4522f15ef64f2 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Mon, 20 Nov 2017 17:15:25 +0100 Subject: cros_ec: Split cros_ec_devs module This patch splits the cros_ec_devs module in two parts with a cros_ec_dev module responsible for handling MFD devices registration and a cros_ec_ctl module responsible for handling the various user-space interfaces. For consistency purpose, the driver name for the cros_ec_dev module is now cros-ec-dev instead of cros-ec-ctl. In the next commit, the new cros_ec_dev module will be moved to the MFD subtree so mfd_add_devices() calls are not done from outside MFD. 
Signed-off-by: Thierry Escande Reviewed-by: Gwendal Grignou Tested-by: Guenter Roeck Signed-off-by: Lee Jones --- drivers/mfd/cros_ec.c | 4 ++-- drivers/platform/chrome/Kconfig | 4 ++++ drivers/platform/chrome/Makefile | 8 ++++---- drivers/platform/chrome/cros_ec_debugfs.c | 2 ++ drivers/platform/chrome/cros_ec_dev.c | 7 +++++-- drivers/platform/chrome/cros_ec_lightbar.c | 4 ++++ drivers/platform/chrome/cros_ec_sysfs.c | 3 +++ drivers/platform/chrome/cros_ec_vbc.c | 1 + 8 files changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c index b0ca5a4c841e..d61024141e2b 100644 --- a/drivers/mfd/cros_ec.c +++ b/drivers/mfd/cros_ec.c @@ -40,13 +40,13 @@ static struct cros_ec_platform pd_p = { }; static const struct mfd_cell ec_cell = { - .name = "cros-ec-ctl", + .name = "cros-ec-dev", .platform_data = &ec_p, .pdata_size = sizeof(ec_p), }; static const struct mfd_cell ec_pd_cell = { - .name = "cros-ec-ctl", + .name = "cros-ec-dev", .platform_data = &pd_p, .pdata_size = sizeof(pd_p), }; diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index 0ad6e290bbda..bffc892c8bf1 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -41,12 +41,16 @@ config CHROMEOS_PSTORE config CROS_EC_CHARDEV tristate "Chrome OS Embedded Controller userspace device interface" depends on MFD_CROS_EC + select CROS_EC_CTL ---help--- This driver adds support to talk with the ChromeOS EC from userspace. If you have a supported Chromebook, choose Y or M here. The module will be called cros_ec_dev. 
+config CROS_EC_CTL + tristate + config CROS_EC_LPC tristate "ChromeOS Embedded Controller (LPC)" depends on MFD_CROS_EC && ACPI && (X86 || COMPILE_TEST) diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index a077b1f0211d..bc239ec98fd7 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -2,10 +2,10 @@ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o -cros_ec_devs-objs := cros_ec_dev.o cros_ec_sysfs.o \ - cros_ec_lightbar.o cros_ec_vbc.o \ - cros_ec_debugfs.o -obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_devs.o +cros_ec_ctl-objs := cros_ec_sysfs.o cros_ec_lightbar.o \ + cros_ec_vbc.o cros_ec_debugfs.o +obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o +obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_dev.o cros_ec_lpcs-objs := cros_ec_lpc.o cros_ec_lpc_reg.o cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC) += cros_ec_lpc_mec.o obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpcs.o diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index 4cc66f405760..d0b8ce0d678e 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -390,6 +390,7 @@ remove_debugfs: debugfs_remove_recursive(debug_info->dir); return ret; } +EXPORT_SYMBOL(cros_ec_debugfs_init); void cros_ec_debugfs_remove(struct cros_ec_dev *ec) { @@ -399,3 +400,4 @@ void cros_ec_debugfs_remove(struct cros_ec_dev *ec) debugfs_remove_recursive(ec->debug_info->dir); cros_ec_cleanup_console_log(ec->debug_info); } +EXPORT_SYMBOL(cros_ec_debugfs_remove); diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c index cf6c4f0846b8..daf0ffd367a2 100644 --- a/drivers/platform/chrome/cros_ec_dev.c +++ b/drivers/platform/chrome/cros_ec_dev.c @@ -28,6 +28,8 @@ #include "cros_ec_debugfs.h" #include "cros_ec_dev.h" +#define DRV_NAME "cros-ec-dev" + /* Device variables */ #define CROS_MAX_DEV 128 static int ec_major; @@ 
-461,7 +463,7 @@ static int ec_device_remove(struct platform_device *pdev) } static const struct platform_device_id cros_ec_id[] = { - { "cros-ec-ctl", 0 }, + { DRV_NAME, 0 }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(platform, cros_ec_id); @@ -493,7 +495,7 @@ static const struct dev_pm_ops cros_ec_dev_pm_ops = { static struct platform_driver cros_ec_dev_driver = { .driver = { - .name = "cros-ec-ctl", + .name = DRV_NAME, .pm = &cros_ec_dev_pm_ops, }, .probe = ec_device_probe, @@ -544,6 +546,7 @@ static void __exit cros_ec_dev_exit(void) module_init(cros_ec_dev_init); module_exit(cros_ec_dev_exit); +MODULE_ALIAS("platform:" DRV_NAME); MODULE_AUTHOR("Bill Richardson "); MODULE_DESCRIPTION("Userspace interface to the Chrome OS Embedded Controller"); MODULE_VERSION("1.0"); diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c index fd2b047a2748..925d91c5868e 100644 --- a/drivers/platform/chrome/cros_ec_lightbar.c +++ b/drivers/platform/chrome/cros_ec_lightbar.c @@ -414,6 +414,7 @@ error: return ret; } +EXPORT_SYMBOL(lb_manual_suspend_ctrl); int lb_suspend(struct cros_ec_dev *ec) { @@ -422,6 +423,7 @@ int lb_suspend(struct cros_ec_dev *ec) return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND); } +EXPORT_SYMBOL(lb_suspend); int lb_resume(struct cros_ec_dev *ec) { @@ -430,6 +432,7 @@ int lb_resume(struct cros_ec_dev *ec) return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME); } +EXPORT_SYMBOL(lb_resume); static ssize_t sequence_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -622,3 +625,4 @@ struct attribute_group cros_ec_lightbar_attr_group = { .attrs = __lb_cmds_attrs, .is_visible = cros_ec_lightbar_attrs_are_visible, }; +EXPORT_SYMBOL(cros_ec_lightbar_attr_group); diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c index f3baf9973989..201f11afcdc9 100644 --- a/drivers/platform/chrome/cros_ec_sysfs.c +++ b/drivers/platform/chrome/cros_ec_sysfs.c 
@@ -294,4 +294,7 @@ static struct attribute *__ec_attrs[] = { struct attribute_group cros_ec_attr_group = { .attrs = __ec_attrs, }; +EXPORT_SYMBOL(cros_ec_attr_group); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ChromeOS EC control driver"); diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c index 564a0d08c8bf..6d38e6b08334 100644 --- a/drivers/platform/chrome/cros_ec_vbc.c +++ b/drivers/platform/chrome/cros_ec_vbc.c @@ -135,3 +135,4 @@ struct attribute_group cros_ec_vbc_attr_group = { .bin_attrs = cros_ec_vbc_bin_attrs, .is_bin_visible = cros_ec_vbc_is_visible, }; +EXPORT_SYMBOL(cros_ec_vbc_attr_group); -- cgit v1.2.3 From 5e0115581bbc367c7958bf5ab8c511b808558533 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Mon, 20 Nov 2017 17:15:26 +0100 Subject: cros_ec: Move cros_ec_dev module to drivers/mfd The cros_ec_dev module is responsible for registering the MFD devices attached to the ChromeOS EC. This patch moves this module to drivers/mfd so calls to mfd_add_devices() are not done from outside the MFD subtree anymore. 
Signed-off-by: Thierry Escande Reviewed-by: Gwendal Grignou Tested-by: Guenter Roeck Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 10 + drivers/mfd/Makefile | 1 + drivers/mfd/cros_ec_dev.c | 552 ++++++++++++++++++++++++++++ drivers/mfd/cros_ec_dev.h | 52 +++ drivers/platform/chrome/Kconfig | 10 - drivers/platform/chrome/Makefile | 1 - drivers/platform/chrome/cros_ec_debugfs.c | 3 - drivers/platform/chrome/cros_ec_debugfs.h | 27 -- drivers/platform/chrome/cros_ec_dev.c | 553 ----------------------------- drivers/platform/chrome/cros_ec_dev.h | 52 --- drivers/platform/chrome/cros_ec_lightbar.c | 2 - drivers/platform/chrome/cros_ec_sysfs.c | 2 - include/linux/mfd/cros_ec.h | 4 + 13 files changed, 619 insertions(+), 650 deletions(-) create mode 100644 drivers/mfd/cros_ec_dev.c create mode 100644 drivers/mfd/cros_ec_dev.h delete mode 100644 drivers/platform/chrome/cros_ec_debugfs.h delete mode 100644 drivers/platform/chrome/cros_ec_dev.c delete mode 100644 drivers/platform/chrome/cros_ec_dev.h diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 1d20a800e967..538a2ae8bd25 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -222,6 +222,16 @@ config MFD_CROS_EC_SPI response time cannot be guaranteed, we support ignoring 'pre-amble' bytes before the response actually starts. +config MFD_CROS_EC_CHARDEV + tristate "Chrome OS Embedded Controller userspace device interface" + depends on MFD_CROS_EC + select CROS_EC_CTL + ---help--- + This driver adds support to talk with the ChromeOS EC from userspace. + + If you have a supported Chromebook, choose Y or M here. + The module will be called cros_ec_dev. 
+ config MFD_ASIC3 bool "Compaq ASIC3" depends on GPIOLIB && ARM diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index d9474ade32e6..fcd8af88110e 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -17,6 +17,7 @@ cros_ec_core-$(CONFIG_ACPI) += cros_ec_acpi_gpe.o obj-$(CONFIG_MFD_CROS_EC) += cros_ec_core.o obj-$(CONFIG_MFD_CROS_EC_I2C) += cros_ec_i2c.o obj-$(CONFIG_MFD_CROS_EC_SPI) += cros_ec_spi.o +obj-$(CONFIG_MFD_CROS_EC_CHARDEV) += cros_ec_dev.o obj-$(CONFIG_MFD_EXYNOS_LPASS) += exynos-lpass.o rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c new file mode 100644 index 000000000000..e4fafdd96e5e --- /dev/null +++ b/drivers/mfd/cros_ec_dev.c @@ -0,0 +1,552 @@ +/* + * cros_ec_dev - expose the Chrome OS Embedded Controller to user-space + * + * Copyright (C) 2014 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cros_ec_dev.h" + +#define DRV_NAME "cros-ec-dev" + +/* Device variables */ +#define CROS_MAX_DEV 128 +static int ec_major; + +static const struct attribute_group *cros_ec_groups[] = { + &cros_ec_attr_group, + &cros_ec_lightbar_attr_group, + &cros_ec_vbc_attr_group, + NULL, +}; + +static struct class cros_class = { + .owner = THIS_MODULE, + .name = "chromeos", + .dev_groups = cros_ec_groups, +}; + +/* Basic communication */ +static int ec_get_version(struct cros_ec_dev *ec, char *str, int maxlen) +{ + struct ec_response_get_version *resp; + static const char * const current_image_name[] = { + "unknown", "read-only", "read-write", "invalid", + }; + struct cros_ec_command *msg; + int ret; + + msg = kmalloc(sizeof(*msg) + sizeof(*resp), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + msg->version = 0; + msg->command = EC_CMD_GET_VERSION + ec->cmd_offset; + msg->insize = sizeof(*resp); + msg->outsize = 0; + + ret = cros_ec_cmd_xfer(ec->ec_dev, msg); + if (ret < 0) + goto exit; + + if (msg->result != EC_RES_SUCCESS) { + snprintf(str, maxlen, + "%s\nUnknown EC version: EC returned %d\n", + CROS_EC_DEV_VERSION, msg->result); + ret = -EINVAL; + goto exit; + } + + resp = (struct ec_response_get_version *)msg->data; + if (resp->current_image >= ARRAY_SIZE(current_image_name)) + resp->current_image = 3; /* invalid */ + + snprintf(str, maxlen, "%s\n%s\n%s\n%s\n", CROS_EC_DEV_VERSION, + resp->version_string_ro, resp->version_string_rw, + current_image_name[resp->current_image]); + + ret = 0; +exit: + kfree(msg); + return ret; +} + +static int cros_ec_check_features(struct cros_ec_dev *ec, int feature) +{ + struct cros_ec_command *msg; + int ret; + + if (ec->features[0] == -1U && ec->features[1] == -1U) { + /* features bitmap not read yet */ + + msg = kmalloc(sizeof(*msg) + sizeof(ec->features), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + msg->version = 0; + msg->command = 
EC_CMD_GET_FEATURES + ec->cmd_offset; + msg->insize = sizeof(ec->features); + msg->outsize = 0; + + ret = cros_ec_cmd_xfer(ec->ec_dev, msg); + if (ret < 0 || msg->result != EC_RES_SUCCESS) { + dev_warn(ec->dev, "cannot get EC features: %d/%d\n", + ret, msg->result); + memset(ec->features, 0, sizeof(ec->features)); + } + + memcpy(ec->features, msg->data, sizeof(ec->features)); + + dev_dbg(ec->dev, "EC features %08x %08x\n", + ec->features[0], ec->features[1]); + + kfree(msg); + } + + return ec->features[feature / 32] & EC_FEATURE_MASK_0(feature); +} + +/* Device file ops */ +static int ec_device_open(struct inode *inode, struct file *filp) +{ + struct cros_ec_dev *ec = container_of(inode->i_cdev, + struct cros_ec_dev, cdev); + filp->private_data = ec; + nonseekable_open(inode, filp); + return 0; +} + +static int ec_device_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static ssize_t ec_device_read(struct file *filp, char __user *buffer, + size_t length, loff_t *offset) +{ + struct cros_ec_dev *ec = filp->private_data; + char msg[sizeof(struct ec_response_get_version) + + sizeof(CROS_EC_DEV_VERSION)]; + size_t count; + int ret; + + if (*offset != 0) + return 0; + + ret = ec_get_version(ec, msg, sizeof(msg)); + if (ret) + return ret; + + count = min(length, strlen(msg)); + + if (copy_to_user(buffer, msg, count)) + return -EFAULT; + + *offset = count; + return count; +} + +/* Ioctls */ +static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) +{ + long ret; + struct cros_ec_command u_cmd; + struct cros_ec_command *s_cmd; + + if (copy_from_user(&u_cmd, arg, sizeof(u_cmd))) + return -EFAULT; + + if ((u_cmd.outsize > EC_MAX_MSG_BYTES) || + (u_cmd.insize > EC_MAX_MSG_BYTES)) + return -EINVAL; + + s_cmd = kmalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize), + GFP_KERNEL); + if (!s_cmd) + return -ENOMEM; + + if (copy_from_user(s_cmd, arg, sizeof(*s_cmd) + u_cmd.outsize)) { + ret = -EFAULT; + goto exit; + } + + if (u_cmd.outsize 
!= s_cmd->outsize || + u_cmd.insize != s_cmd->insize) { + ret = -EINVAL; + goto exit; + } + + s_cmd->command += ec->cmd_offset; + ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd); + /* Only copy data to userland if data was received. */ + if (ret < 0) + goto exit; + + if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize)) + ret = -EFAULT; +exit: + kfree(s_cmd); + return ret; +} + +static long ec_device_ioctl_readmem(struct cros_ec_dev *ec, void __user *arg) +{ + struct cros_ec_device *ec_dev = ec->ec_dev; + struct cros_ec_readmem s_mem = { }; + long num; + + /* Not every platform supports direct reads */ + if (!ec_dev->cmd_readmem) + return -ENOTTY; + + if (copy_from_user(&s_mem, arg, sizeof(s_mem))) + return -EFAULT; + + num = ec_dev->cmd_readmem(ec_dev, s_mem.offset, s_mem.bytes, + s_mem.buffer); + if (num <= 0) + return num; + + if (copy_to_user((void __user *)arg, &s_mem, sizeof(s_mem))) + return -EFAULT; + + return 0; +} + +static long ec_device_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct cros_ec_dev *ec = filp->private_data; + + if (_IOC_TYPE(cmd) != CROS_EC_DEV_IOC) + return -ENOTTY; + + switch (cmd) { + case CROS_EC_DEV_IOCXCMD: + return ec_device_ioctl_xcmd(ec, (void __user *)arg); + case CROS_EC_DEV_IOCRDMEM: + return ec_device_ioctl_readmem(ec, (void __user *)arg); + } + + return -ENOTTY; +} + +/* Module initialization */ +static const struct file_operations fops = { + .open = ec_device_open, + .release = ec_device_release, + .read = ec_device_read, + .unlocked_ioctl = ec_device_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ec_device_ioctl, +#endif +}; + +static void __remove(struct device *dev) +{ + struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev, + class_dev); + kfree(ec); +} + +static void cros_ec_sensors_register(struct cros_ec_dev *ec) +{ + /* + * Issue a command to get the number of sensor reported. + * Build an array of sensors driver and register them all. 
+ */ + int ret, i, id, sensor_num; + struct mfd_cell *sensor_cells; + struct cros_ec_sensor_platform *sensor_platforms; + int sensor_type[MOTIONSENSE_TYPE_MAX]; + struct ec_params_motion_sense *params; + struct ec_response_motion_sense *resp; + struct cros_ec_command *msg; + + msg = kzalloc(sizeof(struct cros_ec_command) + + max(sizeof(*params), sizeof(*resp)), GFP_KERNEL); + if (msg == NULL) + return; + + msg->version = 2; + msg->command = EC_CMD_MOTION_SENSE_CMD + ec->cmd_offset; + msg->outsize = sizeof(*params); + msg->insize = sizeof(*resp); + + params = (struct ec_params_motion_sense *)msg->data; + params->cmd = MOTIONSENSE_CMD_DUMP; + + ret = cros_ec_cmd_xfer(ec->ec_dev, msg); + if (ret < 0 || msg->result != EC_RES_SUCCESS) { + dev_warn(ec->dev, "cannot get EC sensor information: %d/%d\n", + ret, msg->result); + goto error; + } + + resp = (struct ec_response_motion_sense *)msg->data; + sensor_num = resp->dump.sensor_count; + /* Allocate 2 extra sensors in case lid angle or FIFO are needed */ + sensor_cells = kzalloc(sizeof(struct mfd_cell) * (sensor_num + 2), + GFP_KERNEL); + if (sensor_cells == NULL) + goto error; + + sensor_platforms = kzalloc(sizeof(struct cros_ec_sensor_platform) * + (sensor_num + 1), GFP_KERNEL); + if (sensor_platforms == NULL) + goto error_platforms; + + memset(sensor_type, 0, sizeof(sensor_type)); + id = 0; + for (i = 0; i < sensor_num; i++) { + params->cmd = MOTIONSENSE_CMD_INFO; + params->info.sensor_num = i; + ret = cros_ec_cmd_xfer(ec->ec_dev, msg); + if (ret < 0 || msg->result != EC_RES_SUCCESS) { + dev_warn(ec->dev, "no info for EC sensor %d : %d/%d\n", + i, ret, msg->result); + continue; + } + switch (resp->info.type) { + case MOTIONSENSE_TYPE_ACCEL: + sensor_cells[id].name = "cros-ec-accel"; + break; + case MOTIONSENSE_TYPE_BARO: + sensor_cells[id].name = "cros-ec-baro"; + break; + case MOTIONSENSE_TYPE_GYRO: + sensor_cells[id].name = "cros-ec-gyro"; + break; + case MOTIONSENSE_TYPE_MAG: + sensor_cells[id].name = "cros-ec-mag"; 
+ break; + case MOTIONSENSE_TYPE_PROX: + sensor_cells[id].name = "cros-ec-prox"; + break; + case MOTIONSENSE_TYPE_LIGHT: + sensor_cells[id].name = "cros-ec-light"; + break; + case MOTIONSENSE_TYPE_ACTIVITY: + sensor_cells[id].name = "cros-ec-activity"; + break; + default: + dev_warn(ec->dev, "unknown type %d\n", resp->info.type); + continue; + } + sensor_platforms[id].sensor_num = i; + sensor_cells[id].id = sensor_type[resp->info.type]; + sensor_cells[id].platform_data = &sensor_platforms[id]; + sensor_cells[id].pdata_size = + sizeof(struct cros_ec_sensor_platform); + + sensor_type[resp->info.type]++; + id++; + } + if (sensor_type[MOTIONSENSE_TYPE_ACCEL] >= 2) { + sensor_platforms[id].sensor_num = sensor_num; + + sensor_cells[id].name = "cros-ec-angle"; + sensor_cells[id].id = 0; + sensor_cells[id].platform_data = &sensor_platforms[id]; + sensor_cells[id].pdata_size = + sizeof(struct cros_ec_sensor_platform); + id++; + } + if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) { + sensor_cells[id].name = "cros-ec-ring"; + id++; + } + + ret = mfd_add_devices(ec->dev, 0, sensor_cells, id, + NULL, 0, NULL); + if (ret) + dev_err(ec->dev, "failed to add EC sensors\n"); + + kfree(sensor_platforms); +error_platforms: + kfree(sensor_cells); +error: + kfree(msg); +} + +static int ec_device_probe(struct platform_device *pdev) +{ + int retval = -ENOMEM; + struct device *dev = &pdev->dev; + struct cros_ec_platform *ec_platform = dev_get_platdata(dev); + struct cros_ec_dev *ec = kzalloc(sizeof(*ec), GFP_KERNEL); + + if (!ec) + return retval; + + dev_set_drvdata(dev, ec); + ec->ec_dev = dev_get_drvdata(dev->parent); + ec->dev = dev; + ec->cmd_offset = ec_platform->cmd_offset; + ec->features[0] = -1U; /* Not cached yet */ + ec->features[1] = -1U; /* Not cached yet */ + device_initialize(&ec->class_dev); + cdev_init(&ec->cdev, &fops); + + /* + * Add the class device + * Link to the character device for creating the /dev entry + * in devtmpfs. 
+ */ + ec->class_dev.devt = MKDEV(ec_major, pdev->id); + ec->class_dev.class = &cros_class; + ec->class_dev.parent = dev; + ec->class_dev.release = __remove; + + retval = dev_set_name(&ec->class_dev, "%s", ec_platform->ec_name); + if (retval) { + dev_err(dev, "dev_set_name failed => %d\n", retval); + goto failed; + } + + retval = cdev_device_add(&ec->cdev, &ec->class_dev); + if (retval) { + dev_err(dev, "cdev_device_add failed => %d\n", retval); + goto failed; + } + + if (cros_ec_debugfs_init(ec)) + dev_warn(dev, "failed to create debugfs directory\n"); + + /* check whether this EC is a sensor hub. */ + if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE)) + cros_ec_sensors_register(ec); + + /* Take control of the lightbar from the EC. */ + lb_manual_suspend_ctrl(ec, 1); + + return 0; + +failed: + put_device(&ec->class_dev); + return retval; +} + +static int ec_device_remove(struct platform_device *pdev) +{ + struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev); + + /* Let the EC take over the lightbar again. 
*/ + lb_manual_suspend_ctrl(ec, 0); + + cros_ec_debugfs_remove(ec); + + cdev_del(&ec->cdev); + device_unregister(&ec->class_dev); + return 0; +} + +static const struct platform_device_id cros_ec_id[] = { + { DRV_NAME, 0 }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(platform, cros_ec_id); + +static __maybe_unused int ec_device_suspend(struct device *dev) +{ + struct cros_ec_dev *ec = dev_get_drvdata(dev); + + lb_suspend(ec); + + return 0; +} + +static __maybe_unused int ec_device_resume(struct device *dev) +{ + struct cros_ec_dev *ec = dev_get_drvdata(dev); + + lb_resume(ec); + + return 0; +} + +static const struct dev_pm_ops cros_ec_dev_pm_ops = { +#ifdef CONFIG_PM_SLEEP + .suspend = ec_device_suspend, + .resume = ec_device_resume, +#endif +}; + +static struct platform_driver cros_ec_dev_driver = { + .driver = { + .name = DRV_NAME, + .pm = &cros_ec_dev_pm_ops, + }, + .probe = ec_device_probe, + .remove = ec_device_remove, +}; + +static int __init cros_ec_dev_init(void) +{ + int ret; + dev_t dev = 0; + + ret = class_register(&cros_class); + if (ret) { + pr_err(CROS_EC_DEV_NAME ": failed to register device class\n"); + return ret; + } + + /* Get a range of minor numbers (starting with 0) to work with */ + ret = alloc_chrdev_region(&dev, 0, CROS_MAX_DEV, CROS_EC_DEV_NAME); + if (ret < 0) { + pr_err(CROS_EC_DEV_NAME ": alloc_chrdev_region() failed\n"); + goto failed_chrdevreg; + } + ec_major = MAJOR(dev); + + /* Register the driver */ + ret = platform_driver_register(&cros_ec_dev_driver); + if (ret < 0) { + pr_warn(CROS_EC_DEV_NAME ": can't register driver: %d\n", ret); + goto failed_devreg; + } + return 0; + +failed_devreg: + unregister_chrdev_region(MKDEV(ec_major, 0), CROS_MAX_DEV); +failed_chrdevreg: + class_unregister(&cros_class); + return ret; +} + +static void __exit cros_ec_dev_exit(void) +{ + platform_driver_unregister(&cros_ec_dev_driver); + unregister_chrdev(ec_major, CROS_EC_DEV_NAME); + class_unregister(&cros_class); +} + 
+module_init(cros_ec_dev_init); +module_exit(cros_ec_dev_exit); + +MODULE_ALIAS("platform:" DRV_NAME); +MODULE_AUTHOR("Bill Richardson "); +MODULE_DESCRIPTION("Userspace interface to the Chrome OS Embedded Controller"); +MODULE_VERSION("1.0"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/cros_ec_dev.h b/drivers/mfd/cros_ec_dev.h new file mode 100644 index 000000000000..45e9453608c5 --- /dev/null +++ b/drivers/mfd/cros_ec_dev.h @@ -0,0 +1,52 @@ +/* + * cros_ec_dev - expose the Chrome OS Embedded Controller to userspace + * + * Copyright (C) 2014 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _CROS_EC_DEV_H_ +#define _CROS_EC_DEV_H_ + +#include +#include +#include + +#define CROS_EC_DEV_VERSION "1.0.0" + +/* + * @offset: within EC_LPC_ADDR_MEMMAP region + * @bytes: number of bytes to read. 
zero means "read a string" (including '\0') + * (at most only EC_MEMMAP_SIZE bytes can be read) + * @buffer: where to store the result + * ioctl returns the number of bytes read, negative on error + */ +struct cros_ec_readmem { + uint32_t offset; + uint32_t bytes; + uint8_t buffer[EC_MEMMAP_SIZE]; +}; + +#define CROS_EC_DEV_IOC 0xEC +#define CROS_EC_DEV_IOCXCMD _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command) +#define CROS_EC_DEV_IOCRDMEM _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem) + +/* Lightbar utilities */ +extern bool ec_has_lightbar(struct cros_ec_dev *ec); +extern int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable); +extern int lb_suspend(struct cros_ec_dev *ec); +extern int lb_resume(struct cros_ec_dev *ec); + +#endif /* _CROS_EC_DEV_H_ */ diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig index bffc892c8bf1..e728a96cabfd 100644 --- a/drivers/platform/chrome/Kconfig +++ b/drivers/platform/chrome/Kconfig @@ -38,16 +38,6 @@ config CHROMEOS_PSTORE If you have a supported Chromebook, choose Y or M here. The module will be called chromeos_pstore. -config CROS_EC_CHARDEV - tristate "Chrome OS Embedded Controller userspace device interface" - depends on MFD_CROS_EC - select CROS_EC_CTL - ---help--- - This driver adds support to talk with the ChromeOS EC from userspace. - - If you have a supported Chromebook, choose Y or M here. - The module will be called cros_ec_dev. 
- config CROS_EC_CTL tristate diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile index bc239ec98fd7..ff3b369911f0 100644 --- a/drivers/platform/chrome/Makefile +++ b/drivers/platform/chrome/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o cros_ec_ctl-objs := cros_ec_sysfs.o cros_ec_lightbar.o \ cros_ec_vbc.o cros_ec_debugfs.o obj-$(CONFIG_CROS_EC_CTL) += cros_ec_ctl.o -obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_dev.o cros_ec_lpcs-objs := cros_ec_lpc.o cros_ec_lpc_reg.o cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC) += cros_ec_lpc_mec.o obj-$(CONFIG_CROS_EC_LPC) += cros_ec_lpcs.o diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index d0b8ce0d678e..98a35d32f9dd 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -29,9 +29,6 @@ #include #include -#include "cros_ec_dev.h" -#include "cros_ec_debugfs.h" - #define LOG_SHIFT 14 #define LOG_SIZE (1 << LOG_SHIFT) #define LOG_POLL_SEC 10 diff --git a/drivers/platform/chrome/cros_ec_debugfs.h b/drivers/platform/chrome/cros_ec_debugfs.h deleted file mode 100644 index 1ff3a50aa1b8..000000000000 --- a/drivers/platform/chrome/cros_ec_debugfs.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2015 Google, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#ifndef _DRV_CROS_EC_DEBUGFS_H_ -#define _DRV_CROS_EC_DEBUGFS_H_ - -#include "cros_ec_dev.h" - -/* debugfs stuff */ -int cros_ec_debugfs_init(struct cros_ec_dev *ec); -void cros_ec_debugfs_remove(struct cros_ec_dev *ec); - -#endif /* _DRV_CROS_EC_DEBUGFS_H_ */ diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c deleted file mode 100644 index daf0ffd367a2..000000000000 --- a/drivers/platform/chrome/cros_ec_dev.c +++ /dev/null @@ -1,553 +0,0 @@ -/* - * cros_ec_dev - expose the Chrome OS Embedded Controller to user-space - * - * Copyright (C) 2014 Google, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include "cros_ec_debugfs.h" -#include "cros_ec_dev.h" - -#define DRV_NAME "cros-ec-dev" - -/* Device variables */ -#define CROS_MAX_DEV 128 -static int ec_major; - -static const struct attribute_group *cros_ec_groups[] = { - &cros_ec_attr_group, - &cros_ec_lightbar_attr_group, - &cros_ec_vbc_attr_group, - NULL, -}; - -static struct class cros_class = { - .owner = THIS_MODULE, - .name = "chromeos", - .dev_groups = cros_ec_groups, -}; - -/* Basic communication */ -static int ec_get_version(struct cros_ec_dev *ec, char *str, int maxlen) -{ - struct ec_response_get_version *resp; - static const char * const current_image_name[] = { - "unknown", "read-only", "read-write", "invalid", - }; - struct cros_ec_command *msg; - int ret; - - msg = kmalloc(sizeof(*msg) + sizeof(*resp), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - msg->version = 0; - msg->command = EC_CMD_GET_VERSION + ec->cmd_offset; - msg->insize = sizeof(*resp); - msg->outsize = 0; - - ret = cros_ec_cmd_xfer(ec->ec_dev, msg); - if (ret < 0) - goto exit; - - if (msg->result != EC_RES_SUCCESS) { - snprintf(str, maxlen, - "%s\nUnknown EC version: EC returned %d\n", - CROS_EC_DEV_VERSION, msg->result); - ret = -EINVAL; - goto exit; - } - - resp = (struct ec_response_get_version *)msg->data; - if (resp->current_image >= ARRAY_SIZE(current_image_name)) - resp->current_image = 3; /* invalid */ - - snprintf(str, maxlen, "%s\n%s\n%s\n%s\n", CROS_EC_DEV_VERSION, - resp->version_string_ro, resp->version_string_rw, - current_image_name[resp->current_image]); - - ret = 0; -exit: - kfree(msg); - return ret; -} - -static int cros_ec_check_features(struct cros_ec_dev *ec, int feature) -{ - struct cros_ec_command *msg; - int ret; - - if (ec->features[0] == -1U && ec->features[1] == -1U) { - /* features bitmap not read yet */ - - msg = kmalloc(sizeof(*msg) + sizeof(ec->features), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - msg->version = 0; - 
msg->command = EC_CMD_GET_FEATURES + ec->cmd_offset; - msg->insize = sizeof(ec->features); - msg->outsize = 0; - - ret = cros_ec_cmd_xfer(ec->ec_dev, msg); - if (ret < 0 || msg->result != EC_RES_SUCCESS) { - dev_warn(ec->dev, "cannot get EC features: %d/%d\n", - ret, msg->result); - memset(ec->features, 0, sizeof(ec->features)); - } - - memcpy(ec->features, msg->data, sizeof(ec->features)); - - dev_dbg(ec->dev, "EC features %08x %08x\n", - ec->features[0], ec->features[1]); - - kfree(msg); - } - - return ec->features[feature / 32] & EC_FEATURE_MASK_0(feature); -} - -/* Device file ops */ -static int ec_device_open(struct inode *inode, struct file *filp) -{ - struct cros_ec_dev *ec = container_of(inode->i_cdev, - struct cros_ec_dev, cdev); - filp->private_data = ec; - nonseekable_open(inode, filp); - return 0; -} - -static int ec_device_release(struct inode *inode, struct file *filp) -{ - return 0; -} - -static ssize_t ec_device_read(struct file *filp, char __user *buffer, - size_t length, loff_t *offset) -{ - struct cros_ec_dev *ec = filp->private_data; - char msg[sizeof(struct ec_response_get_version) + - sizeof(CROS_EC_DEV_VERSION)]; - size_t count; - int ret; - - if (*offset != 0) - return 0; - - ret = ec_get_version(ec, msg, sizeof(msg)); - if (ret) - return ret; - - count = min(length, strlen(msg)); - - if (copy_to_user(buffer, msg, count)) - return -EFAULT; - - *offset = count; - return count; -} - -/* Ioctls */ -static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) -{ - long ret; - struct cros_ec_command u_cmd; - struct cros_ec_command *s_cmd; - - if (copy_from_user(&u_cmd, arg, sizeof(u_cmd))) - return -EFAULT; - - if ((u_cmd.outsize > EC_MAX_MSG_BYTES) || - (u_cmd.insize > EC_MAX_MSG_BYTES)) - return -EINVAL; - - s_cmd = kmalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize), - GFP_KERNEL); - if (!s_cmd) - return -ENOMEM; - - if (copy_from_user(s_cmd, arg, sizeof(*s_cmd) + u_cmd.outsize)) { - ret = -EFAULT; - goto exit; - } - - if 
(u_cmd.outsize != s_cmd->outsize || - u_cmd.insize != s_cmd->insize) { - ret = -EINVAL; - goto exit; - } - - s_cmd->command += ec->cmd_offset; - ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd); - /* Only copy data to userland if data was received. */ - if (ret < 0) - goto exit; - - if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize)) - ret = -EFAULT; -exit: - kfree(s_cmd); - return ret; -} - -static long ec_device_ioctl_readmem(struct cros_ec_dev *ec, void __user *arg) -{ - struct cros_ec_device *ec_dev = ec->ec_dev; - struct cros_ec_readmem s_mem = { }; - long num; - - /* Not every platform supports direct reads */ - if (!ec_dev->cmd_readmem) - return -ENOTTY; - - if (copy_from_user(&s_mem, arg, sizeof(s_mem))) - return -EFAULT; - - num = ec_dev->cmd_readmem(ec_dev, s_mem.offset, s_mem.bytes, - s_mem.buffer); - if (num <= 0) - return num; - - if (copy_to_user((void __user *)arg, &s_mem, sizeof(s_mem))) - return -EFAULT; - - return 0; -} - -static long ec_device_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct cros_ec_dev *ec = filp->private_data; - - if (_IOC_TYPE(cmd) != CROS_EC_DEV_IOC) - return -ENOTTY; - - switch (cmd) { - case CROS_EC_DEV_IOCXCMD: - return ec_device_ioctl_xcmd(ec, (void __user *)arg); - case CROS_EC_DEV_IOCRDMEM: - return ec_device_ioctl_readmem(ec, (void __user *)arg); - } - - return -ENOTTY; -} - -/* Module initialization */ -static const struct file_operations fops = { - .open = ec_device_open, - .release = ec_device_release, - .read = ec_device_read, - .unlocked_ioctl = ec_device_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ec_device_ioctl, -#endif -}; - -static void __remove(struct device *dev) -{ - struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev, - class_dev); - kfree(ec); -} - -static void cros_ec_sensors_register(struct cros_ec_dev *ec) -{ - /* - * Issue a command to get the number of sensor reported. - * Build an array of sensors driver and register them all. 
- */ - int ret, i, id, sensor_num; - struct mfd_cell *sensor_cells; - struct cros_ec_sensor_platform *sensor_platforms; - int sensor_type[MOTIONSENSE_TYPE_MAX]; - struct ec_params_motion_sense *params; - struct ec_response_motion_sense *resp; - struct cros_ec_command *msg; - - msg = kzalloc(sizeof(struct cros_ec_command) + - max(sizeof(*params), sizeof(*resp)), GFP_KERNEL); - if (msg == NULL) - return; - - msg->version = 2; - msg->command = EC_CMD_MOTION_SENSE_CMD + ec->cmd_offset; - msg->outsize = sizeof(*params); - msg->insize = sizeof(*resp); - - params = (struct ec_params_motion_sense *)msg->data; - params->cmd = MOTIONSENSE_CMD_DUMP; - - ret = cros_ec_cmd_xfer(ec->ec_dev, msg); - if (ret < 0 || msg->result != EC_RES_SUCCESS) { - dev_warn(ec->dev, "cannot get EC sensor information: %d/%d\n", - ret, msg->result); - goto error; - } - - resp = (struct ec_response_motion_sense *)msg->data; - sensor_num = resp->dump.sensor_count; - /* Allocate 2 extra sensors in case lid angle or FIFO are needed */ - sensor_cells = kzalloc(sizeof(struct mfd_cell) * (sensor_num + 2), - GFP_KERNEL); - if (sensor_cells == NULL) - goto error; - - sensor_platforms = kzalloc(sizeof(struct cros_ec_sensor_platform) * - (sensor_num + 1), GFP_KERNEL); - if (sensor_platforms == NULL) - goto error_platforms; - - memset(sensor_type, 0, sizeof(sensor_type)); - id = 0; - for (i = 0; i < sensor_num; i++) { - params->cmd = MOTIONSENSE_CMD_INFO; - params->info.sensor_num = i; - ret = cros_ec_cmd_xfer(ec->ec_dev, msg); - if (ret < 0 || msg->result != EC_RES_SUCCESS) { - dev_warn(ec->dev, "no info for EC sensor %d : %d/%d\n", - i, ret, msg->result); - continue; - } - switch (resp->info.type) { - case MOTIONSENSE_TYPE_ACCEL: - sensor_cells[id].name = "cros-ec-accel"; - break; - case MOTIONSENSE_TYPE_BARO: - sensor_cells[id].name = "cros-ec-baro"; - break; - case MOTIONSENSE_TYPE_GYRO: - sensor_cells[id].name = "cros-ec-gyro"; - break; - case MOTIONSENSE_TYPE_MAG: - sensor_cells[id].name = "cros-ec-mag"; 
- break; - case MOTIONSENSE_TYPE_PROX: - sensor_cells[id].name = "cros-ec-prox"; - break; - case MOTIONSENSE_TYPE_LIGHT: - sensor_cells[id].name = "cros-ec-light"; - break; - case MOTIONSENSE_TYPE_ACTIVITY: - sensor_cells[id].name = "cros-ec-activity"; - break; - default: - dev_warn(ec->dev, "unknown type %d\n", resp->info.type); - continue; - } - sensor_platforms[id].sensor_num = i; - sensor_cells[id].id = sensor_type[resp->info.type]; - sensor_cells[id].platform_data = &sensor_platforms[id]; - sensor_cells[id].pdata_size = - sizeof(struct cros_ec_sensor_platform); - - sensor_type[resp->info.type]++; - id++; - } - if (sensor_type[MOTIONSENSE_TYPE_ACCEL] >= 2) { - sensor_platforms[id].sensor_num = sensor_num; - - sensor_cells[id].name = "cros-ec-angle"; - sensor_cells[id].id = 0; - sensor_cells[id].platform_data = &sensor_platforms[id]; - sensor_cells[id].pdata_size = - sizeof(struct cros_ec_sensor_platform); - id++; - } - if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) { - sensor_cells[id].name = "cros-ec-ring"; - id++; - } - - ret = mfd_add_devices(ec->dev, 0, sensor_cells, id, - NULL, 0, NULL); - if (ret) - dev_err(ec->dev, "failed to add EC sensors\n"); - - kfree(sensor_platforms); -error_platforms: - kfree(sensor_cells); -error: - kfree(msg); -} - -static int ec_device_probe(struct platform_device *pdev) -{ - int retval = -ENOMEM; - struct device *dev = &pdev->dev; - struct cros_ec_platform *ec_platform = dev_get_platdata(dev); - struct cros_ec_dev *ec = kzalloc(sizeof(*ec), GFP_KERNEL); - - if (!ec) - return retval; - - dev_set_drvdata(dev, ec); - ec->ec_dev = dev_get_drvdata(dev->parent); - ec->dev = dev; - ec->cmd_offset = ec_platform->cmd_offset; - ec->features[0] = -1U; /* Not cached yet */ - ec->features[1] = -1U; /* Not cached yet */ - device_initialize(&ec->class_dev); - cdev_init(&ec->cdev, &fops); - - /* - * Add the class device - * Link to the character device for creating the /dev entry - * in devtmpfs. 
- */ - ec->class_dev.devt = MKDEV(ec_major, pdev->id); - ec->class_dev.class = &cros_class; - ec->class_dev.parent = dev; - ec->class_dev.release = __remove; - - retval = dev_set_name(&ec->class_dev, "%s", ec_platform->ec_name); - if (retval) { - dev_err(dev, "dev_set_name failed => %d\n", retval); - goto failed; - } - - retval = cdev_device_add(&ec->cdev, &ec->class_dev); - if (retval) { - dev_err(dev, "cdev_device_add failed => %d\n", retval); - goto failed; - } - - if (cros_ec_debugfs_init(ec)) - dev_warn(dev, "failed to create debugfs directory\n"); - - /* check whether this EC is a sensor hub. */ - if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE)) - cros_ec_sensors_register(ec); - - /* Take control of the lightbar from the EC. */ - lb_manual_suspend_ctrl(ec, 1); - - return 0; - -failed: - put_device(&ec->class_dev); - return retval; -} - -static int ec_device_remove(struct platform_device *pdev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev); - - /* Let the EC take over the lightbar again. 
*/ - lb_manual_suspend_ctrl(ec, 0); - - cros_ec_debugfs_remove(ec); - - cdev_del(&ec->cdev); - device_unregister(&ec->class_dev); - return 0; -} - -static const struct platform_device_id cros_ec_id[] = { - { DRV_NAME, 0 }, - { /* sentinel */ }, -}; -MODULE_DEVICE_TABLE(platform, cros_ec_id); - -static __maybe_unused int ec_device_suspend(struct device *dev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(dev); - - lb_suspend(ec); - - return 0; -} - -static __maybe_unused int ec_device_resume(struct device *dev) -{ - struct cros_ec_dev *ec = dev_get_drvdata(dev); - - lb_resume(ec); - - return 0; -} - -static const struct dev_pm_ops cros_ec_dev_pm_ops = { -#ifdef CONFIG_PM_SLEEP - .suspend = ec_device_suspend, - .resume = ec_device_resume, -#endif -}; - -static struct platform_driver cros_ec_dev_driver = { - .driver = { - .name = DRV_NAME, - .pm = &cros_ec_dev_pm_ops, - }, - .probe = ec_device_probe, - .remove = ec_device_remove, -}; - -static int __init cros_ec_dev_init(void) -{ - int ret; - dev_t dev = 0; - - ret = class_register(&cros_class); - if (ret) { - pr_err(CROS_EC_DEV_NAME ": failed to register device class\n"); - return ret; - } - - /* Get a range of minor numbers (starting with 0) to work with */ - ret = alloc_chrdev_region(&dev, 0, CROS_MAX_DEV, CROS_EC_DEV_NAME); - if (ret < 0) { - pr_err(CROS_EC_DEV_NAME ": alloc_chrdev_region() failed\n"); - goto failed_chrdevreg; - } - ec_major = MAJOR(dev); - - /* Register the driver */ - ret = platform_driver_register(&cros_ec_dev_driver); - if (ret < 0) { - pr_warn(CROS_EC_DEV_NAME ": can't register driver: %d\n", ret); - goto failed_devreg; - } - return 0; - -failed_devreg: - unregister_chrdev_region(MKDEV(ec_major, 0), CROS_MAX_DEV); -failed_chrdevreg: - class_unregister(&cros_class); - return ret; -} - -static void __exit cros_ec_dev_exit(void) -{ - platform_driver_unregister(&cros_ec_dev_driver); - unregister_chrdev(ec_major, CROS_EC_DEV_NAME); - class_unregister(&cros_class); -} - 
-module_init(cros_ec_dev_init); -module_exit(cros_ec_dev_exit); - -MODULE_ALIAS("platform:" DRV_NAME); -MODULE_AUTHOR("Bill Richardson "); -MODULE_DESCRIPTION("Userspace interface to the Chrome OS Embedded Controller"); -MODULE_VERSION("1.0"); -MODULE_LICENSE("GPL"); diff --git a/drivers/platform/chrome/cros_ec_dev.h b/drivers/platform/chrome/cros_ec_dev.h deleted file mode 100644 index 45e9453608c5..000000000000 --- a/drivers/platform/chrome/cros_ec_dev.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * cros_ec_dev - expose the Chrome OS Embedded Controller to userspace - * - * Copyright (C) 2014 Google, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _CROS_EC_DEV_H_ -#define _CROS_EC_DEV_H_ - -#include -#include -#include - -#define CROS_EC_DEV_VERSION "1.0.0" - -/* - * @offset: within EC_LPC_ADDR_MEMMAP region - * @bytes: number of bytes to read. 
zero means "read a string" (including '\0') - * (at most only EC_MEMMAP_SIZE bytes can be read) - * @buffer: where to store the result - * ioctl returns the number of bytes read, negative on error - */ -struct cros_ec_readmem { - uint32_t offset; - uint32_t bytes; - uint8_t buffer[EC_MEMMAP_SIZE]; -}; - -#define CROS_EC_DEV_IOC 0xEC -#define CROS_EC_DEV_IOCXCMD _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command) -#define CROS_EC_DEV_IOCRDMEM _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem) - -/* Lightbar utilities */ -extern bool ec_has_lightbar(struct cros_ec_dev *ec); -extern int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable); -extern int lb_suspend(struct cros_ec_dev *ec); -extern int lb_resume(struct cros_ec_dev *ec); - -#endif /* _CROS_EC_DEV_H_ */ diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c index 925d91c5868e..6ea79d495aa2 100644 --- a/drivers/platform/chrome/cros_ec_lightbar.c +++ b/drivers/platform/chrome/cros_ec_lightbar.c @@ -33,8 +33,6 @@ #include #include -#include "cros_ec_dev.h" - /* Rate-limit the lightbar interface to prevent DoS. 
*/ static unsigned long lb_interval_jiffies = 50 * HZ / 1000; diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c index 201f11afcdc9..d6eebe872187 100644 --- a/drivers/platform/chrome/cros_ec_sysfs.c +++ b/drivers/platform/chrome/cros_ec_sysfs.c @@ -34,8 +34,6 @@ #include #include -#include "cros_ec_dev.h" - /* Accessor functions */ static ssize_t show_ec_reboot(struct device *dev, diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 4e887ba22635..c61535979b8f 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -322,6 +322,10 @@ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; extern struct attribute_group cros_ec_vbc_attr_group; +/* debugfs stuff */ +int cros_ec_debugfs_init(struct cros_ec_dev *ec); +void cros_ec_debugfs_remove(struct cros_ec_dev *ec); + /* ACPI GPE handler */ #ifdef CONFIG_ACPI -- cgit v1.2.3 From 090edbe23ff57940fca7f57d9165ce57a826bd7a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Dec 2017 13:19:05 -0800 Subject: x86/power/64: Use struct desc_ptr for the IDT in struct saved_context x86_64's saved_context nonsensically used separate idt_limit and idt_base fields and then cast &idt_limit to struct desc_ptr *. This was correct (with -fno-strict-aliasing), but it's confusing, served no purpose, and required #ifdeffery. Simplify this by using struct desc_ptr directly. No change in functionality. Tested-by: Jarkko Nikula Signed-off-by: Andy Lutomirski Acked-by: Rafael J. Wysocki Acked-by: Thomas Gleixner Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Zhang Rui Link: http://lkml.kernel.org/r/967909ce38d341b01d45eff53e278e2728a3a93a.1513286253.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/suspend_64.h | 3 +-- arch/x86/power/cpu.c | 11 +---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 7306e911faee..600e9e0aea51 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -30,8 +30,7 @@ struct saved_context { u16 gdt_pad; /* Unused */ struct desc_ptr gdt_desc; u16 idt_pad; - u16 idt_limit; - unsigned long idt_base; + struct desc_ptr idt; u16 ldt; u16 tss; unsigned long tr; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 5191de14f4df..472bc8c8212b 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ -#ifdef CONFIG_X86_32 store_idt(&ctxt->idt); -#else -/* CONFIG_X86_64 */ - store_idt((struct desc_ptr *)&ctxt->idt_limit); -#endif + /* * We save it here, but restore it only in the hibernate case. * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit @@ -219,12 +215,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ -#ifdef CONFIG_X86_32 load_idt(&ctxt->idt); -#else -/* CONFIG_X86_64 */ - load_idt((const struct desc_ptr *)&ctxt->idt_limit); -#endif #ifdef CONFIG_X86_64 /* -- cgit v1.2.3 From 896c80bef4d3b357814a476663158aaf669d0fb3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Dec 2017 13:19:06 -0800 Subject: x86/power/32: Move SYSENTER MSR restoration to fix_processor_context() x86_64 restores system call MSRs in fix_processor_context(), and x86_32 restored them along with segment registers. 
The 64-bit variant makes more sense, so move the 32-bit code to match the 64-bit code. No side effects are expected to runtime behavior. Tested-by: Jarkko Nikula Signed-off-by: Andy Lutomirski Acked-by: Rafael J. Wysocki Acked-by: Thomas Gleixner Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Zhang Rui Link: http://lkml.kernel.org/r/65158f8d7ee64dd6bbc6c1c83b3b34aaa854e3ae.1513286253.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/power/cpu.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 472bc8c8212b..033c61e6891b 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -174,6 +174,9 @@ static void fix_processor_context(void) write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS); syscall_init(); /* This sets MSR_*STAR and related */ +#else + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(); #endif load_TR_desc(); /* This does ltr */ load_mm_ldt(current->active_mm); /* This does lldt */ @@ -237,12 +240,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) loadsegment(fs, ctxt->fs); loadsegment(gs, ctxt->gs); loadsegment(ss, ctxt->ss); - - /* - * sysenter MSRs - */ - if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(); #else /* CONFIG_X86_64 */ asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); -- cgit v1.2.3 From 7ee18d677989e99635027cee04c878950e0752b9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Dec 2017 13:19:07 -0800 Subject: x86/power: Make restore_processor_context() sane My previous attempt to fix a couple of bugs in __restore_processor_context(): 5b06bbcfc2c6 ("x86/power: Fix some ordering bugs in __restore_processor_context()") ... introduced yet another bug, breaking suspend-resume. Rather than trying to come up with a minimal fix, let's try to clean it up for real. 
This patch fixes quite a few things: - The old code saved a nonsensical subset of segment registers. The only registers that need to be saved are those that contain userspace state or those that can't be trivially restored without percpu access working. (On x86_32, we can restore percpu access by writing __KERNEL_PERCPU to %fs. On x86_64, it's easier to save and restore the kernel's GSBASE.) With this patch, we restore hardcoded values to the kernel state where applicable and explicitly restore the user state after fixing all the descriptor tables. - We used to use an unholy mix of inline asm and C helpers for segment register access. Let's get rid of the inline asm. This fixes the reported s2ram hangs and makes the code all around more logical. Analyzed-by: Linus Torvalds Reported-by: Jarkko Nikula Reported-by: Pavel Machek Tested-by: Jarkko Nikula Tested-by: Pavel Machek Signed-off-by: Andy Lutomirski Acked-by: Rafael J. Wysocki Acked-by: Thomas Gleixner Cc: Borislav Petkov Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Zhang Rui Fixes: 5b06bbcfc2c6 ("x86/power: Fix some ordering bugs in __restore_processor_context()") Link: http://lkml.kernel.org/r/398ee68e5c0f766425a7b746becfc810840770ff.1513286253.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/suspend_32.h | 8 +++- arch/x86/include/asm/suspend_64.h | 16 +++++++- arch/x86/power/cpu.c | 79 ++++++++++++++++++++------------------- 3 files changed, 62 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 982c325dad33..8be6afb58471 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -12,7 +12,13 @@ /* image of the saved processor state */ struct saved_context { - u16 es, fs, gs, ss; + /* + * On x86_32, all segment registers, with the possible exception of + * gs, are saved at kernel entry in pt_regs.
+ */ +#ifdef CONFIG_X86_32_LAZY_GS + u16 gs; +#endif unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 600e9e0aea51..a7af9f53c0cb 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -20,8 +20,20 @@ */ struct saved_context { struct pt_regs regs; - u16 ds, es, fs, gs, ss; - unsigned long gs_base, gs_kernel_base, fs_base; + + /* + * User CS and SS are saved in current_pt_regs(). The rest of the + * segment selectors need to be saved and restored here. + */ + u16 ds, es, fs, gs; + + /* + * Usermode FSBASE and GSBASE may not match the fs and gs selectors, + * so we save them separately. We save the kernelmode GSBASE to + * restore percpu access after resume. + */ + unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; + unsigned long cr0, cr2, cr3, cr4, cr8; u64 misc_enable; bool misc_enable_saved; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 033c61e6891b..36a28eddb435 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -99,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt) /* * segment registers */ -#ifdef CONFIG_X86_32 - savesegment(es, ctxt->es); - savesegment(fs, ctxt->fs); +#ifdef CONFIG_X86_32_LAZY_GS savesegment(gs, ctxt->gs); - savesegment(ss, ctxt->ss); -#else -/* CONFIG_X86_64 */ - asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); - asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); - asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); - asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs)); - asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss)); +#endif +#ifdef CONFIG_X86_64 + savesegment(gs, ctxt->gs); + savesegment(fs, ctxt->fs); + savesegment(ds, ctxt->ds); + savesegment(es, ctxt->es); rdmsrl(MSR_FS_BASE, ctxt->fs_base); - rdmsrl(MSR_GS_BASE, ctxt->gs_base); - rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); 
+ rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base); mtrr_save_fixed_ranges(NULL); rdmsrl(MSR_EFER, ctxt->efer); @@ -189,9 +185,12 @@ static void fix_processor_context(void) } /** - * __restore_processor_state - restore the contents of CPU registers saved - * by __save_processor_state() - * @ctxt - structure to load the registers contents from + * __restore_processor_state - restore the contents of CPU registers saved + * by __save_processor_state() + * @ctxt - structure to load the registers contents from + * + * The asm code that gets us here will have restored a usable GDT, although + * it will be pointing to the wrong alias. */ static void notrace __restore_processor_state(struct saved_context *ctxt) { @@ -214,46 +213,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) write_cr2(ctxt->cr2); write_cr0(ctxt->cr0); + /* Restore the IDT. */ + load_idt(&ctxt->idt); + /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). + * Just in case the asm code got us here with the SS, DS, or ES + * out of sync with the GDT, update them. */ - load_idt(&ctxt->idt); + loadsegment(ss, __KERNEL_DS); + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); -#ifdef CONFIG_X86_64 /* - * We need GSBASE restored before percpu access can work. - * percpu access can happen in exception handlers or in complicated - * helpers like load_gs_index(). + * Restore percpu access. Percpu access can happen in exception + * handlers or in complicated helpers like load_gs_index(). */ - wrmsrl(MSR_GS_BASE, ctxt->gs_base); +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); +#else + loadsegment(fs, __KERNEL_PERCPU); + loadsegment(gs, __KERNEL_STACK_CANARY); #endif + /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */ fix_processor_context(); /* - * Restore segment registers. This happens after restoring the GDT - * and LDT, which happen in fix_processor_context(). 
+ * Now that we have descriptor tables fully restored and working + * exception handling, restore the usermode segments. */ -#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_64 + loadsegment(ds, ctxt->es); loadsegment(es, ctxt->es); loadsegment(fs, ctxt->fs); - loadsegment(gs, ctxt->gs); - loadsegment(ss, ctxt->ss); -#else -/* CONFIG_X86_64 */ - asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); - asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); - asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); load_gs_index(ctxt->gs); - asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); /* - * Restore FSBASE and user GSBASE after reloading the respective - * segment selectors. + * Restore FSBASE and GSBASE after restoring the selectors, since + * restoring the selectors clobbers the bases. Keep in mind + * that MSR_KERNEL_GS_BASE is horribly misnamed. */ wrmsrl(MSR_FS_BASE, ctxt->fs_base); - wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base); +#elif defined(CONFIG_X86_32_LAZY_GS) + loadsegment(gs, ctxt->gs); #endif do_fpu_end(); -- cgit v1.2.3 From f5b5fab1780c98b74526dbac527574bd02dc16f8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Dec 2017 10:38:36 -0800 Subject: x86/decoder: Fix and update the opcodes map Update x86-opcode-map.txt based on the October 2017 Intel SDM publication. Fix INVPID to INVVPID. Add UD0 and UD1 instruction opcodes. Also sync the objtool and perf tooling copies of this file. 
Signed-off-by: Randy Dunlap Acked-by: Masami Hiramatsu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/aac062d7-c0f6-96e3-5c92-ed299e2bd3da@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/lib/x86-opcode-map.txt | 13 +++++++++++-- tools/objtool/arch/x86/insn/x86-opcode-map.txt | 15 ++++++++++++--- tools/perf/util/intel-pt-decoder/x86-opcode-map.txt | 15 ++++++++++++--- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index c4d55919fac1..e0b85930dd77 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) -ff: +ff: UD0 EndTable Table: 3-byte opcode 1 (0x0f 0x38) @@ -717,7 +717,7 @@ AVXcode: 2 7e: vpermt2d/q Vx,Hx,Wx (66),(ev) 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) -81: INVPID Gy,Mdq (66) +81: INVVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 88: vexpandps/d Vpd,Wpd (66),(ev) @@ -970,6 +970,15 @@ GrpTable: Grp9 EndTable GrpTable: Grp10 +# all are UD1 +0: UD1 +1: UD1 +2: UD1 +3: UD1 +4: UD1 +5: UD1 +6: UD1 +7: UD1 EndTable # Grp11A and Grp11B are expressed as Grp11 in Intel SDM diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt index 12e377184ee4..e0b85930dd77 100644 --- a/tools/objtool/arch/x86/insn/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt @@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) -ff: +ff: UD0 EndTable Table: 3-byte opcode 1 (0x0f 0x38) @@ -717,7 +717,7 @@ AVXcode: 2 7e: vpermt2d/q Vx,Hx,Wx 
(66),(ev) 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) -81: INVPID Gy,Mdq (66) +81: INVVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 88: vexpandps/d Vpd,Wpd (66),(ev) @@ -896,7 +896,7 @@ EndTable GrpTable: Grp3_1 0: TEST Eb,Ib -1: +1: TEST Eb,Ib 2: NOT Eb 3: NEG Eb 4: MUL AL,Eb @@ -970,6 +970,15 @@ GrpTable: Grp9 EndTable GrpTable: Grp10 +# all are UD1 +0: UD1 +1: UD1 +2: UD1 +3: UD1 +4: UD1 +5: UD1 +6: UD1 +7: UD1 EndTable # Grp11A and Grp11B are expressed as Grp11 in Intel SDM diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index 12e377184ee4..e0b85930dd77 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) -ff: +ff: UD0 EndTable Table: 3-byte opcode 1 (0x0f 0x38) @@ -717,7 +717,7 @@ AVXcode: 2 7e: vpermt2d/q Vx,Hx,Wx (66),(ev) 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) -81: INVPID Gy,Mdq (66) +81: INVVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 88: vexpandps/d Vpd,Wpd (66),(ev) @@ -896,7 +896,7 @@ EndTable GrpTable: Grp3_1 0: TEST Eb,Ib -1: +1: TEST Eb,Ib 2: NOT Eb 3: NEG Eb 4: MUL AL,Eb @@ -970,6 +970,15 @@ GrpTable: Grp9 EndTable GrpTable: Grp10 +# all are UD1 +0: UD1 +1: UD1 +2: UD1 +3: UD1 +4: UD1 +5: UD1 +6: UD1 +7: UD1 EndTable # Grp11A and Grp11B are expressed as Grp11 in Intel SDM -- cgit v1.2.3 From 215eada73e77ede7e15531d99f712481ddd429be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Dec 2017 13:36:56 +0100 Subject: objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version This fixes the following warning: warning: objtool: x86 instruction decoder differs from kernel Note that 
there are cleanups queued up for v4.16 that will make this warning more informative and will make the syncing easier as well. Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/objtool/arch/x86/insn/inat.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h index 125ecd2a300d..52dc8d911173 100644 --- a/tools/objtool/arch/x86/insn/inat.h +++ b/tools/objtool/arch/x86/insn/inat.h @@ -97,6 +97,16 @@ #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) +/* Identifiers for segment registers */ +#define INAT_SEG_REG_IGNORE 0 +#define INAT_SEG_REG_DEFAULT 1 +#define INAT_SEG_REG_CS 2 +#define INAT_SEG_REG_SS 3 +#define INAT_SEG_REG_DS 4 +#define INAT_SEG_REG_ES 5 +#define INAT_SEG_REG_FS 6 +#define INAT_SEG_REG_GS 7 + /* Attribute search APIs */ extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); extern int inat_get_last_prefix_id(insn_byte_t last_pfx); -- cgit v1.2.3 From 643e345c95f0b4a4082c60755e06e3e635658da6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Dec 2017 13:47:51 +0100 Subject: tools/headers: Synchronize kernel <-> tooling headers Two kernel headers got modified recently, which are used by tooling as well: tools/include/uapi/linux/kvm.h arch/x86/include/asm/cpufeatures.h None of those changes have an effect on tooling, so do a plain copy. 
Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Namhyung Kim Cc: Jiri Olsa Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/arch/x86/include/asm/cpufeatures.h | 1 + tools/include/uapi/linux/kvm.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index c0b0e9e8aa66..800104c8a3ed 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -266,6 +266,7 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 282d7613fce8..496e59a2738b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -630,9 +630,9 @@ struct kvm_s390_irq { struct kvm_s390_irq_state { __u64 buf; - __u32 flags; + __u32 flags; /* will stay unused for compatibility reasons */ __u32 len; - __u32 reserved[4]; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ }; /* for KVM_SET_GUEST_DEBUG */ -- cgit v1.2.3 From 50034ed49645463a16327cad05694e201e6b4126 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Dec 2017 05:09:47 -0800 Subject: cgroup: use strlcpy() instead of strscpy() to avoid spurious warning As long as cft->name is guaranteed to be NUL-terminated, using strlcpy() would work just as well and avoid that warning, so the change below could be folded into that commit. 
Signed-off-by: Arnd Bergmann Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 18d71fbd3923..f4c2f8cb5748 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1397,7 +1397,7 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name, cft->name); else - strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX); + strlcpy(buf, cft->name, CGROUP_FILE_NAME_MAX); return buf; } @@ -1864,9 +1864,9 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) root->flags = opts->flags; if (opts->release_agent) - strscpy(root->release_agent_path, opts->release_agent, PATH_MAX); + strlcpy(root->release_agent_path, opts->release_agent, PATH_MAX); if (opts->name) - strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); + strlcpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); if (opts->cpuset_clone_children) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -- cgit v1.2.3 From 2d17d8d79e77ff3f1b35b87522fc72fa562260ff Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 14 Dec 2017 17:17:56 -0800 Subject: xdp: linearize skb in netif_receive_generic_xdp() In netif_receive_generic_xdp(), it is necessary to linearize all nonlinear skb. However, in current implementation, skb with troom <= 0 are not linearized. This patch fixes this by calling skb_linearize() for all nonlinear skb. Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") Signed-off-by: Song Liu Acked-by: Martin KaFai Lau Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index f47e96b62308..01ee854454a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3904,7 +3904,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, hroom > 0 ? 
ALIGN(hroom, NET_SKB_PAD) : 0, troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) goto do_drop; - if (troom > 0 && __skb_linearize(skb)) + if (skb_linearize(skb)) goto do_drop; } -- cgit v1.2.3 From 9f37e797547cca9d14fe1f0f43f5c89b261ff0b0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 15 Dec 2017 14:16:04 +0100 Subject: s390: fix preemption race in disable_sacf_uaccess With CONFIG_PREEMPT=y there is a possible race in disable_sacf_uaccess. The new set_fs value needs to be stored in the task structure first, the control register update needs to be second. Otherwise a preemptive schedule may interrupt the code right after the control register update has been done and the next time the task is scheduled we get an incorrect value in the control register due to the old set_fs setting. Fixes: 0aaba41b58 ("s390: remove all code using the access register mode") Signed-off-by: Martin Schwidefsky --- arch/s390/lib/uaccess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index cae5a1e16cbd..c4f8039a35e8 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess); void disable_sacf_uaccess(mm_segment_t old_fs) { + current->thread.mm_segment = old_fs; if (old_fs == USER_DS && test_facility(27)) { __ctl_load(S390_lowcore.user_asce, 1, 1); clear_cpu_flag(CIF_ASCE_PRIMARY); } - current->thread.mm_segment = old_fs; } EXPORT_SYMBOL(disable_sacf_uaccess); -- cgit v1.2.3 From b224f6134d72e3493a023b5bea917f9a6beea0c8 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Fri, 24 Nov 2017 16:30:53 +0100 Subject: nvme: set discard_alignment to zero Similar to 7c084289795b ("rbd: set discard_alignment to zero"), NVMe devices are currently incorrectly initialised with the block queue discard_alignment set to the NVMe stream alignment.
As per Documentation/ABI/testing/sysfs-block: The discard_alignment parameter indicates how many bytes the beginning of the device is offset from the internal allocation unit's natural alignment. Correcting the discard_alignment parameter to zero has no effect on how discard requests are propagated through the block layer - @alignment in __blkdev_issue_discard() remains zero. However, it does fix other consumers, such as LIO's Block Limits VPD response. Signed-off-by: David Disseldorp Reviewed-by: Jens Axboe Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f837d666cbd4..67f2f94cf86e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < NVME_DSM_MAX_RANGES); - queue->limits.discard_alignment = size; + queue->limits.discard_alignment = 0; queue->limits.discard_granularity = size; blk_queue_max_discard_sectors(queue, UINT_MAX); -- cgit v1.2.3 From 4596e752db02d47038cd7c965419789ab15d1985 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 15:11:37 -0800 Subject: nvme-fc: remove double put reference if admin connect fails There are two put references in the failure case of initial create_association. The first put actually frees the controller, thus the second put references freed memory. Remove the unnecessary 2nd put. 
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0a8af4daef89..794e66e4aa20 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); /* Remove core ctrl ref. */ nvme_put_ctrl(&ctrl->ctrl); -- cgit v1.2.3 From bd9f5d65769b9fe5e72110d4cbc9097b53b01613 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 6 Dec 2017 18:30:09 +0800 Subject: nvme: call blk_integrity_unregister after queue is cleaned up In the IO completion path, bio_integrity_advance() is often called, and it calls blk_get_integrity(). But blk_integrity_unregister() clears the buffer pointed to by queue->integrity, so blk_integrity->profile becomes NULL, blk_get_integrity() then returns NULL, and this finally causes a kernel oops[1]. This patch fixes this issue by calling blk_integrity_unregister() after blk_cleanup_queue().
[1] kernel oops log [ 122.068007] BUG: unable to handle kernel NULL pointer dereference at 000000000000000a [ 122.076760] IP: bio_integrity_advance+0x3d/0xf0 [ 122.081815] PGD 0 P4D 0 [ 122.084641] Oops: 0000 [#1] SMP [ 122.088142] Modules linked in: sunrpc ipmi_ssif intel_rapl vfat fat x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass mei_me ipmi_si crct10dif_pclmul crc32_pclmul sg mei ghash_clmulni_intel mxm_wmi ipmi_devintf iTCO_wdt intel_cstate intel_uncore pcspkr intel_rapl_perf iTCO_vendor_support dcdbas ipmi_msghandler lpc_ich acpi_power_meter shpchp wmi dm_multipath ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel ahci nvme tg3 libahci nvme_core i2c_core libata ptp megaraid_sas pps_core dm_mirror dm_region_hash dm_log dm_mod [ 122.149577] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.14.0-11.el7a.x86_64 #1 [ 122.157635] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.5.5 08/16/2017 [ 122.166179] task: ffff8802ff1e8000 task.stack: ffffc90000130000 [ 122.172785] RIP: 0010:bio_integrity_advance+0x3d/0xf0 [ 122.178419] RSP: 0018:ffff88047fc03d70 EFLAGS: 00010006 [ 122.184248] RAX: ffff880473b08000 RBX: ffff880458c71a80 RCX: ffff880473b08248 [ 122.192209] RDX: 0000000000000000 RSI: 000000000000003c RDI: ffffc900038d7ba0 [ 122.200171] RBP: ffff88047fc03d78 R08: 0000000000000001 R09: ffffffffa01a78b5 [ 122.208132] R10: ffff88047fc1eda0 R11: ffff880458c71ad0 R12: 0000000000007800 [ 122.216094] R13: 0000000000000000 R14: 0000000000007800 R15: ffff880473a39b40 [ 122.224056] FS: 0000000000000000(0000) GS:ffff88047fc00000(0000) knlGS:0000000000000000 [ 122.233083] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 122.239494] CR2: 000000000000000a CR3: 0000000001c09002 CR4: 00000000001606e0 [ 122.247455] Call Trace: [ 122.250183] [ 122.252429] bio_advance+0x28/0xf0 [ 122.256217] blk_update_request+0xa1/0x310 [ 122.260778] blk_mq_end_request+0x1e/0x70 [ 
122.265256] nvme_complete_rq+0x1c/0xd0 [nvme_core] [ 122.270699] nvme_pci_complete_rq+0x85/0x130 [nvme] [ 122.276140] __blk_mq_complete_request+0x8d/0x140 [ 122.281387] blk_mq_complete_request+0x16/0x20 [ 122.286345] nvme_process_cq+0xdd/0x1c0 [nvme] [ 122.291301] nvme_irq+0x23/0x50 [nvme] [ 122.295485] __handle_irq_event_percpu+0x3c/0x190 [ 122.300725] handle_irq_event_percpu+0x32/0x80 [ 122.305683] handle_irq_event+0x3b/0x60 [ 122.309964] handle_edge_irq+0x8f/0x190 [ 122.314247] handle_irq+0xab/0x120 [ 122.318043] do_IRQ+0x48/0xd0 [ 122.321355] common_interrupt+0x9d/0x9d [ 122.325625] [ 122.327967] RIP: 0010:cpuidle_enter_state+0xe9/0x280 [ 122.333504] RSP: 0018:ffffc90000133e68 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff35 [ 122.341952] RAX: ffff88047fc1b900 RBX: ffff88047fc24400 RCX: 000000000000001f [ 122.349913] RDX: 0000000000000000 RSI: fffffcf2e6007295 RDI: 0000000000000000 [ 122.357874] RBP: ffffc90000133ea0 R08: 000000000000062e R09: 0000000000000253 [ 122.365836] R10: 0000000000000225 R11: 0000000000000018 R12: 0000000000000002 [ 122.373797] R13: 0000000000000001 R14: ffff88047fc24400 R15: 0000001c6bd1d263 [ 122.381762] ? 
cpuidle_enter_state+0xc5/0x280 [ 122.386623] cpuidle_enter+0x17/0x20 [ 122.390611] call_cpuidle+0x23/0x40 [ 122.394501] do_idle+0x17e/0x1f0 [ 122.398101] cpu_startup_entry+0x73/0x80 [ 122.402478] start_secondary+0x178/0x1c0 [ 122.406854] secondary_startup_64+0xa5/0xa5 [ 122.411520] Code: 48 8b 5f 68 48 8b 47 08 31 d2 4c 8b 5b 48 48 8b 80 d0 03 00 00 48 83 b8 48 02 00 00 00 48 8d 88 48 02 00 00 48 0f 45 d1 c1 ee 09 <0f> b6 4a 0a 0f b6 52 09 89 f0 48 01 73 08 83 e9 09 d3 e8 0f af [ 122.432604] RIP: bio_integrity_advance+0x3d/0xf0 RSP: ffff88047fc03d70 [ 122.439888] CR2: 000000000000000a Reported-by: Zhang Yi Tested-by: Zhang Yi Signed-off-by: Ming Lei Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 67f2f94cf86e..2cc6192ef275 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2965,8 +2965,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) return; if (ns->disk && ns->disk->flags & GENHD_FL_UP) { - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); nvme_mpath_remove_disk_links(ns); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group); @@ -2974,6 +2972,8 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); + if (blk_get_integrity(ns->disk)) + blk_integrity_unregister(ns->disk); } mutex_lock(&ns->ctrl->subsys->lock); -- cgit v1.2.3 From 249159c5f15812140fa216f9997d799ac0023a1f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 14 Dec 2017 11:20:14 -0700 Subject: nvme: check hw sectors before setting chunk sectors Some devices with IDs matching the "stripe" quirk don't actually have this quirk, and don't have an MDTS value. When MDTS is not set, the driver sets the max sectors to UINT_MAX, which is not a power of 2, hitting a BUG_ON from blk_queue_chunk_sectors. 
This patch skips setting chunk sectors for such devices. Signed-off-by: Keith Busch Reviewed-by: Martin K. Petersen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2cc6192ef275..eab812dd2429 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ctrl->max_hw_sectors)) blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) -- cgit v1.2.3 From 654b4a4acd8b52a4272114b95896e9a10d382cde Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 14 Dec 2017 11:20:32 -0700 Subject: nvme: setup streams after initializing namespace head Fixes a NULL pointer dereference. Reported-by: Arnav Dawn Signed-off-by: Keith Busch Reviewed-by: Martin K. 
Petersen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index eab812dd2429..1e46e60b8f10 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2870,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); - nvme_setup_streams_ns(ctrl, ns); id = nvme_identify_ns(ctrl, nsid); if (!id) @@ -2881,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_init_ns_head(ns, nsid, id, &new)) goto out_free_id; + nvme_setup_streams_ns(ctrl, ns); #ifdef CONFIG_NVME_MULTIPATH /* -- cgit v1.2.3 From cb2cf0de1174701b7c8c0285a0f398b9f2d30d8e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 15 Dec 2017 05:10:47 +0000 Subject: ASoC: soc-core: care Codec <-> Codec case by non_legacy_dai_naming CPU/Codec categorization will be removed soon. Then, it needs to know whether a DAI is a Codec somehow. This patch uses component driver's "non_legacy_dai_naming" which is used by Codec for it. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 90f1122d91c4..b9ca939fd05c 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1945,7 +1945,9 @@ int snd_soc_runtime_set_dai_fmt(struct snd_soc_pcm_runtime *rtd, } /* Flip the polarity for the "CPU" end of a CODEC<->CODEC link */ - if (cpu_dai->codec) { + /* the component which has non_legacy_dai_naming is Codec */ + if (cpu_dai->codec || + cpu_dai->component->driver->non_legacy_dai_naming) { unsigned int inv_dai_fmt; inv_dai_fmt = dai_fmt & ~SND_SOC_DAIFMT_MASTER_MASK; -- cgit v1.2.3 From 0e96a0c83f0842d5cfd83b0a896bc82ff61f9849 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 14 Dec 2017 17:16:29 -0800 Subject: cpupower: Remove FSF address Checkpatch in the kernel now complains about having the FSF address in comments. Other tools such as rpmlint are now starting to do the same thing. Remove the FSF address to reduce warnings on multiple tools. Signed-off-by: Laura Abbott Acked-by: Dominik Brodowski Signed-off-by: Shuah Khan --- tools/power/cpupower/lib/cpufreq.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h index 3b005c39f068..60beaf5ed2ea 100644 --- a/tools/power/cpupower/lib/cpufreq.h +++ b/tools/power/cpupower/lib/cpufreq.h @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __CPUPOWER_CPUFREQ_H__ -- cgit v1.2.3 From f73c52a5bcd1710994e53fbccc378c42b97a06b6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 2 Dec 2017 13:04:54 -0500 Subject: sched/rt: Do not pull from current CPU if only one CPU to pull Daniel Wagner reported a crash on the BeagleBone Black SoC. This is a single CPU architecture, and does not have a functional arch_send_call_function_single_ipi() implementation which can crash the kernel if that is called. As it only has one CPU, it shouldn't be called, but if the kernel is compiled for SMP, the push/pull RT scheduling logic now calls it for irq_work if the one CPU is overloaded, it can use that function to call itself and crash the kernel. Ideally, we should disable the SCHED_FEAT(RT_PUSH_IPI) if the system only has a single CPU. But SCHED_FEAT is a constant if sched debugging is turned off. Another fix can also be used, and this should also help with normal SMP machines. That is, do not initiate the pull code if there's only one RT overloaded CPU, and that CPU happens to be the current CPU that is scheduling in a lower priority task. Even on a system with many CPUs, if there's many RT tasks waiting to run on a single CPU, and that CPU schedules in another RT task of lower priority, it will initiate the PULL logic in case there's a higher priority RT task on another CPU that is waiting to run. But if there is no other CPU with waiting RT tasks, it will initiate the RT pull logic on itself (as it still has RT tasks waiting to run). This is a wasted effort. 
Not only does this help with SMP code where the current CPU is the only one with RT overloaded tasks, it should also solve the issue that Daniel encountered, because it will prevent the PULL logic from executing, as there's only one CPU on the system, and the check added here will cause it to exit the RT pull code. Reported-by: Daniel Wagner Signed-off-by: Steven Rostedt (VMware) Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: linux-rt-users Cc: stable@vger.kernel.org Fixes: 4bdced5c9 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/20171202130454.4cbbfe8d@vmware.local.home Signed-off-by: Ingo Molnar --- kernel/sched/rt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 4056c19ca3f0..665ace2fc558 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2034,8 +2034,9 @@ static void pull_rt_task(struct rq *this_rq) bool resched = false; struct task_struct *p; struct rq *src_rq; + int rt_overload_count = rt_overloaded(this_rq); - if (likely(!rt_overloaded(this_rq))) + if (likely(!rt_overload_count)) return; /* @@ -2044,6 +2045,11 @@ static void pull_rt_task(struct rq *this_rq) */ smp_rmb(); + /* If we are the only overloaded CPU do nothing */ + if (rt_overload_count == 1 && + cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask)) + return; + #ifdef HAVE_RT_PUSH_IPI if (sched_feat(RT_PUSH_IPI)) { tell_cpu_to_push(this_rq); -- cgit v1.2.3 From fccff0862838908d21eaf956d57e09c6c189f7c5 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 15 Dec 2017 08:44:21 +0100 Subject: mlxsw: spectrum: Disable MAC learning for ovs port Learning is currently enabled for ports which are OVS slaves - even though OVS doesn't need this indication. Since we're not associating a fid with the port, HW would continuously notify driver of learned [& aged] MACs which would be logged as errors. 
Fixes: 2b94e58df58c ("mlxsw: spectrum: Allow ports to work under OVS master") Signed-off-by: Yuval Mintz Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 2d0897b7d860..9bd8d28de152 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4300,6 +4300,7 @@ static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) { + u16 vid = 1; int err; err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true); @@ -4312,8 +4313,19 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) true, false); if (err) goto err_port_vlan_set; + + for (; vid <= VLAN_N_VID - 1; vid++) { + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, + vid, false); + if (err) + goto err_vid_learning_set; + } + return 0; +err_vid_learning_set: + for (vid--; vid >= 1; vid--) + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); err_port_vlan_set: mlxsw_sp_port_stp_set(mlxsw_sp_port, false); err_port_stp_set: @@ -4323,6 +4335,12 @@ err_port_stp_set: static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) { + u16 vid; + + for (vid = VLAN_N_VID - 1; vid >= 1; vid--) + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, + vid, true); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, false, false); mlxsw_sp_port_stp_set(mlxsw_sp_port, false); -- cgit v1.2.3 From c739f930be1dd5fd949030e3475a884fe06dae9b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 12 Dec 2017 07:56:36 -0800 Subject: x86/espfix/64: Fix espfix double-fault handling on 5-level systems Using PGDIR_SHIFT to identify espfix64 addresses on 5-level systems was wrong, and it resulted in panics due to unhandled 
double faults. Use P4D_SHIFT instead, which is correct on 4-level and 5-level machines. This fixes a panic when running x86 selftests on 5-level machines. Signed-off-by: Andy Lutomirski Acked-by: Kirill A. Shutemov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 1d33b219563f ("x86/espfix: Add support for 5-level paging") Link: http://lkml.kernel.org/r/24c898b4f44fdf8c22d93703850fb384ef87cfdc.1513035461.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b7b0f74a2150..c751518936ac 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -355,7 +355,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * * No need for ist_enter here because we don't use RCU. */ - if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && + if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY && regs->cs == __KERNEL_CS && regs->ip == (unsigned long)native_irq_return_iret) { -- cgit v1.2.3 From a4544831370618cb3627e27ffcc27d1cc857868f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 15 Dec 2017 16:07:22 +0000 Subject: arm64: fpsimd: Fix copying of FP state from signal frame into task struct Commit 9de52a755cfb6da5 ("arm64: fpsimd: Fix failure to restore FPSIMD state after signals") fixed an issue reported in our FPSIMD signal restore code but inadvertently introduced another issue which tends to manifest as random SEGVs in userspace. The problem is that when we copy the struct fpsimd_state from the kernel stack (populated from the signal frame) into the struct held in the current thread_struct, we blindly copy uninitialised stack into the "cpu" field, which means that context-switching of the FP registers is no longer reliable. 
This patch fixes the problem by copying only the user_fpsimd member of struct fpsimd_state. We should really rework the function prototypes to take struct user_fpsimd_state * instead, but let's just get this fixed for now. Cc: Dave Martin Fixes: 9de52a755cfb6da5 ("arm64: fpsimd: Fix failure to restore FPSIMD state after signals") Reported-by: Geert Uytterhoeven Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 540a1e010eb5..fae81f7964b4 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1043,7 +1043,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state) local_bh_disable(); - current->thread.fpsimd_state = *state; + current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd; if (system_supports_sve() && test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); -- cgit v1.2.3 From 7fbd9493f0eeae8cef58300505a9ef5c8fce6313 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 13 Dec 2017 18:56:29 +0100 Subject: s390/qeth: apply takeover changes when mode is toggled Just as for an explicit enable/disable, toggling the takeover mode also requires that the IP addresses get updated. Otherwise all IPs that were added to the table before the mode-toggle, get registered with the old settings. Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core.h | 2 +- drivers/s390/net/qeth_core_main.c | 2 +- drivers/s390/net/qeth_l3_sys.c | 35 +++++++++++++++++------------------ 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 15015a24f8ad..51c618d9fefe 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -565,7 +565,7 @@ enum qeth_cq { }; struct qeth_ipato { - int enabled; + bool enabled; int invert4; int invert6; struct list_head entries; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 430e3214f7e2..8d18675e60e2 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1480,7 +1480,7 @@ static int qeth_setup_card(struct qeth_card *card) qeth_set_intial_options(card); /* IP address takeover */ INIT_LIST_HEAD(&card->ipato.entries); - card->ipato.enabled = 0; + card->ipato.enabled = false; card->ipato.invert4 = 0; card->ipato.invert6 = 0; /* init QDIO stuff */ diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index bd12fdf678be..198717f71b3d 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -372,6 +372,7 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, struct qeth_card *card = dev_get_drvdata(dev); struct qeth_ipaddr *addr; int i, rc = 0; + bool enable; if (!card) return -EINVAL; @@ -384,25 +385,23 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, } if (sysfs_streq(buf, "toggle")) { - card->ipato.enabled = (card->ipato.enabled)? 
0 : 1; - } else if (sysfs_streq(buf, "1")) { - card->ipato.enabled = 1; - hash_for_each(card->ip_htable, i, addr, hnode) { - if ((addr->type == QETH_IP_TYPE_NORMAL) && - qeth_l3_is_addr_covered_by_ipato(card, addr)) - addr->set_flags |= - QETH_IPA_SETIP_TAKEOVER_FLAG; - } - } else if (sysfs_streq(buf, "0")) { - card->ipato.enabled = 0; - hash_for_each(card->ip_htable, i, addr, hnode) { - if (addr->set_flags & - QETH_IPA_SETIP_TAKEOVER_FLAG) - addr->set_flags &= - ~QETH_IPA_SETIP_TAKEOVER_FLAG; - } - } else + enable = !card->ipato.enabled; + } else if (kstrtobool(buf, &enable)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.enabled == enable) + goto out; + card->ipato.enabled = enable; + + hash_for_each(card->ip_htable, i, addr, hnode) { + if (!enable) + addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; + else if (addr->type == QETH_IP_TYPE_NORMAL && + qeth_l3_is_addr_covered_by_ipato(card, addr)) + addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + } out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; -- cgit v1.2.3 From b22d73d6689fd902a66c08ebe71ab2f3b351e22f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 13 Dec 2017 18:56:30 +0100 Subject: s390/qeth: don't apply takeover changes to RXIP When takeover is switched off, current code clears the 'TAKEOVER' flag on all IPs. But the flag is also used for RXIP addresses, and those should not be affected by the takeover mode. Fix the behaviour by consistently applying takeover logic to NORMAL addresses only. Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_l3_main.c | 5 +++-- drivers/s390/net/qeth_l3_sys.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 6a73894b0cb5..4a4be81800eb 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -174,6 +174,8 @@ int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, if (!card->ipato.enabled) return 0; + if (addr->type != QETH_IP_TYPE_NORMAL) + return 0; qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits, (addr->proto == QETH_PROT_IPV4)? 4:16); @@ -290,8 +292,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr)); addr->ref_counter = 1; - if (addr->type == QETH_IP_TYPE_NORMAL && - qeth_l3_is_addr_covered_by_ipato(card, addr)) { + if (qeth_l3_is_addr_covered_by_ipato(card, addr)) { QETH_CARD_TEXT(card, 2, "tkovaddr"); addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 198717f71b3d..e256928092e5 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -396,10 +396,11 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, card->ipato.enabled = enable; hash_for_each(card->ip_htable, i, addr, hnode) { + if (addr->type != QETH_IP_TYPE_NORMAL) + continue; if (!enable) addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; - else if (addr->type == QETH_IP_TYPE_NORMAL && - qeth_l3_is_addr_covered_by_ipato(card, addr)) + else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } out: -- cgit v1.2.3 From 8a03a3692b100d84785ee7a834e9215e304c9e00 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 13 Dec 2017 18:56:31 +0100 Subject: s390/qeth: lock IP table while applying takeover changes Modifying the flags of an IP addr object needs to be protected against eg. 
concurrent removal of the same object from the IP table. Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index e256928092e5..aa676b4090da 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -395,6 +395,7 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, goto out; card->ipato.enabled = enable; + spin_lock_bh(&card->ip_lock); hash_for_each(card->ip_htable, i, addr, hnode) { if (addr->type != QETH_IP_TYPE_NORMAL) continue; @@ -403,6 +404,7 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; } + spin_unlock_bh(&card->ip_lock); out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; -- cgit v1.2.3 From 02f510f326501470348a5df341e8232c3497bbbb Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 13 Dec 2017 18:56:32 +0100 Subject: s390/qeth: update takeover IPs after configuration change Any modification to the takeover IP-ranges requires that we re-evaluate which IP addresses are takeover-eligible. Otherwise we might do takeover for some addresses when we no longer should, or vice-versa. Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core.h | 4 +-- drivers/s390/net/qeth_core_main.c | 4 +-- drivers/s390/net/qeth_l3.h | 2 +- drivers/s390/net/qeth_l3_main.c | 31 +++++++++++++++++-- drivers/s390/net/qeth_l3_sys.c | 63 +++++++++++++++++++++------------------ 5 files changed, 67 insertions(+), 37 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 51c618d9fefe..badf42acbf95 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -566,8 +566,8 @@ enum qeth_cq { struct qeth_ipato { bool enabled; - int invert4; - int invert6; + bool invert4; + bool invert6; struct list_head entries; }; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 8d18675e60e2..6c815207f4f5 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1481,8 +1481,8 @@ static int qeth_setup_card(struct qeth_card *card) /* IP address takeover */ INIT_LIST_HEAD(&card->ipato.entries); card->ipato.enabled = false; - card->ipato.invert4 = 0; - card->ipato.invert6 = 0; + card->ipato.invert4 = false; + card->ipato.invert6 = false; /* init QDIO stuff */ qeth_init_qdio_info(card); INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work); diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h index 194ae9b577cc..e5833837b799 100644 --- a/drivers/s390/net/qeth_l3.h +++ b/drivers/s390/net/qeth_l3.h @@ -82,7 +82,7 @@ void qeth_l3_del_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *); int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *); void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions, const u8 *); -int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *); +void qeth_l3_update_ipato(struct qeth_card *card); struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions); int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *); int qeth_l3_delete_ip(struct qeth_card 
*, struct qeth_ipaddr *); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4a4be81800eb..ef0961e18686 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -164,8 +164,8 @@ static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len) } } -int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, - struct qeth_ipaddr *addr) +static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card, + struct qeth_ipaddr *addr) { struct qeth_ipato_entry *ipatoe; u8 addr_bits[128] = {0, }; @@ -606,6 +606,27 @@ int qeth_l3_setrouting_v6(struct qeth_card *card) /* * IP address takeover related functions */ + +/** + * qeth_l3_update_ipato() - Update 'takeover' property, for all NORMAL IPs. + * + * Caller must hold ip_lock. + */ +void qeth_l3_update_ipato(struct qeth_card *card) +{ + struct qeth_ipaddr *addr; + unsigned int i; + + hash_for_each(card->ip_htable, i, addr, hnode) { + if (addr->type != QETH_IP_TYPE_NORMAL) + continue; + if (qeth_l3_is_addr_covered_by_ipato(card, addr)) + addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + else + addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; + } +} + static void qeth_l3_clear_ipato_list(struct qeth_card *card) { struct qeth_ipato_entry *ipatoe, *tmp; @@ -617,6 +638,7 @@ static void qeth_l3_clear_ipato_list(struct qeth_card *card) kfree(ipatoe); } + qeth_l3_update_ipato(card); spin_unlock_bh(&card->ip_lock); } @@ -641,8 +663,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card, } } - if (!rc) + if (!rc) { list_add_tail(&new->entry, &card->ipato.entries); + qeth_l3_update_ipato(card); + } spin_unlock_bh(&card->ip_lock); @@ -665,6 +689,7 @@ void qeth_l3_del_ipato_entry(struct qeth_card *card, (proto == QETH_PROT_IPV4)? 
4:16) && (ipatoe->mask_bits == mask_bits)) { list_del(&ipatoe->entry); + qeth_l3_update_ipato(card); kfree(ipatoe); } } diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index aa676b4090da..6ea2b528a64e 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -370,9 +370,8 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); - struct qeth_ipaddr *addr; - int i, rc = 0; bool enable; + int rc = 0; if (!card) return -EINVAL; @@ -391,20 +390,12 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev, goto out; } - if (card->ipato.enabled == enable) - goto out; - card->ipato.enabled = enable; - - spin_lock_bh(&card->ip_lock); - hash_for_each(card->ip_htable, i, addr, hnode) { - if (addr->type != QETH_IP_TYPE_NORMAL) - continue; - if (!enable) - addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG; - else if (qeth_l3_is_addr_covered_by_ipato(card, addr)) - addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG; + if (card->ipato.enabled != enable) { + card->ipato.enabled = enable; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); } - spin_unlock_bh(&card->ip_lock); out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; @@ -430,20 +421,27 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + bool invert; int rc = 0; if (!card) return -EINVAL; mutex_lock(&card->conf_mutex); - if (sysfs_streq(buf, "toggle")) - card->ipato.invert4 = (card->ipato.invert4)? 
0 : 1; - else if (sysfs_streq(buf, "1")) - card->ipato.invert4 = 1; - else if (sysfs_streq(buf, "0")) - card->ipato.invert4 = 0; - else + if (sysfs_streq(buf, "toggle")) { + invert = !card->ipato.invert4; + } else if (kstrtobool(buf, &invert)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.invert4 != invert) { + card->ipato.invert4 = invert; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } +out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; } @@ -609,20 +607,27 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + bool invert; int rc = 0; if (!card) return -EINVAL; mutex_lock(&card->conf_mutex); - if (sysfs_streq(buf, "toggle")) - card->ipato.invert6 = (card->ipato.invert6)? 0 : 1; - else if (sysfs_streq(buf, "1")) - card->ipato.invert6 = 1; - else if (sysfs_streq(buf, "0")) - card->ipato.invert6 = 0; - else + if (sysfs_streq(buf, "toggle")) { + invert = !card->ipato.invert6; + } else if (kstrtobool(buf, &invert)) { rc = -EINVAL; + goto out; + } + + if (card->ipato.invert6 != invert) { + card->ipato.invert6 = invert; + spin_lock_bh(&card->ip_lock); + qeth_l3_update_ipato(card); + spin_unlock_bh(&card->ip_lock); + } +out: mutex_unlock(&card->conf_mutex); return rc ? rc : count; } -- cgit v1.2.3 From 35b99dffc3f710cafceee6c8c6ac6a98eb2cb4bf Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 13 Dec 2017 14:41:06 -0500 Subject: sock: free skb in skb_complete_tx_timestamp on error skb_complete_tx_timestamp must ingest the skb it is passed. Call kfree_skb if the skb cannot be enqueued. Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl") Fixes: 9ac25fc06375 ("net: fix socket refcounting in skb_complete_tx_timestamp()") Reported-by: Richard Cochran Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6b0ff396fa9d..a592ca025fc4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4293,7 +4293,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk = skb->sk; if (!skb_may_tx_timestamp(sk, false)) - return; + goto err; /* Take a reference to prevent skb_orphan() from freeing the socket, * but only if the socket refcount is not zero. @@ -4302,7 +4302,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, *skb_hwtstamps(skb) = *hwtstamps; __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); + return; } + +err: + kfree_skb(skb); } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); -- cgit v1.2.3 From 6d59b7dbf72ed20d0138e2f9b75ca3d4a9d4faca Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Dec 2017 21:07:23 +0100 Subject: bpf, s390x: do not reload skb pointers in non-skb context The assumption of unconditionally reloading skb pointers on BPF helper calls where bpf_helper_changes_pkt_data() holds true is wrong. There can be different contexts where the BPF helper would enforce a reload such as in case of XDP. Here, we do have a struct xdp_buff instead of struct sk_buff as context, thus this will access garbage. JITs only ever need to deal with cached skb pointer reload when ld_abs/ind was seen, therefore guard the reload behind SEEN_SKB only. Tested on s390x. 
Fixes: 9db7f2b81880 ("s390/bpf: recache skb->data/hlen for skb_vlan_push/pop") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Michael Holzheu Signed-off-by: Alexei Starovoitov --- arch/s390/net/bpf_jit_comp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e81c16838b90..9557d8b516df 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -55,8 +55,7 @@ struct bpf_jit { #define SEEN_LITERAL 8 /* code uses literals */ #define SEEN_FUNC 16 /* calls C functions */ #define SEEN_TAIL_CALL 32 /* code uses tail calls */ -#define SEEN_SKB_CHANGE 64 /* code changes skb data */ -#define SEEN_REG_AX 128 /* code uses constant blinding */ +#define SEEN_REG_AX 64 /* code uses constant blinding */ #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) /* @@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, 152); } - if (jit->seen & SEEN_SKB) + if (jit->seen & SEEN_SKB) { emit_load_skb_data_hlen(jit); - if (jit->seen & SEEN_SKB_CHANGE) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, STK_OFF_SKBP); + } } /* @@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT2(0x0d00, REG_14, REG_W1); /* lgr %b0,%r2: load return value into %b0 */ EMIT4(0xb9040000, BPF_REG_0, REG_2); - if (bpf_helper_changes_pkt_data((void *)func)) { - jit->seen |= SEEN_SKB_CHANGE; + if ((jit->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data((void *)func)) { /* lg %b1,ST_OFF_SKBP(%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, REG_15, STK_OFF_SKBP); -- cgit v1.2.3 From 87338c8e2cbb317b5f757e6172f94e2e3799cd20 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Dec 2017 21:07:24 +0100 Subject: bpf, ppc64: do not reload skb pointers in non-skb context The assumption of 
unconditionally reloading skb pointers on BPF helper calls where bpf_helper_changes_pkt_data() holds true is wrong. There can be different contexts where the helper would enforce a reload such as in case of XDP. Here, we do have a struct xdp_buff instead of struct sk_buff as context, thus this will access garbage. JITs only ever need to deal with cached skb pointer reload when ld_abs/ind was seen, therefore guard the reload behind SEEN_SKB. Fixes: 156d0e290e96 ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Signed-off-by: Daniel Borkmann Reviewed-by: Naveen N. Rao Acked-by: Alexei Starovoitov Tested-by: Sandipan Das Signed-off-by: Alexei Starovoitov --- arch/powerpc/net/bpf_jit_comp64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 46d74e81aff1..d183b4801bdb 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -763,7 +763,8 @@ emit_clear: func = (u8 *) __bpf_call_base + imm; /* Save skb pointer if we need to re-cache skb data */ - if (bpf_helper_changes_pkt_data(func)) + if ((ctx->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data(func)) PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -772,7 +773,8 @@ emit_clear: PPC_MR(b2p[BPF_REG_0], 3); /* refresh skb cache */ - if (bpf_helper_changes_pkt_data(func)) { + if ((ctx->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data(func)) { /* reload skb pointer to r3 */ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_skb_loads(image, ctx); -- cgit v1.2.3 From 04514d13222f2c4c91adf0ecb21004cec3388795 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Dec 2017 21:07:25 +0100 Subject: bpf: guarantee r1 to be ctx in case of bpf_helper_changes_pkt_data Some JITs don't cache skb context on stack in prologue, so when LD_ABS/IND is used and helper calls yield bpf_helper_changes_pkt_data() as true, then they temporarily 
save/restore skb pointer. However, the assumption that skb always has to be in r1 is a bit of a gamble. Right now it turned out to be true for all helpers listed in bpf_helper_changes_pkt_data(), but let's enforce that from verifier side, so that we make this a guarantee and bail out if the func proto is misconfigured in future helpers. In case of BPF helper calls from cBPF, bpf_helper_changes_pkt_data() is completely irrelevant here (since cBPF is context read-only) and therefore always false. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d4593571c404..e39b01317b6f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1674,7 +1674,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return -EINVAL; } + /* With LD_ABS/IND some JITs save/restore skb from r1. */ changes_data = bpf_helper_changes_pkt_data(fn->func); + if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { + verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", + func_id_name(func_id), func_id); + return -EINVAL; + } memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; -- cgit v1.2.3 From 07aee94394547721ac168cbf4e1c09c14a5fe671 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Dec 2017 21:07:26 +0100 Subject: bpf, sparc: fix usage of wrong reg for load_skb_regs after call When LD_ABS/IND is used in the program, and we have a BPF helper call that changes packet data (bpf_helper_changes_pkt_data() returns true), then in case of sparc JIT, we try to reload cached skb data from bpf2sparc[BPF_REG_6]. However, there is no such guarantee or assumption that skb sits in R6 at this point, all helpers changing skb data only have a guarantee that skb sits in R1.
Therefore, store BPF R1 in L7 temporarily and after procedure call use L7 to reload cached skb data. skb sitting in R6 is only true at the time when LD_ABS/IND is executed. Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.") Signed-off-by: Daniel Borkmann Acked-by: David S. Miller Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- arch/sparc/net/bpf_jit_comp_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 5765e7e711f7..ff5f9cb3039a 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) u8 *func = ((u8 *)__bpf_call_base) + imm; ctx->saw_call = true; + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) + emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx); emit_call((u32 *)func, ctx); emit_nop(ctx); emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); - if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) - load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) + load_skb_regs(ctx, L7); break; } -- cgit v1.2.3 From 87ab8194303e73af2898e9e1c8b3b9bcfe91e7a9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 14 Dec 2017 21:07:27 +0100 Subject: bpf: add test case for ld_abs and helper changing pkt data Add a test that i) uses LD_ABS, ii) zeroing R6 before call, iii) calls a helper that triggers reload of cached skb data, iv) uses LD_ABS again. It's added for test_bpf in order to do runtime testing after JITing as well as test_verifier to test that the sequence is allowed. 
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- lib/test_bpf.c | 43 +++++++++++++++++++++++++++++ tools/testing/selftests/bpf/test_verifier.c | 24 ++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index aa8812ae6776..9e9748089270 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -435,6 +435,41 @@ loop: return 0; } +static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self) +{ + struct bpf_insn *insn; + + insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + /* Due to func address being non-const, we need to + * assemble this here. + */ + insn[0] = BPF_MOV64_REG(R6, R1); + insn[1] = BPF_LD_ABS(BPF_B, 0); + insn[2] = BPF_LD_ABS(BPF_H, 0); + insn[3] = BPF_LD_ABS(BPF_W, 0); + insn[4] = BPF_MOV64_REG(R7, R6); + insn[5] = BPF_MOV64_IMM(R6, 0); + insn[6] = BPF_MOV64_REG(R1, R7); + insn[7] = BPF_MOV64_IMM(R2, 1); + insn[8] = BPF_MOV64_IMM(R3, 2); + insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + bpf_skb_vlan_push_proto.func - __bpf_call_base); + insn[10] = BPF_MOV64_REG(R6, R7); + insn[11] = BPF_LD_ABS(BPF_B, 0); + insn[12] = BPF_LD_ABS(BPF_H, 0); + insn[13] = BPF_LD_ABS(BPF_W, 0); + insn[14] = BPF_MOV64_IMM(R0, 42); + insn[15] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = 16; + + return 0; +} + static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) { unsigned int len = BPF_MAXINSNS; @@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = { {}, { {0x1, 0x42 } }, }, + { + "LD_ABS with helper changing skb data", + { }, + INTERNAL, + { 0x34 }, + { { ETH_HLEN, 42 } }, + .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, + }, }; static struct net_device dev; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3c64f30cf63c..b03ecfd7185b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6116,6 
+6116,30 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, }, + { + "ld_abs: tests on r6 and skb data reload helper", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, { "ld_ind: check calling conv, r1", .insns = { -- cgit v1.2.3 From e4d02ca04c6d48ab2226342a1c4ed54f1dbb72bd Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 2017 12:34:40 +0300 Subject: net: aquantia: Fix actual speed capabilities reporting Different hardware device Ids correspond to different maximum speed available. Extra checks were added for devices D108 and D109 to remove unsupported speeds from these device capabilities list. Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 4 +++- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 7 ++++--- drivers/net/ethernet/aquantia/atlantic/aq_nic.h | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 5 +++-- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 13 ++++++++++++- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 13 ++++++++++++- 6 files changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 0207927dc8a6..4ebd53b3c7da 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -85,7 +85,9 @@ struct aq_hw_ops { void (*destroy)(struct aq_hw_s *self); int (*get_hw_caps)(struct aq_hw_s *self, - struct aq_hw_caps_s *aq_hw_caps); + struct aq_hw_caps_s *aq_hw_caps, + unsigned short device, + unsigned short subsystem_device); int (*hw_ring_tx_xmit)(struct aq_hw_s *self, struct aq_ring_s *aq_ring, unsigned int frags); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 78dfb2ab78ce..a360ccc298b9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -222,7 +222,7 @@ static struct net_device *aq_nic_ndev_alloc(void) struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops, const struct ethtool_ops *et_ops, - struct device *dev, + struct pci_dev *pdev, struct aq_pci_func_s *aq_pci_func, unsigned int port, const struct aq_hw_ops *aq_hw_ops) @@ -242,7 +242,7 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops, ndev->netdev_ops = ndev_ops; ndev->ethtool_ops = et_ops; - SET_NETDEV_DEV(ndev, dev); + SET_NETDEV_DEV(ndev, &pdev->dev); ndev->if_port = port; self->ndev = ndev; @@ -254,7 +254,8 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops, self->aq_hw = 
self->aq_hw_ops.create(aq_pci_func, self->port, &self->aq_hw_ops); - err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps); + err = self->aq_hw_ops.get_hw_caps(self->aq_hw, &self->aq_hw_caps, + pdev->device, pdev->subsystem_device); if (err < 0) goto err_exit; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 4309983acdd6..3c9f8db03d5f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -71,7 +71,7 @@ struct aq_nic_cfg_s { struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops, const struct ethtool_ops *et_ops, - struct device *dev, + struct pci_dev *pdev, struct aq_pci_func_s *aq_pci_func, unsigned int port, const struct aq_hw_ops *aq_hw_ops); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index cadaa646c89f..58c29d04b186 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -51,7 +51,8 @@ struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops, pci_set_drvdata(pdev, self); self->pdev = pdev; - err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps); + err = aq_hw_ops->get_hw_caps(NULL, &self->aq_hw_caps, pdev->device, + pdev->subsystem_device); if (err < 0) goto err_exit; @@ -59,7 +60,7 @@ struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops, for (port = 0; port < self->ports; ++port) { struct aq_nic_s *aq_nic = aq_nic_alloc_cold(ndev_ops, eth_ops, - &pdev->dev, self, + pdev, self, port, aq_hw_ops); if (!aq_nic) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 07b3c49a16a4..b0abd187cead 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -18,9 +18,20 @@ 
#include "hw_atl_a0_internal.h" static int hw_atl_a0_get_hw_caps(struct aq_hw_s *self, - struct aq_hw_caps_s *aq_hw_caps) + struct aq_hw_caps_s *aq_hw_caps, + unsigned short device, + unsigned short subsystem_device) { memcpy(aq_hw_caps, &hw_atl_a0_hw_caps_, sizeof(*aq_hw_caps)); + + if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001) + aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G; + + if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) { + aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_10G; + aq_hw_caps->link_speed_msk &= ~HW_ATL_A0_RATE_5G; + } + return 0; } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index ec68c20efcbd..e4e3b8e2d67e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -18,9 +18,20 @@ #include "hw_atl_b0_internal.h" static int hw_atl_b0_get_hw_caps(struct aq_hw_s *self, - struct aq_hw_caps_s *aq_hw_caps) + struct aq_hw_caps_s *aq_hw_caps, + unsigned short device, + unsigned short subsystem_device) { memcpy(aq_hw_caps, &hw_atl_b0_hw_caps_, sizeof(*aq_hw_caps)); + + if (device == HW_ATL_DEVICE_ID_D108 && subsystem_device == 0x0001) + aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G; + + if (device == HW_ATL_DEVICE_ID_D109 && subsystem_device == 0x0001) { + aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_10G; + aq_hw_caps->link_speed_msk &= ~HW_ATL_B0_RATE_5G; + } + return 0; } -- cgit v1.2.3 From 1e366161510f266516107a69db91f1f2edaea11c Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 2017 12:34:41 +0300 Subject: net: aquantia: Fix hardware DMA stream overload on large MRRS Systems with large MRRS on device (2K, 4K) with high data rates and/or large MTU, atlantic observes DMA packet buffer overflow. On some systems that causes PCIe transaction errors, hardware NMIs or datapath freeze. 
This patch 1) Limits MRRS from device side to 2K (that's the maximum our hardware supports) 2) Limits the maximum size of outstanding TX DMA data read requests. This keeps the hardware buffers running fine. Signed-off-by: Pavel Belous Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 12 ++++++++++++ .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index e4e3b8e2d67e..36fddb199160 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -16,6 +16,7 @@ #include "hw_atl_utils.h" #include "hw_atl_llh.h" #include "hw_atl_b0_internal.h" +#include "hw_atl_llh_internal.h" static int hw_atl_b0_get_hw_caps(struct aq_hw_s *self, struct aq_hw_caps_s *aq_hw_caps, @@ -368,6 +369,7 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self, }; int err = 0; + u32 val; self->aq_nic_cfg = aq_nic_cfg; @@ -385,6 +387,16 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self, hw_atl_b0_hw_rss_set(self, &aq_nic_cfg->aq_rss); hw_atl_b0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss); + /* Force limit MRRS on RDM/TDM to 2K */ + val = aq_hw_read_reg(self, pci_reg_control6_adr); + aq_hw_write_reg(self, pci_reg_control6_adr, (val & ~0x707) | 0x404); + + /* TX DMA total request limit. B0 hardware is not capable to + * handle more than (8K-MRRS) incoming DMA data.
+ * Value 24 in 256byte units + */ + aq_hw_write_reg(self, tx_dma_total_req_limit_adr, 24); + err = aq_hw_err_from_flags(self); if (err < 0) goto err_exit; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index 5527fc0e5942..93450ec930e8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -2343,6 +2343,9 @@ #define tx_dma_desc_base_addrmsw_adr(descriptor) \ (0x00007c04u + (descriptor) * 0x40) +/* tx dma total request limit */ +#define tx_dma_total_req_limit_adr 0x00007b20u + /* tx interrupt moderation control register definitions * Preprocessor definitions for TX Interrupt Moderation Control Register * Base Address: 0x00008980 @@ -2369,6 +2372,9 @@ /* default value of bitfield reg_res_dsbl */ #define pci_reg_res_dsbl_default 0x1 +/* PCI core control register */ +#define pci_reg_control6_adr 0x1014u + /* global microprocessor scratch pad definitions */ #define glb_cpu_scratch_scp_adr(scratch_scp) (0x00000300u + (scratch_scp) * 0x4) -- cgit v1.2.3 From be08d839d9ef1c9b0e4ed809ec852ff100f9970d Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 2017 12:34:42 +0300 Subject: net: aquantia: Extend stat counters to 64bit values Device hardware provides only 32bit counters. Using these directly causes byte counters to overflow soon. A separate nic level structure with 64 bit counters is now used to collect incrementally all the stats and report these counters to ethtool stats and ndev stats. Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_hw.h | 25 ++++++- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 35 ++++++++-- .../aquantia/atlantic/hw_atl/hw_atl_utils.c | 80 +++++++--------------- .../aquantia/atlantic/hw_atl/hw_atl_utils.h | 6 +- 4 files changed, 79 insertions(+), 67 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 4ebd53b3c7da..b3825de6cdfb 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -46,6 +46,28 @@ struct aq_hw_link_status_s { unsigned int mbps; }; +struct aq_stats_s { + u64 uprc; + u64 mprc; + u64 bprc; + u64 erpt; + u64 uptc; + u64 mptc; + u64 bptc; + u64 erpr; + u64 mbtc; + u64 bbtc; + u64 mbrc; + u64 bbrc; + u64 ubrc; + u64 ubtc; + u64 dpc; + u64 dma_pkt_rc; + u64 dma_pkt_tc; + u64 dma_oct_rc; + u64 dma_oct_tc; +}; + #define AQ_HW_IRQ_INVALID 0U #define AQ_HW_IRQ_LEGACY 1U #define AQ_HW_IRQ_MSI 2U @@ -166,8 +188,7 @@ struct aq_hw_ops { int (*hw_update_stats)(struct aq_hw_s *self); - int (*hw_get_hw_stats)(struct aq_hw_s *self, u64 *data, - unsigned int *p_count); + struct aq_stats_s *(*hw_get_hw_stats)(struct aq_hw_s *self); int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index a360ccc298b9..28cbe9d43df6 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -750,16 +750,40 @@ int aq_nic_get_regs_count(struct aq_nic_s *self) void aq_nic_get_stats(struct aq_nic_s *self, u64 *data) { - struct aq_vec_s *aq_vec = NULL; unsigned int i = 0U; unsigned int count = 0U; - int err = 0; + struct aq_vec_s *aq_vec = NULL; + struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw); - err = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw, data, &count); - if (err < 0) + if (!stats) goto err_exit; - 
data += count; + data[i] = stats->uprc + stats->mprc + stats->bprc; + data[++i] = stats->uprc; + data[++i] = stats->mprc; + data[++i] = stats->bprc; + data[++i] = stats->erpt; + data[++i] = stats->uptc + stats->mptc + stats->bptc; + data[++i] = stats->uptc; + data[++i] = stats->mptc; + data[++i] = stats->bptc; + data[++i] = stats->ubrc; + data[++i] = stats->ubtc; + data[++i] = stats->mbrc; + data[++i] = stats->mbtc; + data[++i] = stats->bbrc; + data[++i] = stats->bbtc; + data[++i] = stats->ubrc + stats->mbrc + stats->bbrc; + data[++i] = stats->ubtc + stats->mbtc + stats->bbtc; + data[++i] = stats->dma_pkt_rc; + data[++i] = stats->dma_pkt_tc; + data[++i] = stats->dma_oct_rc; + data[++i] = stats->dma_oct_tc; + data[++i] = stats->dpc; + + i++; + + data += i; count = 0U; for (i = 0U, aq_vec = self->aq_vec[0]; @@ -769,7 +793,6 @@ void aq_nic_get_stats(struct aq_nic_s *self, u64 *data) } err_exit:; - (void)err; } void aq_nic_get_link_ksettings(struct aq_nic_s *self, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 1fe016fc4bc7..f2ce12ed4218 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -503,73 +503,43 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self) struct hw_atl_s *hw_self = PHAL_ATLANTIC; struct hw_aq_atl_utils_mbox mbox; - if (!self->aq_link_status.mbps) - return 0; - hw_atl_utils_mpi_read_stats(self, &mbox); #define AQ_SDELTA(_N_) (hw_self->curr_stats._N_ += \ mbox.stats._N_ - hw_self->last_stats._N_) - - AQ_SDELTA(uprc); - AQ_SDELTA(mprc); - AQ_SDELTA(bprc); - AQ_SDELTA(erpt); - - AQ_SDELTA(uptc); - AQ_SDELTA(mptc); - AQ_SDELTA(bptc); - AQ_SDELTA(erpr); - - AQ_SDELTA(ubrc); - AQ_SDELTA(ubtc); - AQ_SDELTA(mbrc); - AQ_SDELTA(mbtc); - AQ_SDELTA(bbrc); - AQ_SDELTA(bbtc); - AQ_SDELTA(dpc); - + if (self->aq_link_status.mbps) { + AQ_SDELTA(uprc); + AQ_SDELTA(mprc); + 
AQ_SDELTA(bprc); + AQ_SDELTA(erpt); + + AQ_SDELTA(uptc); + AQ_SDELTA(mptc); + AQ_SDELTA(bptc); + AQ_SDELTA(erpr); + + AQ_SDELTA(ubrc); + AQ_SDELTA(ubtc); + AQ_SDELTA(mbrc); + AQ_SDELTA(mbtc); + AQ_SDELTA(bbrc); + AQ_SDELTA(bbtc); + AQ_SDELTA(dpc); + } #undef AQ_SDELTA + hw_self->curr_stats.dma_pkt_rc = stats_rx_dma_good_pkt_counterlsw_get(self); + hw_self->curr_stats.dma_pkt_tc = stats_tx_dma_good_pkt_counterlsw_get(self); + hw_self->curr_stats.dma_oct_rc = stats_rx_dma_good_octet_counterlsw_get(self); + hw_self->curr_stats.dma_oct_tc = stats_tx_dma_good_octet_counterlsw_get(self); memcpy(&hw_self->last_stats, &mbox.stats, sizeof(mbox.stats)); return 0; } -int hw_atl_utils_get_hw_stats(struct aq_hw_s *self, - u64 *data, unsigned int *p_count) +struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self) { - struct hw_atl_s *hw_self = PHAL_ATLANTIC; - struct hw_atl_stats_s *stats = &hw_self->curr_stats; - int i = 0; - - data[i] = stats->uprc + stats->mprc + stats->bprc; - data[++i] = stats->uprc; - data[++i] = stats->mprc; - data[++i] = stats->bprc; - data[++i] = stats->erpt; - data[++i] = stats->uptc + stats->mptc + stats->bptc; - data[++i] = stats->uptc; - data[++i] = stats->mptc; - data[++i] = stats->bptc; - data[++i] = stats->ubrc; - data[++i] = stats->ubtc; - data[++i] = stats->mbrc; - data[++i] = stats->mbtc; - data[++i] = stats->bbrc; - data[++i] = stats->bbtc; - data[++i] = stats->ubrc + stats->mbrc + stats->bbrc; - data[++i] = stats->ubtc + stats->mbtc + stats->bbtc; - data[++i] = stats_rx_dma_good_pkt_counterlsw_get(self); - data[++i] = stats_tx_dma_good_pkt_counterlsw_get(self); - data[++i] = stats_rx_dma_good_octet_counterlsw_get(self); - data[++i] = stats_tx_dma_good_octet_counterlsw_get(self); - data[++i] = stats->dpc; - - if (p_count) - *p_count = ++i; - - return 0; + return &PHAL_ATLANTIC->curr_stats; } static const u32 hw_atl_utils_hw_mac_regs[] = { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h 
b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index c99cc690e425..21aeca6908d3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -129,7 +129,7 @@ struct __packed hw_aq_atl_utils_mbox { struct __packed hw_atl_s { struct aq_hw_s base; struct hw_atl_stats_s last_stats; - struct hw_atl_stats_s curr_stats; + struct aq_stats_s curr_stats; u64 speed; unsigned int chip_features; u32 fw_ver_actual; @@ -207,8 +207,6 @@ int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version); int hw_atl_utils_update_stats(struct aq_hw_s *self); -int hw_atl_utils_get_hw_stats(struct aq_hw_s *self, - u64 *data, - unsigned int *p_count); +struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self); #endif /* HW_ATL_UTILS_H */ -- cgit v1.2.3 From 9f8a2203a542f5f3cdeb17f40250c49bb87aa7e3 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 2017 12:34:43 +0300 Subject: net: aquantia: Fill ndev stat counters from hardware Originally they were filled from ring sw counters. These sometimes incorrectly calculate byte and packet amounts when using LRO/LSO and jumboframes. Filling ndev counters from hardware makes them precise. Signed-off-by: Igor Russkikh Signed-off-by: David S.
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 32 ++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 28cbe9d43df6..307caac68731 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -37,6 +37,8 @@ static unsigned int aq_itr_rx; module_param_named(aq_itr_rx, aq_itr_rx, uint, 0644); MODULE_PARM_DESC(aq_itr_rx, "RX interrupt throttle rate"); +static void aq_nic_update_ndev_stats(struct aq_nic_s *self); + static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues) { struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; @@ -166,11 +168,7 @@ static int aq_nic_update_link_status(struct aq_nic_s *self) static void aq_nic_service_timer_cb(struct timer_list *t) { struct aq_nic_s *self = from_timer(self, t, service_timer); - struct net_device *ndev = aq_nic_get_ndev(self); int err = 0; - unsigned int i = 0U; - struct aq_ring_stats_rx_s stats_rx; - struct aq_ring_stats_tx_s stats_tx; if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY)) goto err_exit; @@ -182,19 +180,8 @@ static void aq_nic_service_timer_cb(struct timer_list *t) if (self->aq_hw_ops.hw_update_stats) self->aq_hw_ops.hw_update_stats(self->aq_hw); - memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s)); - memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s)); - for (i = AQ_DIMOF(self->aq_vec); i--;) { - if (self->aq_vec[i]) - aq_vec_add_stats(self->aq_vec[i], &stats_rx, &stats_tx); - } + aq_nic_update_ndev_stats(self); - ndev->stats.rx_packets = stats_rx.packets; - ndev->stats.rx_bytes = stats_rx.bytes; - ndev->stats.rx_errors = stats_rx.errors; - ndev->stats.tx_packets = stats_tx.packets; - ndev->stats.tx_bytes = stats_tx.bytes; - ndev->stats.tx_errors = stats_tx.errors; err_exit: mod_timer(&self->service_timer, @@ -795,6 +782,19 @@ void aq_nic_get_stats(struct 
aq_nic_s *self, u64 *data) err_exit:; } +static void aq_nic_update_ndev_stats(struct aq_nic_s *self) +{ + struct net_device *ndev = self->ndev; + struct aq_stats_s *stats = self->aq_hw_ops.hw_get_hw_stats(self->aq_hw); + + ndev->stats.rx_packets = stats->uprc + stats->mprc + stats->bprc; + ndev->stats.rx_bytes = stats->ubrc + stats->mbrc + stats->bbrc; + ndev->stats.rx_errors = stats->erpr; + ndev->stats.tx_packets = stats->uptc + stats->mptc + stats->bptc; + ndev->stats.tx_bytes = stats->ubtc + stats->mbtc + stats->bbtc; + ndev->stats.tx_errors = stats->erpt; +} + void aq_nic_get_link_ksettings(struct aq_nic_s *self, struct ethtool_link_ksettings *cmd) { -- cgit v1.2.3 From 45cc1c7ad47c4d166d15c7bce449d2de4daef0c5 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 2017 12:34:44 +0300 Subject: net: aquantia: Fill in multicast counter in ndev stats from hardware This metric comes from HW and is also diff-calculated, like other counters Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 307caac68731..b3a5d1fbc713 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -793,6 +793,7 @@ static void aq_nic_update_ndev_stats(struct aq_nic_s *self) ndev->stats.tx_packets = stats->uptc + stats->mptc + stats->bptc; ndev->stats.tx_bytes = stats->ubtc + stats->mbtc + stats->bbtc; ndev->stats.tx_errors = stats->erpt; + ndev->stats.multicast = stats->mprc; } void aq_nic_get_link_ksettings(struct aq_nic_s *self, -- cgit v1.2.3 From fdb4a0830e74acfbe84d4d4e6772ea09c96786ad Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 2017 12:34:45 +0300 Subject: net: aquantia: Improve link state and statistics check interval callback Reduce timeout from 2 secs to 1 sec. 
If link is down, reduce it to 500msec. This speeds up link detection. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 2 +- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 57e796870595..73b93a7b4800 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -50,7 +50,7 @@ #define AQ_CFG_PCI_FUNC_MSIX_IRQS 9U #define AQ_CFG_PCI_FUNC_PORTS 2U -#define AQ_CFG_SERVICE_TIMER_INTERVAL (2 * HZ) +#define AQ_CFG_SERVICE_TIMER_INTERVAL (1 * HZ) #define AQ_CFG_POLLING_TIMER_INTERVAL ((unsigned int)(2 * HZ)) #define AQ_CFG_SKB_FRAGS_MAX 32U diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index b3a5d1fbc713..75a894a9251c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -168,6 +168,7 @@ static int aq_nic_update_link_status(struct aq_nic_s *self) static void aq_nic_service_timer_cb(struct timer_list *t) { struct aq_nic_s *self = from_timer(self, t, service_timer); + int ctimer = AQ_CFG_SERVICE_TIMER_INTERVAL; int err = 0; if (aq_utils_obj_test(&self->header.flags, AQ_NIC_FLAGS_IS_NOT_READY)) @@ -182,10 +183,12 @@ static void aq_nic_service_timer_cb(struct timer_list *t) aq_nic_update_ndev_stats(self); + /* If no link - use faster timer rate to detect link up asap */ + if (!netif_carrier_ok(self->ndev)) + ctimer = max(ctimer / 2, 1); err_exit: - mod_timer(&self->service_timer, - jiffies + AQ_CFG_SERVICE_TIMER_INTERVAL); + mod_timer(&self->service_timer, jiffies + ctimer); } static void aq_nic_polling_timer_cb(struct timer_list *t) -- cgit v1.2.3 From f3e2778429c2ad8555e888858e0f0e98c86c4b0f Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 
2017 12:34:46 +0300 Subject: net: aquantia: Update hw counters on hw init On very first start we should read out current HW counter values to make diff based calculations later. This also should be done each time NIC gets down/up or wakes up after sleep state. We reset link state explicitly to prevent diffs from being summed this first time. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 4 ++++ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index b0abd187cead..f18dce14c93c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -344,6 +344,10 @@ static int hw_atl_a0_hw_init(struct aq_hw_s *self, hw_atl_a0_hw_rss_set(self, &aq_nic_cfg->aq_rss); hw_atl_a0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss); + /* Reset link status and read out initial hardware counters */ + self->aq_link_status.mbps = 0; + hw_atl_utils_update_stats(self); + err = aq_hw_err_from_flags(self); if (err < 0) goto err_exit; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 36fddb199160..e4a22ce7bf09 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -397,6 +397,10 @@ static int hw_atl_b0_hw_init(struct aq_hw_s *self, */ aq_hw_write_reg(self, tx_dma_total_req_limit_adr, 24); + /* Reset link status and read out initial hardware counters */ + self->aq_link_status.mbps = 0; + hw_atl_utils_update_stats(self); + err = aq_hw_err_from_flags(self); if (err < 0) goto err_exit; -- cgit v1.2.3 From 98bc036de40489416d61ab175bb417c094e7783c Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 
2017 12:34:47 +0300 Subject: net: aquantia: Fix typo in ethtool statistics names Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index 70efb7467bf3..f2d8063a2cef 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -66,14 +66,14 @@ static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = { "OutUCast", "OutMCast", "OutBCast", - "InUCastOctects", - "OutUCastOctects", - "InMCastOctects", - "OutMCastOctects", - "InBCastOctects", - "OutBCastOctects", - "InOctects", - "OutOctects", + "InUCastOctets", + "OutUCastOctets", + "InMCastOctets", + "OutMCastOctets", + "InBCastOctets", + "OutBCastOctets", + "InOctets", + "OutOctets", "InPacketsDma", "OutPacketsDma", "InOctetsDma", -- cgit v1.2.3 From d4c242d4ba5730b62579969804cd8fcf58b9c84f Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Thu, 14 Dec 2017 12:34:48 +0300 Subject: net: aquantia: Increment driver version Add a suffix to distinguish kernel mainline version and aquantia releases Signed-off-by: Igor Russkikh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_cfg.h | 3 ++- drivers/net/ethernet/aquantia/atlantic/ver.h | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 73b93a7b4800..105fdb958cef 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -80,6 +80,7 @@ #define AQ_CFG_DRV_VERSION __stringify(NIC_MAJOR_DRIVER_VERSION)"."\ __stringify(NIC_MINOR_DRIVER_VERSION)"."\ __stringify(NIC_BUILD_DRIVER_VERSION)"."\ - __stringify(NIC_REVISION_DRIVER_VERSION) + __stringify(NIC_REVISION_DRIVER_VERSION) \ + AQ_CFG_DRV_VERSION_SUFFIX #endif /* AQ_CFG_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h index 0de858d215c2..9009f2651e70 100644 --- a/drivers/net/ethernet/aquantia/atlantic/ver.h +++ b/drivers/net/ethernet/aquantia/atlantic/ver.h @@ -11,8 +11,10 @@ #define VER_H #define NIC_MAJOR_DRIVER_VERSION 1 -#define NIC_MINOR_DRIVER_VERSION 5 -#define NIC_BUILD_DRIVER_VERSION 345 +#define NIC_MINOR_DRIVER_VERSION 6 +#define NIC_BUILD_DRIVER_VERSION 13 #define NIC_REVISION_DRIVER_VERSION 0 +#define AQ_CFG_DRV_VERSION_SUFFIX "-kern" + #endif /* VER_H */ -- cgit v1.2.3 From 7a4fa29106d9a38ef005f5ab15d493c259f269c0 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 14 Dec 2017 15:54:29 +0200 Subject: net: sched: Add TCA_HW_OFFLOAD Qdiscs can be offloaded to HW, but current implementation isn't uniform. Instead, qdiscs either pass information about offload status via their TCA_OPTIONS or omit it altogether. Introduce a new attribute - TCA_HW_OFFLOAD that would form a uniform uAPI for the offloading status of qdiscs. Signed-off-by: Yuval Mintz Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 1 + include/uapi/linux/rtnetlink.h | 1 + net/sched/sch_api.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 65d0d25f2648..83a3e47d5845 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -71,6 +71,7 @@ struct Qdisc { * qdisc_tree_decrease_qlen() should stop. */ #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ +#define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d8b5f80c2ea6..843e29aa3cac 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -557,6 +557,7 @@ enum { TCA_PAD, TCA_DUMP_INVISIBLE, TCA_CHAIN, + TCA_HW_OFFLOAD, __TCA_MAX }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b6c4f536876b..0f1eab99ff4e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -795,6 +795,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, tcm->tcm_info = refcount_read(&q->refcnt); if (nla_put_string(skb, TCA_KIND, q->ops->id)) goto nla_put_failure; + if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) + goto nla_put_failure; if (q->ops->dump && q->ops->dump(q, skb) < 0) goto nla_put_failure; qlen = q->q.qlen; -- cgit v1.2.3 From 428a68af3a7c3a3380ff1f750a24d213f370f89f Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 14 Dec 2017 15:54:30 +0200 Subject: net: sched: Move to new offload indication in RED Let RED utilize the new internal flag, TCQ_F_OFFLOADED, to mark a given qdisc as offloaded instead of using a dedicated indication. Also, change internal logic into looking at said flag when possible. Fixes: 602f3baf2218 ("net_sch: red: Add offload ability to RED qdisc") Signed-off-by: Yuval Mintz Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/sch_red.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 9d874e60e032..f0747eb87dc4 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -157,6 +157,7 @@ static int red_offload(struct Qdisc *sch, bool enable) .handle = sch->handle, .parent = sch->parent, }; + int err; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; @@ -171,7 +172,14 @@ static int red_offload(struct Qdisc *sch, bool enable) opt.command = TC_RED_DESTROY; } - return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); + + if (!err && enable) + sch->flags |= TCQ_F_OFFLOADED; + else + sch->flags &= ~TCQ_F_OFFLOADED; + + return err; } static void red_destroy(struct Qdisc *sch) @@ -274,7 +282,7 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt) return red_change(sch, opt); } -static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) +static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt) { struct net_device *dev = qdisc_dev(sch); struct tc_red_qopt_offload hw_stats = { @@ -286,21 +294,12 @@ static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) .stats.qstats = &sch->qstats, }, }; - int err; - opt->flags &= ~TC_RED_OFFLOADED; - if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) - return 0; - - err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, - &hw_stats); - if (err == -EOPNOTSUPP) + if (!(sch->flags & TCQ_F_OFFLOADED)) return 0; - if (!err) - opt->flags |= TC_RED_OFFLOADED; - - return err; + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, + &hw_stats); } static int red_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -319,7 +318,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) int err; sch->qstats.backlog = q->qdisc->qstats.backlog; - err = 
red_dump_offload(sch, &opt); + err = red_dump_offload_stats(sch, &opt); if (err) goto nla_put_failure; @@ -347,7 +346,7 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .marked = q->stats.prob_mark + q->stats.forced_mark, }; - if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) { + if (sch->flags & TCQ_F_OFFLOADED) { struct red_stats hw_stats = {0}; struct tc_red_qopt_offload hw_stats_request = { .command = TC_RED_XSTATS, -- cgit v1.2.3 From 4a98795bc8ea148b1ebbbf001283e06430cffe36 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 14 Dec 2017 15:54:31 +0200 Subject: pkt_sched: Remove TC_RED_OFFLOADED from uapi Following the previous patch, RED is now using the new uniform uapi for indicating it's offloaded. As a result, TC_RED_OFFLOADED is no longer utilized by kernel and can be removed [as it's still not part of any stable release]. Fixes: 602f3baf2218 ("net_sch: red: Add offload ability to RED qdisc") Signed-off-by: Yuval Mintz Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index af3cc2f4e1ad..37b5096ae97b 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -256,7 +256,6 @@ struct tc_red_qopt { #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 -#define TC_RED_OFFLOADED 8 }; struct tc_red_xstats { -- cgit v1.2.3 From c647c0d62c82eb3ddf78a0d8b3d58819d9f552aa Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 14 Dec 2017 16:56:14 +0100 Subject: net: usb: qmi_wwan: add Telit ME910 PID 0x1101 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for Telit ME910 PID 0x1101. Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d2ca5a202e8d..3000ddd1c7e2 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1211,6 +1211,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ + {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */ -- cgit v1.2.3 From f57ab9a01a36ef3454333251cc57e3a9948b17bf Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 17 Nov 2017 11:56:41 +0000 Subject: drivers: base: cacheinfo: fix cache type for non-architected system cache Commit dfea747d2aba ("drivers: base: cacheinfo: support DT overrides for cache properties") doesn't initialise the cache type if it's present only in DT and the architecture is not aware of it. They are unified system level cache which are generally transparent. This patch check if the cache type is set to NOCACHE but the DT node indicates that it's unified cache and sets the cache type accordingly. 
Fixes: dfea747d2aba ("drivers: base: cacheinfo: support DT overrides for cache properties") Reported-and-tested-by: Tan Xiaojun Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index eb3af2739537..07532d83be0b 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf) this_leaf->ways_of_associativity = (size / nr_sets) / line_size; } +static bool cache_node_is_unified(struct cacheinfo *this_leaf) +{ + return of_property_read_bool(this_leaf->of_node, "cache-unified"); +} + static void cache_of_override_properties(unsigned int cpu) { int index; @@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu) for (index = 0; index < cache_leaves(cpu); index++) { this_leaf = this_cpu_ci->info_list + index; + /* + * init_cache_level must setup the cache level correctly + * overriding the architecturally specified levels, so + * if type is NONE at this stage, it should be unified + */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + cache_node_is_unified(this_leaf)) + this_leaf->type = CACHE_TYPE_UNIFIED; cache_size(this_leaf); cache_get_line_size(this_leaf); cache_nr_sets(this_leaf); -- cgit v1.2.3 From c05fad5713b81b049ec6ac4eb2d304030b1efdce Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Fri, 15 Dec 2017 10:46:16 +0800 Subject: ip_gre: fix wrong return value of erspan_rcv If pskb_may_pull() fails, return PACKET_REJECT instead of -ENOMEM. Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Cc: William Tu Signed-off-by: Haishuang Yan Acked-by: William Tu Signed-off-by: David S.
Miller --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index bb6239169b1a..9c1735632c8c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -266,7 +266,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, len = gre_hdr_len + sizeof(*ershdr); if (unlikely(!pskb_may_pull(skb, len))) - return -ENOMEM; + return PACKET_REJECT; iph = ip_hdr(skb); ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); -- cgit v1.2.3 From c156618e15101a9cc8c815108fec0300a0ec6637 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 5 Dec 2017 13:55:44 -0500 Subject: nfs: fix a deadlock in nfs client initialization The following deadlock can occur between a process waiting for a client to initialize while walking the client list during nfsv4 server trunking detection and another process waiting for the nfs_clid_init_mutex so it can initialize that client: Process 1 Process 2 --------- --------- spin_lock(&nn->nfs_client_lock); list_add_tail(&CLIENTA->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); spin_lock(&nn->nfs_client_lock); list_add_tail(&CLIENTB->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); mutex_lock(&nfs_clid_init_mutex); nfs41_walk_client_list(clp, result, cred); nfs_wait_client_init_complete(CLIENTA); (waiting for nfs_clid_init_mutex) Make sure nfs_match_client() only evaluates clients that have completed initialization in order to prevent that deadlock. This patch also fixes v4.0 trunking behavior by not marking the client NFS_CS_READY until the clientid has been confirmed.
Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 11 +++++++++++ fs/nfs/nfs4client.c | 17 +++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0ac2fb1c6b63..b9129e2befea 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat const struct sockaddr *sap = data->addr; struct nfs_net *nn = net_generic(data->net, nfs_net_id); +again: list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ if (clp->cl_cons_state < 0) continue; + /* If a client is still initializing then we need to wait */ + if (clp->cl_cons_state > NFS_CS_READY) { + refcount_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + nfs_wait_client_init_complete(clp); + nfs_put_client(clp); + spin_lock(&nn->nfs_client_lock); + goto again; + } + /* Different NFS versions cannot share the same nfs_client */ if (clp->rpc_ops != data->nfs_mod->rpc_ops) continue; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 12bbab0becb4..65a7e5da508c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, if (error < 0) goto error; - if (!nfs4_has_session(clp)) - nfs_mark_client_ready(clp, NFS_CS_READY); - error = nfs4_discover_server_trunking(clp, &old); if (error < 0) goto error; - if (clp != old) + if (clp != old) { clp->cl_preserve_clid = true; + /* + * Mark the client as having failed initialization so other + * processes walking the nfs_client_list in nfs_match_client() + * won't try to use it. 
+ */ + nfs_mark_client_ready(clp, -EPERM); + } nfs_put_client(clp); clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags); return old; @@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + if (pos == new) + goto found; + status = nfs4_match_client(pos, new, &prev, nn); if (status < 0) goto out_unlock; @@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new, * way that a SETCLIENTID_CONFIRM to pos can succeed is * if new and pos point to the same server: */ +found: refcount_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new, case 0: nfs4_swap_callback_idents(pos, new); pos->cl_confirm = new->cl_confirm; + nfs_mark_client_ready(pos, NFS_CS_READY); prev = NULL; *result = pos; -- cgit v1.2.3 From ccede7598588ae344143f82fb763912535648d58 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 4 Dec 2017 14:04:04 -0500 Subject: xprtrdma: Spread reply processing over more CPUs Commit d8f532d20ee4 ("xprtrdma: Invoke rpcrdma_reply_handler directly from RECV completion") introduced a performance regression for NFS I/O small enough to not need memory registration. In multi- threaded benchmarks that generate primarily small I/O requests, IOPS throughput is reduced by nearly a third. This patch restores the previous level of throughput. Because workqueues are typically BOUND (in particular ib_comp_wq, nfsiod_workqueue, and rpciod_workqueue), NFS/RDMA workloads tend to aggregate on the CPU that is handling Receive completions. The usual approach to addressing this problem is to create a QP and CQ for each CPU, and then schedule transactions on the QP for the CPU where you want the transaction to complete. The transaction then does not require an extra context switch during completion to end up on the same CPU where the transaction was started. 
This approach doesn't work for the Linux NFS/RDMA client because currently the Linux NFS client does not support multiple connections per client-server pair, and the RDMA core API does not make it straightforward for ULPs to determine which CPU is responsible for handling Receive completions for a CQ. So for the moment, record the CPU number in the rpcrdma_req before the transport sends each RPC Call. Then during Receive completion, queue the RPC completion on that same CPU. Additionally, move all RPC completion processing to the deferred handler so that even RPCs with simple small replies complete on the CPU that sent the corresponding RPC Call. Fixes: d8f532d20ee4 ("xprtrdma: Invoke rpcrdma_reply_handler ...") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 6 +----- net/sunrpc/xprtrdma/transport.c | 2 ++ net/sunrpc/xprtrdma/verbs.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index ed34dc0f144c..a3f2ab283aeb 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1408,11 +1408,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", __func__, rep, req, be32_to_cpu(rep->rr_xid)); - if (list_empty(&req->rl_registered) && - !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) - rpcrdma_complete_rqst(rep); - else - queue_work(rpcrdma_receive_wq, &rep->rr_work); + queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); return; out_badstatus: diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 646c24494ea7..6ee1ad8978f3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "xprt_rdma.h" @@ -656,6 +657,7 @@ xprt_rdma_allocate(struct rpc_task *task) task->tk_pid, __func__, 
rqst->rq_callsize, rqst->rq_rcvsize, req); + req->rl_cpu = smp_processor_id(); req->rl_connect_cookie = 0; /* our reserved value */ rpcrdma_set_xprtdata(rqst, req); rqst->rq_buffer = req->rl_sendbuf->rg_base; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 710b3f77db82..8607c029c0dd 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -83,7 +83,7 @@ rpcrdma_alloc_wq(void) struct workqueue_struct *recv_wq; recv_wq = alloc_workqueue("xprtrdma_receive", - WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI, + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); if (!recv_wq) return -ENOMEM; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 51686d9eac5f..1342f743f1c4 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -342,6 +342,7 @@ enum { struct rpcrdma_buffer; struct rpcrdma_req { struct list_head rl_list; + int rl_cpu; unsigned int rl_connect_cookie; struct rpcrdma_buffer *rl_buffer; struct rpcrdma_rep *rl_reply; -- cgit v1.2.3 From dc4fd9ab01ab379ae5af522b3efd4187a7c30a31 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 8 Dec 2017 16:00:12 -0500 Subject: nfs: don't wait on commit in nfs_commit_inode() if there were no commit requests If there were no commit requests, then nfs_commit_inode() should not wait on the commit or mark the inode dirty, otherwise the following BUG_ON can be triggered: [ 1917.130762] kernel BUG at fs/inode.c:578! 
[ 1917.130766] Oops: Exception in kernel mode, sig: 5 [#1] [ 1917.130768] SMP NR_CPUS=2048 NUMA pSeries [ 1917.130772] Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi blocklayoutdriver rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache sunrpc sg nx_crypto pseries_rng ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_common ibmvscsi scsi_transport_srp ibmveth scsi_tgt dm_mirror dm_region_hash dm_log dm_mod [ 1917.130805] CPU: 2 PID: 14923 Comm: umount.nfs4 Tainted: G ------------ T 3.10.0-768.el7.ppc64 #1 [ 1917.130810] task: c0000005ecd88040 ti: c00000004cea0000 task.ti: c00000004cea0000 [ 1917.130813] NIP: c000000000354178 LR: c000000000354160 CTR: c00000000012db80 [ 1917.130816] REGS: c00000004cea3720 TRAP: 0700 Tainted: G ------------ T (3.10.0-768.el7.ppc64) [ 1917.130820] MSR: 8000000100029032 CR: 22002822 XER: 20000000 [ 1917.130828] CFAR: c00000000011f594 SOFTE: 1 GPR00: c000000000354160 c00000004cea39a0 c0000000014c4700 c0000000018cc750 GPR04: 000000000000c750 80c0000000000000 0600000000000000 04eeb76bea749a03 GPR08: 0000000000000034 c0000000018cc758 0000000000000001 d000000005e619e8 GPR12: c00000000012db80 c000000007b31200 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 c000000000dfc3ec 0000000000000000 c0000005eefc02c0 GPR28: d0000000079dbd50 c0000005b94a02c0 c0000005b94a0250 c0000005b94a01c8 [ 1917.130867] NIP [c000000000354178] .evict+0x1c8/0x350 [ 1917.130871] LR [c000000000354160] .evict+0x1b0/0x350 [ 1917.130873] Call Trace: [ 1917.130876] [c00000004cea39a0] [c000000000354160] .evict+0x1b0/0x350 (unreliable) [ 1917.130880] [c00000004cea3a30] [c0000000003558cc] .evict_inodes+0x13c/0x270 [ 1917.130884] [c00000004cea3af0] [c000000000327d20] .kill_anon_super+0x70/0x1e0 [ 1917.130896] [c00000004cea3b80] [d000000005e43e30] 
.nfs_kill_super+0x20/0x60 [nfs] [ 1917.130900] [c00000004cea3c00] [c000000000328a20] .deactivate_locked_super+0xa0/0x1b0 [ 1917.130903] [c00000004cea3c80] [c00000000035ba54] .cleanup_mnt+0xd4/0x180 [ 1917.130907] [c00000004cea3d10] [c000000000119034] .task_work_run+0x114/0x150 [ 1917.130912] [c00000004cea3db0] [c00000000001ba6c] .do_notify_resume+0xcc/0x100 [ 1917.130916] [c00000004cea3e30] [c00000000000a7b0] .ret_from_except_lite+0x5c/0x60 [ 1917.130919] Instruction dump: [ 1917.130921] 7fc3f378 486734b5 60000000 387f00a0 38800003 4bdcb365 60000000 e95f00a0 [ 1917.130927] 694a0060 7d4a0074 794ad182 694a0001 <0b0a0000> 892d02a4 2f890000 40de0134 Signed-off-by: Scott Mayhew Cc: stable@vger.kernel.org # 4.5+ Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5b5f464f6f2a..4a379d7918f2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1890,6 +1890,8 @@ int nfs_commit_inode(struct inode *inode, int how) if (res) error = nfs_generic_commit_list(inode, &head, how, &cinfo); nfs_commit_end(cinfo.mds); + if (res == 0) + return res; if (error < 0) goto out_error; if (!may_wait) -- cgit v1.2.3 From 90d91b0cd371193d9dbfa9beacab8ab9a4cb75e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Dec 2017 21:24:08 -0500 Subject: SUNRPC: Fix a race in the receive code path We must ensure that the call to rpc_sleep_on() in xprt_transmit() cannot race with the call to xprt_complete_rqst(). 
Reported-by: Chuck Lever Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=317 Fixes: ce7c252a8c74 ("SUNRPC: Add a separate spinlock to protect..") Cc: stable@vger.kernel.org # 4.14+ Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 02a9bacb239b..5b06f6906a27 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1001,6 +1001,7 @@ void xprt_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + unsigned int connect_cookie; int status, numreqs; dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); @@ -1024,6 +1025,7 @@ void xprt_transmit(struct rpc_task *task) } else if (!req->rq_bytes_sent) return; + connect_cookie = xprt->connect_cookie; req->rq_xtime = ktime_get(); status = xprt->ops->send_request(task); trace_xprt_transmit(xprt, req->rq_xid, status); @@ -1047,20 +1049,28 @@ void xprt_transmit(struct rpc_task *task) xprt->stat.bklog_u += xprt->backlog.qlen; xprt->stat.sending_u += xprt->sending.qlen; xprt->stat.pending_u += xprt->pending.qlen; + spin_unlock_bh(&xprt->transport_lock); - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else { + req->rq_connect_cookie = connect_cookie; + if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) { /* - * Sleep on the pending queue since - * we're expecting a reply. + * Sleep on the pending queue if we're expecting a reply. + * The spinlock ensures atomicity between the test of + * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on(). 
*/ - if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) + spin_lock(&xprt->recv_lock); + if (!req->rq_reply_bytes_recvd) { rpc_sleep_on(&xprt->pending, task, xprt_timer); - req->rq_connect_cookie = xprt->connect_cookie; + /* + * Send an extra queue wakeup call if the + * connection was dropped in case the call to + * rpc_sleep_on() raced. + */ + if (!xprt_connected(xprt)) + xprt_wake_pending_tasks(xprt, -ENOTCONN); + } + spin_unlock(&xprt->recv_lock); } - spin_unlock_bh(&xprt->transport_lock); } static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task) -- cgit v1.2.3 From 343723dd51ef1025a860e54df9472b5ba21ee3d9 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 15 Dec 2017 12:40:12 +0100 Subject: net: sched: fix clsact init error path Since in qdisc_create, the destroy op is called when init fails, we don't do cleanup in init and leave it up to destroy. This fixes use-after-free when trying to put already freed block. Fixes: 6e40cf2d4dee ("net: sched: use extended variants of block_get/put in ingress and clsact qdiscs") Signed-off-by: Jiri Pirko Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 4 ++-- net/sched/sch_ingress.c | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f40256a3e7f0..b91ea03e3afa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -351,6 +351,8 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, { struct tcf_chain *chain; + if (!block) + return; /* Hold a refcnt for all chains, except 0, so that they don't disappear * while we are iterating. 
*/ @@ -377,8 +379,6 @@ void tcf_block_put(struct tcf_block *block) { struct tcf_block_ext_info ei = {0, }; - if (!block) - return; tcf_block_put_ext(block, block->q, &ei); } diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 5ecc38f35d47..5e1cd2e5df87 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -190,7 +190,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); if (err) - goto err_egress_block_get; + return err; net_inc_ingress_queue(); net_inc_egress_queue(); @@ -198,10 +198,6 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_CPUSTATS; return 0; - -err_egress_block_get: - tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); - return err; } static void clsact_destroy(struct Qdisc *sch) -- cgit v1.2.3 From b59e6979a86384e68b0ab6ffeab11f0034fba82d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 15 Dec 2017 12:40:13 +0100 Subject: net: sched: fix static key imbalance in case of ingress/clsact_init error Move static key increments to the beginning of the init function so they pair 1:1 with decrements in ingress/clsact_destroy, which is called in case ingress/clsact_init fails. Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") Signed-off-by: Jiri Pirko Acked-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/sch_ingress.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 5e1cd2e5df87..fc1286f499c1 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -68,6 +68,8 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; + net_inc_ingress_queue(); + mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; @@ -78,7 +80,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) if (err) return err; - net_inc_ingress_queue(); sch->flags |= TCQ_F_CPUSTATS; return 0; @@ -172,6 +173,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; + net_inc_ingress_queue(); + net_inc_egress_queue(); + mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; @@ -192,9 +196,6 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) if (err) return err; - net_inc_ingress_queue(); - net_inc_egress_queue(); - sch->flags |= TCQ_F_CPUSTATS; return 0; -- cgit v1.2.3 From 043ee1debd0b29c16c4c4b11a348ca667bfe9144 Mon Sep 17 00:00:00 2001 From: Hemanth Puranik Date: Fri, 15 Dec 2017 20:05:58 +0530 Subject: net: qcom/emac: Reduce timeout for mdio read/write Currently mdio read/write takes around ~115us as the timeout between status check is set to 100us. By reducing the timeout to 1us mdio read/write takes ~15us to complete. This improves the link up event response. Signed-off-by: Hemanth Puranik Acked-by: Timur Tabi Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/emac/emac-phy.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c index 18461fcb9815..53dbf1e163a8 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c @@ -47,6 +47,7 @@ #define MDIO_CLK_25_28 7 #define MDIO_WAIT_TIMES 1000 +#define MDIO_STATUS_DELAY_TIME 1 static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum) { @@ -65,7 +66,7 @@ static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum) if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, !(reg & (MDIO_START | MDIO_BUSY)), - 100, MDIO_WAIT_TIMES * 100)) + MDIO_STATUS_DELAY_TIME, MDIO_WAIT_TIMES * 100)) return -EIO; return (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK; @@ -88,8 +89,8 @@ static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) writel(reg, adpt->base + EMAC_MDIO_CTRL); if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, - !(reg & (MDIO_START | MDIO_BUSY)), 100, - MDIO_WAIT_TIMES * 100)) + !(reg & (MDIO_START | MDIO_BUSY)), + MDIO_STATUS_DELAY_TIME, MDIO_WAIT_TIMES * 100)) return -EIO; return 0; -- cgit v1.2.3 From caea4f384858ee7861367920df36995e7acfe160 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 15 Dec 2017 16:21:50 +0100 Subject: drm/sun4i: validate modes for HDMI When I connected my cubieboard running 4.15-rc1 to my 4k display I got no picture. Some digging found that there is no check against the upper pixelclock limit of the HDMI output, so X selects a 4kp60 format at 594 MHz, which obviously won't work. The patch below adds a check for the upper bound of what this hardware can do, and it checks if the requested tmds clock can be obtained. It also allows for the +/- 0.5% pixel clock variation that the HDMI spec permits. 
That code is based on commit 22d0be2a557e ("drm: arcpgu: Allow some clock deviation in crtc->mode_valid() callback") from Jose Abreu for drm/arc. Signed-off-by: Hans Verkuil Thanks-to: Jose Abreu Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/162854cb-c7bd-d9ce-9fa0-9a6cd89c621b@xs4all.nl --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index dda904ec0534..c12f9bd12904 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -208,8 +208,27 @@ static int sun4i_hdmi_get_modes(struct drm_connector *connector) return ret; } +static int sun4i_hdmi_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + struct sun4i_hdmi *hdmi = drm_connector_to_sun4i_hdmi(connector); + long rate = mode->clock * 1000; + long diff = rate / 200; /* +-0.5% allowed by HDMI spec */ + long rounded_rate; + + /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */ + if (rate > 165000000) + return MODE_CLOCK_HIGH; + rounded_rate = clk_round_rate(hdmi->tmds_clk, rate); + if (max(rounded_rate, rate) - min(rounded_rate, rate) < diff && + rounded_rate > 0) + return MODE_OK; + return MODE_NOCLOCK; +} + static const struct drm_connector_helper_funcs sun4i_hdmi_connector_helper_funcs = { .get_modes = sun4i_hdmi_get_modes, + .mode_valid = sun4i_hdmi_mode_valid, }; static enum drm_connector_status -- cgit v1.2.3 From fdf2e821052958a114618a95ab18a300d0b080cb Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 5 Dec 2017 11:51:40 +0100 Subject: mtd: nand: gpmi: Fix failure when a erased page has a bitflip at BBM When erased subpages are read then the BCH decoder returns STATUS_ERASED if they are all empty, or STATUS_UNCORRECTABLE if there are bitflips. 
When there are bitflips, we have to set these bits again to show the upper layers a completely erased page. When a bitflip happens in the exact byte where the bad block marker is, then this byte is swapped with another byte in block_mark_swapping(). The correction code then detects a bitflip in another subpage and no longer corrects the bitflip where it really happens. Correct this behaviour by calling block_mark_swapping() after the bitflips have been corrected. In our case UBIFS failed with this bug because it expects erased pages to be really empty: UBIFS error (pid 187): ubifs_scan: corrupt empty space at LEB 36:118735 UBIFS error (pid 187): ubifs_scanned_corruption: corruption at LEB 36:118735 UBIFS error (pid 187): ubifs_scanned_corruption: first 8192 bytes from LEB 36:118735 UBIFS error (pid 187): ubifs_scan: LEB 36 scanning failed UBIFS error (pid 187): do_commit: commit failed, error -117 Signed-off-by: Sascha Hauer Reviewed-by: Richard Weinberger Acked-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 50f8d4a1b983..d4d824ef64e9 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, return ret; } - /* handle the block mark swapping */ - block_mark_swapping(this, payload_virt, auxiliary_virt); - /* Loop over status bytes, accumulating ECC status. */ status = auxiliary_virt + nfc_geo->auxiliary_status_offset; @@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, max_bitflips = max_t(unsigned int, max_bitflips, *status); } + /* handle the block mark swapping */ + block_mark_swapping(this, buf, auxiliary_virt); + if (oob_required) { /* * It's time to deliver the OOB bytes. 
See gpmi_ecc_read_oob() -- cgit v1.2.3 From e44b9a9c135727f3410e029910275f40681dc8bc Mon Sep 17 00:00:00 2001 From: Albert Hsieh Date: Mon, 20 Nov 2017 11:26:26 +0800 Subject: mtd: nand: brcmnand: Zero bitflip is not an error A negative return value of brcmstb_nand_verify_erased_page() indicates a real bitflip error of an erased page, and other return values (>= 0) show the corrected bitflip number. Zero return value means no bitflip, but the current driver code treats it as an error, and eventually leads to falsely reported ECC error. Fixes: 02b88eea9f9c ("mtd: brcmnand: Add check for erased page bitflip") Signed-off-by: Albert Hsieh Acked-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/nand/brcmnand/brcmnand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index e0eb51d8c012..dd56a671ea42 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -1763,7 +1763,7 @@ try_dmaread: err = brcmstb_nand_verify_erased_page(mtd, chip, buf, addr); /* erased page bitflips corrected */ - if (err > 0) + if (err >= 0) return err; } -- cgit v1.2.3 From bc2fd1b11097ad981478abcc0328784ea131ac29 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 6 Dec 2017 18:27:14 +0100 Subject: mtd: nand: gpio: Fix ALE gpio configuration Fixes a copy/paste error in commit f3d0d8d938b4d ("mtd: nand: gpio: Convert to use GPIO descriptors") which breaks gpio-nand driver Fixes: f3d0d8d938b4d ("mtd: nand: gpio: Convert to use GPIO descriptors") Cc: Linus Walleij Signed-off-by: Christophe Leroy Reviewed-by: Richard Weinberger Acked-by: Boris Brezillon Reviewed-by: Linus Walleij Signed-off-by: Richard Weinberger --- drivers/mtd/nand/gpio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c index 484f7fbc3f7d..a8bde6665c24 100644 --- a/drivers/mtd/nand/gpio.c +++ 
b/drivers/mtd/nand/gpio.c @@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev) goto out_ce; } - gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW); - if (IS_ERR(gpiomtd->nwp)) { - ret = PTR_ERR(gpiomtd->nwp); + gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW); + if (IS_ERR(gpiomtd->ale)) { + ret = PTR_ERR(gpiomtd->ale); goto out_ce; } -- cgit v1.2.3 From b2162117171864ef48d43cf5d888f3e8012c6c06 Mon Sep 17 00:00:00 2001 From: Bhawanpreet Lakha Date: Fri, 24 Nov 2017 17:26:28 -0500 Subject: drm/amd/display: add pipe locking before front end programing Add pipe locking/unlocking before we program the front end Signed-off-by: Bhawanpreet Lakha Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- .../amd/display/dc/dce110/dce110_hw_sequencer.c | 26 +++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 07ff8d2faf3f..d844fadcd56f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface( int num_planes, struct dc_state *context) { - int i, be_idx; + int i; if (num_planes == 0) return; - be_idx = -1; for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (stream == context->res_ctx.pipe_ctx[i].stream) { - be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst; - break; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (stream == pipe_ctx->stream) { + if (!pipe_ctx->top_pipe && + (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) + dc->hwss.pipe_control_lock(dc, pipe_ctx, true); } } @@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface( context->stream_count); dce110_program_front_end_for_pipe(dc, pipe_ctx); + + 
dc->hwss.update_plane_addr(dc, pipe_ctx); + program_surface_visibility(dc, pipe_ctx); } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if ((stream == pipe_ctx->stream) && + (!pipe_ctx->top_pipe) && + (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) + dc->hwss.pipe_control_lock(dc, pipe_ctx, false); + } } static void dce110_power_down_fe(struct dc *dc, int fe_idx) -- cgit v1.2.3 From 56a9b95c4d3386a98f69f641dd6018886ed2e9d6 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Mon, 13 Nov 2017 17:03:53 -0500 Subject: drm/amd/display: set chroma taps to 1 when not scaling Signed-off-by: Dmytro Laktyushkin Reviewed-by: Tony Cheng Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c | 9 ++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 3dce35e66b09..b142629a1058 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -900,6 +900,15 @@ bool dcn_validate_bandwidth( v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps; v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c; v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c; + /* + * Spreadsheet doesn't handle taps_c is one properly, + * need to force Chroma to always be scaled to pass + * bandwidth validation. + */ + if (v->override_hta_pschroma[input_idx] == 1) + v->override_hta_pschroma[input_idx] = 2; + if (v->override_vta_pschroma[input_idx] == 1) + v->override_vta_pschroma[input_idx] = 2; v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? 
dcn_bw_vert : dcn_bw_hor; } if (v->is_line_buffer_bpp_fixed == dcn_bw_yes) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 74e7c82bdc76..a9d55d0dd69e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps( scl_data->taps.h_taps = 1; if (IDENTITY_RATIO(scl_data->ratios.vert)) scl_data->taps.v_taps = 1; - /* - * Spreadsheet doesn't handle taps_c is one properly, - * need to force Chroma to always be scaled to pass - * bandwidth validation. - */ + if (IDENTITY_RATIO(scl_data->ratios.horz_c)) + scl_data->taps.h_taps_c = 1; + if (IDENTITY_RATIO(scl_data->ratios.vert_c)) + scl_data->taps.v_taps_c = 1; } return true; -- cgit v1.2.3 From 78288503199d0a33b69b972a44a4cf15df989899 Mon Sep 17 00:00:00 2001 From: Eric Yang Date: Fri, 10 Nov 2017 10:44:24 -0500 Subject: drm/amd/display: fix missing pixel clock adjustment for dongle Signed-off-by: Eric Yang Reviewed-by: Tony Cheng Reviewed-by: Andrew Jiang Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index e27ed4a45265..42a111b9505d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal) link->link_enc->funcs->disable_output(link->link_enc, signal, link); } -bool dp_active_dongle_validate_timing( +static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, const struct dc_dongle_caps *dongle_caps) { @@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing( /* Check Color Depth and Pixel Clock */ if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) 
required_pix_clk /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + required_pix_clk = required_pix_clk * 2 / 3; switch (timing->display_color_depth) { case COLOR_DEPTH_666: -- cgit v1.2.3 From becd0875f4393a992afbf57aa323f7bf1a71c3ff Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Fri, 1 Dec 2017 13:26:05 -0500 Subject: drm/amd/display: Fix rehook MST display not light back on Original applied dm_restore_drm_connector_state() has got removed. Set link status to BAD before hotplug() event could trigger another modeset from userspace. The fix "Fix MST daisy chain SST not light up" commit makes so it is trying to create a stream prior to dc_sink. That makes dc_sink is not present in create_stream_for_sink(). Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Roman Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 +++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 + .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 51 ++++++++++++++++++++++ .../amd/display/amdgpu_dm/amdgpu_dm_mst_types.h | 1 + 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f71fe6d2ddda..bb5fa895fb64 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct dm_connector_state *dm_state) { struct drm_display_mode *preferred_mode = NULL; - const struct drm_connector *drm_connector; + struct drm_connector *drm_connector; struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; @@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (!aconnector->dc_sink) { /* - * Exclude MST from creating fake_sink - * TODO: need to enable MST into 
fake_sink feature + * Create dc_sink when necessary to MST + * Don't apply fake_sink to MST */ - if (aconnector->mst_port) - goto stream_create_fail; + if (aconnector->mst_port) { + dm_dp_mst_dc_sink_create(drm_connector); + goto mst_dc_sink_create_done; + } if (create_fake_sink(aconnector)) goto stream_create_fail; @@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, stream_create_fail: dm_state_null: drm_connector_null: +mst_dc_sink_create_done: return stream; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 117521c6a6ed..0230250a1164 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -189,6 +189,8 @@ struct amdgpu_dm_connector { struct mutex hpd_lock; bool fake_enable; + + bool mst_connected; }; #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index f8efb98b1fa7..638c2c2b5cd7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector, return ret; } +void dm_dp_mst_dc_sink_create(struct drm_connector *connector) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct edid *edid; + struct dc_sink *dc_sink; + struct dc_sink_init_data init_params = { + .link = aconnector->dc_link, + .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; + + edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); + + if (!edid) { + drm_mode_connector_update_edid_property( + &aconnector->base, + NULL); + return; + } + + aconnector->edid = edid; + + dc_sink = dc_link_add_remote_sink( + aconnector->dc_link, + (uint8_t 
*)aconnector->edid, + (aconnector->edid->extensions + 1) * EDID_LENGTH, + &init_params); + + dc_sink->priv = aconnector; + aconnector->dc_sink = dc_sink; + + amdgpu_dm_add_sink_to_freesync_module( + connector, aconnector->edid); + + drm_mode_connector_update_edid_property( + &aconnector->base, aconnector->edid); +} + static int dm_dp_mst_get_modes(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_set_path_property(connector, pathprop); drm_connector_list_iter_end(&conn_iter); + aconnector->mst_connected = true; return &aconnector->base; } } @@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, */ amdgpu_dm_connector_funcs_reset(connector); + aconnector->mst_connected = true; + DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", aconnector, connector->base.id, aconnector->mst_port); @@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_update_edid_property( &aconnector->base, NULL); + + aconnector->mst_connected = false; } static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) @@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) drm_kms_helper_hotplug_event(dev); } +static void dm_dp_mst_link_status_reset(struct drm_connector *connector) +{ + mutex_lock(&connector->dev->mode_config.mutex); + drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD); + mutex_unlock(&connector->dev->mode_config.mutex); +} + static void dm_dp_mst_register_connector(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); if (adev->mode_info.rfbdev) drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, 
connector); @@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector) drm_connector_register(connector); + if (aconnector->mst_connected) + dm_dp_mst_link_status_reset(connector); } static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 2da851b40042..8cf51da26657 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -31,5 +31,6 @@ struct amdgpu_dm_connector; void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector); +void dm_dp_mst_dc_sink_create(struct drm_connector *connector); #endif -- cgit v1.2.3 From b17d2f8d373ef648a05ad7b894722f154d6660f4 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 13 Dec 2017 18:05:35 +0100 Subject: cpufreq: ARM: sort the Kconfig menu Group all the related big LITTLE configuration together and sort the other entries in alphabetic order. Also fixing tab vs space issue while mofifying these entries. Acked-by: Viresh Kumar Signed-off-by: Gregory CLEMENT Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 81 ++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index bdce4488ded1..beb8826afbb1 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -2,6 +2,22 @@ # ARM CPU Frequency scaling drivers # +config ACPI_CPPC_CPUFREQ + tristate "CPUFreq driver based on the ACPI CPPC spec" + depends on ACPI_PROCESSOR + select ACPI_CPPC_LIB + help + This adds a CPUFreq driver which uses CPPC methods + as described in the ACPIv5.1 spec. CPPC stands for + Collaborative Processor Performance Controls. 
It + is based on an abstract continuous scale of CPU + performance values which allows the remote power + processor to flexibly optimize for power and + performance. CPPC relies on power management firmware + support for its operation. + + If in doubt, say N. + # big LITTLE core layer and glue drivers config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" @@ -12,6 +28,30 @@ config ARM_BIG_LITTLE_CPUFREQ help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. +config ARM_DT_BL_CPUFREQ + tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver" + depends on ARM_BIG_LITTLE_CPUFREQ && OF + help + This enables probing via DT for Generic CPUfreq driver for ARM + big.LITTLE platform. This gets frequency tables from DT. + +config ARM_SCPI_CPUFREQ + tristate "SCPI based CPUfreq driver" + depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI + help + This adds the CPUfreq driver support for ARM big.LITTLE platforms + using SCPI protocol for CPU power management. + + This driver uses SCPI Message Protocol driver to interact with the + firmware providing the CPU DVFS functionality. + +config ARM_VEXPRESS_SPC_CPUFREQ + tristate "Versatile Express SPC based CPUfreq driver" + depends on ARM_BIG_LITTLE_CPUFREQ && ARCH_VEXPRESS_SPC + help + This add the CPUfreq driver support for Versatile Express + big.LITTLE platforms using SPC for power management. + config ARM_BRCMSTB_AVS_CPUFREQ tristate "Broadcom STB AVS CPUfreq driver" depends on ARCH_BRCMSTB || COMPILE_TEST @@ -33,20 +73,6 @@ config ARM_BRCMSTB_AVS_CPUFREQ_DEBUG If in doubt, say N. -config ARM_DT_BL_CPUFREQ - tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver" - depends on ARM_BIG_LITTLE_CPUFREQ && OF - help - This enables probing via DT for Generic CPUfreq driver for ARM - big.LITTLE platform. This gets frequency tables from DT. 
- -config ARM_VEXPRESS_SPC_CPUFREQ - tristate "Versatile Express SPC based CPUfreq driver" - depends on ARM_BIG_LITTLE_CPUFREQ && ARCH_VEXPRESS_SPC - help - This add the CPUfreq driver support for Versatile Express - big.LITTLE platforms using SPC for power management. - config ARM_EXYNOS5440_CPUFREQ tristate "SAMSUNG EXYNOS5440" depends on SOC_EXYNOS5440 @@ -205,16 +231,6 @@ config ARM_SA1100_CPUFREQ config ARM_SA1110_CPUFREQ bool -config ARM_SCPI_CPUFREQ - tristate "SCPI based CPUfreq driver" - depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI - help - This adds the CPUfreq driver support for ARM big.LITTLE platforms - using SCPI protocol for CPU power management. - - This driver uses SCPI Message Protocol driver to interact with the - firmware providing the CPU DVFS functionality. - config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" depends on PLAT_SPEAR @@ -275,20 +291,3 @@ config ARM_PXA2xx_CPUFREQ This add the CPUFreq driver support for Intel PXA2xx SOCs. If in doubt, say N. - -config ACPI_CPPC_CPUFREQ - tristate "CPUFreq driver based on the ACPI CPPC spec" - depends on ACPI_PROCESSOR - select ACPI_CPPC_LIB - default n - help - This adds a CPUFreq driver which uses CPPC methods - as described in the ACPIv5.1 spec. CPPC stands for - Collaborative Processor Performance Controls. It - is based on an abstract continuous scale of CPU - performance values which allows the remote power - processor to flexibly optimize for power and - performance. CPPC relies on power management firmware - support for its operation. - - If in doubt, say N. -- cgit v1.2.3 From 16630642f174cfc9b169c39cc74ec8e1360a9c55 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 13 Dec 2017 18:05:36 +0100 Subject: cpufreq: sort the drivers in ARM part Keep the driver files alphabetically sorted. Acked-by: Viresh Kumar Signed-off-by: Gregory CLEMENT Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 812f9e0d01a3..d762e76887e7 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -53,22 +53,24 @@ obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o +obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o obj-$(CONFIG_ARM_MEDIATEK_CPUFREQ) += mediatek-cpufreq.o +obj-$(CONFIG_MACH_MVEBU_V7) += mvebu-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_PXA2xx_CPUFREQ) += pxa2xx-cpufreq.o obj-$(CONFIG_PXA3xx) += pxa3xx-cpufreq.o -obj-$(CONFIG_ARM_S3C24XX_CPUFREQ) += s3c24xx-cpufreq.o -obj-$(CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS) += s3c24xx-cpufreq-debugfs.o obj-$(CONFIG_ARM_S3C2410_CPUFREQ) += s3c2410-cpufreq.o obj-$(CONFIG_ARM_S3C2412_CPUFREQ) += s3c2412-cpufreq.o obj-$(CONFIG_ARM_S3C2416_CPUFREQ) += s3c2416-cpufreq.o obj-$(CONFIG_ARM_S3C2440_CPUFREQ) += s3c2440-cpufreq.o obj-$(CONFIG_ARM_S3C64XX_CPUFREQ) += s3c64xx-cpufreq.o +obj-$(CONFIG_ARM_S3C24XX_CPUFREQ) += s3c24xx-cpufreq.o +obj-$(CONFIG_ARM_S3C24XX_CPUFREQ_DEBUGFS) += s3c24xx-cpufreq-debugfs.o obj-$(CONFIG_ARM_S5PV210_CPUFREQ) += s5pv210-cpufreq.o obj-$(CONFIG_ARM_SA1100_CPUFREQ) += sa1100-cpufreq.o obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o @@ -81,8 +83,6 @@ obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o obj-$(CONFIG_ARM_TEGRA186_CPUFREQ) += tegra186-cpufreq.o obj-$(CONFIG_ARM_TI_CPUFREQ) += ti-cpufreq.o obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o 
-obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o -obj-$(CONFIG_MACH_MVEBU_V7) += mvebu-cpufreq.o ################################################################################## -- cgit v1.2.3 From b3371600926638842c460d652599c4dddef72da6 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 13 Dec 2017 18:29:13 +0100 Subject: cpufreq: mvebu: Free the clock reference in the normal path In case of error the clock reference was freed but not in normal path once it was no longer used. This patch fixes it. Signed-off-by: Gregory CLEMENT Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/mvebu-cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c index ed915ee85dd9..c043aad8e3a0 100644 --- a/drivers/cpufreq/mvebu-cpufreq.c +++ b/drivers/cpufreq/mvebu-cpufreq.c @@ -99,6 +99,7 @@ static int __init armada_xp_pmsu_cpufreq_init(void) if (ret) dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); + clk_put(clk); } platform_device_register_simple("cpufreq-dt", -1, NULL, 0); -- cgit v1.2.3 From 3f4590a4a371006f66385eb5b739d5d01249ee3f Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Wed, 13 Dec 2017 18:29:14 +0100 Subject: cpufreq: mvebu: Free opp if registering failed Since the introduction of this driver, the functions to remove the opp were added. So stop claiming we can't remove opp and use one of them in case of failure. Signed-off-by: Gregory CLEMENT Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/mvebu-cpufreq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c index c043aad8e3a0..31513bd42705 100644 --- a/drivers/cpufreq/mvebu-cpufreq.c +++ b/drivers/cpufreq/mvebu-cpufreq.c @@ -76,12 +76,6 @@ static int __init armada_xp_pmsu_cpufreq_init(void) return PTR_ERR(clk); } - /* - * In case of a failure of dev_pm_opp_add(), we don't - * bother with cleaning up the registered OPP (there's - * no function to do so), and simply cancel the - * registration of the cpufreq device. - */ ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0); if (ret) { clk_put(clk); @@ -91,7 +85,8 @@ static int __init armada_xp_pmsu_cpufreq_init(void) ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0); if (ret) { clk_put(clk); - return ret; + dev_err(cpu_dev, "Failed to register OPPs\n"); + goto opp_register_failed; } ret = dev_pm_opp_set_sharing_cpus(cpu_dev, @@ -104,5 +99,11 @@ static int __init armada_xp_pmsu_cpufreq_init(void) platform_device_register_simple("cpufreq-dt", -1, NULL, 0); return 0; + +opp_register_failed: + /* As registering has failed remove all the opp for all cpus */ + dev_pm_opp_cpumask_remove_table(cpu_possible_mask); + + return ret; } device_initcall(armada_xp_pmsu_cpufreq_init); -- cgit v1.2.3 From e6ce0ce34f657795fef6536edd24df81d6f2ba81 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 15 Dec 2017 14:48:47 +0200 Subject: ACPI / LPSS: Add device link for CHT SD card dependency on I2C Some Cherry Trail boards have a dependency between the SDHCI host controller used for SD cards and an external PMIC accessed via I2C. Add a device link between the SDHCI host controller (consumer) and the I2C adapter (supplier). This patch depends on a fix to device links, namely commit 0ff26c662d5f ("driver core: Fix device link deferred probe"). 
And also either, commit 126dbc6b49c8 ("PM: i2c-designware-platdrv: Clean up PM handling in probe"), or patch "PM / runtime: Fix handling of suppliers with disabled runtime PM". Signed-off-by: Adrian Hunter Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_lpss.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 7f2b02cc8ea1..d78c57a95b86 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -427,6 +427,142 @@ out: return 0; } +struct lpss_device_links { + const char *supplier_hid; + const char *supplier_uid; + const char *consumer_hid; + const char *consumer_uid; + u32 flags; +}; + +/* + * The _DEP method is used to identify dependencies but instead of creating + * device links for every handle in _DEP, only links in the following list are + * created. That is necessary because, in the general case, _DEP can refer to + * devices that might not have drivers, or that are on different buses, or where + * the supplier is not enumerated until after the consumer is probed. 
+ */ +static const struct lpss_device_links lpss_device_links[] = { + {"808622C1", "7", "80860F14", "3", DL_FLAG_PM_RUNTIME}, +}; + +static bool hid_uid_match(const char *hid1, const char *uid1, + const char *hid2, const char *uid2) +{ + return !strcmp(hid1, hid2) && uid1 && uid2 && !strcmp(uid1, uid2); +} + +static bool acpi_lpss_is_supplier(struct acpi_device *adev, + const struct lpss_device_links *link) +{ + return hid_uid_match(acpi_device_hid(adev), acpi_device_uid(adev), + link->supplier_hid, link->supplier_uid); +} + +static bool acpi_lpss_is_consumer(struct acpi_device *adev, + const struct lpss_device_links *link) +{ + return hid_uid_match(acpi_device_hid(adev), acpi_device_uid(adev), + link->consumer_hid, link->consumer_uid); +} + +struct hid_uid { + const char *hid; + const char *uid; +}; + +static int match_hid_uid(struct device *dev, void *data) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + struct hid_uid *id = data; + + if (!adev) + return 0; + + return hid_uid_match(acpi_device_hid(adev), acpi_device_uid(adev), + id->hid, id->uid); +} + +static struct device *acpi_lpss_find_device(const char *hid, const char *uid) +{ + struct hid_uid data = { + .hid = hid, + .uid = uid, + }; + + return bus_find_device(&platform_bus_type, NULL, &data, match_hid_uid); +} + +static bool acpi_lpss_dep(struct acpi_device *adev, acpi_handle handle) +{ + struct acpi_handle_list dep_devices; + acpi_status status; + int i; + + if (!acpi_has_method(adev->handle, "_DEP")) + return false; + + status = acpi_evaluate_reference(adev->handle, "_DEP", NULL, + &dep_devices); + if (ACPI_FAILURE(status)) { + dev_dbg(&adev->dev, "Failed to evaluate _DEP.\n"); + return false; + } + + for (i = 0; i < dep_devices.count; i++) { + if (dep_devices.handles[i] == handle) + return true; + } + + return false; +} + +static void acpi_lpss_link_consumer(struct device *dev1, + const struct lpss_device_links *link) +{ + struct device *dev2; + + dev2 = acpi_lpss_find_device(link->consumer_hid, 
link->consumer_uid); + if (!dev2) + return; + + if (acpi_lpss_dep(ACPI_COMPANION(dev2), ACPI_HANDLE(dev1))) + device_link_add(dev2, dev1, link->flags); + + put_device(dev2); +} + +static void acpi_lpss_link_supplier(struct device *dev1, + const struct lpss_device_links *link) +{ + struct device *dev2; + + dev2 = acpi_lpss_find_device(link->supplier_hid, link->supplier_uid); + if (!dev2) + return; + + if (acpi_lpss_dep(ACPI_COMPANION(dev1), ACPI_HANDLE(dev2))) + device_link_add(dev1, dev2, link->flags); + + put_device(dev2); +} + +static void acpi_lpss_create_device_links(struct acpi_device *adev, + struct platform_device *pdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(lpss_device_links); i++) { + const struct lpss_device_links *link = &lpss_device_links[i]; + + if (acpi_lpss_is_supplier(adev, link)) + acpi_lpss_link_consumer(&pdev->dev, link); + + if (acpi_lpss_is_consumer(adev, link)) + acpi_lpss_link_supplier(&pdev->dev, link); + } +} + static int acpi_lpss_create_device(struct acpi_device *adev, const struct acpi_device_id *id) { @@ -500,6 +636,7 @@ static int acpi_lpss_create_device(struct acpi_device *adev, adev->driver_data = pdata; pdev = acpi_create_platform_device(adev, dev_desc->properties); if (!IS_ERR_OR_NULL(pdev)) { + acpi_lpss_create_device_links(adev, pdev); return 1; } -- cgit v1.2.3 From 846583ce151644b8538ca6cdf3f0857a80787649 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Wed, 29 Nov 2017 17:20:57 +0100 Subject: PNP: pnpbios: Use PTR_ERR_OR_ZERO() Fix ptr_ret.cocci warnings: drivers/pnp/pnpbios/core.c:584:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Vasyl Gomonovych Reviewed-by: Jean Delvare Signed-off-by: Rafael J. 
Wysocki --- drivers/pnp/pnpbios/core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index e681140b85d8..077f334fdbae 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -581,10 +581,7 @@ static int __init pnpbios_thread_init(void) init_completion(&unload_sem); task = kthread_run(pnp_dock_thread, NULL, "kpnpbiosd"); - if (IS_ERR(task)) - return PTR_ERR(task); - - return 0; + return PTR_ERR_OR_ZERO(task); } /* Start the kernel thread later: */ -- cgit v1.2.3 From 099caa9137624e69d936a62ce585d1adaec385ab Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 12 Dec 2017 19:37:27 -0500 Subject: ACPI: GED: unregister interrupts during shutdown Some GED interrupts could be pending by the time we are doing a reboot. Even though GED driver uses devm_request_irq() to register the interrupt handler, the handler is not being freed on time during a shutdown since the driver is missing a shutdown callback. If the ACPI handler is no longer available, this causes an interrupt storm and delays shutdown. 1. Don't use devm family of functions for IRQ registration/free 2. Keep track of the events since free_irq() requires the dev_id parameter passed into the request_irq() function. 3. Call free_irq() on both remove and shutdown explicitly. Signed-off-by: Sinan Kaya Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/evged.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/evged.c b/drivers/acpi/evged.c index 46f060356a22..f13ba2c07667 100644 --- a/drivers/acpi/evged.c +++ b/drivers/acpi/evged.c @@ -49,6 +49,11 @@ #define MODULE_NAME "acpi-ged" +struct acpi_ged_device { + struct device *dev; + struct list_head event_list; +}; + struct acpi_ged_event { struct list_head node; struct device *dev; @@ -76,7 +81,8 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares, unsigned int irq; unsigned int gsi; unsigned int irqflags = IRQF_ONESHOT; - struct device *dev = context; + struct acpi_ged_device *geddev = context; + struct device *dev = geddev->dev; acpi_handle handle = ACPI_HANDLE(dev); acpi_handle evt_handle; struct resource r; @@ -102,8 +108,6 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares, return AE_ERROR; } - dev_info(dev, "GED listening GSI %u @ IRQ %u\n", gsi, irq); - event = devm_kzalloc(dev, sizeof(*event), GFP_KERNEL); if (!event) return AE_ERROR; @@ -116,29 +120,58 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares, if (r.flags & IORESOURCE_IRQ_SHAREABLE) irqflags |= IRQF_SHARED; - if (devm_request_threaded_irq(dev, irq, NULL, acpi_ged_irq_handler, - irqflags, "ACPI:Ged", event)) { + if (request_threaded_irq(irq, NULL, acpi_ged_irq_handler, + irqflags, "ACPI:Ged", event)) { dev_err(dev, "failed to setup event handler for irq %u\n", irq); return AE_ERROR; } + dev_dbg(dev, "GED listening GSI %u @ IRQ %u\n", gsi, irq); + list_add_tail(&event->node, &geddev->event_list); return AE_OK; } static int ged_probe(struct platform_device *pdev) { + struct acpi_ged_device *geddev; acpi_status acpi_ret; + geddev = devm_kzalloc(&pdev->dev, sizeof(*geddev), GFP_KERNEL); + if (!geddev) + return -ENOMEM; + + geddev->dev = &pdev->dev; + INIT_LIST_HEAD(&geddev->event_list); acpi_ret = 
acpi_walk_resources(ACPI_HANDLE(&pdev->dev), "_CRS", - acpi_ged_request_interrupt, &pdev->dev); + acpi_ged_request_interrupt, geddev); if (ACPI_FAILURE(acpi_ret)) { dev_err(&pdev->dev, "unable to parse the _CRS record\n"); return -EINVAL; } + platform_set_drvdata(pdev, geddev); return 0; } +static void ged_shutdown(struct platform_device *pdev) +{ + struct acpi_ged_device *geddev = platform_get_drvdata(pdev); + struct acpi_ged_event *event, *next; + + list_for_each_entry_safe(event, next, &geddev->event_list, node) { + free_irq(event->irq, event); + list_del(&event->node); + dev_dbg(geddev->dev, "GED releasing GSI %u @ IRQ %u\n", + event->gsi, event->irq); + } +} + +static int ged_remove(struct platform_device *pdev) +{ + ged_shutdown(pdev); + return 0; +} + static const struct acpi_device_id ged_acpi_ids[] = { {"ACPI0013"}, {}, @@ -146,6 +179,8 @@ static const struct acpi_device_id ged_acpi_ids[] = { static struct platform_driver ged_driver = { .probe = ged_probe, + .remove = ged_remove, + .shutdown = ged_shutdown, .driver = { .name = MODULE_NAME, .acpi_match_table = ACPI_PTR(ged_acpi_ids), -- cgit v1.2.3 From f6f3732162b5ae3c771b9285a5a32d72b8586920 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 15 Dec 2017 18:53:22 -0800 Subject: Revert "mm: replace p??_write with pte_access_permitted in fault + gup paths" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commits 5c9d2d5c269c, c7da82b894e9, and e7fe7b5cae90. We'll probably need to revisit this, but basically we should not complicate the get_user_pages_fast() case, and checking the actual page table protection key bits will require more care anyway, since the protection keys depend on the exact state of the VM in question. Particularly when doing a "remote" page lookup (ie in somebody elses VM, not your own), you need to be much more careful than this was. 
Dave Hansen says: "So, the underlying bug here is that we now do a get_user_pages_remote() and then go ahead and do the p*_access_permitted() checks against the current PKRU. This was introduced recently with the addition of the new p??_access_permitted() calls. We have checks in the VMA path for the "remote" gups and we avoid consulting PKRU for them. This got missed in the pkeys selftests because I did a ptrace read, but not a *write*. I also didn't explicitly test it against something where a COW needed to be done" It's also not entirely clear that it makes sense to check the protection key bits at this level at all. But one possible eventual solution is to make the get_user_pages_fast() case just abort if it sees protection key bits set, which makes us fall back to the regular get_user_pages() case, which then has a vma and can do the check there if we want to. We'll see. Somewhat related to this all: what we _do_ want to do some day is to check the PAGE_USER bit - it should obviously always be set for user pages, but it would be a good check to have back. Because we have no generic way to test for it, we lost it as part of moving over from the architecture-specific x86 GUP implementation to the generic one in commit e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation"). Cc: Peter Zijlstra Cc: Dan Williams Cc: Dave Hansen Cc: Kirill A.
Shutemov Cc: "Jérôme Glisse" Cc: Andrew Morton Cc: Al Viro Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 6 ------ arch/sparc/mm/gup.c | 4 ++-- fs/dax.c | 3 +-- mm/gup.c | 2 +- mm/hmm.c | 8 ++++---- mm/huge_memory.c | 6 +++--- mm/memory.c | 8 ++++---- 7 files changed, 15 insertions(+), 22 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 57d7bc92e0b8..0a6b0286c32e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud) return pud; } -#define pud_write pud_write -static inline int pud_write(pud_t pud) -{ - return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; -} - static inline pud_t pud_mkclean(pud_t pud) { if (pud_large(pud)) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 33c0f8bb0f33..5335ba3c850e 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, if (!(pmd_val(pmd) & _PAGE_VALID)) return 0; - if (!pmd_access_permitted(pmd, write)) + if (write && !pmd_write(pmd)) return 0; refs = 0; @@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, if (!(pud_val(pud) & _PAGE_VALID)) return 0; - if (!pud_access_permitted(pud, write)) + if (write && !pud_write(pud)) return 0; refs = 0; diff --git a/fs/dax.c b/fs/dax.c index 78b72c48374e..95981591977a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -627,8 +627,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, if (pfn != pmd_pfn(*pmdp)) goto unlock_pmd; - if (!pmd_dirty(*pmdp) - && !pmd_access_permitted(*pmdp, WRITE)) + if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp)) goto unlock_pmd; flush_cache_page(vma, address, pfn); diff --git a/mm/gup.c b/mm/gup.c index d3fb60e5bfac..e0d82b6706d7 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, */ 
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) { - return pte_access_permitted(pte, WRITE) || + return pte_write(pte) || ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); } diff --git a/mm/hmm.c b/mm/hmm.c index 3a5c172af560..ea19742a5d60 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -391,11 +391,11 @@ again: if (pmd_protnone(pmd)) return hmm_vma_walk_clear(start, end, walk); - if (!pmd_access_permitted(pmd, write_fault)) + if (write_fault && !pmd_write(pmd)) return hmm_vma_walk_clear(start, end, walk); pfn = pmd_pfn(pmd) + pte_index(addr); - flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0; + flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0; for (; addr < end; addr += PAGE_SIZE, i++, pfn++) pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag; return 0; @@ -456,11 +456,11 @@ again: continue; } - if (!pte_access_permitted(pte, write_fault)) + if (write_fault && !pte_write(pte)) goto fault; pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag; - pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0; + pfns[i] |= pte_write(pte) ? 
HMM_PFN_WRITE : 0; continue; fault: diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2f2f5e774902..0e7ded98d114 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, */ WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); - if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE)) + if (flags & FOLL_WRITE && !pmd_write(*pmd)) return NULL; if (pmd_present(*pmd) && pmd_devmap(*pmd)) @@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, assert_spin_locked(pud_lockptr(mm, pud)); - if (!pud_access_permitted(*pud, flags & FOLL_WRITE)) + if (flags & FOLL_WRITE && !pud_write(*pud)) return NULL; if (pud_present(*pud) && pud_devmap(*pud)) @@ -1386,7 +1386,7 @@ out_unlock: */ static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) { - return pmd_access_permitted(pmd, WRITE) || + return pmd_write(pmd) || ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); } diff --git a/mm/memory.c b/mm/memory.c index cfaba6287702..ca5674cbaff2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3949,7 +3949,7 @@ static int handle_pte_fault(struct vm_fault *vmf) if (unlikely(!pte_same(*vmf->pte, entry))) goto unlock; if (vmf->flags & FAULT_FLAG_WRITE) { - if (!pte_access_permitted(entry, WRITE)) + if (!pte_write(entry)) return do_wp_page(vmf); entry = pte_mkdirty(entry); } @@ -4014,7 +4014,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* NUMA case for anonymous PUDs would go here */ - if (dirty && !pud_access_permitted(orig_pud, WRITE)) { + if (dirty && !pud_write(orig_pud)) { ret = wp_huge_pud(&vmf, orig_pud); if (!(ret & VM_FAULT_FALLBACK)) return ret; @@ -4047,7 +4047,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&vmf, orig_pmd); - if (dirty && 
!pmd_access_permitted(orig_pmd, WRITE)) { + if (dirty && !pmd_write(orig_pmd)) { ret = wp_huge_pmd(&vmf, orig_pmd); if (!(ret & VM_FAULT_FALLBACK)) return ret; @@ -4337,7 +4337,7 @@ int follow_phys(struct vm_area_struct *vma, goto out; pte = *ptep; - if (!pte_access_permitted(pte, flags & FOLL_WRITE)) + if ((flags & FOLL_WRITE) && !pte_write(pte)) goto unlock; *prot = pgprot_val(pte_pgprot(pte)); -- cgit v1.2.3 From 8878b126df769831cb2fa4088c3806538e8305f5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 9 Nov 2017 14:16:45 +0100 Subject: mtd: nand: add ->exec_op() implementation Introduce a new interface to instruct NAND controllers to send specific NAND operations. The new interface takes the form of a single method called ->exec_op(). This method is designed to replace ->cmd_ctrl(), ->cmdfunc() and ->read/write_byte/word/buf() hooks. ->exec_op() is passed a set of instructions describing the operation to execute. Each instruction has a type (ADDR, CMD, DATA, WAITRDY) and delay. The delay is here to help simple controllers wait enough time between each instruction, advanced controllers with integrated timings control can ignore these delays. Controllers that natively support complex operations (operations formed of several instructions) can use the NAND op parser infrastructure. This infrastructure allows controller drivers to describe the sequence of instructions they support (called nand_op_pattern) and a hook for each of these supported sequences. The core then tries to find the best match for a given NAND operation, and calls the associated hook. Various other helpers are also added to ease NAND controller drivers writing. This new interface should ease support of vendor specific operations in that NAND manufacturer drivers now have a way to check if the controller they are connected to supports a specific operation, and complain or refuse to probe the NAND chip when that's not the case. 
Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 1013 ++++++++++++++++++++++++++++++++++++++++- drivers/mtd/nand/nand_hynix.c | 9 + include/linux/mtd/rawnand.h | 368 ++++++++++++++- 3 files changed, 1364 insertions(+), 26 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 84d0a5d67e33..ab8ad9e8a8d8 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -688,6 +688,66 @@ static void nand_wait_status_ready(struct mtd_info *mtd, unsigned long timeo) } while (time_before(jiffies, timeo)); }; +/** + * nand_soft_waitrdy - Poll STATUS reg until RDY bit is set to 1 + * @chip: NAND chip structure + * @timeout_ms: Timeout in ms + * + * Poll the STATUS register using ->exec_op() until the RDY bit becomes 1. + * If that does not happen whitin the specified timeout, -ETIMEDOUT is + * returned. + * + * This helper is intended to be used when the controller does not have access + * to the NAND R/B pin. + * + * Be aware that calling this helper from an ->exec_op() implementation means + * ->exec_op() must be re-entrant. + * + * Return 0 if the NAND chip is ready, a negative error otherwise. + */ +int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms) +{ + u8 status = 0; + int ret; + + if (!chip->exec_op) + return -ENOTSUPP; + + ret = nand_status_op(chip, NULL); + if (ret) + return ret; + + timeout_ms = jiffies + msecs_to_jiffies(timeout_ms); + do { + ret = nand_read_data_op(chip, &status, sizeof(status), true); + if (ret) + break; + + if (status & NAND_STATUS_READY) + break; + + /* + * Typical lowest execution time for a tR on most NANDs is 10us, + * use this as polling delay before doing something smarter (ie. + * deriving a delay from the timeout value, timeout_ms/ratio). 
+ */ + udelay(10); + } while (time_before(jiffies, timeout_ms)); + + /* + * We have to exit READ_STATUS mode in order to read real data on the + * bus in case the WAITRDY instruction is preceding a DATA_IN + * instruction. + */ + nand_exit_status_op(chip); + + if (ret) + return ret; + + return status & NAND_STATUS_READY ? 0 : -ETIMEDOUT; +}; +EXPORT_SYMBOL_GPL(nand_soft_waitrdy); + /** * nand_command - [DEFAULT] Send command to NAND device * @mtd: MTD device structure @@ -1237,6 +1297,140 @@ static int nand_init_data_interface(struct nand_chip *chip) return 0; } +/** + * nand_fill_column_cycles - fill the column cycles of an address + * @chip: The NAND chip + * @addrs: Array of address cycles to fill + * @offset_in_page: The offset in the page + * + * Fills the first or the first two bytes of the @addrs field depending + * on the NAND bus width and the page size. + * + * Returns the number of cycles needed to encode the column, or a negative + * error code in case one of the arguments is invalid. + */ +static int nand_fill_column_cycles(struct nand_chip *chip, u8 *addrs, + unsigned int offset_in_page) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + + /* Make sure the offset is less than the actual page size. */ + if (offset_in_page > mtd->writesize + mtd->oobsize) + return -EINVAL; + + /* + * On small page NANDs, there's a dedicated command to access the OOB + * area, and the column address is relative to the start of the OOB + * area, not the start of the page. Asjust the address accordingly. + */ + if (mtd->writesize <= 512 && offset_in_page >= mtd->writesize) + offset_in_page -= mtd->writesize; + + /* + * The offset in page is expressed in bytes, if the NAND bus is 16-bit + * wide, then it must be divided by 2. 
+ */ + if (chip->options & NAND_BUSWIDTH_16) { + if (WARN_ON(offset_in_page % 2)) + return -EINVAL; + + offset_in_page /= 2; + } + + addrs[0] = offset_in_page; + + /* + * Small page NANDs use 1 cycle for the columns, while large page NANDs + * need 2 + */ + if (mtd->writesize <= 512) + return 1; + + addrs[1] = offset_in_page >> 8; + + return 2; +} + +static int nand_sp_exec_read_page_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, void *buf, + unsigned int len) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + u8 addrs[4]; + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_READ0, 0), + NAND_OP_ADDR(3, addrs, PSEC_TO_NSEC(sdr->tWB_max)), + NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tR_max), + PSEC_TO_NSEC(sdr->tRR_min)), + NAND_OP_DATA_IN(len, buf, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + int ret; + + /* Drop the DATA_IN instruction if len is set to 0. */ + if (!len) + op.ninstrs--; + + if (offset_in_page >= mtd->writesize) + instrs[0].ctx.cmd.opcode = NAND_CMD_READOOB; + else if (offset_in_page >= 256 && + !(chip->options & NAND_BUSWIDTH_16)) + instrs[0].ctx.cmd.opcode = NAND_CMD_READ1; + + ret = nand_fill_column_cycles(chip, addrs, offset_in_page); + if (ret < 0) + return ret; + + addrs[1] = page; + addrs[2] = page >> 8; + + if (chip->options & NAND_ROW_ADDR_3) { + addrs[3] = page >> 16; + instrs[1].ctx.addr.naddrs++; + } + + return nand_exec_op(chip, &op); +} + +static int nand_lp_exec_read_page_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, void *buf, + unsigned int len) +{ + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + u8 addrs[5]; + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_READ0, 0), + NAND_OP_ADDR(4, addrs, 0), + NAND_OP_CMD(NAND_CMD_READSTART, PSEC_TO_NSEC(sdr->tWB_max)), + NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tR_max), + 
PSEC_TO_NSEC(sdr->tRR_min)), + NAND_OP_DATA_IN(len, buf, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + int ret; + + /* Drop the DATA_IN instruction if len is set to 0. */ + if (!len) + op.ninstrs--; + + ret = nand_fill_column_cycles(chip, addrs, offset_in_page); + if (ret < 0) + return ret; + + addrs[2] = page; + addrs[3] = page >> 8; + + if (chip->options & NAND_ROW_ADDR_3) { + addrs[4] = page >> 16; + instrs[1].ctx.addr.naddrs++; + } + + return nand_exec_op(chip, &op); +} + /** * nand_read_page_op - Do a READ PAGE operation * @chip: The NAND chip @@ -1261,6 +1455,16 @@ int nand_read_page_op(struct nand_chip *chip, unsigned int page, if (offset_in_page + len > mtd->writesize + mtd->oobsize) return -EINVAL; + if (chip->exec_op) { + if (mtd->writesize > 512) + return nand_lp_exec_read_page_op(chip, page, + offset_in_page, buf, + len); + + return nand_sp_exec_read_page_op(chip, page, offset_in_page, + buf, len); + } + chip->cmdfunc(mtd, NAND_CMD_READ0, offset_in_page, page); if (len) chip->read_buf(mtd, buf, len); @@ -1291,6 +1495,25 @@ static int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf, if (len && !buf) return -EINVAL; + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_PARAM, 0), + NAND_OP_ADDR(1, &page, PSEC_TO_NSEC(sdr->tWB_max)), + NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tR_max), + PSEC_TO_NSEC(sdr->tRR_min)), + NAND_OP_8BIT_DATA_IN(len, buf, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + /* Drop the DATA_IN instruction if len is set to 0. 
*/ + if (!len) + op.ninstrs--; + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, NAND_CMD_PARAM, page, -1); for (i = 0; i < len; i++) p[i] = chip->read_byte(mtd); @@ -1323,6 +1546,37 @@ int nand_change_read_column_op(struct nand_chip *chip, if (offset_in_page + len > mtd->writesize + mtd->oobsize) return -EINVAL; + /* Small page NANDs do not support column change. */ + if (mtd->writesize <= 512) + return -ENOTSUPP; + + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + u8 addrs[2] = {}; + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_RNDOUT, 0), + NAND_OP_ADDR(2, addrs, 0), + NAND_OP_CMD(NAND_CMD_RNDOUTSTART, + PSEC_TO_NSEC(sdr->tCCS_min)), + NAND_OP_DATA_IN(len, buf, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + int ret; + + ret = nand_fill_column_cycles(chip, addrs, offset_in_page); + if (ret < 0) + return ret; + + /* Drop the DATA_IN instruction if len is set to 0. */ + if (!len) + op.ninstrs--; + + instrs[3].ctx.data.force_8bit = force_8bit; + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset_in_page, -1); if (len) chip->read_buf(mtd, buf, len); @@ -1355,6 +1609,11 @@ int nand_read_oob_op(struct nand_chip *chip, unsigned int page, if (offset_in_oob + len > mtd->oobsize) return -EINVAL; + if (chip->exec_op) + return nand_read_page_op(chip, page, + mtd->writesize + offset_in_oob, + buf, len); + chip->cmdfunc(mtd, NAND_CMD_READOOB, offset_in_oob, page); if (len) chip->read_buf(mtd, buf, len); @@ -1363,6 +1622,81 @@ int nand_read_oob_op(struct nand_chip *chip, unsigned int page, } EXPORT_SYMBOL_GPL(nand_read_oob_op); +static int nand_exec_prog_page_op(struct nand_chip *chip, unsigned int page, + unsigned int offset_in_page, const void *buf, + unsigned int len, bool prog) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + u8 addrs[5] = {}; + struct 
nand_op_instr instrs[] = { + /* + * The first instruction will be dropped if we're dealing + * with a large page NAND and adjusted if we're dealing + * with a small page NAND and the page offset is > 255. + */ + NAND_OP_CMD(NAND_CMD_READ0, 0), + NAND_OP_CMD(NAND_CMD_SEQIN, 0), + NAND_OP_ADDR(0, addrs, PSEC_TO_NSEC(sdr->tADL_min)), + NAND_OP_DATA_OUT(len, buf, 0), + NAND_OP_CMD(NAND_CMD_PAGEPROG, PSEC_TO_NSEC(sdr->tWB_max)), + NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tPROG_max), 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + int naddrs = nand_fill_column_cycles(chip, addrs, offset_in_page); + int ret; + u8 status; + + if (naddrs < 0) + return naddrs; + + addrs[naddrs++] = page; + addrs[naddrs++] = page >> 8; + if (chip->options & NAND_ROW_ADDR_3) + addrs[naddrs++] = page >> 16; + + instrs[2].ctx.addr.naddrs = naddrs; + + /* Drop the last two instructions if we're not programming the page. */ + if (!prog) { + op.ninstrs -= 2; + /* Also drop the DATA_OUT instruction if empty. */ + if (!len) + op.ninstrs--; + } + + if (mtd->writesize <= 512) { + /* + * Small pages need some more tweaking: we have to adjust the + * first instruction depending on the page offset we're trying + * to access. + */ + if (offset_in_page >= mtd->writesize) + instrs[0].ctx.cmd.opcode = NAND_CMD_READOOB; + else if (offset_in_page >= 256 && + !(chip->options & NAND_BUSWIDTH_16)) + instrs[0].ctx.cmd.opcode = NAND_CMD_READ1; + } else { + /* + * Drop the first command if we're dealing with a large page + * NAND. 
+ */ + op.instrs++; + op.ninstrs--; + } + + ret = nand_exec_op(chip, &op); + if (!prog || ret) + return ret; + + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + return status; +} + /** * nand_prog_page_begin_op - starts a PROG PAGE operation * @chip: The NAND chip @@ -1388,6 +1722,10 @@ int nand_prog_page_begin_op(struct nand_chip *chip, unsigned int page, if (offset_in_page + len > mtd->writesize + mtd->oobsize) return -EINVAL; + if (chip->exec_op) + return nand_exec_prog_page_op(chip, page, offset_in_page, buf, + len, false); + chip->cmdfunc(mtd, NAND_CMD_SEQIN, offset_in_page, page); if (buf) @@ -1409,11 +1747,35 @@ EXPORT_SYMBOL_GPL(nand_prog_page_begin_op); int nand_prog_page_end_op(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); - int status; + int ret; + u8 status; - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_PAGEPROG, + PSEC_TO_NSEC(sdr->tWB_max)), + NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tPROG_max), 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + ret = nand_exec_op(chip, &op); + if (ret) + return ret; + + ret = nand_status_op(chip, &status); + if (ret) + return ret; + } else { + chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); + ret = chip->waitfunc(mtd, chip); + if (ret < 0) + return ret; + + status = ret; + } - status = chip->waitfunc(mtd, chip); if (status & NAND_STATUS_FAIL) return -EIO; @@ -1447,11 +1809,16 @@ int nand_prog_page_op(struct nand_chip *chip, unsigned int page, if (offset_in_page + len > mtd->writesize + mtd->oobsize) return -EINVAL; - chip->cmdfunc(mtd, NAND_CMD_SEQIN, offset_in_page, page); - chip->write_buf(mtd, buf, len); - chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); + if (chip->exec_op) { + status = nand_exec_prog_page_op(chip, page, offset_in_page, buf, + len, true); + } else { + chip->cmdfunc(mtd, 
NAND_CMD_SEQIN, offset_in_page, page); + chip->write_buf(mtd, buf, len); + chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1); + status = chip->waitfunc(mtd, chip); + } - status = chip->waitfunc(mtd, chip); if (status & NAND_STATUS_FAIL) return -EIO; @@ -1485,6 +1852,35 @@ int nand_change_write_column_op(struct nand_chip *chip, if (offset_in_page + len > mtd->writesize + mtd->oobsize) return -EINVAL; + /* Small page NANDs do not support column change. */ + if (mtd->writesize <= 512) + return -ENOTSUPP; + + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + u8 addrs[2]; + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_RNDIN, 0), + NAND_OP_ADDR(2, addrs, PSEC_TO_NSEC(sdr->tCCS_min)), + NAND_OP_DATA_OUT(len, buf, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + int ret; + + ret = nand_fill_column_cycles(chip, addrs, offset_in_page); + if (ret < 0) + return ret; + + instrs[2].ctx.data.force_8bit = force_8bit; + + /* Drop the DATA_OUT instruction if len is set to 0. */ + if (!len) + op.ninstrs--; + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset_in_page, -1); if (len) chip->write_buf(mtd, buf, len); @@ -1516,6 +1912,23 @@ int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf, if (len && !buf) return -EINVAL; + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_READID, 0), + NAND_OP_ADDR(1, &addr, PSEC_TO_NSEC(sdr->tADL_min)), + NAND_OP_8BIT_DATA_IN(len, buf, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + /* Drop the DATA_IN instruction if len is set to 0. 
*/ + if (!len) + op.ninstrs--; + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, NAND_CMD_READID, addr, -1); for (i = 0; i < len; i++) @@ -1540,6 +1953,22 @@ int nand_status_op(struct nand_chip *chip, u8 *status) { struct mtd_info *mtd = nand_to_mtd(chip); + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_STATUS, + PSEC_TO_NSEC(sdr->tADL_min)), + NAND_OP_8BIT_DATA_IN(1, status, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + if (!status) + op.ninstrs--; + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1); if (status) *status = chip->read_byte(mtd); @@ -1563,6 +1992,15 @@ int nand_exit_status_op(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + if (chip->exec_op) { + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_READ0, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, NAND_CMD_READ0, -1, -1); return 0; @@ -1585,14 +2023,42 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock) struct mtd_info *mtd = nand_to_mtd(chip); unsigned int page = eraseblock << (chip->phys_erase_shift - chip->page_shift); - int status; + int ret; + u8 status; - chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page); - chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1); + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + u8 addrs[3] = { page, page >> 8, page >> 16 }; + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_ERASE1, 0), + NAND_OP_ADDR(2, addrs, 0), + NAND_OP_CMD(NAND_CMD_ERASE2, + PSEC_TO_NSEC(sdr->tWB_max)), + NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tBERS_max), 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); - status = chip->waitfunc(mtd, chip); - if (status < 0) - return status; + if (chip->options & NAND_ROW_ADDR_3) +
instrs[1].ctx.addr.naddrs++; + + ret = nand_exec_op(chip, &op); + if (ret) + return ret; + + ret = nand_status_op(chip, &status); + if (ret) + return ret; + } else { + chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page); + chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1); + + ret = chip->waitfunc(mtd, chip); + if (ret < 0) + return ret; + + status = ret; + } if (status & NAND_STATUS_FAIL) return -EIO; @@ -1618,13 +2084,40 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature, { struct mtd_info *mtd = nand_to_mtd(chip); const u8 *params = data; - int i, status; + int i, ret; + u8 status; - chip->cmdfunc(mtd, NAND_CMD_SET_FEATURES, feature, -1); - for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) - chip->write_byte(mtd, params[i]); + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_SET_FEATURES, 0), + NAND_OP_ADDR(1, &feature, PSEC_TO_NSEC(sdr->tADL_min)), + NAND_OP_8BIT_DATA_OUT(ONFI_SUBFEATURE_PARAM_LEN, data, + PSEC_TO_NSEC(sdr->tWB_max)), + NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tFEAT_max), 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + ret = nand_exec_op(chip, &op); + if (ret) + return ret; + + ret = nand_status_op(chip, &status); + if (ret) + return ret; + } else { + chip->cmdfunc(mtd, NAND_CMD_SET_FEATURES, feature, -1); + for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) + chip->write_byte(mtd, params[i]); + + ret = chip->waitfunc(mtd, chip); + if (ret < 0) + return ret; + + status = ret; + } - status = chip->waitfunc(mtd, chip); if (status & NAND_STATUS_FAIL) return -EIO; @@ -1650,6 +2143,22 @@ static int nand_get_features_op(struct nand_chip *chip, u8 feature, u8 *params = data; int i; + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_GET_FEATURES, 0), + NAND_OP_ADDR(1, &feature, PSEC_TO_NSEC(sdr->tWB_max)), + 
NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tFEAT_max), + PSEC_TO_NSEC(sdr->tRR_min)), + NAND_OP_8BIT_DATA_IN(ONFI_SUBFEATURE_PARAM_LEN, + data, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, NAND_CMD_GET_FEATURES, feature, -1); for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i) params[i] = chip->read_byte(mtd); @@ -1671,6 +2180,18 @@ int nand_reset_op(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + if (chip->exec_op) { + const struct nand_sdr_timings *sdr = + nand_get_sdr_timings(&chip->data_interface); + struct nand_op_instr instrs[] = { + NAND_OP_CMD(NAND_CMD_RESET, PSEC_TO_NSEC(sdr->tWB_max)), + NAND_OP_WAIT_RDY(PSEC_TO_MSEC(sdr->tRST_max), 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); return 0; @@ -1698,6 +2219,17 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, if (!len || !buf) return -EINVAL; + if (chip->exec_op) { + struct nand_op_instr instrs[] = { + NAND_OP_DATA_IN(len, buf, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + instrs[0].ctx.data.force_8bit = force_8bit; + + return nand_exec_op(chip, &op); + } + if (force_8bit) { u8 *p = buf; unsigned int i; @@ -1733,6 +2265,17 @@ int nand_write_data_op(struct nand_chip *chip, const void *buf, if (!len || !buf) return -EINVAL; + if (chip->exec_op) { + struct nand_op_instr instrs[] = { + NAND_OP_DATA_OUT(len, buf, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + instrs[0].ctx.data.force_8bit = force_8bit; + + return nand_exec_op(chip, &op); + } + if (force_8bit) { const u8 *p = buf; unsigned int i; @@ -1747,6 +2290,420 @@ int nand_write_data_op(struct nand_chip *chip, const void *buf, } EXPORT_SYMBOL_GPL(nand_write_data_op); +/** + * struct nand_op_parser_ctx - Context used by the parser + * @instrs: array of all the instructions that must be addressed + * 
@ninstrs: length of the @instrs array + * @subop: Sub-operation to be passed to the NAND controller + * + * This structure is used by the core to split NAND operations into + * sub-operations that can be handled by the NAND controller. + */ +struct nand_op_parser_ctx { + const struct nand_op_instr *instrs; + unsigned int ninstrs; + struct nand_subop subop; +}; + +/** + * nand_op_parser_must_split_instr - Checks if an instruction must be split + * @pat: the parser pattern element that matches @instr + * @instr: pointer to the instruction to check + * @start_offset: this is an in/out parameter. If @instr has already been + * split, then @start_offset is the offset from which to start + * (either an address cycle or an offset in the data buffer). + * Conversely, if the function returns true (ie. instr must be + * split), this parameter is updated to point to the first + * data/address cycle that has not been taken care of. + * + * Some NAND controllers are limited and cannot send X address cycles with a + * unique operation, or cannot read/write more than Y bytes at the same time. + * In this case, split the instruction that does not fit in a single + * controller-operation into two or more chunks. + * + * Returns true if the instruction must be split, false otherwise. + * The @start_offset parameter is also updated to the offset at which the next + * bundle of instruction must start (if an address or a data instruction). 
+ */ +static bool +nand_op_parser_must_split_instr(const struct nand_op_parser_pattern_elem *pat, + const struct nand_op_instr *instr, + unsigned int *start_offset) +{ + switch (pat->type) { + case NAND_OP_ADDR_INSTR: + if (!pat->addr.maxcycles) + break; + + if (instr->ctx.addr.naddrs - *start_offset > + pat->addr.maxcycles) { + *start_offset += pat->addr.maxcycles; + return true; + } + break; + + case NAND_OP_DATA_IN_INSTR: + case NAND_OP_DATA_OUT_INSTR: + if (!pat->data.maxlen) + break; + + if (instr->ctx.data.len - *start_offset > pat->data.maxlen) { + *start_offset += pat->data.maxlen; + return true; + } + break; + + default: + break; + } + + return false; +} + +/** + * nand_op_parser_match_pat - Checks if a pattern matches the instructions + * remaining in the parser context + * @pat: the pattern to test + * @ctx: the parser context structure to match with the pattern @pat + * + * Check if @pat matches the set or a sub-set of instructions remaining in @ctx. + * Returns true if this is the case, false otherwise. When true is returned, + * @ctx->subop is updated with the set of instructions to be passed to the + * controller driver. + */ +static bool +nand_op_parser_match_pat(const struct nand_op_parser_pattern *pat, + struct nand_op_parser_ctx *ctx) +{ + unsigned int instr_offset = ctx->subop.first_instr_start_off; + const struct nand_op_instr *end = ctx->instrs + ctx->ninstrs; + const struct nand_op_instr *instr = ctx->subop.instrs; + unsigned int i, ninstrs; + + for (i = 0, ninstrs = 0; i < pat->nelems && instr < end; i++) { + /* + * The pattern instruction does not match the operation + * instruction. If the instruction is marked optional in the + * pattern definition, we skip the pattern element and continue + * to the next one. If the element is mandatory, there's no + * match and we can return false directly.
+ */ + if (instr->type != pat->elems[i].type) { + if (!pat->elems[i].optional) + return false; + + continue; + } + + /* + * Now check the pattern element constraints. If the pattern is + * not able to handle the whole instruction in a single step, + * we have to split it. + * The last_instr_end_off value comes back updated to point to + * the position where we have to split the instruction (the + * start of the next subop chunk). + */ + if (nand_op_parser_must_split_instr(&pat->elems[i], instr, + &instr_offset)) { + ninstrs++; + i++; + break; + } + + instr++; + ninstrs++; + instr_offset = 0; + } + + /* + * This can happen if all instructions of a pattern are optional. + * Still, if there's not at least one instruction handled by this + * pattern, this is not a match, and we should try the next one (if + * any). + */ + if (!ninstrs) + return false; + + /* + * We had a match on the pattern head, but the pattern may be longer + * than the instructions we're asked to execute. We need to make sure + * there's no mandatory elements in the pattern tail. + */ + for (; i < pat->nelems; i++) { + if (!pat->elems[i].optional) + return false; + } + + /* + * We have a match: update the subop structure accordingly and return + * true. + */ + ctx->subop.ninstrs = ninstrs; + ctx->subop.last_instr_end_off = instr_offset; + + return true; +} + +#if IS_ENABLED(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG) +static void nand_op_parser_trace(const struct nand_op_parser_ctx *ctx) +{ + const struct nand_op_instr *instr; + char *prefix = " "; + unsigned int i; + + pr_debug("executing subop:\n"); + + for (i = 0; i < ctx->ninstrs; i++) { + instr = &ctx->instrs[i]; + + if (instr == &ctx->subop.instrs[0]) + prefix = " ->"; + + switch (instr->type) { + case NAND_OP_CMD_INSTR: + pr_debug("%sCMD [0x%02x]\n", prefix, + instr->ctx.cmd.opcode); + break; + case NAND_OP_ADDR_INSTR: + pr_debug("%sADDR [%d cyc: %*ph]\n", prefix, + instr->ctx.addr.naddrs, + instr->ctx.addr.naddrs < 64 ? 
+ instr->ctx.addr.naddrs : 64, + instr->ctx.addr.addrs); + break; + case NAND_OP_DATA_IN_INSTR: + pr_debug("%sDATA_IN [%d B%s]\n", prefix, + instr->ctx.data.len, + instr->ctx.data.force_8bit ? + ", force 8-bit" : ""); + break; + case NAND_OP_DATA_OUT_INSTR: + pr_debug("%sDATA_OUT [%d B%s]\n", prefix, + instr->ctx.data.len, + instr->ctx.data.force_8bit ? + ", force 8-bit" : ""); + break; + case NAND_OP_WAITRDY_INSTR: + pr_debug("%sWAITRDY [max %d ms]\n", prefix, + instr->ctx.waitrdy.timeout_ms); + break; + } + + if (instr == &ctx->subop.instrs[ctx->subop.ninstrs - 1]) + prefix = " "; + } +} +#else +static void nand_op_parser_trace(const struct nand_op_parser_ctx *ctx) +{ + /* NOP */ +} +#endif + +/** + * nand_op_parser_exec_op - exec_op parser + * @chip: the NAND chip + * @parser: patterns description provided by the controller driver + * @op: the NAND operation to address + * @check_only: when true, the function only checks if @op can be handled but + * does not execute the operation + * + * Helper function designed to ease integration of NAND controller drivers that + * only support a limited set of instruction sequences. The supported sequences + * are described in @parser, and the framework takes care of splitting @op into + * multiple sub-operations (if required) and pass them back to the ->exec() + * callback of the matching pattern if @check_only is set to false. + * + * NAND controller drivers should call this function from their own ->exec_op() + * implementation. + * + * Returns 0 on success, a negative error code otherwise. A failure can be + * caused by an unsupported operation (none of the supported patterns is able + * to handle the requested operation), or an error returned by one of the + * matching pattern->exec() hook. 
+ */ +int nand_op_parser_exec_op(struct nand_chip *chip, + const struct nand_op_parser *parser, + const struct nand_operation *op, bool check_only) +{ + struct nand_op_parser_ctx ctx = { + .subop.instrs = op->instrs, + .instrs = op->instrs, + .ninstrs = op->ninstrs, + }; + unsigned int i; + + while (ctx.subop.instrs < op->instrs + op->ninstrs) { + int ret; + + for (i = 0; i < parser->npatterns; i++) { + const struct nand_op_parser_pattern *pattern; + + pattern = &parser->patterns[i]; + if (!nand_op_parser_match_pat(pattern, &ctx)) + continue; + + nand_op_parser_trace(&ctx); + + if (check_only) + break; + + ret = pattern->exec(chip, &ctx.subop); + if (ret) + return ret; + + break; + } + + if (i == parser->npatterns) { + pr_debug("->exec_op() parser: pattern not found!\n"); + return -ENOTSUPP; + } + + /* + * Update the context structure by pointing to the start of the + * next subop. + */ + ctx.subop.instrs = ctx.subop.instrs + ctx.subop.ninstrs; + if (ctx.subop.last_instr_end_off) + ctx.subop.instrs -= 1; + + ctx.subop.first_instr_start_off = ctx.subop.last_instr_end_off; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nand_op_parser_exec_op); + +static bool nand_instr_is_data(const struct nand_op_instr *instr) +{ + return instr && (instr->type == NAND_OP_DATA_IN_INSTR || + instr->type == NAND_OP_DATA_OUT_INSTR); +} + +static bool nand_subop_instr_is_valid(const struct nand_subop *subop, + unsigned int instr_idx) +{ + return subop && instr_idx < subop->ninstrs; +} + +static int nand_subop_get_start_off(const struct nand_subop *subop, + unsigned int instr_idx) +{ + if (instr_idx) + return 0; + + return subop->first_instr_start_off; +} + +/** + * nand_subop_get_addr_start_off - Get the start offset in an address array + * @subop: The entire sub-operation + * @instr_idx: Index of the instruction inside the sub-operation + * + * During driver development, one could be tempted to directly use the + * ->addr.addrs field of address instructions. 
This is wrong as address + * instructions might be split. + * + * Given an address instruction, returns the offset of the first cycle to issue. + */ +int nand_subop_get_addr_start_off(const struct nand_subop *subop, + unsigned int instr_idx) +{ + if (!nand_subop_instr_is_valid(subop, instr_idx) || + subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR) + return -EINVAL; + + return nand_subop_get_start_off(subop, instr_idx); +} +EXPORT_SYMBOL_GPL(nand_subop_get_addr_start_off); + +/** + * nand_subop_get_num_addr_cyc - Get the remaining address cycles to assert + * @subop: The entire sub-operation + * @instr_idx: Index of the instruction inside the sub-operation + * + * During driver development, one could be tempted to directly use the + * ->addr->naddrs field of an address instruction. This is wrong as address + * instructions might be split. + * + * Given an address instruction, returns the number of address cycles to issue. + */ +int nand_subop_get_num_addr_cyc(const struct nand_subop *subop, + unsigned int instr_idx) +{ + int start_off, end_off; + + if (!nand_subop_instr_is_valid(subop, instr_idx) || + subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR) + return -EINVAL; + + start_off = nand_subop_get_addr_start_off(subop, instr_idx); + + if (instr_idx == subop->ninstrs - 1 && + subop->last_instr_end_off) + end_off = subop->last_instr_end_off; + else + end_off = subop->instrs[instr_idx].ctx.addr.naddrs; + + return end_off - start_off; +} +EXPORT_SYMBOL_GPL(nand_subop_get_num_addr_cyc); + +/** + * nand_subop_get_data_start_off - Get the start offset in a data array + * @subop: The entire sub-operation + * @instr_idx: Index of the instruction inside the sub-operation + * + * During driver development, one could be tempted to directly use the + * ->data->buf.{in,out} field of data instructions. This is wrong as data + * instructions might be split. + * + * Given a data instruction, returns the offset to start from.
+ */ +int nand_subop_get_data_start_off(const struct nand_subop *subop, + unsigned int instr_idx) +{ + if (!nand_subop_instr_is_valid(subop, instr_idx) || + !nand_instr_is_data(&subop->instrs[instr_idx])) + return -EINVAL; + + return nand_subop_get_start_off(subop, instr_idx); +} +EXPORT_SYMBOL_GPL(nand_subop_get_data_start_off); + +/** + * nand_subop_get_data_len - Get the number of bytes to retrieve + * @subop: The entire sub-operation + * @instr_idx: Index of the instruction inside the sub-operation + * + * During driver development, one could be tempted to directly use the + * ->data->len field of a data instruction. This is wrong as data instructions + * might be split. + * + * Returns the length of the chunk of data to send/receive. + */ +int nand_subop_get_data_len(const struct nand_subop *subop, + unsigned int instr_idx) +{ + int start_off = 0, end_off; + + if (!nand_subop_instr_is_valid(subop, instr_idx) || + !nand_instr_is_data(&subop->instrs[instr_idx])) + return -EINVAL; + + start_off = nand_subop_get_data_start_off(subop, instr_idx); + + if (instr_idx == subop->ninstrs - 1 && + subop->last_instr_end_off) + end_off = subop->last_instr_end_off; + else + end_off = subop->instrs[instr_idx].ctx.data.len; + + return end_off - start_off; +} +EXPORT_SYMBOL_GPL(nand_subop_get_data_len); + /** * nand_reset - Reset and initialize a NAND device * @chip: The NAND chip @@ -4002,7 +4959,7 @@ static void nand_set_defaults(struct nand_chip *chip) chip->chip_delay = 20; /* check, if a user supplied command function given */ - if (chip->cmdfunc == NULL) + if (!chip->cmdfunc && !chip->exec_op) chip->cmdfunc = nand_command; /* check, if a user supplied wait function given */ @@ -4894,15 +5851,21 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, if (!mtd->name && mtd->dev.parent) mtd->name = dev_name(mtd->dev.parent); - if ((!chip->cmdfunc || !chip->select_chip) && !chip->cmd_ctrl) { + /* + * ->cmdfunc() is legacy and will only be used if ->exec_op() is not + * 
populated. + */ + if (!chip->exec_op) { /* - * Default functions assigned for chip_select() and - * cmdfunc() both expect cmd_ctrl() to be populated, - * so we need to check that that's the case + * Default functions assigned for ->cmdfunc() and + * ->select_chip() both expect ->cmd_ctrl() to be populated. */ - pr_err("chip.cmd_ctrl() callback is not provided"); - return -EINVAL; + if ((!chip->cmdfunc || !chip->select_chip) && !chip->cmd_ctrl) { + pr_err("->cmd_ctrl() should be provided\n"); + return -EINVAL; + } } + /* Set the default functions */ nand_set_defaults(chip); diff --git a/drivers/mtd/nand/nand_hynix.c b/drivers/mtd/nand/nand_hynix.c index bae0da2aa2a8..d542908a0ebb 100644 --- a/drivers/mtd/nand/nand_hynix.c +++ b/drivers/mtd/nand/nand_hynix.c @@ -81,6 +81,15 @@ static int hynix_nand_cmd_op(struct nand_chip *chip, u8 cmd) { struct mtd_info *mtd = nand_to_mtd(chip); + if (chip->exec_op) { + struct nand_op_instr instrs[] = { + NAND_OP_CMD(cmd, 0), + }; + struct nand_operation op = NAND_OPERATION(instrs); + + return nand_exec_op(chip, &op); + } + chip->cmdfunc(mtd, cmd, -1, -1); return 0; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index f8f27c6801a6..469dc724f5df 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -734,6 +734,350 @@ struct nand_manufacturer_ops { void (*cleanup)(struct nand_chip *chip); }; +/** + * struct nand_op_cmd_instr - Definition of a command instruction + * @opcode: the command to issue in one cycle + */ +struct nand_op_cmd_instr { + u8 opcode; +}; + +/** + * struct nand_op_addr_instr - Definition of an address instruction + * @naddrs: length of the @addrs array + * @addrs: array containing the address cycles to issue + */ +struct nand_op_addr_instr { + unsigned int naddrs; + const u8 *addrs; +}; + +/** + * struct nand_op_data_instr - Definition of a data instruction + * @len: number of data bytes to move + * @in: buffer to fill when reading from the NAND chip + * @out: buffer 
to read from when writing to the NAND chip + * @force_8bit: force 8-bit access + * + * Please note that "in" and "out" are inverted from the ONFI specification + * and are from the controller perspective, so a "in" is a read from the NAND + * chip while a "out" is a write to the NAND chip. + */ +struct nand_op_data_instr { + unsigned int len; + union { + void *in; + const void *out; + } buf; + bool force_8bit; +}; + +/** + * struct nand_op_waitrdy_instr - Definition of a wait ready instruction + * @timeout_ms: maximum delay while waiting for the ready/busy pin in ms + */ +struct nand_op_waitrdy_instr { + unsigned int timeout_ms; +}; + +/** + * enum nand_op_instr_type - Definition of all instruction types + * @NAND_OP_CMD_INSTR: command instruction + * @NAND_OP_ADDR_INSTR: address instruction + * @NAND_OP_DATA_IN_INSTR: data in instruction + * @NAND_OP_DATA_OUT_INSTR: data out instruction + * @NAND_OP_WAITRDY_INSTR: wait ready instruction + */ +enum nand_op_instr_type { + NAND_OP_CMD_INSTR, + NAND_OP_ADDR_INSTR, + NAND_OP_DATA_IN_INSTR, + NAND_OP_DATA_OUT_INSTR, + NAND_OP_WAITRDY_INSTR, +}; + +/** + * struct nand_op_instr - Instruction object + * @type: the instruction type + * @cmd/@addr/@data/@waitrdy: extra data associated to the instruction. + * You'll have to use the appropriate element + * depending on @type + * @delay_ns: delay the controller should apply after the instruction has been + * issued on the bus. Most modern controllers have internal timings + * control logic, and in this case, the controller driver can ignore + * this field. 
+ */ +struct nand_op_instr { + enum nand_op_instr_type type; + union { + struct nand_op_cmd_instr cmd; + struct nand_op_addr_instr addr; + struct nand_op_data_instr data; + struct nand_op_waitrdy_instr waitrdy; + } ctx; + unsigned int delay_ns; +}; + +/* + * Special handling must be done for the WAITRDY timeout parameter as it usually + * is either tPROG (after a prog), tR (before a read), tRST (during a reset) or + * tBERS (during an erase) which all of them are u64 values that cannot be + * divided by usual kernel macros and must be handled with the special + * DIV_ROUND_UP_ULL() macro. + */ +#define __DIVIDE(dividend, divisor) ({ \ + sizeof(dividend) == sizeof(u32) ? \ + DIV_ROUND_UP(dividend, divisor) : \ + DIV_ROUND_UP_ULL(dividend, divisor); \ + }) +#define PSEC_TO_NSEC(x) __DIVIDE(x, 1000) +#define PSEC_TO_MSEC(x) __DIVIDE(x, 1000000000) + +#define NAND_OP_CMD(id, ns) \ + { \ + .type = NAND_OP_CMD_INSTR, \ + .ctx.cmd.opcode = id, \ + .delay_ns = ns, \ + } + +#define NAND_OP_ADDR(ncycles, cycles, ns) \ + { \ + .type = NAND_OP_ADDR_INSTR, \ + .ctx.addr = { \ + .naddrs = ncycles, \ + .addrs = cycles, \ + }, \ + .delay_ns = ns, \ + } + +#define NAND_OP_DATA_IN(l, b, ns) \ + { \ + .type = NAND_OP_DATA_IN_INSTR, \ + .ctx.data = { \ + .len = l, \ + .buf.in = b, \ + .force_8bit = false, \ + }, \ + .delay_ns = ns, \ + } + +#define NAND_OP_DATA_OUT(l, b, ns) \ + { \ + .type = NAND_OP_DATA_OUT_INSTR, \ + .ctx.data = { \ + .len = l, \ + .buf.out = b, \ + .force_8bit = false, \ + }, \ + .delay_ns = ns, \ + } + +#define NAND_OP_8BIT_DATA_IN(l, b, ns) \ + { \ + .type = NAND_OP_DATA_IN_INSTR, \ + .ctx.data = { \ + .len = l, \ + .buf.in = b, \ + .force_8bit = true, \ + }, \ + .delay_ns = ns, \ + } + +#define NAND_OP_8BIT_DATA_OUT(l, b, ns) \ + { \ + .type = NAND_OP_DATA_OUT_INSTR, \ + .ctx.data = { \ + .len = l, \ + .buf.out = b, \ + .force_8bit = true, \ + }, \ + .delay_ns = ns, \ + } + +#define NAND_OP_WAIT_RDY(tout_ms, ns) \ + { \ + .type = NAND_OP_WAITRDY_INSTR, \ + 
.ctx.waitrdy.timeout_ms = tout_ms, \ + .delay_ns = ns, \ + } + +/** + * struct nand_subop - a sub operation + * @instrs: array of instructions + * @ninstrs: length of the @instrs array + * @first_instr_start_off: offset to start from for the first instruction + * of the sub-operation + * @last_instr_end_off: offset to end at (excluded) for the last instruction + * of the sub-operation + * + * Both @first_instr_start_off and @last_instr_end_off only apply to data or + * address instructions. + * + * When an operation cannot be handled as is by the NAND controller, it will + * be split by the parser into sub-operations which will be passed to the + * controller driver. + */ +struct nand_subop { + const struct nand_op_instr *instrs; + unsigned int ninstrs; + unsigned int first_instr_start_off; + unsigned int last_instr_end_off; +}; + +int nand_subop_get_addr_start_off(const struct nand_subop *subop, + unsigned int op_id); +int nand_subop_get_num_addr_cyc(const struct nand_subop *subop, + unsigned int op_id); +int nand_subop_get_data_start_off(const struct nand_subop *subop, + unsigned int op_id); +int nand_subop_get_data_len(const struct nand_subop *subop, + unsigned int op_id); + +/** + * struct nand_op_parser_addr_constraints - Constraints for address instructions + * @maxcycles: maximum number of address cycles the controller can issue in a + * single step + */ +struct nand_op_parser_addr_constraints { + unsigned int maxcycles; +}; + +/** + * struct nand_op_parser_data_constraints - Constraints for data instructions + * @maxlen: maximum data length that the controller can handle in a single step + */ +struct nand_op_parser_data_constraints { + unsigned int maxlen; +}; + +/** + * struct nand_op_parser_pattern_elem - One element of a pattern + * @type: the instruction type + * @optional: whether this element of the pattern is optional or mandatory + * @addr/@data: address or data constraint (number of cycles or data length) + */ +struct nand_op_parser_pattern_elem
{ + enum nand_op_instr_type type; + bool optional; + union { + struct nand_op_parser_addr_constraints addr; + struct nand_op_parser_data_constraints data; + }; +}; + +#define NAND_OP_PARSER_PAT_CMD_ELEM(_opt) \ + { \ + .type = NAND_OP_CMD_INSTR, \ + .optional = _opt, \ + } + +#define NAND_OP_PARSER_PAT_ADDR_ELEM(_opt, _maxcycles) \ + { \ + .type = NAND_OP_ADDR_INSTR, \ + .optional = _opt, \ + .addr.maxcycles = _maxcycles, \ + } + +#define NAND_OP_PARSER_PAT_DATA_IN_ELEM(_opt, _maxlen) \ + { \ + .type = NAND_OP_DATA_IN_INSTR, \ + .optional = _opt, \ + .data.maxlen = _maxlen, \ + } + +#define NAND_OP_PARSER_PAT_DATA_OUT_ELEM(_opt, _maxlen) \ + { \ + .type = NAND_OP_DATA_OUT_INSTR, \ + .optional = _opt, \ + .data.maxlen = _maxlen, \ + } + +#define NAND_OP_PARSER_PAT_WAITRDY_ELEM(_opt) \ + { \ + .type = NAND_OP_WAITRDY_INSTR, \ + .optional = _opt, \ + } + +/** + * struct nand_op_parser_pattern - NAND sub-operation pattern descriptor + * @elems: array of pattern elements + * @nelems: number of pattern elements in @elems array + * @exec: the function that will issue a sub-operation + * + * A pattern is a list of elements, each element representing one instruction + * with its constraints. The pattern itself is used by the core to match NAND + * chip operation with NAND controller operations. + * Once a match between a NAND controller operation pattern and a NAND chip + * operation (or a sub-set of a NAND operation) is found, the pattern ->exec() + * hook is called so that the controller driver can issue the operation on the + * bus. + * + * Controller drivers should declare as many patterns as they support and pass + * this list of patterns (created with the help of the following macro) to + * the nand_op_parser_exec_op() helper. + */ +struct nand_op_parser_pattern { + const struct nand_op_parser_pattern_elem *elems; + unsigned int nelems; + int (*exec)(struct nand_chip *chip, const struct nand_subop *subop); +}; + +#define NAND_OP_PARSER_PATTERN(_exec, ...)
\ + { \ + .exec = _exec, \ + .elems = (struct nand_op_parser_pattern_elem[]) { __VA_ARGS__ }, \ + .nelems = sizeof((struct nand_op_parser_pattern_elem[]) { __VA_ARGS__ }) / \ + sizeof(struct nand_op_parser_pattern_elem), \ + } + +/** + * struct nand_op_parser - NAND controller operation parser descriptor + * @patterns: array of supported patterns + * @npatterns: length of the @patterns array + * + * The parser descriptor is just an array of supported patterns which will be + * iterated by nand_op_parser_exec_op() everytime it tries to execute an + * NAND operation (or tries to determine if a specific operation is supported). + * + * It is worth mentioning that patterns will be tested in their declaration + * order, and the first match will be taken, so it's important to order patterns + * appropriately so that simple/inefficient patterns are placed at the end of + * the list. Usually, this is where you put single instruction patterns. + */ +struct nand_op_parser { + const struct nand_op_parser_pattern *patterns; + unsigned int npatterns; +}; + +#define NAND_OP_PARSER(...) \ + { \ + .patterns = (struct nand_op_parser_pattern[]) { __VA_ARGS__ }, \ + .npatterns = sizeof((struct nand_op_parser_pattern[]) { __VA_ARGS__ }) / \ + sizeof(struct nand_op_parser_pattern), \ + } + +/** + * struct nand_operation - NAND operation descriptor + * @instrs: array of instructions to execute + * @ninstrs: length of the @instrs array + * + * The actual operation structure that will be passed to chip->exec_op(). 
+ */ +struct nand_operation { + const struct nand_op_instr *instrs; + unsigned int ninstrs; +}; + +#define NAND_OPERATION(_instrs) \ + { \ + .instrs = _instrs, \ + .ninstrs = ARRAY_SIZE(_instrs), \ + } + +int nand_op_parser_exec_op(struct nand_chip *chip, + const struct nand_op_parser *parser, + const struct nand_operation *op, bool check_only); + /** * struct nand_chip - NAND Private Flash Chip Data * @mtd: MTD device registered to the MTD framework @@ -760,6 +1104,10 @@ struct nand_manufacturer_ops { * commands to the chip. * @waitfunc: [REPLACEABLE] hardwarespecific function for wait on * ready. + * @exec_op: controller specific method to execute NAND operations. + * This method replaces ->cmdfunc(), + * ->{read,write}_{buf,byte,word}(), ->dev_ready() and + * ->waitfunc(). * @setup_read_retry: [FLASHSPECIFIC] flash (vendor) specific function for * setting the read-retry mode. Mostly needed for MLC NAND. * @ecc: [BOARDSPECIFIC] ECC control structure @@ -859,6 +1207,9 @@ struct nand_chip { void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column, int page_addr); int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this); + int (*exec_op)(struct nand_chip *chip, + const struct nand_operation *op, + bool check_only); int (*erase)(struct mtd_info *mtd, int page); int (*scan_bbt)(struct mtd_info *mtd); int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, @@ -869,7 +1220,6 @@ struct nand_chip { int (*setup_data_interface)(struct mtd_info *mtd, int chipnr, const struct nand_data_interface *conf); - int chip_delay; unsigned int options; unsigned int bbt_options; @@ -929,6 +1279,15 @@ struct nand_chip { } manufacturer; }; +static inline int nand_exec_op(struct nand_chip *chip, + const struct nand_operation *op) +{ + if (!chip->exec_op) + return -ENOTSUPP; + + return chip->exec_op(chip, op, false); +} + extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops; @@ -1320,4 +1679,11
@@ void nand_cleanup(struct nand_chip *chip); /* Default extended ID decoding function */ void nand_decode_ext_id(struct nand_chip *chip); + +/* + * External helper for controller drivers that have to implement the WAITRDY + * instruction and have no physical pin to check it. + */ +int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms); + #endif /* __LINUX_MTD_RAWNAND_H */ -- cgit v1.2.3 From 5f0e3fe6b1504d4e6530294ec87c473aa6d2d02f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 14 Nov 2017 09:10:11 -0500 Subject: x86/build: Make isoimage work on Debian Debian does not ship a 'mkisofs' symlink to genisoimage. All modern distros ship genisoimage, so just use that directly. That requires renaming the 'genisoimage' function. Also neaten up the 'for' loop while I'm in here. Signed-off-by: Matthew Wilcox Cc: Changbin Du Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/genimage.sh | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh index c9e8499fbfe7..6a10d52a4145 100644 --- a/arch/x86/boot/genimage.sh +++ b/arch/x86/boot/genimage.sh @@ -80,39 +80,43 @@ genfdimage288() { mcopy $FBZIMAGE w:linux } -genisoimage() { +geniso() { tmp_dir=`dirname $FIMAGE`/isoimage rm -rf $tmp_dir mkdir $tmp_dir - for i in lib lib64 share end ; do + for i in lib lib64 share ; do for j in syslinux ISOLINUX ; do if [ -f /usr/$i/$j/isolinux.bin ] ; then isolinux=/usr/$i/$j/isolinux.bin - cp $isolinux $tmp_dir fi done for j in syslinux syslinux/modules/bios ; do if [ -f /usr/$i/$j/ldlinux.c32 ]; then ldlinux=/usr/$i/$j/ldlinux.c32 - cp $ldlinux $tmp_dir fi done if [ -n "$isolinux" -a -n "$ldlinux" ] ; then break fi - if [ $i = end -a -z "$isolinux" ] ; then - echo 'Need an isolinux.bin file, please install syslinux/isolinux.' 
- exit 1 - fi done + if [ -z "$isolinux" ] ; then + echo 'Need an isolinux.bin file, please install syslinux/isolinux.' + exit 1 + fi + if [ -z "$ldlinux" ] ; then + echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.' + exit 1 + fi + cp $isolinux $tmp_dir + cp $ldlinux $tmp_dir cp $FBZIMAGE $tmp_dir/linux echo "$KCMDLINE" > $tmp_dir/isolinux.cfg if [ -f "$FDINITRD" ] ; then cp "$FDINITRD" $tmp_dir/initrd.img fi - mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \ - -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \ - $tmp_dir + genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \ + -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \ + -boot-info-table $tmp_dir isohybrid $FIMAGE 2>/dev/null || true rm -rf $tmp_dir } @@ -121,6 +125,6 @@ case $1 in bzdisk) genbzdisk;; fdimage144) genfdimage144;; fdimage288) genfdimage288;; - isoimage) genisoimage;; + isoimage) geniso;; *) echo 'Unknown image format'; exit 1; esac -- cgit v1.2.3 From cce1fea50e3be6b78fc677e8cf20cd0ca4c851b0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 1 Dec 2017 15:08:03 +0300 Subject: thunderbolt: Make pathname to force_power shorter WMI is the bus inside kernel, so, we may access the GUID via /sys/bus/wmi instead of doing this through /sys/devices path. Signed-off-by: Andy Shevchenko Acked-by: Mario Limonciello Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/thunderbolt.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst index de50a8561774..9b55952039a6 100644 --- a/Documentation/admin-guide/thunderbolt.rst +++ b/Documentation/admin-guide/thunderbolt.rst @@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with a sysfs attribute called "force_power". 
For example the intel-wmi-thunderbolt driver exposes this attribute in: - /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power + /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power To force the power to on, write 1 to this attribute file. To disable force power, write 0 to this attribute file. -- cgit v1.2.3 From 78dfa29c84bab548910490cf7508c53ad99d1d9e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 1 Dec 2017 15:08:04 +0300 Subject: MAINTAINERS: Add thunderbolt.rst to the Thunderbolt driver entry Make sure Thunderbolt maintainers get to see patches that touch documentation of the Thunderbolt driver as well. Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 82ad0eabce4f..5da966e19e8a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13492,6 +13492,7 @@ M: Mika Westerberg M: Yehezkel Bernat T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git S: Maintained +F: Documentation/admin-guide/thunderbolt.rst F: drivers/thunderbolt/ F: include/linux/thunderbolt.h -- cgit v1.2.3 From 74657181e7c449351d1ad28cf43941bc333e1bd6 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 1 Dec 2017 15:08:05 +0300 Subject: thunderbolt: Mask ring interrupt properly when polling starts When ring enters polling mode we are expected to mask the ring interrupt before the callback is called. However, the current code actually unmasks it probably because of a copy-paste mistake. Mask the interrupt properly from now on. 
Fixes: 4ffe722eefcb ("thunderbolt: Add polling mode for rings") Signed-off-by: Mika Westerberg Acked-by: Yehezkel Bernat Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/nhi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 419a7a90bce0..f45bcbc63738 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -339,7 +339,7 @@ static void __ring_interrupt(struct tb_ring *ring) return; if (ring->start_poll) { - __ring_interrupt_mask(ring, false); + __ring_interrupt_mask(ring, true); ring->start_poll(ring->poll_data); } else { schedule_work(&ring->work); -- cgit v1.2.3 From 7a62f48b271919b78f23c216ede6ac5d2b8b0368 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Thu, 14 Dec 2017 16:00:03 +0100 Subject: dt-bindings: marvell: Add documentation for the North Bridge PM on Armada 37xx Extend the documentation of the Armada 37xx SoC with the North Bridge Power Management component. Reviewed-by: Rob Herring Signed-off-by: Gregory CLEMENT Signed-off-by: Rafael J.
Wysocki --- .../devicetree/bindings/arm/marvell/armada-37xx.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt index 51336e5fc761..35c3c3460d17 100644 --- a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt +++ b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt @@ -14,3 +14,22 @@ following property before the previous one: Example: compatible = "marvell,armada-3720-db", "marvell,armada3720", "marvell,armada3710"; + + +Power management +---------------- + +For power management (particularly DVFS and AVS), the North Bridge +Power Management component is needed: + +Required properties: +- compatible : should contain "marvell,armada-3700-nb-pm", "syscon"; +- reg : the register start and length for the North Bridge + Power Management + +Example: + +nb_pm: syscon@14000 { + compatible = "marvell,armada-3700-nb-pm", "syscon"; + reg = <0x14000 0x60>; +}; -- cgit v1.2.3 From d71f617af7849e65884e2a5bd590807ca6b905b8 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Thu, 14 Dec 2017 16:00:04 +0100 Subject: MAINTAINERS: add new entries for Armada 37xx cpufreq driver This new driver belongs to the mvebu family, update the MAINTAINERS file to document it. Acked-by: Viresh Kumar Signed-off-by: Gregory CLEMENT Signed-off-by: Rafael J.
Wysocki --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index d4fdcb12616c..8b275dd1105a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1582,6 +1582,7 @@ F: arch/arm/boot/dts/kirkwood* F: arch/arm/configs/mvebu_*_defconfig F: arch/arm/mach-mvebu/ F: arch/arm64/boot/dts/marvell/armada* +F: drivers/cpufreq/armada-37xx-cpufreq.c F: drivers/cpufreq/mvebu-cpufreq.c F: drivers/irqchip/irq-armada-370-xp.c F: drivers/irqchip/irq-mvebu-* -- cgit v1.2.3 From 92ce45fb875d7c3e021cc454482fe0687ff54f29 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Thu, 14 Dec 2017 16:00:05 +0100 Subject: cpufreq: Add DVFS support for Armada 37xx This patch adds DVFS support for the Armada 37xx SoCs There are up to four CPU frequency loads for Armada 37xx controlled by the hardware. This driver associates the CPU load level to a frequency, then the hardware will switch while selecting a load level. The hardware also can associate a voltage for each level (AVS support) but it is not yet supported Tested-by: Andre Heider Acked-by: Viresh Kumar Signed-off-by: Gregory CLEMENT Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 7 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/armada-37xx-cpufreq.c | 241 ++++++++++++++++++++++++++++++++++ 3 files changed, 249 insertions(+) create mode 100644 drivers/cpufreq/armada-37xx-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index beb8826afbb1..3a88e33b0cfe 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -18,6 +18,13 @@ config ACPI_CPPC_CPUFREQ If in doubt, say N. +config ARM_ARMADA_37XX_CPUFREQ + tristate "Armada 37xx CPUFreq support" + depends on ARCH_MVEBU + help + This adds the CPUFreq driver support for Marvell Armada 37xx SoCs. + The Armada 37xx PMU supports 4 frequency and VDD levels. 
+ # big LITTLE core layer and glue drivers config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index d762e76887e7..e07715ce8844 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o # LITTLE drivers, so that it is probed last. obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o +obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c new file mode 100644 index 000000000000..c6ebc88a7d8d --- /dev/null +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * CPU frequency scaling support for Armada 37xx platform. 
+ * + * Copyright (C) 2017 Marvell + * + * Gregory CLEMENT + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Power management in North Bridge register set */ +#define ARMADA_37XX_NB_L0L1 0x18 +#define ARMADA_37XX_NB_L2L3 0x1C +#define ARMADA_37XX_NB_TBG_DIV_OFF 13 +#define ARMADA_37XX_NB_TBG_DIV_MASK 0x7 +#define ARMADA_37XX_NB_CLK_SEL_OFF 11 +#define ARMADA_37XX_NB_CLK_SEL_MASK 0x1 +#define ARMADA_37XX_NB_CLK_SEL_TBG 0x1 +#define ARMADA_37XX_NB_TBG_SEL_OFF 9 +#define ARMADA_37XX_NB_TBG_SEL_MASK 0x3 +#define ARMADA_37XX_NB_VDD_SEL_OFF 6 +#define ARMADA_37XX_NB_VDD_SEL_MASK 0x3 +#define ARMADA_37XX_NB_CONFIG_SHIFT 16 +#define ARMADA_37XX_NB_DYN_MOD 0x24 +#define ARMADA_37XX_NB_CLK_SEL_EN BIT(26) +#define ARMADA_37XX_NB_TBG_EN BIT(28) +#define ARMADA_37XX_NB_DIV_EN BIT(29) +#define ARMADA_37XX_NB_VDD_EN BIT(30) +#define ARMADA_37XX_NB_DFS_EN BIT(31) +#define ARMADA_37XX_NB_CPU_LOAD 0x30 +#define ARMADA_37XX_NB_CPU_LOAD_MASK 0x3 +#define ARMADA_37XX_DVFS_LOAD_0 0 +#define ARMADA_37XX_DVFS_LOAD_1 1 +#define ARMADA_37XX_DVFS_LOAD_2 2 +#define ARMADA_37XX_DVFS_LOAD_3 3 + +/* + * On Armada 37xx the Power management manages 4 level of CPU load, + * each level can be associated with a CPU clock source, a CPU + * divider, a VDD level, etc... 
+ */ +#define LOAD_LEVEL_NR 4 + +struct armada_37xx_dvfs { + u32 cpu_freq_max; + u8 divider[LOAD_LEVEL_NR]; +}; + +static struct armada_37xx_dvfs armada_37xx_dvfs[] = { + {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, + {.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} }, + {.cpu_freq_max = 800*1000*1000, .divider = {1, 2, 3, 4} }, + {.cpu_freq_max = 600*1000*1000, .divider = {2, 4, 5, 6} }, +}; + +static struct armada_37xx_dvfs *armada_37xx_cpu_freq_info_get(u32 freq) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(armada_37xx_dvfs); i++) { + if (freq == armada_37xx_dvfs[i].cpu_freq_max) + return &armada_37xx_dvfs[i]; + } + + pr_err("Unsupported CPU frequency %d MHz\n", freq/1000000); + return NULL; +} + +/* + * Setup the four level managed by the hardware. Once the four level + * will be configured then the DVFS will be enabled. + */ +static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base, + struct clk *clk, u8 *divider) +{ + int load_lvl; + struct clk *parent; + + for (load_lvl = 0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { + unsigned int reg, mask, val, offset = 0; + + if (load_lvl <= ARMADA_37XX_DVFS_LOAD_1) + reg = ARMADA_37XX_NB_L0L1; + else + reg = ARMADA_37XX_NB_L2L3; + + if (load_lvl == ARMADA_37XX_DVFS_LOAD_0 || + load_lvl == ARMADA_37XX_DVFS_LOAD_2) + offset += ARMADA_37XX_NB_CONFIG_SHIFT; + + /* Set cpu clock source, for all the level we use TBG */ + val = ARMADA_37XX_NB_CLK_SEL_TBG << ARMADA_37XX_NB_CLK_SEL_OFF; + mask = (ARMADA_37XX_NB_CLK_SEL_MASK + << ARMADA_37XX_NB_CLK_SEL_OFF); + + /* + * Set cpu divider based on the pre-computed array in + * order to have balanced step. + */ + val |= divider[load_lvl] << ARMADA_37XX_NB_TBG_DIV_OFF; + mask |= (ARMADA_37XX_NB_TBG_DIV_MASK + << ARMADA_37XX_NB_TBG_DIV_OFF); + + /* Set VDD divider which is actually the load level. 
*/ + val |= load_lvl << ARMADA_37XX_NB_VDD_SEL_OFF; + mask |= (ARMADA_37XX_NB_VDD_SEL_MASK + << ARMADA_37XX_NB_VDD_SEL_OFF); + + val <<= offset; + mask <<= offset; + + regmap_update_bits(base, reg, mask, val); + } + + /* + * Set cpu clock source, for all the level we keep the same + * clock source that the one already configured. For this one + * we need to use the clock framework + */ + parent = clk_get_parent(clk); + clk_set_parent(clk, parent); +} + +static void __init armada37xx_cpufreq_disable_dvfs(struct regmap *base) +{ + unsigned int reg = ARMADA_37XX_NB_DYN_MOD, + mask = ARMADA_37XX_NB_DFS_EN; + + regmap_update_bits(base, reg, mask, 0); +} + +static void __init armada37xx_cpufreq_enable_dvfs(struct regmap *base) +{ + unsigned int val, reg = ARMADA_37XX_NB_CPU_LOAD, + mask = ARMADA_37XX_NB_CPU_LOAD_MASK; + + /* Start with the highest load (0) */ + val = ARMADA_37XX_DVFS_LOAD_0; + regmap_update_bits(base, reg, mask, val); + + /* Now enable DVFS for the CPUs */ + reg = ARMADA_37XX_NB_DYN_MOD; + mask = ARMADA_37XX_NB_CLK_SEL_EN | ARMADA_37XX_NB_TBG_EN | + ARMADA_37XX_NB_DIV_EN | ARMADA_37XX_NB_VDD_EN | + ARMADA_37XX_NB_DFS_EN; + + regmap_update_bits(base, reg, mask, mask); +} + +static int __init armada37xx_cpufreq_driver_init(void) +{ + struct armada_37xx_dvfs *dvfs; + struct platform_device *pdev; + unsigned int cur_frequency; + struct regmap *nb_pm_base; + struct device *cpu_dev; + int load_lvl, ret; + struct clk *clk; + + nb_pm_base = + syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm"); + + if (IS_ERR(nb_pm_base)) + return -ENODEV; + + /* Before doing any configuration on the DVFS first, disable it */ + armada37xx_cpufreq_disable_dvfs(nb_pm_base); + + /* + * On CPU 0 register the operating points supported (which are + * the nominal CPU frequency and full integer divisions of + * it). 
+ */ + cpu_dev = get_cpu_device(0); + if (!cpu_dev) { + dev_err(cpu_dev, "Cannot get CPU\n"); + return -ENODEV; + } + + clk = clk_get(cpu_dev, 0); + if (IS_ERR(clk)) { + dev_err(cpu_dev, "Cannot get clock for CPU0\n"); + return PTR_ERR(clk); + } + + /* Get nominal (current) CPU frequency */ + cur_frequency = clk_get_rate(clk); + if (!cur_frequency) { + dev_err(cpu_dev, "Failed to get clock rate for CPU\n"); + return -EINVAL; + } + + dvfs = armada_37xx_cpu_freq_info_get(cur_frequency); + if (!dvfs) + return -EINVAL; + + armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider); + + for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; + load_lvl++) { + unsigned long freq = cur_frequency / dvfs->divider[load_lvl]; + + ret = dev_pm_opp_add(cpu_dev, freq, 0); + if (ret) { + /* clean-up the already added opp before leaving */ + while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) { + freq = cur_frequency / dvfs->divider[load_lvl]; + dev_pm_opp_remove(cpu_dev, freq); + } + return ret; + } + } + + /* Now that everything is setup, enable the DVFS at hardware level */ + armada37xx_cpufreq_enable_dvfs(nb_pm_base); + + pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0); + + return PTR_ERR_OR_ZERO(pdev); +} +/* late_initcall, to guarantee the driver is loaded after A37xx clock driver */ +late_initcall(armada37xx_cpufreq_driver_init); + +MODULE_AUTHOR("Gregory CLEMENT "); +MODULE_DESCRIPTION("Armada 37xx cpufreq driver"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From bf6571057f0830ab5132be8b6045d2677baad281 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Nov 2017 18:59:46 +0000 Subject: mtd: mtdswap: make array 'name' static const, shrinks object size Don't populate the read-only array 'name' on the stack but instead make it static and constify it. 
Makes the object code smaller by 35 bytes: Before: text data bss dec hex filename 26304 4444 352 31100 797c drivers/mtd/mtdswap.o After: text data bss dec hex filename 26205 4508 352 31065 7959 drivers/mtd/mtdswap.o (gcc version 7.2.0 x86_64) Signed-off-by: Colin Ian King Signed-off-by: Boris Brezillon --- drivers/mtd/mtdswap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index f07492c6f4b2..7eb0e1f4f980 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -1223,8 +1223,9 @@ static int mtdswap_show(struct seq_file *s, void *data) unsigned int max[MTDSWAP_TREE_CNT]; unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages; uint64_t use_size; - char *name[] = {"clean", "used", "low", "high", "dirty", "bitflip", - "failing"}; + static const char * const name[] = { + "clean", "used", "low", "high", "dirty", "bitflip", "failing" + }; mutex_lock(&d->mbd_dev->lock); -- cgit v1.2.3 From 588753f1eb18978512b1c9b85fddb457d46f9033 Mon Sep 17 00:00:00 2001 From: Brendan McGrath Date: Wed, 13 Dec 2017 22:14:57 +1100 Subject: ipv6: icmp6: Allow icmp messages to be looped back One example of when an ICMPv6 packet is required to be looped back is when a host acts as both a Multicast Listener and a Multicast Router. A Multicast Router will listen on address ff02::16 for MLDv2 messages. Currently, MLDv2 messages originating from a Multicast Listener running on the same host as the Multicast Router are not being delivered to the Multicast Router. This is due to dst.input being assigned the default value of dst_discard. This results in the packet being looped back but discarded before being delivered to the Multicast Router. This patch sets dst.input to ip6_input to ensure a looped back packet is delivered to the Multicast Router. Signed-off-by: Brendan McGrath Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7a8d1500d374..2bc91c349273 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, } rt->dst.flags |= DST_HOST; + rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; -- cgit v1.2.3 From f870c1ff65a6d1f3a083f277280802ee09a5b44d Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 14 Dec 2017 20:20:00 +0300 Subject: vxlan: restore dev->mtu setting based on lower device Stefano Brivio says: Commit a985343ba906 ("vxlan: refactor verification and application of configuration") introduced a change in the behaviour of initial MTU setting: earlier, the MTU for a link created on top of a given lower device, without an initial MTU specification, was set to the MTU of the lower device minus headroom as a result of this path in vxlan_dev_configure(): if (!conf->mtu) dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); which is now gone. Now, the initial MTU, in absence of a configured value, is simply set by ether_setup() to ETH_DATA_LEN (1500 bytes). This breaks userspace expectations in case the MTU of the lower device is higher than 1500 bytes minus headroom. This patch restores the previous behaviour on newlink operation. Since max_mtu can be negative and we update dev->mtu directly, also check it for valid minimum. Reported-by: Junhan Yan Fixes: a985343ba906 ("vxlan: refactor verification and application of configuration") Signed-off-by: Alexey Kodanev Acked-by: Stefano Brivio Signed-off-by: Stefano Brivio Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 19b9cc51079e..1000b0e4ee01 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3103,6 +3103,11 @@ static void vxlan_config_apply(struct net_device *dev, max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + if (max_mtu < ETH_MIN_MTU) + max_mtu = ETH_MIN_MTU; + + if (!changelink && !conf->mtu) + dev->mtu = max_mtu; } if (dev->mtu > max_mtu) -- cgit v1.2.3 From 40d1a07b333ef1f7fce11ee20b8f4281d1a75fb9 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 27 Mar 2017 02:44:47 -0700 Subject: xtensa: enable stack protector The implementation is adopted from the ARM arch. GCC 7.3, 8 or newer is required for building the xtensa kernel with SSP. Signed-off-by: Max Filippov --- .../features/debug/stackprotector/arch-support.txt | 2 +- arch/xtensa/Kconfig | 1 + arch/xtensa/boot/lib/Makefile | 4 +++ arch/xtensa/include/asm/stackprotector.h | 40 ++++++++++++++++++++++ arch/xtensa/kernel/asm-offsets.c | 3 ++ arch/xtensa/kernel/entry.S | 6 ++++ arch/xtensa/kernel/process.c | 6 ++++ 7 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 arch/xtensa/include/asm/stackprotector.h diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt index d7acd7bd3619..59a4c9ffb7f3 100644 --- a/Documentation/features/debug/stackprotector/arch-support.txt +++ b/Documentation/features/debug/stackprotector/arch-support.txt @@ -35,5 +35,5 @@ | um: | TODO | | unicore32: | TODO | | x86: | ok | - | xtensa: | TODO | + | xtensa: | ok | ----------------------- diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index eb1f196c3f6e..fffe05b698ac 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -15,6 +15,7 @@ config XTENSA select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK + select HAVE_CC_STACKPROTECTOR select 
HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS diff --git a/arch/xtensa/boot/lib/Makefile b/arch/xtensa/boot/lib/Makefile index d2a7f48564a4..2fe182915b63 100644 --- a/arch/xtensa/boot/lib/Makefile +++ b/arch/xtensa/boot/lib/Makefile @@ -15,6 +15,10 @@ CFLAGS_REMOVE_inftrees.o = -pg CFLAGS_REMOVE_inffast.o = -pg endif +CFLAGS_REMOVE_inflate.o += -fstack-protector -fstack-protector-strong +CFLAGS_REMOVE_zmem.o += -fstack-protector -fstack-protector-strong +CFLAGS_REMOVE_inftrees.o += -fstack-protector -fstack-protector-strong +CFLAGS_REMOVE_inffast.o += -fstack-protector -fstack-protector-strong quiet_cmd_copy_zlib = COPY $@ cmd_copy_zlib = cat $< > $@ diff --git a/arch/xtensa/include/asm/stackprotector.h b/arch/xtensa/include/asm/stackprotector.h new file mode 100644 index 000000000000..e368f94fd2af --- /dev/null +++ b/arch/xtensa/include/asm/stackprotector.h @@ -0,0 +1,40 @@ +/* + * GCC stack protector support. + * + * (This is directly adopted from the ARM implementation) + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and gcc expects it to be defined by a global variable called + * "__stack_chk_guard" on Xtensa. This unfortunately means that on SMP + * we cannot have a different canary value per task. + */ + +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#include +#include + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. 
*/ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index bcb5beb81177..3b119b372efd 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -76,6 +76,9 @@ int main(void) DEFINE(TASK_PID, offsetof (struct task_struct, pid)); DEFINE(TASK_THREAD, offsetof (struct task_struct, thread)); DEFINE(TASK_THREAD_INFO, offsetof (struct task_struct, stack)); +#ifdef CONFIG_CC_STACKPROTECTOR + DEFINE(TASK_STACK_CANARY, offsetof(struct task_struct, stack_canary)); +#endif DEFINE(TASK_STRUCT_SIZE, sizeof (struct task_struct)); /* offsets in thread_info struct */ diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index a27a9a65635b..5caff0744f3c 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1971,6 +1971,12 @@ ENTRY(_switch_to) s32i a1, a2, THREAD_SP # save stack pointer #endif +#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) + movi a6, __stack_chk_guard + l32i a8, a3, TASK_STACK_CANARY + s32i a8, a6, 0 +#endif + /* Disable ints while we manipulate the stack pointer. 
*/ irq_save a14, a3 diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index ff4f0ecb03dd..8dd0593fb2c4 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -58,6 +58,12 @@ void (*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); +#ifdef CONFIG_CC_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + #if XTENSA_HAVE_COPROCESSORS void coprocessor_release_all(struct thread_info *ti) -- cgit v1.2.3 From c130d3be84afb9b5a30ce4f715f88a1c1dcc4114 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 15 Dec 2017 12:00:30 -0800 Subject: xtensa: clean up custom-controlled debug output Replace #ifdef'fed/commented out debug printk statements with pr_debug. Replace printk statements with pr_* equivalents. Signed-off-by: Max Filippov --- arch/xtensa/kernel/module.c | 19 +++++++--------- arch/xtensa/kernel/pci.c | 30 +++++++++---------------- arch/xtensa/kernel/setup.c | 7 +++--- arch/xtensa/kernel/signal.c | 8 ++----- arch/xtensa/kernel/traps.c | 25 ++++++++++++--------- arch/xtensa/lib/pci-auto.c | 45 ++++++++----------------------------- arch/xtensa/mm/cache.c | 3 --- arch/xtensa/mm/fault.c | 22 ++++++++---------- arch/xtensa/mm/tlb.c | 6 ++--- arch/xtensa/platforms/iss/console.c | 4 ++-- arch/xtensa/platforms/iss/network.c | 14 +++++------- 11 files changed, 66 insertions(+), 117 deletions(-) diff --git a/arch/xtensa/kernel/module.c b/arch/xtensa/kernel/module.c index b715237bae61..902845ddacb7 100644 --- a/arch/xtensa/kernel/module.c +++ b/arch/xtensa/kernel/module.c @@ -22,8 +22,6 @@ #include #include -#undef DEBUG_RELOCATE - static int decode_calln_opcode (unsigned char *location) { @@ -58,10 +56,9 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, unsigned char *location; uint32_t value; -#ifdef DEBUG_RELOCATE - printk("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); -#endif + pr_debug("Applying relocate section %u to %u\n", 
relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { location = (char *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rela[i].r_offset; @@ -87,7 +84,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, value -= ((unsigned long)location & -4) + 4; if ((value & 3) != 0 || ((value + (1 << 19)) >> 20) != 0) { - printk("%s: relocation out of range, " + pr_err("%s: relocation out of range, " "section %d reloc %d " "sym '%s'\n", mod->name, relsec, i, @@ -111,7 +108,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, value -= (((unsigned long)location + 3) & -4); if ((value & 3) != 0 || (signed int)value >> 18 != -1) { - printk("%s: relocation out of range, " + pr_err("%s: relocation out of range, " "section %d reloc %d " "sym '%s'\n", mod->name, relsec, i, @@ -156,7 +153,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, case R_XTENSA_SLOT12_OP: case R_XTENSA_SLOT13_OP: case R_XTENSA_SLOT14_OP: - printk("%s: unexpected FLIX relocation: %u\n", + pr_err("%s: unexpected FLIX relocation: %u\n", mod->name, ELF32_R_TYPE(rela[i].r_info)); return -ENOEXEC; @@ -176,13 +173,13 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, case R_XTENSA_SLOT12_ALT: case R_XTENSA_SLOT13_ALT: case R_XTENSA_SLOT14_ALT: - printk("%s: unexpected ALT relocation: %u\n", + pr_err("%s: unexpected ALT relocation: %u\n", mod->name, ELF32_R_TYPE(rela[i].r_info)); return -ENOEXEC; default: - printk("%s: unexpected relocation: %u\n", + pr_err("%s: unexpected relocation: %u\n", mod->name, ELF32_R_TYPE(rela[i].r_info)); return -ENOEXEC; diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 903963ee495d..d981f01c8d89 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -29,14 +29,6 @@ #include #include -#undef DEBUG - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) 
-#endif - /* PCI Controller */ @@ -101,8 +93,8 @@ pcibios_enable_resources(struct pci_dev *dev, int mask) for(idx=0; idx<6; idx++) { r = &dev->resource[idx]; if (!r->start && r->end) { - printk (KERN_ERR "PCI: Device %s not available because " - "of resource collisions\n", pci_name(dev)); + pr_err("PCI: Device %s not available because " + "of resource collisions\n", pci_name(dev)); return -EINVAL; } if (r->flags & IORESOURCE_IO) @@ -113,7 +105,7 @@ pcibios_enable_resources(struct pci_dev *dev, int mask) if (dev->resource[PCI_ROM_RESOURCE].start) cmd |= PCI_COMMAND_MEMORY; if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", + pr_info("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); pci_write_config_word(dev, PCI_COMMAND, cmd); } @@ -144,8 +136,8 @@ static void __init pci_controller_apertures(struct pci_controller *pci_ctrl, res = &pci_ctrl->io_resource; if (!res->flags) { if (io_offset) - printk (KERN_ERR "I/O resource not set for host" - " bridge %d\n", pci_ctrl->index); + pr_err("I/O resource not set for host bridge %d\n", + pci_ctrl->index); res->start = 0; res->end = IO_SPACE_LIMIT; res->flags = IORESOURCE_IO; @@ -159,8 +151,8 @@ static void __init pci_controller_apertures(struct pci_controller *pci_ctrl, if (!res->flags) { if (i > 0) continue; - printk(KERN_ERR "Memory resource not set for " - "host bridge %d\n", pci_ctrl->index); + pr_err("Memory resource not set for host bridge %d\n", + pci_ctrl->index); res->start = 0; res->end = ~0U; res->flags = IORESOURCE_MEM; @@ -176,7 +168,7 @@ static int __init pcibios_init(void) struct pci_bus *bus; int next_busno = 0, ret; - printk("PCI: Probing PCI hardware\n"); + pr_info("PCI: Probing PCI hardware\n"); /* Scan all of the recorded PCI controllers. 
*/ for (pci_ctrl = pci_ctrl_head; pci_ctrl; pci_ctrl = pci_ctrl->next) { @@ -232,7 +224,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) for (idx=0; idx<6; idx++) { r = &dev->resource[idx]; if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available because " + pr_err("PCI: Device %s not available because " "of resource collisions\n", pci_name(dev)); return -EINVAL; } @@ -242,8 +234,8 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) cmd |= PCI_COMMAND_MEMORY; } if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); + pr_info("PCI: Enabling device %s (%04x -> %04x)\n", + pci_name(dev), old_cmd, cmd); pci_write_config_word(dev, PCI_COMMAND, cmd); } diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 3732c91b7200..cf7516c52a19 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -156,7 +156,7 @@ static int __init parse_bootparam(const bp_tag_t* tag) /* Boot parameters must start with a BP_TAG_FIRST tag. */ if (tag->id != BP_TAG_FIRST) { - printk(KERN_WARNING "Invalid boot parameters!\n"); + pr_warn("Invalid boot parameters!\n"); return 0; } @@ -165,15 +165,14 @@ static int __init parse_bootparam(const bp_tag_t* tag) /* Parse all tags. 
*/ while (tag != NULL && tag->id != BP_TAG_LAST) { - for (t = &__tagtable_begin; t < &__tagtable_end; t++) { + for (t = &__tagtable_begin; t < &__tagtable_end; t++) { if (tag->id == t->tag) { t->parse(tag); break; } } if (t == &__tagtable_end) - printk(KERN_WARNING "Ignoring tag " - "0x%08x\n", tag->id); + pr_warn("Ignoring tag 0x%08x\n", tag->id); tag = (bp_tag_t*)((unsigned long)(tag + 1) + tag->size); } diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index d427e784ab44..f88e7a0b232c 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -28,8 +28,6 @@ #include #include -#define DEBUG_SIG 0 - extern struct task_struct *coproc_owners[]; struct rt_sigframe @@ -399,10 +397,8 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, regs->areg[8] = (unsigned long) &frame->uc; regs->threadptr = tp; -#if DEBUG_SIG - printk("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08x\n", - current->comm, current->pid, sig, frame, regs->pc); -#endif + pr_debug("SIG rt deliver (%s:%d): signal=%d sp=%p pc=%08lx\n", + current->comm, current->pid, sig, frame, regs->pc); return 0; } diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index bae697a06a98..9a1fef9c1cc6 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -178,13 +179,14 @@ __die_if_kernel(const char *str, struct pt_regs *regs, long err) void do_unhandled(struct pt_regs *regs, unsigned long exccause) { __die_if_kernel("Caught unhandled exception - should not happen", - regs, SIGKILL); + regs, SIGKILL); /* If in user mode, send SIGILL signal to current process */ - printk("Caught unhandled exception in '%s' " - "(pid = %d, pc = %#010lx) - should not happen\n" - "\tEXCCAUSE is %ld\n", - current->comm, task_pid_nr(current), regs->pc, exccause); + pr_info_ratelimited("Caught unhandled exception in '%s' " + "(pid = %d, pc = %#010lx) - should not happen\n" + "\tEXCCAUSE 
is %ld\n", + current->comm, task_pid_nr(current), regs->pc, + exccause); force_sig(SIGILL, current); } @@ -305,8 +307,8 @@ do_illegal_instruction(struct pt_regs *regs) /* If in user mode, send SIGILL signal to current process. */ - printk("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n", - current->comm, task_pid_nr(current), regs->pc); + pr_info_ratelimited("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n", + current->comm, task_pid_nr(current), regs->pc); force_sig(SIGILL, current); } @@ -325,13 +327,14 @@ do_unaligned_user (struct pt_regs *regs) siginfo_t info; __die_if_kernel("Unhandled unaligned exception in kernel", - regs, SIGKILL); + regs, SIGKILL); current->thread.bad_vaddr = regs->excvaddr; current->thread.error_code = -3; - printk("Unaligned memory access to %08lx in '%s' " - "(pid = %d, pc = %#010lx)\n", - regs->excvaddr, current->comm, task_pid_nr(current), regs->pc); + pr_info_ratelimited("Unaligned memory access to %08lx in '%s' " + "(pid = %d, pc = %#010lx)\n", + regs->excvaddr, current->comm, + task_pid_nr(current), regs->pc); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRALN; diff --git a/arch/xtensa/lib/pci-auto.c b/arch/xtensa/lib/pci-auto.c index 34d05abbd921..a2b558161d6d 100644 --- a/arch/xtensa/lib/pci-auto.c +++ b/arch/xtensa/lib/pci-auto.c @@ -49,17 +49,6 @@ * */ - -/* define DEBUG to print some debugging messages. */ - -#undef DEBUG - -#ifdef DEBUG -# define DBG(x...) printk(x) -#else -# define DBG(x...) 
-#endif - static int pciauto_upper_iospc; static int pciauto_upper_memspc; @@ -97,7 +86,7 @@ pciauto_setup_bars(struct pci_dev *dev, int bar_limit) { bar_size &= PCI_BASE_ADDRESS_IO_MASK; upper_limit = &pciauto_upper_iospc; - DBG("PCI Autoconfig: BAR %d, I/O, ", bar_nr); + pr_debug("PCI Autoconfig: BAR %d, I/O, ", bar_nr); } else { @@ -107,7 +96,7 @@ pciauto_setup_bars(struct pci_dev *dev, int bar_limit) bar_size &= PCI_BASE_ADDRESS_MEM_MASK; upper_limit = &pciauto_upper_memspc; - DBG("PCI Autoconfig: BAR %d, Mem, ", bar_nr); + pr_debug("PCI Autoconfig: BAR %d, Mem, ", bar_nr); } /* Allocate a base address (bar_size is negative!) */ @@ -125,7 +114,8 @@ pciauto_setup_bars(struct pci_dev *dev, int bar_limit) if (found_mem64) pci_write_config_dword(dev, (bar+=4), 0x00000000); - DBG("size=0x%x, address=0x%x\n", ~bar_size + 1, *upper_limit); + pr_debug("size=0x%x, address=0x%x\n", + ~bar_size + 1, *upper_limit); } } @@ -150,7 +140,7 @@ pciauto_setup_irq(struct pci_controller* pci_ctrl,struct pci_dev *dev,int devfn) if (irq == -1) irq = 0; - DBG("PCI Autoconfig: Interrupt %d, pin %d\n", irq, pin); + pr_debug("PCI Autoconfig: Interrupt %d, pin %d\n", irq, pin); pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } @@ -289,8 +279,8 @@ int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus) int iosave, memsave; - DBG("PCI Autoconfig: Found P2P bridge, device %d\n", - PCI_SLOT(pci_devfn)); + pr_debug("PCI Autoconfig: Found P2P bridge, device %d\n", + PCI_SLOT(pci_devfn)); /* Allocate PCI I/O and/or memory space */ pciauto_setup_bars(dev, PCI_BASE_ADDRESS_1); @@ -306,23 +296,6 @@ int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus) } - -#if 0 - /* Skip legacy mode IDE controller */ - - if ((pci_class >> 16) == PCI_CLASS_STORAGE_IDE) { - - unsigned char prg_iface; - pci_read_config_byte(dev, PCI_CLASS_PROG, &prg_iface); - - if (!(prg_iface & PCIAUTO_IDE_MODE_MASK)) { - DBG("PCI Autoconfig: Skipping legacy mode " - "IDE 
controller\n"); - continue; - } - } -#endif - /* * Found a peripheral, enable some standard * settings @@ -337,8 +310,8 @@ int __init pciauto_bus_scan(struct pci_controller *pci_ctrl, int current_bus) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x80); /* Allocate PCI I/O and/or memory space */ - DBG("PCI Autoconfig: Found Bus %d, Device %d, Function %d\n", - current_bus, PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn) ); + pr_debug("PCI Autoconfig: Found Bus %d, Device %d, Function %d\n", + current_bus, PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn)); pciauto_setup_bars(dev, PCI_BASE_ADDRESS_5); pciauto_setup_irq(pci_ctrl, dev, pci_devfn); diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 3c75c4e597da..57dc231a0709 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -33,9 +33,6 @@ #include #include -//#define printd(x...) printk(x) -#define printd(x...) do { } while(0) - /* * Note: * The kernel provides one architecture bit PG_arch_1 in the page flags that diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index a14df5aa98c8..8b9b6f44bb06 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -25,8 +25,6 @@ DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST; void bad_page_fault(struct pt_regs*, unsigned long, int); -#undef DEBUG_PAGE_FAULT - /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -68,10 +66,10 @@ void do_page_fault(struct pt_regs *regs) exccause == EXCCAUSE_ITLB_MISS || exccause == EXCCAUSE_FETCH_CACHE_ATTRIBUTE) ? 1 : 0; -#ifdef DEBUG_PAGE_FAULT - printk("[%s:%d:%08x:%d:%08x:%s%s]\n", current->comm, current->pid, - address, exccause, regs->pc, is_write? "w":"", is_exec? "x":""); -#endif + pr_debug("[%s:%d:%08x:%d:%08lx:%s%s]\n", + current->comm, current->pid, + address, exccause, regs->pc, + is_write ? "w" : "", is_exec ? 
"x" : ""); if (user_mode(regs)) flags |= FAULT_FLAG_USER; @@ -247,10 +245,8 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) /* Are we prepared to handle this kernel fault? */ if ((entry = search_exception_tables(regs->pc)) != NULL) { -#ifdef DEBUG_PAGE_FAULT - printk(KERN_DEBUG "%s: Exception at pc=%#010lx (%lx)\n", - current->comm, regs->pc, entry->fixup); -#endif + pr_debug("%s: Exception at pc=%#010lx (%lx)\n", + current->comm, regs->pc, entry->fixup); current->thread.bad_uaddr = address; regs->pc = entry->fixup; return; @@ -259,9 +255,9 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) /* Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - printk(KERN_ALERT "Unable to handle kernel paging request at virtual " - "address %08lx\n pc = %08lx, ra = %08lx\n", - address, regs->pc, regs->areg[0]); + pr_alert("Unable to handle kernel paging request at virtual " + "address %08lx\n pc = %08lx, ra = %08lx\n", + address, regs->pc, regs->areg[0]); die("Oops", regs, sig); do_exit(sig); } diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 35c822286bbe..59153d0aa890 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -95,10 +95,8 @@ void local_flush_tlb_range(struct vm_area_struct *vma, if (mm->context.asid[cpu] == NO_CONTEXT) return; -#if 0 - printk("[tlbrange<%02lx,%08lx,%08lx>]\n", - (unsigned long)mm->context.asid[cpu], start, end); -#endif + pr_debug("[tlbrange<%02lx,%08lx,%08lx>]\n", + (unsigned long)mm->context.asid[cpu], start, end); local_irq_save(flags); if (end-start + (PAGE_SIZE-1) <= _TLB_ENTRIES << PAGE_SHIFT) { diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 0140a22551c8..6fc0a946ad64 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -186,7 +186,7 @@ int __init rs_init(void) serial_driver = alloc_tty_driver(SERIAL_MAX_NUM_LINES); - printk ("%s 
%s\n", serial_name, serial_version); + pr_info("%s %s\n", serial_name, serial_version); /* Initialize the tty_driver structure */ @@ -215,7 +215,7 @@ static __exit void rs_exit(void) int error; if ((error = tty_unregister_driver(serial_driver))) - printk("ISS_SERIAL: failed to unregister serial driver (%d)\n", + pr_err("ISS_SERIAL: failed to unregister serial driver (%d)\n", error); put_tty_driver(serial_driver); tty_port_destroy(&serial_port); diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 66a5d15a9e0e..538d17e8fbc1 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -16,6 +16,8 @@ * */ +#define pr_fmt(fmt) "%s: " fmt, __func__ + #include #include #include @@ -609,8 +611,6 @@ struct iss_net_init { * those fields. They will be later initialized in iss_net_init. */ -#define ERR KERN_ERR "iss_net_setup: " - static int __init iss_net_setup(char *str) { struct iss_net_private *device = NULL; @@ -622,14 +622,14 @@ static int __init iss_net_setup(char *str) end = strchr(str, '='); if (!end) { - printk(ERR "Expected '=' after device number\n"); + pr_err("Expected '=' after device number\n"); return 1; } *end = 0; rc = kstrtouint(str, 0, &n); *end = '='; if (rc < 0) { - printk(ERR "Failed to parse '%s'\n", str); + pr_err("Failed to parse '%s'\n", str); return 1; } str = end; @@ -645,13 +645,13 @@ static int __init iss_net_setup(char *str) spin_unlock(&devices_lock); if (device && device->index == n) { - printk(ERR "Device %u already configured\n", n); + pr_err("Device %u already configured\n", n); return 1; } new = alloc_bootmem(sizeof(*new)); if (new == NULL) { - printk(ERR "Alloc_bootmem failed\n"); + pr_err("Alloc_bootmem failed\n"); return 1; } @@ -663,8 +663,6 @@ static int __init iss_net_setup(char *str) return 1; } -#undef ERR - __setup("eth", iss_net_setup); /* -- cgit v1.2.3 From f21a79cab3773bc17aa845b7738c7f200778a260 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 15 
Dec 2017 16:08:16 -0800 Subject: xtensa: clean up exception handling structure Instead of using flat array of longs use normal C structure and generate EXC_TABLE_* constants in the asm-offsets.c Signed-off-by: Max Filippov --- arch/xtensa/include/asm/ptrace.h | 12 ------------ arch/xtensa/include/asm/regs.h | 1 + arch/xtensa/include/asm/traps.h | 23 +++++++++++++++++++++++ arch/xtensa/kernel/asm-offsets.c | 13 +++++++++++++ arch/xtensa/kernel/traps.c | 39 +++++++++++++++++++-------------------- 5 files changed, 56 insertions(+), 32 deletions(-) diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index 05beae3c6376..3a5c5918aea3 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -39,18 +39,6 @@ * +-----------------------+ -------- */ -/* Offsets for exception_handlers[] (3 x 64-entries x 4-byte tables). */ - -#define EXC_TABLE_KSTK 0x004 /* Kernel Stack */ -#define EXC_TABLE_DOUBLE_SAVE 0x008 /* Double exception save area for a0 */ -#define EXC_TABLE_FIXUP 0x00c /* Fixup handler */ -#define EXC_TABLE_PARAM 0x010 /* For passing a parameter to fixup */ -#define EXC_TABLE_SYSCALL_SAVE 0x014 /* For fast syscall handler */ -#define EXC_TABLE_FAST_USER 0x100 /* Fast user exception handler */ -#define EXC_TABLE_FAST_KERNEL 0x200 /* Fast kernel exception handler */ -#define EXC_TABLE_DEFAULT 0x300 /* Default C-Handler */ -#define EXC_TABLE_SIZE 0x400 - #ifndef __ASSEMBLY__ #include diff --git a/arch/xtensa/include/asm/regs.h b/arch/xtensa/include/asm/regs.h index 881a1134a4b4..477594e5817f 100644 --- a/arch/xtensa/include/asm/regs.h +++ b/arch/xtensa/include/asm/regs.h @@ -76,6 +76,7 @@ #define EXCCAUSE_COPROCESSOR5_DISABLED 37 #define EXCCAUSE_COPROCESSOR6_DISABLED 38 #define EXCCAUSE_COPROCESSOR7_DISABLED 39 +#define EXCCAUSE_N 64 /* PS register fields. 
*/ diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 2e69aa4b843f..5bd197097b77 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -12,6 +12,29 @@ #include +/* + * Per-CPU exception handling data structure. + * EXCSAVE1 points to it. + */ +struct exc_table { + /* Kernel Stack */ + void *kstk; + /* Double exception save area for a0 */ + unsigned long double_save; + /* Fixup handler */ + void *fixup; + /* For passing a parameter to fixup */ + void *fixup_param; + /* For fast syscall handler */ + unsigned long syscall_save; + /* Fast user exception handlers */ + void *fast_user_handler[EXCCAUSE_N]; + /* Fast kernel exception handlers */ + void *fast_kernel_handler[EXCCAUSE_N]; + /* Default C-Handlers */ + void *default_handler[EXCCAUSE_N]; +}; + /* * handler must be either of the following: * void (*)(struct pt_regs *regs); diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index 3b119b372efd..022cf918ec20 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -132,5 +132,18 @@ int main(void) offsetof(struct debug_table, icount_level_save)); #endif + /* struct exc_table */ + DEFINE(EXC_TABLE_KSTK, offsetof(struct exc_table, kstk)); + DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save)); + DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup)); + DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param)); + DEFINE(EXC_TABLE_SYSCALL_SAVE, + offsetof(struct exc_table, syscall_save)); + DEFINE(EXC_TABLE_FAST_USER, + offsetof(struct exc_table, fast_user_handler)); + DEFINE(EXC_TABLE_FAST_KERNEL, + offsetof(struct exc_table, fast_kernel_handler)); + DEFINE(EXC_TABLE_DEFAULT, offsetof(struct exc_table, default_handler)); + return 0; } diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 9a1fef9c1cc6..32c5207f1226 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -159,8 
+159,7 @@ COPROCESSOR(7), * 2. it is a temporary memory buffer for the exception handlers. */ -DEFINE_PER_CPU(unsigned long, exc_table[EXC_TABLE_SIZE/4]); - +DEFINE_PER_CPU(struct exc_table, exc_table); DEFINE_PER_CPU(struct debug_table, debug_table); void die(const char*, struct pt_regs*, long); @@ -368,28 +367,28 @@ do_debug(struct pt_regs *regs) } -static void set_handler(int idx, void *handler) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) - per_cpu(exc_table, cpu)[idx] = (unsigned long)handler; -} +#define set_handler(type, cause, handler) \ + do { \ + unsigned int cpu; \ + \ + for_each_possible_cpu(cpu) \ + per_cpu(exc_table, cpu).type[cause] = (handler);\ + } while (0) /* Set exception C handler - for temporary use when probing exceptions */ void * __init trap_set_handler(int cause, void *handler) { - void *previous = (void *)per_cpu(exc_table, 0)[ - EXC_TABLE_DEFAULT / 4 + cause]; - set_handler(EXC_TABLE_DEFAULT / 4 + cause, handler); + void *previous = per_cpu(exc_table, 0).default_handler[cause]; + + set_handler(default_handler, cause, handler); return previous; } static void trap_init_excsave(void) { - unsigned long excsave1 = (unsigned long)this_cpu_ptr(exc_table); + unsigned long excsave1 = (unsigned long)this_cpu_ptr(&exc_table); __asm__ __volatile__("wsr %0, excsave1\n" : : "a" (excsave1)); } @@ -421,10 +420,10 @@ void __init trap_init(void) /* Setup default vectors. */ - for(i = 0; i < 64; i++) { - set_handler(EXC_TABLE_FAST_USER/4 + i, user_exception); - set_handler(EXC_TABLE_FAST_KERNEL/4 + i, kernel_exception); - set_handler(EXC_TABLE_DEFAULT/4 + i, do_unhandled); + for (i = 0; i < EXCCAUSE_N; i++) { + set_handler(fast_user_handler, i, user_exception); + set_handler(fast_kernel_handler, i, kernel_exception); + set_handler(default_handler, i, do_unhandled); } /* Setup specific handlers. 
*/ @@ -436,11 +435,11 @@ void __init trap_init(void) void *handler = dispatch_init_table[i].handler; if (fast == 0) - set_handler (EXC_TABLE_DEFAULT/4 + cause, handler); + set_handler(default_handler, cause, handler); if (fast && fast & USER) - set_handler (EXC_TABLE_FAST_USER/4 + cause, handler); + set_handler(fast_user_handler, cause, handler); if (fast && fast & KRNL) - set_handler (EXC_TABLE_FAST_KERNEL/4 + cause, handler); + set_handler(fast_kernel_handler, cause, handler); } /* Initialize EXCSAVE_1 to hold the address of the exception table. */ -- cgit v1.2.3 From 501c26e82df8d253851b80082778eeb37e4bab5c Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 15 Dec 2017 16:20:56 -0800 Subject: xtensa: implement early_trap_init Paging on xtensa architecture requires functioning exception handling because hardware cannot transparently access page tables that are not currently mapped by TLB. Exception handling is set up late in the initialization process, but working paging is needed for KASAN. Provide early_trap_init that sets up minimal exception handling sufficient for KASAN to work. 
Signed-off-by: Max Filippov --- arch/xtensa/include/asm/traps.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 5bd197097b77..f5cd7a7e65e0 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -42,6 +42,18 @@ struct exc_table { */ extern void * __init trap_set_handler(int cause, void *handler); extern void do_unhandled(struct pt_regs *regs, unsigned long exccause); +void fast_second_level_miss(void); + +/* Initialize minimal exc_table structure sufficient for basic paging */ +static inline void __init early_trap_init(void) +{ + static struct exc_table exc_table __initdata = { + .fast_kernel_handler[EXCCAUSE_DTLB_MISS] = + fast_second_level_miss, + }; + __asm__ __volatile__("wsr %0, excsave1\n" : : "a" (&exc_table)); +} + void secondary_trap_init(void); static inline void spill_registers(void) -- cgit v1.2.3 From c2edb35ae342fedb5a39312c0fa676b74973887a Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 15 Dec 2017 20:45:35 -0800 Subject: xtensa: extract init_kio KIO region placement may be specified in the device tree, that's why it's initialized with the rest of MMU after the early_init_devtree. In order to support KASAN the MMU must be initialized earlier. Separate KIO initialization from the rest of MMU initialization. Reinitialize KIO if its location is specified in the device tree. 
Signed-off-by: Max Filippov --- arch/xtensa/include/asm/mmu_context.h | 1 + arch/xtensa/include/asm/nommu_context.h | 4 ++++ arch/xtensa/kernel/setup.c | 10 ++++++---- arch/xtensa/mm/mmu.c | 30 +++++++++++++++++------------- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/arch/xtensa/include/asm/mmu_context.h b/arch/xtensa/include/asm/mmu_context.h index f7e186dfc4e4..de5e6cbbafe4 100644 --- a/arch/xtensa/include/asm/mmu_context.h +++ b/arch/xtensa/include/asm/mmu_context.h @@ -52,6 +52,7 @@ DECLARE_PER_CPU(unsigned long, asid_cache); #define ASID_INSERT(x) (0x03020001 | (((x) & ASID_MASK) << 8)) void init_mmu(void); +void init_kio(void); static inline void set_rasid_register (unsigned long val) { diff --git a/arch/xtensa/include/asm/nommu_context.h b/arch/xtensa/include/asm/nommu_context.h index 2cebdbbdb633..37251b2ef871 100644 --- a/arch/xtensa/include/asm/nommu_context.h +++ b/arch/xtensa/include/asm/nommu_context.h @@ -3,6 +3,10 @@ static inline void init_mmu(void) { } +static inline void init_kio(void) +{ +} + static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index cf7516c52a19..960212e72a70 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -207,6 +207,8 @@ static int __init xtensa_dt_io_area(unsigned long node, const char *uname, /* round down to nearest 256MB boundary */ xtensa_kio_paddr &= 0xf0000000; + init_kio(); + return 1; } #else @@ -245,6 +247,10 @@ void __init early_init_devtree(void *params) void __init init_arch(bp_tag_t *bp_start) { + /* Initialize MMU. */ + + init_mmu(); + /* Parse boot parameters */ if (bp_start) @@ -262,10 +268,6 @@ void __init init_arch(bp_tag_t *bp_start) /* Early hook for platforms */ platform_init(bp_start); - - /* Initialize MMU. 
*/ - - init_mmu(); } /* diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index 358d748d9083..54c01e3ebf05 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -82,6 +82,23 @@ void init_mmu(void) set_itlbcfg_register(0); set_dtlbcfg_register(0); #endif + init_kio(); + local_flush_tlb_all(); + + /* Set rasid register to a known value. */ + + set_rasid_register(ASID_INSERT(ASID_USER_FIRST)); + + /* Set PTEVADDR special register to the start of the page + * table, which is in kernel mappable space (ie. not + * statically mapped). This register's value is undefined on + * reset. + */ + set_ptevaddr_register(XCHAL_PAGE_TABLE_VADDR); +} + +void init_kio(void) +{ #if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) /* * Update the IO area mapping in case xtensa_kio_paddr has changed @@ -95,17 +112,4 @@ void init_mmu(void) write_itlb_entry(__pte(xtensa_kio_paddr + CA_BYPASS), XCHAL_KIO_BYPASS_VADDR + 6); #endif - - local_flush_tlb_all(); - - /* Set rasid register to a known value. */ - - set_rasid_register(ASID_INSERT(ASID_USER_FIRST)); - - /* Set PTEVADDR special register to the start of the page - * table, which is in kernel mappable space (ie. not - * statically mapped). This register's value is undefined on - * reset. - */ - set_ptevaddr_register(PGTABLE_START); } -- cgit v1.2.3 From d4e337fe822354895334dbaded61f08206dcac25 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 15 Dec 2017 20:46:55 -0800 Subject: xtensa: don't clear swapper_pg_dir in paging_init swapper_pg_dir is located in the .bss, so it's zero-initialized anyway. With KASAN enabled paging_init will be called after KASAN initialization, it must not erase page directory entries set up for KASAN shadow map. 
Signed-off-by: Max Filippov --- arch/xtensa/mm/mmu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index 54c01e3ebf05..9d1ecfc53670 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -56,7 +56,6 @@ static void __init fixedrange_init(void) void __init paging_init(void) { - memset(swapper_pg_dir, 0, PAGE_SIZE); #ifdef CONFIG_HIGHMEM fixedrange_init(); pkmap_page_table = init_pmd(PKMAP_BASE, LAST_PKMAP); -- cgit v1.2.3 From 1af1e8a39dc0fab5e50f10462c636da8c1e0cfbb Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 3 Dec 2017 19:09:41 -0800 Subject: xtensa: move fixmap and kmap just above the KSEG The virtual address space between the page table and the VMALLOC region is big enough to host KASAN shadow map and there's enough space between the VMALLOC area and KSEG for the fixmap and kmap. Move fixmap and kmap to the gap between VMALLOC area and KSEG, just above the KSEG. Reorder entries in the kernel memory layout printing code. Drop duplicate PGTABLE_START definition, use XCHAL_PAGE_TABLE_VADDR instead. Signed-off-by: Max Filippov --- Documentation/xtensa/mmu.txt | 72 +++++++++++++++++++-------------------- arch/xtensa/include/asm/fixmap.h | 4 +-- arch/xtensa/include/asm/highmem.h | 2 +- arch/xtensa/include/asm/page.h | 2 -- arch/xtensa/mm/init.c | 12 +++---- 5 files changed, 45 insertions(+), 47 deletions(-) diff --git a/Documentation/xtensa/mmu.txt b/Documentation/xtensa/mmu.txt index 5de8715d5bec..16921393e366 100644 --- a/Documentation/xtensa/mmu.txt +++ b/Documentation/xtensa/mmu.txt @@ -69,19 +69,8 @@ Default MMUv2-compatible layout. 
| Userspace | 0x00000000 TASK_SIZE +------------------+ 0x40000000 +------------------+ -| Page table | 0x80000000 -+------------------+ 0x80400000 +| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE +------------------+ -| KMAP area | PKMAP_BASE PTRS_PER_PTE * -| | DCACHE_N_COLORS * -| | PAGE_SIZE -| | (4MB * DCACHE_N_COLORS) -+------------------+ -| Atomic KMAP area | FIXADDR_START KM_TYPE_NR * -| | NR_CPUS * -| | DCACHE_N_COLORS * -| | PAGE_SIZE -+------------------+ FIXADDR_TOP 0xbffff000 +------------------+ | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB +------------------+ VMALLOC_END @@ -92,6 +81,17 @@ Default MMUv2-compatible layout. | remap area 2 | +------------------+ +------------------+ +| KMAP area | PKMAP_BASE PTRS_PER_PTE * +| | DCACHE_N_COLORS * +| | PAGE_SIZE +| | (4MB * DCACHE_N_COLORS) ++------------------+ +| Atomic KMAP area | FIXADDR_START KM_TYPE_NR * +| | NR_CPUS * +| | DCACHE_N_COLORS * +| | PAGE_SIZE ++------------------+ FIXADDR_TOP 0xcffff000 ++------------------+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xd0000000 128MB +------------------+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xd8000000 128MB @@ -109,19 +109,8 @@ Default MMUv2-compatible layout. | Userspace | 0x00000000 TASK_SIZE +------------------+ 0x40000000 +------------------+ -| Page table | 0x80000000 -+------------------+ 0x80400000 +| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE +------------------+ -| KMAP area | PKMAP_BASE PTRS_PER_PTE * -| | DCACHE_N_COLORS * -| | PAGE_SIZE -| | (4MB * DCACHE_N_COLORS) -+------------------+ -| Atomic KMAP area | FIXADDR_START KM_TYPE_NR * -| | NR_CPUS * -| | DCACHE_N_COLORS * -| | PAGE_SIZE -+------------------+ FIXADDR_TOP 0x9ffff000 +------------------+ | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB +------------------+ VMALLOC_END @@ -132,6 +121,17 @@ Default MMUv2-compatible layout. 
| remap area 2 | +------------------+ +------------------+ +| KMAP area | PKMAP_BASE PTRS_PER_PTE * +| | DCACHE_N_COLORS * +| | PAGE_SIZE +| | (4MB * DCACHE_N_COLORS) ++------------------+ +| Atomic KMAP area | FIXADDR_START KM_TYPE_NR * +| | NR_CPUS * +| | DCACHE_N_COLORS * +| | PAGE_SIZE ++------------------+ FIXADDR_TOP 0xaffff000 ++------------------+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xb0000000 256MB +------------------+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 256MB @@ -150,19 +150,8 @@ Default MMUv2-compatible layout. | Userspace | 0x00000000 TASK_SIZE +------------------+ 0x40000000 +------------------+ -| Page table | 0x80000000 -+------------------+ 0x80400000 +| Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE +------------------+ -| KMAP area | PKMAP_BASE PTRS_PER_PTE * -| | DCACHE_N_COLORS * -| | PAGE_SIZE -| | (4MB * DCACHE_N_COLORS) -+------------------+ -| Atomic KMAP area | FIXADDR_START KM_TYPE_NR * -| | NR_CPUS * -| | DCACHE_N_COLORS * -| | PAGE_SIZE -+------------------+ FIXADDR_TOP 0x8ffff000 +------------------+ | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB +------------------+ VMALLOC_END @@ -173,6 +162,17 @@ Default MMUv2-compatible layout. 
| remap area 2 | +------------------+ +------------------+ +| KMAP area | PKMAP_BASE PTRS_PER_PTE * +| | DCACHE_N_COLORS * +| | PAGE_SIZE +| | (4MB * DCACHE_N_COLORS) ++------------------+ +| Atomic KMAP area | FIXADDR_START KM_TYPE_NR * +| | NR_CPUS * +| | DCACHE_N_COLORS * +| | PAGE_SIZE ++------------------+ FIXADDR_TOP 0x9ffff000 ++------------------+ | Cached KSEG | XCHAL_KSEG_CACHED_VADDR 0xa0000000 512MB +------------------+ | Uncached KSEG | XCHAL_KSEG_BYPASS_VADDR 0xc0000000 512MB diff --git a/arch/xtensa/include/asm/fixmap.h b/arch/xtensa/include/asm/fixmap.h index 0d30403b6c95..7e25c1b50ac0 100644 --- a/arch/xtensa/include/asm/fixmap.h +++ b/arch/xtensa/include/asm/fixmap.h @@ -44,7 +44,7 @@ enum fixed_addresses { __end_of_fixed_addresses }; -#define FIXADDR_TOP (VMALLOC_START - PAGE_SIZE) +#define FIXADDR_TOP (XCHAL_KSEG_CACHED_VADDR - PAGE_SIZE) #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START ((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK) @@ -63,7 +63,7 @@ static __always_inline unsigned long fix_to_virt(const unsigned int idx) * table. */ BUILD_BUG_ON(FIXADDR_START < - XCHAL_PAGE_TABLE_VADDR + XCHAL_PAGE_TABLE_SIZE); + TLBTEMP_BASE_1 + TLBTEMP_SIZE); BUILD_BUG_ON(idx >= __end_of_fixed_addresses); return __fix_to_virt(idx); } diff --git a/arch/xtensa/include/asm/highmem.h b/arch/xtensa/include/asm/highmem.h index 6e070db1022e..04e9340eac4b 100644 --- a/arch/xtensa/include/asm/highmem.h +++ b/arch/xtensa/include/asm/highmem.h @@ -72,7 +72,7 @@ static inline void *kmap(struct page *page) * page table. 
*/ BUILD_BUG_ON(PKMAP_BASE < - XCHAL_PAGE_TABLE_VADDR + XCHAL_PAGE_TABLE_SIZE); + TLBTEMP_BASE_1 + TLBTEMP_SIZE); BUG_ON(in_interrupt()); if (!PageHighMem(page)) return page_address(page); diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index 4ddbfd57a7c8..5d69c11c01b8 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -36,8 +36,6 @@ #define MAX_LOW_PFN PHYS_PFN(0xfffffffful) #endif -#define PGTABLE_START 0x80000000 - /* * Cache aliasing: * diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 720fe4e8b497..6fc1cb093fb3 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -100,23 +100,23 @@ void __init mem_init(void) mem_init_print_info(NULL); pr_info("virtual kernel memory layout:\n" +#ifdef CONFIG_MMU + " vmalloc : 0x%08lx - 0x%08lx (%5lu MB)\n" +#endif #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%5lu kB)\n" " fixmap : 0x%08lx - 0x%08lx (%5lu kB)\n" -#endif -#ifdef CONFIG_MMU - " vmalloc : 0x%08lx - 0x%08lx (%5lu MB)\n" #endif " lowmem : 0x%08lx - 0x%08lx (%5lu MB)\n", +#ifdef CONFIG_MMU + VMALLOC_START, VMALLOC_END, + (VMALLOC_END - VMALLOC_START) >> 20, #ifdef CONFIG_HIGHMEM PKMAP_BASE, PKMAP_BASE + LAST_PKMAP * PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, #endif -#ifdef CONFIG_MMU - VMALLOC_START, VMALLOC_END, - (VMALLOC_END - VMALLOC_START) >> 20, PAGE_OFFSET, PAGE_OFFSET + (max_low_pfn - min_low_pfn) * PAGE_SIZE, #else -- cgit v1.2.3 From c633544a6154146a210cf158157a1ae7c55473b6 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 3 Dec 2017 13:28:52 -0800 Subject: xtensa: add support for KASAN Cover kernel addresses above 0x90000000 by the shadow map. Enable HAVE_ARCH_KASAN when MMU is enabled. Provide kasan_early_init that fills shadow map with writable copies of kasan_zero_page. Call kasan_early_init right after mmu initialization in the setup_arch. 
Provide kasan_init that allocates proper shadow map pages from the memblock and puts these pages into the shadow map for addresses from VMALLOC area to the end of KSEG. Call kasan_init right after memblock initialization. Don't use KASAN for the boot code, MMU and KASAN initialization and page fault handler. Make kernel stack size 4 times larger when KASAN is enabled to avoid stack overflows. GCC 7.3, 8 or newer is required to build the xtensa kernel with KASAN. Signed-off-by: Max Filippov --- .../features/debug/KASAN/arch-support.txt | 2 +- Documentation/xtensa/mmu.txt | 6 ++ arch/xtensa/Kconfig | 5 ++ arch/xtensa/boot/lib/Makefile | 2 + arch/xtensa/include/asm/kasan.h | 37 +++++++++ arch/xtensa/include/asm/kmem_layout.h | 4 + arch/xtensa/include/asm/pgtable.h | 3 +- arch/xtensa/include/asm/string.h | 19 +++++ arch/xtensa/kernel/setup.c | 7 +- arch/xtensa/kernel/xtensa_ksyms.c | 3 + arch/xtensa/lib/memcopy.S | 10 ++- arch/xtensa/lib/memset.S | 5 +- arch/xtensa/mm/Makefile | 5 ++ arch/xtensa/mm/init.c | 7 ++ arch/xtensa/mm/kasan_init.c | 95 ++++++++++++++++++++++ 15 files changed, 201 insertions(+), 9 deletions(-) create mode 100644 arch/xtensa/include/asm/kasan.h create mode 100644 arch/xtensa/mm/kasan_init.c diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt index 76bbd7fe27b3..8abb013db8d1 100644 --- a/Documentation/features/debug/KASAN/arch-support.txt +++ b/Documentation/features/debug/KASAN/arch-support.txt @@ -35,5 +35,5 @@ | um: | TODO | | unicore32: | TODO | | x86: | ok | - | xtensa: | TODO | + | xtensa: | ok | ----------------------- diff --git a/Documentation/xtensa/mmu.txt b/Documentation/xtensa/mmu.txt index 16921393e366..318114de63f3 100644 --- a/Documentation/xtensa/mmu.txt +++ b/Documentation/xtensa/mmu.txt @@ -71,6 +71,8 @@ Default MMUv2-compatible layout. 
+------------------+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE +------------------+ +| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE ++------------------+ 0x8e400000 +------------------+ | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB +------------------+ VMALLOC_END @@ -111,6 +113,8 @@ Default MMUv2-compatible layout. +------------------+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE +------------------+ +| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE ++------------------+ 0x8e400000 +------------------+ | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB +------------------+ VMALLOC_END @@ -152,6 +156,8 @@ Default MMUv2-compatible layout. +------------------+ | Page table | XCHAL_PAGE_TABLE_VADDR 0x80000000 XCHAL_PAGE_TABLE_SIZE +------------------+ +| KASAN shadow map | KASAN_SHADOW_START 0x80400000 KASAN_SHADOW_SIZE ++------------------+ 0x8e400000 +------------------+ | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB +------------------+ VMALLOC_END diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index fffe05b698ac..f9f95d6e8da8 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -15,6 +15,7 @@ config XTENSA select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK + select HAVE_ARCH_KASAN if MMU select HAVE_CC_STACKPROTECTOR select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG @@ -80,6 +81,10 @@ config VARIANT_IRQ_SWITCH config HAVE_XTENSA_GPIO32 def_bool n +config KASAN_SHADOW_OFFSET + hex + default 0x6e400000 + menu "Processor type and features" choice diff --git a/arch/xtensa/boot/lib/Makefile b/arch/xtensa/boot/lib/Makefile index 2fe182915b63..355127faade1 100644 --- a/arch/xtensa/boot/lib/Makefile +++ b/arch/xtensa/boot/lib/Makefile @@ -15,6 +15,8 @@ CFLAGS_REMOVE_inftrees.o = -pg CFLAGS_REMOVE_inffast.o = -pg endif +KASAN_SANITIZE := n + CFLAGS_REMOVE_inflate.o += -fstack-protector -fstack-protector-strong 
CFLAGS_REMOVE_zmem.o += -fstack-protector -fstack-protector-strong CFLAGS_REMOVE_inftrees.o += -fstack-protector -fstack-protector-strong diff --git a/arch/xtensa/include/asm/kasan.h b/arch/xtensa/include/asm/kasan.h new file mode 100644 index 000000000000..54be80876e57 --- /dev/null +++ b/arch/xtensa/include/asm/kasan.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN + +#include +#include +#include + +/* Start of area covered by KASAN */ +#define KASAN_START_VADDR __XTENSA_UL_CONST(0x90000000) +/* Start of the shadow map */ +#define KASAN_SHADOW_START (XCHAL_PAGE_TABLE_VADDR + XCHAL_PAGE_TABLE_SIZE) +/* Size of the shadow map */ +#define KASAN_SHADOW_SIZE (-KASAN_START_VADDR >> KASAN_SHADOW_SCALE_SHIFT) +/* Offset for mem to shadow address transformation */ +#define KASAN_SHADOW_OFFSET __XTENSA_UL_CONST(CONFIG_KASAN_SHADOW_OFFSET) + +void __init kasan_early_init(void); +void __init kasan_init(void); + +#else + +static inline void kasan_early_init(void) +{ +} + +static inline void kasan_init(void) +{ +} + +#endif +#endif +#endif diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h index 28f9260a766c..2317c835a4db 100644 --- a/arch/xtensa/include/asm/kmem_layout.h +++ b/arch/xtensa/include/asm/kmem_layout.h @@ -71,7 +71,11 @@ #endif +#ifndef CONFIG_KASAN #define KERNEL_STACK_SHIFT 13 +#else +#define KERNEL_STACK_SHIFT 15 +#endif #define KERNEL_STACK_SIZE (1 << KERNEL_STACK_SHIFT) #endif diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 30dd5b2e4ad5..38802259978f 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -12,9 +12,9 @@ #define _XTENSA_PGTABLE_H #define __ARCH_USE_5LEVEL_HACK -#include #include #include +#include /* * We only use two ring levels, user and kernel space. 
@@ -170,6 +170,7 @@ #define PAGE_SHARED_EXEC \ __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_WRITABLE | _PAGE_HW_EXEC) #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_HW_WRITE) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT|_PAGE_HW_WRITE|_PAGE_HW_EXEC) #if (DCACHE_WAY_SIZE > PAGE_SIZE) diff --git a/arch/xtensa/include/asm/string.h b/arch/xtensa/include/asm/string.h index 8d5d9dfadb09..586bad9fe187 100644 --- a/arch/xtensa/include/asm/string.h +++ b/arch/xtensa/include/asm/string.h @@ -108,14 +108,33 @@ static inline int strncmp(const char *__cs, const char *__ct, size_t __n) #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); +extern void *__memset(void *__s, int __c, size_t __count); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *__to, __const__ void *__from, size_t __n); +extern void *__memcpy(void *__to, __const__ void *__from, size_t __n); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *__dest, __const__ void *__src, size_t __n); +extern void *__memmove(void *__dest, __const__ void *__src, size_t __n); /* Don't build bcopy at all ... */ #define __HAVE_ARCH_BCOPY +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. 
*/ +#endif +#endif + #endif /* _XTENSA_STRING_H */ diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 960212e72a70..a931af9075f2 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -36,6 +36,7 @@ #endif #include +#include #include #include #include @@ -251,6 +252,10 @@ void __init init_arch(bp_tag_t *bp_start) init_mmu(); + /* Initialize initial KASAN shadow map */ + + kasan_early_init(); + /* Parse boot parameters */ if (bp_start) @@ -388,7 +393,7 @@ void __init setup_arch(char **cmdline_p) #endif parse_early_param(); bootmem_init(); - + kasan_init(); unflatten_and_copy_device_tree(); #ifdef CONFIG_SMP diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 672391003e40..3a443f83ae87 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -41,6 +41,9 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove); EXPORT_SYMBOL(__strncpy_user); EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(copy_page); diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S index 24d650864c3a..c0f6981719d6 100644 --- a/arch/xtensa/lib/memcopy.S +++ b/arch/xtensa/lib/memcopy.S @@ -109,7 +109,8 @@ addi a5, a5, 2 j .Ldstaligned # dst is now aligned, return to main algorithm -ENTRY(memcpy) +ENTRY(__memcpy) +WEAK(memcpy) entry sp, 16 # minimal stack frame # a2/ dst, a3/ src, a4/ len @@ -271,7 +272,7 @@ ENTRY(memcpy) s8i a6, a5, 0 retw -ENDPROC(memcpy) +ENDPROC(__memcpy) /* * void bcopy(const void *src, void *dest, size_t n); @@ -376,7 +377,8 @@ ENDPROC(bcopy) j .Lbackdstaligned # dst is now aligned, # return to main algorithm -ENTRY(memmove) +ENTRY(__memmove) +WEAK(memmove) entry sp, 16 # minimal stack frame # a2/ dst, a3/ src, a4/ len @@ -548,4 +550,4 @@ ENTRY(memmove) s8i a6, a5, 0 retw -ENDPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/xtensa/lib/memset.S 
b/arch/xtensa/lib/memset.S index a6cd04ba966f..276747dec300 100644 --- a/arch/xtensa/lib/memset.S +++ b/arch/xtensa/lib/memset.S @@ -31,7 +31,8 @@ */ .text -ENTRY(memset) +ENTRY(__memset) +WEAK(memset) entry sp, 16 # minimal stack frame # a2/ dst, a3/ c, a4/ length @@ -140,7 +141,7 @@ EX(10f) s8i a3, a5, 0 .Lbytesetdone: retw -ENDPROC(memset) +ENDPROC(__memset) .section .fixup, "ax" .align 4 diff --git a/arch/xtensa/mm/Makefile b/arch/xtensa/mm/Makefile index 0b3d296a016a..734888a00dc8 100644 --- a/arch/xtensa/mm/Makefile +++ b/arch/xtensa/mm/Makefile @@ -5,3 +5,8 @@ obj-y := init.o misc.o obj-$(CONFIG_MMU) += cache.o fault.o ioremap.o mmu.o tlb.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_KASAN) += kasan_init.o + +KASAN_SANITIZE_fault.o := n +KASAN_SANITIZE_kasan_init.o := n +KASAN_SANITIZE_mmu.o := n diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 6fc1cb093fb3..0d980f05da82 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -100,6 +100,9 @@ void __init mem_init(void) mem_init_print_info(NULL); pr_info("virtual kernel memory layout:\n" +#ifdef CONFIG_KASAN + " kasan : 0x%08lx - 0x%08lx (%5lu MB)\n" +#endif #ifdef CONFIG_MMU " vmalloc : 0x%08lx - 0x%08lx (%5lu MB)\n" #endif @@ -108,6 +111,10 @@ void __init mem_init(void) " fixmap : 0x%08lx - 0x%08lx (%5lu kB)\n" #endif " lowmem : 0x%08lx - 0x%08lx (%5lu MB)\n", +#ifdef CONFIG_KASAN + KASAN_SHADOW_START, KASAN_SHADOW_START + KASAN_SHADOW_SIZE, + KASAN_SHADOW_SIZE >> 20, +#endif #ifdef CONFIG_MMU VMALLOC_START, VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c new file mode 100644 index 000000000000..6b532b6bd785 --- /dev/null +++ b/arch/xtensa/mm/kasan_init.c @@ -0,0 +1,95 @@ +/* + * Xtensa KASAN shadow map initialization + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2017 Cadence Design Systems Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +void __init kasan_early_init(void) +{ + unsigned long vaddr = KASAN_SHADOW_START; + pgd_t *pgd = pgd_offset_k(vaddr); + pmd_t *pmd = pmd_offset(pgd, vaddr); + int i; + + for (i = 0; i < PTRS_PER_PTE; ++i) + set_pte(kasan_zero_pte + i, + mk_pte(virt_to_page(kasan_zero_page), PAGE_KERNEL)); + + for (vaddr = 0; vaddr < KASAN_SHADOW_SIZE; vaddr += PMD_SIZE, ++pmd) { + BUG_ON(!pmd_none(*pmd)); + set_pmd(pmd, __pmd((unsigned long)kasan_zero_pte)); + } + early_trap_init(); +} + +static void __init populate(void *start, void *end) +{ + unsigned long n_pages = (end - start) / PAGE_SIZE; + unsigned long n_pmds = n_pages / PTRS_PER_PTE; + unsigned long i, j; + unsigned long vaddr = (unsigned long)start; + pgd_t *pgd = pgd_offset_k(vaddr); + pmd_t *pmd = pmd_offset(pgd, vaddr); + pte_t *pte = memblock_virt_alloc(n_pages * sizeof(pte_t), PAGE_SIZE); + + pr_debug("%s: %p - %p\n", __func__, start, end); + + for (i = j = 0; i < n_pmds; ++i) { + int k; + + for (k = 0; k < PTRS_PER_PTE; ++k, ++j) { + phys_addr_t phys = + memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, + MEMBLOCK_ALLOC_ANYWHERE); + + set_pte(pte + j, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL)); + } + } + + for (i = 0; i < n_pmds ; ++i, pte += PTRS_PER_PTE) + set_pmd(pmd + i, __pmd((unsigned long)pte)); + + local_flush_tlb_all(); + memset(start, 0, end - start); +} + +void __init kasan_init(void) +{ + int i; + + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_START - + (KASAN_START_VADDR >> KASAN_SHADOW_SCALE_SHIFT)); + BUILD_BUG_ON(VMALLOC_START < KASAN_START_VADDR); + + /* + * Replace shadow map pages that cover addresses from VMALLOC area + * start to the end of KSEG with clean writable pages. + */ + populate(kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)XCHAL_KSEG_BYPASS_VADDR)); + + /* Write protect kasan_zero_page and zero-initialize it again. 
*/ + for (i = 0; i < PTRS_PER_PTE; ++i) + set_pte(kasan_zero_pte + i, + mk_pte(virt_to_page(kasan_zero_page), PAGE_KERNEL_RO)); + + local_flush_tlb_all(); + memset(kasan_zero_page, 0, PAGE_SIZE); + + /* At this point kasan is fully initialized. Enable error messages. */ + current->kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized\n"); +} -- cgit v1.2.3 From e0baa01438d3fa3979f94f98be19ca3df88e0b7c Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 4 Dec 2017 21:20:33 -0800 Subject: xtensa: use __memset in __xtensa_clear_user memset on xtensa is capable of accessing user memory, but KASAN checks if memset function is actually used for that and reports it as an error: ================================================================== BUG: KASAN: user-memory-access in padzero+0x4d/0x58 Write of size 519 at addr 0049ddf9 by task init/1 Call Trace: [] kasan_report+0x160/0x238 [] check_memory_region+0xf8/0x100 [] memset+0x20/0x34 [] padzero+0x4d/0x58 ================================================================== Use __memset in __xtensa_clear_user to avoid that. Signed-off-by: Max Filippov --- arch/xtensa/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index b8f152b6aaa5..18bbe1caad94 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -261,7 +261,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) static inline unsigned long __xtensa_clear_user(void *addr, unsigned long size) { - if ( ! memset(addr, 0, size) ) + if (!__memset(addr, 0, size)) return size; return 0; } -- cgit v1.2.3 From db601f3ad3f7e0e8acac230c658aa434f26e48cd Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Sun, 10 Dec 2017 16:19:56 +0100 Subject: mtd: mchp23k256: propagate return value of spi_sync() The call to spi_sync() can fail. Check the return value and propagate it. 
Signed-off-by: Antonio Borneo Reviewed-by: Andrew Lunn Signed-off-by: Boris Brezillon --- drivers/mtd/devices/mchp23k256.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c index 8956b7dcc984..75f71d166fd6 100644 --- a/drivers/mtd/devices/mchp23k256.c +++ b/drivers/mtd/devices/mchp23k256.c @@ -68,6 +68,7 @@ static int mchp23k256_write(struct mtd_info *mtd, loff_t to, size_t len, struct spi_transfer transfer[2] = {}; struct spi_message message; unsigned char command[MAX_CMD_SIZE]; + int ret; spi_message_init(&message); @@ -84,12 +85,16 @@ static int mchp23k256_write(struct mtd_info *mtd, loff_t to, size_t len, mutex_lock(&flash->lock); - spi_sync(flash->spi, &message); + ret = spi_sync(flash->spi, &message); + + mutex_unlock(&flash->lock); + + if (ret) + return ret; if (retlen && message.actual_length > sizeof(command)) *retlen += message.actual_length - sizeof(command); - mutex_unlock(&flash->lock); return 0; } @@ -100,6 +105,7 @@ static int mchp23k256_read(struct mtd_info *mtd, loff_t from, size_t len, struct spi_transfer transfer[2] = {}; struct spi_message message; unsigned char command[MAX_CMD_SIZE]; + int ret; spi_message_init(&message); @@ -117,12 +123,16 @@ static int mchp23k256_read(struct mtd_info *mtd, loff_t from, size_t len, mutex_lock(&flash->lock); - spi_sync(flash->spi, &message); + ret = spi_sync(flash->spi, &message); + + mutex_unlock(&flash->lock); + + if (ret) + return ret; if (retlen && message.actual_length > sizeof(command)) *retlen += message.actual_length - sizeof(command); - mutex_unlock(&flash->lock); return 0; } -- cgit v1.2.3 From 1784f9144b143a1e8b19fe94083b040aa559182b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 5 Dec 2017 14:14:47 +0100 Subject: drivers/misc/intel/pti: Rename the header file to free up the namespace We'd like to use the 'PTI' acronym for 'Page Table Isolation' - free up the namespace by renaming the driver 
header to <linux/intel-pti.h>. (Also standardize the header guard name while at it.) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: J Freyensee Cc: Greg Kroah-Hartman Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/misc/pti.c | 2 +- include/linux/intel-pti.h | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pti.h | 43 ------------------------------------------- 3 files changed, 44 insertions(+), 44 deletions(-) create mode 100644 include/linux/intel-pti.h delete mode 100644 include/linux/pti.h diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c index eda38cbe8530..41f2a9f6851d 100644 --- a/drivers/misc/pti.c +++ b/drivers/misc/pti.c @@ -32,7 +32,7 @@ #include #include #include -#include <linux/pti.h> +#include <linux/intel-pti.h> #include #include diff --git a/include/linux/intel-pti.h b/include/linux/intel-pti.h new file mode 100644 index 000000000000..2710d72de3c9 --- /dev/null +++ b/include/linux/intel-pti.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) Intel 2011 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard. + * + * This header file will allow other parts of the OS to use the + * interface to write out it's contents for debugging a mobile system.
+ */ + +#ifndef LINUX_INTEL_PTI_H_ +#define LINUX_INTEL_PTI_H_ + +/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ +#define PTI_LASTDWORD_DTS 0x30 + +/* basic structure used as a write address to the PTI HW */ +struct pti_masterchannel { + u8 master; + u8 channel; +}; + +/* the following functions are defined in misc/pti.c */ +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); +struct pti_masterchannel *pti_request_masterchannel(u8 type, + const char *thread_name); +void pti_release_masterchannel(struct pti_masterchannel *mc); + +#endif /* LINUX_INTEL_PTI_H_ */ diff --git a/include/linux/pti.h b/include/linux/pti.h deleted file mode 100644 index b3ea01a3197e..000000000000 --- a/include/linux/pti.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) Intel 2011 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * The PTI (Parallel Trace Interface) driver directs trace data routed from - * various parts in the system out through the Intel Penwell PTI port and - * out of the mobile device for analysis with a debugging tool - * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, - * compact JTAG, standard. - * - * This header file will allow other parts of the OS to use the - * interface to write out it's contents for debugging a mobile system. - */ - -#ifndef PTI_H_ -#define PTI_H_ - -/* offset for last dword of any PTI message. 
Part of MIPI P1149.7 */ -#define PTI_LASTDWORD_DTS 0x30 - -/* basic structure used as a write address to the PTI HW */ -struct pti_masterchannel { - u8 master; - u8 channel; -}; - -/* the following functions are defined in misc/pti.c */ -void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); -struct pti_masterchannel *pti_request_masterchannel(u8 type, - const char *thread_name); -void pti_release_masterchannel(struct pti_masterchannel *mc); - -#endif /*PTI_H_*/ -- cgit v1.2.3 From a8b4db562e7283a1520f9e9730297ecaab7622ea Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Sun, 5 Nov 2017 18:27:51 -0800 Subject: x86/cpufeature: Add User-Mode Instruction Prevention definitions [ Note, this is a Git cherry-pick of the following commit: (limited to the cpufeatures.h file) 3522c2a6a4f3 ("x86/cpufeature: Add User-Mode Instruction Prevention definitions") ... for easier x86 PTI code testing and back-porting. ] User-Mode Instruction Prevention is a security feature present in new Intel processors that, when set, prevents the execution of a subset of instructions if such instructions are executed in user mode (CPL > 0). Attempting to execute such instructions causes a general protection exception. The subset of instructions comprises: * SGDT - Store Global Descriptor Table * SIDT - Store Interrupt Descriptor Table * SLDT - Store Local Descriptor Table * SMSW - Store Machine Status Word * STR - Store Task Register This feature is also added to the list of disabled-features to allow a cleaner handling of build-time configuration. Signed-off-by: Ricardo Neri Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Chen Yucong Cc: Chris Metcalf Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Huang Rui Cc: Jiri Slaby Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Michael S. 
Tsirkin Cc: Paolo Bonzini Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Ravi V. Shankar Cc: Shuah Khan Cc: Tony Luck Cc: Vlastimil Babka Cc: ricardo.neri@intel.com Link: http://lkml.kernel.org/r/1509935277-22138-7-git-send-email-ricardo.neri-calderon@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index cdf5be866863..c0b0e9e8aa66 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -296,6 +296,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -- cgit v1.2.3 From f2dbad36c55e5d3a91dccbde6e8cae345fe5632f Mon Sep 17 00:00:00 2001 From: Rudolf Marek Date: Tue, 28 Nov 2017 22:01:06 +0100 Subject: x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD [ Note, this is a Git cherry-pick of the following commit: 2b67799bdf25 ("x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD") ... for easier x86 PTI code testing and back-porting. ] The latest AMD AMD64 Architecture Programmer's Manual adds a CPUID feature XSaveErPtr (CPUID_Fn80000008_EBX[2]). If this feature is set, the FXSAVE, XSAVE, FXSAVEOPT, XSAVEC, XSAVES / FXRSTOR, XRSTOR, XRSTORS always save/restore error pointers, thus making the X86_BUG_FXSAVE_LEAK workaround obsolete on such CPUs. 
Signed-Off-By: Rudolf Marek Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/bdcebe90-62c5-1f05-083c-eba7f08b2540@assembler.cz Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/amd.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c0b0e9e8aa66..800104c8a3ed 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -266,6 +266,7 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d58184b7cd44..bcb75dc97d44 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x17: init_amd_zn(c); break; } - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) + /* + * Enable workaround for FXSAVE leak on CPUs + * without a XSaveErPtr feature + */ + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR))) set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); cpu_detect_cache_sizes(c); -- cgit v1.2.3 From 2fe1bc1f501d55e5925b4035bcd85781adc76c63 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 14:46:30 -0700 Subject: perf/x86: Enable free running PEBS for REGS_USER/INTR [ Note, this is a Git cherry-pick of the following commit: a47ba4d77e12 ("perf/x86: Enable free running PEBS for REGS_USER/INTR") ... for easier x86 PTI code testing and back-porting. 
] Currently free running PEBS is disabled when user or interrupt registers are requested. Most of the registers are actually available in the PEBS record and can be supported. So we just need to check for the supported registers and then allow it: it is all except for the segment register. For user registers this only works when the counter is limited to ring 3 only, so this also needs to be checked. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170831214630.21892-1-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 4 ++++ arch/x86/events/perf_event.h | 24 +++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9fb9a1f1e47b..43445da30cea 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) if (event->attr.use_clockid) flags &= ~PERF_SAMPLE_TIME; + if (!event->attr.exclude_kernel) + flags &= ~PERF_SAMPLE_REGS_USER; + if (event->attr.sample_regs_user & ~PEBS_REGS) + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); return flags; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4196f81ec0e1..f7aaadf9331f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -85,13 +85,15 @@ struct amd_nb { * Flags PEBS can handle without an PMI. * * TID can only be handled by flushing at context switch. + * REGS_USER can be handled for events limited to ring 3. 
* */ #define PEBS_FREERUNNING_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ - PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR) + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) /* * A debug store configuration. @@ -110,6 +112,26 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; +#define PEBS_REGS \ + (PERF_REG_X86_AX | \ + PERF_REG_X86_BX | \ + PERF_REG_X86_CX | \ + PERF_REG_X86_DX | \ + PERF_REG_X86_DI | \ + PERF_REG_X86_SI | \ + PERF_REG_X86_SP | \ + PERF_REG_X86_BP | \ + PERF_REG_X86_IP | \ + PERF_REG_X86_FLAGS | \ + PERF_REG_X86_R8 | \ + PERF_REG_X86_R9 | \ + PERF_REG_X86_R10 | \ + PERF_REG_X86_R11 | \ + PERF_REG_X86_R12 | \ + PERF_REG_X86_R13 | \ + PERF_REG_X86_R14 | \ + PERF_REG_X86_R15) + /* * Per register state. */ -- cgit v1.2.3 From ab95477e7cb35557ecfc837687007b646bab9a9f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Dec 2017 02:25:31 +0100 Subject: bpf: fix build issues on um due to mising bpf_perf_event.h [ Note, this is a Git cherry-pick of the following commit: a23f06f06dbe ("bpf: fix build issues on um due to mising bpf_perf_event.h") ... for easier x86 PTI code testing and back-porting. ] Since c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type") um (uml) won't build on i386 or x86_64: [...] CC init/main.o In file included from ../include/linux/perf_event.h:18:0, from ../include/linux/trace_events.h:10, from ../include/trace/syscall.h:7, from ../include/linux/syscalls.h:82, from ../init/main.c:20: ../include/uapi/linux/bpf_perf_event.h:11:32: fatal error: asm/bpf_perf_event.h: No such file or directory #include <asm/bpf_perf_event.h> [...] Lets add missing bpf_perf_event.h also to um arch. This seems to be the only one still missing.
Fixes: c895f6f703ad ("bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type") Reported-by: Randy Dunlap Suggested-by: Richard Weinberger Signed-off-by: Daniel Borkmann Tested-by: Randy Dunlap Cc: Hendrik Brueckner Cc: Richard Weinberger Acked-by: Alexei Starovoitov Acked-by: Richard Weinberger Signed-off-by: Alexei Starovoitov Signed-off-by: Ingo Molnar --- arch/um/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 50a32c33d729..73c57f614c9e 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,4 +1,5 @@ generic-y += barrier.h +generic-y += bpf_perf_event.h generic-y += bug.h generic-y += clkdev.h generic-y += current.h -- cgit v1.2.3 From c2bc66082e1048c7573d72e62f597bdc5ce13fea Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 24 Oct 2017 11:22:47 +0100 Subject: locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE() [ Note, this is a Git cherry-pick of the following commit: 76ebbe78f739 ("locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE()") ... for easier x86 PTI code testing and back-porting. ] In preparation for the removal of lockless_dereference(), which is the same as READ_ONCE() on all architectures other than Alpha, add an implicit smp_read_barrier_depends() to READ_ONCE() so that it can be used to head dependency chains on all architectures. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-3-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 202710420d6d..712cd8bb00b4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -341,6 +341,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __read_once_size(&(x), __u.__c, sizeof(x)); \ else \ __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ __u.__val; \ }) #define READ_ONCE(x) __READ_ONCE(x, 1) -- cgit v1.2.3 From 3382290ed2d5e275429cef510ab21889d3ccd164 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 24 Oct 2017 11:22:48 +0100 Subject: locking/barriers: Convert users of lockless_dereference() to READ_ONCE() [ Note, this is a Git cherry-pick of the following commit: 506458efaf15 ("locking/barriers: Convert users of lockless_dereference() to READ_ONCE()") ... for easier x86 PTI code testing and back-porting. ] READ_ONCE() now has an implicit smp_read_barrier_depends() call, so it can be used instead of lockless_dereference() without any change in semantics. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- arch/x86/include/asm/mmu_context.h | 4 ++-- arch/x86/kernel/ldt.c | 2 +- drivers/md/dm-mpath.c | 20 ++++++++++---------- fs/dcache.c | 4 ++-- fs/overlayfs/ovl_entry.h | 2 +- fs/overlayfs/readdir.c | 2 +- include/linux/rculist.h | 4 ++-- include/linux/rcupdate.h | 4 ++-- kernel/events/core.c | 4 ++-- kernel/seccomp.c | 2 +- kernel/task_work.c | 2 +- mm/slab.h | 2 +- 13 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 80534d3c2480..589af1eec7c1 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment) struct ldt_struct *ldt; /* IRQs are off, so this synchronizes with smp_store_release */ - ldt = lockless_dereference(current->active_mm->context.ldt); + ldt = READ_ONCE(current->active_mm->context.ldt); if (!ldt || idx >= ldt->nr_entries) return 0; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6699fc441644..6d16d15d09a0 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; - /* lockless_dereference synchronizes with smp_store_release */ - ldt = lockless_dereference(mm->context.ldt); + /* READ_ONCE synchronizes with smp_store_release */ + ldt = READ_ONCE(mm->context.ldt); /* * Any change to mm->context.ldt is followed by an IPI to all diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ae5615b03def..1c1eae961340 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -103,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) static void install_ldt(struct mm_struct *current_mm, struct 
ldt_struct *ldt) { - /* Synchronizes with lockless_dereference in load_mm_ldt. */ + /* Synchronizes with READ_ONCE in load_mm_ldt. */ smp_store_release(&current_mm->context.ldt, ldt); /* Activate the LDT for all CPUs using current_mm. */ diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 11f273d2f018..3f88c9d32f7e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(struct multipath *m, pgpath = path_to_pgpath(path); - if (unlikely(lockless_dereference(m->current_pg) != pg)) { + if (unlikely(READ_ONCE(m->current_pg) != pg)) { /* Only update current_pgpath if pg changed */ spin_lock_irqsave(&m->lock, flags); m->current_pgpath = pgpath; @@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) } /* Were we instructed to switch PG? */ - if (lockless_dereference(m->next_pg)) { + if (READ_ONCE(m->next_pg)) { spin_lock_irqsave(&m->lock, flags); pg = m->next_pg; if (!pg) { @@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) /* Don't change PG until it has no remaining paths */ check_current_pg: - pg = lockless_dereference(m->current_pg); + pg = READ_ONCE(m->current_pg); if (pg) { pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) @@ -473,7 +473,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, struct request *clone; /* Do we need to select a new pgpath? */ - pgpath = lockless_dereference(m->current_pgpath); + pgpath = READ_ONCE(m->current_pgpath); if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) pgpath = choose_pgpath(m, nr_bytes); @@ -535,7 +535,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m bool queue_io; /* Do we need to select a new pgpath? 
*/ - pgpath = lockless_dereference(m->current_pgpath); + pgpath = READ_ONCE(m->current_pgpath); queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); if (!pgpath || !queue_io) pgpath = choose_pgpath(m, nr_bytes); @@ -1804,7 +1804,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, struct pgpath *current_pgpath; int r; - current_pgpath = lockless_dereference(m->current_pgpath); + current_pgpath = READ_ONCE(m->current_pgpath); if (!current_pgpath) current_pgpath = choose_pgpath(m, 0); @@ -1826,7 +1826,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, } if (r == -ENOTCONN) { - if (!lockless_dereference(m->current_pg)) { + if (!READ_ONCE(m->current_pg)) { /* Path status changed, redo selection */ (void) choose_pgpath(m, 0); } @@ -1895,9 +1895,9 @@ static int multipath_busy(struct dm_target *ti) return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED); /* Guess which priority_group will be used at next mapping time */ - pg = lockless_dereference(m->current_pg); - next_pg = lockless_dereference(m->next_pg); - if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg)) + pg = READ_ONCE(m->current_pg); + next_pg = READ_ONCE(m->next_pg); + if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg)) pg = next_pg; if (!pg) { diff --git a/fs/dcache.c b/fs/dcache.c index f90141387f01..34c852af215c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -231,7 +231,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c { /* * Be careful about RCU walk racing with rename: - * use 'lockless_dereference' to fetch the name pointer. + * use 'READ_ONCE' to fetch the name pointer. * * NOTE! Even if a rename will mean that the length * was not loaded atomically, we don't care. 
The @@ -245,7 +245,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c * early because the data cannot match (there can * be no NUL in the ct/tcount data) */ - const unsigned char *cs = lockless_dereference(dentry->d_name.name); + const unsigned char *cs = READ_ONCE(dentry->d_name.name); return dentry_string_cmp(cs, ct, tcount); } diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 25d9b5adcd42..36b49bd09264 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(struct inode *inode) static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi) { - return lockless_dereference(oi->__upperdentry); + return READ_ONCE(oi->__upperdentry); } diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 698b74dd750e..c310e3ff7f3f 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -754,7 +754,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { struct inode *inode = file_inode(file); - realfile = lockless_dereference(od->upperfile); + realfile = READ_ONCE(od->upperfile); if (!realfile) { struct path upperpath; diff --git a/include/linux/rculist.h b/include/linux/rculist.h index c2cdd45a880a..127f534fec94 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -275,7 +275,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - container_of(lockless_dereference(ptr), type, member) + container_of(READ_ONCE(ptr), type, member) /* * Where are list_empty_rcu() and list_first_entry_rcu()? @@ -368,7 +368,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * example is when items are added to the list, but never deleted. 
*/ #define list_entry_lockless(ptr, type, member) \ - container_of((typeof(ptr))lockless_dereference(ptr), type, member) + container_of((typeof(ptr))READ_ONCE(ptr), type, member) /** * list_for_each_entry_lockless - iterate over rcu list of given type diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1a9f70d44af9..a6ddc42f87a5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -346,7 +346,7 @@ static inline void rcu_preempt_sleep_check(void) { } #define __rcu_dereference_check(p, c, space) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ + typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(________p1)); \ @@ -360,7 +360,7 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_dereference_raw(p) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(p) ________p1 = lockless_dereference(p); \ + typeof(p) ________p1 = READ_ONCE(p); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) diff --git a/kernel/events/core.c b/kernel/events/core.c index 10cdb9c26b5d..6eee4ed97af0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4233,7 +4233,7 @@ static void perf_remove_from_owner(struct perf_event *event) * indeed free this event, otherwise we need to serialize on * owner->perf_event_mutex. */ - owner = lockless_dereference(event->owner); + owner = READ_ONCE(event->owner); if (owner) { /* * Since delayed_put_task_struct() also drops the last @@ -4330,7 +4330,7 @@ again: * Cannot change, child events are not migrated, see the * comment with perf_event_ctx_lock_nested(). */ - ctx = lockless_dereference(child->ctx); + ctx = READ_ONCE(child->ctx); /* * Since child_mutex nests inside ctx::mutex, we must jump * through hoops. We start by grabbing a reference on the ctx. 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 418a1c045933..5f0dfb2abb8d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -190,7 +190,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, u32 ret = SECCOMP_RET_ALLOW; /* Make sure cross-thread synced filter points somewhere sane. */ struct seccomp_filter *f = - lockless_dereference(current->seccomp.filter); + READ_ONCE(current->seccomp.filter); /* Ensure unexpected behavior doesn't result in failing open. */ if (unlikely(WARN_ON(f == NULL))) diff --git a/kernel/task_work.c b/kernel/task_work.c index 5718b3ea202a..0fef395662a6 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -68,7 +68,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) * we raced with task_work_run(), *pprev == NULL/exited. */ raw_spin_lock_irqsave(&task->pi_lock, flags); - while ((work = lockless_dereference(*pprev))) { + while ((work = READ_ONCE(*pprev))) { if (work->func != func) pprev = &work->next; else if (cmpxchg(pprev, work, work->next) == work) diff --git a/mm/slab.h b/mm/slab.h index 028cdc7df67e..86d7c7d860f9 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -259,7 +259,7 @@ cache_from_memcg_idx(struct kmem_cache *s, int idx) * memcg_caches issues a write barrier to match this (see * memcg_create_kmem_cache()). */ - cachep = lockless_dereference(arr->entries[idx]); + cachep = READ_ONCE(arr->entries[idx]); rcu_read_unlock(); return cachep; -- cgit v1.2.3 From 2aeb07365bcd489620f71390a7d2031cd4dfb83e Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 15 Nov 2017 17:36:35 -0800 Subject: x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow [ Note, this is a Git cherry-pick of the following commit: d17a1d97dc20: ("x86/mm/kasan: don't use vmemmap_populate() to initialize shadow") ... for easier x86 PTI code testing and back-porting. ] The KASAN shadow is currently mapped using vmemmap_populate() since that provides a semi-convenient way to map pages into init_top_pgt. 
However, since that no longer zeroes the mapped pages, it is not suitable for KASAN, which requires zeroed shadow memory. Add kasan_populate_shadow() interface and use it instead of vmemmap_populate(). Besides, this allows us to take advantage of gigantic pages and use them to populate the shadow, which should save us some memory wasted on page tables and reduce TLB pressure. Link: http://lkml.kernel.org/r/20171103185147.2688-2-pasha.tatashin@oracle.com Signed-off-by: Andrey Ryabinin Signed-off-by: Pavel Tatashin Cc: Andy Lutomirski Cc: Steven Sistare Cc: Daniel Jordan Cc: Bob Picco Cc: Michal Hocko Cc: Alexander Potapenko Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Christian Borntraeger Cc: David S. Miller Cc: Dmitry Vyukov Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Mark Rutland Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Sam Ravnborg Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/mm/kasan_init_64.c | 143 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 137 insertions(+), 8 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4ae940a0ed3b..665eba1b6103 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -108,7 +108,7 @@ config X86 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP + select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KGDB select HAVE_ARCH_KMEMCHECK select HAVE_ARCH_MMAP_RND_BITS if MMU diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 2b60dc6e64b1..99dfed6dfef8 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -4,12 +4,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -18,7 +20,134 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; static 
p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static int __init map_range(struct range *range) +static __init void *early_alloc(size_t size, int nid) +{ + return memblock_virt_alloc_try_nid_nopanic(size, size, + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); +} + +static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, + unsigned long end, int nid) +{ + pte_t *pte; + + if (pmd_none(*pmd)) { + void *p; + + if (boot_cpu_has(X86_FEATURE_PSE) && + ((end - addr) == PMD_SIZE) && + IS_ALIGNED(addr, PMD_SIZE)) { + p = early_alloc(PMD_SIZE, nid); + if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) + return; + else if (p) + memblock_free(__pa(p), PMD_SIZE); + } + + p = early_alloc(PAGE_SIZE, nid); + pmd_populate_kernel(&init_mm, pmd, p); + } + + pte = pte_offset_kernel(pmd, addr); + do { + pte_t entry; + void *p; + + if (!pte_none(*pte)) + continue; + + p = early_alloc(PAGE_SIZE, nid); + entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); + set_pte_at(&init_mm, addr, pte, entry); + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, + unsigned long end, int nid) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none(*pud)) { + void *p; + + if (boot_cpu_has(X86_FEATURE_GBPAGES) && + ((end - addr) == PUD_SIZE) && + IS_ALIGNED(addr, PUD_SIZE)) { + p = early_alloc(PUD_SIZE, nid); + if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) + return; + else if (p) + memblock_free(__pa(p), PUD_SIZE); + } + + p = early_alloc(PAGE_SIZE, nid); + pud_populate(&init_mm, pud, p); + } + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (!pmd_large(*pmd)) + kasan_populate_pmd(pmd, addr, next, nid); + } while (pmd++, addr = next, addr != end); +} + +static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, + unsigned long end, int nid) +{ + pud_t *pud; + unsigned long next; + + if (p4d_none(*p4d)) { + void *p = early_alloc(PAGE_SIZE, nid); + + 
p4d_populate(&init_mm, p4d, p); + } + + pud = pud_offset(p4d, addr); + do { + next = pud_addr_end(addr, end); + if (!pud_large(*pud)) + kasan_populate_pud(pud, addr, next, nid); + } while (pud++, addr = next, addr != end); +} + +static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, + unsigned long end, int nid) +{ + void *p; + p4d_t *p4d; + unsigned long next; + + if (pgd_none(*pgd)) { + p = early_alloc(PAGE_SIZE, nid); + pgd_populate(&init_mm, pgd, p); + } + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + kasan_populate_p4d(p4d, addr, next, nid); + } while (p4d++, addr = next, addr != end); +} + +static void __init kasan_populate_shadow(unsigned long addr, unsigned long end, + int nid) +{ + pgd_t *pgd; + unsigned long next; + + addr = addr & PAGE_MASK; + end = round_up(end, PAGE_SIZE); + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + kasan_populate_pgd(pgd, addr, next, nid); + } while (pgd++, addr = next, addr != end); +} + +static void __init map_range(struct range *range) { unsigned long start; unsigned long end; @@ -26,7 +155,7 @@ static int __init map_range(struct range *range) start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); - return vmemmap_populate(start, end, NUMA_NO_NODE); + kasan_populate_shadow(start, end, early_pfn_to_nid(range->start)); } static void __init clear_pgds(unsigned long start, @@ -189,16 +318,16 @@ void __init kasan_init(void) if (pfn_mapped[i].end == 0) break; - if (map_range(&pfn_mapped[i])) - panic("kasan: unable to allocate shadow!"); + map_range(&pfn_mapped[i]); } + kasan_populate_zero_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), kasan_mem_to_shadow((void *)__START_KERNEL_map)); - vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), - (unsigned long)kasan_mem_to_shadow(_end), - NUMA_NO_NODE); + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), + 
(unsigned long)kasan_mem_to_shadow(_end), + early_pfn_to_nid(__pa(_stext))); kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END); -- cgit v1.2.3 From e17f8234538d1ff708673f287a42457c4dee720d Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 4 Dec 2017 15:07:07 +0100 Subject: x86/entry/64/paravirt: Use paravirt-safe macro to access eflags Commit 1d3e53e8624a ("x86/entry/64: Refactor IRQ stacks and make them NMI-safe") added DEBUG_ENTRY_ASSERT_IRQS_OFF macro that accesses eflags using 'pushfq' instruction when testing for IF bit. On PV Xen guests, looking at IF flag directly will always see it set, resulting in 'ud2'. Introduce SAVE_FLAGS() macro that will use appropriate save_fl pv op when running paravirt. Signed-off-by: Boris Ostrovsky Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: xen-devel@lists.xenproject.org Link: https://lkml.kernel.org/r/20171204150604.899457242@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 7 ++++--- arch/x86/include/asm/irqflags.h | 3 +++ arch/x86/include/asm/paravirt.h | 9 +++++++++ arch/x86/kernel/asm-offsets_64.c | 3 +++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a2b30ec69497..32306788821c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -462,12 +462,13 @@ END(irq_entries_start) .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY - pushfq - testl $X86_EFLAGS_IF, (%rsp) + pushq %rax + SAVE_FLAGS(CLBR_RAX) + testl $X86_EFLAGS_IF, %eax jz .Lokay_\@ ud2 .Lokay_\@: - addq $8, %rsp + popq %rax #endif .endm diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index c8ef23f2c28f..89f08955fff7 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void) swapgs; \ sysretl +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(x) pushfq; popq %rax +#endif #else #define INTERRUPT_RETURN iret #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 283efcaac8af..892df375b615 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -927,6 +927,15 @@ extern void default_banner(void); PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ + PV_SAVE_REGS(clobbers | 
CLBR_CALLEE_SAVE); \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) +#endif + #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 630212fa9b9d..e3a5175a444b 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -23,6 +23,9 @@ int main(void) #ifdef CONFIG_PARAVIRT OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); +#ifdef CONFIG_DEBUG_ENTRY + OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl); +#endif BLANK(); #endif -- cgit v1.2.3 From d3a09104018cf2ad5973dfa8a9c138ef9f5015a3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:08 +0100 Subject: x86/unwinder/orc: Dont bail on stack overflow If the stack overflows into a guard page and the ORC unwinder should work well: by construction, there can't be any meaningful data in the guard page because no writes to the guard page will have succeeded. But there is a bug that prevents unwinding from working correctly: if the starting register state has RSP pointing into a stack guard page, the ORC unwinder bails out immediately. Instead of bailing out immediately check whether the next page up is a valid check page and if so analyze that. As a result the ORC unwinder will start the unwind. Tested by intentionally overflowing the task stack. The result is an accurate call trace instead of a trace consisting purely of '?' entries. There are a few other bugs that are triggered if the unwinder encounters a stack overflow after the first step, but they are outside the scope of this fix. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150604.991389777@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_orc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index a3f973b2c97a..ff8e1132b2ae 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -553,8 +553,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, } if (get_stack_info((unsigned long *)state->sp, state->task, - &state->stack_info, &state->stack_mask)) - return; + &state->stack_info, &state->stack_mask)) { + /* + * We weren't on a valid stack. It's possible that + * we overflowed a valid stack into a guard page. + * See if the next page up is valid so that we can + * generate some kind of backtrace if this happens. + */ + void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); + if (get_stack_info(next_page, state->task, &state->stack_info, + &state->stack_mask)) + return; + } /* * The caller can provide the address of the first frame directly -- cgit v1.2.3 From b02fcf9ba1211097754b286043cd87a8b4907e75 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 4 Dec 2017 15:07:09 +0100 Subject: x86/unwinder: Handle stack overflows more gracefully There are at least two unwinder bugs hindering the debugging of stack-overflow crashes: - It doesn't deal gracefully with the case where the stack overflows and the stack pointer itself isn't on a valid stack but the to-be-dereferenced data *is*. - The ORC oops dump code doesn't know how to print partial pt_regs, for the case where we get an interrupt/exception in *early* entry code before the full pt_regs have been saved. Fix both issues. 
http://lkml.kernel.org/r/20171126024031.uxi4numpbjm5rlbr@treble Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.071425003@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kdebug.h | 1 + arch/x86/include/asm/unwind.h | 7 ++++ arch/x86/kernel/dumpstack.c | 32 ++++++++++++++++--- arch/x86/kernel/process_64.c | 11 +++---- arch/x86/kernel/unwind_orc.c | 74 +++++++++++++++---------------------------- 5 files changed, 65 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index f86a8caa561e..395c9631e000 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, int all); +extern void show_iret_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index e9cc6fe1fc6f..c1688c2d0a12 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -7,6 +7,9 @@ #include #include +#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) +#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) + struct unwind_state { struct stack_info stack_info; unsigned long stack_mask; @@ -52,6 +55,10 @@ void 
unwind_start(struct unwind_state *state, struct task_struct *task, } #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) +/* + * WARNING: The entire pt_regs may not be safe to dereference. In some cases, + * only the iret frame registers are accessible. Use with caution! + */ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) { if (unwind_done(state)) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f13b4c00a5de..0bc95be5c638 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -50,6 +50,28 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +void show_iret_regs(struct pt_regs *regs) +{ + printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); + printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, + regs->sp, regs->flags); +} + +static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) +{ + if (on_stack(info, regs, sizeof(*regs))) + __show_regs(regs, 0); + else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, + IRET_FRAME_SIZE)) { + /* + * When an interrupt or exception occurs in entry code, the + * full pt_regs might not have been saved yet. In that case + * just print the iret frame. + */ + show_iret_regs(regs); + } +} + void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl) { @@ -94,8 +116,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, if (stack_name) printk("%s <%s>\n", log_lvl, stack_name); - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) - __show_regs(regs, 0); + if (regs) + show_regs_safe(&stack_info, regs); /* * Scan the stack, printing any text addresses we find. 
At the @@ -119,7 +141,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, /* * Don't print regs->ip again if it was already printed - * by __show_regs() below. + * by show_regs_safe() below. */ if (regs && stack == &regs->ip) goto next; @@ -155,8 +177,8 @@ next: /* if the frame has entry regs, print them */ regs = unwind_get_entry_regs(&state); - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) - __show_regs(regs, 0); + if (regs) + show_regs_safe(&stack_info, regs); } if (stack_name) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eeeb34f85c25..01b119bebb68 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int fsindex, gsindex; unsigned int ds, cs, es; - printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); - printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, - regs->sp, regs->flags); + show_iret_regs(regs); + if (regs->orig_ax != -1) pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); else @@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); + if (!all) + return; + asm("movl %%ds,%0" : "=r" (ds)); asm("movl %%cs,%0" : "=r" (cs)); asm("movl %%es,%0" : "=r" (es)); @@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all) rdmsrl(MSR_GS_BASE, gs); rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); - if (!all) - return; - cr0 = read_cr0(); cr2 = read_cr2(); cr3 = __read_cr3(); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index ff8e1132b2ae..be86a865087a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) return NULL; } -static bool stack_access_ok(struct unwind_state *state, unsigned long addr, +static bool stack_access_ok(struct 
unwind_state *state, unsigned long _addr, size_t len) { struct stack_info *info = &state->stack_info; + void *addr = (void *)_addr; - /* - * If the address isn't on the current stack, switch to the next one. - * - * We may have to traverse multiple stacks to deal with the possibility - * that info->next_sp could point to an empty stack and the address - * could be on a subsequent stack. - */ - while (!on_stack(info, (void *)addr, len)) - if (get_stack_info(info->next_sp, state->task, info, - &state->stack_mask)) - return false; + if (!on_stack(info, addr, len) && + (get_stack_info(addr, state->task, info, &state->stack_mask))) + return false; return true; } @@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, return true; } -#define REGS_SIZE (sizeof(struct pt_regs)) -#define SP_OFFSET (offsetof(struct pt_regs, sp)) -#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip)) -#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip)) - static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, - unsigned long *ip, unsigned long *sp, bool full) + unsigned long *ip, unsigned long *sp) { - size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; - size_t sp_offset = full ? 
SP_OFFSET : IRET_SP_OFFSET; - struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE); - - if (IS_ENABLED(CONFIG_X86_64)) { - if (!stack_access_ok(state, addr, regs_size)) - return false; - - *ip = regs->ip; - *sp = regs->sp; + struct pt_regs *regs = (struct pt_regs *)addr; - return true; - } + /* x86-32 support will be more complicated due to the &regs->sp hack */ + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32)); - if (!stack_access_ok(state, addr, sp_offset)) + if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) return false; *ip = regs->ip; + *sp = regs->sp; + return true; +} - if (user_mode(regs)) { - if (!stack_access_ok(state, addr + sp_offset, - REGS_SIZE - SP_OFFSET)) - return false; +static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr, + unsigned long *ip, unsigned long *sp) +{ + struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET; - *sp = regs->sp; - } else - *sp = (unsigned long)&regs->sp; + if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) + return false; + *ip = regs->ip; + *sp = regs->sp; return true; } @@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state) unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; enum stack_type prev_type = state->stack_info.type; struct orc_entry *orc; - struct pt_regs *ptregs; bool indirect = false; if (unwind_done(state)) @@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_TYPE_REGS: - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { + if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { orc_warn("can't dereference registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; @@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_TYPE_REGS_IRET: - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { + if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { orc_warn("can't dereference iret registers at %p for ip %pB\n", (void *)sp, 
(void *)orig_ip); goto done; } - ptregs = container_of((void *)sp, struct pt_regs, ip); - if ((unsigned long)ptregs >= prev_sp && - on_stack(&state->stack_info, ptregs, REGS_SIZE)) { - state->regs = ptregs; - state->full_regs = false; - } else - state->regs = NULL; - + state->regs = (void *)sp - IRET_FRAME_OFFSET; + state->full_regs = false; state->signal = true; break; -- cgit v1.2.3 From 6669a692605547892a026445e460bf233958bd7f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:10 +0100 Subject: x86/irq: Remove an old outdated comment about context tracking races That race has been fixed and code cleaned up for a while now. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.150551639@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 52089c043160..aa9d51eea9d0 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; - /* - * NB: Unlike exception entries, IRQ entries do not reliably - * handle context tracking in the low-level entry code. This is - * because syscall entries execute briefly with IRQs on before - * updating context tracking state, so we can take an IRQ from - * kernel mode with CONTEXT_USER. 
The low-level entry code only - * updates the context if we came from user mode, so we won't - * switch to CONTEXT_KERNEL. We'll fix that once the syscall - * code is cleaned up enough that we can cleanly defer enabling - * IRQs. - */ - entering_irq(); /* entering_irq() tells RCU that we're not quiescent. Check it. */ -- cgit v1.2.3 From 4f3789e792296e21405f708cf3cb409d7c7d5683 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:11 +0100 Subject: x86/irq/64: Print the offending IP in the stack overflow warning In case something goes wrong with unwind (not unlikely in case of overflow), print the offending IP where we detected the overflow. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.231677119@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 020efbf5786b..d86e344f5b3d 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", current->comm, 
curbase, regs->sp, irq_stack_top, irq_stack_bottom, - estack_top, estack_bottom); + estack_top, estack_bottom, (void *)regs->ip); if (sysctl_panic_on_stackoverflow) panic("low stack detected by irq handler - check messages\n"); -- cgit v1.2.3 From 1a79797b58cddfa948420a7553241c79c013e3ca Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:12 +0100 Subject: x86/entry/64: Allocate and enable the SYSENTER stack This will simplify future changes that want scratch variables early in the SYSENTER handler -- they'll be able to spill registers to the stack. It also lets us get rid of a SWAPGS_UNSAFE_STACK user. This does not depend on CONFIG_IA32_EMULATION=y because we'll want the stack space even without IA32 emulation. As far as I can tell, the reason that this wasn't done from day 1 is that we use IST for #DB and #BP, which is IMO rather nasty and causes a lot more problems than it solves. But, since #DB uses IST, we don't actually need a real stack for SYSENTER (because SYSENTER with TF set will invoke #DB on the IST stack rather than the SYSENTER stack). I want to remove IST usage from these vectors some day, and this patch is a prerequisite for that as well. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.312726423@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 2 +- arch/x86/include/asm/processor.h | 3 --- arch/x86/kernel/asm-offsets.c | 5 +++++ arch/x86/kernel/asm-offsets_32.c | 5 ----- arch/x86/kernel/cpu/common.c | 4 +++- arch/x86/kernel/process.c | 2 -- arch/x86/kernel/traps.c | 3 +-- 7 files changed, 10 insertions(+), 14 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 568e130d932c..dcc6987f9bae 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -48,7 +48,7 @@ */ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ - SWAPGS_UNSAFE_STACK + SWAPGS movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2db7cf720b04..789dad5da20f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -339,14 +339,11 @@ struct tss_struct { */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; -#ifdef CONFIG_X86_32 /* * Space for the temporary SYSENTER stack. 
*/ unsigned long SYSENTER_stack_canary; unsigned long SYSENTER_stack[64]; -#endif - } ____cacheline_aligned; DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 8ea78275480d..b275863128eb 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -93,4 +93,9 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + + /* Offset from cpu_tss to SYSENTER_stack */ + OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); + /* Size of SYSENTER_stack */ + DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index dedf428b20b6..52ce4ea16e53 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -50,11 +50,6 @@ void foo(void) DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - offsetofend(struct tss_struct, SYSENTER_stack)); - /* Offset from cpu_tss to SYSENTER_stack */ - OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); - /* Size of SYSENTER_stack */ - DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); - #ifdef CONFIG_CC_STACKPROTECTOR BLANK(); OFFSET(stack_canary_offset, stack_canary, canary); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cdf79ab628c2..22f542170198 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1361,7 +1361,9 @@ void syscall_init(void) * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 
*/ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, + (unsigned long)this_cpu_ptr(&cpu_tss) + + offsetofend(struct tss_struct, SYSENTER_stack)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 97fb3e5737f5..35d674157fda 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -71,9 +71,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { */ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif -#ifdef CONFIG_X86_32 .SYSENTER_stack_canary = STACK_END_MAGIC, -#endif }; EXPORT_PER_CPU_SYMBOL(cpu_tss); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d366adfc61da..d3e3bbd5d3a0 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -794,14 +794,13 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: -#if defined(CONFIG_X86_32) /* * This is the most likely code path that involves non-trivial use * of the SYSENTER stack. Check that we haven't overrun it. */ WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, "Overran or corrupted SYSENTER stack\n"); -#endif + ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); -- cgit v1.2.3 From 33a2f1a6c4d7c0a02d1c006fb0379cc5ca3b96bb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:13 +0100 Subject: x86/dumpstack: Add get_stack_info() support for the SYSENTER stack get_stack_info() doesn't currently know about the SYSENTER stack, so unwinding will fail if we entered the kernel on the SYSENTER stack and haven't fully switched off. Teach get_stack_info() about the SYSENTER stack. With future patches applied that run part of the entry code on the SYSENTER stack and introduce an intentional BUG(), I would get: PANIC: double fault, error_code: 0x0 ... 
RIP: 0010:do_error_trap+0x33/0x1c0 ... Call Trace: Code: ... With this patch, I get: PANIC: double fault, error_code: 0x0 ... Call Trace: ? async_page_fault+0x36/0x60 ? invalid_op+0x22/0x40 ? async_page_fault+0x36/0x60 ? sync_regs+0x3c/0x40 ? sync_regs+0x2e/0x40 ? error_entry+0x6c/0xd0 ? async_page_fault+0x36/0x60 Code: ... which is a lot more informative. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.392711508@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 3 +++ arch/x86/kernel/dumpstack.c | 19 +++++++++++++++++++ arch/x86/kernel/dumpstack_32.c | 6 ++++++ arch/x86/kernel/dumpstack_64.c | 6 ++++++ 4 files changed, 34 insertions(+) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 8da111b3c342..f8062bfd43a0 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -16,6 +16,7 @@ enum stack_type { STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, + STACK_TYPE_SYSENTER, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; @@ -28,6 +29,8 @@ struct stack_info { bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info); + int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 
0bc95be5c638..a33a1373a252 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -43,6 +43,25 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, return true; } +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) +{ + struct tss_struct *tss = this_cpu_ptr(&cpu_tss); + + /* Treat the canary as part of the stack for unwinding purposes. */ + void *begin = &tss->SYSENTER_stack_canary; + void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); + + if ((void *)stack < begin || (void *)stack >= end) + return false; + + info->type = STACK_TYPE_SYSENTER; + info->begin = begin; + info->end = end; + info->next_sp = NULL; + + return true; +} + static void printk_stack_address(unsigned long address, int reliable, char *log_lvl) { diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index daefae83a3aa..5ff13a6b3680 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_SOFTIRQ) return "SOFTIRQ"; + if (type == STACK_TYPE_SYSENTER) + return "SYSENTER"; + return NULL; } @@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (task != current) goto unknown; + if (in_sysenter_stack(stack, info)) + goto recursion_check; + if (in_hardirq_stack(stack, info)) goto recursion_check; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 88ce2ffdb110..abc828f8c297 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_IRQ) return "IRQ"; + if (type == STACK_TYPE_SYSENTER) + return "SYSENTER"; + if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) return exception_stack_names[type - STACK_TYPE_EXCEPTION]; @@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct 
*task, if (in_irq_stack(stack, info)) goto recursion_check; + if (in_sysenter_stack(stack, info)) + goto recursion_check; + goto unknown; recursion_check: -- cgit v1.2.3 From aaeed3aeb39c1ba69f0a49baec8cb728121d0a91 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:14 +0100 Subject: x86/entry/gdt: Put per-CPU GDT remaps in ascending order We currently have CPU 0's GDT at the top of the GDT range and higher-numbered CPUs at lower addresses. This happens because the fixmap is upside down (index 0 is the top of the fixmap). Flip it so that GDTs are in ascending order by virtual address. This will simplify a future patch that will generalize the GDT remap to contain multiple pages. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.471561421@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 0a3e808b9123..01fd944fd721 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -63,7 +63,7 @@ static inline struct desc_struct *get_current_gdt_rw(void) /* Get the fixmap index for a specific processor */ static inline unsigned int get_cpu_gdt_ro_index(int cpu) { - return FIX_GDT_REMAP_BEGIN + cpu; + return FIX_GDT_REMAP_END - cpu; } /* Provide the fixmap address of the remapped GDT */ -- cgit v1.2.3 From ef8813ab280507972bb57e4b1b502811ad4411e9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:15 +0100 Subject: x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce struct cpu_entry_area Currently, the GDT is an ad-hoc array of pages, one per CPU, in the fixmap. Generalize it to be an array of a new 'struct cpu_entry_area' so that we can cleanly add new things to it. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.563271721@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 9 +-------- arch/x86/include/asm/fixmap.h | 37 +++++++++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/common.c | 14 +++++++------- arch/x86/xen/mmu_pv.c | 2 +- 4 files changed, 44 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 01fd944fd721..f6f428432a68 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void) return this_cpu_ptr(&gdt_page)->gdt; } -/* Get the fixmap index for a specific processor */ -static inline unsigned int get_cpu_gdt_ro_index(int cpu) -{ - return FIX_GDT_REMAP_END - cpu; -} - /* Provide the fixmap address of the remapped GDT */ static inline struct desc_struct *get_cpu_gdt_ro(int cpu) { - unsigned int idx = get_cpu_gdt_ro_index(cpu); - return (struct desc_struct *)__fix_to_virt(idx); + return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt; } /* Provide the current read-only GDT */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b0c505fe9a95..b61f0242f9d0 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -44,6 +44,19 @@ extern unsigned long __FIXADDR_TOP; PAGE_SIZE) #endif +/* + * cpu_entry_area is a percpu region in the fixmap that contains things + * needed by the CPU and early entry/exit code. Real types aren't used + * for all fields here to avoid circular header dependencies. + * + * Every field is a virtual alias of some other allocated backing store. + * There is no direct allocation of a struct cpu_entry_area. 
+ */ +struct cpu_entry_area { + char gdt[PAGE_SIZE]; +}; + +#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) /* * Here we define all the compile-time 'special' virtual @@ -101,8 +114,8 @@ enum fixed_addresses { FIX_LNW_VRTC, #endif /* Fixmap entries to remap the GDTs, one per processor. */ - FIX_GDT_REMAP_BEGIN, - FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, + FIX_CPU_ENTRY_AREA_TOP, + FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1, #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ @@ -191,5 +204,25 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); +static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page) +{ + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); + + return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page; +} + +#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \ + BUILD_BUG_ON(offset % PAGE_SIZE != 0); \ + __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \ + }) + +#define get_cpu_entry_area_index(cpu, field) \ + __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field)) + +static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) +{ + return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22f542170198..2cb394dc4153 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -466,12 +466,12 @@ void load_percpu_segment(int cpu) load_stack_canary_segment(); } -/* Setup the fixmap mapping only once per-processor */ -static inline void setup_fixmap_gdt(int cpu) +/* Setup the fixmap mappings only once per-processor */ +static inline void setup_cpu_entry_area(int cpu) { 
#ifdef CONFIG_X86_64 /* On 64-bit systems, we use a read-only fixmap GDT. */ - pgprot_t prot = PAGE_KERNEL_RO; + pgprot_t gdt_prot = PAGE_KERNEL_RO; #else /* * On native 32-bit systems, the GDT cannot be read-only because @@ -482,11 +482,11 @@ static inline void setup_fixmap_gdt(int cpu) * On Xen PV, the GDT must be read-only because the hypervisor requires * it. */ - pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ? + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? PAGE_KERNEL_RO : PAGE_KERNEL; #endif - __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); } /* Load the original GDT from the per-cpu structure */ @@ -1589,7 +1589,7 @@ void cpu_init(void) if (is_uv_system()) uv_cpu_init(); - setup_fixmap_gdt(cpu); + setup_cpu_entry_area(cpu); load_fixmap_gdt(cpu); } @@ -1651,7 +1651,7 @@ void cpu_init(void) fpu__init_cpu(); - setup_fixmap_gdt(cpu); + setup_cpu_entry_area(cpu); load_fixmap_gdt(cpu); } #endif diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2ccdaba31a07..c2454237fa67 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2272,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: - case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END: + case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM: /* All local page mappings */ pte = pfn_pte(phys, prot); break; -- cgit v1.2.3 From 21506525fb8ddb0342f2a2370812d47f6a1f3833 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:16 +0100 Subject: x86/kasan/64: Teach KASAN about the cpu_entry_area The cpu_entry_area will contain stacks. Make sure that KASAN has appropriate shadow mappings for them. 
Signed-off-by: Andy Lutomirski Signed-off-by: Andrey Ryabinin Signed-off-by: Thomas Gleixner Cc: Alexander Potapenko Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: kasan-dev@googlegroups.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.642806442@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 99dfed6dfef8..9ec70d780f1f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -277,6 +277,7 @@ void __init kasan_early_init(void) void __init kasan_init(void) { int i; + void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; #ifdef CONFIG_KASAN_INLINE register_die_notifier(&kasan_die_notifier); @@ -329,8 +330,23 @@ void __init kasan_init(void) (unsigned long)kasan_mem_to_shadow(_end), early_pfn_to_nid(__pa(_stext))); + shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM); + shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); + shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, + PAGE_SIZE); + + shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE); + shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); + shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, + PAGE_SIZE); + kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), - (void *)KASAN_SHADOW_END); + shadow_cpu_entry_begin); + + kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, + (unsigned 
long)shadow_cpu_entry_end, 0); + + kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END); load_cr3(init_top_pgt); __flush_tlb_all(); -- cgit v1.2.3 From 7fb983b4dd569e08564134a850dfd4eb1c63d9b8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:17 +0100 Subject: x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss A future patch will move SYSENTER_stack to the beginning of cpu_tss to help detect overflow. Before this can happen, fix several code paths that hardcode assumptions about the old layout. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.722425540@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 2 +- arch/x86/include/asm/processor.h | 9 +++++++-- arch/x86/kernel/cpu/common.c | 8 ++++---- arch/x86/kernel/doublefault.c | 36 +++++++++++++++++------------------- arch/x86/kvm/vmx.c | 2 +- arch/x86/power/cpu.c | 13 +++++++------ 6 files changed, 37 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index f6f428432a68..2ace1f90d138 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -178,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, #endif } -static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr) { struct desc_struct *d = 
get_cpu_gdt_rw(cpu); tss_desc tss; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 789dad5da20f..555c9478f3df 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -162,7 +162,7 @@ enum cpuid_regs_idx { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; -extern struct tss_struct doublefault_tss; +extern struct x86_hw_tss doublefault_tss; extern __u32 cpu_caps_cleared[NCAPINTS]; extern __u32 cpu_caps_set[NCAPINTS]; @@ -252,6 +252,11 @@ static inline void load_cr3(pgd_t *pgdir) write_cr3(__sme_pa(pgdir)); } +/* + * Note that while the legacy 'TSS' name comes from 'Task State Segment', + * on modern x86 CPUs the TSS also holds information important to 64-bit mode, + * unrelated to the task-switch mechanism: + */ #ifdef CONFIG_X86_32 /* This is the TSS defined by the hardware. */ struct x86_hw_tss { @@ -322,7 +327,7 @@ struct x86_hw_tss { #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) +#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) #define INVALID_IO_BITMAP_OFFSET 0x8000 struct tss_struct { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2cb394dc4153..3f285b973f50 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1557,7 +1557,7 @@ void cpu_init(void) } } - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; /* * <= is required because the CPU will access up to @@ -1576,7 +1576,7 @@ void cpu_init(void) * Initialize the TSS. Don't bother initializing sp0, as the initial * task never enters user mode. 
*/ - set_tss_desc(cpu, t); + set_tss_desc(cpu, &t->x86_tss); load_TR_desc(); load_mm_ldt(&init_mm); @@ -1634,12 +1634,12 @@ void cpu_init(void) * Initialize the TSS. Don't bother initializing sp0, as the initial * task never enters user mode. */ - set_tss_desc(cpu, t); + set_tss_desc(cpu, &t->x86_tss); load_TR_desc(); load_mm_ldt(&init_mm); - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; #ifdef CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 0e662c55ae90..0b8cedb20d6d 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -50,25 +50,23 @@ static void doublefault_fn(void) cpu_relax(); } -struct tss_struct doublefault_tss __cacheline_aligned = { - .x86_tss = { - .sp0 = STACK_START, - .ss0 = __KERNEL_DS, - .ldt = 0, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, - - .ip = (unsigned long) doublefault_fn, - /* 0x2 bit is always set */ - .flags = X86_EFLAGS_SF | 0x2, - .sp = STACK_START, - .es = __USER_DS, - .cs = __KERNEL_CS, - .ss = __KERNEL_DS, - .ds = __USER_DS, - .fs = __KERNEL_PERCPU, - - .__cr3 = __pa_nodebug(swapper_pg_dir), - } +struct x86_hw_tss doublefault_tss __cacheline_aligned = { + .sp0 = STACK_START, + .ss0 = __KERNEL_DS, + .ldt = 0, + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, + + .ip = (unsigned long) doublefault_fn, + /* 0x2 bit is always set */ + .flags = X86_EFLAGS_SF | 0x2, + .sp = STACK_START, + .es = __USER_DS, + .cs = __KERNEL_CS, + .ss = __KERNEL_DS, + .ds = __USER_DS, + .fs = __KERNEL_PERCPU, + + .__cr3 = __pa_nodebug(swapper_pg_dir), }; /* dummy for do_double_fault() call */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a6f4f095f8f4..2abe0073b573 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2291,7 +2291,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * processors. See 22.2.4. 
*/ vmcs_writel(HOST_TR_BASE, - (unsigned long)this_cpu_ptr(&cpu_tss)); + (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss)); vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ /* diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 84fcfde53f8f..50593e138281 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -165,12 +165,13 @@ static void fix_processor_context(void) struct desc_struct *desc = get_cpu_gdt_rw(cpu); tss_desc tss; #endif - set_tss_desc(cpu, t); /* - * This just modifies memory; should not be - * necessary. But... This is necessary, because - * 386 hardware has concept of busy TSS or some - * similar stupidity. - */ + + /* + * This just modifies memory; should not be necessary. But... This is + * necessary, because 386 hardware has concept of busy TSS or some + * similar stupidity. + */ + set_tss_desc(cpu, &t->x86_tss); #ifdef CONFIG_X86_64 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); -- cgit v1.2.3 From 6e60e583426c2f8751c22c2dfe5c207083b4483a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:18 +0100 Subject: x86/dumpstack: Handle stack overflow on all stacks We currently special-case stack overflow on the task stack. We're going to start putting special stacks in the fixmap with a custom layout, so they'll have guard pages, too. Teach the unwinder to be able to unwind an overflow of any of the stacks. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.802057305@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index a33a1373a252..64f8ed2a4827 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -112,24 +112,28 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) + * - SYSENTER stack * - * x86-32 can have up to three stacks: + * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack + * - SYSENTER stack */ for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; - /* - * If we overflowed the task stack into a guard page, jump back - * to the bottom of the usable stack. - */ - if (task_stack_page(task) - (void *)stack < PAGE_SIZE) - stack = task_stack_page(task); - - if (get_stack_info(stack, task, &stack_info, &visit_mask)) - break; + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { + /* + * We weren't on a valid stack. It's possible that + * we overflowed a valid stack into a guard page. + * See if the next page up is valid so that we can + * generate some kind of backtrace if this happens. 
+ */ + stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack); + if (get_stack_info(stack, task, &stack_info, &visit_mask)) + break; + } stack_name = stack_type_name(stack_info.type); if (stack_name) -- cgit v1.2.3 From 1a935bc3d4ea61556461a9e92a68ca3556232efd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:19 +0100 Subject: x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct SYSENTER_stack should have reliable overflow detection, which means that it needs to be at the bottom of a page, not the top. Move it to the beginning of struct tss_struct and page-align it. Also add an assertion to make sure that the fixed hardware TSS doesn't cross a page boundary. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.881827433@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 21 ++++++++++++--------- arch/x86/kernel/cpu/common.c | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 555c9478f3df..759051251664 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -332,7 +332,16 @@ struct x86_hw_tss { struct tss_struct { /* - * The hardware state: + * Space for the temporary SYSENTER stack, used for SYSENTER + * and the entry trampoline as well. 
+ */ + unsigned long SYSENTER_stack_canary; + unsigned long SYSENTER_stack[64]; + + /* + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. */ struct x86_hw_tss x86_tss; @@ -343,15 +352,9 @@ struct tss_struct { * be within the limit. */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; +} __aligned(PAGE_SIZE); - /* - * Space for the temporary SYSENTER stack. - */ - unsigned long SYSENTER_stack_canary; - unsigned long SYSENTER_stack[64]; -} ____cacheline_aligned; - -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss); /* * sizeof(unsigned long) coming from an extra "long" at the end diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3f285b973f50..60b2dfd2a58b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -487,6 +487,27 @@ static inline void setup_cpu_entry_area(int cpu) #endif __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); + + /* + * The Intel SDM says (Volume 3, 7.2.1): + * + * Avoid placing a page boundary in the part of the TSS that the + * processor reads during a task switch (the first 104 bytes). The + * processor may not correctly perform address translations if a + * boundary occurs in this area. During a task switch, the processor + * reads and writes into the first 104 bytes of each TSS (using + * contiguous physical addresses beginning with the physical address + * of the first byte of the TSS). So, after TSS access begins, if + * part of the 104 bytes is not physically contiguous, the processor + * will access incorrect information without generating a page-fault + * exception. + * + * There are also a lot of errata involving the TSS spanning a page + * boundary. Assert that we're not doing that. 
+ */ + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); + } /* Load the original GDT from the per-cpu structure */ -- cgit v1.2.3 From 72f5e08dbba2d01aa90b592cf76c378ea233b00b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:20 +0100 Subject: x86/entry: Remap the TSS into the CPU entry area This has a secondary purpose: it puts the entry stack into a region with a well-controlled layout. A subsequent patch will take advantage of this to streamline the SYSCALL entry code to be able to find it more easily. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150605.962042855@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 6 ++++-- arch/x86/include/asm/fixmap.h | 7 +++++++ arch/x86/kernel/asm-offsets.c | 3 +++ arch/x86/kernel/cpu/common.c | 41 +++++++++++++++++++++++++++++++++++------ arch/x86/kernel/dumpstack.c | 3 ++- arch/x86/kvm/vmx.c | 2 +- arch/x86/power/cpu.c | 11 ++++++----- 7 files changed, 58 insertions(+), 15 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 4838037f97f6..0ab316c46806 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -941,7 +941,8 @@ ENTRY(debug) movl %esp, %eax # pt_regs pointer /* Are we currently on the SYSENTER stack? 
*/ - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) + movl PER_CPU_VAR(cpu_entry_area), %ecx + addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ cmpl $SIZEOF_SYSENTER_stack, %ecx jb .Ldebug_from_sysenter_stack @@ -984,7 +985,8 @@ ENTRY(nmi) movl %esp, %eax # pt_regs pointer /* Are we currently on the SYSENTER stack? */ - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) + movl PER_CPU_VAR(cpu_entry_area), %ecx + addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ cmpl $SIZEOF_SYSENTER_stack, %ecx jb .Lnmi_from_sysenter_stack diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b61f0242f9d0..84558b611ad3 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -54,6 +54,13 @@ extern unsigned long __FIXADDR_TOP; */ struct cpu_entry_area { char gdt[PAGE_SIZE]; + + /* + * The GDT is just below cpu_tss and thus serves (on x86_64) as a + * a read-only guard page for the SYSENTER stack at the bottom + * of the TSS region. 
+ */ + struct tss_struct tss; }; #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index b275863128eb..55858b277cf6 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -98,4 +98,7 @@ void common(void) { OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); /* Size of SYSENTER_stack */ DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + + /* Layout info for cpu_entry_area */ + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 60b2dfd2a58b..e5837bd6c672 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -466,6 +466,22 @@ void load_percpu_segment(int cpu) load_stack_canary_segment(); } +static void set_percpu_fixmap_pages(int fixmap_index, void *ptr, + int pages, pgprot_t prot) +{ + int i; + + for (i = 0; i < pages; i++) { + __set_fixmap(fixmap_index - i, + per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot); + } +} + +#ifdef CONFIG_X86_32 +/* The 32-bit entry code needs to find cpu_entry_area. 
*/ +DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); +#endif + /* Setup the fixmap mappings only once per-processor */ static inline void setup_cpu_entry_area(int cpu) { @@ -507,7 +523,15 @@ static inline void setup_cpu_entry_area(int cpu) */ BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), + &per_cpu(cpu_tss, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, + PAGE_KERNEL); +#ifdef CONFIG_X86_32 + this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); +#endif } /* Load the original GDT from the per-cpu structure */ @@ -1257,7 +1281,8 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); wrmsr(MSR_IA32_SYSENTER_ESP, - (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), + (unsigned long)&get_cpu_entry_area(cpu)->tss + + offsetofend(struct tss_struct, SYSENTER_stack), 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); @@ -1370,6 +1395,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks /* May not be marked __init: used by software suspend */ void syscall_init(void) { + int cpu = smp_processor_id(); + wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); @@ -1383,7 +1410,7 @@ void syscall_init(void) */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); wrmsrl_safe(MSR_IA32_SYSENTER_ESP, - (unsigned long)this_cpu_ptr(&cpu_tss) + + (unsigned long)&get_cpu_entry_area(cpu)->tss + offsetofend(struct tss_struct, SYSENTER_stack)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else @@ -1593,11 +1620,13 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, me); + setup_cpu_entry_area(cpu); + /* * Initialize the TSS. Don't bother initializing sp0, as the initial * task never enters user mode. 
*/ - set_tss_desc(cpu, &t->x86_tss); + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); load_mm_ldt(&init_mm); @@ -1610,7 +1639,6 @@ void cpu_init(void) if (is_uv_system()) uv_cpu_init(); - setup_cpu_entry_area(cpu); load_fixmap_gdt(cpu); } @@ -1651,11 +1679,13 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, curr); + setup_cpu_entry_area(cpu); + /* * Initialize the TSS. Don't bother initializing sp0, as the initial * task never enters user mode. */ - set_tss_desc(cpu, &t->x86_tss); + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); load_mm_ldt(&init_mm); @@ -1672,7 +1702,6 @@ void cpu_init(void) fpu__init_cpu(); - setup_cpu_entry_area(cpu); load_fixmap_gdt(cpu); } #endif diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 64f8ed2a4827..60267850125e 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -45,7 +45,8 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) { - struct tss_struct *tss = this_cpu_ptr(&cpu_tss); + int cpu = smp_processor_id(); + struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; /* Treat the canary as part of the stack for unwinding purposes. */ void *begin = &tss->SYSENTER_stack_canary; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2abe0073b573..62ee4362e1c1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2291,7 +2291,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * processors. See 22.2.4. 
*/ vmcs_writel(HOST_TR_BASE, - (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss)); + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ /* diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 50593e138281..04d5157fe7f8 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -160,18 +160,19 @@ static void do_fpu_end(void) static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(cpu_tss, cpu); #ifdef CONFIG_X86_64 struct desc_struct *desc = get_cpu_gdt_rw(cpu); tss_desc tss; #endif /* - * This just modifies memory; should not be necessary. But... This is - * necessary, because 386 hardware has concept of busy TSS or some - * similar stupidity. + * We need to reload TR, which requires that we change the + * GDT entry to indicate "available" first. + * + * XXX: This could probably all be replaced by a call to + * force_reload_TR(). */ - set_tss_desc(cpu, &t->x86_tss); + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); #ifdef CONFIG_X86_64 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); -- cgit v1.2.3 From 9aaefe7b59ae00605256a7d6bd1c1456432495fc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:21 +0100 Subject: x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0 On 64-bit kernels, we used to assume that TSS.sp0 was the current top of stack. With the addition of an entry trampoline, this will no longer be the case. Store the current top of stack in TSS.sp1, which is otherwise unused but shares the same cacheline. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.050864668@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 18 +++++++++++++----- arch/x86/include/asm/thread_info.h | 2 +- arch/x86/kernel/asm-offsets_64.c | 1 + arch/x86/kernel/process.c | 10 ++++++++++ arch/x86/kernel/process_64.c | 1 + 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 759051251664..b0cf0612a454 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -309,7 +309,13 @@ struct x86_hw_tss { struct x86_hw_tss { u32 reserved1; u64 sp0; + + /* + * We store cpu_current_top_of_stack in sp1 so it's always accessible. + * Linux does not use ring 1, so sp1 is not otherwise needed. + */ u64 sp1; + u64 sp2; u64 reserved2; u64 ist[7]; @@ -368,6 +374,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss); #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); +#else +#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1 #endif /* @@ -539,12 +547,12 @@ static inline void native_swapgs(void) static inline unsigned long current_top_of_stack(void) { -#ifdef CONFIG_X86_64 - return this_cpu_read_stable(cpu_tss.x86_tss.sp0); -#else - /* sp0 on x86_32 is special in and around vm86 mode. */ + /* + * We can't read directly from tss.sp0: sp0 on x86_32 is special in + * and around vm86 mode and sp0 on x86_64 is special because of the + * entry trampoline. 
+ */ return this_cpu_read_stable(cpu_current_top_of_stack); -#endif } static inline bool on_thread_stack(void) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 70f425947dc5..44a04999791e 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack, #else /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_64 -# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) +# define cpu_current_top_of_stack (cpu_tss + TSS_sp1) #endif #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e3a5175a444b..bf51e51d808d 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -66,6 +66,7 @@ int main(void) OFFSET(TSS_ist, tss_struct, x86_tss.ist); OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); BLANK(); #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 35d674157fda..86e83762e3b3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { * Poison it. */ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, + +#ifdef CONFIG_X86_64 + /* + * .sp1 is cpu_current_top_of_stack. The init task never + * runs user code, but cpu_current_top_of_stack should still + * be well defined before the first context switch. + */ + .sp1 = TOP_OF_INIT_STACK, +#endif + #ifdef CONFIG_X86_32 .ss0 = __KERNEL_DS, .ss1 = __KERNEL_CS, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 01b119bebb68..157f81816915 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -461,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Switch the PDA and FPU contexts. 
*/ this_cpu_write(current_task, next_p); + this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); /* Reload sp0. */ update_sp0(next_p); -- cgit v1.2.3 From 6d9256f0a89eaff97fca6006100bcaea8d1d8bdb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:22 +0100 Subject: x86/espfix/64: Stop assuming that pt_regs is on the entry stack When we start using an entry trampoline, a #GP from userspace will be delivered on the entry stack, not on the task stack. Fix the espfix64 #DF fixup to set up #GP according to TSS.SP0, rather than assuming that pt_regs + 1 == SP0. This won't change anything without an entry stack, but it will make the code continue to work when an entry stack is added. While we're at it, improve the comments to explain what's actually going on. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.130778051@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d3e3bbd5d3a0..f0029d17b14b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -348,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) /* * If IRET takes a non-IST fault on the espfix64 stack, then we - * end up promoting it to a doublefault. 
In that case, modify - * the stack to make it look like we just entered the #GP - * handler from user space, similar to bad_iret. + * end up promoting it to a doublefault. In that case, take + * advantage of the fact that we're not using the normal (TSS.sp0) + * stack right now. We can write a fake #GP(0) frame at TSS.sp0 + * and then modify our own IRET frame so that, when we return, + * we land directly at the #GP(0) vector with the stack already + * set up according to its expectations. + * + * The net result is that our #GP handler will think that we + * entered from usermode with the bad user context. * * No need for ist_enter here because we don't use RCU. */ @@ -358,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->cs == __KERNEL_CS && regs->ip == (unsigned long)native_irq_return_iret) { - struct pt_regs *normal_regs = task_pt_regs(current); + struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1; + + /* + * regs->sp points to the failing IRET frame on the + * ESPFIX64 stack. Copy it to the entry stack. This fills + * in gpregs->ss through gpregs->ip. + * + */ + memmove(&gpregs->ip, (void *)regs->sp, 5*8); + gpregs->orig_ax = 0; /* Missing (lost) #GP error code */ - /* Fake a #GP(0) from userspace. */ - memmove(&normal_regs->ip, (void *)regs->sp, 5*8); - normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ + /* + * Adjust our frame so that we return straight to the #GP + * vector with the expected RSP value. This is safe because + * we won't enable interupts or schedule before we invoke + * general_protection, so nothing will clobber the stack + * frame we just set up. + */ regs->ip = (unsigned long)general_protection; - regs->sp = (unsigned long)&normal_regs->orig_ax; + regs->sp = (unsigned long)&gpregs->orig_ax; return; } @@ -389,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * * Processors update CR2 whenever a page fault is detected. 
If a * second page fault occurs while an earlier page fault is being - * deliv- ered, the faulting linear address of the second fault will + * delivered, the faulting linear address of the second fault will * overwrite the contents of CR2 (replacing the previous * address). These updates to CR2 occur even if the page fault * results in a double fault or occurs during the delivery of a -- cgit v1.2.3 From 7f2590a110b837af5679d08fc25c6227c5a8c497 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:23 +0100 Subject: x86/entry/64: Use a per-CPU trampoline stack for IDT entries Historically, IDT entries from usermode have always gone directly to the running task's kernel stack. Rearrange it so that we enter on a per-CPU trampoline stack and then manually switch to the task's stack. This touches a couple of extra cachelines, but it gives us a chance to run some code before we touch the kernel stack. The asm isn't exactly beautiful, but I think that fully refactoring it can wait. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.225330557@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 67 ++++++++++++++++++++++++++++++---------- arch/x86/entry/entry_64_compat.S | 5 ++- arch/x86/include/asm/switch_to.h | 4 ++- arch/x86/include/asm/traps.h | 1 - arch/x86/kernel/cpu/common.c | 6 ++-- arch/x86/kernel/traps.c | 21 +++++++------ 6 files changed, 72 insertions(+), 32 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 32306788821c..35b8e949ac2f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -560,6 +560,13 @@ END(irq_entries_start) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func cld + + testb $3, CS-ORIG_RAX(%rsp) + jz 1f + SWAPGS + call switch_to_thread_stack +1: + ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS @@ -569,12 +576,8 @@ END(irq_entries_start) jz 1f /* - * IRQ from user mode. Switch to kernel gsbase and inform context - * tracking that we're in kernel mode. - */ - SWAPGS - - /* + * IRQ from user mode. + * * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode * (which can take locks). Since TRACE_IRQS_OFF idempotent, @@ -828,6 +831,32 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) +/* + * Switch to the thread stack. This is called with the IRET frame and + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and + * space has not been allocated for them.) 
+ */ +ENTRY(switch_to_thread_stack) + UNWIND_HINT_FUNC + + pushq %rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI + + pushq 7*8(%rdi) /* regs->ss */ + pushq 6*8(%rdi) /* regs->rsp */ + pushq 5*8(%rdi) /* regs->eflags */ + pushq 4*8(%rdi) /* regs->cs */ + pushq 3*8(%rdi) /* regs->ip */ + pushq 2*8(%rdi) /* regs->orig_ax */ + pushq 8(%rdi) /* return address */ + UNWIND_HINT_FUNC + + movq (%rdi), %rdi + ret +END(switch_to_thread_stack) + .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -845,11 +874,12 @@ ENTRY(\sym) ALLOC_PT_GPREGS_ON_STACK - .if \paranoid - .if \paranoid == 1 + .if \paranoid < 2 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ - jnz 1f + jnz .Lfrom_usermode_switch_stack_\@ .endif + + .if \paranoid call paranoid_entry .else call error_entry @@ -891,20 +921,15 @@ ENTRY(\sym) jmp error_exit .endif - .if \paranoid == 1 + .if \paranoid < 2 /* - * Paranoid entry from userspace. Switch stacks and treat it + * Entry from userspace. Switch stacks and treat it * as a normal entry. This means that paranoid handlers * run in real process context if user_mode(regs). */ -1: +.Lfrom_usermode_switch_stack_\@: call error_entry - - movq %rsp, %rdi /* pt_regs pointer */ - call sync_regs - movq %rax, %rsp /* switch stack */ - movq %rsp, %rdi /* pt_regs pointer */ .if \has_error_code @@ -1165,6 +1190,14 @@ ENTRY(error_entry) SWAPGS .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ + popq %r12 /* save return addr in %12 */ + movq %rsp, %rdi /* arg0 = pt_regs pointer */ + call sync_regs + movq %rax, %rsp /* switch stack */ + ENCODE_FRAME_POINTER + pushq %r12 + /* * We need to tell lockdep that IRQs are off. 
We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index dcc6987f9bae..95ad40eb7eff 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat) */ movl %eax, %eax - /* Construct struct pt_regs on stack (iret frame is already on stack) */ pushq %rax /* pt_regs->orig_ax */ + + /* switch to thread stack expects orig_ax to be pushed */ + call switch_to_thread_stack + pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8c6bd6863db9..cbc71e73bd32 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread) /* This is used when switching tasks or entering/exiting vm86 mode. */ static inline void update_sp0(struct task_struct *task) { + /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */ #ifdef CONFIG_X86_32 load_sp0(task->thread.sp0); #else - load_sp0(task_top_of_stack(task)); + if (static_cpu_has(X86_FEATURE_XENPV)) + load_sp0(task_top_of_stack(task)); #endif } diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 1fadd310ff68..31051f35cbb7 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_stack_segment(struct pt_regs *, long); #ifdef CONFIG_X86_64 dotraplinkage void do_double_fault(struct pt_regs *, long); -asmlinkage struct pt_regs *sync_regs(struct pt_regs *); #endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); diff --git a/arch/x86/kernel/cpu/common.c 
b/arch/x86/kernel/cpu/common.c index e5837bd6c672..57968880e39b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1623,11 +1623,13 @@ void cpu_init(void) setup_cpu_entry_area(cpu); /* - * Initialize the TSS. Don't bother initializing sp0, as the initial - * task never enters user mode. + * Initialize the TSS. sp0 points to the entry trampoline stack + * regardless of what task is running. */ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); + load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss + + offsetofend(struct tss_struct, SYSENTER_stack)); load_mm_ldt(&init_mm); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f0029d17b14b..ee9ca0ad4388 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -619,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* - * Help handler running on IST stack to switch off the IST stack if the - * interrupted code was in user mode. The actual stack switch is done in - * entry_64.S + * Help handler running on a per-cpu (IST or entry trampoline) stack + * to switch to the normal thread stack if the interrupted code was in + * user mode. The actual stack switch is done in entry_64.S */ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = task_pt_regs(current); - *regs = *eregs; + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; + if (regs != eregs) + *regs = *eregs; return regs; } NOKPROBE_SYMBOL(sync_regs); @@ -642,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) /* * This is called from entry_64.S early in handling a fault * caused by a bad iret to user mode. To handle the fault - * correctly, we want move our stack frame to task_pt_regs - * and we want to pretend that the exception came from the - * iret target. 
+ * correctly, we want to move our stack frame to where it would + * be had we entered directly on the entry stack (rather than + * just below the IRET frame) and we want to pretend that the + * exception came from the IRET target. */ struct bad_iret_stack *new_stack = - container_of(task_pt_regs(current), - struct bad_iret_stack, regs); + (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1; /* Copy the IRET target to the new stack. */ memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); -- cgit v1.2.3 From 3e3b9293d392c577b62e24e4bc9982320438e749 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:24 +0100 Subject: x86/entry/64: Return to userspace from the trampoline stack By itself, this is useless. It gives us the ability to run some final code before exit that cannot run on the kernel stack. This could include a CR3 switch a la PAGE_TABLE_ISOLATION or some kernel stack erasing, for example. (Or even weird things like *changing* which kernel stack gets used as an ASLR-strengthening mechanism.) The SYSRET32 path is not covered yet. It could be in the future or we could just ignore it and force the slow path if needed. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.306546484@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 55 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 35b8e949ac2f..42a9379f7acb 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -326,8 +326,24 @@ syscall_return_via_sysret: popq %rsi /* skip rcx */ popq %rdx popq %rsi + + /* + * Now all regs are restored except RSP and RDI. + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + + pushq RSP-RDI(%rdi) /* RSP */ + pushq (%rdi) /* RDI */ + + /* + * We are on the trampoline stack. All regs except RDI are live. + * We can do future final exit work right here. + */ + popq %rdi - movq RSP-ORIG_RAX(%rsp), %rsp + popq %rsp USERGS_SYSRET64 END(entry_SYSCALL_64) @@ -630,10 +646,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) ud2 1: #endif - SWAPGS POP_EXTRA_REGS - POP_C_REGS - addq $8, %rsp /* skip regs->orig_ax */ + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + + /* + * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + + /* Copy the IRET frame to the trampoline stack. */ + pushq 6*8(%rdi) /* SS */ + pushq 5*8(%rdi) /* RSP */ + pushq 4*8(%rdi) /* EFLAGS */ + pushq 3*8(%rdi) /* CS */ + pushq 2*8(%rdi) /* RIP */ + + /* Push user RDI on the trampoline stack. */ + pushq (%rdi) + + /* + * We are on the trampoline stack. All regs except RDI are live. 
+ * We can do future final exit work right here. + */ + + /* Restore RDI. */ + popq %rdi + SWAPGS INTERRUPT_RETURN -- cgit v1.2.3 From 3386bc8aed825e9f1f65ce38df4b109b2019b71a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:25 +0100 Subject: x86/entry/64: Create a per-CPU SYSCALL entry trampoline Handling SYSCALL is tricky: the SYSCALL handler is entered with every single register (except FLAGS), including RSP, live. It somehow needs to set RSP to point to a valid stack, which means it needs to save the user RSP somewhere and find its own stack pointer. The canonical way to do this is with SWAPGS, which lets us access percpu data using the %gs prefix. With PAGE_TABLE_ISOLATION-like pagetable switching, this is problematic. Without a scratch register, switching CR3 is impossible, so %gs-based percpu memory would need to be mapped in the user pagetables. Doing that without information leaks is difficult or impossible. Instead, use a different sneaky trick. Map a copy of the first part of the SYSCALL asm at a different address for each CPU. Now RIP varies depending on the CPU, so we can use RIP-relative memory access to access percpu memory. By putting the relevant information (one scratch slot and the stack address) at a constant offset relative to RIP, we can make SYSCALL work without relying on %gs. A nice thing about this approach is that we can easily switch it on and off if we want pagetable switching to be configurable. The compat variant of SYSCALL doesn't have this problem in the first place -- there are plenty of scratch registers, since we don't care about preserving r8-r15. This patch therefore doesn't touch SYSCALL32 at all. This patch actually seems to be a small speedup. With this patch, SYSCALL touches an extra cache line and an extra virtual page, but the pipeline no longer stalls waiting for SWAPGS. It seems that, at least in a tight loop, the latter outweighs the former. Thanks to David Laight for an optimization tip. 
Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.403607157@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 58 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/fixmap.h | 2 ++ arch/x86/kernel/asm-offsets.c | 1 + arch/x86/kernel/cpu/common.c | 15 ++++++++++- arch/x86/kernel/vmlinux.lds.S | 9 +++++++ 5 files changed, 84 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 42a9379f7acb..2582984ffb4b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -136,6 +136,64 @@ END(native_usergs_sysret64) * with them due to bugs in both AMD and Intel CPUs. */ + .pushsection .entry_trampoline, "ax" + +/* + * The code in here gets remapped into cpu_entry_area's trampoline. This means + * that the assembler and linker have the wrong idea as to where this code + * lives (and, in fact, it's mapped more than once, so it's not even at a + * fixed address). So we can't reference any symbols outside the entry + * trampoline and expect it to work. + * + * Instead, we carefully abuse %rip-relative addressing. + * _entry_trampoline(%rip) refers to the start of the remapped) entry + * trampoline. We can thus find cpu_entry_area with this macro: + */ + +#define CPU_ENTRY_AREA \ + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) + +/* The top word of the SYSENTER stack is hot and is usable as scratch space. 
*/ +#define RSP_SCRATCH CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \ + SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA + +ENTRY(entry_SYSCALL_64_trampoline) + UNWIND_HINT_EMPTY + swapgs + + /* Stash the user RSP. */ + movq %rsp, RSP_SCRATCH + + /* Load the top of the task stack into RSP */ + movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp + + /* Start building the simulated IRET frame. */ + pushq $__USER_DS /* pt_regs->ss */ + pushq RSP_SCRATCH /* pt_regs->sp */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + + /* + * x86 lacks a near absolute jump, and we can't jump to the real + * entry text with a relative jump. We could push the target + * address and then use retq, but this destroys the pipeline on + * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead, + * spill RDI and restore it in a second-stage trampoline. + */ + pushq %rdi + movq $entry_SYSCALL_64_stage2, %rdi + jmp *%rdi +END(entry_SYSCALL_64_trampoline) + + .popsection + +ENTRY(entry_SYSCALL_64_stage2) + UNWIND_HINT_EMPTY + popq %rdi + jmp entry_SYSCALL_64_after_hwframe +END(entry_SYSCALL_64_stage2) + ENTRY(entry_SYSCALL_64) UNWIND_HINT_EMPTY /* diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 84558b611ad3..6a699474c2c7 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -61,6 +61,8 @@ struct cpu_entry_area { * of the TSS region. 
*/ struct tss_struct tss; + + char entry_trampoline[PAGE_SIZE]; }; #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 55858b277cf6..61b1af88ac07 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -101,4 +101,5 @@ void common(void) { /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 57968880e39b..430f950b0b7f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -486,6 +486,8 @@ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); static inline void setup_cpu_entry_area(int cpu) { #ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; + /* On 64-bit systems, we use a read-only fixmap GDT. */ pgprot_t gdt_prot = PAGE_KERNEL_RO; #else @@ -532,6 +534,11 @@ static inline void setup_cpu_entry_area(int cpu) #ifdef CONFIG_X86_32 this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); #endif + +#ifdef CONFIG_X86_64 + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); +#endif } /* Load the original GDT from the per-cpu structure */ @@ -1395,10 +1402,16 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks /* May not be marked __init: used by software suspend */ void syscall_init(void) { + extern char _entry_trampoline[]; + extern char entry_SYSCALL_64_trampoline[]; + int cpu = smp_processor_id(); + unsigned long SYSCALL64_entry_trampoline = + (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + + (entry_SYSCALL_64_trampoline - _entry_trampoline); wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, 
(unsigned long)entry_SYSCALL_compat); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a4009fb9be87..d2a8b5a24a44 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -107,6 +107,15 @@ SECTIONS SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) + +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + _entry_trampoline = .; + *(.entry_trampoline) + . = ALIGN(PAGE_SIZE); + ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); +#endif + /* End of text section */ _etext = .; } :text = 0x9090 -- cgit v1.2.3 From 40e7f949e0d9a33968ebde5d67f7e3a47c97742a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:26 +0100 Subject: x86/entry/64: Move the IST stacks into struct cpu_entry_area The IST stacks are needed when an IST exception occurs and are accessed before any kernel code at all runs. Move them into struct cpu_entry_area. The IST stacks are unlike the rest of cpu_entry_area: they're used even for entries from kernel mode. This means that they should be set up before we load the final IDT. Move cpu_entry_area setup to trap_init() for the boot CPU and set it up for all possible CPUs at once in native_smp_prepare_cpus(). Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.480598743@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fixmap.h | 12 +++++++ arch/x86/kernel/cpu/common.c | 74 ++++++++++++++++++++++++------------------- arch/x86/kernel/traps.c | 3 ++ 3 files changed, 57 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6a699474c2c7..451da7d9a502 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -63,10 +63,22 @@ struct cpu_entry_area { struct tss_struct tss; char entry_trampoline[PAGE_SIZE]; + +#ifdef CONFIG_X86_64 + /* + * Exception stacks used for IST entries. + * + * In the future, this should have a separate slot for each stack + * with guard pages between them. + */ + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; +#endif }; #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) +extern void setup_cpu_entry_areas(void); + /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 430f950b0b7f..fb01a8e5e9b7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -466,24 +466,36 @@ void load_percpu_segment(int cpu) load_stack_canary_segment(); } -static void set_percpu_fixmap_pages(int fixmap_index, void *ptr, - int pages, pgprot_t prot) -{ - int i; - - for (i = 0; i < pages; i++) { - __set_fixmap(fixmap_index - i, - per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot); - } -} - #ifdef CONFIG_X86_32 /* The 32-bit entry code needs to find cpu_entry_area. 
*/ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); #endif +#ifdef CONFIG_X86_64 +/* + * Special IST stacks which the CPU switches to when it calls + * an IST-marked descriptor entry. Up to 7 stacks (hardware + * limit), all of them are 4K, except the debug stack which + * is 8K. + */ +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ +}; + +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +#endif + +static void __init +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) +{ + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); +} + /* Setup the fixmap mappings only once per-processor */ -static inline void setup_cpu_entry_area(int cpu) +static void __init setup_cpu_entry_area(int cpu) { #ifdef CONFIG_X86_64 extern char _entry_trampoline[]; @@ -532,15 +544,31 @@ static inline void setup_cpu_entry_area(int cpu) PAGE_KERNEL); #ifdef CONFIG_X86_32 - this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); #endif #ifdef CONFIG_X86_64 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + BUILD_BUG_ON(sizeof(exception_stacks) != + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, + PAGE_KERNEL); + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); #endif } +void __init setup_cpu_entry_areas(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + setup_cpu_entry_area(cpu); +} + /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) { @@ -1385,20 +1413,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = 
-1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -/* - * Special IST stacks which the CPU switches to when it calls - * an IST-marked descriptor entry. Up to 7 stacks (hardware - * limit), all of them are 4K, except the debug stack which - * is 8K. - */ -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ -}; - -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); - /* May not be marked __init: used by software suspend */ void syscall_init(void) { @@ -1607,7 +1621,7 @@ void cpu_init(void) * set up and load the per-CPU TSS */ if (!oist->ist[0]) { - char *estacks = per_cpu(exception_stacks, cpu); + char *estacks = get_cpu_entry_area(cpu)->exception_stacks; for (v = 0; v < N_EXCEPTION_STACKS; v++) { estacks += exception_stack_sizes[v]; @@ -1633,8 +1647,6 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, me); - setup_cpu_entry_area(cpu); - /* * Initialize the TSS. sp0 points to the entry trampoline stack * regardless of what task is running. @@ -1694,8 +1706,6 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, curr); - setup_cpu_entry_area(cpu); - /* * Initialize the TSS. Don't bother initializing sp0, as the initial * task never enters user mode. 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ee9ca0ad4388..3e29aad5c7cc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -947,6 +947,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { + /* Init cpu_entry_area before IST entries are set up */ + setup_cpu_entry_areas(); + idt_setup_traps(); /* -- cgit v1.2.3 From 7fbbd5cbebf118a9e09f5453f686656a167c3d1c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:27 +0100 Subject: x86/entry/64: Remove the SYSENTER stack canary Now that the SYSENTER stack has a guard page, there's no need for a canary to detect overflow after the fact. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.572577316@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 - arch/x86/kernel/dumpstack.c | 3 +-- arch/x86/kernel/process.c | 1 - arch/x86/kernel/traps.c | 7 ------- 4 files changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b0cf0612a454..d34ac13c5866 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -341,7 +341,6 @@ struct tss_struct { * Space for the temporary SYSENTER stack, used for SYSENTER * and the entry trampoline as well. 
*/ - unsigned long SYSENTER_stack_canary; unsigned long SYSENTER_stack[64]; /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 60267850125e..ae1ce2e3f132 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -48,8 +48,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) int cpu = smp_processor_id(); struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; - /* Treat the canary as part of the stack for unwinding purposes. */ - void *begin = &tss->SYSENTER_stack_canary; + void *begin = &tss->SYSENTER_stack; void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); if ((void *)stack < begin || (void *)stack >= end) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 86e83762e3b3..6a04287f222b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -81,7 +81,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { */ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif - .SYSENTER_stack_canary = STACK_END_MAGIC, }; EXPORT_PER_CPU_SYMBOL(cpu_tss); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3e29aad5c7cc..5ade4f89a6d1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -814,13 +814,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - /* - * This is the most likely code path that involves non-trivial use - * of the SYSENTER stack. Check that we haven't overrun it. - */ - WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, - "Overran or corrupted SYSENTER stack\n"); - ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); -- cgit v1.2.3 From 0f9a48100fba3f189724ae88a450c2261bf91c80 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:28 +0100 Subject: x86/entry: Clean up the SYSENTER_stack code The existing code was a mess, mainly because C arrays are nasty. 
Turn SYSENTER_stack into a struct, add a helper to find it, and do all the obvious cleanups this enables. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.653244723@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 4 ++-- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/fixmap.h | 5 +++++ arch/x86/include/asm/processor.h | 6 +++++- arch/x86/kernel/asm-offsets.c | 6 ++---- arch/x86/kernel/cpu/common.c | 14 +++----------- arch/x86/kernel/dumpstack.c | 7 +++---- 7 files changed, 21 insertions(+), 23 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 0ab316c46806..3629bcbf85a2 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -942,7 +942,7 @@ ENTRY(debug) /* Are we currently on the SYSENTER stack? */ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ cmpl $SIZEOF_SYSENTER_stack, %ecx jb .Ldebug_from_sysenter_stack @@ -986,7 +986,7 @@ ENTRY(nmi) /* Are we currently on the SYSENTER stack? 
*/ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ cmpl $SIZEOF_SYSENTER_stack, %ecx jb .Lnmi_from_sysenter_stack diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2582984ffb4b..575b184f377f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -154,7 +154,7 @@ END(native_usergs_sysret64) _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) /* The top word of the SYSENTER stack is hot and is usable as scratch space. */ -#define RSP_SCRATCH CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + \ +#define RSP_SCRATCH CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \ SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA ENTRY(entry_SYSCALL_64_trampoline) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 451da7d9a502..cc5d98bdca37 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -245,5 +245,10 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); } +static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) +{ + return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d34ac13c5866..f933869470b8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -336,12 +336,16 @@ struct x86_hw_tss { #define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) #define INVALID_IO_BITMAP_OFFSET 0x8000 +struct SYSENTER_stack { + unsigned long words[64]; +}; + struct tss_struct { /* * Space for the temporary SYSENTER stack, used for SYSENTER * and the entry 
trampoline as well. */ - unsigned long SYSENTER_stack[64]; + struct SYSENTER_stack SYSENTER_stack; /* * The fixed hardware portion. This must not cross a page boundary diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 61b1af88ac07..46c0995344aa 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -94,10 +94,8 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); - /* Offset from cpu_tss to SYSENTER_stack */ - OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); - /* Size of SYSENTER_stack */ - DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack); + DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fb01a8e5e9b7..3de7480e4f32 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1314,12 +1314,7 @@ void enable_sep_cpu(void) tss->x86_tss.ss1 = __KERNEL_CS; wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); - - wrmsr(MSR_IA32_SYSENTER_ESP, - (unsigned long)&get_cpu_entry_area(cpu)->tss + - offsetofend(struct tss_struct, SYSENTER_stack), - 0); - + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); put_cpu(); @@ -1436,9 +1431,7 @@ void syscall_init(void) * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 
*/ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, - (unsigned long)&get_cpu_entry_area(cpu)->tss + - offsetofend(struct tss_struct, SYSENTER_stack)); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); @@ -1653,8 +1646,7 @@ void cpu_init(void) */ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); - load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss + - offsetofend(struct tss_struct, SYSENTER_stack)); + load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); load_mm_ldt(&init_mm); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ae1ce2e3f132..bbd6d986e2d0 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -45,11 +45,10 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) { - int cpu = smp_processor_id(); - struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; + struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); - void *begin = &tss->SYSENTER_stack; - void *end = (void *)&tss->SYSENTER_stack + sizeof(tss->SYSENTER_stack); + void *begin = ss; + void *end = ss + 1; if ((void *)stack < begin || (void *)stack >= end) return false; -- cgit v1.2.3 From c482feefe1aeb150156248ba0fd3e029bc886605 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:29 +0100 Subject: x86/entry/64: Make cpu_entry_area.tss read-only The TSS is a fairly juicy target for exploits, and, now that the TSS is in the cpu_entry_area, it's no longer protected by kASLR. Make it read-only on x86_64. On x86_32, it can't be RO because it's written by the CPU during task switches, and we use a task gate for double faults. 
I'd also be nervous about errata if we tried to make it RO even on configurations without double fault handling. [ tglx: AMD confirmed that there is no problem on 64-bit with TSS RO. So it's probably safe to assume that it's a non issue, though Intel might have been creative in that area. Still waiting for confirmation. ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.733700132@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 4 ++-- arch/x86/entry/entry_64.S | 8 ++++---- arch/x86/include/asm/fixmap.h | 13 +++++++++---- arch/x86/include/asm/processor.h | 17 ++++++++--------- arch/x86/include/asm/switch_to.h | 4 ++-- arch/x86/include/asm/thread_info.h | 2 +- arch/x86/kernel/asm-offsets.c | 5 ++--- arch/x86/kernel/asm-offsets_32.c | 4 ++-- arch/x86/kernel/cpu/common.c | 29 +++++++++++++++++++---------- arch/x86/kernel/ioport.c | 2 +- arch/x86/kernel/process.c | 6 +++--- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/traps.c | 4 ++-- arch/x86/lib/delay.c | 4 ++-- arch/x86/xen/enlighten_pv.c | 2 +- 16 files changed, 60 insertions(+), 48 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 3629bcbf85a2..bd8b57a5c874 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -942,7 +942,7 @@ ENTRY(debug) /* Are we currently on the SYSENTER stack? 
*/ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ cmpl $SIZEOF_SYSENTER_stack, %ecx jb .Ldebug_from_sysenter_stack @@ -986,7 +986,7 @@ ENTRY(nmi) /* Are we currently on the SYSENTER stack? */ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ cmpl $SIZEOF_SYSENTER_stack, %ecx jb .Lnmi_from_sysenter_stack diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 575b184f377f..2812ce043a7a 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -154,7 +154,7 @@ END(native_usergs_sysret64) _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) /* The top word of the SYSENTER stack is hot and is usable as scratch space. */ -#define RSP_SCRATCH CPU_ENTRY_AREA_tss + TSS_STRUCT_SYSENTER_stack + \ +#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \ SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA ENTRY(entry_SYSCALL_64_trampoline) @@ -390,7 +390,7 @@ syscall_return_via_sysret: * Save old stack pointer and switch to trampoline stack. */ movq %rsp, %rdi - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp pushq RSP-RDI(%rdi) /* RSP */ pushq (%rdi) /* RDI */ @@ -719,7 +719,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) * Save old stack pointer and switch to trampoline stack. */ movq %rsp, %rdi - movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp /* Copy the IRET frame to the trampoline stack. */ pushq 6*8(%rdi) /* SS */ @@ -934,7 +934,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt /* * Exception entry points. 
*/ -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) /* * Switch to the thread stack. This is called with the IRET frame and diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index cc5d98bdca37..94fc4fa14127 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -56,9 +56,14 @@ struct cpu_entry_area { char gdt[PAGE_SIZE]; /* - * The GDT is just below cpu_tss and thus serves (on x86_64) as a - * a read-only guard page for the SYSENTER stack at the bottom - * of the TSS region. + * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as + * a read-only guard page. + */ + struct SYSENTER_stack_page SYSENTER_stack_page; + + /* + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because + * we need task switches to work, and task switches write to the TSS. */ struct tss_struct tss; @@ -247,7 +252,7 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) { - return &get_cpu_entry_area(cpu)->tss.SYSENTER_stack; + return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack; } #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f933869470b8..e8991d7f7034 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -340,13 +340,11 @@ struct SYSENTER_stack { unsigned long words[64]; }; -struct tss_struct { - /* - * Space for the temporary SYSENTER stack, used for SYSENTER - * and the entry trampoline as well. - */ - struct SYSENTER_stack SYSENTER_stack; +struct SYSENTER_stack_page { + struct SYSENTER_stack stack; +} __aligned(PAGE_SIZE); +struct tss_struct { /* * The fixed hardware portion. 
This must not cross a page boundary * at risk of violating the SDM's advice and potentially triggering @@ -363,7 +361,7 @@ struct tss_struct { unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; } __aligned(PAGE_SIZE); -DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss); +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); /* * sizeof(unsigned long) coming from an extra "long" at the end @@ -378,7 +376,8 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss); #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #else -#define cpu_current_top_of_stack cpu_tss.x86_tss.sp1 +/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ +#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 #endif /* @@ -538,7 +537,7 @@ static inline void native_set_iopl_mask(unsigned mask) static inline void native_load_sp0(unsigned long sp0) { - this_cpu_write(cpu_tss.x86_tss.sp0, sp0); + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } static inline void native_swapgs(void) diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index cbc71e73bd32..9b6df68d8fd1 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -79,10 +79,10 @@ do { \ static inline void refresh_sysenter_cs(struct thread_struct *thread) { /* Only happens when SEP is enabled, no need to test "SEP"arately: */ - if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) + if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs)) return; - this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); + this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); } #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 44a04999791e..00223333821a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -207,7 +207,7 @@ static inline int 
arch_within_stack_frames(const void * const stack, #else /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_64 -# define cpu_current_top_of_stack (cpu_tss + TSS_sp1) +# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1) #endif #endif diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 46c0995344aa..cd360a5e0dca 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -94,10 +94,9 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); - OFFSET(TSS_STRUCT_SYSENTER_stack, tss_struct, SYSENTER_stack); - DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); - /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); + OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page); + DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 52ce4ea16e53..7d20d9c0b3d6 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -47,8 +47,8 @@ void foo(void) BLANK(); /* Offset from the sysenter stack to tss.sp0 */ - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - - offsetofend(struct tss_struct, SYSENTER_stack)); + DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - + offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack)); #ifdef CONFIG_CC_STACKPROTECTOR BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3de7480e4f32..c2eada1056de 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -487,6 +487,9 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); #endif +static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page, + SYSENTER_stack_storage); + static void __init 
set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) { @@ -500,23 +503,29 @@ static void __init setup_cpu_entry_area(int cpu) #ifdef CONFIG_X86_64 extern char _entry_trampoline[]; - /* On 64-bit systems, we use a read-only fixmap GDT. */ + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ pgprot_t gdt_prot = PAGE_KERNEL_RO; + pgprot_t tss_prot = PAGE_KERNEL_RO; #else /* * On native 32-bit systems, the GDT cannot be read-only because * our double fault handler uses a task gate, and entering through - * a task gate needs to change an available TSS to busy. If the GDT - * is read-only, that will triple fault. + * a task gate needs to change an available TSS to busy. If the + * GDT is read-only, that will triple fault. The TSS cannot be + * read-only because the CPU writes to it on task switches. * - * On Xen PV, the GDT must be read-only because the hypervisor requires - * it. + * On Xen PV, the GDT must be read-only because the hypervisor + * requires it. */ pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? 
PAGE_KERNEL_RO : PAGE_KERNEL; + pgprot_t tss_prot = PAGE_KERNEL; #endif __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page), + per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1, + PAGE_KERNEL); /* * The Intel SDM says (Volume 3, 7.2.1): @@ -539,9 +548,9 @@ static void __init setup_cpu_entry_area(int cpu) offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), - &per_cpu(cpu_tss, cpu), + &per_cpu(cpu_tss_rw, cpu), sizeof(struct tss_struct) / PAGE_SIZE, - PAGE_KERNEL); + tss_prot); #ifdef CONFIG_X86_32 per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); @@ -1305,7 +1314,7 @@ void enable_sep_cpu(void) return; cpu = get_cpu(); - tss = &per_cpu(cpu_tss, cpu); + tss = &per_cpu(cpu_tss_rw, cpu); /* * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- @@ -1575,7 +1584,7 @@ void cpu_init(void) if (cpu) load_ucode_ap(); - t = &per_cpu(cpu_tss, cpu); + t = &per_cpu(cpu_tss_rw, cpu); oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA @@ -1667,7 +1676,7 @@ void cpu_init(void) { int cpu = smp_processor_id(); struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(cpu_tss, cpu); + struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); wait_for_master_cpu(cpu); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 3feb648781c4..2f723301eb58 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(cpu_tss, get_cpu()); + tss = &per_cpu(cpu_tss_rw, get_cpu()); if (turn_on) bitmap_clear(t->io_bitmap_ptr, from, num); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6a04287f222b..517415978409 100644 --- 
a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -47,7 +47,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { .x86_tss = { /* * .sp0 is only used when entering ring 0 from a lower @@ -82,7 +82,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif }; -EXPORT_PER_CPU_SYMBOL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); DEFINE_PER_CPU(bool, __tss_limit_invalid); EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); @@ -111,7 +111,7 @@ void exit_thread(struct task_struct *tsk) struct fpu *fpu = &t->fpu; if (bp) { - struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); t->io_bitmap_ptr = NULL; clear_thread_flag(TIF_IO_BITMAP); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 45bf0c5f93e1..5224c6099184 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... 
printk() calls wake_up*() indirectly */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 157f81816915..c75466232016 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -399,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5ade4f89a6d1..74136fd16f49 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -364,7 +364,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->cs == __KERNEL_CS && regs->ip == (unsigned long)native_irq_return_iret) { - struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1; + struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* * regs->sp points to the failing IRET frame on the @@ -649,7 +649,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) * exception came from the IRET target. */ struct bad_iret_stack *new_stack = - (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1; + (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the new stack. */ memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 553f8fd23cc4..4846eff7e4c8 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops) delay = min_t(u64, MWAITX_MAX_LOOPS, loops); /* - * Use cpu_tss as a cacheline-aligned, seldomly + * Use cpu_tss_rw as a cacheline-aligned, seldomly * accessed per-cpu variable as the monitor target. 
*/ - __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); /* * AMD, like Intel, supports the EAX hint and EAX=0xf diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index fbd054d6ac97..ae3a071e1d0f 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -818,7 +818,7 @@ static void xen_load_sp0(unsigned long sp0) mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); xen_mc_issue(PARAVIRT_LAZY_CPU); - this_cpu_write(cpu_tss.x86_tss.sp0, sp0); + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } void xen_set_iopl_mask(unsigned mask) -- cgit v1.2.3 From a035795499ca1c2bd1928808d1a156eda1420383 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:30 +0100 Subject: x86/paravirt: Dont patch flush_tlb_single native_flush_tlb_single() will be changed with the upcoming PAGE_TABLE_ISOLATION feature. This requires to have more code in there than INVLPG. Remove the paravirt patching for it. Signed-off-by: Thomas Gleixner Reviewed-by: Josh Poimboeuf Reviewed-by: Juergen Gross Acked-by: Peter Zijlstra Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Cc: michael.schwarz@iaik.tugraz.at Cc: moritz.lipp@iaik.tugraz.at Cc: richard.fellner@student.tugraz.at Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt_patch_64.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index ac0be8283325..9edadabf04f6 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); @@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); - PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); #if defined(CONFIG_PARAVIRT_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): -- cgit v1.2.3 From 79cc74155218316b9a5d28577c7077b2adba8e58 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:31 +0100 Subject: x86/paravirt: Provide a way to check for hypervisors There is no generic way to test whether a kernel is running on a specific hypervisor. But that's required to prevent the upcoming user address space separation feature in certain guest modes. Make the hypervisor type enum unconditionally available and provide a helper function which allows to test for a specific type. 
Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.912938129@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hypervisor.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 1b0a5abcd8ae..96aa6b9884dc 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -20,16 +20,7 @@ #ifndef _ASM_X86_HYPERVISOR_H #define _ASM_X86_HYPERVISOR_H -#ifdef CONFIG_HYPERVISOR_GUEST - -#include -#include -#include - -/* - * x86 hypervisor information - */ - +/* x86 hypervisor types */ enum x86_hypervisor_type { X86_HYPER_NATIVE = 0, X86_HYPER_VMWARE, @@ -39,6 +30,12 @@ enum x86_hypervisor_type { X86_HYPER_KVM, }; +#ifdef CONFIG_HYPERVISOR_GUEST + +#include +#include +#include + struct hypervisor_x86 { /* Hypervisor name */ const char *name; @@ -58,7 +55,15 @@ struct hypervisor_x86 { extern enum x86_hypervisor_type x86_hyper_type; extern void init_hypervisor_platform(void); +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return x86_hyper_type == type; +} #else static inline void init_hypervisor_platform(void) { } +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return type == X86_HYPER_NATIVE; +} #endif /* CONFIG_HYPERVISOR_GUEST */ #endif /* _ASM_X86_HYPERVISOR_H */ -- cgit v1.2.3 From 6cbd2171e89b13377261d15e64384df60ecb530e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:32 +0100 
Subject: x86/cpufeatures: Make CPU bugs sticky There is currently no way to force CPU bug bits like CPU feature bits. That makes it impossible to set a bug bit once at boot and have it stick for all upcoming CPUs. Extend the force set/clear arrays to handle bug bits as well. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171204150606.992156574@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 2 ++ arch/x86/include/asm/processor.h | 4 ++-- arch/x86/kernel/cpu/common.c | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bf6a76202a77..ea9a7dde62e5 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e8991d7f7034..da943411d3d8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -163,8 +163,8 @@ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern struct x86_hw_tss doublefault_tss; -extern __u32 cpu_caps_cleared[NCAPINTS]; -extern __u32 cpu_caps_set[NCAPINTS]; +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c2eada1056de..034900623adf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -452,8 +452,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS]; -__u32 cpu_caps_set[NCAPINTS]; +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; void load_percpu_segment(int cpu) { @@ -812,7 +812,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) { int i; - for (i = 0; i < NCAPINTS; i++) { + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; } -- cgit v1.2.3 From db410b2b3839e962f9df4bc87b4fea9a2996047c Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 14 Dec 2017 22:25:25 -0600 Subject: cpufreq: ti-cpufreq: Convert to module_platform_driver ti-cpufreq will be responsible for calling dev_pm_opp_set_regulators on platforms that require AVS and ABB regulator support so we must be able to defer probe if regulators are not yet available, so change ti-cpufreq to be a module_platform_driver to allow for probe defer. Acked-by: Viresh Kumar Signed-off-by: Dave Gerlach Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/ti-cpufreq.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 923317f03b4b..b1c230a1e2aa 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -195,7 +196,7 @@ static const struct of_device_id ti_cpufreq_of_match[] = { {}, }; -static int ti_cpufreq_init(void) +static int ti_cpufreq_probe(struct platform_device *pdev) { u32 version[VERSION_COUNT]; struct device_node *np; @@ -269,4 +270,22 @@ free_opp_data: return ret; } -device_initcall(ti_cpufreq_init); + +static int ti_cpufreq_init(void) +{ + platform_device_register_simple("ti-cpufreq", -1, NULL, 0); + return 0; +} +module_init(ti_cpufreq_init); + +static struct platform_driver ti_cpufreq_driver = { + .probe = ti_cpufreq_probe, + .driver = { + .name = "ti-cpufreq", + }, +}; +module_platform_driver(ti_cpufreq_driver); + +MODULE_DESCRIPTION("TI CPUFreq/OPP hw-supported driver"); +MODULE_AUTHOR("Dave Gerlach "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From c8343e83d45fb074da5dfc304e101995d19f05b2 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 14 Dec 2017 22:25:26 -0600 Subject: cpufreq: ti-cpufreq: Add support for multiple regulators Some platforms, like those in the DRA7 and AM57 families, require the scaling of multiple regulators in order to properly support higher OPPs. Let the ti-cpufreq driver determine when this is required and pass the appropriate regulator names to the OPP core so that they can be properly managed. Acked-by: Viresh Kumar Signed-off-by: Dave Gerlach Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/ti-cpufreq.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index b1c230a1e2aa..a099b7bf74cd 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -51,6 +51,7 @@ struct ti_cpufreq_soc_data { unsigned long efuse_mask; unsigned long efuse_shift; unsigned long rev_offset; + bool multi_regulator; }; struct ti_cpufreq_data { @@ -58,6 +59,7 @@ struct ti_cpufreq_data { struct device_node *opp_node; struct regmap *syscon; const struct ti_cpufreq_soc_data *soc_data; + struct opp_table *opp_table; }; static unsigned long amx3_efuse_xlate(struct ti_cpufreq_data *opp_data, @@ -96,6 +98,7 @@ static struct ti_cpufreq_soc_data am3x_soc_data = { .efuse_offset = 0x07fc, .efuse_mask = 0x1fff, .rev_offset = 0x600, + .multi_regulator = false, }; static struct ti_cpufreq_soc_data am4x_soc_data = { @@ -104,6 +107,7 @@ static struct ti_cpufreq_soc_data am4x_soc_data = { .efuse_offset = 0x0610, .efuse_mask = 0x3f, .rev_offset = 0x600, + .multi_regulator = false, }; static struct ti_cpufreq_soc_data dra7_soc_data = { @@ -112,6 +116,7 @@ static struct ti_cpufreq_soc_data dra7_soc_data = { .efuse_mask = 0xf80000, .efuse_shift = 19, .rev_offset = 0x204, + .multi_regulator = true, }; /** @@ -201,7 +206,9 @@ static int ti_cpufreq_probe(struct platform_device *pdev) u32 version[VERSION_COUNT]; struct device_node *np; const struct of_device_id *match; + struct opp_table *ti_opp_table; struct ti_cpufreq_data *opp_data; + const char * const reg_names[] = {"vdd", "vbb"}; int ret; np = of_find_node_by_path("/"); @@ -248,16 +255,29 @@ static int ti_cpufreq_probe(struct platform_device *pdev) if (ret) goto fail_put_node; - ret = PTR_ERR_OR_ZERO(dev_pm_opp_set_supported_hw(opp_data->cpu_dev, - version, VERSION_COUNT)); - if (ret) { + ti_opp_table = dev_pm_opp_set_supported_hw(opp_data->cpu_dev, + version, VERSION_COUNT); + if 
(IS_ERR(ti_opp_table)) { dev_err(opp_data->cpu_dev, "Failed to set supported hardware\n"); + ret = PTR_ERR(ti_opp_table); goto fail_put_node; } - of_node_put(opp_data->opp_node); + opp_data->opp_table = ti_opp_table; + + if (opp_data->soc_data->multi_regulator) { + ti_opp_table = dev_pm_opp_set_regulators(opp_data->cpu_dev, + reg_names, + ARRAY_SIZE(reg_names)); + if (IS_ERR(ti_opp_table)) { + dev_pm_opp_put_supported_hw(opp_data->opp_table); + ret = PTR_ERR(ti_opp_table); + goto fail_put_node; + } + } + of_node_put(opp_data->opp_node); register_cpufreq_dt: platform_device_register_simple("cpufreq-dt", -1, NULL, 0); -- cgit v1.2.3 From 212b7287ae6098337fffa9c0cd7e139dceb98125 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 14 Dec 2017 22:25:27 -0600 Subject: dt-bindings: opp: Introduce ti-opp-supply bindings Document the devicetree bindings that describe Texas Instruments opp-supply which allow a platform to describe multiple regulators and additional information, such as registers containing data needed to program aforementioned regulators. Signed-off-by: Dave Gerlach Acked-by: Viresh Kumar Acked-by: Rob Herring Signed-off-by: Rafael J. Wysocki --- .../bindings/opp/ti-omap5-opp-supply.txt | 63 ++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt diff --git a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt new file mode 100644 index 000000000000..832346e489a3 --- /dev/null +++ b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt @@ -0,0 +1,63 @@ +Texas Instruments OMAP compatible OPP supply description + +OMAP5, DRA7, and AM57 family of SoCs have Class0 AVS eFuse registers which +contain data that can be used to adjust voltages programmed for some of their +supplies for more efficient operation. 
This binding provides the information +needed to read these values and use them to program the main regulator during +an OPP transition. + +Also, some supplies may have an associated vbb-supply which is an Adaptive Body +Bias regulator which must be transitioned in a specific sequence with regards +to the vdd-supply and clk when making an OPP transition. By supplying two +regulators to the device that will undergo OPP transitions we can make use +of the multi regulator binding that is part of the OPP core described here [1] +to describe both regulators needed by the platform. + +[1] Documentation/devicetree/bindings/opp/opp.txt + +Required Properties for Device Node: +- vdd-supply: phandle to regulator controlling VDD supply +- vbb-supply: phandle to regulator controlling Body Bias supply + (Usually Adaptive Body Bias regulator) + +Required Properties for opp-supply node: +- compatible: Should be one of: + "ti,omap-opp-supply" - basic OPP supply controlling VDD and VBB + "ti,omap5-opp-supply" - OMAP5+ optimized voltages in efuse(class0) VDD + along with VBB + "ti,omap5-core-opp-supply" - OMAP5+ optimized voltages in efuse(class0) VDD + but no VBB. +- reg: Address and length of the efuse register set for the device (mandatory + only for "ti,omap5-opp-supply") +- ti,efuse-settings: An array of u32 tuple items providing information about + optimized efuse configuration. Each item consists of the following: + volt: voltage in uV - reference voltage (OPP voltage) + efuse_offset: efuse offset from reg where the optimized voltage is stored. +- ti,absolute-max-voltage-uv: absolute maximum voltage for the OPP supply. + +Example: + +/* Device Node (CPU) */ +cpus { + cpu0: cpu@0 { + device_type = "cpu"; + + ...
+ + vdd-supply = <&vcc>; + vbb-supply = <&abb_mpu>; + }; +}; + +/* OMAP OPP Supply with Class0 registers */ +opp_supply_mpu: opp_supply@4a003b20 { + compatible = "ti,omap5-opp-supply"; + reg = <0x4a003b20 0x8>; + ti,efuse-settings = < + /* uV offset */ + 1060000 0x0 + 1160000 0x4 + 1210000 0x8 + >; + ti,absolute-max-voltage-uv = <1500000>; +}; -- cgit v1.2.3 From 9a835fa6e47f27b1ae71390b6f12efce7335aaac Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 14 Dec 2017 22:25:28 -0600 Subject: PM / OPP: Add ti-opp-supply driver Introduce a ti-opp-supply driver that will use new multiple regulator support that is part of the OPP core. This is needed on TI platforms like DRA7/AM57 in order to control both CPU regulator and Adaptive Body Bias (ABB) regulator. These regulators must be scaled in sequence during an OPP transition depending on whether or not the frequency is being scaled up or down. This driver also implements AVS Class0 for these parts by looking up the required values from registers in the SoC and programming adjusted optimal voltage values for each OPP. Signed-off-by: Dave Gerlach Acked-by: Viresh Kumar Signed-off-by: Rafael J.
Wysocki --- drivers/opp/Makefile | 1 + drivers/opp/ti-opp-supply.c | 425 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 426 insertions(+) create mode 100644 drivers/opp/ti-opp-supply.c diff --git a/drivers/opp/Makefile b/drivers/opp/Makefile index e70ceb406fe9..6ce6aefacc81 100644 --- a/drivers/opp/Makefile +++ b/drivers/opp/Makefile @@ -2,3 +2,4 @@ ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG obj-y += core.o cpu.o obj-$(CONFIG_OF) += of.o obj-$(CONFIG_DEBUG_FS) += debugfs.o +obj-$(CONFIG_ARM_TI_CPUFREQ) += ti-opp-supply.o diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c new file mode 100644 index 000000000000..44dae3e51aac --- /dev/null +++ b/drivers/opp/ti-opp-supply.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2016-2017 Texas Instruments Incorporated - http://www.ti.com/ + * Nishanth Menon + * Dave Gerlach + * + * TI OPP supply driver that provides override into the regulator control + * for generic opp core to handle devices with ABB regulator and/or + * SmartReflex Class0. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * struct ti_opp_supply_optimum_voltage_table - optimized voltage table + * @reference_uv: reference voltage (usually Nominal voltage) + * @optimized_uv: Optimized voltage from efuse + */ +struct ti_opp_supply_optimum_voltage_table { + unsigned int reference_uv; + unsigned int optimized_uv; +}; + +/** + * struct ti_opp_supply_data - OMAP specific opp supply data + * @vdd_table: Optimized voltage mapping table + * @num_vdd_table: number of entries in vdd_table + * @vdd_absolute_max_voltage_uv: absolute maximum voltage in UV for the supply + */ +struct ti_opp_supply_data { + struct ti_opp_supply_optimum_voltage_table *vdd_table; + u32 num_vdd_table; + u32 vdd_absolute_max_voltage_uv; +}; + +static struct ti_opp_supply_data opp_data; + +/** + * struct ti_opp_supply_of_data - device tree match data + * @flags: specific type of opp supply + * @efuse_voltage_mask: mask required for efuse register representing voltage + * @efuse_voltage_uv: Are the efuse entries in micro-volts? if not, assume + * milli-volts. + */ +struct ti_opp_supply_of_data { +#define OPPDM_EFUSE_CLASS0_OPTIMIZED_VOLTAGE BIT(1) +#define OPPDM_HAS_NO_ABB BIT(2) + const u8 flags; + const u32 efuse_voltage_mask; + const bool efuse_voltage_uv; +}; + +/** + * _store_optimized_voltages() - store optimized voltages + * @dev: ti opp supply device for which we need to store info + * @data: data specific to the device + * + * Picks up efuse based optimized voltages for VDD unique per device and + * stores it in internal data structure for use during transition requests. + * + * Return: If successful, 0, else appropriate error value. 
+ */ +static int _store_optimized_voltages(struct device *dev, + struct ti_opp_supply_data *data) +{ + void __iomem *base; + struct property *prop; + struct resource *res; + const __be32 *val; + int proplen, i; + int ret = 0; + struct ti_opp_supply_optimum_voltage_table *table; + const struct ti_opp_supply_of_data *of_data = dev_get_drvdata(dev); + + /* pick up Efuse based voltages */ + res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "Unable to get IO resource\n"); + ret = -ENODEV; + goto out_map; + } + + base = ioremap_nocache(res->start, resource_size(res)); + if (!base) { + dev_err(dev, "Unable to map Efuse registers\n"); + ret = -ENOMEM; + goto out_map; + } + + /* Fetch efuse-settings. */ + prop = of_find_property(dev->of_node, "ti,efuse-settings", NULL); + if (!prop) { + dev_err(dev, "No 'ti,efuse-settings' property found\n"); + ret = -EINVAL; + goto out; + } + + proplen = prop->length / sizeof(int); + data->num_vdd_table = proplen / 2; + /* Verify for corrupted OPP entries in dt */ + if (data->num_vdd_table * 2 * sizeof(int) != prop->length) { + dev_err(dev, "Invalid 'ti,efuse-settings'\n"); + ret = -EINVAL; + goto out; + } + + ret = of_property_read_u32(dev->of_node, "ti,absolute-max-voltage-uv", + &data->vdd_absolute_max_voltage_uv); + if (ret) { + dev_err(dev, "ti,absolute-max-voltage-uv is missing\n"); + ret = -EINVAL; + goto out; + } + + table = kzalloc(sizeof(*data->vdd_table) * + data->num_vdd_table, GFP_KERNEL); + if (!table) { + ret = -ENOMEM; + goto out; + } + data->vdd_table = table; + + val = prop->value; + for (i = 0; i < data->num_vdd_table; i++, table++) { + u32 efuse_offset; + u32 tmp; + + table->reference_uv = be32_to_cpup(val++); + efuse_offset = be32_to_cpup(val++); + + tmp = readl(base + efuse_offset); + tmp &= of_data->efuse_voltage_mask; + tmp >>= __ffs(of_data->efuse_voltage_mask); + + table->optimized_uv = of_data->efuse_voltage_uv ? 
tmp : + tmp * 1000; + + dev_dbg(dev, "[%d] efuse=0x%08x volt_table=%d vset=%d\n", + i, efuse_offset, table->reference_uv, + table->optimized_uv); + + /* + * Some older samples might not have optimized efuse + * Use reference voltage for those - just add debug message + * for them. + */ + if (!table->optimized_uv) { + dev_dbg(dev, "[%d] efuse=0x%08x volt_table=%d:vset0\n", + i, efuse_offset, table->reference_uv); + table->optimized_uv = table->reference_uv; + } + } +out: + iounmap(base); +out_map: + return ret; +} + +/** + * _free_optimized_voltages() - free resources for optvoltages + * @dev: device for which we need to free info + * @data: data specific to the device + */ +static void _free_optimized_voltages(struct device *dev, + struct ti_opp_supply_data *data) +{ + kfree(data->vdd_table); + data->vdd_table = NULL; + data->num_vdd_table = 0; +} + +/** + * _get_optimal_vdd_voltage() - Finds optimal voltage for the supply + * @dev: device for which we need to find info + * @data: data specific to the device + * @reference_uv: reference voltage (OPP voltage) for which we need value + * + * Return: if a match is found, return optimized voltage, else return + * reference_uv, also return reference_uv if no optimization is needed. + */ +static int _get_optimal_vdd_voltage(struct device *dev, + struct ti_opp_supply_data *data, + int reference_uv) +{ + int i; + struct ti_opp_supply_optimum_voltage_table *table; + + if (!data->num_vdd_table) + return reference_uv; + + table = data->vdd_table; + if (!table) + return -EINVAL; + + /* Find a exact match - this list is usually very small */ + for (i = 0; i < data->num_vdd_table; i++, table++) + if (table->reference_uv == reference_uv) + return table->optimized_uv; + + /* IF things are screwed up, we'd make a mess on console.. 
ratelimit */ + dev_err_ratelimited(dev, "%s: Failed optimized voltage match for %d\n", + __func__, reference_uv); + return reference_uv; +} + +static int _opp_set_voltage(struct device *dev, + struct dev_pm_opp_supply *supply, + int new_target_uv, struct regulator *reg, + char *reg_name) +{ + int ret; + unsigned long vdd_uv, uv_max; + + if (new_target_uv) + vdd_uv = new_target_uv; + else + vdd_uv = supply->u_volt; + + /* + * If we do have an absolute max voltage specified, then we should + * use that voltage instead to allow for cases where the voltage rails + * are ganged (example if we set the max for an opp as 1.12v, and + * the absolute max is 1.5v, for another rail to get 1.25v, it cannot + * be achieved if the regulator is constrainted to max of 1.12v, even + * if it can function at 1.25v + */ + if (opp_data.vdd_absolute_max_voltage_uv) + uv_max = opp_data.vdd_absolute_max_voltage_uv; + else + uv_max = supply->u_volt_max; + + if (vdd_uv > uv_max || + vdd_uv < supply->u_volt_min || + supply->u_volt_min > uv_max) { + dev_warn(dev, + "Invalid range voltages [Min:%lu target:%lu Max:%lu]\n", + supply->u_volt_min, vdd_uv, uv_max); + return -EINVAL; + } + + dev_dbg(dev, "%s scaling to %luuV[min %luuV max %luuV]\n", reg_name, + vdd_uv, supply->u_volt_min, + uv_max); + + ret = regulator_set_voltage_triplet(reg, + supply->u_volt_min, + vdd_uv, + uv_max); + if (ret) { + dev_err(dev, "%s failed for %luuV[min %luuV max %luuV]\n", + reg_name, vdd_uv, supply->u_volt_min, + uv_max); + return ret; + } + + return 0; +} + +/** + * ti_opp_supply_set_opp() - do the opp supply transition + * @data: information on regulators and new and old opps provided by + * opp core to use in transition + * + * Return: If successful, 0, else appropriate error value. 
+ */ +int ti_opp_supply_set_opp(struct dev_pm_set_opp_data *data) +{ + struct dev_pm_opp_supply *old_supply_vdd = &data->old_opp.supplies[0]; + struct dev_pm_opp_supply *old_supply_vbb = &data->old_opp.supplies[1]; + struct dev_pm_opp_supply *new_supply_vdd = &data->new_opp.supplies[0]; + struct dev_pm_opp_supply *new_supply_vbb = &data->new_opp.supplies[1]; + struct device *dev = data->dev; + unsigned long old_freq = data->old_opp.rate, freq = data->new_opp.rate; + struct clk *clk = data->clk; + struct regulator *vdd_reg = data->regulators[0]; + struct regulator *vbb_reg = data->regulators[1]; + int vdd_uv; + int ret; + + vdd_uv = _get_optimal_vdd_voltage(dev, &opp_data, + new_supply_vdd->u_volt); + + /* Scaling up? Scale voltage before frequency */ + if (freq > old_freq) { + ret = _opp_set_voltage(dev, new_supply_vdd, vdd_uv, vdd_reg, + "vdd"); + if (ret) + goto restore_voltage; + + ret = _opp_set_voltage(dev, new_supply_vbb, 0, vbb_reg, "vbb"); + if (ret) + goto restore_voltage; + } + + /* Change frequency */ + dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", + __func__, old_freq, freq); + + ret = clk_set_rate(clk, freq); + if (ret) { + dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, + ret); + goto restore_voltage; + } + + /* Scaling down? 
Scale voltage after frequency */ + if (freq < old_freq) { + ret = _opp_set_voltage(dev, new_supply_vbb, 0, vbb_reg, "vbb"); + if (ret) + goto restore_freq; + + ret = _opp_set_voltage(dev, new_supply_vdd, vdd_uv, vdd_reg, + "vdd"); + if (ret) + goto restore_freq; + } + + return 0; + +restore_freq: + ret = clk_set_rate(clk, old_freq); + if (ret) + dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", + __func__, old_freq); +restore_voltage: + /* This shouldn't harm even if the voltages weren't updated earlier */ + if (old_supply_vdd->u_volt) { + ret = _opp_set_voltage(dev, old_supply_vbb, 0, vbb_reg, "vbb"); + if (ret) + return ret; + + ret = _opp_set_voltage(dev, old_supply_vdd, 0, vdd_reg, + "vdd"); + if (ret) + return ret; + } + + return ret; +} + +static const struct ti_opp_supply_of_data omap_generic_of_data = { +}; + +static const struct ti_opp_supply_of_data omap_omap5_of_data = { + .flags = OPPDM_EFUSE_CLASS0_OPTIMIZED_VOLTAGE, + .efuse_voltage_mask = 0xFFF, + .efuse_voltage_uv = false, +}; + +static const struct ti_opp_supply_of_data omap_omap5core_of_data = { + .flags = OPPDM_EFUSE_CLASS0_OPTIMIZED_VOLTAGE | OPPDM_HAS_NO_ABB, + .efuse_voltage_mask = 0xFFF, + .efuse_voltage_uv = false, +}; + +static const struct of_device_id ti_opp_supply_of_match[] = { + {.compatible = "ti,omap-opp-supply", .data = &omap_generic_of_data}, + {.compatible = "ti,omap5-opp-supply", .data = &omap_omap5_of_data}, + {.compatible = "ti,omap5-core-opp-supply", + .data = &omap_omap5core_of_data}, + {}, +}; +MODULE_DEVICE_TABLE(of, ti_opp_supply_of_match); + +static int ti_opp_supply_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device *cpu_dev = get_cpu_device(0); + const struct of_device_id *match; + const struct ti_opp_supply_of_data *of_data; + int ret = 0; + + match = of_match_device(ti_opp_supply_of_match, dev); + if (!match) { + /* We do not expect this to happen */ + dev_err(dev, "%s: Unable to match device\n", __func__); + return 
-ENODEV; + } + if (!match->data) { + /* Again, unlikely.. but mistakes do happen */ + dev_err(dev, "%s: Bad data in match\n", __func__); + return -EINVAL; + } + of_data = match->data; + + dev_set_drvdata(dev, (void *)of_data); + + /* If we need optimized voltage */ + if (of_data->flags & OPPDM_EFUSE_CLASS0_OPTIMIZED_VOLTAGE) { + ret = _store_optimized_voltages(dev, &opp_data); + if (ret) + return ret; + } + + ret = PTR_ERR_OR_ZERO(dev_pm_opp_register_set_opp_helper(cpu_dev, + ti_opp_supply_set_opp)); + if (ret) + _free_optimized_voltages(dev, &opp_data); + + return ret; +} + +static struct platform_driver ti_opp_supply_driver = { + .probe = ti_opp_supply_probe, + .driver = { + .name = "ti_opp_supply", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(ti_opp_supply_of_match), + }, +}; +module_platform_driver(ti_opp_supply_driver); + +MODULE_DESCRIPTION("Texas Instruments OMAP OPP Supply driver"); +MODULE_AUTHOR("Texas Instruments Inc."); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 203c110b39a89b48156c7450504e454fedb7f7f6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 12 Dec 2017 21:32:16 +0100 Subject: parisc: Fix indenting in puts() Static analysis tools complain that we intended to have curly braces around this indent block. In this case this assumption is wrong, so fix the indenting. 
Fixes: 2f3c7b8137ef ("parisc: Add core code for self-extracting kernel") Reported-by: Dan Carpenter Signed-off-by: Helge Deller Cc: # v4.14+ --- arch/parisc/boot/compressed/misc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 9345b44b86f0..f57118e1f6b4 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -123,8 +123,8 @@ int puts(const char *s) while ((nuline = strchr(s, '\n')) != NULL) { if (nuline != s) pdc_iodc_print(s, nuline - s); - pdc_iodc_print("\r\n", 2); - s = nuline + 1; + pdc_iodc_print("\r\n", 2); + s = nuline + 1; } if (*s != '\0') pdc_iodc_print(s, strlen(s)); -- cgit v1.2.3 From 0ed9d3de5f8f97e6efd5ca0e3377cab5f0451ead Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 12 Dec 2017 21:25:41 +0100 Subject: parisc: Align os_hpmc_size on word boundary The os_hpmc_size variable sometimes wasn't aligned at word boundary and thus triggered the unaligned fault handler at startup. Fix it by aligning it properly. Signed-off-by: Helge Deller Cc: # v4.14+ --- arch/parisc/kernel/hpmc.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S index e3a8e5e4d5de..8d072c44f300 100644 --- a/arch/parisc/kernel/hpmc.S +++ b/arch/parisc/kernel/hpmc.S @@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc) __INITRODATA + .align 4 .export os_hpmc_size os_hpmc_size: .word .os_hpmc_end-.os_hpmc -- cgit v1.2.3 From bcf3f1752a622f1372d3252d0fea8855d89812e7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 12 Dec 2017 21:52:26 +0100 Subject: parisc: Hide Diva-built-in serial aux and graphics card Diva GSP card has built-in serial AUX port and ATI graphic card which simply don't work and which both don't have external connectors. User Guides even mention that those devices shouldn't be used. So, prevent that Linux drivers try to enable those devices. 
Signed-off-by: Helge Deller Cc: # v3.0+ --- drivers/parisc/lba_pci.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index a25fed52f7e9..41b740aed3a3 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask) iounmap(base_addr); } + +/* + * The design of the Diva management card in rp34x0 machines (rp3410, rp3440) + * seems rushed, so that many built-in components simply don't work. + * The following quirks disable the serial AUX port and the built-in ATI RV100 + * Radeon 7000 graphics card which both don't have any external connectors and + * thus are useless, and even worse, e.g. the AUX port occupies ttyS0 and as + * such makes those machines the only PARISC machines on which we can't use + * ttyS0 as boot console. + */ +static void quirk_diva_ati_card(struct pci_dev *dev) +{ + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP || + dev->subsystem_device != 0x1292) + return; + + dev_info(&dev->dev, "Hiding Diva built-in ATI card"); + dev->device = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY, + quirk_diva_ati_card); + +static void quirk_diva_aux_disable(struct pci_dev *dev) +{ + if (dev->subsystem_vendor != PCI_VENDOR_ID_HP || + dev->subsystem_device != 0x1291) + return; + + dev_info(&dev->dev, "Hiding Diva built-in AUX serial device"); + dev->device = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX, + quirk_diva_aux_disable); -- cgit v1.2.3 From 6a16fc322085bb3163d7d6e44856adfda06a8001 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Sun, 10 Dec 2017 23:54:33 +0530 Subject: parisc: remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. 
Signed-off-by: Pravin Shedge Signed-off-by: Helge Deller --- arch/parisc/kernel/unwind.c | 1 - arch/parisc/lib/delay.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 5a657986ebbf..143f90e2f9f3 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c index 7eab4bb8abe6..66e506520505 100644 --- a/arch/parisc/lib/delay.c +++ b/arch/parisc/lib/delay.c @@ -16,9 +16,7 @@ #include #include -#include #include - #include /* for mfctl() */ #include /* for boot_cpu_data */ -- cgit v1.2.3 From 9352aeada4d8d8753fc0e414fbfe8fdfcb68a12c Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 13 Nov 2017 19:35:33 -0500 Subject: Revert "parisc: Re-enable interrupts early" This reverts commit 5c38602d83e584047906b41b162ababd4db4106d. Interrupts can't be enabled early because the register saves are done on the thread stack prior to switching to the IRQ stack. This caused stack overflows and the thread stack needed increasing to 32k. Even then, stack overflows still occasionally occurred. Background: Even with a 32 kB thread stack, I have seen instances where the thread stack overflowed on the mx3210 buildd. Detection of stack overflow only occurs when we have an external interrupt. When an external interrupt occurs, we switch to the thread stack if we are not already on a kernel stack. Then, registers and specials are saved to the kernel stack. The bug occurs in intr_return where interrupts are reenabled prior to returning from the interrupt. This was done incase we need to schedule or deliver signals. However, it introduces the possibility that multiple external interrupts may occur on the thread stack and cause a stack overflow. These might not be detected and cause the kernel to misbehave in random ways. 
This patch changes the code back to only reenable interrupts when we are going to schedule or deliver signals. As a result, we generally return from an interrupt before reenabling interrupts. This minimizes the growth of the thread stack. Fixes: 5c38602d83e5 ("parisc: Re-enable interrupts early") Signed-off-by: John David Anglin Cc: # v4.10+ Signed-off-by: Helge Deller --- arch/parisc/kernel/entry.S | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index a4fd296c958e..f3cecf5117cf 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi) STREG %r19,PT_SR7(%r16) intr_return: - /* NOTE: Need to enable interrupts incase we schedule. */ - ssm PSW_SM_I, %r0 - /* check for reschedule */ mfctl %cr30,%r1 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ @@ -907,6 +904,11 @@ intr_check_sig: LDREG PT_IASQ1(%r16), %r20 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ + /* NOTE: We need to enable interrupts if we have to deliver + * signals. We used to do this earlier but it caused kernel + * stack overflows. */ + ssm PSW_SM_I, %r0 + copy %r0, %r25 /* long in_syscall = 0 */ #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ @@ -958,6 +960,10 @@ intr_do_resched: cmpib,COND(=) 0, %r20, intr_do_preempt nop + /* NOTE: We need to enable interrupts if we schedule. We used + * to do this earlier but it caused kernel stack overflows. */ + ssm PSW_SM_I, %r0 + #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif -- cgit v1.2.3 From da57c5414f49ef9e4bcb9ae0bbafd1d650b31411 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 13 Nov 2017 19:35:33 -0500 Subject: parisc: Reduce thread stack to 16 kb In testing, I found that the thread stack can be 16 kB when using an irq stack. Without it, the thread stack needs to be 32 kB. Currently, the irq stack is 32 kB. 
While it probably could be 16 kB, I would prefer to leave it as is for safety. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/thread_info.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index c980a02a52bc..598c8d60fa5e 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -35,7 +35,12 @@ struct thread_info { /* thread information allocation */ +#ifdef CONFIG_IRQSTACKS +#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */ +#else #define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ +#endif + /* Be sure to hunt all references to this down when you change the size of * the kernel stack */ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -- cgit v1.2.3 From b9f5fb1800d8a4a3bc6cd3152c5f3d252986cf79 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 10 Nov 2017 15:57:21 +0100 Subject: cramfs: fix MTD dependency With CONFIG_MTD=m and CONFIG_CRAMFS=y, we now get a link failure: fs/cramfs/inode.o: In function `cramfs_mount': inode.c:(.text+0x220): undefined reference to `mount_mtd' fs/cramfs/inode.o: In function `cramfs_mtd_fill_super': inode.c:(.text+0x6d8): undefined reference to `mtd_point' inode.c:(.text+0xae4): undefined reference to `mtd_unpoint' This adds a more specific Kconfig dependency to avoid the broken configuration. Alternatively we could make CRAMFS itself depend on "MTD || !MTD" with a similar result. 
Fixes: 99c18ce580c6 ("cramfs: direct memory access support") Signed-off-by: Arnd Bergmann Signed-off-by: Nicolas Pitre Signed-off-by: Linus Torvalds --- fs/cramfs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index f937082f3244..58e2fe40b2a0 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -34,6 +34,7 @@ config CRAMFS_BLOCKDEV config CRAMFS_MTD bool "Support CramFs image directly mapped in physical memory" depends on CRAMFS && MTD + depends on CRAMFS=m || MTD=y default y if !CRAMFS_BLOCKDEV help This option allows the CramFs driver to load data directly from -- cgit v1.2.3 From 36b0cb84ee858f02c256d26f0cb4229c78e3399e Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 1 Dec 2017 03:51:04 +0100 Subject: ARM: 8731/1: Fix csum_partial_copy_from_user() stack mismatch An additional 'ip' will be pushed to the stack, for restoring the DACR later, if CONFIG_CPU_SW_DOMAIN_PAN defined. However, the fixup still get the err_ptr by add #8*4 to sp, which results in the fact that the code area pointed by the LR will be overwritten, or the kernel will crash if CONFIG_DEBUG_RODATA is enabled. This patch fixes the stack mismatch. 
Fixes: a5e090acbf54 ("ARM: software-based priviledged-no-access support") Signed-off-by: Lvqiang Huang Signed-off-by: Chunyan Zhang Signed-off-by: Russell King --- arch/arm/lib/csumpartialcopyuser.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 1712f132b80d..b83fdc06286a 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -85,7 +85,11 @@ .pushsection .text.fixup,"ax" .align 4 9001: mov r4, #-EFAULT +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + ldr r5, [sp, #9*4] @ *err_ptr +#else ldr r5, [sp, #8*4] @ *err_ptr +#endif str r4, [r5] ldmia sp, {r1, r2} @ retrieve dst, len add r2, r2, r1 -- cgit v1.2.3 From 779f4e1c6c7c661db40dfebd6dd6bda7b5f88aa3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 Dec 2017 11:28:38 -0800 Subject: Revert "exec: avoid RLIMIT_STACK races with prlimit()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 04e35f4495dd560db30c25efca4eecae8ec8c375. SELinux runs with secureexec for all non-"noatsecure" domain transitions, which means lots of processes end up hitting the stack hard-limit change that was introduced in order to fix a race with prlimit(). That race fix will need to be redesigned. Reported-by: Laura Abbott Reported-by: Tomáš Trnka Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- fs/exec.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 156f56acfe8e..5688b5e1b937 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1339,15 +1339,10 @@ void setup_new_exec(struct linux_binprm * bprm) * avoid bad behavior from the prior rlimits. This has to * happen before arch_pick_mmap_layout(), which examines * RLIMIT_STACK, but after the point of no return to avoid - * races from other threads changing the limits. This also - * must be protected from races with prlimit() calls. 
+ * needing to clean up the change on failure. */ - task_lock(current->group_leader); if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; - if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM) - current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM; - task_unlock(current->group_leader); } arch_pick_mmap_layout(current->mm); -- cgit v1.2.3 From 57358ba9564a0520f870dc14a0f91e7dacc18236 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 17 Dec 2017 14:43:15 -0800 Subject: xtensa: use generic strncpy_from_user with KASAN This enables KASAN check of the destination buffer. Signed-off-by: Max Filippov --- arch/xtensa/Kconfig | 1 + arch/xtensa/include/asm/uaccess.h | 7 +++++++ arch/xtensa/kernel/xtensa_ksyms.c | 2 ++ 3 files changed, 10 insertions(+) diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index f9f95d6e8da8..e2afffb71a6b 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -15,6 +15,7 @@ config XTENSA select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK + select GENERIC_STRNCPY_FROM_USER if KASAN select HAVE_ARCH_KASAN if MMU select HAVE_CC_STACKPROTECTOR select HAVE_DEBUG_KMEMLEAK diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index 18bbe1caad94..f1158b4c629c 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -44,6 +44,8 @@ #define __access_ok(addr, size) (__kernel_ok || __user_ok((addr), (size))) #define access_ok(type, addr, size) __access_ok((unsigned long)(addr), (size)) +#define user_addr_max() (uaccess_kernel() ? ~0UL : TASK_SIZE) + /* * These are the main single-value transfer routines. 
They * automatically use the right size if we just have the right pointer @@ -277,6 +279,8 @@ clear_user(void *addr, unsigned long size) #define __clear_user __xtensa_clear_user +#ifndef CONFIG_GENERIC_STRNCPY_FROM_USER + extern long __strncpy_user(char *, const char *, long); static inline long @@ -286,6 +290,9 @@ strncpy_from_user(char *dst, const char *src, long count) return __strncpy_user(dst, src, count); return -EFAULT; } +#else +long strncpy_from_user(char *dst, const char *src, long count); +#endif /* * Return the size of a string (including the ending 0!) diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 3a443f83ae87..04f19de46700 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -44,7 +44,9 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); +#ifndef CONFIG_GENERIC_STRNCPY_FROM_USER EXPORT_SYMBOL(__strncpy_user); +#endif EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(copy_page); -- cgit v1.2.3 From fed566ca44ce99ff4604e3af941049f9a6bba405 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 10 Dec 2017 20:15:37 -0800 Subject: xtensa: print kernel sections info in mem_init Output virtual addresses and sizes occupied by the main kernel sections: .text, .rodata, .data, .init and .bss. 
Signed-off-by: Max Filippov --- arch/xtensa/mm/init.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 0d980f05da82..d776ec0d7b22 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -110,7 +110,12 @@ void __init mem_init(void) " pkmap : 0x%08lx - 0x%08lx (%5lu kB)\n" " fixmap : 0x%08lx - 0x%08lx (%5lu kB)\n" #endif - " lowmem : 0x%08lx - 0x%08lx (%5lu MB)\n", + " lowmem : 0x%08lx - 0x%08lx (%5lu MB)\n" + " .text : 0x%08lx - 0x%08lx (%5lu kB)\n" + " .rodata : 0x%08lx - 0x%08lx (%5lu kB)\n" + " .data : 0x%08lx - 0x%08lx (%5lu kB)\n" + " .init : 0x%08lx - 0x%08lx (%5lu kB)\n" + " .bss : 0x%08lx - 0x%08lx (%5lu kB)\n", #ifdef CONFIG_KASAN KASAN_SHADOW_START, KASAN_SHADOW_START + KASAN_SHADOW_SIZE, KASAN_SHADOW_SIZE >> 20, @@ -129,7 +134,17 @@ void __init mem_init(void) #else min_low_pfn * PAGE_SIZE, max_low_pfn * PAGE_SIZE, #endif - ((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20); + ((max_low_pfn - min_low_pfn) * PAGE_SIZE) >> 20, + (unsigned long)_text, (unsigned long)_etext, + (unsigned long)(_etext - _text) >> 10, + (unsigned long)__start_rodata, (unsigned long)_sdata, + (unsigned long)(_sdata - __start_rodata) >> 10, + (unsigned long)_sdata, (unsigned long)_edata, + (unsigned long)(_edata - _sdata) >> 10, + (unsigned long)__init_begin, (unsigned long)__init_end, + (unsigned long)(__init_end - __init_begin) >> 10, + (unsigned long)__bss_start, (unsigned long)__bss_stop, + (unsigned long)(__bss_stop - __bss_start) >> 10); } #ifdef CONFIG_BLK_DEV_INITRD -- cgit v1.2.3 From 1291a0d5049dbc06baaaf66a9ff3f53db493b19b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Dec 2017 18:59:59 -0800 Subject: Linux 4.15-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3f4d157add54..7e02f951b284 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc3 
+EXTRAVERSION = -rc4 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From d82c3682168431d29ba1741d0cd5ef45c68bf8e0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 18 Dec 2017 08:26:28 +0100 Subject: mtd: Fix mtd_check_oob_ops() The mtd_check_oob_ops() helper verifies if the operation defined by the user is correct. Fix the check that verifies if the entire requested area exists. This check is too restrictive and will fail anytime the last data byte of the very last page is included in an operation. Fixes: 5cdd929da53d ("mtd: Add sanity checks in mtd_write/read_oob()") Signed-off-by: Miquel Raynal Acked-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/mtdcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index f80e911b8843..73b605577447 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs, if (!ops->oobbuf) ops->ooblen = 0; - if (offs < 0 || offs + ops->len >= mtd->size) + if (offs < 0 || offs + ops->len > mtd->size) return -EINVAL; if (ops->ooblen) { -- cgit v1.2.3 From 9e343e87d2c4c707ef8fae2844864d4dde3a2d13 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Oct 2017 15:54:10 +0200 Subject: mtd: cfi: convert inline functions to macros The map_word_() functions, dating back to linux-2.6.8, try to perform bitwise operations on a 'map_word' structure. This may have worked with compilers that were current then (gcc-3.4 or earlier), but end up being rather inefficient on any version I could try now (gcc-4.4 or higher). Specifically we hit a problem analyzed in gcc PR81715 where we fail to reuse the stack space for local variables. This can be seen immediately in the stack consumption for cfi_staa_erase_varsize() and other functions that (with CONFIG_KASAN) can be up to 2200 bytes. Changing the inline functions into macros brings this down to 1280 bytes. 
Without KASAN, the same problem exists, but the stack consumption is lower to start with, my patch shrinks it from 920 to 496 bytes on with arm-linux-gnueabi-gcc-5.4, and saves around 1KB in .text size for cfi_cmdset_0020.c, as it avoids copying map_word structures for each call to one of these helpers. With the latest gcc-8 snapshot, the problem is fixed in upstream gcc, but nobody uses that yet, so we should still work around it in mainline kernels and probably backport the workaround to stable kernels as well. We had a couple of other functions that suffered from the same gcc bug, and all of those had a simpler workaround involving dummy variables in the inline function. Unfortunately that did not work here, the macro hack was the best I could come up with. It would also be helpful to have someone to a little performance testing on the patch, to see how much it helps in terms of CPU utilitzation. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Acked-by: Richard Weinberger Signed-off-by: Boris Brezillon --- include/linux/mtd/map.h | 130 +++++++++++++++++++++++------------------------- 1 file changed, 61 insertions(+), 69 deletions(-) diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 3aa56e3104bb..b5b43f94f311 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -270,75 +270,67 @@ void map_destroy(struct mtd_info *mtd); #define INVALIDATE_CACHED_RANGE(map, from, size) \ do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0) - -static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] != val2.x[i]) - return 0; - } - - return 1; -} - -static inline map_word map_word_and(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & val2.x[i]; - - return r; -} - -static 
inline map_word map_word_clr(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] & ~val2.x[i]; - - return r; -} - -static inline map_word map_word_or(struct map_info *map, map_word val1, map_word val2) -{ - map_word r; - int i; - - for (i = 0; i < map_words(map); i++) - r.x[i] = val1.x[i] | val2.x[i]; - - return r; -} - -static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if ((val1.x[i] & val2.x[i]) != val3.x[i]) - return 0; - } - - return 1; -} - -static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2) -{ - int i; - - for (i = 0; i < map_words(map); i++) { - if (val1.x[i] & val2.x[i]) - return 1; - } - - return 0; -} +#define map_word_equal(map, val1, val2) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) \ + if ((val1).x[i] != (val2).x[i]) { \ + ret = 0; \ + break; \ + } \ + ret; \ +}) + +#define map_word_and(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & (val2).x[i]; \ + r; \ +}) + +#define map_word_clr(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] & ~(val2).x[i]; \ + r; \ +}) + +#define map_word_or(map, val1, val2) \ +({ \ + map_word r; \ + int i; \ + for (i = 0; i < map_words(map); i++) \ + r.x[i] = (val1).x[i] | (val2).x[i]; \ + r; \ +}) + +#define map_word_andequal(map, val1, val2, val3) \ +({ \ + int i, ret = 1; \ + for (i = 0; i < map_words(map); i++) { \ + if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) { \ + ret = 0; \ + break; \ + } \ + } \ + ret; \ +}) + +#define map_word_bitsset(map, val1, val2) \ +({ \ + int i, ret = 0; \ + for (i = 0; i < map_words(map); i++) { \ + if ((val1).x[i] & (val2).x[i]) { \ + ret = 1; \ + break; \ + } \ + } \ + ret; \ +}) static inline map_word 
map_word_load(struct map_info *map, const void *ptr) { -- cgit v1.2.3 From bfe766cf65fb65e68c4764f76158718560bdcee5 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 6 Dec 2017 17:09:49 +0000 Subject: arm64: kvm: Prevent restoring stale PMSCR_EL1 for vcpu When VHE is not present, KVM needs to save and restores PMSCR_EL1 when possible. If SPE is used by the host, value of PMSCR_EL1 cannot be saved for the guest. If the host starts using SPE between two save+restore on the same vcpu, restore will write the value of PMSCR_EL1 read during the first save. Make sure __debug_save_spe_nvhe clears the value of the saved PMSCR_EL1 when the guest cannot use SPE. Signed-off-by: Julien Thierry Cc: Christoffer Dall Cc: Marc Zyngier Cc: Catalin Marinas Cc: Reviewed-by: Will Deacon Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm64/kvm/hyp/debug-sr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 321c9c05dd9e..f4363d40e2cd 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) { u64 reg; + /* Clear pmscr in case of early return */ + *pmscr_el1 = 0; + /* SPE present on this CPU? */ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), ID_AA64DFR0_PMSVER_SHIFT)) -- cgit v1.2.3 From 7839c672e58bf62da8f2f0197fefb442c02ba1dd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Dec 2017 11:45:45 +0000 Subject: KVM: arm/arm64: Fix HYP unmapping going off limits When we unmap the HYP memory, we try to be clever and unmap one PGD at a time. If we start with a non-PGD aligned address and try to unmap a whole PGD, things go horribly wrong in unmap_hyp_range (addr and end can never match, and it all goes really badly as we keep incrementing pgd and parse random memory as page tables...). 
The obvious fix is to let unmap_hyp_range do what it does best, which is to iterate over a range. The size of the linear mapping, which begins at PAGE_OFFSET, can be easily calculated by subtracting PAGE_OFFSET from high_memory, because high_memory is defined as the linear map address of the last byte of DRAM, plus one. The size of the vmalloc region is given trivially by VMALLOC_END - VMALLOC_START. Cc: stable@vger.kernel.org Reported-by: Andre Przywara Tested-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/mmu.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b36945d49986..b4b69c2d1012 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) */ void free_hyp_pgds(void) { - unsigned long addr; - mutex_lock(&kvm_hyp_pgd_mutex); if (boot_hyp_pgd) { @@ -521,10 +519,10 @@ void free_hyp_pgds(void) if (hyp_pgd) { unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); - for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); - for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), + (uintptr_t)high_memory - PAGE_OFFSET); + unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START), + VMALLOC_END - VMALLOC_START); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; -- cgit v1.2.3 From f384dcfe4d918c1d80477d290c22ce0093823771 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 7 Dec 2017 11:46:15 +0000 Subject: KVM: arm/arm64: timer: Don't set irq as forwarded if no usable GIC If we don't have a usable GIC, do not try to set the vcpu affinity as this is guaranteed to fail.
Reported-by: Andre Przywara Reviewed-by: Andre Przywara Tested-by: Andre Przywara Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/kvm/arm_arch_timer.h | 2 +- virt/kvm/arm/arch_timer.c | 13 ++++++++----- virt/kvm/arm/arm.c | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 6e45608b2399..9da6ce22803f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -62,7 +62,7 @@ struct arch_timer_cpu { bool enabled; }; -int kvm_timer_hyp_init(void); +int kvm_timer_hyp_init(bool); int kvm_timer_enable(struct kvm_vcpu *vcpu); int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index f9555b1e7f15..aa9adfafe12b 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -720,7 +720,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu) return 0; } -int kvm_timer_hyp_init(void) +int kvm_timer_hyp_init(bool has_gic) { struct arch_timer_kvm_info *info; int err; @@ -756,10 +756,13 @@ int kvm_timer_hyp_init(void) return err; } - err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus()); - if (err) { - kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); - goto out_free_irq; + if (has_gic) { + err = irq_set_vcpu_affinity(host_vtimer_irq, + kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } } kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 6b60c98a6e22..2e43f9d42bd5 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1326,7 +1326,7 @@ static int init_subsystems(void) /* * Init HYP architected timer support */ - err = kvm_timer_hyp_init(); + err = kvm_timer_hyp_init(vgic_present); if (err) goto out; -- cgit v1.2.3 From 
36e5cfd410ad6060b527e51d1b4bc174a8068cfd Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 14 Dec 2017 19:54:50 +0100 Subject: KVM: arm/arm64: Properly handle arch-timer IRQs after vtimer_save_state The recent timer rework was assuming that once the timer was disabled, we should no longer see any interrupts from the timer. This assumption turns out to not be true, and instead we have to handle the case when the timer ISR runs even after the timer has been disabled. This requires a couple of changes: First, we should never overwrite the cached guest state of the timer control register when the ISR runs, because KVM may have disabled its timers when doing vcpu_put(), even though the guest still had the timer enabled. Second, we shouldn't assume that the timer is actually firing just because we see an interrupt, but we should check the actual state of the timer in the timer control register to understand if the hardware timer is really firing or not. We also add an ISB to vtimer_save_state() to ensure the timer is actually disabled once we enable interrupts, which should clarify the intention of the implementation, and reduce the risk of unwanted interrupts. 
Fixes: b103cc3f10c0 ("KVM: arm/arm64: Avoid timer save/restore in vcpu entry/exit") Reported-by: Marc Zyngier Reported-by: Jia He Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index aa9adfafe12b..14c018f990a7 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; struct arch_timer_context *vtimer; + u32 cnt_ctl; - if (!vcpu) { - pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n"); - return IRQ_NONE; - } - vtimer = vcpu_vtimer(vcpu); + /* + * We may see a timer interrupt after vcpu_put() has been called which + * sets the CPU's vcpu pointer to NULL, because even though the timer + * has been disabled in vtimer_save_state(), the hardware interrupt + * signal may not have been retired from the interrupt controller yet. + */ + if (!vcpu) + return IRQ_HANDLED; + vtimer = vcpu_vtimer(vcpu); if (!vtimer->irq.level) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - if (kvm_timer_irq_can_fire(vtimer)) + cnt_ctl = read_sysreg_el0(cntv_ctl); + cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT | + ARCH_TIMER_CTRL_IT_MASK; + if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT)) kvm_timer_update_irq(vcpu, true, vtimer); } @@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu) /* Disable the virtual timer */ write_sysreg_el0(0, cntv_ctl); + isb(); vtimer->loaded = false; out: -- cgit v1.2.3 From 0eb7c33cadf6b2f1a94e58ded8b0eb89b4eba382 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 15 Dec 2017 00:30:12 +0100 Subject: KVM: arm/arm64: Fix timer enable flow When enabling the timer on the first run, we fail to ever restore the state and mark it as loaded. 
That means that in the initial entry to the VCPU ioctl, unless we exit to userspace for some reason such as a pending signal, if the guest programs a timer and blocks, we will wait forever, because we never read back the hardware state (the loaded flag is not set), and so we think the timer is disabled, and we never schedule a background soft timer. The end result? The VCPU blocks forever, and the only solution is to kill the thread. Fixes: 4a2c4da1250d ("arm/arm64: KVM: Load the timer state when enabling the timer") Reported-by: Marc Zyngier Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 14c018f990a7..cc29a8148328 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -846,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) no_vgic: preempt_disable(); timer->enabled = 1; - if (!irqchip_in_kernel(vcpu->kvm)) - kvm_timer_vcpu_load_user(vcpu); - else - kvm_timer_vcpu_load_vgic(vcpu); + kvm_timer_vcpu_load(vcpu); preempt_enable(); return 0; -- cgit v1.2.3 From 9226665159f0367ad08bc7d5dd194aeadb90316f Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 14 Dec 2017 15:28:58 +0800 Subject: ALSA: hda/realtek - Fix Dell AIO LineOut issue Dell AIO machines have a LineOut jack. Add the LineOut verb in this patch. [ Additional notes: the ALC274 codec seems to require fixed pin / DAC connections for the HP / line-out pins in order to enable EQ for the speakers; i.e. the HP / LO pins expect to be connected with NID 0x03 while keeping the speaker with NID 0x02. However, by adding a new line-out pin, the auto-parser assigns the NID 0x02 for HP/LO pins as primary outputs. As an easy workaround, we provide the preferred_pairs[] to forcibly map these pins.
-- tiwai ] Fixes: 75ee94b20b46 ("ALSA: hda - fix headset mic problem for Dell machines with alc274") Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4b21f71d685c..6a4db00511ab 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5185,6 +5185,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec, } } +/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */ +static void alc274_fixup_bind_dacs(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + static hda_nid_t preferred_pairs[] = { + 0x21, 0x03, 0x1b, 0x03, 0x16, 0x02, + 0 + }; + + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + + spec->gen.preferred_dacs = preferred_pairs; +} + /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" @@ -5302,6 +5318,8 @@ enum { ALC233_FIXUP_LENOVO_MULTI_CODECS, ALC294_FIXUP_LENOVO_MIC_LOCATION, ALC700_FIXUP_INTEL_REFERENCE, + ALC274_FIXUP_DELL_BIND_DACS, + ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, }; static const struct hda_fixup alc269_fixups[] = { @@ -6112,6 +6130,21 @@ static const struct hda_fixup alc269_fixups[] = { {} } }, + [ALC274_FIXUP_DELL_BIND_DACS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc274_fixup_bind_dacs, + .chained = true, + .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE + }, + [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x0401102f }, + { } + }, + .chained = true, + .chain_id = ALC274_FIXUP_DELL_BIND_DACS + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6578,7 +6611,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x14, 0x90170110}, {0x1b, 0x90a70130}, {0x21, 0x03211020}), - 
SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, {0x12, 0xb7a60130}, {0x13, 0xb8a61140}, {0x16, 0x90170110}, -- cgit v1.2.3 From 5839ee7389e893a31e4e3c9cf17b50d14103c902 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Dec 2017 03:07:18 +0100 Subject: PCI / PM: Force devices to D0 in pci_pm_thaw_noirq() It is incorrect to call pci_restore_state() for devices in low-power states (D1-D3), as that involves the restoration of MSI setup which requires MMIO to be operational and that is only the case in D0. However, pci_pm_thaw_noirq() may do that if the driver's "freeze" callbacks put the device into a low-power state, so fix it by making it force devices into D0 via pci_set_power_state() instead of trying to "update" their power state which is pointless. Fixes: e60514bd4485 (PCI/PM: Restore the status of PCI devices across hibernation) Cc: 4.13+ # 4.13+ Reported-by: Thomas Gleixner Reported-by: Maarten Lankhorst Tested-by: Thomas Gleixner Tested-by: Maarten Lankhorst Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas --- drivers/pci/pci-driver.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 945099d49f8f..14fd865a5120 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - pci_update_current_state(pci_dev, PCI_D0); + /* + * pci_restore_state() requires the device to be in D0 (because of MSI + * restoration among other things), so force it into D0 in case the + * driver's "freeze" callbacks put it into a low-power state directly. 
+ */ + pci_set_power_state(pci_dev, PCI_D0); pci_restore_state(pci_dev); if (drv && drv->pm && drv->pm->thaw_noirq) -- cgit v1.2.3 From ccc153a6de1f7741b5ef7c996f9be133772b2092 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 11 Dec 2017 14:19:00 +0100 Subject: cpufreq: imx6q: fix speed grading regression on i.MX6 QuadPlus The commit moving the speed grading check to the cpufreq driver introduced some additional checks, so the OPP disable is only attempted on SoCs where those OPPs are present. The compatible checks are missing the QuadPlus compatible, so invalid OPPs are not correctly disabled there. Move both checks to a single condition, so we don't need to sprinkle even more calls to of_machine_is_compatible(). Fixes: 2b3d58a3adca (cpufreq: imx6q: Move speed grading check to cpufreq driver) Signed-off-by: Lucas Stach Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/imx6q-cpufreq.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 628fe899cb48..d9b2c2de49c4 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev) val >>= OCOTP_CFG3_SPEED_SHIFT; val &= 0x3; - if ((val != OCOTP_CFG3_SPEED_1P2GHZ) && - of_machine_is_compatible("fsl,imx6q")) - if (dev_pm_opp_disable(dev, 1200000000)) - dev_warn(dev, "failed to disable 1.2GHz OPP\n"); if (val < OCOTP_CFG3_SPEED_996MHZ) if (dev_pm_opp_disable(dev, 996000000)) dev_warn(dev, "failed to disable 996MHz OPP\n"); - if (of_machine_is_compatible("fsl,imx6q")) { + + if (of_machine_is_compatible("fsl,imx6q") || + of_machine_is_compatible("fsl,imx6qp")) { if (val != OCOTP_CFG3_SPEED_852MHZ) if (dev_pm_opp_disable(dev, 852000000)) dev_warn(dev, "failed to disable 852MHz OPP\n"); + if (val != OCOTP_CFG3_SPEED_1P2GHZ) + if (dev_pm_opp_disable(dev, 1200000000)) + dev_warn(dev, "failed to disable 
1.2GHz OPP\n"); } iounmap(base); put_node: -- cgit v1.2.3 From 56026645e2b6f11ede34a5e6ab69d3eb56f9c8fc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 18 Dec 2017 02:15:32 +0100 Subject: cpufreq: governor: Ensure sufficiently large sampling intervals After commit aa7519af450d (cpufreq: Use transition_delay_us for legacy governors as well) the sampling_rate field of struct dbs_data may be less than the tick period which causes dbs_update() to produce incorrect results, so make the code ensure that the value of that field will always be sufficiently large. Fixes: aa7519af450d (cpufreq: Use transition_delay_us for legacy governors as well) Reported-by: Andy Tang Reported-by: Doug Smythies Tested-by: Andy Tang Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq_governor.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 58d4f4e1ad6a..ca38229b045a 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,6 +22,8 @@ #include "cpufreq_governor.h" +#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC) + static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); static DEFINE_MUTEX(gov_dbs_data_mutex); @@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, { struct dbs_data *dbs_data = to_dbs_data(attr_set); struct policy_dbs_info *policy_dbs; + unsigned int sampling_interval; int ret; - ret = sscanf(buf, "%u", &dbs_data->sampling_rate); - if (ret != 1) + + ret = sscanf(buf, "%u", &sampling_interval); + if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL) return -EINVAL; + dbs_data->sampling_rate = sampling_interval; + /* * We are operating under dbs_data->mutex and so the list and its * entries can't be freed concurrently. 
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) if (ret) goto free_policy_dbs_info; - dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy); + /* + * The sampling interval should not be less than the transition latency + * of the CPU and it also cannot be too small for dbs_update() to work + * correctly. + */ + dbs_data->sampling_rate = max_t(unsigned int, + CPUFREQ_DBS_MIN_SAMPLING_INTERVAL, + cpufreq_policy_transition_delay_us(policy)); if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; -- cgit v1.2.3 From 951ef0e19f0736b45d1c4d81f4dfa04a43f87df5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Dec 2017 23:59:49 +0000 Subject: ACPI: CPPC: remove initial assignment of pcc_ss_data The initialization of pcc_ss_data from pcc_data[pcc_ss_id] before pcc_ss_id is being range checked could lead to an out-of-bounds array read. This very same initialization is also being performed after the range check on pcc_ss_id, so we can just remove this problematic and also redundant assignment to fix the issue. Detected by cppcheck: warning: Value stored to 'pcc_ss_data' during its initialization is never read Fixes: 85b1407bf6d2 (ACPI / CPPC: Make CPPC ACPI driver aware of PCC subspace IDs) Signed-off-by: Colin Ian King Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 30e84cc600ae..06ea4749ebd9 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); struct cpc_register_resource *desired_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); - struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; + struct cppc_pcc_data *pcc_ss_data; int ret = 0; if (!cpc_desc || pcc_ss_id < 0) { -- cgit v1.2.3 From bb82e0b4a7e96494f0c1004ce50cec3d7b5fb3d1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Dec 2017 13:31:16 +0100 Subject: ACPI: APEI / ERST: Fix missing error handling in erst_reader() The commit f6f828513290 ("pstore: pass allocated memory region back to caller") changed the check of the return value from erst_read() in erst_reader() in the following way: if (len == -ENOENT) goto skip; - else if (len < 0) { - rc = -1; + else if (len < sizeof(*rcd)) { + rc = -EIO; goto out; This introduced another bug: since the comparison with sizeof() is cast to unsigned, a negative len value doesn't hit any longer. As a result, when an error is returned from erst_read(), the code falls through, and it may eventually lead to some weird thing like memory corruption. This patch adds the negative error value check more explicitly for addressing the issue. Fixes: f6f828513290 (pstore: pass allocated memory region back to caller) Cc: All applicable Tested-by: Jerry Tang Signed-off-by: Takashi Iwai Acked-by: Kees Cook Reviewed-by: Borislav Petkov Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/apei/erst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 6742f6c68034..9bff853e85f3 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1007,7 +1007,7 @@ skip: /* The record may be cleared by others, try read next record */ if (len == -ENOENT) goto skip; - else if (len < sizeof(*rcd)) { + else if (len < 0 || len < sizeof(*rcd)) { rc = -EIO; goto out; } -- cgit v1.2.3 From c68f0676ef7df08d52a65031db3e0ba017dbfd89 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 21 Nov 2017 03:33:06 -0500 Subject: ACPI / battery: Add quirk for Asus GL502VSK and UX305LA On Asus GL502VSK and UX305LA, ACPI incorrectly reports discharging when battery is full and AC is plugged. However rate_now is correct under this circumstance, hence we can use "rate_now == 0" as a predicate to report battery full status correctly. Link: https://bugs.launchpad.net/bugs/1482390 Signed-off-by: Kai-Heng Feng Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/battery.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index f2eb6c37ea0a..19bc440820e6 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -70,6 +70,7 @@ static async_cookie_t async_cookie; static bool battery_driver_registered; static int battery_bix_broken_package; static int battery_notification_delay_ms; +static int battery_full_discharging; static unsigned int cache_time = 1000; module_param(cache_time, uint, 0644); MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); @@ -214,9 +215,12 @@ static int acpi_battery_get_property(struct power_supply *psy, return -ENODEV; switch (psp) { case POWER_SUPPLY_PROP_STATUS: - if (battery->state & ACPI_BATTERY_STATE_DISCHARGING) - val->intval = POWER_SUPPLY_STATUS_DISCHARGING; - else if (battery->state & ACPI_BATTERY_STATE_CHARGING) + if (battery->state & ACPI_BATTERY_STATE_DISCHARGING) { + if (battery_full_discharging && battery->rate_now == 0) + val->intval = POWER_SUPPLY_STATUS_FULL; + else + val->intval = POWER_SUPPLY_STATUS_DISCHARGING; + } else if (battery->state & ACPI_BATTERY_STATE_CHARGING) val->intval = POWER_SUPPLY_STATUS_CHARGING; else if (acpi_battery_is_charged(battery)) val->intval = POWER_SUPPLY_STATUS_FULL; @@ -1166,6 +1170,12 @@ battery_notification_delay_quirk(const struct dmi_system_id *d) return 0; } +static int __init battery_full_discharging_quirk(const struct dmi_system_id *d) +{ + battery_full_discharging = 1; + return 0; +} + static const struct dmi_system_id bat_dmi_table[] __initconst = { { .callback = battery_bix_broken_package_quirk, @@ -1183,6 +1193,22 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire V5-573G"), }, }, + { + .callback = battery_full_discharging_quirk, + .ident = "ASUS GL502VSK", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + 
DMI_MATCH(DMI_PRODUCT_NAME, "GL502VSK"), + }, + }, + { + .callback = battery_full_discharging_quirk, + .ident = "ASUS UX305LA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UX305LA"), + }, + }, {}, }; -- cgit v1.2.3 From e39d200fa5bf5b94a0948db0dae44c1b73b84a56 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 14 Dec 2017 17:40:50 -0800 Subject: KVM: Fix stack-out-of-bounds read in write_mmio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: BUG: KASAN: stack-out-of-bounds in write_mmio+0x11e/0x270 [kvm] Read of size 8 at addr ffff8803259df7f8 by task syz-executor/32298 CPU: 6 PID: 32298 Comm: syz-executor Tainted: G OE 4.15.0-rc2+ #18 Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 Call Trace: dump_stack+0xab/0xe1 print_address_description+0x6b/0x290 kasan_report+0x28a/0x370 write_mmio+0x11e/0x270 [kvm] emulator_read_write_onepage+0x311/0x600 [kvm] emulator_read_write+0xef/0x240 [kvm] emulator_fix_hypercall+0x105/0x150 [kvm] em_hypercall+0x2b/0x80 [kvm] x86_emulate_insn+0x2b1/0x1640 [kvm] x86_emulate_instruction+0x39a/0xb90 [kvm] handle_exception+0x1b4/0x4d0 [kvm_intel] vcpu_enter_guest+0x15a0/0x2640 [kvm] kvm_arch_vcpu_ioctl_run+0x549/0x7d0 [kvm] kvm_vcpu_ioctl+0x479/0x880 [kvm] do_vfs_ioctl+0x142/0x9a0 SyS_ioctl+0x74/0x80 entry_SYSCALL_64_fastpath+0x23/0x9a The path of patched vmmcall will patch 3 bytes opcode 0F 01 C1(vmcall) to the guest memory, however, write_mmio tracepoint always prints 8 bytes through *(u64 *)val since kvm splits the mmio access into 8 bytes. This leaks 5 bytes from the kernel stack (CVE-2017-17741). This patch fixes it by just accessing the bytes which we operate on. Before patch: syz-executor-5567 [007] .... 51370.561696: kvm_mmio: mmio write len 3 gpa 0x10 val 0x1ffff10077c1010f After patch: syz-executor-13416 [002] .... 
51302.299573: kvm_mmio: mmio write len 3 gpa 0x10 val 0xc1010f Reported-by: Dmitry Vyukov Reviewed-by: Darren Kenny Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++---- include/trace/events/kvm.h | 7 +++++-- virt/kvm/arm/mmio.c | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3a82f2d4333b..1cec2c62a0b0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); handled += n; addr += n; len -= n; @@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) { if (vcpu->mmio_read_completed) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_fragments[0].gpa, *(u64 *)val); + vcpu->mmio_fragments[0].gpa, val); vcpu->mmio_read_completed = 0; return 1; } @@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) { - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); return vcpu_mmio_write(vcpu, gpa, bytes, val); } static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); return X86EMUL_IO_NEEDED; } diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index e4b0b8e09932..2c735a3e6613 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -211,7 +211,7 @@ 
TRACE_EVENT(kvm_ack_irq, { KVM_TRACE_MMIO_WRITE, "write" } TRACE_EVENT(kvm_mmio, - TP_PROTO(int type, int len, u64 gpa, u64 val), + TP_PROTO(int type, int len, u64 gpa, void *val), TP_ARGS(type, len, gpa, val), TP_STRUCT__entry( @@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio, __entry->type = type; __entry->len = len; __entry->gpa = gpa; - __entry->val = val; + __entry->val = 0; + if (val) + memcpy(&__entry->val, val, + min_t(u32, sizeof(__entry->val), len)); ), TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index b6e715fd3c90..dac7ceb1a677 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) } trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, - data); + &data); data = vcpu_data_host_to_guest(vcpu, data, len); vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); } @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), len); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); kvm_mmio_write_buf(data_buf, len, data); ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); } else { trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, - fault_ipa, 0); + fault_ipa, NULL); ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, data_buf); -- cgit v1.2.3 From 11cf887728a3d1de77cc12ce247b64ef32608891 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 18 Dec 2017 12:37:12 +0100 Subject: x86/MCE/AMD: Define a function to get SMCA bank type Scalable MCA systems have various types of banks. The bank's type can determine how we handle errors from it. For example, if a bank represents a UMC (Unified Memory Controller) then we will need to convert its address from a normalized address to a system physical address before handling the error. 
[ bp: Verify m->bank is within range and use bank pointer. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20171207203955.118171-1-Yazen.Ghannam@amd.com --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a38ab1fa53a2..661c4738be27 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -110,6 +110,20 @@ const char *smca_get_long_name(enum smca_bank_types t) } EXPORT_SYMBOL_GPL(smca_get_long_name); +static enum smca_bank_types smca_get_bank_type(struct mce *m) +{ + struct smca_bank *b; + + if (m->bank >= N_SMCA_BANK_TYPES) + return N_SMCA_BANK_TYPES; + + b = &smca_banks[m->bank]; + if (!b->hwid) + return N_SMCA_BANK_TYPES; + + return b->hwid->bank_type; +} + static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ -- cgit v1.2.3 From c6708d50f166bea2d763c96485d31fdbc50204f1 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 18 Dec 2017 12:37:13 +0100 Subject: x86/MCE: Report only DRAM ECC as memory errors on AMD systems The MCA_STATUS[ErrorCodeExt] field is very bank type specific. We currently check if the ErrorCodeExt value is 0x0 or 0x8 in mce_is_memory_error(), but we don't check the bank number. This means that we could flag non-memory errors as memory errors. We know that we want to flag DRAM ECC errors as memory errors, so let's do those cases first. We can add more cases later when needed. Define a wrapper function in mce_amd.c so we can use SMCA enums. [ bp: Remove brackets around return statements. 
] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20171207203955.118171-2-Yazen.Ghannam@amd.com --- arch/x86/include/asm/mce.h | 2 ++ arch/x86/kernel/cpu/mcheck/mce.c | 4 +--- arch/x86/kernel/cpu/mcheck/mce_amd.c | 11 +++++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b1e8d8db921f..96ea4b5ba658 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -376,6 +376,7 @@ struct smca_bank { extern struct smca_bank smca_banks[MAX_NR_BANKS]; extern const char *smca_get_long_name(enum smca_bank_types t); +extern bool amd_mce_is_memory_error(struct mce *m); extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu); @@ -384,6 +385,7 @@ extern int mce_threshold_remove_device(unsigned int cpu); static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; }; +static inline bool amd_mce_is_memory_error(struct mce *m) { return false; }; #endif diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1d616d08eee..321c7a80be66 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -503,10 +503,8 @@ static int mce_usable_address(struct mce *m) bool mce_is_memory_error(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD) { - /* ErrCodeExt[20:16] */ - u8 xec = (m->status >> 16) & 0x1f; + return amd_mce_is_memory_error(m); - return (xec == 0x0 || xec == 0x8); } else if (m->cpuvendor == X86_VENDOR_INTEL) { /* * Intel SDM Volume 3B - 15.9.2 Compound Error Codes diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 661c4738be27..0f32ad242324 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -754,6 +754,17 @@ out_err: 
} EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); +bool amd_mce_is_memory_error(struct mce *m) +{ + /* ErrCodeExt[20:16] */ + u8 xec = (m->status >> 16) & 0x1f; + + if (mce_flags.smca) + return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0; + + return m->bank == 4 && xec == 0x8; +} + static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc) { struct mce m; -- cgit v1.2.3 From 179eb850ac57c06edaed67fc744ba9d902172f96 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 18 Dec 2017 12:37:14 +0100 Subject: x86/MCE: Make correctable error detection look at the Deferred bit AMD systems may log Deferred errors. These are errors that are uncorrected but which do not need immediate action. The MCA_STATUS[UC] bit may not be set for Deferred errors. Flag the error as not correctable when MCA_STATUS[Deferred] is set and do not feed it into the Correctable Errors Collector. [ bp: Massage commit message. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20171212165143.27475-1-Yazen.Ghannam@amd.com --- arch/x86/kernel/cpu/mcheck/mce.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 321c7a80be66..1b2c11473376 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -528,6 +528,17 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); +static bool mce_is_correctable(struct mce *m) +{ + if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) + return false; + + if (m->status & MCI_STATUS_UC) + return false; + + return true; +} + static bool cec_add_mce(struct mce *m) { if (!m) @@ -535,7 +546,7 @@ static bool cec_add_mce(struct mce *m) /* We eat only correctable DRAM errors with usable addresses. 
*/ if (mce_is_memory_error(m) && - !(m->status & MCI_STATUS_UC) && + mce_is_correctable(m) && mce_usable_address(m)) if (!cec_add_elem(m->addr >> PAGE_SHIFT)) return true; -- cgit v1.2.3 From 9d5f38ba6c82359b7cec31fb27fb78ecc02f3946 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 15 Dec 2017 10:20:12 -0600 Subject: x86/mm: Unbreak modules that use the DMA API Commit d8aa7eea78a1 ("x86/mm: Add Secure Encrypted Virtualization (SEV) support") changed sme_active() from an inline function that referenced sme_me_mask to a non-inlined function in order to make the sev_enabled variable a static variable. This function was marked EXPORT_SYMBOL_GPL because at the time the patch was submitted, sme_me_mask was marked EXPORT_SYMBOL_GPL. Commit 87df26175e67 ("x86/mm: Unbreak modules that rely on external PAGE_KERNEL availability") changed sme_me_mask variable from EXPORT_SYMBOL_GPL to EXPORT_SYMBOL, allowing external modules the ability to build with CONFIG_AMD_MEM_ENCRYPT=y. Now, however, with sev_active() no longer an inline function and marked as EXPORT_SYMBOL_GPL, external modules that use the DMA API are once again broken in 4.15. Since the DMA API is meant to be used by external modules, this needs to be changed. Change the sme_active() and sev_active() functions from EXPORT_SYMBOL_GPL to EXPORT_SYMBOL. 
Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brijesh Singh Link: https://lkml.kernel.org/r/20171215162011.14125.7113.stgit@tlendack-t1.amdoffice.net --- arch/x86/mm/mem_encrypt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index d9a9e9fc75dd..391b13402e40 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -405,13 +405,13 @@ bool sme_active(void) { return sme_me_mask && !sev_enabled; } -EXPORT_SYMBOL_GPL(sme_active); +EXPORT_SYMBOL(sme_active); bool sev_active(void) { return sme_me_mask && sev_enabled; } -EXPORT_SYMBOL_GPL(sev_active); +EXPORT_SYMBOL(sev_active); static const struct dma_map_ops sev_dma_ops = { .alloc = sev_alloc, -- cgit v1.2.3 From b1d030f804fbf8f502756231d475fcf8fd6a86ad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 8 Nov 2017 16:13:23 +0000 Subject: mtd: sharpslpart: fix overflow on block_adr calculation Multiplying block_num and mtd->erasesize may potentially overflow as they are both unsigned ints and so the multiplication is evaluated in unsigned int arithmetic. Cast block_adr to off_t to ensure multiplication is off_t sized to avoid any potential overflow. 
Detected by CoverityScan, CID#1461264 ("Unintentional integer overflow") Signed-off-by: Colin Ian King Acked-by: Andrea Adami Signed-off-by: Boris Brezillon --- drivers/mtd/parsers/sharpslpart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/parsers/sharpslpart.c b/drivers/mtd/parsers/sharpslpart.c index 5fe0079ea5ed..0ddb79ac390d 100644 --- a/drivers/mtd/parsers/sharpslpart.c +++ b/drivers/mtd/parsers/sharpslpart.c @@ -192,7 +192,7 @@ static int sharpsl_nand_init_ftl(struct mtd_info *mtd, struct sharpsl_ftl *ftl) /* create physical-logical table */ for (block_num = 0; block_num < phymax; block_num++) { - block_adr = block_num * mtd->erasesize; + block_adr = (loff_t)block_num * mtd->erasesize; if (mtd_block_isbad(mtd, block_adr)) continue; @@ -244,7 +244,7 @@ static int sharpsl_nand_read_laddr(struct mtd_info *mtd, return -EINVAL; block_num = ftl->log2phy[log_num]; - block_adr = block_num * mtd->erasesize; + block_adr = (loff_t)block_num * mtd->erasesize; block_ofs = mtd_mod_by_eb((u32)from, mtd); err = mtd_read(mtd, block_adr + block_ofs, len, &retlen, buf); -- cgit v1.2.3 From bf29cb238dc0656e6564b6a94bb82e11d2129437 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Dec 2017 19:18:25 +0100 Subject: sched/isolation: Make CONFIG_NO_HZ_FULL select CONFIG_CPU_ISOLATION CONFIG_NO_HZ_FULL doesn't make sense without CONFIG_CPU_ISOLATION. In fact enabling the first without the second is a regression as nohz_full= boot parameter gets silently ignored. Besides this unnatural combination hangs RCU gp kthread when running rcutorture for reasons that are not yet fully understood: rcu_preempt kthread starved for 9974 jiffies! g4294967208 +c4294967207 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x402 ->cpu=0 rcu_preempt I 7464 8 2 0x80000000 Call Trace: __schedule+0x493/0x620 schedule+0x24/0x40 schedule_timeout+0x330/0x3b0 ? preempt_count_sub+0xea/0x140 ? 
collect_expired_timers+0xb0/0xb0 rcu_gp_kthread+0x6bf/0xef0 This commit therefore makes NO_HZ_FULL select CPU_ISOLATION, which prevents all these bad behaviours. Reported-by: kernel test robot Signed-off-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: John Stultz Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Fixes: 5c4991e24c69 ("sched/isolation: Split out new CONFIG_CPU_ISOLATION=y config from CONFIG_NO_HZ_FULL") Link: http://lkml.kernel.org/r/1513275507-29200-2-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/time/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index e776fc8cc1df..f6b5f19223d6 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -95,6 +95,7 @@ config NO_HZ_FULL select RCU_NOCB_CPU select VIRT_CPU_ACCOUNTING_GEN select IRQ_WORK + select CPU_ISOLATION help Adaptively try to shutdown the tick whenever possible, even when the CPU is running tasks. Typically this requires running a single -- cgit v1.2.3 From 2c43838c99d9d23f17eb2bdadafcb2879cca6995 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 14 Dec 2017 19:18:26 +0100 Subject: sched/isolation: Enable CONFIG_CPU_ISOLATION=y by default The "isolcpus=" boot parameter support was always built-in before we moved the related code under CONFIG_CPU_ISOLATION. Having it disabled by default is very confusing for people accustomed to use this parameter. So enable it by default to keep the previous behaviour but keep it optable for those who want to tinify their kernels. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: John Stultz Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: kernel test robot Link: http://lkml.kernel.org/r/1513275507-29200-3-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- init/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 2934249fba46..690a381adee0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -461,10 +461,14 @@ endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION bool "CPU isolation" + default y help Make sure that CPUs running critical tasks are not disturbed by any source of "noise" such as unbound workqueues, timers, kthreads... - Unbound jobs get offloaded to housekeeping CPUs. + Unbound jobs get offloaded to housekeeping CPUs. This is driven by + the "isolcpus=" boot parameter. + + Say Y if unsure. source "kernel/rcu/Kconfig" -- cgit v1.2.3 From d94d105329e4a8a874853b5bd854b6587c41adda Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 14 Dec 2017 19:18:27 +0100 Subject: sched/isolation: Document boot parameters dependency on CONFIG_CPU_ISOLATION=y The "isolcpus=" and "nohz_full=" boot parameters depend on CPU Isolation support. Let's document that. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: John Stultz Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Wanpeng Li Cc: kernel test robot Link: http://lkml.kernel.org/r/1513275507-29200-4-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.rst | 1 + Documentation/admin-guide/kernel-parameters.txt | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index b2598cc9834c..7242cbda15dd 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -109,6 +109,7 @@ parameter is applicable:: IPV6 IPv6 support is enabled. ISAPNP ISA PnP code is enabled. ISDN Appropriate ISDN support is enabled. + ISOL CPU Isolation is enabled. JOY Appropriate joystick support is enabled. KGDB Kernel debugger support is enabled. KVM Kernel Virtual Machine support is enabled. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6571fbfdb2a1..168310707ec2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1737,7 +1737,7 @@ isapnp= [ISAPNP] Format: ,,, - isolcpus= [KNL,SMP] Isolate a given set of CPUs from disturbance. + isolcpus= [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance. [Deprecated - use cpusets instead] Format: [flag-list,] @@ -2662,7 +2662,7 @@ Valid arguments: on, off Default: on - nohz_full= [KNL,BOOT] + nohz_full= [KNL,BOOT,SMP,ISOL] The argument is a cpu list, as described above. In kernels built with CONFIG_NO_HZ_FULL=y, set the specified list of CPUs whose tick will be stopped -- cgit v1.2.3 From 0c36fc0dfb4c0fa068d077b9e2806ef87d0221a7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 18 Dec 2017 01:00:21 +0100 Subject: mmc: tmio: use ioread* for repeated access to a register Not all archs define reads* and writes*. 
Switch to ioread*_rep and friends which is defined everywhere, so we can enable COMPILE_TEST after that. Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 3e6ff8921440..cd3d7c8d24bf 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -246,7 +246,7 @@ static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr) static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, u16 *buf, int count) { - readsw(host->ctl + (addr << host->bus_shift), buf, count); + ioread16_rep(host->ctl + (addr << host->bus_shift), buf, count); } static inline u32 sd_ctrl_read16_and_16_as_32(struct tmio_mmc_host *host, @@ -259,7 +259,7 @@ static inline u32 sd_ctrl_read16_and_16_as_32(struct tmio_mmc_host *host, static inline void sd_ctrl_read32_rep(struct tmio_mmc_host *host, int addr, u32 *buf, int count) { - readsl(host->ctl + (addr << host->bus_shift), buf, count); + ioread32_rep(host->ctl + (addr << host->bus_shift), buf, count); } static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, @@ -276,7 +276,7 @@ static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, u16 *buf, int count) { - writesw(host->ctl + (addr << host->bus_shift), buf, count); + iowrite16_rep(host->ctl + (addr << host->bus_shift), buf, count); } static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, @@ -289,7 +289,7 @@ static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, static inline void sd_ctrl_write32_rep(struct tmio_mmc_host *host, int addr, const u32 *buf, int count) { - writesl(host->ctl + (addr << host->bus_shift), buf, count); + iowrite32_rep(host->ctl + (addr << host->bus_shift), buf, count); } #endif -- cgit v1.2.3 From 
e578afab6e5f57e7ed22a42d261942b4ac923ffd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:37 +0900 Subject: mmc: renesas_sdhi: remove wrong depends on to enable compile test ARCH_RENESAS is a stronger condition than (ARM || ARM64). If ARCH_RENESAS is enabled, (ARM || ARM64) is met as well. What is worse, the first depends on line prevents COMPILE_TEST from enabling this driver. It should be removed. Signed-off-by: Masahiro Yamada Acked-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index d63a6ba47501..cc4fd07735a7 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -596,7 +596,6 @@ config MMC_TMIO config MMC_SDHI tristate "Renesas SDHI SD/SDIO controller support" - depends on SUPERH || ARM || ARM64 depends on SUPERH || ARCH_RENESAS || COMPILE_TEST select MMC_TMIO_CORE help -- cgit v1.2.3 From 4ce6281791676c134d3ae919edaf76da3cef1d76 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:38 +0900 Subject: mmc: renesas_sdhi: remove eprobe jump label "goto eprobe" does nothing. Return directly. 
Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index fcf7235d5742..0590ae06cd7b 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -497,7 +497,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (IS_ERR(priv->clk)) { ret = PTR_ERR(priv->clk); dev_err(&pdev->dev, "cannot get clock: %d\n", ret); - goto eprobe; + return ret; } /* @@ -524,10 +524,8 @@ int renesas_sdhi_probe(struct platform_device *pdev, } host = tmio_mmc_host_alloc(pdev); - if (!host) { - ret = -ENOMEM; - goto eprobe; - } + if (!host) + return -ENOMEM; if (of_data) { mmc_data->flags |= of_data->tmio_flags; @@ -652,7 +650,7 @@ eirq: tmio_mmc_host_remove(host); efree: tmio_mmc_host_free(host); -eprobe: + return ret; } EXPORT_SYMBOL_GPL(renesas_sdhi_probe); -- cgit v1.2.3 From a3b05373e0e06dcb04adf2c50b58cd3feb5f8294 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:39 +0900 Subject: mmc: tmio: set tmio_mmc_host to driver data The remove, suspend, resume hooks need to get tmio_mmc_host. It is tedious to call mmc_priv() to convert mmc_host to tmio_mmc_host. We can directly set tmio_mmc_host to driver data. 
Signed-off-by: Masahiro Yamada Acked-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 3 +-- drivers/mmc/host/tmio_mmc.c | 12 ++++-------- drivers/mmc/host/tmio_mmc_core.c | 8 +++----- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 0590ae06cd7b..9baf4d1791ea 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -657,8 +657,7 @@ EXPORT_SYMBOL_GPL(renesas_sdhi_probe); int renesas_sdhi_remove(struct platform_device *pdev) { - struct mmc_host *mmc = platform_get_drvdata(pdev); - struct tmio_mmc_host *host = mmc_priv(mmc); + struct tmio_mmc_host *host = platform_get_drvdata(pdev); tmio_mmc_host_remove(host); diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 64b7e9f18361..ccfbc154ee5b 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -128,15 +128,11 @@ out: static int tmio_mmc_remove(struct platform_device *pdev) { const struct mfd_cell *cell = mfd_get_cell(pdev); - struct mmc_host *mmc = platform_get_drvdata(pdev); + struct tmio_mmc_host *host = platform_get_drvdata(pdev); - if (mmc) { - struct tmio_mmc_host *host = mmc_priv(mmc); - - tmio_mmc_host_remove(host); - if (cell->disable) - cell->disable(pdev); - } + tmio_mmc_host_remove(host); + if (cell->disable) + cell->disable(pdev); return 0; } diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index d6ca57be16c2..8787a996f6a5 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1193,7 +1193,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, return ret; _host->pdata = pdata; - platform_set_drvdata(pdev, mmc); + platform_set_drvdata(pdev, _host); _host->set_pwr = pdata->set_pwr; _host->set_clk_div = pdata->set_clk_div; @@ -1351,8 +1351,7 @@ EXPORT_SYMBOL_GPL(tmio_mmc_host_remove); #ifdef CONFIG_PM int 
tmio_mmc_host_runtime_suspend(struct device *dev) { - struct mmc_host *mmc = dev_get_drvdata(dev); - struct tmio_mmc_host *host = mmc_priv(mmc); + struct tmio_mmc_host *host = dev_get_drvdata(dev); tmio_mmc_disable_mmc_irqs(host, TMIO_MASK_ALL); @@ -1372,8 +1371,7 @@ static bool tmio_mmc_can_retune(struct tmio_mmc_host *host) int tmio_mmc_host_runtime_resume(struct device *dev) { - struct mmc_host *mmc = dev_get_drvdata(dev); - struct tmio_mmc_host *host = mmc_priv(mmc); + struct tmio_mmc_host *host = dev_get_drvdata(dev); tmio_mmc_reset(host); tmio_mmc_clk_enable(host); -- cgit v1.2.3 From 4139696b7978d57ec840b6c9293d4709a46af3bd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:40 +0900 Subject: mmc: tmio: use devm_ioremap_resource() instead of devm_ioremap() The TMIO core misses to call request_mem_region(). devm_ioremap_resource() takes care of it and makes the code cleaner. Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 8787a996f6a5..151a542b75d5 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1185,8 +1185,9 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, _host->write16_hook = NULL; res_ctl = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res_ctl) - return -EINVAL; + _host->ctl = devm_ioremap_resource(&pdev->dev, res_ctl); + if (IS_ERR(_host->ctl)) + return PTR_ERR(_host->ctl); ret = mmc_of_parse(mmc); if (ret < 0) @@ -1202,11 +1203,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, if (ret < 0) return ret; - _host->ctl = devm_ioremap(&pdev->dev, - res_ctl->start, resource_size(res_ctl)); - if (!_host->ctl) - return -ENOMEM; - tmio_mmc_ops.card_busy = _host->card_busy; tmio_mmc_ops.start_signal_voltage_switch = 
_host->start_signal_voltage_switch; -- cgit v1.2.3 From c055fc75c1757b220108489038cfe60496b13865 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:41 +0900 Subject: mmc: tmio: move mmc_host_ops to struct tmio_mmc_host from static data Currently, tmio_mmc_ops is static data and tmio_mmc_host_probe() updates some hooks in the static data. This is a problem when two or more instances call tmio_mmc_host_probe() and each of them requests to use its own card_busy/start_signal_voltage_switch. We can borrow a solution from sdhci_alloc_host(). Copy the whole ops structure to host->mmc_host_ops, then override the hooks in malloc'ed data. Constify tmio_mmc_ops since it is now a template ops used by default. Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc.h | 1 + drivers/mmc/host/tmio_mmc_core.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index cd3d7c8d24bf..405547f88421 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -134,6 +134,7 @@ struct tmio_mmc_host { struct mmc_request *mrq; struct mmc_data *data; struct mmc_host *mmc; + struct mmc_host_ops ops; /* Callbacks for clock / power control */ void (*set_pwr)(struct platform_device *host, int state); diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 151a542b75d5..ea5181d7948d 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1098,7 +1098,7 @@ static int tmio_multi_io_quirk(struct mmc_card *card, return blk_size; } -static struct mmc_host_ops tmio_mmc_ops = { +static const struct mmc_host_ops tmio_mmc_ops = { .request = tmio_mmc_request, .set_ios = tmio_mmc_set_ios, .get_ro = tmio_mmc_get_ro, @@ -1158,6 +1158,8 @@ tmio_mmc_host_alloc(struct platform_device *pdev) host = mmc_priv(mmc); host->mmc = mmc; host->pdev = pdev; + 
host->ops = tmio_mmc_ops; + mmc->ops = &host->ops; return host; } @@ -1203,10 +1205,9 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, if (ret < 0) return ret; - tmio_mmc_ops.card_busy = _host->card_busy; - tmio_mmc_ops.start_signal_voltage_switch = + _host->ops.card_busy = _host->card_busy; + _host->ops.start_signal_voltage_switch = _host->start_signal_voltage_switch; - mmc->ops = &tmio_mmc_ops; mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities; mmc->caps2 |= pdata->capabilities2; -- cgit v1.2.3 From 2aaa3c5193db9cdfe62201aa4eb4e1007a43fdc8 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:42 +0900 Subject: mmc: tmio, renesas_sdhi: set mmc_host_ops hooks directly Drivers can set any mmc_host_ops hooks between tmio_mmc_host_alloc() and tmio_mmc_host_probe(). Remove duplicated hooks in tmio_mmc_host. Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 4 ++-- drivers/mmc/host/tmio_mmc.h | 3 --- drivers/mmc/host/tmio_mmc_core.c | 4 ---- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 9baf4d1791ea..267e2e0077e9 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -549,8 +549,8 @@ int renesas_sdhi_probe(struct platform_device *pdev, /* SDR speeds are only available on Gen2+ */ if (mmc_data->flags & TMIO_MMC_MIN_RCAR2) { /* card_busy caused issues on r8a73a4 (pre-Gen2) CD-less SDHI */ - host->card_busy = renesas_sdhi_card_busy; - host->start_signal_voltage_switch = + host->ops.card_busy = renesas_sdhi_card_busy; + host->ops.start_signal_voltage_switch = renesas_sdhi_start_signal_voltage_switch; } diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 405547f88421..dd40b9631b3a 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -186,9 +186,6 @@ struct 
tmio_mmc_host { void (*clk_disable)(struct tmio_mmc_host *host); int (*multi_io_quirk)(struct mmc_card *card, unsigned int direction, int blk_size); - int (*card_busy)(struct mmc_host *mmc); - int (*start_signal_voltage_switch)(struct mmc_host *mmc, - struct mmc_ios *ios); int (*write16_hook)(struct tmio_mmc_host *host, int addr); void (*hw_reset)(struct tmio_mmc_host *host); void (*prepare_tuning)(struct tmio_mmc_host *host, unsigned long tap); diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index ea5181d7948d..1abe83b9f568 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1205,10 +1205,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, if (ret < 0) return ret; - _host->ops.card_busy = _host->card_busy; - _host->ops.start_signal_voltage_switch = - _host->start_signal_voltage_switch; - mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities; mmc->caps2 |= pdata->capabilities2; mmc->max_segs = pdata->max_segs ? : 32; -- cgit v1.2.3 From cd82cd213bfa532ca368e4333ba6a0f14185ef9c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:43 +0900 Subject: mmc: tmio: move mmc_gpio_request_cd() before mmc_add_host() Drivers do not need to call mmc_gpiod_request_cd_irq() explicitly because mmc_start_host() calls it. To make it work, cd_gpio must be set before mmc_add_host(). 
Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 1abe83b9f568..6b18c0509ff4 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1205,6 +1205,12 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, if (ret < 0) return ret; + if (pdata->flags & TMIO_MMC_USE_GPIO_CD) { + ret = mmc_gpio_request_cd(mmc, pdata->cd_gpio, 0); + if (ret) + return ret; + } + mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities; mmc->caps2 |= pdata->capabilities2; mmc->max_segs = pdata->max_segs ? : 32; @@ -1300,14 +1306,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, dev_pm_qos_expose_latency_limit(&pdev->dev, 100); - if (pdata->flags & TMIO_MMC_USE_GPIO_CD) { - ret = mmc_gpio_request_cd(mmc, pdata->cd_gpio, 0); - if (ret) - goto remove_host; - - mmc_gpiod_request_cd_irq(mmc); - } - return 0; remove_host: -- cgit v1.2.3 From b4fcb5e5094b5f13e5c9aadea82cdbd1a4f0dd02 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 22:27:03 +0530 Subject: mmc: android-goldfish: remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. 
Signed-off-by: Pravin Shedge Signed-off-by: Ulf Hansson --- drivers/mmc/host/android-goldfish.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c index 63fe5091ca59..63d27589cd89 100644 --- a/drivers/mmc/host/android-goldfish.c +++ b/drivers/mmc/host/android-goldfish.c @@ -42,13 +42,11 @@ #include #include #include -#include #include #include #include -#include #include #define DRIVER_NAME "goldfish_mmc" -- cgit v1.2.3 From de8dcc3d2c0e08e5068ee1e26fc46415c15e3637 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 12 Dec 2017 10:49:02 +0000 Subject: mmc: avoid removing non-removable hosts during suspend The Weibu F3C MiniPC has an onboard AP6255 module, presenting two SDIO functions on a single MMC host (Bluetooth/btsdio and WiFi/brcmfmac), and the mmc layer correctly detects this as non-removable. After suspend/resume, the wifi and bluetooth interfaces disappear and do not get probed again. The conditions here are: 1. During suspend, we reach mmc_pm_notify() 2. mmc_pm_notify() calls mmc_sdio_pre_suspend() to see if we can suspend the SDIO host. However, mmc_sdio_pre_suspend() returns -ENOSYS because btsdio_driver does not have a suspend method. 3. mmc_pm_notify() proceeds to remove the card 4. Upon resume, mmc_rescan() does nothing with this host, because of the rescan_entered check which aims to only scan a non-removable device a single time (i.e. during boot). Fix the loss of functionality by detecting that we are unable to suspend a non-removable host, so avoid the forced removal in that case. The comment above this function already indicates that this code was only intended for removable devices. 
Signed-off-by: Daniel Drake Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index fd64e6d425e5..c0ba6d8823b7 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2761,6 +2761,14 @@ static int mmc_pm_notify(struct notifier_block *notify_block, if (!err) break; + if (!mmc_card_is_removable(host)) { + dev_warn(mmc_dev(host), + "pre_suspend failed for non-removable host: " + "%d\n", err); + /* Avoid removing non-removable hosts */ + break; + } + /* Calling bus_ops->remove() with a claimed host can deadlock */ host->bus_ops->remove(host); mmc_claim_host(host); -- cgit v1.2.3 From 0be55579a127916ebe39db2a74d906a2dfceed42 Mon Sep 17 00:00:00 2001 From: "Liu, Changcheng" Date: Sat, 16 Dec 2017 23:15:45 +0800 Subject: mmc: block: fix logical error to avoid memory leak If the MMC_DRV_OP_GET_EXT_CSD request completes successfully, then ext_csd must be freed, but in one case it was not. Fix that. Signed-off-by: Liu Changcheng Acked-by: Adrian Hunter Acked-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 654fc1ebd675..20135a5de748 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2795,6 +2795,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp) if (n != EXT_CSD_STR_LEN) { err = -EINVAL; + kfree(ext_csd); goto out_free; } -- cgit v1.2.3 From 869b5567e12f63ea7407f81728ca87f8c0abbfdb Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 14 Nov 2017 06:53:32 -0700 Subject: vmbus: unregister device_obj->channels_kset Without the patch, a device can't be thoroughly destroyed, because vmbus_device_register() -> kset_create_and_add() still holds a reference to the hv_device's device.kobj. 
Signed-off-by: Dexuan Cui Cc: Stephen Hemminger Fixes: c2e5df616e1a ("vmbus: add per-channel sysfs info") Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 76ed9a216f10..610223f0e945 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1378,6 +1378,8 @@ void vmbus_device_unregister(struct hv_device *device_obj) pr_debug("child device %s unregistered\n", dev_name(&device_obj->device)); + kset_unregister(device_obj->channels_kset); + /* * Kick off the process of unregistering the device. * This will call vmbus_remove() and eventually vmbus_device_release() -- cgit v1.2.3 From 7f3dc0088b98533f17128058fac73cd8b2752ef1 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 27 Nov 2017 09:32:33 -0800 Subject: binder: fix proc->files use-after-free proc->files cleanup is initiated by binder_vma_close. Therefore a reference on the binder_proc is not enough to prevent the files_struct from being released while the binder_proc still has a reference. This can lead to an attempt to dereference the stale pointer obtained from proc->files prior to proc->files cleanup. This has been seen once in task_get_unused_fd_flags() when __alloc_fd() is called with a stale "files". The fix is to protect proc->files with a mutex to prevent cleanup while in use. 
Signed-off-by: Todd Kjos Cc: stable # 4.14 Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index bccec9de0533..a7ecfde66b7b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -482,7 +482,8 @@ enum binder_deferred_state { * @tsk task_struct for group_leader of process * (invariant after initialized) * @files files_struct for process - * (invariant after initialized) + * (protected by @files_lock) + * @files_lock mutex to protect @files * @deferred_work_node: element for binder_deferred_list * (protected by binder_deferred_lock) * @deferred_work: bitmap of deferred work to perform @@ -530,6 +531,7 @@ struct binder_proc { int pid; struct task_struct *tsk; struct files_struct *files; + struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node); static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { - struct files_struct *files = proc->files; unsigned long rlim_cur; unsigned long irqs; + int ret; - if (files == NULL) - return -ESRCH; - - if (!lock_task_sighand(proc->tsk, &irqs)) - return -EMFILE; - + mutex_lock(&proc->files_lock); + if (proc->files == NULL) { + ret = -ESRCH; + goto err; + } + if (!lock_task_sighand(proc->tsk, &irqs)) { + ret = -EMFILE; + goto err; + } rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - return __alloc_fd(files, 0, rlim_cur, flags); + ret = __alloc_fd(proc->files, 0, rlim_cur, flags); +err: + mutex_unlock(&proc->files_lock); + return ret; } /* @@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { + 
mutex_lock(&proc->files_lock); if (proc->files) __fd_install(proc->files, fd, file); + mutex_unlock(&proc->files_lock); } /* @@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) { int retval; - if (proc->files == NULL) - return -ESRCH; - + mutex_lock(&proc->files_lock); + if (proc->files == NULL) { + retval = -ESRCH; + goto err; + } retval = __close_fd(proc->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK)) retval = -EINTR; - +err: + mutex_unlock(&proc->files_lock); return retval; } @@ -4627,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) ret = binder_alloc_mmap_handler(&proc->alloc, vma); if (ret) return ret; + mutex_lock(&proc->files_lock); proc->files = get_files_struct(current); + mutex_unlock(&proc->files_lock); return 0; err_bad_arg: @@ -4651,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp) spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; + mutex_init(&proc->files_lock); INIT_LIST_HEAD(&proc->todo); proc->default_priority = task_nice(current); binder_dev = container_of(filp->private_data, struct binder_device, @@ -4903,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work) files = NULL; if (defer & BINDER_DEFERRED_PUT_FILES) { + mutex_lock(&proc->files_lock); files = proc->files; if (files) proc->files = NULL; + mutex_unlock(&proc->files_lock); } if (defer & BINDER_DEFERRED_FLUSH) -- cgit v1.2.3 From 5cfee7a357f60675cae32b494bb2096d7203efd3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Nov 2017 11:27:37 +0100 Subject: perf tools: Use shell function for perl cflags retrieval Using the shell function for perl CFLAGS retrieval instead of back quotes (``). 
Both execute the command in a shell, but the shell function is more explicit and seems to be the preferred way. Also we don't have any other use of the back quotes in perf Makefiles. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171108102739.30338-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index ed65e82f034e..710623ddb8af 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -583,7 +583,7 @@ else PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) - PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` + PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) -- cgit v1.2.3 From 61fb26a6a23c0f1a07a0f8a11b54bafb1ac2398b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Dec 2017 12:23:08 -0300 Subject: perf tools: Fix up build in hardened environments On Fedora systems the perl and python CFLAGS/LDFLAGS include the hardened specs from the redhat-rpm-config package.
We apply them only for perl/python objects, which makes them not compatible with the rest of the objects and the build fails with: /usr/bin/ld: perf-in.o: relocation R_X86_64_32 against `.rodata.str1.1' can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: libperf.a(libperf-in.o): relocation R_X86_64_32S against `.text' can not be used when making a shared object; recompile with -fPIC /usr/bin/ld: final link failed: Nonrepresentable section on output collect2: error: ld returned 1 exit status make[2]: *** [Makefile.perf:507: perf] Error 1 make[1]: *** [Makefile.perf:210: sub-make] Error 2 make: *** [Makefile:69: all] Error 2 Mainly it's caused by perl/python objects being compiled with: -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 which makes the final link impossible, because it will check for 'proper' objects with the following option: -specs=/usr/lib/rpm/redhat/redhat-hardened-ld Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20171204082437.GC30564@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 710623ddb8af..0294bfb6c5f8 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -188,9 +188,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC_NO_CLANG), 1) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) - endif + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif @@ -576,7 +574,6 @@ ifndef NO_GTK2 endif endif - ifdef NO_LIBPERL CFLAGS += -DNO_LIBPERL else @@ -584,6 +581,8 @@
else PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) -- cgit v1.2.3 From ca58d7e64bdfc54f7dfe46713c1e2acc68d7522d Mon Sep 17 00:00:00 2001 From: Ben Gainey Date: Wed, 22 Nov 2017 18:25:41 -0600 Subject: perf jvmti: Generate correct debug information for inlined code tools/perf/jvmti is broken in so far as it generates incorrect debug information. Specifically it attributes all debug lines to the original method being output even in the case that some code is being inlined from elsewhere. This patch fixes the issue. To test (from within linux/tools/perf): export JDIR=/usr/lib/jvm/java-8-openjdk-amd64/ make cat << __EOF > Test.java public class Test { private StringBuilder b = new StringBuilder(); private void loop(int i, String... args) { for (String a : args) b.append(a); long hc = b.hashCode() * System.nanoTime(); b = new StringBuilder(); b.append(hc); System.out.printf("Iteration %d = %d\n", i, hc); } public void run(String... args) { for (int i = 0; i < 10000; ++i) { loop(i, args); } } public static void main(String... args) { Test t = new Test(); t.run(args); } } __EOF $JDIR/bin/javac Test.java ./perf record -F 10000 -g -k mono $JDIR/bin/java -agentpath:`pwd`/libperf-jvmti.so Test ./perf inject --jit -i perf.data -o perf.data.jitted ./perf annotate -i perf.data.jitted --stdio | grep Test\.java: | sort -u Before this patch, Test.java line numbers get reported that are greater than the number of lines in the Test.java file. They come from the source file of the inlined function, e.g. java/lang/String.java:1085. 
For further validation one can examine those lines in the JDK source distribution and confirm that they map to inlined functions called by Test.java. After this patch, the filename of the inlined function is output rather than the incorrect original source filename. Signed-off-by: Ben Gainey Tested-by: Arnaldo Carvalho de Melo Tested-by: Stephane Eranian Cc: Alexander Shishkin Cc: Ben Gainey Cc: Colin King Cc: Darren Hart Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 598b7c6919c7 ("perf jit: add source line info support") Link: http://lkml.kernel.org/r/20171122182541.d25599a3eb1ada3480d142fa@arm.com Signed-off-by: Kim Phillips Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/jvmti/jvmti_agent.c | 16 +++-- tools/perf/jvmti/jvmti_agent.h | 7 +- tools/perf/jvmti/libjvmti.c | 147 ++++++++++++++++++++++++++++++++++------- 3 files changed, 134 insertions(+), 36 deletions(-) diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index cf36de7ea255..0c6d1002b524 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -384,13 +384,13 @@ jvmti_write_code(void *agent, char const *sym, } int -jvmti_write_debug_info(void *agent, uint64_t code, const char *file, - jvmti_line_info_t *li, int nr_lines) +jvmti_write_debug_info(void *agent, uint64_t code, + int nr_lines, jvmti_line_info_t *li, + const char * const * file_names) { struct jr_code_debug_info rec; - size_t sret, len, size, flen; + size_t sret, len, size, flen = 0; uint64_t addr; - const char *fn = file; FILE *fp = agent; int i; @@ -405,7 +405,9 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, return -1; } - flen = strlen(file) + 1; + for (i = 0; i < nr_lines; ++i) { + flen += strlen(file_names[i]) + 1; + } rec.p.id = JIT_CODE_DEBUG_INFO; size = sizeof(rec); @@ -421,7 +423,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, * file[] : source file name */ size += nr_lines * sizeof(struct 
debug_entry); - size += flen * nr_lines; + size += flen; rec.p.total_size = size; /* @@ -452,7 +454,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file, if (sret != 1) goto error; - sret = fwrite_unlocked(fn, flen, 1, fp); + sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp); if (sret != 1) goto error; } diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h index fe32d8344a82..6ed82f6c06dd 100644 --- a/tools/perf/jvmti/jvmti_agent.h +++ b/tools/perf/jvmti/jvmti_agent.h @@ -14,6 +14,7 @@ typedef struct { unsigned long pc; int line_number; int discrim; /* discriminator -- 0 for now */ + jmethodID methodID; } jvmti_line_info_t; void *jvmti_open(void); @@ -22,11 +23,9 @@ int jvmti_write_code(void *agent, char const *symbol_name, uint64_t vma, void const *code, const unsigned int code_size); -int jvmti_write_debug_info(void *agent, - uint64_t code, - const char *file, +int jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines, jvmti_line_info_t *li, - int nr_lines); + const char * const * file_names); #if defined(__cplusplus) } diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c index c62c9fc9a525..6add3e982614 100644 --- a/tools/perf/jvmti/libjvmti.c +++ b/tools/perf/jvmti/libjvmti.c @@ -47,6 +47,7 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, tab[lines].pc = (unsigned long)pc; tab[lines].line_number = loc_tab[i].line_number; tab[lines].discrim = 0; /* not yet used */ + tab[lines].methodID = m; lines++; } else { break; @@ -125,6 +126,99 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t ** return JVMTI_ERROR_NONE; } +static void +copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length) +{ + /* + * Assume path name is class hierarchy, this is a common practice with Java programs + */ + if (*class_sign == 'L') { + int j, i = 0; + char *p = strrchr(class_sign, '/'); + if (p) { + 
/* drop the 'L' prefix and copy up to the final '/' */ + for (i = 0; i < (p - class_sign); i++) + result[i] = class_sign[i+1]; + } + /* + * append file name, we use loops and not string ops to avoid modifying + * class_sign which is used later for the symbol name + */ + for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++) + result[i] = file_name[j]; + + result[i] = '\0'; + } else { + /* fallback case */ + size_t file_name_len = strlen(file_name); + strncpy(result, file_name, file_name_len < max_length ? file_name_len : max_length); + } +} + +static jvmtiError +get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer) +{ + jvmtiError ret; + jclass decl_class; + char *file_name = NULL; + char *class_sign = NULL; + char fn[PATH_MAX]; + size_t len; + + ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetMethodDeclaringClass", ret); + return ret; + } + + ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetSourceFileName", ret); + return ret; + } + + ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + print_error(jvmti, "GetClassSignature", ret); + goto free_file_name_error; + } + + copy_class_filename(class_sign, file_name, fn, PATH_MAX); + len = strlen(fn); + *buffer = malloc((len + 1) * sizeof(char)); + if (!*buffer) { + print_error(jvmti, "GetClassSignature", ret); + ret = JVMTI_ERROR_OUT_OF_MEMORY; + goto free_class_sign_error; + } + strcpy(*buffer, fn); + ret = JVMTI_ERROR_NONE; + +free_class_sign_error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); +free_file_name_error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); + + return ret; +} + +static jvmtiError +fill_source_filenames(jvmtiEnv *jvmti, int nr_lines, + const jvmti_line_info_t * line_tab, + char ** file_names) +{ + int index; + 
jvmtiError ret; + + for (index = 0; index < nr_lines; ++index) { + ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index])); + if (ret != JVMTI_ERROR_NONE) + return ret; + } + + return JVMTI_ERROR_NONE; +} + static void JNICALL compiled_method_load_cb(jvmtiEnv *jvmti, jmethodID method, @@ -135,16 +229,18 @@ compiled_method_load_cb(jvmtiEnv *jvmti, const void *compile_info) { jvmti_line_info_t *line_tab = NULL; + char ** line_file_names = NULL; jclass decl_class; char *class_sign = NULL; char *func_name = NULL; char *func_sign = NULL; - char *file_name= NULL; + char *file_name = NULL; char fn[PATH_MAX]; uint64_t addr = (uint64_t)(uintptr_t)code_addr; jvmtiError ret; int nr_lines = 0; /* in line_tab[] */ size_t len; + int output_debug_info = 0; ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, &decl_class); @@ -158,6 +254,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti, if (ret != JVMTI_ERROR_NONE) { warnx("jvmti: cannot get line table for method"); nr_lines = 0; + } else if (nr_lines > 0) { + line_file_names = malloc(sizeof(char*) * nr_lines); + if (!line_file_names) { + warnx("jvmti: cannot allocate space for line table method names"); + } else { + memset(line_file_names, 0, sizeof(char*) * nr_lines); + ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: fill_source_filenames failed"); + } else { + output_debug_info = 1; + } + } } } @@ -181,33 +290,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti, goto error; } - /* - * Assume path name is class hierarchy, this is a common practice with Java programs - */ - if (*class_sign == 'L') { - int j, i = 0; - char *p = strrchr(class_sign, '/'); - if (p) { - /* drop the 'L' prefix and copy up to the final '/' */ - for (i = 0; i < (p - class_sign); i++) - fn[i] = class_sign[i+1]; - } - /* - * append file name, we use loops and not string ops to avoid modifying - * class_sign which is used later for the symbol name - */ - for (j = 0; 
i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++) - fn[i] = file_name[j]; - fn[i] = '\0'; - } else { - /* fallback case */ - strcpy(fn, file_name); - } + copy_class_filename(class_sign, file_name, fn, PATH_MAX); + /* * write source line info record if we have it */ - if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) - warnx("jvmti: write_debug_info() failed"); + if (output_debug_info) + if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names)) + warnx("jvmti: write_debug_info() failed"); len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; { @@ -223,6 +313,13 @@ error: (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); free(line_tab); + while (line_file_names && (nr_lines > 0)) { + if (line_file_names[nr_lines - 1]) { + free(line_file_names[nr_lines - 1]); + } + nr_lines -= 1; + } + free(line_file_names); } static void JNICALL -- cgit v1.2.3 From 10b9baa701d5023897f70a4acb3bf0235da3dc4f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 28 Nov 2017 11:08:41 -0300 Subject: tools arch s390: Do not include header files from the kernel sources Long ago we decided to be verbotten including files in the kernel git sources from tools/ living source code, to avoid disturbing kernel development (and perf's and other tools/) when, say, a kernel hacker adds something, tests everything but tools/ and have tools/ build broken. This got broken recently by s/390, fix it by copying arch/s390/include/uapi/asm/perf_regs.h to tools/arch/s390/include/uapi/asm/, making this one be used by means of and updating tools/perf/check_headers.sh to make sure we are notified when the original changes, so that we can check if anything is needed on the tooling side. This would have been caught by the 'tarkpg' test entry in: $ make -C tools/perf build-test When run on a s/390 build system or container. 
Acked-by: Heiko Carstens Cc: Hendrik Brueckner Cc: Thomas Richter Cc: Martin Schwidefsky Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: f704ef44602f ("s390/perf: add support for perf_regs and libdw") Link: https://lkml.kernel.org/n/tip-n57139ic0v9uffx8wdqi3d8a@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/perf_regs.h | 44 ++++++++++++++++++++++++++++ tools/perf/arch/s390/include/perf_regs.h | 2 +- tools/perf/check-headers.sh | 1 + 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tools/arch/s390/include/uapi/asm/perf_regs.h diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..d17dd9e5d516 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/perf_regs.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_S390_PERF_REGS_H +#define _ASM_S390_PERF_REGS_H + +enum perf_event_s390_regs { + PERF_REG_S390_R0, + PERF_REG_S390_R1, + PERF_REG_S390_R2, + PERF_REG_S390_R3, + PERF_REG_S390_R4, + PERF_REG_S390_R5, + PERF_REG_S390_R6, + PERF_REG_S390_R7, + PERF_REG_S390_R8, + PERF_REG_S390_R9, + PERF_REG_S390_R10, + PERF_REG_S390_R11, + PERF_REG_S390_R12, + PERF_REG_S390_R13, + PERF_REG_S390_R14, + PERF_REG_S390_R15, + PERF_REG_S390_FP0, + PERF_REG_S390_FP1, + PERF_REG_S390_FP2, + PERF_REG_S390_FP3, + PERF_REG_S390_FP4, + PERF_REG_S390_FP5, + PERF_REG_S390_FP6, + PERF_REG_S390_FP7, + PERF_REG_S390_FP8, + PERF_REG_S390_FP9, + PERF_REG_S390_FP10, + PERF_REG_S390_FP11, + PERF_REG_S390_FP12, + PERF_REG_S390_FP13, + PERF_REG_S390_FP14, + PERF_REG_S390_FP15, + PERF_REG_S390_MASK, + PERF_REG_S390_PC, + + PERF_REG_S390_MAX +}; + +#endif /* _ASM_S390_PERF_REGS_H */ diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index d2df54a6bc5a..bcfbaed78cc2 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ 
b/tools/perf/arch/s390/include/perf_regs.h @@ -3,7 +3,7 @@ #include #include -#include <../../../../arch/s390/include/uapi/asm/perf_regs.h> +#include void perf_regs_load(u64 *regs); diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 6db9d809fe97..3e64f10b6d66 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -21,6 +21,7 @@ arch/x86/include/asm/cpufeatures.h arch/arm/include/uapi/asm/perf_regs.h arch/arm64/include/uapi/asm/perf_regs.h arch/powerpc/include/uapi/asm/perf_regs.h +arch/s390/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm_perf.h -- cgit v1.2.3 From ca26cffa4e4aaeb09bb9e308f95c7835cb149248 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 4 Dec 2017 13:08:47 -0300 Subject: x86/asm: Allow again using asm.h when building for the 'bpf' clang target Up to f5caf621ee35 ("x86/asm: Fix inline asm call constraints for Clang") we were able to use x86 headers to build to the 'bpf' clang target, as done by the BPF code in tools/perf/. With that commit, we ended up with following failure for 'perf test LLVM', this is because "clang ... -target bpf ..." fails since 4.0 does not have bpf inline asm support and 6.0 does not recognize the register 'esp', fix it by guarding that part with an #ifndef __BPF__, that is defined by clang when building to the "bpf" target. 
# perf test -v LLVM 37: LLVM search and compile : 37.1: Basic BPF llvm compile : --- start --- test child forked, pid 25526 Kernel build dir is set to /lib/modules/4.14.0+/build set env: KBUILD_DIR=/lib/modules/4.14.0+/build unset env: KBUILD_OPTS include option is set to -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: NR_CPUS=4 set env: LINUX_VERSION_CODE=0x40e00 set env: CLANG_EXEC=/usr/local/bin/clang set env: CLANG_OPTIONS=-xc set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: WORKING_DIR=/lib/modules/4.14.0+/build set env: CLANG_SOURCE=- llvm compiling command template: echo '/* * bpf-script-example.c * Test basic LLVM building */ #ifndef LINUX_VERSION_CODE # error Need LINUX_VERSION_CODE # error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' #endif #define BPF_ANY 0 #define BPF_MAP_TYPE_ARRAY 2 #define BPF_FUNC_map_lookup_elem 1 #define BPF_FUNC_map_update_elem 2 static void *(*bpf_map_lookup_elem)(void *map, void *key) = (void *) BPF_FUNC_map_lookup_elem; static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = (void *) BPF_FUNC_map_update_elem; struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) 
__attribute__((section(NAME), used)) struct bpf_map_def SEC("maps") flip_table = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=SyS_epoll_wait") int bpf_func__SyS_epoll_wait(void *ctx) { int ind =0; int *flag = bpf_map_lookup_elem(&flip_table, &ind); int new_flag; if (!flag) return 0; /* flip flag and store back */ new_flag = !*flag; bpf_map_update_elem(&flip_table, &ind, &new_flag, BPF_ANY); return new_flag; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; ' | $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o - test child finished with 0 ---- end ---- LLVM search and compile subtest 0: Ok 37.2: kbuild searching : --- start --- test child forked, pid 25950 Kernel build dir is set to /lib/modules/4.14.0+/build set env: KBUILD_DIR=/lib/modules/4.14.0+/build unset env: KBUILD_OPTS include option is set to -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: NR_CPUS=4 set env: LINUX_VERSION_CODE=0x40e00 set env: CLANG_EXEC=/usr/local/bin/clang set env: CLANG_OPTIONS=-xc set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h set env: WORKING_DIR=/lib/modules/4.14.0+/build set env: CLANG_SOURCE=- llvm compiling command template: echo '/* * bpf-script-test-kbuild.c * Test include from kernel header */ #ifndef LINUX_VERSION_CODE # error Need LINUX_VERSION_CODE # error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' #endif #define SEC(NAME) __attribute__((section(NAME), used)) #include #include SEC("func=vfs_llseek") int bpf_func__vfs_llseek(void *ctx) { return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; ' | $CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS -DLINUX_VERSION_CODE=$LINUX_VERSION_CODE $CLANG_OPTIONS $KERNEL_INC_OPTIONS -Wno-unused-value -Wno-pointer-sign -working-directory $WORKING_DIR -c "$CLANG_SOURCE" -target bpf -O2 -o - In file included from :12: In file included from /home/acme/git/linux/arch/x86/include/uapi/asm/ptrace.h:5: In file included from /home/acme/git/linux/include/linux/compiler.h:242: In file included from /home/acme/git/linux/arch/x86/include/asm/barrier.h:5: In file included from /home/acme/git/linux/arch/x86/include/asm/alternative.h:10: /home/acme/git/linux/arch/x86/include/asm/asm.h:145:50: error: unknown register name 'esp' in asm register unsigned long current_stack_pointer asm(_ASM_SP); ^ /home/acme/git/linux/arch/x86/include/asm/asm.h:44:18: note: expanded from macro '_ASM_SP' #define _ASM_SP __ASM_REG(sp) ^ /home/acme/git/linux/arch/x86/include/asm/asm.h:27:32: note: expanded from macro '__ASM_REG' #define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg) ^ /home/acme/git/linux/arch/x86/include/asm/asm.h:18:29: note: expanded from macro '__ASM_SEL_RAW' # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) ^ /home/acme/git/linux/arch/x86/include/asm/asm.h:11:32: note: expanded from macro '__ASM_FORM_RAW' # define __ASM_FORM_RAW(x) #x ^ :4:1: note: expanded from here "esp" ^ 1 error generated. 
ERROR: unable to compile - Hint: Check error message shown above. Hint: You can also pre-compile it into .o using: clang -target bpf -O2 -c - with proper -I and -D options. Failed to compile test case: 'kbuild searching' test child finished with -1 ---- end ---- LLVM search and compile subtest 1: FAILED! Cc: Adrian Hunter Cc: Alexander Potapenko Cc: Alexei Starovoitov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Daniel Borkmann Cc: David Ahern Cc: Dmitriy Vyukov Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Wang Nan Cc: Yonghong Song Link: https://lkml.kernel.org/r/20171128175948.GL3298@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/asm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 219faaec51df..386a6900e206 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -136,6 +136,7 @@ #endif #ifndef __ASSEMBLY__ +#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -145,5 +146,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif +#endif #endif /* _ASM_X86_ASM_H */ -- cgit v1.2.3 From 8b5c51a45c39c6d4f05c6f87d05f54b6c28f7c36 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 14 Dec 2017 07:03:49 +0100 Subject: mtd: onenand: samsung: use devm_ function to simplify code and fix some leaks Convert all error handling code in 's3c_onenand_probe()' to resource-managed alternatives in order to simplify code. This fixes a resource leak if 'platform_get_resource()' fails at line 872. The 'request_irq()' at line 971 was also un-balanced. It is now resource-managed. 
Signed-off-by: Christophe JAILLET Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/samsung.c | 165 ++++++++---------------------------------- 1 file changed, 30 insertions(+), 135 deletions(-) diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index af0ac1a7bf8f..bad59f5cc56d 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -129,16 +129,13 @@ struct s3c_onenand { struct platform_device *pdev; enum soc_type type; void __iomem *base; - struct resource *base_res; void __iomem *ahb_addr; - struct resource *ahb_res; int bootram_command; void __iomem *page_buf; void __iomem *oob_buf; unsigned int (*mem_addr)(int fba, int fpa, int fsa); unsigned int (*cmd_map)(unsigned int type, unsigned int val); void __iomem *dma_addr; - struct resource *dma_res; unsigned long phys_base; struct completion complete; }; @@ -851,15 +848,14 @@ static int s3c_onenand_probe(struct platform_device *pdev) /* No need to check pdata. the platform data is optional */ size = sizeof(struct mtd_info) + sizeof(struct onenand_chip); - mtd = kzalloc(size, GFP_KERNEL); + mtd = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); if (!mtd) return -ENOMEM; - onenand = kzalloc(sizeof(struct s3c_onenand), GFP_KERNEL); - if (!onenand) { - err = -ENOMEM; - goto onenand_fail; - } + onenand = devm_kzalloc(&pdev->dev, sizeof(struct s3c_onenand), + GFP_KERNEL); + if (!onenand) + return -ENOMEM; this = (struct onenand_chip *) &mtd[1]; mtd->priv = this; @@ -870,26 +866,12 @@ static int s3c_onenand_probe(struct platform_device *pdev) s3c_onenand_setup(mtd); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) { - dev_err(&pdev->dev, "no memory resource defined\n"); - return -ENOENT; - goto ahb_resource_failed; - } + onenand->base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(onenand->base)) + return PTR_ERR(onenand->base); - onenand->base_res = request_mem_region(r->start, resource_size(r), - pdev->name); - if (!onenand->base_res) { - 
dev_err(&pdev->dev, "failed to request memory resource\n"); - err = -EBUSY; - goto resource_failed; - } + onenand->phys_base = r->start; - onenand->base = ioremap(r->start, resource_size(r)); - if (!onenand->base) { - dev_err(&pdev->dev, "failed to map memory resource\n"); - err = -EFAULT; - goto ioremap_failed; - } /* Set onenand_chip also */ this->base = onenand->base; @@ -898,40 +880,20 @@ static int s3c_onenand_probe(struct platform_device *pdev) if (onenand->type != TYPE_S5PC110) { r = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!r) { - dev_err(&pdev->dev, "no buffer memory resource defined\n"); - err = -ENOENT; - goto ahb_resource_failed; - } - - onenand->ahb_res = request_mem_region(r->start, resource_size(r), - pdev->name); - if (!onenand->ahb_res) { - dev_err(&pdev->dev, "failed to request buffer memory resource\n"); - err = -EBUSY; - goto ahb_resource_failed; - } - - onenand->ahb_addr = ioremap(r->start, resource_size(r)); - if (!onenand->ahb_addr) { - dev_err(&pdev->dev, "failed to map buffer memory resource\n"); - err = -EINVAL; - goto ahb_ioremap_failed; - } + onenand->ahb_addr = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(onenand->ahb_addr)) + return PTR_ERR(onenand->ahb_addr); /* Allocate 4KiB BufferRAM */ - onenand->page_buf = kzalloc(SZ_4K, GFP_KERNEL); - if (!onenand->page_buf) { - err = -ENOMEM; - goto page_buf_fail; - } + onenand->page_buf = devm_kzalloc(&pdev->dev, SZ_4K, + GFP_KERNEL); + if (!onenand->page_buf) + return -ENOMEM; /* Allocate 128 SpareRAM */ - onenand->oob_buf = kzalloc(128, GFP_KERNEL); - if (!onenand->oob_buf) { - err = -ENOMEM; - goto oob_buf_fail; - } + onenand->oob_buf = devm_kzalloc(&pdev->dev, 128, GFP_KERNEL); + if (!onenand->oob_buf) + return -ENOMEM; /* S3C doesn't handle subpage write */ mtd->subpage_sft = 0; @@ -939,28 +901,9 @@ static int s3c_onenand_probe(struct platform_device *pdev) } else { /* S5PC110 */ r = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!r) { - dev_err(&pdev->dev, "no 
dma memory resource defined\n"); - err = -ENOENT; - goto dma_resource_failed; - } - - onenand->dma_res = request_mem_region(r->start, resource_size(r), - pdev->name); - if (!onenand->dma_res) { - dev_err(&pdev->dev, "failed to request dma memory resource\n"); - err = -EBUSY; - goto dma_resource_failed; - } - - onenand->dma_addr = ioremap(r->start, resource_size(r)); - if (!onenand->dma_addr) { - dev_err(&pdev->dev, "failed to map dma memory resource\n"); - err = -EINVAL; - goto dma_ioremap_failed; - } - - onenand->phys_base = onenand->base_res->start; + onenand->dma_addr = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(onenand->dma_addr)) + return PTR_ERR(onenand->dma_addr); s5pc110_dma_ops = s5pc110_dma_poll; /* Interrupt support */ @@ -968,19 +911,19 @@ static int s3c_onenand_probe(struct platform_device *pdev) if (r) { init_completion(&onenand->complete); s5pc110_dma_ops = s5pc110_dma_irq; - err = request_irq(r->start, s5pc110_onenand_irq, - IRQF_SHARED, "onenand", &onenand); + err = devm_request_irq(&pdev->dev, r->start, + s5pc110_onenand_irq, + IRQF_SHARED, "onenand", + &onenand); if (err) { dev_err(&pdev->dev, "failed to get irq\n"); - goto scan_failed; + return err; } } } - if (onenand_scan(mtd, 1)) { - err = -EFAULT; - goto scan_failed; - } + if (onenand_scan(mtd, 1)) + return -EFAULT; if (onenand->type != TYPE_S5PC110) { /* S3C doesn't handle subpage write */ @@ -998,36 +941,6 @@ static int s3c_onenand_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mtd); return 0; - -scan_failed: - if (onenand->dma_addr) - iounmap(onenand->dma_addr); -dma_ioremap_failed: - if (onenand->dma_res) - release_mem_region(onenand->dma_res->start, - resource_size(onenand->dma_res)); - kfree(onenand->oob_buf); -oob_buf_fail: - kfree(onenand->page_buf); -page_buf_fail: - if (onenand->ahb_addr) - iounmap(onenand->ahb_addr); -ahb_ioremap_failed: - if (onenand->ahb_res) - release_mem_region(onenand->ahb_res->start, - resource_size(onenand->ahb_res)); 
-dma_resource_failed: -ahb_resource_failed: - iounmap(onenand->base); -ioremap_failed: - if (onenand->base_res) - release_mem_region(onenand->base_res->start, - resource_size(onenand->base_res)); -resource_failed: - kfree(onenand); -onenand_fail: - kfree(mtd); - return err; } static int s3c_onenand_remove(struct platform_device *pdev) @@ -1035,25 +948,7 @@ static int s3c_onenand_remove(struct platform_device *pdev) struct mtd_info *mtd = platform_get_drvdata(pdev); onenand_release(mtd); - if (onenand->ahb_addr) - iounmap(onenand->ahb_addr); - if (onenand->ahb_res) - release_mem_region(onenand->ahb_res->start, - resource_size(onenand->ahb_res)); - if (onenand->dma_addr) - iounmap(onenand->dma_addr); - if (onenand->dma_res) - release_mem_region(onenand->dma_res->start, - resource_size(onenand->dma_res)); - - iounmap(onenand->base); - release_mem_region(onenand->base_res->start, - resource_size(onenand->base_res)); - - kfree(onenand->oob_buf); - kfree(onenand->page_buf); - kfree(onenand); - kfree(mtd); + return 0; } -- cgit v1.2.3 From ad99be47726ccb0bcbc4b0fac12c20cba78e4c0d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 14 Dec 2017 07:03:50 +0100 Subject: mtd: onenand: samsung: return an error if 'mtd_device_parse_register()' fails If 'mtd_device_parse_register()' fails, we still return 0 which mean success. Return the error code instead, as done in all the other error handling paths. Signed-off-by: Christophe JAILLET Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/samsung.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index bad59f5cc56d..c5416d6ad571 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -937,6 +937,11 @@ static int s3c_onenand_probe(struct platform_device *pdev) err = mtd_device_parse_register(mtd, NULL, NULL, pdata ? pdata->parts : NULL, pdata ? 
pdata->nr_parts : 0); + if (err) { + dev_err(&pdev->dev, "failed to parse partitions and register the MTD device\n"); + onenand_release(mtd); + return err; + } platform_set_drvdata(pdev, mtd); -- cgit v1.2.3 From 0598344df0f093eba3c6e0af3caa934929c4b3aa Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 14 Dec 2017 07:03:51 +0100 Subject: mtd: onenand: samsung: Propagate the error returned by 'onenand_scan()' Propagate the error code returned by 'onenand_scan()' instead of a hard-coded -EFAULT. Signed-off-by: Christophe JAILLET Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/samsung.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index c5416d6ad571..7e84a1bb91f3 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -922,8 +922,9 @@ static int s3c_onenand_probe(struct platform_device *pdev) } } - if (onenand_scan(mtd, 1)) - return -EFAULT; + err = onenand_scan(mtd, 1); + if (err) + return err; if (onenand->type != TYPE_S5PC110) { /* S3C doesn't handle subpage write */ -- cgit v1.2.3 From 24f648c2ba5d4492ce6c923c9cf3fdb3c6d821bd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 14 Dec 2017 07:03:52 +0100 Subject: mtd: onenand: samsung: Remove a useless include This include is not needed, so remove it. 
Signed-off-by: Christophe JAILLET Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/samsung.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index 7e84a1bb91f3..dfdfb478ba35 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -25,8 +25,6 @@ #include #include -#include - #include "samsung.h" enum soc_type { -- cgit v1.2.3 From 234833991e14681f61cbfd93e65a5c976089cf11 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 18 Dec 2017 17:34:16 +0100 Subject: tipc: fix lost member events bug Group messages are not supposed to be returned to sender when the destination socket disappears. This is done correctly for regular traffic messages, by setting the 'dest_droppable' bit in the header. But we forget to do that in group protocol messages. This has the effect that such messages may sometimes bounce back to the sender, be perceived as a legitimate peer message, and wreak general havoc for the rest of the session. In particular, we have seen that a member in state LEAVING may go back to state RECLAIMED or REMITTED, hence causing suppression of an otherwise expected 'member down' event to the user. We fix this by setting the 'dest_droppable' bit even in group protocol messages. Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/group.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/group.c b/net/tipc/group.c index 95fec2c057d6..efb5714e7a85 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -648,6 +648,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, } else if (mtyp == GRP_REMIT_MSG) { msg_set_grp_remitted(hdr, m->window); } + msg_set_dest_droppable(hdr, true); __skb_queue_tail(xmitq, skb); } -- cgit v1.2.3 From 3f42f5fe31c8715a34064bfd7b788488d1ea2f7c Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 18 Dec 2017 18:13:34 +0100 Subject: tipc: remove leaving group member from all lists A group member going into state LEAVING should never go back to any other state before it is finally deleted. However, this might happen if the socket needs to send out a RECLAIM message during this interval. Since we forget to remove the leaving member from the group's 'active' or 'pending' list, the member might be selected for reclaiming, change state to RECLAIMING, and get stuck in this state instead of being deleted. This might lead to suppression of the expected 'member down' event to the receiver. We fix this by removing the member from all lists, except the RB tree, at the moment it goes into state LEAVING. Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/group.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index efb5714e7a85..b96ec429bb9b 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -699,6 +699,9 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, if (!m) return; m->bc_syncpt = msg_grp_bc_syncpt(hdr); + list_del_init(&m->list); + list_del_init(&m->congested); + *usr_wakeup = true; /* Wait until WITHDRAW event is received */ if (m->state != MBR_LEAVING) { @@ -710,8 +713,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, ehdr = buf_msg(m->event_msg); msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); __skb_queue_tail(inputq, m->event_msg); - *usr_wakeup = true; - list_del_init(&m->congested); return; case GRP_ADV_MSG: if (!m) @@ -863,6 +864,7 @@ void tipc_group_member_evt(struct tipc_group *grp, msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); __skb_queue_tail(inputq, skb); } + list_del_init(&m->list); list_del_init(&m->congested); } *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); -- cgit v1.2.3 From c505873eaece2b4aefd07d339dc7e1400e0235ac Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Mon, 18 Dec 2017 10:26:43 +0800 Subject: net: phy: marvell: Limit 88m1101 autoneg errata to 88E1145 as well. 88E1145 also needs this autoneg errata. Fixes: f2899788353c ("net: phy: marvell: Limit errata to 88m1101") Signed-off-by: Zhao Qiang Signed-off-by: David S.
Miller --- drivers/net/phy/marvell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index b5a8f750e433..26c9a11220ca 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -2073,7 +2073,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1145_config_init, - .config_aneg = &marvell_config_aneg, + .config_aneg = &m88e1101_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, -- cgit v1.2.3 From ac3241d5c81bf6e85095481435f29a4627ff820e Mon Sep 17 00:00:00 2001 From: Hemanth Puranik Date: Mon, 18 Dec 2017 11:27:47 +0530 Subject: net: qcom/emac: Change the order of mac up and sgmii open This patch fixes the order of mac_up and sgmii_open for the reasons noted below: - If open takes more time(if the SGMII block is not responding or if we want to do some delay based task) in this situation we will hit NETDEV watchdog - The main reason : We should signal to upper layers that we are ready to receive packets "only" when the entire path is initialized not the other way around, this is followed in the reset path where we do mac_down, sgmii_reset and mac_up. This also makes the driver uniform across the reset and open paths. - In the future there may be need for delay based tasks to be done in sgmii open which will result in NETDEV watchdog - As per the documentation the order of init should be sgmii, mac, rings and DMA Signed-off-by: Hemanth Puranik Acked-by: Timur Tabi Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 70c92b649b29..38c924bdd32e 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev) return ret; } - ret = emac_mac_up(adpt); + ret = adpt->phy.open(adpt); if (ret) { emac_mac_rx_tx_rings_free_all(adpt); free_irq(irq->irq, irq); return ret; } - ret = adpt->phy.open(adpt); + ret = emac_mac_up(adpt); if (ret) { - emac_mac_down(adpt); emac_mac_rx_tx_rings_free_all(adpt); free_irq(irq->irq, irq); + adpt->phy.close(adpt); return ret; } -- cgit v1.2.3 From 5c468674d17056148da06218d4da5d04baf22eac Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Dec 2017 14:07:25 +0800 Subject: sctp: fix the issue that a __u16 variable may overflow in sctp_ulpq_renege Now when reneging events in sctp_ulpq_renege(), the variable freed could be increased by a __u16 value twice while freed is of __u16 type. It means freed may overflow at the second addition. This patch is to fix it by using __u32 type for 'freed', while at it, also to remove 'if (chunk)' check, as all renege commands are generated in sctp_eat_data and it can't be NULL. Reported-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/sctp/ulpqueue.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index a71be33f3afe..e36ec5dd64c6 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { - struct sctp_association *asoc; - __u16 needed, freed; - - asoc = ulpq->asoc; + struct sctp_association *asoc = ulpq->asoc; + __u32 freed = 0; + __u16 needed; - if (chunk) { - needed = ntohs(chunk->chunk_hdr->length); - needed -= sizeof(struct sctp_data_chunk); - } else - needed = SCTP_DEFAULT_MAXWINDOW; - - freed = 0; + needed = ntohs(chunk->chunk_hdr->length) - + sizeof(struct sctp_data_chunk); if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { freed = sctp_ulpq_renege_order(ulpq, needed); - if (freed < needed) { + if (freed < needed) freed += sctp_ulpq_renege_frags(ulpq, needed - freed); - } } /* If able to free enough room, accept this chunk. */ - if (chunk && (freed >= needed)) { - int retval; - retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); + if (freed >= needed) { + int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); /* * Enter partial delivery if chunk has not been * delivered; otherwise, drain the reassembly queue. -- cgit v1.2.3 From d196975905b2bb227dc54547c03b3d9d0013805c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Dec 2017 14:13:17 +0800 Subject: sctp: add SCTP_CID_RECONF conversion in sctp_cname Whenever a new type of chunk is added, the corresponding conversion in sctp_cname should be added. Otherwise, in some places, pr_debug will print it as "unknown chunk". Fixes: cc16f00f6529 ("sctp: add support for generating stream reconf ssn reset request chunk") Signed-off-by: Xin Long Acked-by: Marcelo R. Leitner Acked-by: Neil Horman Signed-off-by: David S.
Miller --- net/sctp/debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 3f619fdcbf0a..291c97b07058 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid) case SCTP_CID_AUTH: return "AUTH"; + case SCTP_CID_RECONF: + return "RECONF"; + default: break; } -- cgit v1.2.3 From 84aeb437ab98a2bce3d4b2111c79723aedfceb33 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 18 Dec 2017 17:35:09 +0200 Subject: net: bridge: fix early call to br_stp_change_bridge_id and plug newlink leaks The early call to br_stp_change_bridge_id in bridge's newlink can cause a memory leak if an error occurs during the newlink because the fdb entries are not cleaned up if a different lladdr was specified, also another minor issue is that it generates fdb notifications with ifindex = 0. Another unrelated memory leak is the bridge sysfs entries which get added on NETDEV_REGISTER event, but are not cleaned up in the newlink error path. To remove this special case the call to br_stp_change_bridge_id is done after netdev register and we cleanup the bridge on changelink error via br_dev_delete to plug all leaks. This patch makes netlink bridge destruction on newlink error the same as dellink and ioctl del which is necessary since at that point we have a fully initialized bridge device. 
To reproduce the issue: $ ip l add br0 address 00:11:22:33:44:55 type bridge group_fwd_mask 1 RTNETLINK answers: Invalid argument $ rmmod bridge [ 1822.142525] ============================================================================= [ 1822.143640] BUG bridge_fdb_cache (Tainted: G O ): Objects remaining in bridge_fdb_cache on __kmem_cache_shutdown() [ 1822.144821] ----------------------------------------------------------------------------- [ 1822.145990] Disabling lock debugging due to kernel taint [ 1822.146732] INFO: Slab 0x0000000092a844b2 objects=32 used=2 fp=0x00000000fef011b0 flags=0x1ffff8000000100 [ 1822.147700] CPU: 2 PID: 13584 Comm: rmmod Tainted: G B O 4.15.0-rc2+ #87 [ 1822.148578] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 1822.150008] Call Trace: [ 1822.150510] dump_stack+0x78/0xa9 [ 1822.151156] slab_err+0xb1/0xd3 [ 1822.151834] ? __kmalloc+0x1bb/0x1ce [ 1822.152546] __kmem_cache_shutdown+0x151/0x28b [ 1822.153395] shutdown_cache+0x13/0x144 [ 1822.154126] kmem_cache_destroy+0x1c0/0x1fb [ 1822.154669] SyS_delete_module+0x194/0x244 [ 1822.155199] ? 
trace_hardirqs_on_thunk+0x1a/0x1c [ 1822.155773] entry_SYSCALL_64_fastpath+0x23/0x9a [ 1822.156343] RIP: 0033:0x7f929bd38b17 [ 1822.156859] RSP: 002b:00007ffd160e9a98 EFLAGS: 00000202 ORIG_RAX: 00000000000000b0 [ 1822.157728] RAX: ffffffffffffffda RBX: 00005578316ba090 RCX: 00007f929bd38b17 [ 1822.158422] RDX: 00007f929bd9ec60 RSI: 0000000000000800 RDI: 00005578316ba0f0 [ 1822.159114] RBP: 0000000000000003 R08: 00007f929bff5f20 R09: 00007ffd160e8a11 [ 1822.159808] R10: 00007ffd160e9860 R11: 0000000000000202 R12: 00007ffd160e8a80 [ 1822.160513] R13: 0000000000000000 R14: 0000000000000000 R15: 00005578316ba090 [ 1822.161278] INFO: Object 0x000000007645de29 @offset=0 [ 1822.161666] INFO: Object 0x00000000d5df2ab5 @offset=128 Fixes: 30313a3d5794 ("bridge: Handle IFLA_ADDRESS correctly when creating bridge device") Fixes: 5b8d5429daa0 ("bridge: netlink: register netdevice before executing changelink") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index d0ef0a8e8831..015f465c514b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, struct net_bridge *br = netdev_priv(dev); int err; + err = register_netdevice(dev); + if (err) + return err; + if (tb[IFLA_ADDRESS]) { spin_lock_bh(&br->lock); br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); spin_unlock_bh(&br->lock); } - err = register_netdevice(dev); - if (err) - return err; - err = br_changelink(dev, tb, data, extack); if (err) - unregister_netdevice(dev); + br_dev_delete(dev, NULL); + return err; } -- cgit v1.2.3 From bb422a738f6566f7439cd347d54e321e4fe92a9f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 18 Dec 2017 20:31:41 +0900 Subject: mm,vmscan: Make unregister_shrinker() no-op if register_shrinker() failed. 
Syzbot caught an oops at unregister_shrinker() because combination of commit 1d3d4437eae1bb29 ("vmscan: per-node deferred work") and fault injection made register_shrinker() fail and the caller of register_shrinker() did not check for failure. ---------- [ 554.881422] FAULT_INJECTION: forcing a failure. [ 554.881422] name failslab, interval 1, probability 0, space 0, times 0 [ 554.881438] CPU: 1 PID: 13231 Comm: syz-executor1 Not tainted 4.14.0-rc8+ #82 [ 554.881443] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 554.881445] Call Trace: [ 554.881459] dump_stack+0x194/0x257 [ 554.881474] ? arch_local_irq_restore+0x53/0x53 [ 554.881486] ? find_held_lock+0x35/0x1d0 [ 554.881507] should_fail+0x8c0/0xa40 [ 554.881522] ? fault_create_debugfs_attr+0x1f0/0x1f0 [ 554.881537] ? check_noncircular+0x20/0x20 [ 554.881546] ? find_next_zero_bit+0x2c/0x40 [ 554.881560] ? ida_get_new_above+0x421/0x9d0 [ 554.881577] ? find_held_lock+0x35/0x1d0 [ 554.881594] ? __lock_is_held+0xb6/0x140 [ 554.881628] ? check_same_owner+0x320/0x320 [ 554.881634] ? lock_downgrade+0x990/0x990 [ 554.881649] ? find_held_lock+0x35/0x1d0 [ 554.881672] should_failslab+0xec/0x120 [ 554.881684] __kmalloc+0x63/0x760 [ 554.881692] ? lock_downgrade+0x990/0x990 [ 554.881712] ? register_shrinker+0x10e/0x2d0 [ 554.881721] ? trace_event_raw_event_module_request+0x320/0x320 [ 554.881737] register_shrinker+0x10e/0x2d0 [ 554.881747] ? prepare_kswapd_sleep+0x1f0/0x1f0 [ 554.881755] ? _down_write_nest_lock+0x120/0x120 [ 554.881765] ? memcpy+0x45/0x50 [ 554.881785] sget_userns+0xbcd/0xe20 (...snipped...) 
[ 554.898693] kasan: CONFIG_KASAN_INLINE enabled [ 554.898724] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 554.898732] general protection fault: 0000 [#1] SMP KASAN [ 554.898737] Dumping ftrace buffer: [ 554.898741] (ftrace buffer empty) [ 554.898743] Modules linked in: [ 554.898752] CPU: 1 PID: 13231 Comm: syz-executor1 Not tainted 4.14.0-rc8+ #82 [ 554.898755] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 554.898760] task: ffff8801d1dbe5c0 task.stack: ffff8801c9e38000 [ 554.898772] RIP: 0010:__list_del_entry_valid+0x7e/0x150 [ 554.898775] RSP: 0018:ffff8801c9e3f108 EFLAGS: 00010246 [ 554.898780] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 554.898784] RDX: 0000000000000000 RSI: ffff8801c53c6f98 RDI: ffff8801c53c6fa0 [ 554.898788] RBP: ffff8801c9e3f120 R08: 1ffff100393c7d55 R09: 0000000000000004 [ 554.898791] R10: ffff8801c9e3ef70 R11: 0000000000000000 R12: 0000000000000000 [ 554.898795] R13: dffffc0000000000 R14: 1ffff100393c7e45 R15: ffff8801c53c6f98 [ 554.898800] FS: 0000000000000000(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 [ 554.898804] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 554.898807] CR2: 00000000dbc23000 CR3: 00000001c7269000 CR4: 00000000001406e0 [ 554.898813] DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 [ 554.898816] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 [ 554.898818] Call Trace: [ 554.898828] unregister_shrinker+0x79/0x300 [ 554.898837] ? perf_trace_mm_vmscan_writepage+0x750/0x750 [ 554.898844] ? down_write+0x87/0x120 [ 554.898851] ? deactivate_super+0x139/0x1b0 [ 554.898857] ? down_read+0x150/0x150 [ 554.898864] ? 
check_same_owner+0x320/0x320 [ 554.898875] deactivate_locked_super+0x64/0xd0 [ 554.898883] deactivate_super+0x141/0x1b0 ---------- Since allowing register_shrinker() callers to call unregister_shrinker() when register_shrinker() failed can simplify error recovery path, this patch makes unregister_shrinker() no-op when register_shrinker() failed. Also, reset shrinker->nr_deferred in case unregister_shrinker() was by error called twice. Signed-off-by: Tetsuo Handa Signed-off-by: Aliaksei Karaliou Reported-by: syzbot Cc: Glauber Costa Cc: Al Viro Signed-off-by: Al Viro --- mm/vmscan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index c02c850ea349..47d5ced51f2d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -297,10 +297,13 @@ EXPORT_SYMBOL(register_shrinker); */ void unregister_shrinker(struct shrinker *shrinker) { + if (!shrinker->nr_deferred) + return; down_write(&shrinker_rwsem); list_del(&shrinker->list); up_write(&shrinker_rwsem); kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; } EXPORT_SYMBOL(unregister_shrinker); -- cgit v1.2.3 From 6623c0fba10ef45b64ca213ad5dec926f37fa9a0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 15 Dec 2017 16:10:20 +0000 Subject: net: phy: marvell: avoid pause mode on SGMII-to-Copper for 88e151x Observed on the 88e1512 in SGMII-to-Copper mode, negotiating pause is unreliable. While the pause bits can be set in the advertisment register, they clear shortly after negotiation with a link partner commences irrespective of the cause of the negotiation. While these bits may be correctly conveyed to the link partner on the first negotiation, a subsequent negotiation (eg, due to negotiation restart by the link partner, or reconnection of the cable) will result in the link partner seeing these bits as zero, while the kernel believes that it has advertised pause modes. 
This leads to the local kernel evaluating (eg) symmetric pause mode, while the remote end evaluates that we have no pause mode capability. Since we can't guarantee the advertisement, disable pause mode support with this PHY when used in SGMII-to-Copper mode. The 88e1510 in RGMII-to-Copper mode appears to behave correctly. Reviewed-by: Andrew Lunn Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 26c9a11220ca..82104edca393 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -879,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev) /* SGMII-to-Copper mode initialization */ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + u32 pause; + /* Select page 18 */ err = marvell_set_page(phydev, 18); if (err < 0) @@ -902,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev) err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); if (err < 0) return err; + + /* There appears to be a bug in the 88e1512 when used in + * SGMII to copper mode, where the AN advertisment register + * clears the pause bits each time a negotiation occurs. + * This means we can never be truely sure what was advertised, + * so disable Pause support.
+ */ + pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause; + phydev->supported &= ~pause; + phydev->advertising &= ~pause; } return m88e1121_config_init(phydev); -- cgit v1.2.3 From 9ee332d99e4d5a97548943b81c54668450ce641b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 18 Dec 2017 15:05:07 -0500 Subject: sget(): handle failures of register_shrinker() Signed-off-by: Al Viro --- fs/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/super.c b/fs/super.c index 7ff1349609e4..06bd25d90ba5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -517,7 +517,11 @@ retry: hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - register_shrinker(&s->s_shrink); + err = register_shrinker(&s->s_shrink); + if (err) { + deactivate_locked_super(s); + s = ERR_PTR(err); + } return s; } -- cgit v1.2.3 From ab14436065c8066c265540312742390d6d07ddd2 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 16 Dec 2017 00:52:39 +0300 Subject: net: phy: xgene: disable clk on error paths There are several error paths in xgene_mdio_probe(), where clk is left undisabled. The patch fixes them. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio-xgene.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c index bfd3090fb055..07c6048200c6 100644 --- a/drivers/net/phy/mdio-xgene.c +++ b/drivers/net/phy/mdio-xgene.c @@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata) } ret = xgene_enet_ecc_init(pdata); - if (ret) + if (ret) { + if (pdata->dev->of_node) + clk_disable_unprepare(pdata->clk); return ret; + } xgene_gmac_reset(pdata); return 0; @@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev) return ret; mdio_bus = mdiobus_alloc(); - if (!mdio_bus) - return -ENOMEM; + if (!mdio_bus) { + ret = -ENOMEM; + goto out_clk; + } mdio_bus->name = "APM X-Gene MDIO bus"; @@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev) mdio_bus->phy_mask = ~0; ret = mdiobus_register(mdio_bus); if (ret) - goto out; + goto out_mdiobus; acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1, acpi_register_phy, NULL, mdio_bus, NULL); @@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev) } if (ret) - goto out; + goto out_mdiobus; pdata->mdio_bus = mdio_bus; xgene_mdio_status = true; return 0; -out: +out_mdiobus: mdiobus_free(mdio_bus); +out_clk: + if (dev->of_node) + clk_disable_unprepare(pdata->clk); + return ret; } -- cgit v1.2.3 From 14cb0dc6479dc5ebc63b3a459a5d89a2f1b39fed Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 15:40:43 +0800 Subject: block: don't let passthrough IO go into .make_request_fn() Commit a8821f3f3("block: Improvements to bounce-buffer handling") tries to make sure that the bio to .make_request_fn won't exceed BIO_MAX_PAGES, but ignores that passthrough I/O can use blk_queue_bounce() too. 
Especially, passthrough IO may not be sector-aligned, and the check of 'sectors < bio_sectors(*bio_orig)' inside __blk_queue_bounce() may become true even though the max bvec number doesn't exceed BIO_MAX_PAGES, which then causes the bio to be split and the original passthrough bio to be submitted to generic_make_request(). This patch fixes this issue by checking if the bio is passthrough IO, and using bio_kmalloc() to allocate the cloned passthrough bio. Cc: NeilBrown Fixes: a8821f3f3("block: Improvements to bounce-buffer handling") Tested-by: Michele Ballabio Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/bounce.c | 6 ++++-- include/linux/blkdev.h | 21 +++++++++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/block/bounce.c b/block/bounce.c index fceb1a96480b..1d05c422c932 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, unsigned i = 0; bool bounce = false; int sectors = 0; + bool passthrough = bio_is_passthrough(*bio_orig); bio_for_each_segment(from, *bio_orig, iter) { if (i++ < BIO_MAX_PAGES) @@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, if (!bounce) return; - if (sectors < bio_sectors(*bio_orig)) { + if (!passthrough && sectors < bio_sectors(*bio_orig)) { bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split); bio_chain(bio, *bio_orig); generic_make_request(*bio_orig); *bio_orig = bio; } - bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ?
NULL : + bounce_bio_set); bio_for_each_segment_all(to, bio, i) { struct page *page = to->bv_page; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8089ca17db9a..abd06f540863 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -241,14 +241,24 @@ struct request { struct request *next_rq; }; +static inline bool blk_op_is_scsi(unsigned int op) +{ + return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT; +} + +static inline bool blk_op_is_private(unsigned int op) +{ + return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; +} + static inline bool blk_rq_is_scsi(struct request *rq) { - return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; + return blk_op_is_scsi(req_op(rq)); } static inline bool blk_rq_is_private(struct request *rq) { - return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; + return blk_op_is_private(req_op(rq)); } static inline bool blk_rq_is_passthrough(struct request *rq) @@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq) return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); } +static inline bool bio_is_passthrough(struct bio *bio) +{ + unsigned op = bio_op(bio); + + return blk_op_is_scsi(op) || blk_op_is_private(op); +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; -- cgit v1.2.3 From 0abc2a10389f0c9070f76ca906c7382788036b93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Dec 2017 15:40:44 +0800 Subject: block: fix blk_rq_append_bio Commit caa4b02476e3(blk-map: call blk_queue_bounce from blk_rq_append_bio) moves blk_queue_bounce() into blk_rq_append_bio(), but don't consider the fact that the bounced bio becomes invisible to caller since the parameter type is 'struct bio *'. Make it a pointer to a pointer to a bio, so the caller sees the right bio also after a bounce. 
Fixes: caa4b02476e3 ("blk-map: call blk_queue_bounce from blk_rq_append_bio") Cc: Christoph Hellwig Reported-by: Michele Ballabio (handling failure of blk_rq_append_bio(), only call bio_get() after blk_rq_append_bio() returns OK) Tested-by: Michele Ballabio Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-map.c | 38 ++++++++++++++++++++++---------------- drivers/scsi/osd/osd_initiator.c | 4 +++- drivers/target/target_core_pscsi.c | 4 ++-- include/linux/blkdev.h | 2 +- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index b21f8e86f120..d3a94719f03f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -12,22 +12,29 @@ #include "blk.h" /* - * Append a bio to a passthrough request. Only works can be merged into - * the request based on the driver constraints. + * Append a bio to a passthrough request. Only works if the bio can be merged + * into the request based on the driver constraints. */ -int blk_rq_append_bio(struct request *rq, struct bio *bio) +int blk_rq_append_bio(struct request *rq, struct bio **bio) { - blk_queue_bounce(rq->q, &bio); + struct bio *orig_bio = *bio; + + blk_queue_bounce(rq->q, bio); if (!rq->bio) { - blk_rq_bio_prep(rq->q, rq, bio); + blk_rq_bio_prep(rq->q, rq, *bio); } else { - if (!ll_back_merge_fn(rq->q, rq, bio)) + if (!ll_back_merge_fn(rq->q, rq, *bio)) { + if (orig_bio != *bio) { + bio_put(*bio); + *bio = orig_bio; + } return -EINVAL; + } - rq->biotail->bi_next = bio; - rq->biotail = bio; - rq->__data_len += bio->bi_iter.bi_size; + rq->biotail->bi_next = *bio; + rq->biotail = *bio; + rq->__data_len += (*bio)->bi_iter.bi_size; } return 0; @@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq, * We link the bounce buffer in and could have to traverse it * later so we have to get a ref to prevent it from being freed */ - ret = blk_rq_append_bio(rq, bio); - bio_get(bio); + ret = blk_rq_append_bio(rq, &bio); if (ret) { - bio_endio(bio); 
__blk_rq_unmap_user(orig_bio); - bio_put(bio); return ret; } + bio_get(bio); return 0; } @@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, int reading = rq_data_dir(rq) == READ; unsigned long addr = (unsigned long) kbuf; int do_copy = 0; - struct bio *bio; + struct bio *bio, *orig_bio; int ret; if (len > (queue_max_hw_sectors(q) << 9)) @@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (do_copy) rq->rq_flags |= RQF_COPY_USER; - ret = blk_rq_append_bio(rq, bio); + orig_bio = bio; + ret = blk_rq_append_bio(rq, &bio); if (unlikely(ret)) { /* request is too big */ - bio_put(bio); + bio_put(orig_bio); return ret; } diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index a4f28b7e4c65..e18877177f1b 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write, return req; for_each_bio(bio) { - ret = blk_rq_append_bio(req, bio); + struct bio *bounce_bio = bio; + + ret = blk_rq_append_bio(req, &bounce_bio); if (ret) return ERR_PTR(ret); } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 7c69b4a9694d..0d99b242e82e 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, " %d i: %d bio: %p, allocating another" " bio\n", bio->bi_vcnt, i, bio); - rc = blk_rq_append_bio(req, bio); + rc = blk_rq_append_bio(req, &bio); if (rc) { pr_err("pSCSI: failed to append bio\n"); goto fail; @@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } if (bio) { - rc = blk_rq_append_bio(req, bio); + rc = blk_rq_append_bio(req, &bio); if (rc) { pr_err("pSCSI: failed to append bio\n"); goto fail; diff --git a/include/linux/blkdev.h 
b/include/linux/blkdev.h index abd06f540863..100d0df38026 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -965,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); -extern int blk_rq_append_bio(struct request *rq, struct bio *bio); +extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); -- cgit v1.2.3 From 8b7e9d9e2d8b4de6f0d5d7a5fc63f48b1fbcf4d4 Mon Sep 17 00:00:00 2001 From: Anthony Kim Date: Mon, 18 Dec 2017 11:50:48 -0800 Subject: Input: hideep - fix compile error due to missing include file gpiod_() API requires including "linux/gpio/consumer.h". Also, we are not using the legacy API nor the static board files descriptions, so no need to include gpio.h nor gpio/machine.h. 
Reported-by: kbuild test robot Signed-off-by: Anthony Kim Patchwork-Id: 10094831 Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/hideep.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/hideep.c b/drivers/input/touchscreen/hideep.c index fc080a7c2e1f..f1cd4dd9a4a3 100644 --- a/drivers/input/touchscreen/hideep.c +++ b/drivers/input/touchscreen/hideep.c @@ -10,8 +10,7 @@ #include #include #include -#include -#include +#include #include #include #include -- cgit v1.2.3 From 34112bf4935dabe3c1d1fd42842ed771e279bf61 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 6 Nov 2017 16:20:33 +0100 Subject: drm/nouveau/fbcon: fix NULL pointer access in nouveau_fbcon_destroy When the fbcon object is initialized, but nouveau_fbcon_create is not called, we run into a NULL pointer access within nouveau_fbcon_destroy when unloading nouveau. The call to drm_fb_helper_funcs.fb_probe is deferred until there is a display for real since 4.14, that's why fbcon->helper.fb is still not set.
Signed-off-by: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index c533d8e04afc..be7357bf2246 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon) drm_fb_helper_unregister_fbi(&fbcon->helper); drm_fb_helper_fini(&fbcon->helper); - if (nouveau_fb->nvbo) { + if (nouveau_fb && nouveau_fb->nvbo) { nouveau_vma_del(&nouveau_fb->vma); nouveau_bo_unmap(nouveau_fb->nvbo); nouveau_bo_unpin(nouveau_fb->nvbo); -- cgit v1.2.3 From f60707a69a225f2dd87f42628b44e24ceb219d28 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Dec 2017 10:49:35 +1000 Subject: drm/nouveau/bios/dp: support DP Info Table 2.0 Reported-by: Hans de Goede Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c index 972370ed36f0..7c7efa4ea0d0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c @@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len) if (data) { *ver = nvbios_rd08(bios, data + 0x00); switch (*ver) { + case 0x20: case 0x21: case 0x30: case 0x40: @@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx, if (data && idx < *cnt) { u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len); switch (*ver * !!outp) { + case 0x20: case 0x21: case 0x30: *hdr = nvbios_rd08(bios, data + 0x04); @@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx, info->type = nvbios_rd16(bios, data + 0x00); info->mask = nvbios_rd16(bios, data + 0x02); switch (*ver) { + case 0x20: + info->mask 
|= 0x00c0; /* match any link */ + /* fall-through */ case 0x21: case 0x30: info->flags = nvbios_rd08(bios, data + 0x05); info->script[0] = nvbios_rd16(bios, data + 0x06); info->script[1] = nvbios_rd16(bios, data + 0x08); - info->lnkcmp = nvbios_rd16(bios, data + 0x0a); + if (*len >= 0x0c) + info->lnkcmp = nvbios_rd16(bios, data + 0x0a); if (*len >= 0x0f) { info->script[2] = nvbios_rd16(bios, data + 0x0c); info->script[3] = nvbios_rd16(bios, data + 0x0e); @@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx, memset(info, 0x00, sizeof(*info)); if (data) { switch (*ver) { + case 0x20: case 0x21: info->dc = nvbios_rd08(bios, data + 0x02); info->pe = nvbios_rd08(bios, data + 0x03); -- cgit v1.2.3 From 81a24b9ae8eea95b74337c253059da761043ed06 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Dec 2017 11:08:52 +1000 Subject: drm/nouveau/imem/nv50: fix refcount_t warning Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index 1ba7289684aa..db48a1daca0c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; - refcount_inc(&iobj->maps); + refcount_set(&iobj->maps, 1); } mutex_unlock(&imem->subdev.mutex); -- cgit v1.2.3 From a121027d2747168df0aac0c3da35509eea39f61c Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 24 Nov 2017 03:56:26 +0100 Subject: drm/nouveau/pci: do a msi rearm on init On my GP107 when I load nouveau after unloading it, for some reason the GPU stopped sending or the CPU stopped receiving interrupts if MSI was enabled. Doing a rearm once before getting any interrupts fixes this. 
Signed-off-by: Karol Herbst Reviewed-by: Thierry Reding Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c index b1b1f3626b96..deb96de54b00 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c @@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev) return ret; pci->irq = pdev->irq; + + /* Ensure MSI interrupts are armed, for the case where there are + * already interrupts pending (for whatever reason) at load time. + */ + if (pci->msi) + pci->func->msi_rearm(pci); + return ret; } -- cgit v1.2.3 From 6cb0f2a39d3b7ccdd7269af4ddadb38e78aee744 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Dec 2017 15:04:32 +1000 Subject: drm/nouveau/mmu/gp10b: use correct implementation Reported-by: Mikko Perttunen Fixes: 6359c98224 ("drm/nouveau/mmu/gp10b: fork from gf100") Signed-off-by: Ben Skeggs Tested-by: Thierry Reding --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index e14643615698..00eeaaffeae5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2369,7 +2369,7 @@ nv13b_chipset = { .imem = gk20a_instmem_new, .ltc = gp100_ltc_new, .mc = gp10b_mc_new, - .mmu = gf100_mmu_new, + .mmu = gp10b_mmu_new, .secboot = gp10b_secboot_new, .pmu = gm20b_pmu_new, .timer = gk20a_timer_new, -- cgit v1.2.3 From f29f18eb952bc3e71deedf8bd8fc902f66853c48 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 7 Dec 2017 15:25:14 +1000 Subject: drm/nouveau: avoid GPU page sizes > PAGE_SIZE for buffer objects in host memory While the Tegra (GK20A, GM20B, GP10B) MMUs support large pages in host memory, we're 
currently lacking IOMMU support for merging system pages into large enough chunks to be mapped as such by the GPU. The core VMM code actually supports automatically determining the best page size to map with, which is intended for these situations, but for various complicated reasons the DRM is currently forcing the page size selection on a per-BO basis. This should fix breakage reported on Tegra GPUs in the meantime, until one or both of the above issues are resolved properly. Reported-by: Mikko Perttunen Fixes: 7dc6a446da7c ("drm/nouveau: improve selection of GPU page size") Signed-off-by: Ben Skeggs Tested-by: Thierry Reding --- drivers/gpu/drm/nouveau/nouveau_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2615912430cc..42c1827bbb8e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE && (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram) continue; - if ((flags & TTM_PL_FLAG_TT ) && !vmm->page[i].host) + if ((flags & TTM_PL_FLAG_TT) && + (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; /* Select this page size if it's the first that supports -- cgit v1.2.3 From 74a39954a4900a7dea7010e3063e2bf16b23934b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 14 Dec 2017 11:19:27 +1000 Subject: drm/nouveau: use alternate memory type for system-memory buffers with kind != 0 Fixes bug on Tegra where we'd strip kind information from system memory (ie. all) buffers, resulting in misrendering. Behaviour on dGPU should be unchanged. 
Reported-by: Thierry Reding Fixes: d7722134b8 ("drm/nouveau: switch over to new memory and vmm interfaces") Signed-off-by: Ben Skeggs Tested-by: Thierry Reding --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 11 ++++++++-- drivers/gpu/drm/nouveau/nouveau_mem.c | 6 +++--- drivers/gpu/drm/nouveau/nouveau_ttm.c | 39 ++++++++++++++++++++++++++--------- 4 files changed, 42 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 42c1827bbb8e..435ff8662cfa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, /* Determine if we can get a cache-coherent map, forcing * uncached mapping if we can't. */ - if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) + if (!nouveau_drm_use_coherent_gpu_mapping(drm)) nvbo->force_coherent = true; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index e86b8220a4bb..6a1b1debe5b8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -156,8 +156,8 @@ struct nouveau_drm { struct nvif_object copy; int mtrr; int type_vram; - int type_host; - int type_ncoh; + int type_host[2]; + int type_ncoh[2]; } ttm; /* GEM interface support */ @@ -216,6 +216,13 @@ nouveau_drm(struct drm_device *dev) return dev->dev_private; } +static inline bool +nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm) +{ + struct nvif_mmu *mmu = &drm->client.mmu; + return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED); +} + int nouveau_pmops_suspend(struct device *); int nouveau_pmops_resume(struct device *); bool nouveau_pmops_runtime(void); diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 589a9621db76..c002f8968507 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ 
b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt) u8 type; int ret; - if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED) - type = drm->ttm.type_ncoh; + if (!nouveau_drm_use_coherent_gpu_mapping(drm)) + type = drm->ttm.type_ncoh[!!mem->kind]; else - type = drm->ttm.type_host; + type = drm->ttm.type_host[0]; if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND)) mem->comp = mem->kind = 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 08b974b30482..dff51a0ee028 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm) drm->ttm.mem_global_ref.release = NULL; } -int -nouveau_ttm_init(struct nouveau_drm *drm) +static int +nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind) { - struct nvkm_device *device = nvxx_device(&drm->client.device); - struct nvkm_pci *pci = device->pci; struct nvif_mmu *mmu = &drm->client.mmu; - struct drm_device *dev = drm->dev; - int typei, ret; + int typei; typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | - NVIF_MEM_COHERENT); + kind | NVIF_MEM_COHERENT); if (typei < 0) return -ENOSYS; - drm->ttm.type_host = typei; + drm->ttm.type_host[!!kind] = typei; - typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE); + typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind); if (typei < 0) return -ENOSYS; - drm->ttm.type_ncoh = typei; + drm->ttm.type_ncoh[!!kind] = typei; + return 0; +} + +int +nouveau_ttm_init(struct nouveau_drm *drm) +{ + struct nvkm_device *device = nvxx_device(&drm->client.device); + struct nvkm_pci *pci = device->pci; + struct nvif_mmu *mmu = &drm->client.mmu; + struct drm_device *dev = drm->dev; + int typei, ret; + + ret = nouveau_ttm_init_host(drm, 0); + if (ret) + return ret; + + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA && + 
drm->client.device.info.chipset != 0x50) { + ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND); + if (ret) + return ret; + } if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC && drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) { -- cgit v1.2.3 From c682ccc4962a8fab949e1f2d7325b3e825dbf6d1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 18 Dec 2017 14:09:57 +0100 Subject: bpf: fix broken BPF selftest build on s390 With 720f228e8d31 ("bpf: fix broken BPF selftest build") the inclusion of arch-specific header files changed. Including the asm/bpf_perf_event.h on s390, correctly includes the s390 specific header file. This header file tries then to include the s390 asm/ptrace.h and the build fails with: cc -Wall -O2 -I../../../include/uapi -I../../../lib -I../../../../include/generated -I../../../include test_verifier.c +/root/git/linux/tools/testing/selftests/bpf/libbpf.a /root/git/linux/tools/testing/selftests/bpf/cgroup_helpers.c -lcap -lelf -o +/root/git/linux/tools/testing/selftests/bpf/test_verifier In file included from ../../../include/uapi/asm/bpf_perf_event.h:4:0, from ../../../include/uapi/linux/bpf_perf_event.h:11, from test_verifier.c:29: ../../../include/uapi/../../arch/s390/include/uapi/asm/bpf_perf_event.h:7:9: error: unknown type name 'user_pt_regs' typedef user_pt_regs bpf_user_pt_regs_t; ^~~~~~~~~~~~ make: *** [../lib.mk:109: /root/git/linux/tools/testing/selftests/bpf/test_verifier] Error 1 This is caused by a recent update to the s390 asm/ptrace.h file that is not (yet) available in the local installation. That means, the s390 asm/ptrace.h must be included from the tools/arch/s390 directory. Because there is no proper framework to deal with asm specific includes in tools/, slightly modify the s390 asm/bpf_perf_event.h to include the local ptrace.h header file. 
See also discussion on https://marc.info/?l=linux-s390&m=151359424420691&w=2 Please note that this needs to be preserved until tools/ is able to correctly handle asm specific headers. References: https://marc.info/?l=linux-s390&m=151359424420691&w=2 Fixes: 720f228e8d31 ("bpf: fix broken BPF selftest build") Signed-off-by: Hendrik Brueckner Cc: Daniel Borkmann Cc: Hendrik Brueckner Cc: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/arch/s390/include/uapi/asm/bpf_perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h index cefe7c7cd4f6..0a8e37a519f2 100644 --- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h +++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h @@ -2,7 +2,7 @@ #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ #define _UAPI__ASM_BPF_PERF_EVENT_H__ -#include +#include "ptrace.h" typedef user_pt_regs bpf_user_pt_regs_t; -- cgit v1.2.3 From 86ddd2db1f75a30f21a4c4de7a29249ee8c37ed8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 18 Dec 2017 15:30:48 -0800 Subject: PM / wakeup: only recommend "call"ing device_init_wakeup() once I'll admit admit it: I've written bad driver code that tries to configure a device's wake IRQ without having called device_init_wakeup() first. But do you really have to ask ask me twice? Signed-off-by: Brian Norris Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/wakeup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 38559f04db2c..cb72965b3281 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -298,7 +298,7 @@ int device_wakeup_attach_irq(struct device *dev, ws = dev->power.wakeup; if (!ws) { - dev_err(dev, "forgot to call call device_init_wakeup?\n"); + dev_err(dev, "forgot to call device_init_wakeup?\n"); return -EINVAL; } -- cgit v1.2.3 From 182dc9c7f217146d69d9c0b75c150c0314b9b170 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 18 Dec 2017 16:33:36 +1100 Subject: powerpc/kernel: Print actual address of regs when oopsing When we oops or otherwise call show_regs() we print the address of the regs structure. Being able to see the address is fairly useful, firstly to verify that the regs pointer is not completely bogus, and secondly it allows you to dump the regs and surrounding memory with a debugger if you have one. In the normal case the regs will be located somewhere on the stack, so printing their location discloses no further information than printing the stack pointer does already. So switch to %px and print the actual address, not the hashed value. 
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5acb5a176dbe..72be0c32e902 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs) printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); - printk("REGS: %p TRAP: %04lx %s (%s)\n", + printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); -- cgit v1.2.3 From 81b6c999897919d5a16fedc018fe375dbab091c5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 13 Dec 2017 14:21:37 +0100 Subject: scsi: core: check for device state in __scsi_remove_target() As it turned out device_get() doesn't use kref_get_unless_zero(), so we will be always getting a device pointer. Consequently, we need to check for the device state in __scsi_remove_target() to avoid tripping over deleted objects. Fixes: fbce4d97fd43 ("scsi: fixup kernel warning during rmmod()") Reported-by: Jason Yan Signed-off-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index a9996c16f4ae..26ce17178401 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1415,7 +1415,10 @@ static void __scsi_remove_target(struct scsi_target *starget) * check. 
*/ if (sdev->channel != starget->channel || - sdev->id != starget->id || + sdev->id != starget->id) + continue; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL || !get_device(&sdev->sdev_gendev)) continue; spin_unlock_irqrestore(shost->host_lock, flags); -- cgit v1.2.3 From 5a15f289ee87eaf33f13f08a4909ec99d837ec5f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Dec 2017 23:36:57 +0100 Subject: ALSA: usb-audio: Fix the missing ctl name suffix at parsing SU The commit 89b89d121ffc ("ALSA: usb-audio: Add check return value for usb_string()") added the check of the return value from snd_usb_copy_string_desc(), which is correct per se, but it introduced a regression. In the original code, either the "Clock Source", "Playback Source" or "Capture Source" suffix is added after the terminal string, while the commit changed it to add the suffix only when get_term_name() is failing. It ended up with an incorrect ctl name like "PCM" instead of "PCM Capture Source". Also, even the original code has a similar bug: when the ctl name is generated from snd_usb_copy_string_desc() for the given iSelector, it also doesn't put the suffix. This patch addresses these issues: the suffix is added always when no static mapping is found. Also the patch tries to put more comments and cleans up the if/else block for better readability in order to avoid the same pitfall again. 
Fixes: 89b89d121ffc ("ALSA: usb-audio: Add check return value for usb_string()") Reported-and-tested-by: Mauro Santos Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index afc208e1c756..60ebc99ae323 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, kctl->private_value = (unsigned long)namelist; kctl->private_free = usb_mixer_selector_elem_free; - nameid = uac_selector_unit_iSelector(desc); + /* check the static mapping table at first */ len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name)); - if (len) - ; - else if (nameid) - len = snd_usb_copy_string_desc(state, nameid, kctl->id.name, - sizeof(kctl->id.name)); - else - len = get_term_name(state, &state->oterm, - kctl->id.name, sizeof(kctl->id.name), 0); - if (!len) { - strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); + /* no mapping ? */ + /* if iSelector is given, use it */ + nameid = uac_selector_unit_iSelector(desc); + if (nameid) + len = snd_usb_copy_string_desc(state, nameid, + kctl->id.name, + sizeof(kctl->id.name)); + /* ... or pick up the terminal name at next */ + if (!len) + len = get_term_name(state, &state->oterm, + kctl->id.name, sizeof(kctl->id.name), 0); + /* ... 
or use the fixed string "USB" as the last resort */ + if (!len) + strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name)); + /* and add the proper suffix */ if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR) append_ctl_name(kctl, " Clock Source"); else if ((state->oterm.type & 0xff00) == 0x0100) -- cgit v1.2.3 From acf568ee859f098279eadf551612f103afdacb4e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 15 Dec 2017 16:40:44 +1100 Subject: xfrm: Reinject transport-mode packets through tasklet This is an old bugbear of mine: https://www.mail-archive.com/netdev@vger.kernel.org/msg03894.html By crafting special packets, it is possible to cause recursion in our kernel when processing transport-mode packets at levels that are only limited by packet size. The easiest one is with DNAT, but an even worse one is where UDP encapsulation is used in which case you just have to insert a UDP encapsulation header in between each level of recursion. This patch avoids this problem by reinjecting transport-mode packets through a tasklet.
Fixes: b05e106698d9 ("[IPV4/6]: Netfilter IPsec input hooks") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_input.c | 12 ++++++++++- net/ipv6/xfrm6_input.c | 10 ++++++++- net/xfrm/xfrm_input.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dc28a98ce97c..ae35991b5877 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1570,6 +1570,9 @@ int xfrm_init_state(struct xfrm_state *x); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); +int xfrm_trans_queue(struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)); int xfrm_output_resume(struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index e50b7fea57ee..bcfc00e88756 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } +static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dst_input(skb); +} + static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, iph->tos, skb->dev)) goto drop; } - return dst_input(skb); + + if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2)) + goto drop; + + return 0; drop: kfree_skb(skb); return NET_RX_DROP; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index fe04e23af986..841f4a07438e 100644 --- a/net/ipv6/xfrm6_input.c +++ 
b/net/ipv6/xfrm6_input.c @@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, } EXPORT_SYMBOL(xfrm6_rcv_spi); +static int xfrm6_transport_finish2(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + if (xfrm_trans_queue(skb, ip6_rcv_finish)) + __kfree_skb(skb); + return -1; +} + int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); @@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, - ip6_rcv_finish); + xfrm6_transport_finish2); return -1; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index da6447389ffb..3f6f6f8c9fa5 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -8,15 +8,29 @@ * */ +#include +#include #include #include #include +#include #include #include #include #include #include +struct xfrm_trans_tasklet { + struct tasklet_struct tasklet; + struct sk_buff_head queue; +}; + +struct xfrm_trans_cb { + int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb); +}; + +#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) + static struct kmem_cache *secpath_cachep __read_mostly; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); @@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; static struct gro_cells gro_cells; static struct net_device xfrm_napi_dev; +static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet); + int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; @@ -477,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr) } EXPORT_SYMBOL(xfrm_input_resume); +static void xfrm_trans_reinject(unsigned long data) +{ + struct xfrm_trans_tasklet *trans = (void *)data; + struct sk_buff_head queue; + struct sk_buff *skb; + + __skb_queue_head_init(&queue); + skb_queue_splice_init(&trans->queue, &queue); 
+ + while ((skb = __skb_dequeue(&queue))) + XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb); +} + +int xfrm_trans_queue(struct sk_buff *skb, + int (*finish)(struct net *, struct sock *, + struct sk_buff *)) +{ + struct xfrm_trans_tasklet *trans; + + trans = this_cpu_ptr(&xfrm_trans_tasklet); + + if (skb_queue_len(&trans->queue) >= netdev_max_backlog) + return -ENOBUFS; + + XFRM_TRANS_SKB_CB(skb)->finish = finish; + skb_queue_tail(&trans->queue, skb); + tasklet_schedule(&trans->tasklet); + return 0; +} +EXPORT_SYMBOL(xfrm_trans_queue); + void __init xfrm_input_init(void) { int err; + int i; init_dummy_netdev(&xfrm_napi_dev); err = gro_cells_init(&gro_cells, &xfrm_napi_dev); @@ -490,4 +538,13 @@ void __init xfrm_input_init(void) sizeof(struct sec_path), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + + for_each_possible_cpu(i) { + struct xfrm_trans_tasklet *trans; + + trans = &per_cpu(xfrm_trans_tasklet, i); + __skb_queue_head_init(&trans->queue); + tasklet_init(&trans->tasklet, xfrm_trans_reinject, + (unsigned long)trans); + } } -- cgit v1.2.3 From 2487e7efc9e13bd11fdc86f1ac12a5a45c4af778 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:46 +0900 Subject: mmc: renesas_sdhi: remove always false condition renesas_sdhi_probe() always sets host->dma as follows: host->dma = dma_priv; !host->dma is always false. 
Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_sys_dmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index 9ab10436e4b8..e210644f1116 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -360,8 +360,8 @@ static void renesas_sdhi_sys_dmac_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata) { /* We can only either use DMA for both Tx and Rx or not use it at all */ - if (!host->dma || (!host->pdev->dev.of_node && - (!pdata->chan_priv_tx || !pdata->chan_priv_rx))) + if (!host->pdev->dev.of_node && + (!pdata->chan_priv_tx || !pdata->chan_priv_rx)) return; if (!host->chan_tx && !host->chan_rx) { -- cgit v1.2.3 From 058db2868cd88b5474f26974253407fcbe932c22 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:47 +0900 Subject: mmc: tmio, renesas_sdhi: move struct tmio_mmc_dma to renesas_sdhi.h struct tmio_mmc_dma looks like TMIO core data, but in fact, Renesas private data. Move it to renesas_sdhi.h (probably, it is better to rename it to renesas_sdhi_dma, or squash it into struct renesas_sdhi). I also moved struct renesas_sdhi and host_to_priv() to that header because they are necessary to convert the tmio_mmc_host pointer into the renesas_sdhi pointer. 
Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi.h | 19 +++++++++++++++++++ drivers/mmc/host/renesas_sdhi_core.c | 14 -------------- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 6 ++++-- drivers/mmc/host/renesas_sdhi_sys_dmac.c | 16 ++++++++++------ drivers/mmc/host/tmio_mmc.h | 7 ------- 5 files changed, 33 insertions(+), 29 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi.h b/drivers/mmc/host/renesas_sdhi.h index b9dfea5d8193..9a507b3a9838 100644 --- a/drivers/mmc/host/renesas_sdhi.h +++ b/drivers/mmc/host/renesas_sdhi.h @@ -35,6 +35,25 @@ struct renesas_sdhi_of_data { unsigned short max_segs; }; +struct tmio_mmc_dma { + enum dma_slave_buswidth dma_buswidth; + bool (*filter)(struct dma_chan *chan, void *arg); + void (*enable)(struct tmio_mmc_host *host, bool enable); +}; + +struct renesas_sdhi { + struct clk *clk; + struct clk *clk_cd; + struct tmio_mmc_data mmc_data; + struct tmio_mmc_dma dma_priv; + struct pinctrl *pinctrl; + struct pinctrl_state *pins_default, *pins_uhs; + void __iomem *scc_ctl; +}; + +#define host_to_priv(host) \ + container_of((host)->pdata, struct renesas_sdhi, mmc_data) + int renesas_sdhi_probe(struct platform_device *pdev, const struct tmio_mmc_dma_ops *dma_ops); int renesas_sdhi_remove(struct platform_device *pdev); diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index ae8099690b1a..0eb62353630f 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -47,19 +47,6 @@ #define SDHI_VER_GEN3_SD 0xcc10 #define SDHI_VER_GEN3_SDMMC 0xcd10 -#define host_to_priv(host) \ - container_of((host)->pdata, struct renesas_sdhi, mmc_data) - -struct renesas_sdhi { - struct clk *clk; - struct clk *clk_cd; - struct tmio_mmc_data mmc_data; - struct tmio_mmc_dma dma_priv; - struct pinctrl *pinctrl; - struct pinctrl_state *pins_default, *pins_uhs; - void __iomem *scc_ctl; -}; - static void 
renesas_sdhi_sdbuf_width(struct tmio_mmc_host *host, int width) { u32 val; @@ -540,7 +527,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, host->bus_shift = of_data->bus_shift; } - host->dma = dma_priv; host->write16_hook = renesas_sdhi_write16_hook; host->clk_enable = renesas_sdhi_clk_enable; host->clk_update = renesas_sdhi_clk_update; diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 396ae8a1c250..9498decf3165 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -103,6 +103,8 @@ renesas_sdhi_internal_dmac_dm_write(struct tmio_mmc_host *host, static void renesas_sdhi_internal_dmac_enable_dma(struct tmio_mmc_host *host, bool enable) { + struct renesas_sdhi *priv = host_to_priv(host); + if (!host->chan_tx || !host->chan_rx) return; @@ -110,8 +112,8 @@ renesas_sdhi_internal_dmac_enable_dma(struct tmio_mmc_host *host, bool enable) renesas_sdhi_internal_dmac_dm_write(host, DM_CM_INFO1, INFO1_CLEAR); - if (host->dma->enable) - host->dma->enable(host, enable); + if (priv->dma_priv.enable) + priv->dma_priv.enable(host, enable); } static void diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index e210644f1116..aeb3838c05e3 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -117,11 +117,13 @@ MODULE_DEVICE_TABLE(of, renesas_sdhi_sys_dmac_of_match); static void renesas_sdhi_sys_dmac_enable_dma(struct tmio_mmc_host *host, bool enable) { + struct renesas_sdhi *priv = host_to_priv(host); + if (!host->chan_tx || !host->chan_rx) return; - if (host->dma->enable) - host->dma->enable(host, enable); + if (priv->dma_priv.enable) + priv->dma_priv.enable(host, enable); } static void renesas_sdhi_sys_dmac_abort_dma(struct tmio_mmc_host *host) @@ -359,6 +361,8 @@ static void renesas_sdhi_sys_dmac_issue_tasklet_fn(unsigned long priv) static void 
renesas_sdhi_sys_dmac_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata) { + struct renesas_sdhi *priv = host_to_priv(host); + /* We can only either use DMA for both Tx and Rx or not use it at all */ if (!host->pdev->dev.of_node && (!pdata->chan_priv_tx || !pdata->chan_priv_rx)) @@ -378,7 +382,7 @@ static void renesas_sdhi_sys_dmac_request_dma(struct tmio_mmc_host *host, dma_cap_set(DMA_SLAVE, mask); host->chan_tx = dma_request_slave_channel_compat(mask, - host->dma->filter, pdata->chan_priv_tx, + priv->dma_priv.filter, pdata->chan_priv_tx, &host->pdev->dev, "tx"); dev_dbg(&host->pdev->dev, "%s: TX: got channel %p\n", __func__, host->chan_tx); @@ -389,7 +393,7 @@ static void renesas_sdhi_sys_dmac_request_dma(struct tmio_mmc_host *host, cfg.direction = DMA_MEM_TO_DEV; cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->bus_shift); - cfg.dst_addr_width = host->dma->dma_buswidth; + cfg.dst_addr_width = priv->dma_priv.dma_buswidth; if (!cfg.dst_addr_width) cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; cfg.src_addr = 0; @@ -398,7 +402,7 @@ static void renesas_sdhi_sys_dmac_request_dma(struct tmio_mmc_host *host, goto ecfgtx; host->chan_rx = dma_request_slave_channel_compat(mask, - host->dma->filter, pdata->chan_priv_rx, + priv->dma_priv.filter, pdata->chan_priv_rx, &host->pdev->dev, "rx"); dev_dbg(&host->pdev->dev, "%s: RX: got channel %p\n", __func__, host->chan_rx); @@ -408,7 +412,7 @@ static void renesas_sdhi_sys_dmac_request_dma(struct tmio_mmc_host *host, cfg.direction = DMA_DEV_TO_MEM; cfg.src_addr = cfg.dst_addr + host->pdata->dma_rx_offset; - cfg.src_addr_width = host->dma->dma_buswidth; + cfg.src_addr_width = priv->dma_priv.dma_buswidth; if (!cfg.src_addr_width) cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; cfg.dst_addr = 0; diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index dd40b9631b3a..ed375a9056de 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -112,12 +112,6 @@ struct 
tmio_mmc_data; struct tmio_mmc_host; -struct tmio_mmc_dma { - enum dma_slave_buswidth dma_buswidth; - bool (*filter)(struct dma_chan *chan, void *arg); - void (*enable)(struct tmio_mmc_host *host, bool enable); -}; - struct tmio_mmc_dma_ops { void (*start)(struct tmio_mmc_host *host, struct mmc_data *data); void (*enable)(struct tmio_mmc_host *host, bool enable); @@ -149,7 +143,6 @@ struct tmio_mmc_host { struct platform_device *pdev; struct tmio_mmc_data *pdata; - struct tmio_mmc_dma *dma; /* DMA support */ bool force_pio; -- cgit v1.2.3 From 90d9510645765401c56d75f6003d6cb6c1f7ca2a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:48 +0900 Subject: mmc: tmio, renesas_sdhi: move Renesas-specific DMA data to renesas_sdhi.h struct tmio_mmc_host has "dma_dataend" and "dma_complete", but in fact, they are Renesas private data. Move them to renesas_sdhi.h Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi.h | 2 ++ drivers/mmc/host/renesas_sdhi_internal_dmac.c | 8 ++++++-- drivers/mmc/host/renesas_sdhi_sys_dmac.c | 15 ++++++++++----- drivers/mmc/host/tmio_mmc.h | 2 -- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi.h b/drivers/mmc/host/renesas_sdhi.h index 9a507b3a9838..3250dbed402f 100644 --- a/drivers/mmc/host/renesas_sdhi.h +++ b/drivers/mmc/host/renesas_sdhi.h @@ -39,6 +39,8 @@ struct tmio_mmc_dma { enum dma_slave_buswidth dma_buswidth; bool (*filter)(struct dma_chan *chan, void *arg); void (*enable)(struct tmio_mmc_host *host, bool enable); + struct completion dma_dataend; + struct tasklet_struct dma_complete; }; struct renesas_sdhi { diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index 9498decf3165..7c03cfead6f9 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -132,7 +132,9 @@ 
renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host) { static void renesas_sdhi_internal_dmac_dataend_dma(struct tmio_mmc_host *host) { - tasklet_schedule(&host->dma_complete); + struct renesas_sdhi *priv = host_to_priv(host); + + tasklet_schedule(&priv->dma_priv.dma_complete); } static void @@ -222,10 +224,12 @@ static void renesas_sdhi_internal_dmac_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata) { + struct renesas_sdhi *priv = host_to_priv(host); + /* Each value is set to non-zero to assume "enabling" each DMA */ host->chan_rx = host->chan_tx = (void *)0xdeadbeaf; - tasklet_init(&host->dma_complete, + tasklet_init(&priv->dma_priv.dma_complete, renesas_sdhi_internal_dmac_complete_tasklet_fn, (unsigned long)host); tasklet_init(&host->dma_issue, diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index aeb3838c05e3..c8a74b2dee00 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -140,12 +140,15 @@ static void renesas_sdhi_sys_dmac_abort_dma(struct tmio_mmc_host *host) static void renesas_sdhi_sys_dmac_dataend_dma(struct tmio_mmc_host *host) { - complete(&host->dma_dataend); + struct renesas_sdhi *priv = host_to_priv(host); + + complete(&priv->dma_priv.dma_dataend); } static void renesas_sdhi_sys_dmac_dma_callback(void *arg) { struct tmio_mmc_host *host = arg; + struct renesas_sdhi *priv = host_to_priv(host); spin_lock_irq(&host->lock); @@ -163,7 +166,7 @@ static void renesas_sdhi_sys_dmac_dma_callback(void *arg) spin_unlock_irq(&host->lock); - wait_for_completion(&host->dma_dataend); + wait_for_completion(&priv->dma_priv.dma_dataend); spin_lock_irq(&host->lock); tmio_mmc_do_data_irq(host); @@ -173,6 +176,7 @@ out: static void renesas_sdhi_sys_dmac_start_dma_rx(struct tmio_mmc_host *host) { + struct renesas_sdhi *priv = host_to_priv(host); struct scatterlist *sg = host->sg_ptr, *sg_tmp; struct dma_async_tx_descriptor *desc = NULL; struct 
dma_chan *chan = host->chan_rx; @@ -216,7 +220,7 @@ static void renesas_sdhi_sys_dmac_start_dma_rx(struct tmio_mmc_host *host) DMA_CTRL_ACK); if (desc) { - reinit_completion(&host->dma_dataend); + reinit_completion(&priv->dma_priv.dma_dataend); desc->callback = renesas_sdhi_sys_dmac_dma_callback; desc->callback_param = host; @@ -247,6 +251,7 @@ pio: static void renesas_sdhi_sys_dmac_start_dma_tx(struct tmio_mmc_host *host) { + struct renesas_sdhi *priv = host_to_priv(host); struct scatterlist *sg = host->sg_ptr, *sg_tmp; struct dma_async_tx_descriptor *desc = NULL; struct dma_chan *chan = host->chan_tx; @@ -295,7 +300,7 @@ static void renesas_sdhi_sys_dmac_start_dma_tx(struct tmio_mmc_host *host) DMA_CTRL_ACK); if (desc) { - reinit_completion(&host->dma_dataend); + reinit_completion(&priv->dma_priv.dma_dataend); desc->callback = renesas_sdhi_sys_dmac_dma_callback; desc->callback_param = host; @@ -424,7 +429,7 @@ static void renesas_sdhi_sys_dmac_request_dma(struct tmio_mmc_host *host, if (!host->bounce_buf) goto ebouncebuf; - init_completion(&host->dma_dataend); + init_completion(&priv->dma_priv.dma_dataend); tasklet_init(&host->dma_issue, renesas_sdhi_sys_dmac_issue_tasklet_fn, (unsigned long)host); diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index ed375a9056de..5972438105a3 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -148,8 +148,6 @@ struct tmio_mmc_host { bool force_pio; struct dma_chan *chan_rx; struct dma_chan *chan_tx; - struct completion dma_dataend; - struct tasklet_struct dma_complete; struct tasklet_struct dma_issue; struct scatterlist bounce_sg; u8 *bounce_buf; -- cgit v1.2.3 From 852d258f8465aa65adcce99f28552dd9b66a14a7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:49 +0900 Subject: mmc: tmio,renesas_sdhi: move ssc_tappos to renesas_sdhi.h struct tmio_mmc_host has "scc_tappos", but in fact, it is Renesas private data. 
Move it to renesas_sdhi.h Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi.h | 1 + drivers/mmc/host/renesas_sdhi_core.c | 4 ++-- drivers/mmc/host/tmio_mmc.h | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi.h b/drivers/mmc/host/renesas_sdhi.h index 3250dbed402f..f13f798d8506 100644 --- a/drivers/mmc/host/renesas_sdhi.h +++ b/drivers/mmc/host/renesas_sdhi.h @@ -51,6 +51,7 @@ struct renesas_sdhi { struct pinctrl *pinctrl; struct pinctrl_state *pins_default, *pins_uhs; void __iomem *scc_ctl; + u32 scc_tappos; }; #define host_to_priv(host) \ diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 0eb62353630f..6a2988bd51a2 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -268,7 +268,7 @@ static unsigned int renesas_sdhi_init_tuning(struct tmio_mmc_host *host) ~SH_MOBILE_SDHI_SCC_RVSCNTL_RVSEN & sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_RVSCNTL)); - sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DT2FF, host->scc_tappos); + sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_DT2FF, priv->scc_tappos); /* Read TAPNUM */ return (sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_DTCNTL) >> @@ -591,7 +591,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, for (i = 0; i < of_data->taps_num; i++) { if (taps[i].clk_rate == 0 || taps[i].clk_rate == host->mmc->f_max) { - host->scc_tappos = taps->tap; + priv->scc_tappos = taps->tap; hit = true; break; } diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 5972438105a3..a099fde27026 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -166,7 +166,6 @@ struct tmio_mmc_host { struct mutex ios_lock; /* protect set_ios() context */ bool native_hotplug; bool sdio_irq_enabled; - u32 scc_tappos; /* Mandatory callback */ int (*clk_enable)(struct tmio_mmc_host *host); -- cgit v1.2.3 From 
c4ba0e4abda39fb1ca81683be068b4556b2680d4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:50 +0900 Subject: mmc: tmio: change bus_shift to unsigned int Sane values for bus_shift are: 0 - for 16 bit bus 1 - for 32 bit bus 2 - for 64 bit bus "unsigned long" is too much. Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index a099fde27026..15537c85c51a 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -139,7 +139,7 @@ struct tmio_mmc_host { struct scatterlist *sg_orig; unsigned int sg_len; unsigned int sg_off; - unsigned long bus_shift; + unsigned int bus_shift; struct platform_device *pdev; struct tmio_mmc_data *pdata; -- cgit v1.2.3 From 8d876bf472dba73c015cea9feea80dcb80626a7c Mon Sep 17 00:00:00 2001 From: Zhoujie Wu Date: Mon, 18 Dec 2017 14:38:47 -0800 Subject: mmc: sdhci-xenon: wait 5ms after set 1.8V signal enable According to SD spec 3.00 3.6.1 signal voltage switch procedure step 6~8, (6) Set 1.8V Signal Enable in the Host Control 2 register. (7) Wait 5ms. 1.8V voltage regulator shall be stable within this period. (8) If 1.8V Signal Enable is cleared by Host Controller, go to step (12). Host should wait 5ms after set 1.8V signal enable bit in Host Control 2 register and check if 1.8V is stable or not. But current code checks this bit right after set it. On some platforms with xenon controller found the bit is cleared right away and host reports "1.8V regulator output did not became stable" and 5ms delay can help. Implement voltage_switch callback for xenon controller to add 5ms delay to make sure the 1.8V signal enable bit is set by controller. 
Signed-off-by: Zhoujie Wu Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-xenon.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index 0842bbc2d7ad..4d0791f6ec23 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -230,7 +230,14 @@ static void xenon_set_power(struct sdhci_host *host, unsigned char mode, mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); } +static void xenon_voltage_switch(struct sdhci_host *host) +{ + /* Wait for 5ms after set 1.8V signal enable bit */ + usleep_range(5000, 5500); +} + static const struct sdhci_ops sdhci_xenon_ops = { + .voltage_switch = xenon_voltage_switch, .set_clock = sdhci_set_clock, .set_power = xenon_set_power, .set_bus_width = sdhci_set_bus_width, -- cgit v1.2.3 From 6454b3bdd138dfc640deb5e7b9a0668fca2d55dd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Dec 2017 15:13:44 -0600 Subject: x86/stacktrace: Make zombie stack traces reliable Commit: 1959a60182f4 ("x86/dumpstack: Pin the target stack when dumping it") changed the behavior of stack traces for zombies. Before that commit, /proc//stack reported the last execution path of the zombie before it died: [] do_exit+0x6f7/0xa80 [] do_group_exit+0x39/0xa0 [] __wake_up_parent+0x0/0x30 [] system_call_fastpath+0x16/0x1b [<00007fd128f9c4f9>] 0x7fd128f9c4f9 [] 0xffffffffffffffff After the commit, it just reports an empty stack trace. The new behavior is actually probably more correct. If the stack refcount has gone down to zero, then the task has already gone through do_exit() and isn't going to run anymore. The stack could be freed at any time and is basically gone, so reporting an empty stack makes sense. However, save_stack_trace_tsk_reliable() treats such a missing stack condition as an error. That can cause livepatch transition stalls if there are any unreaped zombies. Instead, just treat it as a reliable, empty stack. 
Reported-and-tested-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Fixes: af085d9084b4 ("stacktrace/x86: add function for detecting reliable stack traces") Link: http://lkml.kernel.org/r/e4b09e630e99d0c1080528f0821fc9d9dbaeea82.1513631620.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/stacktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 77835bc021c7..20161ef53537 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk, { int ret; + /* + * If the task doesn't have a stack (e.g., a zombie), the stack is + * "reliably" empty. + */ if (!try_get_task_stack(tsk)) - return -EINVAL; + return 0; ret = __save_stack_trace_reliable(trace, tsk); -- cgit v1.2.3 From b65c7b8aeac818eb8f80ce825073c12ad081b177 Mon Sep 17 00:00:00 2001 From: Adiel Aloni Date: Mon, 18 Dec 2017 12:14:04 +0200 Subject: mac80211_hwsim: enable TODS BIT in null data frame Same as in ieee80211_nullfunc_get, enable the TODS bit, otherwise the nullfunc packet will not be handled in ap rx path. (will be dropped in ieee80211_accept_frame()). Signed-off-by: Adiel Aloni Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 10b075a46b26..59b0cedcdf7b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN); hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS | (ps ? 
IEEE80211_FCTL_PM : 0)); hdr->duration_id = cpu_to_le16(0); memcpy(hdr->addr1, vp->bssid, ETH_ALEN); -- cgit v1.2.3 From 5d32407396b0433f9b738fcfcb9599bcba7379ae Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 14 Dec 2017 14:33:38 +0100 Subject: cfg80211: always rewrite generated files from scratch Currently the certs C code generation appends to the generated files, which is most likely a leftover from commit 715a12334764 ("wireless: don't write C files on failures"). This causes duplicate code in the generated files if the certificates have their timestamps modified between builds and thereby trigger the generation rules. Fixes: 715a12334764 ("wireless: don't write C files on failures") Signed-off-by: Thierry Reding Signed-off-by: Johannes Berg --- net/wireless/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/Makefile b/net/wireless/Makefile index d7d6cb00c47b..b662be3422e1 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -43,7 +43,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) echo "$$allf"; \ echo '};'; \ echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ - ) >> $@) + ) > $@) $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) @@ -66,4 +66,4 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ echo "$$allf"; \ echo '};'; \ echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \ - ) >> $@) + ) > $@) -- cgit v1.2.3 From 162bd5e5fd921785077b5862d8f2ffabe2fe11e5 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 12 Dec 2017 17:26:36 +0800 Subject: mac80211_hwsim: Fix a possible sleep-in-atomic bug in hwsim_get_radio_nl The driver may sleep under a spinlock. The function call path is: hwsim_get_radio_nl (acquire the spinlock) nlmsg_new(GFP_KERNEL) --> may sleep To fix it, GFP_KERNEL is replaced with GFP_ATOMIC. 
This bug is found by my static analysis tool(DSAC) and checked by my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 59b0cedcdf7b..e8189c07b41f 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3216,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info))) continue; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) { res = -ENOMEM; goto out_err; -- cgit v1.2.3 From 958a1b5a5ed02a768eb27760268251af93090caf Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 11 Dec 2017 15:37:49 -0700 Subject: nl80211: Remove obsolete kerneldoc line Commit ca986ad9bcd3 (nl80211: allow multiple active scheduled scan requests) removed WIPHY_FLAG_SUPPORTS_SCHED_SCAN but left the kerneldoc description in place, leading to this docs-build warning: ./include/net/cfg80211.h:3278: warning: Excess enum value 'WIPHY_FLAG_SUPPORTS_SCHED_SCAN' description in 'wiphy_flags' Remove the line and gain a bit of peace. Signed-off-by: Jonathan Corbet Acked-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8b8118a7fadb..cb4d92b79cd9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3226,7 +3226,6 @@ struct cfg80211_ops { * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing * auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH. - * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans. 
* @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the * firmware. * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP. -- cgit v1.2.3 From 04a7279ff12fc47b657f70731d401c0064f5838a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Dec 2017 09:26:17 +0100 Subject: cfg80211: ship certificates as hex files Not only does this remove the need for the hexdump code in most normal kernel builds (still there for the extra directory), but it also removes the need to ship binary files, which apparently is somewhat problematic, as Randy reported. While at it, also add the generated files to clean-files. Reported-by: Randy Dunlap Signed-off-by: Johannes Berg --- net/wireless/Makefile | 29 ++++--------- net/wireless/certs/sforshee.hex | 86 +++++++++++++++++++++++++++++++++++++++ net/wireless/certs/sforshee.x509 | Bin 680 -> 0 bytes 3 files changed, 95 insertions(+), 20 deletions(-) create mode 100644 net/wireless/certs/sforshee.hex delete mode 100644 net/wireless/certs/sforshee.x509 diff --git a/net/wireless/Makefile b/net/wireless/Makefile index b662be3422e1..1d84f91bbfb0 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -23,27 +23,14 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),) cfg80211-y += extra-certs.o endif -$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) +$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) @$(kecho) " GEN $@" - @(set -e; \ - allf=""; \ - for f in $^ ; do \ - # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \ - thisf=$$(od -An -v -tx1 < $$f | \ - sed -e 's/ /\n/g' | \ - sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \ - sed -e 's/^/0x/;s/$$/,/'); \ - # file should not be empty - maybe command substitution failed? \ - test ! 
-z "$$thisf";\ - allf=$$allf$$thisf;\ - done; \ - ( \ - echo '#include "reg.h"'; \ - echo 'const u8 shipped_regdb_certs[] = {'; \ - echo "$$allf"; \ - echo '};'; \ - echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ - ) > $@) + @(echo '#include "reg.h"'; \ + echo 'const u8 shipped_regdb_certs[] = {'; \ + cat $^ ; \ + echo '};'; \ + echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ + ) > $@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) @@ -67,3 +54,5 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ echo '};'; \ echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \ ) > $@) + +clean-files += shipped-certs.c extra-certs.c diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex new file mode 100644 index 000000000000..14ea66643ffa --- /dev/null +++ b/net/wireless/certs/sforshee.hex @@ -0,0 +1,86 @@ +/* Seth Forshee's regdb certificate */ +0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c, +0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae, +0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a, +0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b, +0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, +0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, +0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, +0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30, +0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a, +0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39, +0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, +0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06, +0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66, +0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82, +0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, +0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, +0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82, +0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5, +0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2, +0x39, 0xd7, 
0x66, 0x2c, 0x41, 0x38, 0x15, 0xac, +0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c, +0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38, +0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d, +0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20, +0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b, +0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57, +0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b, +0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51, +0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a, +0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18, +0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98, +0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1, +0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28, +0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71, +0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a, +0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85, +0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30, +0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7, +0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65, +0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3, +0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18, +0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36, +0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1, +0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96, +0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c, +0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11, +0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7, +0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6, +0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0, +0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02, +0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09, +0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, +0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00, +0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf, +0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93, +0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7, +0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9, +0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3, +0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec, +0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0, 
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3, +0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4, +0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32, +0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74, +0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22, +0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86, +0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c, +0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06, +0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1, +0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58, +0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4, +0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72, +0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79, +0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a, +0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f, +0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47, +0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a, +0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28, +0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2, +0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87, +0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d, +0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc, +0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16, +0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f, +0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14, diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509 deleted file mode 100644 index c6f8f9d6b988..000000000000 Binary files a/net/wireless/certs/sforshee.x509 and /dev/null differ -- cgit v1.2.3 From eac6a3639decefcc8eb0941dd3cebe79993670ad Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 Dec 2017 16:58:59 +0100 Subject: ARM: dts: sun8i: a711: Reinstate the PMIC compatible When we added the regulator support in commit 90c5d7cdae64 ("ARM: dts: sun8i: a711: Add regulator support"), we also dropped the PMIC's compatible. Since it's not in the PMIC DTSI, unlike most other PMIC DTSI, it obviously wasn't probing anymore. Re-add it so that everything works again. 
Fixes: 90c5d7cdae64 ("ARM: dts: sun8i: a711: Add regulator support") Reviewed-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts index 98715538932f..a021ee6da396 100644 --- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts +++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts @@ -146,6 +146,7 @@ status = "okay"; axp81x: pmic@3a3 { + compatible = "x-powers,axp813"; reg = <0x3a3>; interrupt-parent = <&r_intc>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; -- cgit v1.2.3 From 92411f6d7f1afcc95e54295d40e96a75385212ec Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 7 Dec 2017 16:58:50 +0100 Subject: drm/sun4i: Fix error path handling The commit 4c7f16d14a33 ("drm/sun4i: Fix TCON clock and regmap initialization sequence") moved a bunch of logic around, but forgot to update the gotos after the introduction of the err_free_dotclock label. It means that if we fail later than the one introduced in that commit, we'll just jump to the old label which doesn't free the clock we created. This will result in a breakage as soon as someone tries to do something with that clock, since its resources will have been long reclaimed.
Cc: Fixes: 4c7f16d14a33 ("drm/sun4i: Fix TCON clock and regmap initialization sequence") Reviewed-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/f83c1cebc731f0b4251f5ddd7b38c718cd79bb0b.1512662253.git-series.maxime.ripard@free-electrons.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index e122f5b2a395..f4284b51bdca 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master, if (IS_ERR(tcon->crtc)) { dev_err(dev, "Couldn't create our CRTC\n"); ret = PTR_ERR(tcon->crtc); - goto err_free_clocks; + goto err_free_dotclock; } ret = sun4i_rgb_init(drm, tcon); if (ret < 0) - goto err_free_clocks; + goto err_free_dotclock; if (tcon->quirks->needs_de_be_mux) { /* -- cgit v1.2.3 From f31768349447cdf975abbcb7d4a18c0b5d4971c3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:00 -0800 Subject: ASoC: fsl_ssi: Rename fsl_ssi_private to fsl_ssi Shorten the private data structure to save some wrapped lines. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 456 +++++++++++++++++++++++------------------------- 1 file changed, 220 insertions(+), 236 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index c350117c8e31..84d2f7ecb5e1 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -185,7 +185,7 @@ struct fsl_ssi_soc_data { }; /** - * fsl_ssi_private: per-SSI private data + * fsl_ssi: per-SSI private data * * @reg: Pointer to the regmap registers * @irq: IRQ of this SSI @@ -224,7 +224,7 @@ struct fsl_ssi_soc_data { * @dma_maxburst: max number of words to transfer in one go. 
So far, * this is always the same as fifo_watermark. */ -struct fsl_ssi_private { +struct fsl_ssi { struct regmap *regs; int irq; struct snd_soc_dai_driver cpu_dai_drv; @@ -325,21 +325,21 @@ static const struct of_device_id fsl_ssi_ids[] = { }; MODULE_DEVICE_TABLE(of, fsl_ssi_ids); -static bool fsl_ssi_is_ac97(struct fsl_ssi_private *ssi_private) +static bool fsl_ssi_is_ac97(struct fsl_ssi *ssi) { - return (ssi_private->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) == + return (ssi->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_AC97; } -static bool fsl_ssi_is_i2s_master(struct fsl_ssi_private *ssi_private) +static bool fsl_ssi_is_i2s_master(struct fsl_ssi *ssi) { - return (ssi_private->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) == + return (ssi->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) == SND_SOC_DAIFMT_CBS_CFS; } -static bool fsl_ssi_is_i2s_cbm_cfs(struct fsl_ssi_private *ssi_private) +static bool fsl_ssi_is_i2s_cbm_cfs(struct fsl_ssi *ssi) { - return (ssi_private->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) == + return (ssi->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) == SND_SOC_DAIFMT_CBM_CFS; } /** @@ -352,12 +352,12 @@ static bool fsl_ssi_is_i2s_cbm_cfs(struct fsl_ssi_private *ssi_private) * This interrupt handler is used only to gather statistics. 
* * @irq: IRQ of the SSI device - * @dev_id: pointer to the ssi_private structure for this SSI device + * @dev_id: pointer to the fsl_ssi structure for this SSI device */ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) { - struct fsl_ssi_private *ssi_private = dev_id; - struct regmap *regs = ssi_private->regs; + struct fsl_ssi *ssi = dev_id; + struct regmap *regs = ssi->regs; __be32 sisr; __be32 sisr2; @@ -367,12 +367,12 @@ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) */ regmap_read(regs, CCSR_SSI_SISR, &sisr); - sisr2 = sisr & ssi_private->soc->sisr_write_mask; + sisr2 = sisr & ssi->soc->sisr_write_mask; /* Clear the bits that we set */ if (sisr2) regmap_write(regs, CCSR_SSI_SISR, sisr2); - fsl_ssi_dbg_isr(&ssi_private->dbg_stats, sisr); + fsl_ssi_dbg_isr(&ssi->dbg_stats, sisr); return IRQ_HANDLED; } @@ -380,11 +380,10 @@ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) /* * Enable/Disable all rx/tx config flags at once. */ -static void fsl_ssi_rxtx_config(struct fsl_ssi_private *ssi_private, - bool enable) +static void fsl_ssi_rxtx_config(struct fsl_ssi *ssi, bool enable) { - struct regmap *regs = ssi_private->regs; - struct fsl_ssi_rxtx_reg_val *vals = &ssi_private->rxtx_reg_val; + struct regmap *regs = ssi->regs; + struct fsl_ssi_rxtx_reg_val *vals = &ssi->rxtx_reg_val; if (enable) { regmap_update_bits(regs, CCSR_SSI_SIER, @@ -414,14 +413,13 @@ static void fsl_ssi_rxtx_config(struct fsl_ssi_private *ssi_private, * Note: The SOR is not documented in recent IMX datasheet, but * is described in IMX51 reference manual at section 56.3.3.15. 
*/ -static void fsl_ssi_fifo_clear(struct fsl_ssi_private *ssi_private, - bool is_rx) +static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) { if (is_rx) { - regmap_update_bits(ssi_private->regs, CCSR_SSI_SOR, + regmap_update_bits(ssi->regs, CCSR_SSI_SOR, CCSR_SSI_SOR_RX_CLR, CCSR_SSI_SOR_RX_CLR); } else { - regmap_update_bits(ssi_private->regs, CCSR_SSI_SOR, + regmap_update_bits(ssi->regs, CCSR_SSI_SOR, CCSR_SSI_SOR_TX_CLR, CCSR_SSI_SOR_TX_CLR); } } @@ -448,12 +446,12 @@ static void fsl_ssi_fifo_clear(struct fsl_ssi_private *ssi_private, /* * Enable/Disable a ssi configuration. You have to pass either - * ssi_private->rxtx_reg_val.rx or tx as vals parameter. + * ssi->rxtx_reg_val.rx or tx as vals parameter. */ -static void fsl_ssi_config(struct fsl_ssi_private *ssi_private, bool enable, +static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, struct fsl_ssi_reg_val *vals) { - struct regmap *regs = ssi_private->regs; + struct regmap *regs = ssi->regs; struct fsl_ssi_reg_val *avals; int nr_active_streams; u32 scr_val; @@ -471,10 +469,10 @@ static void fsl_ssi_config(struct fsl_ssi_private *ssi_private, bool enable, /* Find the other direction values rx or tx which we do not want to * modify */ - if (&ssi_private->rxtx_reg_val.rx == vals) - avals = &ssi_private->rxtx_reg_val.tx; + if (&ssi->rxtx_reg_val.rx == vals) + avals = &ssi->rxtx_reg_val.tx; else - avals = &ssi_private->rxtx_reg_val.rx; + avals = &ssi->rxtx_reg_val.rx; /* If vals should be disabled, start with disabling the unit */ if (!enable) { @@ -488,10 +486,10 @@ static void fsl_ssi_config(struct fsl_ssi_private *ssi_private, bool enable, * reconfiguration, so we have to enable all necessary flags at once * even if we do not use them later (capture and playback configuration) */ - if (ssi_private->soc->offline_config) { + if (ssi->soc->offline_config) { if ((enable && !nr_active_streams) || (!enable && !keep_active)) - fsl_ssi_rxtx_config(ssi_private, enable); + fsl_ssi_rxtx_config(ssi, 
enable); goto config_done; } @@ -501,7 +499,7 @@ static void fsl_ssi_config(struct fsl_ssi_private *ssi_private, bool enable, * (online configuration) */ if (enable) { - fsl_ssi_fifo_clear(ssi_private, vals->scr & CCSR_SSI_SCR_RE); + fsl_ssi_fifo_clear(ssi, vals->scr & CCSR_SSI_SCR_RE); regmap_update_bits(regs, CCSR_SSI_SRCR, vals->srcr, vals->srcr); regmap_update_bits(regs, CCSR_SSI_STCR, vals->stcr, vals->stcr); @@ -536,7 +534,7 @@ static void fsl_ssi_config(struct fsl_ssi_private *ssi_private, bool enable, config_done: /* Enabling of subunits is done after configuration */ if (enable) { - if (ssi_private->use_dma && (vals->scr & CCSR_SSI_SCR_TE)) { + if (ssi->use_dma && (vals->scr & CCSR_SSI_SCR_TE)) { /* * Be sure the Tx FIFO is filled when TE is set. * Otherwise, there are some chances to start the @@ -563,7 +561,7 @@ config_done: break; } if (i == max_loop) { - dev_err(ssi_private->dev, + dev_err(ssi->dev, "Timeout waiting TX FIFO filling\n"); } } @@ -572,17 +570,17 @@ config_done: } -static void fsl_ssi_rx_config(struct fsl_ssi_private *ssi_private, bool enable) +static void fsl_ssi_rx_config(struct fsl_ssi *ssi, bool enable) { - fsl_ssi_config(ssi_private, enable, &ssi_private->rxtx_reg_val.rx); + fsl_ssi_config(ssi, enable, &ssi->rxtx_reg_val.rx); } -static void fsl_ssi_tx_ac97_saccst_setup(struct fsl_ssi_private *ssi_private) +static void fsl_ssi_tx_ac97_saccst_setup(struct fsl_ssi *ssi) { - struct regmap *regs = ssi_private->regs; + struct regmap *regs = ssi->regs; /* no SACC{ST,EN,DIS} regs on imx21-class SSI */ - if (!ssi_private->soc->imx21regs) { + if (!ssi->soc->imx21regs) { /* * Note that these below aren't just normal registers. 
* They are a way to disable or enable bits in SACCST @@ -601,7 +599,7 @@ static void fsl_ssi_tx_ac97_saccst_setup(struct fsl_ssi_private *ssi_private) } } -static void fsl_ssi_tx_config(struct fsl_ssi_private *ssi_private, bool enable) +static void fsl_ssi_tx_config(struct fsl_ssi *ssi, bool enable) { /* * Why are we setting up SACCST everytime we are starting a @@ -622,10 +620,10 @@ static void fsl_ssi_tx_config(struct fsl_ssi_private *ssi_private, bool enable) * untrustworthy let's play safe here and make sure that no extra * slots are enabled every time a playback is started. */ - if (enable && fsl_ssi_is_ac97(ssi_private)) - fsl_ssi_tx_ac97_saccst_setup(ssi_private); + if (enable && fsl_ssi_is_ac97(ssi)) + fsl_ssi_tx_ac97_saccst_setup(ssi); - fsl_ssi_config(ssi_private, enable, &ssi_private->rxtx_reg_val.tx); + fsl_ssi_config(ssi, enable, &ssi->rxtx_reg_val.tx); } /* @@ -633,9 +631,9 @@ static void fsl_ssi_tx_config(struct fsl_ssi_private *ssi_private, bool enable) * be used later in fsl_ssi_config to setup the streams without the need to * check for all different SSI modes. 
*/ -static void fsl_ssi_setup_reg_vals(struct fsl_ssi_private *ssi_private) +static void fsl_ssi_setup_reg_vals(struct fsl_ssi *ssi) { - struct fsl_ssi_rxtx_reg_val *reg = &ssi_private->rxtx_reg_val; + struct fsl_ssi_rxtx_reg_val *reg = &ssi->rxtx_reg_val; reg->rx.sier = CCSR_SSI_SIER_RFF0_EN; reg->rx.srcr = CCSR_SSI_SRCR_RFEN0; @@ -644,12 +642,12 @@ static void fsl_ssi_setup_reg_vals(struct fsl_ssi_private *ssi_private) reg->tx.stcr = CCSR_SSI_STCR_TFEN0; reg->tx.scr = 0; - if (!fsl_ssi_is_ac97(ssi_private)) { + if (!fsl_ssi_is_ac97(ssi)) { reg->rx.scr = CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_RE; reg->tx.scr = CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_TE; } - if (ssi_private->use_dma) { + if (ssi->use_dma) { reg->rx.sier |= CCSR_SSI_SIER_RDMAE; reg->tx.sier |= CCSR_SSI_SIER_TDMAE; } else { @@ -661,9 +659,9 @@ static void fsl_ssi_setup_reg_vals(struct fsl_ssi_private *ssi_private) reg->tx.sier |= FSLSSI_SIER_DBG_TX_FLAGS; } -static void fsl_ssi_setup_ac97(struct fsl_ssi_private *ssi_private) +static void fsl_ssi_setup_ac97(struct fsl_ssi *ssi) { - struct regmap *regs = ssi_private->regs; + struct regmap *regs = ssi->regs; /* * Setup the clock control register @@ -702,11 +700,10 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct fsl_ssi_private *ssi_private = - snd_soc_dai_get_drvdata(rtd->cpu_dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(rtd->cpu_dai); int ret; - ret = clk_prepare_enable(ssi_private->clk); + ret = clk_prepare_enable(ssi->clk); if (ret) return ret; @@ -715,7 +712,7 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, * task from fifo0, fifo1 would be neglected at the end of each * period. But SSI would still access fifo1 with an invalid data. 
*/ - if (ssi_private->use_dual_fifo) + if (ssi->use_dual_fifo) snd_pcm_hw_constraint_step(substream->runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, 2); @@ -730,10 +727,9 @@ static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct fsl_ssi_private *ssi_private = - snd_soc_dai_get_drvdata(rtd->cpu_dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(rtd->cpu_dai); - clk_disable_unprepare(ssi_private->clk); + clk_disable_unprepare(ssi->clk); } @@ -750,9 +746,9 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai, struct snd_pcm_hw_params *hw_params) { - struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(cpu_dai); - struct regmap *regs = ssi_private->regs; - int synchronous = ssi_private->cpu_dai_drv.symmetric_rates, ret; + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); + struct regmap *regs = ssi->regs; + int synchronous = ssi->cpu_dai_drv.symmetric_rates, ret; u32 pm = 999, div2, psr, stccr, mask, afreq, factor, i; unsigned long clkrate, baudrate, tmprate; unsigned int slots = params_channels(hw_params); @@ -762,29 +758,29 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, bool baudclk_is_used; /* Override slots and slot_width if being specifically set... 
*/ - if (ssi_private->slots) - slots = ssi_private->slots; + if (ssi->slots) + slots = ssi->slots; /* ...but keep 32 bits if slots is 2 -- I2S Master mode */ - if (ssi_private->slot_width && slots != 2) - slot_width = ssi_private->slot_width; + if (ssi->slot_width && slots != 2) + slot_width = ssi->slot_width; /* Generate bit clock based on the slot number and slot width */ freq = slots * slot_width * params_rate(hw_params); /* Don't apply it to any non-baudclk circumstance */ - if (IS_ERR(ssi_private->baudclk)) + if (IS_ERR(ssi->baudclk)) return -EINVAL; /* * Hardware limitation: The bclk rate must be * never greater than 1/5 IPG clock rate */ - if (freq * 5 > clk_get_rate(ssi_private->clk)) { + if (freq * 5 > clk_get_rate(ssi->clk)) { dev_err(cpu_dai->dev, "bitclk > ipgclk/5\n"); return -EINVAL; } - baudclk_is_used = ssi_private->baudclk_streams & ~(BIT(substream->stream)); + baudclk_is_used = ssi->baudclk_streams & ~(BIT(substream->stream)); /* It should be already enough to divide clock by setting pm alone */ psr = 0; @@ -796,9 +792,9 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, tmprate = freq * factor * (i + 1); if (baudclk_is_used) - clkrate = clk_get_rate(ssi_private->baudclk); + clkrate = clk_get_rate(ssi->baudclk); else - clkrate = clk_round_rate(ssi_private->baudclk, tmprate); + clkrate = clk_round_rate(ssi->baudclk, tmprate); clkrate /= factor; afreq = clkrate / (i + 1); @@ -844,7 +840,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, regmap_update_bits(regs, CCSR_SSI_SRCCR, mask, stccr); if (!baudclk_is_used) { - ret = clk_set_rate(ssi_private->baudclk, baudrate); + ret = clk_set_rate(ssi->baudclk, baudrate); if (ret) { dev_err(cpu_dai->dev, "failed to set baudclk rate\n"); return -EINVAL; @@ -870,8 +866,8 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params, struct snd_soc_dai *cpu_dai) { - 
struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(cpu_dai); - struct regmap *regs = ssi_private->regs; + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); + struct regmap *regs = ssi->regs; unsigned int channels = params_channels(hw_params); unsigned int sample_size = params_width(hw_params); u32 wl = CCSR_SSI_SxCCR_WL(sample_size); @@ -886,35 +882,35 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, * If we're in synchronous mode, and the SSI is already enabled, * then STCCR is already set properly. */ - if (enabled && ssi_private->cpu_dai_drv.symmetric_rates) + if (enabled && ssi->cpu_dai_drv.symmetric_rates) return 0; - if (fsl_ssi_is_i2s_master(ssi_private)) { + if (fsl_ssi_is_i2s_master(ssi)) { ret = fsl_ssi_set_bclk(substream, cpu_dai, hw_params); if (ret) return ret; /* Do not enable the clock if it is already enabled */ - if (!(ssi_private->baudclk_streams & BIT(substream->stream))) { - ret = clk_prepare_enable(ssi_private->baudclk); + if (!(ssi->baudclk_streams & BIT(substream->stream))) { + ret = clk_prepare_enable(ssi->baudclk); if (ret) return ret; - ssi_private->baudclk_streams |= BIT(substream->stream); + ssi->baudclk_streams |= BIT(substream->stream); } } - if (!fsl_ssi_is_ac97(ssi_private)) { + if (!fsl_ssi_is_ac97(ssi)) { u8 i2smode; /* * Switch to normal net mode in order to have a frame sync * signal every 32 bits instead of 16 bits */ - if (fsl_ssi_is_i2s_cbm_cfs(ssi_private) && sample_size == 16) + if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16) i2smode = CCSR_SSI_SCR_I2S_MODE_NORMAL | CCSR_SSI_SCR_NET; else - i2smode = ssi_private->i2s_mode; + i2smode = ssi->i2s_mode; regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_NET | CCSR_SSI_SCR_I2S_MODE_MASK, @@ -933,7 +929,7 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, /* In synchronous mode, the SSI uses STCCR for capture */ if ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) || - ssi_private->cpu_dai_drv.symmetric_rates) + 
ssi->cpu_dai_drv.symmetric_rates) regmap_update_bits(regs, CCSR_SSI_STCCR, CCSR_SSI_SxCCR_WL_MASK, wl); else @@ -947,34 +943,32 @@ static int fsl_ssi_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct fsl_ssi_private *ssi_private = - snd_soc_dai_get_drvdata(rtd->cpu_dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(rtd->cpu_dai); - if (fsl_ssi_is_i2s_master(ssi_private) && - ssi_private->baudclk_streams & BIT(substream->stream)) { - clk_disable_unprepare(ssi_private->baudclk); - ssi_private->baudclk_streams &= ~BIT(substream->stream); + if (fsl_ssi_is_i2s_master(ssi) && + ssi->baudclk_streams & BIT(substream->stream)) { + clk_disable_unprepare(ssi->baudclk); + ssi->baudclk_streams &= ~BIT(substream->stream); } return 0; } static int _fsl_ssi_set_dai_fmt(struct device *dev, - struct fsl_ssi_private *ssi_private, - unsigned int fmt) + struct fsl_ssi *ssi, unsigned int fmt) { - struct regmap *regs = ssi_private->regs; + struct regmap *regs = ssi->regs; u32 strcr = 0, stcr, srcr, scr, mask; u8 wm; - ssi_private->dai_fmt = fmt; + ssi->dai_fmt = fmt; - if (fsl_ssi_is_i2s_master(ssi_private) && IS_ERR(ssi_private->baudclk)) { + if (fsl_ssi_is_i2s_master(ssi) && IS_ERR(ssi->baudclk)) { dev_err(dev, "baudclk is missing which is necessary for master mode\n"); return -EINVAL; } - fsl_ssi_setup_reg_vals(ssi_private); + fsl_ssi_setup_reg_vals(ssi); regmap_read(regs, CCSR_SSI_SCR, &scr); scr &= ~(CCSR_SSI_SCR_SYN | CCSR_SSI_SCR_I2S_MODE_MASK); @@ -988,7 +982,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, stcr &= ~mask; srcr &= ~mask; - ssi_private->i2s_mode = CCSR_SSI_SCR_NET; + ssi->i2s_mode = CCSR_SSI_SCR_NET; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: regmap_update_bits(regs, CCSR_SSI_STCCR, @@ -1000,10 +994,10 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFS: case 
SND_SOC_DAIFMT_CBS_CFS: - ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_MASTER; + ssi->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_MASTER; break; case SND_SOC_DAIFMT_CBM_CFM: - ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_SLAVE; + ssi->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_SLAVE; break; default: return -EINVAL; @@ -1028,12 +1022,12 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, CCSR_SSI_STCR_TXBIT0; break; case SND_SOC_DAIFMT_AC97: - ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_NORMAL; + ssi->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_NORMAL; break; default: return -EINVAL; } - scr |= ssi_private->i2s_mode; + scr |= ssi->i2s_mode; /* DAI clock inversion */ switch (fmt & SND_SOC_DAIFMT_INV_MASK) { @@ -1072,15 +1066,14 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, scr &= ~CCSR_SSI_SCR_SYS_CLK_EN; break; default: - if (!fsl_ssi_is_ac97(ssi_private)) + if (!fsl_ssi_is_ac97(ssi)) return -EINVAL; } stcr |= strcr; srcr |= strcr; - if (ssi_private->cpu_dai_drv.symmetric_rates - || fsl_ssi_is_ac97(ssi_private)) { + if (ssi->cpu_dai_drv.symmetric_rates || fsl_ssi_is_ac97(ssi)) { /* Need to clear RXDIR when using SYNC or AC97 mode */ srcr &= ~CCSR_SSI_SRCR_RXDIR; scr |= CCSR_SSI_SCR_SYN; @@ -1090,13 +1083,13 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, regmap_write(regs, CCSR_SSI_SRCR, srcr); regmap_write(regs, CCSR_SSI_SCR, scr); - wm = ssi_private->fifo_watermark; + wm = ssi->fifo_watermark; regmap_write(regs, CCSR_SSI_SFCSR, CCSR_SSI_SFCSR_TFWM0(wm) | CCSR_SSI_SFCSR_RFWM0(wm) | CCSR_SSI_SFCSR_TFWM1(wm) | CCSR_SSI_SFCSR_RFWM1(wm)); - if (ssi_private->use_dual_fifo) { + if (ssi->use_dual_fifo) { regmap_update_bits(regs, CCSR_SSI_SRCR, CCSR_SSI_SRCR_RFEN1, CCSR_SSI_SRCR_RFEN1); regmap_update_bits(regs, CCSR_SSI_STCR, CCSR_SSI_STCR_TFEN1, @@ -1106,7 +1099,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, } if ((fmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_AC97) - fsl_ssi_setup_ac97(ssi_private); + fsl_ssi_setup_ac97(ssi); return 0; @@ -1117,12 +1110,12 
@@ static int _fsl_ssi_set_dai_fmt(struct device *dev, */ static int fsl_ssi_set_dai_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) { - struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(cpu_dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); - if (fsl_ssi_is_ac97(ssi_private)) + if (fsl_ssi_is_ac97(ssi)) return 0; - return _fsl_ssi_set_dai_fmt(cpu_dai->dev, ssi_private, fmt); + return _fsl_ssi_set_dai_fmt(cpu_dai->dev, ssi, fmt); } /** @@ -1133,8 +1126,8 @@ static int fsl_ssi_set_dai_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, u32 rx_mask, int slots, int slot_width) { - struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(cpu_dai); - struct regmap *regs = ssi_private->regs; + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); + struct regmap *regs = ssi->regs; u32 val; /* The word length should be 8, 10, 12, 16, 18, 20, 22 or 24 */ @@ -1169,8 +1162,8 @@ static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_SSIEN, val); - ssi_private->slot_width = slot_width; - ssi_private->slots = slots; + ssi->slot_width = slot_width; + ssi->slots = slots; return 0; } @@ -1188,33 +1181,33 @@ static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(rtd->cpu_dai); - struct regmap *regs = ssi_private->regs; + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(rtd->cpu_dai); + struct regmap *regs = ssi->regs; switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - fsl_ssi_tx_config(ssi_private, true); + fsl_ssi_tx_config(ssi, true); else - fsl_ssi_rx_config(ssi_private, true); + fsl_ssi_rx_config(ssi, true); break; 
case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - fsl_ssi_tx_config(ssi_private, false); + fsl_ssi_tx_config(ssi, false); else - fsl_ssi_rx_config(ssi_private, false); + fsl_ssi_rx_config(ssi, false); break; default: return -EINVAL; } - if (fsl_ssi_is_ac97(ssi_private)) { + if (fsl_ssi_is_ac97(ssi)) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) regmap_write(regs, CCSR_SSI_SOR, CCSR_SSI_SOR_TX_CLR); else @@ -1226,11 +1219,11 @@ static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd, static int fsl_ssi_dai_probe(struct snd_soc_dai *dai) { - struct fsl_ssi_private *ssi_private = snd_soc_dai_get_drvdata(dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(dai); - if (ssi_private->soc->imx && ssi_private->use_dma) { - dai->playback_dma_data = &ssi_private->dma_params_tx; - dai->capture_dma_data = &ssi_private->dma_params_rx; + if (ssi->soc->imx && ssi->use_dma) { + dai->playback_dma_data = &ssi->dma_params_tx; + dai->capture_dma_data = &ssi->dma_params_rx; } return 0; @@ -1292,7 +1285,7 @@ static struct snd_soc_dai_driver fsl_ssi_ac97_dai = { }; -static struct fsl_ssi_private *fsl_ac97_data; +static struct fsl_ssi *fsl_ac97_data; static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, unsigned short val) @@ -1383,24 +1376,24 @@ static void make_lowercase(char *s) } static int fsl_ssi_imx_probe(struct platform_device *pdev, - struct fsl_ssi_private *ssi_private, void __iomem *iomem) + struct fsl_ssi *ssi, void __iomem *iomem) { struct device_node *np = pdev->dev.of_node; u32 dmas[4]; int ret; - if (ssi_private->has_ipg_clk_name) - ssi_private->clk = devm_clk_get(&pdev->dev, "ipg"); + if (ssi->has_ipg_clk_name) + ssi->clk = devm_clk_get(&pdev->dev, "ipg"); else - ssi_private->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(ssi_private->clk)) { - ret = PTR_ERR(ssi_private->clk); + ssi->clk = devm_clk_get(&pdev->dev, NULL); + if 
(IS_ERR(ssi->clk)) { + ret = PTR_ERR(ssi->clk); dev_err(&pdev->dev, "could not get clock: %d\n", ret); return ret; } - if (!ssi_private->has_ipg_clk_name) { - ret = clk_prepare_enable(ssi_private->clk); + if (!ssi->has_ipg_clk_name) { + ret = clk_prepare_enable(ssi->clk); if (ret) { dev_err(&pdev->dev, "clk_prepare_enable failed: %d\n", ret); return ret; @@ -1410,27 +1403,27 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, /* For those SLAVE implementations, we ignore non-baudclk cases * and, instead, abandon MASTER mode that needs baud clock. */ - ssi_private->baudclk = devm_clk_get(&pdev->dev, "baud"); - if (IS_ERR(ssi_private->baudclk)) + ssi->baudclk = devm_clk_get(&pdev->dev, "baud"); + if (IS_ERR(ssi->baudclk)) dev_dbg(&pdev->dev, "could not get baud clock: %ld\n", - PTR_ERR(ssi_private->baudclk)); + PTR_ERR(ssi->baudclk)); - ssi_private->dma_params_tx.maxburst = ssi_private->dma_maxburst; - ssi_private->dma_params_rx.maxburst = ssi_private->dma_maxburst; - ssi_private->dma_params_tx.addr = ssi_private->ssi_phys + CCSR_SSI_STX0; - ssi_private->dma_params_rx.addr = ssi_private->ssi_phys + CCSR_SSI_SRX0; + ssi->dma_params_tx.maxburst = ssi->dma_maxburst; + ssi->dma_params_rx.maxburst = ssi->dma_maxburst; + ssi->dma_params_tx.addr = ssi->ssi_phys + CCSR_SSI_STX0; + ssi->dma_params_rx.addr = ssi->ssi_phys + CCSR_SSI_SRX0; ret = of_property_read_u32_array(np, "dmas", dmas, 4); - if (ssi_private->use_dma && !ret && dmas[2] == IMX_DMATYPE_SSI_DUAL) { - ssi_private->use_dual_fifo = true; + if (ssi->use_dma && !ret && dmas[2] == IMX_DMATYPE_SSI_DUAL) { + ssi->use_dual_fifo = true; /* When using dual fifo mode, we need to keep watermark * as even numbers due to dma script limitation. */ - ssi_private->dma_params_tx.maxburst &= ~0x1; - ssi_private->dma_params_rx.maxburst &= ~0x1; + ssi->dma_params_tx.maxburst &= ~0x1; + ssi->dma_params_rx.maxburst &= ~0x1; } - if (!ssi_private->use_dma) { + if (!ssi->use_dma) { /* * Some boards use an incompatible codec. 
To get it @@ -1439,14 +1432,12 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, * situation. */ - ssi_private->fiq_params.irq = ssi_private->irq; - ssi_private->fiq_params.base = iomem; - ssi_private->fiq_params.dma_params_rx = - &ssi_private->dma_params_rx; - ssi_private->fiq_params.dma_params_tx = - &ssi_private->dma_params_tx; + ssi->fiq_params.irq = ssi->irq; + ssi->fiq_params.base = iomem; + ssi->fiq_params.dma_params_rx = &ssi->dma_params_rx; + ssi->fiq_params.dma_params_tx = &ssi->dma_params_tx; - ret = imx_pcm_fiq_init(pdev, &ssi_private->fiq_params); + ret = imx_pcm_fiq_init(pdev, &ssi->fiq_params); if (ret) goto error_pcm; } else { @@ -1459,23 +1450,23 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, error_pcm: - if (!ssi_private->has_ipg_clk_name) - clk_disable_unprepare(ssi_private->clk); + if (!ssi->has_ipg_clk_name) + clk_disable_unprepare(ssi->clk); return ret; } static void fsl_ssi_imx_clean(struct platform_device *pdev, - struct fsl_ssi_private *ssi_private) + struct fsl_ssi *ssi) { - if (!ssi_private->use_dma) + if (!ssi->use_dma) imx_pcm_fiq_exit(pdev); - if (!ssi_private->has_ipg_clk_name) - clk_disable_unprepare(ssi_private->clk); + if (!ssi->has_ipg_clk_name) + clk_disable_unprepare(ssi->clk); } static int fsl_ssi_probe(struct platform_device *pdev) { - struct fsl_ssi_private *ssi_private; + struct fsl_ssi *ssi; int ret = 0; struct device_node *np = pdev->dev.of_node; const struct of_device_id *of_id; @@ -1490,42 +1481,41 @@ static int fsl_ssi_probe(struct platform_device *pdev) if (!of_id || !of_id->data) return -EINVAL; - ssi_private = devm_kzalloc(&pdev->dev, sizeof(*ssi_private), + ssi = devm_kzalloc(&pdev->dev, sizeof(*ssi), GFP_KERNEL); - if (!ssi_private) + if (!ssi) return -ENOMEM; - ssi_private->soc = of_id->data; - ssi_private->dev = &pdev->dev; + ssi->soc = of_id->data; + ssi->dev = &pdev->dev; sprop = of_get_property(np, "fsl,mode", NULL); if (sprop) { if (!strcmp(sprop, "ac97-slave")) - 
ssi_private->dai_fmt = SND_SOC_DAIFMT_AC97; + ssi->dai_fmt = SND_SOC_DAIFMT_AC97; } - ssi_private->use_dma = !of_property_read_bool(np, - "fsl,fiq-stream-filter"); + ssi->use_dma = !of_property_read_bool(np, "fsl,fiq-stream-filter"); - if (fsl_ssi_is_ac97(ssi_private)) { - memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_ac97_dai, + if (fsl_ssi_is_ac97(ssi)) { + memcpy(&ssi->cpu_dai_drv, &fsl_ssi_ac97_dai, sizeof(fsl_ssi_ac97_dai)); - fsl_ac97_data = ssi_private; + fsl_ac97_data = ssi; } else { /* Initialize this copy of the CPU DAI driver structure */ - memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template, + memcpy(&ssi->cpu_dai_drv, &fsl_ssi_dai_template, sizeof(fsl_ssi_dai_template)); } - ssi_private->cpu_dai_drv.name = dev_name(&pdev->dev); + ssi->cpu_dai_drv.name = dev_name(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); iomem = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(iomem)) return PTR_ERR(iomem); - ssi_private->ssi_phys = res->start; + ssi->ssi_phys = res->start; - if (ssi_private->soc->imx21regs) { + if (ssi->soc->imx21regs) { /* * According to datasheet imx21-class SSI * don't have SACC{ST,EN,DIS} regs. 
@@ -1537,42 +1527,42 @@ static int fsl_ssi_probe(struct platform_device *pdev) ret = of_property_match_string(np, "clock-names", "ipg"); if (ret < 0) { - ssi_private->has_ipg_clk_name = false; - ssi_private->regs = devm_regmap_init_mmio(&pdev->dev, iomem, + ssi->has_ipg_clk_name = false; + ssi->regs = devm_regmap_init_mmio(&pdev->dev, iomem, &regconfig); } else { - ssi_private->has_ipg_clk_name = true; - ssi_private->regs = devm_regmap_init_mmio_clk(&pdev->dev, + ssi->has_ipg_clk_name = true; + ssi->regs = devm_regmap_init_mmio_clk(&pdev->dev, "ipg", iomem, &regconfig); } - if (IS_ERR(ssi_private->regs)) { + if (IS_ERR(ssi->regs)) { dev_err(&pdev->dev, "Failed to init register map\n"); - return PTR_ERR(ssi_private->regs); + return PTR_ERR(ssi->regs); } - ssi_private->irq = platform_get_irq(pdev, 0); - if (ssi_private->irq < 0) { + ssi->irq = platform_get_irq(pdev, 0); + if (ssi->irq < 0) { dev_err(&pdev->dev, "no irq for node %s\n", pdev->name); - return ssi_private->irq; + return ssi->irq; } /* Are the RX and the TX clocks locked? */ if (!of_find_property(np, "fsl,ssi-asynchronous", NULL)) { - if (!fsl_ssi_is_ac97(ssi_private)) { - ssi_private->cpu_dai_drv.symmetric_rates = 1; - ssi_private->cpu_dai_drv.symmetric_samplebits = 1; + if (!fsl_ssi_is_ac97(ssi)) { + ssi->cpu_dai_drv.symmetric_rates = 1; + ssi->cpu_dai_drv.symmetric_samplebits = 1; } - ssi_private->cpu_dai_drv.symmetric_channels = 1; + ssi->cpu_dai_drv.symmetric_channels = 1; } /* Determine the FIFO depth. */ iprop = of_get_property(np, "fsl,fifo-depth", NULL); if (iprop) - ssi_private->fifo_depth = be32_to_cpup(iprop); + ssi->fifo_depth = be32_to_cpup(iprop); else /* Older 8610 DTs didn't have the fifo-depth property */ - ssi_private->fifo_depth = 8; + ssi->fifo_depth = 8; /* * Set the watermark for transmit FIFO 0 and receive FIFO 0. We don't @@ -1589,7 +1579,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) * fiq it is probably better to use the biggest possible watermark * size. 
*/ - switch (ssi_private->fifo_depth) { + switch (ssi->fifo_depth) { case 15: /* * 2 samples is not enough when running at high data @@ -1598,8 +1588,8 @@ static int fsl_ssi_probe(struct platform_device *pdev) * for the DMA to fill the FIFO before it's over/under * run. */ - ssi_private->fifo_watermark = 8; - ssi_private->dma_maxburst = 8; + ssi->fifo_watermark = 8; + ssi->dma_maxburst = 8; break; case 8: default: @@ -1610,21 +1600,21 @@ static int fsl_ssi_probe(struct platform_device *pdev) * I suspect this could be changed to be something to * leave some more space in the fifo. */ - ssi_private->fifo_watermark = ssi_private->fifo_depth - 2; - ssi_private->dma_maxburst = ssi_private->fifo_depth - 2; + ssi->fifo_watermark = ssi->fifo_depth - 2; + ssi->dma_maxburst = ssi->fifo_depth - 2; break; } - dev_set_drvdata(&pdev->dev, ssi_private); + dev_set_drvdata(&pdev->dev, ssi); - if (ssi_private->soc->imx) { - ret = fsl_ssi_imx_probe(pdev, ssi_private, iomem); + if (ssi->soc->imx) { + ret = fsl_ssi_imx_probe(pdev, ssi, iomem); if (ret) return ret; } - if (fsl_ssi_is_ac97(ssi_private)) { - mutex_init(&ssi_private->ac97_reg_lock); + if (fsl_ssi_is_ac97(ssi)) { + mutex_init(&ssi->ac97_reg_lock); ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); if (ret) { dev_err(&pdev->dev, "could not set AC'97 ops\n"); @@ -1633,24 +1623,24 @@ static int fsl_ssi_probe(struct platform_device *pdev) } ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, - &ssi_private->cpu_dai_drv, 1); + &ssi->cpu_dai_drv, 1); if (ret) { dev_err(&pdev->dev, "failed to register DAI: %d\n", ret); goto error_asoc_register; } - if (ssi_private->use_dma) { - ret = devm_request_irq(&pdev->dev, ssi_private->irq, + if (ssi->use_dma) { + ret = devm_request_irq(&pdev->dev, ssi->irq, fsl_ssi_isr, 0, dev_name(&pdev->dev), - ssi_private); + ssi); if (ret < 0) { dev_err(&pdev->dev, "could not claim irq %u\n", - ssi_private->irq); + ssi->irq); goto error_asoc_register; } } - ret = 
fsl_ssi_debugfs_create(&ssi_private->dbg_stats, &pdev->dev); + ret = fsl_ssi_debugfs_create(&ssi->dbg_stats, &pdev->dev); if (ret) goto error_asoc_register; @@ -1675,20 +1665,18 @@ static int fsl_ssi_probe(struct platform_device *pdev) snprintf(name, sizeof(name), "snd-soc-%s", sprop); make_lowercase(name); - ssi_private->pdev = - platform_device_register_data(&pdev->dev, name, 0, NULL, 0); - if (IS_ERR(ssi_private->pdev)) { - ret = PTR_ERR(ssi_private->pdev); + ssi->pdev = platform_device_register_data(&pdev->dev, name, 0, NULL, 0); + if (IS_ERR(ssi->pdev)) { + ret = PTR_ERR(ssi->pdev); dev_err(&pdev->dev, "failed to register platform: %d\n", ret); goto error_sound_card; } done: - if (ssi_private->dai_fmt) - _fsl_ssi_set_dai_fmt(&pdev->dev, ssi_private, - ssi_private->dai_fmt); + if (ssi->dai_fmt) + _fsl_ssi_set_dai_fmt(&pdev->dev, ssi, ssi->dai_fmt); - if (fsl_ssi_is_ac97(ssi_private)) { + if (fsl_ssi_is_ac97(ssi)) { u32 ssi_idx; ret = of_property_read_u32(np, "cell-index", &ssi_idx); @@ -1697,11 +1685,10 @@ done: goto error_sound_card; } - ssi_private->pdev = - platform_device_register_data(NULL, + ssi->pdev = platform_device_register_data(NULL, "ac97-codec", ssi_idx, NULL, 0); - if (IS_ERR(ssi_private->pdev)) { - ret = PTR_ERR(ssi_private->pdev); + if (IS_ERR(ssi->pdev)) { + ret = PTR_ERR(ssi->pdev); dev_err(&pdev->dev, "failed to register AC97 codec platform: %d\n", ret); @@ -1712,37 +1699,37 @@ done: return 0; error_sound_card: - fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); + fsl_ssi_debugfs_remove(&ssi->dbg_stats); error_asoc_register: - if (fsl_ssi_is_ac97(ssi_private)) + if (fsl_ssi_is_ac97(ssi)) snd_soc_set_ac97_ops(NULL); error_ac97_ops: - if (fsl_ssi_is_ac97(ssi_private)) - mutex_destroy(&ssi_private->ac97_reg_lock); + if (fsl_ssi_is_ac97(ssi)) + mutex_destroy(&ssi->ac97_reg_lock); - if (ssi_private->soc->imx) - fsl_ssi_imx_clean(pdev, ssi_private); + if (ssi->soc->imx) + fsl_ssi_imx_clean(pdev, ssi); return ret; } static int fsl_ssi_remove(struct 
platform_device *pdev) { - struct fsl_ssi_private *ssi_private = dev_get_drvdata(&pdev->dev); + struct fsl_ssi *ssi = dev_get_drvdata(&pdev->dev); - fsl_ssi_debugfs_remove(&ssi_private->dbg_stats); + fsl_ssi_debugfs_remove(&ssi->dbg_stats); - if (ssi_private->pdev) - platform_device_unregister(ssi_private->pdev); + if (ssi->pdev) + platform_device_unregister(ssi->pdev); - if (ssi_private->soc->imx) - fsl_ssi_imx_clean(pdev, ssi_private); + if (ssi->soc->imx) + fsl_ssi_imx_clean(pdev, ssi); - if (fsl_ssi_is_ac97(ssi_private)) { + if (fsl_ssi_is_ac97(ssi)) { snd_soc_set_ac97_ops(NULL); - mutex_destroy(&ssi_private->ac97_reg_lock); + mutex_destroy(&ssi->ac97_reg_lock); } return 0; @@ -1751,13 +1738,11 @@ static int fsl_ssi_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int fsl_ssi_suspend(struct device *dev) { - struct fsl_ssi_private *ssi_private = dev_get_drvdata(dev); - struct regmap *regs = ssi_private->regs; + struct fsl_ssi *ssi = dev_get_drvdata(dev); + struct regmap *regs = ssi->regs; - regmap_read(regs, CCSR_SSI_SFCSR, - &ssi_private->regcache_sfcsr); - regmap_read(regs, CCSR_SSI_SACNT, - &ssi_private->regcache_sacnt); + regmap_read(regs, CCSR_SSI_SFCSR, &ssi->regcache_sfcsr); + regmap_read(regs, CCSR_SSI_SACNT, &ssi->regcache_sacnt); regcache_cache_only(regs, true); regcache_mark_dirty(regs); @@ -1767,17 +1752,16 @@ static int fsl_ssi_suspend(struct device *dev) static int fsl_ssi_resume(struct device *dev) { - struct fsl_ssi_private *ssi_private = dev_get_drvdata(dev); - struct regmap *regs = ssi_private->regs; + struct fsl_ssi *ssi = dev_get_drvdata(dev); + struct regmap *regs = ssi->regs; regcache_cache_only(regs, false); regmap_update_bits(regs, CCSR_SSI_SFCSR, CCSR_SSI_SFCSR_RFWM1_MASK | CCSR_SSI_SFCSR_TFWM1_MASK | CCSR_SSI_SFCSR_RFWM0_MASK | CCSR_SSI_SFCSR_TFWM0_MASK, - ssi_private->regcache_sfcsr); - regmap_write(regs, CCSR_SSI_SACNT, - ssi_private->regcache_sacnt); + ssi->regcache_sfcsr); + regmap_write(regs, CCSR_SSI_SACNT, 
ssi->regcache_sacnt); return regcache_sync(regs); } -- cgit v1.2.3 From 8483c06797b62dbddac800b555b2fa1c689dbb1c Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:01 -0800 Subject: ASoC: fsl_ssi: Cache pdev->dev pointer There should be no trouble to understand dev = pdev->dev. This can save some space to have more print info or save some wrapped lines. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 64 ++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 84d2f7ecb5e1..e903c92d5512 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1379,23 +1379,24 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, struct fsl_ssi *ssi, void __iomem *iomem) { struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; u32 dmas[4]; int ret; if (ssi->has_ipg_clk_name) - ssi->clk = devm_clk_get(&pdev->dev, "ipg"); + ssi->clk = devm_clk_get(dev, "ipg"); else - ssi->clk = devm_clk_get(&pdev->dev, NULL); + ssi->clk = devm_clk_get(dev, NULL); if (IS_ERR(ssi->clk)) { ret = PTR_ERR(ssi->clk); - dev_err(&pdev->dev, "could not get clock: %d\n", ret); + dev_err(dev, "could not get clock: %d\n", ret); return ret; } if (!ssi->has_ipg_clk_name) { ret = clk_prepare_enable(ssi->clk); if (ret) { - dev_err(&pdev->dev, "clk_prepare_enable failed: %d\n", ret); + dev_err(dev, "clk_prepare_enable failed: %d\n", ret); return ret; } } @@ -1403,9 +1404,9 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, /* For those SLAVE implementations, we ignore non-baudclk cases * and, instead, abandon MASTER mode that needs baud clock. 
*/ - ssi->baudclk = devm_clk_get(&pdev->dev, "baud"); + ssi->baudclk = devm_clk_get(dev, "baud"); if (IS_ERR(ssi->baudclk)) - dev_dbg(&pdev->dev, "could not get baud clock: %ld\n", + dev_dbg(dev, "could not get baud clock: %ld\n", PTR_ERR(ssi->baudclk)); ssi->dma_params_tx.maxburst = ssi->dma_maxburst; @@ -1469,6 +1470,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) struct fsl_ssi *ssi; int ret = 0; struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; const struct of_device_id *of_id; const char *p, *sprop; const uint32_t *iprop; @@ -1477,17 +1479,16 @@ static int fsl_ssi_probe(struct platform_device *pdev) char name[64]; struct regmap_config regconfig = fsl_ssi_regconfig; - of_id = of_match_device(fsl_ssi_ids, &pdev->dev); + of_id = of_match_device(fsl_ssi_ids, dev); if (!of_id || !of_id->data) return -EINVAL; - ssi = devm_kzalloc(&pdev->dev, sizeof(*ssi), - GFP_KERNEL); + ssi = devm_kzalloc(dev, sizeof(*ssi), GFP_KERNEL); if (!ssi) return -ENOMEM; ssi->soc = of_id->data; - ssi->dev = &pdev->dev; + ssi->dev = dev; sprop = of_get_property(np, "fsl,mode", NULL); if (sprop) { @@ -1507,10 +1508,10 @@ static int fsl_ssi_probe(struct platform_device *pdev) memcpy(&ssi->cpu_dai_drv, &fsl_ssi_dai_template, sizeof(fsl_ssi_dai_template)); } - ssi->cpu_dai_drv.name = dev_name(&pdev->dev); + ssi->cpu_dai_drv.name = dev_name(dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - iomem = devm_ioremap_resource(&pdev->dev, res); + iomem = devm_ioremap_resource(dev, res); if (IS_ERR(iomem)) return PTR_ERR(iomem); ssi->ssi_phys = res->start; @@ -1528,21 +1529,20 @@ static int fsl_ssi_probe(struct platform_device *pdev) ret = of_property_match_string(np, "clock-names", "ipg"); if (ret < 0) { ssi->has_ipg_clk_name = false; - ssi->regs = devm_regmap_init_mmio(&pdev->dev, iomem, - &regconfig); + ssi->regs = devm_regmap_init_mmio(dev, iomem, &regconfig); } else { ssi->has_ipg_clk_name = true; - ssi->regs = devm_regmap_init_mmio_clk(&pdev->dev, - 
"ipg", iomem, &regconfig); + ssi->regs = devm_regmap_init_mmio_clk(dev, "ipg", iomem, + &regconfig); } if (IS_ERR(ssi->regs)) { - dev_err(&pdev->dev, "Failed to init register map\n"); + dev_err(dev, "Failed to init register map\n"); return PTR_ERR(ssi->regs); } ssi->irq = platform_get_irq(pdev, 0); if (ssi->irq < 0) { - dev_err(&pdev->dev, "no irq for node %s\n", pdev->name); + dev_err(dev, "no irq for node %s\n", pdev->name); return ssi->irq; } @@ -1605,7 +1605,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) break; } - dev_set_drvdata(&pdev->dev, ssi); + dev_set_drvdata(dev, ssi); if (ssi->soc->imx) { ret = fsl_ssi_imx_probe(pdev, ssi, iomem); @@ -1617,30 +1617,28 @@ static int fsl_ssi_probe(struct platform_device *pdev) mutex_init(&ssi->ac97_reg_lock); ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); if (ret) { - dev_err(&pdev->dev, "could not set AC'97 ops\n"); + dev_err(dev, "could not set AC'97 ops\n"); goto error_ac97_ops; } } - ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component, + ret = devm_snd_soc_register_component(dev, &fsl_ssi_component, &ssi->cpu_dai_drv, 1); if (ret) { - dev_err(&pdev->dev, "failed to register DAI: %d\n", ret); + dev_err(dev, "failed to register DAI: %d\n", ret); goto error_asoc_register; } if (ssi->use_dma) { - ret = devm_request_irq(&pdev->dev, ssi->irq, - fsl_ssi_isr, 0, dev_name(&pdev->dev), - ssi); + ret = devm_request_irq(dev, ssi->irq, fsl_ssi_isr, 0, + dev_name(dev), ssi); if (ret < 0) { - dev_err(&pdev->dev, "could not claim irq %u\n", - ssi->irq); + dev_err(dev, "could not claim irq %u\n", ssi->irq); goto error_asoc_register; } } - ret = fsl_ssi_debugfs_create(&ssi->dbg_stats, &pdev->dev); + ret = fsl_ssi_debugfs_create(&ssi->dbg_stats, dev); if (ret) goto error_asoc_register; @@ -1665,23 +1663,23 @@ static int fsl_ssi_probe(struct platform_device *pdev) snprintf(name, sizeof(name), "snd-soc-%s", sprop); make_lowercase(name); - ssi->pdev = platform_device_register_data(&pdev->dev, name, 
0, NULL, 0); + ssi->pdev = platform_device_register_data(dev, name, 0, NULL, 0); if (IS_ERR(ssi->pdev)) { ret = PTR_ERR(ssi->pdev); - dev_err(&pdev->dev, "failed to register platform: %d\n", ret); + dev_err(dev, "failed to register platform: %d\n", ret); goto error_sound_card; } done: if (ssi->dai_fmt) - _fsl_ssi_set_dai_fmt(&pdev->dev, ssi, ssi->dai_fmt); + _fsl_ssi_set_dai_fmt(dev, ssi, ssi->dai_fmt); if (fsl_ssi_is_ac97(ssi)) { u32 ssi_idx; ret = of_property_read_u32(np, "cell-index", &ssi_idx); if (ret) { - dev_err(&pdev->dev, "cannot get SSI index property\n"); + dev_err(dev, "cannot get SSI index property\n"); goto error_sound_card; } @@ -1689,7 +1687,7 @@ done: "ac97-codec", ssi_idx, NULL, 0); if (IS_ERR(ssi->pdev)) { ret = PTR_ERR(ssi->pdev); - dev_err(&pdev->dev, + dev_err(dev, "failed to register AC97 codec platform: %d\n", ret); goto error_sound_card; -- cgit v1.2.3 From 7a8fceb74de407f65201f3eaaee35377c2b71dbb Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:02 -0800 Subject: ASoC: fsl_ssi: Refine all comments This patch refines the comments by: 1) Removing all out-of-date comments 2) Removing all not-so-useful comments 3) Unifying the styles of all comments 4) Shortening comments to be more concise 5) Adding comments to improve code readability 6) Moving all register related comments to fsl_ssi.h 7) Adding comments to all register and field defines Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. 
Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 383 ++++++++++++++++---------------------------- sound/soc/fsl/fsl_ssi.h | 67 +++++++- sound/soc/fsl/fsl_ssi_dbg.c | 12 +- 3 files changed, 208 insertions(+), 254 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index e903c92d5512..ff1827a074be 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -187,42 +187,48 @@ struct fsl_ssi_soc_data { /** * fsl_ssi: per-SSI private data * - * @reg: Pointer to the regmap registers + * @regs: Pointer to the regmap registers * @irq: IRQ of this SSI * @cpu_dai_drv: CPU DAI driver for this device * * @dai_fmt: DAI configuration this device is currently used with - * @i2s_mode: i2s and network mode configuration of the device. Is used to - * switch between normal and i2s/network mode - * mode depending on the number of channels + * @i2s_mode: I2S and Network mode configuration of SCR register * @use_dma: DMA is used or FIQ with stream filter - * @use_dual_fifo: DMA with support for both FIFOs used - * @fifo_deph: Depth of the SSI FIFOs - * @slot_width: width of each DAI slot - * @slots: number of slots - * @rxtx_reg_val: Specific register settings for receive/transmit configuration + * @use_dual_fifo: DMA with support for dual FIFO mode + * @has_ipg_clk_name: If "ipg" is in the clock name list of device tree + * @fifo_depth: Depth of the SSI FIFOs + * @slot_width: Width of each DAI slot + * @slots: Number of slots + * @rxtx_reg_val: Specific RX/TX register settings * - * @clk: SSI clock - * @baudclk: SSI baud clock for master mode + * @clk: Clock source to access register + * @baudclk: Clock source to generate bit and frame-sync clocks * @baudclk_streams: Active streams that are using baudclk * + * @regcache_sfcsr: Cache sfcsr register value during suspend and resume + * @regcache_sacnt: Cache sacnt register value during suspend and resume + * * @dma_params_tx: DMA transmit parameters * @dma_params_rx: 
DMA receive parameters * @ssi_phys: physical address of the SSI registers * * @fiq_params: FIQ stream filtering parameters * - * @pdev: Pointer to pdev used for deprecated fsl-ssi sound card + * @pdev: Pointer to pdev when using fsl-ssi as sound card (ppc only) + * TODO: Should be replaced with simple-sound-card * * @dbg_stats: Debugging statistics * * @soc: SoC specific data + * @dev: Pointer to &pdev->dev + * + * @fifo_watermark: The FIFO watermark setting. Notifies DMA when there are + * @fifo_watermark or fewer words in TX fifo or + * @fifo_watermark or more empty words in RX fifo. + * @dma_maxburst: Max number of words to transfer in one go. So far, + * this is always the same as fifo_watermark. * - * @fifo_watermark: the FIFO watermark setting. Notifies DMA when - * there are @fifo_watermark or fewer words in TX fifo or - * @fifo_watermark or more empty words in RX fifo. - * @dma_maxburst: max number of words to transfer in one go. So far, - * this is always the same as fifo_watermark. + * @ac97_reg_lock: Mutex lock to serialize AC97 register access operations */ struct fsl_ssi { struct regmap *regs; @@ -243,20 +249,15 @@ struct fsl_ssi { struct clk *baudclk; unsigned int baudclk_streams; - /* regcache for volatile regs */ u32 regcache_sfcsr; u32 regcache_sacnt; - /* DMA params */ struct snd_dmaengine_dai_dma_data dma_params_tx; struct snd_dmaengine_dai_dma_data dma_params_rx; dma_addr_t ssi_phys; - /* params for non-dma FIQ stream filtered mode */ struct imx_pcm_fiq_params fiq_params; - /* Used when using fsl-ssi as sound-card. This is only used by ppc and - * should be replaced with simple-sound-card. */ struct platform_device *pdev; struct fsl_ssi_dbg dbg_stats; @@ -271,19 +272,19 @@ struct fsl_ssi { }; /* - * imx51 and later SoCs have a slightly different IP that allows the - * SSI configuration while the SSI unit is running. 
- * - * More important, it is necessary on those SoCs to configure the - * sperate TX/RX DMA bits just before starting the stream - * (fsl_ssi_trigger). The SDMA unit has to be configured before fsl_ssi - * sends any DMA requests to the SDMA unit, otherwise it is not defined - * how the SDMA unit handles the DMA request. + * SoC specific data * - * SDMA units are present on devices starting at imx35 but the imx35 - * reference manual states that the DMA bits should not be changed - * while the SSI unit is running (SSIEN). So we support the necessary - * online configuration of fsl-ssi starting at imx51. + * Notes: + * 1) SSI in earlier SoCS has critical bits in control registers that + * cannot be changed after SSI starts running -- a software reset + * (set SSIEN to 0) is required to change their values. So adding + * an offline_config flag for these SoCs. + * 2) SDMA is available since imx35. However, imx35 does not support + * DMA bits changing when SSI is running, so set offline_config. + * 3) imx51 and later versions support register configurations when + * SSI is running (SSIEN); For these versions, DMA needs to be + * configured before SSI sends DMA request to avoid an undefined + * DMA request on the SDMA side. */ static struct fsl_ssi_soc_data fsl_ssi_mpc8610 = { @@ -342,17 +343,9 @@ static bool fsl_ssi_is_i2s_cbm_cfs(struct fsl_ssi *ssi) return (ssi->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) == SND_SOC_DAIFMT_CBM_CFS; } + /** - * fsl_ssi_isr: SSI interrupt handler - * - * Although it's possible to use the interrupt handler to send and receive - * data to/from the SSI, we use the DMA instead. Programming is more - * complicated, but the performance is much better. - * - * This interrupt handler is used only to gather statistics. 
- * - * @irq: IRQ of the SSI device - * @dev_id: pointer to the fsl_ssi structure for this SSI device + * Interrupt handler to gather states */ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) { @@ -361,10 +354,6 @@ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) __be32 sisr; __be32 sisr2; - /* We got an interrupt, so read the status register to see what we - were interrupted for. We mask it with the Interrupt Enable register - so that we only check for events that we're interested in. - */ regmap_read(regs, CCSR_SSI_SISR, &sisr); sisr2 = sisr & ssi->soc->sisr_write_mask; @@ -377,8 +366,8 @@ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) return IRQ_HANDLED; } -/* - * Enable/Disable all rx/tx config flags at once. +/** + * Enable or disable all rx/tx config flags at once */ static void fsl_ssi_rxtx_config(struct fsl_ssi *ssi, bool enable) { @@ -405,13 +394,8 @@ static void fsl_ssi_rxtx_config(struct fsl_ssi *ssi, bool enable) } } -/* - * Clear RX or TX FIFO to remove samples from the previous - * stream session which may be still present in the FIFO and - * may introduce bad samples and/or channel slipping. - * - * Note: The SOR is not documented in recent IMX datasheet, but - * is described in IMX51 reference manual at section 56.3.3.15. +/** + * Clear remaining data in the FIFO to avoid dirty data or channel slipping */ static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) { @@ -424,7 +408,7 @@ static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) } } -/* +/** * Calculate the bits that have to be disabled for the current stream that is * getting disabled. This keeps the bits enabled that are necessary for the * second stream to work if 'stream_active' is true. @@ -444,9 +428,8 @@ static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) ((vals_disable) & \ ((vals_disable) ^ ((vals_stream) * (u32)!!(stream_active)))) -/* - * Enable/Disable a ssi configuration. 
You have to pass either - * ssi->rxtx_reg_val.rx or tx as vals parameter. +/** + * Enable or disable SSI configuration. */ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, struct fsl_ssi_reg_val *vals) @@ -467,24 +450,28 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, else keep_active = 0; - /* Find the other direction values rx or tx which we do not want to - * modify */ + /* Get the opposite direction to keep its values untouched */ if (&ssi->rxtx_reg_val.rx == vals) avals = &ssi->rxtx_reg_val.tx; else avals = &ssi->rxtx_reg_val.rx; - /* If vals should be disabled, start with disabling the unit */ if (!enable) { + /* + * To keep the other stream safe, exclude shared bits between + * both streams, and get safe bits to disable current stream + */ u32 scr = fsl_ssi_disable_val(vals->scr, avals->scr, keep_active); + /* Safely disable SCR register for the stream */ regmap_update_bits(regs, CCSR_SSI_SCR, scr, 0); } /* - * We are running on a SoC which does not support online SSI - * reconfiguration, so we have to enable all necessary flags at once - * even if we do not use them later (capture and playback configuration) + * For cases where online configuration is not supported, + * 1) Enable all necessary bits of both streams when 1st stream starts + * even if the opposite stream will not start + * 2) Disable all remaining bits of both streams when last stream ends */ if (ssi->soc->offline_config) { if ((enable && !nr_active_streams) || @@ -494,10 +481,7 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, goto config_done; } - /* - * Configure single direction units while the SSI unit is running - * (online configuration) - */ + /* Online configure single direction while SSI is running */ if (enable) { fsl_ssi_fifo_clear(ssi, vals->scr & CCSR_SSI_SCR_RE); @@ -510,15 +494,9 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, u32 stcr; /* - * Disabling the necessary flags for one of rx/tx while the - * other stream is 
active is a little bit more difficult. We - * have to disable only those flags that differ between both - * streams (rx XOR tx) and that are set in the stream that is - * disabled now. Otherwise we could alter flags of the other - * stream + * To keep the other stream safe, exclude shared bits between + * both streams, and get safe bits to disable current stream */ - - /* These assignments are simply vals without bits set in avals*/ sier = fsl_ssi_disable_val(vals->sier, avals->sier, keep_active); srcr = fsl_ssi_disable_val(vals->srcr, avals->srcr, @@ -526,6 +504,7 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, stcr = fsl_ssi_disable_val(vals->stcr, avals->stcr, keep_active); + /* Safely disable other control registers for the stream */ regmap_update_bits(regs, CCSR_SSI_SRCR, srcr, 0); regmap_update_bits(regs, CCSR_SSI_STCR, stcr, 0); regmap_update_bits(regs, CCSR_SSI_SIER, sier, 0); @@ -534,26 +513,21 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, config_done: /* Enabling of subunits is done after configuration */ if (enable) { + /* + * Start DMA before setting TE to avoid FIFO underrun + * which may cause a channel slip or a channel swap + * + * TODO: FIQ cases might also need this upon testing + */ if (ssi->use_dma && (vals->scr & CCSR_SSI_SCR_TE)) { - /* - * Be sure the Tx FIFO is filled when TE is set. - * Otherwise, there are some chances to start the - * playback with some void samples inserted first, - * generating a channel slip. - * - * First, SSIEN must be set, to let the FIFO be filled. - * - * Notes: - * - Limit this fix to the DMA case until FIQ cases can - * be tested. - * - Limit the length of the busy loop to not lock the - * system too long, even if 1-2 loops are sufficient - * in general. 
- */ int i; int max_loop = 100; + + /* Enable SSI first to send TX DMA request */ regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_SSIEN, CCSR_SSI_SCR_SSIEN); + + /* Busy wait until TX FIFO not empty -- DMA working */ for (i = 0; i < max_loop; i++) { u32 sfcsr; regmap_read(regs, CCSR_SSI_SFCSR, &sfcsr); @@ -565,6 +539,7 @@ config_done: "Timeout waiting TX FIFO filling\n"); } } + /* Enable all remaining bits */ regmap_update_bits(regs, CCSR_SSI_SCR, vals->scr, vals->scr); } } @@ -581,20 +556,9 @@ static void fsl_ssi_tx_ac97_saccst_setup(struct fsl_ssi *ssi) /* no SACC{ST,EN,DIS} regs on imx21-class SSI */ if (!ssi->soc->imx21regs) { - /* - * Note that these below aren't just normal registers. - * They are a way to disable or enable bits in SACCST - * register: - * - writing a '1' bit at some position in SACCEN sets the - * relevant bit in SACCST, - * - writing a '1' bit at some position in SACCDIS unsets - * the relevant bit in SACCST register. - * - * The two writes below first disable all channels slots, - * then enable just slots 3 & 4 ("PCM Playback Left Channel" - * and "PCM Playback Right Channel"). - */ + /* Disable all channel slots */ regmap_write(regs, CCSR_SSI_SACCDIS, 0xff); + /* Enable slots 3 & 4 -- PCM Playback Left & Right channels */ regmap_write(regs, CCSR_SSI_SACCEN, 0x300); } } @@ -602,23 +566,11 @@ static void fsl_ssi_tx_ac97_saccst_setup(struct fsl_ssi *ssi) static void fsl_ssi_tx_config(struct fsl_ssi *ssi, bool enable) { /* - * Why are we setting up SACCST everytime we are starting a - * playback? - * Some CODECs (like VT1613 CODEC on UDOO board) like to - * (sometimes) set extra bits in their SLOTREQ requests. - * When a bit is set in a SLOTREQ request then SSI sets the - * relevant bit in SACCST automatically (it is enough if a bit was - * set in a SLOTREQ just once, bits in SACCST are 'sticky'). 
- * If an extra slot gets enabled that's a disaster for playback - * because some of normal left or right channel samples are - * redirected instead to this extra slot. + * SACCST might be modified via AC Link by a CODEC if it sends + * extra bits in their SLOTREQ requests, which'll accidentally + * send valid data to slots other than normal playback slots. * - * A workaround implemented in fsl-asoc-card of setting an - * appropriate CODEC register so that slots 3 & 4 (the normal - * stereo playback slots) are used for S/PDIF seems to mostly fix - * this issue on the UDOO board but since this CODEC is so - * untrustworthy let's play safe here and make sure that no extra - * slots are enabled every time a playback is started. + * To be safe, configure SACCST right before TX starts. */ if (enable && fsl_ssi_is_ac97(ssi)) fsl_ssi_tx_ac97_saccst_setup(ssi); @@ -626,10 +578,8 @@ static void fsl_ssi_tx_config(struct fsl_ssi *ssi, bool enable) fsl_ssi_config(ssi, enable, &ssi->rxtx_reg_val.tx); } -/* - * Setup rx/tx register values used to enable/disable the streams. These will - * be used later in fsl_ssi_config to setup the streams without the need to - * check for all different SSI modes. 
+/** + * Cache critical bits of SIER, SRCR, STCR and SCR to later set them safely */ static void fsl_ssi_setup_reg_vals(struct fsl_ssi *ssi) { @@ -642,6 +592,7 @@ static void fsl_ssi_setup_reg_vals(struct fsl_ssi *ssi) reg->tx.stcr = CCSR_SSI_STCR_TFEN0; reg->tx.scr = 0; + /* AC97 has already enabled SSIEN, RE and TE, so ignore them */ if (!fsl_ssi_is_ac97(ssi)) { reg->rx.scr = CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_RE; reg->tx.scr = CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_TE; @@ -663,24 +614,17 @@ static void fsl_ssi_setup_ac97(struct fsl_ssi *ssi) { struct regmap *regs = ssi->regs; - /* - * Setup the clock control register - */ + /* Setup the clock control register */ regmap_write(regs, CCSR_SSI_STCCR, CCSR_SSI_SxCCR_WL(17) | CCSR_SSI_SxCCR_DC(13)); regmap_write(regs, CCSR_SSI_SRCCR, CCSR_SSI_SxCCR_WL(17) | CCSR_SSI_SxCCR_DC(13)); - /* - * Enable AC97 mode and startup the SSI - */ + /* Enable AC97 mode and startup the SSI */ regmap_write(regs, CCSR_SSI_SACNT, CCSR_SSI_SACNT_AC97EN | CCSR_SSI_SACNT_FV); - /* - * Enable SSI, Transmit and Receive. AC97 has to communicate with the - * codec before a stream is started. - */ + /* AC97 has to communicate with codec before starting a stream */ regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_TE | CCSR_SSI_SCR_RE, CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_TE | CCSR_SSI_SCR_RE); @@ -688,14 +632,6 @@ static void fsl_ssi_setup_ac97(struct fsl_ssi *ssi) regmap_write(regs, CCSR_SSI_SOR, CCSR_SSI_SOR_WAIT(3)); } -/** - * fsl_ssi_startup: create a new substream - * - * This is the first function called when a stream is opened. - * - * If this is the first stream open, then grab the IRQ and program most of - * the SSI registers. 
- */ static int fsl_ssi_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -707,7 +643,8 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, if (ret) return ret; - /* When using dual fifo mode, it is safer to ensure an even period + /* + * When using dual fifo mode, it is safer to ensure an even period * size. If appearing to an odd number while DMA always starts its * task from fifo0, fifo1 would be neglected at the end of each * period. But SSI would still access fifo1 with an invalid data. @@ -719,10 +656,6 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, return 0; } -/** - * fsl_ssi_shutdown: shutdown the SSI - * - */ static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -734,7 +667,7 @@ static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, } /** - * fsl_ssi_set_bclk - configure Digital Audio Interface bit clock + * Configure Digital Audio Interface bit clock * * Note: This function can be only called when using SSI as DAI master * @@ -851,17 +784,15 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, } /** - * fsl_ssi_hw_params - program the sample size - * - * Most of the SSI registers have been programmed in the startup function, - * but the word length must be programmed here. Unfortunately, programming - * the SxCCR.WL bits requires the SSI to be temporarily disabled. This can - * cause a problem with supporting simultaneous playback and capture. If - * the SSI is already playing a stream, then that stream may be temporarily - * stopped when you start capture. + * Configure SSI based on PCM hardware parameters * - * Note: The SxCCR.DC and SxCCR.PM bits are only used if the SSI is the - * clock master. + * Notes: + * 1) SxCCR.WL bits are critical bits that require SSI to be temporarily + * disabled on offline_config SoCs. 
Even for online configurable SoCs + * running in synchronous mode (both TX and RX use STCCR), it is not + * safe to re-configure them when both two streams start running. + * 2) SxCCR.PM, SxCCR.DIV2 and SxCCR.PSR bits will be configured in the + * fsl_ssi_set_bclk() if SSI is the DAI clock master. */ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params, struct snd_soc_dai *cpu_dai) @@ -879,8 +810,10 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, enabled = scr_val & CCSR_SSI_SCR_SSIEN; /* - * If we're in synchronous mode, and the SSI is already enabled, - * then STCCR is already set properly. + * SSI is properly configured if it is enabled and running in + * the synchronous mode; Note that AC97 mode is an exception + * that should set separate configurations for STCCR and SRCCR + * despite running in the synchronous mode. */ if (enabled && ssi->cpu_dai_drv.symmetric_rates) return 0; @@ -902,10 +835,7 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, if (!fsl_ssi_is_ac97(ssi)) { u8 i2smode; - /* - * Switch to normal net mode in order to have a frame sync - * signal every 32 bits instead of 16 bits - */ + /* Normal + Network mode to send 16-bit data in 32-bit frames */ if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16) i2smode = CCSR_SSI_SCR_I2S_MODE_NORMAL | CCSR_SSI_SCR_NET; @@ -917,16 +847,6 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, channels == 1 ? 0 : i2smode); } - /* - * FIXME: The documentation says that SxCCR[WL] should not be - * modified while the SSI is enabled. The only time this can - * happen is if we're trying to do simultaneous playback and - * capture in asynchronous mode. Unfortunately, I have been enable - * to get that to work at all on the P1022DS. Therefore, we don't - * bother to disable/enable the SSI when setting SxCCR[WL], because - * the SSI will stop anyway. Maybe one day, this will get fixed. 
- */ - /* In synchronous mode, the SSI uses STCCR for capture */ if ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) || ssi->cpu_dai_drv.symmetric_rates) @@ -972,6 +892,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, regmap_read(regs, CCSR_SSI_SCR, &scr); scr &= ~(CCSR_SSI_SCR_SYN | CCSR_SSI_SCR_I2S_MODE_MASK); + /* Synchronize frame sync clock for TE to avoid data slipping */ scr |= CCSR_SSI_SCR_SYNC_TX_FS; mask = CCSR_SSI_STCR_TXBIT0 | CCSR_SSI_STCR_TFDIR | CCSR_SSI_STCR_TXDIR | @@ -982,6 +903,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, stcr &= ~mask; srcr &= ~mask; + /* Use Network mode as default */ ssi->i2s_mode = CCSR_SSI_SCR_NET; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: @@ -1022,6 +944,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, CCSR_SSI_STCR_TXBIT0; break; case SND_SOC_DAIFMT_AC97: + /* Data on falling edge of bclk, frame high, 1clk before data */ ssi->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_NORMAL; break; default: @@ -1054,13 +977,16 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, /* DAI clock master masks */ switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBS_CFS: + /* Output bit and frame sync clocks */ strcr |= CCSR_SSI_STCR_TFDIR | CCSR_SSI_STCR_TXDIR; scr |= CCSR_SSI_SCR_SYS_CLK_EN; break; case SND_SOC_DAIFMT_CBM_CFM: + /* Input bit or frame sync clocks */ scr &= ~CCSR_SSI_SCR_SYS_CLK_EN; break; case SND_SOC_DAIFMT_CBM_CFS: + /* Input bit clock but output frame sync clock */ strcr &= ~CCSR_SSI_STCR_TXDIR; strcr |= CCSR_SSI_STCR_TFDIR; scr &= ~CCSR_SSI_SCR_SYS_CLK_EN; @@ -1073,8 +999,8 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, stcr |= strcr; srcr |= strcr; + /* Set SYN mode and clear RXDIR bit when using SYN or AC97 mode */ if (ssi->cpu_dai_drv.symmetric_rates || fsl_ssi_is_ac97(ssi)) { - /* Need to clear RXDIR when using SYNC or AC97 mode */ srcr &= ~CCSR_SSI_SRCR_RXDIR; scr |= CCSR_SSI_SCR_SYN; } @@ -1106,12 +1032,13 @@ static int _fsl_ssi_set_dai_fmt(struct 
device *dev, } /** - * fsl_ssi_set_dai_fmt - configure Digital Audio Interface Format. + * Configure Digital Audio Interface (DAI) Format */ static int fsl_ssi_set_dai_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) { struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); + /* AC97 configured DAIFMT earlier in the probe() */ if (fsl_ssi_is_ac97(ssi)) return 0; @@ -1119,9 +1046,7 @@ static int fsl_ssi_set_dai_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) } /** - * fsl_ssi_set_dai_tdm_slot - set TDM slot number - * - * Note: This function can be only called when using SSI as DAI master + * Set TDM slot number and slot width */ static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, u32 rx_mask, int slots, int slot_width) @@ -1149,17 +1074,17 @@ static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, regmap_update_bits(regs, CCSR_SSI_SRCCR, CCSR_SSI_SxCCR_DC_MASK, CCSR_SSI_SxCCR_DC(slots)); - /* The register SxMSKs needs SSI to provide essential clock due to - * hardware design. So we here temporarily enable SSI to set them. - */ + /* Save SSIEN bit of the SCR register */ regmap_read(regs, CCSR_SSI_SCR, &val); val &= CCSR_SSI_SCR_SSIEN; + /* Temporarily enable SSI to allow SxMSKs to be configurable */ regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_SSIEN, CCSR_SSI_SCR_SSIEN); regmap_write(regs, CCSR_SSI_STMSK, ~tx_mask); regmap_write(regs, CCSR_SSI_SRMSK, ~rx_mask); + /* Restore the value of SSIEN bit */ regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_SSIEN, val); ssi->slot_width = slot_width; @@ -1169,10 +1094,7 @@ static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, } /** - * fsl_ssi_trigger: start and stop the DMA transfer. - * - * This function is called by ALSA to start, stop, pause, and resume the DMA - * transfer of data. + * Start or stop SSI and corresponding DMA transaction. 
* * The DMA channel is in external master start and pause mode, which * means the SSI completely controls the flow of data. @@ -1207,6 +1129,7 @@ static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd, return -EINVAL; } + /* Clear corresponding FIFO */ if (fsl_ssi_is_ac97(ssi)) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) regmap_write(regs, CCSR_SSI_SOR, CCSR_SSI_SOR_TX_CLR); @@ -1239,7 +1162,6 @@ static const struct snd_soc_dai_ops fsl_ssi_dai_ops = { .trigger = fsl_ssi_trigger, }; -/* Template for the CPU dai driver structure */ static struct snd_soc_dai_driver fsl_ssi_dai_template = { .probe = fsl_ssi_dai_probe, .playback = { @@ -1383,6 +1305,7 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, u32 dmas[4]; int ret; + /* Backward compatible for a DT without ipg clock name assigned */ if (ssi->has_ipg_clk_name) ssi->clk = devm_clk_get(dev, "ipg"); else @@ -1393,6 +1316,7 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, return ret; } + /* Enable the clock since regmap will not handle it in this case */ if (!ssi->has_ipg_clk_name) { ret = clk_prepare_enable(ssi->clk); if (ret) { @@ -1401,9 +1325,7 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, } } - /* For those SLAVE implementations, we ignore non-baudclk cases - * and, instead, abandon MASTER mode that needs baud clock. 
- */ + /* Do not error out for slave cases that live without a baud clock */ ssi->baudclk = devm_clk_get(dev, "baud"); if (IS_ERR(ssi->baudclk)) dev_dbg(dev, "could not get baud clock: %ld\n", @@ -1414,25 +1336,23 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, ssi->dma_params_tx.addr = ssi->ssi_phys + CCSR_SSI_STX0; ssi->dma_params_rx.addr = ssi->ssi_phys + CCSR_SSI_SRX0; + /* Set to dual FIFO mode according to the SDMA sciprt */ ret = of_property_read_u32_array(np, "dmas", dmas, 4); if (ssi->use_dma && !ret && dmas[2] == IMX_DMATYPE_SSI_DUAL) { ssi->use_dual_fifo = true; - /* When using dual fifo mode, we need to keep watermark - * as even numbers due to dma script limitation. + /* + * Use even numbers to avoid channel swap due to SDMA + * script design */ ssi->dma_params_tx.maxburst &= ~0x1; ssi->dma_params_rx.maxburst &= ~0x1; } if (!ssi->use_dma) { - /* - * Some boards use an incompatible codec. To get it - * working, we are using imx-fiq-pcm-audio, that - * can handle those codecs. DMA is not possible in this - * situation. + * Some boards use an incompatible codec. Use imx-fiq-pcm-audio + * to get it working, as DMA is not possible in this situation. 
*/ - ssi->fiq_params.irq = ssi->irq; ssi->fiq_params.base = iomem; ssi->fiq_params.dma_params_rx = &ssi->dma_params_rx; @@ -1490,12 +1410,14 @@ static int fsl_ssi_probe(struct platform_device *pdev) ssi->soc = of_id->data; ssi->dev = dev; + /* Check if being used in AC97 mode */ sprop = of_get_property(np, "fsl,mode", NULL); if (sprop) { if (!strcmp(sprop, "ac97-slave")) ssi->dai_fmt = SND_SOC_DAIFMT_AC97; } + /* Select DMA or FIQ */ ssi->use_dma = !of_property_read_bool(np, "fsl,fiq-stream-filter"); if (fsl_ssi_is_ac97(ssi)) { @@ -1504,7 +1426,6 @@ static int fsl_ssi_probe(struct platform_device *pdev) fsl_ac97_data = ssi; } else { - /* Initialize this copy of the CPU DAI driver structure */ memcpy(&ssi->cpu_dai_drv, &fsl_ssi_dai_template, sizeof(fsl_ssi_dai_template)); } @@ -1517,10 +1438,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) ssi->ssi_phys = res->start; if (ssi->soc->imx21regs) { - /* - * According to datasheet imx21-class SSI - * don't have SACC{ST,EN,DIS} regs. - */ + /* No SACC{ST,EN,DIS} regs in imx21-class SSI */ regconfig.max_register = CCSR_SSI_SRMSK; regconfig.num_reg_defaults_raw = CCSR_SSI_SRMSK / sizeof(uint32_t) + 1; @@ -1546,7 +1464,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) return ssi->irq; } - /* Are the RX and the TX clocks locked? */ + /* Set software limitations for synchronous mode */ if (!of_find_property(np, "fsl,ssi-asynchronous", NULL)) { if (!fsl_ssi_is_ac97(ssi)) { ssi->cpu_dai_drv.symmetric_rates = 1; @@ -1556,50 +1474,35 @@ static int fsl_ssi_probe(struct platform_device *pdev) ssi->cpu_dai_drv.symmetric_channels = 1; } - /* Determine the FIFO depth. */ + /* Fetch FIFO depth; Set to 8 for older DT without this property */ iprop = of_get_property(np, "fsl,fifo-depth", NULL); if (iprop) ssi->fifo_depth = be32_to_cpup(iprop); else - /* Older 8610 DTs didn't have the fifo-depth property */ ssi->fifo_depth = 8; /* - * Set the watermark for transmit FIFO 0 and receive FIFO 0. 
We don't - * use FIFO 1 but set the watermark appropriately nontheless. - * We program the transmit water to signal a DMA transfer - * if there are N elements left in the FIFO. For chips with 15-deep - * FIFOs, set watermark to 8. This allows the SSI to operate at a - * high data rate without channel slipping. Behavior is unchanged - * for the older chips with a fifo depth of only 8. A value of 4 - * might be appropriate for the older chips, but is left at - * fifo_depth-2 until sombody has a chance to test. + * Configure TX and RX DMA watermarks -- when to send a DMA request * - * We set the watermark on the same level as the DMA burstsize. For - * fiq it is probably better to use the biggest possible watermark - * size. + * Values should be tested to avoid FIFO under/over run. Set maxburst + * to fifo_watermark to maximize DMA transaction to reduce overhead. */ switch (ssi->fifo_depth) { case 15: /* - * 2 samples is not enough when running at high data - * rates (like 48kHz @ 16 bits/channel, 16 channels) - * 8 seems to split things evenly and leave enough time - * for the DMA to fill the FIFO before it's over/under - * run. + * Set to 8 as a balanced configuration -- When TX FIFO has 8 + * empty slots, send a DMA request to fill these 8 slots. The + * remaining 7 slots should be able to allow DMA to finish the + * transaction before TX FIFO underruns; Same applies to RX. + * + * Tested with cases running at 48kHz @ 16 bits x 16 channels */ ssi->fifo_watermark = 8; ssi->dma_maxburst = 8; break; case 8: default: - /* - * maintain old behavior for older chips. - * Keeping it the same because I don't have an older - * board to test with. - * I suspect this could be changed to be something to - * leave some more space in the fifo.
- */ + /* Safely use old watermark configurations for older chips */ ssi->fifo_watermark = ssi->fifo_depth - 2; ssi->dma_maxburst = ssi->fifo_depth - 2; break; @@ -1642,18 +1545,14 @@ static int fsl_ssi_probe(struct platform_device *pdev) if (ret) goto error_asoc_register; - /* - * If codec-handle property is missing from SSI node, we assume - * that the machine driver uses new binding which does not require - * SSI driver to trigger machine driver's probe. - */ + /* Bypass it if using newer DT bindings of ASoC machine drivers */ if (!of_get_property(np, "codec-handle", NULL)) goto done; - /* Trigger the machine driver's probe function. The platform driver - * name of the machine driver is taken from /compatible property of the - * device tree. We also pass the address of the CPU DAI driver - * structure. + /* + * Backward compatible for older bindings by manually triggering the + * machine driver's probe(). Use /compatible property, including the + * address of CPU DAI driver structure, as the name of machine driver. */ sprop = of_get_property(of_find_node_by_path("/"), "compatible", NULL); /* Sometimes the compatible name has a "fsl," prefix, so we strip it. 
*/ diff --git a/sound/soc/fsl/fsl_ssi.h b/sound/soc/fsl/fsl_ssi.h index 506510540d0a..1ad3bde7f065 100644 --- a/sound/soc/fsl/fsl_ssi.h +++ b/sound/soc/fsl/fsl_ssi.h @@ -1,5 +1,5 @@ /* - * fsl_ssi.h - ALSA SSI interface for the Freescale MPC8610 SoC + * fsl_ssi.h - ALSA SSI interface for the Freescale MPC8610 and i.MX SoC * * Author: Timur Tabi * @@ -12,31 +12,75 @@ #ifndef _MPC8610_I2S_H #define _MPC8610_I2S_H -/* SSI registers */ +/* -- SSI Register Map -- */ + +/* SSI Transmit Data Register 0 */ #define CCSR_SSI_STX0 0x00 +/* SSI Transmit Data Register 1 */ #define CCSR_SSI_STX1 0x04 +/* SSI Receive Data Register 0 */ #define CCSR_SSI_SRX0 0x08 +/* SSI Receive Data Register 1 */ #define CCSR_SSI_SRX1 0x0c +/* SSI Control Register */ #define CCSR_SSI_SCR 0x10 +/* SSI Interrupt Status Register */ #define CCSR_SSI_SISR 0x14 +/* SSI Interrupt Enable Register */ #define CCSR_SSI_SIER 0x18 +/* SSI Transmit Configuration Register */ #define CCSR_SSI_STCR 0x1c +/* SSI Receive Configuration Register */ #define CCSR_SSI_SRCR 0x20 +/* SSI Transmit Clock Control Register */ #define CCSR_SSI_STCCR 0x24 +/* SSI Receive Clock Control Register */ #define CCSR_SSI_SRCCR 0x28 +/* SSI FIFO Control/Status Register */ #define CCSR_SSI_SFCSR 0x2c +/* + * SSI Test Register (Intended for debugging purposes only) + * + * Note: STR is not documented in recent IMX datasheet, but + * is described in IMX51 reference manual at section 56.3.3.14 + */ #define CCSR_SSI_STR 0x30 +/* + * SSI Option Register (Intended for internal use only) + * + * Note: SOR is not documented in recent IMX datasheet, but + * is described in IMX51 reference manual at section 56.3.3.15 + */ #define CCSR_SSI_SOR 0x34 +/* SSI AC97 Control Register */ #define CCSR_SSI_SACNT 0x38 +/* SSI AC97 Command Address Register */ #define CCSR_SSI_SACADD 0x3c +/* SSI AC97 Command Data Register */ #define CCSR_SSI_SACDAT 0x40 +/* SSI AC97 Tag Register */ #define CCSR_SSI_SATAG 0x44 +/* SSI Transmit Time Slot Mask Register */ 
#define CCSR_SSI_STMSK 0x48 +/* SSI Receive Time Slot Mask Register */ #define CCSR_SSI_SRMSK 0x4c +/* + * SSI AC97 Channel Status Register + * + * The status could be changed by: + * 1) Writing a '1' bit at some position in SACCEN sets relevant bit in SACCST + * 2) Writing a '1' bit at some position in SACCDIS unsets the relevant bit + * 3) Receivng a '1' in SLOTREQ bit from external CODEC via AC Link + */ #define CCSR_SSI_SACCST 0x50 +/* SSI AC97 Channel Enable Register -- Set bits in SACCST */ #define CCSR_SSI_SACCEN 0x54 +/* SSI AC97 Channel Disable Register -- Clear bits in SACCST */ #define CCSR_SSI_SACCDIS 0x58 +/* -- SSI Register Field Maps -- */ + +/* SSI Control Register -- CCSR_SSI_SCR 0x10 */ #define CCSR_SSI_SCR_SYNC_TX_FS 0x00001000 #define CCSR_SSI_SCR_RFR_CLK_DIS 0x00000800 #define CCSR_SSI_SCR_TFR_CLK_DIS 0x00000400 @@ -52,6 +96,7 @@ #define CCSR_SSI_SCR_TE 0x00000002 #define CCSR_SSI_SCR_SSIEN 0x00000001 +/* SSI Interrupt Status Register -- CCSR_SSI_SISR 0x14 */ #define CCSR_SSI_SISR_RFRC 0x01000000 #define CCSR_SSI_SISR_TFRC 0x00800000 #define CCSR_SSI_SISR_CMDAU 0x00040000 @@ -74,6 +119,7 @@ #define CCSR_SSI_SISR_TFE1 0x00000002 #define CCSR_SSI_SISR_TFE0 0x00000001 +/* SSI Interrupt Enable Register -- CCSR_SSI_SIER 0x18 */ #define CCSR_SSI_SIER_RFRC_EN 0x01000000 #define CCSR_SSI_SIER_TFRC_EN 0x00800000 #define CCSR_SSI_SIER_RDMAE 0x00400000 @@ -100,6 +146,7 @@ #define CCSR_SSI_SIER_TFE1_EN 0x00000002 #define CCSR_SSI_SIER_TFE0_EN 0x00000001 +/* SSI Transmit Configuration Register -- CCSR_SSI_STCR 0x1C */ #define CCSR_SSI_STCR_TXBIT0 0x00000200 #define CCSR_SSI_STCR_TFEN1 0x00000100 #define CCSR_SSI_STCR_TFEN0 0x00000080 @@ -111,6 +158,7 @@ #define CCSR_SSI_STCR_TFSL 0x00000002 #define CCSR_SSI_STCR_TEFS 0x00000001 +/* SSI Receive Configuration Register -- CCSR_SSI_SRCR 0x20 */ #define CCSR_SSI_SRCR_RXEXT 0x00000400 #define CCSR_SSI_SRCR_RXBIT0 0x00000200 #define CCSR_SSI_SRCR_RFEN1 0x00000100 @@ -123,7 +171,10 @@ #define CCSR_SSI_SRCR_RFSL 
0x00000002 #define CCSR_SSI_SRCR_REFS 0x00000001 -/* STCCR and SRCCR */ +/* + * SSI Transmit Clock Control Register -- CCSR_SSI_STCCR 0x24 + * SSI Receive Clock Control Register -- CCSR_SSI_SRCCR 0x28 + */ #define CCSR_SSI_SxCCR_DIV2_SHIFT 18 #define CCSR_SSI_SxCCR_DIV2 0x00040000 #define CCSR_SSI_SxCCR_PSR_SHIFT 17 @@ -142,9 +193,10 @@ ((((x) - 1) << CCSR_SSI_SxCCR_PM_SHIFT) & CCSR_SSI_SxCCR_PM_MASK) /* - * The xFCNT bits are read-only, and the xFWM bits are read/write. Use the - * CCSR_SSI_SFCSR_xFCNTy() macros to read the FIFO counters, and use the - * CCSR_SSI_SFCSR_xFWMy() macros to set the watermarks. + * SSI FIFO Control/Status Register -- CCSR_SSI_SFCSR 0x2c + * + * Tx or Rx FIFO Counter -- CCSR_SSI_SFCSR_xFCNTy Read-Only + * Tx or Rx FIFO Watermarks -- CCSR_SSI_SFCSR_xFWMy Read/Write */ #define CCSR_SSI_SFCSR_RFCNT1_SHIFT 28 #define CCSR_SSI_SFCSR_RFCNT1_MASK 0xF0000000 @@ -179,6 +231,7 @@ #define CCSR_SSI_SFCSR_TFWM0(x) \ (((x) << CCSR_SSI_SFCSR_TFWM0_SHIFT) & CCSR_SSI_SFCSR_TFWM0_MASK) +/* SSI Test Register -- CCSR_SSI_STR 0x30 */ #define CCSR_SSI_STR_TEST 0x00008000 #define CCSR_SSI_STR_RCK2TCK 0x00004000 #define CCSR_SSI_STR_RFS2TFS 0x00002000 @@ -188,6 +241,7 @@ #define CCSR_SSI_STR_TFS2RFS 0x00000020 #define CCSR_SSI_STR_TXSTATE(x) ((x) & 0x1F) +/* SSI Option Register -- CCSR_SSI_SOR 0x34 */ #define CCSR_SSI_SOR_CLKOFF 0x00000040 #define CCSR_SSI_SOR_RX_CLR 0x00000020 #define CCSR_SSI_SOR_TX_CLR 0x00000010 @@ -197,6 +251,7 @@ #define CCSR_SSI_SOR_WAIT(x) (((x) & 3) << CCSR_SSI_SOR_WAIT_SHIFT) #define CCSR_SSI_SOR_SYNRST 0x00000001 +/* SSI AC97 Control Register -- CCSR_SSI_SACNT 0x38 */ #define CCSR_SSI_SACNT_FRDIV(x) (((x) & 0x3f) << 5) #define CCSR_SSI_SACNT_WR 0x00000010 #define CCSR_SSI_SACNT_RD 0x00000008 diff --git a/sound/soc/fsl/fsl_ssi_dbg.c b/sound/soc/fsl/fsl_ssi_dbg.c index 5469ffbc0253..88d9e8e08905 100644 --- a/sound/soc/fsl/fsl_ssi_dbg.c +++ b/sound/soc/fsl/fsl_ssi_dbg.c @@ -82,9 +82,10 @@ void fsl_ssi_dbg_isr(struct fsl_ssi_dbg *dbg, 
u32 sisr) dbg->stats.tfe0++; } -/* Show the statistics of a flag only if its interrupt is enabled. The - * compiler will optimze this code to a no-op if the interrupt is not - * enabled. +/** + * Show the statistics of a flag only if its interrupt is enabled + * + * Compilers will optimize it to a no-op if the interrupt is disabled */ #define SIER_SHOW(flag, name) \ do { \ @@ -94,10 +95,9 @@ void fsl_ssi_dbg_isr(struct fsl_ssi_dbg *dbg, u32 sisr) /** - * fsl_sysfs_ssi_show: display SSI statistics + * Display the statistics for the current SSI device * - * Display the statistics for the current SSI device. To avoid confusion, - * we only show those counts that are enabled. + * To avoid confusion, only show those counts that are enabled */ static int fsl_ssi_stats_show(struct seq_file *s, void *unused) { -- cgit v1.2.3 From a818aa5f967ba60522ee0ad181a0c5a96b65d999 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:03 -0800 Subject: ASoC: fsl_ssi: Rename registers and fields macros This patch renames CCSR_SSI_xxx to REG_SSI_xxx and SSI_xxx_yyy style. It also slightly reduces the length of them to save some space. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. 
Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 374 +++++++++++++++++++++---------------------- sound/soc/fsl/fsl_ssi.h | 376 ++++++++++++++++++++++---------------------- sound/soc/fsl/fsl_ssi_dbg.c | 44 +++--- 3 files changed, 397 insertions(+), 397 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index ff1827a074be..24d96956b53a 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -78,12 +78,12 @@ SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S24_LE) #endif -#define FSLSSI_SIER_DBG_RX_FLAGS (CCSR_SSI_SIER_RFF0_EN | \ - CCSR_SSI_SIER_RLS_EN | CCSR_SSI_SIER_RFS_EN | \ - CCSR_SSI_SIER_ROE0_EN | CCSR_SSI_SIER_RFRC_EN) -#define FSLSSI_SIER_DBG_TX_FLAGS (CCSR_SSI_SIER_TFE0_EN | \ - CCSR_SSI_SIER_TLS_EN | CCSR_SSI_SIER_TFS_EN | \ - CCSR_SSI_SIER_TUE0_EN | CCSR_SSI_SIER_TFRC_EN) +#define FSLSSI_SIER_DBG_RX_FLAGS (SSI_SIER_RFF0_EN | \ + SSI_SIER_RLS_EN | SSI_SIER_RFS_EN | \ + SSI_SIER_ROE0_EN | SSI_SIER_RFRC_EN) +#define FSLSSI_SIER_DBG_TX_FLAGS (SSI_SIER_TFE0_EN | \ + SSI_SIER_TLS_EN | SSI_SIER_TFS_EN | \ + SSI_SIER_TUE0_EN | SSI_SIER_TFRC_EN) enum fsl_ssi_type { FSL_SSI_MCP8610, @@ -107,8 +107,8 @@ struct fsl_ssi_rxtx_reg_val { static bool fsl_ssi_readable_reg(struct device *dev, unsigned int reg) { switch (reg) { - case CCSR_SSI_SACCEN: - case CCSR_SSI_SACCDIS: + case REG_SSI_SACCEN: + case REG_SSI_SACCDIS: return false; default: return true; @@ -118,18 +118,18 @@ static bool fsl_ssi_readable_reg(struct device *dev, unsigned int reg) static bool fsl_ssi_volatile_reg(struct device *dev, unsigned int reg) { switch (reg) { - case CCSR_SSI_STX0: - case CCSR_SSI_STX1: - case CCSR_SSI_SRX0: - case CCSR_SSI_SRX1: - case CCSR_SSI_SISR: - case CCSR_SSI_SFCSR: - case CCSR_SSI_SACNT: - case CCSR_SSI_SACADD: - case CCSR_SSI_SACDAT: - case CCSR_SSI_SATAG: - case CCSR_SSI_SACCST: - case CCSR_SSI_SOR: + case REG_SSI_STX0: + case REG_SSI_STX1: + case REG_SSI_SRX0: + case REG_SSI_SRX1: + case REG_SSI_SISR: 
+ case REG_SSI_SFCSR: + case REG_SSI_SACNT: + case REG_SSI_SACADD: + case REG_SSI_SACDAT: + case REG_SSI_SATAG: + case REG_SSI_SACCST: + case REG_SSI_SOR: return true; default: return false; @@ -139,12 +139,12 @@ static bool fsl_ssi_volatile_reg(struct device *dev, unsigned int reg) static bool fsl_ssi_precious_reg(struct device *dev, unsigned int reg) { switch (reg) { - case CCSR_SSI_SRX0: - case CCSR_SSI_SRX1: - case CCSR_SSI_SISR: - case CCSR_SSI_SACADD: - case CCSR_SSI_SACDAT: - case CCSR_SSI_SATAG: + case REG_SSI_SRX0: + case REG_SSI_SRX1: + case REG_SSI_SISR: + case REG_SSI_SACADD: + case REG_SSI_SACDAT: + case REG_SSI_SATAG: return true; default: return false; @@ -154,9 +154,9 @@ static bool fsl_ssi_precious_reg(struct device *dev, unsigned int reg) static bool fsl_ssi_writeable_reg(struct device *dev, unsigned int reg) { switch (reg) { - case CCSR_SSI_SRX0: - case CCSR_SSI_SRX1: - case CCSR_SSI_SACCST: + case REG_SSI_SRX0: + case REG_SSI_SRX1: + case REG_SSI_SACCST: return false; default: return true; @@ -164,12 +164,12 @@ static bool fsl_ssi_writeable_reg(struct device *dev, unsigned int reg) } static const struct regmap_config fsl_ssi_regconfig = { - .max_register = CCSR_SSI_SACCDIS, + .max_register = REG_SSI_SACCDIS, .reg_bits = 32, .val_bits = 32, .reg_stride = 4, .val_format_endian = REGMAP_ENDIAN_NATIVE, - .num_reg_defaults_raw = CCSR_SSI_SACCDIS / sizeof(uint32_t) + 1, + .num_reg_defaults_raw = REG_SSI_SACCDIS / sizeof(uint32_t) + 1, .readable_reg = fsl_ssi_readable_reg, .volatile_reg = fsl_ssi_volatile_reg, .precious_reg = fsl_ssi_precious_reg, @@ -290,9 +290,9 @@ struct fsl_ssi { static struct fsl_ssi_soc_data fsl_ssi_mpc8610 = { .imx = false, .offline_config = true, - .sisr_write_mask = CCSR_SSI_SISR_RFRC | CCSR_SSI_SISR_TFRC | - CCSR_SSI_SISR_ROE0 | CCSR_SSI_SISR_ROE1 | - CCSR_SSI_SISR_TUE0 | CCSR_SSI_SISR_TUE1, + .sisr_write_mask = SSI_SISR_RFRC | SSI_SISR_TFRC | + SSI_SISR_ROE0 | SSI_SISR_ROE1 | + SSI_SISR_TUE0 | SSI_SISR_TUE1, }; static struct 
fsl_ssi_soc_data fsl_ssi_imx21 = { @@ -305,16 +305,16 @@ static struct fsl_ssi_soc_data fsl_ssi_imx21 = { static struct fsl_ssi_soc_data fsl_ssi_imx35 = { .imx = true, .offline_config = true, - .sisr_write_mask = CCSR_SSI_SISR_RFRC | CCSR_SSI_SISR_TFRC | - CCSR_SSI_SISR_ROE0 | CCSR_SSI_SISR_ROE1 | - CCSR_SSI_SISR_TUE0 | CCSR_SSI_SISR_TUE1, + .sisr_write_mask = SSI_SISR_RFRC | SSI_SISR_TFRC | + SSI_SISR_ROE0 | SSI_SISR_ROE1 | + SSI_SISR_TUE0 | SSI_SISR_TUE1, }; static struct fsl_ssi_soc_data fsl_ssi_imx51 = { .imx = true, .offline_config = false, - .sisr_write_mask = CCSR_SSI_SISR_ROE0 | CCSR_SSI_SISR_ROE1 | - CCSR_SSI_SISR_TUE0 | CCSR_SSI_SISR_TUE1, + .sisr_write_mask = SSI_SISR_ROE0 | SSI_SISR_ROE1 | + SSI_SISR_TUE0 | SSI_SISR_TUE1, }; static const struct of_device_id fsl_ssi_ids[] = { @@ -354,12 +354,12 @@ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) __be32 sisr; __be32 sisr2; - regmap_read(regs, CCSR_SSI_SISR, &sisr); + regmap_read(regs, REG_SSI_SISR, &sisr); sisr2 = sisr & ssi->soc->sisr_write_mask; /* Clear the bits that we set */ if (sisr2) - regmap_write(regs, CCSR_SSI_SISR, sisr2); + regmap_write(regs, REG_SSI_SISR, sisr2); fsl_ssi_dbg_isr(&ssi->dbg_stats, sisr); @@ -375,21 +375,21 @@ static void fsl_ssi_rxtx_config(struct fsl_ssi *ssi, bool enable) struct fsl_ssi_rxtx_reg_val *vals = &ssi->rxtx_reg_val; if (enable) { - regmap_update_bits(regs, CCSR_SSI_SIER, + regmap_update_bits(regs, REG_SSI_SIER, vals->rx.sier | vals->tx.sier, vals->rx.sier | vals->tx.sier); - regmap_update_bits(regs, CCSR_SSI_SRCR, + regmap_update_bits(regs, REG_SSI_SRCR, vals->rx.srcr | vals->tx.srcr, vals->rx.srcr | vals->tx.srcr); - regmap_update_bits(regs, CCSR_SSI_STCR, + regmap_update_bits(regs, REG_SSI_STCR, vals->rx.stcr | vals->tx.stcr, vals->rx.stcr | vals->tx.stcr); } else { - regmap_update_bits(regs, CCSR_SSI_SRCR, + regmap_update_bits(regs, REG_SSI_SRCR, vals->rx.srcr | vals->tx.srcr, 0); - regmap_update_bits(regs, CCSR_SSI_STCR, + regmap_update_bits(regs, 
REG_SSI_STCR, vals->rx.stcr | vals->tx.stcr, 0); - regmap_update_bits(regs, CCSR_SSI_SIER, + regmap_update_bits(regs, REG_SSI_SIER, vals->rx.sier | vals->tx.sier, 0); } } @@ -400,11 +400,11 @@ static void fsl_ssi_rxtx_config(struct fsl_ssi *ssi, bool enable) static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) { if (is_rx) { - regmap_update_bits(ssi->regs, CCSR_SSI_SOR, - CCSR_SSI_SOR_RX_CLR, CCSR_SSI_SOR_RX_CLR); + regmap_update_bits(ssi->regs, REG_SSI_SOR, + SSI_SOR_RX_CLR, SSI_SOR_RX_CLR); } else { - regmap_update_bits(ssi->regs, CCSR_SSI_SOR, - CCSR_SSI_SOR_TX_CLR, CCSR_SSI_SOR_TX_CLR); + regmap_update_bits(ssi->regs, REG_SSI_SOR, + SSI_SOR_TX_CLR, SSI_SOR_TX_CLR); } } @@ -440,10 +440,10 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, u32 scr_val; int keep_active; - regmap_read(regs, CCSR_SSI_SCR, &scr_val); + regmap_read(regs, REG_SSI_SCR, &scr_val); - nr_active_streams = !!(scr_val & CCSR_SSI_SCR_TE) + - !!(scr_val & CCSR_SSI_SCR_RE); + nr_active_streams = !!(scr_val & SSI_SCR_TE) + + !!(scr_val & SSI_SCR_RE); if (nr_active_streams - 1 > 0) keep_active = 1; @@ -464,7 +464,7 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, u32 scr = fsl_ssi_disable_val(vals->scr, avals->scr, keep_active); /* Safely disable SCR register for the stream */ - regmap_update_bits(regs, CCSR_SSI_SCR, scr, 0); + regmap_update_bits(regs, REG_SSI_SCR, scr, 0); } /* @@ -483,11 +483,11 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, /* Online configure single direction while SSI is running */ if (enable) { - fsl_ssi_fifo_clear(ssi, vals->scr & CCSR_SSI_SCR_RE); + fsl_ssi_fifo_clear(ssi, vals->scr & SSI_SCR_RE); - regmap_update_bits(regs, CCSR_SSI_SRCR, vals->srcr, vals->srcr); - regmap_update_bits(regs, CCSR_SSI_STCR, vals->stcr, vals->stcr); - regmap_update_bits(regs, CCSR_SSI_SIER, vals->sier, vals->sier); + regmap_update_bits(regs, REG_SSI_SRCR, vals->srcr, vals->srcr); + regmap_update_bits(regs, REG_SSI_STCR, vals->stcr, 
vals->stcr); + regmap_update_bits(regs, REG_SSI_SIER, vals->sier, vals->sier); } else { u32 sier; u32 srcr; @@ -505,9 +505,9 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, keep_active); /* Safely disable other control registers for the stream */ - regmap_update_bits(regs, CCSR_SSI_SRCR, srcr, 0); - regmap_update_bits(regs, CCSR_SSI_STCR, stcr, 0); - regmap_update_bits(regs, CCSR_SSI_SIER, sier, 0); + regmap_update_bits(regs, REG_SSI_SRCR, srcr, 0); + regmap_update_bits(regs, REG_SSI_STCR, stcr, 0); + regmap_update_bits(regs, REG_SSI_SIER, sier, 0); } config_done: @@ -519,19 +519,19 @@ config_done: * * TODO: FIQ cases might also need this upon testing */ - if (ssi->use_dma && (vals->scr & CCSR_SSI_SCR_TE)) { + if (ssi->use_dma && (vals->scr & SSI_SCR_TE)) { int i; int max_loop = 100; /* Enable SSI first to send TX DMA request */ - regmap_update_bits(regs, CCSR_SSI_SCR, - CCSR_SSI_SCR_SSIEN, CCSR_SSI_SCR_SSIEN); + regmap_update_bits(regs, REG_SSI_SCR, + SSI_SCR_SSIEN, SSI_SCR_SSIEN); /* Busy wait until TX FIFO not empty -- DMA working */ for (i = 0; i < max_loop; i++) { u32 sfcsr; - regmap_read(regs, CCSR_SSI_SFCSR, &sfcsr); - if (CCSR_SSI_SFCSR_TFCNT0(sfcsr)) + regmap_read(regs, REG_SSI_SFCSR, &sfcsr); + if (SSI_SFCSR_TFCNT0(sfcsr)) break; } if (i == max_loop) { @@ -540,7 +540,7 @@ config_done: } } /* Enable all remaining bits */ - regmap_update_bits(regs, CCSR_SSI_SCR, vals->scr, vals->scr); + regmap_update_bits(regs, REG_SSI_SCR, vals->scr, vals->scr); } } @@ -557,9 +557,9 @@ static void fsl_ssi_tx_ac97_saccst_setup(struct fsl_ssi *ssi) /* no SACC{ST,EN,DIS} regs on imx21-class SSI */ if (!ssi->soc->imx21regs) { /* Disable all channel slots */ - regmap_write(regs, CCSR_SSI_SACCDIS, 0xff); + regmap_write(regs, REG_SSI_SACCDIS, 0xff); /* Enable slots 3 & 4 -- PCM Playback Left & Right channels */ - regmap_write(regs, CCSR_SSI_SACCEN, 0x300); + regmap_write(regs, REG_SSI_SACCEN, 0x300); } } @@ -585,25 +585,25 @@ static void 
fsl_ssi_setup_reg_vals(struct fsl_ssi *ssi) { struct fsl_ssi_rxtx_reg_val *reg = &ssi->rxtx_reg_val; - reg->rx.sier = CCSR_SSI_SIER_RFF0_EN; - reg->rx.srcr = CCSR_SSI_SRCR_RFEN0; + reg->rx.sier = SSI_SIER_RFF0_EN; + reg->rx.srcr = SSI_SRCR_RFEN0; reg->rx.scr = 0; - reg->tx.sier = CCSR_SSI_SIER_TFE0_EN; - reg->tx.stcr = CCSR_SSI_STCR_TFEN0; + reg->tx.sier = SSI_SIER_TFE0_EN; + reg->tx.stcr = SSI_STCR_TFEN0; reg->tx.scr = 0; /* AC97 has already enabled SSIEN, RE and TE, so ignore them */ if (!fsl_ssi_is_ac97(ssi)) { - reg->rx.scr = CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_RE; - reg->tx.scr = CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_TE; + reg->rx.scr = SSI_SCR_SSIEN | SSI_SCR_RE; + reg->tx.scr = SSI_SCR_SSIEN | SSI_SCR_TE; } if (ssi->use_dma) { - reg->rx.sier |= CCSR_SSI_SIER_RDMAE; - reg->tx.sier |= CCSR_SSI_SIER_TDMAE; + reg->rx.sier |= SSI_SIER_RDMAE; + reg->tx.sier |= SSI_SIER_TDMAE; } else { - reg->rx.sier |= CCSR_SSI_SIER_RIE; - reg->tx.sier |= CCSR_SSI_SIER_TIE; + reg->rx.sier |= SSI_SIER_RIE; + reg->tx.sier |= SSI_SIER_TIE; } reg->rx.sier |= FSLSSI_SIER_DBG_RX_FLAGS; @@ -615,21 +615,21 @@ static void fsl_ssi_setup_ac97(struct fsl_ssi *ssi) struct regmap *regs = ssi->regs; /* Setup the clock control register */ - regmap_write(regs, CCSR_SSI_STCCR, - CCSR_SSI_SxCCR_WL(17) | CCSR_SSI_SxCCR_DC(13)); - regmap_write(regs, CCSR_SSI_SRCCR, - CCSR_SSI_SxCCR_WL(17) | CCSR_SSI_SxCCR_DC(13)); + regmap_write(regs, REG_SSI_STCCR, + SSI_SxCCR_WL(17) | SSI_SxCCR_DC(13)); + regmap_write(regs, REG_SSI_SRCCR, + SSI_SxCCR_WL(17) | SSI_SxCCR_DC(13)); /* Enable AC97 mode and startup the SSI */ - regmap_write(regs, CCSR_SSI_SACNT, - CCSR_SSI_SACNT_AC97EN | CCSR_SSI_SACNT_FV); + regmap_write(regs, REG_SSI_SACNT, + SSI_SACNT_AC97EN | SSI_SACNT_FV); /* AC97 has to communicate with codec before starting a stream */ - regmap_update_bits(regs, CCSR_SSI_SCR, - CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_TE | CCSR_SSI_SCR_RE, - CCSR_SSI_SCR_SSIEN | CCSR_SSI_SCR_TE | CCSR_SSI_SCR_RE); + regmap_update_bits(regs, 
REG_SSI_SCR, + SSI_SCR_SSIEN | SSI_SCR_TE | SSI_SCR_RE, + SSI_SCR_SSIEN | SSI_SCR_TE | SSI_SCR_RE); - regmap_write(regs, CCSR_SSI_SOR, CCSR_SSI_SOR_WAIT(3)); + regmap_write(regs, REG_SSI_SOR, SSI_SOR_WAIT(3)); } static int fsl_ssi_startup(struct snd_pcm_substream *substream, @@ -762,15 +762,15 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, return -EINVAL; } - stccr = CCSR_SSI_SxCCR_PM(pm + 1) | (div2 ? CCSR_SSI_SxCCR_DIV2 : 0) | - (psr ? CCSR_SSI_SxCCR_PSR : 0); - mask = CCSR_SSI_SxCCR_PM_MASK | CCSR_SSI_SxCCR_DIV2 | - CCSR_SSI_SxCCR_PSR; + stccr = SSI_SxCCR_PM(pm + 1) | (div2 ? SSI_SxCCR_DIV2 : 0) | + (psr ? SSI_SxCCR_PSR : 0); + mask = SSI_SxCCR_PM_MASK | SSI_SxCCR_DIV2 | + SSI_SxCCR_PSR; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK || synchronous) - regmap_update_bits(regs, CCSR_SSI_STCCR, mask, stccr); + regmap_update_bits(regs, REG_SSI_STCCR, mask, stccr); else - regmap_update_bits(regs, CCSR_SSI_SRCCR, mask, stccr); + regmap_update_bits(regs, REG_SSI_SRCCR, mask, stccr); if (!baudclk_is_used) { ret = clk_set_rate(ssi->baudclk, baudrate); @@ -801,13 +801,13 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, struct regmap *regs = ssi->regs; unsigned int channels = params_channels(hw_params); unsigned int sample_size = params_width(hw_params); - u32 wl = CCSR_SSI_SxCCR_WL(sample_size); + u32 wl = SSI_SxCCR_WL(sample_size); int ret; u32 scr_val; int enabled; - regmap_read(regs, CCSR_SSI_SCR, &scr_val); - enabled = scr_val & CCSR_SSI_SCR_SSIEN; + regmap_read(regs, REG_SSI_SCR, &scr_val); + enabled = scr_val & SSI_SCR_SSIEN; /* * SSI is properly configured if it is enabled and running in @@ -837,23 +837,23 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, u8 i2smode; /* Normal + Network mode to send 16-bit data in 32-bit frames */ if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16) - i2smode = CCSR_SSI_SCR_I2S_MODE_NORMAL | - CCSR_SSI_SCR_NET; + i2smode = SSI_SCR_I2S_MODE_NORMAL | + SSI_SCR_NET; else 
i2smode = ssi->i2s_mode; - regmap_update_bits(regs, CCSR_SSI_SCR, - CCSR_SSI_SCR_NET | CCSR_SSI_SCR_I2S_MODE_MASK, + regmap_update_bits(regs, REG_SSI_SCR, + SSI_SCR_NET | SSI_SCR_I2S_MODE_MASK, channels == 1 ? 0 : i2smode); } /* In synchronous mode, the SSI uses STCCR for capture */ if ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) || ssi->cpu_dai_drv.symmetric_rates) - regmap_update_bits(regs, CCSR_SSI_STCCR, CCSR_SSI_SxCCR_WL_MASK, + regmap_update_bits(regs, REG_SSI_STCCR, SSI_SxCCR_WL_MASK, wl); else - regmap_update_bits(regs, CCSR_SSI_SRCCR, CCSR_SSI_SxCCR_WL_MASK, + regmap_update_bits(regs, REG_SSI_SRCCR, SSI_SxCCR_WL_MASK, wl); return 0; @@ -890,62 +890,62 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, fsl_ssi_setup_reg_vals(ssi); - regmap_read(regs, CCSR_SSI_SCR, &scr); - scr &= ~(CCSR_SSI_SCR_SYN | CCSR_SSI_SCR_I2S_MODE_MASK); + regmap_read(regs, REG_SSI_SCR, &scr); + scr &= ~(SSI_SCR_SYN | SSI_SCR_I2S_MODE_MASK); /* Synchronize frame sync clock for TE to avoid data slipping */ - scr |= CCSR_SSI_SCR_SYNC_TX_FS; + scr |= SSI_SCR_SYNC_TX_FS; - mask = CCSR_SSI_STCR_TXBIT0 | CCSR_SSI_STCR_TFDIR | CCSR_SSI_STCR_TXDIR | - CCSR_SSI_STCR_TSCKP | CCSR_SSI_STCR_TFSI | CCSR_SSI_STCR_TFSL | - CCSR_SSI_STCR_TEFS; - regmap_read(regs, CCSR_SSI_STCR, &stcr); - regmap_read(regs, CCSR_SSI_SRCR, &srcr); + mask = SSI_STCR_TXBIT0 | SSI_STCR_TFDIR | SSI_STCR_TXDIR | + SSI_STCR_TSCKP | SSI_STCR_TFSI | SSI_STCR_TFSL | + SSI_STCR_TEFS; + regmap_read(regs, REG_SSI_STCR, &stcr); + regmap_read(regs, REG_SSI_SRCR, &srcr); stcr &= ~mask; srcr &= ~mask; /* Use Network mode as default */ - ssi->i2s_mode = CCSR_SSI_SCR_NET; + ssi->i2s_mode = SSI_SCR_NET; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: - regmap_update_bits(regs, CCSR_SSI_STCCR, - CCSR_SSI_SxCCR_DC_MASK, - CCSR_SSI_SxCCR_DC(2)); - regmap_update_bits(regs, CCSR_SSI_SRCCR, - CCSR_SSI_SxCCR_DC_MASK, - CCSR_SSI_SxCCR_DC(2)); + regmap_update_bits(regs, REG_SSI_STCCR, + SSI_SxCCR_DC_MASK, + 
SSI_SxCCR_DC(2)); + regmap_update_bits(regs, REG_SSI_SRCCR, + SSI_SxCCR_DC_MASK, + SSI_SxCCR_DC(2)); switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFS: case SND_SOC_DAIFMT_CBS_CFS: - ssi->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_MASTER; + ssi->i2s_mode |= SSI_SCR_I2S_MODE_MASTER; break; case SND_SOC_DAIFMT_CBM_CFM: - ssi->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_SLAVE; + ssi->i2s_mode |= SSI_SCR_I2S_MODE_SLAVE; break; default: return -EINVAL; } /* Data on rising edge of bclk, frame low, 1clk before data */ - strcr |= CCSR_SSI_STCR_TFSI | CCSR_SSI_STCR_TSCKP | - CCSR_SSI_STCR_TXBIT0 | CCSR_SSI_STCR_TEFS; + strcr |= SSI_STCR_TFSI | SSI_STCR_TSCKP | + SSI_STCR_TXBIT0 | SSI_STCR_TEFS; break; case SND_SOC_DAIFMT_LEFT_J: /* Data on rising edge of bclk, frame high */ - strcr |= CCSR_SSI_STCR_TXBIT0 | CCSR_SSI_STCR_TSCKP; + strcr |= SSI_STCR_TXBIT0 | SSI_STCR_TSCKP; break; case SND_SOC_DAIFMT_DSP_A: /* Data on rising edge of bclk, frame high, 1clk before data */ - strcr |= CCSR_SSI_STCR_TFSL | CCSR_SSI_STCR_TSCKP | - CCSR_SSI_STCR_TXBIT0 | CCSR_SSI_STCR_TEFS; + strcr |= SSI_STCR_TFSL | SSI_STCR_TSCKP | + SSI_STCR_TXBIT0 | SSI_STCR_TEFS; break; case SND_SOC_DAIFMT_DSP_B: /* Data on rising edge of bclk, frame high */ - strcr |= CCSR_SSI_STCR_TFSL | CCSR_SSI_STCR_TSCKP | - CCSR_SSI_STCR_TXBIT0; + strcr |= SSI_STCR_TFSL | SSI_STCR_TSCKP | + SSI_STCR_TXBIT0; break; case SND_SOC_DAIFMT_AC97: /* Data on falling edge of bclk, frame high, 1clk before data */ - ssi->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_NORMAL; + ssi->i2s_mode |= SSI_SCR_I2S_MODE_NORMAL; break; default: return -EINVAL; @@ -959,16 +959,16 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, break; case SND_SOC_DAIFMT_IB_NF: /* Invert bit clock */ - strcr ^= CCSR_SSI_STCR_TSCKP; + strcr ^= SSI_STCR_TSCKP; break; case SND_SOC_DAIFMT_NB_IF: /* Invert frame clock */ - strcr ^= CCSR_SSI_STCR_TFSI; + strcr ^= SSI_STCR_TFSI; break; case SND_SOC_DAIFMT_IB_IF: /* Invert both clocks */ - strcr ^= CCSR_SSI_STCR_TSCKP; - 
strcr ^= CCSR_SSI_STCR_TFSI; + strcr ^= SSI_STCR_TSCKP; + strcr ^= SSI_STCR_TFSI; break; default: return -EINVAL; @@ -978,18 +978,18 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBS_CFS: /* Output bit and frame sync clocks */ - strcr |= CCSR_SSI_STCR_TFDIR | CCSR_SSI_STCR_TXDIR; - scr |= CCSR_SSI_SCR_SYS_CLK_EN; + strcr |= SSI_STCR_TFDIR | SSI_STCR_TXDIR; + scr |= SSI_SCR_SYS_CLK_EN; break; case SND_SOC_DAIFMT_CBM_CFM: /* Input bit or frame sync clocks */ - scr &= ~CCSR_SSI_SCR_SYS_CLK_EN; + scr &= ~SSI_SCR_SYS_CLK_EN; break; case SND_SOC_DAIFMT_CBM_CFS: /* Input bit clock but output frame sync clock */ - strcr &= ~CCSR_SSI_STCR_TXDIR; - strcr |= CCSR_SSI_STCR_TFDIR; - scr &= ~CCSR_SSI_SCR_SYS_CLK_EN; + strcr &= ~SSI_STCR_TXDIR; + strcr |= SSI_STCR_TFDIR; + scr &= ~SSI_SCR_SYS_CLK_EN; break; default: if (!fsl_ssi_is_ac97(ssi)) @@ -1001,27 +1001,27 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, /* Set SYN mode and clear RXDIR bit when using SYN or AC97 mode */ if (ssi->cpu_dai_drv.symmetric_rates || fsl_ssi_is_ac97(ssi)) { - srcr &= ~CCSR_SSI_SRCR_RXDIR; - scr |= CCSR_SSI_SCR_SYN; + srcr &= ~SSI_SRCR_RXDIR; + scr |= SSI_SCR_SYN; } - regmap_write(regs, CCSR_SSI_STCR, stcr); - regmap_write(regs, CCSR_SSI_SRCR, srcr); - regmap_write(regs, CCSR_SSI_SCR, scr); + regmap_write(regs, REG_SSI_STCR, stcr); + regmap_write(regs, REG_SSI_SRCR, srcr); + regmap_write(regs, REG_SSI_SCR, scr); wm = ssi->fifo_watermark; - regmap_write(regs, CCSR_SSI_SFCSR, - CCSR_SSI_SFCSR_TFWM0(wm) | CCSR_SSI_SFCSR_RFWM0(wm) | - CCSR_SSI_SFCSR_TFWM1(wm) | CCSR_SSI_SFCSR_RFWM1(wm)); + regmap_write(regs, REG_SSI_SFCSR, + SSI_SFCSR_TFWM0(wm) | SSI_SFCSR_RFWM0(wm) | + SSI_SFCSR_TFWM1(wm) | SSI_SFCSR_RFWM1(wm)); if (ssi->use_dual_fifo) { - regmap_update_bits(regs, CCSR_SSI_SRCR, CCSR_SSI_SRCR_RFEN1, - CCSR_SSI_SRCR_RFEN1); - regmap_update_bits(regs, CCSR_SSI_STCR, CCSR_SSI_STCR_TFEN1, - CCSR_SSI_STCR_TFEN1); - 
regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_TCH_EN, - CCSR_SSI_SCR_TCH_EN); + regmap_update_bits(regs, REG_SSI_SRCR, SSI_SRCR_RFEN1, + SSI_SRCR_RFEN1); + regmap_update_bits(regs, REG_SSI_STCR, SSI_STCR_TFEN1, + SSI_STCR_TFEN1); + regmap_update_bits(regs, REG_SSI_SCR, SSI_SCR_TCH_EN, + SSI_SCR_TCH_EN); } if ((fmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_AC97) @@ -1062,30 +1062,30 @@ static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, } /* The slot number should be >= 2 if using Network mode or I2S mode */ - regmap_read(regs, CCSR_SSI_SCR, &val); - val &= CCSR_SSI_SCR_I2S_MODE_MASK | CCSR_SSI_SCR_NET; + regmap_read(regs, REG_SSI_SCR, &val); + val &= SSI_SCR_I2S_MODE_MASK | SSI_SCR_NET; if (val && slots < 2) { dev_err(cpu_dai->dev, "slot number should be >= 2 in I2S or NET\n"); return -EINVAL; } - regmap_update_bits(regs, CCSR_SSI_STCCR, CCSR_SSI_SxCCR_DC_MASK, - CCSR_SSI_SxCCR_DC(slots)); - regmap_update_bits(regs, CCSR_SSI_SRCCR, CCSR_SSI_SxCCR_DC_MASK, - CCSR_SSI_SxCCR_DC(slots)); + regmap_update_bits(regs, REG_SSI_STCCR, SSI_SxCCR_DC_MASK, + SSI_SxCCR_DC(slots)); + regmap_update_bits(regs, REG_SSI_SRCCR, SSI_SxCCR_DC_MASK, + SSI_SxCCR_DC(slots)); /* Save SSIEN bit of the SCR register */ - regmap_read(regs, CCSR_SSI_SCR, &val); - val &= CCSR_SSI_SCR_SSIEN; + regmap_read(regs, REG_SSI_SCR, &val); + val &= SSI_SCR_SSIEN; /* Temporarily enable SSI to allow SxMSKs to be configurable */ - regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_SSIEN, - CCSR_SSI_SCR_SSIEN); + regmap_update_bits(regs, REG_SSI_SCR, SSI_SCR_SSIEN, + SSI_SCR_SSIEN); - regmap_write(regs, CCSR_SSI_STMSK, ~tx_mask); - regmap_write(regs, CCSR_SSI_SRMSK, ~rx_mask); + regmap_write(regs, REG_SSI_STMSK, ~tx_mask); + regmap_write(regs, REG_SSI_SRMSK, ~rx_mask); /* Restore the value of SSIEN bit */ - regmap_update_bits(regs, CCSR_SSI_SCR, CCSR_SSI_SCR_SSIEN, val); + regmap_update_bits(regs, REG_SSI_SCR, SSI_SCR_SSIEN, val); ssi->slot_width = slot_width; ssi->slots 
= slots; @@ -1132,9 +1132,9 @@ static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd, /* Clear corresponding FIFO */ if (fsl_ssi_is_ac97(ssi)) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - regmap_write(regs, CCSR_SSI_SOR, CCSR_SSI_SOR_TX_CLR); + regmap_write(regs, REG_SSI_SOR, SSI_SOR_TX_CLR); else - regmap_write(regs, CCSR_SSI_SOR, CCSR_SSI_SOR_RX_CLR); + regmap_write(regs, REG_SSI_SOR, SSI_SOR_RX_CLR); } return 0; @@ -1230,13 +1230,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, } lreg = reg << 12; - regmap_write(regs, CCSR_SSI_SACADD, lreg); + regmap_write(regs, REG_SSI_SACADD, lreg); lval = val << 4; - regmap_write(regs, CCSR_SSI_SACDAT, lval); + regmap_write(regs, REG_SSI_SACDAT, lval); - regmap_update_bits(regs, CCSR_SSI_SACNT, CCSR_SSI_SACNT_RDWR_MASK, - CCSR_SSI_SACNT_WR); + regmap_update_bits(regs, REG_SSI_SACNT, SSI_SACNT_RDWR_MASK, + SSI_SACNT_WR); udelay(100); clk_disable_unprepare(fsl_ac97_data->clk); @@ -1265,13 +1265,13 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, } lreg = (reg & 0x7f) << 12; - regmap_write(regs, CCSR_SSI_SACADD, lreg); - regmap_update_bits(regs, CCSR_SSI_SACNT, CCSR_SSI_SACNT_RDWR_MASK, - CCSR_SSI_SACNT_RD); + regmap_write(regs, REG_SSI_SACADD, lreg); + regmap_update_bits(regs, REG_SSI_SACNT, SSI_SACNT_RDWR_MASK, + SSI_SACNT_RD); udelay(100); - regmap_read(regs, CCSR_SSI_SACDAT, &reg_val); + regmap_read(regs, REG_SSI_SACDAT, &reg_val); val = (reg_val >> 4) & 0xffff; clk_disable_unprepare(fsl_ac97_data->clk); @@ -1333,8 +1333,8 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, ssi->dma_params_tx.maxburst = ssi->dma_maxburst; ssi->dma_params_rx.maxburst = ssi->dma_maxburst; - ssi->dma_params_tx.addr = ssi->ssi_phys + CCSR_SSI_STX0; - ssi->dma_params_rx.addr = ssi->ssi_phys + CCSR_SSI_SRX0; + ssi->dma_params_tx.addr = ssi->ssi_phys + REG_SSI_STX0; + ssi->dma_params_rx.addr = ssi->ssi_phys + REG_SSI_SRX0; /* Set to dual FIFO mode according to the SDMA 
sciprt */ ret = of_property_read_u32_array(np, "dmas", dmas, 4); @@ -1439,9 +1439,9 @@ static int fsl_ssi_probe(struct platform_device *pdev) if (ssi->soc->imx21regs) { /* No SACC{ST,EN,DIS} regs in imx21-class SSI */ - regconfig.max_register = CCSR_SSI_SRMSK; + regconfig.max_register = REG_SSI_SRMSK; regconfig.num_reg_defaults_raw = - CCSR_SSI_SRMSK / sizeof(uint32_t) + 1; + REG_SSI_SRMSK / sizeof(uint32_t) + 1; } ret = of_property_match_string(np, "clock-names", "ipg"); @@ -1638,8 +1638,8 @@ static int fsl_ssi_suspend(struct device *dev) struct fsl_ssi *ssi = dev_get_drvdata(dev); struct regmap *regs = ssi->regs; - regmap_read(regs, CCSR_SSI_SFCSR, &ssi->regcache_sfcsr); - regmap_read(regs, CCSR_SSI_SACNT, &ssi->regcache_sacnt); + regmap_read(regs, REG_SSI_SFCSR, &ssi->regcache_sfcsr); + regmap_read(regs, REG_SSI_SACNT, &ssi->regcache_sacnt); regcache_cache_only(regs, true); regcache_mark_dirty(regs); @@ -1654,11 +1654,11 @@ static int fsl_ssi_resume(struct device *dev) regcache_cache_only(regs, false); - regmap_update_bits(regs, CCSR_SSI_SFCSR, - CCSR_SSI_SFCSR_RFWM1_MASK | CCSR_SSI_SFCSR_TFWM1_MASK | - CCSR_SSI_SFCSR_RFWM0_MASK | CCSR_SSI_SFCSR_TFWM0_MASK, + regmap_update_bits(regs, REG_SSI_SFCSR, + SSI_SFCSR_RFWM1_MASK | SSI_SFCSR_TFWM1_MASK | + SSI_SFCSR_RFWM0_MASK | SSI_SFCSR_TFWM0_MASK, ssi->regcache_sfcsr); - regmap_write(regs, CCSR_SSI_SACNT, ssi->regcache_sacnt); + regmap_write(regs, REG_SSI_SACNT, ssi->regcache_sacnt); return regcache_sync(regs); } diff --git a/sound/soc/fsl/fsl_ssi.h b/sound/soc/fsl/fsl_ssi.h index 1ad3bde7f065..cdcf3d23873e 100644 --- a/sound/soc/fsl/fsl_ssi.h +++ b/sound/soc/fsl/fsl_ssi.h @@ -15,55 +15,55 @@ /* -- SSI Register Map -- */ /* SSI Transmit Data Register 0 */ -#define CCSR_SSI_STX0 0x00 +#define REG_SSI_STX0 0x00 /* SSI Transmit Data Register 1 */ -#define CCSR_SSI_STX1 0x04 +#define REG_SSI_STX1 0x04 /* SSI Receive Data Register 0 */ -#define CCSR_SSI_SRX0 0x08 +#define REG_SSI_SRX0 0x08 /* SSI Receive Data Register 1 */ 
-#define CCSR_SSI_SRX1 0x0c +#define REG_SSI_SRX1 0x0c /* SSI Control Register */ -#define CCSR_SSI_SCR 0x10 +#define REG_SSI_SCR 0x10 /* SSI Interrupt Status Register */ -#define CCSR_SSI_SISR 0x14 +#define REG_SSI_SISR 0x14 /* SSI Interrupt Enable Register */ -#define CCSR_SSI_SIER 0x18 +#define REG_SSI_SIER 0x18 /* SSI Transmit Configuration Register */ -#define CCSR_SSI_STCR 0x1c +#define REG_SSI_STCR 0x1c /* SSI Receive Configuration Register */ -#define CCSR_SSI_SRCR 0x20 +#define REG_SSI_SRCR 0x20 /* SSI Transmit Clock Control Register */ -#define CCSR_SSI_STCCR 0x24 +#define REG_SSI_STCCR 0x24 /* SSI Receive Clock Control Register */ -#define CCSR_SSI_SRCCR 0x28 +#define REG_SSI_SRCCR 0x28 /* SSI FIFO Control/Status Register */ -#define CCSR_SSI_SFCSR 0x2c +#define REG_SSI_SFCSR 0x2c /* * SSI Test Register (Intended for debugging purposes only) * * Note: STR is not documented in recent IMX datasheet, but * is described in IMX51 reference manual at section 56.3.3.14 */ -#define CCSR_SSI_STR 0x30 +#define REG_SSI_STR 0x30 /* * SSI Option Register (Intended for internal use only) * * Note: SOR is not documented in recent IMX datasheet, but * is described in IMX51 reference manual at section 56.3.3.15 */ -#define CCSR_SSI_SOR 0x34 +#define REG_SSI_SOR 0x34 /* SSI AC97 Control Register */ -#define CCSR_SSI_SACNT 0x38 +#define REG_SSI_SACNT 0x38 /* SSI AC97 Command Address Register */ -#define CCSR_SSI_SACADD 0x3c +#define REG_SSI_SACADD 0x3c /* SSI AC97 Command Data Register */ -#define CCSR_SSI_SACDAT 0x40 +#define REG_SSI_SACDAT 0x40 /* SSI AC97 Tag Register */ -#define CCSR_SSI_SATAG 0x44 +#define REG_SSI_SATAG 0x44 /* SSI Transmit Time Slot Mask Register */ -#define CCSR_SSI_STMSK 0x48 +#define REG_SSI_STMSK 0x48 /* SSI Receive Time Slot Mask Register */ -#define CCSR_SSI_SRMSK 0x4c +#define REG_SSI_SRMSK 0x4c /* * SSI AC97 Channel Status Register * @@ -72,193 +72,193 @@ * 2) Writing a '1' bit at some position in SACCDIS unsets the relevant bit * 3) Receivng 
a '1' in SLOTREQ bit from external CODEC via AC Link */ -#define CCSR_SSI_SACCST 0x50 +#define REG_SSI_SACCST 0x50 /* SSI AC97 Channel Enable Register -- Set bits in SACCST */ -#define CCSR_SSI_SACCEN 0x54 +#define REG_SSI_SACCEN 0x54 /* SSI AC97 Channel Disable Register -- Clear bits in SACCST */ -#define CCSR_SSI_SACCDIS 0x58 +#define REG_SSI_SACCDIS 0x58 /* -- SSI Register Field Maps -- */ -/* SSI Control Register -- CCSR_SSI_SCR 0x10 */ -#define CCSR_SSI_SCR_SYNC_TX_FS 0x00001000 -#define CCSR_SSI_SCR_RFR_CLK_DIS 0x00000800 -#define CCSR_SSI_SCR_TFR_CLK_DIS 0x00000400 -#define CCSR_SSI_SCR_TCH_EN 0x00000100 -#define CCSR_SSI_SCR_SYS_CLK_EN 0x00000080 -#define CCSR_SSI_SCR_I2S_MODE_MASK 0x00000060 -#define CCSR_SSI_SCR_I2S_MODE_NORMAL 0x00000000 -#define CCSR_SSI_SCR_I2S_MODE_MASTER 0x00000020 -#define CCSR_SSI_SCR_I2S_MODE_SLAVE 0x00000040 -#define CCSR_SSI_SCR_SYN 0x00000010 -#define CCSR_SSI_SCR_NET 0x00000008 -#define CCSR_SSI_SCR_RE 0x00000004 -#define CCSR_SSI_SCR_TE 0x00000002 -#define CCSR_SSI_SCR_SSIEN 0x00000001 +/* SSI Control Register -- REG_SSI_SCR 0x10 */ +#define SSI_SCR_SYNC_TX_FS 0x00001000 +#define SSI_SCR_RFR_CLK_DIS 0x00000800 +#define SSI_SCR_TFR_CLK_DIS 0x00000400 +#define SSI_SCR_TCH_EN 0x00000100 +#define SSI_SCR_SYS_CLK_EN 0x00000080 +#define SSI_SCR_I2S_MODE_MASK 0x00000060 +#define SSI_SCR_I2S_MODE_NORMAL 0x00000000 +#define SSI_SCR_I2S_MODE_MASTER 0x00000020 +#define SSI_SCR_I2S_MODE_SLAVE 0x00000040 +#define SSI_SCR_SYN 0x00000010 +#define SSI_SCR_NET 0x00000008 +#define SSI_SCR_RE 0x00000004 +#define SSI_SCR_TE 0x00000002 +#define SSI_SCR_SSIEN 0x00000001 -/* SSI Interrupt Status Register -- CCSR_SSI_SISR 0x14 */ -#define CCSR_SSI_SISR_RFRC 0x01000000 -#define CCSR_SSI_SISR_TFRC 0x00800000 -#define CCSR_SSI_SISR_CMDAU 0x00040000 -#define CCSR_SSI_SISR_CMDDU 0x00020000 -#define CCSR_SSI_SISR_RXT 0x00010000 -#define CCSR_SSI_SISR_RDR1 0x00008000 -#define CCSR_SSI_SISR_RDR0 0x00004000 -#define CCSR_SSI_SISR_TDE1 0x00002000 -#define 
CCSR_SSI_SISR_TDE0 0x00001000 -#define CCSR_SSI_SISR_ROE1 0x00000800 -#define CCSR_SSI_SISR_ROE0 0x00000400 -#define CCSR_SSI_SISR_TUE1 0x00000200 -#define CCSR_SSI_SISR_TUE0 0x00000100 -#define CCSR_SSI_SISR_TFS 0x00000080 -#define CCSR_SSI_SISR_RFS 0x00000040 -#define CCSR_SSI_SISR_TLS 0x00000020 -#define CCSR_SSI_SISR_RLS 0x00000010 -#define CCSR_SSI_SISR_RFF1 0x00000008 -#define CCSR_SSI_SISR_RFF0 0x00000004 -#define CCSR_SSI_SISR_TFE1 0x00000002 -#define CCSR_SSI_SISR_TFE0 0x00000001 +/* SSI Interrupt Status Register -- REG_SSI_SISR 0x14 */ +#define SSI_SISR_RFRC 0x01000000 +#define SSI_SISR_TFRC 0x00800000 +#define SSI_SISR_CMDAU 0x00040000 +#define SSI_SISR_CMDDU 0x00020000 +#define SSI_SISR_RXT 0x00010000 +#define SSI_SISR_RDR1 0x00008000 +#define SSI_SISR_RDR0 0x00004000 +#define SSI_SISR_TDE1 0x00002000 +#define SSI_SISR_TDE0 0x00001000 +#define SSI_SISR_ROE1 0x00000800 +#define SSI_SISR_ROE0 0x00000400 +#define SSI_SISR_TUE1 0x00000200 +#define SSI_SISR_TUE0 0x00000100 +#define SSI_SISR_TFS 0x00000080 +#define SSI_SISR_RFS 0x00000040 +#define SSI_SISR_TLS 0x00000020 +#define SSI_SISR_RLS 0x00000010 +#define SSI_SISR_RFF1 0x00000008 +#define SSI_SISR_RFF0 0x00000004 +#define SSI_SISR_TFE1 0x00000002 +#define SSI_SISR_TFE0 0x00000001 -/* SSI Interrupt Enable Register -- CCSR_SSI_SIER 0x18 */ -#define CCSR_SSI_SIER_RFRC_EN 0x01000000 -#define CCSR_SSI_SIER_TFRC_EN 0x00800000 -#define CCSR_SSI_SIER_RDMAE 0x00400000 -#define CCSR_SSI_SIER_RIE 0x00200000 -#define CCSR_SSI_SIER_TDMAE 0x00100000 -#define CCSR_SSI_SIER_TIE 0x00080000 -#define CCSR_SSI_SIER_CMDAU_EN 0x00040000 -#define CCSR_SSI_SIER_CMDDU_EN 0x00020000 -#define CCSR_SSI_SIER_RXT_EN 0x00010000 -#define CCSR_SSI_SIER_RDR1_EN 0x00008000 -#define CCSR_SSI_SIER_RDR0_EN 0x00004000 -#define CCSR_SSI_SIER_TDE1_EN 0x00002000 -#define CCSR_SSI_SIER_TDE0_EN 0x00001000 -#define CCSR_SSI_SIER_ROE1_EN 0x00000800 -#define CCSR_SSI_SIER_ROE0_EN 0x00000400 -#define CCSR_SSI_SIER_TUE1_EN 0x00000200 -#define 
CCSR_SSI_SIER_TUE0_EN 0x00000100 -#define CCSR_SSI_SIER_TFS_EN 0x00000080 -#define CCSR_SSI_SIER_RFS_EN 0x00000040 -#define CCSR_SSI_SIER_TLS_EN 0x00000020 -#define CCSR_SSI_SIER_RLS_EN 0x00000010 -#define CCSR_SSI_SIER_RFF1_EN 0x00000008 -#define CCSR_SSI_SIER_RFF0_EN 0x00000004 -#define CCSR_SSI_SIER_TFE1_EN 0x00000002 -#define CCSR_SSI_SIER_TFE0_EN 0x00000001 +/* SSI Interrupt Enable Register -- REG_SSI_SIER 0x18 */ +#define SSI_SIER_RFRC_EN 0x01000000 +#define SSI_SIER_TFRC_EN 0x00800000 +#define SSI_SIER_RDMAE 0x00400000 +#define SSI_SIER_RIE 0x00200000 +#define SSI_SIER_TDMAE 0x00100000 +#define SSI_SIER_TIE 0x00080000 +#define SSI_SIER_CMDAU_EN 0x00040000 +#define SSI_SIER_CMDDU_EN 0x00020000 +#define SSI_SIER_RXT_EN 0x00010000 +#define SSI_SIER_RDR1_EN 0x00008000 +#define SSI_SIER_RDR0_EN 0x00004000 +#define SSI_SIER_TDE1_EN 0x00002000 +#define SSI_SIER_TDE0_EN 0x00001000 +#define SSI_SIER_ROE1_EN 0x00000800 +#define SSI_SIER_ROE0_EN 0x00000400 +#define SSI_SIER_TUE1_EN 0x00000200 +#define SSI_SIER_TUE0_EN 0x00000100 +#define SSI_SIER_TFS_EN 0x00000080 +#define SSI_SIER_RFS_EN 0x00000040 +#define SSI_SIER_TLS_EN 0x00000020 +#define SSI_SIER_RLS_EN 0x00000010 +#define SSI_SIER_RFF1_EN 0x00000008 +#define SSI_SIER_RFF0_EN 0x00000004 +#define SSI_SIER_TFE1_EN 0x00000002 +#define SSI_SIER_TFE0_EN 0x00000001 -/* SSI Transmit Configuration Register -- CCSR_SSI_STCR 0x1C */ -#define CCSR_SSI_STCR_TXBIT0 0x00000200 -#define CCSR_SSI_STCR_TFEN1 0x00000100 -#define CCSR_SSI_STCR_TFEN0 0x00000080 -#define CCSR_SSI_STCR_TFDIR 0x00000040 -#define CCSR_SSI_STCR_TXDIR 0x00000020 -#define CCSR_SSI_STCR_TSHFD 0x00000010 -#define CCSR_SSI_STCR_TSCKP 0x00000008 -#define CCSR_SSI_STCR_TFSI 0x00000004 -#define CCSR_SSI_STCR_TFSL 0x00000002 -#define CCSR_SSI_STCR_TEFS 0x00000001 +/* SSI Transmit Configuration Register -- REG_SSI_STCR 0x1C */ +#define SSI_STCR_TXBIT0 0x00000200 +#define SSI_STCR_TFEN1 0x00000100 +#define SSI_STCR_TFEN0 0x00000080 +#define SSI_STCR_TFDIR 
0x00000040 +#define SSI_STCR_TXDIR 0x00000020 +#define SSI_STCR_TSHFD 0x00000010 +#define SSI_STCR_TSCKP 0x00000008 +#define SSI_STCR_TFSI 0x00000004 +#define SSI_STCR_TFSL 0x00000002 +#define SSI_STCR_TEFS 0x00000001 -/* SSI Receive Configuration Register -- CCSR_SSI_SRCR 0x20 */ -#define CCSR_SSI_SRCR_RXEXT 0x00000400 -#define CCSR_SSI_SRCR_RXBIT0 0x00000200 -#define CCSR_SSI_SRCR_RFEN1 0x00000100 -#define CCSR_SSI_SRCR_RFEN0 0x00000080 -#define CCSR_SSI_SRCR_RFDIR 0x00000040 -#define CCSR_SSI_SRCR_RXDIR 0x00000020 -#define CCSR_SSI_SRCR_RSHFD 0x00000010 -#define CCSR_SSI_SRCR_RSCKP 0x00000008 -#define CCSR_SSI_SRCR_RFSI 0x00000004 -#define CCSR_SSI_SRCR_RFSL 0x00000002 -#define CCSR_SSI_SRCR_REFS 0x00000001 +/* SSI Receive Configuration Register -- REG_SSI_SRCR 0x20 */ +#define SSI_SRCR_RXEXT 0x00000400 +#define SSI_SRCR_RXBIT0 0x00000200 +#define SSI_SRCR_RFEN1 0x00000100 +#define SSI_SRCR_RFEN0 0x00000080 +#define SSI_SRCR_RFDIR 0x00000040 +#define SSI_SRCR_RXDIR 0x00000020 +#define SSI_SRCR_RSHFD 0x00000010 +#define SSI_SRCR_RSCKP 0x00000008 +#define SSI_SRCR_RFSI 0x00000004 +#define SSI_SRCR_RFSL 0x00000002 +#define SSI_SRCR_REFS 0x00000001 /* - * SSI Transmit Clock Control Register -- CCSR_SSI_STCCR 0x24 - * SSI Receive Clock Control Register -- CCSR_SSI_SRCCR 0x28 + * SSI Transmit Clock Control Register -- REG_SSI_STCCR 0x24 + * SSI Receive Clock Control Register -- REG_SSI_SRCCR 0x28 */ -#define CCSR_SSI_SxCCR_DIV2_SHIFT 18 -#define CCSR_SSI_SxCCR_DIV2 0x00040000 -#define CCSR_SSI_SxCCR_PSR_SHIFT 17 -#define CCSR_SSI_SxCCR_PSR 0x00020000 -#define CCSR_SSI_SxCCR_WL_SHIFT 13 -#define CCSR_SSI_SxCCR_WL_MASK 0x0001E000 -#define CCSR_SSI_SxCCR_WL(x) \ - (((((x) / 2) - 1) << CCSR_SSI_SxCCR_WL_SHIFT) & CCSR_SSI_SxCCR_WL_MASK) -#define CCSR_SSI_SxCCR_DC_SHIFT 8 -#define CCSR_SSI_SxCCR_DC_MASK 0x00001F00 -#define CCSR_SSI_SxCCR_DC(x) \ - ((((x) - 1) << CCSR_SSI_SxCCR_DC_SHIFT) & CCSR_SSI_SxCCR_DC_MASK) -#define CCSR_SSI_SxCCR_PM_SHIFT 0 -#define 
CCSR_SSI_SxCCR_PM_MASK 0x000000FF -#define CCSR_SSI_SxCCR_PM(x) \ - ((((x) - 1) << CCSR_SSI_SxCCR_PM_SHIFT) & CCSR_SSI_SxCCR_PM_MASK) +#define SSI_SxCCR_DIV2_SHIFT 18 +#define SSI_SxCCR_DIV2 0x00040000 +#define SSI_SxCCR_PSR_SHIFT 17 +#define SSI_SxCCR_PSR 0x00020000 +#define SSI_SxCCR_WL_SHIFT 13 +#define SSI_SxCCR_WL_MASK 0x0001E000 +#define SSI_SxCCR_WL(x) \ + (((((x) / 2) - 1) << SSI_SxCCR_WL_SHIFT) & SSI_SxCCR_WL_MASK) +#define SSI_SxCCR_DC_SHIFT 8 +#define SSI_SxCCR_DC_MASK 0x00001F00 +#define SSI_SxCCR_DC(x) \ + ((((x) - 1) << SSI_SxCCR_DC_SHIFT) & SSI_SxCCR_DC_MASK) +#define SSI_SxCCR_PM_SHIFT 0 +#define SSI_SxCCR_PM_MASK 0x000000FF +#define SSI_SxCCR_PM(x) \ + ((((x) - 1) << SSI_SxCCR_PM_SHIFT) & SSI_SxCCR_PM_MASK) /* - * SSI FIFO Control/Status Register -- CCSR_SSI_SFCSR 0x2c + * SSI FIFO Control/Status Register -- REG_SSI_SFCSR 0x2c * - * Tx or Rx FIFO Counter -- CCSR_SSI_SFCSR_xFCNTy Read-Only - * Tx or Rx FIFO Watermarks -- CCSR_SSI_SFCSR_xFWMy Read/Write + * Tx or Rx FIFO Counter -- SSI_SFCSR_xFCNTy Read-Only + * Tx or Rx FIFO Watermarks -- SSI_SFCSR_xFWMy Read/Write */ -#define CCSR_SSI_SFCSR_RFCNT1_SHIFT 28 -#define CCSR_SSI_SFCSR_RFCNT1_MASK 0xF0000000 -#define CCSR_SSI_SFCSR_RFCNT1(x) \ - (((x) & CCSR_SSI_SFCSR_RFCNT1_MASK) >> CCSR_SSI_SFCSR_RFCNT1_SHIFT) -#define CCSR_SSI_SFCSR_TFCNT1_SHIFT 24 -#define CCSR_SSI_SFCSR_TFCNT1_MASK 0x0F000000 -#define CCSR_SSI_SFCSR_TFCNT1(x) \ - (((x) & CCSR_SSI_SFCSR_TFCNT1_MASK) >> CCSR_SSI_SFCSR_TFCNT1_SHIFT) -#define CCSR_SSI_SFCSR_RFWM1_SHIFT 20 -#define CCSR_SSI_SFCSR_RFWM1_MASK 0x00F00000 -#define CCSR_SSI_SFCSR_RFWM1(x) \ - (((x) << CCSR_SSI_SFCSR_RFWM1_SHIFT) & CCSR_SSI_SFCSR_RFWM1_MASK) -#define CCSR_SSI_SFCSR_TFWM1_SHIFT 16 -#define CCSR_SSI_SFCSR_TFWM1_MASK 0x000F0000 -#define CCSR_SSI_SFCSR_TFWM1(x) \ - (((x) << CCSR_SSI_SFCSR_TFWM1_SHIFT) & CCSR_SSI_SFCSR_TFWM1_MASK) -#define CCSR_SSI_SFCSR_RFCNT0_SHIFT 12 -#define CCSR_SSI_SFCSR_RFCNT0_MASK 0x0000F000 -#define CCSR_SSI_SFCSR_RFCNT0(x) \ - (((x) & 
CCSR_SSI_SFCSR_RFCNT0_MASK) >> CCSR_SSI_SFCSR_RFCNT0_SHIFT) -#define CCSR_SSI_SFCSR_TFCNT0_SHIFT 8 -#define CCSR_SSI_SFCSR_TFCNT0_MASK 0x00000F00 -#define CCSR_SSI_SFCSR_TFCNT0(x) \ - (((x) & CCSR_SSI_SFCSR_TFCNT0_MASK) >> CCSR_SSI_SFCSR_TFCNT0_SHIFT) -#define CCSR_SSI_SFCSR_RFWM0_SHIFT 4 -#define CCSR_SSI_SFCSR_RFWM0_MASK 0x000000F0 -#define CCSR_SSI_SFCSR_RFWM0(x) \ - (((x) << CCSR_SSI_SFCSR_RFWM0_SHIFT) & CCSR_SSI_SFCSR_RFWM0_MASK) -#define CCSR_SSI_SFCSR_TFWM0_SHIFT 0 -#define CCSR_SSI_SFCSR_TFWM0_MASK 0x0000000F -#define CCSR_SSI_SFCSR_TFWM0(x) \ - (((x) << CCSR_SSI_SFCSR_TFWM0_SHIFT) & CCSR_SSI_SFCSR_TFWM0_MASK) +#define SSI_SFCSR_RFCNT1_SHIFT 28 +#define SSI_SFCSR_RFCNT1_MASK 0xF0000000 +#define SSI_SFCSR_RFCNT1(x) \ + (((x) & SSI_SFCSR_RFCNT1_MASK) >> SSI_SFCSR_RFCNT1_SHIFT) +#define SSI_SFCSR_TFCNT1_SHIFT 24 +#define SSI_SFCSR_TFCNT1_MASK 0x0F000000 +#define SSI_SFCSR_TFCNT1(x) \ + (((x) & SSI_SFCSR_TFCNT1_MASK) >> SSI_SFCSR_TFCNT1_SHIFT) +#define SSI_SFCSR_RFWM1_SHIFT 20 +#define SSI_SFCSR_RFWM1_MASK 0x00F00000 +#define SSI_SFCSR_RFWM1(x) \ + (((x) << SSI_SFCSR_RFWM1_SHIFT) & SSI_SFCSR_RFWM1_MASK) +#define SSI_SFCSR_TFWM1_SHIFT 16 +#define SSI_SFCSR_TFWM1_MASK 0x000F0000 +#define SSI_SFCSR_TFWM1(x) \ + (((x) << SSI_SFCSR_TFWM1_SHIFT) & SSI_SFCSR_TFWM1_MASK) +#define SSI_SFCSR_RFCNT0_SHIFT 12 +#define SSI_SFCSR_RFCNT0_MASK 0x0000F000 +#define SSI_SFCSR_RFCNT0(x) \ + (((x) & SSI_SFCSR_RFCNT0_MASK) >> SSI_SFCSR_RFCNT0_SHIFT) +#define SSI_SFCSR_TFCNT0_SHIFT 8 +#define SSI_SFCSR_TFCNT0_MASK 0x00000F00 +#define SSI_SFCSR_TFCNT0(x) \ + (((x) & SSI_SFCSR_TFCNT0_MASK) >> SSI_SFCSR_TFCNT0_SHIFT) +#define SSI_SFCSR_RFWM0_SHIFT 4 +#define SSI_SFCSR_RFWM0_MASK 0x000000F0 +#define SSI_SFCSR_RFWM0(x) \ + (((x) << SSI_SFCSR_RFWM0_SHIFT) & SSI_SFCSR_RFWM0_MASK) +#define SSI_SFCSR_TFWM0_SHIFT 0 +#define SSI_SFCSR_TFWM0_MASK 0x0000000F +#define SSI_SFCSR_TFWM0(x) \ + (((x) << SSI_SFCSR_TFWM0_SHIFT) & SSI_SFCSR_TFWM0_MASK) -/* SSI Test Register -- CCSR_SSI_STR 0x30 */ 
-#define CCSR_SSI_STR_TEST 0x00008000 -#define CCSR_SSI_STR_RCK2TCK 0x00004000 -#define CCSR_SSI_STR_RFS2TFS 0x00002000 -#define CCSR_SSI_STR_RXSTATE(x) (((x) >> 8) & 0x1F) -#define CCSR_SSI_STR_TXD2RXD 0x00000080 -#define CCSR_SSI_STR_TCK2RCK 0x00000040 -#define CCSR_SSI_STR_TFS2RFS 0x00000020 -#define CCSR_SSI_STR_TXSTATE(x) ((x) & 0x1F) +/* SSI Test Register -- REG_SSI_STR 0x30 */ +#define SSI_STR_TEST 0x00008000 +#define SSI_STR_RCK2TCK 0x00004000 +#define SSI_STR_RFS2TFS 0x00002000 +#define SSI_STR_RXSTATE(x) (((x) >> 8) & 0x1F) +#define SSI_STR_TXD2RXD 0x00000080 +#define SSI_STR_TCK2RCK 0x00000040 +#define SSI_STR_TFS2RFS 0x00000020 +#define SSI_STR_TXSTATE(x) ((x) & 0x1F) -/* SSI Option Register -- CCSR_SSI_SOR 0x34 */ -#define CCSR_SSI_SOR_CLKOFF 0x00000040 -#define CCSR_SSI_SOR_RX_CLR 0x00000020 -#define CCSR_SSI_SOR_TX_CLR 0x00000010 -#define CCSR_SSI_SOR_INIT 0x00000008 -#define CCSR_SSI_SOR_WAIT_SHIFT 1 -#define CCSR_SSI_SOR_WAIT_MASK 0x00000006 -#define CCSR_SSI_SOR_WAIT(x) (((x) & 3) << CCSR_SSI_SOR_WAIT_SHIFT) -#define CCSR_SSI_SOR_SYNRST 0x00000001 +/* SSI Option Register -- REG_SSI_SOR 0x34 */ +#define SSI_SOR_CLKOFF 0x00000040 +#define SSI_SOR_RX_CLR 0x00000020 +#define SSI_SOR_TX_CLR 0x00000010 +#define SSI_SOR_INIT 0x00000008 +#define SSI_SOR_WAIT_SHIFT 1 +#define SSI_SOR_WAIT_MASK 0x00000006 +#define SSI_SOR_WAIT(x) (((x) & 3) << SSI_SOR_WAIT_SHIFT) +#define SSI_SOR_SYNRST 0x00000001 -/* SSI AC97 Control Register -- CCSR_SSI_SACNT 0x38 */ -#define CCSR_SSI_SACNT_FRDIV(x) (((x) & 0x3f) << 5) -#define CCSR_SSI_SACNT_WR 0x00000010 -#define CCSR_SSI_SACNT_RD 0x00000008 -#define CCSR_SSI_SACNT_RDWR_MASK 0x00000018 -#define CCSR_SSI_SACNT_TIF 0x00000004 -#define CCSR_SSI_SACNT_FV 0x00000002 -#define CCSR_SSI_SACNT_AC97EN 0x00000001 +/* SSI AC97 Control Register -- REG_SSI_SACNT 0x38 */ +#define SSI_SACNT_FRDIV(x) (((x) & 0x3f) << 5) +#define SSI_SACNT_WR 0x00000010 +#define SSI_SACNT_RD 0x00000008 +#define SSI_SACNT_RDWR_MASK 0x00000018 +#define 
SSI_SACNT_TIF 0x00000004 +#define SSI_SACNT_FV 0x00000002 +#define SSI_SACNT_AC97EN 0x00000001 struct device; diff --git a/sound/soc/fsl/fsl_ssi_dbg.c b/sound/soc/fsl/fsl_ssi_dbg.c index 88d9e8e08905..362df91420f6 100644 --- a/sound/soc/fsl/fsl_ssi_dbg.c +++ b/sound/soc/fsl/fsl_ssi_dbg.c @@ -18,67 +18,67 @@ void fsl_ssi_dbg_isr(struct fsl_ssi_dbg *dbg, u32 sisr) { - if (sisr & CCSR_SSI_SISR_RFRC) + if (sisr & SSI_SISR_RFRC) dbg->stats.rfrc++; - if (sisr & CCSR_SSI_SISR_TFRC) + if (sisr & SSI_SISR_TFRC) dbg->stats.tfrc++; - if (sisr & CCSR_SSI_SISR_CMDAU) + if (sisr & SSI_SISR_CMDAU) dbg->stats.cmdau++; - if (sisr & CCSR_SSI_SISR_CMDDU) + if (sisr & SSI_SISR_CMDDU) dbg->stats.cmddu++; - if (sisr & CCSR_SSI_SISR_RXT) + if (sisr & SSI_SISR_RXT) dbg->stats.rxt++; - if (sisr & CCSR_SSI_SISR_RDR1) + if (sisr & SSI_SISR_RDR1) dbg->stats.rdr1++; - if (sisr & CCSR_SSI_SISR_RDR0) + if (sisr & SSI_SISR_RDR0) dbg->stats.rdr0++; - if (sisr & CCSR_SSI_SISR_TDE1) + if (sisr & SSI_SISR_TDE1) dbg->stats.tde1++; - if (sisr & CCSR_SSI_SISR_TDE0) + if (sisr & SSI_SISR_TDE0) dbg->stats.tde0++; - if (sisr & CCSR_SSI_SISR_ROE1) + if (sisr & SSI_SISR_ROE1) dbg->stats.roe1++; - if (sisr & CCSR_SSI_SISR_ROE0) + if (sisr & SSI_SISR_ROE0) dbg->stats.roe0++; - if (sisr & CCSR_SSI_SISR_TUE1) + if (sisr & SSI_SISR_TUE1) dbg->stats.tue1++; - if (sisr & CCSR_SSI_SISR_TUE0) + if (sisr & SSI_SISR_TUE0) dbg->stats.tue0++; - if (sisr & CCSR_SSI_SISR_TFS) + if (sisr & SSI_SISR_TFS) dbg->stats.tfs++; - if (sisr & CCSR_SSI_SISR_RFS) + if (sisr & SSI_SISR_RFS) dbg->stats.rfs++; - if (sisr & CCSR_SSI_SISR_TLS) + if (sisr & SSI_SISR_TLS) dbg->stats.tls++; - if (sisr & CCSR_SSI_SISR_RLS) + if (sisr & SSI_SISR_RLS) dbg->stats.rls++; - if (sisr & CCSR_SSI_SISR_RFF1) + if (sisr & SSI_SISR_RFF1) dbg->stats.rff1++; - if (sisr & CCSR_SSI_SISR_RFF0) + if (sisr & SSI_SISR_RFF0) dbg->stats.rff0++; - if (sisr & CCSR_SSI_SISR_TFE1) + if (sisr & SSI_SISR_TFE1) dbg->stats.tfe1++; - if (sisr & CCSR_SSI_SISR_TFE0) + if 
(sisr & SSI_SISR_TFE0) dbg->stats.tfe0++; } @@ -89,7 +89,7 @@ void fsl_ssi_dbg_isr(struct fsl_ssi_dbg *dbg, u32 sisr) */ #define SIER_SHOW(flag, name) \ do { \ - if (CCSR_SSI_SIER_##flag) \ + if (SSI_SIER_##flag) \ seq_printf(s, #name "=%u\n", ssi_dbg->stats.name); \ } while (0) -- cgit v1.2.3 From af4f7f388242d5e63e3026a03a12e18ef4d8f62c Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:04 -0800 Subject: ASoC: fsl_ssi: Refine indentations and wrappings This patch just simply unifies the coding style. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 239 +++++++++++++++++++++----------------------- sound/soc/fsl/fsl_ssi.h | 2 +- sound/soc/fsl/fsl_ssi_dbg.c | 3 +- 3 files changed, 118 insertions(+), 126 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 24d96956b53a..ed9ac758e35d 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -69,21 +69,35 @@ * samples will be written to STX properly. 
*/ #ifdef __BIG_ENDIAN -#define FSLSSI_I2S_FORMATS (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_BE | \ - SNDRV_PCM_FMTBIT_S18_3BE | SNDRV_PCM_FMTBIT_S20_3BE | \ - SNDRV_PCM_FMTBIT_S24_3BE | SNDRV_PCM_FMTBIT_S24_BE) +#define FSLSSI_I2S_FORMATS \ + (SNDRV_PCM_FMTBIT_S8 | \ + SNDRV_PCM_FMTBIT_S16_BE | \ + SNDRV_PCM_FMTBIT_S18_3BE | \ + SNDRV_PCM_FMTBIT_S20_3BE | \ + SNDRV_PCM_FMTBIT_S24_3BE | \ + SNDRV_PCM_FMTBIT_S24_BE) #else -#define FSLSSI_I2S_FORMATS (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE | \ - SNDRV_PCM_FMTBIT_S18_3LE | SNDRV_PCM_FMTBIT_S20_3LE | \ - SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S24_LE) +#define FSLSSI_I2S_FORMATS \ + (SNDRV_PCM_FMTBIT_S8 | \ + SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S18_3LE | \ + SNDRV_PCM_FMTBIT_S20_3LE | \ + SNDRV_PCM_FMTBIT_S24_3LE | \ + SNDRV_PCM_FMTBIT_S24_LE) #endif -#define FSLSSI_SIER_DBG_RX_FLAGS (SSI_SIER_RFF0_EN | \ - SSI_SIER_RLS_EN | SSI_SIER_RFS_EN | \ - SSI_SIER_ROE0_EN | SSI_SIER_RFRC_EN) -#define FSLSSI_SIER_DBG_TX_FLAGS (SSI_SIER_TFE0_EN | \ - SSI_SIER_TLS_EN | SSI_SIER_TFS_EN | \ - SSI_SIER_TUE0_EN | SSI_SIER_TFRC_EN) +#define FSLSSI_SIER_DBG_RX_FLAGS \ + (SSI_SIER_RFF0_EN | \ + SSI_SIER_RLS_EN | \ + SSI_SIER_RFS_EN | \ + SSI_SIER_ROE0_EN | \ + SSI_SIER_RFRC_EN) +#define FSLSSI_SIER_DBG_TX_FLAGS \ + (SSI_SIER_TFE0_EN | \ + SSI_SIER_TLS_EN | \ + SSI_SIER_TFS_EN | \ + SSI_SIER_TUE0_EN | \ + SSI_SIER_TFRC_EN) enum fsl_ssi_type { FSL_SSI_MCP8610, @@ -291,8 +305,8 @@ static struct fsl_ssi_soc_data fsl_ssi_mpc8610 = { .imx = false, .offline_config = true, .sisr_write_mask = SSI_SISR_RFRC | SSI_SISR_TFRC | - SSI_SISR_ROE0 | SSI_SISR_ROE1 | - SSI_SISR_TUE0 | SSI_SISR_TUE1, + SSI_SISR_ROE0 | SSI_SISR_ROE1 | + SSI_SISR_TUE0 | SSI_SISR_TUE1, }; static struct fsl_ssi_soc_data fsl_ssi_imx21 = { @@ -306,15 +320,15 @@ static struct fsl_ssi_soc_data fsl_ssi_imx35 = { .imx = true, .offline_config = true, .sisr_write_mask = SSI_SISR_RFRC | SSI_SISR_TFRC | - SSI_SISR_ROE0 | SSI_SISR_ROE1 | - SSI_SISR_TUE0 | 
SSI_SISR_TUE1, + SSI_SISR_ROE0 | SSI_SISR_ROE1 | + SSI_SISR_TUE0 | SSI_SISR_TUE1, }; static struct fsl_ssi_soc_data fsl_ssi_imx51 = { .imx = true, .offline_config = false, .sisr_write_mask = SSI_SISR_ROE0 | SSI_SISR_ROE1 | - SSI_SISR_TUE0 | SSI_SISR_TUE1, + SSI_SISR_TUE0 | SSI_SISR_TUE1, }; static const struct of_device_id fsl_ssi_ids[] = { @@ -376,21 +390,21 @@ static void fsl_ssi_rxtx_config(struct fsl_ssi *ssi, bool enable) if (enable) { regmap_update_bits(regs, REG_SSI_SIER, - vals->rx.sier | vals->tx.sier, - vals->rx.sier | vals->tx.sier); + vals->rx.sier | vals->tx.sier, + vals->rx.sier | vals->tx.sier); regmap_update_bits(regs, REG_SSI_SRCR, - vals->rx.srcr | vals->tx.srcr, - vals->rx.srcr | vals->tx.srcr); + vals->rx.srcr | vals->tx.srcr, + vals->rx.srcr | vals->tx.srcr); regmap_update_bits(regs, REG_SSI_STCR, - vals->rx.stcr | vals->tx.stcr, - vals->rx.stcr | vals->tx.stcr); + vals->rx.stcr | vals->tx.stcr, + vals->rx.stcr | vals->tx.stcr); } else { regmap_update_bits(regs, REG_SSI_SRCR, - vals->rx.srcr | vals->tx.srcr, 0); + vals->rx.srcr | vals->tx.srcr, 0); regmap_update_bits(regs, REG_SSI_STCR, - vals->rx.stcr | vals->tx.stcr, 0); + vals->rx.stcr | vals->tx.stcr, 0); regmap_update_bits(regs, REG_SSI_SIER, - vals->rx.sier | vals->tx.sier, 0); + vals->rx.sier | vals->tx.sier, 0); } } @@ -401,10 +415,10 @@ static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) { if (is_rx) { regmap_update_bits(ssi->regs, REG_SSI_SOR, - SSI_SOR_RX_CLR, SSI_SOR_RX_CLR); + SSI_SOR_RX_CLR, SSI_SOR_RX_CLR); } else { regmap_update_bits(ssi->regs, REG_SSI_SOR, - SSI_SOR_TX_CLR, SSI_SOR_TX_CLR); + SSI_SOR_TX_CLR, SSI_SOR_TX_CLR); } } @@ -432,7 +446,7 @@ static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) * Enable or disable SSI configuration. 
*/ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, - struct fsl_ssi_reg_val *vals) + struct fsl_ssi_reg_val *vals) { struct regmap *regs = ssi->regs; struct fsl_ssi_reg_val *avals; @@ -442,8 +456,7 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, regmap_read(regs, REG_SSI_SCR, &scr_val); - nr_active_streams = !!(scr_val & SSI_SCR_TE) + - !!(scr_val & SSI_SCR_RE); + nr_active_streams = !!(scr_val & SSI_SCR_TE) + !!(scr_val & SSI_SCR_RE); if (nr_active_streams - 1 > 0) keep_active = 1; @@ -462,7 +475,7 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, * both streams, and get safe bits to disable current stream */ u32 scr = fsl_ssi_disable_val(vals->scr, avals->scr, - keep_active); + keep_active); /* Safely disable SCR register for the stream */ regmap_update_bits(regs, REG_SSI_SCR, scr, 0); } @@ -474,8 +487,7 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, * 2) Disable all remaining bits of both streams when last stream ends */ if (ssi->soc->offline_config) { - if ((enable && !nr_active_streams) || - (!enable && !keep_active)) + if ((enable && !nr_active_streams) || (!enable && !keep_active)) fsl_ssi_rxtx_config(ssi, enable); goto config_done; @@ -498,11 +510,11 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, * both streams, and get safe bits to disable current stream */ sier = fsl_ssi_disable_val(vals->sier, avals->sier, - keep_active); + keep_active); srcr = fsl_ssi_disable_val(vals->srcr, avals->srcr, - keep_active); + keep_active); stcr = fsl_ssi_disable_val(vals->stcr, avals->stcr, - keep_active); + keep_active); /* Safely disable other control registers for the stream */ regmap_update_bits(regs, REG_SSI_SRCR, srcr, 0); @@ -525,7 +537,7 @@ config_done: /* Enable SSI first to send TX DMA request */ regmap_update_bits(regs, REG_SSI_SCR, - SSI_SCR_SSIEN, SSI_SCR_SSIEN); + SSI_SCR_SSIEN, SSI_SCR_SSIEN); /* Busy wait until TX FIFO not empty -- DMA working */ for (i = 0; i < max_loop; i++) { @@ 
-544,7 +556,6 @@ config_done: } } - static void fsl_ssi_rx_config(struct fsl_ssi *ssi, bool enable) { fsl_ssi_config(ssi, enable, &ssi->rxtx_reg_val.rx); @@ -615,19 +626,16 @@ static void fsl_ssi_setup_ac97(struct fsl_ssi *ssi) struct regmap *regs = ssi->regs; /* Setup the clock control register */ - regmap_write(regs, REG_SSI_STCCR, - SSI_SxCCR_WL(17) | SSI_SxCCR_DC(13)); - regmap_write(regs, REG_SSI_SRCCR, - SSI_SxCCR_WL(17) | SSI_SxCCR_DC(13)); + regmap_write(regs, REG_SSI_STCCR, SSI_SxCCR_WL(17) | SSI_SxCCR_DC(13)); + regmap_write(regs, REG_SSI_SRCCR, SSI_SxCCR_WL(17) | SSI_SxCCR_DC(13)); /* Enable AC97 mode and startup the SSI */ - regmap_write(regs, REG_SSI_SACNT, - SSI_SACNT_AC97EN | SSI_SACNT_FV); + regmap_write(regs, REG_SSI_SACNT, SSI_SACNT_AC97EN | SSI_SACNT_FV); /* AC97 has to communicate with codec before starting a stream */ regmap_update_bits(regs, REG_SSI_SCR, - SSI_SCR_SSIEN | SSI_SCR_TE | SSI_SCR_RE, - SSI_SCR_SSIEN | SSI_SCR_TE | SSI_SCR_RE); + SSI_SCR_SSIEN | SSI_SCR_TE | SSI_SCR_RE, + SSI_SCR_SSIEN | SSI_SCR_TE | SSI_SCR_RE); regmap_write(regs, REG_SSI_SOR, SSI_SOR_WAIT(3)); } @@ -651,19 +659,18 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream, */ if (ssi->use_dual_fifo) snd_pcm_hw_constraint_step(substream->runtime, 0, - SNDRV_PCM_HW_PARAM_PERIOD_SIZE, 2); + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, 2); return 0; } static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(rtd->cpu_dai); clk_disable_unprepare(ssi->clk); - } /** @@ -676,8 +683,8 @@ static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, * (In 2-channel I2S Master mode, slot_width is fixed 32) */ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, - struct snd_soc_dai *cpu_dai, - struct snd_pcm_hw_params *hw_params) + struct snd_soc_dai *cpu_dai, + struct snd_pcm_hw_params *hw_params) 
{ struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); struct regmap *regs = ssi->regs; @@ -764,8 +771,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, stccr = SSI_SxCCR_PM(pm + 1) | (div2 ? SSI_SxCCR_DIV2 : 0) | (psr ? SSI_SxCCR_PSR : 0); - mask = SSI_SxCCR_PM_MASK | SSI_SxCCR_DIV2 | - SSI_SxCCR_PSR; + mask = SSI_SxCCR_PM_MASK | SSI_SxCCR_DIV2 | SSI_SxCCR_PSR; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK || synchronous) regmap_update_bits(regs, REG_SSI_STCCR, mask, stccr); @@ -795,7 +801,8 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, * fsl_ssi_set_bclk() if SSI is the DAI clock master. */ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *hw_params, struct snd_soc_dai *cpu_dai) + struct snd_pcm_hw_params *hw_params, + struct snd_soc_dai *cpu_dai) { struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); struct regmap *regs = ssi->regs; @@ -837,36 +844,33 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, u8 i2smode; /* Normal + Network mode to send 16-bit data in 32-bit frames */ if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16) - i2smode = SSI_SCR_I2S_MODE_NORMAL | - SSI_SCR_NET; + i2smode = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET; else i2smode = ssi->i2s_mode; regmap_update_bits(regs, REG_SSI_SCR, - SSI_SCR_NET | SSI_SCR_I2S_MODE_MASK, - channels == 1 ? 0 : i2smode); + SSI_SCR_NET | SSI_SCR_I2S_MODE_MASK, + channels == 1 ? 
0 : i2smode); } /* In synchronous mode, the SSI uses STCCR for capture */ if ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) || ssi->cpu_dai_drv.symmetric_rates) - regmap_update_bits(regs, REG_SSI_STCCR, SSI_SxCCR_WL_MASK, - wl); + regmap_update_bits(regs, REG_SSI_STCCR, SSI_SxCCR_WL_MASK, wl); else - regmap_update_bits(regs, REG_SSI_SRCCR, SSI_SxCCR_WL_MASK, - wl); + regmap_update_bits(regs, REG_SSI_SRCCR, SSI_SxCCR_WL_MASK, wl); return 0; } static int fsl_ssi_hw_free(struct snd_pcm_substream *substream, - struct snd_soc_dai *cpu_dai) + struct snd_soc_dai *cpu_dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(rtd->cpu_dai); if (fsl_ssi_is_i2s_master(ssi) && - ssi->baudclk_streams & BIT(substream->stream)) { + ssi->baudclk_streams & BIT(substream->stream)) { clk_disable_unprepare(ssi->baudclk); ssi->baudclk_streams &= ~BIT(substream->stream); } @@ -896,8 +900,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, scr |= SSI_SCR_SYNC_TX_FS; mask = SSI_STCR_TXBIT0 | SSI_STCR_TFDIR | SSI_STCR_TXDIR | - SSI_STCR_TSCKP | SSI_STCR_TFSI | SSI_STCR_TFSL | - SSI_STCR_TEFS; + SSI_STCR_TSCKP | SSI_STCR_TFSI | SSI_STCR_TFSL | SSI_STCR_TEFS; regmap_read(regs, REG_SSI_STCR, &stcr); regmap_read(regs, REG_SSI_SRCR, &srcr); stcr &= ~mask; @@ -908,11 +911,9 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: regmap_update_bits(regs, REG_SSI_STCCR, - SSI_SxCCR_DC_MASK, - SSI_SxCCR_DC(2)); + SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(2)); regmap_update_bits(regs, REG_SSI_SRCCR, - SSI_SxCCR_DC_MASK, - SSI_SxCCR_DC(2)); + SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(2)); switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFS: case SND_SOC_DAIFMT_CBS_CFS: @@ -927,7 +928,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, /* Data on rising edge of bclk, frame low, 1clk before data */ strcr |= SSI_STCR_TFSI | SSI_STCR_TSCKP | - SSI_STCR_TXBIT0 | 
SSI_STCR_TEFS; + SSI_STCR_TXBIT0 | SSI_STCR_TEFS; break; case SND_SOC_DAIFMT_LEFT_J: /* Data on rising edge of bclk, frame high */ @@ -936,12 +937,11 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, case SND_SOC_DAIFMT_DSP_A: /* Data on rising edge of bclk, frame high, 1clk before data */ strcr |= SSI_STCR_TFSL | SSI_STCR_TSCKP | - SSI_STCR_TXBIT0 | SSI_STCR_TEFS; + SSI_STCR_TXBIT0 | SSI_STCR_TEFS; break; case SND_SOC_DAIFMT_DSP_B: /* Data on rising edge of bclk, frame high */ - strcr |= SSI_STCR_TFSL | SSI_STCR_TSCKP | - SSI_STCR_TXBIT0; + strcr |= SSI_STCR_TFSL | SSI_STCR_TSCKP | SSI_STCR_TXBIT0; break; case SND_SOC_DAIFMT_AC97: /* Data on falling edge of bclk, frame high, 1clk before data */ @@ -1012,23 +1012,22 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, wm = ssi->fifo_watermark; regmap_write(regs, REG_SSI_SFCSR, - SSI_SFCSR_TFWM0(wm) | SSI_SFCSR_RFWM0(wm) | - SSI_SFCSR_TFWM1(wm) | SSI_SFCSR_RFWM1(wm)); + SSI_SFCSR_TFWM0(wm) | SSI_SFCSR_RFWM0(wm) | + SSI_SFCSR_TFWM1(wm) | SSI_SFCSR_RFWM1(wm)); if (ssi->use_dual_fifo) { - regmap_update_bits(regs, REG_SSI_SRCR, SSI_SRCR_RFEN1, - SSI_SRCR_RFEN1); - regmap_update_bits(regs, REG_SSI_STCR, SSI_STCR_TFEN1, - SSI_STCR_TFEN1); - regmap_update_bits(regs, REG_SSI_SCR, SSI_SCR_TCH_EN, - SSI_SCR_TCH_EN); + regmap_update_bits(regs, REG_SSI_SRCR, + SSI_SRCR_RFEN1, SSI_SRCR_RFEN1); + regmap_update_bits(regs, REG_SSI_STCR, + SSI_STCR_TFEN1, SSI_STCR_TFEN1); + regmap_update_bits(regs, REG_SSI_SCR, + SSI_SCR_TCH_EN, SSI_SCR_TCH_EN); } if ((fmt & SND_SOC_DAIFMT_FORMAT_MASK) == SND_SOC_DAIFMT_AC97) fsl_ssi_setup_ac97(ssi); return 0; - } /** @@ -1049,7 +1048,7 @@ static int fsl_ssi_set_dai_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) * Set TDM slot number and slot width */ static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, - u32 rx_mask, int slots, int slot_width) + u32 rx_mask, int slots, int slot_width) { struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); struct regmap *regs = 
ssi->regs; @@ -1069,17 +1068,16 @@ static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, return -EINVAL; } - regmap_update_bits(regs, REG_SSI_STCCR, SSI_SxCCR_DC_MASK, - SSI_SxCCR_DC(slots)); - regmap_update_bits(regs, REG_SSI_SRCCR, SSI_SxCCR_DC_MASK, - SSI_SxCCR_DC(slots)); + regmap_update_bits(regs, REG_SSI_STCCR, + SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(slots)); + regmap_update_bits(regs, REG_SSI_SRCCR, + SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(slots)); /* Save SSIEN bit of the SCR register */ regmap_read(regs, REG_SSI_SCR, &val); val &= SSI_SCR_SSIEN; /* Temporarily enable SSI to allow SxMSKs to be configurable */ - regmap_update_bits(regs, REG_SSI_SCR, SSI_SCR_SSIEN, - SSI_SCR_SSIEN); + regmap_update_bits(regs, REG_SSI_SCR, SSI_SCR_SSIEN, SSI_SCR_SSIEN); regmap_write(regs, REG_SSI_STMSK, ~tx_mask); regmap_write(regs, REG_SSI_SRMSK, ~rx_mask); @@ -1153,13 +1151,13 @@ static int fsl_ssi_dai_probe(struct snd_soc_dai *dai) } static const struct snd_soc_dai_ops fsl_ssi_dai_ops = { - .startup = fsl_ssi_startup, - .shutdown = fsl_ssi_shutdown, - .hw_params = fsl_ssi_hw_params, - .hw_free = fsl_ssi_hw_free, - .set_fmt = fsl_ssi_set_dai_fmt, - .set_tdm_slot = fsl_ssi_set_dai_tdm_slot, - .trigger = fsl_ssi_trigger, + .startup = fsl_ssi_startup, + .shutdown = fsl_ssi_shutdown, + .hw_params = fsl_ssi_hw_params, + .hw_free = fsl_ssi_hw_free, + .set_fmt = fsl_ssi_set_dai_fmt, + .set_tdm_slot = fsl_ssi_set_dai_tdm_slot, + .trigger = fsl_ssi_trigger, }; static struct snd_soc_dai_driver fsl_ssi_dai_template = { @@ -1182,7 +1180,7 @@ static struct snd_soc_dai_driver fsl_ssi_dai_template = { }; static const struct snd_soc_component_driver fsl_ssi_component = { - .name = "fsl-ssi", + .name = "fsl-ssi", }; static struct snd_soc_dai_driver fsl_ssi_ac97_dai = { @@ -1206,11 +1204,10 @@ static struct snd_soc_dai_driver fsl_ssi_ac97_dai = { .ops = &fsl_ssi_dai_ops, }; - static struct fsl_ssi *fsl_ac97_data; static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short 
reg, - unsigned short val) + unsigned short val) { struct regmap *regs = fsl_ac97_data->regs; unsigned int lreg; @@ -1235,8 +1232,8 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg, lval = val << 4; regmap_write(regs, REG_SSI_SACDAT, lval); - regmap_update_bits(regs, REG_SSI_SACNT, SSI_SACNT_RDWR_MASK, - SSI_SACNT_WR); + regmap_update_bits(regs, REG_SSI_SACNT, + SSI_SACNT_RDWR_MASK, SSI_SACNT_WR); udelay(100); clk_disable_unprepare(fsl_ac97_data->clk); @@ -1246,10 +1243,9 @@ ret_unlock: } static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, - unsigned short reg) + unsigned short reg) { struct regmap *regs = fsl_ac97_data->regs; - unsigned short val = 0; u32 reg_val; unsigned int lreg; @@ -1259,15 +1255,14 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97, ret = clk_prepare_enable(fsl_ac97_data->clk); if (ret) { - pr_err("ac97 read clk_prepare_enable failed: %d\n", - ret); + pr_err("ac97 read clk_prepare_enable failed: %d\n", ret); goto ret_unlock; } lreg = (reg & 0x7f) << 12; regmap_write(regs, REG_SSI_SACADD, lreg); - regmap_update_bits(regs, REG_SSI_SACNT, SSI_SACNT_RDWR_MASK, - SSI_SACNT_RD); + regmap_update_bits(regs, REG_SSI_SACNT, + SSI_SACNT_RDWR_MASK, SSI_SACNT_RD); udelay(100); @@ -1282,8 +1277,8 @@ ret_unlock: } static struct snd_ac97_bus_ops fsl_ssi_ac97_ops = { - .read = fsl_ssi_ac97_read, - .write = fsl_ssi_ac97_write, + .read = fsl_ssi_ac97_read, + .write = fsl_ssi_ac97_write, }; /** @@ -1298,7 +1293,7 @@ static void make_lowercase(char *s) } static int fsl_ssi_imx_probe(struct platform_device *pdev, - struct fsl_ssi *ssi, void __iomem *iomem) + struct fsl_ssi *ssi, void __iomem *iomem) { struct device_node *np = pdev->dev.of_node; struct device *dev = &pdev->dev; @@ -1370,14 +1365,13 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, return 0; error_pcm: - if (!ssi->has_ipg_clk_name) clk_disable_unprepare(ssi->clk); + return ret; } -static void fsl_ssi_imx_clean(struct platform_device 
*pdev, - struct fsl_ssi *ssi) +static void fsl_ssi_imx_clean(struct platform_device *pdev, struct fsl_ssi *ssi) { if (!ssi->use_dma) imx_pcm_fiq_exit(pdev); @@ -1422,8 +1416,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) if (fsl_ssi_is_ac97(ssi)) { memcpy(&ssi->cpu_dai_drv, &fsl_ssi_ac97_dai, - sizeof(fsl_ssi_ac97_dai)); - + sizeof(fsl_ssi_ac97_dai)); fsl_ac97_data = ssi; } else { memcpy(&ssi->cpu_dai_drv, &fsl_ssi_dai_template, @@ -1582,8 +1575,8 @@ done: goto error_sound_card; } - ssi->pdev = platform_device_register_data(NULL, - "ac97-codec", ssi_idx, NULL, 0); + ssi->pdev = platform_device_register_data(NULL, "ac97-codec", + ssi_idx, NULL, 0); if (IS_ERR(ssi->pdev)) { ret = PTR_ERR(ssi->pdev); dev_err(dev, @@ -1597,11 +1590,9 @@ done: error_sound_card: fsl_ssi_debugfs_remove(&ssi->dbg_stats); - error_asoc_register: if (fsl_ssi_is_ac97(ssi)) snd_soc_set_ac97_ops(NULL); - error_ac97_ops: if (fsl_ssi_is_ac97(ssi)) mutex_destroy(&ssi->ac97_reg_lock); @@ -1655,9 +1646,9 @@ static int fsl_ssi_resume(struct device *dev) regcache_cache_only(regs, false); regmap_update_bits(regs, REG_SSI_SFCSR, - SSI_SFCSR_RFWM1_MASK | SSI_SFCSR_TFWM1_MASK | - SSI_SFCSR_RFWM0_MASK | SSI_SFCSR_TFWM0_MASK, - ssi->regcache_sfcsr); + SSI_SFCSR_RFWM1_MASK | SSI_SFCSR_TFWM1_MASK | + SSI_SFCSR_RFWM0_MASK | SSI_SFCSR_TFWM0_MASK, + ssi->regcache_sfcsr); regmap_write(regs, REG_SSI_SACNT, ssi->regcache_sacnt); return regcache_sync(regs); diff --git a/sound/soc/fsl/fsl_ssi.h b/sound/soc/fsl/fsl_ssi.h index cdcf3d23873e..fe38e6913f96 100644 --- a/sound/soc/fsl/fsl_ssi.h +++ b/sound/soc/fsl/fsl_ssi.h @@ -310,7 +310,7 @@ static inline void fsl_ssi_dbg_isr(struct fsl_ssi_dbg *stats, u32 sisr) } static inline int fsl_ssi_debugfs_create(struct fsl_ssi_dbg *ssi_dbg, - struct device *dev) + struct device *dev) { return 0; } diff --git a/sound/soc/fsl/fsl_ssi_dbg.c b/sound/soc/fsl/fsl_ssi_dbg.c index 362df91420f6..7aac63e2c561 100644 --- a/sound/soc/fsl/fsl_ssi_dbg.c +++ 
b/sound/soc/fsl/fsl_ssi_dbg.c @@ -147,7 +147,8 @@ int fsl_ssi_debugfs_create(struct fsl_ssi_dbg *ssi_dbg, struct device *dev) return -ENOMEM; ssi_dbg->dbg_stats = debugfs_create_file("stats", S_IRUGO, - ssi_dbg->dbg_dir, ssi_dbg, &fsl_ssi_stats_ops); + ssi_dbg->dbg_dir, ssi_dbg, + &fsl_ssi_stats_ops); if (!ssi_dbg->dbg_stats) { debugfs_remove(ssi_dbg->dbg_dir); return -ENOMEM; -- cgit v1.2.3 From 2c22503667709ced35fc6807c9ba79285c929114 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:05 -0800 Subject: ASoC: fsl_ssi: Refine printk outputs This patch unifies the error messages in the "failed to xxxx" format. It also reduces the length of one line and adds spaces to an operator. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index ed9ac758e35d..eb9ac847bda3 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -716,7 +716,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, * never greater than 1/5 IPG clock rate */ if (freq * 5 > clk_get_rate(ssi->clk)) { - dev_err(cpu_dai->dev, "bitclk > ipgclk/5\n"); + dev_err(cpu_dai->dev, "bitclk > ipgclk / 5\n"); return -EINVAL; } @@ -888,7 +888,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, ssi->dai_fmt = fmt; if (fsl_ssi_is_i2s_master(ssi) && IS_ERR(ssi->baudclk)) { - dev_err(dev, "baudclk is missing which is necessary for master mode\n"); + dev_err(dev, "missing baudclk for master mode\n"); return -EINVAL; } @@ -1307,7 +1307,7 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, ssi->clk = devm_clk_get(dev, NULL); if (IS_ERR(ssi->clk)) { ret = PTR_ERR(ssi->clk); - dev_err(dev, "could not get clock: %d\n", ret); + dev_err(dev, "failed to get clock: %d\n", ret); return ret; } @@ -1323,7 +1323,7 @@
static int fsl_ssi_imx_probe(struct platform_device *pdev, /* Do not error out for slave cases that live without a baud clock */ ssi->baudclk = devm_clk_get(dev, "baud"); if (IS_ERR(ssi->baudclk)) - dev_dbg(dev, "could not get baud clock: %ld\n", + dev_dbg(dev, "failed to get baud clock: %ld\n", PTR_ERR(ssi->baudclk)); ssi->dma_params_tx.maxburst = ssi->dma_maxburst; @@ -1447,7 +1447,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) &regconfig); } if (IS_ERR(ssi->regs)) { - dev_err(dev, "Failed to init register map\n"); + dev_err(dev, "failed to init register map\n"); return PTR_ERR(ssi->regs); } @@ -1513,7 +1513,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) mutex_init(&ssi->ac97_reg_lock); ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev); if (ret) { - dev_err(dev, "could not set AC'97 ops\n"); + dev_err(dev, "failed to set AC'97 ops\n"); goto error_ac97_ops; } } @@ -1529,7 +1529,7 @@ static int fsl_ssi_probe(struct platform_device *pdev) ret = devm_request_irq(dev, ssi->irq, fsl_ssi_isr, 0, dev_name(dev), ssi); if (ret < 0) { - dev_err(dev, "could not claim irq %u\n", ssi->irq); + dev_err(dev, "failed to claim irq %u\n", ssi->irq); goto error_asoc_register; } } @@ -1571,7 +1571,7 @@ done: ret = of_property_read_u32(np, "cell-index", &ssi_idx); if (ret) { - dev_err(dev, "cannot get SSI index property\n"); + dev_err(dev, "failed to get SSI index property\n"); goto error_sound_card; } -- cgit v1.2.3 From 0c884bed6ba743b8456c6eee4d599aaad6ffa008 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:06 -0800 Subject: ASoC: fsl_ssi: Rename cpu_dai parameter to dai Shortens the variable name to save space, useful for dev_err outputs. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S.
Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index eb9ac847bda3..237302fb9279 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -683,10 +683,10 @@ static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, * (In 2-channel I2S Master mode, slot_width is fixed 32) */ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, - struct snd_soc_dai *cpu_dai, + struct snd_soc_dai *dai, struct snd_pcm_hw_params *hw_params) { - struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(dai); struct regmap *regs = ssi->regs; int synchronous = ssi->cpu_dai_drv.symmetric_rates, ret; u32 pm = 999, div2, psr, stccr, mask, afreq, factor, i; @@ -716,7 +716,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, * never greater than 1/5 IPG clock rate */ if (freq * 5 > clk_get_rate(ssi->clk)) { - dev_err(cpu_dai->dev, "bitclk > ipgclk / 5\n"); + dev_err(dai->dev, "bitclk > ipgclk / 5\n"); return -EINVAL; } @@ -765,7 +765,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, /* No proper pm found if it is still remaining the initial value */ if (pm == 999) { - dev_err(cpu_dai->dev, "failed to handle the required sysclk\n"); + dev_err(dai->dev, "failed to handle the required sysclk\n"); return -EINVAL; } @@ -781,7 +781,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, if (!baudclk_is_used) { ret = clk_set_rate(ssi->baudclk, baudrate); if (ret) { - dev_err(cpu_dai->dev, "failed to set baudclk rate\n"); + dev_err(dai->dev, "failed to set baudclk rate\n"); return -EINVAL; } } @@ -802,9 +802,9 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, */ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params, - struct 
snd_soc_dai *cpu_dai) + struct snd_soc_dai *dai) { - struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(dai); struct regmap *regs = ssi->regs; unsigned int channels = params_channels(hw_params); unsigned int sample_size = params_width(hw_params); @@ -826,7 +826,7 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, return 0; if (fsl_ssi_is_i2s_master(ssi)) { - ret = fsl_ssi_set_bclk(substream, cpu_dai, hw_params); + ret = fsl_ssi_set_bclk(substream, dai, hw_params); if (ret) return ret; @@ -864,7 +864,7 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, } static int fsl_ssi_hw_free(struct snd_pcm_substream *substream, - struct snd_soc_dai *cpu_dai) + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(rtd->cpu_dai); @@ -1033,30 +1033,30 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, /** * Configure Digital Audio Interface (DAI) Format */ -static int fsl_ssi_set_dai_fmt(struct snd_soc_dai *cpu_dai, unsigned int fmt) +static int fsl_ssi_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt) { - struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(dai); /* AC97 configured DAIFMT earlier in the probe() */ if (fsl_ssi_is_ac97(ssi)) return 0; - return _fsl_ssi_set_dai_fmt(cpu_dai->dev, ssi, fmt); + return _fsl_ssi_set_dai_fmt(dai->dev, ssi, fmt); } /** * Set TDM slot number and slot width */ -static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, +static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mask, int slots, int slot_width) { - struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(cpu_dai); + struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(dai); struct regmap *regs = ssi->regs; u32 val; /* The word length should be 8, 10, 12, 16, 18, 20, 22 or 24 */ if (slot_width & 1 || slot_width < 8 || slot_width > 
24) { - dev_err(cpu_dai->dev, "invalid slot width: %d\n", slot_width); + dev_err(dai->dev, "invalid slot width: %d\n", slot_width); return -EINVAL; } @@ -1064,7 +1064,7 @@ static int fsl_ssi_set_dai_tdm_slot(struct snd_soc_dai *cpu_dai, u32 tx_mask, regmap_read(regs, REG_SSI_SCR, &val); val &= SSI_SCR_I2S_MODE_MASK | SSI_SCR_NET; if (val && slots < 2) { - dev_err(cpu_dai->dev, "slot number should be >= 2 in I2S or NET\n"); + dev_err(dai->dev, "slot number should be >= 2 in I2S or NET\n"); return -EINVAL; } -- cgit v1.2.3 From ff4adb090066c1636a43b88a497c34d2bd2312ec Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:07 -0800 Subject: ASoC: fsl_ssi: Rename scr_val to scr Simplify the variable name. This reduces one over-80-character line. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 237302fb9279..af3ba718d4bb 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -451,12 +451,12 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, struct regmap *regs = ssi->regs; struct fsl_ssi_reg_val *avals; int nr_active_streams; - u32 scr_val; + u32 scr; int keep_active; - regmap_read(regs, REG_SSI_SCR, &scr_val); + regmap_read(regs, REG_SSI_SCR, &scr); - nr_active_streams = !!(scr_val & SSI_SCR_TE) + !!(scr_val & SSI_SCR_RE); + nr_active_streams = !!(scr & SSI_SCR_TE) + !!(scr & SSI_SCR_RE); if (nr_active_streams - 1 > 0) keep_active = 1; @@ -810,11 +810,11 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, unsigned int sample_size = params_width(hw_params); u32 wl = SSI_SxCCR_WL(sample_size); int ret; - u32 scr_val; + u32 scr; int enabled; - regmap_read(regs, REG_SSI_SCR, &scr_val); - enabled = scr_val & SSI_SCR_SSIEN; + regmap_read(regs, REG_SSI_SCR, &scr); + 
enabled = scr & SSI_SCR_SSIEN; /* * SSI is properly configured if it is enabled and running in -- cgit v1.2.3 From 2474e4037c4e3fe8b4fe4ab37232973d9b17a573 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:08 -0800 Subject: ASoC: fsl_ssi: Replace fsl_ssi_rxtx_reg_val with fsl_ssi_regvals The name fsl_ssi_rxtx_reg_val is too long to read comfortably. So this patch shortens it by using an array (fsl_ssi_regvals, renamed from fsl_ssi_reg_val). To do that, it also introduces two macros (TX and RX) to replace the wrapper structure. This will also help further cleanups. Meanwhile, it unifies all local variables under the name "vals" to get rid of the name "reg", which could be confused with "regs" in the private struct for regmap. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 79 +++++++++++++++++++++++-------------------------- sound/soc/fsl/fsl_ssi.h | 3 ++ 2 files changed, 40 insertions(+), 42 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index af3ba718d4bb..aef014c46d96 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -106,18 +106,13 @@ enum fsl_ssi_type { FSL_SSI_MX51, }; -struct fsl_ssi_reg_val { +struct fsl_ssi_regvals { u32 sier; u32 srcr; u32 stcr; u32 scr; }; -struct fsl_ssi_rxtx_reg_val { - struct fsl_ssi_reg_val rx; - struct fsl_ssi_reg_val tx; -}; - static bool fsl_ssi_readable_reg(struct device *dev, unsigned int reg) { switch (reg) { @@ -213,7 +208,7 @@ struct fsl_ssi_soc_data { * @fifo_depth: Depth of the SSI FIFOs * @slot_width: Width of each DAI slot * @slots: Number of slots - * @rxtx_reg_val: Specific RX/TX register settings + * @regvals: Specific RX/TX register settings * * @clk: Clock source to access register * @baudclk: Clock source to generate bit and frame-sync clocks @@ -257,7 +252,7 @@ struct fsl_ssi { unsigned int fifo_depth; unsigned int slot_width;
unsigned int slots; - struct fsl_ssi_rxtx_reg_val rxtx_reg_val; + struct fsl_ssi_regvals regvals[2]; struct clk *clk; struct clk *baudclk; @@ -386,25 +381,25 @@ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) static void fsl_ssi_rxtx_config(struct fsl_ssi *ssi, bool enable) { struct regmap *regs = ssi->regs; - struct fsl_ssi_rxtx_reg_val *vals = &ssi->rxtx_reg_val; + struct fsl_ssi_regvals *vals = ssi->regvals; if (enable) { regmap_update_bits(regs, REG_SSI_SIER, - vals->rx.sier | vals->tx.sier, - vals->rx.sier | vals->tx.sier); + vals[RX].sier | vals[TX].sier, + vals[RX].sier | vals[TX].sier); regmap_update_bits(regs, REG_SSI_SRCR, - vals->rx.srcr | vals->tx.srcr, - vals->rx.srcr | vals->tx.srcr); + vals[RX].srcr | vals[TX].srcr, + vals[RX].srcr | vals[TX].srcr); regmap_update_bits(regs, REG_SSI_STCR, - vals->rx.stcr | vals->tx.stcr, - vals->rx.stcr | vals->tx.stcr); + vals[RX].stcr | vals[TX].stcr, + vals[RX].stcr | vals[TX].stcr); } else { regmap_update_bits(regs, REG_SSI_SRCR, - vals->rx.srcr | vals->tx.srcr, 0); + vals[RX].srcr | vals[TX].srcr, 0); regmap_update_bits(regs, REG_SSI_STCR, - vals->rx.stcr | vals->tx.stcr, 0); + vals[RX].stcr | vals[TX].stcr, 0); regmap_update_bits(regs, REG_SSI_SIER, - vals->rx.sier | vals->tx.sier, 0); + vals[RX].sier | vals[TX].sier, 0); } } @@ -446,10 +441,10 @@ static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) * Enable or disable SSI configuration. 
*/ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, - struct fsl_ssi_reg_val *vals) + struct fsl_ssi_regvals *vals) { struct regmap *regs = ssi->regs; - struct fsl_ssi_reg_val *avals; + struct fsl_ssi_regvals *avals; int nr_active_streams; u32 scr; int keep_active; @@ -464,10 +459,10 @@ static void fsl_ssi_config(struct fsl_ssi *ssi, bool enable, keep_active = 0; /* Get the opposite direction to keep its values untouched */ - if (&ssi->rxtx_reg_val.rx == vals) - avals = &ssi->rxtx_reg_val.tx; + if (&ssi->regvals[RX] == vals) + avals = &ssi->regvals[TX]; else - avals = &ssi->rxtx_reg_val.rx; + avals = &ssi->regvals[RX]; if (!enable) { /* @@ -558,7 +553,7 @@ config_done: static void fsl_ssi_rx_config(struct fsl_ssi *ssi, bool enable) { - fsl_ssi_config(ssi, enable, &ssi->rxtx_reg_val.rx); + fsl_ssi_config(ssi, enable, &ssi->regvals[RX]); } static void fsl_ssi_tx_ac97_saccst_setup(struct fsl_ssi *ssi) @@ -586,39 +581,39 @@ static void fsl_ssi_tx_config(struct fsl_ssi *ssi, bool enable) if (enable && fsl_ssi_is_ac97(ssi)) fsl_ssi_tx_ac97_saccst_setup(ssi); - fsl_ssi_config(ssi, enable, &ssi->rxtx_reg_val.tx); + fsl_ssi_config(ssi, enable, &ssi->regvals[TX]); } /** * Cache critical bits of SIER, SRCR, STCR and SCR to later set them safely */ -static void fsl_ssi_setup_reg_vals(struct fsl_ssi *ssi) +static void fsl_ssi_setup_regvals(struct fsl_ssi *ssi) { - struct fsl_ssi_rxtx_reg_val *reg = &ssi->rxtx_reg_val; + struct fsl_ssi_regvals *vals = ssi->regvals; - reg->rx.sier = SSI_SIER_RFF0_EN; - reg->rx.srcr = SSI_SRCR_RFEN0; - reg->rx.scr = 0; - reg->tx.sier = SSI_SIER_TFE0_EN; - reg->tx.stcr = SSI_STCR_TFEN0; - reg->tx.scr = 0; + vals[RX].sier = SSI_SIER_RFF0_EN; + vals[RX].srcr = SSI_SRCR_RFEN0; + vals[RX].scr = 0; + vals[TX].sier = SSI_SIER_TFE0_EN; + vals[TX].stcr = SSI_STCR_TFEN0; + vals[TX].scr = 0; /* AC97 has already enabled SSIEN, RE and TE, so ignore them */ if (!fsl_ssi_is_ac97(ssi)) { - reg->rx.scr = SSI_SCR_SSIEN | SSI_SCR_RE; - reg->tx.scr = 
SSI_SCR_SSIEN | SSI_SCR_TE; + vals[RX].scr = SSI_SCR_SSIEN | SSI_SCR_RE; + vals[TX].scr = SSI_SCR_SSIEN | SSI_SCR_TE; } if (ssi->use_dma) { - reg->rx.sier |= SSI_SIER_RDMAE; - reg->tx.sier |= SSI_SIER_TDMAE; + vals[RX].sier |= SSI_SIER_RDMAE; + vals[TX].sier |= SSI_SIER_TDMAE; } else { - reg->rx.sier |= SSI_SIER_RIE; - reg->tx.sier |= SSI_SIER_TIE; + vals[RX].sier |= SSI_SIER_RIE; + vals[TX].sier |= SSI_SIER_TIE; } - reg->rx.sier |= FSLSSI_SIER_DBG_RX_FLAGS; - reg->tx.sier |= FSLSSI_SIER_DBG_TX_FLAGS; + vals[RX].sier |= FSLSSI_SIER_DBG_RX_FLAGS; + vals[TX].sier |= FSLSSI_SIER_DBG_TX_FLAGS; } static void fsl_ssi_setup_ac97(struct fsl_ssi *ssi) @@ -892,7 +887,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, return -EINVAL; } - fsl_ssi_setup_reg_vals(ssi); + fsl_ssi_setup_regvals(ssi); regmap_read(regs, REG_SSI_SCR, &scr); scr &= ~(SSI_SCR_SYN | SSI_SCR_I2S_MODE_MASK); diff --git a/sound/soc/fsl/fsl_ssi.h b/sound/soc/fsl/fsl_ssi.h index fe38e6913f96..52b88f1d6c6f 100644 --- a/sound/soc/fsl/fsl_ssi.h +++ b/sound/soc/fsl/fsl_ssi.h @@ -12,6 +12,9 @@ #ifndef _MPC8610_I2S_H #define _MPC8610_I2S_H +#define RX 0 +#define TX 1 + /* -- SSI Register Map -- */ /* SSI Transmit Data Register 0 */ -- cgit v1.2.3 From 8bc84a3344ca27836cff29bfbb42365753c9c557 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:09 -0800 Subject: ASoC: fsl_ssi: Rename i2smode to i2s_net Since this i2smode also includes the setting of Network mode, it should have it in the name. This patch also adds its MASK define. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S. 
Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 24 ++++++++++++------------ sound/soc/fsl/fsl_ssi.h | 1 + 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index aef014c46d96..2b3915c45199 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -201,7 +201,7 @@ struct fsl_ssi_soc_data { * @cpu_dai_drv: CPU DAI driver for this device * * @dai_fmt: DAI configuration this device is currently used with - * @i2s_mode: I2S and Network mode configuration of SCR register + * @i2s_net: I2S and Network mode configurations of SCR register * @use_dma: DMA is used or FIQ with stream filter * @use_dual_fifo: DMA with support for dual FIFO mode * @has_ipg_clk_name: If "ipg" is in the clock name list of device tree @@ -245,7 +245,7 @@ struct fsl_ssi { struct snd_soc_dai_driver cpu_dai_drv; unsigned int dai_fmt; - u8 i2s_mode; + u8 i2s_net; bool use_dma; bool use_dual_fifo; bool has_ipg_clk_name; @@ -836,16 +836,16 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, } if (!fsl_ssi_is_ac97(ssi)) { - u8 i2smode; + u8 i2s_net; /* Normal + Network mode to send 16-bit data in 32-bit frames */ if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16) - i2smode = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET; + i2s_net = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET; else - i2smode = ssi->i2s_mode; + i2s_net = ssi->i2s_net; regmap_update_bits(regs, REG_SSI_SCR, - SSI_SCR_NET | SSI_SCR_I2S_MODE_MASK, - channels == 1 ? 0 : i2smode); + SSI_SCR_I2S_NET_MASK, + channels == 1 ? 
0 : i2s_net); } /* In synchronous mode, the SSI uses STCCR for capture */ @@ -902,7 +902,7 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, srcr &= ~mask; /* Use Network mode as default */ - ssi->i2s_mode = SSI_SCR_NET; + ssi->i2s_net = SSI_SCR_NET; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: regmap_update_bits(regs, REG_SSI_STCCR, @@ -912,10 +912,10 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFS: case SND_SOC_DAIFMT_CBS_CFS: - ssi->i2s_mode |= SSI_SCR_I2S_MODE_MASTER; + ssi->i2s_net |= SSI_SCR_I2S_MODE_MASTER; break; case SND_SOC_DAIFMT_CBM_CFM: - ssi->i2s_mode |= SSI_SCR_I2S_MODE_SLAVE; + ssi->i2s_net |= SSI_SCR_I2S_MODE_SLAVE; break; default: return -EINVAL; @@ -940,12 +940,12 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, break; case SND_SOC_DAIFMT_AC97: /* Data on falling edge of bclk, frame high, 1clk before data */ - ssi->i2s_mode |= SSI_SCR_I2S_MODE_NORMAL; + ssi->i2s_net |= SSI_SCR_I2S_MODE_NORMAL; break; default: return -EINVAL; } - scr |= ssi->i2s_mode; + scr |= ssi->i2s_net; /* DAI clock inversion */ switch (fmt & SND_SOC_DAIFMT_INV_MASK) { diff --git a/sound/soc/fsl/fsl_ssi.h b/sound/soc/fsl/fsl_ssi.h index 52b88f1d6c6f..b61008779e3c 100644 --- a/sound/soc/fsl/fsl_ssi.h +++ b/sound/soc/fsl/fsl_ssi.h @@ -95,6 +95,7 @@ #define SSI_SCR_I2S_MODE_SLAVE 0x00000040 #define SSI_SCR_SYN 0x00000010 #define SSI_SCR_NET 0x00000008 +#define SSI_SCR_I2S_NET_MASK (SSI_SCR_NET | SSI_SCR_I2S_MODE_MASK) #define SSI_SCR_RE 0x00000004 #define SSI_SCR_TE 0x00000002 #define SSI_SCR_SSIEN 0x00000001 -- cgit v1.2.3 From 52eee84e815e0fbaf9ada848ab5646314a529b61 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 17 Dec 2017 18:52:10 -0800 Subject: ASoC: fsl_ssi: Define ternary macros to simplify code Some regmap code looks redundant. So simplify it. Signed-off-by: Nicolin Chen Tested-by: Maciej S. Szmigiero Reviewed-by: Maciej S.
Szmigiero Acked-by: Timur Tabi Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_ssi.c | 27 +++++++++++---------------- sound/soc/fsl/fsl_ssi.h | 4 ++++ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 2b3915c45199..aecd00f7929d 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -408,13 +408,10 @@ static void fsl_ssi_rxtx_config(struct fsl_ssi *ssi, bool enable) */ static void fsl_ssi_fifo_clear(struct fsl_ssi *ssi, bool is_rx) { - if (is_rx) { - regmap_update_bits(ssi->regs, REG_SSI_SOR, - SSI_SOR_RX_CLR, SSI_SOR_RX_CLR); - } else { - regmap_update_bits(ssi->regs, REG_SSI_SOR, - SSI_SOR_TX_CLR, SSI_SOR_TX_CLR); - } + bool tx = !is_rx; + + regmap_update_bits(ssi->regs, REG_SSI_SOR, + SSI_SOR_xX_CLR(tx), SSI_SOR_xX_CLR(tx)); } /** @@ -681,6 +678,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, struct snd_soc_dai *dai, struct snd_pcm_hw_params *hw_params) { + bool tx2, tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(dai); struct regmap *regs = ssi->regs; int synchronous = ssi->cpu_dai_drv.symmetric_rates, ret; @@ -768,10 +766,9 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, (psr ? 
SSI_SxCCR_PSR : 0); mask = SSI_SxCCR_PM_MASK | SSI_SxCCR_DIV2 | SSI_SxCCR_PSR; - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK || synchronous) - regmap_update_bits(regs, REG_SSI_STCCR, mask, stccr); - else - regmap_update_bits(regs, REG_SSI_SRCCR, mask, stccr); + /* STCCR is used for RX in synchronous mode */ + tx2 = tx || synchronous; + regmap_update_bits(regs, REG_SSI_SxCCR(tx2), mask, stccr); if (!baudclk_is_used) { ret = clk_set_rate(ssi->baudclk, baudrate); @@ -799,6 +796,7 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params, struct snd_soc_dai *dai) { + bool tx2, tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK; struct fsl_ssi *ssi = snd_soc_dai_get_drvdata(dai); struct regmap *regs = ssi->regs; unsigned int channels = params_channels(hw_params); @@ -849,11 +847,8 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream, } /* In synchronous mode, the SSI uses STCCR for capture */ - if ((substream->stream == SNDRV_PCM_STREAM_PLAYBACK) || - ssi->cpu_dai_drv.symmetric_rates) - regmap_update_bits(regs, REG_SSI_STCCR, SSI_SxCCR_WL_MASK, wl); - else - regmap_update_bits(regs, REG_SSI_SRCCR, SSI_SxCCR_WL_MASK, wl); + tx2 = tx || ssi->cpu_dai_drv.symmetric_rates; + regmap_update_bits(regs, REG_SSI_SxCCR(tx2), SSI_SxCCR_WL_MASK, wl); return 0; } diff --git a/sound/soc/fsl/fsl_ssi.h b/sound/soc/fsl/fsl_ssi.h index b61008779e3c..de2fdc5db726 100644 --- a/sound/soc/fsl/fsl_ssi.h +++ b/sound/soc/fsl/fsl_ssi.h @@ -35,10 +35,12 @@ #define REG_SSI_STCR 0x1c /* SSI Receive Configuration Register */ #define REG_SSI_SRCR 0x20 +#define REG_SSI_SxCR(tx) ((tx) ? REG_SSI_STCR : REG_SSI_SRCR) /* SSI Transmit Clock Control Register */ #define REG_SSI_STCCR 0x24 /* SSI Receive Clock Control Register */ #define REG_SSI_SRCCR 0x28 +#define REG_SSI_SxCCR(tx) ((tx) ? 
REG_SSI_STCCR : REG_SSI_SRCCR) /* SSI FIFO Control/Status Register */ #define REG_SSI_SFCSR 0x2c /* @@ -67,6 +69,7 @@ #define REG_SSI_STMSK 0x48 /* SSI Receive Time Slot Mask Register */ #define REG_SSI_SRMSK 0x4c +#define REG_SSI_SxMSK(tx) ((tx) ? REG_SSI_STMSK : REG_SSI_SRMSK) /* * SSI AC97 Channel Status Register * @@ -249,6 +252,7 @@ #define SSI_SOR_CLKOFF 0x00000040 #define SSI_SOR_RX_CLR 0x00000020 #define SSI_SOR_TX_CLR 0x00000010 +#define SSI_SOR_xX_CLR(tx) ((tx) ? SSI_SOR_TX_CLR : SSI_SOR_RX_CLR) #define SSI_SOR_INIT 0x00000008 #define SSI_SOR_WAIT_SHIFT 1 #define SSI_SOR_WAIT_MASK 0x00000006 -- cgit v1.2.3 From 66e900a3d225575c8b48b59ae1fe74bb6e5a65cc Mon Sep 17 00:00:00 2001 From: Radu Pirea Date: Fri, 15 Dec 2017 17:40:17 +0200 Subject: spi: atmel: fixed spin_lock usage inside atmel_spi_remove The only part of atmel_spi_remove which needs to be atomic is hardware reset. atmel_spi_stop_dma calls dma_terminate_all and this needs interrupts enabled. atmel_spi_release_dma calls dma_release_channel and dma_release_channel locks a mutex inside of spin_lock. So the call of these functions can't be inside a spin_lock. 
Reported-by: Jia-Ju Bai Signed-off-by: Radu Pirea Signed-off-by: Mark Brown --- drivers/spi/spi-atmel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index f95da364c283..669470971023 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); /* reset the hardware and block queue progress */ - spin_lock_irq(&as->lock); if (as->use_dma) { atmel_spi_stop_dma(master); atmel_spi_release_dma(master); } + spin_lock_irq(&as->lock); spi_writel(as, CR, SPI_BIT(SWRST)); spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */ spi_readl(as, SR); -- cgit v1.2.3 From 3920bb713038810f25770e7545b79f204685c8f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZ=20Lin=20=28=E6=9E=97=E4=B8=8A=E6=99=BA=29?= Date: Tue, 19 Dec 2017 17:40:32 +0800 Subject: USB: serial: option: adding support for YUGA CLM920-NC5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for YUGA CLM920-NC5 PID 0x9625 USB modem to option driver. 
Interface layout: 0: QCDM/DIAG 1: ADB 2: MODEM 3: AT 4: RMNET Signed-off-by: Taiyi Wu Signed-off-by: SZ Lin (林上智) Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b02fb576b856..b6320e3be429 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Qualcomm's vendor ID */ #define QUECTEL_PRODUCT_UC20 0x9003 #define QUECTEL_PRODUCT_UC15 0x9090 +/* These Yuga products use Qualcomm's vendor ID */ +#define YUGA_PRODUCT_CLM920_NC5 0x9625 #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ @@ -680,6 +682,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = { .reserved = BIT(4) | BIT(5), }; +static const struct option_blacklist_info yuga_clm920_nc5_blacklist = { + .reserved = BIT(1) | BIT(4), +}; + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -1184,6 +1190,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + /* Yuga products use Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5), + .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, -- cgit v1.2.3 From 07b9f12864d16c3a861aef4817eb1efccbc5d0e6 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 19 Dec 2017 11:14:42 +0200 Subject: USB: Fix off by one in type-specific length check of BOS SSP capability USB 3.1 devices are 
not detected as 3.1 capable since 4.15-rc3 due to an off by one in commit 81cf4a45360f ("USB: core: Add type-specific length check of BOS descriptors") It uses USB_DT_USB_SSP_CAP_SIZE() to get SSP capability size which takes the zero based SSAC as argument, not the actual count of sublink speed attributes. USB3 spec 9.6.2.5 says "The number of Sublink Speed Attributes = SSAC + 1." The type-specific length check patch was added to stable and needs to be fixed there as well Fixes: 81cf4a45360f ("USB: core: Add type-specific length check of BOS descriptors") Cc: linux-stable CC: Masakazu Mokuno Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 78e92d29f8d9..c821b4b9647e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -1007,7 +1007,7 @@ int usb_get_bos_descriptor(struct usb_device *dev) case USB_SSP_CAP_TYPE: ssp_cap = (struct usb_ssp_cap_descriptor *)buffer; ssac = (le32_to_cpu(ssp_cap->bmAttributes) & - USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1; + USB_SSP_SUBLINK_SPEED_ATTRIBS); if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac)) dev->bos->ssp_cap = ssp_cap; break; -- cgit v1.2.3 From 8272d099d05f7ab2776cf56a2ab9f9443be18907 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 18 Dec 2017 17:24:22 -0700 Subject: usbip: vhci: stop printing kernel pointer addresses in messages Remove and/or change debug, info. and error messages to not print kernel pointer addresses.
Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 10 ---------- drivers/usb/usbip/vhci_rx.c | 23 +++++++++++------------ drivers/usb/usbip/vhci_tx.c | 3 ++- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 6b3278c4b72a..9efab3dc3734 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -656,9 +656,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag struct vhci_device *vdev; unsigned long flags; - usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n", - hcd, urb, mem_flags); - if (portnum > VHCI_HC_PORTS) { pr_err("invalid port number %d\n", portnum); return -ENODEV; @@ -822,8 +819,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) struct vhci_device *vdev; unsigned long flags; - pr_info("dequeue a urb %p\n", urb); - spin_lock_irqsave(&vhci->lock, flags); priv = urb->hcpriv; @@ -851,7 +846,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) /* tcp connection is closed */ spin_lock(&vdev->priv_lock); - pr_info("device %p seems to be disconnected\n", vdev); list_del(&priv->list); kfree(priv); urb->hcpriv = NULL; @@ -863,8 +857,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) * vhci_rx will receive RET_UNLINK and give back the URB. * Otherwise, we give back it here. 
*/ - pr_info("gives back urb %p\n", urb); - usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock_irqrestore(&vhci->lock, flags); @@ -892,8 +884,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) unlink->unlink_seqnum = priv->seqnum; - pr_info("device %p seems to be still connected\n", vdev); - /* send cmd_unlink and try to cancel the pending URB in the * peer */ list_add_tail(&unlink->list, &vdev->unlink_tx); diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c index 90577e8b2282..112ebb90d8c9 100644 --- a/drivers/usb/usbip/vhci_rx.c +++ b/drivers/usb/usbip/vhci_rx.c @@ -23,24 +23,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum) urb = priv->urb; status = urb->status; - usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n", - urb, priv, seqnum); + usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum); switch (status) { case -ENOENT: /* fall through */ case -ECONNRESET: - dev_info(&urb->dev->dev, - "urb %p was unlinked %ssynchronuously.\n", urb, - status == -ENOENT ? "" : "a"); + dev_dbg(&urb->dev->dev, + "urb seq# %u was unlinked %ssynchronuously\n", + seqnum, status == -ENOENT ? 
"" : "a"); break; case -EINPROGRESS: /* no info output */ break; default: - dev_info(&urb->dev->dev, - "urb %p may be in a error, status %d\n", urb, - status); + dev_dbg(&urb->dev->dev, + "urb seq# %u may be in a error, status %d\n", + seqnum, status); } list_del(&priv->list); @@ -67,8 +66,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, spin_unlock_irqrestore(&vdev->priv_lock, flags); if (!urb) { - pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum); - pr_info("max seqnum %d\n", + pr_err("cannot find a urb of seqnum %u max seqnum %d\n", + pdu->base.seqnum, atomic_read(&vhci_hcd->seqnum)); usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); return; @@ -91,7 +90,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, if (usbip_dbg_flag_vhci_rx) usbip_dump_urb(urb); - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum); spin_lock_irqsave(&vhci->lock, flags); usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb); @@ -158,7 +157,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev, pr_info("the urb (seqnum %d) was already given back\n", pdu->base.seqnum); } else { - usbip_dbg_vhci_rx("now giveback urb %p\n", urb); + usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum); /* If unlink is successful, status is -ECONNRESET */ urb->status = pdu->u.ret_unlink.status; diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index d625a2ff4b71..9aed15a358b7 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -69,7 +69,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) memset(&msg, 0, sizeof(msg)); memset(&iov, 0, sizeof(iov)); - usbip_dbg_vhci_tx("setup txdata urb %p\n", urb); + usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n", + priv->seqnum); /* 1. 
setup usbip_header */ setup_cmd_submit_pdu(&pdu_header, urb); -- cgit v1.2.3 From 248a22044366f588d46754c54dfe29ffe4f8b4df Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 18 Dec 2017 17:23:37 -0700 Subject: usbip: stub: stop printing kernel pointer addresses in messages Remove and/or change debug, info. and error messages to not print kernel pointer addresses. Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_main.c | 5 +++-- drivers/usb/usbip/stub_rx.c | 7 ++----- drivers/usb/usbip/stub_tx.c | 6 +++--- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 4f48b306713f..c31c8402a0c5 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -237,11 +237,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev) struct stub_priv *priv; struct urb *urb; - dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev); + dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n"); while ((priv = stub_priv_pop(sdev))) { urb = priv->urb; - dev_dbg(&sdev->udev->dev, "free urb %p\n", urb); + dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n", + priv->seqnum); usb_kill_urb(urb); kmem_cache_free(stub_priv_cache, priv); diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 493ac2928391..2f29be474098 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -211,9 +211,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, if (priv->seqnum != pdu->u.cmd_unlink.seqnum) continue; - dev_info(&priv->urb->dev->dev, "unlink urb %p\n", - priv->urb); - /* * This matched urb is not completed yet (i.e., be in * flight in usb hcd hardware/driver). 
Now we are @@ -252,8 +249,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, ret = usb_unlink_urb(priv->urb); if (ret != -EINPROGRESS) dev_err(&priv->urb->dev->dev, - "failed to unlink a urb %p, ret %d\n", - priv->urb, ret); + "failed to unlink a urb # %lu, ret %d\n", + priv->seqnum, ret); return 0; } diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index 53172b1f6257..f0ec41a50cbc 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -88,7 +88,7 @@ void stub_complete(struct urb *urb) /* link a urb to the queue of tx. */ spin_lock_irqsave(&sdev->priv_lock, flags); if (sdev->ud.tcp_socket == NULL) { - usbip_dbg_stub_tx("ignore urb for closed connection %p", urb); + usbip_dbg_stub_tx("ignore urb for closed connection\n"); /* It will be freed in stub_device_cleanup_urbs(). */ } else if (priv->unlinking) { stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status); @@ -190,8 +190,8 @@ static int stub_send_ret_submit(struct stub_device *sdev) /* 1. setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; -- cgit v1.2.3 From 90120d15f4c397272aaf41077960a157fc4212bf Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Dec 2017 10:50:09 -0700 Subject: usbip: prevent leaking socket pointer address in messages usbip driver is leaking socket pointer address in messages. Remove the messages that aren't useful and print sockfd in the ones that are useful for debugging. 
Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 3 +-- drivers/usb/usbip/usbip_common.c | 16 +++++----------- drivers/usb/usbip/vhci_hcd.c | 2 +- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index a3df8ee82faf..e31a6f204397 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -149,8 +149,7 @@ static void stub_shutdown_connection(struct usbip_device *ud) * step 1? */ if (ud->tcp_socket) { - dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n", - ud->tcp_socket); + dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index f7978933b402..7b219d9109b4 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -317,26 +317,20 @@ int usbip_recv(struct socket *sock, void *buf, int size) struct msghdr msg = {.msg_flags = MSG_NOSIGNAL}; int total = 0; + if (!sock || !buf || !size) + return -EINVAL; + iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size); usbip_dbg_xmit("enter\n"); - if (!sock || !buf || !size) { - pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf, - size); - return -EINVAL; - } - do { - int sz = msg_data_left(&msg); + msg_data_left(&msg); sock->sk->sk_allocation = GFP_NOIO; result = sock_recvmsg(sock, &msg, MSG_WAITALL); - if (result <= 0) { - pr_debug("receive sock %p buf %p size %u ret %d total %d\n", - sock, buf + total, sz, result, total); + if (result <= 0) goto err; - } total += result; } while (msg_data_left(&msg)); diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 9efab3dc3734..c3e1008aa491 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -965,7 +965,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) /* need this? 
see stub_dev.c */ if (ud->tcp_socket) { - pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket); + pr_debug("shutdown tcp_socket %d\n", ud->sockfd); kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR); } -- cgit v1.2.3 From 10c90120930628e8b959bf58d4a0aaef3ae5d945 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 15 Dec 2017 10:05:15 -0700 Subject: usbip: stub_rx: fix static checker warning on unnecessary checks Fix the following static checker warnings: The patch c6688ef9f297: "usbip: fix stub_rx: harden CMD_SUBMIT path to handle malicious input" from Dec 7, 2017, leads to the following static checker warning: drivers/usb/usbip/stub_rx.c:346 get_pipe() warn: impossible condition '(pdu->u.cmd_submit.transfer_buffer_length > ((~0 >> 1))) => (s32min-s32max > s32max)' drivers/usb/usbip/stub_rx.c:486 stub_recv_cmd_submit() warn: always true condition '(pdu->u.cmd_submit.transfer_buffer_length <= ((~0 >> 1))) => (s32min-s32max <= s32max)' Reported-by: Dan Carpenter Signed-off-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_rx.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 2f29be474098..6c5a59313999 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -339,14 +339,6 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) epd = &ep->desc; - /* validate transfer_buffer_length */ - if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) { - dev_err(&sdev->udev->dev, - "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n", - pdu->u.cmd_submit.transfer_buffer_length); - return -1; - } - if (usb_endpoint_xfer_control(epd)) { if (dir == USBIP_DIR_OUT) return usb_sndctrlpipe(udev, epnum); @@ -479,8 +471,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev, } /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0 && - pdu->u.cmd_submit.transfer_buffer_length <= 
INT_MAX) { + if (pdu->u.cmd_submit.transfer_buffer_length > 0) { priv->urb->transfer_buffer = kzalloc(pdu->u.cmd_submit.transfer_buffer_length, GFP_KERNEL); -- cgit v1.2.3 From 544c4605acc5ae4afe7dd5914147947db182f2fb Mon Sep 17 00:00:00 2001 From: Juan Zea Date: Fri, 15 Dec 2017 10:21:20 +0100 Subject: usbip: fix usbip bind writing random string after command in match_busid usbip bind writes commands followed by random string when writing to match_busid attribute in sysfs, caused by using full variable size instead of string length. Signed-off-by: Juan Zea Acked-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/utils.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c index 2b3d6d235015..3d7b42e77299 100644 --- a/tools/usb/usbip/src/utils.c +++ b/tools/usb/usbip/src/utils.c @@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add) char command[SYSFS_BUS_ID_SIZE + 4]; char match_busid_attr_path[SYSFS_PATH_MAX]; int rc; + int cmd_size; snprintf(match_busid_attr_path, sizeof(match_busid_attr_path), "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME, @@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add) attr_name); if (add) - snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid); + cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", + busid); else - snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid); + cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", + busid); rc = write_sysfs_attribute(match_busid_attr_path, command, - sizeof(command)); + cmd_size); if (rc < 0) { dbg("failed to write match_busid: %s", strerror(errno)); return -1; -- cgit v1.2.3 From b9096d9f15c142574ebebe8fbb137012bb9d99c2 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 12 Dec 2017 16:11:30 +0100 Subject: usb: add RESET_RESUME for ELSA MicroLink 56K This modem needs this quirk to operate. 
It produces timeouts when resumed without reset. Signed-off-by: Oliver Neukum CC: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index a10b346b9777..95812656d9b9 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -149,6 +149,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */ { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM }, + /* ELSA MicroLink 56K */ + { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, -- cgit v1.2.3 From 7f038d256c723dd390d2fca942919573995f4cfd Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Dmitry Fleytman Date: Tue, 19 Dec 2017 06:02:04 +0200 Subject: usb: Add device quirk for Logitech HD Pro Webcam C925e Commit e0429362ab15 ("usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e") introduced quirk to workaround an issue with some Logitech webcams. There is one more model that has the same issue - C925e, so applying the same quirk as well. See aforementioned commit message for detailed explanation of the problem. 
 Signed-off-by: Dmitry Fleytman Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 95812656d9b9..4024926c1d68 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -52,10 +52,11 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, - /* Logitech HD Pro Webcams C920, C920-C and C930e */ + /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */ { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT }, { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT }, /* Logitech ConferenceCam CC3000e */ { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT }, -- cgit v1.2.3 From a93639090a2743c8e205c1ac25439702702b4ce4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 14 Dec 2017 15:43:43 +1100 Subject: staging: lustre: lnet: Fix recent breakage from list_for_each conversion Commit 8e55b6fd0660 ("staging: lustre: lnet: replace list_for_each with list_for_each_entry") was intended to be an idempotent change, but actually broke the behavior of ksocknal_add_peer() causing mounts to fail. The fact that it caused an existing "route2 = NULL;" to become redundant could have been a clue. The fact that the loop body set the new loop variable to NULL might also have been a clue. The original code relied on "route2" being NULL if nothing was found. The new code would always set route2 to a non-NULL value if the list was empty, and would likely crash if the list was not empty. Restore correct functionality by using code-flow rather than the value of "route2" to determine whether to use an old route, or to add a new one.
Fixes: 8e55b6fd0660 ("staging: lustre: lnet: replace list_for_each with list_for_each_entry") Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- .../staging/lustre/lnet/klnds/socklnd/socklnd.c | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index 986c2a40d978..8267119ccc8e 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -487,21 +487,18 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, ksocknal_nid2peerlist(id.nid)); } - route2 = NULL; list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) { - if (route2->ksnr_ipaddr == ipaddr) - break; - - route2 = NULL; - } - if (!route2) { - ksocknal_add_route_locked(peer, route); - route->ksnr_share_count++; - } else { - ksocknal_route_decref(route); - route2->ksnr_share_count++; + if (route2->ksnr_ipaddr == ipaddr) { + /* Route already exists, use the old one */ + ksocknal_route_decref(route); + route2->ksnr_share_count++; + goto out; + } } - + /* Route doesn't already exist, add the new one */ + ksocknal_add_route_locked(peer, route); + route->ksnr_share_count++; +out: write_unlock_bh(&ksocknal_data.ksnd_global_lock); return 0; -- cgit v1.2.3 From d070f7c703ef26e3db613f24206823f916272fc6 Mon Sep 17 00:00:00 2001 From: Abhijeet Kumar Date: Tue, 12 Dec 2017 00:40:25 +0530 Subject: ASoC: nau8825: fix issue that pop noise when start capture In skylake platform, we hear a loud pop noise(0 dB) at start of audio capture power up sequence. This patch removes the pop noise from the recording by adding a delay before enabling ADC. 
Signed-off-by: Abhijeet Kumar Signed-off-by: Mark Brown --- sound/soc/codecs/nau8825.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 714ce17da717..e853a6dfd33b 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: + msleep(125); regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL, NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC); break; -- cgit v1.2.3 From 4c90f297ffcffb84e8667e4f447aeaba52a37220 Mon Sep 17 00:00:00 2001 From: Krzysztof Adamski Date: Thu, 30 Nov 2017 15:09:15 +0100 Subject: regmap: use proper part of work_buf for storing val The map->work_buf is a buffer preallocated in __regmap_init() with size allowing it to store all 3 parts of a buffer - reg, pad and val. While reg and val parts are always properly setup before each transaction, the pad part is left at its default value (zeros). Until it is overwritten, that is. _regmap_bus_read(), when calling _regmap_raw_read() uses beginning of work_buf as a place to store data read. Usually that is fine but if val_bits > reg_bits && pad_bits > 0, padding area of work_buf() may get overwritten. Since padding is not zeroed before each transaction, garbage will be used on next calls. This patch moves the val pointer used for _regmap_raw_read() to point to a part of work_buf intended for storing value read. 
Signed-off-by: Krzysztof Adamski Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 8d516a9bfc01..9fb3a792642b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2423,13 +2423,15 @@ static int _regmap_bus_read(void *context, unsigned int reg, { int ret; struct regmap *map = context; + void *work_val = map->work_buf + map->format.reg_bytes + + map->format.pad_bytes; if (!map->format.parse_val) return -EINVAL; - ret = _regmap_raw_read(map, reg, map->work_buf, map->format.val_bytes); + ret = _regmap_raw_read(map, reg, work_val, map->format.val_bytes); if (ret == 0) - *val = map->format.parse_val(map->work_buf); + *val = map->format.parse_val(work_val); return ret; } -- cgit v1.2.3 From 2f8aab3d29fda158fa49ecae94b3b3a4b494909d Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 27 Nov 2017 20:03:14 +0800 Subject: ASoC: rl6231: get better PLL parameters For those which can only get approximation PLL out cases, this patch will use higher resolution to get a better PLL parameter. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rl6231.c | 95 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 29 deletions(-) diff --git a/sound/soc/codecs/rl6231.c b/sound/soc/codecs/rl6231.c index 974a9040651d..33690e98e297 100644 --- a/sound/soc/codecs/rl6231.c +++ b/sound/soc/codecs/rl6231.c @@ -13,6 +13,7 @@ #include #include +#include #include "rl6231.h" /** @@ -106,6 +107,25 @@ static const struct pll_calc_map pll_preset_table[] = { {19200000, 24576000, 3, 30, 3, false}, }; +static unsigned int find_best_div(unsigned int in, + unsigned int max, unsigned int div) +{ + unsigned int d; + + if (in <= max) + return 1; + + d = in / max; + if (in % max) + d++; + + while (div % d != 0) + d++; + + + return d; +} + /** * rl6231_pll_calc - Calcualte PLL M/N/K code. 
* @freq_in: external clock provided to codec. @@ -120,9 +140,11 @@ int rl6231_pll_calc(const unsigned int freq_in, const unsigned int freq_out, struct rl6231_pll_code *pll_code) { int max_n = RL6231_PLL_N_MAX, max_m = RL6231_PLL_M_MAX; - int i, k, red, n_t, pll_out, in_t, out_t; - int n = 0, m = 0, m_t = 0; - int red_t = abs(freq_out - freq_in); + int i, k, n_t; + int k_t, min_k, max_k, n = 0, m = 0, m_t = 0; + unsigned int red, pll_out, in_t, out_t, div, div_t; + unsigned int red_t = abs(freq_out - freq_in); + unsigned int f_in, f_out, f_max; bool bypass = false; if (RL6231_PLL_INP_MAX < freq_in || RL6231_PLL_INP_MIN > freq_in) @@ -140,39 +162,54 @@ int rl6231_pll_calc(const unsigned int freq_in, } } - k = 100000000 / freq_out - 2; - if (k > RL6231_PLL_K_MAX) - k = RL6231_PLL_K_MAX; - for (n_t = 0; n_t <= max_n; n_t++) { - in_t = freq_in / (k + 2); - pll_out = freq_out / (n_t + 2); - if (in_t < 0) - continue; - if (in_t == pll_out) { - bypass = true; - n = n_t; - goto code_find; - } - red = abs(in_t - pll_out); - if (red < red_t) { - bypass = true; - n = n_t; - m = m_t; - if (red == 0) + min_k = 80000000 / freq_out - 2; + max_k = 150000000 / freq_out - 2; + if (max_k > RL6231_PLL_K_MAX) + max_k = RL6231_PLL_K_MAX; + if (min_k > RL6231_PLL_K_MAX) + min_k = max_k = RL6231_PLL_K_MAX; + div_t = gcd(freq_in, freq_out); + f_max = 0xffffffff / RL6231_PLL_N_MAX; + div = find_best_div(freq_in, f_max, div_t); + f_in = freq_in / div; + f_out = freq_out / div; + k = min_k; + for (k_t = min_k; k_t <= max_k; k_t++) { + for (n_t = 0; n_t <= max_n; n_t++) { + in_t = f_in * (n_t + 2); + pll_out = f_out * (k_t + 2); + if (in_t < 0) + continue; + if (in_t == pll_out) { + bypass = true; + n = n_t; + k = k_t; goto code_find; - red_t = red; - } - for (m_t = 0; m_t <= max_m; m_t++) { - out_t = in_t / (m_t + 2); - red = abs(out_t - pll_out); + } + out_t = in_t / (k_t + 2); + red = abs(f_out - out_t); if (red < red_t) { - bypass = false; + bypass = true; n = n_t; - m = m_t; + m = 0; + k = 
k_t; if (red == 0) goto code_find; red_t = red; } + for (m_t = 0; m_t <= max_m; m_t++) { + out_t = in_t / ((m_t + 2) * (k_t + 2)); + red = abs(f_out - out_t); + if (red < red_t) { + bypass = false; + n = n_t; + m = m_t; + k = k_t; + if (red == 0) + goto code_find; + red_t = red; + } + } } } pr_debug("Only get approximation about PLL\n"); -- cgit v1.2.3 From 20220945b1a8e77c789dd4bb9aa1471b6e8695cc Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 15 Dec 2017 20:07:23 -0800 Subject: ASoC: rt5514-spi: only enable wakeup when fully initialized If an rt5514-spi device is probed but the platform hasn't linked it in, we might never fully request the SPI IRQ, nor configure the rt5514 DSP, but we still might try to enable the SPI IRQ (enable_irq_wake()). This is bad, and among other things, can cause the interrupt to trigger every time we try to suspend the system (e.g., because the interrupt trigger setting was never set properly). Instead of setting our wakeup capabilities in the SPI driver probe routine, let's wait until we've actually requested the IRQ. Fixes issues seen on the "kevin" Chromebook (Samsung Chromebook Plus). 
Fixes: 58f1c07d23cd ("ASoC: rt5514: Voice wakeup support.") Signed-off-by: Brian Norris Signed-off-by: Mark Brown --- sound/soc/codecs/rt5514-spi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index ca6a90d8fc39..64bf26cec20d 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform) dev_err(&rt5514_spi->dev, "%s Failed to reguest IRQ: %d\n", __func__, ret); + else + device_init_wakeup(rt5514_dsp->dev, true); } return 0; @@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi) return ret; } - device_init_wakeup(&spi->dev, true); - return 0; } -- cgit v1.2.3 From 56a23ee52611ca76421f3d7cac100e1616716dae Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 19 Dec 2017 13:38:23 +0100 Subject: ALSA: usb-audio: Proper fallback at get_term_name() get_term_name() calls snd_usb_copy_string_desc() for retrieving the name when a specific ID (name field) is given. When this returns an error (zero), however, it simply returns as is. This will end up in a fixed name string in the caller side, which often is meaningless. For giving a bit more useful name string depending on the terminal type, change the get_term_name() function to go through the fallback mode. 
Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 2b4ceda36291..9afb8ab524c7 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -656,10 +656,14 @@ static int get_term_name(struct mixer_build *state, struct usb_audio_term *iterm unsigned char *name, int maxlen, int term_only) { struct iterm_name_combo *names; + int len; - if (iterm->name) - return snd_usb_copy_string_desc(state, iterm->name, + if (iterm->name) { + len = snd_usb_copy_string_desc(state, iterm->name, name, maxlen); + if (len) + return len; + } /* virtual type - not a real terminal */ if (iterm->type >> 16) { -- cgit v1.2.3 From e0795606ad565cc2da0b926a00c7e6b8187a6d71 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Dec 2017 18:28:58 +0000 Subject: drm/i915/lpe: Remove double-encapsulation of info string Just printk the string, or at least do not double up on the newlines! Fixes: eef57324d926 ("drm/i915: setup bridge for HDMI LPE audio driver") Signed-off-by: Chris Wilson Cc: Pierre-Louis Bossart Cc: Jerome Anand Cc: Jani Nikula Cc: Takashi Iwai Reviewed-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20171213182858.2159-1-chris@chris-wilson.co.uk (cherry picked from commit 99cd05c43baac8ef56c20eb1776a15b02c81ccc3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lpe_audio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 3bf65288ffff..5809b29044fc 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv) }; if (!pci_dev_present(atom_hdaudio_ids)) { - DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); + DRM_INFO("HDaudio controller not detected, using LPE audio instead\n"); 
 lpe_present = true; } } -- cgit v1.2.3 From a4ffdc2b6726958c07d535318400124e3a3bc19b Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 15 Dec 2017 14:43:10 -0800 Subject: drm/i915: Protect DDI port to DPLL map from theoretical race. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we have multiple modesets for different connectors happening in parallel we could have a race on the RMW on these shared registers. This possibility was initially raised by Paulo when reviewing commit '555e38d27317 ("drm/i915/cnl: DDI - PLL mapping")' but the original possibility comes from commit '5416d871136d ("drm/i915/skl: Set the eDP link rate on DPLL0")'. Or maybe later when atomic commits entered the picture. Apparently the discussion around this topic showed that the right solution would be to serialize the atomic commits in a way that we don't have the possibility of races here, since if such parallel modesets happen, apparently many other things will be on fire. The code has been there since SKL and there was no report of issue, but since we never looked back to that serialization possibility, and also we don't have an igt case for that, it is better to at least protect this corner.
Suggested-by: Paulo Zanoni Fixes: 555e38d27317 ("drm/i915/cnl: DDI - PLL mapping") Fixes: 5416d871136d ("drm/i915/skl: Set the eDP link rate on DPLL0") Cc: Paulo Zanoni Cc: Ville Syrjälä Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20171215224310.19103-1-rodrigo.vivi@intel.com (cherry picked from commit 8edcda1266f93816fde77c9754f388ae0ae343fc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ddi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e0843bb99169..58a3755544b2 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; + mutex_lock(&dev_priv->dpll_lock); + if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); @@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } else if (INTEL_INFO(dev_priv)->gen < 9) { I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); } + + mutex_unlock(&dev_priv->dpll_lock); } static void intel_ddi_clk_disable(struct intel_encoder *encoder) -- cgit v1.2.3 From db5ff909798ef0099004ad50a0ff5fde92426fd1 Mon Sep 17 00:00:00 2001 From: Xinyu Lin Date: Sun, 17 Dec 2017 20:13:39 +0800 Subject: libata: apply MAX_SEC_1024 to all LITEON EP1 series devices LITEON EP1 has the same timeout issues as CX1 series devices. Revert max_sectors to the value of 1024. 
'e0edc8c54646 ("libata: apply MAX_SEC_1024 to all CX1-JB*-HP devices")' Signed-off-by: Xinyu Lin Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8193b38a1cae..3c09122bf038 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4449,6 +4449,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { * https://bugzilla.kernel.org/show_bug.cgi?id=121671 */ { "LITEON CX1-JB*-HP", NULL, ATA_HORKAGE_MAX_SEC_1024 }, + { "LITEON EP1-*", NULL, ATA_HORKAGE_MAX_SEC_1024 }, /* Devices we expect to fail diagnostics */ -- cgit v1.2.3 From 116d2f7496c51b2e02e8e4ecdd2bdf5fb9d5a641 Mon Sep 17 00:00:00 2001 From: Prateek Sood Date: Tue, 19 Dec 2017 12:56:57 +0530 Subject: cgroup: Fix deadlock in cpu hotplug path Deadlock during cgroup migration from cpu hotplug path when a task T is being moved from source to destination cgroup. kworker/0:0 cpuset_hotplug_workfn() cpuset_hotplug_update_tasks() hotplug_update_tasks_legacy() remove_tasks_in_empty_cpuset() cgroup_transfer_tasks() // stuck in iterator loop cgroup_migrate() cgroup_migrate_add_task() In cgroup_migrate_add_task() it checks for PF_EXITING flag of task T. Task T will not migrate to destination cgroup. css_task_iter_start() will keep pointing to task T in loop waiting for task T cg_list node to be removed. Task T do_exit() exit_signals() // sets PF_EXITING exit_task_namespaces() switch_task_namespaces() free_nsproxy() put_mnt_ns() drop_collected_mounts() namespace_unlock() synchronize_rcu() _synchronize_rcu_expedited() schedule_work() // on cpu0 low priority worker pool wait_event() // waiting for work item to execute Task T inserted a work item in the worklist of cpu0 low priority worker pool. It is waiting for expedited grace period work item to execute. 
 This work item will only be executed once kworker/0:0 completes execution of cpuset_hotplug_workfn(). kworker/0:0 ==> Task T ==>kworker/0:0 In case of PF_EXITING task being migrated from source to destination cgroup, migrate next available task in source cgroup. Signed-off-by: Prateek Sood Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 024085daab1a..a2c05d2476ac 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -123,7 +123,11 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) */ do { css_task_iter_start(&from->self, 0, &it); - task = css_task_iter_next(&it); + + do { + task = css_task_iter_next(&it); + } while (task && (task->flags & PF_EXITING)); + if (task) get_task_struct(task); css_task_iter_end(&it); -- cgit v1.2.3 From f292b9b28097d8fe870336108e91bd95a14294bf Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 15 Dec 2017 19:59:47 -0800 Subject: staging: ion: Fix ion_cma_heap allocations In trying to add support for drm_hwcomposer to HiKey, I've needed to utilize the ION CMA heap, and I've noticed problems with allocations on newer kernels failing. It seems back with 204f672255c2 ("ion: Use CMA APIs directly"), the ion_cma_heap code was modified to use the CMA API, but kept the arguments as buffer lengths rather than number of pages. This results in errors as we don't have enough pages in CMA to satisfy the exaggerated requests. This patch converts the ion_cma_heap CMA API usage to properly request pages. It also fixes a minor issue in the allocation where in the error path, the cma_release is called with the buffer->size value which hasn't yet been set.
Cc: Laura Abbott Cc: Sumit Semwal Cc: Benjamin Gaignard Cc: Archit Taneja Cc: Greg KH Cc: Daniel Vetter Cc: Dmitry Shmidt Cc: Todd Kjos Cc: Amit Pundir Fixes: 204f672255c2 ("staging: android: ion: Use CMA APIs directly") Acked-by: Laura Abbott Signed-off-by: John Stultz Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/Kconfig | 2 +- drivers/staging/android/ion/ion_cma_heap.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig index a517b2d29f1b..8f6494158d3d 100644 --- a/drivers/staging/android/ion/Kconfig +++ b/drivers/staging/android/ion/Kconfig @@ -37,7 +37,7 @@ config ION_CHUNK_HEAP config ION_CMA_HEAP bool "Ion CMA heap support" - depends on ION && CMA + depends on ION && DMA_CMA help Choose this option to enable CMA heaps with Ion. This heap is backed by the Contiguous Memory Allocator (CMA). If your system has these diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c index dd5545d9990a..86196ffd2faf 100644 --- a/drivers/staging/android/ion/ion_cma_heap.c +++ b/drivers/staging/android/ion/ion_cma_heap.c @@ -39,9 +39,15 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, struct ion_cma_heap *cma_heap = to_cma_heap(heap); struct sg_table *table; struct page *pages; + unsigned long size = PAGE_ALIGN(len); + unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned long align = get_order(size); int ret; - pages = cma_alloc(cma_heap->cma, len, 0, GFP_KERNEL); + if (align > CONFIG_CMA_ALIGNMENT) + align = CONFIG_CMA_ALIGNMENT; + + pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL); if (!pages) return -ENOMEM; @@ -53,7 +59,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, if (ret) goto free_mem; - sg_set_page(table->sgl, pages, len, 0); + sg_set_page(table->sgl, pages, size, 0); buffer->priv_virt = pages; buffer->sg_table = 
table; @@ -62,7 +68,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer, free_mem: kfree(table); err: - cma_release(cma_heap->cma, pages, buffer->size); + cma_release(cma_heap->cma, pages, nr_pages); return -ENOMEM; } @@ -70,9 +76,10 @@ static void ion_cma_free(struct ion_buffer *buffer) { struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap); struct page *pages = buffer->priv_virt; + unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT; /* release memory */ - cma_release(cma_heap->cma, pages, buffer->size); + cma_release(cma_heap->cma, pages, nr_pages); /* release sg table */ sg_free_table(buffer->sg_table); kfree(buffer->sg_table); -- cgit v1.2.3 From d6b246bb7a29703f53aa4c050b8b3205d749caee Mon Sep 17 00:00:00 2001 From: Sushmita Susheelendra Date: Fri, 15 Dec 2017 13:59:13 -0700 Subject: staging: android: ion: Fix dma direction for dma_sync_sg_for_cpu/device Use the direction argument passed into begin_cpu_access and end_cpu_access when calling the dma_sync_sg_for_cpu/device. The actual cache primitive called depends on the direction passed in. 
Signed-off-by: Sushmita Susheelendra Cc: stable Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index a7d9b0e98572..f480885e346b 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -346,7 +346,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, mutex_lock(&buffer->lock); list_for_each_entry(a, &buffer->attachments, list) { dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents, - DMA_BIDIRECTIONAL); + direction); } mutex_unlock(&buffer->lock); @@ -368,7 +368,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, mutex_lock(&buffer->lock); list_for_each_entry(a, &buffer->attachments, list) { dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents, - DMA_BIDIRECTIONAL); + direction); } mutex_unlock(&buffer->lock); -- cgit v1.2.3 From 748a240c589824e9121befb1cba5341c319885bc Mon Sep 17 00:00:00 2001 From: Brian King Date: Fri, 15 Dec 2017 15:21:50 -0600 Subject: tg3: Fix rx hang on MTU change with 5717/5719 This fixes a hang issue seen when changing the MTU size from 1500 MTU to 9000 MTU on both 5717 and 5719 chips. In discussion with Broadcom, they've indicated that these chipsets have the same phy as the 57766 chipset, so the same workarounds apply. This has been tested by IBM on both Power 8 and Power 9 systems as well as by Broadcom on x86 hardware and has been confirmed to resolve the hang issue. Signed-off-by: Brian King Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/tg3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index de51c2177d03..d09c5a9c53b5 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14225,7 +14225,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) /* Reset PHY, otherwise the read DMA engine will be in a mode that * breaks all requests to 256 bytes. */ - if (tg3_asic_rev(tp) == ASIC_REV_57766) + if (tg3_asic_rev(tp) == ASIC_REV_57766 || + tg3_asic_rev(tp) == ASIC_REV_5717 || + tg3_asic_rev(tp) == ASIC_REV_5719) reset_phy = true; err = tg3_restart_hw(tp, reset_phy); -- cgit v1.2.3 From 8ba6b30ef700e16f3bc668e6f4f8375da9229e4d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 17 Dec 2017 17:16:43 +0100 Subject: mlxsw: spectrum_router: Remove batch neighbour deletion causing FW bug This reverts commit 63dd00fa3e524c27cc0509190084ab147ecc8ae2. RAUHT DELETE_ALL seems to trigger a bug in FW. That manifests by later calls to RAUHT ADD of an IPv6 neighbor to fail with "bad parameter" error code. Signed-off-by: Petr Machata Fixes: 63dd00fa3e52 ("mlxsw: spectrum_router: Add batch neighbour deletion") Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 72ef4f8025f0..be657b8533f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&mlxsw_sp->router->neigh_ht); } -static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif *rif) -{ - char rauht_pl[MLXSW_REG_RAUHT_LEN]; - - mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, - rif->rif_index, rif->addr); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); -} - static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; - mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, - rif_list_node) + rif_list_node) { + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + } } enum mlxsw_sp_nexthop_type { -- cgit v1.2.3 From 2cc42bac1c795f75fcc062b95c6ca7ac1b84d5d8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 18 Dec 2017 09:37:45 -0700 Subject: x86-64/Xen: eliminate W+X mappings A few thousand such pages are usually left around due to the re-use of L1 tables having been provided by the hypervisor (Dom0) or tool stack (DomU). Set NX in the direct map variant, which needs to be done in L2 due to the dual use of the re-used L1s. For x86_configure_nx() to actually do what it is supposed to do, call get_cpu_cap() first. This was broken by commit 4763ed4d45 ("x86, mm: Clean up and simplify NX enablement") when switching away from the direct EFER read. 
Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/xen/enlighten_pv.c | 3 +++ arch/x86/xen/mmu_pv.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 69b9deff7e5c..86f26ea99324 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -87,6 +87,8 @@ #include "multicalls.h" #include "pmu.h" +#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */ + void *xen_initial_gdt; static int xen_cpu_up_prepare_pv(unsigned int cpu); @@ -1249,6 +1251,7 @@ asmlinkage __visible void __init xen_start_kernel(void) __userpte_alloc_gfp &= ~__GFP_HIGHMEM; /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); x86_configure_nx(); /* Get mfn list */ diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 9d9cc3870722..7118f776cd49 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1916,6 +1916,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Graft it onto L4[511][510] */ copy_page(level2_kernel_pgt, l2); + /* + * Zap execute permission from the ident map. Due to the sharing of + * L1 entries we need to do this in the L2. + */ + if (__supported_pte_mask & _PAGE_NX) { + for (i = 0; i < PTRS_PER_PMD; ++i) { + if (pmd_none(level2_ident_pgt[i])) + continue; + level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX); + } + } + /* Copy the initial P->M table mappings if necessary. */ i = pgd_index(xen_start_info->mfn_list); if (i && i < pgd_index(__START_KERNEL_map)) -- cgit v1.2.3 From 7352e252b5bf40d59342494a70354a2d436fd0cd Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Mon, 18 Dec 2017 17:00:17 +0800 Subject: net: mediatek: setup proper state for disabled GMAC on the default The current solution would setup fixed and force link of 1Gbps to the both GMAC on the default. 
However, The GMAC should always be put to link down state when the GMAC is disabled on certain target boards. Otherwise, the driver possibly receives unexpected data from the floating hardware connection through the unused GMAC. Although the driver had been added certain protection in RX path to get rid of such kind of unexpected data sent to the upper stack. Signed-off-by: Sean Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 54adfd967858..fc67e35b253e 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth) /* set GE2 TUNE */ regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0); - /* GE1, Force 1000M/FD, FC ON */ - mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0)); - - /* GE2, Force 1000M/FD, FC ON */ - mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1)); + /* Set linkdown as the default for each GMAC. Its own MCR would be set + * up with the more appropriate value when mtk_phy_link_adjust call is + * being invoked. + */ + for (i = 0; i < MTK_MAC_COUNT; i++) + mtk_w32(eth, 0, MTK_MAC_MCR(i)); /* Indicates CDM to parse the MTK special tag from CPU * which also is working out for untag packets. -- cgit v1.2.3 From e688822d035b494071ecbadcccbd6f3325fb0f59 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Fri, 15 Dec 2017 20:20:06 +0300 Subject: net: arc_emac: fix arc_emac_rx() error paths arc_emac_rx() has some issues found by code review. In case netdev_alloc_skb_ip_align() or dma_map_single() failure rx fifo entry will not be returned to EMAC. In case dma_map_single() failure previously allocated skb became lost to driver. At the same time address of newly allocated skb will not be provided to EMAC. 
Signed-off-by: Alexander Kochetkov Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_main.c | 53 +++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 3241af1ce718..5b422be56165 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -210,39 +210,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget) continue; } - pktlen = info & LEN_MASK; - stats->rx_packets++; - stats->rx_bytes += pktlen; - skb = rx_buff->skb; - skb_put(skb, pktlen); - skb->dev = ndev; - skb->protocol = eth_type_trans(skb, ndev); - - dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), - dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); - - /* Prepare the BD for next cycle */ - rx_buff->skb = netdev_alloc_skb_ip_align(ndev, - EMAC_BUFFER_SIZE); - if (unlikely(!rx_buff->skb)) { + /* Prepare the BD for next cycle. netif_receive_skb() + * only if new skb was allocated and mapped to avoid holes + * in the RX fifo. 
+ */ + skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE); + if (unlikely(!skb)) { + if (net_ratelimit()) + netdev_err(ndev, "cannot allocate skb\n"); + /* Return ownership to EMAC */ + rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE); stats->rx_errors++; - /* Because receive_skb is below, increment rx_dropped */ stats->rx_dropped++; continue; } - /* receive_skb only if new skb was allocated to avoid holes */ - netif_receive_skb(skb); - - addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data, + addr = dma_map_single(&ndev->dev, (void *)skb->data, EMAC_BUFFER_SIZE, DMA_FROM_DEVICE); if (dma_mapping_error(&ndev->dev, addr)) { if (net_ratelimit()) - netdev_err(ndev, "cannot dma map\n"); - dev_kfree_skb(rx_buff->skb); + netdev_err(ndev, "cannot map dma buffer\n"); + dev_kfree_skb(skb); + /* Return ownership to EMAC */ + rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE); stats->rx_errors++; + stats->rx_dropped++; continue; } + + /* unmap previosly mapped skb */ + dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr), + dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE); + + pktlen = info & LEN_MASK; + stats->rx_packets++; + stats->rx_bytes += pktlen; + skb_put(rx_buff->skb, pktlen); + rx_buff->skb->dev = ndev; + rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev); + + netif_receive_skb(rx_buff->skb); + + rx_buff->skb = skb; dma_unmap_addr_set(rx_buff, addr, addr); dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE); -- cgit v1.2.3 From 78aa09754d69ba19a55c59f490788ec1c85f41f0 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Tue, 19 Dec 2017 14:03:57 +0300 Subject: net: arc_emac: restart stalled EMAC Under certain conditions EMAC stop reception of incoming packets and continuously increment R_MISS register instead of saving data into provided buffer. The commit implement workaround for such situation. Then the stall detected EMAC will be restarted. On device the stall looks like the device lost it's dynamic IP address. 
ifconfig shows that interface error counter rapidly increments. At the same time on the DHCP server we can see continuous DHCP-requests from device. In real network stalls happen really rarely. To make them frequent the broadcast storm[1] should be simulated. For simulation it is necessary to make following connections: 1. connect radxarock to 1st port of switch 2. connect some PC to 2nd port of switch 3. connect two other free ports together using standard ethernet cable, in order to make a switching loop. After that, it is necessary to make a broadcast storm. For example, running on PC 'ping' to some IP address triggers ARP-request storm. After some time (~10sec), EMAC on rk3188 will stall. Observed and tested on rk3188 radxarock. [1] https://en.wikipedia.org/wiki/Broadcast_radiation Signed-off-by: Alexander Kochetkov Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac.h | 2 + drivers/net/ethernet/arc/emac_main.c | 111 +++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h index 3c63b16d485f..d9efbc8d783b 100644 --- a/drivers/net/ethernet/arc/emac.h +++ b/drivers/net/ethernet/arc/emac.h @@ -159,6 +159,8 @@ struct arc_emac_priv { unsigned int link; unsigned int duplex; unsigned int speed; + + unsigned int rx_missed_errors; }; /** diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 5b422be56165..bd277b0dc615 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -26,6 +26,8 @@ #include "emac.h" +static void arc_emac_restart(struct net_device *ndev); + /** * arc_emac_tx_avail - Return the number of available slots in the tx ring. * @priv: Pointer to ARC EMAC private data structure.
@@ -267,6 +269,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget) return work_done; } +/** + * arc_emac_rx_miss_handle - handle R_MISS register + * @ndev: Pointer to the net_device structure. + */ +static void arc_emac_rx_miss_handle(struct net_device *ndev) +{ + struct arc_emac_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + unsigned int miss; + + miss = arc_reg_get(priv, R_MISS); + if (miss) { + stats->rx_errors += miss; + stats->rx_missed_errors += miss; + priv->rx_missed_errors += miss; + } +} + +/** + * arc_emac_rx_stall_check - check RX stall + * @ndev: Pointer to the net_device structure. + * @budget: How many BDs requested to process on 1 call. + * @work_done: How many BDs processed + * + * Under certain conditions EMAC stop reception of incoming packets and + * continuously increment R_MISS register instead of saving data into + * provided buffer. This function detect that condition and restart + * EMAC. + */ +static void arc_emac_rx_stall_check(struct net_device *ndev, + int budget, unsigned int work_done) +{ + struct arc_emac_priv *priv = netdev_priv(ndev); + struct arc_emac_bd *rxbd; + + if (work_done) + priv->rx_missed_errors = 0; + + if (priv->rx_missed_errors && budget) { + rxbd = &priv->rxbd[priv->last_rx_bd]; + if (le32_to_cpu(rxbd->info) & FOR_EMAC) { + arc_emac_restart(ndev); + priv->rx_missed_errors = 0; + } + } +} + /** * arc_emac_poll - NAPI poll handler. * @napi: Pointer to napi_struct structure. 
@@ -281,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget) unsigned int work_done; arc_emac_tx_clean(ndev); + arc_emac_rx_miss_handle(ndev); work_done = arc_emac_rx(ndev, budget); if (work_done < budget) { @@ -288,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget) arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK); } + arc_emac_rx_stall_check(ndev, budget, work_done); + return work_done; } @@ -329,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance) if (status & MSER_MASK) { stats->rx_missed_errors += 0x100; stats->rx_errors += 0x100; + priv->rx_missed_errors += 0x100; + napi_schedule(&priv->napi); } if (status & RXCR_MASK) { @@ -741,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } +/** + * arc_emac_restart - Restart EMAC + * @ndev: Pointer to net_device structure. + * + * This function do hardware reset of EMAC in order to restore + * network packets reception. + */ +static void arc_emac_restart(struct net_device *ndev) +{ + struct arc_emac_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + int i; + + if (net_ratelimit()) + netdev_warn(ndev, "restarting stalled EMAC\n"); + + netif_stop_queue(ndev); + + /* Disable interrupts */ + arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK); + + /* Disable EMAC */ + arc_reg_clr(priv, R_CTRL, EN_MASK); + + /* Return the sk_buff to system */ + arc_free_tx_queue(ndev); + + /* Clean Tx BD's */ + priv->txbd_curr = 0; + priv->txbd_dirty = 0; + memset(priv->txbd, 0, TX_RING_SZ); + + for (i = 0; i < RX_BD_NUM; i++) { + struct arc_emac_bd *rxbd = &priv->rxbd[i]; + unsigned int info = le32_to_cpu(rxbd->info); + + if (!(info & FOR_EMAC)) { + stats->rx_errors++; + stats->rx_dropped++; + } + /* Return ownership to EMAC */ + rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE); + } + priv->last_rx_bd = 0; + + /* Make sure info is visible to EMAC before enable */ + wmb(); + + /* 
Enable interrupts */ + arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK); + + /* Enable EMAC */ + arc_reg_or(priv, R_CTRL, EN_MASK); + + netif_start_queue(ndev); +} + static const struct net_device_ops arc_emac_netdev_ops = { .ndo_open = arc_emac_open, .ndo_stop = arc_emac_stop, -- cgit v1.2.3 From a93bf0ff449064e6b7f44e58522e940f88c0d966 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Dec 2017 14:20:56 +0800 Subject: vxlan: update skb dst pmtu on tx path Unlike ip tunnels, now vxlan doesn't do any pmtu update for upper dst pmtu, even if it doesn't match the lower dst pmtu any more. The problem can be reproduced when reducing the vxlan lower dev's pmtu when running netperf. In jianlin's testing, the performance went to 1/7 of the previous. This patch is to update the upper dst pmtu to match the lower dst pmtu on tx path so that packets can be sent out even when lower dev's pmtu has been changed. It also works for metadata dst. Note that this patch doesn't process any pmtu icmp packet. But even in the future, the support for pmtu icmp packets process of udp tunnels will also needs this. The same thing will be done for geneve in another patch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1000b0e4ee01..31f4b7911ef8 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ndst = &rt->dst; + if (skb_dst(skb)) { + int mtu = dst_mtu(ndst) - VXLAN_HEADROOM; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, + skb, mtu); + } + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), @@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto out_unlock; } + if (skb_dst(skb)) { + int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, + skb, mtu); + } + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); skb_scrub_packet(skb, xnet); -- cgit v1.2.3 From cfddd4c33c254954927942599d299b3865743146 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Dec 2017 14:24:35 +0800 Subject: ip_gre: remove the incorrect mtu limit for ipgre tap ipgre tap driver calls ether_setup(), after commit 61e84623ace3 ("net: centralize net_device min/max MTU checking"), the range of mtu is [min_mtu, max_mtu], which is [68, 1500] by default. It causes the dev mtu of the ipgre tap device to not be greater than 1500, this limit value is not correct for ipgre tap device. Besides, its .change_mtu already does the right check. So this patch is just to set max_mtu as 0, and leave the check to its .change_mtu. Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S.
Miller --- net/ipv4/ip_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 9c1735632c8c..45ffd3d045d2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = { static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &gre_tap_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; -- cgit v1.2.3 From 2c52129a7d74d017320804c6928de770815c5f4a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Dec 2017 14:25:09 +0800 Subject: ip6_gre: remove the incorrect mtu limit for ipgre tap The same fix as the patch "ip_gre: remove the incorrect mtu limit for ipgre tap" is also needed for ip6_gre. Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4cfd8e0696fe..416c8913f132 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1308,6 +1308,7 @@ static void ip6gre_tap_setup(struct net_device *dev) ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &ip6gre_tap_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free; -- cgit v1.2.3 From c9fefa08190fc879fb2e681035d7774e0a8c5170 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Dec 2017 14:26:21 +0800 Subject: ip6_tunnel: get the min mtu properly in ip6_tnl_xmit Now it's using IPV6_MIN_MTU as the min mtu in ip6_tnl_xmit, but IPV6_MIN_MTU actually only works when the inner packet is ipv6. With IPV6_MIN_MTU for ipv4 packets, the new pmtu for inner dst couldn't be set less than 1280. It would cause tx_err and the packet to be dropped when the outer dst pmtu is close to 1280. 
Jianlin found it by running ipv4 traffic with the topo: (client) gre6 <---> eth1 (route) eth2 <---> gre6 (server) After changing eth2 mtu to 1300, the performance became very low, or the connection was even broken. The issue also affects ip4ip6 and ip6ip6 tunnels. So if the inner packet is ipv4, 576 should be considered as the min mtu. Note that for ip4ip6 and ip6ip6 tunnels, the inner packet can only be ipv4 or ipv6, but for gre6 tunnel, it may also be ARP. This patch using 576 as the min mtu for non-ipv6 packet works for all those cases. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index db84f523656d..931c38f6ff4a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1123,8 +1123,13 @@ route_lookup: max_headroom += 8; mtu -= 8; } - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + } else if (mtu < 576) { + mtu = 576; + } + if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { -- cgit v1.2.3 From 3db096011722fd8717e57687ae94b6917a11c9cc Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 18 Dec 2017 20:03:05 +0100 Subject: tipc: fix list sorting bug in function tipc_group_update_member() When, during a join operation, or during message transmission, a group member needs to be added to the group's 'congested' list, we sort it into the list in ascending order, according to its current advertised window size. However, we miss the case when the member is already on that list. This will have the result that the member, after the window size has been decremented, might be at the wrong position in that list. 
This again may have the effect that we during broadcast and multicast transmissions miss the fact that a destination is not yet ready for reception, and we end up sending anyway. From this point on, the behavior during the remaining session is unpredictable, e.g., with underflowing window sizes. We now correct this bug by unconditionally removing the member from the list before (re-)sorting it in. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index b96ec429bb9b..bbc004eaa31a 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -351,8 +351,7 @@ void tipc_group_update_member(struct tipc_member *m, int len) if (m->window >= ADV_IDLE) return; - if (!list_empty(&m->congested)) - return; + list_del_init(&m->congested); /* Sort member into congested members' list */ list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { -- cgit v1.2.3 From 200922c93f008e03ddc804c6dacdf26ca1ba86d7 Mon Sep 17 00:00:00 2001 From: Fredrik Hallenberg Date: Mon, 18 Dec 2017 23:33:59 +0100 Subject: net: stmmac: Fix TX timestamp calculation When using GMAC4 the value written in PTP_SSIR should be shifted however the shifted value is also used in subsequent calculations which results in a bad timestamp value. Signed-off-by: Fredrik Hallenberg Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 721b61655261..08c19ebd5306 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, { u32 value = readl(ioaddr + PTP_TCR); unsigned long data; + u32 reg_value; /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second * formula = (1/ptp_clock) * 1000000000 @@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, data &= PTP_SSIR_SSINC_MASK; + reg_value = data; if (gmac4) - data = data << GMAC4_PTP_SSIR_SSINC_SHIFT; + reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT; - writel(data, ioaddr + PTP_SSIR); + writel(reg_value, ioaddr + PTP_SSIR); return data; } -- cgit v1.2.3 From a1762456993893795030d911106a7650481db0ef Mon Sep 17 00:00:00 2001 From: Fredrik Hallenberg Date: Mon, 18 Dec 2017 23:34:00 +0100 Subject: net: stmmac: Fix bad RX timestamp extraction As noted in dwmac4_wrback_get_rx_timestamp_status the timestamp is found in the context descriptor following the current descriptor. However the current code looks for the context descriptor in the current descriptor, which will always fail. Signed-off-by: Fredrik Hallenberg Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 5 +++-- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 3 ++- drivers/net/ethernet/stmicro/stmmac/norm_desc.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e1e5ac053760..ce2ea2d491ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -409,7 +409,7 @@ struct stmmac_desc_ops { /* get timestamp value */ u64(*get_timestamp) (void *desc, u32 ats); /* get rx timestamp status */ - int (*get_rx_timestamp_status) (void *desc, u32 ats); + int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats); /* Display ring */ void (*display_ring)(void *head, unsigned int size, bool rx); /* set MSS via context descriptor */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 4b286e27c4ca..7e089bf906b4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc) return ret; } -static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) +static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc, + u32 ats) { struct dma_desc *p = (struct dma_desc *)desc; int ret = -EINVAL; @@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) /* Check if timestamp is OK from context descriptor */ do { - ret = dwmac4_rx_check_timestamp(desc); + ret = dwmac4_rx_check_timestamp(next_desc); if (ret < 0) goto exit; i++; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 7546b3664113..2a828a312814 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats) return ns; } -static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats) +static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc, + u32 ats) { if (ats) { struct dma_extended_desc *p = (struct dma_extended_desc *)desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index f817f8f36569..db4cee57bb24 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats) return ns; } -static int ndesc_get_rx_timestamp_status(void *desc, u32 ats) +static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats) { struct dma_desc *p = (struct dma_desc *)desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d7250539d0bd..337d53d12e94 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, desc = np; /* Check if timestamp is available */ - if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) { + if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) { ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); shhwtstamp = skb_hwtstamps(skb); -- cgit v1.2.3 From d03a45572efa068fa64db211d6d45222660e76c5 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 19 Dec 2017 15:17:13 +0100 Subject: ipv4: fib: Fix metrics match when deleting a route The recently added fib_metrics_match() causes a regression for routes with both RTAX_FEATURES and RTAX_CC_ALGO if the latter has 
TCP_CONG_NEEDS_ECN flag set: | # ip link add d0 type dummy | # ip link set d0 up | # ip route add 172.29.29.0/24 dev d0 features ecn congctl dctcp | # ip route del 172.29.29.0/24 dev d0 features ecn congctl dctcp | RTNETLINK answers: No such process During route insertion, fib_convert_metrics() detects that the given CC algo requires ECN and hence sets DST_FEATURE_ECN_CA bit in RTAX_FEATURES. During route deletion though, fib_metrics_match() compares stored RTAX_FEATURES value with that from userspace (which obviously has no knowledge about DST_FEATURE_ECN_CA) and fails. Fixes: 5f9ae3d9e7e4a ("ipv4: do metrics match when looking up and deleting a route") Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f04d944f8abe..c586597da20d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { int type = nla_type(nla); - u32 val; + u32 fi_val, val; if (!type) continue; @@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) val = nla_get_u32(nla); } - if (fi->fib_metrics->metrics[type - 1] != val) + fi_val = fi->fib_metrics->metrics[type - 1]; + if (type == RTAX_FEATURES) + fi_val &= ~DST_FEATURE_ECN_CA; + + if (fi_val != val) return false; } -- cgit v1.2.3 From 61d2f2a05765a5f57149efbd93e3e81a83cbc2c1 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 18 Dec 2017 11:57:51 +0800 Subject: clk: sunxi: sun9i-mmc: Implement reset callback for reset controls Our MMC host driver now issues a reset, instead of just deasserting the reset control, since commit c34eda69ad4c ("mmc: sunxi: Reset the device at probe time"). The sun9i-mmc clock driver does not support this, and will fail, which results in MMC not probing. 
This patch implements the reset callback by asserting the reset control, then deasserting it after a small delay. Fixes: 7a6fca879f59 ("clk: sunxi: Add driver for A80 MMC config clocks/resets") Cc: # 4.14.x Signed-off-by: Chen-Yu Tsai Acked-by: Philipp Zabel Acked-by: Maxime Ripard Signed-off-by: Michael Turquette Link: lkml.kernel.org/r/20171218035751.20661-1-wens@csie.org --- drivers/clk/sunxi/clk-sun9i-mmc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index a1a634253d6f..f00d8758ba24 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev, return 0; } +static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + sun9i_mmc_reset_assert(rcdev, id); + udelay(10); + sun9i_mmc_reset_deassert(rcdev, id); + + return 0; +} + static const struct reset_control_ops sun9i_mmc_reset_ops = { .assert = sun9i_mmc_reset_assert, .deassert = sun9i_mmc_reset_deassert, + .reset = sun9i_mmc_reset_reset, }; static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) -- cgit v1.2.3 From bae115a2bb479142605726e6aa130f43f50e801a Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Sun, 29 Oct 2017 04:03:37 +0200 Subject: net/mlx5: FPGA, return -EINVAL if size is zero Currently, if a size of zero is passed to mlx5_fpga_mem_{read|write}_i2c() the "err" return value will not be initialized, which triggers gcc warnings: [..]/mlx5/core/fpga/sdk.c:87 mlx5_fpga_mem_read_i2c() error: uninitialized symbol 'err'. [..]/mlx5/core/fpga/sdk.c:115 mlx5_fpga_mem_write_i2c() error: uninitialized symbol 'err'. fix that. 
Fixes: a9956d35d199 ('net/mlx5: FPGA, Add SBU infrastructure') Signed-off-by: Kamal Heib Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c index 3c11d6e2160a..14962969c5ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c @@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size, u8 actual_size; int err; + if (!size) + return -EINVAL; + if (!fdev->mdev) return -ENOTCONN; @@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size, u8 actual_size; int err; + if (!size) + return -EINVAL; + if (!fdev->mdev) return -ENOTCONN; -- cgit v1.2.3 From 231243c82793428467524227ae02ca451e6a98e7 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 10 Nov 2017 15:59:52 +0900 Subject: Revert "mlx5: move affinity hints assignments to generic code" Before the offending commit, mlx5 core did the IRQ affinity itself, and it seems that the new generic code have some drawbacks and one of them is the lack for user ability to modify irq affinity after the initial affinity values got assigned. The issue is still being discussed and a solution in the new generic code is required, until then we need to revert this patch. This fixes the following issue: echo > /proc/irq//smp_affinity fails with -EIO This reverts commit a435393acafbf0ecff4deb3e3cb554b34f0d0664. Note: kept mlx5_get_vector_affinity in include/linux/mlx5/driver.h since it is used in mlx5_ib driver. 
Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code") Cc: Sagi Grimberg Cc: Thomas Gleixner Cc: Jes Sorensen Reported-by: Jes Sorensen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 45 +++++++------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 75 +++++++++++++++++++++-- include/linux/mlx5/driver.h | 1 + 4 files changed, 93 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index c0872b3284cb..43f9054830e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -590,6 +590,7 @@ struct mlx5e_channel { struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; int ix; + int cpu; }; struct mlx5e_channels { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d2b057a3e512..cbec66bc82f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -71,11 +71,6 @@ struct mlx5e_channel_param { struct mlx5e_cq_param icosq_cq; }; -static int mlx5e_get_node(struct mlx5e_priv *priv, int ix) -{ - return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix); -} - static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { return MLX5_CAP_GEN(mdev, striding_rq) && @@ -444,17 +439,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int wq_sz = mlx5_wq_ll_get_size(&rq->wq); int mtt_sz = mlx5e_get_wqe_mtt_sz(); int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; - int node = mlx5e_get_node(c->priv, c->ix); int i; rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), - GFP_KERNEL, node); + GFP_KERNEL, cpu_to_node(c->cpu)); if (!rq->mpwqe.info) goto err_out; /* We allocate more than mtt_sz as we will align the pointer */ - rq->mpwqe.mtt_no_align = 
kzalloc_node(mtt_alloc * wq_sz, - GFP_KERNEL, node); + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + cpu_to_node(c->cpu)); if (unlikely(!rq->mpwqe.mtt_no_align)) goto err_free_wqe_info; @@ -562,7 +556,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, int err; int i; - rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + rqp->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); @@ -629,8 +623,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, default: /* MLX5_WQ_TYPE_LINKED_LIST */ rq->wqe.frag_info = kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info), - GFP_KERNEL, - mlx5e_get_node(c->priv, c->ix)); + GFP_KERNEL, cpu_to_node(c->cpu)); if (!rq->wqe.frag_info) { err = -ENOMEM; goto err_rq_wq_destroy; @@ -1000,13 +993,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix)); + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; @@ -1053,13 +1046,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix)); + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; @@ -1126,13 +1119,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, if
(MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) return err; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix)); + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; @@ -1504,8 +1497,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->priv->mdev; int err; - param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix); - param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix); + param->wq.buf_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = cpu_to_node(c->cpu); param->eq_ix = c->ix; err = mlx5e_alloc_cq_common(mdev, param, cq); @@ -1604,6 +1597,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) +{ + return cpumask_first(priv->mdev->priv.irq_info[ix].mask); +} + static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1752,12 +1750,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, { struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; + int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; unsigned int irq; int err; int eqn; - c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); + c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; @@ -1765,6 +1764,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mdev = priv->mdev; c->tstamp = &priv->tstamp; c->ix = ix; + c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); @@ -1853,8 +1853,7 @@ static void mlx5e_activate_channel(struct
mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, - mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix); + netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 5f323442cc5a..8a89c7e8cd63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct mlx5_eq_table *table = &priv->eq_table; - struct irq_affinity irqdesc = { - .pre_vectors = MLX5_EQ_VEC_COMP_BASE, - }; int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; @@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) if (!priv->irq_info) goto err_free_msix; - nvec = pci_alloc_irq_vectors_affinity(dev->pdev, + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, nvec, - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, - &irqdesc); + PCI_IRQ_MSIX); if (nvec < 0) return nvec; @@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) return (u64)timer_l | (u64)timer_h1 << 32; } +static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + + if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), + priv->irq_info[i].mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); + + return 0; +} + +static void 
mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(priv->irq_info[i].mask); +} + +static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { + err = mlx5_irq_set_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + mlx5_irq_clear_affinity_hint(mdev, i); + + return err; +} + +static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) + mlx5_irq_clear_affinity_hint(mdev, i); +} + int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { @@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_eqs; } + err = mlx5_irq_set_affinity_hints(dev); + if (err) { + dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); + goto err_affinity_hints; + } + err = mlx5_init_fs(dev); if (err) { dev_err(&pdev->dev, "Failed to init flow steering\n"); @@ -1154,6 +1213,9 @@ err_sriov: mlx5_cleanup_fs(dev); err_fs: + mlx5_irq_clear_affinity_hints(dev); + +err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); + mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_put_uars_page(dev, priv->uar); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a886b51511ab..40a6f33c4cde 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -556,6 +556,7 @@ struct mlx5_core_sriov { }; struct mlx5_irq_info { + cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; -- cgit v1.2.3 From 
37e92a9d4fe38dc3e7308913575983a6a088c8d4 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 13 Nov 2017 10:11:27 +0200 Subject: net/mlx5: Fix rate limit packet pacing naming and struct In mlx5_ifc, struct size was not complete, and thus driver was sending garbage after the last defined field. Fixed it by adding reserved field to complete the struct size. In addition, rename all set_rate_limit to set_pp_rate_limit to be compliant with the Firmware <-> Driver definition. Fixes: 7486216b3a0b ("{net,IB}/mlx5: mlx5_ifc updates") Fixes: 1466cc5b23d1 ("net/mlx5: Rate limit tables support") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 22 +++++++++++----------- include/linux/mlx5/mlx5_ifc.h | 8 +++++--- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1fffdebbc9e8..e9a1fbcc4adf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: - case MLX5_CMD_OP_SET_RATE_LIMIT: + case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: @@ -505,7 +505,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); - MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c 
b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index e651e4c02867..d3c33e9eea72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, return ret_entry; } -static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, +static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev, u32 rate, u16 index) { - u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0}; - MLX5_SET(set_rate_limit_in, in, opcode, - MLX5_CMD_OP_SET_RATE_LIMIT); - MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); - MLX5_SET(set_rate_limit_in, in, rate_limit, rate); + MLX5_SET(set_pp_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_PP_RATE_LIMIT); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) entry->refcount++; } else { /* new rate limit */ - err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); + err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index); if (err) { mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", rate, err); @@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate) entry->refcount--; if (!entry->refcount) { /* need to remove rate */ - mlx5_set_rate_limit_cmd(dev, 0, entry->index); + mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index); entry->rate = 0; } @@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) /* Clear all configured rates */ for (i = 0; i < table->max_size; i++) if (table->rl_entry[i].rate) - mlx5_set_rate_limit_cmd(dev, 0, - table->rl_entry[i].index); + mlx5_set_pp_rate_limit_cmd(dev, 0, + 
table->rl_entry[i].index); kfree(dev->priv.rl_table.rl_entry); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 38a7577a9ce7..d44ec5f41d4a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -147,7 +147,7 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, - MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, @@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 vxlan_udp_port[0x10]; }; -struct mlx5_ifc_set_rate_limit_out_bits { +struct mlx5_ifc_set_pp_rate_limit_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_set_rate_limit_in_bits { +struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits { u8 reserved_at_60[0x20]; u8 rate_limit[0x20]; + + u8 reserved_at_a0[0x160]; }; struct mlx5_ifc_access_register_out_bits { -- cgit v1.2.3 From ff0891915cd7b24ab27eee9b360c0452853bf9f6 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 26 Oct 2017 09:56:34 -0500 Subject: net/mlx5e: Fix ETS BW check Fix bug that allows ets bw sum to be 0% when ets tc type exists. 
Fixes: 08fb1dacdd76 ('net/mlx5e: Support DCBNL IEEE ETS') Signed-off-by: Moshe Shemesh Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index c6d90b6dd80e..9bcf38f4123b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, struct ieee_ets *ets) { + bool have_ets_tc = false; int bw_sum = 0; int i; @@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, } /* Validate Bandwidth Sum */ - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + have_ets_tc = true; bw_sum += ets->tc_tx_bw[i]; + } + } - if (bw_sum != 0 && bw_sum != 100) { + if (have_ets_tc && bw_sum != 100) { netdev_err(netdev, "Failed to validate ETS: BW sum is illegal\n"); return -EINVAL; -- cgit v1.2.3 From 2989ad1ec03021ee6d2193c35414f1d970a243de Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 21 Nov 2017 17:49:36 +0200 Subject: net/mlx5e: Fix features check of IPv6 traffic The assumption that the next header field contains the transport protocol is wrong for IPv6 packets with extension headers. Instead, we should look at the inner-most next header field in the buffer. This will fix TSO offload for tunnels over IPv6 with extension headers. Performance testing: 19.25x improvement, cool! Measuring bandwidth of 16 threads TCP traffic over IPv6 GRE tap.
CPU: Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz NIC: Mellanox Technologies MT28800 Family [ConnectX-5 Ex] TSO: Enabled Before: 4,926.24 Mbps Now : 94,827.91 Mbps Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cbec66bc82f1..c535a44ab8ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3678,6 +3678,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, struct sk_buff *skb, netdev_features_t features) { + unsigned int offset = 0; struct udphdr *udph; u8 proto; u16 port; @@ -3687,7 +3688,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, proto = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6): - proto = ipv6_hdr(skb)->nexthdr; + proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL); break; default: goto out; -- cgit v1.2.3 From 696a97cf9f5c551fca257e0d4aa07b5cbde6084a Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Tue, 14 Nov 2017 09:44:55 +0200 Subject: net/mlx5e: Fix defaulting RX ring size when not needed Fixes the bug when turning on/off CQE compression mechanism resets the RX rings size to default value when it is not needed. 
Fixes: 2fc4bfb7250d ("net/mlx5e: Dynamic RQ type infrastructure") Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 ++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 10 ++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++-------- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 43f9054830e5..543060c305a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -82,6 +82,9 @@ max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) +#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \ + (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \ + MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)) #define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? 
\ @@ -936,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u8 rq_type); +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u8 rq_type); static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 23425f028405..8f05efa5c829 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); - mlx5e_set_rq_type_params(priv->mdev, &new_channels.params, - new_channels.params.rq_wq_type); + new_channels.params.mpwqe_log_stride_sz = + MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val); + new_channels.params.mpwqe_log_num_strides = + MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val return err; mlx5e_switch_priv_channels(priv, &new_channels, NULL); + mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", + MLX5E_GET_PFLAG(&priv->channels.params, + MLX5E_PFLAG_RX_CQE_COMPRESS) ? 
"ON" : "OFF"); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c535a44ab8ac..d9d8227f195f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -78,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, reg_umr_sq); } -void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u8 rq_type) +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u8 rq_type) { params->rq_wq_type = rq_type; params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; @@ -88,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - params->mpwqe_log_stride_sz = - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? - MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : - MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); + params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev, + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - params->mpwqe_log_stride_sz; break; @@ -115,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ? 
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; - mlx5e_set_rq_type_params(mdev, params, rq_type); + mlx5e_init_rq_type_params(mdev, params, rq_type); } static void mlx5e_update_carrier(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index d2a66dc4adc6..8812d7208e8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */ - mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); + mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST); /* RQ size in ipoib by default is 512 */ params->log_rq_size = is_kdump_kernel() ? -- cgit v1.2.3 From 777ec2b2a3f2760505db395de1a9fa4115d74548 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 16 Nov 2017 14:57:48 +0200 Subject: net/mlx5: Fix misspelling in the error message and comment Fix misspelling in word syndrome. 
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 60771865c99c..0308a2b4823c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_CQ_ERROR: cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; - mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n", + mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", cqn, eqe->data.cq_err.syndrome); mlx5_cq_event(dev, cqn, eqe->type); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 1a0e797ad001..21d29f7936f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev) u32 fw; int i; - /* If the syndrom is 0, the device is OK and no need to print buffer */ + /* If the syndrome is 0, the device is OK and no need to print buffer */ if (!ioread8(&h->synd)) return; -- cgit v1.2.3 From dbff26e44dc3ec4de6578733b054a0114652a764 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Mon, 4 Dec 2017 08:59:25 +0200 Subject: net/mlx5: Fix error flow in CREATE_QP command In error flow, when DESTROY_QP command should be executed, the wrong mailbox was set with data, not the one that is written to hardware. Fix that.
Fixes: 09a7d9eca1a6 '{net,IB}/mlx5: QP/XRCD commands via mlx5 ifc' Signed-off-by: Moni Shoua Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index db9e665ab104..889130edb715 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); - MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); - MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } -- cgit v1.2.3 From 6323514116404cc651df1b7fffa1311ddf8ce647 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Nov 2017 13:52:28 +0200 Subject: net/mlx5e: Fix possible deadlock of VXLAN lock mlx5e_vxlan_lookup_port is called both from mlx5e_add_vxlan_port (user context) and mlx5e_features_check (softirq), but the lock acquired does not disable bottom half and might result in deadlock. Fix it by simply replacing spin_lock() with spin_lock_bh(). While at it, replace all unnecessary spin_lock_irq() with spin_lock_bh(). lockdep's WARNING: inconsistent lock state [ 654.028136] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[ 654.028229] swapper/5/0 [HC0[0]:SC1[9]:HE1:SE0] takes: [ 654.028321] (&(&vxlan_db->lock)->rlock){+.?.}, at: [] mlx5e_vxlan_lookup_port+0x1e/0x50 [mlx5_core] [ 654.028528] {SOFTIRQ-ON-W} state was registered at: [ 654.028607] _raw_spin_lock+0x3c/0x70 [ 654.028689] mlx5e_vxlan_lookup_port+0x1e/0x50 [mlx5_core] [ 654.028794] mlx5e_vxlan_add_port+0x2e/0x120 [mlx5_core] [ 654.028878] process_one_work+0x1e9/0x640 [ 654.028942] worker_thread+0x4a/0x3f0 [ 654.029002] kthread+0x141/0x180 [ 654.029056] ret_from_fork+0x24/0x30 [ 654.029114] irq event stamp: 579088 [ 654.029174] hardirqs last enabled at (579088): [] ip6_finish_output2+0x49a/0x8c0 [ 654.029309] hardirqs last disabled at (579087): [] ip6_finish_output2+0x44e/0x8c0 [ 654.029446] softirqs last enabled at (579030): [] irq_enter+0x6d/0x80 [ 654.029567] softirqs last disabled at (579031): [] irq_exit+0xb5/0xc0 [ 654.029684] other info that might help us debug this: [ 654.029781] Possible unsafe locking scenario: [ 654.029868] CPU0 [ 654.029908] ---- [ 654.029947] lock(&(&vxlan_db->lock)->rlock); [ 654.030045] [ 654.030090] lock(&(&vxlan_db->lock)->rlock); [ 654.030162] *** DEADLOCK *** Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index 07a9ba6cfc70..f8238275759f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port) struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; struct mlx5e_vxlan *vxlan; - spin_lock(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_lookup(&vxlan_db->tree, port); - spin_unlock(&vxlan_db->lock); + 
spin_unlock_bh(&vxlan_db->lock); return vxlan; } @@ -100,9 +100,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) vxlan->udp_port = port; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); if (err) goto err_free; @@ -121,9 +121,9 @@ static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; struct mlx5e_vxlan *vxlan; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_delete(&vxlan_db->tree, port); - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); if (!vxlan) return; @@ -171,12 +171,12 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) struct mlx5e_vxlan *vxlan; unsigned int port = 0; - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { port = vxlan->udp_port; - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); __mlx5e_vxlan_core_del_port(priv, (u16)port); - spin_lock_irq(&vxlan_db->lock); + spin_lock_bh(&vxlan_db->lock); } - spin_unlock_irq(&vxlan_db->lock); + spin_unlock_bh(&vxlan_db->lock); } -- cgit v1.2.3 From 23f4cc2cd9ed92570647220aca60d0197d8c1fa9 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 3 Dec 2017 13:58:50 +0200 Subject: net/mlx5e: Add refcount to VXLAN structure A refcount mechanism must be implemented in order to prevent unwanted scenarios such as: - Open an IPv4 VXLAN interface - Open an IPv6 VXLAN interface (different socket) - Remove one of the interfaces With current implementation, the UDP port will be removed from our VXLAN database and turn off the offloads for the other interface, which is still active. The reference count mechanism will only allow UDP port removals once all consumers are gone. 
Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 50 +++++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/vxlan.h | 1 + 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index f8238275759f..25f782344667 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -88,8 +88,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; - if (mlx5e_vxlan_lookup_port(priv, port)) + vxlan = mlx5e_vxlan_lookup_port(priv, port); + if (vxlan) { + atomic_inc(&vxlan->refcount); goto free_work; + } if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; @@ -99,6 +102,7 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) goto err_delete_port; vxlan->udp_port = port; + atomic_set(&vxlan->refcount, 1); spin_lock_bh(&vxlan_db->lock); err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan); @@ -116,32 +120,33 @@ free_work: kfree(vxlan_work); } -static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port) +static void mlx5e_vxlan_del_port(struct work_struct *work) { + struct mlx5e_vxlan_work *vxlan_work = + container_of(work, struct mlx5e_vxlan_work, work); + struct mlx5e_priv *priv = vxlan_work->priv; struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan; + u16 port = vxlan_work->port; struct mlx5e_vxlan *vxlan; + bool remove = false; spin_lock_bh(&vxlan_db->lock); - vxlan = radix_tree_delete(&vxlan_db->tree, port); - spin_unlock_bh(&vxlan_db->lock); - + vxlan = radix_tree_lookup(&vxlan_db->tree, port); if (!vxlan) - return; - - mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port); - - kfree(vxlan); -} + goto out_unlock; -static void mlx5e_vxlan_del_port(struct work_struct *work) -{ - 
struct mlx5e_vxlan_work *vxlan_work = - container_of(work, struct mlx5e_vxlan_work, work); - struct mlx5e_priv *priv = vxlan_work->priv; - u16 port = vxlan_work->port; + if (atomic_dec_and_test(&vxlan->refcount)) { + radix_tree_delete(&vxlan_db->tree, port); + remove = true; + } - __mlx5e_vxlan_core_del_port(priv, port); +out_unlock: + spin_unlock_bh(&vxlan_db->lock); + if (remove) { + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); + } kfree(vxlan_work); } @@ -171,12 +176,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) struct mlx5e_vxlan *vxlan; unsigned int port = 0; - spin_lock_bh(&vxlan_db->lock); + /* Lockless since we are the only radix-tree consumers, wq is disabled */ while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) { port = vxlan->udp_port; - spin_unlock_bh(&vxlan_db->lock); - __mlx5e_vxlan_core_del_port(priv, (u16)port); - spin_lock_bh(&vxlan_db->lock); + radix_tree_delete(&vxlan_db->tree, port); + mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); + kfree(vxlan); } - spin_unlock_bh(&vxlan_db->lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h index 5def12c048e3..5ef6ae7d568a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -36,6 +36,7 @@ #include "en.h" struct mlx5e_vxlan { + atomic_t refcount; u16 udp_port; }; -- cgit v1.2.3 From 0c1cc8b2215f5122ca614b5adca60346018758c3 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 4 Dec 2017 09:57:43 +0200 Subject: net/mlx5e: Prevent possible races in VXLAN control flow When calling add/remove VXLAN port, a lock must be held in order to prevent race scenarios when more than one add/remove happens at the same time. Fix by holding our state_lock (mutex) as done by all other parts of the driver. Note that the spinlock protecting the radix-tree is still needed in order to synchronize radix-tree access from softirq context. 
Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index 25f782344667..2f74953e4561 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -88,6 +88,7 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; + mutex_lock(&priv->state_lock); vxlan = mlx5e_vxlan_lookup_port(priv, port); if (vxlan) { atomic_inc(&vxlan->refcount); @@ -117,6 +118,7 @@ err_free: err_delete_port: mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); free_work: + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } @@ -130,6 +132,7 @@ static void mlx5e_vxlan_del_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; bool remove = false; + mutex_lock(&priv->state_lock); spin_lock_bh(&vxlan_db->lock); vxlan = radix_tree_lookup(&vxlan_db->tree, port); if (!vxlan) @@ -147,6 +150,7 @@ out_unlock: mlx5e_vxlan_core_del_port_cmd(priv->mdev, port); kfree(vxlan); } + mutex_unlock(&priv->state_lock); kfree(vxlan_work); } -- cgit v1.2.3 From 139ed6c6c46aa3d8970a086b8e0cf1f3522f5d4a Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 5 Dec 2017 13:45:21 +0200 Subject: net/mlx5: Fix steering memory leak Flow steering priority and namespace are software only objects that didn't have the proper destructors and were not freed during steering cleanup. Fix it by adding destructor functions for these objects. 
Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c70fd663a633..dfaad9ecb2b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node); static void del_sw_flow_table(struct fs_node *node); static void del_sw_flow_group(struct fs_node *node); static void del_sw_fte(struct fs_node *node); +static void del_sw_prio(struct fs_node *node); +static void del_sw_ns(struct fs_node *node); /* Delete rule (destination) is special case that * requires to lock the FTE for all the deletion process. */ @@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node) return NULL; } +static void del_sw_ns(struct fs_node *node) +{ + kfree(node); +} + +static void del_sw_prio(struct fs_node *node) +{ + kfree(node); +} + static void del_hw_flow_table(struct fs_node *node) { struct mlx5_flow_table *ft; @@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENOMEM); fs_prio->node.type = FS_TYPE_PRIO; - tree_init_node(&fs_prio->node, NULL, NULL); + tree_init_node(&fs_prio->node, NULL, del_sw_prio); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; fs_prio->prio = prio; @@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); - tree_init_node(&ns->node, NULL, NULL); + tree_init_node(&ns->node, NULL, del_sw_ns); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); -- cgit v1.2.3 From 
d6b2785cd55ee72e9608762650b3ef299f801b1b Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 21 Nov 2017 15:15:51 +0200 Subject: net/mlx5: Cleanup IRQs in case of unload failure When mlx5_stop_eqs fails to destroy any of the eqs it returns with an error. In such a failure flow the function will return without releasing all EQs irqs and then pci_free_irq_vectors will fail. Fix by only warning on EQ destroy failure and continuing to release the other EQs and their irqs. It fixes the following kernel trace: kernel: kernel BUG at drivers/pci/msi.c:352! ... ... kernel: Call Trace: kernel: pci_disable_msix+0xd3/0x100 kernel: pci_free_irq_vectors+0xe/0x20 kernel: mlx5_load_one.isra.17+0x9f5/0xec0 [mlx5_core] Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 20 +++++++++++++------- include/linux/mlx5/driver.h | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0308a2b4823c..ab4d1465b7e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -775,7 +775,7 @@ err1: return err; } -int mlx5_stop_eqs(struct mlx5_core_dev *dev) +void mlx5_stop_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; int err; @@ -784,22 +784,28 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pg)) { err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); if (err) - return err; + mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", + err); } #endif err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); if (err) - return err; + mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", + err); - mlx5_destroy_unmap_eq(dev, &table->async_eq); + err = mlx5_destroy_unmap_eq(dev, &table->async_eq); + if (err) + mlx5_core_err(dev, "failed to destroy async
eq, err(%d)\n", + err); mlx5_cmd_use_polling(dev); err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); - if (err) + if (err) { + mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", + err); mlx5_cmd_use_events(dev); - - return err; + } } int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 40a6f33c4cde..57b109c6e422 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1049,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, enum mlx5_eq_type type); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); -int mlx5_stop_eqs(struct mlx5_core_dev *dev); +void mlx5_stop_eqs(struct mlx5_core_dev *dev); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); -- cgit v1.2.3 From a2fba188fd5eadd6061bef4f2f2577a43231ebf3 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 4 Dec 2017 15:23:51 +0200 Subject: net/mlx5: Stay in polling mode when command EQ destroy fails During unload, on mlx5_stop_eqs we move command interface from events mode to polling mode, but if command interface EQ destroy fails we move back to events mode. That's wrong since even if we fail to destroy command interface EQ, we do release its irq, so no interrupts will be received.
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ab4d1465b7e4..e7e7cef2bde4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -801,11 +801,9 @@ void mlx5_stop_eqs(struct mlx5_core_dev *dev) mlx5_cmd_use_polling(dev); err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); - if (err) { + if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); - mlx5_cmd_use_events(dev); - } } int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, -- cgit v1.2.3 From 4ef928929987c19fff4d3c1650f139560ba1cc13 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 20 Dec 2017 08:38:46 +1000 Subject: drm/nouveau: fix obvious memory leak fdo#104340. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_vmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 9e2628dd8e4d..f5371d96b003 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma) nvif_vmm_put(&vma->vmm->vmm, &tmp); } list_del(&vma->head); - *pvma = NULL; kfree(*pvma); + *pvma = NULL; } } -- cgit v1.2.3 From 19deaa217bc04e83b59b5e8c8229eb0e53ad9efc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 19 Dec 2017 15:07:10 -0800 Subject: libnvdimm, pfn: fix start_pad handling for aligned namespaces The alignment checks at pfn driver startup fail to properly account for the 'start_pad' in the case where the namespace is misaligned relative to its internal alignment. 
This is typically triggered in 1G aligned namespace, but could theoretically trigger with small namespace alignments. When this triggers the kernel reports messages of the form: dax2.1: bad offset: 0x3c000000 dax disabled align: 0x40000000 Cc: Fixes: 1ee6667cd8d1 ("libnvdimm, pfn, dax: fix initialization vs autodetect...") Reported-by: Jane Chu Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 65cc171c721d..db2fc7c02e01 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; - unsigned long align; enum nd_pfn_mode mode; struct nd_namespace_io *nsio; + unsigned long align, start_pad; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); @@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) align = le32_to_cpu(pfn_sb->align); offset = le64_to_cpu(pfn_sb->dataoff); + start_pad = le32_to_cpu(pfn_sb->start_pad); if (align == 0) align = 1UL << ilog2(offset); mode = le32_to_cpu(pfn_sb->mode); @@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((align && !IS_ALIGNED(offset, align)) + if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled align: %#lx\n", -- cgit v1.2.3 From 19c832ed9b8f7b49fa5eeef06b4338af5fe5c1dc Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 19 Dec 2017 15:22:03 -0500 Subject: bpf: Fix tools and testing build. I'm getting various build failures on sparc64. The key is usually that the userland tools get built 32-bit. 
1) clock_gettime() is in librt, so that must be added to the link libraries. 2) "sizeof(x)" must be printed with "%Z" printf prefix. Signed-off-by: David S. Miller Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_progs.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 792af7c3b74f..05fc4e2e7b3a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),) endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include -LDLIBS += -lcap -lelf +LDLIBS += -lcap -lelf -lrt TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 69427531408d..6761be18a91f 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -351,7 +351,7 @@ static void test_bpf_obj_id(void) info_len != sizeof(struct bpf_map_info) || strcmp((char *)map_infos[i].name, expected_map_name), "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", + "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", err, errno, map_infos[i].type, BPF_MAP_TYPE_ARRAY, info_len, sizeof(struct bpf_map_info), @@ -395,7 +395,7 @@ static void test_bpf_obj_id(void) *(int *)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name 
%s(%s)\n", + "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), @@ -463,7 +463,7 @@ static void test_bpf_obj_id(void) memcmp(&prog_info, &prog_infos[i], info_len) || *(int *)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", + "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), memcmp(&prog_info, &prog_infos[i], info_len), *(int *)prog_info.map_ids, saved_map_id); @@ -509,7 +509,7 @@ static void test_bpf_obj_id(void) memcmp(&map_info, &map_infos[i], info_len) || array_value != array_magic_value, "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n", + "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n", err, errno, info_len, sizeof(struct bpf_map_info), memcmp(&map_info, &map_infos[i], info_len), array_value, array_magic_value); -- cgit v1.2.3 From 41fce90f26333c4fa82e8e43b9ace86c4e8a0120 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 4 Dec 2017 14:07:43 -0800 Subject: libnvdimm, dax: fix 1GB-aligned namespaces vs physical misalignment The following namespace configuration attempt: # ndctl create-namespace -e namespace0.0 -m devdax -a 1G -f libndctl: ndctl_dax_enable: dax0.1: failed to enable Error: namespace0.0: failed to enable failed to reconfigure namespace: No such device or address ...fails when the backing memory range is not physically aligned to 1G: # cat /proc/iomem | grep Persistent 210000000-30fffffff : Persistent Memory (legacy) In the above example the 4G persistent memory range starts and ends on a 256MB boundary. 
We handle this case correctly when needing to handle cases that violate section alignment (128MB) collisions against "System RAM", and we simply need to extend that padding/truncation for the 1GB alignment use case. Cc: Fixes: 315c562536c4 ("libnvdimm, pfn: add 'align' attribute...") Reported-and-tested-by: Jane Chu Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index db2fc7c02e01..2adada1a5855 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -583,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, return altmap; } +static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) +{ + return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys), + ALIGN_DOWN(phys, nd_pfn->align)); +} + static int nd_pfn_init(struct nd_pfn *nd_pfn) { u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; @@ -638,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) start = nsio->res.start; size = PHYS_SECTION_ALIGN_UP(start + size) - start; if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE) == REGION_MIXED) { + IORES_DESC_NONE) == REGION_MIXED + || !IS_ALIGNED(start + resource_size(&nsio->res), + nd_pfn->align)) { size = resource_size(&nsio->res); - end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); + end_trunc = start + size - phys_pmem_align_down(nd_pfn, + start + size); } if (start_pad + end_trunc) - dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", + dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n", dev_name(&ndns->dev), start_pad + end_trunc); /* -- cgit v1.2.3 From 10a7e9d849150a2879efc0b04d8a51068c9dd0c5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 19 Dec 2017 13:52:23 -0800 Subject: Do not hash userspace addresses in fault handlers The hashing of %p was designed to restrict kernel 
addresses. There is no reason to hash the userspace values seen during a segfault report, so switch these to %px. (Some architectures already use %lx.) Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- arch/sparc/mm/fault_32.c | 2 +- arch/sparc/mm/fault_64.c | 2 +- arch/um/kernel/trap.c | 2 +- arch/x86/mm/fault.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index be3136f142a9..a8103a84b4ac 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->pc, (void *)regs->u_regs[UREG_I7], diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 815c03d7a765..41363f46797b 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 4e6fcb32620f..428644175956 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs) if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x", task_pid_nr(tsk) > 1 ? 
KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), (void *)UPT_IP(regs), (void *)UPT_SP(regs), diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index febf6980e653..06fe3d51d385 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->ip, (void *)regs->sp, error_code); -- cgit v1.2.3 From 8e9d8e19b3d0c36d45161233eee3f2d368efe3ac Mon Sep 17 00:00:00 2001 From: "Subhransu S. Prusty" Date: Mon, 18 Dec 2017 10:46:49 +0530 Subject: ASoC: Intel: Skylake: Request IRQ late only after all context are initialized Sometimes during boot, panic is observed at sst_dsp_shim_read_unlocked(). This happens when interrupt occurs before the context is initialized. So move the irq initialization only after the context is initialized completely. Signed-off-by: Subhransu S. 
Prusty Signed-off-by: Pawse, GuruprasadX Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/bxt-sst.c | 2 +- sound/soc/intel/skylake/cnl-sst.c | 2 +- sound/soc/intel/skylake/skl-sst-dsp.c | 14 ++++++++++---- sound/soc/intel/skylake/skl-sst-dsp.h | 1 + sound/soc/intel/skylake/skl-sst.c | 2 +- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/sound/soc/intel/skylake/bxt-sst.c b/sound/soc/intel/skylake/bxt-sst.c index 4524211960e4..440bca7afbf1 100644 --- a/sound/soc/intel/skylake/bxt-sst.c +++ b/sound/soc/intel/skylake/bxt-sst.c @@ -595,7 +595,7 @@ int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, INIT_DELAYED_WORK(&skl->d0i3.work, bxt_set_dsp_D0i3); skl->d0i3.state = SKL_DSP_D0I3_NONE; - return 0; + return skl_dsp_acquire_irq(sst); } EXPORT_SYMBOL_GPL(bxt_sst_dsp_init); diff --git a/sound/soc/intel/skylake/cnl-sst.c b/sound/soc/intel/skylake/cnl-sst.c index 387de388ce29..245df1067ba8 100644 --- a/sound/soc/intel/skylake/cnl-sst.c +++ b/sound/soc/intel/skylake/cnl-sst.c @@ -458,7 +458,7 @@ int cnl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, cnl->boot_complete = false; init_waitqueue_head(&cnl->boot_wait); - return 0; + return skl_dsp_acquire_irq(sst); } EXPORT_SYMBOL_GPL(cnl_sst_dsp_init); diff --git a/sound/soc/intel/skylake/skl-sst-dsp.c b/sound/soc/intel/skylake/skl-sst-dsp.c index 19ee1d4f3bdf..71e31ad0bb3f 100644 --- a/sound/soc/intel/skylake/skl-sst-dsp.c +++ b/sound/soc/intel/skylake/skl-sst-dsp.c @@ -435,16 +435,22 @@ struct sst_dsp *skl_dsp_ctx_init(struct device *dev, return NULL; } + return sst; +} + +int skl_dsp_acquire_irq(struct sst_dsp *sst) +{ + struct sst_dsp_device *sst_dev = sst->sst_dev; + int ret; + /* Register the ISR */ ret = request_threaded_irq(sst->irq, sst->ops->irq_handler, sst_dev->thread, IRQF_SHARED, "AudioDSP", sst); - if (ret) { + if (ret) dev_err(sst->dev, "unable to grab threaded IRQ %d, disabling device\n", sst->irq); 
- return NULL; - } - return sst; + return ret; } void skl_dsp_free(struct sst_dsp *dsp) diff --git a/sound/soc/intel/skylake/skl-sst-dsp.h b/sound/soc/intel/skylake/skl-sst-dsp.h index b8e799ed65ef..12fc9a73dc8a 100644 --- a/sound/soc/intel/skylake/skl-sst-dsp.h +++ b/sound/soc/intel/skylake/skl-sst-dsp.h @@ -206,6 +206,7 @@ int skl_cldma_wait_interruptible(struct sst_dsp *ctx); void skl_dsp_set_state_locked(struct sst_dsp *ctx, int state); struct sst_dsp *skl_dsp_ctx_init(struct device *dev, struct sst_dsp_device *sst_dev, int irq); +int skl_dsp_acquire_irq(struct sst_dsp *sst); bool is_skl_dsp_running(struct sst_dsp *ctx); unsigned int skl_dsp_get_enabled_cores(struct sst_dsp *ctx); diff --git a/sound/soc/intel/skylake/skl-sst.c b/sound/soc/intel/skylake/skl-sst.c index a436abf2fe3f..5a7e41b65ef3 100644 --- a/sound/soc/intel/skylake/skl-sst.c +++ b/sound/soc/intel/skylake/skl-sst.c @@ -569,7 +569,7 @@ int skl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, sst->fw_ops = skl_fw_ops; - return 0; + return skl_dsp_acquire_irq(sst); } EXPORT_SYMBOL_GPL(skl_sst_dsp_init); -- cgit v1.2.3 From 752c93aa72e60ba573bbcfcd508b9cc550db0b94 Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Mon, 18 Dec 2017 10:46:50 +0530 Subject: ASoC: Intel: Skylake: Ensure dai and dailink registration happens in sequence. Platform registration happens in probe work handler whereas machine device is registered during skl_probe. This sometimes results in cpu dais not found if the work handler is sufficiently delayed due to system load, even with deferred probe of machine driver. So move machine device registration after registering platform. Signed-off-by: Pankaj Bharadiya Signed-off-by: Subhransu S. 
Prusty Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl.c | 56 +++++++++++++++++++++++++++++-------------- sound/soc/intel/skylake/skl.h | 1 + 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index a89592b2850e..32ce64c6b2dc 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -453,19 +453,34 @@ static struct skl_ssp_clk skl_ssp_clks[] = { {.name = "ssp5_sclkfs"}, }; -static int skl_machine_device_register(struct skl *skl, void *driver_data) +static int skl_find_machine(struct skl *skl, void *driver_data) { - struct hdac_bus *bus = ebus_to_hbus(&skl->ebus); - struct platform_device *pdev; struct snd_soc_acpi_mach *mach = driver_data; - int ret; + struct hdac_bus *bus = ebus_to_hbus(&skl->ebus); + struct skl_machine_pdata *pdata; mach = snd_soc_acpi_find_machine(mach); if (mach == NULL) { dev_err(bus->dev, "No matching machine driver found\n"); return -ENODEV; } + + skl->mach = mach; skl->fw_name = mach->fw_filename; + pdata = skl->mach->pdata; + + if (mach->pdata) + skl->use_tplg_pcm = pdata->use_tplg_pcm; + + return 0; +} + +static int skl_machine_device_register(struct skl *skl) +{ + struct hdac_bus *bus = ebus_to_hbus(&skl->ebus); + struct snd_soc_acpi_mach *mach = skl->mach; + struct platform_device *pdev; + int ret; pdev = platform_device_alloc(mach->drv_name, -1); if (pdev == NULL) { @@ -480,11 +495,8 @@ static int skl_machine_device_register(struct skl *skl, void *driver_data) return -EIO; } - if (mach->pdata) { - skl->use_tplg_pcm = - ((struct skl_machine_pdata *)mach->pdata)->use_tplg_pcm; + if (mach->pdata) dev_set_drvdata(&pdev->dev, mach->pdata); - } skl->i2s_dev = pdev; @@ -701,18 +713,30 @@ static void skl_probe_work(struct work_struct *work) /* create codec instances */ skl_codec_create(ebus); + /* register platform dai and controls */ + err = skl_platform_register(bus->dev); + if (err 
< 0) { + dev_err(bus->dev, "platform register failed: %d\n", err); + return; + } + + if (bus->ppcap) { + err = skl_machine_device_register(skl); + if (err < 0) { + dev_err(bus->dev, "machine register failed: %d\n", err); + goto out_err; + } + } + if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) { err = snd_hdac_display_power(bus, false); if (err < 0) { dev_err(bus->dev, "Cannot turn off display power on i915\n"); + skl_machine_device_unregister(skl); return; } } - /* register platform dai and controls */ - err = skl_platform_register(bus->dev); - if (err < 0) - return; /* * we are done probing so decrement link counts */ @@ -882,18 +906,16 @@ static int skl_probe(struct pci_dev *pci, if (err < 0) goto out_clk_free; - err = skl_machine_device_register(skl, - (void *)pci_id->driver_data); + err = skl_find_machine(skl, (void *)pci_id->driver_data); if (err < 0) goto out_nhlt_free; err = skl_init_dsp(skl); if (err < 0) { dev_dbg(bus->dev, "error failed to register dsp\n"); - goto out_mach_free; + goto out_nhlt_free; } skl->skl_sst->enable_miscbdcge = skl_enable_miscbdcge; - } if (bus->mlcap) snd_hdac_ext_bus_get_ml_capabilities(ebus); @@ -911,8 +933,6 @@ static int skl_probe(struct pci_dev *pci, out_dsp_free: skl_free_dsp(skl); -out_mach_free: - skl_machine_device_unregister(skl); out_clk_free: skl_clock_device_unregister(skl); out_nhlt_free: diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h index 46dda88ba139..f411579bc713 100644 --- a/sound/soc/intel/skylake/skl.h +++ b/sound/soc/intel/skylake/skl.h @@ -94,6 +94,7 @@ struct skl { struct skl_module **modules; bool use_tplg_pcm; struct skl_fw_config cfg; + struct snd_soc_acpi_mach *mach; }; #define skl_to_ebus(s) (&(s)->ebus) -- cgit v1.2.3 From d5aa24825da5711f8cb829f873160ddf1a29b19c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 20 Dec 2017 06:11:59 +0000 Subject: ASoC: rsnd: fixup ADG register mask BRGCKR should use 0x80770000, instead of 0x80FF0000. 
R-Car Gen2 xxx_TIMSEL should use 0x0F1F, R-Car Gen3 xxx_TIMSEL should use 0x1F1F. Here, Gen3 doesn't support AVD, thus, both cases can use 0x0F1F. Signed-off-by: Kuninori Morimoto Reviewed-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown --- sound/soc/sh/rcar/adg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index 8ddb08714faa..4672688cac32 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod, NULL, &val, NULL); val = val << shift; - mask = 0xffff << shift; + mask = 0x0f1f << shift; rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val); @@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod, in = in << shift; out = out << shift; - mask = 0xffff << shift; + mask = 0x0f1f << shift; switch (id / 2) { case 0: @@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate) ckr = 0x80000000; } - rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr); + rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr); rsnd_mod_write(adg_mod, BRRA, adg->rbga); rsnd_mod_write(adg_mod, BRRB, adg->rbgb); -- cgit v1.2.3 From b67336eee3fcb8ecedc6c13e2bf88aacfa3151e2 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 27 Nov 2017 09:33:03 +0000 Subject: MIPS: Validate PR_SET_FP_MODE prctl(2) requests against the ABI of the task Fix an API loophole introduced with commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS"), where the caller of prctl(2) is incorrectly allowed to make a change to CP0.Status.FR or CP0.Config5.FRE register bits even if CONFIG_MIPS_O32_FP64_SUPPORT has not been enabled, despite that an executable requesting the mode requested via ELF file annotation would not be allowed to run in the first place, or for n32 and n64 ABI tasks which do not have non-default modes defined at all.
Add suitable checks to `mips_set_process_fp_mode' and bail out if an invalid mode change has been requested for the ABI in effect, even if the FPU hardware or emulation would otherwise allow it. Always succeed however without taking any further action if the mode requested is the same as one already in effect, regardless of whether any mode change, should it be requested, would actually be allowed for the task concerned. Signed-off-by: Maciej W. Rozycki Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 4.0+ Patchwork: https://patchwork.linux-mips.org/patch/17800/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/process.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 45d0b6b037ee..57028d49c202 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) struct task_struct *t; int max_users; + /* If nothing to change, return right away, successfully. */ + if (value == mips_get_process_fp_mode(task)) + return 0; + + /* Only accept a mode change if 64-bit FP enabled for o32. */ + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) + return -EOPNOTSUPP; + + /* And only for o32 tasks. */ + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS)) + return -EOPNOTSUPP; + /* Check the value is valid */ if (value & ~known_bits) return -EOPNOTSUPP; -- cgit v1.2.3 From 2c08cd7c20968ddf71feeac2265b4741d2b3fdde Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 20 Dec 2017 11:52:47 +0100 Subject: drm/sun4i: hdmi: Move the mode_valid callback to the encoder When attached to the connector, the mode_valid callback will only filter the modes provided by the connector itself as part of its probe. 
However, it will not be doing it when the mode is provided by the userspace, which still might result in a broken configuration. In order to enforce these constraints, move our mode_valid callback to the encoder which doesn't have this behaviour. Acked-by: Daniel Vetter Signed-off-by: Hans Verkuil [maxime: Wrote the commit log in order to update the patch from the merged v3 to the v4 that was correct.] Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/0fa230a8-d01d-561a-f74f-6b4fd421255b@xs4all.nl --- drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 39 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index c12f9bd12904..500b6fb3e028 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder, writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG); } +static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder, + const struct drm_display_mode *mode) +{ + struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder); + unsigned long rate = mode->clock * 1000; + unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */ + long rounded_rate; + + /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */ + if (rate > 165000000) + return MODE_CLOCK_HIGH; + rounded_rate = clk_round_rate(hdmi->tmds_clk, rate); + if (rounded_rate > 0 && + max_t(unsigned long, rounded_rate, rate) - + min_t(unsigned long, rounded_rate, rate) < diff) + return MODE_OK; + return MODE_NOCLOCK; +} + static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = { .atomic_check = sun4i_hdmi_atomic_check, .disable = sun4i_hdmi_disable, .enable = sun4i_hdmi_enable, .mode_set = sun4i_hdmi_mode_set, + .mode_valid = sun4i_hdmi_mode_valid, }; static const struct drm_encoder_funcs 
sun4i_hdmi_funcs = { @@ -208,27 +228,8 @@ static int sun4i_hdmi_get_modes(struct drm_connector *connector) return ret; } -static int sun4i_hdmi_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - struct sun4i_hdmi *hdmi = drm_connector_to_sun4i_hdmi(connector); - long rate = mode->clock * 1000; - long diff = rate / 200; /* +-0.5% allowed by HDMI spec */ - long rounded_rate; - - /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */ - if (rate > 165000000) - return MODE_CLOCK_HIGH; - rounded_rate = clk_round_rate(hdmi->tmds_clk, rate); - if (max(rounded_rate, rate) - min(rounded_rate, rate) < diff && - rounded_rate > 0) - return MODE_OK; - return MODE_NOCLOCK; -} - static const struct drm_connector_helper_funcs sun4i_hdmi_connector_helper_funcs = { .get_modes = sun4i_hdmi_get_modes, - .mode_valid = sun4i_hdmi_mode_valid, }; static enum drm_connector_status -- cgit v1.2.3 From 12a9d3bb6a258011edec3fa341cbca3cd0734e5d Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 19 Dec 2017 00:37:57 -0500 Subject: backlight: apple_bl: Deprecate pci_get_bus_and_slot() pci_get_bus_and_slot() is restrictive such that it assumes domain=0 as where a PCI device is present. This restricts the device drivers to be reused for other domain numbers. Getting ready to remove pci_get_bus_and_slot() function in favor of pci_get_domain_bus_and_slot(). Hard-coding the domain as 0. 
Signed-off-by: Sinan Kaya Acked-by: Daniel Thompson Acked-by: Jingoo Han Signed-off-by: Lee Jones --- drivers/video/backlight/apple_bl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/backlight/apple_bl.c b/drivers/video/backlight/apple_bl.c index d84329676689..6a34ab936726 100644 --- a/drivers/video/backlight/apple_bl.c +++ b/drivers/video/backlight/apple_bl.c @@ -143,7 +143,7 @@ static int apple_bl_add(struct acpi_device *dev) struct pci_dev *host; int intensity; - host = pci_get_bus_and_slot(0, 0); + host = pci_get_domain_bus_and_slot(0, 0, 0); if (!host) { pr_err("unable to find PCI host\n"); -- cgit v1.2.3 From ce0769e0ea4b3e192466243a1a9fd39acf214f1e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 20 Dec 2017 10:35:43 +0100 Subject: drm/plane: Make framebuffer refcounting the responsibility of setplane_internal callers lock_all_ctx in setplane_internal may return -EINTR, and __setplane_internal could return -EDEADLK. Making more special cases for fb would make the code even harder to read, so the easiest solution is not taking over the fb refcount, and making callers responsible for dropping the ref. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102707 Fixes: 13736ba3b38b ("drm/legacy: Convert setplane ioctl locking to interruptible.") Testcase: kms_atomic_interruptible Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20171220093545.613-2-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/drm_plane.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 37a93cdffb4a..2c90519576a3 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format) } /* - * setplane_internal - setplane handler for internal callers + * __setplane_internal - setplane handler for internal callers * - * Note that we assume an extra reference has already been taken on fb. If the - * update fails, this reference will be dropped before return; if it succeeds, - * the previous framebuffer (if any) will be unreferenced instead. + * This function will take a reference on the new fb for the plane + * on success. 
* * src_{x,y,w,h} are provided in 16.16 fixed point format */ @@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane, if (!ret) { plane->crtc = crtc; plane->fb = fb; - fb = NULL; + drm_framebuffer_get(plane->fb); } else { plane->old_fb = NULL; } out: - if (fb) - drm_framebuffer_put(fb); if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; @@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data, struct drm_plane *plane; struct drm_crtc *crtc = NULL; struct drm_framebuffer *fb = NULL; + int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data, } } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ - return setplane_internal(plane, crtc, fb, - plane_req->crtc_x, plane_req->crtc_y, - plane_req->crtc_w, plane_req->crtc_h, - plane_req->src_x, plane_req->src_y, - plane_req->src_w, plane_req->src_h); + ret = setplane_internal(plane, crtc, fb, + plane_req->crtc_x, plane_req->crtc_y, + plane_req->crtc_w, plane_req->crtc_h, + plane_req->src_x, plane_req->src_y, + plane_req->src_w, plane_req->src_h); + + if (fb) + drm_framebuffer_put(fb); + + return ret; } static int drm_mode_cursor_universal(struct drm_crtc *crtc, @@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, src_h = fb->height << 16; } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. 
- */ ret = __setplane_internal(crtc->cursor, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - 0, 0, src_w, src_h, ctx); + crtc_x, crtc_y, crtc_w, crtc_h, + 0, 0, src_w, src_h, ctx); + + if (fb) + drm_framebuffer_put(fb); /* Update successful; save new cursor position, if necessary */ if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) { -- cgit v1.2.3 From 74d0833c659a8a54735e5efdd44f4b225af68586 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 20 Dec 2017 07:09:19 -0800 Subject: cgroup: fix css_task_iter crash on CSS_TASK_ITER_PROC While teaching css_task_iter to handle skipping over tasks which aren't group leaders, bc2fb7ed089f ("cgroup: add @flags to css_task_iter_start() and implement CSS_TASK_ITER_PROCS") introduced a silly bug. CSS_TASK_ITER_PROCS is implemented by repeating css_task_iter_advance() while the advanced cursor is pointing to a non-leader thread. However, the cursor variable, @l, wasn't updated when the iteration has to advance to the next css_set and the following repetition would operate on the terminal @l from the previous iteration which isn't pointing to a valid task leading to oopses like the following or infinite looping. BUG: unable to handle kernel NULL pointer dereference at 0000000000000254 IP: __task_pid_nr_ns+0xc7/0xf0 PGD 0 P4D 0 Oops: 0000 [#1] SMP ... 
CPU: 2 PID: 1 Comm: systemd Not tainted 4.14.4-200.fc26.x86_64 #1 Hardware name: System manufacturer System Product Name/PRIME B350M-A, BIOS 3203 11/09/2017 task: ffff88c4baee8000 task.stack: ffff96d5c3158000 RIP: 0010:__task_pid_nr_ns+0xc7/0xf0 RSP: 0018:ffff96d5c315bd50 EFLAGS: 00010206 RAX: 0000000000000000 RBX: ffff88c4b68c6000 RCX: 0000000000000250 RDX: ffffffffa5e47960 RSI: 0000000000000000 RDI: ffff88c490f6ab00 RBP: ffff96d5c315bd50 R08: 0000000000001000 R09: 0000000000000005 R10: ffff88c4be006b80 R11: ffff88c42f1b8004 R12: ffff96d5c315bf18 R13: ffff88c42d7dd200 R14: ffff88c490f6a510 R15: ffff88c4b68c6000 FS: 00007f9446f8ea00(0000) GS:ffff88c4be680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000254 CR3: 00000007f956f000 CR4: 00000000003406e0 Call Trace: cgroup_procs_show+0x19/0x30 cgroup_seqfile_show+0x4c/0xb0 kernfs_seq_show+0x21/0x30 seq_read+0x2ec/0x3f0 kernfs_fop_read+0x134/0x180 __vfs_read+0x37/0x160 ? security_file_permission+0x9b/0xc0 vfs_read+0x8e/0x130 SyS_read+0x55/0xc0 entry_SYSCALL_64_fastpath+0x1a/0xa5 RIP: 0033:0x7f94455f942d RSP: 002b:00007ffe81ba2d00 EFLAGS: 00000293 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 00005574e2233f00 RCX: 00007f94455f942d RDX: 0000000000001000 RSI: 00005574e2321a90 RDI: 000000000000002b RBP: 0000000000000000 R08: 00005574e2321a90 R09: 00005574e231de60 R10: 00007f94458c8b38 R11: 0000000000000293 R12: 00007f94458c8ae0 R13: 00007ffe81ba3800 R14: 0000000000000000 R15: 00005574e2116560 Code: 04 74 0e 89 f6 48 8d 04 76 48 8d 04 c5 f0 05 00 00 48 8b bf b8 05 00 00 48 01 c7 31 c0 48 8b 0f 48 85 c9 74 18 8b b2 30 08 00 00 <3b> 71 04 77 0d 48 c1 e6 05 48 01 f1 48 3b 51 38 74 09 5d c3 8b RIP: __task_pid_nr_ns+0xc7/0xf0 RSP: ffff96d5c315bd50 Fix it by moving the initialization of the cursor below the repeat label. While at it, rename it to @next for readability. 
Signed-off-by: Tejun Heo Fixes: bc2fb7ed089f ("cgroup: add @flags to css_task_iter_start() and implement CSS_TASK_ITER_PROCS") Cc: stable@vger.kernel.org # v4.14+ Reported-by: Laura Abbott Reported-by: Bronek Kozicki Reported-by: George Amanakis Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f4c2f8cb5748..2cf06c274e4c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4125,26 +4125,24 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) static void css_task_iter_advance(struct css_task_iter *it) { - struct list_head *l = it->task_pos; + struct list_head *next; lockdep_assert_held(&css_set_lock); - WARN_ON_ONCE(!l); - repeat: /* * Advance iterator to find next entry. cset->tasks is consumed * first and then ->mg_tasks. After ->mg_tasks, we move onto the * next cset. */ - l = l->next; + next = it->task_pos->next; - if (l == it->tasks_head) - l = it->mg_tasks_head->next; + if (next == it->tasks_head) + next = it->mg_tasks_head->next; - if (l == it->mg_tasks_head) + if (next == it->mg_tasks_head) css_task_iter_advance_css_set(it); else - it->task_pos = l; + it->task_pos = next; /* if PROCS, skip over tasks which aren't group leaders */ if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos && -- cgit v1.2.3 From ded8a0447f49d7ec1778a79de433c281c64bf814 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Fri, 15 Dec 2017 08:33:31 +0100 Subject: mtd: spi-nor: add support for is25lq040b Signed-off-by: Sean Nyekjaer Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/spi-nor.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 9178139a39d0..f7676aa50ce6 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1046,6 +1046,8 @@ static const struct flash_info spi_nor_ids[] = { /* ISSI */ { "is25cd512", 
INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, + { "is25lq040b", INFO(0x9d4013, 0, 64 * 1024, 8, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "is25lp128", INFO(0x9d6018, 0, 64 * 1024, 256, SECT_4K | SPI_NOR_DUAL_READ) }, -- cgit v1.2.3 From 29d6b29f5040d3385510bd9766f5f7823537b26a Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Fri, 15 Dec 2017 08:33:32 +0100 Subject: mtd: spi-nor: indent issi section Signed-off-by: Sean Nyekjaer Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/spi-nor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index f7676aa50ce6..8bafd462f0ae 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1045,10 +1045,10 @@ static const struct flash_info spi_nor_ids[] = { { "640s33b", INFO(0x898913, 0, 64 * 1024, 128, 0) }, /* ISSI */ - { "is25cd512", INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, - { "is25lq040b", INFO(0x9d4013, 0, 64 * 1024, 8, + { "is25cd512", INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, + { "is25lq040b", INFO(0x9d4013, 0, 64 * 1024, 8, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, - { "is25lp128", INFO(0x9d6018, 0, 64 * 1024, 256, + { "is25lp128", INFO(0x9d6018, 0, 64 * 1024, 256, SECT_4K | SPI_NOR_DUAL_READ) }, /* Macronix */ -- cgit v1.2.3 From 2e7c7f66b2b4c88a03c69b148e29a372432f35b9 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 18 Dec 2017 16:49:34 +0100 Subject: MAINTAINERS: Move all MTD related branches to a single repo Historically, branches targeting the next release (and pulled in linux-next) have been pushed on the l2-mtd repo and fixes branches on the linux-mtd one. Now that all MTD maintainers have RW permissions on linux-mtd tree, there's no good reason to have two different trees. Move all -next branches to linux-mtd. 
Signed-off-by: Boris Brezillon Cc: Stephen Rothwell Cc: Marek Vasut Cc: Cyrille Pitchen Cc: Richard Weinberger Cc: David Woodhouse Cc: Brian Norris Acked-by: Richard Weinberger Acked-by: Cyrille Pitchen --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..10732ecfc937 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8956,7 +8956,7 @@ L: linux-mtd@lists.infradead.org W: http://www.linux-mtd.infradead.org/ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ T: git git://git.infradead.org/linux-mtd.git master -T: git git://git.infradead.org/l2-mtd.git master +T: git git://git.infradead.org/linux-mtd.git mtd/next S: Maintained F: Documentation/devicetree/bindings/mtd/ F: drivers/mtd/ @@ -9344,7 +9344,7 @@ L: linux-mtd@lists.infradead.org W: http://www.linux-mtd.infradead.org/ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ T: git git://git.infradead.org/linux-mtd.git nand/fixes -T: git git://git.infradead.org/l2-mtd.git nand/next +T: git git://git.infradead.org/linux-mtd.git nand/next S: Maintained F: drivers/mtd/nand/ F: include/linux/mtd/*nand*.h @@ -12761,7 +12761,7 @@ L: linux-mtd@lists.infradead.org W: http://www.linux-mtd.infradead.org/ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ T: git git://git.infradead.org/linux-mtd.git spi-nor/fixes -T: git git://git.infradead.org/l2-mtd.git spi-nor/next +T: git git://git.infradead.org/linux-mtd.git spi-nor/next S: Maintained F: drivers/mtd/spi-nor/ F: include/linux/mtd/spi-nor.h -- cgit v1.2.3 From 1e02dac395fadfff1f2c6bd90f1180f64a9cbebe Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 20 Dec 2017 01:48:13 +0000 Subject: ASoC: hdac_hdmi: keep DAI driver pointer in private data struct snd_soc_component::dai_drv will be removed soon. hdac_hdmi is only user of it. Let's keep it on private data. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 68a4a6b4e68e..15c3638fe345 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -136,6 +136,7 @@ struct hdac_hdmi_priv { struct mutex pin_mutex; struct hdac_chmap chmap; struct hdac_hdmi_drv_data *drv_data; + struct snd_soc_dai_driver *dai_drv; }; #define hdev_to_hdmi_priv(_hdev) ((to_ehdac_device(_hdev))->private_data) @@ -1035,7 +1036,7 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm) struct snd_soc_dapm_route *route; struct hdac_ext_device *edev = to_hda_ext_device(dapm->dev); struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev); - struct snd_soc_dai_driver *dai_drv = dapm->component->dai_drv; + struct snd_soc_dai_driver *dai_drv = hdmi->dai_drv; char widget_name[NAME_SIZE]; struct hdac_hdmi_cvt *cvt; struct hdac_hdmi_pin *pin; @@ -1437,6 +1438,7 @@ static int hdac_hdmi_create_dais(struct hdac_device *hdev, } *dais = hdmi_dais; + hdmi->dai_drv = hdmi_dais; return 0; } -- cgit v1.2.3 From 58bf4179000a37aa9b0ee9ab2796f7573c77fff0 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 20 Dec 2017 01:48:29 +0000 Subject: ASoC: soc-core: remove dai_drv from snd_soc_component ALSA SoC has some duplicate parameter. snd_soc_component::dai_drv is one of them. Each DAI is keeping its driver as snd_soc_dai::driver, and component has dai_list. This means, we can reach to each DAI and its driver by using dai_link. Thus, there is no need to keep DAI driver pointer on component. 
Let's remove it Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 1 - sound/soc/soc-core.c | 14 ++++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 1a7323238c49..871e1fabd701 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -858,7 +858,6 @@ struct snd_soc_component { struct list_head card_aux_list; /* for auxiliary bound components */ struct list_head card_list; - struct snd_soc_dai_driver *dai_drv; int num_dai; const struct snd_soc_component_driver *driver; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c0edac80df34..6a13fbcba23f 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3149,7 +3149,7 @@ static struct snd_soc_dai *soc_add_dai(struct snd_soc_component *component, if (!dai->driver->ops) dai->driver->ops = &null_dai_ops; - list_add(&dai->list, &component->dai_list); + list_add_tail(&dai->list, &component->dai_list); component->num_dai++; dev_dbg(dev, "ASoC: Registered DAI '%s'\n", dai->name); @@ -3176,8 +3176,6 @@ static int snd_soc_register_dais(struct snd_soc_component *component, dev_dbg(dev, "ASoC: dai register %s #%zu\n", dev_name(dev), count); - component->dai_drv = dai_drv; - for (i = 0; i < count; i++) { dai = soc_add_dai(component, dai_drv + i, @@ -4354,6 +4352,7 @@ int snd_soc_get_dai_name(struct of_phandle_args *args, args, dai_name); } else { + struct snd_soc_dai *dai; int id = -1; switch (args->args_count) { @@ -4375,7 +4374,14 @@ int snd_soc_get_dai_name(struct of_phandle_args *args, ret = 0; - *dai_name = pos->dai_drv[id].name; + /* find target DAI */ + list_for_each_entry(dai, &pos->dai_list, list) { + if (id == 0) + break; + id--; + } + + *dai_name = dai->driver->name; if (!*dai_name) *dai_name = pos->name; } -- cgit v1.2.3 From 4855f6a6283fc8ea1f79c9f49c91940afef64ef6 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 20 Dec 2017 01:48:44 +0000 Subject: ASoC: soc.h: Arrange DAI 
related parameter "dai_list" and "num_dai" on snd_soc_component are related parameters. Let's arrange these. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/sound/soc.h b/include/sound/soc.h index 871e1fabd701..be6b462e2c01 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -858,11 +858,10 @@ struct snd_soc_component { struct list_head card_aux_list; /* for auxiliary bound components */ struct list_head card_list; - int num_dai; - const struct snd_soc_component_driver *driver; struct list_head dai_list; + int num_dai; int (*read)(struct snd_soc_component *, unsigned int, unsigned int *); int (*write)(struct snd_soc_component *, unsigned int, unsigned int); -- cgit v1.2.3 From 4423c18e466afdfb02a36ee8b9f901d144b3c607 Mon Sep 17 00:00:00 2001 From: Yelena Krivosheev Date: Tue, 19 Dec 2017 17:59:45 +0100 Subject: net: mvneta: clear interface link status on port disable When the port connects to the PHY in polling mode (with a poll interval of 1 sec), the port and PHY link status must be synchronized in order not to lose link change events. [gregory.clement@free-electrons.com: add fixes tag] Cc: Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: Yelena Krivosheev Tested-by: Dmitri Epshtein Signed-off-by: Gregory CLEMENT Signed-off-by: David S.
Miller --- drivers/net/ethernet/marvell/mvneta.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bc93b69cfd1e..16b2bfb2cf51 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp) val &= ~MVNETA_GMAC0_PORT_ENABLE; mvreg_write(pp, MVNETA_GMAC_CTRL_0, val); + pp->link = 0; + pp->duplex = -1; + pp->speed = 0; + udelay(200); } -- cgit v1.2.3 From ca5902a6547f662419689ca28b3c29a772446caa Mon Sep 17 00:00:00 2001 From: Yelena Krivosheev Date: Tue, 19 Dec 2017 17:59:46 +0100 Subject: net: mvneta: use proper rxq_number in loop on rx queues When adding the RX queue association with each CPU, a typo was made in the mvneta_cleanup_rxqs() function. This patch fixes it. [gregory.clement@free-electrons.com: add commit log and fixes tag] Cc: stable@vger.kernel.org Fixes: 2dcf75e2793c ("net: mvneta: Associate RX queues with each CPU") Signed-off-by: Yelena Krivosheev Tested-by: Dmitri Epshtein Signed-off-by: Gregory CLEMENT Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 16b2bfb2cf51..1e0835655c93 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3015,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp) { int queue; - for (queue = 0; queue < txq_number; queue++) + for (queue = 0; queue < rxq_number; queue++) mvneta_rxq_deinit(pp, &pp->rxqs[queue]); } -- cgit v1.2.3 From 2eecb2e04abb62ef8ea7b43e1a46bdb5b99d1bf8 Mon Sep 17 00:00:00 2001 From: Yelena Krivosheev Date: Tue, 19 Dec 2017 17:59:47 +0100 Subject: net: mvneta: eliminate wrong call to handle rx descriptor error There are few reasons in mvneta_rx_swbm() function when received packet is dropped. mvneta_rx_error() should be called only if error bit [16] is set in rx descriptor. [gregory.clement@free-electrons.com: add fixes tag] Cc: stable@vger.kernel.org Fixes: dc35a10f68d3 ("net: mvneta: bm: add support for hardware buffer management") Signed-off-by: Yelena Krivosheev Tested-by: Dmitri Epshtein Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 1e0835655c93..a539263cd79c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1962,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo, if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { + mvneta_rx_error(pp, rx_desc); err_drop_frame: dev->stats.rx_errors++; - mvneta_rx_error(pp, rx_desc); /* leave the descriptor untouched */ continue; } -- cgit v1.2.3 From 21b5944350052d2583e82dd59b19a9ba94a007f0 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Tue, 19 Dec 2017 11:27:56 -0600 Subject: net: Fix double free and memory corruption in get_net_ns_by_id() (I can trivially verify that that idr_remove in cleanup_net happens after the network namespace count has dropped to zero --EWB) Function get_net_ns_by_id() does not check for net::count after it has found a peer in netns_ids idr. It may dereference a peer, after its count has already been finally decremented. This leads to double free and memory corruption: put_net(peer) rtnl_lock() atomic_dec_and_test(&peer->count) [count=0] ... __put_net(peer) get_net_ns_by_id(net, id) spin_lock(&cleanup_list_lock) list_add(&net->cleanup_list, &cleanup_list) spin_unlock(&cleanup_list_lock) queue_work() peer = idr_find(&net->netns_ids, id) | get_net(peer) [count=1] | ... | (use after final put) v ... cleanup_net() ... spin_lock(&cleanup_list_lock) ... list_replace_init(&cleanup_list, ..) ... spin_unlock(&cleanup_list_lock) ... ... ... ... put_net(peer) ... atomic_dec_and_test(&peer->count) [count=0] ... spin_lock(&cleanup_list_lock) ... list_add(&net->cleanup_list, &cleanup_list) ... spin_unlock(&cleanup_list_lock) ... queue_work() ... rtnl_unlock() rtnl_lock() ... for_each_net(tmp) { ... id = __peernet2id(tmp, peer) ... spin_lock_irq(&tmp->nsid_lock) ... idr_remove(&tmp->netns_ids, id) ... ... ... net_drop_ns() ... net_free(peer) ... } ... | v cleanup_net() ... (Second free of peer) Also, put_net() on the right cpu may reorder with left's cpu list_replace_init(&cleanup_list, ..), and then cleanup_list will be corrupted. Since cleanup_net() is executed in worker thread, while put_net(peer) can happen everywhere, there should be enough time for concurrent get_net_ns_by_id() to pick the peer up, and the race does not seem to be unlikely. The patch fixes the problem in the standard way.
(Also, there is a possible problem in peernet2id_alloc(), which requires a check for net::count under nsid_lock and maybe_get_net(peer), but in the current stable kernel it's used under rtnl_lock() and it has to be safe. Openvswitch began to use peernet2id_alloc(), and possibly it should be fixed too. This is not in a stable kernel yet, so I'll send a separate message to netdev@ later). Cc: Nicolas Dichtel Signed-off-by: Kirill Tkhai Fixes: 0c7aecd4bde4 "netns: add rtnl cmd to add and get peer netns ids" Reviewed-by: Andrey Ryabinin Reviewed-by: "Eric W. Biederman" Signed-off-by: Eric W. Biederman Reviewed-by: Eric Dumazet Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b797832565d3..60a71be75aea 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id) spin_lock_bh(&net->nsid_lock); peer = idr_find(&net->netns_ids, id); if (peer) - get_net(peer); + peer = maybe_get_net(peer); spin_unlock_bh(&net->nsid_lock); rcu_read_unlock(); -- cgit v1.2.3 From 102740bd9436a3a6ba129af3a48271d794009fa5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Dec 2017 13:32:13 -0800 Subject: cls_bpf: fix offload assumptions after callback conversion cls_bpf used to take care of tracking what offload state a filter is in, i.e. it would track if offload request succeeded or not. This information would then be used to issue correct requests to the driver, e.g. requests for statistics only on offloaded filters, removing only filters which were offloaded, using add instead of replace if previous filter was not added etc. This tracking of offload state no longer functions with the new callback infrastructure. There could be multiple entities trying to offload the same filter.
Throw out all the tracking and corresponding commands and simply pass to the drivers both old and new bpf program. Drivers will have to deal with offload state tracking by themselves. Fixes: 3f7889c4c79b ("net: sched: cls_bpf: call block callbacks for offload") Signed-off-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 12 +--- include/net/pkt_cls.h | 5 +- net/sched/cls_bpf.c | 93 +++++++++++---------------- 3 files changed, 43 insertions(+), 67 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index e379b78e86ef..a4cf62ba4604 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -110,16 +110,10 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, return -EOPNOTSUPP; } - switch (cls_bpf->command) { - case TC_CLSBPF_REPLACE: - return nfp_net_bpf_offload(nn, cls_bpf->prog, true); - case TC_CLSBPF_ADD: - return nfp_net_bpf_offload(nn, cls_bpf->prog, false); - case TC_CLSBPF_DESTROY: - return nfp_net_bpf_offload(nn, NULL, true); - default: + if (cls_bpf->command != TC_CLSBPF_OFFLOAD) return -EOPNOTSUPP; - } + + return nfp_net_bpf_offload(nn, cls_bpf->prog, cls_bpf->oldprog); } static int nfp_bpf_setup_tc_block(struct net_device *netdev, diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0105445cab83..8e08b6da72f3 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -694,9 +694,7 @@ struct tc_cls_matchall_offload { }; enum tc_clsbpf_command { - TC_CLSBPF_ADD, - TC_CLSBPF_REPLACE, - TC_CLSBPF_DESTROY, + TC_CLSBPF_OFFLOAD, TC_CLSBPF_STATS, }; @@ -705,6 +703,7 @@ struct tc_cls_bpf_offload { enum tc_clsbpf_command command; struct tcf_exts *exts; struct bpf_prog *prog; + struct bpf_prog *oldprog; const char *name; bool exts_integrated; u32 gen_flags; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6fe798c2df1a..8d78e7f4ecc3 
100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -42,7 +42,6 @@ struct cls_bpf_prog { struct list_head link; struct tcf_result res; bool exts_integrated; - bool offloaded; u32 gen_flags; struct tcf_exts exts; u32 handle; @@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) } static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, - enum tc_clsbpf_command cmd) + struct cls_bpf_prog *oldprog) { - bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE; struct tcf_block *block = tp->chain->block; - bool skip_sw = tc_skip_sw(prog->gen_flags); struct tc_cls_bpf_offload cls_bpf = {}; + struct cls_bpf_prog *obj; + bool skip_sw; int err; + skip_sw = prog && tc_skip_sw(prog->gen_flags); + obj = prog ?: oldprog; + tc_cls_common_offload_init(&cls_bpf.common, tp); - cls_bpf.command = cmd; - cls_bpf.exts = &prog->exts; - cls_bpf.prog = prog->filter; - cls_bpf.name = prog->bpf_name; - cls_bpf.exts_integrated = prog->exts_integrated; - cls_bpf.gen_flags = prog->gen_flags; + cls_bpf.command = TC_CLSBPF_OFFLOAD; + cls_bpf.exts = &obj->exts; + cls_bpf.prog = prog ? prog->filter : NULL; + cls_bpf.oldprog = oldprog ? 
oldprog->filter : NULL; + cls_bpf.name = obj->bpf_name; + cls_bpf.exts_integrated = obj->exts_integrated; + cls_bpf.gen_flags = obj->gen_flags; err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); - if (addorrep) { + if (prog) { if (err < 0) { - cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + cls_bpf_offload_cmd(tp, oldprog, prog); return err; } else if (err > 0) { prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; } } - if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) + if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) return -EINVAL; return 0; @@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct cls_bpf_prog *oldprog) { - struct cls_bpf_prog *obj = prog; - enum tc_clsbpf_command cmd; - bool skip_sw; - int ret; - - skip_sw = tc_skip_sw(prog->gen_flags) || - (oldprog && tc_skip_sw(oldprog->gen_flags)); - - if (oldprog && oldprog->offloaded) { - if (!tc_skip_hw(prog->gen_flags)) { - cmd = TC_CLSBPF_REPLACE; - } else if (!tc_skip_sw(prog->gen_flags)) { - obj = oldprog; - cmd = TC_CLSBPF_DESTROY; - } else { - return -EINVAL; - } - } else { - if (tc_skip_hw(prog->gen_flags)) - return skip_sw ? 
-EINVAL : 0; - cmd = TC_CLSBPF_ADD; - } - - ret = cls_bpf_offload_cmd(tp, obj, cmd); - if (ret) - return ret; + if (prog && oldprog && prog->gen_flags != oldprog->gen_flags) + return -EINVAL; - obj->offloaded = true; - if (oldprog) - oldprog->offloaded = false; + if (prog && tc_skip_hw(prog->gen_flags)) + prog = NULL; + if (oldprog && tc_skip_hw(oldprog->gen_flags)) + oldprog = NULL; + if (!prog && !oldprog) + return 0; - return 0; + return cls_bpf_offload_cmd(tp, prog, oldprog); } static void cls_bpf_stop_offload(struct tcf_proto *tp, @@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp, { int err; - if (!prog->offloaded) - return; - - err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); - if (err) { + err = cls_bpf_offload_cmd(tp, NULL, prog); + if (err) pr_err("Stopping hardware offload failed: %d\n", err); - return; - } - - prog->offloaded = false; } static void cls_bpf_offload_update_stats(struct tcf_proto *tp, struct cls_bpf_prog *prog) { - if (!prog->offloaded) - return; + struct tcf_block *block = tp->chain->block; + struct tc_cls_bpf_offload cls_bpf = {}; + + tc_cls_common_offload_init(&cls_bpf.common, tp); + cls_bpf.command = TC_CLSBPF_STATS; + cls_bpf.exts = &prog->exts; + cls_bpf.prog = prog->filter; + cls_bpf.name = prog->bpf_name; + cls_bpf.exts_integrated = prog->exts_integrated; + cls_bpf.gen_flags = prog->gen_flags; - cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); + tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false); } static int cls_bpf_init(struct tcf_proto *tp) -- cgit v1.2.3 From d3f89b98e391475419ae2d8834813d3ecbb48f67 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Dec 2017 13:32:14 -0800 Subject: nfp: bpf: keep track of the offloaded program After TC offloads were converted to callbacks we have no choice but keep track of the offloaded filter in the driver. 
The check for nn->dp.bpf_offload_xdp was a stop gap solution to make sure failed TC offload won't disable XDP, it's no longer necessary. nfp_net_bpf_offload() will return -EBUSY on TC vs XDP conflicts. Fixes: 3f7889c4c79b ("net: sched: cls_bpf: call block callbacks for offload") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 47 ++++++++++++++++++++++++--- drivers/net/ethernet/netronome/nfp/bpf/main.h | 8 +++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index a4cf62ba4604..13190aa09faf 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) return nfp_net_ebpf_capable(nn) ? "BPF" : ""; } +static int +nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) +{ + int err; + + nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL); + if (!nn->app_priv) + return -ENOMEM; + + err = nfp_app_nic_vnic_alloc(app, nn, id); + if (err) + goto err_free_priv; + + return 0; +err_free_priv: + kfree(nn->app_priv); + return err; +} + static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) { + struct nfp_bpf_vnic *bv = nn->app_priv; + if (nn->dp.bpf_offload_xdp) nfp_bpf_xdp_offload(app, nn, NULL); + WARN_ON(bv->tc_prog); + kfree(bv); } static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, @@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, { struct tc_cls_bpf_offload *cls_bpf = type_data; struct nfp_net *nn = cb_priv; + struct bpf_prog *oldprog; + struct nfp_bpf_vnic *bv; + int err; if (type != TC_SETUP_CLSBPF || !tc_can_offload(nn->dp.netdev) || @@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, cls_bpf->common.protocol != htons(ETH_P_ALL) || 
cls_bpf->common.chain_index) return -EOPNOTSUPP; - if (nn->dp.bpf_offload_xdp) - return -EBUSY; /* Only support TC direct action */ if (!cls_bpf->exts_integrated || @@ -113,7 +137,22 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, if (cls_bpf->command != TC_CLSBPF_OFFLOAD) return -EOPNOTSUPP; - return nfp_net_bpf_offload(nn, cls_bpf->prog, cls_bpf->oldprog); + bv = nn->app_priv; + oldprog = cls_bpf->oldprog; + + /* Don't remove if oldprog doesn't match driver's state */ + if (bv->tc_prog != oldprog) { + oldprog = NULL; + if (!cls_bpf->prog) + return 0; + } + + err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog); + if (err) + return err; + + bv->tc_prog = cls_bpf->prog; + return 0; } static int nfp_bpf_setup_tc_block(struct net_device *netdev, @@ -161,7 +200,7 @@ const struct nfp_app_type app_bpf = { .extra_cap = nfp_bpf_extra_cap, - .vnic_alloc = nfp_app_nic_vnic_alloc, + .vnic_alloc = nfp_bpf_vnic_alloc, .vnic_free = nfp_bpf_vnic_free, .setup_tc = nfp_bpf_setup_tc, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 082a15f6dfb5..57b6043177a3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -172,6 +172,14 @@ struct nfp_prog { struct list_head insns; }; +/** + * struct nfp_bpf_vnic - per-vNIC BPF priv structure + * @tc_prog: currently loaded cls_bpf program + */ +struct nfp_bpf_vnic { + struct bpf_prog *tc_prog; +}; + int nfp_bpf_jit(struct nfp_prog *prog); extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; -- cgit v1.2.3 From 111be883981748acc9a56e855c8336404a8e787c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Dec 2017 11:10:17 -0700 Subject: block-throttle: avoid double charge If a bio is throttled and split after throttling, the bio could be resubmited and enters the throttling again. This will cause part of the bio to be charged multiple times. 
If the cgroup has an IO limit, the double charge will significantly harm the performance. The bio split becomes quite common after arbitrary bio size change. To fix this, we always set the BIO_THROTTLED flag if a bio is throttled. If the bio is cloned/split, we copy the flag to new bio too to avoid a double charge. However, cloned bio could be directed to a new disk, keeping the flag be a problem. The observation is we always set new disk for the bio in this case, so we can clear the flag in bio_set_dev(). This issue exists for a long time, arbitrary bio size change just makes it worse, so this should go into stable at least since v4.2. V1-> V2: Not add extra field in bio based on discussion with Tejun Cc: Vivek Goyal Cc: stable@vger.kernel.org Acked-by: Tejun Heo Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/bio.c | 2 ++ block/blk-throttle.c | 8 +------- include/linux/bio.h | 2 ++ include/linux/blk_types.h | 9 ++++----- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/block/bio.c b/block/bio.c index 8bfdea58159b..9ef6cf3addb3 100644 --- a/block/bio.c +++ b/block/bio.c @@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_disk = bio_src->bi_disk; bio->bi_partno = bio_src->bi_partno; bio_set_flag(bio, BIO_CLONED); + if (bio_flagged(bio_src, BIO_THROTTLED)) + bio_set_flag(bio, BIO_THROTTLED); bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 825bc29767e6..d19f416d6101 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2226,13 +2226,7 @@ again: out_unlock: spin_unlock_irq(q->queue_lock); out: - /* - * As multiple blk-throtls may stack in the same issue path, we - * don't want bios to leave with the flag set. Clear the flag if - * being issued. 
- */ - if (!throttled) - bio_clear_flag(bio, BIO_THROTTLED); + bio_set_flag(bio, BIO_THROTTLED); #ifdef CONFIG_BLK_DEV_THROTTLING_LOW if (throttled || !td->track_bio_latency) diff --git a/include/linux/bio.h b/include/linux/bio.h index 82f0c8fd7be8..23d29b39f71e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx); #define bio_set_dev(bio, bdev) \ do { \ + if ((bio)->bi_disk != (bdev)->bd_disk) \ + bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ } while (0) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1e628e032da..9e7d8bd776d2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -50,8 +50,6 @@ struct blk_issue_stat { struct bio { struct bio *bi_next; /* request queue link */ struct gendisk *bi_disk; - u8 bi_partno; - blk_status_t bi_status; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use * accessors. @@ -59,8 +57,8 @@ struct bio { unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; unsigned short bi_write_hint; - - struct bvec_iter bi_iter; + blk_status_t bi_status; + u8 bi_partno; /* Number of segments in this BIO after * physical address coalescing is performed. @@ -74,8 +72,9 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - atomic_t __bi_remaining; + struct bvec_iter bi_iter; + atomic_t __bi_remaining; bio_end_io_t *bi_end_io; void *bi_private; -- cgit v1.2.3 From b3cf8528bb21febb650a7ecbf080d0647be40b9f Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 12 Dec 2017 15:08:21 -0500 Subject: xen/balloon: Mark unallocated host memory as UNUSABLE Commit f5775e0b6116 ("x86/xen: discard RAM regions above the maximum reservation") left host memory not assigned to dom0 as available for memory hotplug. Unfortunately this also meant that those regions could be used by others. 
Specifically, commit fa564ad96366 ("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f, 60-7f)") may try to map those addresses as MMIO. To prevent this mark unallocated host memory as E820_TYPE_UNUSABLE (thus effectively reverting f5775e0b6116) and keep track of that region as a hostmem resource that can be used for the hotplug. Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/enlighten.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/setup.c | 6 ++-- drivers/xen/balloon.c | 65 ++++++++++++++++++++++++++++++++------ include/xen/balloon.h | 5 +++ 4 files changed, 144 insertions(+), 13 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d669e9d89001..c9081c6671f0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,8 +1,12 @@ +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +#include +#endif #include #include #include #include +#include #include #include @@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +void __init arch_xen_balloon_init(struct resource *hostmem_resource) +{ + struct xen_memory_map memmap; + int rc; + unsigned int i, last_guest_ram; + phys_addr_t max_addr = PFN_PHYS(max_pfn); + struct e820_table *xen_e820_table; + const struct e820_entry *entry; + struct resource *res; + + if (!xen_initial_domain()) + return; + + xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); + if (!xen_e820_table) + return; + + memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); + set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); + rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); + if (rc) { + pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); + goto out; + } + + last_guest_ram = 0; + for (i = 0; i < memmap.nr_entries; i++) { + if (xen_e820_table->entries[i].addr >= max_addr) + break; + if (xen_e820_table->entries[i].type 
== E820_TYPE_RAM) + last_guest_ram = i; + } + + entry = &xen_e820_table->entries[last_guest_ram]; + if (max_addr >= entry->addr + entry->size) + goto out; /* No unallocated host RAM. */ + + hostmem_resource->start = max_addr; + hostmem_resource->end = entry->addr + entry->size; + + /* + * Mark non-RAM regions between the end of dom0 RAM and end of host RAM + * as unavailable. The rest of that region can be used for hotplug-based + * ballooning. + */ + for (; i < memmap.nr_entries; i++) { + entry = &xen_e820_table->entries[i]; + + if (entry->type == E820_TYPE_RAM) + continue; + + if (entry->addr >= hostmem_resource->end) + break; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto out; + + res->name = "Unavailable host RAM"; + res->start = entry->addr; + res->end = (entry->addr + entry->size < hostmem_resource->end) ? + entry->addr + entry->size : hostmem_resource->end; + rc = insert_resource(hostmem_resource, res); + if (rc) { + pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", + __func__, res->start, res->end, rc); + kfree(res); + goto out; + } + } + + out: + kfree(xen_e820_table); +} +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ac55c02f98e9..e9011e1ee3de 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -807,7 +807,6 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { - bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -823,11 +822,10 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - discard = true; + type = E820_TYPE_UNUSABLE; } - if (!discard) - xen_align_and_add_e820_region(addr, chunk_size, type); + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 
f77e499afddd..065f0b607373 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource) kfree(resource); } +/* + * Host memory not allocated to dom0. We can use this range for hotplug-based + * ballooning. + * + * It's a type-less resource. Setting IORESOURCE_MEM will make resource + * management algorithms (arch_remove_reservations()) look into guest e820, + * which we don't want. + */ +static struct resource hostmem_resource = { + .name = "Host RAM", +}; + +void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res) +{} + static struct resource *additional_memory_resource(phys_addr_t size) { - struct resource *res; - int ret; + struct resource *res, *res_hostmem; + int ret = -ENOMEM; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size) res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - ret = allocate_resource(&iomem_resource, res, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - if (ret < 0) { - pr_err("Cannot allocate new System RAM resource\n"); - kfree(res); - return NULL; + res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL); + if (res_hostmem) { + /* Try to grab a range from hostmem */ + res_hostmem->name = "Host memory"; + ret = allocate_resource(&hostmem_resource, res_hostmem, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + } + + if (!ret) { + /* + * Insert this resource into iomem. Because hostmem_resource + * tracks portion of guest e820 marked as UNUSABLE noone else + * should try to use it. 
+ */ + res->start = res_hostmem->start; + res->end = res_hostmem->end; + ret = insert_resource(&iomem_resource, res); + if (ret < 0) { + pr_err("Can't insert iomem_resource [%llx - %llx]\n", + res->start, res->end); + release_memory_resource(res_hostmem); + res_hostmem = NULL; + res->start = res->end = 0; + } + } + + if (ret) { + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; + } } #ifdef CONFIG_SPARSEMEM @@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size) pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", pfn, limit); release_memory_resource(res); + release_memory_resource(res_hostmem); return NULL; } } @@ -765,6 +810,8 @@ static int __init balloon_init(void) set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); register_sysctl_table(xen_root); + + arch_xen_balloon_init(&hostmem_resource); #endif #ifdef CONFIG_XEN_PV diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 8906361bb50c..d0adfc78dcbd 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -43,3 +43,8 @@ static inline void xen_balloon_init(void) { } #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +struct resource; +void arch_xen_balloon_init(struct resource *hostmem_resource); +#endif -- cgit v1.2.3 From 1c8e77fb361a4a116a41ac1d9819eb79d068735d Mon Sep 17 00:00:00 2001 From: Naresh Kamboju Date: Wed, 20 Dec 2017 12:50:22 +0530 Subject: selftests: net: Adding config fragment CONFIG_NUMA=y kernel config fragment CONFIG_NUMA=y is needed for reuseport_bpf_numa. Signed-off-by: Naresh Kamboju Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index e57b4ac40e72..7177bea1fdfa 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,3 +1,4 @@ CONFIG_USER_NS=y CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m +CONFIG_NUMA=y -- cgit v1.2.3 From bb25c3855a12cc58e33cd7ee9b69943790fe35f7 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 20 Dec 2017 11:03:15 +0100 Subject: tipc: remove joining group member from congested list When we receive a JOIN message from a peer member, the message may contain an advertised window value ADV_IDLE that permits removing the member in question from the tipc_group::congested list. However, since the removal has been made conditional on that the advertised window is *not* ADV_IDLE, we miss this case. This has the effect that a sender sometimes may enter a state of permanent, false, broadcast congestion. We fix this by unconditionally removing the member from the congested list before calling tipc_group_update_member(), which might potentially sort it into the list again. Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/group.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index bbc004eaa31a..7ebbdeb2a90e 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -689,10 +689,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); __skb_queue_tail(inputq, m->event_msg); } - if (m->window < ADV_IDLE) - tipc_group_update_member(m, 0); - else - list_del_init(&m->congested); + list_del_init(&m->congested); + tipc_group_update_member(m, 0); return; case GRP_LEAVE_MSG: if (!m) -- cgit v1.2.3 From ad3cbf61332914711e5f506972b1dc9af8d62146 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 20 Dec 2017 18:07:18 +0100 Subject: s390/qeth: fix error handling in checksum cmd callback Make sure to check both return code fields before processing the response. Otherwise we risk operating on invalid data. Fixes: c9475369bd2b ("s390/qeth: rework RX/TX checksum offload") Signed-off-by: Julian Wiedmann Signed-off-by: David S. 
Miller --- drivers/s390/net/qeth_core_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6c815207f4f5..3614df68830f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5386,6 +5386,13 @@ out: } EXPORT_SYMBOL_GPL(qeth_poll); +static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd) +{ + if (!cmd->hdr.return_code) + cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code; + return cmd->hdr.return_code; +} + int qeth_setassparms_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { @@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card, (struct qeth_checksum_cmd *)reply->param; QETH_CARD_TEXT(card, 4, "chkdoccb"); - if (cmd->hdr.return_code) + if (qeth_setassparms_inspect_rc(cmd)) return 0; memset(chksum_cb, 0, sizeof(*chksum_cb)); -- cgit v1.2.3 From b4681c2829e24943aadd1a7bb3a30d41d0a20050 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 20 Dec 2017 19:34:19 +0200 Subject: ipv4: Fix use-after-free when flushing FIB tables Since commit 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") the local table uses the same trie allocated for the main table when custom rules are not in use. When a net namespace is dismantled, the main table is flushed and freed (via an RCU callback) before the local table. In case the callback is invoked before the local table is iterated, a use-after-free can occur. Fix this by iterating over the FIB tables in reverse order, so that the main table is always freed after the local table. v3: Reworded comment according to Alex's suggestion. v2: Add a comment to make the fix more explicit per Dave's and Alex's feedback. Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") Signed-off-by: Ido Schimmel Reported-by: Fengguang Wu Acked-by: Alexander Duyck Signed-off-by: David S. 
Miller --- net/ipv4/fib_frontend.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f52d27a422c3..08259d078b1c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1298,14 +1298,19 @@ err_table_hash_alloc: static void ip_fib_net_exit(struct net *net) { - unsigned int i; + int i; rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif - for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + /* Destroy the tables in reverse order to guarantee that the + * local table, ID 255, is destroyed before the main table, ID + * 254. This is necessary as the local table may contain + * references to data contained in the main table. + */ + for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) { struct hlist_head *head = &net->ipv4.fib_table_hash[i]; struct hlist_node *tmp; struct fib_table *tb; -- cgit v1.2.3 From 4ccafe032005e9b96acbef2e389a4de5b1254add Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Dec 2017 13:13:58 -0700 Subject: block: unalign call_single_data in struct request A previous change blindly added massive alignment to the call_single_data structure in struct request. This ballooned it in size from 296 to 320 bytes on my setup, for no valid reason at all. Use the unaligned struct __call_single_data variant instead. 
Fixes: 966a967116e69 ("smp: Avoid using two cache lines for struct call_single_data") Cc: stable@vger.kernel.org # v4.14 Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 100d0df38026..0ce8a372d506 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t; struct request { struct list_head queuelist; union { - call_single_data_t csd; + struct __call_single_data csd; u64 fifo_time; }; -- cgit v1.2.3 From 0864fe09ab90ab32b7d21fe3cd72df5b5af8492e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Dec 2017 13:14:42 -0700 Subject: null_blk: unalign call_single_data Commit 966a967116e6 randomly added alignment to this structure, but it's actually detrimental to performance of null_blk. Test case: Running on both the home and remote node shows a ~5% degradation in performance. While in there, move blk_status_t to the hole after the integer tag in the nullb_cmd structure. After this patch, we shrink the size from 192 to 152 bytes. 
Fixes: 966a967116e69 ("smp: Avoid using two cache lines for struct call_single_data") Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index ccb9975a97fa..ad0477ae820f 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps) struct nullb_cmd { struct list_head list; struct llist_node ll_list; - call_single_data_t csd; + struct __call_single_data csd; struct request *rq; struct bio *bio; unsigned int tag; + blk_status_t error; struct nullb_queue *nq; struct hrtimer timer; - blk_status_t error; }; struct nullb_queue { -- cgit v1.2.3 From d0729bc6bee797fb4bcca87583af5adbfe79ecfb Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 11 Dec 2017 21:50:25 +0900 Subject: arc: do not use __print_symbol() __print_symbol() uses extra stack space to sprintf() symbol information and then to feed that buffer to printk() char buffer[KSYM_SYMBOL_LEN]; sprint_symbol(buffer, address); printk(fmt, buffer); Replace __print_symbol() with a direct printk("%pS") call. 
Signed-off-by: Sergey Senozhatsky Signed-off-by: Vineet Gupta --- arch/arc/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 74315f302971..bf40e06f3fb8 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, */ static int __print_sym(unsigned int address, void *unused) { - __print_symbol(" %s\n", address); + printk(" %pS\n", (void *)address); return 0; } -- cgit v1.2.3 From c18fc9071762769acb4040cabae45c817aefc537 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 5 Dec 2017 13:19:38 +0300 Subject: ARC: [plat-hsdk] Switch DisplayLink driver from fbdev to DRM Currently there're 2 different implementations of the driver for DisplayLink USB2.0-to-HDMI/DVI adapters: older FBDEV and modern true DRM. We initially decided to use FBDEV version just because with it /dev/fbX is usable from user-space while in DRM version with DRM_FBDEV_EMULATION user-space cannot draw anything on a real screen, for more info read [1]. But today /dev/fbX is not that important as more and more software projects switch to use of DRI (/dev/dri/cardX). But what's even more important DRM driver allows building of complicated graphics processing chains. The most important for us is rendering of 3D on a dedicated GPU while outputting video through a simpler bitstreamer like DisplayLink. So let's use much more future-proof driver from now on. 
[1] https://lists.freedesktop.org/archives/dri-devel/2017-December/159519.html Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/configs/hsdk_defconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 7b8f8faf8a24..ac6b0ed8341e 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_UDL=y CONFIG_FB=y -CONFIG_FB_UDL=y CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y -- cgit v1.2.3 From a08c832f277d7a6f9d3b341a5d5df2f5576220d8 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Sat, 9 Dec 2017 16:59:15 +0300 Subject: ARC: [plat-hsdk]: Set initial core pll output frequency Set initial core pll output frequency specified in device tree to 1GHz. It will be applied at the core pll driver probing. Acked-by: Stephen Boyd Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 8f627c200d60..006aa3de5348 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -114,6 +114,14 @@ reg = <0x00 0x10>, <0x14B8 0x4>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 1GHz. + * It will be applied at the core pll driver probing + * on early boot. 
+ */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <1000000000>; }; serial: serial@5000 { -- cgit v1.2.3 From 7bde846d0957fb81ac0bf8c4e2cab284a1da34e0 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Sat, 9 Dec 2017 16:59:16 +0300 Subject: ARC: [plat-hsdk]: Get rid of core pll frequency set in platform code Get rid of core pll frequency set in platform code as we set it via device tree using 'assigned-clock-rates' property. Acked-by: Stephen Boyd Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/plat-hsdk/platform.c | 42 ------------------------------------------ 1 file changed, 42 deletions(-) diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index fd0ae5e38639..2958aedb649a 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define CREG_PAE (CREG_BASE + 0x180) #define CREG_PAE_UPDATE (CREG_BASE + 0x194) -#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8) -#define CREG_CORE_IF_CLK_DIV_2 0x1 -#define CGU_BASE ARC_PERIPHERAL_BASE -#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4) -#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0) -#define CGU_PLL_STATUS_LOCK BIT(0) -#define CGU_PLL_STATUS_ERR BIT(1) -#define CGU_PLL_CTRL_1GHZ 0x3A10 -#define HSDK_PLL_LOCK_TIMEOUT 500 - -#define HSDK_PLL_LOCKED() \ - !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK) - -#define HSDK_PLL_ERR() \ - !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR) - -static void __init hsdk_set_cpu_freq_1ghz(void) -{ - u32 timeout = HSDK_PLL_LOCK_TIMEOUT; - - /* - * As we set cpu clock which exceeds 500MHz, the divider for the interface - * clock must be programmed to div-by-2. 
- */ - iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV); - - /* Set cpu clock to 1GHz */ - iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL); - - while (!HSDK_PLL_LOCKED() && timeout--) - cpu_relax(); - - if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR()) - pr_err("Failed to setup CPU frequency to 1GHz!"); -} - #define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) #define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) @@ -98,12 +62,6 @@ static void __init hsdk_init_early(void) * minimum possible div-by-2. */ iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); - - /* - * Setup CPU frequency to 1GHz. - * TODO: remove it after smart hsdk pll driver will be introduced. - */ - hsdk_set_cpu_freq_1ghz(); } static const char *hsdk_compat[] __initconst = { -- cgit v1.2.3 From fbd1cec57064aa1380726ec899c49fcd84e702b9 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Sat, 9 Dec 2017 16:59:17 +0300 Subject: ARC: [plat-axs103]: Set initial core pll output frequency Set initial core pll output frequency specified in device tree to 100MHz for SMP configuration and 90MHz for UP configuration. It will be applied at the core pll driver probing. Update platform quirk for decreasing core frequency for quad core configuration. Acked-by: Stephen Boyd Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/axc003.dtsi | 8 ++++++++ arch/arc/boot/dts/axc003_idu.dtsi | 8 ++++++++ arch/arc/plat-axs10x/axs10x.c | 8 ++------ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 4e6e9f57e790..dc91c663bcc0 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -35,6 +35,14 @@ reg = <0x80 0x10>, <0x100 0x10>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 90MHz. + * It will be applied at the core pll driver probing + * on early boot. 
+ */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <90000000>; }; core_intc: archs-intc@cpu { diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 63954a8b0100..69ff4895f2ba 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -35,6 +35,14 @@ reg = <0x80 0x10>, <0x100 0x10>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 100MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <100000000>; }; core_intc: archs-intc@cpu { diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index f1ac6790da5f..ac1a712f6f1f 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -320,22 +320,18 @@ static void __init axs103_early_init(void) unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; if (num_cores > 2) { u32 freq = 50, orig; - /* - * TODO: use cpu node "cpu-freq" param instead of platform-specific - * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu. 
- */ int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); const struct fdt_property *prop; prop = fdt_get_property(initial_boot_params, off, - "clock-frequency", NULL); + "assigned-clock-rates", NULL); orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000; /* Patching .dtb in-place with new core clock value */ if (freq != orig ) { freq = cpu_to_be32(freq * 1000000); fdt_setprop_inplace(initial_boot_params, off, - "clock-frequency", &freq, sizeof(freq)); + "assigned-clock-rates", &freq, sizeof(freq)); } } #endif -- cgit v1.2.3 From d7de73b586b2db540187ff8a077330fa1a8efd64 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Sat, 9 Dec 2017 16:59:18 +0300 Subject: ARC: [plat-axs103] refactor the quad core DT quirk code Refactor the quad core DT quirk code: get rid of waste division and multiplication by 1000000 constant. Acked-by: Stephen Boyd Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/plat-axs10x/axs10x.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index ac1a712f6f1f..46544e88492d 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -317,19 +317,21 @@ static void __init axs103_early_init(void) * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack * of fudging the freq in DT */ +#define AXS103_QUAD_CORE_CPU_FREQ_HZ 50000000 + unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; if (num_cores > 2) { - u32 freq = 50, orig; + u32 freq; int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); const struct fdt_property *prop; prop = fdt_get_property(initial_boot_params, off, "assigned-clock-rates", NULL); - orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000; + freq = be32_to_cpu(*(u32 *)(prop->data)); /* Patching .dtb in-place with new core clock value */ - if (freq != orig ) { - freq = cpu_to_be32(freq * 1000000); + if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) { + 
freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ); fdt_setprop_inplace(initial_boot_params, off, "assigned-clock-rates", &freq, sizeof(freq)); } -- cgit v1.2.3 From 79435ac78d160e4c245544d457850a56f805ac0d Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 8 Dec 2017 08:26:58 -0800 Subject: ARC: uaccess: dont use "l" gcc inline asm constraint modifier This used to setup the LP_COUNT register automatically, but now has been removed. There was an earlier fix 3c7c7a2fc8811 which fixed instance in delay.h but somehow missed this one as gcc change had not made its way into production toolchains and was not pedantic as it is now ! Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/include/asm/uaccess.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index f35974ee7264..c9173c02081c 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) return 0; __asm__ __volatile__( + " mov lp_count, %5 \n" " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" " breq.d %3, 0, 3f \n" @@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .word 1b, 4b \n" " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) - : "g"(-EFAULT), "l"(count) - : "memory"); + : "g"(-EFAULT), "r"(count) + : "lp_count", "lp_start", "lp_end", "memory"); return res; } -- cgit v1.2.3 From 24c0df82ef7919e4d10cf2e4e65d368eb2e8ea21 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Dec 2017 12:01:21 +0100 Subject: netfilter: nf_tables: fix chain filter in nf_tables_dump_rules() ctx->chain may be null now that we have very large object names, so we cannot check for ctx->chain[0] here. 
Fixes: b7263e071aba7 ("netfilter: nf_tables: Allow table names of up to 255 chars") Signed-off-by: Pablo Neira Ayuso Acked-by: Phil Sutter --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 10798b357481..8d4526651661 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, continue; list_for_each_entry_rcu(chain, &table->chains, list) { - if (ctx && ctx->chain[0] && + if (ctx && ctx->chain && strcmp(ctx->chain, chain->name) != 0) continue; -- cgit v1.2.3 From f5a16b93e6291ba1f65f55647cb4cd8d75ed1b35 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 20 Dec 2017 12:37:54 -0800 Subject: ARC: handle gcc generated __builtin_trap() gcc toggle -fisolate-erroneous-paths-dereference (default at -O2 onwards) isolates faulty code paths such as null pointer access, divide by zero etc by emitting __builtin_trap() Newer ARC gcc generates TRAP_S 5 instruction which needs to be handled and treated like any other unexpected exception - user mode : task terminated with a SEGV - kernel mode: die() called after register and stack dump Signed-off-by: Vineet Gupta --- arch/arc/kernel/traps.c | 6 ++++++ arch/arc/kernel/troubleshoot.c | 3 +++ 2 files changed, 9 insertions(+) diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index bcd7c9fc5d0f..004f4e4a4c10 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC) DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR) DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT) DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) +DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0) /* * Entry Point for Misaligned Data access 
Exception, for emulating in software @@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs) * Thus TRAP_S can be used for specific purpose * -1 used for software breakpointing (gdb) * -2 used by kprobes + * -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as + * -fno-isolate-erroneous-paths-dereference */ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) { @@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) kgdb_trap(regs); break; + case 5: + do_trap5_error(address, regs); + break; default: break; } diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 7d8c1d6c2f60..6e9a0a9a6a04 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs) else pr_cont("Bus Error, check PRM\n"); #endif + } else if (vec == ECR_V_TRAP) { + if (regs->ecr_param == 5) + pr_cont("gcc generated __builtin_trap\n"); } else { pr_cont("Check Programmer's Manual\n"); } -- cgit v1.2.3 From 91aae6be4139b9e3902656d819e6af66e051bd7a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Dec 2017 15:42:22 -0800 Subject: xfs: track cowblocks separately in i_flags The EOFBLOCKS/COWBLOCKS tags are totally separate things, so track them with separate i_flags. Right now we're abusing IEOFBLOCKS for both, which is totally bogus because we won't tag the inode with COWBLOCKS if IEOFBLOCKS was set by a previous tagging of the inode with EOFBLOCKS. Found by wiring up clonerange to fsstress in xfs/017. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_icache.c | 33 ++++++++++++++++++++++++--------- fs/xfs/xfs_inode.h | 1 + 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 43005fbe8b1e..58d2d4253c8e 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks( return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); } +static inline unsigned long +xfs_iflag_for_tag( + int tag) +{ + switch (tag) { + case XFS_ICI_EOFBLOCKS_TAG: + return XFS_IEOFBLOCKS; + case XFS_ICI_COWBLOCKS_TAG: + return XFS_ICOWBLOCKS; + default: + ASSERT(0); + return 0; + } +} + static void -__xfs_inode_set_eofblocks_tag( +__xfs_inode_set_blocks_tag( xfs_inode_t *ip, void (*execute)(struct xfs_mount *mp), void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag( * Don't bother locking the AG and looking up in the radix trees * if we already know that we have the tag set. 
*/ - if (ip->i_flags & XFS_IEOFBLOCKS) + if (ip->i_flags & xfs_iflag_for_tag(tag)) return; spin_lock(&ip->i_flags_lock); - ip->i_flags |= XFS_IEOFBLOCKS; + ip->i_flags |= xfs_iflag_for_tag(tag); spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_eofblocks_tag(ip); - return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, + return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks, trace_xfs_perag_set_eofblocks, XFS_ICI_EOFBLOCKS_TAG); } static void -__xfs_inode_clear_eofblocks_tag( +__xfs_inode_clear_blocks_tag( xfs_inode_t *ip, void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, int error, unsigned long caller_ip), @@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag( struct xfs_perag *pag; spin_lock(&ip->i_flags_lock); - ip->i_flags &= ~XFS_IEOFBLOCKS; + ip->i_flags &= ~xfs_iflag_for_tag(tag); spin_unlock(&ip->i_flags_lock); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_eofblocks_tag(ip); - return __xfs_inode_clear_eofblocks_tag(ip, + return __xfs_inode_clear_blocks_tag(ip, trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); } @@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_set_cowblocks_tag(ip); - return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, + return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks, trace_xfs_perag_set_cowblocks, XFS_ICI_COWBLOCKS_TAG); } @@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag( xfs_inode_t *ip) { trace_xfs_inode_clear_cowblocks_tag(ip); - return __xfs_inode_clear_eofblocks_tag(ip, + return __xfs_inode_clear_blocks_tag(ip, trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b2136af9289f..d383e392ec9d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ 
-232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) * log recovery to replay a bmap operation on the inode. */ #define XFS_IRECOVERY (1 << 11) +#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */ /* * Per-lifetime flags need to be reset when re-using a reclaimable inode during -- cgit v1.2.3 From 4374f256ce8182019353c0c639bb8d0695b4c941 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 18 Dec 2017 20:11:53 -0800 Subject: bpf/verifier: fix bounds calculation on BPF_RSH Incorrect signed bounds were being computed. If the old upper signed bound was positive and the old lower signed bound was negative, this could cause the new upper signed bound to be too low, leading to security issues. Fixes: b03c9f9fdc37 ("bpf/verifier: track signed and unsigned min/max values") Reported-by: Jann Horn Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov [jannh@google.com: changed description to reflect bug impact] Signed-off-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e39b01317b6f..625e358ca765 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2190,20 +2190,22 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, mark_reg_unknown(env, regs, insn->dst_reg); break; } - /* BPF_RSH is an unsigned shift, so make the appropriate casts */ - if (dst_reg->smin_value < 0) { - if (umin_val) { - /* Sign bit will be cleared */ - dst_reg->smin_value = 0; - } else { - /* Lost sign bit information */ - dst_reg->smin_value = S64_MIN; - dst_reg->smax_value = S64_MAX; - } - } else { - dst_reg->smin_value = - (u64)(dst_reg->smin_value) >> umax_val; - } + /* BPF_RSH is an unsigned shift. 
If the value in dst_reg might + * be negative, then either: + * 1) src_reg might be zero, so the sign bit of the result is + * unknown, so we lose our signed bounds + * 2) it's known negative, thus the unsigned bounds capture the + * signed bounds + * 3) the signed bounds cross zero, so they tell us nothing + * about the result + * If the value in dst_reg is known nonnegative, then again the + * unsigned bounds capture the signed bounds. + * Thus, in all cases it suffices to blow away our signed bounds + * and rely on inferring new ones from the unsigned bounds and + * var_off of the result. + */ + dst_reg->smin_value = S64_MIN; + dst_reg->smax_value = S64_MAX; if (src_known) dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); -- cgit v1.2.3 From 95a762e2c8c942780948091f8f2a4f32fce1ac6f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Dec 2017 20:11:54 -0800 Subject: bpf: fix incorrect sign extension in check_alu_op() Distinguish between BPF_ALU64|BPF_MOV|BPF_K (load 32-bit immediate, sign-extended to 64-bit) and BPF_ALU|BPF_MOV|BPF_K (load 32-bit immediate, zero-padded to 64-bit); only perform sign extension in the first case. Starting with v4.14, this is exploitable by unprivileged users as long as the unprivileged_bpf_disabled sysctl isn't set. Debian assigned CVE-2017-16995 for this issue.
v3: - add CVE number (Ben Hutchings) Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Jann Horn Acked-by: Edward Cree Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 625e358ca765..c086010ae51e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2408,7 +2408,13 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * remember the value we stored into this reg */ regs[insn->dst_reg].type = SCALAR_VALUE; - __mark_reg_known(regs + insn->dst_reg, insn->imm); + if (BPF_CLASS(insn->code) == BPF_ALU64) { + __mark_reg_known(regs + insn->dst_reg, + insn->imm); + } else { + __mark_reg_known(regs + insn->dst_reg, + (u32)insn->imm); + } } } else if (opcode > BPF_END) { -- cgit v1.2.3 From 0c17d1d2c61936401f4702e1846e2c19b200f958 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Dec 2017 20:11:55 -0800 Subject: bpf: fix incorrect tracking of register size truncation Properly handle register truncation to a smaller size. The old code first mirrors the clearing of the high 32 bits in the bitwise tristate representation, which is correct. But then, it computes the new arithmetic bounds as the intersection between the old arithmetic bounds and the bounds resulting from the bitwise tristate representation. Therefore, when coerce_reg_to_32() is called on a number with bounds [0xffff'fff8, 0x1'0000'0007], the verifier computes [0xffff'fff8, 0xffff'ffff] as bounds of the truncated number. This is incorrect: The truncated number could also be in the range [0, 7], and no meaningful arithmetic bounds can be computed in that case apart from the obvious [0, 0xffff'ffff]. Starting with v4.14, this is exploitable by unprivileged users as long as the unprivileged_bpf_disabled sysctl isn't set. Debian assigned CVE-2017-16996 for this issue. 
v2: - flip the mask during arithmetic bounds calculation (Ben Hutchings) v3: - add CVE number (Ben Hutchings) Fixes: b03c9f9fdc37 ("bpf/verifier: track signed and unsigned min/max values") Signed-off-by: Jann Horn Acked-by: Edward Cree Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c086010ae51e..f716bdf29dd0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1067,6 +1067,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); } +/* truncate register to smaller size (in bytes) + * must be called with size < BPF_REG_SIZE + */ +static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) +{ + u64 mask; + + /* clear high bits in bit representation */ + reg->var_off = tnum_cast(reg->var_off, size); + + /* fix arithmetic bounds */ + mask = ((u64)1 << (size * 8)) - 1; + if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { + reg->umin_value &= mask; + reg->umax_value &= mask; + } else { + reg->umin_value = 0; + reg->umax_value = mask; + } + reg->smin_value = reg->umin_value; + reg->smax_value = reg->umax_value; +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -1200,9 +1223,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && regs[value_regno].type == SCALAR_VALUE) { /* b/h/w load zero-extends, mark upper bits as known 0 */ - regs[value_regno].var_off = - tnum_cast(regs[value_regno].var_off, size); - __update_reg_bounds(&regs[value_regno]); + coerce_reg_to_size(&regs[value_regno], size); } return err; } @@ -1772,14 +1793,6 @@
static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return 0; } -static void coerce_reg_to_32(struct bpf_reg_state *reg) -{ - /* clear high 32 bits */ - reg->var_off = tnum_cast(reg->var_off, 4); - /* Update bounds */ - __update_reg_bounds(reg); -} - static bool signed_add_overflows(s64 a, s64 b) { /* Do the add in u64, where overflow is well-defined */ @@ -2017,8 +2030,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops are (32,32)->64 */ - coerce_reg_to_32(dst_reg); - coerce_reg_to_32(&src_reg); + coerce_reg_to_size(dst_reg, 4); + coerce_reg_to_size(&src_reg, 4); } smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; @@ -2398,10 +2411,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } mark_reg_unknown(env, regs, insn->dst_reg); - /* high 32 bits are known zero. */ - regs[insn->dst_reg].var_off = tnum_cast( - regs[insn->dst_reg].var_off, 4); - __update_reg_bounds(&regs[insn->dst_reg]); + coerce_reg_to_size(&regs[insn->dst_reg], 4); } } else { /* case: R = imm -- cgit v1.2.3 From 468f6eafa6c44cb2c5d8aad35e12f06c240a812a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Dec 2017 20:11:56 -0800 Subject: bpf: fix 32-bit ALU op verification 32-bit ALU ops operate on 32-bit values and have 32-bit outputs. Adjust the verifier accordingly.
Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f716bdf29dd0..ecdc265244ca 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2017,6 +2017,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return 0; } +/* WARNING: This function does calculations on 64-bit values, but the actual + * execution may occur on 32-bit values. Therefore, things like bitshifts + * need extra checks in the 32-bit case. + */ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_reg_state *dst_reg, @@ -2027,12 +2031,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, bool src_known, dst_known; s64 smin_val, smax_val; u64 umin_val, umax_val; + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; - if (BPF_CLASS(insn->code) != BPF_ALU64) { - /* 32-bit ALU ops are (32,32)->64 */ - coerce_reg_to_size(dst_reg, 4); - coerce_reg_to_size(&src_reg, 4); - } smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; umin_val = src_reg.umin_value; @@ -2168,9 +2168,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, __update_reg_bounds(dst_reg); break; case BPF_LSH: - if (umax_val > 63) { - /* Shifts greater than 63 are undefined. This includes - * shifts by a negative number. + if (umax_val >= insn_bitness) { + /* Shifts greater than 31 or 63 are undefined. + * This includes shifts by a negative number. */ mark_reg_unknown(env, regs, insn->dst_reg); break; @@ -2196,9 +2196,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, __update_reg_bounds(dst_reg); break; case BPF_RSH: - if (umax_val > 63) { - /* Shifts greater than 63 are undefined. 
This includes - * shifts by a negative number. + if (umax_val >= insn_bitness) { + /* Shifts greater than 31 or 63 are undefined. + * This includes shifts by a negative number. */ mark_reg_unknown(env, regs, insn->dst_reg); break; @@ -2234,6 +2234,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, break; } + if (BPF_CLASS(insn->code) != BPF_ALU64) { + /* 32-bit ALU ops are (32,32)->32 */ + coerce_reg_to_size(dst_reg, 4); + coerce_reg_to_size(&src_reg, 4); + } + __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); return 0; -- cgit v1.2.3 From ea25f914dc164c8d56b36147ecc86bc65f83c469 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Dec 2017 20:11:57 -0800 Subject: bpf: fix missing error return in check_stack_boundary() Prevent indirect stack accesses at non-constant addresses, which would permit reading and corrupting spilled pointers. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ecdc265244ca..77e4b5223867 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1303,6 +1303,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); verbose(env, "invalid variable stack read R%d var_off=%s\n", regno, tn_buf); + return -EACCES; } off = regs[regno].off + regs[regno].var_off.value; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || -- cgit v1.2.3 From a5ec6ae161d72f01411169a938fa5f8baea16e8f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Dec 2017 20:11:58 -0800 Subject: bpf: force strict alignment checks for stack pointers Force strict alignment checks for stack pointers because the tracking of stack spills relies on it; unaligned stack accesses can lead to corruption of spilled registers, which is 
exploitable. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 77e4b5223867..102c519836f6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, break; case PTR_TO_STACK: pointer_desc = "stack "; + /* The stack spill tracking logic in check_stack_write() + * and check_stack_read() relies on stack accesses being + * aligned. + */ + strict = true; break; default: break; -- cgit v1.2.3 From 179d1c5602997fef5a940c6ddcf31212cbfebd14 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Dec 2017 20:11:59 -0800 Subject: bpf: don't prune branches when a scalar is replaced with a pointer This could be made safe by passing through a reference to env and checking for env->allow_ptr_leaks, but it would only work one way and is probably not worth the hassle - not doing it will not directly lead to program rejection. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 102c519836f6..982bd9ec721a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3467,15 +3467,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); } else { - /* if we knew anything about the old value, we're not - * equal, because we can't know anything about the - * scalar value of the pointer in the new value. + /* We're trying to use a pointer in place of a scalar. 
+ * Even if the scalar was unbounded, this could lead to + * pointer leaks because scalars are allowed to leak + * while pointers are not. We could make this safe in + * special cases if root is calling us, but it's + * probably not worth the hassle. */ - return rold->umin_value == 0 && - rold->umax_value == U64_MAX && - rold->smin_value == S64_MIN && - rold->smax_value == S64_MAX && - tnum_is_unknown(rold->var_off); + return false; } case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and -- cgit v1.2.3 From bb7f0f989ca7de1153bd128a40a71709e339fa03 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 18 Dec 2017 20:12:00 -0800 Subject: bpf: fix integer overflows There were various issues related to the limited size of integers used in the verifier: - `off + size` overflow in __check_map_access() - `off + reg->off` overflow in check_mem_access() - `off + reg->var_off.value` overflow or 32-bit truncation of `reg->var_off.value` in check_mem_access() - 32-bit truncation in check_stack_boundary() Make sure that any integer math cannot overflow by not allowing pointer math with large values. Also reduce the scope of "scalar op scalar" tracking. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 4 ++-- kernel/bpf/verifier.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c561b986bab0..1632bb13ad8a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -15,11 +15,11 @@ * In practice this is far bigger than any realistic pointer offset; this limit * ensures that umax_value + (int)off + (int)size cannot overflow a u64. */ -#define BPF_MAX_VAR_OFF (1ULL << 31) +#define BPF_MAX_VAR_OFF (1 << 29) /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. 
This ensures * that converting umax_value to int cannot overflow. */ -#define BPF_MAX_VAR_SIZ INT_MAX +#define BPF_MAX_VAR_SIZ (1 << 29) /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 982bd9ec721a..86dfe6b5c243 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1819,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b) return res > a; } +static bool check_reg_sane_offset(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + enum bpf_reg_type type) +{ + bool known = tnum_is_const(reg->var_off); + s64 val = reg->var_off.value; + s64 smin = reg->smin_value; + + if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { + verbose(env, "math between %s pointer and %lld is not allowed\n", + reg_type_str[type], val); + return false; + } + + if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { + verbose(env, "%s pointer offset %d is not allowed\n", + reg_type_str[type], reg->off); + return false; + } + + if (smin == S64_MIN) { + verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", + reg_type_str[type]); + return false; + } + + if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { + verbose(env, "value %lld makes %s pointer be out of bounds\n", + smin, reg_type_str[type]); + return false; + } + + return true; +} + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. 
* If we return -EACCES, caller may want to try again treating pointer as a @@ -1887,6 +1922,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg->type = ptr_reg->type; dst_reg->id = ptr_reg->id; + if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || + !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) + return -EINVAL; + switch (opcode) { case BPF_ADD: /* We can take a fixed offset as long as it doesn't overflow @@ -2017,6 +2056,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, return -EACCES; } + if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) + return -EINVAL; + __update_reg_bounds(dst_reg); __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); @@ -2046,6 +2088,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, src_known = tnum_is_const(src_reg.var_off); dst_known = tnum_is_const(dst_reg->var_off); + if (!src_known && + opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { + __mark_reg_unknown(dst_reg); + return 0; + } + switch (opcode) { case BPF_ADD: if (signed_add_overflows(dst_reg->smin_value, smin_val) || -- cgit v1.2.3 From 2255f8d520b0a318fc6d387d0940854b2f522a7f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 18 Dec 2017 20:12:01 -0800 Subject: selftests/bpf: add tests for recent bugfixes These tests should cover the following cases: - MOV with both zero-extended and sign-extended immediates - implicit truncation of register contents via ALU32/MOV32 - implicit 32-bit truncation of ALU32 output - oversized register source operand for ALU32 shift - right-shift of a number that could be positive or negative - map access where adding the operation size to the offset causes signed 32-bit overflow - direct stack access at a ~4GiB offset Also remove the F_LOAD_WITH_STRICT_ALIGNMENT flag from a bunch of tests that should fail independent of what flags userspace passes. 
Signed-off-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_verifier.c | 549 +++++++++++++++++++++++++++- 1 file changed, 533 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b03ecfd7185b..961c1426fbf2 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -606,7 +606,6 @@ static struct bpf_test tests[] = { }, .errstr = "misaligned stack access", .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "invalid map_fd for function call", @@ -1797,7 +1796,6 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "PTR_TO_STACK store/load - bad alignment on reg", @@ -1810,7 +1808,6 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "PTR_TO_STACK store/load - out of bounds low", @@ -6324,7 +6321,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6348,7 +6345,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6374,7 +6371,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R8 invalid mem access 'inv'", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6399,7 +6396,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R8 invalid mem access 'inv'", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6447,7 +6444,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), 
}, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6518,7 +6515,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6569,7 +6566,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6596,7 +6593,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6622,7 +6619,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6651,7 +6648,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6681,7 +6678,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map1 = { 4 }, - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -6709,8 +6706,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer comparison prohibited", - .errstr = "R0 min value is negative", + .errstr = "unbounded min value", .result = REJECT, .result_unpriv = REJECT, }, @@ -6765,6 +6761,462 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, }, + { + "bounds check based on zero-extended MOV", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP 
| BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0x0000'0000'ffff'ffff */ + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0 */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT + }, + { + "bounds check based on sign-extended MOV. test1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0xffff'ffff'ffff'ffff */ + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0xffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), + /* r0 = <oob pointer> */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access to OOB pointer */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "map_value pointer and 4294967295", + .result = REJECT + }, + { + "bounds check based on sign-extended MOV. 
test2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0xffff'ffff'ffff'ffff */ + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0xfff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36), + /* r0 = <oob pointer> */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access to OOB pointer */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R0 min value is outside of the array range", + .result = REJECT + }, + { + "bounds check based on reg_off + var_off + insn_off. test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "value_size=8 off=1073741825", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "bounds check based on reg_off + var_off + insn_off. 
test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .errstr = "value 1073741823", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "bounds check after truncation of non-boundary-crossing range", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + /* r2 = 0x10'0000'0000 */ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36), + /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + /* r1 = [0x00, 0xff] */ + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff), + /* r1 = 0 */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT + }, + { + "bounds check after truncation of boundary-crossing range (1)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), 
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0xffff'ffff] or + * [0x0000'0000, 0x0000'007f] + */ + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0x00, 0xff] or + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = 0 or + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op or OOB pointer computation */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + /* not actually fully unbounded, but the bound is very high */ + .errstr = "R0 unbounded memory access", + .result = REJECT + }, + { + "bounds check after truncation of boundary-crossing range (2)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0xffff'ffff] or + * [0x0000'0000, 0x0000'007f] + * difference to previous test: truncation via MOV32 + * instead of ALU32. 
+ */ + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0x00, 0xff] or + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = 0 or + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op or OOB pointer computation */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + /* not actually fully unbounded, but the bound is very high */ + .errstr = "R0 unbounded memory access", + .result = REJECT + }, + { + "bounds check after wrapping 32-bit addition", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + /* r1 = 0x7fff'ffff */ + BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff), + /* r1 = 0xffff'fffe */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + /* r1 = 0 */ + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT + }, + { + "bounds check after shift with oversized count operand", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_2, 32), + BPF_MOV64_IMM(BPF_REG_1, 1), + /* r1 = (u32)1 << (u32)32 = ? 
*/ + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2), + /* r1 = [0x0000, 0xffff] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff), + /* computes unknown pointer, potentially OOB */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R0 max value is outside of the array range", + .result = REJECT + }, + { + "bounds check after right shift of maybe-negative number", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + /* r1 = [-0x01, 0xfe] */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + /* r1 = 0 or 0xff'ffff'ffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* r1 = 0 or 0xffff'ffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* computes unknown pointer, potentially OOB */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R0 unbounded memory access", + .result = REJECT + }, + { + "bounds check map access with off+size signed 32bit overflow. 
test1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "map_value pointer and 2147483646", + .result = REJECT + }, + { + "bounds check map access with off+size signed 32bit overflow. test2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "pointer offset 1073741822", + .result = REJECT + }, + { + "bounds check map access with off+size signed 32bit overflow. test3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "pointer offset -1073741822", + .result = REJECT + }, + { + "bounds check map access with off+size signed 32bit overflow. 
test4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 1000000), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "map_value pointer and 1000000000000", + .result = REJECT + }, + { + "pointer/scalar confusion in state equality check (way 1)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr as return value" + }, + { + "pointer/scalar confusion in state equality check (way 2)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_JMP_A(1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr as return value" + }, { "variable-offset ctx access", .insns = { @@ -6806,6 +7258,71 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, + 
{ + "indirect variable-offset stack access", + .insns = { + /* Fill the top 8 bytes of the stack */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), + /* add it to fp. We now have either fp-4 or fp-8, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* dereference it indirectly */ + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 5 }, + .errstr = "variable stack read R2", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, + }, + { + "direct stack access with 32-bit wraparound. test1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN() + }, + .errstr = "fp pointer and 2147483647", + .result = REJECT + }, + { + "direct stack access with 32-bit wraparound. test2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN() + }, + .errstr = "fp pointer and 1073741823", + .result = REJECT + }, + { + "direct stack access with 32-bit wraparound. 
test3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN() + }, + .errstr = "fp pointer offset 1073741822", + .result = REJECT + }, { "liveness pruning and write screening", .insns = { -- cgit v1.2.3 From 82abbf8d2fc46d79611ab58daa7c608df14bb3ee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 18 Dec 2017 20:15:20 -0800 Subject: bpf: do not allow root to mangle valid pointers Do not allow root to convert valid pointers into unknown scalars. In particular disallow: ptr &= reg ptr <<= reg ptr += ptr and explicitly allow: ptr -= ptr since pkt_end - pkt == length 1. This minimizes amount of address leaks root can do. In the future may need to further tighten the leaks with kptr_restrict. 2. If program has such pointer math it's likely a user mistake and when verifier complains about it right away instead of many instructions later on invalid memory access it's easier for users to fix their progs. 3. when register holding a pointer cannot change to scalar it allows JITs to optimize better. Like 32-bit archs could use single register for pointers instead of a pair required to hold 64-bit scalars. 4. reduces architecture dependent behavior. Since code: r1 = r10; r1 &= 0xff; if (r1 ...) will behave differently arm64 vs x64 and offloaded vs native. A significant chunk of ptr mangling was allowed by commit f1174f77b50c ("bpf/verifier: rework value tracking") yet some of it was allowed even earlier. 
Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 102 ++++++++++------------------ tools/testing/selftests/bpf/test_verifier.c | 56 +++++++-------- 2 files changed, 63 insertions(+), 95 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 86dfe6b5c243..04b24876cd23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1890,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops on pointers produce (meaningless) scalars */ - if (!env->allow_ptr_leaks) - verbose(env, - "R%d 32-bit pointer arithmetic prohibited\n", - dst); + verbose(env, + "R%d 32-bit pointer arithmetic prohibited\n", + dst); return -EACCES; } if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { - if (!env->allow_ptr_leaks) - verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", - dst); + verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", + dst); return -EACCES; } if (ptr_reg->type == CONST_PTR_TO_MAP) { - if (!env->allow_ptr_leaks) - verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", - dst); + verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", + dst); return -EACCES; } if (ptr_reg->type == PTR_TO_PACKET_END) { - if (!env->allow_ptr_leaks) - verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", - dst); + verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", + dst); return -EACCES; } @@ -1979,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case BPF_SUB: if (dst_reg == off_reg) { /* scalar -= pointer. 
Creates an unknown scalar */ - if (!env->allow_ptr_leaks) - verbose(env, "R%d tried to subtract pointer from scalar\n", - dst); + verbose(env, "R%d tried to subtract pointer from scalar\n", + dst); return -EACCES; } /* We don't allow subtraction from FP, because (according to @@ -1989,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, * be able to deal with it. */ if (ptr_reg->type == PTR_TO_STACK) { - if (!env->allow_ptr_leaks) - verbose(env, "R%d subtraction from stack pointer prohibited\n", - dst); + verbose(env, "R%d subtraction from stack pointer prohibited\n", + dst); return -EACCES; } if (known && (ptr_reg->off - smin_val == @@ -2040,19 +2034,14 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case BPF_AND: case BPF_OR: case BPF_XOR: - /* bitwise ops on pointers are troublesome, prohibit for now. - * (However, in principle we could allow some cases, e.g. - * ptr &= ~3 which would reduce min_value by 3.) - */ - if (!env->allow_ptr_leaks) - verbose(env, "R%d bitwise operator %s on pointer prohibited\n", - dst, bpf_alu_string[opcode >> 4]); + /* bitwise ops on pointers are troublesome, prohibit. */ + verbose(env, "R%d bitwise operator %s on pointer prohibited\n", + dst, bpf_alu_string[opcode >> 4]); return -EACCES; default: /* other operators (e.g. 
MUL,LSH) produce non-pointer results */ - if (!env->allow_ptr_leaks) - verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", - dst, bpf_alu_string[opcode >> 4]); + verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", + dst, bpf_alu_string[opcode >> 4]); return -EACCES; } @@ -2308,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; u8 opcode = BPF_OP(insn->code); - int rc; dst_reg = &regs[insn->dst_reg]; src_reg = NULL; @@ -2319,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, if (src_reg->type != SCALAR_VALUE) { if (dst_reg->type != SCALAR_VALUE) { /* Combining two pointers by any ALU op yields - * an arbitrary scalar. + * an arbitrary scalar. Disallow all math except + * pointer subtraction */ - if (!env->allow_ptr_leaks) { - verbose(env, "R%d pointer %s pointer prohibited\n", - insn->dst_reg, - bpf_alu_string[opcode >> 4]); - return -EACCES; + if (opcode == BPF_SUB){ + mark_reg_unknown(env, regs, insn->dst_reg); + return 0; } - mark_reg_unknown(env, regs, insn->dst_reg); - return 0; + verbose(env, "R%d pointer %s pointer prohibited\n", + insn->dst_reg, + bpf_alu_string[opcode >> 4]); + return -EACCES; } else { /* scalar += pointer * This is legal, but we have to reverse our * src/dest handling in computing the range */ - rc = adjust_ptr_min_max_vals(env, insn, - src_reg, dst_reg); - if (rc == -EACCES && env->allow_ptr_leaks) { - /* scalar += unknown scalar */ - __mark_reg_unknown(&off_reg); - return adjust_scalar_min_max_vals( - env, insn, - dst_reg, off_reg); - } - return rc; + return adjust_ptr_min_max_vals(env, insn, + src_reg, dst_reg); } } else if (ptr_reg) { /* pointer += scalar */ - rc = adjust_ptr_min_max_vals(env, insn, - dst_reg, src_reg); - if (rc == -EACCES && env->allow_ptr_leaks) { - /* unknown scalar += scalar */ - __mark_reg_unknown(dst_reg); - 
return adjust_scalar_min_max_vals( - env, insn, dst_reg, *src_reg); - } - return rc; + return adjust_ptr_min_max_vals(env, insn, + dst_reg, src_reg); } } else { /* Pretend the src is a reg with a known value, since we only @@ -2364,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, off_reg.type = SCALAR_VALUE; __mark_reg_known(&off_reg, insn->imm); src_reg = &off_reg; - if (ptr_reg) { /* pointer += K */ - rc = adjust_ptr_min_max_vals(env, insn, - ptr_reg, src_reg); - if (rc == -EACCES && env->allow_ptr_leaks) { - /* unknown scalar += K */ - __mark_reg_unknown(dst_reg); - return adjust_scalar_min_max_vals( - env, insn, dst_reg, off_reg); - } - return rc; - } + if (ptr_reg) /* pointer += K */ + return adjust_ptr_min_max_vals(env, insn, + ptr_reg, src_reg); } /* Got here implies adding two SCALAR_VALUEs */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 961c1426fbf2..b51017404c62 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -422,9 +422,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 subtraction from stack pointer", - .result_unpriv = REJECT, - .errstr = "R1 invalid mem access", + .errstr = "R1 subtraction from stack pointer", .result = REJECT, }, { @@ -1859,9 +1857,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 pointer += pointer", + .result = REJECT, + .errstr = "R1 pointer += pointer", }, { "unpriv: neg pointer", @@ -2589,7 +2586,8 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct __sk_buff, data)), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, len)), 
BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), @@ -2896,7 +2894,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid access to packet", + .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3882,9 +3880,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3, 11 }, - .errstr_unpriv = "R0 pointer += pointer", - .errstr = "R0 invalid mem access 'inv'", - .result_unpriv = REJECT, + .errstr = "R0 pointer += pointer", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -3925,7 +3921,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 invalid mem access", + .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -3946,7 +3942,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 invalid mem access", + .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -3967,7 +3963,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 4 }, - .errstr = "R4 invalid mem access", + .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -5192,10 +5188,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 bitwise operator &= on pointer", - .errstr = "invalid mem access 'inv'", + .errstr = "R0 bitwise operator &= on pointer", .result = REJECT, - .result_unpriv = REJECT, }, { "map element value illegal alu op, 2", @@ -5211,10 +5205,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited", - .errstr = "invalid mem 
access 'inv'", + .errstr = "R0 32-bit pointer arithmetic prohibited", .result = REJECT, - .result_unpriv = REJECT, }, { "map element value illegal alu op, 3", @@ -5230,10 +5222,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic with /= operator", - .errstr = "invalid mem access 'inv'", + .errstr = "R0 pointer arithmetic with /= operator", .result = REJECT, - .result_unpriv = REJECT, }, { "map element value illegal alu op, 4", @@ -6016,8 +6006,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, - .errstr = "R1 type=inv expected=map_ptr", - .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited", + .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited", .result = REJECT, }, { @@ -7644,6 +7633,19 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "pkt_end - pkt_start is allowed", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "XDP pkt read, pkt_end mangling, bad access 1", .insns = { @@ -7659,7 +7661,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R1 offset is outside of the packet", + .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -7678,7 +7680,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R1 offset is outside of the packet", + .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, -- cgit v1.2.3 From d1b8b2391c24751e44f618fcf86fb55d9a9247fd Mon Sep 17 00:00:00 2001 From: Cathy Avery Date: Tue, 19 
Dec 2017 13:32:48 -0500 Subject: scsi: storvsc: Fix scsi_cmd error assignments in storvsc_handle_error When an I/O is returned with an srb_status of SRB_STATUS_INVALID_LUN which has zero good_bytes it must be assigned an error. Otherwise the I/O will be continuously requeued and will cause a deadlock in the case where disks are being hot added and removed. sd_probe_async will wait forever for its I/O to complete while holding scsi_sd_probe_domain. Also returning the default error of DID_TARGET_FAILURE causes multipath to not retry the I/O resulting in applications receiving I/O errors before a failover can occur. Signed-off-by: Cathy Avery Signed-off-by: Long Li Reviewed-by: Stephen Hemminger Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 1b06cf0375dc..3b3d1d050cac 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -953,10 +953,11 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, case TEST_UNIT_READY: break; default: - set_host_byte(scmnd, DID_TARGET_FAILURE); + set_host_byte(scmnd, DID_ERROR); } break; case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); do_work = true; process_err_fn = storvsc_remove_lun; break; -- cgit v1.2.3 From 4c82fd0abb87e20d0d68ef5237e74732352806c8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Dec 2017 12:08:33 +0100 Subject: netfilter: uapi: correct UNTRACKED conntrack state bit number nft_ct exposes this bit to userspace. This used to be #define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1)) (IP_CT_NUMBER is 5, so this was 0x40) .. but this got changed to 8 (0x100) when the untracked object got removed. Replace this with a literal 6 to prevent further incompatible changes in case IP_CT_NUMBER ever increases. 
Fixes: cc41c84b7e7f2 ("netfilter: kill the fake untracked conntrack objects") Reported-by: Li Shuang Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 3fea7709a441..57ccfb32e87f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -36,7 +36,7 @@ enum ip_conntrack_info { #define NF_CT_STATE_INVALID_BIT (1 << 0) #define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1)) -#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_UNTRACKED + 1)) +#define NF_CT_STATE_UNTRACKED_BIT (1 << 6) /* Bitset representing status of connection. */ enum ip_conntrack_status { -- cgit v1.2.3 From d2a48e52541cdf474ef35d51e8d73ded5be33122 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 20 Dec 2017 22:54:24 -0800 Subject: drm: move lease init after validation in drm_lease_create Patch bd36d3bab2e3d08f80766c86487090dbceed4651 fixed a deadlock in the failure path of drm_lease_create. This made the partially initialized lease object visible for a short window of time. To avoid having the lessee state appear transiently, I've rearranged the code so that the lessor fields are not filled in until the parameters are all validated and the function will succeed. 
Signed-off-by: Keith Packard Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20171221065424.1304-1-keithp@keithp.com --- drivers/gpu/drm/drm_lease.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index 59849f02e2ad..1402c0e71b03 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr mutex_lock(&dev->mode_config.idr_mutex); - /* Insert the new lessee into the tree */ - id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); - if (id < 0) { - error = id; - goto out_lessee; - } - - lessee->lessee_id = id; - lessee->lessor = drm_master_get(lessor); - list_add_tail(&lessee->lessee_list, &lessor->lessees); - idr_for_each_entry(leases, entry, object) { error = 0; if (!idr_find(&dev->mode_config.crtc_idr, object)) @@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr } } + /* Insert the new lessee into the tree */ + id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); + if (id < 0) { + error = id; + goto out_lessee; + } + + lessee->lessee_id = id; + lessee->lessor = drm_master_get(lessor); + list_add_tail(&lessee->lessee_list, &lessor->lessees); + /* Move the leases over */ lessee->leases = *leases; DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); -- cgit v1.2.3 From 9b3fa47d4a76b1d606a396455f9bbeee083ef008 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 13 Dec 2017 15:21:22 -0800 Subject: kobject: fix suppressing modalias in uevents delivered over netlink The commit 4a336a23d619 ("kobject: copy env blob in one go") optimized constructing uevent data for delivery over netlink by using the raw environment buffer, instead of reconstructing it from 
individual environment pointers. Unfortunately in doing so it broke suppressing MODALIAS attribute for KOBJ_UNBIND events, as the code that suppressed this attribute only adjusted the environment pointers, but left the buffer itself alone. Let's fix it by making sure the offending attribute is obliterated from the buffer as well. Reported-by: Tariq Toukan Reported-by: Casey Leedom Fixes: 4a336a23d619 ("kobject: copy env blob in one go") Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- lib/kobject_uevent.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index c3e84edc47c9..2615074d3de5 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -346,7 +346,8 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, static void zap_modalias_env(struct kobj_uevent_env *env) { static const char modalias_prefix[] = "MODALIAS="; - int i; + size_t len; + int i, j; for (i = 0; i < env->envp_idx;) { if (strncmp(env->envp[i], modalias_prefix, @@ -355,11 +356,18 @@ static void zap_modalias_env(struct kobj_uevent_env *env) continue; } - if (i != env->envp_idx - 1) - memmove(&env->envp[i], &env->envp[i + 1], - sizeof(env->envp[i]) * env->envp_idx - 1); + len = strlen(env->envp[i]) + 1; + + if (i != env->envp_idx - 1) { + memmove(env->envp[i], env->envp[i + 1], + env->buflen - len); + + for (j = i; j < env->envp_idx - 1; j++) + env->envp[j] = env->envp[j + 1] - len; + } env->envp_idx--; + env->buflen -= len; } } -- cgit v1.2.3 From 966031f340185eddd05affcf72b740549f056348 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 20 Dec 2017 17:57:06 -0800 Subject: n_tty: fix EXTPROC vs ICANON interaction with TIOCINQ (aka FIONREAD) We added support for EXTPROC back in 2010 in commit 26df6d13406d ("tty: Add EXTPROC support for LINEMODE") and the intent was to allow it to override some (all?) ICANON behavior.
Quoting from that original commit message: There is a new bit in the termios local flag word, EXTPROC. When this bit is set, several aspects of the terminal driver are disabled. Input line editing, character echo, and mapping of signals are all disabled. This allows the telnetd to turn off these functions when in linemode, but still keep track of what state the user wants the terminal to be in. but the problem turns out that "several aspects of the terminal driver are disabled" is a bit ambiguous, and you can really confuse the n_tty layer by setting EXTPROC and then causing some of the ICANON invariants to no longer be maintained. This fixes at least one such case (TIOCINQ) becoming unhappy because of the confusion over whether ICANON really means ICANON when EXTPROC is set. This basically makes TIOCINQ match the case of read: if EXTPROC is set, we ignore ICANON. Also, make sure to reset the ICANON state if EXTPROC changes, not just if ICANON changes. Fixes: 26df6d13406d ("tty: Add EXTPROC support for LINEMODE") Reported-by: Tetsuo Handa Reported-by: syzkaller Cc: Jiri Slaby Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 427e0d5d8f13..539b49adb6af 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1762,7 +1762,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) { struct n_tty_data *ldata = tty->disc_data; - if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) { + if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) { bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); ldata->line_start = ldata->read_tail; if (!L_ICANON(tty) || !read_cnt(ldata)) { @@ -2425,7 +2425,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file, return put_user(tty_chars_in_buffer(tty), (int __user *) arg); case TIOCINQ: down_write(&tty->termios_rwsem); - if
(L_ICANON(tty)) + if (L_ICANON(tty) && !L_EXTPROC(tty)) retval = inq_canon(ldata); else retval = read_cnt(ldata); -- cgit v1.2.3 From 790dde243f7dd5b8e576686eba0b891470b09f57 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 21 Dec 2017 11:21:33 +0800 Subject: ASoC: rl6231: remove never matched if condition (in_t < 0) will never be true since in_t is unsigned. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rl6231.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/soc/codecs/rl6231.c b/sound/soc/codecs/rl6231.c index 33690e98e297..7ef3b5476bcc 100644 --- a/sound/soc/codecs/rl6231.c +++ b/sound/soc/codecs/rl6231.c @@ -178,8 +178,6 @@ int rl6231_pll_calc(const unsigned int freq_in, for (n_t = 0; n_t <= max_n; n_t++) { in_t = f_in * (n_t + 2); pll_out = f_out * (k_t + 2); - if (in_t < 0) - continue; if (in_t == pll_out) { bypass = true; n = n_t; -- cgit v1.2.3 From 5fb6e0a1a933cfe13200ae0ae7589263236fa108 Mon Sep 17 00:00:00 2001 From: Guneshwor Singh Date: Thu, 21 Dec 2017 08:45:29 +0530 Subject: ASoC: hdac_hdmi: Add vendor id for Cannonlake HDMI codec Cannonlake HDMI codec has the same nid as Geminilake. This adds the vendor id for Cannonlake in hdmi device id list. 
Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index f3b4f4dfae6a..69416f465d94 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -2192,6 +2192,8 @@ static const struct hda_device_id hdmi_list[] = { HDA_CODEC_EXT_ENTRY(0x80862809, 0x100000, "Skylake HDMI", 0), HDA_CODEC_EXT_ENTRY(0x8086280a, 0x100000, "Broxton HDMI", 0), HDA_CODEC_EXT_ENTRY(0x8086280b, 0x100000, "Kabylake HDMI", 0), + HDA_CODEC_EXT_ENTRY(0x8086280c, 0x100000, "Cannonlake HDMI", + &intel_glk_drv_data), HDA_CODEC_EXT_ENTRY(0x8086280d, 0x100000, "Geminilake HDMI", &intel_glk_drv_data), {} -- cgit v1.2.3 From fae1a3e775cca8c3a9e0eb34443b310871a15a92 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 21 Dec 2017 00:49:14 +0100 Subject: kvm: x86: fix RSM when PCID is non-zero rsm_load_state_64() and rsm_enter_protected_mode() load CR3, then CR4 & ~PCIDE, then CR0, then CR4. However, setting CR4.PCIDE fails if CR3[11:0] != 0. It's probably easier in the long run to replace rsm_enter_protected_mode() with an emulator callback that sets all the special registers (like KVM_SET_SREGS would do). For now, set the PCID field of CR3 only after CR4.PCIDE is 1. 
Reported-by: Laszlo Ersek Tested-by: Laszlo Ersek Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index abe74f779f9d..b514b2b2845a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) } static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, - u64 cr0, u64 cr4) + u64 cr0, u64 cr3, u64 cr4) { int bad; + u64 pcid; + + /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ + pcid = 0; + if (cr4 & X86_CR4_PCIDE) { + pcid = cr3 & 0xfff; + cr3 &= ~0xfff; + } + + bad = ctxt->ops->set_cr(ctxt, 3, cr3); + if (bad) + return X86EMUL_UNHANDLEABLE; /* * First enable PAE, long mode needs it before CR0.PG = 1 is set. @@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, bad = ctxt->ops->set_cr(ctxt, 4, cr4); if (bad) return X86EMUL_UNHANDLEABLE; + if (pcid) { + bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); + if (bad) + return X86EMUL_UNHANDLEABLE; + } + } return X86EMUL_CONTINUE; @@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) struct desc_struct desc; struct desc_ptr dt; u16 selector; - u32 val, cr0, cr4; + u32 val, cr0, cr3, cr4; int i; cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); + cr3 = GET_SMSTATE(u32, smbase, 0x7ff8); ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); @@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); - return rsm_enter_protected_mode(ctxt, cr0, cr4); + return rsm_enter_protected_mode(ctxt, 
cr0, cr3, cr4); } static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) { struct desc_struct desc; struct desc_ptr dt; - u64 val, cr0, cr4; + u64 val, cr0, cr3, cr4; u32 base3; u16 selector; int i, r; @@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); cr0 = GET_SMSTATE(u64, smbase, 0x7f58); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); + cr3 = GET_SMSTATE(u64, smbase, 0x7f50); cr4 = GET_SMSTATE(u64, smbase, 0x7f48); ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); val = GET_SMSTATE(u64, smbase, 0x7ed0); @@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) dt.address = GET_SMSTATE(u64, smbase, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); - r = rsm_enter_protected_mode(ctxt, cr0, cr4); + r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); if (r != X86EMUL_CONTINUE) return r; -- cgit v1.2.3 From aa12f594f97efe50223611dbd13ecca4e8dafee6 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 21 Dec 2017 13:03:27 +0100 Subject: tools/kvm_stat: sort '-f help' output Sort the fields returned by specifying '-f help' on the command line. While at it, simplify the code a bit, indent the output and eliminate an extra blank line at the beginning. 
Signed-off-by: Stefan Raspl Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 566a70ddd005..a5684d0968b4 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1579,17 +1579,13 @@ def main(): stats = Stats(options) - if options.fields == "help": + if options.fields == 'help': stats.fields_filter = None - event_list = "\n" - s = stats.get() - for key in s.keys(): - if key.find('(') != -1: - key = key[0:key.find('(')] - if event_list.find('\n' + key + '\n') == -1: - event_list += key + '\n' - sys.stdout.write(event_list) - return "" + event_list = [] + for key in stats.get().keys(): + event_list.append(key.split('(', 1)[0]) + sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n') + sys.exit(0) if options.log: log(stats) -- cgit v1.2.3 From 221886646f75964ca31cf60f1811b2c9c4e965a5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 21 Dec 2017 01:37:31 +0100 Subject: spi: pxa2xx: avoid redundant gpio_to_desc(desc_to_gpio()) round-trip gpio_free(gpio) simply does gpiod_free(gpio_to_desc(gpio)), so it's simpler and cleaner to use gpiod_free directly. 
Signed-off-by: Rasmus Villemoes Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 4cb515a3104c..c209dc1047b5 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1237,7 +1237,7 @@ static int setup_cs(struct spi_device *spi, struct chip_data *chip, * different chip_info, release previously requested GPIO */ if (chip->gpiod_cs) { - gpio_free(desc_to_gpio(chip->gpiod_cs)); + gpiod_free(chip->gpiod_cs); chip->gpiod_cs = NULL; } @@ -1417,7 +1417,7 @@ static void cleanup(struct spi_device *spi) if (drv_data->ssp_type != CE4100_SSP && !drv_data->cs_gpiods && chip->gpiod_cs) - gpio_free(desc_to_gpio(chip->gpiod_cs)); + gpiod_free(chip->gpiod_cs); kfree(chip); } -- cgit v1.2.3 From a5ba91c380b8bcca21e6166fc71c5e5ac9f0db68 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 21 Dec 2017 12:12:50 +0100 Subject: regmap: debugfs: emit a debug message when locking is disabled We currently silently omit creating the debugfs entries when regmap locking is disabled. Users may not be aware of the reason for which regmap files don't show up in debugfs. Add a dev_dbg() message explaining that. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-debugfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index c8ecefd75d6f..ae962b756863 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -529,8 +529,10 @@ void regmap_debugfs_init(struct regmap *map, const char *name) struct regmap_range_node *range_node; const char *devname = "dummy"; - if (map->debugfs_disable) + if (map->debugfs_disable) { + dev_dbg(map->dev, "regmap locking disabled - not creating debugfs entries\n"); return; + } /* If we don't have the debugfs root yet, postpone init */ if (!regmap_debugfs_root) { -- cgit v1.2.3 From d63da8c64bbf800a12fe0a4a2804e5953b8cf35e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 19 Dec 2017 14:34:03 +0100 Subject: mmc: tmio: use io* accessors consistently Because we started using io*_rep accessors previously because they are more widely defined across architectures, let's be consistent and use this family for all accessor wrappers. 
Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 15537c85c51a..52198f2929a5 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -228,7 +228,7 @@ int tmio_mmc_host_runtime_resume(struct device *dev); static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr) { - return readw(host->ctl + (addr << host->bus_shift)); + return ioread16(host->ctl + (addr << host->bus_shift)); } static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, @@ -240,8 +240,8 @@ static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr, static inline u32 sd_ctrl_read16_and_16_as_32(struct tmio_mmc_host *host, int addr) { - return readw(host->ctl + (addr << host->bus_shift)) | - readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16; + return ioread16(host->ctl + (addr << host->bus_shift)) | + ioread16(host->ctl + ((addr + 2) << host->bus_shift)) << 16; } static inline void sd_ctrl_read32_rep(struct tmio_mmc_host *host, int addr, @@ -258,7 +258,7 @@ static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, */ if (host->write16_hook && host->write16_hook(host, addr)) return; - writew(val, host->ctl + (addr << host->bus_shift)); + iowrite16(val, host->ctl + (addr << host->bus_shift)); } static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, @@ -270,8 +270,8 @@ static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr, static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, int addr, u32 val) { - writew(val & 0xffff, host->ctl + (addr << host->bus_shift)); - writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); + iowrite16(val & 0xffff, host->ctl + (addr << host->bus_shift)); + iowrite16(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); } static inline void 
sd_ctrl_write32_rep(struct tmio_mmc_host *host, int addr, -- cgit v1.2.3 From 976a9b35d77a9d297cb03154aa61a6214a213b5e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 20 Dec 2017 18:17:29 +0100 Subject: ARM: dts: exynos: Enable Mixer node for Exynos5800 Peach Pi machine Commit 1cb686c08d12 ("ARM: dts: exynos: Add status property to Exynos 542x Mixer nodes") disabled the Mixer node by default in the DTSI and enabled for each Exynos 542x DTS. But unfortunately it missed to enable it for the Exynos5800 Peach Pi machine, since the 5800 is also an 542x SoC variant. Fixes: 1cb686c08d12 ("ARM: dts: exynos: Add status property to Exynos 542x Mixer nodes") Signed-off-by: Javier Martinez Canillas Acked-by: Marek Szyprowski Tested-by: Guillaume Tucker Signed-off-by: Krzysztof Kozlowski Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/exynos5800-peach-pi.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index b2b95ff205e8..0029ec27819c 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -664,6 +664,10 @@ status = "okay"; }; +&mixer { + status = "okay"; +}; + /* eMMC flash */ &mmc_0 { status = "okay"; -- cgit v1.2.3 From d2271826e58b83f9a75634a3f4334082ecf0a02e Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 15 Dec 2017 16:03:32 +1030 Subject: ARM: dts: aspeed-g4: Correct VUART IRQ number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should have always been 8. 
Fixes: db4d6d9d80fa ("ARM: dts: aspeed: Correctly order UART nodes") Cc: stable@vger.kernel.org Signed-off-by: Joel Stanley Reviewed-by: Cédric Le Goater Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/aspeed-g4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index 45d815a86d42..de08d9045cb8 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -219,7 +219,7 @@ compatible = "aspeed,ast2400-vuart"; reg = <0x1e787000 0x40>; reg-shift = <2>; - interrupts = <10>; + interrupts = <8>; clocks = <&clk_uart>; no-loopback-test; status = "disabled"; -- cgit v1.2.3 From 363e59baa4f76d3f97c0133ff7014cba3d90a7c3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Dec 2017 15:42:59 -0800 Subject: xfs: don't be so eager to clear the cowblocks tag on truncate Currently, xfs_itruncate_extents clears the cowblocks tag if i_cnextents is zero. This is wrong, since i_cnextents only tracks real extents in the CoW fork, which means that we could have some delayed CoW reservations still in there that will now never get cleaned. Fix a further bug where we /don't/ clear the reflink iflag if there are any attribute blocks -- really, it's only safe to clear the reflink flag if there are no data fork extents and no cow fork extents. Found by adding clonerange to fsstress in xfs/017. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_inode.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b41952a4ddd8..6f95bdb408ce 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1487,6 +1487,24 @@ xfs_link( return error; } +/* Clear the reflink flag and the cowblocks tag if possible. 
*/ +static void +xfs_itruncate_clear_reflink_flags( + struct xfs_inode *ip) +{ + struct xfs_ifork *dfork; + struct xfs_ifork *cfork; + + if (!xfs_is_reflink_inode(ip)) + return; + dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK); + if (dfork->if_bytes == 0 && cfork->if_bytes == 0) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; + if (cfork->if_bytes == 0) + xfs_inode_clear_cowblocks_tag(ip); +} + /* * Free up the underlying blocks past new_size. The new size must be smaller * than the current size. This routine can be used both for the attribute and @@ -1583,15 +1601,7 @@ xfs_itruncate_extents( if (error) goto out; - /* - * Clear the reflink flag if there are no data fork blocks and - * there are no extents staged in the cow fork. - */ - if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { - if (ip->i_d.di_nblocks == 0) - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; - xfs_inode_clear_cowblocks_tag(ip); - } + xfs_itruncate_clear_reflink_flags(ip); /* * Always re-log the inode so that our permanent transaction can keep -- cgit v1.2.3 From 10ddf64e420f7f6c1a871bfb4ff2de08faef8235 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Dec 2017 15:46:05 -0800 Subject: xfs: remove leftover CoW reservations when remounting ro When we're remounting the filesystem readonly, remove all CoW preallocations prior to going ro. If the fs goes down after the ro remount, we never clean up the staging extents, which means xfs_check will trip over them on a subsequent run. Practically speaking, the next mount will clean them up too, so this is unlikely to be seen. Since we shut down the cowblocks cleaner on remount-ro, we also have to make sure we start it back up if/when we remount-rw. Found by adding clonerange to fsstress and running xfs/017. Signed-off-by: Darrick J. 
Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_icache.c | 2 +- fs/xfs/xfs_icache.h | 1 + fs/xfs/xfs_super.c | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 58d2d4253c8e..3861d61fb265 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -870,7 +870,7 @@ xfs_eofblocks_worker( * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). * (We'll just piggyback on the post-EOF prealloc space workqueue.) */ -STATIC void +void xfs_queue_cowblocks( struct xfs_mount *mp) { diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index bff4d85e5498..d4a77588eca1 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip); int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); void xfs_cowblocks_worker(struct work_struct *); +void xfs_queue_cowblocks(struct xfs_mount *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f663022353c0..2db6a40a96bd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1360,6 +1360,7 @@ xfs_fs_remount( xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; } + xfs_queue_cowblocks(mp); /* Create the per-AG metadata reservation pool .*/ error = xfs_fs_reserve_ag_blocks(mp); @@ -1369,6 +1370,14 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* Get rid of any leftover CoW reservations... */ + cancel_delayed_work_sync(&mp->m_cowblocks_work); + error = xfs_icache_free_cowblocks(mp, NULL); + if (error) { + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; + } + /* Free the per-AG metadata reservation pool. 
*/ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { -- cgit v1.2.3 From 86d692bfad1b0097fa866f5fcfa5f5adf4cd82e8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 Dec 2017 15:46:06 -0800 Subject: xfs: set cowblocks tag for direct cow writes too If a user performs a direct CoW write, we end up loading the CoW fork with preallocated extents. Therefore, we must set the cowblocks tag so that they can be cleared out if we run low on space. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_reflink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e49e6db415f7..47aea2e82c26 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -454,6 +454,8 @@ retry: if (error) goto out_bmap_cancel; + xfs_inode_set_cowblocks_tag(ip); + /* Finish up. */ error = xfs_defer_finish(&tp, &dfops); if (error) -- cgit v1.2.3 From 0525e952dcceb9fc947c6d395de7f72220c7d081 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:03 -0800 Subject: xfs: queue deferred rmap ops for cow staging extent alloc/free in the right order Under the deferred rmap operation scheme, there's a certain order in which the rmap deferred ops have to be queued to maintain integrity during log replay. For alloc/map operations that order is cui -> rui; for free/unmap operations that order is cui -> rui -> efi. However, the initial refcount code got the ordering wrong in the free side of things because it queued a refcount free op and an EFI and the refcount free op queued a rmap free op, resulting in the order cui -> efi -> rui. If we fail before the efd finishes, the efi recovery will try to do a wildcard rmap removal and the subsequent rui will fail to find the rmap and blow up. This didn't ever happen due to other screw-ups in handling unknown owner rmap removals, but those other screw-ups broke recovery in other ways, so fix the ordering to follow the intended rules.
Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_refcount.c | 52 ++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 585b35d34142..c40d26763075 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Add refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); - if (error) - return error; - - /* Add rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* @@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free( xfs_extlen_t aglen, struct xfs_defer_ops *dfops) { - int error; - trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, agbno, aglen); /* Remove refcount btree reservation */ - error = xfs_refcount_adjust_cow(rcur, agbno, aglen, + return xfs_refcount_adjust_cow(rcur, agbno, aglen, XFS_REFCOUNT_ADJUST_COW_FREE, dfops); - if (error) - return error; - - /* Remove rmap entry */ - if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { - error = xfs_rmap_free_extent(rcur->bc_mp, dfops, - rcur->bc_private.a.agno, - agbno, aglen, XFS_RMAP_OWN_COW); - if (error) - return error; - } - - return error; } /* Record a CoW staging extent in the refcount btree. 
*/ @@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; - return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, + error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, fsb, len); + if (error) + return error; + + /* Add rmap entry */ + return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); } /* Forget a CoW staging event in the refcount btree. */ @@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent( xfs_fsblock_t fsb, xfs_extlen_t len) { + int error; + if (!xfs_sb_version_hasreflink(&mp->m_sb)) return 0; + /* Remove rmap entry */ + error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), + XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); + if (error) + return error; + return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, fsb, len); } -- cgit v1.2.3 From 33df3a9cf925183a6a169bc3eff2bd0febd1298a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:27 -0800 Subject: xfs: always honor OWN_UNKNOWN rmap removal requests Calling xfs_rmap_free with an unknown owner is supposed to remove any rmaps covering that range regardless of owner. This is used by the EFI recovery code to say "we're freeing this, it mustn't be owned by anything anymore", but for whatever reason xfs_free_ag_extent filters them out. Therefore, remove the filter and make xfs_rmap_unmap actually treat it as a wildcard owner -- free anything that's already there, and if there's no owner at all then that's fine too. There are two existing callers of bmap_add_free that take care the rmap deferred ops themselves and use OWN_UNKNOWN to skip the EFI-based rmap cleanup; convert these to use OWN_NULL (via helpers), and now we really require that an RUI (if any) gets added to the defer ops before any EFI. 
Lastly, now that xfs_free_extent filters out OWN_NULL rmap free requests, growfs will have to consult directly with the rmap to ensure that there aren't any rmaps in the grown region. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_alloc.c | 4 ++-- fs/xfs/libxfs/xfs_rmap.c | 25 +++++++++++++++++++++++++ fs/xfs/libxfs/xfs_rmap.h | 16 +++++++++++++++- fs/xfs/xfs_extfree_item.c | 2 +- fs/xfs/xfs_fsops.c | 5 +++++ 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 0da80019a917..83ed7715f856 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -702,7 +702,7 @@ xfs_alloc_ag_vextent( ASSERT(args->agbno % args->alignment == 0); /* if not file data, insert new block into the reverse map btree */ - if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, args->agbno, args->len, &args->oinfo); if (error) @@ -1682,7 +1682,7 @@ xfs_free_ag_extent( bno_cur = cnt_cur = NULL; mp = tp->t_mountp; - if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { + if (!xfs_rmap_should_skip_owner_update(oinfo)) { error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); if (error) goto error0; diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index dd019cee1b3b..7465cfb39276 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -444,6 +444,30 @@ xfs_rmap_unmap( goto out_done; } + /* + * If we're doing an unknown-owner removal for EFI recovery, we expect + * to find the full range in the rmapbt or nothing at all. If we + * don't find any rmaps overlapping either end of the range, we're + * done. Hopefully this means that the EFI creator already queued + * (and finished) a RUI to remove the rmap. 
+ */ + if (owner == XFS_RMAP_OWN_UNKNOWN && + ltrec.rm_startblock + ltrec.rm_blockcount <= bno) { + struct xfs_rmap_irec rtrec; + + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto out_error; + if (i == 0) + goto out_done; + error = xfs_rmap_get_rec(cur, &rtrec, &i); + if (error) + goto out_error; + XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error); + if (rtrec.rm_startblock >= bno + len) + goto out_done; + } + /* Make sure the unwritten flag matches. */ XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); @@ -664,6 +688,7 @@ xfs_rmap_map( flags |= XFS_RMAP_UNWRITTEN; trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, unwritten, oinfo); + ASSERT(!xfs_rmap_should_skip_owner_update(oinfo)); /* * For the initial lookup, look for an exact match or the left-adjacent diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 466ede637080..0fcd5b1ba729 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -61,7 +61,21 @@ static inline void xfs_rmap_skip_owner_update( struct xfs_owner_info *oi) { - oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL); +} + +static inline bool +xfs_rmap_should_skip_owner_update( + struct xfs_owner_info *oi) +{ + return oi->oi_owner == XFS_RMAP_OWN_NULL; +} + +static inline void +xfs_rmap_any_owner_update( + struct xfs_owner_info *oi) +{ + xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN); } /* Reverse mapping functions. 
*/ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 44f8c5451210..64da90655e95 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -538,7 +538,7 @@ xfs_efi_recover( return error; efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); - xfs_rmap_skip_owner_update(&oinfo); + xfs_rmap_any_owner_update(&oinfo); for (i = 0; i < efip->efi_format.efi_nextents; i++) { extp = &efip->efi_format.efi_extents[i]; error = xfs_trans_free_extent(tp, efdp, extp->ext_start, diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8f22fc579dbb..60a2e128cb6a 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -571,6 +571,11 @@ xfs_growfs_data_private( * this doesn't actually exist in the rmap btree. */ xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); + error = xfs_rmap_free(tp, bp, agno, + be32_to_cpu(agf->agf_length) - new, + new, &oinfo); + if (error) + goto error0; error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno, be32_to_cpu(agf->agf_length) - new), -- cgit v1.2.3 From 68c58e9b9a88c1a9d0c2eaf6c7acefb00f5fbbfb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 7 Dec 2017 19:07:55 -0800 Subject: xfs: only skip rmap owner checks for unknown-owner rmap removal For rmap removal, refactor the rmap owner checks into a separate function, then skip the checks if we are performing an unknown-owner removal. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rmap.c | 76 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 7465cfb39276..50db920ceeeb 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -367,6 +367,51 @@ xfs_rmap_lookup_le_range( return error; } +/* + * Perform all the relevant owner checks for a removal op. If we're doing an + * unknown-owner removal then we have no owner information to check. 
+ */ +static int +xfs_rmap_free_check_owner( + struct xfs_mount *mp, + uint64_t ltoff, + struct xfs_rmap_irec *rec, + xfs_fsblock_t bno, + xfs_filblks_t len, + uint64_t owner, + uint64_t offset, + unsigned int flags) +{ + int error = 0; + + if (owner == XFS_RMAP_OWN_UNKNOWN) + return 0; + + /* Make sure the unwritten flag matches. */ + XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == + (rec->rm_flags & XFS_RMAP_UNWRITTEN), out); + + /* Make sure the owner matches what we expect to find in the tree. */ + XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out); + + /* Check the offset, if necessary. */ + if (XFS_RMAP_NON_INODE_OWNER(owner)) + goto out; + + if (flags & XFS_RMAP_BMBT_BLOCK) { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK, + out); + } else { + XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out); + XFS_WANT_CORRUPTED_GOTO(mp, + ltoff + rec->rm_blockcount >= offset + len, + out); + } + +out: + return error; +} + /* * Find the extent in the rmap btree and remove it. * @@ -468,33 +513,16 @@ xfs_rmap_unmap( goto out_done; } - /* Make sure the unwritten flag matches. */ - XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == - (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); - /* Make sure the extent we found covers the entire freeing range. */ XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && - ltrec.rm_startblock + ltrec.rm_blockcount >= - bno + len, out_error); - - /* Make sure the owner matches what we expect to find in the tree. */ - XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || - XFS_RMAP_NON_INODE_OWNER(owner), out_error); + ltrec.rm_startblock + ltrec.rm_blockcount >= + bno + len, out_error); - /* Check the offset, if necessary. 
*/ - if (!XFS_RMAP_NON_INODE_OWNER(owner)) { - if (flags & XFS_RMAP_BMBT_BLOCK) { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK, - out_error); - } else { - XFS_WANT_CORRUPTED_GOTO(mp, - ltrec.rm_offset <= offset, out_error); - XFS_WANT_CORRUPTED_GOTO(mp, - ltoff + ltrec.rm_blockcount >= offset + len, - out_error); - } - } + /* Check owner information. */ + error = xfs_rmap_free_check_owner(mp, ltoff, <rec, bno, len, owner, + offset, flags); + if (error) + goto out_error; if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { /* exact match, simply remove the record from rmap tree */ -- cgit v1.2.3 From 58acfd714e6b02e8617448b431c2b64a2f1f0792 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 20 Dec 2017 12:28:25 +0200 Subject: ipv6: Honor specified parameters in fibmatch lookup Currently, parameters such as oif and source address are not taken into account during fibmatch lookup. Example (IPv4 for reference) before patch: $ ip -4 route show 192.0.2.0/24 dev dummy0 proto kernel scope link src 192.0.2.1 198.51.100.0/24 dev dummy1 proto kernel scope link src 198.51.100.1 $ ip -6 route show 2001:db8:1::/64 dev dummy0 proto kernel metric 256 pref medium 2001:db8:2::/64 dev dummy1 proto kernel metric 256 pref medium fe80::/64 dev dummy0 proto kernel metric 256 pref medium fe80::/64 dev dummy1 proto kernel metric 256 pref medium $ ip -4 route get fibmatch 192.0.2.2 oif dummy0 192.0.2.0/24 dev dummy0 proto kernel scope link src 192.0.2.1 $ ip -4 route get fibmatch 192.0.2.2 oif dummy1 RTNETLINK answers: No route to host $ ip -6 route get fibmatch 2001:db8:1::2 oif dummy0 2001:db8:1::/64 dev dummy0 proto kernel metric 256 pref medium $ ip -6 route get fibmatch 2001:db8:1::2 oif dummy1 2001:db8:1::/64 dev dummy0 proto kernel metric 256 pref medium After: $ ip -6 route get fibmatch 2001:db8:1::2 oif dummy0 2001:db8:1::/64 dev dummy0 proto kernel metric 256 pref medium $ ip -6 route get fibmatch 2001:db8:1::2 oif dummy1 RTNETLINK answers: 
Network is unreachable The problem stems from the fact that the necessary route lookup flags are not set based on these parameters. Instead of duplicating the same logic for fibmatch, we can simply resolve the original route from its copy and dump it instead. Fixes: 18c3a61c4264 ("net: ipv6: RTM_GETROUTE: return matched fib result when requested") Signed-off-by: Ido Schimmel Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2bc91c349273..0458b761f3c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4298,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!ipv6_addr_any(&fl6.saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - if (!fibmatch) - dst = ip6_route_input_lookup(net, dev, &fl6, flags); - else - dst = ip6_route_lookup(net, &fl6, 0); + dst = ip6_route_input_lookup(net, dev, &fl6, flags); rcu_read_unlock(); } else { fl6.flowi6_oif = oif; - if (!fibmatch) - dst = ip6_route_output(net, NULL, &fl6); - else - dst = ip6_route_lookup(net, &fl6, 0); + dst = ip6_route_output(net, NULL, &fl6); } @@ -4327,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } + if (fibmatch && rt->dst.from) { + struct rt6_info *ort = container_of(rt->dst.from, + struct rt6_info, dst); + + dst_hold(&ort->dst); + ip6_rt_put(rt); + rt = ort; + } + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); -- cgit v1.2.3 From 6d0e4827b72afc71349784336d5eb6df4df106e6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Dec 2017 10:01:30 -0700 Subject: Revert "bdi: add error handle for bdi_debug_register" This reverts commit a0747a859ef6d3cc5b6cd50eb694499b78dd0025. It breaks some booting for some users, and more than a week into this, there's still no good fix. Revert this commit for now until a solution has been found. 
Reported-by: Laura Abbott Reported-by: Bruno Wolff III Signed-off-by: Jens Axboe --- mm/backing-dev.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 84b2dc76f140..b5f940ce0143 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) if (IS_ERR(dev)) return PTR_ERR(dev); - if (bdi_debug_register(bdi, dev_name(dev))) { - device_destroy(bdi_class, dev->devt); - return -ENOMEM; - } cgwb_bdi_register(bdi); bdi->dev = dev; + bdi_debug_register(bdi, dev_name(dev)); set_bit(WB_registered, &bdi->wb.state); spin_lock_bh(&bdi_lock); -- cgit v1.2.3 From 8bc0d7ac934b6f2d0dc8f38a3104d281c9db1e98 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 19 Dec 2017 22:24:10 -0200 Subject: i915: Reject CCS modifiers for pipe C on Geminilake Current code advertises (on the modifiers blob property) support for CCS modifier for pipe C on GLK, only to reject it later when validating the request before the atomic commit. This fixes the tests igt@kms_ccs@pipe-c-*, which should skip on GLK for pipe C (see bug 104096). 
A relevant discussion is archived at: https://lists.freedesktop.org/archives/intel-gfx/2017-December/150646.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104096 Signed-off-by: Gabriel Krisman Bertazi Cc: Ben Widawsky Reviewed-by: Ben Widawsky Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171220002410.5604-1-krisman@collabora.co.uk (cherry picked from commit f0cbd8bd877f3d8c5b80a6b1add9ca9010d7f9d8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ff9397030092..30cf273d57aa 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13194,7 +13194,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 10) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); modifiers = skl_format_modifiers_ccs; -- cgit v1.2.3 From c48e74736fccf25fb32bb015426359e1c2016e3b Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Wed, 20 Dec 2017 15:09:22 -0500 Subject: openvswitch: Fix pop_vlan action for double tagged frames skb_vlan_pop() expects skb->protocol to be a valid TPID for double tagged frames. So set skb->protocol to the TPID and let skb_vlan_pop() shift the true ethertype into position for us. Fixes: 5108bbaddc37 ("openvswitch: add processing of L3 packets") Signed-off-by: Eric Garver Reviewed-by: Jiri Benc Signed-off-by: David S. 
Miller --- net/openvswitch/flow.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dbe2379329c5..f039064ce922 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) return -EINVAL; skb_reset_network_header(skb); + key->eth.type = skb->protocol; } else { eth = eth_hdr(skb); ether_addr_copy(key->eth.src, eth->h_source); @@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (unlikely(parse_vlan(skb, key))) return -ENOMEM; - skb->protocol = parse_ethertype(skb); - if (unlikely(skb->protocol == htons(0))) + key->eth.type = parse_ethertype(skb); + if (unlikely(key->eth.type == htons(0))) return -ENOMEM; + /* Multiple tagged packets need to retain TPID to satisfy + * skb_vlan_pop(), which will later shift the ethertype into + * skb->protocol. + */ + if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT)) + skb->protocol = key->eth.cvlan.tpid; + else + skb->protocol = key->eth.type; + skb_reset_network_header(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); } skb_reset_mac_len(skb); - key->eth.type = skb->protocol; /* Network layer. */ if (key->eth.type == htons(ETH_P_IP)) { -- cgit v1.2.3 From 513674b5a2c9c7a67501506419da5c3c77ac6f08 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Dec 2017 12:10:21 -0800 Subject: net: reevalulate autoflowlabel setting after sysctl setting sysctl.ip6.auto_flowlabels is default 1. In our hosts, we set it to 2. If sockopt doesn't set autoflowlabel, outcome packets from the hosts are supposed to not include flowlabel. This is true for normal packet, but not for reset packet. The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if we change sysctl.ip6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't changed, so the sock will keep the old behavior in terms of auto flowlabel. 
Reset packet is suffering from this problem, because reset packet is sent from a special control socket, which is created at boot time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control socket will always have its ipv6_pinfo.autoflowlabel set, even after user set sysctl.ipv6.auto_flowlabels to 1, so reset packet will always have flowlabel. Normal sock created before sysctl setting suffers from the same issue. We can't even turn off autoflowlabel unless we kill all socks in the hosts. To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the autoflowlabel setting from user, otherwise we always call ip6_default_np_autolabel() which has the new settings of sysctl. Note, this changes behavior a little bit. Before commit 42240901f7c4 (ipv6: Implement different admin modes for automatic flow labels), the autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes, existing connection will change autoflowlabel behavior. After that commit, autoflowlabel behavior is sticky in the whole life of the sock. With this patch, the behavior isn't sticky again. Cc: Martin KaFai Lau Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: Shaohua Li Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 3 ++- net/ipv6/af_inet6.c | 1 - net/ipv6/ip6_output.c | 12 ++++++++++-- net/ipv6/ipv6_sockglue.c | 1 + 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index cb18c6290ca8..8415bf1a9776 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -273,7 +273,8 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1; + autoflowlabel:1, + autoflowlabel_set:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c26f71234b9c..c9441ca45399 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -210,7 +210,6 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->autoflowlabel = ip6_default_np_autolabel(net); np->repflow = net->ipv6.sysctl.flowlabel_reflect; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5110a418cc4d..f7dd51c42314 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +{ + if (!np->autoflowlabel_set) + return ip6_default_np_autolabel(net); + else + return np->autoflowlabel; +} + /* * xmit an sk_buff (used by TCP, SCTP and DCCP) * Note : socket lock is not held for SYNACK packets, but might be modified @@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel, fl6)); + ip6_autoflowlabel(net, np), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, 
ip6_make_flowlabel(net, skb, fl6->flowlabel, - np->autoflowlabel, fl6)); + ip6_autoflowlabel(net, np), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b9404feabd78..2d4680e0376f 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -886,6 +886,7 @@ pref_skip_coa: break; case IPV6_AUTOFLOWLABEL: np->autoflowlabel = valbool; + np->autoflowlabel_set = 1; retv = 0; break; case IPV6_RECVFRAGSIZE: -- cgit v1.2.3 From 268b790679422a89e9ab0685d9f291edae780c98 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 20 Dec 2017 17:37:49 -0500 Subject: skbuff: orphan frags before zerocopy clone Call skb_zerocopy_clone after skb_orphan_frags, to avoid duplicate calls to skb_uarg(skb)->callback for the same data. skb_zerocopy_clone associates skb_shinfo(skb)->uarg from frag_skb with each segment. This is only safe for uargs that do refcounting, which is those that pass skb_orphan_frags without dropping their shared frags. For others, skb_orphan_frags drops the user frags and sets the uarg to NULL, after which sock_zerocopy_clone has no effect. Qemu hangs were reported due to duplicate vhost_net_zerocopy_callback calls for the same data causing the vhost_net_ubuf_ref_>refcount to drop below zero. Link: http://lkml.kernel.org/r/ Fixes: 1f8b977ab32d ("sock: enable MSG_ZEROCOPY") Reported-by: Andreas Hartmann Reported-by: David Hill Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a592ca025fc4..edf40ac0cd07 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3654,8 +3654,6 @@ normal: skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; - if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC)) - goto err; while (pos < offset + len) { if (i >= nfrags) { @@ -3681,6 +3679,8 @@ normal: if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) goto err; + if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) + goto err; *nskb_frag = *frag; __skb_frag_ref(nskb_frag); -- cgit v1.2.3 From b90ddd568792bcb0054eaf0f61785c8f80c3bd1c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 20 Dec 2017 17:37:50 -0500 Subject: skbuff: skb_copy_ubufs must release uarg even without user frags skb_copy_ubufs creates a private copy of frags[] to release its hold on user frags, then calls uarg->callback to notify the owner. Call uarg->callback even when no frags exist. This edge case can happen when zerocopy_sg_from_iter finds enough room in skb_headlen to copy all the data. Fixes: 3ece782693c4 ("sock: skb_copy_ubufs support for compound pages") Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index edf40ac0cd07..a3cb0be4c6f3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1178,7 +1178,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) u32 d_off; if (!num_frags) - return 0; + goto release; if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; @@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); skb_shinfo(skb)->nr_frags = new_frags; +release: skb_zcopy_clear(skb, false); return 0; } -- cgit v1.2.3 From 13b7954c0b8dd2d6382b4ddb5053f09e389d5c6e Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 14 Dec 2017 17:26:13 -0700 Subject: libnvdimm, btt: add a couple of missing kernel-doc lines Recent updates to btt.h neglected to add corresponding kernel-doc lines for new structure members. Add them. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 578c2057524d..884fbbbdd18a 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -125,6 +125,7 @@ struct aligned_lock { * @list: List head for list of arenas * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. + * @err_lock: Mutex for synchronizing error clearing. * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. 
@@ -176,6 +177,7 @@ struct arena_info { * @init_lock: Mutex used for the BTT initialization * @init_state: Flag describing the initialization state for the BTT * @num_arenas: Number of arenas in the BTT instance + * @phys_bb: Pointer to the namespace's badblocks structure */ struct btt { struct gendisk *btt_disk; -- cgit v1.2.3 From 24e3a7fb60a9187e5df90e5fa655ffc94b9c4f77 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 18 Dec 2017 09:28:39 -0700 Subject: libnvdimm, btt: Fix an incompatibility in the log layout Due to a spec misinterpretation, the Linux implementation of the BTT log area had different padding scheme from other implementations, such as UEFI and NVML. This fixes the padding scheme, and defaults to it for new BTT layouts. We attempt to detect the padding scheme in use when probing for an existing BTT. If we detect the older/incompatible scheme, we continue using it. Reported-by: Juston Li Cc: Dan Williams Cc: Fixes: 5212e11fde4d ("nd_btt: atomic sector updates") Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 201 ++++++++++++++++++++++++++++++++++++++++++--------- drivers/nvdimm/btt.h | 45 +++++++++++- 2 files changed, 211 insertions(+), 35 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index e949e3302af4..c586bcdb5190 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, return ret; } -static int btt_log_read_pair(struct arena_info *arena, u32 lane, - struct log_entry *ent) +static int btt_log_group_read(struct arena_info *arena, u32 lane, + struct log_group *log) { return arena_read_bytes(arena, - arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, - 2 * LOG_ENT_SIZE, 0); + arena->logoff + (lane * LOG_GRP_SIZE), log, + LOG_GRP_SIZE, 0); } static struct dentry *debugfs_root; @@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, 
debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); debugfs_create_x32("flags", S_IRUGO, d, &a->flags); + debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); + debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); } static void btt_debugfs_init(struct btt *btt) @@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt) } } +static u32 log_seq(struct log_group *log, int log_idx) +{ + return le32_to_cpu(log->ent[log_idx].seq); +} + /* * This function accepts two log entries, and uses the * sequence number to find the 'older' entry. @@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt) * * TODO The logic feels a bit kludge-y. make it better.. */ -static int btt_log_get_old(struct log_entry *ent) +static int btt_log_get_old(struct arena_info *a, struct log_group *log) { + int idx0 = a->log_index[0]; + int idx1 = a->log_index[1]; int old; /* @@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent) * the next time, the following logic works out to put this * (next) entry into [1] */ - if (ent[0].seq == 0) { - ent[0].seq = cpu_to_le32(1); + if (log_seq(log, idx0) == 0) { + log->ent[idx0].seq = cpu_to_le32(1); return 0; } - if (ent[0].seq == ent[1].seq) + if (log_seq(log, idx0) == log_seq(log, idx1)) return -EINVAL; - if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + if (log_seq(log, idx0) + log_seq(log, idx1) > 5) return -EINVAL; - if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { - if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + if (log_seq(log, idx0) < log_seq(log, idx1)) { + if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) old = 0; else old = 1; } else { - if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) old = 1; else old = 0; @@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane, { int ret; int old_ent, ret_ent; - struct 
log_entry log[2]; + struct log_group log; - ret = btt_log_read_pair(arena, lane, log); + ret = btt_log_group_read(arena, lane, &log); if (ret) return -EIO; - old_ent = btt_log_get_old(log); + old_ent = btt_log_get_old(arena, &log); if (old_ent < 0 || old_ent > 1) { dev_err(to_dev(arena), "log corruption (%d): lane %d seq [%d, %d]\n", - old_ent, lane, log[0].seq, log[1].seq); + old_ent, lane, log.ent[arena->log_index[0]].seq, + log.ent[arena->log_index[1]].seq); /* TODO set error state? */ return -EIO; } @@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, ret_ent = (old_flag ? old_ent : (1 - old_ent)); if (ent != NULL) - memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); return ret_ent; } @@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane, u32 sub, struct log_entry *ent, unsigned long flags) { int ret; - /* - * Ignore the padding in log_entry for calculating log_half. - * The entry is 'committed' when we write the sequence number, - * and we want to ensure that that is the last thing written. 
- * We don't bother writing the padding as that would be extra - * media wear and write amplification - */ - unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; - u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + u32 group_slot = arena->log_index[sub]; + unsigned int log_half = LOG_ENT_SIZE / 2; void *src = ent; + u64 ns_off; + ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + + (group_slot * LOG_ENT_SIZE); /* split the 16B write into atomic, durable halves */ ret = arena_write_bytes(arena, ns_off, src, log_half, flags); if (ret) @@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena) { size_t logsize = arena->info2off - arena->logoff; size_t chunk_size = SZ_4K, offset = 0; - struct log_entry log; + struct log_entry ent; void *zerobuf; int ret; u32 i; @@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena) } for (i = 0; i < arena->nfree; i++) { - log.lba = cpu_to_le32(i); - log.old_map = cpu_to_le32(arena->external_nlba + i); - log.new_map = cpu_to_le32(arena->external_nlba + i); - log.seq = cpu_to_le32(LOG_SEQ_INIT); - ret = __btt_log_write(arena, i, 0, &log, 0); + ent.lba = cpu_to_le32(i); + ent.old_map = cpu_to_le32(arena->external_nlba + i); + ent.new_map = cpu_to_le32(arena->external_nlba + i); + ent.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &ent, 0); if (ret) goto free; } @@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena) return 0; } +static bool ent_is_padding(struct log_entry *ent) +{ + return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) + && (ent->seq == 0); +} + +/* + * Detecting valid log indices: We read a log group (see the comments in btt.h + * for a description of a 'log_group' and its 'slots'), and iterate over its + * four slots. We expect that a padding slot will be all-zeroes, and use this + * to detect a padding slot vs. an actual entry. + * + * If a log_group is in the initial state, i.e. 
hasn't been used since the + * creation of this BTT layout, it will have three of the four slots with + * zeroes. We skip over these log_groups for the detection of log_index. If + * all log_groups are in the initial state (i.e. the BTT has never been + * written to), it is safe to assume the 'new format' of log entries in slots + * (0, 1). + */ +static int log_set_indices(struct arena_info *arena) +{ + bool idx_set = false, initial_state = true; + int ret, log_index[2] = {-1, -1}; + u32 i, j, next_idx = 0; + struct log_group log; + u32 pad_count = 0; + + for (i = 0; i < arena->nfree; i++) { + ret = btt_log_group_read(arena, i, &log); + if (ret < 0) + return ret; + + for (j = 0; j < 4; j++) { + if (!idx_set) { + if (ent_is_padding(&log.ent[j])) { + pad_count++; + continue; + } else { + /* Skip if index has been recorded */ + if ((next_idx == 1) && + (j == log_index[0])) + continue; + /* valid entry, record index */ + log_index[next_idx] = j; + next_idx++; + } + if (next_idx == 2) { + /* two valid entries found */ + idx_set = true; + } else if (next_idx > 2) { + /* too many valid indices */ + return -ENXIO; + } + } else { + /* + * once the indices have been set, just verify + * that all subsequent log groups are either in + * their initial state or follow the same + * indices. 
+ */ + if (j == log_index[0]) { + /* entry must be 'valid' */ + if (ent_is_padding(&log.ent[j])) + return -ENXIO; + } else if (j == log_index[1]) { + ; + /* + * log_index[1] can be padding if the + * lane never got used and it is still + * in the initial state (three 'padding' + * entries) + */ + } else { + /* entry must be invalid (padding) */ + if (!ent_is_padding(&log.ent[j])) + return -ENXIO; + } + } + } + /* + * If any of the log_groups have more than one valid, + * non-padding entry, then the we are no longer in the + * initial_state + */ + if (pad_count < 3) + initial_state = false; + pad_count = 0; + } + + if (!initial_state && !idx_set) + return -ENXIO; + + /* + * If all the entries in the log were in the initial state, + * assume new padding scheme + */ + if (initial_state) + log_index[1] = 1; + + /* + * Only allow the known permutations of log/padding indices, + * i.e. (0, 1), and (0, 2) + */ + if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) + ; /* known index possibilities */ + else { + dev_err(to_dev(arena), "Found an unknown padding scheme\n"); + return -ENXIO; + } + + arena->log_index[0] = log_index[0]; + arena->log_index[1] = log_index[1]; + dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); + dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); + return 0; +} + static int btt_rtt_init(struct arena_info *arena) { arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); @@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, available -= 2 * BTT_PG_SIZE; /* The log takes a fixed amount of space based on nfree */ - logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), - BTT_PG_SIZE); + logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); available -= logsize; /* Calculate optimal split between map and data area */ @@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, arena->mapoff = arena->dataoff + datasize; arena->logoff = 
arena->mapoff + mapsize; arena->info2off = arena->logoff + logsize; + + /* Default log indices are (0,1) */ + arena->log_index[0] = 0; + arena->log_index[1] = 1; return arena; } @@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt) arena->external_lba_start = cur_nlba; parse_arena_meta(arena, super, cur_off); + ret = log_set_indices(arena); + if (ret) { + dev_err(to_dev(arena), + "Unable to deduce log/padding indices\n"); + goto out; + } + mutex_init(&arena->err_lock); ret = btt_freelist_init(arena); if (ret) diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 884fbbbdd18a..db3cb6d4d0d4 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -27,6 +27,7 @@ #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) #define MAP_ENT_NORMAL 0xC0000000 +#define LOG_GRP_SIZE sizeof(struct log_group) #define LOG_ENT_SIZE sizeof(struct log_entry) #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ @@ -50,12 +51,52 @@ enum btt_init_state { INIT_READY }; +/* + * A log group represents one log 'lane', and consists of four log entries. + * Two of the four entries are valid entries, and the remaining two are + * padding. Due to an old bug in the padding location, we need to perform a + * test to determine the padding scheme being used, and use that scheme + * thereafter. + * + * In kernels prior to 4.15, 'log group' would have actual log entries at + * indices (0, 2) and padding at indices (1, 3), where as the correct/updated + * format has log entries at indices (0, 1) and padding at indices (2, 3). 
+ * + * Old (pre 4.15) format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | lba/old/new/seq | pad | + * +-----------------+-----------------+ + * + * New format: + * +-----------------+-----------------+ + * | ent[0] | ent[1] | + * | 16B | 16B | + * | lba/old/new/seq | lba/old/new/seq | + * +-----------------------------------+ + * | ent[2] | ent[3] | + * | 16B | 16B | + * | pad | pad | + * +-----------------+-----------------+ + * + * We detect during start-up which format is in use, and set + * arena->log_index[(0, 1)] with the detected format. + */ + struct log_entry { __le32 lba; __le32 old_map; __le32 new_map; __le32 seq; - __le64 padding[2]; +}; + +struct log_group { + struct log_entry ent[4]; }; struct btt_sb { @@ -126,6 +167,7 @@ struct aligned_lock { * @debugfs_dir: Debugfs dentry * @flags: Arena flags - may signify error states. * @err_lock: Mutex for synchronizing error clearing. + * @log_index: Indices of the valid log entries in a log_group * * arena_info is a per-arena handle. Once an arena is narrowed down for an * IO, this struct is passed around for the duration of the IO. @@ -158,6 +200,7 @@ struct arena_info { /* Arena flags */ u32 flags; struct mutex err_lock; + int log_index[2]; }; /** -- cgit v1.2.3 From f55688c45442bc863f40ad678c638785b26cdce6 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 18 Dec 2017 13:10:00 -0800 Subject: iw_cxgb4: Only validate the MSN for successful completions If the RECV CQE is in error, ignore the MSN check. This was causing recvs that were flushed into the sw cq to be completed with the wrong status (BAD_MSN instead of FLUSHED). 
Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index b7bfc536e00f..7ed87622e461 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -571,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, ret = -EAGAIN; goto skip_cqe; } - if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { + if (unlikely(!CQE_STATUS(hw_cqe) && + CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { t4_set_wq_in_error(wq); - hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN)); - goto proc_cqe; + hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; } -- cgit v1.2.3 From 96a236ed286776554fbd227c6d2876fd3b5dc65d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 19 Dec 2017 10:29:25 -0800 Subject: iw_cxgb4: reflect the original WR opcode in drain cqes The flush/drain logic was not retaining the original wr opcode in its completion. This can cause problems if the application uses the completion opcode to make decisions. Use bit 10 of the CQE header word to indicate the CQE is a special drain completion, and save the original WR opcode in the cqe header opcode field. 
Fixes: 4fe7c2962e11 ("iw_cxgb4: refactor sq/rq drain logic") Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 7 ++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 -- drivers/infiniband/hw/cxgb4/qp.c | 46 +++++++++++++++++++++++++++++++--- drivers/infiniband/hw/cxgb4/t4.h | 6 +++++ 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 7ed87622e461..6f2b26126c64 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -395,7 +395,7 @@ next_cqe: static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) { - if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) { + if (DRAIN_CQE(cqe)) { WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid); return 0; } @@ -494,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, /* * Special cqe for drain WR completions... */ - if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { + if (DRAIN_CQE(hw_cqe)) { *cookie = CQE_DRAIN_COOKIE(hw_cqe); *cqe = *hw_cqe; goto skip_cqe; @@ -748,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) c4iw_invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); break; - case C4IW_DRAIN_OPCODE: - wc->opcode = IB_WC_SEND; - break; default: pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", CQE_OPCODE(&cqe), CQE_QPID(&cqe)); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 470f97a79ebb..65dd3726ca02 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state) return IB_QPS_ERR; } -#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN - static inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? 
FW_RI_MEM_ACCESS_REM_WRITE : 0) | diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 38bddd02a943..21495f917bcc 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) return 0; } -static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +static int ib_to_fw_opcode(int ib_opcode) +{ + int opcode; + + switch (ib_opcode) { + case IB_WR_SEND_WITH_INV: + opcode = FW_RI_SEND_WITH_INV; + break; + case IB_WR_SEND: + opcode = FW_RI_SEND; + break; + case IB_WR_RDMA_WRITE: + opcode = FW_RI_RDMA_WRITE; + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + opcode = FW_RI_READ_REQ; + break; + case IB_WR_REG_MR: + opcode = FW_RI_FAST_REGISTER; + break; + case IB_WR_LOCAL_INV: + opcode = FW_RI_LOCAL_INV; + break; + default: + opcode = -EINVAL; + } + return opcode; +} + +static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *schp; unsigned long flag; struct t4_cq *cq; + int opcode; schp = to_c4iw_cq(qhp->ibqp.send_cq); cq = &schp->cq; + opcode = ib_to_fw_opcode(wr->opcode); + if (opcode < 0) + return opcode; + cqe.u.drain_cookie = wr->wr_id; cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_OPCODE_V(opcode) | CQE_TYPE_V(1) | CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | CQE_QPID_V(qhp->wq.sq.qid)); spin_lock_irqsave(&schp->lock, flag); @@ -819,6 +855,7 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) schp->ibcq.cq_context); spin_unlock_irqrestore(&schp->comp_handler_lock, flag); } + return 0; } static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) @@ -833,9 +870,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) cqe.u.drain_cookie = wr->wr_id; cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) | - 
CQE_OPCODE_V(C4IW_DRAIN_OPCODE) | + CQE_OPCODE_V(FW_RI_SEND) | CQE_TYPE_V(0) | CQE_SWCQE_V(1) | + CQE_DRAIN_V(1) | CQE_QPID_V(qhp->wq.sq.qid)); spin_lock_irqsave(&rchp->lock, flag); @@ -875,7 +913,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - complete_sq_drain_wr(qhp, wr); + err = complete_sq_drain_wr(qhp, wr); return err; } num_wrs = t4_sq_avail(&qhp->wq); diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index e9ea94268d51..79e8ee12c391 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -197,6 +197,11 @@ struct t4_cqe { #define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M) #define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S) +#define CQE_DRAIN_S 10 +#define CQE_DRAIN_M 0x1 +#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M) +#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S) + #define CQE_STATUS_S 5 #define CQE_STATUS_M 0x1F #define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M) @@ -213,6 +218,7 @@ struct t4_cqe { #define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S) #define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header))) +#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header))) #define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header))) #define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header))) #define SQ_TYPE(x) (CQE_TYPE((x))) -- cgit v1.2.3 From d14587334580bc94d3ee11e8320e0c157f91ae8f Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 19 Dec 2017 14:02:10 -0800 Subject: iw_cxgb4: when flushing, complete all wrs in a chain If a wr chain was posted and needed to be flushed, only the first wr in the chain was completed with FLUSHED status. The rest were never completed. This caused isert to hang on shutdown due to the missing completions which left iscsi IO commands referenced, stalling the shutdown.
Fixes: 4fe7c2962e11 ("iw_cxgb4: refactor sq/rq drain logic") Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 21495f917bcc..d5c92fc520d6 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -858,6 +858,22 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) return 0; } +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int ret = 0; + + while (wr) { + ret = complete_sq_drain_wr(qhp, wr); + if (ret) { + *bad_wr = wr; + break; + } + wr = wr->next; + } + return ret; +} + static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) { struct t4_cqe cqe = {}; @@ -890,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +{ + while (wr) { + complete_rq_drain_wr(qhp, wr); + wr = wr->next; + } +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -913,7 +937,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - err = complete_sq_drain_wr(qhp, wr); + err = complete_sq_drain_wrs(qhp, wr, bad_wr); return err; } num_wrs = t4_sq_avail(&qhp->wq); @@ -1061,7 +1085,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, */ if (qhp->wq.flushed) { spin_unlock_irqrestore(&qhp->lock, flag); - complete_rq_drain_wr(qhp, wr); + complete_rq_drain_wrs(qhp, wr); return err; } num_wrs = t4_rq_avail(&qhp->wq); -- cgit v1.2.3 From 17748056ce123ee37fb7382bc698fc721e3c4a09 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Wed, 20 Dec 2017 09:49:03 -0800 Subject: 
RDMA/vmw_pvrdma: Call ib_umem_release on destroy QP path The QP cleanup did not previously call ib_umem_release, resulting in a user-triggerable kernel resource leak. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Signed-off-by: Bryan Tan Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 10420a18d02f..dceebc623d96 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -431,6 +431,13 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) atomic_dec(&qp->refcnt); wait_event(qp->wait, !atomic_read(&qp->refcnt)); + if (!qp->is_kernel) { + if (qp->rumem) + ib_umem_release(qp->rumem); + if (qp->sumem) + ib_umem_release(qp->sumem); + } + pvrdma_page_dir_cleanup(dev, &qp->pdir); kfree(qp); -- cgit v1.2.3 From 30a366a9dabd05a0d218288b7d732649886b6a53 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Wed, 20 Dec 2017 09:50:01 -0800 Subject: RDMA/vmw_pvrdma: Use refcount_dec_and_test to avoid warning refcount_dec generates a warning when the operation causes the refcount to hit zero. Avoid this by using refcount_dec_and_test. 
Fixes: 8b10ba783c9d ("RDMA/vmw_pvrdma: Add shared receive queue support") Reviewed-by: Adit Ranadive Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Signed-off-by: Bryan Tan Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 826ccb864596..a2b1a3c115f2 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -236,8 +236,8 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) dev->srq_tbl[srq->srq_handle] = NULL; spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); - refcount_dec(&srq->refcnt); - wait_event(srq->wait, !refcount_read(&srq->refcnt)); + if (!refcount_dec_and_test(&srq->refcnt)) + wait_event(srq->wait, !refcount_read(&srq->refcnt)); /* There is no support for kernel clients, so this is safe. */ ib_umem_release(srq->umem); -- cgit v1.2.3 From e3524b269e451cff68b19f32b15448933a53a4f4 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Wed, 20 Dec 2017 09:51:40 -0800 Subject: RDMA/vmw_pvrdma: Avoid use after free due to QP/CQ/SRQ destroy The use of wait queues in vmw_pvrdma for handling concurrent access to a resource leaves a race condition which can cause a use after free bug. Fix this by using the pattern from other drivers, complete() protected by dec_and_test to ensure complete() is called only once. 
Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Signed-off-by: Bryan Tan Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 6 +++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 7 ++++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 17 +++++++---------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 7 ++++--- drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c | 7 ++++--- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 63bc2efc34eb..4f7bd3b6a315 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -94,7 +94,7 @@ struct pvrdma_cq { u32 cq_handle; bool is_kernel; atomic_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_id_table { @@ -175,7 +175,7 @@ struct pvrdma_srq { u32 srq_handle; int npages; refcount_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_qp { @@ -197,7 +197,7 @@ struct pvrdma_qp { bool is_kernel; struct mutex mutex; /* QP state mutex. 
*/ atomic_t refcnt; - wait_queue_head_t wait; + struct completion free; }; struct pvrdma_dev { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 3562c0c30492..e529622cefad 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); atomic_set(&cq->refcnt, 1); - init_waitqueue_head(&cq->wait); + init_completion(&cq->free); spin_lock_init(&cq->cq_lock); memset(cmd, 0, sizeof(*cmd)); @@ -230,8 +230,9 @@ err_cq: static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) { - atomic_dec(&cq->refcnt); - wait_event(cq->wait, !atomic_read(&cq->refcnt)); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); + wait_for_completion(&cq->free); if (!cq->is_kernel) ib_umem_release(cq->umem); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 1f4e18717a00..e92681878c93 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type) ibqp->event_handler(&e, ibqp->qp_context); } if (qp) { - atomic_dec(&qp->refcnt); - if (atomic_read(&qp->refcnt) == 0) - wake_up(&qp->wait); + if (atomic_dec_and_test(&qp->refcnt)) + complete(&qp->free); } } @@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type) ibcq->event_handler(&e, ibcq->cq_context); } if (cq) { - atomic_dec(&cq->refcnt); - if (atomic_read(&cq->refcnt) == 0) - wake_up(&cq->wait); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); } } @@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) } if (srq) { if (refcount_dec_and_test(&srq->refcnt)) - wake_up(&srq->wait); + complete(&srq->free); } } 
@@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) if (cq && cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); if (cq) { - atomic_dec(&cq->refcnt); - if (atomic_read(&cq->refcnt)) - wake_up(&cq->wait); + if (atomic_dec_and_test(&cq->refcnt)) + complete(&cq->free); } pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index dceebc623d96..4059308e1454 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, spin_lock_init(&qp->rq.lock); mutex_init(&qp->mutex); atomic_set(&qp->refcnt, 1); - init_waitqueue_head(&qp->wait); + init_completion(&qp->free); qp->state = IB_QPS_RESET; @@ -428,8 +428,9 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); - atomic_dec(&qp->refcnt); - wait_event(qp->wait, !atomic_read(&qp->refcnt)); + if (atomic_dec_and_test(&qp->refcnt)) + complete(&qp->free); + wait_for_completion(&qp->free); if (!qp->is_kernel) { if (qp->rumem) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index a2b1a3c115f2..5acebb1ef631 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, spin_lock_init(&srq->lock); refcount_set(&srq->refcnt, 1); - init_waitqueue_head(&srq->wait); + init_completion(&srq->free); dev_dbg(&dev->pdev->dev, "create shared receive queue from user space\n"); @@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) dev->srq_tbl[srq->srq_handle] = NULL; spin_unlock_irqrestore(&dev->srq_tbl_lock, flags); - if (!refcount_dec_and_test(&srq->refcnt)) - wait_event(srq->wait, 
!refcount_read(&srq->refcnt)); + if (refcount_dec_and_test(&srq->refcnt)) + complete(&srq->free); + wait_for_completion(&srq->free); /* There is no support for kernel clients, so this is safe. */ ib_umem_release(srq->umem); -- cgit v1.2.3 From 71a0ff65a21bf3e2c4fde208c4a635ed2bbb4e81 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 21 Dec 2017 17:38:26 +0200 Subject: IB/mlx5: Fix congestion counters in LAG mode Congestion counters are counted and queried per physical function. When working in LAG mode, CNP packets can be sent or received on both of the functions, thus congestion counters should be aggregated from the two physical functions. Fixes: e1f24a79f424 ("IB/mlx5: Support congestion related counters") Signed-off-by: Majd Dibbiny Reviewed-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 11 ------ drivers/infiniband/hw/mlx5/cmd.h | 2 - drivers/infiniband/hw/mlx5/main.c | 35 +++-------------- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 56 +++++++++++++++++++++++++++ include/linux/mlx5/driver.h | 4 ++ 5 files changed, 66 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 470995fa38d2..6f6712f87a73 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey) return err; } -int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, - bool reset, void *out, int out_size) -{ - u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; - - MLX5_SET(query_cong_statistics_in, in, opcode, - MLX5_CMD_OP_QUERY_CONG_STATISTICS); - MLX5_SET(query_cong_statistics_in, in, clear, reset); - return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); -} - int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size) { diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h 
index af4c24596274..78ffded7cc2c 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -37,8 +37,6 @@ #include int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); -int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, - bool reset, void *out, int out_size); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 543d0a4c8bf3..b4ef4d9b6ce5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3737,34 +3737,6 @@ free: return ret; } -static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_port *port, - struct rdma_hw_stats *stats) -{ - int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); - void *out; - int ret, i; - int offset = port->cnts.num_q_counters; - - out = kvzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; - - ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen); - if (ret) - goto free; - - for (i = 0; i < port->cnts.num_cong_counters; i++) { - stats->value[i + offset] = - be64_to_cpup((__be64 *)(out + - port->cnts.offsets[i + offset])); - } - -free: - kvfree(out); - return ret; -} - static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u8 port_num, int index) @@ -3782,7 +3754,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, num_counters = port->cnts.num_q_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - ret = mlx5_ib_query_cong_counters(dev, port, stats); + ret = mlx5_lag_query_cong_counters(dev->mdev, + stats->value + + port->cnts.num_q_counters, + port->cnts.num_cong_counters, + port->cnts.offsets + + port->cnts.num_q_counters); if (ret) return ret; num_counters += port->cnts.num_cong_counters; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index f26f97fe4666..582b2f18010a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); +static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, + bool reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; + + MLX5_SET(query_cong_statistics_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATISTICS); + MLX5_SET(query_cong_statistics_in, in, clear, reset); + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} + static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) { return dev->priv.lag; @@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) /* If bonded, we do not add an IB device for PF1. */ return false; } + +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + struct mlx5_core_dev *mdev[MLX5_MAX_PORTS]; + struct mlx5_lag *ldev; + int num_ports; + int ret, i, j; + void *out; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(values, 0, sizeof(*values) * num_counters); + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + if (ldev && mlx5_lag_is_bonded(ldev)) { + num_ports = MLX5_MAX_PORTS; + mdev[0] = ldev->pf[0].dev; + mdev[1] = ldev->pf[1].dev; + } else { + num_ports = 1; + mdev[0] = dev; + } + + for (i = 0; i < num_ports; ++i) { + ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen); + if (ret) + goto unlock; + + for (j = 0; j < num_counters; ++j) + values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); + } + +unlock: + mutex_unlock(&lag_mutex); + kvfree(out); + return ret; +} +EXPORT_SYMBOL(mlx5_lag_query_cong_counters); diff --git 
a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a886b51511ab..8846919356ca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1164,6 +1164,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); -- cgit v1.2.3 From 1f80bd6a6cc8358b81194e1f5fc16449947396ec Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 21 Dec 2017 17:38:27 +0200 Subject: IB/ipoib: Fix lockdep issue found on ipoib_ib_dev_heavy_flush The locking order of vlan_rwsem (LOCK A) and then rtnl (LOCK B), contradicts other flows such as ipoib_open possibly causing a deadlock. To prevent this deadlock heavy flush is called with RTNL locked and only then tries to acquire vlan_rwsem. This deadlock is possible only when there are child interfaces. [ 140.941758] ====================================================== [ 140.946276] WARNING: possible circular locking dependency detected [ 140.950950] 4.15.0-rc1+ #9 Tainted: G O [ 140.954797] ------------------------------------------------------ [ 140.959424] kworker/u32:1/146 is trying to acquire lock: [ 140.963450] (rtnl_mutex){+.+.}, at: [] __ipoib_ib_dev_flush+0x2da/0x4e0 [ib_ipoib] [ 140.970006] but task is already holding lock: [ 140.975141] (&priv->vlan_rwsem){++++}, at: [] __ipoib_ib_dev_flush+0x51/0x4e0 [ib_ipoib] [ 140.982105] which lock already depends on the new lock. 
[ 140.990023] the existing dependency chain (in reverse order) is: [ 140.998650] -> #1 (&priv->vlan_rwsem){++++}: [ 141.005276] down_read+0x4d/0xb0 [ 141.009560] ipoib_open+0xad/0x120 [ib_ipoib] [ 141.014400] __dev_open+0xcb/0x140 [ 141.017919] __dev_change_flags+0x1a4/0x1e0 [ 141.022133] dev_change_flags+0x23/0x60 [ 141.025695] devinet_ioctl+0x704/0x7d0 [ 141.029156] sock_do_ioctl+0x20/0x50 [ 141.032526] sock_ioctl+0x221/0x300 [ 141.036079] do_vfs_ioctl+0xa6/0x6d0 [ 141.039656] SyS_ioctl+0x74/0x80 [ 141.042811] entry_SYSCALL_64_fastpath+0x1f/0x96 [ 141.046891] -> #0 (rtnl_mutex){+.+.}: [ 141.051701] lock_acquire+0xd4/0x220 [ 141.055212] __mutex_lock+0x88/0x970 [ 141.058631] __ipoib_ib_dev_flush+0x2da/0x4e0 [ib_ipoib] [ 141.063160] __ipoib_ib_dev_flush+0x71/0x4e0 [ib_ipoib] [ 141.067648] process_one_work+0x1f5/0x610 [ 141.071429] worker_thread+0x4a/0x3f0 [ 141.074890] kthread+0x141/0x180 [ 141.078085] ret_from_fork+0x24/0x30 [ 141.081559] other info that might help us debug this: [ 141.088967] Possible unsafe locking scenario: [ 141.094280] CPU0 CPU1 [ 141.097953] ---- ---- [ 141.101640] lock(&priv->vlan_rwsem); [ 141.104771] lock(rtnl_mutex); [ 141.109207] lock(&priv->vlan_rwsem); [ 141.114032] lock(rtnl_mutex); [ 141.116800] *** DEADLOCK *** Fixes: b4b678b06f6e ("IB/ipoib: Grab rtnl lock on heavy flush when calling ndo_open/stop") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 3b96cdaf9a83..e6151a29c412 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev); if (level == IPOIB_FLUSH_HEAVY) { - rtnl_lock(); if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) 
ipoib_ib_dev_stop(dev); - result = ipoib_ib_dev_open(dev); - rtnl_unlock(); - if (result) + if (ipoib_ib_dev_open(dev)) return; if (netif_queue_stopped(dev)) @@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, flush_heavy); + rtnl_lock(); __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0); + rtnl_unlock(); } void ipoib_ib_dev_cleanup(struct net_device *dev) -- cgit v1.2.3 From cd95a89282ef61458c3758d70ebfbd91f303033f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 21 Dec 2017 08:52:50 -0800 Subject: selftests/bpf: fix Makefile for passing LLC to the command line Makefile has a LLC variable that is initialised to "llc", but can theoretically be overridden from the command line ("make LLC=llc-6.0"). However, this fails because for LLVM probe check, "llc" is called directly. Use the $(LLC) variable instead to fix this. Fixes: 22c8852624fc ("bpf: improve selftests and add tests for meta pointer") Signed-off-by: Quentin Monnet Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 05fc4e2e7b3a..9316e648a880 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -39,7 +39,7 @@ $(BPFOBJ): force CLANG ?= clang LLC ?= llc -PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) +PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) # Let newer LLVM versions transparently probe the kernel for availability # of full BPF instruction set. 
-- cgit v1.2.3 From e7cdf5c82f1773c3386b93bbcf13b9bfff29fa31 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 Dec 2017 12:07:00 +0000 Subject: drm/syncobj: Stop reusing the same struct file for all syncobj -> fd The vk cts test: dEQP-VK.api.external.semaphore.opaque_fd.export_multiple_times_temporary triggers a lot of VFS: Close: file count is 0 Dave pointed out that clearing the syncobj->file from drm_syncobj_file_release() was sufficient to silence the test, but that opens a can of worm since we assumed that the syncobj->file was never unset. Stop trying to reuse the same struct file for every fd pointing to the drm_syncobj, and allocate one file for each fd instead. v2: Fixup return handling of drm_syncobj_fd_to_handle v2.1: [airlied: fix possible syncobj ref race] Reported-by: Dave Airlie Signed-off-by: Chris Wilson Tested-by: Dave Airlie Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_syncobj.c | 77 ++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index f776fc1cc543..cb4d09c70fd4 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = { .release = drm_syncobj_file_release, }; -static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj) -{ - struct file *file = anon_inode_getfile("syncobj_file", - &drm_syncobj_file_fops, - syncobj, 0); - if (IS_ERR(file)) - return PTR_ERR(file); - - drm_syncobj_get(syncobj); - if (cmpxchg(&syncobj->file, NULL, file)) { - /* lost the race */ - fput(file); - } - - return 0; -} - int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd) { - int ret; + struct file *file; int fd; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) return fd; - if (!syncobj->file) { - ret = drm_syncobj_alloc_file(syncobj); - if (ret) { - put_unused_fd(fd); - return ret; - } + file = 
anon_inode_getfile("syncobj_file", + &drm_syncobj_file_fops, + syncobj, 0); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); } - fd_install(fd, syncobj->file); + + drm_syncobj_get(syncobj); + fd_install(fd, file); + *p_fd = fd; return 0; } @@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private, return ret; } -static struct drm_syncobj *drm_syncobj_fdget(int fd) -{ - struct file *file = fget(fd); - - if (!file) - return NULL; - if (file->f_op != &drm_syncobj_file_fops) - goto err; - - return file->private_data; -err: - fput(file); - return NULL; -}; - static int drm_syncobj_fd_to_handle(struct drm_file *file_private, int fd, u32 *handle) { - struct drm_syncobj *syncobj = drm_syncobj_fdget(fd); + struct drm_syncobj *syncobj; + struct file *file; int ret; - if (!syncobj) + file = fget(fd); + if (!file) return -EINVAL; + if (file->f_op != &drm_syncobj_file_fops) { + fput(file); + return -EINVAL; + } + /* take a reference to put in the idr */ + syncobj = file->private_data; drm_syncobj_get(syncobj); idr_preload(GFP_KERNEL); @@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, spin_unlock(&file_private->syncobj_table_lock); idr_preload_end(); - if (ret < 0) { - fput(syncobj->file); - return ret; - } - *handle = ret; - return 0; + if (ret > 0) { + *handle = ret; + ret = 0; + } else + drm_syncobj_put(syncobj); + + fput(file); + return ret; } static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, -- cgit v1.2.3 From dc1c4165d189350cb51bdd3057deb6ecd164beda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Tue, 12 Dec 2017 12:02:04 +0000 Subject: KVM: PPC: Book3S: fix XIVE migration of pending interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When restoring a pending interrupt, we are setting the Q bit to force a retrigger in xive_finish_unmask(). 
But we also need to force an EOI in this case to reach the same initial state : P=1, Q=0. This can be done by not setting 'old_p' for pending interrupts which will inform xive_finish_unmask() that an EOI needs to be sent. Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Suggested-by: Benjamin Herrenschmidt Signed-off-by: Cédric Le Goater Reviewed-by: Laurent Vivier Tested-by: Laurent Vivier Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_xive.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..b5e6d227a034 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1558,7 +1558,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Restore P and Q. If the interrupt was pending, we - * force both P and Q, which will trigger a resend. + * force Q and !P, which will trigger a resend. * * That means that a guest that had both an interrupt * pending (queued) and Q set will restore with only @@ -1566,7 +1566,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) * is perfectly fine as coalescing interrupts that haven't * been presented yet is always allowed. 
*/ - if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) + if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING)) state->old_p = true; if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) state->old_q = true; -- cgit v1.2.3 From 7333b5aca412d6ad02667b5a513485838a91b136 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Tue, 12 Dec 2017 18:23:56 +0100 Subject: KVM: PPC: Book3S HV: Fix pending_pri value in kvmppc_xive_get_icp() When we migrate a VM from a POWER8 host (XICS) to a POWER9 host (XICS-on-XIVE), we have an error: qemu-kvm: Unable to restore KVM interrupt controller state \ (0xff000000) for CPU 0: Invalid argument This is because kvmppc_xics_set_icp() checks the new state is internally consistent, and especially: ... 1129 if (xisr == 0) { 1130 if (pending_pri != 0xff) 1131 return -EINVAL; ... On the other side, kvmppc_xive_get_icp() sets neither the pending_pri value, nor the xisr value (set to 0) (and kvmppc_xive_set_icp() ignores the pending_pri value) As xisr is 0, pending_pri must be set to 0xff.
Fixes: 5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Laurent Vivier Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_xive.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index b5e6d227a034..0d750d274c4e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) /* Return the per-cpu state for state saving/migration */ return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | - (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; + (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | + (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT; } int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) -- cgit v1.2.3 From 972bc90ed8f6008f5e2d42c72d894b2401736519 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 18 Nov 2016 12:23:46 +0530 Subject: OPP: Allow OPP table to be used for power-domains Power-domains can also have their active states and this patch enhances the OPP binding to define those. The power domains can use the OPP bindings as is, with one additional change to Allow "operating-points-v2" property to contain multiple phandles for power domain providers providing multiple domains. 
Reviewed-by: Ulf Hansson Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp.txt | 5 +++++ Documentation/devicetree/bindings/power/power_domain.txt | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 9d733af26be7..a3953a1bb1a1 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -45,6 +45,11 @@ Devices supporting OPPs must set their "operating-points-v2" property with phandle to a OPP table in their DT node. The OPP core will use this phandle to find the operating points for the device. +This can contain more than one phandle for power domain providers that provide +multiple power domains. That is, one phandle for each power domain. If only one +phandle is available, then the same OPP table will be used for all power domains +provided by the power domain provider. + If required, this can be extended for SoC vendor specific bindings. Such bindings should be documented as Documentation/devicetree/bindings/power/-opp.txt and should have a compatible description like: "operating-points-v2-". diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt index 14bd9e945ff6..61549840ab3b 100644 --- a/Documentation/devicetree/bindings/power/power_domain.txt +++ b/Documentation/devicetree/bindings/power/power_domain.txt @@ -40,6 +40,12 @@ Optional properties: domain's idle states. In the absence of this property, the domain would be considered as capable of being powered-on or powered-off. +- operating-points-v2 : Phandles to the OPP tables of power domains provided by + a power domain provider. If the provider provides a single power domain only + or all the power domains provided by the provider have identical OPP tables, + then this shall contain a single phandle. 
Refer to ../opp/opp.txt for more + information. + Example: power: power-controller@12340000 { -- cgit v1.2.3 From e856f078bcf120e1627c750014d2612974ed81a2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 18 Dec 2017 15:23:48 +0530 Subject: OPP: Introduce "required-opp" property Devices sometimes have inter-dependencies. For example, a device that needs to run at 800 MHz needs another device (e.g. its power domain) to be configured at a particular operating performance point. This patch introduces a new property "required-opp" which can be present directly in a device's node (if it doesn't need to change its OPPs), or in device's OPP nodes. More details on the property can be seen in the binding itself. Reviewed-by: Ulf Hansson Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp.txt | 8 +++ .../devicetree/bindings/power/power_domain.txt | 59 ++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index a3953a1bb1a1..4e4f30288c8b 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -159,6 +159,14 @@ Optional properties: - status: Marks the node enabled/disabled. +- required-opp: This contains phandle to an OPP node in another device's OPP + table. It may contain an array of phandles, where each phandle points to an + OPP of a different device. It should not contain multiple phandles to the OPP + nodes in the same OPP table. This specifies the minimum required OPP of the + device(s), whose OPP's phandle is present in this property, for the + functioning of the current device at the current OPP (where this property is + present). + Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
/ { diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt index 61549840ab3b..f3355313c020 100644 --- a/Documentation/devicetree/bindings/power/power_domain.txt +++ b/Documentation/devicetree/bindings/power/power_domain.txt @@ -126,4 +126,63 @@ The node above defines a typical PM domain consumer device, which is located inside a PM domain with index 0 of a power controller represented by a node with the label "power". +Optional properties: +- required-opp: This contains phandle to an OPP node in another device's OPP + table. It may contain an array of phandles, where each phandle points to an + OPP of a different device. It should not contain multiple phandles to the OPP + nodes in the same OPP table. This specifies the minimum required OPP of the + device(s), whose OPP's phandle is present in this property, for the + functioning of the current device at the current OPP (where this property is + present). + +Example: +- OPP table for domain provider that provides two domains. 
+ + domain0_opp_table: opp-table0 { + compatible = "operating-points-v2"; + + domain0_opp_0: opp-1000000000 { + opp-hz = /bits/ 64 <1000000000>; + opp-microvolt = <975000 970000 985000>; + }; + domain0_opp_1: opp-1100000000 { + opp-hz = /bits/ 64 <1100000000>; + opp-microvolt = <1000000 980000 1010000>; + }; + }; + + domain1_opp_table: opp-table1 { + compatible = "operating-points-v2"; + + domain1_opp_0: opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <975000 970000 985000>; + }; + domain1_opp_1: opp-1300000000 { + opp-hz = /bits/ 64 <1300000000>; + opp-microvolt = <1000000 980000 1010000>; + }; + }; + + power: power-controller@12340000 { + compatible = "foo,power-controller"; + reg = <0x12340000 0x1000>; + #power-domain-cells = <1>; + operating-points-v2 = <&domain0_opp_table>, <&domain1_opp_table>; + }; + + leaky-device0@12350000 { + compatible = "foo,i-leak-current"; + reg = <0x12350000 0x1000>; + power-domains = <&power 0>; + required-opp = <&domain0_opp_0>; + }; + + leaky-device1@12350000 { + compatible = "foo,i-leak-current"; + reg = <0x12350000 0x1000>; + power-domains = <&power 1>; + required-opp = <&domain1_opp_1>; + }; + [1]. 
Documentation/devicetree/bindings/power/domain-idle-state.txt -- cgit v1.2.3 From 506e8a912661c97b41adc8a286b875d01323ec45 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Dec 2017 22:35:19 +0100 Subject: ARM: dts: ls1021a: fix incorrect clock references dtc warns about two 'clocks' properties that have an extraneous '1' at the end: arch/arm/boot/dts/ls1021a-qds.dtb: Warning (clocks_property): arch/arm/boot/dts/ls1021a-twr.dtb: Warning (clocks_property): Property 'clocks', cell 1 is not a phandle reference in /soc/i2c@2180000/mux@77/i2c@4/sgtl5000@2a arch/arm/boot/dts/ls1021a-qds.dtb: Warning (clocks_property): Missing property '#clock-cells' in node /soc/interrupt-controller@1400000 or bad phandle (referred from /soc/i2c@2180000/mux@77/i2c@4/sgtl5000@2a:clocks[1]) Property 'clocks', cell 1 is not a phandle reference in /soc/i2c@2190000/sgtl5000@a arch/arm/boot/dts/ls1021a-twr.dtb: Warning (clocks_property): Missing property '#clock-cells' in node /soc/interrupt-controller@1400000 or bad phandle (referred from /soc/i2c@2190000/sgtl5000@a:clocks[1]) The clocks that get referenced here are fixed-rate, so they do not take any argument, and dtc interprets the next cell as a phandle, which is invalid. 
Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ls1021a-qds.dts | 2 +- arch/arm/boot/dts/ls1021a-twr.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ls1021a-qds.dts b/arch/arm/boot/dts/ls1021a-qds.dts index 940875316d0f..67b4de0e3439 100644 --- a/arch/arm/boot/dts/ls1021a-qds.dts +++ b/arch/arm/boot/dts/ls1021a-qds.dts @@ -215,7 +215,7 @@ reg = <0x2a>; VDDA-supply = <&reg_3p3v>; VDDIO-supply = <&reg_3p3v>; - clocks = <&sys_mclk 1>; + clocks = <&sys_mclk>; }; }; }; diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts index a8b148ad1dd2..44715c8ef756 100644 --- a/arch/arm/boot/dts/ls1021a-twr.dts +++ b/arch/arm/boot/dts/ls1021a-twr.dts @@ -187,7 +187,7 @@ reg = <0x0a>; VDDA-supply = <&reg_3p3v>; VDDIO-supply = <&reg_3p3v>; - clocks = <&sys_mclk 1>; + clocks = <&sys_mclk>; }; }; -- cgit v1.2.3 From fbd90b4cae105fbd8364fa1ce3f41d0c06296f58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Dec 2017 22:45:24 +0100 Subject: ARM: dts: tango4: remove bogus interrupt-controller property dtc points out that the parent node of the interrupt controllers is not actually an interrupt controller itself, and lacks an #interrupt-cells property: arch/arm/boot/dts/tango4-vantage-1172.dtb: Warning (interrupts_property): Missing #interrupt-cells in interrupt-parent /soc/interrupt-controller@6e000 This removes the annotation.
Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/tango4-common.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/tango4-common.dtsi b/arch/arm/boot/dts/tango4-common.dtsi index 0ec1b0a317b4..ff72a8efb73d 100644 --- a/arch/arm/boot/dts/tango4-common.dtsi +++ b/arch/arm/boot/dts/tango4-common.dtsi @@ -156,7 +156,6 @@ reg = <0x6e000 0x400>; ranges = <0 0x6e000 0x400>; interrupt-parent = <&gic>; - interrupt-controller; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From d042566d8c704e1ecec370300545d4a409222e39 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Dec 2017 11:10:26 +0100 Subject: crypto: chelsio - select CRYPTO_GF128MUL Without the gf128mul library support, we can run into a link error: drivers/crypto/chelsio/chcr_algo.o: In function `chcr_update_tweak': chcr_algo.c:(.text+0x7e0): undefined reference to `gf128mul_x8_ble' This adds a Kconfig select statement for it, next to the ones we already have. Cc: Fixes: b8fd1f4170e7 ("crypto: chcr - Add ctr mode and process large sg entries for cipher") Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index 3e104f5aa0c2..b56b3f711d94 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -5,6 +5,7 @@ config CRYPTO_DEV_CHELSIO select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_AUTHENC + select CRYPTO_GF128MUL ---help--- The Chelsio Crypto Co-processor driver for T6 adapters. 
-- cgit v1.2.3 From e57121d08c38dabec15cf3e1e2ad46721af30cae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 Dec 2017 12:15:17 -0800 Subject: crypto: chacha20poly1305 - validate the digest size If the rfc7539 template was instantiated with a hash algorithm with digest size larger than 16 bytes (POLY1305_DIGEST_SIZE), then the digest overran the 'tag' buffer in 'struct chachapoly_req_ctx', corrupting the subsequent memory, including 'cryptlen'. This caused a crash during crypto_skcipher_decrypt(). Fix it by, when instantiating the template, requiring that the underlying hash algorithm has the digest size expected for Poly1305. Reproducer: #include <linux/if_alg.h> #include <sys/socket.h> #include <unistd.h> int main() { int algfd, reqfd; struct sockaddr_alg addr = { .salg_type = "aead", .salg_name = "rfc7539(chacha20,sha256)", }; unsigned char buf[32] = { 0 }; algfd = socket(AF_ALG, SOCK_SEQPACKET, 0); bind(algfd, (void *)&addr, sizeof(addr)); setsockopt(algfd, SOL_ALG, ALG_SET_KEY, buf, sizeof(buf)); reqfd = accept(algfd, 0, 0); write(reqfd, buf, 16); read(reqfd, buf, 16); } Reported-by: syzbot Fixes: 71ebc4d1b27d ("crypto: chacha20poly1305 - Add a ChaCha20-Poly1305 AEAD construction, RFC7539") Cc: <stable@vger.kernel.org> # v4.2+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/chacha20poly1305.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index db1bc3147bc4..600afa99941f 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, algt->mask)); if (IS_ERR(poly)) return PTR_ERR(poly); + poly_hash = __crypto_hash_alg_common(poly); + + err = -EINVAL; + if (poly_hash->digestsize != POLY1305_DIGEST_SIZE) + goto out_put_poly; err = -ENOMEM; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); @@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, ctx =
aead_instance_ctx(inst); ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize; - poly_hash = __crypto_hash_alg_common(poly); err = crypto_init_ahash_spawn(&ctx->poly, poly_hash, aead_crypto_instance(inst)); if (err) -- cgit v1.2.3 From af955bf15d2c27496b0269b1f05c26f758c68314 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 19 Dec 2017 10:27:24 +0000 Subject: crypto: af_alg - Fix race around ctx->rcvused by making it atomic_t This variable was increased and decreased without any protection. Result was an occasional misscount and negative wrap around resulting in false resource allocation failures. Fixes: 7d2c3f54e6f6 ("crypto: af_alg - remove locking in async callback") Signed-off-by: Jonathan Cameron Reviewed-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/af_alg.c | 4 ++-- crypto/algif_aead.c | 2 +- crypto/algif_skcipher.c | 2 +- include/crypto/if_alg.h | 5 +++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f1a2caf1b59b..d3f1c431724b 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq) unsigned int i; list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) { - ctx->rcvused -= rsgl->sg_num_bytes; + atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused); af_alg_free_sg(&rsgl->sgl); list_del(&rsgl->list); if (rsgl != &areq->first_rsgl) @@ -1162,7 +1162,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, areq->last_rsgl = rsgl; len += err; - ctx->rcvused += err; + atomic_add(err, &ctx->rcvused); rsgl->sg_num_bytes = err; iov_iter_advance(&msg->msg_iter, err); } diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index b73db2b27656..20df8c1b6851 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -571,7 +571,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); 
ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index baef9bfccdda..c5c47b680152 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -390,7 +390,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 38d9c5861ed8..f38227a78eae 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -150,7 +151,7 @@ struct af_alg_ctx { struct crypto_wait wait; size_t used; - size_t rcvused; + atomic_t rcvused; bool more; bool merge; @@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk) struct af_alg_ctx *ctx = ask->private; return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) - - ctx->rcvused, 0); + atomic_read(&ctx->rcvused), 0); } /** -- cgit v1.2.3 From 203f45003a3d03eea8fa28d74cfc74c354416fdb Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 19 Dec 2017 19:09:07 +0100 Subject: crypto: n2 - cure use after free queue_cache_init is first called for the Control Word Queue (n2_crypto_probe). At that time, queue_cache[0] is NULL and a new kmem_cache will be allocated. If the subsequent n2_register_algs call fails, the kmem_cache will be released in queue_cache_destroy, but queue_cache[0] is not set back to NULL.
So when the Module Arithmetic Unit gets probed next (n2_mau_probe), queue_cache_init will not allocate a kmem_cache again, but leave it as its bogus value, causing a BUG() to trigger when queue_cache[0] is eventually passed to kmem_cache_zalloc: n2_crypto: Found N2CP at /virtual-devices@100/n2cp@7 n2_crypto: Registered NCS HVAPI version 2.0 called queue_cache_init n2_crypto: md5 alg registration failed n2cp f028687c: /virtual-devices@100/n2cp@7: Unable to register algorithms. called queue_cache_destroy n2cp: probe of f028687c failed with error -22 n2_crypto: Found NCP at /virtual-devices@100/ncp@6 n2_crypto: Registered NCS HVAPI version 2.0 called queue_cache_init kernel BUG at mm/slab.c:2993! Call Trace: [0000000000604488] kmem_cache_alloc+0x1a8/0x1e0 (inlined) kmem_cache_zalloc (inlined) new_queue (inlined) spu_queue_setup (inlined) handle_exec_unit [0000000010c61eb4] spu_mdesc_scan+0x1f4/0x460 [n2_crypto] [0000000010c62b80] n2_mau_probe+0x100/0x220 [n2_crypto] [000000000084b174] platform_drv_probe+0x34/0xc0 Cc: Signed-off-by: Jan Engelhardt Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- drivers/crypto/n2_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 48de52cf2ecc..662e709812cc 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1625,6 +1625,7 @@ static int queue_cache_init(void) CWQ_ENTRY_SIZE, 0, NULL); if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; return -ENOMEM; } return 0; @@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; + queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL; } static long spu_queue_register_workfn(void *arg) -- cgit v1.2.3 From d76c68109f37cb85b243a1cf0f40313afd2bae68 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 20 Dec 2017 14:28:25 -0800 Subject: crypto: pcrypt - fix freeing pcrypt instances pcrypt is using the old way of freeing instances, where the ->free() method specified in the 'struct crypto_template' is passed a pointer to the 'struct crypto_instance'. But the crypto_instance is being kfree()'d directly, which is incorrect because the memory was actually allocated as an aead_instance, which contains the crypto_instance at a nonzero offset. Thus, the wrong pointer was being kfree()'d. Fix it by switching to the new way to free aead_instance's where the ->free() method is specified in the aead_instance itself. 
Reported-by: syzbot Fixes: 0496f56065e0 ("crypto: pcrypt - Add support for new AEAD interface") Cc: # v4.2+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/pcrypt.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index ee9cfb99fe25..f8ec3d4ba4a8 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->child); } +static void pcrypt_free(struct aead_instance *inst) +{ + struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); + + crypto_drop_aead(&ctx->spawn); + kfree(inst); +} + static int pcrypt_init_instance(struct crypto_instance *inst, struct crypto_alg *alg) { @@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.encrypt = pcrypt_aead_encrypt; inst->alg.decrypt = pcrypt_aead_decrypt; + inst->free = pcrypt_free; + err = aead_register_instance(tmpl, inst); if (err) goto out_drop_aead; @@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb) return -EINVAL; } -static void pcrypt_free(struct crypto_instance *inst) -{ - struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst); - - crypto_drop_aead(&ctx->spawn); - kfree(inst); -} - static int pcrypt_cpumask_change_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt) static struct crypto_template pcrypt_tmpl = { .name = "pcrypt", .create = pcrypt_create, - .free = pcrypt_free, .module = THIS_MODULE, }; -- cgit v1.2.3 From 87c059e9c39dae20b8b9bd19d9ec55a6d6c10468 Mon Sep 17 00:00:00 2001 From: Bogdan Mirea Date: Thu, 21 Dec 2017 17:18:58 +0200 Subject: arm64: dts: renesas: salvator-x: Remove renesas, no-ether-link property The present change is a bug fix for AVB link iteratively up/down. 
Steps to reproduce: - start AVB TX stream (Using aplay via MSE), - disconnect+reconnect the eth cable, - after a reconnection the eth connection goes iteratively up/down without user interaction, - this may heal after some seconds or even stay for minutes. As the documentation specifies, the "renesas,no-ether-link" option should be used when a board does not provide a proper AVB_LINK signal. There is no need for this option enabled on RCAR H3/M3 Salvator-X/XS and ULCB starter kits since the AVB_LINK is correctly handled by HW. Choosing to keep or remove the "renesas,no-ether-link" option will have impact on the code flow in the following ways: - keeping this option enabled may lead to unexpected behavior since the RX & TX are enabled/disabled directly from adjust_link function without any HW interrogation, - removing this option, the RX & TX will only be enabled/disabled after HW interrogation. The HW check is made through the LMON pin in PSR register which specifies AVB_LINK signal value (0 - at low level; 1 - at high level). In conclusion, the present change is also a safety improvement because it removes the "renesas,no-ether-link" option leading to a proper way of detecting the link state based on HW interrogation and not on software heuristic. 
Fixes: dc36965a8905 ("arm64: dts: r8a7796: salvator-x: Enable EthernetAVB") Fixes: 6fa501c549aa ("arm64: dts: r8a7795: enable EthernetAVB on Salvator-X") Signed-off-by: Bogdan Mirea Signed-off-by: Vladimir Zapolskiy Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/salvator-common.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index a298df74ca6c..dbe2648649db 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -255,7 +255,6 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; - renesas,no-ether-link; phy-handle = <&phy0>; status = "okay"; -- cgit v1.2.3 From bbc25bee37d2b32cf3a1fab9195b6da3a185614a Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 5 Dec 2017 23:31:35 +0000 Subject: lib/mpi: Fix umul_ppmm() for MIPS64r6 Current MIPS64r6 toolchains aren't able to generate efficient DMULU/DMUHU based code for the C implementation of umul_ppmm(), which performs an unsigned 64 x 64 bit multiply and returns the upper and lower 64-bit halves of the 128-bit result. Instead it widens the 64-bit inputs to 128-bits and emits a __multi3 intrinsic call to perform a 128 x 128 multiply. This is both inefficient, and it results in a link error since we don't include __multi3 in MIPS linux. For example commit 90a53e4432b1 ("cfg80211: implement regdb signature checking") merged in v4.15-rc1 recently broke the 64r6_defconfig and 64r6el_defconfig builds by indirectly selecting MPILIB. The same build errors can be reproduced on older kernels by enabling e.g. 
CRYPTO_RSA: lib/mpi/generic_mpih-mul1.o: In function `mpihelp_mul_1': lib/mpi/generic_mpih-mul1.c:50: undefined reference to `__multi3' lib/mpi/generic_mpih-mul2.o: In function `mpihelp_addmul_1': lib/mpi/generic_mpih-mul2.c:49: undefined reference to `__multi3' lib/mpi/generic_mpih-mul3.o: In function `mpihelp_submul_1': lib/mpi/generic_mpih-mul3.c:49: undefined reference to `__multi3' lib/mpi/mpih-div.o In function `mpihelp_divrem': lib/mpi/mpih-div.c:205: undefined reference to `__multi3' lib/mpi/mpih-div.c:142: undefined reference to `__multi3' Therefore add an efficient MIPS64r6 implementation of umul_ppmm() using inline assembly and the DMULU/DMUHU instructions, to prevent __multi3 calls being emitted. Fixes: 7fd08ca58ae6 ("MIPS: Add build support for the MIPS R6 ISA") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-mips@linux-mips.org Cc: linux-crypto@vger.kernel.org Signed-off-by: Herbert Xu --- lib/mpi/longlong.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index 57fd45ab7af1..08c60d10747f 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -671,7 +671,23 @@ do { \ ************** MIPS/64 ************** ***************************************/ #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 +/* + * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C + * code below, so we special case MIPS64r6 until the compiler can do better. 
+ */ +#define umul_ppmm(w1, w0, u, v) \ +do { \ + __asm__ ("dmulu %0,%1,%2" \ + : "=d" ((UDItype)(w0)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ + __asm__ ("dmuhu %0,%1,%2" \ + : "=d" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))); \ +} while (0) +#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ -- cgit v1.2.3 From 7d2901f809c110bd9a261e879d59efe62e3bc758 Mon Sep 17 00:00:00 2001 From: Bogdan Mirea Date: Thu, 21 Dec 2017 17:18:59 +0200 Subject: arm64: dts: renesas: ulcb: Remove renesas, no-ether-link property The present change is a bug fix for AVB link iteratively up/down. Steps to reproduce: - start AVB TX stream (Using aplay via MSE), - disconnect+reconnect the eth cable, - after a reconnection the eth connection goes iteratively up/down without user interaction, - this may heal after some seconds or even stay for minutes. As the documentation specifies, the "renesas,no-ether-link" option should be used when a board does not provide a proper AVB_LINK signal. There is no need for this option enabled on RCAR H3/M3 Salvator-X/XS and ULCB starter kits since the AVB_LINK is correctly handled by HW. Choosing to keep or remove the "renesas,no-ether-link" option will have impact on the code flow in the following ways: - keeping this option enabled may lead to unexpected behavior since the RX & TX are enabled/disabled directly from adjust_link function without any HW interrogation, - removing this option, the RX & TX will only be enabled/disabled after HW interrogation. The HW check is made through the LMON pin in PSR register which specifies AVB_LINK signal value (0 - at low level; 1 - at high level). In conclusion, the present change is also a safety improvement because it removes the "renesas,no-ether-link" option leading to a proper way of detecting the link state based on HW interrogation and not on software heuristic. 
Fixes: dc36965a8905 ("arm64: dts: r8a7796: salvator-x: Enable EthernetAVB") Fixes: 6fa501c549aa ("arm64: dts: r8a7795: enable EthernetAVB on Salvator-X") Signed-off-by: Bogdan Mirea Signed-off-by: Vladimir Zapolskiy Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/ulcb.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 0d85b315ce71..73439cf48659 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -145,7 +145,6 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; - renesas,no-ether-link; phy-handle = <&phy0>; status = "okay"; -- cgit v1.2.3 From 1eb7b40386c97f6c4d1c62931bf306f4535a4bd6 Mon Sep 17 00:00:00 2001 From: Ofer Heifetz Date: Mon, 11 Dec 2017 12:10:55 +0100 Subject: crypto: inside-secure - per request invalidation When an invalidation request is needed we currently override the context .send and .handle_result helpers. This is wrong as under high load other requests can already be queued and overriding the context helpers will make them execute the wrong .send and .handle_result functions. This commit fixes this by adding a needs_inv flag in the request to choose the action to perform when sending requests or handling their results. This flag will be set when needed (i.e. when the context flag will be set). 
Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Signed-off-by: Ofer Heifetz [Antoine: commit message, and removed non related changes from the original commit] Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel_cipher.c | 71 +++++++++++++++++++++----- drivers/crypto/inside-secure/safexcel_hash.c | 67 +++++++++++++++++++----- 2 files changed, 111 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 5438552bc6d7..9ea24868d860 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -14,6 +14,7 @@ #include #include +#include #include "safexcel.h" @@ -33,6 +34,10 @@ struct safexcel_cipher_ctx { unsigned int key_len; }; +struct safexcel_cipher_req { + bool needs_inv; +}; + static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx, struct crypto_async_request *async, struct safexcel_command_desc *cdesc, @@ -126,9 +131,9 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx, return 0; } -static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, - struct crypto_async_request *async, - bool *should_complete, int *ret) +static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) { struct skcipher_request *req = skcipher_request_cast(async); struct safexcel_result_desc *rdesc; @@ -265,7 +270,6 @@ static int safexcel_aes_send(struct crypto_async_request *async, spin_unlock_bh(&priv->ring[ring].egress_lock); request->req = &req->base; - ctx->base.handle_result = safexcel_handle_result; *commands = n_cdesc; *results = n_rdesc; @@ -341,8 +345,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, ring = safexcel_select_ring(priv); ctx->base.ring = ring; - ctx->base.needs_inv = 
false; - ctx->base.send = safexcel_aes_send; spin_lock_bh(&priv->ring[ring].queue_lock); enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); @@ -359,6 +361,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, return ndesc; } +static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) +{ + struct skcipher_request *req = skcipher_request_cast(async); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); + int err; + + if (sreq->needs_inv) { + sreq->needs_inv = false; + err = safexcel_handle_inv_result(priv, ring, async, + should_complete, ret); + } else { + err = safexcel_handle_req_result(priv, ring, async, + should_complete, ret); + } + + return err; +} + static int safexcel_cipher_send_inv(struct crypto_async_request *async, int ring, struct safexcel_request *request, int *commands, int *results) @@ -368,8 +390,6 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async, struct safexcel_crypto_priv *priv = ctx->priv; int ret; - ctx->base.handle_result = safexcel_handle_inv_result; - ret = safexcel_invalidate_cache(async, &ctx->base, priv, ctx->base.ctxr_dma, ring, request); if (unlikely(ret)) @@ -381,11 +401,29 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async, return 0; } +static int safexcel_send(struct crypto_async_request *async, + int ring, struct safexcel_request *request, + int *commands, int *results) +{ + struct skcipher_request *req = skcipher_request_cast(async); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); + int ret; + + if (sreq->needs_inv) + ret = safexcel_cipher_send_inv(async, ring, request, + commands, results); + else + ret = safexcel_aes_send(async, ring, request, + commands, results); + return ret; +} + static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv 
*priv = ctx->priv; struct skcipher_request req; + struct safexcel_cipher_req *sreq = skcipher_request_ctx(&req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; @@ -399,7 +437,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm) skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm)); ctx = crypto_tfm_ctx(req.base.tfm); ctx->base.exit_inv = true; - ctx->base.send = safexcel_cipher_send_inv; + sreq->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); crypto_enqueue_request(&priv->ring[ring].queue, &req.base); @@ -424,19 +462,21 @@ static int safexcel_aes(struct skcipher_request *req, enum safexcel_cipher_direction dir, u32 mode) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); struct safexcel_crypto_priv *priv = ctx->priv; int ret, ring; + sreq->needs_inv = false; ctx->direction = dir; ctx->mode = mode; if (ctx->base.ctxr) { - if (ctx->base.needs_inv) - ctx->base.send = safexcel_cipher_send_inv; + if (ctx->base.needs_inv) { + sreq->needs_inv = true; + ctx->base.needs_inv = false; + } } else { ctx->base.ring = safexcel_select_ring(priv); - ctx->base.send = safexcel_aes_send; - ctx->base.ctxr = dma_pool_zalloc(priv->context_pool, EIP197_GFP_FLAGS(req->base), &ctx->base.ctxr_dma); @@ -476,6 +516,11 @@ static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm) alg.skcipher.base); ctx->priv = tmpl->priv; + ctx->base.send = safexcel_send; + ctx->base.handle_result = safexcel_handle_result; + + crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm), + sizeof(struct safexcel_cipher_req)); return 0; } diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 74feb6227101..79fe149804d3 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -32,6 +32,7 @@ struct safexcel_ahash_req { bool last_req; bool finish; bool hmac; + bool needs_inv; 
u8 state_sz; /* expected sate size, only set once */ u32 state[SHA256_DIGEST_SIZE / sizeof(u32)]; @@ -119,9 +120,9 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx, } } -static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, - struct crypto_async_request *async, - bool *should_complete, int *ret) +static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) { struct safexcel_result_desc *rdesc; struct ahash_request *areq = ahash_request_cast(async); @@ -165,9 +166,9 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, return 1; } -static int safexcel_ahash_send(struct crypto_async_request *async, int ring, - struct safexcel_request *request, int *commands, - int *results) +static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring, + struct safexcel_request *request, + int *commands, int *results) { struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); @@ -292,7 +293,6 @@ send_command: req->processed += len; request->req = &areq->base; - ctx->base.handle_result = safexcel_handle_result; *commands = n_cdesc; *results = 1; @@ -374,8 +374,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, ring = safexcel_select_ring(priv); ctx->base.ring = ring; - ctx->base.needs_inv = false; - ctx->base.send = safexcel_ahash_send; spin_lock_bh(&priv->ring[ring].queue_lock); enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); @@ -392,6 +390,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, return 1; } +static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) +{ + struct ahash_request *areq = ahash_request_cast(async); + struct safexcel_ahash_req *req = ahash_request_ctx(areq); + 
int err; + + if (req->needs_inv) { + req->needs_inv = false; + err = safexcel_handle_inv_result(priv, ring, async, + should_complete, ret); + } else { + err = safexcel_handle_req_result(priv, ring, async, + should_complete, ret); + } + + return err; +} + static int safexcel_ahash_send_inv(struct crypto_async_request *async, int ring, struct safexcel_request *request, int *commands, int *results) @@ -400,7 +418,6 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async, struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); int ret; - ctx->base.handle_result = safexcel_handle_inv_result; ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv, ctx->base.ctxr_dma, ring, request); if (unlikely(ret)) @@ -412,11 +429,29 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async, return 0; } +static int safexcel_ahash_send(struct crypto_async_request *async, + int ring, struct safexcel_request *request, + int *commands, int *results) +{ + struct ahash_request *areq = ahash_request_cast(async); + struct safexcel_ahash_req *req = ahash_request_ctx(areq); + int ret; + + if (req->needs_inv) + ret = safexcel_ahash_send_inv(async, ring, request, + commands, results); + else + ret = safexcel_ahash_send_req(async, ring, request, + commands, results); + return ret; +} + static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) { struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; struct ahash_request req; + struct safexcel_ahash_req *rctx = ahash_request_ctx(&req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; @@ -430,7 +465,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm)); ctx = crypto_tfm_ctx(req.base.tfm); ctx->base.exit_inv = true; - ctx->base.send = safexcel_ahash_send_inv; + rctx->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); 
crypto_enqueue_request(&priv->ring[ring].queue, &req.base); @@ -481,14 +516,16 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq) struct safexcel_crypto_priv *priv = ctx->priv; int ret, ring; - ctx->base.send = safexcel_ahash_send; + req->needs_inv = false; if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq); if (ctx->base.ctxr) { - if (ctx->base.needs_inv) - ctx->base.send = safexcel_ahash_send_inv; + if (ctx->base.needs_inv) { + ctx->base.needs_inv = false; + req->needs_inv = true; + } } else { ctx->base.ring = safexcel_select_ring(priv); ctx->base.ctxr = dma_pool_zalloc(priv->context_pool, @@ -622,6 +659,8 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm) struct safexcel_alg_template, alg.ahash); ctx->priv = tmpl->priv; + ctx->base.send = safexcel_ahash_send; + ctx->base.handle_result = safexcel_handle_result; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct safexcel_ahash_req)); -- cgit v1.2.3 From 0a02dcca126280595950f3ea809f77c9cb0a235c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20T=C3=A9nart?= Date: Mon, 11 Dec 2017 12:10:56 +0100 Subject: crypto: inside-secure - free requests even if their handling failed This patch frees the request private data even if its handling failed, as it would never be freed otherwise. 
Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Suggested-by: Ofer Heifetz Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 89ba9e85c0f3..4bcef78a08aa 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -607,6 +607,7 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv ndesc = ctx->handle_result(priv, ring, sreq->req, &should_complete, &ret); if (ndesc < 0) { + kfree(sreq); dev_err(priv->dev, "failed to handle result (%d)", ndesc); return; } -- cgit v1.2.3 From 7cad2fabd5691dbb17762877d4e7f236fe4bc181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20T=C3=A9nart?= Date: Mon, 11 Dec 2017 12:10:57 +0100 Subject: crypto: inside-secure - fix request allocations in invalidation path This patch makes use of the SKCIPHER_REQUEST_ON_STACK and AHASH_REQUEST_ON_STACK helpers to allocate enough memory to contain both the crypto request structures and their embedded context (__ctx). 
Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Suggested-by: Ofer Heifetz Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel_cipher.c | 16 ++++++++-------- drivers/crypto/inside-secure/safexcel_hash.c | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 9ea24868d860..fcc0a606d748 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -422,25 +422,25 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; - struct skcipher_request req; - struct safexcel_cipher_req *sreq = skcipher_request_ctx(&req); + SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm)); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; - memset(&req, 0, sizeof(struct skcipher_request)); + memset(req, 0, sizeof(struct skcipher_request)); /* create invalidation request */ init_completion(&result.completion); - skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_inv_complete, &result); + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + safexcel_inv_complete, &result); - skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm)); - ctx = crypto_tfm_ctx(req.base.tfm); + skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm)); + ctx = crypto_tfm_ctx(req->base.tfm); ctx->base.exit_inv = true; sreq->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); - crypto_enqueue_request(&priv->ring[ring].queue, &req.base); + crypto_enqueue_request(&priv->ring[ring].queue, &req->base); spin_unlock_bh(&priv->ring[ring].queue_lock); if (!priv->ring[ring].need_dequeue) diff --git 
a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 79fe149804d3..55ff8a340b11 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -450,25 +450,25 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) { struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; - struct ahash_request req; - struct safexcel_ahash_req *rctx = ahash_request_ctx(&req); + AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm)); + struct safexcel_ahash_req *rctx = ahash_request_ctx(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; - memset(&req, 0, sizeof(struct ahash_request)); + memset(req, 0, sizeof(struct ahash_request)); /* create invalidation request */ init_completion(&result.completion); - ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG, + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, safexcel_inv_complete, &result); - ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm)); - ctx = crypto_tfm_ctx(req.base.tfm); + ahash_request_set_tfm(req, __crypto_ahash_cast(tfm)); + ctx = crypto_tfm_ctx(req->base.tfm); ctx->base.exit_inv = true; rctx->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); - crypto_enqueue_request(&priv->ring[ring].queue, &req.base); + crypto_enqueue_request(&priv->ring[ring].queue, &req->base); spin_unlock_bh(&priv->ring[ring].queue_lock); if (!priv->ring[ring].need_dequeue) -- cgit v1.2.3 From 2973633e9f09311e849f975d969737af81a521ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20T=C3=A9nart?= Date: Mon, 11 Dec 2017 12:10:58 +0100 Subject: crypto: inside-secure - do not use areq->result for partial results This patch updates the SafeXcel driver to stop using the crypto ahash_request result field for partial results (i.e. on updates).
Instead the driver local safexcel_ahash_req state field is used, and only on final operations the ahash_request result buffer is updated. Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel_hash.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 55ff8a340b11..0c5a5820b06e 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -35,7 +35,7 @@ struct safexcel_ahash_req { bool needs_inv; u8 state_sz; /* expected sate size, only set once */ - u32 state[SHA256_DIGEST_SIZE / sizeof(u32)]; + u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32)); u64 len; u64 processed; @@ -128,7 +128,7 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int rin struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); struct safexcel_ahash_req *sreq = ahash_request_ctx(areq); - int cache_len, result_sz = sreq->state_sz; + int cache_len; *ret = 0; @@ -149,8 +149,8 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int rin spin_unlock_bh(&priv->ring[ring].egress_lock); if (sreq->finish) - result_sz = crypto_ahash_digestsize(ahash); - memcpy(sreq->state, areq->result, result_sz); + memcpy(areq->result, sreq->state, + crypto_ahash_digestsize(ahash)); dma_unmap_sg(priv->dev, areq->src, sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE); @@ -274,7 +274,7 @@ send_command: /* Add the token */ safexcel_hash_token(first_cdesc, len, req->state_sz); - ctx->base.result_dma = dma_map_single(priv->dev, areq->result, + ctx->base.result_dma = dma_map_single(priv->dev, req->state, req->state_sz, DMA_FROM_DEVICE); if (dma_mapping_error(priv->dev, ctx->base.result_dma)) { ret 
= -EINVAL; -- cgit v1.2.3 From 322f74ede933b3e2cb78768b6a6fdbfbf478a0c1 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Dec 2017 11:17:44 +0800 Subject: ALSA: hda - Add MIC_NO_PRESENCE fixup for 2 HP machines There is a headset jack on the front panel, when we plug a headset into it, the headset mic can't trigger unsol events, and read_pin_sense() can't detect its presence too. So add this fixup to fix this issue. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index a81aacf684b2..37e1cf8218ff 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -271,6 +271,8 @@ enum { CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, CXT_FIXUP_MUTE_LED_GPIO, + CXT_FIXUP_HEADSET_MIC, + CXT_FIXUP_HP_MIC_NO_PRESENCE, }; /* for hda_fixup_thinkpad_acpi() */ @@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec, } } +static void cxt_fixup_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct conexant_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; + break; + } +} + /* OPLC XO 1.5 fixup */ /* OLPC XO-1.5 supports DC input mode (e.g. 
for use with analog sensors) @@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_gpio, }, + [CXT_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_headset_mic, + }, + [CXT_FIXUP_HP_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x02a1113c }, + { } + }, + .chained = true, + .chain_id = CXT_FIXUP_HEADSET_MIC, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), -- cgit v1.2.3 From 285d5ddcffafa5d5e68c586f4c9eaa8b24a2897d Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Dec 2017 11:17:45 +0800 Subject: ALSA: hda - fix headset mic detection issue on a Dell machine It has the codec alc256, and add its pin definition to pin quirk table to let it apply ALC255_FIXUP_DELL1_MIC_NO_PRESENCE. 
Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6a4db00511ab..682858548b9b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6585,6 +6585,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x1b, 0x01011020}, {0x21, 0x02211010}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60160}, {0x14, 0x90170120}, -- cgit v1.2.3 From 8da5bbfc7cbba909f4f32d5e1dda3750baa5d853 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 22 Dec 2017 11:17:46 +0800 Subject: ALSA: hda - change the location for one mic on a Lenovo machine There are two front mics on this machine, and the current driver assigns the same name "Mic" to both of them, but pulseaudio can't handle them. As a workaround, we change the location for one of them, so that the driver will assign "Front Mic" and "Mic" to them.
Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 682858548b9b..1522ba31e16d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6328,6 +6328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), -- cgit v1.2.3 From a36c2638380c0a4676647a1f553b70b20d3ebce1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Dec 2017 10:45:07 +0100 Subject: ALSA: hda: Drop useless WARN_ON() Since the commit 97cc2ed27e5a ("ALSA: hda - Fix yet another i915 pointer leftover in error path") cleared hdac_acomp pointer, the WARN_ON() non-NULL check in snd_hdac_i915_register_notifier() may give a false-positive warning, as the function gets called no matter whether the component is registered or not. For fixing it, let's get rid of the spurious WARN_ON(). 
Fixes: 97cc2ed27e5a ("ALSA: hda - Fix yet another i915 pointer leftover in error path") Cc: Reported-by: Kouta Okamoto Signed-off-by: Takashi Iwai --- sound/hda/hdac_i915.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c index 038a180d3f81..cbe818eda336 100644 --- a/sound/hda/hdac_i915.c +++ b/sound/hda/hdac_i915.c @@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data) */ int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops) { - if (WARN_ON(!hdac_acomp)) + if (!hdac_acomp) return -ENODEV; hdac_acomp->audio_ops = aops; -- cgit v1.2.3 From 32aa144fc32abfcbf7140f473dfbd94c5b9b4105 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 15 Dec 2017 13:14:31 +0100 Subject: KVM: s390: fix cmma migration for multiple memory slots When multiple memory slots are present the cmma migration code does not allocate enough memory for the bitmap. The memory slots are sorted in reverse order, so we must use gfn and size of slot[0] instead of the last one. Signed-off-by: Christian Borntraeger Reviewed-by: Claudio Imbrenda Cc: stable@vger.kernel.org # 4.13+ Fixes: 190df4a212a7 (KVM: s390: CMMA tracking, ESSA emulation, migration mode) Reviewed-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index efa439f6ffb3..abcd24fdde3f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -792,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) if (kvm->arch.use_cmma) { /* - * Get the last slot. They should be sorted by base_gfn, so the - * last slot is also the one at the end of the address space. - * We have verified above that at least one slot is present. + * Get the first slot. They are reverse sorted by base_gfn, so + * the first slot is also the one at the end of the address + * space. 
We have verified above that at least one slot is + * present. */ - ms = slots->memslots + slots->used_slots - 1; + ms = slots->memslots; /* round up so we only use full longs */ ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); /* allocate enough bytes to store all the bits */ -- cgit v1.2.3 From c2cf265d860882b51a200e4a7553c17827f2b730 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 21 Dec 2017 09:18:22 +0100 Subject: KVM: s390: prevent buffer overrun on memory hotplug during migration We must not go beyond the pre-allocated buffer. This can happen when a new memory slot is added during migration. Reported-by: David Hildenbrand Signed-off-by: Christian Borntraeger Cc: stable@vger.kernel.org # 4.13+ Fixes: 190df4a212a7 (KVM: s390: CMMA tracking, ESSA emulation, migration mode) Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand --- arch/s390/kvm/priv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 572496c688cc..0714bfa56da0 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1006,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) cbrlo[entries] = gfn << PAGE_SHIFT; } - if (orc) { + if (orc && gfn < ms->bitmap_size) { /* increment only if we are really flipping the bit to 1 */ if (!test_and_set_bit(gfn, ms->pgste_bitmap)) atomic64_inc(&ms->dirty_pages); -- cgit v1.2.3 From 8bb65fc06c08f027980a917648e1cf6e4d51c5ad Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 6 Dec 2017 11:37:45 -0600 Subject: gpio: gpio-reg: fix build Revert changes introduced by commit f0fbe7bce733 ("gpio: Move irqdomain into struct gpio_irq_chip") as they are not applicable to this driver.
Reported-by: Russell King - ARM Linux Fixes: f0fbe7bce733 ("gpio: Move irqdomain into struct gpio_irq_chip") Signed-off-by: Grygorii Strashko Signed-off-by: Linus Walleij --- drivers/gpio/gpio-reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c index 23e771dba4c1..e85903eddc68 100644 --- a/drivers/gpio/gpio-reg.c +++ b/drivers/gpio/gpio-reg.c @@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset) struct gpio_reg *r = to_gpio_reg(gc); int irq = r->irqs[offset]; - if (irq >= 0 && r->irq.domain) - irq = irq_find_mapping(r->irq.domain, irq); + if (irq >= 0 && r->irqdomain) + irq = irq_find_mapping(r->irqdomain, irq); return irq; } -- cgit v1.2.3 From 822703354774ec935169cbbc8d503236bcb54fda Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 15 Dec 2017 15:02:33 +0100 Subject: gpio: fix "gpio-line-names" property retrieval Following commit 9427ecbed46cc ("gpio: Rework of_gpiochip_set_names() to use device property accessors"), "gpio-line-names" DT property is not retrieved anymore when chip->parent is not set by the driver. This is due to OF based property reads having been replaced by device based property reads. This patch fixes that by making use of fwnode_property_read_string_array() instead of device_property_read_string_array() and handing over either of_fwnode_handle(chip->of_node) or dev_fwnode(chip->parent) to that function. 
Fixes: 9427ecbed46cc ("gpio: Rework of_gpiochip_set_names() to use device property accessors") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 2 +- drivers/gpio/gpiolib-devprop.c | 17 +++++++---------- drivers/gpio/gpiolib-of.c | 3 ++- drivers/gpio/gpiolib.h | 3 ++- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index eb4528c87c0b..d6f3d9ee1350 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip) } if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent)); acpi_gpiochip_request_regions(acpi_gpio); acpi_gpiochip_scan_gpios(acpi_gpio); diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c index 27f383bda7d9..f748aa3e77f7 100644 --- a/drivers/gpio/gpiolib-devprop.c +++ b/drivers/gpio/gpiolib-devprop.c @@ -19,30 +19,27 @@ /** * devprop_gpiochip_set_names - Set GPIO line names using device properties * @chip: GPIO chip whose lines should be named, if possible + * @fwnode: Property Node containing the gpio-line-names property * * Looks for device property "gpio-line-names" and if it exists assigns * GPIO line names for the chip. The memory allocated for the assigned * names belong to the underlying firmware node and should not be released * by the caller. 
*/ -void devprop_gpiochip_set_names(struct gpio_chip *chip) +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode) { struct gpio_device *gdev = chip->gpiodev; const char **names; int ret, i; - if (!chip->parent) { - dev_warn(&gdev->dev, "GPIO chip parent is NULL\n"); - return; - } - - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", NULL, 0); if (ret < 0) return; if (ret != gdev->ngpio) { - dev_warn(chip->parent, + dev_warn(&gdev->dev, "names %d do not match number of GPIOs %d\n", ret, gdev->ngpio); return; @@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip) if (!names) return; - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", names, gdev->ngpio); if (ret < 0) { - dev_warn(chip->parent, "failed to read GPIO line names\n"); + dev_warn(&gdev->dev, "failed to read GPIO line names\n"); kfree(names); return; } diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index e0d59e61b52f..72a0695d2ac3 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip) /* If the chip defines names itself, these take precedence */ if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, + of_fwnode_handle(chip->of_node)); of_node_get(chip->of_node); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index af48322839c3..6c44d1652139 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc) return desc - &desc->gdev->descs[0]; } -void devprop_gpiochip_set_names(struct gpio_chip *chip); +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode); /* With descriptor prefix */ -- cgit v1.2.3 
From ac769ab17282e4ca1b95fdc22c58975cba8f3619 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 20 Dec 2017 01:37:13 +0000 Subject: ASoC: wm2200: use snd_soc_codec_get_drvdata() snd_soc_codec_get_drvdata() is common function to get private data. Let's use it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm2200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c index d83dab57a1d1..2d03db6efb79 100644 --- a/sound/soc/codecs/wm2200.c +++ b/sound/soc/codecs/wm2200.c @@ -1550,7 +1550,7 @@ static const struct snd_soc_dapm_route wm2200_dapm_routes[] = { static int wm2200_probe(struct snd_soc_codec *codec) { - struct wm2200_priv *wm2200 = dev_get_drvdata(codec->dev); + struct wm2200_priv *wm2200 = snd_soc_codec_get_drvdata(codec); int ret; wm2200->codec = codec; -- cgit v1.2.3 From 0e2d95aadd5c3bddfa4145b964527df12d89ec6a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 20 Dec 2017 01:37:54 +0000 Subject: ASoC: wm2200: don't use snd_soc_dai::symmetric_rates wm2200 is the only user of snd_soc_dai::symmetric_rates. Now, wm2200 is using single DAI on Component. Thus, wm2200_priv : snd_soc_dai : snd_soc_component are 1 : 1 : 1. We can replace snd_soc_dai::symmetric_rates on wm2200_priv. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/wm2200.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c index 2d03db6efb79..5c2f5727244d 100644 --- a/sound/soc/codecs/wm2200.c +++ b/sound/soc/codecs/wm2200.c @@ -98,6 +98,8 @@ struct wm2200_priv { int rev; int sysclk; + + unsigned int symmetric_rates:1; }; #define WM2200_DSP_RANGE_BASE (WM2200_MAX_REGISTER + 1) @@ -1758,7 +1760,7 @@ static int wm2200_hw_params(struct snd_pcm_substream *substream, lrclk = bclk_rates[bclk] / params_rate(params); dev_dbg(codec->dev, "Setting %dHz LRCLK\n", bclk_rates[bclk] / lrclk); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK || - dai->symmetric_rates) + wm2200->symmetric_rates) snd_soc_update_bits(codec, WM2200_AUDIO_IF_1_7, WM2200_AIF1RX_BCPF_MASK, lrclk); else @@ -2059,13 +2061,14 @@ static int wm2200_set_fll(struct snd_soc_codec *codec, int fll_id, int source, static int wm2200_dai_probe(struct snd_soc_dai *dai) { struct snd_soc_codec *codec = dai->codec; + struct wm2200_priv *wm2200 = snd_soc_codec_get_drvdata(codec); unsigned int val = 0; int ret; ret = snd_soc_read(codec, WM2200_GPIO_CTRL_1); if (ret >= 0) { if ((ret & WM2200_GP1_FN_MASK) != 0) { - dai->symmetric_rates = true; + wm2200->symmetric_rates = true; val = WM2200_AIF1TX_LRCLK_SRC; } } else { -- cgit v1.2.3 From 574d31d013d977be1cfe909da6c227c950856822 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 20 Dec 2017 01:38:26 +0000 Subject: ASoC: soc-dai.h: remove symmetric_xxx from snd_soc_dai ALSA SoC has some duplicate parameter. snd_soc_dai::symmetric_xxx are one of them. commit f0fba2ad1b6b ("ASoC: multi-component - ASoC Multi-Component Support") moved "symmetric_xxx" flags from snd_soc_dai to snd_soc_dai_driver. No one is using snd_soc_dai::symmetric_xxx now. 
Let's remove it Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 58acd00cae19..e05d05b5a790 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -294,9 +294,6 @@ struct snd_soc_dai { /* DAI runtime info */ unsigned int capture_active:1; /* stream is in use */ unsigned int playback_active:1; /* stream is in use */ - unsigned int symmetric_rates:1; - unsigned int symmetric_channels:1; - unsigned int symmetric_samplebits:1; unsigned int probed:1; unsigned int active; -- cgit v1.2.3 From a885eebc1b062c6a6a925db85828108779fb0e62 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Dec 2017 16:15:36 +0000 Subject: spi: pxa2xx: Use gpiod_put() not gpiod_free() gpiod_free() is an internal function for gpiolib, gpiod_put() is the correct external function. Reported-by: Stephen Rothwell Suggested-by: Rasmus Villemoes Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index c209dc1047b5..b0822d1dba29 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1237,7 +1237,7 @@ static int setup_cs(struct spi_device *spi, struct chip_data *chip, * different chip_info, release previously requested GPIO */ if (chip->gpiod_cs) { - gpiod_free(chip->gpiod_cs); + gpiod_put(chip->gpiod_cs); chip->gpiod_cs = NULL; } @@ -1417,7 +1417,7 @@ static void cleanup(struct spi_device *spi) if (drv_data->ssp_type != CE4100_SSP && !drv_data->cs_gpiods && chip->gpiod_cs) - gpiod_free(chip->gpiod_cs); + gpiod_put(chip->gpiod_cs); kfree(chip); } -- cgit v1.2.3 From 4c009af473b2026caaa26107e34d7cc68dad7756 Mon Sep 17 00:00:00 2001 From: "Michael J. 
Ruhl" Date: Fri, 22 Dec 2017 08:47:20 -0800 Subject: IB/hfi: Only read capability registers if the capability exists During driver init, various registers are saved to allow restoration after an FLR or gen3 bump. Some of these registers are not available in some circumstances (i.e. Virtual machines). This bug makes the driver unusable when the PCI device is passed into a VM, it fails during probe. Delete unnecessary register read/write, and only access register if the capability exists. Cc: # 4.14.x Fixes: a618b7e40af2 ("IB/hfi1: Move saving PCI values to a separate function") Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/hfi.h | 1 - drivers/infiniband/hw/hfi1/pcie.c | 30 ++++++++++++------------------ 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 4a9b4d7efe63..8ce9118d4a7f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1131,7 +1131,6 @@ struct hfi1_devdata { u16 pcie_lnkctl; u16 pcie_devctl2; u32 pci_msix0; - u32 pci_lnkctl3; u32 pci_tph2; /* diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 09e50fd2a08f..8c7e7a60b715 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd) if (ret) goto error; - ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - dd->pci_lnkctl3); - if (ret) - goto error; - - ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); - if (ret) - goto error; - + if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) { + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, + dd->pci_tph2); + if (ret) + goto error; + } return 0; error: @@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd) if (ret) goto error; - ret = 
pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - &dd->pci_lnkctl3); - if (ret) - goto error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); - if (ret) - goto error; - + if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) { + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, + &dd->pci_tph2); + if (ret) + goto error; + } return 0; error: -- cgit v1.2.3 From 4e5dff41be7b5201c1c47ceb3a2a8d698516bc2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Nov 2017 10:24:58 -0700 Subject: blk-mq: improve heavily contended tag case Even with a number of waitqueues, we can get into a situation where we are heavily contended on the waitqueue lock. I got a report on spc1 where we're spending seconds doing this. Arguably the use case is nasty, I reproduce it with one device and 1000 threads banging on the device. But that doesn't mean we shouldn't be handling it better. What ends up happening is that a thread will fail to get a tag, add itself to the waitqueue, and subsequently get woken up when a tag is freed - only to find itself going back to sleep on the waitqueue. Instead of waking all threads, use an exclusive wait and wake up our sbitmap batch count instead. This seems to work well for me (massive improvement for this use case), and it survives basic testing. But I haven't fully verified it yet. An additional improvement is running the queue and checking for a new tag BEFORE needing to add ourselves to the waitqueue. 
Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 13 +++++++------ lib/sbitmap.c | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index c81b40ecd3f1..336dde07b230 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -134,12 +134,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) ws = bt_wait_ptr(bt, data->hctx); drop_ctx = data->ctx == NULL; do { - prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); - - tag = __blk_mq_get_tag(data, bt); - if (tag != -1) - break; - /* * We're out of tags on this hardware queue, kick any * pending IO submits before going to sleep waiting for @@ -155,6 +149,13 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) if (tag != -1) break; + prepare_to_wait_exclusive(&ws->wait, &wait, + TASK_UNINTERRUPTIBLE); + + tag = __blk_mq_get_tag(data, bt); + if (tag != -1) + break; + if (data->ctx) blk_mq_put_ctx(data->ctx); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 80aa8d5463fa..42b5ca0acf93 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -462,7 +462,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) */ atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); sbq_index_atomic_inc(&sbq->wake_index); - wake_up(&ws->wait); + wake_up_nr(&ws->wait, wake_batch); } } -- cgit v1.2.3 From 7bbcbd3d1cdcbacd0f9f8dc4c98d550972f1ca30 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:02:34 +0100 Subject: x86/Kconfig: Limit NR_CPUS on 32-bit to a sane amount The recent cpu_entry_area changes fail to compile on 32-bit when BIGSMP=y and NR_CPUS=512, because the fixmap area becomes too big. Limit the number of CPUs with BIGSMP to 64, which is already way to big for 32-bit, but it's at least a working limitation. We performed a quick survey of 32-bit-only machines that might be affected by this change negatively, but found none. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 665eba1b6103..cd5199de231e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -925,7 +925,8 @@ config MAXSMP config NR_CPUS int "Maximum number of CPUs" if SMP && !MAXSMP range 2 8 if SMP && X86_32 && !X86_BIGSMP - range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK + range 2 64 if SMP && X86_32 && X86_BIGSMP + range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 default "1" if !SMP default "8192" if MAXSMP -- cgit v1.2.3 From c05344947b37f7cda726e802457370bc6eac4d26 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Dec 2017 01:14:39 +0100 Subject: x86/mm/dump_pagetables: Check PAGE_PRESENT for real The check for a present page in printk_prot(): if (!pgprot_val(prot)) { /* Not present */ is bogus. If a PTE is set to PAGE_NONE then the pgprot_val is not zero and the entry is decoded in bogus ways, e.g. as RX GLB. That is confusing when analyzing mapping correctness. Check for the present bit to make an informed decision. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/dump_pagetables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 5e3ac6fe6c9e..1014cfb21c2c 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -140,7 +140,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) static const char * const level_name[] = { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; - if (!pgprot_val(prot)) { + if (!(pr & _PAGE_PRESENT)) { /* Not present */ pt_dump_cont_printf(m, dmsg, " "); } else { -- cgit v1.2.3 From 146122e24bdf208015d629babba673e28d090709 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:07:42 +0100 Subject: x86/mm/dump_pagetables: Make the address hints correct and readable The address hints are a trainwreck. The array entry numbers have to kept magically in sync with the actual hints, which is doomed as some of the array members are initialized at runtime via the entry numbers. Designated initializers have been around before this code was implemented.... Use the entry numbers to populate the address hints array and add the missing bits and pieces. Split 32 and 64 bit for readability sake. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/dump_pagetables.c | 90 +++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 1014cfb21c2c..fdf09d8f98da 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -44,10 +44,12 @@ struct addr_marker { unsigned long max_lines; }; -/* indices for address_markers; keep sync'd w/ address_markers below */ +/* Address space markers hints */ + +#ifdef CONFIG_X86_64 + enum address_markers_idx { USER_SPACE_NR = 0, -#ifdef CONFIG_X86_64 KERNEL_SPACE_NR, LOW_KERNEL_NR, VMALLOC_START_NR, @@ -56,56 +58,70 @@ enum address_markers_idx { KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif -# ifdef CONFIG_X86_ESPFIX64 +#ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, -# endif +#endif +#ifdef CONFIG_EFI + EFI_END_NR, +#endif HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, -#else + FIXADDR_START_NR, + END_OF_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" }, + [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, +#ifdef CONFIG_KASAN + [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, + [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, +#endif +#ifdef CONFIG_X86_ESPFIX64 + [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, +#endif +#ifdef CONFIG_EFI + [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" }, +#endif + [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" }, + [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" }, + [MODULES_END_NR] = { MODULES_END, "End Modules" }, + [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" }, + 
[END_OF_SPACE_NR] = { -1, NULL } +}; + +#else /* CONFIG_X86_64 */ + +enum address_markers_idx { + USER_SPACE_NR = 0, KERNEL_SPACE_NR, VMALLOC_START_NR, VMALLOC_END_NR, -# ifdef CONFIG_HIGHMEM +#ifdef CONFIG_HIGHMEM PKMAP_BASE_NR, -# endif - FIXADDR_START_NR, #endif + FIXADDR_START_NR, + END_OF_SPACE_NR, }; -/* Address space markers hints */ static struct addr_marker address_markers[] = { - { 0, "User Space" }, -#ifdef CONFIG_X86_64 - { 0x8000000000000000UL, "Kernel Space" }, - { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/* VMEMMAP_START */, "Vmemmap" }, -#ifdef CONFIG_KASAN - { KASAN_SHADOW_START, "KASAN shadow" }, - { KASAN_SHADOW_END, "KASAN shadow end" }, + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMALLOC_END_NR] = { 0UL, "vmalloc() End" }, +#ifdef CONFIG_HIGHMEM + [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, #endif -# ifdef CONFIG_X86_ESPFIX64 - { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, -# endif -# ifdef CONFIG_EFI - { EFI_VA_END, "EFI Runtime Services" }, -# endif - { __START_KERNEL_map, "High Kernel Mapping" }, - { MODULES_VADDR, "Modules" }, - { MODULES_END, "End Modules" }, -#else - { PAGE_OFFSET, "Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/*VMALLOC_END*/, "vmalloc() End" }, -# ifdef CONFIG_HIGHMEM - { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, -# endif - { 0/*FIXADDR_START*/, "Fixmap Area" }, -#endif - { -1, NULL } /* End of list */ + [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, + [END_OF_SPACE_NR] = { -1, NULL } }; +#endif /* !CONFIG_X86_64 */ + /* Multipliers for offsets within the PTEs */ #define PTE_LEVEL_MULT (PAGE_SIZE) #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) -- cgit v1.2.3 From 49275fef986abfb8b476e4708aaecc07e7d3e087 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 10 Dec 2017 22:47:19 -0800 Subject: x86/vsyscall/64: Explicitly 
set _PAGE_USER in the pagetable hierarchy The kernel is very erratic as to which pagetables have _PAGE_USER set. The vsyscall page gets lucky: it seems that all of the relevant pagetables are among the apparently arbitrary ones that set _PAGE_USER. Rather than relying on chance, just explicitly set _PAGE_USER. This will let us clean up pagetable setup to stop setting _PAGE_USER. The added code can also be reused by pagetable isolation to manage the _PAGE_USER bit in the usermode tables. [ tglx: Folded paravirt fix from Juergen Gross ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index f279ba2643dc..daad57c76e42 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -37,6 +37,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" @@ -329,16 +330,47 @@ int in_gate_area_no_mm(unsigned long addr) return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } +/* + * The VSYSCALL page is the only user-accessible page in the kernel address + * range. Normally, the kernel page tables can have _PAGE_USER clear, but + * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls + * are enabled. + * + * Some day we may create a "minimal" vsyscall mode in which we emulate + * vsyscalls but leave the page not present. If so, we skip calling + * this. 
+ */ +static void __init set_vsyscall_pgtable_user_bits(void) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(VSYSCALL_ADDR); + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); + p4d = p4d_offset(pgd, VSYSCALL_ADDR); +#if CONFIG_PGTABLE_LEVELS >= 5 + p4d->p4d |= _PAGE_USER; +#endif + pud = pud_offset(p4d, VSYSCALL_ADDR); + set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); + pmd = pmd_offset(pud, VSYSCALL_ADDR); + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER)); +} + void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) + if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); + set_vsyscall_pgtable_user_bits(); + } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); -- cgit v1.2.3 From 4831b779403a836158917d59a7ca880483c67378 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 10 Dec 2017 22:47:20 -0800 Subject: x86/vsyscall/64: Warn and fail vsyscall emulation in NATIVE mode If something goes wrong with pagetable setup, vsyscall=native will accidentally fall back to emulation. Make it warn and fail so that we notice. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index daad57c76e42..1faf40f2dda9 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -139,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); + /* This should be unreachable in NATIVE mode. */ + if (WARN_ON(vsyscall_mode == NATIVE)) + return false; + if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); -- cgit v1.2.3 From c10e83f598d08046dd1ebc8360d4bb12d802d51b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Dec 2017 12:27:29 +0100 Subject: arch, mm: Allow arch_dup_mmap() to fail In order to sanitize the LDT initialization on x86 arch_dup_mmap() must be allowed to fail. Fix up all instances. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: dan.j.williams@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mmu_context.h | 5 +++-- arch/um/include/asm/mmu_context.h | 3 ++- arch/unicore32/include/asm/mmu_context.h | 5 +++-- arch/x86/include/asm/mmu_context.h | 4 ++-- include/asm-generic/mm_hooks.h | 5 +++-- kernel/fork.c | 3 +-- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 492d8140a395..44fdf4786638 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -114,9 +114,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index b668e351fd6c..fca34b2177e2 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm); /* * Needed since we do not use the asm-generic/mm_hooks.h: */ -static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { uml_setup_stubs(mm); + return 0; } extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 59b06b48f27d..5c205a9cb5a6 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ 
b/arch/unicore32/include/asm/mmu_context.h @@ -81,9 +81,10 @@ do { \ } \ } while (0) -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6d16d15d09a0..c76162439c8a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -176,10 +176,10 @@ do { \ } while (0) #endif -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { paravirt_arch_dup_mmap(oldmm, mm); + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index ea189d88a3cc..8ac4e68a12f0 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -7,9 +7,10 @@ #ifndef _ASM_GENERIC_MM_HOOKS_H #define _ASM_GENERIC_MM_HOOKS_H -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/kernel/fork.c b/kernel/fork.c index 07cc743698d3..500ce64517d9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto out; } /* a new mm has just been created */ - arch_dup_mmap(oldmm, mm); - retval = 0; + retval = arch_dup_mmap(oldmm, mm); out: up_write(&mm->mmap_sem); flush_tlb_mm(oldmm); -- cgit v1.2.3 From c2b3496bb30bd159e9de42e5c952e1f1f33c9a77 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Dec 2017 12:27:30 +0100 Subject: x86/ldt: Rework locking The LDT is duplicated on fork() and on exec(), which is wrong as exec() should start 
from a clean state, i.e. without LDT. To fix this the LDT duplication code will be moved into arch_dup_mmap() which is only called for fork(). This introduces a locking problem. arch_dup_mmap() holds mmap_sem of the parent process, but the LDT duplication code needs to acquire mm->context.lock to access the LDT data safely, which is the reverse lock order of write_ldt() where mmap_sem nests into context.lock. Solve this by introducing a new rw semaphore which serializes the read/write_ldt() syscall operations and use context.lock to protect the actual installment of the LDT descriptor. So context.lock stabilizes mm->context.ldt and can nest inside of the new semaphore or mmap_sem. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: dan.j.williams@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu.h | 4 +++- arch/x86/include/asm/mmu_context.h | 2 ++ arch/x86/kernel/ldt.c | 33 +++++++++++++++++++++------------ 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 9ea26f167497..5ff3e8af2c20 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -3,6 +3,7 @@ #define _ASM_X86_MMU_H #include +#include #include #include @@ -27,7 +28,8 @@ typedef struct { atomic64_t tlb_gen; #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; + struct rw_semaphore ldt_usr_sem; + struct ldt_struct *ldt; #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/mmu_context.h 
b/arch/x86/include/asm/mmu_context.h index c76162439c8a..4fdbe5efe535 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -132,6 +132,8 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + mutex_init(&mm->context.lock); + mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 1c1eae961340..1600aebc1ec7 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -5,6 +5,11 @@ * Copyright (C) 2002 Andi Kleen * * This handles calls from both 32bit and 64bit mode. + * + * Lock order: + * contex.ldt_usr_sem + * mmap_sem + * context.lock */ #include @@ -42,7 +47,7 @@ static void refresh_ldt_segments(void) #endif } -/* context.lock is held for us, so we don't need any locking. */ +/* context.lock is held by the task which issued the smp function call */ static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; @@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); } -/* context.lock is held */ -static void install_ldt(struct mm_struct *current_mm, - struct ldt_struct *ldt) +static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt) { + mutex_lock(&mm->context.lock); + /* Synchronizes with READ_ONCE in load_mm_ldt. */ - smp_store_release(&current_mm->context.ldt, ldt); + smp_store_release(&mm->context.ldt, ldt); - /* Activate the LDT for all CPUs using current_mm. */ - on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); + /* Activate the LDT for all CPUs using currents mm.
*/ + on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true); + + mutex_unlock(&mm->context.lock); } static void free_ldt_struct(struct ldt_struct *ldt) @@ -133,7 +140,8 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) struct mm_struct *old_mm; int retval = 0; - mutex_init(&mm->context.lock); + init_rwsem(&mm->context.ldt_usr_sem); + old_mm = current->mm; if (!old_mm) { mm->context.ldt = NULL; @@ -180,7 +188,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) unsigned long entries_size; int retval; - mutex_lock(&mm->context.lock); + down_read(&mm->context.ldt_usr_sem); if (!mm->context.ldt) { retval = 0; @@ -209,7 +217,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) retval = bytecount; out_unlock: - mutex_unlock(&mm->context.lock); + up_read(&mm->context.ldt_usr_sem); return retval; } @@ -269,7 +277,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) ldt.avl = 0; } - mutex_lock(&mm->context.lock); + if (down_write_killable(&mm->context.ldt_usr_sem)) + return -EINTR; old_ldt = mm->context.ldt; old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; @@ -291,7 +300,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) error = 0; out_unlock: - mutex_unlock(&mm->context.lock); + up_write(&mm->context.ldt_usr_sem); out: return error; } -- cgit v1.2.3 From a4828f81037f491b2cc986595e3a969a6eeb2fb5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Dec 2017 12:27:31 +0100 Subject: x86/ldt: Prevent LDT inheritance on exec The LDT is inherited across fork() or exec(), but that makes no sense at all because exec() is supposed to start the process clean. The reason why this happens is that init_new_context_ldt() is called from init_new_context() which obviously needs to be called for both fork() and exec(). It would be surprising if anything relies on that behaviour, so it seems to be safe to remove that misfeature. 
Split the context initialization into two parts. Clear the LDT pointer and initialize the mutex from the general context init and move the LDT duplication to arch_dup_mmap() which is only called on fork(). Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Will Deacon Cc: aliguori@amazon.com Cc: dan.j.williams@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 21 ++++++++++++++------- arch/x86/kernel/ldt.c | 18 +++++------------- tools/testing/selftests/x86/ldt_gdt.c | 9 +++------ 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4fdbe5efe535..5e25423bf9bb 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -57,11 +57,17 @@ struct ldt_struct { /* * Used for LDT copy/destruction. 
*/ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); +static inline void init_new_context_ldt(struct mm_struct *mm) +{ + mm->context.ldt = NULL; + init_rwsem(&mm->context.ldt_usr_sem); +} +int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ -static inline int init_new_context_ldt(struct task_struct *tsk, - struct mm_struct *mm) +static inline void init_new_context_ldt(struct mm_struct *mm) { } +static inline int ldt_dup_context(struct mm_struct *oldmm, + struct mm_struct *mm) { return 0; } @@ -137,15 +143,16 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and always allocated */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } - #endif - return init_new_context_ldt(tsk, mm); +#endif + init_new_context_ldt(mm); + return 0; } static inline void destroy_context(struct mm_struct *mm) { @@ -181,7 +188,7 @@ do { \ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { paravirt_arch_dup_mmap(oldmm, mm); - return 0; + return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 1600aebc1ec7..a6b5d62f45a7 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -131,28 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt) } /* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. + * Called on fork from arch_dup_mmap(). Just copy the current LDT state, + * the new task is not running, so nothing can be installed. 
*/ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) +int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ldt_struct *new_ldt; - struct mm_struct *old_mm; int retval = 0; - init_rwsem(&mm->context.ldt_usr_sem); - - old_mm = current->mm; - if (!old_mm) { - mm->context.ldt = NULL; + if (!old_mm) return 0; - } mutex_lock(&old_mm->context.lock); - if (!old_mm->context.ldt) { - mm->context.ldt = NULL; + if (!old_mm->context.ldt) goto out_unlock; - } new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); if (!new_ldt) { diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 66e5ce5b91f0..0304ffb714f2 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -627,13 +627,10 @@ static void do_multicpu_tests(void) static int finish_exec_test(void) { /* - * In a sensible world, this would be check_invalid_segment(0, 1); - * For better or for worse, though, the LDT is inherited across exec. - * We can probably change this safely, but for now we test it. + * Older kernel versions did inherit the LDT on exec() which is + * wrong because exec() starts from a clean state. */ - check_valid_segment(0, 1, - AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, - 42, true); + check_invalid_segment(0, 1); return nerrs ? 1 : 0; } -- cgit v1.2.3 From 5a7ccf4754fb3660569a6de52ba7f7fc3dfaf280 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 12 Dec 2017 07:56:43 -0800 Subject: x86/mm/64: Improve the memory map documentation The old docs had the vsyscall range wrong and were missing the fixmap. Fix both. There used to be 8 MB reserved for future vsyscalls, but that's long gone. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 3448e675b462..83ca5a3b90ac 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -19,8 +19,9 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable) -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable) +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Virtual memory map with 5 level page tables: @@ -41,8 +42,9 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Architecture defines a 64-bit virtual address. 
Implementations can support -- cgit v1.2.3 From e8ffe96e5933d417195268478479933d56213a3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:54 +0100 Subject: x86/doc: Remove obvious weirdnesses from the x86 MM layout documentation Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 83ca5a3b90ac..63a41671d25b 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -1,6 +1,4 @@ - - Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm @@ -49,8 +47,9 @@ ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Architecture defines a 64-bit virtual address. Implementations can support less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 -through to the most-significant implemented bit are set to either all ones -or all zero. This causes hole between user space and kernel addresses. +through to the most-significant implemented bit are sign extended. +This causes hole between user space and kernel addresses if you interpret them +as unsigned. 
The direct mapping covers all memory in the system up to the highest memory address (this means in some cases it can also include PCI memory @@ -60,9 +59,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of the processes using the page fault handler, with init_top_pgt as reference. -Current X86-64 implementations support up to 46 bits of address space (64 TB), -which is our current limit. This expands into MBZ space in the page tables. - We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available @@ -74,5 +70,3 @@ following fixmap section. Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. - --Andi Kleen, Jul 2004 -- cgit v1.2.3 From 4fe2d8b11a370af286287a2661de9d4e6c9a145a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 17:25:07 -0800 Subject: x86/entry: Rename SYSENTER_stack to CPU_ENTRY_AREA_entry_stack If the kernel oopses while on the trampoline stack, it will print "<SYSENTER>" even if SYSENTER is not involved. That is rather confusing. The "SYSENTER" stack is used for a lot more than SYSENTER now. Give it a better string to display in stack dumps, and rename the kernel code to match. Also move the 32-bit code over to the new naming even though it still uses the entry stack only for SYSENTER. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 12 ++++++------ arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/fixmap.h | 8 ++++---- arch/x86/include/asm/processor.h | 6 +++--- arch/x86/include/asm/stacktrace.h | 4 ++-- arch/x86/kernel/asm-offsets.c | 4 ++-- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/cpu/common.c | 14 +++++++------- arch/x86/kernel/dumpstack.c | 10 +++++----- arch/x86/kernel/dumpstack_32.c | 6 +++--- arch/x86/kernel/dumpstack_64.c | 12 +++++++++--- 11 files changed, 44 insertions(+), 38 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index bd8b57a5c874..ace8f321a5a1 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -942,9 +942,9 @@ ENTRY(debug) /* Are we currently on the SYSENTER stack? */ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Ldebug_from_sysenter_stack TRACE_IRQS_OFF @@ -986,9 +986,9 @@ ENTRY(nmi) /* Are we currently on the SYSENTER stack? */ movl PER_CPU_VAR(cpu_entry_area), %ecx - addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Lnmi_from_sysenter_stack /* Not on SYSENTER stack. 
*/ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 2812ce043a7a..87cebe78bbef 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -154,8 +154,8 @@ END(native_usergs_sysret64) _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) /* The top word of the SYSENTER stack is hot and is usable as scratch space. */ -#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \ - SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA +#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \ + SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA ENTRY(entry_SYSCALL_64_trampoline) UNWIND_HINT_EMPTY diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 94fc4fa14127..8153b8d86a3c 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -56,10 +56,10 @@ struct cpu_entry_area { char gdt[PAGE_SIZE]; /* - * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as + * The GDT is just below entry_stack and thus serves (on x86_64) as * a a read-only guard page. */ - struct SYSENTER_stack_page SYSENTER_stack_page; + struct entry_stack_page entry_stack_page; /* * On x86_64, the TSS is mapped RO. 
On x86_32, it's mapped RW because @@ -250,9 +250,9 @@ static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); } -static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) +static inline struct entry_stack *cpu_entry_stack(int cpu) { - return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack; + return &get_cpu_entry_area(cpu)->entry_stack_page.stack; } #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index da943411d3d8..9e482d8b0b97 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -336,12 +336,12 @@ struct x86_hw_tss { #define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) #define INVALID_IO_BITMAP_OFFSET 0x8000 -struct SYSENTER_stack { +struct entry_stack { unsigned long words[64]; }; -struct SYSENTER_stack_page { - struct SYSENTER_stack stack; +struct entry_stack_page { + struct entry_stack stack; } __aligned(PAGE_SIZE); struct tss_struct { diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f8062bfd43a0..f73706878772 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -16,7 +16,7 @@ enum stack_type { STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, - STACK_TYPE_SYSENTER, + STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; @@ -29,7 +29,7 @@ struct stack_info { bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); -bool in_sysenter_stack(unsigned long *stack, struct stack_info *info); +bool in_entry_stack(unsigned long *stack, struct stack_info *info); int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); diff --git a/arch/x86/kernel/asm-offsets.c 
b/arch/x86/kernel/asm-offsets.c index cd360a5e0dca..676b7cf4b62b 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -97,6 +97,6 @@ void common(void) { /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); - OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page); - DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); + OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page); + DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack)); } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 7d20d9c0b3d6..fa1261eefa16 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -48,7 +48,7 @@ void foo(void) /* Offset from the sysenter stack to tss.sp0 */ DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - - offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack)); + offsetofend(struct cpu_entry_area, entry_stack_page.stack)); #ifdef CONFIG_CC_STACKPROTECTOR BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 034900623adf..ed4acbce37a8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -487,8 +487,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); #endif -static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page, - SYSENTER_stack_storage); +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, + entry_stack_storage); static void __init set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) @@ -523,8 +523,8 @@ static void __init setup_cpu_entry_area(int cpu) #endif __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page), - 
per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1, + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), + per_cpu_ptr(&entry_stack_storage, cpu), 1, PAGE_KERNEL); /* @@ -1323,7 +1323,7 @@ void enable_sep_cpu(void) tss->x86_tss.ss1 = __KERNEL_CS; wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); - wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0); + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); put_cpu(); @@ -1440,7 +1440,7 @@ void syscall_init(void) * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); @@ -1655,7 +1655,7 @@ void cpu_init(void) */ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); - load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); + load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); load_mm_ldt(&init_mm); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index bbd6d986e2d0..1dd3f533d78c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -43,9 +43,9 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, return true; } -bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) +bool in_entry_stack(unsigned long *stack, struct stack_info *info) { - struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); + struct entry_stack *ss = cpu_entry_stack(smp_processor_id()); void *begin = ss; void *end = ss + 1; @@ -53,7 +53,7 @@ bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) if ((void *)stack < begin || (void *)stack >= end) return 
false; - info->type = STACK_TYPE_SYSENTER; + info->type = STACK_TYPE_ENTRY; info->begin = begin; info->end = end; info->next_sp = NULL; @@ -111,13 +111,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) - * - SYSENTER stack + * - entry stack * * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack - * - SYSENTER stack + * - entry stack */ for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 5ff13a6b3680..04170f63e3a1 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -26,8 +26,8 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_SOFTIRQ) return "SOFTIRQ"; - if (type == STACK_TYPE_SYSENTER) - return "SYSENTER"; + if (type == STACK_TYPE_ENTRY) + return "ENTRY_TRAMPOLINE"; return NULL; } @@ -96,7 +96,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (task != current) goto unknown; - if (in_sysenter_stack(stack, info)) + if (in_entry_stack(stack, info)) goto recursion_check; if (in_hardirq_stack(stack, info)) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index abc828f8c297..563e28d14f2c 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -37,8 +37,14 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_IRQ) return "IRQ"; - if (type == STACK_TYPE_SYSENTER) - return "SYSENTER"; + if (type == STACK_TYPE_ENTRY) { + /* + * On 64-bit, we have a generic entry stack that we + * use for all the kernel entry points, including + * SYSENTER. 
+ */ + return "ENTRY_TRAMPOLINE"; + } if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) return exception_stack_names[type - STACK_TYPE_EXCEPTION]; @@ -118,7 +124,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (in_irq_stack(stack, info)) goto recursion_check; - if (in_sysenter_stack(stack, info)) + if (in_entry_stack(stack, info)) goto recursion_check; goto unknown; -- cgit v1.2.3 From 3e46e0f5ee3643a1239be9046c7ba6c66ca2b329 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:50 +0100 Subject: x86/uv: Use the right TLB-flush API Since uv_flush_tlb_others() implements flush_tlb_others() which is about flushing user mappings, we should use __flush_tlb_single(), which too is about flushing user mappings. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Andrew Banman Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Mike Travis Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index f44c0bc95aa2..8538a6723171 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, local_flush_tlb(); stat->d_alltlb++; } else { - __flush_tlb_one(msg->address); + __flush_tlb_single(msg->address); stat->d_onetlb++; } stat->d_requestee++; -- cgit v1.2.3 From 23cb7d46f371844c004784ad9552a57446f73e5a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:51 +0100 Subject: x86/microcode: Dont abuse the TLB-flush interface Commit: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU") ... grubbed into tlbflush internals without coherent explanation. Since it says it's a precaution and the SDM doesn't mention anything like this, take it out back. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: fenghua.yu@intel.com Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 19 ++++++------------- arch/x86/kernel/cpu/microcode/intel.c | 13 ------------- 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 509046cfa5ce..c2e45da4e540 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -246,20 +246,9 @@ static inline void __native_flush_tlb(void) preempt_enable(); } -static inline void __native_flush_tlb_global_irq_disabled(void) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); -} - static inline void __native_flush_tlb_global(void) { - unsigned long flags; + unsigned long cr4, flags; if (static_cpu_has(X86_FEATURE_INVPCID)) { /* @@ -277,7 +266,11 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + /* toggle PGE */ + native_write_cr4(cr4 ^ X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + native_write_cr4(cr4); raw_local_irq_restore(flags); } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 7dbcb7adf797..8ccdca6d3f9e 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci) } #else -/* - * Flush global tlb. We only do this in x86_64 where paging has been enabled - * already and PGE should be enabled as well. 
- */ -static inline void flush_tlb_early(void) -{ - __native_flush_tlb_global_irq_disabled(); -} - static inline void print_ucode(struct ucode_cpu_info *uci) { struct microcode_intel *mc; @@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (rev != mc->hdr.rev) return -1; -#ifdef CONFIG_X86_64 - /* Flush global tlb. This is precaution. */ - flush_tlb_early(); -#endif uci->cpu_sig.rev = rev; if (early) -- cgit v1.2.3 From a501686b2923ce6f2ff2b1d0d50682c6411baf72 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:49 +0100 Subject: x86/mm: Use __flush_tlb_one() for kernel memory __flush_tlb_single() is for user mappings, __flush_tlb_one() for kernel mappings. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3118392cdf75..0569987f6da6 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_single(addr); + __flush_tlb_one(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) -- cgit v1.2.3 From b5fc6d943808b570bdfbec80f40c6b3855f1c48b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:46 +0100 Subject: x86/mm: Remove superfluous barriers atomic64_inc_return() already implies smp_mb() before and after. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index c2e45da4e540..3e2227386abe 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -60,19 +60,13 @@ static inline void invpcid_flush_all_nonglobals(void) static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { - u64 new_tlb_gen; - /* * Bump the generation count. This also serves as a full barrier * that synchronizes with switch_mm(): callers are required to order * their read of mm_cpumask after their writes to the paging * structures. */ - smp_mb__before_atomic(); - new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen); - smp_mb__after_atomic(); - - return new_tlb_gen; + return atomic64_inc_return(&mm->context.tlb_gen); } #ifdef CONFIG_PARAVIRT -- cgit v1.2.3 From 3f67af51e56f291d7417d77c4f67cd774633c5e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:52 +0100 Subject: x86/mm: Add comments to clarify which TLB-flush functions are supposed to flush what Per popular request.. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 3e2227386abe..552d581c8f9f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -228,6 +228,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); +/* + * flush the entire current user mapping + */ static inline void __native_flush_tlb(void) { /* @@ -240,6 +243,9 @@ static inline void __native_flush_tlb(void) preempt_enable(); } +/* + * flush everything + */ static inline void __native_flush_tlb_global(void) { unsigned long cr4, flags; @@ -269,17 +275,27 @@ static inline void __native_flush_tlb_global(void) raw_local_irq_restore(flags); } +/* + * flush one page in the user mapping + */ static inline void __native_flush_tlb_single(unsigned long addr) { asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); } +/* + * flush everything + */ static inline void __flush_tlb_all(void) { - if (boot_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); - else + } else { + /* + * !PGE -> !PCID (setup_pcid()), thus every flush is total. 
+ */ __flush_tlb(); + } /* * Note: if we somehow had PCID but not PGE, then this wouldn't work -- @@ -290,6 +306,9 @@ static inline void __flush_tlb_all(void) */ } +/* + * flush one page in the kernel mapping + */ static inline void __flush_tlb_one(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); -- cgit v1.2.3 From 50fb83a62cf472dc53ba23bd3f7bd6c1b2b3b53e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:54 +0100 Subject: x86/mm: Move the CR3 construction functions to tlbflush.h For flushing the TLB, the ASID which has been programmed into the hardware must be known. That differs from what is in 'cpu_tlbstate'. Add functions to transform the 'cpu_tlbstate' values into the one programmed into the hardware (CR3). It's not easy to include mmu_context.h into tlbflush.h, so just move the CR3 building over to tlbflush.h. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 29 +---------------------------- arch/x86/include/asm/tlbflush.h | 26 ++++++++++++++++++++++++++ arch/x86/mm/tlb.c | 8 ++++---- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5e25423bf9bb..5ede7cae1d67 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -290,33 +290,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } -/* - * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID - * bits.
This serves two purposes. It prevents a nasty situation in - * which PCID-unaware code saves CR3, loads some other value (with PCID - * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if - * the saved ASID was nonzero. It also means that any bugs involving - * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger - * deterministically. - */ - -static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid) -{ - if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1); - } else { - VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(mm->pgd); - } -} - -static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) -{ - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH; -} - /* * This can be used from process context to figure out what the value of * CR3 is without needing to do a (slow) __read_cr3(). @@ -326,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) */ static inline unsigned long __get_current_cr3_fast(void) { - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* For now, be very restrictive about when this can be called. */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 552d581c8f9f..ee7925adfb57 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -69,6 +69,32 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } +/* + * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits. + * This serves two purposes. It prevents a nasty situation in which + * PCID-unaware code saves CR3, loads some other value (with PCID == 0), + * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved + * ASID was nonzero. 
It also means that any bugs involving loading a + * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically. + */ +struct pgd_t; +static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) +{ + if (static_cpu_has(X86_FEATURE_PCID)) { + VM_WARN_ON_ONCE(asid > 4094); + return __sme_pa(pgd) | (asid + 1); + } else { + VM_WARN_ON_ONCE(asid != 0); + return __sme_pa(pgd); + } +} + +static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) +{ + VM_WARN_ON_ONCE(asid > 4094); + return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH; +} + #ifdef CONFIG_PARAVIRT #include #else diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0569987f6da6..0a1be3adc97e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * isn't free. */ #ifdef CONFIG_DEBUG_VM - if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) { /* * If we were to BUG here, we'd be very likely to kill * the system so hard that we don't see the call trace. @@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(build_cr3(next, new_asid)); + write_cr3(build_cr3(next->pgd, new_asid)); /* * NB: This gets called via leave_mm() in the idle path @@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(build_cr3_noflush(next, new_asid)); + write_cr3(build_cr3_noflush(next->pgd, new_asid)); /* See above wrt _rcuidle. 
*/ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); @@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void) !(cr4_read_shadow() & X86_CR4_PCIDE)); /* Force ASID 0 and force a TLB flush. */ - write_cr3(build_cr3(mm, 0)); + write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); -- cgit v1.2.3 From cb0a9144a744e55207e24dcef812f05cd15a499a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:55 +0100 Subject: x86/mm: Remove hard-coded ASID limit checks First, it's nice to remove the magic numbers. Second, PAGE_TABLE_ISOLATION is going to consume half of the available ASID space. The space is currently unused, but add a comment to spell out this new restriction. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index ee7925adfb57..f88ccd3ae466 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -69,6 +69,22 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } +/* There are 12 bits of space for ASIDS in CR3 */ +#define CR3_HW_ASID_BITS 12 +/* + * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for + * user/kernel switches + */ +#define PTI_CONSUMED_ASID_BITS 0 + +#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS) +/* + * ASIDs are zero-based: 0->MAX_AVAIL_ASID 
are valid. -1 below to account + * for them being zero-based. Another -1 is because ASID 0 is reserved for + * use by non-PCID-aware users. + */ +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) + /* * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits. * This serves two purposes. It prevents a nasty situation in which @@ -81,7 +97,7 @@ struct pgd_t; static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > 4094); + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); return __sme_pa(pgd) | (asid + 1); } else { VM_WARN_ON_ONCE(asid != 0); @@ -91,7 +107,7 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) { - VM_WARN_ON_ONCE(asid > 4094); + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH; } -- cgit v1.2.3 From dd95f1a4b5ca904c78e6a097091eb21436478abb Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:56 +0100 Subject: x86/mm: Put MMU to hardware ASID translation in one place There are effectively two ASID types: 1. The one stored in the mmu_context that goes from 0..5 2. The one programmed into the hardware that goes from 1..6 This consolidates the locations where converting between the two (by doing a +1) to a single place which gives us a nice place to comment. PAGE_TABLE_ISOLATION will also need to, given an ASID, know which hardware ASID to flush for the userspace mapping. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index f88ccd3ae466..8b27daff7a7f 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -85,20 +85,26 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) */ #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) -/* - * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID bits. - * This serves two purposes. It prevents a nasty situation in which - * PCID-unaware code saves CR3, loads some other value (with PCID == 0), - * and then restores CR3, thus corrupting the TLB for ASID 0 if the saved - * ASID was nonzero. It also means that any bugs involving loading a - * PCID-enabled CR3 with CR4.PCIDE off will trigger deterministically. - */ +static inline u16 kern_pcid(u16 asid) +{ + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + /* + * If PCID is on, ASID-aware code paths put the ASID+1 into the + * PCID bits. This serves two purposes. It prevents a nasty + * situation in which PCID-unaware code saves CR3, loads some other + * value (with PCID == 0), and then restores CR3, thus corrupting + * the TLB for ASID 0 if the saved ASID was nonzero. It also means + * that any bugs involving loading a PCID-enabled CR3 with + * CR4.PCIDE off will trigger deterministically. 
+ */ + return asid + 1; +} + struct pgd_t; static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - return __sme_pa(pgd) | (asid + 1); + return __sme_pa(pgd) | kern_pcid(asid); } else { VM_WARN_ON_ONCE(asid != 0); return __sme_pa(pgd); @@ -108,7 +114,8 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - return __sme_pa(pgd) | (asid + 1) | CR3_NOFLUSH; + VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID)); + return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } #ifdef CONFIG_PARAVIRT -- cgit v1.2.3 From 1a3b0caeb77edeac5ce5fa05e6a61c474c9a9745 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:47 +0100 Subject: x86/mm: Create asm/invpcid.h Unclutter tlbflush.h a little. Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/invpcid.h | 53 +++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/tlbflush.h | 49 +------------------------------------ 2 files changed, 54 insertions(+), 48 deletions(-) create mode 100644 arch/x86/include/asm/invpcid.h diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h new file mode 100644 index 000000000000..989cfa86de85 --- /dev/null +++ b/arch/x86/include/asm/invpcid.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVPCID +#define _ASM_X86_INVPCID + +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + struct { u64 d[2]; } desc = { { pcid, addr } }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + * + * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and + * invpcid (%rcx), %rax in long mode. + */ + asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" + : : "m" (desc), "a" (type), "c" (&desc) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. 
*/ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + +#endif /* _ASM_X86_INVPCID */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 8b27daff7a7f..171b429f43a2 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -9,54 +9,7 @@ #include #include #include - -static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) -{ - struct { u64 d[2]; } desc = { { pcid, addr } }; - - /* - * The memory clobber is because the whole point is to invalidate - * stale TLB entries and, especially if we're flushing global - * mappings, we don't want the compiler to reorder any subsequent - * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. - */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); -} - -#define INVPCID_TYPE_INDIV_ADDR 0 -#define INVPCID_TYPE_SINGLE_CTXT 1 -#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 -#define INVPCID_TYPE_ALL_NON_GLOBAL 3 - -/* Flush all mappings for a given pcid and addr, not including globals. */ -static inline void invpcid_flush_one(unsigned long pcid, - unsigned long addr) -{ - __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); -} - -/* Flush all mappings for a given PCID, not including globals. */ -static inline void invpcid_flush_single_context(unsigned long pcid) -{ - __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); -} - -/* Flush all mappings, including globals, for all PCIDs. 
*/ -static inline void invpcid_flush_all(void) -{ - __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); -} - -/* Flush all mappings for all PCIDs except globals. */ -static inline void invpcid_flush_all_nonglobals(void) -{ - __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); -} +#include static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { -- cgit v1.2.3 From ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:28:54 +0100 Subject: x86/cpu_entry_area: Move it to a separate unit Separate the cpu_entry_area code out of cpu/common.c and the fixmap. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_entry_area.h | 52 +++++++++++++++++ arch/x86/include/asm/fixmap.h | 41 +------------- arch/x86/kernel/cpu/common.c | 94 ------------------------------ arch/x86/kernel/traps.c | 1 + arch/x86/mm/Makefile | 2 +- arch/x86/mm/cpu_entry_area.c | 104 ++++++++++++++++++++++++++++++++++ 6 files changed, 159 insertions(+), 135 deletions(-) create mode 100644 arch/x86/include/asm/cpu_entry_area.h create mode 100644 arch/x86/mm/cpu_entry_area.c diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h new file mode 100644 index 000000000000..5471826803af --- /dev/null +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _ASM_X86_CPU_ENTRY_AREA_H +#define _ASM_X86_CPU_ENTRY_AREA_H + +#include +#include + +/* + * cpu_entry_area is a percpu region that contains things needed by the CPU + * and early entry/exit code. Real types aren't used for all fields here + * to avoid circular header dependencies. + * + * Every field is a virtual alias of some other allocated backing store. + * There is no direct allocation of a struct cpu_entry_area. 
+ */ +struct cpu_entry_area { + char gdt[PAGE_SIZE]; + + /* + * The GDT is just below entry_stack and thus serves (on x86_64) as + * a a read-only guard page. + */ + struct entry_stack_page entry_stack_page; + + /* + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because + * we need task switches to work, and task switches write to the TSS. + */ + struct tss_struct tss; + + char entry_trampoline[PAGE_SIZE]; + +#ifdef CONFIG_X86_64 + /* + * Exception stacks used for IST entries. + * + * In the future, this should have a separate slot for each stack + * with guard pages between them. + */ + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; +#endif +}; + +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE) + +DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); + +extern void setup_cpu_entry_areas(void); + +#endif diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8153b8d86a3c..fb801662a230 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -25,6 +25,7 @@ #else #include #endif +#include /* * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall @@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP; PAGE_SIZE) #endif -/* - * cpu_entry_area is a percpu region in the fixmap that contains things - * needed by the CPU and early entry/exit code. Real types aren't used - * for all fields here to avoid circular header dependencies. - * - * Every field is a virtual alias of some other allocated backing store. - * There is no direct allocation of a struct cpu_entry_area. - */ -struct cpu_entry_area { - char gdt[PAGE_SIZE]; - - /* - * The GDT is just below entry_stack and thus serves (on x86_64) as - * a a read-only guard page. - */ - struct entry_stack_page entry_stack_page; - - /* - * On x86_64, the TSS is mapped RO. 
On x86_32, it's mapped RW because - * we need task switches to work, and task switches write to the TSS. - */ - struct tss_struct tss; - - char entry_trampoline[PAGE_SIZE]; - -#ifdef CONFIG_X86_64 - /* - * Exception stacks used for IST entries. - * - * In the future, this should have a separate slot for each stack - * with guard pages between them. - */ - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; -#endif -}; - -#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) - -extern void setup_cpu_entry_areas(void); - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ed4acbce37a8..8ddcfa4d4165 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, [DEBUG_STACK - 1] = DEBUG_STKSZ }; - -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); -#endif - -static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, - entry_stack_storage); - -static void __init -set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) -{ - for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) - __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); -} - -/* Setup the fixmap mappings only once per-processor */ -static void __init setup_cpu_entry_area(int cpu) -{ -#ifdef CONFIG_X86_64 - extern char _entry_trampoline[]; - - /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ - pgprot_t gdt_prot = PAGE_KERNEL_RO; - pgprot_t tss_prot = PAGE_KERNEL_RO; -#else - /* - * On native 32-bit systems, the GDT cannot be read-only because - * our double fault handler uses a task gate, and entering through - * a task gate needs to change an available TSS to busy. 
If the - * GDT is read-only, that will triple fault. The TSS cannot be - * read-only because the CPU writes to it on task switches. - * - * On Xen PV, the GDT must be read-only because the hypervisor - * requires it. - */ - pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? - PAGE_KERNEL_RO : PAGE_KERNEL; - pgprot_t tss_prot = PAGE_KERNEL; -#endif - - __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), - per_cpu_ptr(&entry_stack_storage, cpu), 1, - PAGE_KERNEL); - - /* - * The Intel SDM says (Volume 3, 7.2.1): - * - * Avoid placing a page boundary in the part of the TSS that the - * processor reads during a task switch (the first 104 bytes). The - * processor may not correctly perform address translations if a - * boundary occurs in this area. During a task switch, the processor - * reads and writes into the first 104 bytes of each TSS (using - * contiguous physical addresses beginning with the physical address - * of the first byte of the TSS). So, after TSS access begins, if - * part of the 104 bytes is not physically contiguous, the processor - * will access incorrect information without generating a page-fault - * exception. - * - * There are also a lot of errata involving the TSS spanning a page - * boundary. Assert that we're not doing that. 
- */ - BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ - offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); - BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), - &per_cpu(cpu_tss_rw, cpu), - sizeof(struct tss_struct) / PAGE_SIZE, - tss_prot); - -#ifdef CONFIG_X86_32 - per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); #endif -#ifdef CONFIG_X86_64 - BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); - BUILD_BUG_ON(sizeof(exception_stacks) != - sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), - &per_cpu(exception_stacks, cpu), - sizeof(exception_stacks) / PAGE_SIZE, - PAGE_KERNEL); - - __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), - __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); -#endif -} - -void __init setup_cpu_entry_areas(void) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) - setup_cpu_entry_area(cpu); -} - /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 74136fd16f49..464daed6894f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 7ba7f3d7f477..2e0017af8f9b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c new file mode 100644 index 000000000000..235ff9cfaaf4 --- /dev/null +++ 
b/arch/x86/mm/cpu_entry_area.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +#include +#include +#include +#include + +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); + +#ifdef CONFIG_X86_64 +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +#endif + +static void __init +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) +{ + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); +} + +/* Setup the fixmap mappings only once per-processor */ +static void __init setup_cpu_entry_area(int cpu) +{ +#ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; + + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ + pgprot_t gdt_prot = PAGE_KERNEL_RO; + pgprot_t tss_prot = PAGE_KERNEL_RO; +#else + /* + * On native 32-bit systems, the GDT cannot be read-only because + * our double fault handler uses a task gate, and entering through + * a task gate needs to change an available TSS to busy. If the + * GDT is read-only, that will triple fault. The TSS cannot be + * read-only because the CPU writes to it on task switches. + * + * On Xen PV, the GDT must be read-only because the hypervisor + * requires it. + */ + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? + PAGE_KERNEL_RO : PAGE_KERNEL; + pgprot_t tss_prot = PAGE_KERNEL; +#endif + + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), + per_cpu_ptr(&entry_stack_storage, cpu), 1, + PAGE_KERNEL); + + /* + * The Intel SDM says (Volume 3, 7.2.1): + * + * Avoid placing a page boundary in the part of the TSS that the + * processor reads during a task switch (the first 104 bytes). The + * processor may not correctly perform address translations if a + * boundary occurs in this area. 
During a task switch, the processor + * reads and writes into the first 104 bytes of each TSS (using + * contiguous physical addresses beginning with the physical address + * of the first byte of the TSS). So, after TSS access begins, if + * part of the 104 bytes is not physically contiguous, the processor + * will access incorrect information without generating a page-fault + * exception. + * + * There are also a lot of errata involving the TSS spanning a page + * boundary. Assert that we're not doing that. + */ + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), + &per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, + tss_prot); + +#ifdef CONFIG_X86_32 + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); +#endif + +#ifdef CONFIG_X86_64 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + BUILD_BUG_ON(sizeof(exception_stacks) != + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, + PAGE_KERNEL); + + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); +#endif +} + +void __init setup_cpu_entry_areas(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + setup_cpu_entry_area(cpu); +} -- cgit v1.2.3 From 92a0f81d89571e3e8759366e050ee05cc545ef99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Dec 2017 18:51:31 +0100 Subject: x86/cpu_entry_area: Move it out of the fixmap Put the cpu_entry_area into a separate P4D entry. The fixmap gets too big and 0-day already hit a case where the fixmap PTEs were cleared by cleanup_highmap(). Aside from that, the fixmap API is a pain as it's all backwards.
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 2 + arch/x86/include/asm/cpu_entry_area.h | 18 ++++++++- arch/x86/include/asm/desc.h | 1 + arch/x86/include/asm/fixmap.h | 32 +--------------- arch/x86/include/asm/pgtable_32_types.h | 15 ++++++-- arch/x86/include/asm/pgtable_64_types.h | 47 +++++++++++++---------- arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/traps.c | 5 ++- arch/x86/mm/cpu_entry_area.c | 66 +++++++++++++++++++++++++-------- arch/x86/mm/dump_pagetables.c | 6 ++- arch/x86/mm/init_32.c | 6 +++ arch/x86/mm/kasan_init_64.c | 29 ++++++++------- arch/x86/mm/pgtable_32.c | 1 + arch/x86/xen/mmu_pv.c | 2 - 14 files changed, 143 insertions(+), 88 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 63a41671d25b..51101708a03a 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space @@ -35,6 +36,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) ... unused hole ... +fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... 
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 5471826803af..2fbc69a0916e 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -43,10 +43,26 @@ struct cpu_entry_area { }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE) +#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); extern void setup_cpu_entry_areas(void); +extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); + +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE \ + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + +extern struct cpu_entry_area *get_cpu_entry_area(int cpu); + +static inline struct entry_stack *cpu_entry_stack(int cpu) +{ + return &get_cpu_entry_area(cpu)->entry_stack_page.stack; +} #endif diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 2ace1f90d138..bc359dd2f7f6 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index fb801662a230..64c4a30e0d39 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -25,7 +25,6 @@ #else #include #endif -#include /* * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall @@ -84,7 +83,6 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif - FIX_RO_IDT, /* Virtual mapping for read-only IDT */ #ifdef CONFIG_X86_32 FIX_KMAP_BEGIN, /* reserved pte's 
for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, @@ -100,9 +98,6 @@ enum fixed_addresses { #ifdef CONFIG_X86_INTEL_MID FIX_LNW_VRTC, #endif - /* Fixmap entries to remap the GDTs, one per processor. */ - FIX_CPU_ENTRY_AREA_TOP, - FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1, #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ @@ -143,7 +138,7 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) extern int fixmaps_set; @@ -191,30 +186,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, void __early_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page) -{ - BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); - - return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page; -} - -#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \ - BUILD_BUG_ON(offset % PAGE_SIZE != 0); \ - __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \ - }) - -#define get_cpu_entry_area_index(cpu, field) \ - __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field)) - -static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) -{ - return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); -} - -static inline struct entry_stack *cpu_entry_stack(int cpu) -{ - return &get_cpu_entry_area(cpu)->entry_stack_page.stack; -} - #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index f2ca9b28fd68..ce245b0cdfca 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ 
b/arch/x86/include/asm/pgtable_32_types.h @@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ #define LAST_PKMAP 1024 #endif -#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ - & PMD_MASK) +/* + * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c + * to avoid include recursion hell + */ +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) + +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) + +#define PKMAP_BASE \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) #else -# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE) #endif #define MODULES_VADDR VMALLOC_START diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6d5f45dcd4a1..3d27831bc58d 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. 
*/ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) + #ifdef CONFIG_X86_5LEVEL -#define VMALLOC_SIZE_TB _AC(16384, UL) -#define __VMALLOC_BASE _AC(0xff92000000000000, UL) -#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define VMALLOC_SIZE_TB _AC(16384, UL) +# define __VMALLOC_BASE _AC(0xff92000000000000, UL) +# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #else -#define VMALLOC_SIZE_TB _AC(32, UL) -#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) -#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define VMALLOC_SIZE_TB _AC(32, UL) +# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) +# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #endif + #ifdef CONFIG_RANDOMIZE_MEMORY -#define VMALLOC_START vmalloc_base -#define VMEMMAP_START vmemmap_base +# define VMALLOC_START vmalloc_base +# define VMEMMAP_START vmemmap_base #else -#define VMALLOC_START __VMALLOC_BASE -#define VMEMMAP_START __VMEMMAP_BASE +# define VMALLOC_START __VMALLOC_BASE +# define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_RANDOMIZE_MEMORY */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) -#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) + +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) + +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) -#define MODULES_LEN (MODULES_END - MODULES_VADDR) -#define ESPFIX_PGD_ENTRY _AC(-2, UL) -#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) -#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) -#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) +#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) +#define MODULES_LEN (MODULES_END - MODULES_VADDR) + +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) + +#define 
CPU_ENTRY_AREA_PGD _AC(-3, UL) +#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) + +#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) +#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1dd3f533d78c..36b17e0febe8 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 464daed6894f..7c16fe0b60c2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -951,8 +951,9 @@ void __init trap_init(void) * "sidt" instruction will not leak the location of the kernel, and * to defend the IDT against arbitrary memory write vulnerabilities. * It will be reloaded in cpu_init() */ - __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); - idt_descr.address = fix_to_virt(FIX_RO_IDT); + cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table), + PAGE_KERNEL_RO); + idt_descr.address = CPU_ENTRY_AREA_RO_IDT; /* * Should be a barrier for any external CPU state: diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 235ff9cfaaf4..21e8b595cbb1 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -15,11 +15,27 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); #endif +struct cpu_entry_area *get_cpu_entry_area(int cpu) +{ + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); + + return (struct cpu_entry_area *) va; +} +EXPORT_SYMBOL(get_cpu_entry_area); + +void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) +{ + unsigned long va = (unsigned long) cea_vaddr; + + set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags)); +} + static void __init -set_percpu_fixmap_pages(int idx, void 
*ptr, int pages, pgprot_t prot) +cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) { - for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) - __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } /* Setup the fixmap mappings only once per-processor */ @@ -47,10 +63,12 @@ static void __init setup_cpu_entry_area(int cpu) pgprot_t tss_prot = PAGE_KERNEL; #endif - __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), - per_cpu_ptr(&entry_stack_storage, cpu), 1, - PAGE_KERNEL); + cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu), + gdt_prot); + + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page, + per_cpu_ptr(&entry_stack_storage, cpu), 1, + PAGE_KERNEL); /* * The Intel SDM says (Volume 3, 7.2.1): @@ -72,10 +90,9 @@ static void __init setup_cpu_entry_area(int cpu) BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), - &per_cpu(cpu_tss_rw, cpu), - sizeof(struct tss_struct) / PAGE_SIZE, - tss_prot); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss, + &per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); #ifdef CONFIG_X86_32 per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); @@ -85,20 +102,37 @@ static void __init setup_cpu_entry_area(int cpu) BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); BUILD_BUG_ON(sizeof(exception_stacks) != sizeof(((struct cpu_entry_area *)0)->exception_stacks)); - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), - &per_cpu(exception_stacks, cpu), - sizeof(exception_stacks) / PAGE_SIZE, - PAGE_KERNEL); + 
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); - __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), + cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); #endif } +static __init void setup_cpu_entry_area_ptes(void) +{ +#ifdef CONFIG_X86_32 + unsigned long start, end; + + BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); + + start = CPU_ENTRY_AREA_BASE; + end = start + CPU_ENTRY_AREA_MAP_SIZE; + + for (; start < end; start += PMD_SIZE) + populate_extra_pte(start); +#endif +} + void __init setup_cpu_entry_areas(void) { unsigned int cpu; + setup_cpu_entry_area_ptes(); + for_each_possible_cpu(cpu) setup_cpu_entry_area(cpu); } diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index fdf09d8f98da..43dedbfb7257 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -58,6 +58,7 @@ enum address_markers_idx { KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif + CPU_ENTRY_AREA_NR, #ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, #endif @@ -81,6 +82,7 @@ static struct addr_marker address_markers[] = { [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, #endif + [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, #ifdef CONFIG_X86_ESPFIX64 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, #endif @@ -104,6 +106,7 @@ enum address_markers_idx { #ifdef CONFIG_HIGHMEM PKMAP_BASE_NR, #endif + CPU_ENTRY_AREA_NR, FIXADDR_START_NR, END_OF_SPACE_NR, }; @@ -116,6 +119,7 @@ static struct addr_marker address_markers[] = { #ifdef CONFIG_HIGHMEM [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, #endif + [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, 
[END_OF_SPACE_NR] = { -1, NULL } }; @@ -541,8 +545,8 @@ static int __init pt_dump_init(void) address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; # endif address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; + address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; #endif - return 0; } __initcall(pt_dump_init); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a64a6f2848d..135c9a7898c7 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include "mm_internal.h" @@ -766,6 +767,7 @@ void __init mem_init(void) mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #endif @@ -777,6 +779,10 @@ void __init mem_init(void) FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, + CPU_ENTRY_AREA_BASE, + CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE, + CPU_ENTRY_AREA_MAP_SIZE >> 10, + #ifdef CONFIG_HIGHMEM PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9ec70d780f1f..47388f0c0e59 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -15,6 +15,7 @@ #include #include #include +#include extern struct range pfn_mapped[E820_MAX_ENTRIES]; @@ -322,31 +323,33 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - kasan_populate_zero_shadow( - kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - kasan_mem_to_shadow((void *)__START_KERNEL_map)); - - kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), - (unsigned long)kasan_mem_to_shadow(_end), - early_pfn_to_nid(__pa(_stext))); - - shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM); + shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; shadow_cpu_entry_begin = 
kasan_mem_to_shadow(shadow_cpu_entry_begin); shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); - shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE); + shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, PAGE_SIZE); - kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), - shadow_cpu_entry_begin); + kasan_populate_zero_shadow( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + shadow_cpu_entry_begin); kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, (unsigned long)shadow_cpu_entry_end, 0); - kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END); + kasan_populate_zero_shadow(shadow_cpu_entry_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), + (unsigned long)kasan_mem_to_shadow(_end), + early_pfn_to_nid(__pa(_stext))); + + kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), + (void *)KASAN_SHADOW_END); load_cr3(init_top_pgt); __flush_tlb_all(); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 6b9bf023a700..c3c5274410a9 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index c2454237fa67..a0e2b8c6e5c7 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2261,7 +2261,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) switch (idx) { case FIX_BTMAP_END ... 
FIX_BTMAP_BEGIN: - case FIX_RO_IDT: #ifdef CONFIG_X86_32 case FIX_WP_TEST: # ifdef CONFIG_HIGHMEM @@ -2272,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: - case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM: /* All local page mappings */ pte = pfn_pte(phys, prot); break; -- cgit v1.2.3 From 613e396bc0d4c7604fba23256644e78454c68cf6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 17 Dec 2017 10:56:29 +0100 Subject: init: Invoke init_espfix_bsp() from mm_init() init_espfix_bsp() needs to be invoked before the page table isolation initialization. Move it into mm_init() which is the place where pti_init() will be added. While at it get rid of the #ifdeffery and provide proper stub functions. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/espfix.h | 7 ++++--- arch/x86/kernel/smpboot.c | 6 +----- include/asm-generic/pgtable.h | 5 +++++ init/main.c | 6 ++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 0211029076ea..6777480d8a42 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ESPFIX_H #define _ASM_X86_ESPFIX_H -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_ESPFIX64 #include @@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); extern void init_espfix_ap(int cpu); - -#endif /* CONFIG_X86_64 */ +#else +static inline void init_espfix_ap(int cpu) { } +#endif #endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d56c1d209283..33d6000265aa 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -990,12 +990,8 @@ static int 
do_boot_cpu(int apicid, int cpu, struct task_struct *idle, initial_code = (unsigned long)start_secondary; initial_stack = idle->thread.sp; - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 + /* Enable the espfix hack for this CPU */ init_espfix_ap(cpu); -#endif /* So we see what's up */ announce_cpu(cpu, apicid); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 757dc6ffc7ba..231b35a76dd9 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1017,6 +1017,11 @@ static inline int pmd_clear_huge(pmd_t *pmd) struct file; int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot); + +#ifndef CONFIG_X86_ESPFIX64 +static inline void init_espfix_bsp(void) { } +#endif + #endif /* !__ASSEMBLY__ */ #ifndef io_remap_pfn_range diff --git a/init/main.c b/init/main.c index 0ee9c6866ada..8a390f60ec81 100644 --- a/init/main.c +++ b/init/main.c @@ -504,6 +504,8 @@ static void __init mm_init(void) pgtable_init(); vmalloc_init(); ioremap_huge_init(); + /* Should be run before the first non-init thread is created */ + init_espfix_bsp(); } asmlinkage __visible void __init start_kernel(void) @@ -673,10 +675,6 @@ asmlinkage __visible void __init start_kernel(void) #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); -#endif -#ifdef CONFIG_X86_ESPFIX64 - /* Should be run before the first non-init thread is created */ - init_espfix_bsp(); #endif thread_stack_cache_init(); cred_init(); -- cgit v1.2.3 From b26a2319be3dd26edb3013504992a037a5902520 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 23 Dec 2017 08:54:28 +1000 Subject: drm/nouveau: fix race when adding delayed work items kernel.org bz#198221. 
Reported-by: Petr Vandrovec Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 8d4a5be3b913..56fe261b6268 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, work->cli = cli; mutex_lock(&cli->lock); list_add_tail(&work->head, &cli->worker); - mutex_unlock(&cli->lock); if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence)) nouveau_cli_work_fence(fence, &work->cb); + mutex_unlock(&cli->lock); } static void -- cgit v1.2.3 From b3b1b6532890c70987821946f90c22b8021aaaf8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 Dec 2017 11:36:05 -0800 Subject: tools: bpftool: maps: close json array on error paths of show We can't return from the middle of do_show(), because json_array will not be closed. Break out of the loop. Note that the error handling after the loop depends on errno, so no need to set err. Fixes: 831a0aafe5c3 ("tools: bpftool: add JSON output for `bpftool map *` commands") Signed-off-by: Jakub Kicinski Acked-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e2450c8e88e6..8368b7ea31b5 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -523,21 +523,21 @@ static int do_show(int argc, char **argv) break; p_err("can't get next map: %s%s", strerror(errno), errno == EINVAL ? " -- kernel too old?" 
: ""); - return -1; + break; } fd = bpf_map_get_fd_by_id(id); if (fd < 0) { p_err("can't get map by id (%u): %s", id, strerror(errno)); - return -1; + break; } err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get map info: %s", strerror(errno)); close(fd); - return -1; + break; } if (json_output) -- cgit v1.2.3 From 8207c6dd4746c345b689684c4cd0ce00a18c7ef2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 Dec 2017 11:36:06 -0800 Subject: tools: bpftool: protect against races with disappearing objects On program/map show we may get an ID of an object from GETNEXT, but the object may disappear before we call GET_FD_BY_ID. If that happens, ignore the object and continue. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Jakub Kicinski Acked-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 2 ++ tools/bpf/bpftool/prog.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 8368b7ea31b5..a8c3a33dd185 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -528,6 +528,8 @@ static int do_show(int argc, char **argv) fd = bpf_map_get_fd_by_id(id); if (fd < 0) { + if (errno == ENOENT) + continue; p_err("can't get map by id (%u): %s", id, strerror(errno)); break; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index ad619b96c276..dded77345bfb 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -382,6 +382,8 @@ static int do_show(int argc, char **argv) fd = bpf_prog_get_fd_by_id(id); if (fd < 0) { + if (errno == ENOENT) + continue; p_err("can't get prog by id (%u): %s", id, strerror(errno)); err = -1; -- cgit v1.2.3 From 8a42d3fc9dfccbf601c5f58f46dc3cdbc1a4b923 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Fri, 15 Dec 2017 13:42:04 +0000 Subject: nvmem: meson-mx-efuse: fix reading from an offset other than 0 meson_mx_efuse_read calculates the address internal to the eFuse based on the 
offset and the word size. This works fine with any given offset. However, the offset is also included when writing to the output buffer. This means that reading 4 bytes at offset 500 tries to write beyond the array allocated by the nvmem core as it wants to write the 4 bytes to "buffer address + offset (500)". This issue did not show up in the previous tests since no driver uses any value from the eFuse yet and reading the eFuse via sysfs simply reads the whole eFuse, starting at offset 0. Fix this by only including the offset in the internal address calculation. Fixes: 8caef1fa9176 ("nvmem: add a driver for the Amlogic Meson6/Meson8/Meson8b SoCs") Signed-off-by: Martin Blumenstingl Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/meson-mx-efuse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvmem/meson-mx-efuse.c b/drivers/nvmem/meson-mx-efuse.c index a346b4923550..41d3a3c1104e 100644 --- a/drivers/nvmem/meson-mx-efuse.c +++ b/drivers/nvmem/meson-mx-efuse.c @@ -156,8 +156,8 @@ static int meson_mx_efuse_read(void *context, unsigned int offset, MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE, MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE); - for (i = offset; i < offset + bytes; i += efuse->config.word_size) { - addr = i / efuse->config.word_size; + for (i = 0; i < bytes; i += efuse->config.word_size) { + addr = (offset + i) / efuse->config.word_size; err = meson_mx_efuse_read_addr(efuse, addr, &tmp); if (err) -- cgit v1.2.3 From f6c4fd506cb626e4346aa81688f255e593a7c5a0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Dec 2017 19:45:11 +0100 Subject: x86/cpu_entry_area: Prevent wraparound in setup_cpu_entry_area_ptes() on 32bit The loop which populates the CPU entry area PMDs can wrap around on 32bit machines when the number of CPUs is small. It worked wonderful for NR_CPUS=64 for whatever reason and the moron who wrote that code did not bother to test it with !SMP. Check for the wraparound to fix it. 
Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Reported-by: kernel test robot Signed-off-by: Thomas "Feels stupid" Gleixner Tested-by: Borislav Petkov --- arch/x86/mm/cpu_entry_area.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 21e8b595cbb1..fe814fd5e014 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -122,7 +122,8 @@ static __init void setup_cpu_entry_area_ptes(void) start = CPU_ENTRY_AREA_BASE; end = start + CPU_ENTRY_AREA_MAP_SIZE; - for (; start < end; start += PMD_SIZE) + /* Careful here: start + PMD_SIZE might wrap around */ + for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE) populate_extra_pte(start); #endif } -- cgit v1.2.3 From a89f040fa34ec9cd682aed98b8f04e3c47d998bd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:33 +0100 Subject: x86/cpufeatures: Add X86_BUG_CPU_INSECURE Many x86 CPUs leak information to user space due to missing isolation of user space and kernel space page tables. There are many well documented ways to exploit that. The upcoming software mitigation of isolating the user and kernel space page tables needs a misfeature flag so code can be made runtime conditional. Add the BUG bit which indicates that the CPU is affected and add a feature bit which indicates that the software mitigation is enabled. Assume for now that _ALL_ x86 CPUs are affected by this. Exceptions can be made later. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 3 ++- arch/x86/include/asm/disabled-features.h | 8 +++++++- arch/x86/kernel/cpu/common.c | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 800104c8a3ed..d8ec834ea884 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -201,7 +201,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ - +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ @@ -340,5 +340,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index c10c9128f54e..e428e16dd822 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -44,6 +44,12 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) 
+#endif + /* * Make sure to add features to the correct mask */ @@ -54,7 +60,7 @@ #define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 -#define DISABLED_MASK7 0 +#define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) #define DISABLED_MASK10 0 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8ddcfa4d4165..a9210f9b7cf8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -898,6 +898,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } setup_force_cpu_cap(X86_FEATURE_ALWAYS); + + /* Assume for now that ALL x86 CPUs are insecure */ + setup_force_cpu_bug(X86_BUG_CPU_INSECURE); + fpu__init_system(c); #ifdef CONFIG_X86_32 -- cgit v1.2.3 From c313ec66317d421fb5768d78c56abed2dc862264 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:34 +0100 Subject: x86/mm/pti: Disable global pages if PAGE_TABLE_ISOLATION=y Global pages stay in the TLB across context switches. Since all contexts share the same kernel mapping, these mappings are marked as global pages so kernel entries in the TLB are not flushed out on a context switch. But, even having these entries in the TLB opens up something that an attacker can use, such as the double-page-fault attack: http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf That means that even when PAGE_TABLE_ISOLATION switches page tables on return to user space the global pages would stay in the TLB cache. Disable global pages so that kernel TLB entries can be flushed before returning to user space. This way, all accesses to kernel addresses from userspace result in a TLB miss independent of the existence of a kernel mapping. Suppress global pages via the __supported_pte_mask. The user space mappings set PAGE_GLOBAL for the minimal kernel mappings which are required for entry/exit. These mappings are set up manually so the filtering does not take place. 
[ The __supported_pte_mask simplification was written by Thomas Gleixner. ] Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a22c2b95e513..020223420308 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -161,6 +161,12 @@ struct map_range { static int page_size_mask; +static void enable_global_pages(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + __supported_pte_mask |= _PAGE_GLOBAL; +} + static void __init probe_page_size_mask(void) { /* @@ -179,11 +185,11 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ + __supported_pte_mask &= ~_PAGE_GLOBAL; if (boot_cpu_has(X86_FEATURE_PGE)) { cr4_set_bits_and_update_boot(X86_CR4_PGE); - __supported_pte_mask |= _PAGE_GLOBAL; - } else - __supported_pte_mask &= ~_PAGE_GLOBAL; + enable_global_pages(); + } /* Enable 1 GB linear kernel mappings if available: */ if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { -- cgit v1.2.3 From 8a09317b895f073977346779df52f67c1056d81d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:35 +0100 Subject: x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching PAGE_TABLE_ISOLATION needs to switch to a different CR3 value when it enters the kernel and switch back when it exits. This essentially needs to be done before leaving assembly code. 
This is extra challenging because the switching context is tricky: the registers that can be clobbered can vary. It is also hard to store things on the stack because there is an established ABI (ptregs) or the stack is entirely unsafe to use. Establish a set of macros that allow changing to the user and kernel CR3 values. Interactions with SWAPGS: Previous versions of the PAGE_TABLE_ISOLATION code relied on having per-CPU scratch space to save/restore a register that can be used for the CR3 MOV. The %GS register is used to index into our per-CPU space, so SWAPGS *had* to be done before the CR3 switch. That scratch space is gone now, but the semantic that SWAPGS must be done before the CR3 MOV is retained. This is good to keep because it is not that hard to do and it allows to do things like add per-CPU debugging information. What this does in the NMI code is worth pointing out. NMIs can interrupt *any* context and they can also be nested with NMIs interrupting other NMIs. The comments below ".Lnmi_from_kernel" explain the format of the stack during this situation. Changing the format of this stack is hard. Instead of storing the old CR3 value on the stack, this depends on the *regular* register save/restore mechanism and then uses %r14 to keep CR3 during the NMI. It is callee-saved and will not be clobbered by the C NMI handlers that get called. [ PeterZ: ESPFIX optimization ] Based-on-code-from: Andy Lutomirski Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 66 ++++++++++++++++++++++++++++++++++++++++ arch/x86/entry/entry_64.S | 45 +++++++++++++++++++++++---- arch/x86/entry/entry_64_compat.S | 24 ++++++++++++++- 3 files changed, 128 insertions(+), 7 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3fd8bc560fae..a9d17a7686ab 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include +#include /* @@ -187,6 +189,70 @@ For 32-bit we have the following conventions - kernel is built with #endif .endm +#ifdef CONFIG_PAGE_TABLE_ISOLATION + +/* PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two halves: */ +#define PTI_SWITCH_MASK (1< in kernel */ SWAPGS xorl %ebx, %ebx -1: ret + +1: + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 + + ret END(paranoid_entry) /* @@ -1266,6 +1287,7 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + RESTORE_CR3 save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: @@ -1293,6 +1315,8 @@ ENTRY(error_entry) * from user mode due to an IRET fault. */ SWAPGS + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax .Lerror_entry_from_usermode_after_swapgs: /* Put us onto the real thread stack. */ @@ -1339,6 +1363,7 @@ ENTRY(error_entry) * .Lgs_change's error handler with kernel gsbase. */ SWAPGS + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax jmp .Lerror_entry_done .Lbstep_iret: @@ -1348,10 +1373,11 @@ ENTRY(error_entry) .Lerror_bad_iret: /* - * We came from an IRET to user mode, so we have user gsbase. 
- * Switch to kernel gsbase: + * We came from an IRET to user mode, so we have user + * gsbase and CR3. Switch to kernel gsbase and CR3: */ SWAPGS + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax /* * Pretend that the exception came from user mode: set up pt_regs @@ -1383,6 +1409,10 @@ END(error_exit) /* * Runs on exception stack. Xen PV does not go through this path at all, * so we can use real assembly here. + * + * Registers: + * %r14: Used to save/restore the CR3 of the interrupted context + * when PAGE_TABLE_ISOLATION is in use. Do not clobber. */ ENTRY(nmi) UNWIND_HINT_IRET_REGS @@ -1446,6 +1476,7 @@ ENTRY(nmi) swapgs cld + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 @@ -1698,6 +1729,8 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + RESTORE_CR3 save_reg=%r14 + testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 95ad40eb7eff..05238b29895e 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -49,6 +49,10 @@ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ SWAPGS + + /* We are about to clobber %rsp anyway, clobbering here is OK */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* @@ -215,6 +219,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq $0 /* pt_regs->r14 = 0 */ pushq $0 /* pt_regs->r15 = 0 */ + /* + * We just saved %rdi so it is safe to clobber. It is not + * preserved during the C calls inside TRACE_IRQS_OFF anyway. + */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + /* * User mode is traced as though IRQs are on, and SYSENTER * turned them off. @@ -256,10 +266,22 @@ sysret32_from_system_call: * when the system call started, which is already known to user * code. We zero R8-R10 to avoid info leaks. 
*/ + movq RSP-ORIG_RAX(%rsp), %rsp + + /* + * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored + * on the process stack which is not mapped to userspace and + * not readable after we SWITCH_TO_USER_CR3. Delay the CR3 + * switch until after after the last reference to the process + * stack. + * + * %r8 is zeroed before the sysret, thus safe to clobber. + */ + SWITCH_TO_USER_CR3 scratch_reg=%r8 + xorq %r8, %r8 xorq %r9, %r9 xorq %r10, %r10 - movq RSP-ORIG_RAX(%rsp), %rsp swapgs sysretl END(entry_SYSCALL_compat) -- cgit v1.2.3 From aa8c6248f8c75acfd610fe15d8cae23cf70d9d09 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:36 +0100 Subject: x86/mm/pti: Add infrastructure for page table isolation Add the initial files for kernel page table isolation, with a minimal init function and the boot time detection for this misfeature. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 2 + arch/x86/boot/compressed/pagetable.c | 3 + arch/x86/entry/calling.h | 7 +++ arch/x86/include/asm/pti.h | 14 +++++ arch/x86/mm/Makefile | 7 ++- arch/x86/mm/init.c | 2 + arch/x86/mm/pti.c | 84 +++++++++++++++++++++++++ include/linux/pti.h | 11 ++++ init/main.c | 3 + 9 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/pti.h create mode 100644 arch/x86/mm/pti.c create mode 100644 include/linux/pti.h diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 05496622b4ef..5dfd26265484 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2685,6 +2685,8 @@ steal time is computed, but won't influence scheduler behaviour + nopti [X86-64] Disable kernel page table isolation + nolapic [X86-32,APIC] Do not enable or use the local APIC. nolapic_timer [X86-32,APIC] Do not use the local APIC timer. diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 972319ff5b01..e691ff734cb5 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -23,6 +23,9 @@ */ #undef CONFIG_AMD_MEM_ENCRYPT +/* No PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_PAGE_TABLE_ISOLATION + #include "misc.h" /* These actually do the work of building the kernel identity maps. 
*/ diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index a9d17a7686ab..3d3389a92c33 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -205,18 +205,23 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI mov %cr3, \scratch_reg ADJUST_KERNEL_CR3 \scratch_reg mov \scratch_reg, %cr3 +.Lend_\@: .endm .macro SWITCH_TO_USER_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI mov %cr3, \scratch_reg ADJUST_USER_CR3 \scratch_reg mov \scratch_reg, %cr3 +.Lend_\@: .endm .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI movq %cr3, \scratch_reg movq \scratch_reg, \save_reg /* @@ -233,11 +238,13 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro RESTORE_CR3 save_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI /* * The CR3 write could be avoided when not changing its value, * but would require a CR3 read *and* a scratch register. 
*/ movq \save_reg, %cr3 +.Lend_\@: .endm #else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h new file mode 100644 index 000000000000..0b5ef05b2d2d --- /dev/null +++ b/arch/x86/include/asm/pti.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _ASM_X86_PTI_H +#define _ASM_X86_PTI_H +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern void pti_init(void); +extern void pti_check_boottime_disable(void); +#else +static inline void pti_check_boottime_disable(void) { } +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 2e0017af8f9b..52906808e277 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -43,9 +43,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_X86_INTEL_MPX) += mpx.o -obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o -obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_X86_INTEL_MPX) += mpx.o +obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 020223420308..af75069fb116 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -630,6 +631,7 @@ void __init init_mem_mapping(void) { unsigned long end; + pti_check_boottime_disable(); probe_page_size_mask(); setup_pcid(); diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c new file mode 100644 index 000000000000..375f23a758bc --- /dev/null +++ b/arch/x86/mm/pti.c @@ -0,0 +1,84 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This code is based in part on work published here: + * + * https://github.com/IAIK/KAISER + * + * The original work was written by and and signed off by for the Linux + * kernel by: + * + * Signed-off-by: Richard Fellner + * Signed-off-by: Moritz Lipp + * Signed-off-by: Daniel Gruss + * Signed-off-by: Michael Schwarz + * + * Major changes to the original code by: Dave Hansen + * Mostly rewritten by Thomas Gleixner and + * Andy Lutomirsky + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef pr_fmt +#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt + +static void __init pti_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) + pr_info("%s\n", reason); +} + +void __init pti_check_boottime_disable(void) +{ + if (hypervisor_is_type(X86_HYPER_XEN_PV)) { + pti_print_if_insecure("disabled on XEN PV."); + return; + } + + if (cmdline_find_option_bool(boot_command_line, "nopti")) { + pti_print_if_insecure("disabled on command line."); + return; + } + + if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) + return; + + setup_force_cpu_cap(X86_FEATURE_PTI); +} + +/* + * Initialize kernel page table isolation + */ +void __init pti_init(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("enabled\n"); +} diff --git a/include/linux/pti.h b/include/linux/pti.h new file mode 100644 index 000000000000..0174883a935a --- /dev/null +++ 
b/include/linux/pti.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _INCLUDE_PTI_H +#define _INCLUDE_PTI_H + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +#include +#else +static inline void pti_init(void) { } +#endif + +#endif diff --git a/init/main.c b/init/main.c index 8a390f60ec81..b32ec72cdf3d 100644 --- a/init/main.c +++ b/init/main.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include @@ -506,6 +507,8 @@ static void __init mm_init(void) ioremap_huge_init(); /* Should be run before the first non-init thread is created */ init_espfix_bsp(); + /* Should be run after espfix64 is set up. */ + pti_init(); } asmlinkage __visible void __init start_kernel(void) -- cgit v1.2.3 From 41f4c20b57a4890ea7f56ff8717cc83fefb8d537 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 12 Dec 2017 14:39:52 +0100 Subject: x86/pti: Add the pti= cmdline option and documentation Keep the "nopti" optional for traditional reasons. [ tglx: Don't allow force on when running on XEN PV and made 'on' printout conditional ] Requested-by: Linus Torvalds Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Link: https://lkml.kernel.org/r/20171212133952.10177-1-bp@alien8.de Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ arch/x86/mm/pti.c | 26 ++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 5dfd26265484..520fdec15bbb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3255,6 +3255,12 @@ pt. [PARIDE] See Documentation/blockdev/paride.txt. + pti= [X86_64] + Control user/kernel address space isolation: + on - enable + off - disable + auto - default setting + pty.legacy_count= [KNL] Number of legacy pty's. Overwrites compiled-in default number. diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 375f23a758bc..a13f6b109865 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -54,21 +54,45 @@ static void __init pti_print_if_insecure(const char *reason) pr_info("%s\n", reason); } +static void __init pti_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) + pr_info("%s\n", reason); +} + void __init pti_check_boottime_disable(void) { + char arg[5]; + int ret; + if (hypervisor_is_type(X86_HYPER_XEN_PV)) { pti_print_if_insecure("disabled on XEN PV."); return; } + ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); + if (ret > 0) { + if (ret == 3 && !strncmp(arg, "off", 3)) { + pti_print_if_insecure("disabled on command line."); + return; + } + if (ret == 2 && !strncmp(arg, "on", 2)) { + pti_print_if_secure("force enabled on command line."); + goto enable; + } + if (ret == 4 && !strncmp(arg, "auto", 4)) + goto autosel; + } + if 
(cmdline_find_option_bool(boot_command_line, "nopti")) { pti_print_if_insecure("disabled on command line."); return; } +autosel: if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) return; - +enable: setup_force_cpu_cap(X86_FEATURE_PTI); } -- cgit v1.2.3 From 61e9b3671007a5da8127955a1a3bda7e0d5f42e8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:37 +0100 Subject: x86/mm/pti: Add mapping helper functions Add the pagetable helper functions to manage the separate user space page tables. [ tglx: Split out from the big combo kaiser patch. Folded Andy's simplification and made it out of line as Boris suggested ] Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 6 ++- arch/x86/include/asm/pgtable_64.h | 92 +++++++++++++++++++++++++++++++++++++++ arch/x86/mm/pti.c | 41 +++++++++++++++++ 3 files changed, 138 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index f735c3016325..af38d93c4fbb 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -909,7 +909,11 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) +/* + * a shortcut to get a pgd_t in a given mm + */ +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) /* * a shortcut which implies the use of the kernel's pgd, 
instead * of a process's diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e9f05331e732..81462e9a34f6 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp) #endif } +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and + * the user one is in the last 4k. To switch between them, you + * just need to flip the 12th bit in their addresses. + */ +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT + +/* + * This generates better code than the inline assembly in + * __set_bit(). + */ +static inline void *ptr_set_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr |= BIT(bit); + return (void *)__ptr; +} +static inline void *ptr_clear_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr &= ~BIT(bit); + return (void *)__ptr; +} + +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) +{ + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) +{ + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) +{ + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) +{ + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * + * Returns true for parts of the PGD that map userspace and + * false for the parts that map the kernel. 
+ */ +static inline bool pgdp_maps_userspace(void *__ptr) +{ + unsigned long ptr = (unsigned long)__ptr; + + return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2); +} + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd); + +/* + * Take a PGD location (pgdp) and a pgd value that needs to be set there. + * Populates the user and returns the resulting PGD that must be set in + * the kernel copy of the page tables. + */ +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return pgd; + return __pti_set_user_pgd(pgdp, pgd); +} +#else +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +#endif + static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) + p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); +#else *p4dp = p4d; +#endif } static inline void native_p4d_clear(p4d_t *p4d) @@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { +#ifdef CONFIG_PAGE_TABLE_ISOLATION + *pgdp = pti_set_user_pgd(pgdp, pgd); +#else *pgdp = pgd; +#endif } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index a13f6b109865..69a983365392 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -96,6 +96,47 @@ enable: setup_force_cpu_cap(X86_FEATURE_PTI); } +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + /* + * Changes to the high (kernel) portion of the kernelmode page + * tables are not automatically propagated to the usermode tables. + * + * Users should keep in mind that, unlike the kernelmode tables, + * there is no vmalloc_fault equivalent for the usermode tables. + * Top-level entries added to init_mm's usermode pgd after boot + * will not be automatically propagated to other mms. 
+ */ + if (!pgdp_maps_userspace(pgdp)) + return pgd; + + /* + * The user page tables get the full PGD, accessible from + * userspace: + */ + kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; + + /* + * If this is normal user memory, make it NX in the kernel + * pagetables so that, if we somehow screw up and return to + * usermode with the kernel CR3 loaded, we'll get a page fault + * instead of allowing user code to execute with the wrong CR3. + * + * As exceptions, we don't set NX if: + * - _PAGE_USER is not set. This could be an executable + * EFI runtime mapping or something similar, and the kernel + * may execute from it + * - we don't have NX support + * - we're clearing the PGD (i.e. the new pgd is not present). + */ + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && + (__supported_pte_mask & _PAGE_NX)) + pgd.pgd |= _PAGE_NX; + + /* return the copy of the PGD we want the kernel to use: */ + return pgd; +} + /* * Initialize kernel page table isolation */ -- cgit v1.2.3 From 1c4de1ff4fe50453b968579ee86fac3da80dd783 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:38 +0100 Subject: x86/mm/pti: Allow NX poison to be set in p4d/pgd With PAGE_TABLE_ISOLATION the user portion of the kernel page tables is poisoned with the NX bit so if the entry code exits with the kernel page tables selected in CR3, userspace crashes. But doing so trips the p4d/pgd_bad() checks. Make sure it does not do that. Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af38d93c4fbb..2d2d07300b4a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -846,7 +846,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) static inline int p4d_bad(p4d_t p4d) { - return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; + unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (p4d_flags(p4d) & ~ignore_flags) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ @@ -880,7 +885,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; + unsigned long ignore_flags = _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) -- cgit v1.2.3 From d9e9a6418065bb376e5de8d93ce346939b9a37a6 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:39 +0100 Subject: x86/mm/pti: Allocate a separate user PGD Kernel page table isolation requires to have two PGDs. One for the kernel, which contains the full kernel mapping plus the user space mapping and one for user space which contains the user space mappings and the minimal set of kernel mappings which are required by the architecture to be able to transition from and to user space. Add the necessary preliminaries. [ tglx: Split out from the big kaiser dump. 
EFI fixup from Kirill ] Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgalloc.h | 11 +++++++++++ arch/x86/kernel/head_64.S | 30 +++++++++++++++++++++++++++--- arch/x86/mm/pgtable.c | 5 +++-- arch/x86/platform/efi/efi_64.c | 5 ++++- 4 files changed, 45 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 4b5e1eafada7..aff42e1da6ee 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#define PGD_ALLOCATION_ORDER 1 +#else +#define PGD_ALLOCATION_ORDER 0 +#endif + /* * Allocate and free page tables. */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7dca675fe78d..04a625f0fcda 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag) .balign PAGE_SIZE; \ GLOBAL(name) +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Each PGD needs to be 8k long and 8k aligned. We do not + * ever go out to userspace with these, so we do not + * strictly *need* the second page, but this allows us to + * have a single set_pgd() implementation that does not + * need to worry about whether it has 4k or 8k to work + * with. 
+ * + * This ensures PGDs are 8k long: + */ +#define PTI_USER_PGD_FILL 512 +/* This ensures they are 8k-aligned: */ +#define NEXT_PGD_PAGE(name) \ + .balign 2 * PAGE_SIZE; \ +GLOBAL(name) +#else +#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) +#define PTI_USER_PGD_FILL 0 +#endif + /* Automate the creation of 1 to 1 mapping pmd entries */ #define PMDS(START, PERM, COUNT) \ i = 0 ; \ @@ -350,13 +371,14 @@ GLOBAL(name) .endr __INITDATA -NEXT_PAGE(early_top_pgt) +NEXT_PGD_PAGE(early_top_pgt) .fill 511,8,0 #ifdef CONFIG_X86_5LEVEL .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC #else .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC #endif + .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(early_dynamic_pgts) .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 @@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts) .data #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) -NEXT_PAGE(init_top_pgt) +NEXT_PGD_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC + .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC @@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt) */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) #else -NEXT_PAGE(init_top_pgt) +NEXT_PGD_PAGE(init_top_pgt) .fill 512,8,0 + .fill PTI_USER_PGD_FILL,8,0 #endif #ifdef CONFIG_X86_5LEVEL diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 17ebc5a978cc..9b7bcbd33cc2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } #else + static inline pgd_t *_pgd_alloc(void) { - return (pgd_t *)__get_free_page(PGALLOC_GFP); + return (pgd_t 
*)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) { - free_page((unsigned long)pgd); + free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); } #endif /* CONFIG_X86_PAE */ diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 20fb31579b69..39c4b35ac7a4 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -195,6 +195,9 @@ static pgd_t *efi_pgd; * because we want to avoid inserting EFI region mappings (EFI_VA_END * to EFI_VA_START) into the standard kernel page tables. Everything * else can be shared, see efi_sync_low_kernel_mappings(). + * + * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the + * allocation. */ int __init efi_alloc_page_tables(void) { @@ -207,7 +210,7 @@ int __init efi_alloc_page_tables(void) return 0; gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) return -ENOMEM; -- cgit v1.2.3 From fc2fbc8512ed08d1de7720936fd7d2e4ce02c3a2 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:40 +0100 Subject: x86/mm/pti: Populate user PGD In clone_pgd_range() copy the init user PGDs which cover the kernel half of the address space, so a process has all the required kernel mappings visible. [ tglx: Split out from the big kaiser dump and folded Andys simplification ] Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2d2d07300b4a..cc6fa75884e9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1119,7 +1119,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + memcpy(dst, src, count * sizeof(pgd_t)); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + /* Clone the user space pgd as well */ + memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), + count * sizeof(pgd_t)); +#endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) -- cgit v1.2.3 From 03f4424f348e8be95eb1bbeba09461cd7b867828 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:42 +0100 Subject: x86/mm/pti: Add functions to clone kernel PMDs Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andy's simplifications ] Originally-by: Dave Hansen Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pti.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 69a983365392..d58bcee470fc 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -48,6 +48,11 @@ #undef pr_fmt #define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt +/* Backporting helper */ +#ifndef __GFP_NOTRACK +#define __GFP_NOTRACK 0 +#endif + static void __init pti_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) @@ -137,6 +142,128 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) return pgd; } +/* + * Walk the user copy of the page tables (optionally) trying to allocate + * page table pages on the way down. + * + * Returns a pointer to a P4D on success, or NULL on failure. + */ +static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) +{ + pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + + if (address < PAGE_OFFSET) { + WARN_ONCE(1, "attempt to walk user address\n"); + return NULL; + } + + if (pgd_none(*pgd)) { + unsigned long new_p4d_page = __get_free_page(gfp); + if (!new_p4d_page) + return NULL; + + if (pgd_none(*pgd)) { + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); + new_p4d_page = 0; + } + if (new_p4d_page) + free_page(new_p4d_page); + } + BUILD_BUG_ON(pgd_large(*pgd) != 0); + + return p4d_offset(pgd, address); +} + +/* + * Walk the user copy of the page tables (optionally) trying to allocate + * page table pages on the way down. + * + * Returns a pointer to a PMD on success, or NULL on failure. 
+ */ +static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + p4d_t *p4d = pti_user_pagetable_walk_p4d(address); + pud_t *pud; + + BUILD_BUG_ON(p4d_large(*p4d) != 0); + if (p4d_none(*p4d)) { + unsigned long new_pud_page = __get_free_page(gfp); + if (!new_pud_page) + return NULL; + + if (p4d_none(*p4d)) { + set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); + new_pud_page = 0; + } + if (new_pud_page) + free_page(new_pud_page); + } + + pud = pud_offset(p4d, address); + /* The user page tables do not use large mappings: */ + if (pud_large(*pud)) { + WARN_ON(1); + return NULL; + } + if (pud_none(*pud)) { + unsigned long new_pmd_page = __get_free_page(gfp); + if (!new_pmd_page) + return NULL; + + if (pud_none(*pud)) { + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); + new_pmd_page = 0; + } + if (new_pmd_page) + free_page(new_pmd_page); + } + + return pmd_offset(pud, address); +} + +static void __init +pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) +{ + unsigned long addr; + + /* + * Clone the populated PMDs which cover start to end. These PMD areas + * can have holes. + */ + for (addr = start; addr < end; addr += PMD_SIZE) { + pmd_t *pmd, *target_pmd; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + pgd = pgd_offset_k(addr); + if (WARN_ON(pgd_none(*pgd))) + return; + p4d = p4d_offset(pgd, addr); + if (WARN_ON(p4d_none(*p4d))) + return; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + continue; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + + target_pmd = pti_user_pagetable_walk_pmd(addr); + if (WARN_ON(!target_pmd)) + return; + + /* + * Copy the PMD. 
That is, the kernelmode and usermode + * tables will share the last-level page tables of this + * address range + */ + *target_pmd = pmd_clear_flags(*pmd, clear); + } +} + /* * Initialize kernel page table isolation */ -- cgit v1.2.3 From 8d4b067895791ab9fdb1aadfc505f64d71239dd2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:43 +0100 Subject: x86/mm/pti: Force entry through trampoline when PTI active Force the entry through the trampoline only when PTI is active. Otherwise go through the normal entry code. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a9210f9b7cf8..f2a94dfb434e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1339,7 +1339,10 @@ void syscall_init(void) (entry_SYSCALL_64_trampoline - _entry_trampoline); wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); + if (static_cpu_has(X86_FEATURE_PTI)) + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); + else + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); -- cgit v1.2.3 From f7cfbee91559ca7e3e961a00ffac921208a115ad Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 4 Dec 2017 15:07:45 +0100 Subject: x86/mm/pti: Share cpu_entry_area with user space page tables Share the cpu entry area so the user space and kernel space page tables have 
the same P4D page. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pti.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index d58bcee470fc..59290356f19f 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -264,6 +264,29 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) } } +/* + * Clone a single p4d (i.e. a top-level entry on 4-level systems and a + * next-level entry on 5-level systems). + */ +static void __init pti_clone_p4d(unsigned long addr) +{ + p4d_t *kernel_p4d, *user_p4d; + pgd_t *kernel_pgd; + + user_p4d = pti_user_pagetable_walk_p4d(addr); + kernel_pgd = pgd_offset_k(addr); + kernel_p4d = p4d_offset(kernel_pgd, addr); + *user_p4d = *kernel_p4d; +} + +/* + * Clone the CPU_ENTRY_AREA into the user space visible page table. + */ +static void __init pti_clone_user_shared(void) +{ + pti_clone_p4d(CPU_ENTRY_AREA_BASE); +} + /* * Initialize kernel page table isolation */ @@ -273,4 +296,6 @@ void __init pti_init(void) return; pr_info("enabled\n"); + + pti_clone_user_shared(); } -- cgit v1.2.3 From 2f7412ba9c6af5ab16bdbb4a3fdb1dcd2b4fd3c2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:46 +0100 Subject: x86/entry: Align entry text section to PMD boundary The (irq)entry text must be visible in the user space page tables. To allow simple PMD based sharing, make the entry text PMD aligned.
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d2a8b5a24a44..1e413a9326aa 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -61,11 +61,17 @@ jiffies_64 = jiffies; . = ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; +#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); +#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); + #else #define X64_ALIGN_RODATA_BEGIN #define X64_ALIGN_RODATA_END +#define ALIGN_ENTRY_TEXT_BEGIN +#define ALIGN_ENTRY_TEXT_END + #endif PHDRS { @@ -102,8 +108,10 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + ALIGN_ENTRY_TEXT_BEGIN ENTRY_TEXT IRQENTRY_TEXT + ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) -- cgit v1.2.3 From 6dc72c3cbca0580642808d677181cad4c6433893 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:47 +0100 Subject: x86/mm/pti: Share entry text PMD Share the entry text PMD of the kernel mapping with the user space mapping. If large pages are enabled this is a single PMD entry and at the point where it is copied into the user page table the RW bit has not been cleared yet. Clear it right away so the user space visible map becomes RX. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pti.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 59290356f19f..0e78797650a7 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -287,6 +287,15 @@ static void __init pti_clone_user_shared(void) pti_clone_p4d(CPU_ENTRY_AREA_BASE); } +/* + * Clone the populated PMDs of the entry and irqentry text and force it RO. + */ +static void __init pti_clone_entry_text(void) +{ + pti_clone_pmds((unsigned long) __entry_text_start, + (unsigned long) __irqentry_text_end, _PAGE_RW); +} + /* * Initialize kernel page table isolation */ @@ -298,4 +307,5 @@ void __init pti_init(void) pr_info("enabled\n"); pti_clone_user_shared(); + pti_clone_entry_text(); } -- cgit v1.2.3 From 4b6bbe95b87966ba08999574db65c93c5e925a36 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 15 Dec 2017 22:08:18 +0100 Subject: x86/mm/pti: Map ESPFIX into user space Map the ESPFIX pages into user space when PTI is enabled. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/mm/pti.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 0e78797650a7..b1c38ef9fbbb 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -287,6 +287,16 @@ static void __init pti_clone_user_shared(void) pti_clone_p4d(CPU_ENTRY_AREA_BASE); } +/* + * Clone the ESPFIX P4D into the user space visible page table + */ +static void __init pti_setup_espfix64(void) +{ +#ifdef CONFIG_X86_ESPFIX64 + pti_clone_p4d(ESPFIX_BASE_ADDR); +#endif +} + /* * Clone the populated PMDs of the entry and irqentry text and force it RO. */ @@ -308,4 +318,5 @@ void __init pti_init(void) pti_clone_user_shared(); pti_clone_entry_text(); + pti_setup_espfix64(); } -- cgit v1.2.3 From 10043e02db7f8a4161f76434931051e7d797a5f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:07:49 +0100 Subject: x86/cpu_entry_area: Add debugstore entries to cpu_entry_area The Intel PEBS/BTS debug store is a design trainwreck as it expects virtual addresses which must be visible in any execution context. So it is required to make these mappings visible to user space when kernel page table isolation is active. Provide enough room for the buffer mappings in the cpu_entry_area so the buffers are available in the user space visible page tables. At the point where the kernel side entry area is populated there is no buffer available yet, but the kernel PMD must be populated. To achieve this set the entries for these buffers to non present. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 5 +++-- arch/x86/events/perf_event.h | 21 ++------------------ arch/x86/include/asm/cpu_entry_area.h | 13 +++++++++++++ arch/x86/include/asm/intel_ds.h | 36 +++++++++++++++++++++++++++++++++++ arch/x86/mm/cpu_entry_area.c | 27 ++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 21 deletions(-) create mode 100644 arch/x86/include/asm/intel_ds.h diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3674a4b6f8bd..6522f0279cb8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -8,11 +8,12 @@ #include "../perf_event.h" +/* Waste a full page so it can be mapped into the cpu_entry_area */ +DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 -#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) -#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) #define PEBS_FIXUP_SIZE PAGE_SIZE /* diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index f7aaadf9331f..373f9eda80b1 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -14,6 +14,8 @@ #include +#include + /* To enable MSR tracing please use the generic trace points. */ /* @@ -77,8 +79,6 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 8 #define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) /* @@ -95,23 +95,6 @@ struct amd_nb { PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. 
- */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; - #define PEBS_REGS \ (PERF_REG_X86_AX | \ PERF_REG_X86_BX | \ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 2fbc69a0916e..4a7884b8dca5 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -5,6 +5,7 @@ #include #include +#include /* * cpu_entry_area is a percpu region that contains things needed by the CPU @@ -40,6 +41,18 @@ struct cpu_entry_area { */ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; #endif +#ifdef CONFIG_CPU_SUP_INTEL + /* + * Per CPU debug store for Intel performance monitoring. Wastes a + * full page at the moment. + */ + struct debug_store cpu_debug_store; + /* + * The actual PEBS/BTS buffers must be mapped to user space + * Reserve enough fixmap PTEs. + */ + struct debug_store_buffers cpu_debug_buffers; +#endif }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h new file mode 100644 index 000000000000..62a9f4966b42 --- /dev/null +++ b/arch/x86/include/asm/intel_ds.h @@ -0,0 +1,36 @@ +#ifndef _ASM_INTEL_DS_H +#define _ASM_INTEL_DS_H + +#include + +#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 8 + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. 
+ */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +} __aligned(PAGE_SIZE); + +DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + +struct debug_store_buffers { + char bts_buffer[BTS_BUFFER_SIZE]; + char pebs_buffer[PEBS_BUFFER_SIZE]; +}; + +#endif diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index fe814fd5e014..b9283cc27622 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -38,6 +38,32 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } +static void percpu_setup_debug_store(int cpu) +{ +#ifdef CONFIG_CPU_SUP_INTEL + int npages; + void *cea; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + cea = &get_cpu_entry_area(cpu)->cpu_debug_store; + npages = sizeof(struct debug_store) / PAGE_SIZE; + BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0); + cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages, + PAGE_KERNEL); + + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers; + /* + * Force the population of PMDs for not yet allocated per cpu + * memory like debug store buffers. 
+ */ + npages = sizeof(struct debug_store_buffers) / PAGE_SIZE; + for (; npages; npages--, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); +#endif +} + /* Setup the fixmap mappings only once per-processor */ static void __init setup_cpu_entry_area(int cpu) { @@ -109,6 +135,7 @@ static void __init setup_cpu_entry_area(int cpu) cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); #endif + percpu_setup_debug_store(cpu); } static __init void setup_cpu_entry_area_ptes(void) -- cgit v1.2.3 From c1961a4631daef4aeabee8e368b1b13e8f173c91 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 4 Dec 2017 15:07:50 +0100 Subject: x86/events/intel/ds: Map debug buffers in cpu_entry_area The BTS and PEBS buffers both have their virtual addresses programmed into the hardware. This means that any access to them is performed via the page tables. The times that the hardware accesses these are entirely dependent on how the performance monitoring hardware events are set up. In other words, there is no way for the kernel to tell when the hardware might access these buffers. To avoid perf crashes, place 'debug_store' allocate pages and map them into the cpu_entry_area. The PEBS fixup buffer does not need this treatment. [ tglx: Got rid of the kaiser_add_mapping() complication ] Signed-off-by: Hugh Dickins Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 125 +++++++++++++++++++++++++++---------------- arch/x86/events/perf_event.h | 2 + 2 files changed, 82 insertions(+), 45 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 6522f0279cb8..8f0aace08b87 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -280,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu) static DEFINE_PER_CPU(void *, insn_buffer); -static int alloc_pebs_buffer(int cpu) +static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + phys_addr_t pa; + size_t msz = 0; + + pa = virt_to_phys(addr); + for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, pa, prot); +} + +static void ds_clear_cea(void *cea, size_t size) +{ + size_t msz = 0; + + for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); +} + +static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) +{ + unsigned int order = get_order(size); int node = cpu_to_node(cpu); - int max; - void *buffer, *ibuffer; + struct page *page; + + page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + return page ? 
page_address(page) : NULL; +} + +static void dsfree_pages(const void *buffer, size_t size) +{ + if (buffer) + free_pages((unsigned long)buffer, get_order(size)); +} + +static int alloc_pebs_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + size_t bsiz = x86_pmu.pebs_buffer_size; + int max, node = cpu_to_node(cpu); + void *buffer, *ibuffer, *cea; if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); + buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); if (unlikely(!buffer)) return -ENOMEM; @@ -301,25 +337,27 @@ static int alloc_pebs_buffer(int cpu) if (x86_pmu.intel_cap.pebs_format < 2) { ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); if (!ibuffer) { - kfree(buffer); + dsfree_pages(buffer, bsiz); return -ENOMEM; } per_cpu(insn_buffer, cpu) = ibuffer; } - - max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; - - ds->pebs_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_pebs_vaddr = buffer; + /* Update the cpu entry area mapping */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds->pebs_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL); ds->pebs_index = ds->pebs_buffer_base; - ds->pebs_absolute_maximum = ds->pebs_buffer_base + - max * x86_pmu.pebs_record_size; - + max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size); + ds->pebs_absolute_maximum = ds->pebs_buffer_base + max; return 0; } static void release_pebs_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.pebs) return; @@ -327,73 +365,70 @@ static void release_pebs_buffer(int cpu) kfree(per_cpu(insn_buffer, cpu)); per_cpu(insn_buffer, cpu) = NULL; - kfree((void *)(unsigned long)ds->pebs_buffer_base); + /* Clear the fixmap */ + cea = 
&get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds_clear_cea(cea, x86_pmu.pebs_buffer_size); ds->pebs_buffer_base = 0; + dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); + hwev->ds_pebs_vaddr = NULL; } static int alloc_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - int node = cpu_to_node(cpu); - int max, thresh; - void *buffer; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *buffer, *cea; + int max; if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu); if (unlikely(!buffer)) { WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; } - - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; - thresh = max / 16; - - ds->bts_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_bts_vaddr = buffer; + /* Update the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds->bts_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = ds->bts_buffer_base + - max * BTS_RECORD_SIZE; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - - thresh * BTS_RECORD_SIZE; - + max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE); + ds->bts_absolute_maximum = ds->bts_buffer_base + max; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16); return 0; } static void release_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.bts) return; - kfree((void *)(unsigned long)ds->bts_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds_clear_cea(cea, BTS_BUFFER_SIZE); 
ds->bts_buffer_base = 0; + dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); + hwev->ds_bts_vaddr = NULL; } static int alloc_ds_buffer(int cpu) { - int node = cpu_to_node(cpu); - struct debug_store *ds; - - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); - if (unlikely(!ds)) - return -ENOMEM; + struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store; + memset(ds, 0, sizeof(*ds)); per_cpu(cpu_hw_events, cpu).ds = ds; - return 0; } static void release_ds_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - per_cpu(cpu_hw_events, cpu).ds = NULL; - kfree(ds); } void release_ds_buffers(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 373f9eda80b1..8e4ea143ed96 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -199,6 +199,8 @@ struct cpu_hw_events { * Intel DebugStore bits */ struct debug_store *ds; + void *ds_pebs_vaddr; + void *ds_bts_vaddr; u64 pebs_enabled; int n_pebs; int n_large_pebs; -- cgit v1.2.3 From 9f449772a3106bcdd4eb8fdeb281147b0e99fb30 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 12 Dec 2017 07:56:44 -0800 Subject: x86/mm/64: Make a full PGD-entry size hole in the memory map Shrink vmalloc space from 16384TiB to 12800TiB to enlarge the hole starting at 0xff90000000000000 to be a full PGD entry. A subsequent patch will use this hole for the pagetable isolation LDT alias. Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 4 ++-- arch/x86/include/asm/pgtable_64_types.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 51101708a03a..496a1dbf139d 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -29,8 +29,8 @@ Virtual memory map with 5 level page tables: hole caused by [56:63] sign extension ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory -ff90000000000000 - ff91ffffffffffff (=49 bits) hole -ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space +ff90000000000000 - ff9fffffffffffff (=52 bits) hole +ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB) ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 3d27831bc58d..83e9489ae944 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -79,8 +79,8 @@ typedef struct { pteval_t pte; } pte_t; #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) #ifdef CONFIG_X86_5LEVEL -# define VMALLOC_SIZE_TB _AC(16384, UL) -# define __VMALLOC_BASE _AC(0xff92000000000000, UL) +# define VMALLOC_SIZE_TB _AC(12800, UL) +# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #else # define VMALLOC_SIZE_TB _AC(32, UL) -- cgit v1.2.3 From f55f0501cbf65ec41cca5058513031b711730b1d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 12 Dec 2017 07:56:45 -0800 Subject: x86/pti: Put the LDT in its own PGD if PTI is on With PTI enabled, the LDT must be mapped in the usermode tables somewhere. The LDT is per process, i.e. per mm. An earlier approach mapped the LDT on context switch into a fixmap area, but that's a big overhead and exhausted the fixmap space when NR_CPUS got big. Take advantage of the fact that there is an address space hole which provides a completely unused pgd. Use this pgd to manage per-mm LDT mappings. This has a down side: the LDT isn't (currently) randomized, and an attack that can write the LDT is instant root due to call gates (thanks, AMD, for leaving call gates in AMD64 but designing them wrong so they're only useful for exploits). This can be mitigated by making the LDT read-only or randomizing the mapping, either of which is straightforward on top of this patch. This will significantly slow down LDT users, but that shouldn't matter for important workloads -- the LDT is only used by DOSEMU(2), Wine, and very old libc implementations. [ tglx: Cleaned it up. ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Dave Hansen Cc: David Laight Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 3 +- arch/x86/include/asm/mmu_context.h | 59 ++++++++++++-- arch/x86/include/asm/pgtable_64_types.h | 4 + arch/x86/include/asm/processor.h | 23 ++++-- arch/x86/kernel/ldt.c | 139 +++++++++++++++++++++++++++++++- arch/x86/mm/dump_pagetables.c | 9 +++ 6 files changed, 220 insertions(+), 17 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 496a1dbf139d..ad41b3813f0a 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... +fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... @@ -29,7 +30,7 @@ Virtual memory map with 5 level page tables: hole caused by [56:63] sign extension ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory -ff90000000000000 - ff9fffffffffffff (=52 bits) hole +ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB) ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5ede7cae1d67..c931b88982a0 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -50,10 +50,33 @@ struct ldt_struct { * call gates. 
On native, we could merge the ldt_struct and LDT * allocations, but it's not worth trying to optimize. */ - struct desc_struct *entries; - unsigned int nr_entries; + struct desc_struct *entries; + unsigned int nr_entries; + + /* + * If PTI is in use, then the entries array is not mapped while we're + * in user mode. The whole array will be aliased at the address + * given by ldt_slot_va(slot). We use two slots so that we can allocate + * and map, and enable a new LDT without invalidating the mapping + * of an older, still-in-use LDT. + * + * slot will be -1 if this LDT doesn't have an alias mapping. + */ + int slot; }; +/* This is a multiple of PAGE_SIZE. */ +#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) + +static inline void *ldt_slot_va(int slot) +{ +#ifdef CONFIG_X86_64 + return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); +#else + BUG(); +#endif +} + /* * Used for LDT copy/destruction. */ @@ -64,6 +87,7 @@ static inline void init_new_context_ldt(struct mm_struct *mm) } int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); +void ldt_arch_exit_mmap(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ static inline void init_new_context_ldt(struct mm_struct *mm) { } static inline int ldt_dup_context(struct mm_struct *oldmm, @@ -71,7 +95,8 @@ static inline int ldt_dup_context(struct mm_struct *oldmm, { return 0; } -static inline void destroy_context_ldt(struct mm_struct *mm) {} +static inline void destroy_context_ldt(struct mm_struct *mm) { } +static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif static inline void load_mm_ldt(struct mm_struct *mm) @@ -96,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm) * that we can see.
*/ - if (unlikely(ldt)) - set_ldt(ldt->entries, ldt->nr_entries); - else + if (unlikely(ldt)) { + if (static_cpu_has(X86_FEATURE_PTI)) { + if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { + /* + * Whoops -- either the new LDT isn't mapped + * (if slot == -1) or is mapped into a bogus + * slot (if slot > 1). + */ + clear_LDT(); + return; + } + + /* + * If page table isolation is enabled, ldt->entries + * will not be mapped in the userspace pagetables. + * Tell the CPU to access the LDT through the alias + * at ldt_slot_va(ldt->slot). + */ + set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); + } else { + set_ldt(ldt->entries, ldt->nr_entries); + } + } else { clear_LDT(); + } #else clear_LDT(); #endif @@ -194,6 +240,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) static inline void arch_exit_mmap(struct mm_struct *mm) { paravirt_arch_exit_mmap(mm); + ldt_arch_exit_mmap(mm); } #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 83e9489ae944..b97a539bcdee 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -82,10 +82,14 @@ typedef struct { pteval_t pte; } pte_t; # define VMALLOC_SIZE_TB _AC(12800, UL) # define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define LDT_PGD_ENTRY _AC(-112, UL) +# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #else # define VMALLOC_SIZE_TB _AC(32, UL) # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define LDT_PGD_ENTRY _AC(-4, UL) +# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif #ifdef CONFIG_RANDOMIZE_MEMORY diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9e482d8b0b97..9c18da64daa9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -851,13 +851,22 @@ static inline void 
spin_lock_prefetch(const void *x) #else /* - * User space process size. 47bits minus one guard page. The guard - * page is necessary on Intel CPUs: if a SYSCALL instruction is at - * the highest possible canonical userspace address, then that - * syscall will enter the kernel with a non-canonical return - * address, and SYSRET will explode dangerously. We avoid this - * particular problem by preventing anything from being mapped - * at the maximum canonical address. + * User space process size. This is the first address outside the user range. + * There are a few constraints that determine this: + * + * On Intel CPUs, if a SYSCALL instruction is at the highest canonical + * address, then that syscall will enter the kernel with a + * non-canonical return address, and SYSRET will explode dangerously. + * We avoid this particular problem by preventing anything executable + * from being mapped at the maximum canonical address. + * + * On AMD CPUs in the Ryzen family, there's a nasty bug in which the + * CPUs malfunction if they execute code from the highest canonical page. + * They'll speculate right off the end of the canonical space, and + * bad things happen. This is worked around in the same way as the + * Intel problem. + * + * With page table isolation enabled, we map the LDT in ... 
[stay tuned] */ #define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a6b5d62f45a7..9629c5d8267a 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -51,13 +52,11 @@ static void refresh_ldt_segments(void) static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; - mm_context_t *pc; if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) return; - pc = &mm->context; - set_ldt(pc->ldt->entries, pc->ldt->nr_entries); + load_mm_ldt(mm); refresh_ldt_segments(); } @@ -94,10 +93,121 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) return NULL; } + /* The new LDT isn't aliased for PTI yet. */ + new_ldt->slot = -1; + new_ldt->nr_entries = num_entries; return new_ldt; } +/* + * If PTI is enabled, this maps the LDT into the kernelmode and + * usermode tables for the given mm. + * + * There is no corresponding unmap function. Even if the LDT is freed, we + * leave the PTEs around until the slot is reused or the mm is destroyed. + * This is harmless: the LDT is always in ordinary memory, and no one will + * access the freed slot. + * + * If we wanted to unmap freed LDTs, we'd also need to do a flush to make + * it useful, and the flush would slow down modify_ldt(). + */ +static int +map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + bool is_vmalloc, had_top_level_entry; + unsigned long va; + spinlock_t *ptl; + pgd_t *pgd; + int i; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return 0; + + /* + * Any given ldt_struct should have map_ldt_struct() called at most + * once. + */ + WARN_ON(ldt->slot != -1); + + /* + * Did we already have the top level entry allocated? We can't + * use pgd_none() for this because it doesn't do anything on + * 4-level page table kernels.
+ */ + pgd = pgd_offset(mm, LDT_BASE_ADDR); + had_top_level_entry = (pgd->pgd != 0); + + is_vmalloc = is_vmalloc_addr(ldt->entries); + + for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + unsigned long offset = i << PAGE_SHIFT; + const void *src = (char *)ldt->entries + offset; + unsigned long pfn; + pte_t pte, *ptep; + + va = (unsigned long)ldt_slot_va(slot) + offset; + pfn = is_vmalloc ? vmalloc_to_pfn(src) : + page_to_pfn(virt_to_page(src)); + /* + * Treat the PTI LDT range as a *userspace* range. + * get_locked_pte() will allocate all needed pagetables + * and account for them in this mm. + */ + ptep = get_locked_pte(mm, va, &ptl); + if (!ptep) + return -ENOMEM; + pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL)); + set_pte_at(mm, va, ptep, pte); + pte_unmap_unlock(ptep, ptl); + } + + if (mm->context.ldt) { + /* + * We already had an LDT. The top-level entry should already + * have been allocated and synchronized with the usermode + * tables. + */ + WARN_ON(!had_top_level_entry); + if (static_cpu_has(X86_FEATURE_PTI)) + WARN_ON(!kernel_to_user_pgdp(pgd)->pgd); + } else { + /* + * This is the first time we're mapping an LDT for this process. + * Sync the pgd to the usermode tables. + */ + WARN_ON(had_top_level_entry); + if (static_cpu_has(X86_FEATURE_PTI)) { + WARN_ON(kernel_to_user_pgdp(pgd)->pgd); + set_pgd(kernel_to_user_pgdp(pgd), *pgd); + } + } + + va = (unsigned long)ldt_slot_va(slot); + flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0); + + ldt->slot = slot; +#endif + return 0; +} + +static void free_ldt_pgtables(struct mm_struct *mm) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + struct mmu_gather tlb; + unsigned long start = LDT_BASE_ADDR; + unsigned long end = start + (1UL << PGDIR_SHIFT); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + tlb_gather_mmu(&tlb, mm, start, end); + free_pgd_range(&tlb, start, end, start, end); + tlb_finish_mmu(&tlb, start, end); +#endif +} + /* After calling this, the LDT is immutable. 
*/ static void finalize_ldt_struct(struct ldt_struct *ldt) { @@ -156,6 +266,12 @@ int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) new_ldt->nr_entries * LDT_ENTRY_SIZE); finalize_ldt_struct(new_ldt); + retval = map_ldt_struct(mm, new_ldt, 0); + if (retval) { + free_ldt_pgtables(mm); + free_ldt_struct(new_ldt); + goto out_unlock; + } mm->context.ldt = new_ldt; out_unlock: @@ -174,6 +290,11 @@ void destroy_context_ldt(struct mm_struct *mm) mm->context.ldt = NULL; } +void ldt_arch_exit_mmap(struct mm_struct *mm) +{ + free_ldt_pgtables(mm); +} + static int read_ldt(void __user *ptr, unsigned long bytecount) { struct mm_struct *mm = current->mm; @@ -287,6 +408,18 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) new_ldt->entries[ldt_info.entry_number] = ldt; finalize_ldt_struct(new_ldt); + /* + * If we are using PTI, map the new LDT into the userspace pagetables. + * If there is already an LDT, use the other slot so that other CPUs + * will continue to use the old LDT until install_ldt() switches + * them over to the new LDT. + */ + error = map_ldt_struct(mm, new_ldt, old_ldt ? 
!old_ldt->slot : 0); + if (error) { + free_ldt_struct(old_ldt); + goto out_unlock; + } + install_ldt(mm, new_ldt); free_ldt_struct(old_ldt); error = 0; diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 43dedbfb7257..690eaf31ca34 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -52,11 +52,17 @@ enum address_markers_idx { USER_SPACE_NR = 0, KERNEL_SPACE_NR, LOW_KERNEL_NR, +#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL) + LDT_NR, +#endif VMALLOC_START_NR, VMEMMAP_START_NR, #ifdef CONFIG_KASAN KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, +#endif +#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL) + LDT_NR, #endif CPU_ENTRY_AREA_NR, #ifdef CONFIG_X86_ESPFIX64 @@ -81,6 +87,9 @@ static struct addr_marker address_markers[] = { #ifdef CONFIG_KASAN [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, +#endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL + [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" }, #endif [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, #ifdef CONFIG_X86_ESPFIX64 -- cgit v1.2.3 From 85900ea51577e31b186e523c8f4e068c79ecc7d3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 12 Dec 2017 07:56:42 -0800 Subject: x86/pti: Map the vsyscall page if needed Make VSYSCALLs work fully in PTI mode by mapping them properly to the user space visible page tables. [ tglx: Hide unused functions (Patch by Arnd Bergmann) ] Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 6 ++-- arch/x86/include/asm/vsyscall.h | 1 + arch/x86/mm/pti.c | 65 +++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 1faf40f2dda9..577fa8adb785 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -344,14 +344,14 @@ int in_gate_area_no_mm(unsigned long addr) * vsyscalls but leave the page not present. If so, we skip calling * this. */ -static void __init set_vsyscall_pgtable_user_bits(void) +void __init set_vsyscall_pgtable_user_bits(pgd_t *root) { pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; - pgd = pgd_offset_k(VSYSCALL_ADDR); + pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 @@ -373,7 +373,7 @@ void __init map_vsyscall(void) vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); - set_vsyscall_pgtable_user_bits(); + set_vsyscall_pgtable_user_bits(swapper_pg_dir); } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d9a7c659009c..b986b2ca688a 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -7,6 +7,7 @@ #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); +extern void set_vsyscall_pgtable_user_bits(pgd_t *root); /* * Called on instruction fetch fault in vsyscall page. 
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index b1c38ef9fbbb..bce8aea65606 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -223,6 +224,69 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) return pmd_offset(pud, address); } +#ifdef CONFIG_X86_VSYSCALL_EMULATION +/* + * Walk the shadow copy of the page tables (optionally) trying to allocate + * page table pages on the way down. Does not support large pages. + * + * Note: this is only used when mapping *new* kernel data into the + * user/shadow page tables. It is never used for userspace data. + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + pmd_t *pmd = pti_user_pagetable_walk_pmd(address); + pte_t *pte; + + /* We can't do anything sensible if we hit a large mapping. */ + if (pmd_large(*pmd)) { + WARN_ON(1); + return NULL; + } + + if (pmd_none(*pmd)) { + unsigned long new_pte_page = __get_free_page(gfp); + if (!new_pte_page) + return NULL; + + if (pmd_none(*pmd)) { + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); + new_pte_page = 0; + } + if (new_pte_page) + free_page(new_pte_page); + } + + pte = pte_offset_kernel(pmd, address); + if (pte_flags(*pte) & _PAGE_USER) { + WARN_ONCE(1, "attempt to walk to user pte\n"); + return NULL; + } + return pte; +} + +static void __init pti_setup_vsyscall(void) +{ + pte_t *pte, *target_pte; + unsigned int level; + + pte = lookup_address(VSYSCALL_ADDR, &level); + if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) + return; + + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + if (WARN_ON(!target_pte)) + return; + + *target_pte = *pte; + set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir)); +} +#else +static void __init pti_setup_vsyscall(void) { } +#endif + static void __init 
pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) { @@ -319,4 +383,5 @@ void __init pti_init(void) pti_clone_user_shared(); pti_clone_entry_text(); pti_setup_espfix64(); + pti_setup_vsyscall(); } -- cgit v1.2.3 From 2ea907c4fe7b78e5840c1dc07800eae93248cad1 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:57 +0100 Subject: x86/mm: Allow flushing for future ASID switches If changing the page tables in such a way that an invalidation of all contexts (aka. PCIDs / ASIDs) is required, they can be actively invalidated by: 1. INVPCID for each PCID (works for single pages too). 2. Load CR3 with each PCID without the NOFLUSH bit set 3. Load CR3 with the NOFLUSH bit set for each and do INVLPG for each address. But, none of these are really feasible since there are ~6 ASIDs (12 with PAGE_TABLE_ISOLATION) at the time that invalidation is required. Instead of actively invalidating them, invalidate the *current* context and also mark the cpu_tlbstate _quickly_ to indicate future invalidation to be required. At the next context-switch, look for this indicator ('invalidate_other' being set) invalidate all of the cpu_tlbstate.ctxs[] entries. This ensures that any future context switches will do a full flush of the TLB, picking up the previous changes. [ tglx: Folded more fixups from Peter ] Signed-off-by: Dave Hansen Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 37 +++++++++++++++++++++++++++++-------- arch/x86/mm/tlb.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 171b429f43a2..490a706fdba8 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -134,6 +134,17 @@ struct tlb_state { */ bool is_lazy; + /* + * If set we changed the page tables in such a way that we + * needed an invalidation of all contexts (aka. PCIDs / ASIDs). + * This tells us to go invalidate all the non-loaded ctxs[] + * on the next context switch. + * + * The current ctx was kept up-to-date as it ran and does not + * need to be invalidated. + */ + bool invalidate_other; + /* * Access to this CR4 shadow and to H/W CR4 is protected by * disabling interrupts when modifying either one. @@ -211,6 +222,14 @@ static inline unsigned long cr4_read_shadow(void) return this_cpu_read(cpu_tlbstate.cr4); } +/* + * Mark all other ASIDs as invalid, preserves the current. + */ +static inline void invalidate_other_asid(void) +{ + this_cpu_write(cpu_tlbstate.invalidate_other, true); +} + /* * Save some of cr4 feature set we're using (e.g. Pentium 4MB * enable and PPro Global page enable), so that any CPU's that boot @@ -298,14 +317,6 @@ static inline void __flush_tlb_all(void) */ __flush_tlb(); } - - /* - * Note: if we somehow had PCID but not PGE, then this wouldn't work -- - * we'd end up flushing kernel translations for the current ASID but - * we might fail to flush kernel translations for other cached ASIDs. - * - * To avoid this issue, we force PCID off if PGE is off. 
- */ } /* @@ -315,6 +326,16 @@ static inline void __flush_tlb_one(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * __flush_tlb_single() will have cleared the TLB entry for this ASID, + * but since kernel space is replicated across all, we must also + * invalidate all others. + */ + invalidate_other_asid(); } #define TLB_FLUSH_ALL -1UL diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 0a1be3adc97e..254c9eb79fe5 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -28,6 +28,38 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * We get here when we do something requiring a TLB invalidation + * but could not go invalidate all of the contexts. We do the + * necessary invalidation by clearing out the 'ctx_id' which + * forces a TLB flush when the context is loaded. + */ +void clear_asid_other(void) +{ + u16 asid; + + /* + * This is only expected to be set if we have disabled + * kernel _PAGE_GLOBAL pages. 
+ */ + if (!static_cpu_has(X86_FEATURE_PTI)) { + WARN_ON_ONCE(1); + return; + } + + for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { + /* Do not need to flush the current asid */ + if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid)) + continue; + /* + * Make sure the next time we go to switch to + * this asid, we do a flush: + */ + this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0); + } + this_cpu_write(cpu_tlbstate.invalidate_other, false); +} + atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); @@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + if (this_cpu_read(cpu_tlbstate.invalidate_other)) + clear_asid_other(); + for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != next->context.ctx_id) -- cgit v1.2.3 From 48e111982cda033fec832c6b0592c2acedd85d04 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:07:58 +0100 Subject: x86/mm: Abstract switching CR3 In preparation to adding additional PCID flushing, abstract the loading of a new ASID into CR3. [ PeterZ: Split out from big combo patch ] Signed-off-by: Dave Hansen Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/mm/tlb.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 254c9eb79fe5..42a8875f73fe 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -100,6 +100,24 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } +static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush) +{ + unsigned long new_mm_cr3; + + if (need_flush) { + new_mm_cr3 = build_cr3(pgdir, new_asid); + } else { + new_mm_cr3 = build_cr3_noflush(pgdir, new_asid); + } + + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask writes. + */ + write_cr3(new_mm_cr3); +} + void leave_mm(int cpu) { struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); @@ -230,7 +248,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(build_cr3(next->pgd, new_asid)); + load_new_mm_cr3(next->pgd, new_asid, true); /* * NB: This gets called via leave_mm() in the idle path @@ -243,7 +261,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(build_cr3_noflush(next->pgd, new_asid)); + load_new_mm_cr3(next->pgd, new_asid, false); /* See above wrt _rcuidle. 
*/ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); -- cgit v1.2.3 From 6fd166aae78c0ab738d49bda653cbd9e3b1491cf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Dec 2017 15:07:59 +0100 Subject: x86/mm: Use/Fix PCID to optimize user/kernel switches We can use PCID to retain the TLBs across CR3 switches; including those now part of the user/kernel switch. This increases performance of kernel entry/exit at the cost of more expensive/complicated TLB flushing. Now that we have two address spaces, one for kernel and one for user space, we need two PCIDs per mm. We use the top PCID bit to indicate a user PCID (just like we use the PFN LSB for the PGD). Since we do TLB invalidation from kernel space, the existing code will only invalidate the kernel PCID, we augment that by marking the corresponding user PCID invalid, and upon switching back to userspace, use a flushing CR3 write for the switch. In order to access the user_pcid_flush_mask we use PER_CPU storage, which means the previously established SWAPGS vs CR3 ordering is now mandatory and required. Having to do this memory access does require additional registers, most sites have a functioning stack and we can spill one (RAX), sites without functional stack need to otherwise provide the second scratch register. Note: PCID is generally available on Intel Sandybridge and later CPUs. Note: Up until this point TLB flushing was broken in this series. Based-on-code-from: Dave Hansen Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 72 ++++++++++++++++++----- arch/x86/entry/entry_64.S | 9 +-- arch/x86/entry/entry_64_compat.S | 4 +- arch/x86/include/asm/processor-flags.h | 5 ++ arch/x86/include/asm/tlbflush.h | 91 +++++++++++++++++++++++++---- arch/x86/include/uapi/asm/processor-flags.h | 7 ++- arch/x86/kernel/asm-offsets.c | 4 ++ arch/x86/mm/init.c | 2 +- arch/x86/mm/tlb.c | 1 + 9 files changed, 162 insertions(+), 33 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3d3389a92c33..7894e5c0eef7 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include /* @@ -191,17 +194,21 @@ For 32-bit we have the following conventions - kernel is built with #ifdef CONFIG_PAGE_TABLE_ISOLATION -/* PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two halves: */ -#define PTI_SWITCH_MASK (1< #include #include -#include "calling.h" #include #include #include @@ -40,6 +39,8 @@ #include #include +#include "calling.h" + .code64 .section .entry.text, "ax" @@ -406,7 +407,7 @@ syscall_return_via_sysret: * We are on the trampoline stack. All regs except RDI are live. * We can do future final exit work right here. */ - SWITCH_TO_USER_CR3 scratch_reg=%rdi + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi popq %rdi popq %rsp @@ -744,7 +745,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) * We can do future final exit work right here. */ - SWITCH_TO_USER_CR3 scratch_reg=%rdi + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi /* Restore RDI. 
*/ popq %rdi @@ -857,7 +858,7 @@ native_irq_return_ldt: */ orq PER_CPU_VAR(espfix_stack), %rax - SWITCH_TO_USER_CR3 scratch_reg=%rdi /* to user CR3 */ + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi SWAPGS /* to user GS */ popq %rdi /* Restore user RDI */ diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 05238b29895e..40f17009ec20 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -275,9 +275,9 @@ sysret32_from_system_call: * switch until after after the last reference to the process * stack. * - * %r8 is zeroed before the sysret, thus safe to clobber. + * %r8/%r9 are zeroed before the sysret, thus safe to clobber. */ - SWITCH_TO_USER_CR3 scratch_reg=%r8 + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 xorq %r8, %r8 xorq %r9, %r9 diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 43212a43ee69..6a60fea90b9d 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -38,6 +38,11 @@ #define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) #define CR3_PCID_MASK 0xFFFull #define CR3_NOFLUSH BIT_ULL(63) + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define X86_CR3_PTI_SWITCH_BIT 11 +#endif + #else /* * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 490a706fdba8..5dcc38b16604 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -10,6 +10,8 @@ #include #include #include +#include +#include static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { @@ -24,24 +26,54 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) /* There are 12 bits of space for ASIDS in CR3 */ #define CR3_HW_ASID_BITS 12 + /* * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for * user/kernel switches */ -#define PTI_CONSUMED_ASID_BITS 0 +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define 
PTI_CONSUMED_PCID_BITS 1 +#else +# define PTI_CONSUMED_PCID_BITS 0 +#endif + +#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) -#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS) /* * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account * for them being zero-based. Another -1 is because ASID 0 is reserved for * use by non-PCID-aware users. */ -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2) +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) + +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 static inline u16 kern_pcid(u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Make sure that the dynamic ASID space does not conflict with the + * bit we are using to switch between user and kernel ASIDs. + */ + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT)); + /* + * The ASID being passed in here should have respected the + * MAX_ASID_AVAILABLE and thus never have the switch bit set.
+ */ + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT)); +#endif + /* + * The dynamically-assigned ASIDs that get passed in are small + * (mm == NULL then we borrow a mm which may change during a - * task switch and therefore we must not be preempted while we write CR3 - * back: + * If current->mm == NULL then we borrow a mm which may change + * during a task switch and therefore we must not be preempted + * while we write CR3 back: */ preempt_disable(); native_write_cr3(__native_read_cr3()); @@ -301,7 +361,14 @@ static inline void __native_flush_tlb_global(void) */ static inline void __native_flush_tlb_single(unsigned long addr) { + u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + invalidate_user_asid(loaded_mm_asid); } /* diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 53b4ca55ebb6..97abdaab9535 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -78,7 +78,12 @@ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) -#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ + +#define X86_CR3_PCID_BITS 12 +#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL)) + +#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ +#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) /* * Intel CPU features in CR4 diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 676b7cf4b62b..76417a9aab73 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_XEN #include @@ -94,6 +95,9 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + /* TLB state for the entry code */ + 
OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask); + /* Layout info for cpu_entry_area */ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index af75069fb116..caeb8a7bf0a4 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -855,7 +855,7 @@ void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { .loaded_mm = &init_mm, .next_asid = 1, .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 42a8875f73fe..a1561957dccb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -105,6 +105,7 @@ static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush) unsigned long new_mm_cr3; if (need_flush) { + invalidate_user_asid(new_asid); new_mm_cr3 = build_cr3(pgdir, new_asid); } else { new_mm_cr3 = build_cr3_noflush(pgdir, new_asid); -- cgit v1.2.3 From 21e94459110252d41b45c0c8ba50fd72a664d50c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Dec 2017 15:08:00 +0100 Subject: x86/mm: Optimize RESTORE_CR3 Most NMI/paranoid exceptions will not in fact change pagetables and would thus not require TLB flushing, however RESTORE_CR3 uses flushing CR3 writes. Restores to kernel PCIDs can be NOFLUSH, because we explicitly flush the kernel mappings and now that we track which user PCIDs need flushing we can avoid those too when possible. This does mean RESTORE_CR3 needs an additional scratch_reg, luckily both sites have plenty available. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/entry/calling.h | 30 ++++++++++++++++++++++++++++-- arch/x86/entry/entry_64.S | 4 ++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 7894e5c0eef7..45a63e00a6af 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -281,8 +281,34 @@ For 32-bit we have the following conventions - kernel is built with .Ldone_\@: .endm -.macro RESTORE_CR3 save_reg:req +.macro RESTORE_CR3 scratch_reg:req save_reg:req ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * KERNEL pages can always resume with NOFLUSH as we do + * explicit flushes. + */ + bt $X86_CR3_PTI_SWITCH_BIT, \save_reg + jnc .Lnoflush_\@ + + /* + * Check if there's a pending flush for the user ASID we're + * about to set. + */ + movq \save_reg, \scratch_reg + andq $(0x7FF), \scratch_reg + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + jmp .Lwrcr3_\@ + +.Lnoflush_\@: + SET_NOFLUSH_BIT \save_reg + +.Lwrcr3_\@: /* * The CR3 write could be avoided when not changing its value, * but would require a CR3 read *and* a scratch register. @@ -301,7 +327,7 @@ For 32-bit we have the following conventions - kernel is built with .endm .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req .endm -.macro RESTORE_CR3 save_reg:req +.macro RESTORE_CR3 scratch_reg:req save_reg:req .endm #endif diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index fd501844af1f..ed31d00dc5ee 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1288,7 +1288,7 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? 
*/ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ - RESTORE_CR3 save_reg=%r14 + RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: @@ -1730,7 +1730,7 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi - RESTORE_CR3 save_reg=%r14 + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore -- cgit v1.2.3 From 6cff64b86aaaa07f89f50498055a20e45754b0c1 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:08:01 +0100 Subject: x86/mm: Use INVPCID for __native_flush_tlb_single() This uses INVPCID to shoot down individual lines of the user mapping instead of marking the entire user map as invalid. This could/might/possibly be faster. This for sure needs tlb_single_page_flush_ceiling to be redetermined; esp. since INVPCID is _slow_. A detailed performance analysis is available here: https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com [ Peterz: Split out from big combo patch ] Signed-off-by: Dave Hansen Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/tlbflush.h | 23 +++++++++++++- arch/x86/mm/init.c | 64 ++++++++++++++++++++++---------------- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d8ec834ea884..07cdd1715705 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -197,6 +197,7 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 5dcc38b16604..57072a1052fe 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -85,6 +85,18 @@ static inline u16 kern_pcid(u16 asid) return asid + 1; } +/* + * The user PCID is just the kernel one, plus the "switch bit". + */ +static inline u16 user_pcid(u16 asid) +{ + u16 ret = kern_pcid(asid); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + ret |= 1 << X86_CR3_PTI_SWITCH_BIT; +#endif + return ret; +} + struct pgd_t; static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) { @@ -335,6 +347,8 @@ static inline void __native_flush_tlb_global(void) /* * Using INVPCID is considerably faster than a pair of writes * to CR4 sandwiched inside an IRQ flag save/restore. + * + * Note, this works with CR4.PCIDE=0 or 1. 
*/ invpcid_flush_all(); return; @@ -368,7 +382,14 @@ static inline void __native_flush_tlb_single(unsigned long addr) if (!static_cpu_has(X86_FEATURE_PTI)) return; - invalidate_user_asid(loaded_mm_asid); + /* + * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. + * Just use invalidate_user_asid() in case we are called early. + */ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) + invalidate_user_asid(loaded_mm_asid); + else + invpcid_flush_one(user_pcid(loaded_mm_asid), addr); } /* diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index caeb8a7bf0a4..80259ad8c386 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void) static void setup_pcid(void) { -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_PCID)) { - if (boot_cpu_has(X86_FEATURE_PGE)) { - /* - * This can't be cr4_set_bits_and_update_boot() -- - * the trampoline code can't handle CR4.PCIDE and - * it wouldn't do any good anyway. Despite the name, - * cr4_set_bits_and_update_boot() doesn't actually - * cause the bits in question to remain set all the - * way through the secondary boot asm. - * - * Instead, we brute-force it and set CR4.PCIDE - * manually in start_secondary(). - */ - cr4_set_bits(X86_CR4_PCIDE); - } else { - /* - * flush_tlb_all(), as currently implemented, won't - * work if PCID is on but PGE is not. Since that - * combination doesn't exist on real hardware, there's - * no reason to try to fully support it, but it's - * polite to avoid corrupting data if we're on - * an improperly configured VM. - */ - setup_clear_cpu_cap(X86_FEATURE_PCID); - } + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + if (!boot_cpu_has(X86_FEATURE_PCID)) + return; + + if (boot_cpu_has(X86_FEATURE_PGE)) { + /* + * This can't be cr4_set_bits_and_update_boot() -- the + * trampoline code can't handle CR4.PCIDE and it wouldn't + * do any good anyway. 
Despite the name, + * cr4_set_bits_and_update_boot() doesn't actually cause + * the bits in question to remain set all the way through + * the secondary boot asm. + * + * Instead, we brute-force it and set CR4.PCIDE manually in + * start_secondary(). + */ + cr4_set_bits(X86_CR4_PCIDE); + + /* + * INVPCID's single-context modes (2/3) only work if we set + * X86_CR4_PCIDE, *and* we have INVPCID support. It's unusable + * on systems that have X86_CR4_PCIDE clear, or that have + * no INVPCID support at all. + */ + if (boot_cpu_has(X86_FEATURE_INVPCID)) + setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE); + } else { + /* + * flush_tlb_all(), as currently implemented, won't work if + * PCID is on but PGE is not. Since that combination + * doesn't exist on real hardware, there's no reason to try + * to fully support it, but it's polite to avoid corrupting + * data if we're on an improperly configured VM. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); } -#endif } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 0a126abd576ebc6403f063dbe20cf7416c9d9393 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2017 13:34:53 +0100 Subject: x86/mm: Clarify the whole ASID/kernel PCID/user PCID naming Ideally we'd also use sparse to enforce this separation so it becomes much more difficult to mess up. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 55 ++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 57072a1052fe..b519da4fc03c 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -13,16 +13,33 @@ #include #include -static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) -{ - /* - * Bump the generation count. This also serves as a full barrier - * that synchronizes with switch_mm(): callers are required to order - * their read of mm_cpumask after their writes to the paging - * structures. - */ - return atomic64_inc_return(&mm->context.tlb_gen); -} +/* + * The x86 feature is called PCID (Process Context IDentifier). It is similar + * to what is traditionally called ASID on the RISC processors. + * + * We don't use the traditional ASID implementation, where each process/mm gets + * its own ASID and flush/restart when we run out of ASID space. + * + * Instead we have a small per-cpu array of ASIDs and cache the last few mm's + * that came by on this CPU, allowing cheaper switch_mm between processes on + * this CPU. + * + * We end up with different spaces for different things. To avoid confusion we + * use different names for each of them: + * + * ASID - [0, TLB_NR_DYN_ASIDS-1] + * the canonical identifier for an mm + * + * kPCID - [1, TLB_NR_DYN_ASIDS] + * the value we write into the PCID part of CR3; corresponds to the + * ASID+1, because PCID 0 is special. + * + * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] + * for KPTI each mm has two address spaces and thus needs two + * PCID values, but we can still do with a single ASID denomination + * for each mm. 
Corresponds to kPCID + 2048. + * + */ /* There are 12 bits of space for ASIDS in CR3 */ #define CR3_HW_ASID_BITS 12 @@ -41,7 +58,7 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) /* * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account - * for them being zero-based. Another -1 is because ASID 0 is reserved for + * for them being zero-based. Another -1 is because PCID 0 is reserved for * use by non-PCID-aware users. */ #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) @@ -52,6 +69,9 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) */ #define TLB_NR_DYN_ASIDS 6 +/* + * Given @asid, compute kPCID + */ static inline u16 kern_pcid(u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); @@ -86,7 +106,7 @@ static inline u16 kern_pcid(u16 asid) } /* - * The user PCID is just the kernel one, plus the "switch bit". + * Given @asid, compute uPCID */ static inline u16 user_pcid(u16 asid) { @@ -484,6 +504,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) void native_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info); +static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) +{ + /* + * Bump the generation count. This also serves as a full barrier + * that synchronizes with switch_mm(): callers are required to order + * their read of mm_cpumask after their writes to the paging + * structures. + */ + return atomic64_inc_return(&mm->context.tlb_gen); +} + static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, struct mm_struct *mm) { -- cgit v1.2.3 From 5f26d76c3fd67c48806415ef8b1116c97beff8ba Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 19 Dec 2017 22:33:46 +0100 Subject: x86/dumpstack: Indicate in Oops whether PTI is configured and enabled CONFIG_PAGE_TABLE_ISOLATION is relatively new and intrusive feature that may still have some corner cases which could take some time to manifest and be fixed. 
It would be useful to have Oops messages indicate whether it was enabled for building the kernel, and whether it was disabled during boot. Example of fully enabled: Oops: 0001 [#1] SMP PTI Example of enabled during build, but disabled during boot: Oops: 0001 [#1] SMP NOPTI We can decide to remove this after the feature has been tested in the field long enough. [ tglx: Made it use boot_cpu_has() as requested by Borislav ] Signed-off-by: Vlastimil Babka Signed-off-by: Thomas Gleixner Reviewed-by: Eduardo Valentin Acked-by: Dave Hansen Cc: Andy Lutomirski Cc: Andy Lutomirsky Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: bpetkov@suse.de Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: jkosina@suse.cz Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 36b17e0febe8..5fa110699ed2 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -297,11 +297,13 @@ int __die(const char *str, struct pt_regs *regs, long err) unsigned long sp; #endif printk(KERN_DEFAULT - "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, + "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", - IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); + IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", + IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? + (boot_cpu_has(X86_FEATURE_PTI) ? 
" PTI" : " NOPTI") : ""); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) -- cgit v1.2.3 From 385ce0ea4c078517fa51c261882c4e72fba53005 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 4 Dec 2017 15:08:03 +0100 Subject: x86/mm/pti: Add Kconfig Finally allow CONFIG_PAGE_TABLE_ISOLATION to be enabled. PARAVIRT generally requires that the kernel not manage its own page tables. It also means that the hypervisor and kernel must agree wholeheartedly about what format the page tables are in and what they contain. PAGE_TABLE_ISOLATION, unfortunately, changes the rules and they can not be used together. I've seen conflicting feedback from maintainers lately about whether they want the Kconfig magic to go first or last in a patch series. It's going last here because the partially-applied series leads to kernels that can not boot in a bunch of cases. I did a run through the entire series with CONFIG_PAGE_TABLE_ISOLATION=y to look for build errors, though. [ tglx: Removed SMP and !PARAVIRT dependencies as they not longer exist ] Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- security/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/security/Kconfig b/security/Kconfig index e8e449444e65..a623d13bf288 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -54,6 +54,16 @@ config SECURITY_NETWORK implement socket and networking access controls. If you are unsure how to answer this question, answer N. 
+config PAGE_TABLE_ISOLATION + bool "Remove the kernel mapping in user mode" + depends on X86_64 && !UML + help + This feature reduces the number of hardware side channels by + ensuring that the majority of kernel addresses are not mapped + into userspace. + + See Documentation/x86/pagetable-isolation.txt for more details. + config SECURITY_INFINIBAND bool "Infiniband Security Hooks" depends on SECURITY && INFINIBAND -- cgit v1.2.3 From 75298aa179d56cd64f54e58a19fffc8ab922b4c0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Dec 2017 15:08:04 +0100 Subject: x86/mm/dump_pagetables: Add page table directory to the debugfs VFS hierarchy The upcoming support for dumping the kernel and the user space page tables of the current process would create more random files in the top level debugfs directory. Add a page table directory and move the existing file to it. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar --- arch/x86/mm/debug_pagetables.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index bfcffdf6c577..d1449fb6dc7a 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -22,21 +22,26 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static struct dentry *pe; +static struct dentry *dir, *pe; static int __init pt_dump_debug_init(void) { - pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL, - &ptdump_fops); - if (!pe) + dir = debugfs_create_dir("page_tables", NULL); + if (!dir) return -ENOMEM; + pe = debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops); + if (!pe) + goto err; return 0; +err: + debugfs_remove_recursive(dir); + return -ENOMEM; } static void __exit pt_dump_debug_exit(void) { - debugfs_remove_recursive(pe); + debugfs_remove_recursive(dir); } module_init(pt_dump_debug_init); -- cgit v1.2.3 From b4bf4f924b1d7bade38fd51b2e401d20d0956e4d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:08:05 +0100 Subject: x86/mm/dump_pagetables: Check user space page table for WX pages ptdump_walk_pgd_level_checkwx() checks the kernel page table for WX pages, but does not check the PAGE_TABLE_ISOLATION user space page table. Restructure the code so that dmesg output is selected by an explicit argument and not implicit via checking the pgd argument for !NULL. Add the check for the user space page table. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 1 + arch/x86/mm/debug_pagetables.c | 2 +- arch/x86/mm/dump_pagetables.c | 30 +++++++++++++++++++++++++----- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index cc6fa75884e9..03780d5c41c5 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd); void ptdump_walk_pgd_level_checkwx(void); #ifdef CONFIG_DEBUG_WX diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index d1449fb6dc7a..8e70c1599e51 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -5,7 +5,7 @@ static int ptdump_show(struct seq_file *m, void *v) { - ptdump_walk_pgd_level(m, NULL); + ptdump_walk_pgd_level_debugfs(m, NULL); return 0; } diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 690eaf31ca34..17f5b417f95e 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -476,7 +476,7 @@ static inline bool is_hypervisor_range(int idx) } static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, - bool checkwx) + bool checkwx, bool dmesg) { #ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_top_pgt; @@ -489,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, if (pgd) { start = pgd; - st.to_dmesg = true; + st.to_dmesg = dmesg; } st.check_wx = checkwx; @@ -527,13 +527,33 @@ static void 
ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) { - ptdump_walk_pgd_level_core(m, pgd, false); + ptdump_walk_pgd_level_core(m, pgd, false, true); +} + +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd) +{ + ptdump_walk_pgd_level_core(m, pgd, false, false); +} +EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); + +static void ptdump_walk_user_pgd_level_checkwx(void) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pgd_t *pgd = (pgd_t *) &init_top_pgt; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("x86/mm: Checking user space page tables\n"); + pgd = kernel_to_user_pgdp(pgd); + ptdump_walk_pgd_level_core(NULL, pgd, true, false); +#endif } -EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level); void ptdump_walk_pgd_level_checkwx(void) { - ptdump_walk_pgd_level_core(NULL, NULL, true); + ptdump_walk_pgd_level_core(NULL, NULL, true, false); + ptdump_walk_user_pgd_level_checkwx(); } static int __init pt_dump_init(void) -- cgit v1.2.3 From a4b51ef6552c704764684cef7e753162dc87c5fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2017 15:08:06 +0100 Subject: x86/mm/dump_pagetables: Allow dumping current pagetables Add two debugfs files which allow to dump the pagetable of the current task. current_kernel dumps the regular page table. This is the page table which is normally shared between kernel and user space. If kernel page table isolation is enabled this is the kernel space mapping. If kernel page table isolation is enabled the second file, current_user, dumps the user space page table. These files allow to verify the resulting page tables for page table isolation, but even in the normal case it's useful to be able to inspect user space page tables of current for debugging purposes.
Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: David Laight Cc: Denys Vlasenko Cc: Eduardo Valentin Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Will Deacon Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 2 +- arch/x86/mm/debug_pagetables.c | 71 +++++++++++++++++++++++++++++++++++++++--- arch/x86/mm/dump_pagetables.c | 6 +++- 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 03780d5c41c5..6b43d677f8ca 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -28,7 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); void ptdump_walk_pgd_level_checkwx(void); #ifdef CONFIG_DEBUG_WX diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index 8e70c1599e51..421f2664ffa0 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -5,7 +5,7 @@ static int ptdump_show(struct seq_file *m, void *v) { - ptdump_walk_pgd_level_debugfs(m, NULL); + ptdump_walk_pgd_level_debugfs(m, NULL, false); return 0; } @@ -22,7 +22,57 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static struct dentry *dir, *pe; +static int ptdump_show_curknl(struct seq_file *m, void *v) +{ + if (current->mm->pgd) { + down_read(&current->mm->mmap_sem); + ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false); + up_read(&current->mm->mmap_sem); + } + return 0; +} + +static int
ptdump_open_curknl(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_curknl, NULL); +} + +static const struct file_operations ptdump_curknl_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_curknl, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +static struct dentry *pe_curusr; + +static int ptdump_show_curusr(struct seq_file *m, void *v) +{ + if (current->mm->pgd) { + down_read(&current->mm->mmap_sem); + ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true); + up_read(&current->mm->mmap_sem); + } + return 0; +} + +static int ptdump_open_curusr(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_curusr, NULL); +} + +static const struct file_operations ptdump_curusr_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_curusr, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +static struct dentry *dir, *pe_knl, *pe_curknl; static int __init pt_dump_debug_init(void) { @@ -30,9 +80,22 @@ static int __init pt_dump_debug_init(void) if (!dir) return -ENOMEM; - pe = debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops); - if (!pe) + pe_knl = debugfs_create_file("kernel", 0400, dir, NULL, + &ptdump_fops); + if (!pe_knl) + goto err; + + pe_curknl = debugfs_create_file("current_kernel", 0400, + dir, NULL, &ptdump_curknl_fops); + if (!pe_curknl) + goto err; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pe_curusr = debugfs_create_file("current_user", 0400, + dir, NULL, &ptdump_curusr_fops); + if (!pe_curusr) goto err; +#endif return 0; err: debugfs_remove_recursive(dir); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 17f5b417f95e..f56902c1f04b 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -530,8 +530,12 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) ptdump_walk_pgd_level_core(m, pgd, false, true); } -void
ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd) +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) { +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (user && static_cpu_has(X86_FEATURE_PTI)) + pgd = kernel_to_user_pgdp(pgd); +#endif ptdump_walk_pgd_level_core(m, pgd, false, false); } EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); -- cgit v1.2.3 From 9f5cb6b32d9e0a3a7453222baaf15664d92adbf2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 15 Dec 2017 20:35:11 +0100 Subject: x86/ldt: Make the LDT mapping RO Now that the LDT mapping is in a known area when PAGE_TABLE_ISOLATION is enabled, it's a primary target for attacks, if a user space interface fails to validate a write address correctly. That can never happen, right? The SDM states: If the segment descriptors in the GDT or an LDT are placed in ROM, the processor can enter an indefinite loop if software or the processor attempts to update (write to) the ROM-based segment descriptors. To prevent this problem, set the accessed bits for all segment descriptors placed in a ROM. Also, remove operating-system or executive code that attempts to modify segment descriptors located in ROM. So it's a valid approach to set the ACCESS bit when setting up the LDT entry and to map the table RO. Fix up the selftest so it can handle that new mode. Remove the manual ACCESS bit setter in set_tls_desc() as this is now pointless. Folded the patch from Peter Zijlstra. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H.
Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/include/asm/desc.h | 2 ++ arch/x86/kernel/ldt.c | 7 ++++++- arch/x86/kernel/tls.c | 11 ++--------- tools/testing/selftests/x86/ldt_gdt.c | 3 +-- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index bc359dd2f7f6..85e23bb7b34e 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -21,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->type = (info->read_exec_only ^ 1) << 1; desc->type |= info->contents << 2; + /* Set the ACCESS bit so it can be mapped RO */ + desc->type |= 1; desc->s = 1; desc->dpl = 0x3; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 9629c5d8267a..579cc4a66fdf 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -158,7 +158,12 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) ptep = get_locked_pte(mm, va, &ptl); if (!ptep) return -ENOMEM; - pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL)); + /* + * Map it RO so the easy to find address is not a primary + * target via some kernel interface which misses a + * permission check. + */ + pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)); set_pte_at(mm, va, ptep, pte); pte_unmap_unlock(ptep, ptl); } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 9a9c9b076955..a5b802a12212 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info) || LDT_zero(info)) { + if (LDT_empty(info) || LDT_zero(info)) memset(desc, 0, sizeof(*desc)); - } else { + else fill_ldt(desc, info); - - /* - * Always set the accessed bit so that the CPU - * doesn't try to write to the (read-only) GDT. 
- */ - desc->type |= 1; - } ++info; ++desc; } diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 0304ffb714f2..1aef72df20a1 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t index, int ldt, * NB: Different Linux versions do different things with the * accessed bit in set_thread_area(). */ - if (ar != expected_ar && - (ldt || ar != (expected_ar | AR_ACCESSED))) { + if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) { printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", (ldt ? "LDT" : "GDT"), index, ar, expected_ar); nerrs++; -- cgit v1.2.3 From c0ee554906c3d6554fbddf95ae664cd9f817082b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 22 Dec 2017 12:37:43 -0600 Subject: pid: Handle failure to allocate the first pid in a pid namespace With the replacement of the pid bitmap and hashtable with an idr, alloc_pid started occasionally failing when allocating the first pid in a pid namespace. Things were not completely reset resulting in the first allocated pid getting the number 2 (not 1). Which further resulted in ns->proc_mnt not getting set and eventually causing an oops in proc_flush_task.
Oops: 0000 [#1] SMP CPU: 2 PID: 6743 Comm: trinity-c117 Not tainted 4.15.0-rc4-think+ #2 RIP: 0010:proc_flush_task+0x8e/0x1b0 RSP: 0018:ffffc9000bbffc40 EFLAGS: 00010286 RAX: 0000000000000001 RBX: 0000000000000001 RCX: 00000000fffffffb RDX: 0000000000000000 RSI: ffffc9000bbffc50 RDI: 0000000000000000 RBP: ffffc9000bbffc63 R08: 0000000000000000 R09: 0000000000000002 R10: ffffc9000bbffb70 R11: ffffc9000bbffc64 R12: 0000000000000003 R13: 0000000000000000 R14: 0000000000000003 R15: ffff8804c10d7840 FS: 00007f7cb8965700(0000) GS:ffff88050a200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000003e21ae003 CR4: 00000000001606e0 DR0: 00007fb1d6c22000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: ? release_task+0xaf/0x680 release_task+0xd2/0x680 ? wait_consider_task+0xb82/0xce0 wait_consider_task+0xbe9/0xce0 ? do_wait+0xe1/0x330 do_wait+0x151/0x330 kernel_wait4+0x8d/0x150 ? task_stopped_code+0x50/0x50 SYSC_wait4+0x95/0xa0 ? rcu_read_lock_sched_held+0x6c/0x80 ? syscall_trace_enter+0x2d7/0x340 ? 
do_syscall_64+0x60/0x210 do_syscall_64+0x60/0x210 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f7cb82603aa RSP: 002b:00007ffd60770bc8 EFLAGS: 00000246 ORIG_RAX: 000000000000003d RAX: ffffffffffffffda RBX: 00007f7cb6cd4000 RCX: 00007f7cb82603aa RDX: 000000000000000b RSI: 00007ffd60770bd0 RDI: 0000000000007cca RBP: 0000000000007cca R08: 00007f7cb8965700 R09: 00007ffd607c7080 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffd60770bd0 R14: 00007f7cb6cd4058 R15: 00000000cccccccd Code: c1 e2 04 44 8b 60 30 48 8b 40 38 44 8b 34 11 48 c7 c2 60 3a f5 81 44 89 e1 4c 8b 68 58 e8 4b b4 77 00 89 44 24 14 48 8d 74 24 10 <49> 8b 7d 00 e8 b9 6a f9 ff 48 85 c0 74 1a 48 89 c7 48 89 44 24 RIP: proc_flush_task+0x8e/0x1b0 RSP: ffffc9000bbffc40 CR2: 0000000000000000 ---[ end trace 53d67a6481059862 ]--- Improve the quality of the implementation by resetting the place to start allocating pids on failure to allocate the first pid. As improving the quality of the implementation is the goal, remove the now unnecessary disable_pid_allocation() call when we fail to mount proc. Fixes: 95846ecf9dac ("pid: replace pid bitmap implementation with IDR API") Fixes: 8ef047aaaeb8 ("pid namespaces: make alloc_pid(), free_pid() and put_pid() work with struct upid") Reported-by: Dave Jones Signed-off-by: "Eric W.
Biederman" --- kernel/pid.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/pid.c b/kernel/pid.c index b13b624e2c49..1e8bb6550ec4 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -193,10 +193,8 @@ struct pid *alloc_pid(struct pid_namespace *ns) } if (unlikely(is_child_reaper(pid))) { - if (pid_ns_prepare_proc(ns)) { - disable_pid_allocation(ns); + if (pid_ns_prepare_proc(ns)) goto out_free; - } } get_pid_ns(ns); @@ -226,6 +224,10 @@ out_free: while (++i <= ns->level) idr_remove(&ns->idr, (pid->numbers + i)->nr); + /* On failure to allocate the first pid, reset the state */ + if (ns->pid_allocated == PIDNS_ADDING) + idr_set_cursor(&ns->idr, 0); + spin_unlock_irq(&pidmap_lock); kmem_cache_free(ns->pid_cachep, pid); -- cgit v1.2.3 From 464e1d5f23cca236b930ef068c328a64cab78fb1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 23 Dec 2017 20:47:16 -0800 Subject: Linux 4.15-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7e02f951b284..ac8c441866b7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 182088aa3c6c7f7c20a2c1dcc9ded4a3fc631f38 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Dec 2017 23:21:28 +0000 Subject: phylink: ensure the PHY interface mode is appropriately set When setting the ethtool settings, ensure that the validated PHY interface mode is propagated to the current link settings, so that 2500BaseX can be selected. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 5dc9668dde34..8d06a083ac4c 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -951,6 +951,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, mutex_lock(&pl->state_mutex); /* Configure the MAC to match the new settings */ linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising); + pl->link_config.interface = config.interface; pl->link_config.speed = our_kset.base.speed; pl->link_config.duplex = our_kset.base.duplex; pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE; -- cgit v1.2.3 From 74ee0e8c1bf9925c59cc8f1c65c29adf6e4cf603 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 20 Dec 2017 23:21:34 +0000 Subject: phylink: ensure AN is enabled Ensure that we mark AN as enabled at boot time, rather than leaving it disabled. This is noticeable if your SFP module is fiber, and it supports faster speeds than 1G with 2.5G support in place. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S.
Miller --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 8d06a083ac4c..827f3f92560e 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -526,6 +526,7 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np, pl->link_config.pause = MLO_PAUSE_AN; pl->link_config.speed = SPEED_UNKNOWN; pl->link_config.duplex = DUPLEX_UNKNOWN; + pl->link_config.an_enabled = true; pl->ops = ops; __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); -- cgit v1.2.3 From 8bea728dce8972e534e6b99fd550f7b5cc3864e8 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 25 Dec 2017 11:34:54 +0800 Subject: netfilter: nf_tables: fix potential NULL-ptr deref in nf_tables_dump_obj_done() If there is no NFTA_OBJ_TABLE and NFTA_OBJ_TYPE, the c.data will be NULL in nf_tables_getobj(). So before free filter->table in nf_tables_dump_obj_done(), we need to check if filter is NULL first. 
Fixes: e46abbcc05aa ("netfilter: nf_tables: Allow table names of up to 255 chars") Signed-off-by: Hangbin Liu Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8d4526651661..07bd4138c84e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_filter *filter = cb->data; - kfree(filter->table); - kfree(filter); + if (filter) { + kfree(filter->table); + kfree(filter); + } return 0; } -- cgit v1.2.3 From e5a9336adb317db55eb3fe8200856096f3c71109 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 20 Dec 2017 19:36:03 +0300 Subject: ip6_gre: fix device features for ioctl setup When ip6gre is created using ioctl, its features, such as scatter-gather, GSO and tx-checksumming will be turned off: # ip -f inet6 tunnel add gre6 mode ip6gre remote fd00::1 # ethtool -k gre6 (truncated output) tx-checksumming: off scatter-gather: off tcp-segmentation-offload: off generic-segmentation-offload: off [requested on] But when netlink is used, they will be enabled: # ip link add gre6 type ip6gre remote fd00::1 # ethtool -k gre6 (truncated output) tx-checksumming: on scatter-gather: on tcp-segmentation-offload: on generic-segmentation-offload: on This results in a loss of performance when gre6 is created via ioctl. The issue was found with LTP/gre tests. 
Fix it by moving the setup of device features to a separate function and invoke it with ndo_init callback because both netlink and ioctl will eventually call it via register_netdevice(): register_netdevice() - ndo_init() callback -> ip6gre_tunnel_init() or ip6gre_tap_init() - ip6gre_tunnel_init_common() - ip6gre_tnl_init_features() The moved code also contains two minor style fixes: * removed needless tab from GRE6_FEATURES on NETIF_F_HIGHDMA line. * fixed the issue reported by checkpatch: "Unnecessary parentheses around 'nt->encap.type == TUNNEL_ENCAP_NONE'" Fixes: ac4eb009e477 ("ip6gre: Add support for basic offloads offloads excluding GSO") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 57 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 416c8913f132..772695960890 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1014,6 +1014,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev) eth_random_addr(dev->perm_addr); } +#define GRE6_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + +static void ip6gre_tnl_init_features(struct net_device *dev) +{ + struct ip6_tnl *nt = netdev_priv(dev); + + dev->features |= GRE6_FEATURES; + dev->hw_features |= GRE6_FEATURES; + + if (!(nt->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. 
+ */ + if (!(nt->parms.o_flags & TUNNEL_CSUM) || + nt->encap.type == TUNNEL_ENCAP_NONE) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } +} + static int ip6gre_tunnel_init_common(struct net_device *dev) { struct ip6_tnl *tunnel; @@ -1048,6 +1078,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; + ip6gre_tnl_init_features(dev); + return 0; } @@ -1298,11 +1330,6 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; -#define GRE6_FEATURES (NETIF_F_SG | \ - NETIF_F_FRAGLIST | \ - NETIF_F_HIGHDMA | \ - NETIF_F_HW_CSUM) - static void ip6gre_tap_setup(struct net_device *dev) { @@ -1383,26 +1410,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->net = dev_net(dev); ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); - dev->features |= GRE6_FEATURES; - dev->hw_features |= GRE6_FEATURES; - - if (!(nt->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported, nor - * can we support 2 levels of outer headers requiring - * an update. - */ - if (!(nt->parms.o_flags & TUNNEL_CSUM) || - (nt->encap.type == TUNNEL_ENCAP_NONE)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } - - /* Can use a lockless transmit, unless we generate - * output sequences - */ - dev->features |= NETIF_F_LLTX; - } - err = register_netdevice(dev); if (err) goto out; -- cgit v1.2.3 From c1a8d0a3accf64a014d605e6806ce05d1c17adf1 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 20 Dec 2017 18:45:10 -0600 Subject: net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround Under some circumstances driver will perform PHY reset in ksz9031_read_status() to fix autoneg failure case (idle error count = 0xFF). 
When this happens ksz9031 will not detect link status change any more when connecting to Netgear 1G switch (link can be recovered sometimes by restarting netdevice "ifconfig down up"). Reproduced with TI am572x board equipped with ksz9031 PHY while connecting to Netgear 1G switch. Fix the issue by reconfiguring autonegotiation after PHY reset in ksz9031_read_status(). Fixes: d2fd719bcb0e ("net/phy: micrel: Add workaround for bad autoneg") Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index ab4614113403..422ff6333c52 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev) phydev->link = 0; if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev)) phydev->drv->config_intr(phydev); + return genphy_config_aneg(phydev); } return 0; -- cgit v1.2.3 From b2fb01f426883a794ed80be9110675a2d8356347 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 20 Dec 2017 23:26:24 -0800 Subject: net_sched: fix a missing rcu barrier in mini_qdisc_pair_swap() The rcu_barrier_bh() in mini_qdisc_pair_swap() is to wait for flying RCU callback installed by a previous mini_qdisc_pair_swap(), however we miss it on the tp_head==NULL path, which leads to that the RCU callback still uses miniq_old->rcu after it is freed together with qdisc in qdisc_graft(). So just add it on that path too. Fixes: 46209401f8f6 ("net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath ") Reported-by: Jakub Kicinski Tested-by: Jakub Kicinski Cc: Jiri Pirko Cc: John Fastabend Signed-off-by: Cong Wang Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/sched/sch_generic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cd1b200acae7..661c7144b53a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1040,6 +1040,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, if (!tp_head) { RCU_INIT_POINTER(*miniqp->p_miniq, NULL); + /* Wait for flying RCU callback before it is freed. */ + rcu_barrier_bh(); return; } @@ -1055,7 +1057,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, rcu_assign_pointer(*miniqp->p_miniq, miniq); if (miniq_old) - /* This is counterpart of the rcu barrier above. We need to + /* This is counterpart of the rcu barriers above. We need to * block potential new user of miniq_old until all readers * are not seeing it. */ -- cgit v1.2.3 From 0a3d805c9c503e05d6e5d3868c53e92a06589dcf Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 21 Dec 2017 13:07:11 +0100 Subject: tipc: base group replicast ack counter on number of actual receivers In commit 2f487712b893 ("tipc: guarantee that group broadcast doesn't bypass group unicast") we introduced a mechanism that requires the first (replicated) broadcast sent after a unicast to be acknowledged by all receivers before permitting sending of the next (true) broadcast. The counter for keeping track of the number of acknowledges to expect is based on the tipc_group::member_cnt variable. But this misses that some of the known members may not be ready for reception, and will never acknowledge the message, either because they haven't fully joined the group or because they are leaving the group. Such members are identified by not fulfilling the condition tested for in the function tipc_group_is_enabled(). We now set the counter for the actual number of acks to receive at the moment the message is sent, by just counting the number of recipients satisfying the tipc_group_is_enabled() test. Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/group.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 7ebbdeb2a90e..e5b03f08f076 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -368,18 +368,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) u16 prev = grp->bc_snd_nxt - 1; struct tipc_member *m; struct rb_node *n; + u16 ackers = 0; for (n = rb_first(&grp->members); n; n = rb_next(n)) { m = container_of(n, struct tipc_member, tree_node); if (tipc_group_is_enabled(m)) { tipc_group_update_member(m, len); m->bc_acked = prev; + ackers++; } } /* Mark number of acknowledges to expect, if any */ if (ack) - grp->bc_ackers = grp->member_cnt; + grp->bc_ackers = ackers; grp->bc_snd_nxt++; } -- cgit v1.2.3 From 4853f128c13ed2731625dff2410b7fdbe540fb26 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 21 Dec 2017 13:13:59 +0100 Subject: net: sched: fix possible null pointer deref in tcf_block_put We need to check block for being null in both tcf_block_put and tcf_block_put_ext. Fixes: 343723dd51ef ("net: sched: fix clsact init error path") Reported-by: Prashant Bhole Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b91ea03e3afa..b9d63d2246e6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -379,6 +379,8 @@ void tcf_block_put(struct tcf_block *block) { struct tcf_block_ext_info ei = {0, }; + if (!block) + return; tcf_block_put_ext(block, block->q, &ei); } -- cgit v1.2.3 From 3a33a19bf88cdfc6d982972bc6ffcf7a62c1015e Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 21 Dec 2017 14:36:34 +0100 Subject: tipc: fix memory leak of group member when peer node is lost When a group member receives a member WITHDRAW event, this might have two reasons: either the peer member is leaving the group, or the link to the member's node has been lost. 
In the latter case we need to issue a DOWN event to the user right away, and let function tipc_group_filter_msg() perform delete of the member item. However, in this case we fail to change the state of the member item to MBR_LEAVING, so the member item is not deleted, and we have a memory leak. We now separate better between the four sub-cases of a WITHDRAW event and make sure that each case is handled correctly. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index e5b03f08f076..8e12ab55346b 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -850,17 +850,26 @@ void tipc_group_member_evt(struct tipc_group *grp, *usr_wakeup = true; m->usr_pending = false; node_up = tipc_node_is_up(net, node); - - /* Hold back event if more messages might be expected */ - if (m->state != MBR_LEAVING && node_up) { - m->event_msg = skb; - tipc_group_decr_active(grp, m); - m->state = MBR_LEAVING; - } else { - if (node_up) + m->event_msg = NULL; + + if (node_up) { + /* Hold back event if a LEAVE msg should be expected */ + if (m->state != MBR_LEAVING) { + m->event_msg = skb; + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + } else { msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - else + __skb_queue_tail(inputq, skb); + } + } else { + if (m->state != MBR_LEAVING) { + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); + } else { + msg_set_grp_bc_seqno(hdr, m->bc_syncpt); + } __skb_queue_tail(inputq, skb); } list_del_init(&m->list); -- cgit v1.2.3 From 47c332deb8e89f6c59b0bb2615945c6e7fad1a60 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 5 Dec 2017 09:36:14 +0100 Subject: hwmon: Deal with errors from the thermal subsystem If the thermal subsystem returns -EPROBE_DEFER or any other error when hwmon calls devm_thermal_zone_of_sensor_register(), this is silently
ignored. I ran into this with an incorrectly defined thermal zone, making it non-existing and thus this call failed with -EPROBE_DEFER assuming it would appear later. The sensor was still added which is incorrect: sensors must strictly be added after the thermal zones, so deferred probe must be respected. Fixes: d560168b5d0f ("hwmon: (core) New hwmon registration API") Signed-off-by: Linus Walleij Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index c9790e2c3440..af5123042990 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev, struct hwmon_device *hwdev, int index) { struct hwmon_thermal_data *tdata; + struct thermal_zone_device *tzd; tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); if (!tdata) @@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, tdata->hwdev = hwdev; tdata->index = index; - devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, - &hwmon_thermal_ops); + tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, + &hwmon_thermal_ops); + /* + * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, + * so ignore that error but forward any other error. 
+ */ + if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) + return PTR_ERR(tzd); return 0; } @@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (!chip->ops->is_visible(drvdata, hwmon_temp, hwmon_temp_input, j)) continue; - if (info[i]->config[j] & HWMON_T_INPUT) - hwmon_thermal_add_sensor(dev, hwdev, j); + if (info[i]->config[j] & HWMON_T_INPUT) { + err = hwmon_thermal_add_sensor(dev, + hwdev, j); + if (err) + goto free_device; + } } } } return hdev; +free_device: + device_unregister(hdev); free_hwmon: kfree(hwdev); ida_remove: -- cgit v1.2.3 From 6a6b0b9914e73a8a54253dd5f6f5e5dd5e4a756c Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 21 Dec 2017 10:29:09 -0800 Subject: tcp: Avoid preprocessor directives in tracepoint macro args Using a preprocessor directive to check for CONFIG_IPV6 in the middle of a DECLARE_EVENT_CLASS macro's arg list causes sparse to report a series of errors: ./include/trace/events/tcp.h:68:1: error: directive in argument list ./include/trace/events/tcp.h:75:1: error: directive in argument list ./include/trace/events/tcp.h:144:1: error: directive in argument list ./include/trace/events/tcp.h:151:1: error: directive in argument list ./include/trace/events/tcp.h:216:1: error: directive in argument list ./include/trace/events/tcp.h:223:1: error: directive in argument list ./include/trace/events/tcp.h:274:1: error: directive in argument list ./include/trace/events/tcp.h:281:1: error: directive in argument list Once sparse finds an error, it stops printing warnings for the file it is checking. This masks any sparse warnings that would normally be reported for the core TCP code. Instead, handle the preprocessor conditionals in a couple of auxiliary macros. This also has the benefit of reducing duplicate code. Cc: David Ahern Signed-off-by: Mat Martineau Signed-off-by: David S. 
Miller --- include/trace/events/tcp.h | 97 ++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 60 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 07cccca6cbf1..ab34c561f26b 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -25,6 +25,35 @@ tcp_state_name(TCP_CLOSING), \ tcp_state_name(TCP_NEW_SYN_RECV)) +#define TP_STORE_V4MAPPED(__entry, saddr, daddr) \ + do { \ + struct in6_addr *pin6; \ + \ + pin6 = (struct in6_addr *)__entry->saddr_v6; \ + ipv6_addr_set_v4mapped(saddr, pin6); \ + pin6 = (struct in6_addr *)__entry->daddr_v6; \ + ipv6_addr_set_v4mapped(daddr, pin6); \ + } while (0) + +#if IS_ENABLED(CONFIG_IPV6) +#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \ + do { \ + if (sk->sk_family == AF_INET6) { \ + struct in6_addr *pin6; \ + \ + pin6 = (struct in6_addr *)__entry->saddr_v6; \ + *pin6 = saddr6; \ + pin6 = (struct in6_addr *)__entry->daddr_v6; \ + *pin6 = daddr6; \ + } else { \ + TP_STORE_V4MAPPED(__entry, saddr, daddr); \ + } \ + } while (0) +#else +#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6) \ + TP_STORE_V4MAPPED(__entry, saddr, daddr) +#endif + /* * tcp event with arguments sk and skb * @@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skbaddr = skb; @@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, 
inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", @@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", @@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state, TP_fast_assign( struct inet_sock *inet = inet_sk(sk); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state, p32 = (__be32 *) __entry->daddr; *p32 = inet->inet_daddr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = sk->sk_v6_rcv_saddr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = sk->sk_v6_daddr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(inet->inet_saddr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(inet->inet_daddr, pin6); - } + TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr, + sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s", @@ -256,7 +246,6 
@@ TRACE_EVENT(tcp_retransmit_synack, TP_fast_assign( struct inet_request_sock *ireq = inet_rsk(req); - struct in6_addr *pin6; __be32 *p32; __entry->skaddr = sk; @@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack, p32 = (__be32 *) __entry->daddr; *p32 = ireq->ir_rmt_addr; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) { - pin6 = (struct in6_addr *)__entry->saddr_v6; - *pin6 = ireq->ir_v6_loc_addr; - pin6 = (struct in6_addr *)__entry->daddr_v6; - *pin6 = ireq->ir_v6_rmt_addr; - } else -#endif - { - pin6 = (struct in6_addr *)__entry->saddr_v6; - ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6); - pin6 = (struct in6_addr *)__entry->daddr_v6; - ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6); - } + TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr, + ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", -- cgit v1.2.3 From 756efe131088b6e6e7f0124ff9c4e1f0165d3140 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 22 Dec 2017 17:46:04 +0800 Subject: clk: use atomic runtime pm api in clk_core_is_enabled Current clk_pm_runtime_put is using pm_runtime_put_sync which is not safe to be called in clk_core_is_enabled as it should be able to run in atomic context. Thus use pm_runtime_put instead which is atomic safe. 
Cc: Stephen Boyd Cc: Michael Turquette Cc: Ulf Hansson Cc: Marek Szyprowski Fixes: 9a34b45397e5 ("clk: Add support for runtime PM") Signed-off-by: Dong Aisheng Reviewed-by: Ulf Hansson Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8a1860a36c77..b56c11f51baf 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core) ret = core->ops->is_enabled(core->hw); done: - clk_pm_runtime_put(core); + if (core->dev) + pm_runtime_put(core->dev); return ret; } -- cgit v1.2.3 From 44be77c590f381bc629815ac789b8b15ecc4ddcf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 Dec 2017 08:53:59 +0100 Subject: ALSA: hda - Fix missing COEF init for ALC225/295/299 There was a long-standing problem on HP Spectre X360 with Kabylake where it lacks the front speaker output in some situations. There are also other products showing similar behavior. The culprit seems to be the missing COEF setup on ALC codecs, ALC225/295/299, which are all compatible. This patch adds the proper COEF setup (to initialize idx 0x67 / bits 0x3000) for addressing the issue.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195457 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1522ba31e16d..8fd2d9c62c96 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0292: alc_update_coef_idx(codec, 0x4, 1<<15, 0); break; - case 0x10ec0215: case 0x10ec0225: + case 0x10ec0295: + case 0x10ec0299: + alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); + /* fallthrough */ + case 0x10ec0215: case 0x10ec0233: case 0x10ec0236: case 0x10ec0255: @@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0286: case 0x10ec0288: case 0x10ec0285: - case 0x10ec0295: case 0x10ec0298: case 0x10ec0289: - case 0x10ec0299: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; case 0x10ec0275: -- cgit v1.2.3 From c1350bff69d13369c3316f5d5e580021c2196f90 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 27 Dec 2017 09:03:45 +0100 Subject: ALSA: hda - Clean up ALC299 init code ALC299 is compatible with ALC225/295, thus it doesn't have to assign its own model. Merge together with ALC225/295 code. 
Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8fd2d9c62c96..b2037131eed9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6923,12 +6923,9 @@ static int patch_alc269(struct hda_codec *codec) break; case 0x10ec0225: case 0x10ec0295: - spec->codec_variant = ALC269_TYPE_ALC225; - spec->gen.mixer_nid = 0; /* no loopback on ALC225 ALC295 */ - break; case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; - spec->gen.mixer_nid = 0; /* no loopback on ALC299 */ + spec->gen.mixer_nid = 0; /* no loopback on ALC225, ALC295 and ALC299 */ break; case 0x10ec0234: case 0x10ec0274: -- cgit v1.2.3 From 078711d7f88d33b0adebb402a1bcb2aa89afe68b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 22 Dec 2017 18:42:08 +0100 Subject: regmap: debugfs: document why we don't create the debugfs entries This is a follow-up to commit a5ba91c380b8 ("regmap: debugfs: emit a debug message when locking is disabled"). I figured that a user may see this message, grep the code, come to this place and he still won't know why we actually disabled debugfs. Add a comment explaining the reason. Signed-off-by: Bartosz Golaszewski Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-debugfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index ae962b756863..f3266334063e 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -529,6 +529,13 @@ void regmap_debugfs_init(struct regmap *map, const char *name) struct regmap_range_node *range_node; const char *devname = "dummy"; + /* + * Userspace can initiate reads from the hardware over debugfs. 
+ * Normally internal regmap structures and buffers are protected with + * a mutex or a spinlock, but if the regmap owner decided to disable + * all locking mechanisms, this is no longer the case. For safety: + * don't create the debugfs entries if locking is disabled. + */ if (map->debugfs_disable) { dev_dbg(map->dev, "regmap locking disabled - not creating debugfs entries\n"); return; -- cgit v1.2.3 From a4887813c3a9481ab87c8a71ab1de50b975cc823 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 25 Dec 2017 14:37:09 +0800 Subject: regmap: Add one flag to indicate if a hwlock should be used Since the hwlock id 0 is valid for hardware spinlock core, but now id 0 is treated as one invalid value for regmap. Thus we should add one extra flag for regmap config to indicate if a hardware spinlock should be used, then id 0 can be valid for regmap to request. Signed-off-by: Baolin Wang Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 +- include/linux/regmap.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index f25ab18ca057..d23a5c99b639 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -671,7 +671,7 @@ struct regmap *__regmap_init(struct device *dev, map->lock = config->lock; map->unlock = config->unlock; map->lock_arg = config->lock_arg; - } else if (config->hwlock_id) { + } else if (config->use_hwlock) { map->hwlock = hwspin_lock_request_specific(config->hwlock_id); if (!map->hwlock) { ret = -ENXIO; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 15eddc1353ba..c78e0057df66 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -317,6 +317,7 @@ typedef void (*regmap_unlock)(void *); * * @ranges: Array of configuration entries for virtual address ranges. * @num_ranges: Number of range configuration entries. + * @use_hwlock: Indicate if a hardware spinlock should be used. 
* @hwlock_id: Specify the hardware spinlock id. * @hwlock_mode: The hardware spinlock mode, should be HWLOCK_IRQSTATE, * HWLOCK_IRQ or 0. @@ -365,6 +366,7 @@ struct regmap_config { const struct regmap_range_cfg *ranges; unsigned int num_ranges; + bool use_hwlock; unsigned int hwlock_id; unsigned int hwlock_mode; }; -- cgit v1.2.3 From c6a36ad383559a60a249aa6016cebf3cb8b6c485 Mon Sep 17 00:00:00 2001 From: Max Schulze Date: Wed, 20 Dec 2017 20:47:44 +0100 Subject: USB: serial: ftdi_sio: add id for Airbus DS P8GR Add AIRBUS_DS_P8GR device IDs to ftdi_sio driver. Signed-off-by: Max Schulze Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1aba9105b369..fc68952c994a 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1013,6 +1013,7 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) }, { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) }, + { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 4faa09fe308c..8b4ecd2bd297 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -914,6 +914,12 @@ #define ICPDAS_I7561U_PID 0x0104 #define ICPDAS_I7563U_PID 0x0105 +/* + * Airbus Defence and Space + */ +#define AIRBUS_DS_VID 0x1e8e /* Vendor ID */ +#define AIRBUS_DS_P8GR 0x6001 /* Tetra P8GR */ + /* * RT Systems programming cables for various ham radios */ -- cgit v1.2.3 From 052f71e25a7ecd80a9567b291df8ea333d9a8565 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 21 Dec 2017 15:06:13 +0200 Subject: xhci: Fix xhci debugfs NULL pointer dereference in resume from hibernate Free the virt_device and its 
debugfs_private member together. When resuming from hibernate the .free_dev callback unconditionally freed the debugfs_private member, but could leave virt_device intact. This triggered a NULL pointer dereference after resume when usbmuxd sent a USBDEVFS_SETCONFIGURATION ioctl to a device, trying to add an endpoint debugfs entry to an already freed debugfs_private pointer. Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver") Reported-by: Alexander Kappner Tested-by: Alexander Kappner Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2424d3020ca3..da6dbe3ebd8b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3525,8 +3525,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) struct xhci_slot_ctx *slot_ctx; int i, ret; - xhci_debugfs_remove_slot(xhci, udev->slot_id); - #ifndef CONFIG_USB_DEFAULT_PERSIST /* * We called pm_runtime_get_noresume when the device was attached. @@ -3555,8 +3553,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) } ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) + if (ret) { + xhci_debugfs_remove_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); + } } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) -- cgit v1.2.3 From dde634057da71a3505d7a6c0b77bb24ded6728c8 Mon Sep 17 00:00:00 2001 From: Alexander Kappner Date: Thu, 21 Dec 2017 15:06:14 +0200 Subject: xhci: Fix use-after-free in xhci debugfs Trying to read from debugfs after the system has resumed from hibernate causes a use-after-free and thus a protection fault. Steps to reproduce: Hibernate system, resume from hibernate, then run $ cat /sys/kernel/debug/usb/xhci/*/command-ring/enqueue [ 3902.765086] general protection fault: 0000 [#1] PREEMPT SMP ...
[ 3902.765136] RIP: 0010:xhci_trb_virt_to_dma.part.50+0x5/0x30 ... [ 3902.765178] Call Trace: [ 3902.765188] xhci_ring_enqueue_show+0x1e/0x40 [ 3902.765197] seq_read+0xdb/0x3a0 [ 3902.765204] ? __handle_mm_fault+0x5fb/0x1210 [ 3902.765211] full_proxy_read+0x4a/0x70 [ 3902.765219] __vfs_read+0x23/0x120 [ 3902.765228] vfs_read+0x8e/0x130 [ 3902.765235] SyS_read+0x42/0x90 [ 3902.765242] do_syscall_64+0x6b/0x290 [ 3902.765251] entry_SYSCALL64_slow_path+0x25/0x25 The issue is caused by the xhci ring structures being reallocated when the system is resumed, but pointers to the old structures being retained in the debugfs files "private" field: The proposed patch fixes this issue by storing a pointer to the xhci_ring field in the xhci device structure in debugfs rather than directly storing a pointer to the xhci_ring. Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver") Signed-off-by: Alexander Kappner Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-debugfs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index 4f7895dbcf88..e26e685d8a57 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -162,7 +162,7 @@ static void xhci_debugfs_extcap_regset(struct xhci_hcd *xhci, int cap_id, static int xhci_ring_enqueue_show(struct seq_file *s, void *unused) { dma_addr_t dma; - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue); seq_printf(s, "%pad\n", &dma); @@ -173,7 +173,7 @@ static int xhci_ring_enqueue_show(struct seq_file *s, void *unused) static int xhci_ring_dequeue_show(struct seq_file *s, void *unused) { dma_addr_t dma; - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); 
seq_printf(s, "%pad\n", &dma); @@ -183,7 +183,7 @@ static int xhci_ring_dequeue_show(struct seq_file *s, void *unused) static int xhci_ring_cycle_show(struct seq_file *s, void *unused) { - struct xhci_ring *ring = s->private; + struct xhci_ring *ring = *(struct xhci_ring **)s->private; seq_printf(s, "%d\n", ring->cycle_state); @@ -346,7 +346,7 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci, } static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci, - struct xhci_ring *ring, + struct xhci_ring **ring, const char *name, struct dentry *parent) { @@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci, snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index); epriv->root = xhci_debugfs_create_ring_dir(xhci, - dev->eps[ep_index].new_ring, + &dev->eps[ep_index].new_ring, epriv->name, spriv->root); spriv->eps[ep_index] = epriv; @@ -423,7 +423,7 @@ void xhci_debugfs_create_slot(struct xhci_hcd *xhci, int slot_id) priv->dev = dev; dev->debugfs_private = priv; - xhci_debugfs_create_ring_dir(xhci, dev->eps[0].ring, + xhci_debugfs_create_ring_dir(xhci, &dev->eps[0].ring, "ep00", priv->root); xhci_debugfs_create_context_files(xhci, priv->root, slot_id); @@ -488,11 +488,11 @@ void xhci_debugfs_init(struct xhci_hcd *xhci) ARRAY_SIZE(xhci_extcap_dbc), "reg-ext-dbc"); - xhci_debugfs_create_ring_dir(xhci, xhci->cmd_ring, + xhci_debugfs_create_ring_dir(xhci, &xhci->cmd_ring, "command-ring", xhci->debugfs_root); - xhci_debugfs_create_ring_dir(xhci, xhci->event_ring, + xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring, "event-ring", xhci->debugfs_root); -- cgit v1.2.3 From da99706689481717998d1d48edd389f339eea979 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 21 Dec 2017 15:06:15 +0200 Subject: usb: xhci: Add XHCI_TRUST_TX_LENGTH for Renesas uPD720201 When plugging in a USB webcam I see the following message: xhci_hcd 0000:04:00.0: WARN Successful completion on short TX: needs XHCI_TRUST_TX_LENGTH quirk? 
handle_tx_event: 913 callbacks suppressed All is quiet again with this patch (and I've done a fair bit of soak testing with the camera since). Cc: Signed-off-by: Daniel Thompson Acked-by: Ard Biesheuvel Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7ef1274ef7f7..1aad89b8aba0 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -177,6 +177,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_TRUST_TX_LENGTH; xhci->quirks |= XHCI_BROKEN_STREAMS; } + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && + pdev->device == 0x0014) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0015) xhci->quirks |= XHCI_RESET_ON_RESUME; -- cgit v1.2.3 From e5fcc2abc353be94548080d84de3269ef6cc2af6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:01 +0800 Subject: perf stat: Define a structure for per-thread shadow stats Perf has a set of static variables to record the runtime shadow metrics stats. However, these static variables become a limitation if we want to record the runtime shadow stats per thread. This patch creates a structure and the next patches will use this structure to update the runtime shadow stats per thread.
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 11 ----------- tools/perf/util/stat.h | 43 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 57ec22513971..93aac2788056 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,17 +9,6 @@ #include "expr.h" #include "metricgroup.h" -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -#define NUM_CTX CTX_BIT_MAX - /* * AGGR_GLOBAL: Use CPU 0 * AGGR_SOCKET: Use first CPU of socket diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eefca5c981fd..c685c41f1fb9 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -5,6 +5,7 @@ #include #include #include "xyarray.h" +#include "rblist.h" struct stats { @@ -43,6 +44,47 @@ enum aggr_mode { AGGR_UNSET, }; +enum { + CTX_BIT_USER = 1 << 0, + CTX_BIT_KERNEL = 1 << 1, + CTX_BIT_HV = 1 << 2, + CTX_BIT_HOST = 1 << 3, + CTX_BIT_IDLE = 1 << 4, + CTX_BIT_MAX = 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +enum stat_type { + STAT_NONE = 0, + STAT_NSECS, + STAT_CYCLES, + STAT_STALLED_CYCLES_FRONT, + STAT_STALLED_CYCLES_BACK, + STAT_BRANCHES, + STAT_CACHEREFS, + STAT_L1_DCACHE, + STAT_L1_ICACHE, + STAT_LL_CACHE, + STAT_ITLB_CACHE, + STAT_DTLB_CACHE, + STAT_CYCLES_IN_TX, + STAT_TRANSACTION, + STAT_ELISION, + STAT_TOPDOWN_TOTAL_SLOTS, + STAT_TOPDOWN_SLOTS_ISSUED, + STAT_TOPDOWN_SLOTS_RETIRED, + STAT_TOPDOWN_FETCH_BUBBLES, + STAT_TOPDOWN_RECOVERY_BUBBLES, + STAT_SMI_NUM, + STAT_APERF, + STAT_MAX +}; + +struct runtime_stat { + struct rblist value_list; +}; + struct 
perf_stat_config { enum aggr_mode aggr_mode; bool scale; @@ -92,7 +134,6 @@ struct perf_stat_output_ctx { bool force_header; }; -struct rblist; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, -- cgit v1.2.3 From 49cd456af1dcb13ff3e94cb997c82968ae86722a Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:02 +0800 Subject: perf stat: Extend rbtree to support per-thread shadow stats Previously the rbtree was used to link generic metrics. This patch adds new ctx/type/stat fields to the rbtree keys, because we will use this rbtree to maintain shadow metrics, replacing the original set of static arrays, in order to support per-thread shadow stats. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 93aac2788056..528be3e8d13b 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -45,7 +45,10 @@ struct stats walltime_nsecs_stats; struct saved_value { struct rb_node rb_node; struct perf_evsel *evsel; + enum stat_type type; + int ctx; int cpu; + struct runtime_stat *stat; struct stats stats; }; @@ -58,6 +61,30 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->cpu != b->cpu) return a->cpu - b->cpu; + + /* + * Previously the rbtree was used to link generic metrics. + * The keys were evsel/cpu. Now the rbtree is extended to support + * per-thread shadow stats. For shadow stats case, the keys + * are cpu/type/ctx/stat (evsel is NULL). For generic metrics + * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
+ */ + if (a->type != b->type) + return a->type - b->type; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + + if (a->evsel == NULL && b->evsel == NULL) { + if (a->stat == b->stat) + return 0; + + if ((char *)a->stat < (char *)b->stat) + return -1; + + return 1; + } + if (a->evsel == b->evsel) return 0; if ((char *)a->evsel < (char *)b->evsel) -- cgit v1.2.3 From 8efb2df1288bc1bcc3711a97028620717319f138 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:03 +0800 Subject: perf stat: Create the runtime_stat init/exit function It mainly initializes and releases the rblist which is defined in struct runtime_stat. For the original rblist 'runtime_saved_values', it's still kept there for keeping the patch bisectable. The rblist 'runtime_saved_values' will be removed in later patch at switching time. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-4-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 17 +++++++++++++++++ tools/perf/util/stat.h | 3 +++ 2 files changed, 20 insertions(+) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 528be3e8d13b..07cfbf613bdc 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -40,6 +40,7 @@ static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; static struct rblist runtime_saved_values; static bool have_frontend_stalled; +struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; struct saved_value { @@ -134,6 +135,21 @@ static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, return NULL; } +void runtime_stat__init(struct runtime_stat *st) +{ + struct rblist *rblist = &st->value_list; + + rblist__init(rblist); + rblist->node_cmp = saved_value_cmp; 
+ rblist->node_new = saved_value_new; + rblist->node_delete = saved_value_delete; +} + +void runtime_stat__exit(struct runtime_stat *st) +{ + rblist__exit(&st->value_list); +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); @@ -141,6 +157,7 @@ void perf_stat__init_shadow_stats(void) runtime_saved_values.node_cmp = saved_value_cmp; runtime_saved_values.node_new = saved_value_new; runtime_saved_values.node_delete = saved_value_delete; + runtime_stat__init(&rt_stat); } static int evsel_context(struct perf_evsel *evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index c685c41f1fb9..f20240037377 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -117,12 +117,15 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, void perf_stat_evsel_id_init(struct perf_evsel *evsel); +extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void runtime_stat__init(struct runtime_stat *st); +void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, -- cgit v1.2.3 From 1fcd03946b52b8a57a6692fedd4406b45baedfe6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:04 +0800 Subject: perf stat: Update per-thread shadow stats The functions perf_stat__update_shadow_stats() is called to update the shadow stats on a set of static variables. But the static variables are the limitations to be extended to support per-thread shadow stats. This patch lets the perf_stat__update_shadow_stats() support to update the shadow stats on a input parameter 'st' and uses update_runtime_stat() to update the stats. It will not directly update the static variables as before. 
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-5-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +- tools/perf/builtin-stat.c | 3 +- tools/perf/util/stat-shadow.c | 86 +++++++++++++++++++++++++++++-------------- tools/perf/util/stat.c | 8 ++-- tools/perf/util/stat.h | 2 +- 5 files changed, 68 insertions(+), 34 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 39d8b55f0db3..81b395040298 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1548,7 +1548,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, val = sample->period * evsel->scale; perf_stat__update_shadow_stats(evsel, val, - sample->cpu); + sample->cpu, + &rt_stat); evsel_script(evsel)->val = val; if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { for_each_group_member (ev2, evsel->leader) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a027b4712e48..3f4a2c21b824 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1214,7 +1214,8 @@ static void aggr_update_shadow(void) val += perf_counts(counter->counts, cpu, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(counter, id)); + first_shadow_cpu(counter, id), + &rt_stat); } } } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 07cfbf613bdc..4b28c40de927 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -116,19 +116,29 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, int cpu, - bool create) + bool create, + enum stat_type type, + int ctx, + struct 
runtime_stat *st) { + struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { .cpu = cpu, .evsel = evsel, + .type = type, + .ctx = ctx, + .stat = st, }; - nd = rblist__find(&runtime_saved_values, &dm); + + rblist = &st->value_list; + + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); if (create) { - rblist__add_node(&runtime_saved_values, &dm); - nd = rblist__find(&runtime_saved_values, &dm); + rblist__add_node(rblist, &dm); + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); } @@ -217,13 +227,24 @@ void perf_stat__reset_shadow_stats(void) } } +static void update_runtime_stat(struct runtime_stat *st, + enum stat_type type, + int ctx, int cpu, u64 count) +{ + struct saved_value *v = saved_value_lookup(NULL, cpu, true, + type, ctx, st); + + if (v) + update_stats(&v->stats, count); +} + /* * Update various tracking values we maintain to print * more semantic information such as miss/hit ratios, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu) + int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); @@ -231,50 +252,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) - update_stats(&runtime_nsecs_stats[cpu], count); + update_runtime_stat(st, STAT_NSECS, 0, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_stats(&runtime_transaction_stats[ctx][cpu], count); + 
update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_stats(&runtime_elision_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) - update_stats(&runtime_topdown_total_slots[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) - update_stats(&runtime_topdown_slots_issued[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) - update_stats(&runtime_topdown_slots_retired[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) - update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) - update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[ctx][cpu], count); + 
update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_stats(&runtime_smi_num_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); else if (perf_stat_evsel__is(counter, APERF)) - update_stats(&runtime_aperf_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_APERF, ctx, cpu, count); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, true); + struct saved_value *v = saved_value_lookup(counter, cpu, true, + STAT_NONE, 0, st); update_stats(&v->stats, count); } } @@ -694,7 +723,8 @@ static void generic_metric(const char *metric_expr, stats = &walltime_nsecs_stats; scale = 1e-9; } else { - v = saved_value_lookup(metric_events[i], cpu, false); + v = saved_value_lookup(metric_events[i], cpu, false, + STAT_NONE, 0, &rt_stat); if (!v) break; stats = &v->stats; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 151e9efd7286..78abfd40b135 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -278,9 +278,11 @@ 
process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->val, cpu); + perf_stat__update_shadow_stats(evsel, count->val, cpu, + &rt_stat); if (config->aggr_mode == AGGR_THREAD) - perf_stat__update_shadow_stats(evsel, count->val, 0); + perf_stat__update_shadow_stats(evsel, count->val, 0, + &rt_stat); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -362,7 +364,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - perf_stat__update_shadow_stats(counter, *count, 0); + perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f20240037377..bb9902ad3a79 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -129,7 +129,7 @@ void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu); + int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; -- cgit v1.2.3 From e0128b30dbfb2884530251b4accdffdbf55a6b72 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:05 +0800 Subject: perf stat: Print per-thread shadow stats The function perf_stat__print_shadow_stats() is called to print the shadow stats on a set of static variables. But the static variables are a limitation for supporting per-thread shadow stats. This patch lets perf_stat__print_shadow_stats() print the shadow stats from an input parameter 'st'. It no longer gets values directly from the static variables. Instead, it now uses runtime_stat_avg() and runtime_stat_n() to get and compute the values.
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-6-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +- tools/perf/builtin-stat.c | 23 +++-- tools/perf/util/stat-shadow.c | 209 ++++++++++++++++++++++++++---------------- tools/perf/util/stat.h | 3 +- 4 files changed, 151 insertions(+), 87 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 81b395040298..fac6f053e4da 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1557,7 +1557,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, evsel_script(ev2)->val, sample->cpu, &ctx, - NULL); + NULL, + &rt_stat); } evsel_script(evsel->leader)->gnum = 0; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3f4a2c21b824..097a694d16f2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1097,7 +1097,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } static void printout(int id, int nr, struct perf_evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise) + char *prefix, u64 run, u64 ena, double noise, + struct runtime_stat *st) { struct perf_stat_output_ctx out; struct outstate os = { @@ -1190,7 +1191,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), - &out, &metric_events); + &out, &metric_events, st); if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); @@ -1335,7 +1336,8 @@ static void print_aggr(char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(id, nr, counter, uval, prefix, run, ena, 
1.0); + printout(id, nr, counter, uval, prefix, run, ena, 1.0, + &rt_stat); if (!metric_only) fputc('\n', output); } @@ -1365,7 +1367,8 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix, run, ena, 1.0); + printout(thread, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); fputc('\n', output); } } @@ -1402,7 +1405,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); } @@ -1441,7 +1445,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); fputc('\n', output); } @@ -1473,7 +1478,8 @@ static void print_no_aggr_metric(char *prefix) run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); } fputc('\n', stat_config.output); } @@ -1529,7 +1535,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) perf_stat__print_shadow_stats(counter, 0, 0, &out, - &metric_events); + &metric_events, + &rt_stat); } fputc('\n', stat_config.output); } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 4b28c40de927..a95c4fe991aa 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -424,15 +424,40 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) } } +static double runtime_stat_avg(struct runtime_stat *st, + enum 
stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return avg_stats(&v->stats); +} + +static double runtime_stat_n(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return v->stats.n; +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -448,13 +473,14 @@ static void print_stalled_cycles_frontend(int cpu, static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -467,13 +493,14 @@ static void print_stalled_cycles_backend(int cpu, static void print_branch_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_branches_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -486,13 +513,15 @@ static void print_branch_misses(int cpu, static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + 
struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -505,13 +534,15 @@ static void print_l1_dcache_misses(int cpu, static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -523,13 +554,14 @@ static void print_l1_icache_misses(int cpu, static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -541,13 +573,14 @@ static void print_dtlb_cache_misses(int cpu, static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -559,13 +592,14 @@ static void print_itlb_cache_misses(int cpu, static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) 
{ double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -623,68 +657,72 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu) +static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) { - return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); } -static double td_bad_spec(int ctx, int cpu) +static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) { double bad_spec = 0; double total_slots; double total; - total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - - avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + - avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); - total_slots = td_total_slots(ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + + total_slots = td_total_slots(ctx, cpu, st); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu) +static double td_retiring(int ctx, int cpu, struct runtime_stat *st) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu); - double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu) +static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu); - double fetch_bub = 
avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu) +static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) { - double sum = (td_fe_bound(ctx, cpu) + - td_bad_spec(ctx, cpu) + - td_retiring(ctx, cpu)); + double sum = (td_fe_bound(ctx, cpu, st) + + td_bad_spec(ctx, cpu, st) + + td_retiring(ctx, cpu, st)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); } static void print_smi_cost(int cpu, struct perf_evsel *evsel, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double smi_num, aperf, cycles, cost = 0.0; int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); - aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); - cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); + aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); + cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if ((cycles == 0) || (aperf == 0)) return; @@ -704,7 +742,8 @@ static void generic_metric(const char *metric_expr, const char *metric_name, double avg, int cpu, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; struct parse_ctx pctx; @@ -724,7 +763,7 @@ static void generic_metric(const char *metric_expr, scale = 1e-9; } else { v = saved_value_lookup(metric_events[i], cpu, false, - STAT_NONE, 0, &rt_stat); + STAT_NONE, 0, st); if (!v) break; stats = &v->stats; @@ -752,7 +791,8 @@ static void generic_metric(const char *metric_expr, void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist 
*metric_events) + struct rblist *metric_events, + struct runtime_stat *st) { void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; @@ -763,7 +803,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, int num = 1; if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) { ratio = avg / total; print_metric(ctxp, NULL, "%7.2f ", @@ -771,8 +812,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } else { print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } - total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); - total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu); + + total = max(total, runtime_stat_avg(st, + STAT_STALLED_CYCLES_BACK, + ctx, cpu)); if (total && avg) { out->new_line(ctxp); @@ -785,8 +831,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_branches_stats[ctx][cpu].n != 0) - print_branch_misses(cpu, evsel, avg, out); + if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) + print_branch_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -794,8 +840,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_dcache_stats[ctx][cpu].n != 0) - print_l1_dcache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) + print_l1_dcache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( @@ -803,8 +850,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel 
*evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_icache_stats[ctx][cpu].n != 0) - print_l1_icache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) + print_l1_icache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( @@ -812,8 +860,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( @@ -821,8 +870,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_itlb_cache_stats[ctx][cpu].n != 0) - print_itlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) + print_itlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( @@ -830,27 +880,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_ll_cache_stats[ctx][cpu].n != 0) - print_ll_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) + print_ll_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = 
avg_stats(&runtime_cacherefs_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) ratio = avg * 100 / total; - if (runtime_cacherefs_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out); + print_stalled_cycles_frontend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out); + print_stalled_cycles_backend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; @@ -859,7 +910,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) print_metric(ctxp, NULL, "%7.2f%%", "transactional cycles", @@ -868,8 +920,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + if (total2 < avg) total2 = avg; if (total) @@ -878,19 +931,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = 
avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; - if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(ctxp, NULL, NULL, "cycles / transaction", - 0); + 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; @@ -904,28 +959,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu); + double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu); + double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu); + double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu); + double be_bound = td_be_bound(ctx, cpu, st); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -938,19 +993,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu) > 0) + if (td_total_slots(ctx, cpu, st) > 0) 
print_metric(ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, avg, cpu, out); - } else if (runtime_nsecs_stats[cpu].n != 0) { + evsel->metric_name, avg, cpu, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) ratio = 1000.0 * avg / total; @@ -961,7 +1016,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out); + print_smi_cost(cpu, evsel, out, st); } else { num = 0; } @@ -974,7 +1029,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, out->new_line(ctxp); generic_metric(mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - avg, cpu, out); + avg, cpu, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index bb9902ad3a79..76b322a2d293 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -140,7 +140,8 @@ struct perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist *metric_events); + struct rblist *metric_events, + struct runtime_stat *st); void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); -- cgit v1.2.3 From 6a1e2c5c267358455a13bd8d59547430370c845a Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:06 +0800 Subject: perf stat: Remove a set of shadow stats static variables In previous patches, we have reconstructed the code and let it not access the static 
variables directly. This patch removes these static variables. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-7-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 68 ++++++++++--------------------------------- tools/perf/util/stat.h | 1 + 2 files changed, 16 insertions(+), 53 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a95c4fe991aa..594d14a02b67 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -16,28 +16,6 @@ * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? */ -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; -static struct stats 
runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; -static struct rblist runtime_saved_values; static bool have_frontend_stalled; struct runtime_stat rt_stat; @@ -163,10 +141,6 @@ void runtime_stat__exit(struct runtime_stat *st) void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); - rblist__init(&runtime_saved_values); - runtime_saved_values.node_cmp = saved_value_cmp; - runtime_saved_values.node_new = saved_value_new; - runtime_saved_values.node_delete = saved_value_delete; runtime_stat__init(&rt_stat); } @@ -188,36 +162,13 @@ static int evsel_context(struct perf_evsel *evsel) return ctx; } -void perf_stat__reset_shadow_stats(void) +static void reset_stat(struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *pos, *next; - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); - memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); - memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); - memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); - memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); - memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); - memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); - memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); - memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); - memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); - memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); - memset(runtime_cycles_in_tx_stats, 0, - sizeof(runtime_cycles_in_tx_stats)); - 
memset(runtime_transaction_stats, 0, - sizeof(runtime_transaction_stats)); - memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); - memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); - memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); - memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); - memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); - memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); - memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); - - next = rb_first(&runtime_saved_values.entries); + rblist = &st->value_list; + next = rb_first(&rblist->entries); while (next) { pos = next; next = rb_next(pos); @@ -227,6 +178,17 @@ void perf_stat__reset_shadow_stats(void) } } +void perf_stat__reset_shadow_stats(void) +{ + reset_stat(&rt_stat); + memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) +{ + reset_stat(st); +} + static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, int ctx, int cpu, u64 count) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 76b322a2d293..cfe4fb899633 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -128,6 +128,7 @@ void runtime_stat__init(struct runtime_stat *st); void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { -- cgit v1.2.3 From 56739444d861daa050624d40c7adff32c73e9980 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 
5 Dec 2017 22:03:07 +0800 Subject: perf stat: Allocate shadow stats buffer for threads After perf_evlist__create_maps() being executed, we can get all threads from /proc. And via thread_map__nr(), we can also get the number of threads. With the number of threads, the patch allocates a buffer which will record the shadow stats for these threads. The buffer pointer is saved in stat_config. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-8-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/stat.h | 2 ++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 097a694d16f2..4c492ac3ac07 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -214,8 +214,13 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, static void perf_stat__reset_stats(void) { + int i; + perf_evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); + + for (i = 0; i < stat_config.stats_num; i++) + perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); } static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -2495,6 +2500,35 @@ int process_cpu_map_event(struct perf_tool *tool, return set_maps(st); } +static int runtime_stat_new(struct perf_stat_config *config, int nthreads) +{ + int i; + + config->stats = calloc(nthreads, sizeof(struct runtime_stat)); + if (!config->stats) + return -1; + + config->stats_num = nthreads; + + for (i = 0; i < nthreads; i++) + runtime_stat__init(&config->stats[i]); + + return 0; +} + +static void runtime_stat_delete(struct perf_stat_config *config) +{ + int i; + + if (!config->stats) + return; + + for (i = 0; i < config->stats_num; i++) + runtime_stat__exit(&config->stats[i]); + + free(config->stats); +} + 
static const char * const stat_report_usage[] = { "perf stat report []", NULL, @@ -2750,8 +2784,15 @@ int cmd_stat(int argc, const char **argv) * Initialize thread_map with comm names, * so we could print it out on output. */ - if (stat_config.aggr_mode == AGGR_THREAD) + if (stat_config.aggr_mode == AGGR_THREAD) { thread_map__read_comms(evsel_list->threads); + if (target.system_wide) { + if (runtime_stat_new(&stat_config, + thread_map__nr(evsel_list->threads))) { + goto out; + } + } + } if (interval && interval < 100) { if (interval < 10) { @@ -2841,5 +2882,8 @@ out: sysfs__write_int(FREEZE_ON_SMI_PATH, 0); perf_evlist__delete(evsel_list); + + runtime_stat_delete(&stat_config); + return status; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index cfe4fb899633..2ed95dc72784 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,8 @@ struct perf_stat_config { bool scale; FILE *output; unsigned int interval; + struct runtime_stat *stats; + int stats_num; }; void update_stats(struct stats *stats, u64 val); -- cgit v1.2.3 From 14e72a21c783654ca7b6c897b6d6508c1abccd7d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:08 +0800 Subject: perf stat: Update or print per-thread stats If the stats pointer in stat_config structure is not null, it will update the per-thread stats or print the per-thread stats on this buffer. 
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-9-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 +++++++-- tools/perf/util/stat.c | 11 ++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4c492ac3ac07..f4129a5fbb01 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1372,8 +1372,13 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + + if (stat_config.stats) + printout(thread, 0, counter, uval, prefix, run, ena, + 1.0, &stat_config.stats[thread]); + else + printout(thread, 0, counter, uval, prefix, run, ena, + 1.0, &rt_stat); fputc('\n', output); } } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 78abfd40b135..32235657c1ac 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -280,9 +280,14 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel if (config->aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->val, cpu, &rt_stat); - if (config->aggr_mode == AGGR_THREAD) - perf_stat__update_shadow_stats(evsel, count->val, 0, - &rt_stat); + if (config->aggr_mode == AGGR_THREAD) { + if (config->stats) + perf_stat__update_shadow_stats(evsel, + count->val, 0, &config->stats[thread]); + else + perf_stat__update_shadow_stats(evsel, + count->val, 0, &rt_stat); + } break; case AGGR_GLOBAL: aggr->val += count->val; -- cgit v1.2.3 From 73c0ca1eee3d2c96898e05a16be49da2a6d590b2 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:09 +0800 Subject: perf thread_map: Enumerate all threads from /proc This patch calls thread_map__new_all_cpus() to enumerate all 
threads from /proc if per-thread flag is enabled. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-10-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 2 +- tools/perf/util/evlist.c | 3 ++- tools/perf/util/thread_map.c | 5 ++++- tools/perf/util/thread_map.h | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index dbcb6a19b375..4de1939b58ba 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -105,7 +105,7 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb TEST_ASSERT_VAL("failed to allocate map string", asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); - threads = thread_map__new_str(str, NULL, 0); + threads = thread_map__new_str(str, NULL, 0, false); TEST_ASSERT_VAL("failed to allocate thread_map", threads); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3570355bcf39..f0a5e09c4071 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1105,7 +1105,8 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) struct cpu_map *cpus; struct thread_map *threads; - threads = thread_map__new_str(target->pid, target->tid, target->uid); + threads = thread_map__new_str(target->pid, target->tid, target->uid, + target->per_thread); if (!threads) return -1; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 2b653853eec2..3e1038f6491c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -323,7 +323,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid) + uid_t uid, bool per_thread) { if (pid) return thread_map__new_by_pid_str(pid); @@ -331,6 +331,9 @@ struct thread_map 
*thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); + if (per_thread) + return thread_map__new_all_cpus(); + return thread_map__new_by_tid_str(tid); } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 07a765fb22bb..0a806b99e73c 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid); + const char *tid, uid_t uid, bool per_thread); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); -- cgit v1.2.3 From 1d9f8d1b824bf69cf984c1c36e5641b51eea42bb Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:10 +0800 Subject: perf stat: Remove --per-thread pid/tid limitation Currently, if we execute 'perf stat --per-thread' without specifying pid/tid, perf will return error. root@skl:/tmp# perf stat --per-thread The --per-thread option is only available when monitoring via -p -t options. -p, --pid stat events on existing process id -t, --tid stat events on existing thread id This patch removes this limitation. If no pid/tid specified, it returns all threads (get threads from /proc). Note that it doesn't support cpu_list yet so if it's a cpu_list case, then skip. 
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-11-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 23 +++++++++++++++-------- tools/perf/util/target.h | 7 +++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f4129a5fbb01..ee708ba6f79a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -277,7 +277,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->enable_on_exec = 1; } - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); return perf_evsel__open_per_thread(evsel, evsel_list->threads); @@ -340,7 +340,7 @@ static int read_counter(struct perf_evsel *counter) int nthreads = thread_map__nr(evsel_list->threads); int ncpus, cpu, thread; - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) ncpus = perf_evsel__nr_cpus(counter); else ncpus = 1; @@ -2743,12 +2743,16 @@ int cmd_stat(int argc, const char **argv) run_count = 1; } - if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { - fprintf(stderr, "The --per-thread option is only available " - "when monitoring via -p -t options.\n"); - parse_options_usage(NULL, stat_options, "p", 1); - parse_options_usage(NULL, stat_options, "t", 1); - goto out; + if ((stat_config.aggr_mode == AGGR_THREAD) && + !target__has_task(&target)) { + if (!target.system_wide || target.cpu_list) { + fprintf(stderr, "The --per-thread option is only " + "available when monitoring via -p -t -a " + "options or only --per-thread.\n"); + parse_options_usage(NULL, stat_options, "p", 1); + parse_options_usage(NULL, stat_options, "t", 1); + goto out; + } } /* @@ -2772,6 +2776,9 @@ int 
cmd_stat(int argc, const char **argv) target__validate(&target); + if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) + target.per_thread = true; + if (perf_evlist__create_maps(evsel_list, &target) < 0) { if (target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 446aa7a56f25..6ef01a83b24e 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -64,6 +64,11 @@ static inline bool target__none(struct target *target) return !target__has_task(target) && !target__has_cpu(target); } +static inline bool target__has_per_thread(struct target *target) +{ + return target->system_wide && target->per_thread; +} + static inline bool target__uses_dummy_map(struct target *target) { bool use_dummy = false; @@ -73,6 +78,8 @@ static inline bool target__uses_dummy_map(struct target *target) else if (target__has_task(target) || (!target__has_cpu(target) && !target->uses_mmap)) use_dummy = true; + else if (target__has_per_thread(target)) + use_dummy = true; return use_dummy; } -- cgit v1.2.3 From 29734550c996c259ffa8d32198439d6fe4b51320 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:11 +0800 Subject: perf stat: Resort '--per-thread' result There are many threads reported if we enable '--per-thread' globally. 1. Most of the threads are not counted or counting value 0. This patch removes these threads. 2. We also resort the threads in display according to the counting value. It's useful for user to see the hottest threads easily. 
For example, the new results would be: root@skl:/tmp# perf stat --per-thread ^C Performance counter stats for 'system wide': perf-24165 4.302433 cpu-clock (msec) # 0.001 CPUs utilized vmstat-23127 1.562215 cpu-clock (msec) # 0.000 CPUs utilized irqbalance-2780 0.827851 cpu-clock (msec) # 0.000 CPUs utilized sshd-23111 0.278308 cpu-clock (msec) # 0.000 CPUs utilized thermald-2841 0.230880 cpu-clock (msec) # 0.000 CPUs utilized sshd-23058 0.207306 cpu-clock (msec) # 0.000 CPUs utilized kworker/0:2-19991 0.133983 cpu-clock (msec) # 0.000 CPUs utilized kworker/u16:1-18249 0.125636 cpu-clock (msec) # 0.000 CPUs utilized rcu_sched-8 0.085533 cpu-clock (msec) # 0.000 CPUs utilized kworker/u16:2-23146 0.077139 cpu-clock (msec) # 0.000 CPUs utilized gmain-2700 0.041789 cpu-clock (msec) # 0.000 CPUs utilized kworker/4:1-15354 0.028370 cpu-clock (msec) # 0.000 CPUs utilized kworker/6:0-17528 0.023895 cpu-clock (msec) # 0.000 CPUs utilized kworker/4:1H-1887 0.013209 cpu-clock (msec) # 0.000 CPUs utilized kworker/5:2-31362 0.011627 cpu-clock (msec) # 0.000 CPUs utilized watchdog/0-11 0.010892 cpu-clock (msec) # 0.000 CPUs utilized kworker/3:2-12870 0.010220 cpu-clock (msec) # 0.000 CPUs utilized ksoftirqd/0-7 0.008869 cpu-clock (msec) # 0.000 CPUs utilized watchdog/1-14 0.008476 cpu-clock (msec) # 0.000 CPUs utilized watchdog/7-50 0.002944 cpu-clock (msec) # 0.000 CPUs utilized watchdog/3-26 0.002893 cpu-clock (msec) # 0.000 CPUs utilized watchdog/4-32 0.002759 cpu-clock (msec) # 0.000 CPUs utilized watchdog/2-20 0.002429 cpu-clock (msec) # 0.000 CPUs utilized watchdog/6-44 0.001491 cpu-clock (msec) # 0.000 CPUs utilized watchdog/5-38 0.001477 cpu-clock (msec) # 0.000 CPUs utilized rcu_sched-8 10 context-switches # 0.117 M/sec kworker/u16:1-18249 7 context-switches # 0.056 M/sec sshd-23111 4 context-switches # 0.014 M/sec vmstat-23127 4 context-switches # 0.003 M/sec perf-24165 4 context-switches # 0.930 K/sec kworker/0:2-19991 3 context-switches # 0.022 M/sec 
kworker/u16:2-23146 3 context-switches # 0.039 M/sec kworker/4:1-15354 2 context-switches # 0.070 M/sec kworker/6:0-17528 2 context-switches # 0.084 M/sec sshd-23058 2 context-switches # 0.010 M/sec ksoftirqd/0-7 1 context-switches # 0.113 M/sec watchdog/0-11 1 context-switches # 0.092 M/sec watchdog/1-14 1 context-switches # 0.118 M/sec watchdog/2-20 1 context-switches # 0.412 M/sec watchdog/3-26 1 context-switches # 0.346 M/sec watchdog/4-32 1 context-switches # 0.362 M/sec watchdog/5-38 1 context-switches # 0.677 M/sec watchdog/6-44 1 context-switches # 0.671 M/sec watchdog/7-50 1 context-switches # 0.340 M/sec kworker/4:1H-1887 1 context-switches # 0.076 M/sec thermald-2841 1 context-switches # 0.004 M/sec gmain-2700 1 context-switches # 0.024 M/sec irqbalance-2780 1 context-switches # 0.001 M/sec kworker/3:2-12870 1 context-switches # 0.098 M/sec kworker/5:2-31362 1 context-switches # 0.086 M/sec kworker/u16:1-18249 2 cpu-migrations # 0.016 M/sec kworker/u16:2-23146 2 cpu-migrations # 0.026 M/sec rcu_sched-8 1 cpu-migrations # 0.012 M/sec sshd-23058 1 cpu-migrations # 0.005 M/sec perf-24165 8,833,385 cycles # 2.053 GHz vmstat-23127 1,702,699 cycles # 1.090 GHz irqbalance-2780 739,847 cycles # 0.894 GHz sshd-23111 269,506 cycles # 0.968 GHz thermald-2841 204,556 cycles # 0.886 GHz sshd-23058 158,780 cycles # 0.766 GHz kworker/0:2-19991 112,981 cycles # 0.843 GHz kworker/u16:1-18249 100,926 cycles # 0.803 GHz rcu_sched-8 74,024 cycles # 0.865 GHz kworker/u16:2-23146 55,984 cycles # 0.726 GHz gmain-2700 34,278 cycles # 0.820 GHz kworker/4:1-15354 20,665 cycles # 0.728 GHz kworker/6:0-17528 16,445 cycles # 0.688 GHz kworker/5:2-31362 9,492 cycles # 0.816 GHz watchdog/3-26 8,695 cycles # 3.006 GHz kworker/4:1H-1887 8,238 cycles # 0.624 GHz watchdog/4-32 7,580 cycles # 2.747 GHz kworker/3:2-12870 7,306 cycles # 0.715 GHz watchdog/2-20 7,274 cycles # 2.995 GHz watchdog/0-11 6,988 cycles # 0.642 GHz ksoftirqd/0-7 6,376 cycles # 0.719 GHz watchdog/1-14 5,340 cycles # 
0.630 GHz watchdog/5-38 4,061 cycles # 2.749 GHz watchdog/6-44 3,976 cycles # 2.667 GHz watchdog/7-50 3,418 cycles # 1.161 GHz vmstat-23127 2,511,699 instructions # 1.48 insn per cycle perf-24165 1,829,908 instructions # 0.21 insn per cycle irqbalance-2780 1,190,204 instructions # 1.61 insn per cycle thermald-2841 143,544 instructions # 0.70 insn per cycle sshd-23111 128,138 instructions # 0.48 insn per cycle sshd-23058 57,654 instructions # 0.36 insn per cycle rcu_sched-8 44,063 instructions # 0.60 insn per cycle kworker/u16:1-18249 42,551 instructions # 0.42 insn per cycle kworker/0:2-19991 25,873 instructions # 0.23 insn per cycle kworker/u16:2-23146 21,407 instructions # 0.38 insn per cycle gmain-2700 13,691 instructions # 0.40 insn per cycle kworker/4:1-15354 12,964 instructions # 0.63 insn per cycle kworker/6:0-17528 10,034 instructions # 0.61 insn per cycle kworker/5:2-31362 5,203 instructions # 0.55 insn per cycle kworker/3:2-12870 4,866 instructions # 0.67 insn per cycle kworker/4:1H-1887 3,586 instructions # 0.44 insn per cycle ksoftirqd/0-7 3,463 instructions # 0.54 insn per cycle watchdog/0-11 3,135 instructions # 0.45 insn per cycle watchdog/1-14 3,135 instructions # 0.59 insn per cycle watchdog/2-20 3,135 instructions # 0.43 insn per cycle watchdog/3-26 3,135 instructions # 0.36 insn per cycle watchdog/4-32 3,135 instructions # 0.41 insn per cycle watchdog/5-38 3,135 instructions # 0.77 insn per cycle watchdog/6-44 3,135 instructions # 0.79 insn per cycle watchdog/7-50 3,135 instructions # 0.92 insn per cycle vmstat-23127 539,181 branches # 345.139 M/sec perf-24165 375,364 branches # 87.245 M/sec irqbalance-2780 262,092 branches # 316.593 M/sec thermald-2841 31,611 branches # 136.915 M/sec sshd-23111 21,874 branches # 78.596 M/sec sshd-23058 10,682 branches # 51.528 M/sec rcu_sched-8 8,693 branches # 101.633 M/sec kworker/u16:1-18249 7,891 branches # 62.808 M/sec kworker/0:2-19991 5,761 branches # 42.998 M/sec kworker/u16:2-23146 4,099 branches # 
53.138 M/sec kworker/4:1-15354 2,755 branches # 97.110 M/sec gmain-2700 2,638 branches # 63.127 M/sec kworker/6:0-17528 2,216 branches # 92.739 M/sec kworker/5:2-31362 1,132 branches # 97.360 M/sec kworker/3:2-12870 1,081 branches # 105.773 M/sec kworker/4:1H-1887 725 branches # 54.887 M/sec ksoftirqd/0-7 707 branches # 79.716 M/sec watchdog/0-11 652 branches # 59.860 M/sec watchdog/1-14 652 branches # 76.923 M/sec watchdog/2-20 652 branches # 268.423 M/sec watchdog/3-26 652 branches # 225.372 M/sec watchdog/4-32 652 branches # 236.318 M/sec watchdog/5-38 652 branches # 441.435 M/sec watchdog/6-44 652 branches # 437.290 M/sec watchdog/7-50 652 branches # 221.467 M/sec vmstat-23127 8,960 branch-misses # 1.66% of all branches irqbalance-2780 3,047 branch-misses # 1.16% of all branches perf-24165 2,876 branch-misses # 0.77% of all branches sshd-23111 1,843 branch-misses # 8.43% of all branches thermald-2841 1,444 branch-misses # 4.57% of all branches sshd-23058 1,379 branch-misses # 12.91% of all branches kworker/u16:1-18249 982 branch-misses # 12.44% of all branches rcu_sched-8 893 branch-misses # 10.27% of all branches kworker/u16:2-23146 578 branch-misses # 14.10% of all branches kworker/0:2-19991 376 branch-misses # 6.53% of all branches gmain-2700 280 branch-misses # 10.61% of all branches kworker/6:0-17528 196 branch-misses # 8.84% of all branches kworker/4:1-15354 187 branch-misses # 6.79% of all branches kworker/5:2-31362 123 branch-misses # 10.87% of all branches watchdog/0-11 95 branch-misses # 14.57% of all branches watchdog/4-32 89 branch-misses # 13.65% of all branches kworker/3:2-12870 80 branch-misses # 7.40% of all branches watchdog/3-26 61 branch-misses # 9.36% of all branches kworker/4:1H-1887 60 branch-misses # 8.28% of all branches watchdog/2-20 52 branch-misses # 7.98% of all branches ksoftirqd/0-7 47 branch-misses # 6.65% of all branches watchdog/1-14 46 branch-misses # 7.06% of all branches watchdog/7-50 13 branch-misses # 1.99% of all branches 
watchdog/5-38 8 branch-misses # 1.23% of all branches watchdog/6-44 7 branch-misses # 1.07% of all branches 3.695150786 seconds time elapsed root@skl:/tmp# perf stat --per-thread -M IPC,CPI ^C Performance counter stats for 'system wide': vmstat-23127 2,000,783 inst_retired.any # 1.5 IPC thermald-2841 1,472,670 inst_retired.any # 1.3 IPC sshd-23111 977,374 inst_retired.any # 1.2 IPC perf-24163 483,779 inst_retired.any # 0.2 IPC gmain-2700 341,213 inst_retired.any # 0.9 IPC sshd-23058 148,891 inst_retired.any # 0.8 IPC rtkit-daemon-3288 71,210 inst_retired.any # 0.7 IPC kworker/u16:1-18249 39,562 inst_retired.any # 0.3 IPC rcu_sched-8 14,474 inst_retired.any # 0.8 IPC kworker/0:2-19991 7,659 inst_retired.any # 0.2 IPC kworker/4:1-15354 6,714 inst_retired.any # 0.8 IPC rtkit-daemon-3289 4,839 inst_retired.any # 0.3 IPC kworker/6:0-17528 3,321 inst_retired.any # 0.6 IPC kworker/5:2-31362 3,215 inst_retired.any # 0.5 IPC kworker/7:2-23145 3,173 inst_retired.any # 0.7 IPC kworker/4:1H-1887 1,719 inst_retired.any # 0.3 IPC watchdog/0-11 1,479 inst_retired.any # 0.3 IPC watchdog/1-14 1,479 inst_retired.any # 0.3 IPC watchdog/2-20 1,479 inst_retired.any # 0.4 IPC watchdog/3-26 1,479 inst_retired.any # 0.4 IPC watchdog/4-32 1,479 inst_retired.any # 0.3 IPC watchdog/5-38 1,479 inst_retired.any # 0.3 IPC watchdog/6-44 1,479 inst_retired.any # 0.7 IPC watchdog/7-50 1,479 inst_retired.any # 0.7 IPC kworker/u16:2-23146 1,408 inst_retired.any # 0.5 IPC perf-24163 2,249,872 cpu_clk_unhalted.thread vmstat-23127 1,352,455 cpu_clk_unhalted.thread thermald-2841 1,161,140 cpu_clk_unhalted.thread sshd-23111 807,827 cpu_clk_unhalted.thread gmain-2700 375,535 cpu_clk_unhalted.thread sshd-23058 194,071 cpu_clk_unhalted.thread kworker/u16:1-18249 114,306 cpu_clk_unhalted.thread rtkit-daemon-3288 103,547 cpu_clk_unhalted.thread kworker/0:2-19991 46,550 cpu_clk_unhalted.thread rcu_sched-8 18,855 cpu_clk_unhalted.thread rtkit-daemon-3289 17,549 cpu_clk_unhalted.thread kworker/4:1-15354 8,812 
cpu_clk_unhalted.thread kworker/5:2-31362 6,812 cpu_clk_unhalted.thread kworker/4:1H-1887 5,270 cpu_clk_unhalted.thread kworker/6:0-17528 5,111 cpu_clk_unhalted.thread kworker/7:2-23145 4,667 cpu_clk_unhalted.thread watchdog/0-11 4,663 cpu_clk_unhalted.thread watchdog/1-14 4,663 cpu_clk_unhalted.thread watchdog/4-32 4,626 cpu_clk_unhalted.thread watchdog/5-38 4,403 cpu_clk_unhalted.thread watchdog/3-26 3,936 cpu_clk_unhalted.thread watchdog/2-20 3,850 cpu_clk_unhalted.thread kworker/u16:2-23146 2,654 cpu_clk_unhalted.thread watchdog/6-44 2,017 cpu_clk_unhalted.thread watchdog/7-50 2,017 cpu_clk_unhalted.thread vmstat-23127 2,000,783 inst_retired.any # 0.7 CPI thermald-2841 1,472,670 inst_retired.any # 0.8 CPI sshd-23111 977,374 inst_retired.any # 0.8 CPI perf-24163 495,037 inst_retired.any # 4.7 CPI gmain-2700 341,213 inst_retired.any # 1.1 CPI sshd-23058 148,891 inst_retired.any # 1.3 CPI rtkit-daemon-3288 71,210 inst_retired.any # 1.5 CPI kworker/u16:1-18249 39,562 inst_retired.any # 2.9 CPI rcu_sched-8 14,474 inst_retired.any # 1.3 CPI kworker/0:2-19991 7,659 inst_retired.any # 6.1 CPI kworker/4:1-15354 6,714 inst_retired.any # 1.3 CPI rtkit-daemon-3289 4,839 inst_retired.any # 3.6 CPI kworker/6:0-17528 3,321 inst_retired.any # 1.5 CPI kworker/5:2-31362 3,215 inst_retired.any # 2.1 CPI kworker/7:2-23145 3,173 inst_retired.any # 1.5 CPI kworker/4:1H-1887 1,719 inst_retired.any # 3.1 CPI watchdog/0-11 1,479 inst_retired.any # 3.2 CPI watchdog/1-14 1,479 inst_retired.any # 3.2 CPI watchdog/2-20 1,479 inst_retired.any # 2.6 CPI watchdog/3-26 1,479 inst_retired.any # 2.7 CPI watchdog/4-32 1,479 inst_retired.any # 3.1 CPI watchdog/5-38 1,479 inst_retired.any # 3.0 CPI watchdog/6-44 1,479 inst_retired.any # 1.4 CPI watchdog/7-50 1,479 inst_retired.any # 1.4 CPI kworker/u16:2-23146 1,408 inst_retired.any # 1.9 CPI perf-24163 2,302,323 cycles vmstat-23127 1,352,455 cycles thermald-2841 1,161,140 cycles sshd-23111 807,827 cycles gmain-2700 375,535 cycles sshd-23058 
194,071 cycles kworker/u16:1-18249 114,306 cycles rtkit-daemon-3288 103,547 cycles kworker/0:2-19991 46,550 cycles rcu_sched-8 18,855 cycles rtkit-daemon-3289 17,549 cycles kworker/4:1-15354 8,812 cycles kworker/5:2-31362 6,812 cycles kworker/4:1H-1887 5,270 cycles kworker/6:0-17528 5,111 cycles kworker/7:2-23145 4,667 cycles watchdog/0-11 4,663 cycles watchdog/1-14 4,663 cycles watchdog/4-32 4,626 cycles watchdog/5-38 4,403 cycles watchdog/3-26 3,936 cycles watchdog/2-20 3,850 cycles kworker/u16:2-23146 2,654 cycles watchdog/6-44 2,017 cycles watchdog/7-50 2,017 cycles 2.175726600 seconds time elapsed Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-12-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 77 ++++++++++++++++++++++++++++++++++++++++------- tools/perf/util/stat.h | 9 ++++++ 2 files changed, 75 insertions(+), 11 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee708ba6f79a..58d501d1f5fd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1351,13 +1351,24 @@ static void print_aggr(char *prefix) } } -static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +static int cmp_val(const void *a, const void *b) { - FILE *output = stat_config.output; - int nthreads = thread_map__nr(counter->threads); - int ncpus = cpu_map__nr(counter->cpus); - int cpu, thread; + return ((struct perf_aggr_thread_value *)b)->val - + ((struct perf_aggr_thread_value *)a)->val; +} + +static struct perf_aggr_thread_value *sort_aggr_thread( + struct perf_evsel *counter, + int nthreads, int ncpus, + int *ret) +{ + int cpu, thread, i = 0; double uval; + struct perf_aggr_thread_value *buf; + + buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); + if (!buf) + return NULL; for (thread = 0; thread < nthreads; thread++) { u64 ena = 0, run 
= 0, val = 0; @@ -1368,19 +1379,63 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) run += perf_counts(counter->counts, cpu, thread)->run; } + uval = val * counter->scale; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. + */ + if (uval == 0.0 && target__has_per_thread(&target)) + continue; + + buf[i].counter = counter; + buf[i].id = thread; + buf[i].uval = uval; + buf[i].val = val; + buf[i].run = run; + buf[i].ena = ena; + i++; + } + + qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); + + if (ret) + *ret = i; + + return buf; +} + +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + FILE *output = stat_config.output; + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int thread, sorted_threads, id; + struct perf_aggr_thread_value *buf; + + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); + if (!buf) { + perror("cannot sort aggr thread"); + return; + } + + for (thread = 0; thread < sorted_threads; thread++) { if (prefix) fprintf(output, "%s", prefix); - uval = val * counter->scale; - + id = buf[thread].id; if (stat_config.stats) - printout(thread, 0, counter, uval, prefix, run, ena, - 1.0, &stat_config.stats[thread]); + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &stat_config.stats[id]); else - printout(thread, 0, counter, uval, prefix, run, ena, - 1.0, &rt_stat); + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); fputc('\n', output); } + + free(buf); } struct caggr_data { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2ed95dc72784..dbc6f7134f61 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -111,6 +111,15 @@ static inline void init_stats(struct stats *stats) struct perf_evsel; struct perf_evlist; +struct perf_aggr_thread_value { 
+ struct perf_evsel *counter; + int id; + double uval; + u64 val; + u64 run; + u64 ena; +}; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); -- cgit v1.2.3 From 06c3f2aa9fc68e7f3fe3d83e7569d2a2801d9f99 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2017 18:45:35 +0100 Subject: perf utils: Move is_directory() to path.h So that it can be used more widely, like in the next patch, when it will be used to fix a bug in 'perf test' handling of dirent.d_type == DT_UNKNOWN. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org [ Split from a larger patch, removed needless includes in path.h ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 14 +------------- tools/perf/util/path.c | 14 ++++++++++++++ tools/perf/util/path.h | 3 +++ 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index fac6f053e4da..77e47cf39f2c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" +#include "util/path.h" #include "print_binary.h" #include #include @@ -2401,19 +2402,6 @@ out: return rc; } -/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ -static int is_directory(const char *base_path, const struct dirent *dent) -{ - char path[PATH_MAX]; - struct stat st; - - sprintf(path, "%s/%s", base_path, dent->d_name); - if (stat(path, &st)) - return 0; - - return S_ISDIR(st.st_mode); -} - #define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ while ((lang_dirent = readdir(scripts_dir)) != NULL) \ if ((lang_dirent->d_type == DT_DIR || \ diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 933f5c6bffb4..ca56ba2dd3da 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c 
@@ -18,6 +18,7 @@ #include #include #include +#include #include static char bad_path[] = "/bad-path/"; @@ -77,3 +78,16 @@ bool is_regular_file(const char *file) return S_ISREG(st.st_mode); } + +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +bool is_directory(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return S_ISDIR(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 14a254ada7eb..f014f905df50 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -2,9 +2,12 @@ #ifndef _PERF_PATH_H #define _PERF_PATH_H +struct dirent; + int path__join(char *bf, size_t size, const char *path1, const char *path2); int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); bool is_regular_file(const char *file); +bool is_directory(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ -- cgit v1.2.3 From 378811ac303df13efbe49f3ad1795b63d334ac5d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2017 18:45:35 +0100 Subject: perf test: Handle properly readdir DT_UNKNOWN Some systems can return DT_UNKNOWN in readdir's struct dirent::d_type and we must handle it properly. In this case we can directly check if the entity we found is a directory and skip it. 
Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 766573e236e4..fafa014240cd 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -411,9 +411,9 @@ static const char *shell_test__description(char *description, size_t size, return description ? trim(description + 1) : NULL; } -#define for_each_shell_test(dir, ent) \ +#define for_each_shell_test(dir, base, ent) \ while ((ent = readdir(dir)) != NULL) \ - if (ent->d_type == DT_REG && ent->d_name[0] != '.') + if (!is_directory(base, ent)) static const char *shell_tests__dir(char *path, size_t size) { @@ -452,7 +452,7 @@ static int shell_tests__max_desc_width(void) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, path, ent) { char bf[256]; const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name); @@ -504,7 +504,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, st.dir, ent) { int curr = i++; char desc[256]; struct test test = { @@ -614,7 +614,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, path, ent) { int curr = i++; char bf[256]; struct test t = { -- cgit v1.2.3 From 3315d14f8eea27a845bd2e3a88341a35f4025866 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 23:13:24 +0530 Subject: perf perf: Remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid 
removing false positives. Signed-off-by: Pravin Shedge Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1512582204-6493-1-git-send-email-pravin.shedge4linux@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 1 - tools/perf/builtin-c2c.c | 3 --- tools/perf/builtin-record.c | 1 - tools/perf/builtin-stat.c | 1 - tools/perf/tests/parse-events.c | 1 - tools/perf/util/auxtrace.c | 3 --- tools/perf/util/header.c | 2 -- tools/perf/util/metricgroup.c | 2 -- tools/perf/util/scripting-engines/trace-event-python.c | 1 - 9 files changed, 15 deletions(-) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2defb6df7fd0..9aa3a674829b 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -27,7 +27,6 @@ #include "cpumap.h" #include -#include static unsigned int nthreads = 0; static unsigned int nsecs = 10; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f1da9b0833c0..c0debc3f79b6 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -27,13 +27,10 @@ #include "sort.h" #include "tool.h" #include "data.h" -#include "sort.h" #include "event.h" #include "evlist.h" #include "evsel.h" -#include #include "ui/browsers/hists.h" -#include "evlist.h" #include "thread.h" struct c2c_hists { diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0a5749ef8b94..98da8cb8de93 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -51,7 +51,6 @@ #include #include #include -#include #include struct switch_output { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 58d501d1f5fd..98bf9d32f222 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -63,7 +63,6 @@ #include "util/group.h" #include "util/session.h" #include "util/tool.h" -#include "util/group.h" #include "util/string2.h" #include "util/metricgroup.h" 
#include "asm/bug.h" diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f0679613bd18..18b06444f230 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a33491416400..c76687e42344 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,9 +31,6 @@ #include #include #include -#include -#include -#include #include #include "../perf.h" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5890e08e0754..ca73aa7be708 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -15,9 +15,7 @@ #include #include #include -#include #include -#include #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index e48410c99b39..1ddc3d1d0147 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -20,12 +20,10 @@ #include "pmu.h" #include "expr.h" #include "rblist.h" -#include "pmu.h" #include #include #include #include "pmu-events/pmu-events.h" -#include "strbuf.h" #include "strlist.h" #include #include diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c7187f067d31..c1848b543f27 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -43,7 +43,6 @@ #include "../db-export.h" #include "../thread-stack.h" #include "../trace-event.h" -#include "../machine.h" #include "../call-path.h" #include "thread_map.h" #include "cpumap.h" -- cgit v1.2.3 From 7af7919f0f4bde0cec1f546f924be81cfe50533d Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: tools include s390: Grab a copy of 
arch/s390/include/uapi/asm/unistd.h Will be used for generating the syscall id/string translation table. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-vjfbfvgjrnqnbdluqd7leo98@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/unistd.h | 412 ++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 413 insertions(+) create mode 100644 tools/arch/s390/include/uapi/asm/unistd.h diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..725120939051 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/unistd.h @@ -0,0 +1,412 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * S390 version + * + * Derived from "include/asm-i386/unistd.h" + */ + +#ifndef _UAPI_ASM_S390_UNISTD_H_ +#define _UAPI_ASM_S390_UNISTD_H_ + +/* + * This file contains the system call numbers. 
+ */ + +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_restart_syscall 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_brk 45 +#define __NR_signal 48 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 57 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_symlink 83 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_lookup_dcookie 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define 
__NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_getdents 141 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 +#define __NR_putpmsg 189 +#define __NR_vfork 190 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_readahead 222 
+#define __NR_setxattr 224 +#define __NR_lsetxattr 225 +#define __NR_fsetxattr 226 +#define __NR_getxattr 227 +#define __NR_lgetxattr 228 +#define __NR_fgetxattr 229 +#define __NR_listxattr 230 +#define __NR_llistxattr 231 +#define __NR_flistxattr 232 +#define __NR_removexattr 233 +#define __NR_lremovexattr 234 +#define __NR_fremovexattr 235 +#define __NR_gettid 236 +#define __NR_tkill 237 +#define __NR_futex 238 +#define __NR_sched_setaffinity 239 +#define __NR_sched_getaffinity 240 +#define __NR_tgkill 241 +/* Number 242 is reserved for tux */ +#define __NR_io_setup 243 +#define __NR_io_destroy 244 +#define __NR_io_getevents 245 +#define __NR_io_submit 246 +#define __NR_io_cancel 247 +#define __NR_exit_group 248 +#define __NR_epoll_create 249 +#define __NR_epoll_ctl 250 +#define __NR_epoll_wait 251 +#define __NR_set_tid_address 252 +#define __NR_fadvise64 253 +#define __NR_timer_create 254 +#define __NR_timer_settime 255 +#define __NR_timer_gettime 256 +#define __NR_timer_getoverrun 257 +#define __NR_timer_delete 258 +#define __NR_clock_settime 259 +#define __NR_clock_gettime 260 +#define __NR_clock_getres 261 +#define __NR_clock_nanosleep 262 +/* Number 263 is reserved for vserver */ +#define __NR_statfs64 265 +#define __NR_fstatfs64 266 +#define __NR_remap_file_pages 267 +#define __NR_mbind 268 +#define __NR_get_mempolicy 269 +#define __NR_set_mempolicy 270 +#define __NR_mq_open 271 +#define __NR_mq_unlink 272 +#define __NR_mq_timedsend 273 +#define __NR_mq_timedreceive 274 +#define __NR_mq_notify 275 +#define __NR_mq_getsetattr 276 +#define __NR_kexec_load 277 +#define __NR_add_key 278 +#define __NR_request_key 279 +#define __NR_keyctl 280 +#define __NR_waitid 281 +#define __NR_ioprio_set 282 +#define __NR_ioprio_get 283 +#define __NR_inotify_init 284 +#define __NR_inotify_add_watch 285 +#define __NR_inotify_rm_watch 286 +#define __NR_migrate_pages 287 +#define __NR_openat 288 +#define __NR_mkdirat 289 +#define __NR_mknodat 290 +#define __NR_fchownat 291 
+#define __NR_futimesat 292 +#define __NR_unlinkat 294 +#define __NR_renameat 295 +#define __NR_linkat 296 +#define __NR_symlinkat 297 +#define __NR_readlinkat 298 +#define __NR_fchmodat 299 +#define __NR_faccessat 300 +#define __NR_pselect6 301 +#define __NR_ppoll 302 +#define __NR_unshare 303 +#define __NR_set_robust_list 304 +#define __NR_get_robust_list 305 +#define __NR_splice 306 +#define __NR_sync_file_range 307 +#define __NR_tee 308 +#define __NR_vmsplice 309 +#define __NR_move_pages 310 +#define __NR_getcpu 311 +#define __NR_epoll_pwait 312 +#define __NR_utimes 313 +#define __NR_fallocate 314 +#define __NR_utimensat 315 +#define __NR_signalfd 316 +#define __NR_timerfd 317 +#define __NR_eventfd 318 +#define __NR_timerfd_create 319 +#define __NR_timerfd_settime 320 +#define __NR_timerfd_gettime 321 +#define __NR_signalfd4 322 +#define __NR_eventfd2 323 +#define __NR_inotify_init1 324 +#define __NR_pipe2 325 +#define __NR_dup3 326 +#define __NR_epoll_create1 327 +#define __NR_preadv 328 +#define __NR_pwritev 329 +#define __NR_rt_tgsigqueueinfo 330 +#define __NR_perf_event_open 331 +#define __NR_fanotify_init 332 +#define __NR_fanotify_mark 333 +#define __NR_prlimit64 334 +#define __NR_name_to_handle_at 335 +#define __NR_open_by_handle_at 336 +#define __NR_clock_adjtime 337 +#define __NR_syncfs 338 +#define __NR_setns 339 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define __NR_s390_runtime_instr 342 +#define __NR_kcmp 343 +#define __NR_finit_module 344 +#define __NR_sched_setattr 345 +#define __NR_sched_getattr 346 +#define __NR_renameat2 347 +#define __NR_seccomp 348 +#define __NR_getrandom 349 +#define __NR_memfd_create 350 +#define __NR_bpf 351 +#define __NR_s390_pci_mmio_write 352 +#define __NR_s390_pci_mmio_read 353 +#define __NR_execveat 354 +#define __NR_userfaultfd 355 +#define __NR_membarrier 356 +#define __NR_recvmmsg 357 +#define __NR_sendmmsg 358 +#define __NR_socket 359 +#define __NR_socketpair 360 +#define __NR_bind 
361 +#define __NR_connect 362 +#define __NR_listen 363 +#define __NR_accept4 364 +#define __NR_getsockopt 365 +#define __NR_setsockopt 366 +#define __NR_getsockname 367 +#define __NR_getpeername 368 +#define __NR_sendto 369 +#define __NR_sendmsg 370 +#define __NR_recvfrom 371 +#define __NR_recvmsg 372 +#define __NR_shutdown 373 +#define __NR_mlock2 374 +#define __NR_copy_file_range 375 +#define __NR_preadv2 376 +#define __NR_pwritev2 377 +#define __NR_s390_guarded_storage 378 +#define __NR_statx 379 +#define __NR_s390_sthyi 380 +#define NR_syscalls 381 + +/* + * There are some system calls that are not present on 64 bit, some + * have a different name although they do the same (e.g. __NR_chown32 + * is __NR_chown on 64 bit). + */ +#ifndef __s390x__ + +#define __NR_time 13 +#define __NR_lchown 16 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_getrlimit 76 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_fchown 95 +#define __NR_ioperm 101 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR__newselect 142 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_chown 182 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 
+#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_fcntl64 221 +#define __NR_sendfile64 223 +#define __NR_fadvise64_64 264 +#define __NR_fstatat64 293 + +#else + +#define __NR_select 142 +#define __NR_getrlimit 191 /* SuS compliant getrlimit */ +#define __NR_lchown 198 +#define __NR_getuid 199 +#define __NR_getgid 200 +#define __NR_geteuid 201 +#define __NR_getegid 202 +#define __NR_setreuid 203 +#define __NR_setregid 204 +#define __NR_getgroups 205 +#define __NR_setgroups 206 +#define __NR_fchown 207 +#define __NR_setresuid 208 +#define __NR_getresuid 209 +#define __NR_setresgid 210 +#define __NR_getresgid 211 +#define __NR_chown 212 +#define __NR_setuid 213 +#define __NR_setgid 214 +#define __NR_setfsuid 215 +#define __NR_setfsgid 216 +#define __NR_newfstatat 293 + +#endif + +#endif /* _UAPI_ASM_S390_UNISTD_H_ */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index ea602cd1b43a..f81ca508700c 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -33,6 +33,7 @@ arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm_perf.h arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/sie.h +arch/s390/include/uapi/asm/unistd.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h include/asm-generic/bitops/arch_hweight.h -- cgit v1.2.3 From 164a747f1ac2380c582988d2a4d9a9af13f8e644 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: perf s390: Generate system call table from asm/unistd.h This should speed up accessing new system calls introduced with the kernel rather than waiting for libaudit updates to include them. 
Committer testing: $ rm -rf /tmp/build/perf $ mkdir /tmp/build/perf $ make srctree=/home/acme/git/perf -C tools/perf/arch/s390 OUTPUT=/tmp/build/perf/ archheaders make: Entering directory '/home/acme/git/perf/tools/perf/arch/s390' /bin/sh '/home/acme/git/perf/tools/perf/arch/s390/entry/syscalls//mksyscalltbl' 'cc' /home/acme/git/perf/tools/arch/s390/include/uapi/asm/unistd.h > /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c make: Leaving directory '/home/acme/git/perf/tools/perf/arch/s390' $ head -5 /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c static const char *syscalltbl_s390_64[] = { [1] = "exit", [2] = "fork", [3] = "read", [4] = "write", $ tail -5 /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c [378] = "s390_guarded_storage", [379] = "statx", [380] = "s390_sthyi", }; #define SYSCALLTBL_S390_64_MAX_ID 380 $ Now to plug this into 'perf trace' proper. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-h5km60rdg3rqxvsys85q50l3@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/Makefile | 21 ++++++++++++++ tools/perf/arch/s390/entry/syscalls/mksyscalltbl | 36 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100755 tools/perf/arch/s390/entry/syscalls/mksyscalltbl diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 09ba923debe8..48228de415d0 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -3,3 +3,24 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/s390/include/generated/asm +header := $(out)/syscalls_64.c +sysdef := 
$(srctree)/tools/arch/s390/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, s390) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..7fa0d0abd419 --- /dev/null +++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl @@ -0,0 +1,36 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf +# +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner +# + +gcc=$1 +input=$2 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local max_nr + + echo 'static const char *syscalltbl_s390_64[] = {' + while read sc nr; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr" +} + + +$gcc -m64 -E -dM -x c $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -nu \ + |create_table -- cgit v1.2.3 From 901bb0280b60782603e999a6c1e30ddfe1c7b0fb Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: perf trace: Use generated syscall table on s390 too This should speed up accessing new system calls introduced with the kernel rather than waiting for libaudit updates to include them. It also enables users to specify wildcards, for example, perf trace -e 'open*', just like was already possible on x86. 
Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-htplh3nbrivi7g3cffbh4fsu@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 10 +++++++++- tools/perf/util/syscalltbl.c | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 79b117a03fd7..6f73c2316740 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -22,6 +22,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) NO_PERF_REGS := 1 +NO_SYSCALL_TABLE := 1 # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) @@ -33,7 +34,8 @@ endif ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) - CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated + NO_SYSCALL_TABLE := 0 + CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma $(call detected,CONFIG_X86_64) @@ -56,12 +58,18 @@ endif ifeq ($(ARCH),s390) NO_PERF_REGS := 0 + NO_SYSCALL_TABLE := 0 + CFLAGS += -I$(OUTPUT)arch/s390/include/generated endif ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif +ifneq ($(NO_SYSCALL_TABLE),1) + CFLAGS += -DHAVE_SYSCALL_TABLE +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. 
Add supported architectures diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 6eea7cff3d4e..303bdb84ab5a 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -26,6 +26,10 @@ #include <asm/syscalls_64.c> const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_x86_64; +#elif defined(__s390x__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_s390_64; #endif struct syscall { -- cgit v1.2.3 From 5449f13c553e9c50690419f6114665a8beb71bea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 12:46:11 -0300 Subject: perf annotate: Get the cpuid from evsel->evlist->env in symbol__annotate() To reduce its function signature, since we get this from 'evsel' which is already one of its arguments. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-070eap7t6uicg9c3w086xy2z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/annotate.c | 4 +--- tools/perf/ui/gtk/annotate.c | 2 +- tools/perf/util/annotate.c | 7 ++++--- tools/perf/util/annotate.h | 2 +- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/evsel.h | 2 +- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 540461f5e345..c6ccda52117d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -138,7 +138,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 
03b7363a49c9..286427975112 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1116,9 +1116,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - err = symbol__annotate(sym, map, evsel, - sizeof(struct browser_line), &browser.arch, - perf_evsel__env_cpuid(evsel)); + err = symbol__annotate(sym, map, evsel, sizeof(struct browser_line), &browser.arch); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index cdb5ecf91666..aeeaf15029f0 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index facad1e279a8..bc34b28373f4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1622,13 +1622,14 @@ void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, - struct arch **parch, char *cpuid) + struct arch **parch) { struct annotate_args args = { .privsize = privsize, .map = map, .evsel = evsel, }; + struct perf_env *env = perf_evsel__env(evsel); const char *arch_name = NULL; struct arch *arch; int err; @@ -1648,7 +1649,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, *parch = arch; if (arch->init) { - err = arch->init(arch, cpuid); + err = arch->init(arch, env ? 
env->cpuid : NULL); if (err) { pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); return err; @@ -1999,7 +2000,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) return -1; symbol__calc_percent(sym, evsel); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6d7289e88fa3..ce427445671f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -179,7 +179,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, - struct arch **parch, char *cpuid); + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 95853c51c0ca..541897049c6c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2842,9 +2842,9 @@ char *perf_evsel__env_arch(struct perf_evsel *evsel) return NULL; } -char *perf_evsel__env_cpuid(struct perf_evsel *evsel) +struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->cpuid; + if (evsel && evsel->evlist) + return evsel->evlist->env; return NULL; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c3663a70c9b9..0e961ce60a9c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -447,6 +447,6 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); char *perf_evsel__env_arch(struct perf_evsel *evsel); -char *perf_evsel__env_cpuid(struct perf_evsel *evsel); +struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ -- cgit v1.2.3 From 
3285debaf5992f9729ba33e3f31eff5253d29dc4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 12:52:17 -0300 Subject: perf annotate: Use perf_env when obtaining the arch name Paving the way to reuse these routines in other areas, like when generating errno tables. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-rh1qv051vb8gfdcswskrn53h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- tools/perf/util/evsel.c | 7 ------- tools/perf/util/evsel.h | 1 - 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bc34b28373f4..eac45ccd5c32 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1420,16 +1420,19 @@ fallback: return 0; } -static const char *annotate__norm_arch(const char *arch_name) +static const char *perf_env__arch(struct perf_env *env) { struct utsname uts; + char *arch_name; - if (!arch_name) { /* Assume we are annotating locally. 
*/ + if (!env) { /* Assume local operation */ if (uname(&uts) < 0) return NULL; arch_name = uts.machine; - } - return normalize_arch((char *)arch_name); + } else + arch_name = env->arch; + + return normalize_arch(arch_name); } static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) @@ -1630,14 +1633,10 @@ int symbol__annotate(struct symbol *sym, struct map *map, .evsel = evsel, }; struct perf_env *env = perf_evsel__env(evsel); - const char *arch_name = NULL; + const char *arch_name = perf_env__arch(env); struct arch *arch; int err; - if (evsel) - arch_name = perf_evsel__env_arch(evsel); - - arch_name = annotate__norm_arch(arch_name); if (!arch_name) return -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 541897049c6c..4718f0a460df 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2835,13 +2835,6 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, perf_evsel__name(evsel)); } -char *perf_evsel__env_arch(struct perf_evsel *evsel) -{ - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->arch; - return NULL; -} - struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { if (evsel && evsel->evlist) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0e961ce60a9c..846e41644525 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -446,7 +446,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); -char *perf_evsel__env_arch(struct perf_evsel *evsel); struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ -- cgit v1.2.3 From 4e8fbc1c975c667c61a3073da81b338b9bf61c37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 14:47:49 -0300 Subject: perf env: Adopt perf_env__arch() from the annotate code And use it in the 
libunwind case, with both passing a valid perf_env to extract the arch to be normalized from and passing NULL with the same semantic as in the annotate code: to get it from uname() uts.machine. Now the code to generate per arch errno translation tables (int/string) can use it to decode perf.data files recorded in a different arch than that where 'perf trace' (or any other analysis tool) runs. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-p2epffgash69w38kvj3ntpc9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/common.c | 44 +++-------------------------------- tools/perf/arch/common.h | 1 - tools/perf/util/annotate.c | 16 ------------- tools/perf/util/env.c | 47 ++++++++++++++++++++++++++++++++++++++ tools/perf/util/env.h | 2 ++ tools/perf/util/unwind-libunwind.c | 4 ++-- 6 files changed, 54 insertions(+), 60 deletions(-) diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 8c0cfeb55f8e..c6f373508a4f 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include "common.h" +#include "../util/env.h" #include "../util/util.h" #include "../util/debug.h" -#include "sane_ctype.h" - const char *const arm_triplets[] = { "arm-eabi-", "arm-linux-androideabi-", @@ -120,55 +118,19 @@ static int lookup_triplets(const char *const *triplets, const char *name) return -1; } -/* - * Return architecture name in a normalized form. - * The conversion logic comes from the Makefile. 
- */ -const char *normalize_arch(char *arch) -{ - if (!strcmp(arch, "x86_64")) - return "x86"; - if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') - return "x86"; - if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) - return "sparc"; - if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) - return "arm64"; - if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) - return "arm"; - if (!strncmp(arch, "s390", 4)) - return "s390"; - if (!strncmp(arch, "parisc", 6)) - return "parisc"; - if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) - return "powerpc"; - if (!strncmp(arch, "mips", 4)) - return "mips"; - if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) - return "sh"; - - return arch; -} - static int perf_env__lookup_binutils_path(struct perf_env *env, const char *name, const char **path) { int idx; - const char *arch, *cross_env; - struct utsname uts; + const char *arch = perf_env__arch(env), *cross_env; const char *const *path_list; char *buf = NULL; - arch = normalize_arch(env->arch); - - if (uname(&uts) < 0) - goto out; - /* * We don't need to try to find objdump path for native system. * Just use default binutils path (e.g.: "objdump"). 
*/ - if (!strcmp(normalize_arch(uts.machine), arch)) + if (!strcmp(perf_env__arch(NULL), arch)) goto out; cross_env = getenv("CROSS_COMPILE"); diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index a1546509ad24..2d875baa92e6 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -7,6 +7,5 @@ extern const char *objdump_path; int perf_env__lookup_objdump(struct perf_env *env); -const char *normalize_arch(char *arch); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index eac45ccd5c32..68e687d1bf99 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "sane_ctype.h" @@ -1420,21 +1419,6 @@ fallback: return 0; } -static const char *perf_env__arch(struct perf_env *env) -{ - struct utsname uts; - char *arch_name; - - if (!env) { /* Assume local operation */ - if (uname(&uts) < 0) - return NULL; - arch_name = uts.machine; - } else - arch_name = env->arch; - - return normalize_arch(arch_name); -} - static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct map *map = args->map; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893..6d311868d850 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include "cpumap.h" #include "env.h" +#include "sane_ctype.h" #include "util.h" #include +#include struct perf_env perf_env; @@ -93,3 +95,48 @@ void cpu_cache_level__free(struct cpu_cache_level *cache) free(cache->map); free(cache->size); } + +/* + * Return architecture name in a normalized form. + * The conversion logic comes from the Makefile. 
+ */ +static const char *normalize_arch(char *arch) +{ + if (!strcmp(arch, "x86_64")) + return "x86"; + if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') + return "x86"; + if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) + return "sparc"; + if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) + return "arm64"; + if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) + return "arm"; + if (!strncmp(arch, "s390", 4)) + return "s390"; + if (!strncmp(arch, "parisc", 6)) + return "parisc"; + if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) + return "powerpc"; + if (!strncmp(arch, "mips", 4)) + return "mips"; + if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) + return "sh"; + + return arch; +} + +const char *perf_env__arch(struct perf_env *env) +{ + struct utsname uts; + char *arch_name; + + if (!env) { /* Assume local operation */ + if (uname(&uts) < 0) + return NULL; + arch_name = uts.machine; + } else + arch_name = env->arch; + + return normalize_arch(arch_name); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1eb35b190b34..bf970f57dce0 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__arch(struct perf_env *env); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 647a1e6b4c7b..b029a5e9ae49 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -3,7 +3,7 @@ #include "thread.h" #include "session.h" #include "debug.h" -#include "arch/common.h" +#include "env.h" struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; @@ -39,7 +39,7 @@ int unwind__prepare_access(struct thread *thread, struct map *map, 
if (dso_type == DSO__TYPE_UNKNOWN) return 0; - arch = normalize_arch(thread->mg->machine->env->arch); + arch = perf_env__arch(thread->mg->machine->env); if (!strcmp(arch, "x86")) { if (dso_type != DSO__TYPE_64BIT) -- cgit v1.2.3 From 9f5c6d8777a2d962b0eeacb2a16f37da6bea545b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:26:46 +0900 Subject: perf probe: Add warning message if there is unexpected event name This improve the error message so that user can know event-name error before writing new events to kprobe-events interface. E.g. ====== #./perf probe -x /lib64/libc-2.25.so malloc_get_state* Internal error: "malloc_get_state@GLIBC_2" is an invalid event name. Error: Failed to add events. ====== Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275040665.24652.5188568529237584489.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b7aaf9b2294d..262d5da86623 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2625,6 +2625,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base, out: free(nbase); + + /* Final validation */ + if (ret >= 0 && !is_c_func_name(buf)) { + pr_warning("Internal error: \"%s\" is an invalid event name.\n", + buf); + ret = -EINVAL; + } + return ret; } -- cgit v1.2.3 From a3110cd9d0f77a796da545e112f9305094257798 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 11 Dec 2017 15:19:25 -0300 Subject: perf probe: Cut off the version suffix from event name Cut off the version suffix (e.g. @GLIBC_2.2.5 etc.) from automatic generated event name. 
This fixes wildcard event adding like below case; ===== # perf probe -x /lib64/libc-2.25.so malloc* Internal error: "malloc_get_state@GLIBC_2" is wrong event name. Error: Failed to add events. ===== This failure was caused by a versioned suffix symbol. With this fix, perf probe automatically cuts the suffix after @ as below. ===== # ./perf probe -x /lib64/libc-2.25.so malloc* Added new events: probe_libc:malloc_printerr (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_consolidate (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_check (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_hook_ini (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_trim (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_usable_size (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_stats (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_info (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:mallochook (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_get_state (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_set_state (on malloc* in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_set_state -aR sleep 1 ===== Reported-by: Arnaldo Carvalho de Melo Reported-by: bhargavb Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/None Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 262d5da86623..7e582547ac07 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2584,8 +2584,8 @@ static int get_new_event_name(char *buf, size_t len, const char *base, if (!nbase) return -ENOMEM; - /* 
Cut off the dot suffixes (e.g. .const, .isra)*/ - p = strchr(nbase, '.'); + /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ + p = strpbrk(nbase, ".@"); if (p && p != nbase) *p = '\0'; -- cgit v1.2.3 From e63c625a1e417edbe513b75b347a7238e9e7fea0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:27:44 +0900 Subject: perf probe: Add __return suffix for return events Add __return suffix for function return events automatically. Without this, user have to give --force option and will see the number suffix for each event like "function_1", which is not easy to recognize. Instead, this adds __return suffix to it automatically. E.g. ===== # ./perf probe -x /lib64/libc-2.25.so 'malloc*%return' Added new events: probe_libc:malloc_printerr__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_consolidate__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_check__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_hook_ini__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_trim__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_usable_size__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_stats__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_info__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:mallochook__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_get_state__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_set_state__return (on malloc*%return in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_set_state__return -aR sleep 1 ===== Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo 
Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275046418.24652.6696011972866498489.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 2 +- tools/perf/util/probe-event.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index d7e4869905f1..f96382692f42 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -170,7 +170,7 @@ Probe points are defined by following syntax. or, sdt_PROVIDER:SDTEVENT -'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_' is used for uprobe. +'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function, and for return probes, a "\_\_return" suffix is automatically added to the function name. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_' is used for uprobe. Note that using existing group name can conflict with other events. Especially, using the group name reserved for kernel modules can hide embedded events in the modules. 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 7e582547ac07..a68141d360b0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2573,7 +2573,8 @@ int show_perf_probe_events(struct strfilter *filter) } static int get_new_event_name(char *buf, size_t len, const char *base, - struct strlist *namelist, bool allow_suffix) + struct strlist *namelist, bool ret_event, + bool allow_suffix) { int i, ret; char *p, *nbase; @@ -2590,7 +2591,7 @@ static int get_new_event_name(char *buf, size_t len, const char *base, *p = '\0'; /* Try no suffix number */ - ret = e_snprintf(buf, len, "%s", nbase); + ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : ""); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); goto out; @@ -2689,8 +2690,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, group = PERFPROBE_GROUP; /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, - namelist, allow_suffix); + ret = get_new_event_name(buf, 64, event, namelist, + tev->point.retprobe, allow_suffix); if (ret < 0) return ret; -- cgit v1.2.3 From 4b3a2716dd785fabb9f6ac80c1d53cb29a88169d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:28:12 +0900 Subject: perf probe: Find versioned symbols from map Commit d80406453ad4 ("perf symbols: Allow user probes on versioned symbols") allows user to find default versioned symbols (with "@@") in map. However, it did not enable normal versioned symbol (with "@") for perf-probe. E.g. ===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state Failed to find symbol malloc_get_state in /usr/lib64/libc-2.25.so Error: Failed to add events. ===== This solves above issue by improving perf-probe symbol search function, as below. 
===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state Added new event: probe_libc:malloc_get_state (on malloc_get_state in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_get_state -aR sleep 1 # ./perf probe -l probe_libc:malloc_get_state (on malloc_get_state@GLIBC_2.2.5 in /usr/lib64/libc-2.25.so) ===== Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275049269.24652.1639103455496216255.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 8 ++++++++ tools/perf/util/probe-event.c | 20 ++++++++++++++++++-- tools/perf/util/symbol.c | 5 +++++ tools/perf/util/symbol.h | 1 + 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 9c4e23d8c8ce..53d83d7e6a09 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -64,6 +64,14 @@ int arch__compare_symbol_names_n(const char *namea, const char *nameb, return strncmp(namea, nameb, n); } + +const char *arch__normalize_symbol_name(const char *name) +{ + /* Skip over initial dot */ + if (name && *name == '.') + name++; + return name; +} #endif #if defined(_CALL_ELF) && _CALL_ELF == 2 diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a68141d360b0..0d6c66d51939 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2801,16 +2801,32 @@ static int find_probe_functions(struct map *map, char *name, int found = 0; struct symbol *sym; struct rb_node *tmp; + const char *norm, *ver; + char *buf = NULL; if (map__load(map) < 0) return 0; map__for_each_symbol(map, sym, tmp) { - if (strglobmatch(sym->name, name)) { + norm = 
arch__normalize_symbol_name(sym->name); + if (!norm) + continue; + + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) syms[found - 1] = sym; } + if (buf) + zfree(&buf); } return found; @@ -2856,7 +2872,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, * same name but different addresses, this lists all the symbols. */ num_matched_functions = find_probe_functions(map, pp->function, syms); - if (num_matched_functions == 0) { + if (num_matched_functions <= 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, pev->target ? : "kernel"); ret = -ENOENT; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1b67a8639dfe..cc065d4bfafc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -94,6 +94,11 @@ static int prefix_underscores_count(const char *str) return tail - str; } +const char * __weak arch__normalize_symbol_name(const char *name) +{ + return name; +} + int __weak arch__compare_symbol_names(const char *namea, const char *nameb) { return strcmp(namea, nameb); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a4f0075b4e5c..0563f33c1eb3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -349,6 +349,7 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); void arch__sym_update(struct symbol *s, GElf_Sym *sym); #endif +const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 -- cgit v1.2.3 From 1e9f9e8af0de80e8f6a47d991df66090934be0c6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:28:41 +0900 Subject: perf string: Add {strdup,strpbrk}_esc() To support the special characters escaped by '\' in 'perf probe' event parser. 
Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275052163.24652.18205979384585484358.stgit@devbox [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/string.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/string2.h | 2 ++ 2 files changed, 48 insertions(+) diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index aaa08ee8c717..d8bfd0c4d2cb 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -396,3 +396,49 @@ out_err_overflow: free(expr); return NULL; } + +/* Like strpbrk(), but not break if it is right after a backslash (escaped) */ +char *strpbrk_esc(char *str, const char *stopset) +{ + char *ptr; + + do { + ptr = strpbrk(str, stopset); + if (ptr == str || + (ptr == str + 1 && *(ptr - 1) != '\\')) + break; + str = ptr + 1; + } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + + return ptr; +} + +/* Like strdup, but do not copy a single backslash */ +char *strdup_esc(const char *str) +{ + char *s, *d, *p, *ret = strdup(str); + + if (!ret) + return NULL; + + d = strchr(ret, '\\'); + if (!d) + return ret; + + s = d + 1; + do { + if (*s == '\0') { + *d = '\0'; + break; + } + p = strchr(s + 1, '\\'); + if (p) { + memmove(d, s, p - s); + d += p - s; + s = p + 1; + } else + memmove(d, s, strlen(s) + 1); + } while (p); + + return ret; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index ee14ca5451ab..4c68a09b97e8 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -39,5 +39,7 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int return asprintf_expr_inout_ints(var, false, nints, ints); } +char *strpbrk_esc(char *str, const char *stopset); +char *strdup_esc(const char *str); #endif /* PERF_STRING_H */ -- cgit v1.2.3 From c588d158124d5b60184fc612e551a19720720d68 Mon Sep 
17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 13 Dec 2017 00:05:12 +0900 Subject: perf probe: Support escaped character in parser Support the special characters escaped by '\' in parser. This allows user to specify versions directly like below. ===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state\\@GLIBC_2.2.5 Added new event: probe_libc:malloc_get_state (on malloc_get_state@GLIBC_2.2.5 in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_get_state -aR sleep 1 ===== Or, you can use separators in source filename, e.g. ===== # ./perf probe -x /opt/test/a.out foo+bar.c:3 Semantic error :There is non-digit character in offset. Error: Command Parse Error. ===== Usually "+" in source file cause parser error, but ===== # ./perf probe -x /opt/test/a.out foo\\+bar.c:4 Added new event: probe_a:main (on @foo+bar.c:4 in /opt/test/a.out) You can now use it in all perf tools, such as: perf record -e probe_a:main -aR sleep 1 ===== escaped "\+" allows you to specify that. Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151309111236.18107.5634753157435343410.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 16 +++++++++ tools/perf/util/probe-event.c | 58 ++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index f96382692f42..b6866a05edd2 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -182,6 +182,14 @@ Note that before using the SDT event, the target binary (on which SDT events are For details of the SDT, see below. 
https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html +ESCAPED CHARACTER +----------------- + +In the probe syntax, '=', '@', '+', ':' and ';' are treated as a special character. You can use a backslash ('\') to escape the special characters. +This is useful if you need to probe on a specific versioned symbols, like @GLIBC_... suffixes, or also you need to specify a source file which includes the special characters. +Note that usually single backslash is consumed by shell, so you might need to pass double backslash (\\) or wrapping with single quotes (\'AAA\@BBB'). +See EXAMPLES how it is used. + PROBE ARGUMENT -------------- Each probe argument follows below syntax. @@ -277,6 +285,14 @@ Add a USDT probe to a target process running in a different mount namespace ./perf probe --target-ns -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end +Add a probe on specific versioned symbol by backslash escape + + ./perf probe -x /lib64/libc-2.25.so 'malloc_get_state\@GLIBC_2.2.5' + +Add a probe in a source file using special characters by backslash escape + + ./perf probe -x /opt/test/a.out 'foo\+bar.c:4' + SEE ALSO -------- diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0d6c66d51939..e1dbc9821617 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1325,27 +1325,30 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strchr(*arg, ':'); + ptr = strpbrk_esc(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup(*arg); + pev->group = strdup_esc(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - if (!pev->sdt && !is_c_func_name(*arg)) { + + pev->event = strdup_esc(*arg); + if (pev->event == NULL) + return -ENOMEM; + + if (!pev->sdt && !is_c_func_name(pev->event)) { + zfree(&pev->event); 
ng_name: + zfree(&pev->group); semantic_error("%s is bad for event name -it must " "follow C symbol-naming rule.\n", *arg); return -EINVAL; } - pev->event = strdup(*arg); - if (pev->event == NULL) - return -ENOMEM; - return 0; } @@ -1373,7 +1376,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk(arg, ";=@+%"); + ptr = strpbrk_esc(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1387,7 +1390,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup(ptr + 1); + pev->target = strdup_esc(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1421,13 +1424,14 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * Otherwise, we consider arg to be a function specification. */ - if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) { + if (!strpbrk_esc(arg, "+@%")) { + ptr = strpbrk_esc(arg, ";:"); /* This is a file spec if it includes a '.' 
before ; or : */ - if (memchr(arg, '.', ptr - arg)) + if (ptr && memchr(arg, '.', ptr - arg)) file_spec = true; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1436,7 +1440,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup(arg); + tmp = strdup_esc(arg); if (tmp == NULL) return -ENOMEM; } @@ -1469,12 +1473,12 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg = ptr; c = nc; if (c == ';') { /* Lazy pattern must be the last part */ - pp->lazy_line = strdup(arg); + pp->lazy_line = strdup(arg); /* let leave escapes */ if (pp->lazy_line == NULL) return -ENOMEM; break; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1501,7 +1505,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup(arg); + pp->file = strdup_esc(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2803,23 +2807,31 @@ static int find_probe_functions(struct map *map, char *name, struct rb_node *tmp; const char *norm, *ver; char *buf = NULL; + bool cut_version = true; if (map__load(map) < 0) return 0; + /* If user gives a version, don't cut off the version from symbols */ + if (strchr(name, '@')) + cut_version = false; + map__for_each_symbol(map, sym, tmp) { norm = arch__normalize_symbol_name(sym->name); if (!norm) continue; - /* We don't care about default symbol or not */ - ver = strchr(norm, '@'); - if (ver) { - buf = strndup(norm, ver - norm); - if (!buf) - return -ENOMEM; - norm = buf; + if (cut_version) { + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } } + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) 
-- cgit v1.2.3 From f9d8adb345d7adbb2d3431eea73beb89c8d6d612 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 29 Nov 2017 19:43:46 +0100 Subject: perf evsel: Fix swap for samples with raw data When we detect a different endianity we swap event before processing. It's tricky for samples because we have no idea what's inside. We treat it as an array of u64s, swap them and later on we swap back parts which are different. We mangle this way also the tracepoint raw data, which ends up in report showing wrong data: 1.95% comm=Q^B pid=29285 prio=16777216 target_cpu=000 1.67% comm=l^B pid=0 prio=16777216 target_cpu=000 Luckily the traceevent library handles the endianity by itself (thank you Steven!), so we can pass the RAW data directly in the other endianity. 2.51% comm=beah-rhts-task pid=1175 prio=120 target_cpu=002 2.23% comm=kworker/0:0 pid=11566 prio=120 target_cpu=000 The fix is basically to swap back the raw data if different endianity is detected. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20171129184346.3656-1-jolsa@kernel.org [ Add util/memswap.c to python-ext-sources to link missing mem_bswap_64() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 20 +++++++++++++++++--- tools/perf/util/python-ext-sources | 1 + 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4718f0a460df..1cf044cbae36 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -36,6 +36,7 @@ #include "debug.h" #include "trace-event.h" #include "stat.h" +#include "memswap.h" #include "util/parse-branch-options.h" #include "sane_ctype.h" @@ -2131,14 +2132,27 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_RAW) { OVERFLOW_CHECK_u64(array); u.val64 = *array; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* undo swap of u64, 
then swap on individual u32s */ + + /* + * Undo swap of u64, then swap on individual u32s, + * get the size of the raw area and undo all of the + * swap. The pevent interface handles endianity by + * itself. + */ + if (swapped) { u.val64 = bswap_64(u.val64); u.val32[0] = bswap_32(u.val32[0]); u.val32[1] = bswap_32(u.val32[1]); } data->raw_size = u.val32[0]; + + /* + * The raw data is aligned on 64bits including the + * u32 size, so it's safe to use mem_bswap_64. + */ + if (swapped) + mem_bswap_64((void *) array, data->raw_size); + array = (void *)array + sizeof(u32); OVERFLOW_CHECK(array, data->raw_size, max_size); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index b4f2f06722a7..7aa0ea64544e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/memswap.c util/mmap.c util/namespaces.c ../lib/bitmap.c -- cgit v1.2.3 From 69b5c953400897a978f8a7d212c53aa90ff5027d Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 12 Dec 2017 11:22:03 -0500 Subject: perf test shell: Fix check open filename arg using 'perf trace' Commit f231af789b11 ("perf test shell: Fix check open filename arg using 'perf trace' on s390x") added an exception for s390x to use openat() instead of open() in the test that intercepts a open syscall to look for the filename argument as obtained by the vfs_getname 'perf probe' it puts in place at the getname_flags kernel function. Its not just s390x that uses openat() instead of open(), so use 'perf list' to look for the syscall:sys_enter_open(at)? present in the system being tested instead of checking if the system is s390x. 
In fact Namhyung pointed out that glibc 2.26 changed this behaviour, as described in https://lwn.net/Articles/738694/, so systems where glibc is >= 2.26 will need this patch for this test to work, which already took place in some distros for architectures such as s390x, while Fedora 26 x86_64 is at glibc 2.25, i.e. still uses open(). Signed-off-by: Michael Petlan Tested-by: Arnaldo Carvalho de Melo Tested-by: Thomas Richter Link: https://lkml.kernel.org/r/ab23fe42-1080-a46b-503e-744e097f414f@linux.vnet.ibm.com Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan LPU-Reference: 1275675985.12835754.1513095723265.JavaMail.zimbra@redhat.com Link: https://lkml.kernel.org/n/tip-j2wbz9av1rw3thr3t0g4dtuk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 2a9ef080efd0..55ad9793d544 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,10 +17,9 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - test "$(uname -m)" = s390x && { svc="openat"; txt="dfd: +CWD, +"; } - - perf trace -e ${svc:-open} touch $file 2>&1 | \ - egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? 
' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + perf trace -e $evts touch $file 2>&1 | \ + egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- cgit v1.2.3 From 922991c2b14219b33270c770f917e0d1bf8f5597 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Dec 2017 17:43:40 -0300 Subject: Revert "perf s390: Always build with -fPIC" This one made x86 always build with -fPIC, when the intention was for s390 to be built that way, due to a rebase mistake. Reported-by: Hendrik Brueckner This reverts commit 1dc4ddf112a408e607a073d951b962b6c6e2bd6c. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6f73c2316740..eb6bd99be0bd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -43,7 +43,6 @@ ifeq ($(SRCARCH),x86) LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind endif NO_PERF_REGS := 0 - CFLAGS += -fPIC endif ifeq ($(SRCARCH),arm) -- cgit v1.2.3 From a9a3f1d18a6c9ccf89728e23474645aa91e2f4f1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 13 Dec 2017 17:46:54 -0300 Subject: perf s390: Always build with -fPIC On s390, object files must be compiled with position-independent code in order to be incrementally linked or linked to shared libraries. Therefore, add -fPIC to the CFLAGS for s390 to ensure each object file is built properly. 
Reported-by: Jonathan Hermann Signed-off-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Thomas Richter Cc: linux s390 list Link: https://lkml.kernel.org/r/20171207080951.GC4889@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eb6bd99be0bd..f050f38d8fa3 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -58,7 +58,7 @@ endif ifeq ($(ARCH),s390) NO_PERF_REGS := 0 NO_SYSCALL_TABLE := 0 - CFLAGS += -I$(OUTPUT)arch/s390/include/generated + CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated endif ifeq ($(NO_PERF_REGS),0) -- cgit v1.2.3 From ca8000684ec4e66f965e1f9547a3c6cb834154ca Mon Sep 17 00:00:00 2001 From: Mengting Zhang Date: Wed, 13 Dec 2017 15:01:53 +0800 Subject: perf evsel: Enable ignore_missing_thread for pid option While monitoring a multithreaded process with the pid option, perf may sometimes return a sys_perf_event_open failure with 3 (No such process) if any of the process's threads die before we open the event. However, we want perf to continue monitoring the remaining threads and not exit with an error. Here, the patch enables perf_evsel::ignore_missing_thread for the -p option so that perf does not fail completely if any of the threads die before we open the event. But it may still return a sys_perf_event_open failure with 22 (Invalid) if we monitor several event groups. sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8 sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8 WARNING: Ignored open failure for pid 28962 sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8 sys_perf_event_open failed, error -22 That is because when we ignore a missing thread, we change the thread_idx without dealing with its fds, FD(evsel, cpu, thread).
Then get_group_fd() may return a wrong group_fd for the next thread and sys_perf_event_open() returns with 22. sys_perf_event_open(){ ... if (group_fd != -1) perf_fget_light()//to get corresponding group_leader by group_fd ... if (group_leader) if (group_leader->ctx->task != ctx->task)//should be on the same task goto err_context ... } This patch also fixes this bug by introducing perf_evsel__remove_fd() and update_fds() to allow removing fds for the missing thread. Changes since v1: - Change group_fd__remove() into a more generic way without changing code logic - Remove redundant condition Changes since v2: - Use a proper function name and add some comments. - Multiline comment style fixes. Committer testing: Before this patch the recently added 'perf stat --per-thread' for system wide counting would race while enumerating all threads using /proc: [root@jouet ~]# perf stat --per-thread failed to parse CPUs map: No such file or directory Usage: perf stat [<options>] [<command>] -C, --cpu <cpu> list of cpus to monitor in system-wide -a, --all-cpus system-wide collection from all CPUs [root@jouet ~]# perf stat --per-thread failed to parse CPUs map: No such file or directory Usage: perf stat [<options>] [<command>] -C, --cpu <cpu> list of cpus to monitor in system-wide -a, --all-cpus system-wide collection from all CPUs [root@jouet ~]# This happened when, say, the kernel was being built, i.e. with lots of short-lived threads; after this patch it doesn't happen.
Signed-off-by: Mengting Zhang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Cheng Jian Cc: Li Bin Cc: Wang Nan Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com [ Remove one use 'evlist' alias variable ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 98da8cb8de93..50385d89c497 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1804,8 +1804,8 @@ int cmd_record(int argc, const char **argv) goto out; } - /* Enable ignoring missing threads when -u option is defined. */ - rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; + /* Enable ignoring missing threads when -u/-p option is defined. */ + rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; err = -ENOMEM; if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1cf044cbae36..a4d256ea0dc4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1599,10 +1599,46 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static void perf_evsel__remove_fd(struct perf_evsel *pos, + int nr_cpus, int nr_threads, + int thread_idx) +{ + for (int cpu = 0; cpu < nr_cpus; cpu++) + for (int thread = thread_idx; thread < nr_threads - 1; thread++) + FD(pos, cpu, thread) = FD(pos, cpu, thread + 1); +} + +static int update_fds(struct perf_evsel *evsel, + int nr_cpus, int cpu_idx, + int nr_threads, int thread_idx) +{ + struct perf_evsel *pos; + + if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + return -EINVAL; + + evlist__for_each_entry(evsel->evlist, pos) { + nr_cpus = pos != evsel ? 
nr_cpus : cpu_idx; + + perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + + /* + * Since fds for next evsel has not been created, + * there is no need to iterate whole event list. + */ + if (pos == evsel) + break; + } + return 0; +} + static bool ignore_missing_thread(struct perf_evsel *evsel, + int nr_cpus, int cpu, struct thread_map *threads, int thread, int err) { + pid_t ignore_pid = thread_map__pid(threads, thread); + if (!evsel->ignore_missing_thread) return false; @@ -1618,11 +1654,18 @@ static bool ignore_missing_thread(struct perf_evsel *evsel, if (threads->nr == 1) return false; + /* + * We should remove fd for missing_thread first + * because thread_map__remove() will decrease threads->nr. + */ + if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + return false; + if (thread_map__remove(threads, thread)) return false; pr_warning("WARNING: Ignored open failure for pid %d\n", - thread_map__pid(threads, thread)); + ignore_pid); return true; } @@ -1727,7 +1770,7 @@ retry_open: if (fd < 0) { err = -errno; - if (ignore_missing_thread(evsel, threads, thread, err)) { + if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { /* * We just removed 1 thread, so take a step * back on thread index and lower the upper -- cgit v1.2.3 From f1031c8d33a8c40d4cac26e58c37d9fba0e31a8a Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 14 Dec 2017 17:52:42 -0600 Subject: perf probe arm64: Fix symbol fixup issues due to ELF type On an arm64 machine running a CONFIG_RANDOMIZE_BASE=y kernel, perf kernel symbol resolution fails. Debugging saw symsrc_init calling the default elf__needs_adjust_symbols() where checks for an ET_DYN (3) ehdr.e_type failed when they should have succeeded. Fix by adopting powerpc version of the weak elf__needs_adjust_symbols() function, as done in commit d2332098331f ("perf probe ppc: Fix symbol fixup issues due to ELF type"). 
Prior to this patch, perf test 1 would fail: $ sudo oldperf test -v 1 |& head 1: vmlinux symtab matches kallsyms : test child forked, pid 33374 Looking at the vmlinux_path (8 entries long) Using /usr/lib/debug/boot/vmlinux for symbols ERR : 0xfffe0000100f1000: do_undefinstr not on kallsyms ERR : 0xfffe0000100f1320: do_sysinstr not on kallsyms ERR : 0xfffe0000100f13b0: do_debug_exception not on kallsyms ERR : 0xfffe0000100f1498: do_mem_abort not on kallsyms ERR : 0xfffe0000100f1580: do_sp_pc_abort not on kallsyms ... After applying this patch, perf test 1 now succeeds: $ sudo ./newperf test -v 1 |& head 1: vmlinux symtab matches kallsyms : test child forked, pid 33378 Looking at the vmlinux_path (8 entries long) Using /usr/lib/debug/boot/vmlinux for symbols WARN: 0xffff000008081000: diff name v: do_undefinstr k: __exception_text_start WARN: 0xffff0000080819e8: diff name v: __irqentry_text_end k: __softirqentry_text_start WARN: 0xffff000008081d08: diff name v: __entry_text_start k: __softirqentry_text_end WARN: 0xffff00000809db5c: diff name v: flush_icache_range k: __flush_cache_user_range WARN: 0xffff000008101908: diff name v: sys_ni_syscall k: sys_vm86old ... Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Ganapatrao Kulkarni Cc: Jiri Olsa Cc: Namhyung Kim Cc: Naveen N. 
Rao Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171214175242.e30450f17f93ad675d968fa3@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/sym-handling.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 tools/perf/arch/arm64/util/sym-handling.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index b1ab72d2a42e..e04f6cdd6f32 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ libperf-y += header.o +libperf-y += sym-handling.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c new file mode 100644 index 000000000000..0051b1ee8450 --- /dev/null +++ b/tools/perf/arch/arm64/util/sym-handling.c @@ -0,0 +1,22 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2015 Naveen N. Rao, IBM Corporation + */ + +#include "debug.h" +#include "symbol.h" +#include "map.h" +#include "probe-event.h" +#include "probe-file.h" + +#ifdef HAVE_LIBELF_SUPPORT +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) +{ + return ehdr.e_type == ET_EXEC || + ehdr.e_type == ET_REL || + ehdr.e_type == ET_DYN; +} +#endif -- cgit v1.2.3 From 74cd5815d9af6e6c4f3bcecfbc8e439f2fd7e6b1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 21 Dec 2017 17:26:10 +0800 Subject: perf tool: Improve bash command line auto-complete for multiple events with comma perf has perf-completion.sh to define command line auto-completion in bash/zsh. For record/stat -e it works for single events, but isn't working when specifying multiple events with comma. 
It would be very useful if it could be fixed to make it easier by supporting multiple events, comma separated. With this patch, the result can be like this: 1. Support the events returned from 'perf list --raw-dump' root@skl:/tmp# perf stat -e cpu/cache cpu/cache-misses/ cpu/cache-references/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch- cpu/branch-instructions/ cpu/branch-misses/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch-i root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch-instructions/ 2. Support the events listed in /sys/bus/event_source/devices/cpu/events root@skl:/tmp# perf stat -e cycle cycle_activity.cycles_l1d_miss cycle_activity.stalls_l3_miss cycle_activity.cycles_l2_miss cycle_activity.stalls_mem_any cycle_activity.cycles_l3_miss cycle_activity.stalls_total cycle_activity.cycles_mem_any cycles-ct cycle_activity.stalls_l1d_miss cycles-t cycle_activity.stalls_l2_miss root@skl:/tmp# perf stat -e cycles- cycles-ct cycles-t root@skl:/tmp# perf stat -e cycles-t,cpu/c cpu/cache-misses/ cpu/cpu-cycles/ cpu/cycles-t/ cpu/cache-references/ cpu/cycles-ct/ root@skl:/tmp# perf stat -e cycles-t,cpu/cache- cpu/cache-misses/ cpu/cache-references/ root@skl:/tmp# perf stat -e cycles-t,cpu/cache-misses/ 3. Support the uppercase event which is with prefix "cpu/" root@skl:/tmp# perf stat -e cpu/c cpu/cache-misses/ cpu/cpu-cycles/ cpu/cycles-t/ cpu/cache-references/ cpu/cycles-ct/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/C cpu/CACHE-MISSES/ cpu/CPU-CYCLES/ cpu/CYCLES-T/ cpu/CACHE-REFERENCES/ cpu/CYCLES-CT/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/CACHE-REFERENCES/ Note that: a) This patch only supports bash. b) It doesn't support the cases like {},{} or {...,...}. 
Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513848370-8098-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 345f5d6e9ed5..d8310830a18b 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -162,8 +162,33 @@ __perf_main () # List possible events for -e option elif [[ $prev == @("-e"|"--event") && $prev_skip_opts == @(record|stat|top) ]]; then - evts=$($cmd list --raw-dump) - __perfcomp_colon "$evts" "$cur" + + local cur1=${COMP_WORDS[COMP_CWORD]} + local raw_evts=$($cmd list --raw-dump) + local arr s tmp result + + if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then + OLD_IFS="$IFS" + IFS=" " + arr=($raw_evts) + IFS="$OLD_IFS" + + for s in ${arr[@]} + do + if [[ "$s" == *cpu/* ]]; then + tmp=${s#*cpu/} + result=$result" ""cpu/"${tmp^^} + else + result=$result" "$s + fi + done + + evts=${result}+$(ls /sys/bus/event_source/devices/cpu/events) + else + evts=${raw_evts}+$(ls /sys/bus/event_source/devices/cpu/events) + fi + + __perfcomp_colon "$evts" "$cur1" else # List subcommands for perf commands if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| @@ -246,11 +271,16 @@ fi type perf &>/dev/null && _perf() { + if [[ "$COMP_WORDBREAKS" != *,* ]]; then + COMP_WORDBREAKS="${COMP_WORDBREAKS}," + export COMP_WORDBREAKS + fi + local cur words cword prev if [ $preload_get_comp_words_by_ref = "true" ]; then - _get_comp_words_by_ref -n =: cur words cword prev + _get_comp_words_by_ref -n =:, cur words cword prev else - __perf_get_comp_words_by_ref -n =: cur words cword prev + __perf_get_comp_words_by_ref -n =:, cur words cword prev fi __perf_main } && -- cgit v1.2.3 From 
34c16db0f035f3f3dc50fbed03747693c12b6a5b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 22 Dec 2017 18:57:35 +0800 Subject: perf tools: Return all events as auto-completions after comma It's a follow-up to the previous patch "perf tool: Improve bash command line auto-complete for multiple events with comma." It fixes an issue where no events are displayed when <TAB> is typed directly after a comma. With this patch, now the result is: root@skl:/tmp# perf stat -e cpu-cycles,<TAB> Display all 2389 possibilities? (y or n) alarmtimer:alarmtimer_cancel alarmtimer:alarmtimer_fired alarmtimer:alarmtimer_start alarmtimer:alarmtimer_suspend alignment-faults arith.divider_active BAClear_Cost baclears.any block:block_bio_backmerge block:block_bio_bounce block:block_bio_complete block:block_bio_frontmerge block:block_bio_queue block:block_bio_remap block:block_dirty_buffer block:block_getrq block:block_plug block:block_rq_complete block:block_rq_insert block:block_rq_issue block:block_rq_remap block:block_rq_requeue block:block_sleeprq --More-- One remaining issue is that auto-completion doesn't work well for events containing ':', for example clk:clk_enable, because ':' is set as a WORDBREAK by default in bash. More work is needed for this case.
Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513940255-16528-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index d8310830a18b..90206413f4d7 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -183,12 +183,16 @@ __perf_main () fi done - evts=${result}+$(ls /sys/bus/event_source/devices/cpu/events) + evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events) else - evts=${raw_evts}+$(ls /sys/bus/event_source/devices/cpu/events) + evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events) fi - __perfcomp_colon "$evts" "$cur1" + if [[ "$cur1" == , ]]; then + __perfcomp_colon "$evts" "" + else + __perfcomp_colon "$evts" "$cur1" + fi else # List subcommands for perf commands if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| -- cgit v1.2.3 From 5d4fd9c8b83b36d34521b3af361a5726899045bf Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sat, 23 Dec 2017 04:15:58 +0800 Subject: perf tools: Auto-complete for events with ':' It's a follow up patch for a previous patch "perf tool: Return all events as auto-completions after comma". With this patch, auto-completion can work well for events with a ':'. 
For example: root@skl:/tmp# perf stat -e block:block_ block:block_bio_backmerge block:block_rq_complete block:block_bio_bounce block:block_rq_insert block:block_bio_complete block:block_rq_issue block:block_bio_frontmerge block:block_rq_remap block:block_bio_queue block:block_rq_requeue block:block_bio_remap block:block_sleeprq block:block_dirty_buffer block:block_split block:block_getrq block:block_touch_buffer block:block_plug block:block_unplug root@skl:/tmp# perf stat -e block:block_rq_ block:block_rq_complete block:block_rq_issue block:block_rq_requeue block:block_rq_insert block:block_rq_remap root@skl:/tmp# perf stat -e block:block_rq_complete block:block_rq_complete root@skl:/tmp# perf stat -e block:block_rq_complete Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513973758-19109-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 90206413f4d7..fdf75d45efff 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -280,6 +280,11 @@ _perf() export COMP_WORDBREAKS fi + if [[ "$COMP_WORDBREAKS" == *:* ]]; then + COMP_WORDBREAKS="${COMP_WORDBREAKS/:/}" + export COMP_WORDBREAKS + fi + local cur words cword prev if [ $preload_get_comp_words_by_ref = "true" ]; then _get_comp_words_by_ref -n =:, cur words cword prev -- cgit v1.2.3 From 14e138a86f6347c6199f610576d2e11c03bec5f0 Mon Sep 17 00:00:00 2001 From: Avinash Repaka Date: Thu, 21 Dec 2017 20:17:04 -0800 Subject: RDS: Check cmsg_len before dereferencing CMSG_DATA RDS currently doesn't check if the length of the control message is large enough to hold the required data, before dereferencing the control message data. 
This results in following crash: BUG: KASAN: stack-out-of-bounds in rds_rdma_bytes net/rds/send.c:1013 [inline] BUG: KASAN: stack-out-of-bounds in rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066 Read of size 8 at addr ffff8801c928fb70 by task syzkaller455006/3157 CPU: 0 PID: 3157 Comm: syzkaller455006 Not tainted 4.15.0-rc3+ #161 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430 rds_rdma_bytes net/rds/send.c:1013 [inline] rds_sendmsg+0x1f02/0x1f90 net/rds/send.c:1066 sock_sendmsg_nosec net/socket.c:628 [inline] sock_sendmsg+0xca/0x110 net/socket.c:638 ___sys_sendmsg+0x320/0x8b0 net/socket.c:2018 __sys_sendmmsg+0x1ee/0x620 net/socket.c:2108 SYSC_sendmmsg net/socket.c:2139 [inline] SyS_sendmmsg+0x35/0x60 net/socket.c:2134 entry_SYSCALL_64_fastpath+0x1f/0x96 RIP: 0033:0x43fe49 RSP: 002b:00007fffbe244ad8 EFLAGS: 00000217 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fe49 RDX: 0000000000000001 RSI: 000000002020c000 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004017b0 R13: 0000000000401840 R14: 0000000000000000 R15: 0000000000000000 To fix this, we verify that the cmsg_len is large enough to hold the data to be read, before proceeding further. Reported-by: syzbot Signed-off-by: Avinash Repaka Acked-by: Santosh Shilimkar Reviewed-by: Yuval Shaia Signed-off-by: David S. 
Miller --- net/rds/send.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/send.c b/net/rds/send.c index b52cdc8ae428..f72466c63f0c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes) continue; if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) { + if (cmsg->cmsg_len < + CMSG_LEN(sizeof(struct rds_rdma_args))) + return -EINVAL; args = CMSG_DATA(cmsg); *rdma_bytes += args->remote_vec.bytes; } -- cgit v1.2.3 From 19142551b2be4a9e13838099fde1351386e5e007 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 22 Dec 2017 09:35:16 +0200 Subject: tipc: error path leak fixes in tipc_enable_bearer() Fix memory leak in tipc_enable_bearer() if enable_media() fails, and cleanup with bearer_disable() if tipc_mon_create() fails. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tommi Rantala Signed-off-by: David S. Miller --- net/tipc/bearer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 47ec121574ce..c8001471da6c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -324,6 +324,7 @@ restart: if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); + kfree(b); return -EINVAL; } @@ -347,8 +348,10 @@ restart: if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); - if (tipc_mon_create(net, bearer_id)) + if (tipc_mon_create(net, bearer_id)) { + bearer_disable(net, b); return -ENOMEM; + } pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, -- cgit v1.2.3 From 642a8439ddd8423b92f2e71960afe21ee1f66bb6 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Fri, 22 Dec 2017 09:35:17 +0200 Subject: tipc: fix tipc_mon_delete() oops in tipc_enable_bearer() error path Calling tipc_mon_delete() before the monitor has been created will oops. This can happen in tipc_enable_bearer() error path if tipc_disc_create() fails. 
[ 48.589074] BUG: unable to handle kernel paging request at 0000000000001008 [ 48.590266] IP: tipc_mon_delete+0xea/0x270 [tipc] [ 48.591223] PGD 1e60c5067 P4D 1e60c5067 PUD 1eb0cf067 PMD 0 [ 48.592230] Oops: 0000 [#1] SMP KASAN [ 48.595610] CPU: 5 PID: 1199 Comm: tipc Tainted: G B 4.15.0-rc4-pc64-dirty #5 [ 48.597176] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 [ 48.598489] RIP: 0010:tipc_mon_delete+0xea/0x270 [tipc] [ 48.599347] RSP: 0018:ffff8801d827f668 EFLAGS: 00010282 [ 48.600705] RAX: ffff8801ee813f00 RBX: 0000000000000204 RCX: 0000000000000000 [ 48.602183] RDX: 1ffffffff1de6a75 RSI: 0000000000000297 RDI: 0000000000000297 [ 48.604373] RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff1dd1533 [ 48.605607] R10: ffffffff8eafbb05 R11: fffffbfff1dd1534 R12: 0000000000000050 [ 48.607082] R13: dead000000000200 R14: ffffffff8e73f310 R15: 0000000000001020 [ 48.608228] FS: 00007fc686484800(0000) GS:ffff8801f5540000(0000) knlGS:0000000000000000 [ 48.610189] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 48.611459] CR2: 0000000000001008 CR3: 00000001dda70002 CR4: 00000000003606e0 [ 48.612759] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 48.613831] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 48.615038] Call Trace: [ 48.615635] tipc_enable_bearer+0x415/0x5e0 [tipc] [ 48.620623] tipc_nl_bearer_enable+0x1ab/0x200 [tipc] [ 48.625118] genl_family_rcv_msg+0x36b/0x570 [ 48.631233] genl_rcv_msg+0x5a/0xa0 [ 48.631867] netlink_rcv_skb+0x1cc/0x220 [ 48.636373] genl_rcv+0x24/0x40 [ 48.637306] netlink_unicast+0x29c/0x350 [ 48.639664] netlink_sendmsg+0x439/0x590 [ 48.642014] SYSC_sendto+0x199/0x250 [ 48.649912] do_syscall_64+0xfd/0x2c0 [ 48.650651] entry_SYSCALL64_slow_path+0x25/0x25 [ 48.651843] RIP: 0033:0x7fc6859848e3 [ 48.652539] RSP: 002b:00007ffd25dff938 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 48.654003] RAX: ffffffffffffffda RBX: 00007ffd25dff990 RCX: 00007fc6859848e3 [ 
48.655303] RDX: 0000000000000054 RSI: 00007ffd25dff990 RDI: 0000000000000003 [ 48.656512] RBP: 00007ffd25dff980 R08: 00007fc685c35fc0 R09: 000000000000000c [ 48.657697] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000d13010 [ 48.658840] R13: 00007ffd25e009c0 R14: 0000000000000000 R15: 0000000000000000 [ 48.662972] RIP: tipc_mon_delete+0xea/0x270 [tipc] RSP: ffff8801d827f668 [ 48.664073] CR2: 0000000000001008 [ 48.664576] ---[ end trace e811818d54d5ce88 ]--- Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Tommi Rantala Signed-off-by: David S. Miller --- net/tipc/monitor.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 8e884ed06d4b..32dc33a94bc7 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *tmp; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); tn->monitors[bearer_id] = NULL; list_for_each_entry_safe(peer, tmp, &self->list, list) { -- cgit v1.2.3 From 178e5f57a8d8f8fc5799a624b96fc31ef9a29ffa Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Fri, 22 Dec 2017 17:12:09 +0800 Subject: net: fec: unmap the xmit buffer that are not transferred by DMA The enet IP only support 32 bit, it will use swiotlb buffer to do dma mapping when xmit buffer DMA memory address is bigger than 4G in i.MX platform. 
After a stress suspend/resume test, the following log is printed: [12826.352864] fec 5b040000.ethernet: swiotlb buffer is full (sz: 191 bytes) [12826.359676] DMA: Out of SW-IOMMU space for 191 bytes at device 5b040000.ethernet [12826.367110] fec 5b040000.ethernet eth0: Tx DMA memory map failed The issue is that xmit buffers which are ready and DMA-mapped, but which the DMA engine has not yet copied into the FIFO, are not unmapped when the MAC restarts. So check for DMA-mapped buffers and unmap them. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 610573855213..8184d2fca9be 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev) for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. */ bdp->cbd_sc = cpu_to_fec16(0); + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); if (txq->tx_skbuff[i]) { dev_kfree_skb_any(txq->tx_skbuff[i]); txq->tx_skbuff[i] = NULL; -- cgit v1.2.3 From 5a8bae9761dc5dd409ff5c3a529b2801bd0dac3a Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Fri, 22 Dec 2017 16:05:27 +0530 Subject: tg3: Update copyright Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Signed-off-by: David S.
Miller --- drivers/net/ethernet/broadcom/tg3.c | 6 ++++-- drivers/net/ethernet/broadcom/tg3.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d09c5a9c53b5..5fe8d9b05f31 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -4,11 +4,13 @@ * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com) * Copyright (C) 2004 Sun Microsystems Inc. - * Copyright (C) 2005-2014 Broadcom Corporation. + * Copyright (C) 2005-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Limited. * * Firmware is: * Derived from proprietary unpublished source code, - * Copyright (C) 2000-2003 Broadcom Corporation. + * Copyright (C) 2000-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Ltd. * * Permission is hereby granted for the distribution of this firmware * data in hexadecimal or equivalent format, provided this copyright diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index c2d02d02d1e6..3d60fc7a2da6 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -5,7 +5,8 @@ * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com) * Copyright (C) 2004 Sun Microsystems Inc. - * Copyright (C) 2007-2014 Broadcom Corporation. + * Copyright (C) 2007-2016 Broadcom Corporation. + * Copyright (C) 2016-2017 Broadcom Limited. */ #ifndef _T3_H -- cgit v1.2.3 From 4419bb1cedcda0272e1dc410345c5a1d1da0e367 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Fri, 22 Dec 2017 16:05:28 +0530 Subject: tg3: Add workaround to restrict 5762 MRRS to 2048 One of AMD based server with 5762 hangs with jumbo frame traffic. This AMD platform has southbridge limitation which is restricting MRRS to 4000. 
As a workaround, the driver restricts the MRRS to 2048 for this particular 5762 NX1 card. Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 10 ++++++++++ drivers/net/ethernet/broadcom/tg3.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5fe8d9b05f31..a0caa71a8c3b 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -10054,6 +10054,16 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy) tw32(GRC_MODE, tp->grc_mode | val); + /* On one of the AMD platform, MRRS is restricted to 4000 because of + * south bridge limitation. As a workaround, Driver is setting MRRS + * to 2048 instead of default 4096. + */ + if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL && + tp->pdev->subsystem_device == TG3PCI_SUBDEVICE_ID_DELL_5762) { + val = tr32(TG3PCI_DEV_STATUS_CTRL) & ~MAX_READ_REQ_MASK; + tw32(TG3PCI_DEV_STATUS_CTRL, val | MAX_READ_REQ_SIZE_2048); + } + /* Setup the timer prescalar register. Clock is always 66Mhz.
*/ val = tr32(GRC_MISC_CFG); val &= ~0xff; diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 3d60fc7a2da6..1f0271fa7c74 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -97,6 +97,7 @@ #define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR 0x0106 #define TG3PCI_SUBDEVICE_ID_DELL_MERLOT 0x0109 #define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT 0x010a +#define TG3PCI_SUBDEVICE_ID_DELL_5762 0x07f0 #define TG3PCI_SUBVENDOR_ID_COMPAQ PCI_VENDOR_ID_COMPAQ #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE 0x007c #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2 0x009a @@ -282,6 +283,9 @@ #define TG3PCI_STD_RING_PROD_IDX 0x00000098 /* 64-bit */ #define TG3PCI_RCV_RET_RING_CON_IDX 0x000000a0 /* 64-bit */ /* 0xa8 --> 0xb8 unused */ +#define TG3PCI_DEV_STATUS_CTRL 0x000000b4 +#define MAX_READ_REQ_SIZE_2048 0x00004000 +#define MAX_READ_REQ_MASK 0x00007000 #define TG3PCI_DUAL_MAC_CTRL 0x000000b8 #define DUAL_MAC_CTRL_CH_MASK 0x00000003 #define DUAL_MAC_CTRL_ID 0x00000004 -- cgit v1.2.3 From e60ee41aaf898584205a6af5c996860d0fe6a836 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Fri, 22 Dec 2017 16:05:29 +0530 Subject: tg3: Enable PHY reset in MTU change path for 5720 A customer noticed RX path hang when MTU is changed on the fly while running heavy traffic with NCSI enabled for 5717 and 5719. Since 5720 belongs to same ASIC family, we observed same issue and same fix could solve this problem for 5720. Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/tg3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a0caa71a8c3b..8995cfefbfcf 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14239,7 +14239,8 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) */ if (tg3_asic_rev(tp) == ASIC_REV_57766 || tg3_asic_rev(tp) == ASIC_REV_5717 || - tg3_asic_rev(tp) == ASIC_REV_5719) + tg3_asic_rev(tp) == ASIC_REV_5719 || + tg3_asic_rev(tp) == ASIC_REV_5720) reset_phy = true; err = tg3_restart_hw(tp, reset_phy); -- cgit v1.2.3 From f7084059a9cb9e56a186e1677b1dcffd76c2cd24 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 22 Dec 2017 13:01:39 -0200 Subject: bnx2x: Improve reliability in case of nested PCI errors While in recovery process of PCI error (called EEH on PowerPC arch), another PCI transaction could be corrupted causing a situation of nested PCI errors. Also, this scenario could be reproduced with error injection mechanisms (for debug purposes). We observe that in case of nested PCI errors, bnx2x might attempt to initialize its shmem and cause a kernel crash due to bad addresses read from MCP. Multiple different stack traces were observed depending on the point the second PCI error happens. This patch avoids the crashes by: * failing PCI recovery in case of nested errors (since multiple PCI errors in a row are not expected to lead to a functional adapter anyway), and by, * preventing access to adapter FW when MCP is failed (we mark it as failed when shmem cannot get initialized properly). Reported-by: Abdul Haleem Signed-off-by: Guilherme G. Piccoli Acked-by: Shahed Shaikh Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 14 +++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 4c739d5355d2..8ae269ec17a1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3030,7 +3030,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) del_timer_sync(&bp->timer); - if (IS_PF(bp)) { + if (IS_PF(bp) && !BP_NOMCP(bp)) { /* Set ALWAYS_ALIVE bit in shmem */ bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE; bnx2x_drv_pulse(bp); @@ -3116,7 +3116,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) bp->cnic_loaded = false; /* Clear driver version indication in shmem */ - if (IS_PF(bp)) + if (IS_PF(bp) && !BP_NOMCP(bp)) bnx2x_update_mng_version(bp); /* Check if there are pending parity attentions. If there are - set diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 91e2a7560b48..ddd5d3ebd201 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9578,6 +9578,15 @@ static int bnx2x_init_shmem(struct bnx2x *bp) do { bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR); + + /* If we read all 0xFFs, means we are in PCI error state and + * should bail out to avoid crashes on adapter's FW reads. 
+ */ + if (bp->common.shmem_base == 0xFFFFFFFF) { + bp->flags |= NO_MCP_FLAG; + return -ENODEV; + } + if (bp->common.shmem_base) { val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]); if (val & SHR_MEM_VALIDITY_MB) @@ -14320,7 +14329,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) BNX2X_ERR("IO slot reset --> driver unload\n"); /* MCP should have been reset; Need to wait for validity */ - bnx2x_init_shmem(bp); + if (bnx2x_init_shmem(bp)) { + rtnl_unlock(); + return PCI_ERS_RESULT_DISCONNECT; + } if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { u32 v; -- cgit v1.2.3 From 76dc6c097d581ad8eeedf8e1a000423a3d742445 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Tue, 26 Dec 2017 15:08:53 +0100 Subject: cpu/hotplug: Move inline keyword at the beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix non-fatal warnings such as: kernel/cpu.c:95:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] static void inline cpuhp_lock_release(bool bringup) { } ^~~~~~ Signed-off-by: Mathieu Malaterre Signed-off-by: Thomas Gleixner Cc: Arnd Bergmann Cc: Sebastian Andrzej Siewior Cc: Peter Zijlstra Cc: "Paul E. McKenney" Link: https://lkml.kernel.org/r/20171226140855.16583-1-malat@debian.org --- kernel/cpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 41376c3ac93b..3d002a6f216e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); -static void inline cpuhp_lock_acquire(bool bringup) +static inline void cpuhp_lock_acquire(bool bringup) { lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } -static void inline cpuhp_lock_release(bool bringup) +static inline void cpuhp_lock_release(bool bringup) { lock_map_release(bringup ? 
&cpuhp_state_up_map : &cpuhp_state_down_map); } #else -static void inline cpuhp_lock_acquire(bool bringup) { } -static void inline cpuhp_lock_release(bool bringup) { } +static inline void cpuhp_lock_acquire(bool bringup) { } +static inline void cpuhp_lock_release(bool bringup) { } #endif -- cgit v1.2.3 From 8cb38a602478e9f806571f6920b0a3298aabf042 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 22 Dec 2017 10:15:20 -0800 Subject: sctp: Replace use of sockets_allocated with specified macro. The patch(180d8cd942ce) replaces all uses of struct sock fields' memory_pressure, memory_allocated, sockets_allocated, and sysctl_mem to accessor macros. But the sockets_allocated field of sctp sock is not replaced at all. Then replace it now for unifying the code. Fixes: 180d8cd942ce ("foundations of per-cgroup memory pressure controlling.") Cc: Glauber Costa Signed-off-by: Tonghao Zhang Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3253f724a995..b4fb6e4886d2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4498,7 +4498,7 @@ static int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); - percpu_counter_inc(&sctp_sockets_allocated); + sk_sockets_allocated_inc(sk); sock_prot_inuse_add(net, sk->sk_prot, 1); /* Nothing can fail after this block, otherwise @@ -4542,7 +4542,7 @@ static void sctp_destroy_sock(struct sock *sk) } sctp_endpoint_free(sp->ep); local_bh_disable(); - percpu_counter_dec(&sctp_sockets_allocated); + sk_sockets_allocated_dec(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); } -- cgit v1.2.3 From 45d8b80c2ac5d21cd1e2954431fb676bc2b1e099 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Dec 2017 20:32:35 -0500 Subject: ring-buffer: Mask out the info bits when returning buffer page length Two info bits were added to the "commit" part of the ring buffer 
data page when returned to be consumed. This was to inform the user space readers that events have been missed, and that the count may be stored at the end of the page. What wasn't handled, was the splice code that actually called a function to return the length of the data in order to zero out the rest of the page before sending it up to user space. These data bits were returned with the length making the value negative, and that negative value was not checked. It was compared to PAGE_SIZE, and only used if the size was less than PAGE_SIZE. Luckily PAGE_SIZE is unsigned long which made the compare an unsigned compare, meaning the negative size value did not end up causing a large portion of memory to be randomly zeroed out. Cc: stable@vger.kernel.org Fixes: 66a8cb95ed040 ("ring-buffer: Add place holder recording of dropped events") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c87766c1c204..e06cde093f76 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); /* Missed count stored at end */ #define RB_MISSED_STORED (1 << 30) +#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED) + struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ @@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage) */ size_t ring_buffer_page_len(void *page) { - return local_read(&((struct buffer_data_page *)page)->commit) + struct buffer_data_page *bpage = page; + + return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS) + BUF_PAGE_HDR_SIZE; } -- cgit v1.2.3 From 6b7e633fe9c24682df550e5311f47fb524701586 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Dec 2017 20:38:57 -0500 Subject: tracing: Remove extra zeroing out of the ring buffer page The 
ring_buffer_read_page() takes care of zeroing out any extra data in the page that it returns. There's no need to zero it out again from the consumer. It was removed from one consumer of this function, but read_buffers_splice_read() did not remove it, and worse, it contained a nasty bug because of it. Cc: stable@vger.kernel.org Fixes: 2711ca237a084 ("ring-buffer: Move zeroing out excess in page to ring buffer code") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 59518b8126d0..73652d5318b2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6769,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; - int entries, size, i; + int entries, i; ssize_t ret = 0; #ifdef CONFIG_TRACER_MAX_TRACE @@ -6823,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, break; } - /* - * zero out any left over data, this is going to - * user land. - */ - size = ring_buffer_page_len(ref->page); - if (size < PAGE_SIZE) - memset(ref->page + size, 0, PAGE_SIZE - size); - page = virt_to_page(ref->page); spd.pages[i] = page; -- cgit v1.2.3 From ae415fa4c5248a8cf4faabd5a3c20576cb1ad607 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 22 Dec 2017 21:19:29 -0500 Subject: ring-buffer: Do no reuse reader page if still in use To free the reader page that is allocated with ring_buffer_alloc_read_page(), ring_buffer_free_read_page() must be called. For faster performance, this page can be reused by the ring buffer to avoid having to free and allocate new pages. The issue arises when the page is used with a splice pipe into the networking code. The networking code may up the page counter for the page, and keep it active while sending it is queued to go to the network. 
The incrementing of the page ref does not prevent it from being reused in the ring buffer, and this can cause the page that is being sent out to the network to be modified before it is sent by reading new data. Add a check to the page ref counter, and only reuse the page if it is not being used anywhere else. Cc: stable@vger.kernel.org Fixes: 73a757e63114d ("ring-buffer: Return reader page back into existing ring buffer") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e06cde093f76..9ab18995ff1e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4404,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) { struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct buffer_data_page *bpage = data; + struct page *page = virt_to_page(bpage); unsigned long flags; + /* If the page is still in use someplace else, we can't reuse it */ + if (page_ref_count(page) > 1) + goto out; + local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); @@ -4417,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data) arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); + out: free_page((unsigned long)bpage); } EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); -- cgit v1.2.3 From 24f2aaf952ee0b59f31c3a18b8b36c9e3d3c2cf5 Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Tue, 26 Dec 2017 15:12:53 +0800 Subject: tracing: Fix crash when it fails to alloc ring buffer Double free of the ring buffer happens when it fails to alloc new ring buffer instance for max_buffer if TRACER_MAX_TRACE is configured. 
The root cause is that the pointer is not set to NULL after the buffer is freed in allocate_trace_buffers(), and the freeing of the ring buffer is invoked again later if the pointer is not equal to Null, as: instance_mkdir() |-allocate_trace_buffers() |-allocate_trace_buffer(tr, &tr->trace_buffer...) |-allocate_trace_buffer(tr, &tr->max_buffer...) // allocate fail(-ENOMEM),first free // and the buffer pointer is not set to null |-ring_buffer_free(tr->trace_buffer.buffer) // out_free_tr |-free_trace_buffers() |-free_trace_buffer(&tr->trace_buffer); //if trace_buffer is not null, free again |-ring_buffer_free(buf->buffer) |-rb_free_cpu_buffer(buffer->buffers[cpu]) // ring_buffer_per_cpu is null, and // crash in ring_buffer_per_cpu->pages Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com Cc: stable@vger.kernel.org Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Signed-off-by: Jing Xia Signed-off-by: Chunyan Zhang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 73652d5318b2..0e53d46544b8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7603,7 +7603,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) allocate_snapshot ? 
size : 1); if (WARN_ON(ret)) { ring_buffer_free(tr->trace_buffer.buffer); + tr->trace_buffer.buffer = NULL; free_percpu(tr->trace_buffer.data); + tr->trace_buffer.data = NULL; return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; -- cgit v1.2.3 From 4397f04575c44e1440ec2e49b6302785c95fd2f8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 26 Dec 2017 20:07:34 -0500 Subject: tracing: Fix possible double free on failure of allocating trace buffer Jing Xia and Chunyan Zhang reported that on failing to allocate part of the tracing buffer, memory is freed, but the pointers that point to them are not initialized back to NULL, and later paths may try to free the freed memory again. Jing and Chunyan fixed one of the locations that does this, but missed a spot. Link: http://lkml.kernel.org/r/20171226071253.8968-1-chunyan.zhang@spreadtrum.com Cc: stable@vger.kernel.org Fixes: 737223fbca3b1 ("tracing: Consolidate buffer allocation code") Reported-by: Jing Xia Reported-by: Chunyan Zhang Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e53d46544b8..2a8d8a294345 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7580,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size buf->data = alloc_percpu(struct trace_array_cpu); if (!buf->data) { ring_buffer_free(buf->buffer); + buf->buffer = NULL; return -ENOMEM; } -- cgit v1.2.3 From 7ad1437d6ace0e450a6c1167720608ad660b191d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Dec 2017 19:45:31 +0100 Subject: perf/x86/intel: Plug memory leak in intel_pmu_init() A recent commit introduced an extra merge_attr() call in the skylake branch, which causes a memory leak. Store the pointer to the extra allocated memory and free it at the end of the function. 
Fixes: a5df70c354c2 ("perf/x86: Only show format attributes when supported") Reported-by: Tommi Rantala Signed-off-by: Thomas Gleixner Cc: Andi Kleen --- arch/x86/events/intel/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09c26a4f139c..731153a4681e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = { __init int intel_pmu_init(void) { + struct attribute **extra_attr = NULL; + struct attribute **to_free = NULL; union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; @@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void) unsigned int unused; struct extra_reg *er; int version, i; - struct attribute **extra_attr = NULL; char *name; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { @@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void) extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_attr = merge_attr(extra_attr, skl_format_attr); + to_free = extra_attr; x86_pmu.cpu_events = get_hsw_events_attrs(); intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); @@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void) pr_cont("full-width counters, "); } + kfree(to_free); return 0; } -- cgit v1.2.3 From 7ac139eaa6bbdb07c547b6916a808eab3897e0e3 Mon Sep 17 00:00:00 2001 From: rodrigosiqueira Date: Fri, 15 Dec 2017 11:15:33 -0200 Subject: x86: Remove unused parameter of prepare_switch_to Commit e37e43a497d5 ("x86/mm/64: Enable vmapped stacks (CONFIG_HAVE_ARCH_VMAP_STACK=y)") added prepare_switch_to with one extra parameter which is not used by the function, remove it. 
Signed-off-by: Rodrigo Siqueira Signed-off-by: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20171215131533.hp6kqebw45o7uvsb@smtp.gmail.com --- arch/x86/include/asm/switch_to.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8c6bd6863db9..1008d4622709 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ -static inline void prepare_switch_to(struct task_struct *prev, - struct task_struct *next) +static inline void prepare_switch_to(struct task_struct *next) { #ifdef CONFIG_VMAP_STACK /* @@ -70,7 +69,7 @@ struct fork_frame { #define switch_to(prev, next, last) \ do { \ - prepare_switch_to(prev, next); \ + prepare_switch_to(next); \ \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) -- cgit v1.2.3 From 2b83ff96f51d0b039c4561b9f95c824d7bddb85c Mon Sep 17 00:00:00 2001 From: Matthieu CASTET Date: Tue, 12 Dec 2017 11:10:44 +0100 Subject: led: core: Fix brightness setting when setting delay_off=0 With the current code, the following sequence won't work : echo timer > trigger echo 0 > delay_off * at this point we call ** led_delay_off_store ** led_blink_set *** stop timer ** led_blink_setup ** led_set_software_blink *** if !delay_on, led off *** if !delay_off, set led_set_brightness_nosleep <--- LED_BLINK_SW is set but timer is stop *** otherwise start timer/set LED_BLINK_SW flag echo xxx > brightness * led_set_brightness ** if LED_BLINK_SW *** if brightness=0, led off *** else apply brightness if next timer <--- timer is stop, and will never apply new setting ** otherwise set led_set_brightness_nosleep To fix that, when we delete the timer, we should clear LED_BLINK_SW. 
Cc: linux-leds@vger.kernel.org Signed-off-by: Matthieu CASTET Signed-off-by: Jacek Anaszewski --- drivers/leds/led-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index fd83c7f77a95..f3654fd2eaf3 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -186,7 +186,7 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - del_timer_sync(&led_cdev->blink_timer); + led_stop_software_blink(led_cdev); clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); -- cgit v1.2.3 From ac461122c88a10b7d775de2f56467f097c9e627a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 27 Dec 2017 11:48:50 -0800 Subject: x86-32: Fix kexec with stack canary (CONFIG_CC_STACKPROTECTOR) Commit e802a51ede91 ("x86/idt: Consolidate IDT invalidation") cleaned up and unified the IDT invalidation that existed in a couple of places. It changed no actual real code. Despite not changing any actual real code, it _did_ change code generation: by implementing the common idt_invalidate() function in arch/x86/kernel/idt.c, it made the use of the function in arch/x86/kernel/machine_kexec_32.c be a real function call rather than an (accidental) inlining of the function. That, in turn, exposed two issues: - in load_segments(), we had incorrectly reset all the segment registers, which then made the stack canary load (which gcc does using offset of %gs) cause a trap. Instead of %gs pointing to the stack canary, it will be the normal zero-based kernel segment, and the stack canary load will take a page fault at address 0x14. - to make this even harder to debug, we had invalidated the GDT just before calling idt_invalidate(), which meant that the fault happened with an invalid GDT, which in turn causes a triple fault and immediate reboot. Fix this by (a) not reloading the special segments in load_segments(). 
We currently don't do any percpu accesses (which would require %fs on x86-32) in this area, but there's no reason to think that we might not want to do them, and like %gs, it's pointless to break it. (b) doing idt_invalidate() before invalidating the GDT, to keep things at least _slightly_ more debuggable for a bit longer. Without a IDT, traps will not work. Without a GDT, traps also will not work, but neither will any segment loads etc. So in a very real sense, the GDT is even more core than the IDT. Fixes: e802a51ede91 ("x86/idt: Consolidate IDT invalidation") Reported-and-tested-by: Alexandru Chirvasitu Signed-off-by: Linus Torvalds Signed-off-by: Thomas Gleixner Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Brian Gerst Cc: Steven Rostedt Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.LFD.2.21.1712271143180.8572@i7.lan --- arch/x86/kernel/machine_kexec_32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 00bc751c861c..edfede768688 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -48,8 +48,6 @@ static void load_segments(void) "\tmovl $"STR(__KERNEL_DS)",%%eax\n" "\tmovl %%eax,%%ds\n" "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" "\tmovl %%eax,%%ss\n" : : : "eax", "memory"); #undef STR @@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image) * The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. 
*/ - set_gdt(phys_to_virt(0), 0); idt_invalidate(phys_to_virt(0)); + set_gdt(phys_to_virt(0), 0); /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, -- cgit v1.2.3 From 0074a8f3b30302383ce59867299975fbf37a4061 Mon Sep 17 00:00:00 2001 From: Rafael Gago Date: Thu, 21 Dec 2017 13:27:30 +0100 Subject: mtd: spi-nor: Add support for s25fl128l and s25fl256l They are exactly the same as the s25fl064l but bigger. Signed-off-by: Rafael Gago Castano Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/spi-nor.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 8bafd462f0ae..79c598425352 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1094,7 +1094,7 @@ static const struct flash_info spi_nor_ids[] = { { "pm25lv010", INFO(0, 0, 32 * 1024, 4, SECT_4K_PMC) }, { "pm25lq032", INFO(0x7f9d46, 0, 64 * 1024, 64, SECT_4K) }, - /* Spansion -- single (large) sector size only, at least + /* Spansion/Cypress -- single (large) sector size only, at least * for the chips listed here (without boot sectors). 
*/ { "s25sl032p", INFO(0x010215, 0x4d00, 64 * 1024, 64, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, @@ -1123,6 +1123,8 @@ static const struct flash_info spi_nor_ids[] = { { "s25fl204k", INFO(0x014013, 0, 64 * 1024, 8, SECT_4K | SPI_NOR_DUAL_READ) }, { "s25fl208k", INFO(0x014014, 0, 64 * 1024, 16, SECT_4K | SPI_NOR_DUAL_READ) }, { "s25fl064l", INFO(0x016017, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, + { "s25fl128l", INFO(0x016018, 0, 64 * 1024, 256, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, + { "s25fl256l", INFO(0x016019, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, /* SST -- large erase sizes are "overlays", "sectors" are 4K */ { "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024, 8, SECT_4K | SST_WRITE) }, -- cgit v1.2.3 From ad9a3668a434faca1339789ed2f043d679199309 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 24 Dec 2017 13:54:56 +0200 Subject: IB/mlx5: Serialize access to the VMA list User-space applications can do mmap and munmap directly at any time. Since the VMA list is not protected with a mutex, concurrent accesses to the VMA list from the mmap and munmap can cause data corruption. Add a mutex around the list. 
Cc: # v4.7 Fixes: 7c2344c3bbf9 ("IB/mlx5: Implements disassociate_ucontext API") Reviewed-by: Yishai Hadas Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 8 ++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b4ef4d9b6ce5..8ac50de2b242 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, } INIT_LIST_HEAD(&context->vma_private_list); + mutex_init(&context->vma_private_list_mutex); INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); @@ -1624,7 +1625,9 @@ static void mlx5_ib_vma_close(struct vm_area_struct *area) * mlx5_ib_disassociate_ucontext(). */ mlx5_ib_vma_priv_data->vma = NULL; + mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex); list_del(&mlx5_ib_vma_priv_data->list); + mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex); kfree(mlx5_ib_vma_priv_data); } @@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, return -ENOMEM; vma_prv->vma = vma; + vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex; vma->vm_private_data = vma_prv; vma->vm_ops = &mlx5_ib_vm_ops; + mutex_lock(&ctx->vma_private_list_mutex); list_add(&vma_prv->list, vma_head); + mutex_unlock(&ctx->vma_private_list_mutex); return 0; } @@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) * mlx5_ib_vma_close. 
*/ down_write(&owning_mm->mmap_sem); + mutex_lock(&context->vma_private_list_mutex); list_for_each_entry_safe(vma_private, n, &context->vma_private_list, list) { vma = vma_private->vma; @@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) list_del(&vma_private->list); kfree(vma_private); } + mutex_unlock(&context->vma_private_list_mutex); up_write(&owning_mm->mmap_sem); mmput(owning_mm); put_task_struct(owning_process); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 6dd8cac78de2..2c5f3533bbc9 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -115,6 +115,8 @@ enum { struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; + /* protect vma_private_list add/del */ + struct mutex *vma_private_list_mutex; }; struct mlx5_ib_ucontext { @@ -129,6 +131,8 @@ struct mlx5_ib_ucontext { /* Transport Domain number */ u32 tdn; struct list_head vma_private_list; + /* protect vma_private_list add/del */ + struct mutex vma_private_list_mutex; unsigned long upd_xlt_page; /* protect ODP/KSM */ -- cgit v1.2.3 From 05d14e7b0c138cb07ba30e464f47b39434f3fdef Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 24 Dec 2017 13:54:57 +0200 Subject: IB/uverbs: Fix command checking as part of ib_uverbs_ex_modify_qp() If the input command length is larger than the kernel supports, an error should be returned in case the unsupported bytes are not cleared, instead of the other way around. This matches what all other callers of ib_is_udata_cleared do and will avoid user ABI problems in the future. 
Cc: # v4.10 Fixes: 189aba99e700 ("IB/uverbs: Extend modify_qp and support packet pacing") Reviewed-by: Yishai Hadas Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d0202bb176a4..840b24096690 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2074,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, return -EOPNOTSUPP; if (ucore->inlen > sizeof(cmd)) { - if (ib_is_udata_cleared(ucore, sizeof(cmd), - ucore->inlen - sizeof(cmd))) + if (!ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) return -EOPNOTSUPP; } -- cgit v1.2.3 From 4a50881bbac309e6f0684816a180bc3c14e1485d Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 24 Dec 2017 13:54:58 +0200 Subject: IB/core: Verify that QP is security enabled in create and destroy The XRC target QP create flow sets up qp_sec only if there is an IB link with LSM security enabled. However, several other related uAPI entry points blindly follow the qp_sec NULL pointer, resulting in a possible oops. Check for NULL before using qp_sec. 
Cc: # v4.12 Fixes: d291f1a65232 ("IB/core: Enforce PKey security on QPs") Reviewed-by: Daniel Jurgens Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/security.c | 3 +++ drivers/infiniband/core/verbs.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index feafdb961c48..59b2f96d986a 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev) if (ret) return ret; + if (!qp->qp_sec) + return 0; + mutex_lock(&real_qp->qp_sec->mutex); ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys, qp->qp_sec); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 3fb8fb6cc824..e36d27ed4daa 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp) spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); atomic_dec(&real_qp->usecnt); - ib_close_shared_qp_security(qp->qp_sec); + if (qp->qp_sec) + ib_close_shared_qp_security(qp->qp_sec); kfree(qp); return 0; -- cgit v1.2.3 From 45e6ae7ef21b907dacb18da62d5787d74a31d860 Mon Sep 17 00:00:00 2001 From: Nitzan Carmi Date: Tue, 26 Dec 2017 11:20:20 +0200 Subject: IB/mlx5: Fix mlx5_ib_alloc_mr error flow ibmr.device is being set only after ib_alloc_mr() is (successfully) complete. Therefore, in case mlx5_core_create_mkey() return with error, the error flow calls mlx5_free_priv_descs() which uses ibmr.device (which doesn't exist yet), causing a NULL dereference oops. To fix this, the IB device should be set in the mr struct earlier stage (e.g. prior to calling mlx5_core_create_mkey()). 
Fixes: 8a187ee52b04 ("IB/mlx5: Support the new memory registration API") Signed-off-by: Max Gurtovoy Signed-off-by: Nitzan Carmi Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ee0ee1f9994b..d109fe8290a7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, MLX5_SET(mkc, mkc, access_mode, mr->access_mode); MLX5_SET(mkc, mkc, umr_en, 1); + mr->ibmr.device = pd->device; err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) goto err_destroy_psv; -- cgit v1.2.3 From 59585b4be9ae4dc6506551709bdcd6f5210b8a01 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 25 Dec 2017 03:43:53 +0100 Subject: sparc64: repair calling incorrect hweight function from stubs Commit v4.12-rc4-1-g9289ea7f952b introduced a mistake that made the 64-bit hweight stub call the 16-bit hweight function. Fixes: 9289ea7f952b ("sparc64: Use indirect calls in hamming weight stubs") Signed-off-by: Jan Engelhardt Signed-off-by: David S. 
Miller --- arch/sparc/lib/hweight.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S index e5547b22cd18..0ddbbb031822 100644 --- a/arch/sparc/lib/hweight.S +++ b/arch/sparc/lib/hweight.S @@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32) .previous ENTRY(__arch_hweight64) - sethi %hi(__sw_hweight16), %g1 - jmpl %g1 + %lo(__sw_hweight16), %g0 + sethi %hi(__sw_hweight64), %g1 + jmpl %g1 + %lo(__sw_hweight64), %g0 nop ENDPROC(__arch_hweight64) EXPORT_SYMBOL(__arch_hweight64) -- cgit v1.2.3 From 17407ec3354d291541099b2a7cd71545d9402e14 Mon Sep 17 00:00:00 2001 From: Romain Porte Date: Thu, 28 Dec 2017 11:03:24 +0100 Subject: mtd: spi-nor: Add ISSI is25lp080d support Add support for a new ISSI 1MB SPI NOR chip that was tested in our lab. Datasheet is available at: http://www.issi.com/WW/pdf/25LP-WP080D.pdf Testing was done only without the SPI_NOR_{DUAL,QUAD}_READ flags that were added later, according to the datasheet. Tested-by: Pascal Fabreges Reviewed-by: Alexander Sverdlin Signed-off-by: Romain Porte Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/spi-nor.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 79c598425352..d445a4d3b770 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1048,6 +1048,8 @@ static const struct flash_info spi_nor_ids[] = { { "is25cd512", INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, { "is25lq040b", INFO(0x9d4013, 0, 64 * 1024, 8, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25lp080d", INFO(0x9d6014, 0, 64 * 1024, 16, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "is25lp128", INFO(0x9d6018, 0, 64 * 1024, 256, SECT_4K | SPI_NOR_DUAL_READ) }, -- cgit v1.2.3 From 39c3fd58952d7599d367c84c1330b785d91d6088 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 2 Dec 2017 18:11:04 +0100 Subject: kernel/irq: Extend lockdep class for request mutex The 
IRQ code already has support for lockdep class for the lock mutex in an interrupt descriptor. Extend this to add a second class for the request mutex in the descriptor. Not having a class is resulting in false positive splats in some code paths. Signed-off-by: Andrew Lunn Signed-off-by: Thomas Gleixner Acked-by: linus.walleij@linaro.org Cc: grygorii.strashko@ti.com Cc: f.fainelli@gmail.com Link: https://lkml.kernel.org/r/1512234664-21555-1-git-send-email-andrew@lunn.ch --- arch/powerpc/sysdev/fsl_msi.c | 4 +++- drivers/gpio/gpio-bcm-kona.c | 3 ++- drivers/gpio/gpio-brcmstb.c | 4 +++- drivers/gpio/gpio-tegra.c | 4 +++- drivers/gpio/gpiolib.c | 27 ++++++++++++++++--------- drivers/irqchip/irq-renesas-intc-irqpin.c | 6 +++++- drivers/mfd/arizona-irq.c | 4 +++- drivers/pinctrl/pinctrl-single.c | 5 ++++- include/linux/gpio/driver.h | 33 ++++++++++++++++++++----------- include/linux/irqdesc.h | 9 ++++++--- kernel/irq/generic-chip.c | 11 +++++++---- 11 files changed, 75 insertions(+), 35 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 44cbf4c12ea1..df95102e732c 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) } static struct lock_class_key fsl_msi_irq_class; +static struct lock_class_key fsl_msi_irq_request_class; static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) @@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, dev_err(&dev->dev, "No memory for MSI cascade data\n"); return -ENOMEM; } - irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); + irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class, + &fsl_msi_irq_request_class); cascade_data->index = offset; cascade_data->msi_data = msi; cascade_data->virq = virt_msir; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 
dfcf56ee3c61..76861a00bb92 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = { * category than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) @@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, d->host_data); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class); irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq); irq_set_noprobe(irq); diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 545d43a587b7..5b24801bffef 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank( * category than their parents, so it won't report false recursion. 
*/ static struct lock_class_key brcmstb_gpio_irq_lock_class; +static struct lock_class_key brcmstb_gpio_irq_request_class; static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, @@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, &bank->gc); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class); + irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class, + &brcmstb_gpio_irq_lock_class); irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq); irq_set_noprobe(irq); return 0; diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 8db47f671708..02fa8fe2292a 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = { * than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int tegra_gpio_probe(struct platform_device *pdev) { @@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev) bank = &tgi->bank_info[GPIO_BANK(gpio)]; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, + &gpio_request_class); irq_set_chip_data(irq, bank); irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index aad84a6306c4..44332b793718 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices); static void gpiochip_free_hogs(struct gpio_chip *chip); static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); static void 
gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); @@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void) } int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { unsigned long flags; int status = 0; @@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (status) goto err_remove_from_list; - status = gpiochip_add_irqchip(chip, key); + status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) goto err_remove_chip; @@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ - irq_set_lockdep_class(irq, chip->irq.lock_key); + irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key); irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler); /* Chips that use nested thread handlers have them marked */ if (chip->irq.threaded) @@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) /** * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip * @gpiochip: the GPIO chip to add the IRQ chip to - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request */ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct irq_chip *irqchip = gpiochip->irq.chip; const struct irq_domain_ops *ops; @@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.default_type = type; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; if (gpiochip->irq.domain_ops) ops = gpiochip->irq.domain_ops; @@ -1850,7 +1855,8 @@ static void 
gpiochip_irqchip_remove(struct gpio_chip *gpiochip) * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE * to have the core avoid setting up any default type in the hardware. * @threaded: whether this irqchip uses a nested thread handler - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request * * This function closely associates a certain irqchip with a certain * gpiochip, providing an irq domain to translate the local IRQs to @@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct device_node *of_node; @@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, gpiochip->irq.default_type = type; gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; gpiochip->irq.domain = irq_domain_add_simple(of_node, gpiochip->ngpio, first_irq, &gpiochip_domain_ops, gpiochip); @@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key); #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { return 0; } diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 06f29cf5018a..cee59fe1321c 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id) */ static struct lock_class_key intc_irqpin_irq_lock_class; +/* And this is for the request mutex */ +static struct lock_class_key intc_irqpin_irq_request_class; + static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, 
irq_hw_number_t hw) { @@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, intc_irqpin_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); - irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); + irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class, + &intc_irqpin_irq_request_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); return 0; } diff --git a/drivers/mfd/arizona-irq.c b/drivers/mfd/arizona-irq.c index 09cf3699e354..a307832d7e45 100644 --- a/drivers/mfd/arizona-irq.c +++ b/drivers/mfd/arizona-irq.c @@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = { }; static struct lock_class_key arizona_irq_lock_class; +static struct lock_class_key arizona_irq_request_class; static int arizona_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) @@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq, struct arizona *data = h->host_data; irq_set_chip_data(virq, data); - irq_set_lockdep_class(virq, &arizona_irq_lock_class); + irq_set_lockdep_class(virq, &arizona_irq_lock_class, + &arizona_irq_request_class); irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq); irq_set_nested_thread(virq, 1); irq_set_noprobe(virq); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index e6cd8de793e2..3501491e5bfc 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = { */ static struct lock_class_key pcs_lock_class; +/* Class for the IRQ request mutex */ +static struct lock_class_key pcs_request_class; + /* * REVISIT: Reads and writes could eventually use regmap or something * generic. 
But at least on omaps, some mux registers are performance @@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_data(irq, pcs_soc); irq_set_chip_and_handler(irq, &pcs->chip, handle_level_irq); - irq_set_lockdep_class(irq, &pcs_lock_class); + irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class); irq_set_noprobe(irq); return 0; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 55e672592fa9..7258cd676df4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -66,9 +66,10 @@ struct gpio_irq_chip { /** * @lock_key: * - * Per GPIO IRQ chip lockdep class. + * Per GPIO IRQ chip lockdep classes. */ struct lock_class_key *lock_key; + struct lock_class_key *request_key; /** * @parent_handler: @@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, /* add/remove chips */ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, */ #ifdef CONFIG_LOCKDEP #define gpiochip_add_data(chip, data) ({ \ - static struct lock_class_key key; \ - gpiochip_add_data_with_key(chip, data, &key); \ + static struct lock_class_key lock_key; \ + static struct lock_class_key request_key; \ + gpiochip_add_data_with_key(chip, data, &lock_key, \ + &request_key); \ }) #else -#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) +#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) #endif static inline int gpiochip_add(struct gpio_chip *chip) @@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key); + struct 
lock_class_key *lock_key, + struct lock_class_key *request_key); #ifdef CONFIG_LOCKDEP @@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, &key); + handler, type, false, + &lock_key, &request_key); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, &key); + handler, type, true, + &lock_key, &request_key); } #else static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, @@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, NULL); + handler, type, false, NULL, NULL); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, NULL); + handler, type, true, NULL, NULL); } #endif /* CONFIG_LOCKDEP */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 39fb3700f7a9..25b33b664537 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq) } static inline void -irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) +irq_set_lockdep_class(unsigned int irq, struct 
lock_class_key *lock_class, + struct lock_class_key *request_class) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) - lockdep_set_class(&desc->lock, class); + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } } #ifdef CONFIG_IRQ_PREFLOW_FASTEOI diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index c26c5bb6b491..508c03dfef25 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); /* - * Separate lockdep class for interrupt chip which can nest irq_desc - * lock. + * Separate lockdep classes for interrupt chip which can nest irq_desc + * lock and request mutex. */ static struct lock_class_key irq_nested_lock_class; +static struct lock_class_key irq_nested_request_class; /* * irq_map_generic_chip - Map a generic chip for an irq domain @@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, set_bit(idx, &gc->installed); if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(virq, &irq_nested_lock_class); + irq_set_lockdep_class(virq, &irq_nested_lock_class, + &irq_nested_request_class); if (chip->irq_calc_mask) chip->irq_calc_mask(data); @@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) - irq_set_lockdep_class(i, &irq_nested_lock_class); + irq_set_lockdep_class(i, &irq_nested_lock_class, + &irq_nested_request_class); if (!(flags & IRQ_GC_NO_MASK)) { struct irq_data *d = irq_get_irq_data(i); -- cgit v1.2.3 From 466a2b42d67644447a1765276259a3ea5531ddff Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 21 Dec 2017 02:22:45 +0100 Subject: cpufreq: schedutil: Use idle_calls counter of the remote CPU Since the recent remote cpufreq callback work, it's possible that a cpufreq update is triggered
from a remote CPU. For single policies however, the current code uses the local CPU when trying to determine if the remote sg_cpu entered idle or is busy. This is incorrect. To remedy this, compare with the nohz tick idle_calls counter of the remote CPU. Fixes: 674e75411fc2 (sched: cpufreq: Allow remote cpufreq callbacks) Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Joel Fernandes Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki --- include/linux/tick.h | 1 + kernel/sched/cpufreq_schedutil.c | 2 +- kernel/time/tick-sched.c | 13 +++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index f442d1a42025..7cc35921218e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern unsigned long tick_nohz_get_idle_calls(void); +extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 2f52ec0f1539..d6717a3331a1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, #ifdef CONFIG_NO_HZ_COMMON static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { - unsigned long idle_calls = tick_nohz_get_idle_calls(); + unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu); bool ret = idle_calls == sg_cpu->saved_idle_calls; sg_cpu->saved_idle_calls = idle_calls; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 99578f06c8d4..77555faf6fbc 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -985,6 +985,19 @@ ktime_t 
tick_nohz_get_sleep_length(void) return ts->sleep_length; } +/** + * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value + * for a particular CPU. + * + * Called from the schedutil frequency scaling governor in scheduler context. + */ +unsigned long tick_nohz_get_idle_calls_cpu(int cpu) +{ + struct tick_sched *ts = tick_get_tick_sched(cpu); + + return ts->idle_calls; +} + /** * tick_nohz_get_idle_calls - return the current idle calls counter value * -- cgit v1.2.3 From 11bca0a83f83f6093d816295668e74ef24595944 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 2 Dec 2017 09:13:04 -0800 Subject: genirq: Guard handle_bad_irq log messages An interrupt storm on a bad interrupt will cause the kernel log to be clogged. [ 60.089234] ->handle_irq(): ffffffffbe2f803f, [ 60.090455] 0xffffffffbf2af380 [ 60.090510] handle_bad_irq+0x0/0x2e5 [ 60.090522] ->irq_data.chip(): ffffffffbf2af380, [ 60.090553] IRQ_NOPROBE set [ 60.090584] ->handle_irq(): ffffffffbe2f803f, [ 60.090590] handle_bad_irq+0x0/0x2e5 [ 60.090596] ->irq_data.chip(): ffffffffbf2af380, [ 60.090602] 0xffffffffbf2af380 [ 60.090608] ->action(): (null) [ 60.090779] handle_bad_irq+0x0/0x2e5 This was seen when running an upstream kernel on Acer Chromebook R11. The system was unstable as a result. Guard the log message with __ratelimit() to reduce the impact. This won't prevent the interrupt storm from happening, but at least the system remains stable.
Signed-off-by: Guenter Roeck Signed-off-by: Thomas Gleixner Cc: Dmitry Torokhov Cc: Joe Perches Cc: Andy Shevchenko Cc: Mika Westerberg Link: https://bugzilla.kernel.org/show_bug.cgi?id=197953 Link: https://lkml.kernel.org/r/1512234784-21038-1-git-send-email-linux@roeck-us.net --- kernel/irq/debug.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 17f05ef8f575..e4d3819a91cc 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h @@ -12,6 +12,11 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); + + if (!__ratelimit(&ratelimit)) + return; + printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); printk("->handle_irq(): %p, ", desc->handle_irq); -- cgit v1.2.3 From 4fcab6693445cfb84f2b65868c58043535090e52 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Mon, 4 Dec 2017 12:03:12 +0800 Subject: x86/apic: Avoid wrong warning when parsing 'apic=' in X86-32 case There are two consumers of apic=: apic_set_verbosity() for setting the APIC debug level; parse_apic() for registering APIC driver by hand. X86-32 supports both of them, but sometimes, kernel issues a weird warning. eg: when kernel was booted up with 'apic=bigsmp' in command line, early_param would warn like that: ... [ 0.000000] APIC Verbosity level bigsmp not recognised use apic=verbose or apic=debug [ 0.000000] Malformed early option 'apic' ... Wrap the warning code in CONFIG_X86_64 case to avoid this. 
Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: rdunlap@infradead.org Cc: corbet@lwn.net Link: https://lkml.kernel.org/r/20171204040313.24824-1-douly.fnst@cn.fujitsu.com --- arch/x86/kernel/apic/apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6e272f3ea984..880441f24146 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg) apic_verbosity = APIC_DEBUG; else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; +#ifdef CONFIG_X86_64 else { pr_warning("APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug\n", arg); return -EINVAL; } +#endif return 0; } -- cgit v1.2.3 From 64e05d118e357bb52a084b609436acf292ce7944 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Mon, 4 Dec 2017 12:03:13 +0800 Subject: x86/apic: Update the 'apic=' description of setting APIC driver There are two consumers of apic=: the APIC debug level and the low level generic architecture code, but Linux just documented the first one. Append the second description. Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: rdunlap@infradead.org Cc: corbet@lwn.net Link: https://lkml.kernel.org/r/20171204040313.24824-2-douly.fnst@cn.fujitsu.com --- Documentation/admin-guide/kernel-parameters.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b74e13312fdc..852fb11dd2c9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -328,11 +328,15 @@ not play well with APC CPU idle - disable it if you have APC and your system crashes randomly. 
- apic= [APIC,X86-32] Advanced Programmable Interrupt Controller + apic= [APIC,X86] Advanced Programmable Interrupt Controller Change the output verbosity whilst booting Format: { quiet (default) | verbose | debug } Change the amount of debugging information output when initialising the APIC and IO-APIC components. + For X86-32, this can also be used to specify an APIC + driver name. + Format: apic=driver_name + Examples: apic=bigsmp apic_extnmi= [APIC,X86] External NMI delivery setting Format: { bsp (default) | all | none } -- cgit v1.2.3 From 7d7fb91cb43aebdcadca6a0fce25c3174feab980 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Dec 2017 23:25:08 +0200 Subject: ACPI / x86: boot: Swap variables in condition in acpi_register_gsi_ioapic() For better readability compare input to something considered settled down. Additionally move it to one line (while it's slightly longer than 80 characters it makes readability better). No functional change intended. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J.
Wysocki --- arch/x86/kernel/acpi/boot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f4c463df8b08..4bad714d6227 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -676,8 +676,7 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, mutex_lock(&acpi_ioapic_lock); irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info); /* Don't set up the ACPI SCI because it's already set up */ - if (irq >= 0 && enable_update_mptable && - acpi_gbl_FADT.sci_interrupt != gsi) + if (irq >= 0 && enable_update_mptable && gsi != acpi_gbl_FADT.sci_interrupt) mp_config_acpi_gsi(dev, gsi, trigger, polarity); mutex_unlock(&acpi_ioapic_lock); #endif -- cgit v1.2.3 From 220580fb0d0a71cff0f17c24463c787f43dd6626 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Dec 2017 23:25:09 +0200 Subject: ACPI / x86: boot: Get rid of ACPI_INVALID_GSI Commit 49e4b84333f3 (ACPI: Use correct IRQ when uninstalling ACPI interrupt handler) brings a new definition for invalid ACPI IRQ, i.e. INVALID_ACPI_IRQ, which is defined to 0xffffffff (or -1 for unsigned value). Get rid of a former one, which was brought in by commit 2c0a6894df19 (x86, ACPI, irq: Enhance error handling in function acpi_register_gsi()), in favour of latter. To clarify the rationale of changing from INT_MIN to ((unsigned)-1) definition consider the following: - IRQ 0 is valid one in hardware, so, better not to use it everywhere (Linux uses 0 as NO IRQ, though it's another story) - INT_MIN splits the range into two, while 0xffffffff reserves only the last item - when type casting is done in most cases 0xff, 0xffff is naturally used as a marker of invalid HW IRQ: for example PCI INT line 0xff means no IRQ assigned by BIOS Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. 
Wysocki --- arch/x86/kernel/acpi/boot.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4bad714d6227..56752b48e480 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -112,8 +112,6 @@ static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; -#define ACPI_INVALID_GSI INT_MIN - /* * This is just a simple wrapper around early_memremap(), * with sanity checks for phys == 0 and size == 0. @@ -372,7 +370,7 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, * and acpi_isa_irq_to_gsi() may give wrong result. */ if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi) - isa_irq_to_gsi[gsi] = ACPI_INVALID_GSI; + isa_irq_to_gsi[gsi] = INVALID_ACPI_IRQ; isa_irq_to_gsi[bus_irq] = gsi; } @@ -637,7 +635,7 @@ EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) { if (isa_irq < nr_legacy_irqs() && - isa_irq_to_gsi[isa_irq] != ACPI_INVALID_GSI) { + isa_irq_to_gsi[isa_irq] != INVALID_ACPI_IRQ) { *gsi = isa_irq_to_gsi[isa_irq]; return 0; } -- cgit v1.2.3 From 4565c4f6056967ee8844fa550e3cbbe1c0e65a11 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Dec 2017 23:25:10 +0200 Subject: ACPI / x86: boot: Use INVALID_ACPI_IRQ instead of 0 for acpi_sci_override_gsi 0 is valid hardware interrupt which might be in some cases overridden. Due to this, switch to INVALID_ACPI_IRQ to mark SCI override not set. While here, change the type of the variable from int to u32 to match the GSI type used in the rest of the code. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. 
Wysocki --- arch/x86/include/asm/acpi.h | 2 +- arch/x86/kernel/acpi/boot.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8d0ec9df1cbe..44f5d79d5105 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -49,7 +49,7 @@ extern int acpi_fix_pin2_polarity; extern int acpi_disable_cmcff; extern u8 acpi_sci_flags; -extern int acpi_sci_override_gsi; +extern u32 acpi_sci_override_gsi; void acpi_pic_sci_set_trigger(unsigned int, u16); struct device; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 56752b48e480..5a12cadbf019 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -68,8 +68,9 @@ int acpi_ioapic; int acpi_strict; int acpi_disable_cmcff; +/* ACPI SCI override configuration */ u8 acpi_sci_flags __initdata; -int acpi_sci_override_gsi __initdata; +u32 acpi_sci_override_gsi __initdata = INVALID_ACPI_IRQ; int acpi_skip_timer_override __initdata; int acpi_use_timer_override __initdata; int acpi_fix_pin2_polarity __initdata; @@ -1209,7 +1210,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) * If BIOS did not supply an INT_SRC_OVR for the SCI * pretend we got one so we can set the SCI flags. */ - if (!acpi_sci_override_gsi) + if (acpi_sci_override_gsi == INVALID_ACPI_IRQ) acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, acpi_gbl_FADT.sci_interrupt); -- cgit v1.2.3 From 7c7bcfeae2d8e59066bd273b7d70392574e14c15 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 15 Dec 2017 23:25:11 +0200 Subject: ACPI / x86: boot: Don't setup SCI on HW-reduced platforms As per note in 5.2.9 Fixed ACPI Description Table (FADT) chapter of ACPI specification, on HW-reduced platforms OSPM should ignore fields related to the ACPI HW register interface, one of which is SCI_INT. Follow the spec and ignore any configuration done for interrupt line defined by SCI_INT if FADT specifies a HW-reduced platform.
HW-reduced platforms will still be able to use SCI in case it provides an override record in MADT table. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/boot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5a12cadbf019..4bf004bab4b2 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1209,8 +1209,9 @@ static int __init acpi_parse_madt_ioapic_entries(void) /* * If BIOS did not supply an INT_SRC_OVR for the SCI * pretend we got one so we can set the SCI flags. + * But ignore setting up SCI on hardware reduced platforms. */ - if (acpi_sci_override_gsi == INVALID_ACPI_IRQ) + if (acpi_sci_override_gsi == INVALID_ACPI_IRQ && !acpi_gbl_reduced_hardware) acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, acpi_gbl_FADT.sci_interrupt); -- cgit v1.2.3 From 5928c281524fe451114e04f1dfa11246a37e859f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 23 Dec 2017 19:41:47 +0100 Subject: ACPI / video: Default lcd_only to true on Win8-ready and newer machines We're seeing a lot of bogus backlight interfaces on newer machines without a LCD such as desktops, servers and HDMI sticks. This causes userspace to show a non-functional brightness slider in e.g. the GNOME3 system menu, which is undesirable. And, in general, we should simply just not register a non functional backlight interface. Checking the LCD flag causes the bogus acpi_video backlight interfaces to go away (on the machines this was tested on). This change sets the lcd_only option by default on any machines which are Win8-ready, to fix this. This is not entirely without a risk of regressions, but video_detect.c already prefers native-backlight interfaces over the acpi_video one on Win8-ready machines, calling acpi_video_unregister_backlight() as soon as a native interface shows up. 
This is done because the ACPI backlight interface often is broken on Win8-ready machines, because win8 does not seem to actually use it. So in practice we already end up not registering the ACPI backlight interface on (most) Win8-ready machines with a LCD panel, thus this change does not change anything for (most) machines with a LCD panel and on machines without a LCD panel we actually don't want to register any backlight interfaces. This has been tested on the following machines and fixes a bogus backlight interface showing up there: - Desktop with an Asrock B150M Pro4S/D3 m.b. using i5-6500 builtin gfx - Intel Compute Stick STK1AW32SC - Meegopad T08 HDMI stick Bogus backlight interfaces have also been reported on: - Desktop with Asus H87I-Plus m.b. - Desktop with ASRock B75M-ITX m.b. - Desktop with Gigabyte Z87-D3HP m.b. - Dell PowerEdge T20 desktop Link: https://bugzilla.redhat.com/show_bug.cgi?id=1097436 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1133327 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1133329 Link: https://bugzilla.redhat.com/show_bug.cgi?id=1133646 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 0972ec0e2eb8..f53ccc680238 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -80,8 +80,8 @@ MODULE_PARM_DESC(report_key_events, static bool device_id_scheme = false; module_param(device_id_scheme, bool, 0444); -static bool only_lcd = false; -module_param(only_lcd, bool, 0444); +static int only_lcd = -1; +module_param(only_lcd, int, 0444); static int register_count; static DEFINE_MUTEX(register_count_mutex); @@ -2136,6 +2136,16 @@ int acpi_video_register(void) goto leave; } + /* + * We're seeing a lot of bogus backlight interfaces on newer machines + * without a LCD such as desktops, servers and HDMI sticks. 
Checking + * the lcd flag fixes this, so enable this on any machines which are + * win8 ready (where we also prefer the native backlight driver, so + * normally the acpi_video code should not register there anyways). + */ + if (only_lcd == -1) + only_lcd = acpi_osi_is_win8(); + dmi_check_system(video_dmi_table); ret = acpi_bus_register_driver(&acpi_video_bus); -- cgit v1.2.3 From 3eff5f67a21997b4a86e0e5062fc72c2347e25bf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 20 Dec 2017 05:44:40 +0000 Subject: PM / OPP: Make local function ti_opp_supply_set_opp() static Fixes the following sparse warning: drivers/opp/ti-opp-supply.c:276:5: warning: symbol 'ti_opp_supply_set_opp' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Rafael J. Wysocki --- drivers/opp/ti-opp-supply.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c index 44dae3e51aac..370eff3acd8a 100644 --- a/drivers/opp/ti-opp-supply.c +++ b/drivers/opp/ti-opp-supply.c @@ -273,7 +273,7 @@ static int _opp_set_voltage(struct device *dev, * * Return: If successful, 0, else appropriate error value. */ -int ti_opp_supply_set_opp(struct dev_pm_set_opp_data *data) +static int ti_opp_supply_set_opp(struct dev_pm_set_opp_data *data) { struct dev_pm_opp_supply *old_supply_vdd = &data->old_opp.supplies[0]; struct dev_pm_opp_supply *old_supply_vbb = &data->old_opp.supplies[1]; -- cgit v1.2.3 From e7e83dd3ff1dd2f9e60213f6eedc7e5b08192062 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 26 Dec 2017 15:27:20 -0600 Subject: objtool: Fix Clang enum conversion warning Fix the following Clang enum conversion warning: arch/x86/decode.c:141:20: error: implicit conversion from enumeration type 'enum op_src_type' to different enumeration type 'enum op_dest_type' [-Werror,-Wenum-conversion] op->dest.type = OP_SRC_REG; ~ ^~~~~~~~~~ It just happened to work before because OP_SRC_REG and OP_DEST_REG have the same value. 
Signed-off-by: Lukas Bulwahn Signed-off-by: Josh Poimboeuf Reviewed-by: Nicholas Mc Guire Reviewed-by: Nick Desaulniers Cc: Jiri Slaby Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0") Link: http://lkml.kernel.org/r/b4156c5738bae781c392e7a3691aed4514ebbdf2.1514323568.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/arch/x86/decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 8acfc47af70e..540a209b78ab 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -138,7 +138,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, *type = INSN_STACK; op->src.type = OP_SRC_ADD; op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; - op->dest.type = OP_SRC_REG; + op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } break; -- cgit v1.2.3 From 2332bd04199353b06bf35f14f972d518907f08e0 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Sat, 23 Dec 2017 12:53:52 +0800 Subject: cpufreq: imx6q: switch to Use clk_bulk_get() to refine clk operations Use clk_bulk_get() to simplify the driver's clocks handling. Acked-by: Viresh Kumar Signed-off-by: Dong Aisheng Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/imx6q-cpufreq.c | 125 ++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 69 deletions(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index d9b2c2de49c4..8bfb0775662b 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -25,15 +25,29 @@ static struct regulator *arm_reg; static struct regulator *pu_reg; static struct regulator *soc_reg; -static struct clk *arm_clk; -static struct clk *pll1_sys_clk; -static struct clk *pll1_sw_clk; -static struct clk *step_clk; -static struct clk *pll2_pfd2_396m_clk; - -/* clk used by i.MX6UL */ -static struct clk *pll2_bus_clk; -static struct clk *secondary_sel_clk; +enum IMX6_CPUFREQ_CLKS { + ARM, + PLL1_SYS, + STEP, + PLL1_SW, + PLL2_PFD2_396M, + /* MX6UL requires two more clks */ + PLL2_BUS, + SECONDARY_SEL, +}; +#define IMX6Q_CPUFREQ_CLK_NUM 5 +#define IMX6UL_CPUFREQ_CLK_NUM 7 + +static int num_clks; +static struct clk_bulk_data clks[] = { + { .id = "arm" }, + { .id = "pll1_sys" }, + { .id = "step" }, + { .id = "pll1_sw" }, + { .id = "pll2_pfd2_396m" }, + { .id = "pll2_bus" }, + { .id = "secondary_sel" }, +}; static struct device *cpu_dev; static bool free_opp; @@ -53,7 +67,7 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) new_freq = freq_table[index].frequency; freq_hz = new_freq * 1000; - old_freq = clk_get_rate(arm_clk) / 1000; + old_freq = clk_get_rate(clks[ARM].clk) / 1000; opp = dev_pm_opp_find_freq_ceil(cpu_dev, &freq_hz); if (IS_ERR(opp)) { @@ -112,29 +126,31 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) * voltage of 528MHz, so lower the CPU frequency to one * half before changing CPU frequency. 
*/ - clk_set_rate(arm_clk, (old_freq >> 1) * 1000); - clk_set_parent(pll1_sw_clk, pll1_sys_clk); - if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) - clk_set_parent(secondary_sel_clk, pll2_bus_clk); + clk_set_rate(clks[ARM].clk, (old_freq >> 1) * 1000); + clk_set_parent(clks[PLL1_SW].clk, clks[PLL1_SYS].clk); + if (freq_hz > clk_get_rate(clks[PLL2_PFD2_396M].clk)) + clk_set_parent(clks[SECONDARY_SEL].clk, + clks[PLL2_BUS].clk); else - clk_set_parent(secondary_sel_clk, pll2_pfd2_396m_clk); - clk_set_parent(step_clk, secondary_sel_clk); - clk_set_parent(pll1_sw_clk, step_clk); + clk_set_parent(clks[SECONDARY_SEL].clk, + clks[PLL2_PFD2_396M].clk); + clk_set_parent(clks[STEP].clk, clks[SECONDARY_SEL].clk); + clk_set_parent(clks[PLL1_SW].clk, clks[STEP].clk); } else { - clk_set_parent(step_clk, pll2_pfd2_396m_clk); - clk_set_parent(pll1_sw_clk, step_clk); - if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) { - clk_set_rate(pll1_sys_clk, new_freq * 1000); - clk_set_parent(pll1_sw_clk, pll1_sys_clk); + clk_set_parent(clks[STEP].clk, clks[PLL2_PFD2_396M].clk); + clk_set_parent(clks[PLL1_SW].clk, clks[STEP].clk); + if (freq_hz > clk_get_rate(clks[PLL2_PFD2_396M].clk)) { + clk_set_rate(clks[PLL1_SYS].clk, new_freq * 1000); + clk_set_parent(clks[PLL1_SW].clk, clks[PLL1_SYS].clk); } else { /* pll1_sys needs to be enabled for divider rate change to work. */ pll1_sys_temp_enabled = true; - clk_prepare_enable(pll1_sys_clk); + clk_prepare_enable(clks[PLL1_SYS].clk); } } /* Ensure the arm clock divider is what we expect */ - ret = clk_set_rate(arm_clk, new_freq * 1000); + ret = clk_set_rate(clks[ARM].clk, new_freq * 1000); if (ret) { dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); regulator_set_voltage_tol(arm_reg, volt_old, 0); @@ -143,7 +159,7 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) /* PLL1 is only needed until after ARM-PODF is set. 
*/ if (pll1_sys_temp_enabled) - clk_disable_unprepare(pll1_sys_clk); + clk_disable_unprepare(clks[PLL1_SYS].clk); /* scaling down? scale voltage after frequency */ if (new_freq < old_freq) { @@ -174,7 +190,7 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy) { int ret; - policy->clk = arm_clk; + policy->clk = clks[ARM].clk; ret = cpufreq_generic_init(policy, freq_table, transition_latency); policy->suspend_freq = policy->max; @@ -266,28 +282,15 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) return -ENOENT; } - arm_clk = clk_get(cpu_dev, "arm"); - pll1_sys_clk = clk_get(cpu_dev, "pll1_sys"); - pll1_sw_clk = clk_get(cpu_dev, "pll1_sw"); - step_clk = clk_get(cpu_dev, "step"); - pll2_pfd2_396m_clk = clk_get(cpu_dev, "pll2_pfd2_396m"); - if (IS_ERR(arm_clk) || IS_ERR(pll1_sys_clk) || IS_ERR(pll1_sw_clk) || - IS_ERR(step_clk) || IS_ERR(pll2_pfd2_396m_clk)) { - dev_err(cpu_dev, "failed to get clocks\n"); - ret = -ENOENT; - goto put_clk; - } - if (of_machine_is_compatible("fsl,imx6ul") || - of_machine_is_compatible("fsl,imx6ull")) { - pll2_bus_clk = clk_get(cpu_dev, "pll2_bus"); - secondary_sel_clk = clk_get(cpu_dev, "secondary_sel"); - if (IS_ERR(pll2_bus_clk) || IS_ERR(secondary_sel_clk)) { - dev_err(cpu_dev, "failed to get clocks specific to imx6ul\n"); - ret = -ENOENT; - goto put_clk; - } - } + of_machine_is_compatible("fsl,imx6ull")) + num_clks = IMX6UL_CPUFREQ_CLK_NUM; + else + num_clks = IMX6Q_CPUFREQ_CLK_NUM; + + ret = clk_bulk_get(cpu_dev, num_clks, clks); + if (ret) + goto put_node; arm_reg = regulator_get(cpu_dev, "arm"); pu_reg = regulator_get_optional(cpu_dev, "pu"); @@ -424,22 +427,11 @@ put_reg: regulator_put(pu_reg); if (!IS_ERR(soc_reg)) regulator_put(soc_reg); -put_clk: - if (!IS_ERR(arm_clk)) - clk_put(arm_clk); - if (!IS_ERR(pll1_sys_clk)) - clk_put(pll1_sys_clk); - if (!IS_ERR(pll1_sw_clk)) - clk_put(pll1_sw_clk); - if (!IS_ERR(step_clk)) - clk_put(step_clk); - if (!IS_ERR(pll2_pfd2_396m_clk)) - 
clk_put(pll2_pfd2_396m_clk); - if (!IS_ERR(pll2_bus_clk)) - clk_put(pll2_bus_clk); - if (!IS_ERR(secondary_sel_clk)) - clk_put(secondary_sel_clk); + + clk_bulk_put(num_clks, clks); +put_node: of_node_put(np); + return ret; } @@ -453,13 +445,8 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) if (!IS_ERR(pu_reg)) regulator_put(pu_reg); regulator_put(soc_reg); - clk_put(arm_clk); - clk_put(pll1_sys_clk); - clk_put(pll1_sw_clk); - clk_put(step_clk); - clk_put(pll2_pfd2_396m_clk); - clk_put(pll2_bus_clk); - clk_put(secondary_sel_clk); + + clk_bulk_put(num_clks, clks); return 0; } -- cgit v1.2.3 From 517d7c79bdb39864e617960504bdc1aa560c75c6 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Thu, 28 Dec 2017 12:03:06 +0100 Subject: tipc: fix hanging poll() for stream sockets In commit 42b531de17d2f6 ("tipc: Fix missing connection request handling"), we replaced unconditional wakeup() with conditional wakeup for clients with flags POLLIN | POLLRDNORM | POLLRDBAND. This breaks the applications which do a connect followed by poll with POLLOUT flag. These applications are not woken when the connection is ESTABLISHED and hence sleep forever. In this commit, we fix it by including the POLLOUT event for sockets in TIPC_CONNECTING state. Fixes: 42b531de17d2f6 ("tipc: Fix missing connection request handling") Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S.
Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 41127d0b925e..3b4084480377 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: + case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: - case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) revents |= POLLIN | POLLRDNORM; break; -- cgit v1.2.3 From f72c4ac695573699dde5b71da1c3b9ef80440616 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 28 Dec 2017 12:38:13 -0500 Subject: skbuff: in skb_copy_ubufs unclone before releasing zerocopy skb_copy_ubufs must unclone before it is safe to modify its skb_shared_info with skb_zcopy_clear. Commit b90ddd568792 ("skbuff: skb_copy_ubufs must release uarg even without user frags") ensures that all skbs release their zerocopy state, even those without frags. But I forgot an edge case where such an skb arrives that is cloned. The stack does not build such packets. Vhost/tun skbs have their frags orphaned before cloning. TCP skbs only attach zerocopy state when a frag is added. But if TCP packets can be trimmed or linearized, this might occur. Tracing the code I found no instance so far (e.g., skb_linearize ends up calling skb_zcopy_clear if !skb->data_len). Still, it is non-obvious that no path exists. And it is fragile to rely on this. Fixes: b90ddd568792 ("skbuff: skb_copy_ubufs must release uarg even without user frags") Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a3cb0be4c6f3..08f574081315 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) int i, new_frags; u32 d_off; - if (!num_frags) - goto release; - if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; + if (!num_frags) + goto release; + new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < new_frags; i++) { page = alloc_page(gfp_mask); -- cgit v1.2.3 From 602f7a2714a3b3aa4bec82ab0a86a9f5a2c4aa61 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 28 Dec 2017 11:00:43 -0800 Subject: sock: Add sock_owned_by_user_nocheck This allows checking socket lock ownership without producing lockdep warnings. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index 9155da422692..7a7b14e9628a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1514,6 +1514,11 @@ static inline bool sock_owned_by_user(const struct sock *sk) return sk->sk_lock.owned; } +static inline bool sock_owned_by_user_nocheck(const struct sock *sk) +{ + return sk->sk_lock.owned; +} + /* no reclassification while locks are held */ static inline bool sock_allow_reclassification(const struct sock *csk) { -- cgit v1.2.3 From d66fa9ec53c43bba9fa973c16419f6061b7cc3ea Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 28 Dec 2017 11:00:44 -0800 Subject: strparser: Call sock_owned_by_user_nocheck strparser wants to check socket ownership without producing any warnings. As indicated by the comment in the code, it is permissible for owned_by_user to return true. Fixes: 43a0c6751a322847 ("strparser: Stream parser for messages") Reported-by: syzbot Reported-and-tested-by: Signed-off-by: Tom Herbert Signed-off-by: David S.
Miller --- net/strparser/strparser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index c5fda15ba319..1fdab5c4eda8 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp) * allows a thread in BH context to safely check if the process * lock is held. In this case, if the lock is held, queue work. */ - if (sock_owned_by_user(strp->sk)) { + if (sock_owned_by_user_nocheck(strp->sk)) { queue_work(strp_wq, &strp->work); return; } -- cgit v1.2.3 From 955b1b5a00ba694159a7d3763412597f707c294d Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 20 Dec 2017 16:30:50 +0900 Subject: nvme-pci: move use_sgl initialization to nvme_init_iod() A flag "use_sgl" of "struct nvme_iod" has been used in nvme_init_iod() without being set to any value. It seems like "use_sgl" has been set in either nvme_pci_setup_prps() or nvme_pci_setup_sgls() which occur later than nvme_init_iod(). Make "iod->use_sgl" being set in a proper place, nvme_init_iod(). Also move nvme_pci_use_sgls() up above nvme_init_iod() to make it possible to be called by nvme_init_iod(). 
Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f5800c3c9082..d53550e612bc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -448,12 +448,31 @@ static void **nvme_pci_iod_list(struct request *req) return (void **)(iod->sg + blk_rq_nr_phys_segments(req)); } +static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + unsigned int avg_seg_size; + + avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), + blk_rq_nr_phys_segments(req)); + + if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) + return false; + if (!iod->nvmeq->qid) + return false; + if (!sgl_threshold || avg_seg_size < sgl_threshold) + return false; + return true; +} + static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) { struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); int nseg = blk_rq_nr_phys_segments(rq); unsigned int size = blk_rq_payload_bytes(rq); + iod->use_sgl = nvme_pci_use_sgls(dev, rq); + if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg, iod->use_sgl); @@ -604,8 +623,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, dma_addr_t prp_dma; int nprps, i; - iod->use_sgl = false; - length -= (page_size - offset); if (length <= 0) { iod->first_dma = 0; @@ -715,8 +732,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, int entries = iod->nents, i = 0; dma_addr_t sgl_dma; - iod->use_sgl = true; - /* setting the transfer type as SGL */ cmd->flags = NVME_CMD_SGL_METABUF; @@ -770,23 +785,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, return BLK_STS_OK; } -static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) -{ - struct 
nvme_iod *iod = blk_mq_rq_to_pdu(req); - unsigned int avg_seg_size; - - avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), - blk_rq_nr_phys_segments(req)); - - if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) - return false; - if (!iod->nvmeq->qid) - return false; - if (!sgl_threshold || avg_seg_size < sgl_threshold) - return false; - return true; -} - static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, struct nvme_command *cmnd) { @@ -806,7 +804,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, DMA_ATTR_NO_WARN)) goto out; - if (nvme_pci_use_sgls(dev, req)) + if (iod->use_sgl) ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); -- cgit v1.2.3 From cee160fd34b459ace029653436319557a643795a Mon Sep 17 00:00:00 2001 From: Jeff Lien Date: Tue, 19 Dec 2017 13:24:15 -0600 Subject: nvme: fix sector units when going between formats If you format a device with a 4k sector size back to 512 bytes, the queue limit values for physical block size and minimum IO size were not getting updated; only the logical block size was being updated. This patch adds code to update the physical block and IO minimum sizes. Signed-off-by: Jeff Lien Reviewed-by: Martin K. 
Petersen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1e46e60b8f10..961d6a4af19c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1335,6 +1335,7 @@ static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); + unsigned short bs = 1 << ns->lba_shift; unsigned stream_alignment = 0; if (ns->ctrl->nr_streams && ns->sws && ns->sgs) @@ -1343,7 +1344,10 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); - blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift); + blk_queue_logical_block_size(disk->queue, bs); + blk_queue_physical_block_size(disk->queue, bs); + blk_queue_io_min(disk->queue, bs); + if (ns->ms && !ns->ext && (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) nvme_init_integrity(disk, ns->ms, ns->pi_type); -- cgit v1.2.3 From d5bf4b7f437c250821d40c3e32158729e6b484ce Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 21 Dec 2017 14:54:15 +0200 Subject: nvme-rdma: fix concurrent reset and reconnect Now ctrl state machine allows to transition from RESETTING to RECONNECTING. In nvme-rdma when we receive a rdma cm DISCONNECTED event, we trigger nvme_rdma_error_recovery. This happens also when we execute a controller reset, issue a cm disconnect request and receive a cm disconnect reply, as a result, the reset work and the error recovery work can run concurrently. Until now the state machine prevented the error recovery work from running as a result of a controller reset (RESETTING -> RECONNECTING was not allowed). To fix this, we adopt the FC state machine approach, we always transition from LIVE to RESETTING and only then to RECONNECTING.
We do this both for the error recovery work and the controller reset work: 1. transition to RESETTING 2. teardown the controller association 3. transition to RECONNECTING This will restore the protection against reset work and error recovery work from concurrently running together. Fixes: 3cec7f9de448 ("nvme: allow controller RESETTING to RECONNECTING transition") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 37af56596be6..2a0bba7f50cf 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -974,12 +974,18 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_start_queues(&ctrl->ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { + /* state change failure should never happen */ + WARN_ON_ONCE(1); + return; + } + nvme_rdma_reconnect_or_remove(ctrl); } static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) { - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) return; queue_work(nvme_wq, &ctrl->err_work); @@ -1753,6 +1759,12 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl, false); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { + /* state change failure should never happen */ + WARN_ON_ONCE(1); + return; + } + ret = nvme_rdma_configure_admin_queue(ctrl, false); if (ret) goto out_fail; -- cgit v1.2.3 From 479a322fb729d657d34706ccf8dd12916f36628f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 21 Dec 2017 15:07:27 +0200 Subject: nvme-mpath: fix last path removal during traffic In case our last path is removed during traffic, we can end up requeueing the bio(s) but never schedule the actual 
requeue work as upper layers still have open handles on the mpath device node. Fix this by scheduling requeue work if the namespace being removed is the last path in the ns_head path list. Fixes: 32acab3181c7 ("nvme: implement multipath access to nvme subsystems") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/nvme.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 961d6a4af19c..839650e0926a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2991,6 +2991,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_unlock(&ns->ctrl->namespaces_mutex); synchronize_srcu(&ns->head->srcu); + nvme_mpath_check_last_path(ns); nvme_put_ns(ns); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ea1aa5283e8e..a00eabd06427 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -417,6 +417,15 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) rcu_assign_pointer(head->current_path, NULL); } struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); + +static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) +{ + struct nvme_ns_head *head = ns->head; + + if (head->disk && list_empty(&head->list)) + kblockd_schedule_work(&head->requeue_work); +} + #else static inline void nvme_failover_req(struct request *req) { @@ -448,6 +457,9 @@ static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns) static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { } +static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) +{ +} #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_NVM -- cgit v1.2.3 From 254beb84faccbe2f4eda0b51924857bdfb679969 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 21 Dec 2017 14:15:47 -0800 Subject: nvme-fcloop: avoid possible uninitialized variable warning The kbuild test robot send mail of a potential use of an 
uninitialized variable - "tport" in fcloop_delete_targetport() which then calls __targetport_unreg() which uses the variable. It will never be the case it is uninitialized as the call to __targetport_unreg() only occurs if there is a valid nport pointer. And at the time the nport pointer is assigned, the tport variable is set. Remove the warning by assigning a NULL value initially. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 7b75d9de55ab..6a018a0bd6ce 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -1085,7 +1085,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct fcloop_nport *nport = NULL, *tmpport; - struct fcloop_tport *tport; + struct fcloop_tport *tport = NULL; u64 nodename, portname; unsigned long flags; int ret; -- cgit v1.2.3 From a31e58e129f73ab5b04016330b13ed51fde7a961 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 Dec 2017 11:33:33 +0100 Subject: x86/apic: Switch all APICs to Fixed delivery mode Some of the APIC incarnations are operating in lowest priority delivery mode. This worked as long as the vector management code allocated the same vector on all possible CPUs for each interrupt. Lowest priority delivery mode does not necessarily respect the affinity setting and may redirect to some other online CPU. This was documented somewhere in the old code and the conversion to single target delivery missed to update the delivery mode of the affected APIC drivers which results in spurious interrupts on some of the affected CPU/Chipset combinations. Switch the APIC drivers over to Fixed delivery mode and remove all leftovers of lowest priority delivery mode. 
Switching to Fixed delivery mode is not a problem on these CPUs because the kernel already uses Fixed delivery mode for IPIs. The reason for this is that the SDM explicitly forbids lowest prio mode for IPIs. The reason is obvious: If the irq routing does not honor destination targets in lowest prio mode then an IPI targeted at CPU1 might end up on CPU0, which would be a fatal problem in many cases. As a consequence of this change, the apic::irq_delivery_mode field is now pointless, but this needs to be cleaned up in a separate patch. Fixes: fdba46ffb4c2 ("x86/apic: Get rid of multi CPU affinity") Reported-by: vcaputo@pengaru.com Signed-off-by: Thomas Gleixner Tested-by: vcaputo@pengaru.com Cc: Pavel Machek Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712281140440.1688@nanos --- arch/x86/kernel/apic/apic_flat_64.c | 2 +- arch/x86/kernel/apic/apic_noop.c | 2 +- arch/x86/kernel/apic/msi.c | 8 ++------ arch/x86/kernel/apic/probe_32.c | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- drivers/pci/host/pci-hyperv.c | 8 ++------ 6 files changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index aa85690e9b64..25a87028cb3f 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 1, /* logical */ .disable_esr = 0, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 7b659c4480c9..5078b5ce63a7 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = noop_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode =
dest_Fixed, /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9b18be764422..ce503c99f5c4 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) ((apic->irq_dest_mode == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL : MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU : - MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(cfg->dest_apicid); msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED : - MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(cfg->vector); } diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index fa22017de806..02e8acb134f8 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 622f13ca8a94..8b04234e010b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 1, /* logical */ .disable_esr = 0, diff --git a/drivers/pci/host/pci-hyperv.c b/drivers/pci/host/pci-hyperv.c index 0fe3ea164ee5..e7d94473aedd 100644 --- 
a/drivers/pci/host/pci-hyperv.c +++ b/drivers/pci/host/pci-hyperv.c @@ -985,9 +985,7 @@ static u32 hv_compose_msi_req_v1( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = 1; - int_pkt->int_desc.delivery_mode = - (apic->irq_delivery_mode == dest_LowestPrio) ? - dest_LowestPrio : dest_Fixed; + int_pkt->int_desc.delivery_mode = dest_Fixed; /* * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in @@ -1008,9 +1006,7 @@ static u32 hv_compose_msi_req_v2( int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = 1; - int_pkt->int_desc.delivery_mode = - (apic->irq_delivery_mode == dest_LowestPrio) ? - dest_LowestPrio : dest_Fixed; + int_pkt->int_desc.delivery_mode = dest_Fixed; /* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten -- cgit v1.2.3 From 8880c13734af33635118a1e9567dadc7f9ddb7a8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 16:29:15 +0100 Subject: gpio: brcmstb: Make really use of the new lockdep class The recent extension of irq_set_lockdep_class() with a second argument added the new lockdep class to the brcmstb driver, but used the already existing lockdep class as second argument, which leaves the new lockdep class defined but unused. Use the new lockdep class as that's what the change intended to do. 
Fixes: 39c3fd58952d ("kernel/irq: Extend lockdep class for request mutex") Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: Andrew Lunn Cc: linus.walleij@linaro.org --- drivers/gpio/gpio-brcmstb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 5b24801bffef..bb4f8cf18bd9 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -348,7 +348,7 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, if (ret < 0) return ret; irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class, - &brcmstb_gpio_irq_lock_class); + &brcmstb_gpio_irq_request_class); irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq); irq_set_noprobe(irq); return 0; -- cgit v1.2.3 From da5dd9e854d2edd6b02ebfe28583052f922104da Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 10:42:10 +0100 Subject: genirq/msi: Handle reactivation only on success When analyzing the fallout of the x86 vector allocation rework it turned out that the error handling in msi_domain_alloc_irqs() is broken. If MSI_FLAG_MUST_REACTIVATE is set for a MSI domain then it clears the activation flag for a successfully initialized msi descriptor. If a subsequent initialization fails then the error handling code path does not deactivate the interrupt because the activation flag got cleared. Move the clearing of the activation flag outside of the initialization loop so that an eventual failure can be cleaned up correctly. Fixes: 22d0b12f3560 ("genirq/irqdomain: Add force reactivation flag to irq domains") Signed-off-by: Thomas Gleixner Tested-by: Alexandru Chirvasitu Tested-by: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. 
Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org --- kernel/irq/msi.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index edb987b2c58d..9ba954331171 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -339,6 +339,13 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, return ret; } +static bool msi_check_reservation_mode(struct msi_domain_info *info) +{ + if (!(info->flags & MSI_FLAG_MUST_REACTIVATE)) + return false; + return true; +} + /** * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain * @domain: The domain to allocate from @@ -353,9 +360,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; - msi_alloc_info_t arg; + struct irq_data *irq_data; struct msi_desc *desc; + msi_alloc_info_t arg; int i, ret, virq; + bool can_reserve; ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); if (ret) @@ -385,6 +394,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, if (ops->msi_finish) ops->msi_finish(&arg, 0); + can_reserve = msi_check_reservation_mode(info); + for_each_msi_entry(desc, dev) { virq = desc->irq; if (desc->nvec_used == 1) @@ -397,15 +408,23 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, * the MSI entries before the PCI layer enables MSI in the * card. Otherwise the card latches a random msi message. 
*/ - if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { - struct irq_data *irq_data; + if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY)) + continue; + irq_data = irq_domain_get_irq_data(domain, desc->irq); + ret = irq_domain_activate_irq(irq_data, true); + if (ret) + goto cleanup; + } + + /* + * If these interrupts use reservation mode, clear the activated bit + * so request_irq() will assign the final vector. + */ + if (can_reserve) { + for_each_msi_entry(desc, dev) { irq_data = irq_domain_get_irq_data(domain, desc->irq); - ret = irq_domain_activate_irq(irq_data, true); - if (ret) - goto cleanup; - if (info->flags & MSI_FLAG_MUST_REACTIVATE) - irqd_clr_activated(irq_data); + irqd_clr_activated(irq_data); } } return 0; -- cgit v1.2.3 From 69790ba92b8d67eaee5e50b30a5b696d40664caf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 16:44:34 +0100 Subject: genirq: Introduce IRQD_CAN_RESERVE flag Add a new flag to mark interrupts which can use reservation mode. This is going to be used in subsequent patches to disable reservation mode for a certain class of MSI devices. Signed-off-by: Thomas Gleixner Tested-by: Alexandru Chirvasitu Tested-by: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org --- include/linux/irq.h | 17 +++++++++++++++++ kernel/irq/debugfs.c | 1 + 2 files changed, 18 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index e140f69163b6..a0231e96a578 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -212,6 +212,7 @@ struct irq_data { * mask. Applies only to affinity managed irqs. 
* IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set + * IRQD_CAN_RESERVE - Can use reservation mode */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -233,6 +234,7 @@ enum { IRQD_MANAGED_SHUTDOWN = (1 << 23), IRQD_SINGLE_TARGET = (1 << 24), IRQD_DEFAULT_TRIGGER_SET = (1 << 25), + IRQD_CAN_RESERVE = (1 << 26), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; } +static inline void irqd_set_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_CAN_RESERVE; +} + +static inline void irqd_clr_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_CAN_RESERVE; +} + +static inline bool irqd_can_reserve(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_CAN_RESERVE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 7f608ac39653..acfaaef8672a 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), + BIT_MASK_DESCR(IRQD_CAN_RESERVE), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), -- cgit v1.2.3 From 945f50a591783ac6e9bd59694f34d1ba03b778a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 16:57:00 +0100 Subject: x86/vector: Use IRQD_CAN_RESERVE flag Set the new CAN_RESERVE flag when the initial reservation for an interrupt happens. The flag is used in a subsequent patch to disable reservation mode for a certain class of MSI devices. Signed-off-by: Thomas Gleixner Tested-by: Alexandru Chirvasitu Tested-by: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. 
Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org --- arch/x86/kernel/apic/vector.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 750449152b04..1e969dba0476 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd) irq_matrix_reserve(vector_matrix); apicd->can_reserve = true; apicd->has_reserved = true; + irqd_set_can_reserve(irqd); trace_vector_reserve(irqd->irq, 0); vector_assign_managed_shutdown(irqd); } @@ -478,6 +479,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, } else { /* Release the vector */ apicd->can_reserve = true; + irqd_set_can_reserve(irqd); clear_irq_vector(irqd); realloc = true; } -- cgit v1.2.3 From 702cb0a02813299d6911b775c637906ae21b737d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 16:59:06 +0100 Subject: genirq/irqdomain: Rename early argument of irq_domain_activate_irq() The 'early' argument of irq_domain_activate_irq() is actually used to denote reservation mode. To avoid confusion, rename it before abuse happens. No functional change. Fixes: 72491643469a ("genirq/irqdomain: Update irq_domain_ops.activate() signature") Signed-off-by: Thomas Gleixner Cc: Alexandru Chirvasitu Cc: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. 
Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org --- arch/x86/include/asm/irqdomain.h | 2 +- arch/x86/include/asm/trace/irq_vectors.h | 16 ++++++++-------- arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/apic/vector.c | 6 +++--- arch/x86/platform/uv/uv_irq.c | 2 +- drivers/gpio/gpio-xgene-sb.c | 2 +- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/intel_irq_remapping.c | 2 +- drivers/irqchip/irq-gic-v3-its.c | 4 ++-- drivers/pinctrl/stm32/pinctrl-stm32.c | 2 +- include/linux/irqdomain.h | 2 +- kernel/irq/internals.h | 2 +- kernel/irq/irqdomain.c | 13 +++++++------ 13 files changed, 29 insertions(+), 28 deletions(-) diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 139feef467f7..c066ffae222b 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); extern int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early); + struct irq_data *irq_data, bool reserve); extern void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data); extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 84b9ec0c1bc0..22647a642e98 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed, DECLARE_EVENT_CLASS(vector_activate, TP_PROTO(unsigned int irq, bool is_managed, bool 
can_reserve, - bool early), + bool reserve), - TP_ARGS(irq, is_managed, can_reserve, early), + TP_ARGS(irq, is_managed, can_reserve, reserve), TP_STRUCT__entry( __field( unsigned int, irq ) __field( bool, is_managed ) __field( bool, can_reserve ) - __field( bool, early ) + __field( bool, reserve ) ), TP_fast_assign( __entry->irq = irq; __entry->is_managed = is_managed; __entry->can_reserve = can_reserve; - __entry->early = early; + __entry->reserve = reserve; ), - TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", + TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d", __entry->irq, __entry->is_managed, __entry->can_reserve, - __entry->early) + __entry->reserve) ); #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ DEFINE_EVENT_FN(vector_activate, name, \ TP_PROTO(unsigned int irq, bool is_managed, \ - bool can_reserve, bool early), \ - TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ + bool can_reserve, bool reserve), \ + TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \ DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 201579dc5242..8a7963421460 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, } int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { unsigned long flags; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1e969dba0476..52c85c8147e9 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -399,21 +399,21 @@ static int activate_managed(struct irq_data *irqd) } static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, - bool early) + bool reserve) { struct apic_chip_data *apicd = apic_chip_data(irqd); 
unsigned long flags; int ret = 0; trace_vector_activate(irqd->irq, apicd->is_managed, - apicd->can_reserve, early); + apicd->can_reserve, reserve); /* Nothing to do for fixed assigned vectors */ if (!apicd->can_reserve && !apicd->is_managed) return 0; raw_spin_lock_irqsave(&vector_lock, flags); - if (early || irqd_is_managed_and_shutdown(irqd)) + if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 5f6fd860820a..e4cb9f4cde8a 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq, * on the specified blade to allow the sending of MSIs to the specified CPU. */ static int uv_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); return 0; diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index 2313af82fad3..acd59113e08b 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio) static int xgene_gpio_sb_domain_activate(struct irq_domain *d, struct irq_data *irq_data, - bool early) + bool reserve) { struct xgene_gpio_sb *priv = d->host_data; u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7d5eb004091d..97baf88d9505 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4184,7 +4184,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu, struct irq_cfg *cfg); static int irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { struct 
amd_ir_data *data = irq_data->chip_data; struct irq_2_irte *irte_info = &data->irq_2_irte; diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 76a193c7fcfc..66f69af2c219 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain, } static int intel_irq_remapping_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { intel_ir_reconfigure_irte(irq_data, true); return 0; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 4039e64cd342..06f025fd5726 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, } static int its_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d, bool early) + struct irq_data *d, bool reserve) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); @@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq } static int its_vpe_irq_domain_activate(struct irq_domain *domain, - struct irq_data *d, bool early) + struct irq_data *d, bool reserve) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); struct its_node *its; diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index a276c61be217..e62ab087bfd8 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d, } static int stm32_gpio_domain_activate(struct irq_domain *d, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { struct stm32_gpio_bank *bank = d->host_data; struct stm32_pinctrl *pctl = 
dev_get_drvdata(bank->gpio_chip.parent); diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a34355d19546..48c7e86bb556 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -113,7 +113,7 @@ struct irq_domain_ops { unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); - int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 07d08ca701ec..ab19371eab9b 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) #endif /* !CONFIG_GENERIC_PENDING_IRQ */ #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) -static inline int irq_domain_activate_irq(struct irq_data *data, bool early) +static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve) { irqd_set_activated(data); return 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 4f4f60015e8a..62068ad46930 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data) } } -static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) +static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve) { int ret = 0; @@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) if (irqd->parent_data) ret = __irq_domain_activate_irq(irqd->parent_data, - early); + reserve); if (!ret && domain->ops->activate) { - ret = domain->ops->activate(domain, irqd, early); + ret = 
domain->ops->activate(domain, irqd, reserve); /* Rollback in case of error */ if (ret && irqd->parent_data) __irq_domain_deactivate_irq(irqd->parent_data); @@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt - * @irq_data: outermost irq_data associated with interrupt + * @irq_data: Outermost irq_data associated with interrupt + * @reserve: If set only reserve an interrupt vector instead of assigning one * * This is the second step to call domain_ops->activate to program interrupt * controllers, so the interrupt could actually get delivered. */ -int irq_domain_activate_irq(struct irq_data *irq_data, bool early) +int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve) { int ret = 0; if (!irqd_is_activated(irq_data)) - ret = __irq_domain_activate_irq(irq_data, early); + ret = __irq_domain_activate_irq(irq_data, reserve); if (!ret) irqd_set_activated(irq_data); return ret; -- cgit v1.2.3 From bc976233a872c0f20f018fb1e89264a541584e25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 10:47:22 +0100 Subject: genirq/msi, x86/vector: Prevent reservation mode for non maskable MSI The new reservation mode for interrupts assigns a dummy vector when the interrupt is allocated and assigns a real vector when the interrupt is requested. The reservation mode prevents vector pressure when devices with a large amount of queues/interrupts are initialized, but only a minimal subset of those queues/interrupts is actually used. This mode has an issue with MSI interrupts which cannot be masked. If the driver is not careful or the hardware emits an interrupt before the device irq is requested by the driver then the interrupt ends up on the dummy vector as a spurious interrupt which can cause malfunction of the device or in the worst case a lockup of the machine. 
Change the logic for the reservation mode so that the early activation of MSI interrupts checks whether: - the device is a PCI/MSI device - the reservation mode of the underlying irqdomain is activated - PCI/MSI masking is globally enabled - the PCI/MSI device uses either MSI-X, which supports masking, or MSI with the maskbit supported. If one of those conditions is false, then clear the reservation mode flag in the irq data of the interrupt and invoke irq_domain_activate_irq() with the reserve argument cleared. In the x86 vector code, clear the can_reserve flag in the vector allocation data so a subsequent free_irq() won't create the same situation again. The interrupt stays assigned to a real vector until pci_disable_msi() is invoked and all allocations are undone. Fixes: 4900be83602b ("x86/vector/msi: Switch to global reservation mode") Reported-by: Alexandru Chirvasitu Reported-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Tested-by: Alexandru Chirvasitu Tested-by: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. 
Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712291406420.1899@nanos Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712291409460.1899@nanos --- arch/x86/kernel/apic/vector.c | 12 +++++++++++- kernel/irq/msi.c | 37 +++++++++++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 52c85c8147e9..f8b03bb8e725 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -369,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd) int ret; ret = assign_irq_vector_any_locked(irqd); - if (!ret) + if (!ret) { apicd->has_reserved = false; + /* + * Core might have disabled reservation mode after + * allocating the irq descriptor. Ideally this should + * happen before allocation time, but that would require + * completely convoluted ways of transporting that + * information. + */ + if (!irqd_can_reserve(irqd)) + apicd->can_reserve = false; + } return ret; } diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 9ba954331171..2f3c4f5382cc 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -339,11 +339,38 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, return ret; } -static bool msi_check_reservation_mode(struct msi_domain_info *info) +/* + * Carefully check whether the device can use reservation mode. If + * reservation mode is enabled then the early activation will assign a + * dummy vector to the device. 
If the PCI/MSI device does not support + * masking of the entry then this can result in spurious interrupts when + * the device driver is not absolutely careful. But even then a malfunction + * of the hardware could result in a spurious interrupt on the dummy vector + * and render the device unusable. If the entry can be masked then the core + * logic will prevent the spurious interrupt and reservation mode can be + * used. For now reservation mode is restricted to PCI/MSI. + */ +static bool msi_check_reservation_mode(struct irq_domain *domain, + struct msi_domain_info *info, + struct device *dev) { + struct msi_desc *desc; + + if (domain->bus_token != DOMAIN_BUS_PCI_MSI) + return false; + if (!(info->flags & MSI_FLAG_MUST_REACTIVATE)) return false; - return true; + + if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask) + return false; + + /* + * Checking the first MSI descriptor is sufficient. MSIX supports + * masking and MSI does so when the maskbit is set. + */ + desc = first_msi_entry(dev); + return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit; } /** @@ -394,7 +421,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, if (ops->msi_finish) ops->msi_finish(&arg, 0); - can_reserve = msi_check_reservation_mode(info); + can_reserve = msi_check_reservation_mode(domain, info, dev); for_each_msi_entry(desc, dev) { virq = desc->irq; @@ -412,7 +439,9 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, continue; irq_data = irq_domain_get_irq_data(domain, desc->irq); - ret = irq_domain_activate_irq(irq_data, true); + if (!can_reserve) + irqd_clr_can_reserve(irq_data); + ret = irq_domain_activate_irq(irq_data, can_reserve); if (ret) goto cleanup; } -- cgit v1.2.3 From ced6d5c11d3e7b342f1a80f908e6756ebd4b8ddd Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Fri, 22 Dec 2017 15:51:12 +0100 Subject: timers: Use deferrable base independent of base::nohz_active During boot and before base::nohz_active is set in 
the timer bases, deferrable timers are enqueued into the standard timer base. This works correctly as long as base::nohz_active is false. Once base::nohz_active is set and a timer which was enqueued before that is accessed, the lock selector code chooses the lock of the deferred base. This causes unlocked access to the standard base and in case the timer is removed it does not clear the pending flag in the standard base bitmap which causes get_next_timer_interrupt() to return bogus values. To prevent that, the deferrable timers must be enqueued in the deferrable base, even when base::nohz_active is not set. Those deferrable timers also need to be expired unconditionally. Fixes: 500462a9de65 ("timers: Switch to a non-cascading wheel") Signed-off-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: stable@vger.kernel.org Cc: rt@linutronix.de Cc: Paul McKenney Link: https://lkml.kernel.org/r/20171222145337.633328378@linutronix.de --- kernel/time/timer.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ffebcf878fba..19a9c3da7698 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu) struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. 
*/ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); return base; } @@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags) struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); /* - * If the timer is deferrable and nohz is active then we need to use - * the deferrable base. + * If the timer is deferrable and NO_HZ_COMMON is set then we need + * to use the deferrable base. */ - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && - (tflags & TIMER_DEFERRABLE)) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE)) base = this_cpu_ptr(&timer_bases[BASE_DEF]); return base; } @@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) base->must_forward_clk = false; __run_timers(base); - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) + if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); } -- cgit v1.2.3 From 26456f87aca7157c057de65c9414b37f1ab881d1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Dec 2017 21:37:25 +0100 Subject: timers: Reinitialize per cpu bases on hotplug The timer wheel bases are not (re)initialized on CPU hotplug. That leaves them with a potentially stale clk and next_expiry value, which can cause trouble when the CPU is plugged. Add a prepare callback which forwards the clock, sets next_expiry to far in the future and resets the control flags to a known state. Set base->must_forward_clk so the first timer which is queued will try to forward the clock to current jiffies. Fixes: 500462a9de65 ("timers: Switch to a non-cascading wheel") Reported-by: Paul E. 
McKenney Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Sebastian Siewior Cc: Anna-Maria Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712272152200.2431@nanos --- include/linux/cpuhotplug.h | 2 +- include/linux/timer.h | 4 +++- kernel/cpu.c | 4 ++-- kernel/time/timer.c | 15 +++++++++++++++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 201ab7267986..1a32e558eb11 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -86,7 +86,7 @@ enum cpuhp_state { CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, - CPUHP_TIMERS_DEAD, + CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, diff --git a/include/linux/timer.h b/include/linux/timer.h index 04af640ea95b..2448f9cc48a3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); #ifdef CONFIG_HOTPLUG_CPU +int timers_prepare_cpu(unsigned int cpu); int timers_dead_cpu(unsigned int cpu); #else -#define timers_dead_cpu NULL +#define timers_prepare_cpu NULL +#define timers_dead_cpu NULL #endif #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 41376c3ac93b..97858477e586 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = { * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. 
*/ - [CPUHP_TIMERS_DEAD] = { + [CPUHP_TIMERS_PREPARE] = { .name = "timers:dead", - .startup.single = NULL, + .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, /* Kicks the plugged cpu into life */ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 19a9c3da7698..6be576e02209 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1853,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h } } +int timers_prepare_cpu(unsigned int cpu) +{ + struct timer_base *base; + int b; + + for (b = 0; b < NR_BASES; b++) { + base = per_cpu_ptr(&timer_bases[b], cpu); + base->clk = jiffies; + base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->is_idle = false; + base->must_forward_clk = true; + } + return 0; +} + int timers_dead_cpu(unsigned int cpu) { struct timer_base *old_base; -- cgit v1.2.3 From 5d62c183f9e9df1deeea0906d099a94e8a43047a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Dec 2017 15:51:13 +0100 Subject: nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick() The conditions in irq_exit() to invoke tick_nohz_irq_exit() which subsequently invokes tick_nohz_stop_sched_tick() are: if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) If need_resched() is not set, but a timer softirq is pending then this is an indication that the softirq code punted and delegated the execution to softirqd. need_resched() is not true because the current interrupted task takes precedence over softirqd. Invoking tick_nohz_irq_exit() in this case can cause an endless loop of timer interrupts because the timer wheel contains an expired timer, but softirqs are not yet executed. So it returns an immediate expiry request, which causes the timer to fire immediately again. Lather, rinse and repeat.... Prevent that by adding a check for a pending timer soft interrupt to the conditions in tick_nohz_stop_sched_tick() which avoid calling get_next_timer_interrupt(). 
That keeps the tick sched timer on the tick and prevents a repetitive programming of an already expired timer. Reported-by: Sebastian Siewior Signed-off-by: Thomas Gleixner Acked-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul McKenney Cc: Anna-Maria Gleixner Cc: Sebastian Siewior Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712272156050.2431@nanos --- kernel/time/tick-sched.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 77555faf6fbc..f7cc7abfcf25 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) ts->next_tick = 0; } +static inline bool local_timer_softirq_pending(void) +{ + return local_softirq_pending() & TIMER_SOFTIRQ; +} + static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now, int cpu) { @@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&jiffies_lock, seq)); ts->last_jiffies = basejiff; - if (rcu_needs_cpu(basemono, &next_rcu) || - arch_needs_cpu() || irq_work_needs_cpu()) { + /* + * Keep the periodic tick, when RCU, architecture or irq_work + * requests it. + * Aside of that check whether the local timer softirq is + * pending. If so its a bad idea to call get_next_timer_interrupt() + * because there is an already expired timer, so it will request + * immeditate expiry, which rearms the hardware timer with a + * minimal delta which brings us back to this place + * immediately. Lather, rinse and repeat... 
+ */ + if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { /* -- cgit v1.2.3 From fd45bb77ad682be728d1002431d77b8c73342836 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Dec 2017 15:51:14 +0100 Subject: timers: Invoke timer_start_debug() where it makes sense The timer start debug function is called before the proper timer base is set. As a consequence the trace data contains the stale CPU and flags values. Call the debug function after setting the new base and flags. Fixes: 500462a9de65 ("timers: Switch to a non-cascading wheel") Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Sebastian Siewior Cc: stable@vger.kernel.org Cc: rt@linutronix.de Cc: Paul McKenney Cc: Anna-Maria Gleixner Link: https://lkml.kernel.org/r/20171222145337.792907137@linutronix.de --- kernel/time/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 6be576e02209..89a9e1b4264a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1007,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option if (!ret && (options & MOD_TIMER_PENDING_ONLY)) goto out_unlock; - debug_activate(timer, expires); - new_base = get_target_base(base, timer->flags); if (base != new_base) { @@ -1032,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option } } + debug_activate(timer, expires); + timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance -- cgit v1.2.3 From 9f4533cd7334235cd4c9b9fb1b0b8791e2ba01a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Dec 2017 15:51:15 +0100 Subject: timerqueue: Document return values of timerqueue_add/del() The return values of timerqueue_add/del() are not documented in the kernel doc comment. Add proper documentation. 
Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Sebastian Siewior Cc: rt@linutronix.de Cc: Paul McKenney Cc: Anna-Maria Gleixner Link: https://lkml.kernel.org/r/20171222145337.872681338@linutronix.de --- lib/timerqueue.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/timerqueue.c b/lib/timerqueue.c index 4a720ed4fdaf..0d54bcbc8170 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -33,8 +33,9 @@ * @head: head of timerqueue * @node: timer node to be added * - * Adds the timer node to the timerqueue, sorted by the - * node's expires value. + * Adds the timer node to the timerqueue, sorted by the node's expires + * value. Returns true if the newly added timer is the first expiring timer in + * the queue. */ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) { @@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add); * @head: head of timerqueue * @node: timer node to be removed * - * Removes the timer node from the timerqueue. + * Removes the timer node from the timerqueue. Returns true if the queue is + * not empty after the remove. */ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) { -- cgit v1.2.3 From 257a4b018d1b514a1cc738e3ca11b566d8f3a3d8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 26 Dec 2017 17:34:44 +1100 Subject: xfrm: Forbid state updates from changing encap type Currently we allow state updates to completely replace the contents of x->encap. This is bad because on the user side ESP only sets up header lengths depending on encap_type once when the state is first created. This could result in the header lengths getting out of sync with the actual state configuration. In practice key managers will never do a state update to change the encapsulation type. Only the port numbers need to be changed as the peer NAT entry is updated. Therefore this patch adds a check in xfrm_state_update to forbid any changes to the encap_type.
Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 500b3391f474..1e80f68e2266 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1534,8 +1534,12 @@ out: err = -EINVAL; spin_lock_bh(&x1->lock); if (likely(x1->km.state == XFRM_STATE_VALID)) { - if (x->encap && x1->encap) + if (x->encap && x1->encap && + x->encap->encap_type == x1->encap->encap_type) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + else if (x->encap || x1->encap) + goto fail; + if (x->coaddr && x1->coaddr) { memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); } @@ -1552,6 +1556,8 @@ out: x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); } + +fail: spin_unlock_bh(&x1->lock); xfrm_state_put(x1); -- cgit v1.2.3 From 862591bf4f519d1b8d859af720fafeaebdd0162a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 27 Dec 2017 23:25:45 +0100 Subject: xfrm: skip policies marked as dead while rehashing syzkaller triggered following KASAN splat: BUG: KASAN: slab-out-of-bounds in xfrm_hash_rebuild+0xdbe/0xf00 net/xfrm/xfrm_policy.c:618 read of size 2 at addr ffff8801c8e92fe4 by task kworker/1:1/23 [..] Workqueue: events xfrm_hash_rebuild [..] __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:428 xfrm_hash_rebuild+0xdbe/0xf00 net/xfrm/xfrm_policy.c:618 process_one_work+0xbbf/0x1b10 kernel/workqueue.c:2112 worker_thread+0x223/0x1990 kernel/workqueue.c:2246 [..] The reproducer triggers: 1016 if (error) { 1017 list_move_tail(&walk->walk.all, &x->all); 1018 goto out; 1019 } in xfrm_policy_walk() via pfkey (it sets tiny rcv space, dump callback returns -ENOBUFS). In this case, *walk is located in the pfkey socket struct, so this socket becomes visible in the global policy list. It looks like this is intentional -- phony walker has walk.dead set to 1 and all other places skip such "policies".
Ccing original authors of the two commits that seem to expose this issue (first patch missed ->dead check, second patch adds pfkey sockets to policies dumper list). Fixes: 880a6fab8f6ba5b ("xfrm: configure policy hash table thresholds by netlink") Fixes: 12a169e7d8f4b1c ("ipsec: Put dumpers on the dump list") Cc: Herbert Xu Cc: Timo Teras Cc: Christophe Gouault Reported-by: syzbot Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 70aa5cb0c659..2ef6db98e9ba 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -609,7 +609,8 @@ static void xfrm_hash_rebuild(struct work_struct *work) /* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { - if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { + if (policy->walk.dead || + xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { /* skip socket policies */ continue; } -- cgit v1.2.3 From 06b335cb51af018d5feeff5dd4fd53847ddb675a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 18:13:05 -0600 Subject: af_key: fix buffer overread in verify_address_len() If a message sent to a PF_KEY socket ended with one of the extensions that takes a 'struct sadb_address' but there were not enough bytes remaining in the message for the ->sa_family member of the 'struct sockaddr' which is supposed to follow, then verify_address_len() read past the end of the message, into uninitialized memory. Fix it by returning -EINVAL in this case. This bug was found using syzkaller with KMSAN. 
Reproducer: #include #include #include int main() { int sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2); char buf[24] = { 0 }; struct sadb_msg *msg = (void *)buf; struct sadb_address *addr = (void *)(msg + 1); msg->sadb_msg_version = PF_KEY_V2; msg->sadb_msg_type = SADB_DELETE; msg->sadb_msg_len = 3; addr->sadb_address_len = 1; addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; write(sock, buf, 24); } Reported-by: Alexander Potapenko Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert --- net/key/af_key.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 3dffb892d52c..596499cc8b2f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -401,6 +401,11 @@ static int verify_address_len(const void *p) #endif int len; + if (sp->sadb_address_len < + DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family), + sizeof(uint64_t))) + return -EINVAL; + switch (addr->sa_family) { case AF_INET: len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); -- cgit v1.2.3 From 4e765b4972af7b07adcb1feb16e7a525ce1f6b28 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 18:15:23 -0600 Subject: af_key: fix buffer overread in parse_exthdrs() If a message sent to a PF_KEY socket ended with an incomplete extension header (fewer than 4 bytes remaining), then parse_exthdrs() read past the end of the message, into uninitialized memory. Fix it by returning -EINVAL in this case. 
Reproducer: #include #include #include int main() { int sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2); char buf[17] = { 0 }; struct sadb_msg *msg = (void *)buf; msg->sadb_msg_version = PF_KEY_V2; msg->sadb_msg_type = SADB_DELETE; msg->sadb_msg_len = 2; write(sock, buf, 17); } Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Steffen Klassert --- net/key/af_key.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index 596499cc8b2f..d40861a048fe 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -516,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void * uint16_t ext_type; int ext_len; + if (len < sizeof(*ehdr)) + return -EINVAL; + ext_len = ehdr->sadb_ext_len; ext_len *= sizeof(uint64_t); ext_type = ehdr->sadb_ext_type; -- cgit v1.2.3 From 3ce120b16cc548472f80cf8644f90eda958cf1b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 29 Dec 2017 17:34:43 -0800 Subject: kbuild: add '-fno-stack-check' to kernel build options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It appears that hardened gentoo enables "-fstack-check" by default for gcc. That doesn't work _at_all_ for the kernel, because the kernel stack doesn't act like a user stack at all: it's much smaller, and it doesn't auto-expand on use. So the extra "probe one page below the stack" code generated by -fstack-check just breaks the kernel in horrible ways, causing infinite double faults etc. [ I have to say, that the particular code gcc generates looks very stupid even for user space where it works, but that's a separate issue. 
] Reported-and-tested-by: Alexander Tsoy Reported-and-tested-by: Toralf Förster Cc: stable@kernel.org Cc: Dave Hansen Cc: Jiri Kosina Cc: Andy Lutomirski Signed-off-by: Linus Torvalds --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index ac8c441866b7..92b74bcd3c2a 100644 --- a/Makefile +++ b/Makefile @@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# Make sure -fstack-check isn't enabled (like gentoo apparently did) +KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) + # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) -- cgit v1.2.3 From d89e426499cf36b96161bd32970d6783f1fbcb0e Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Sat, 30 Dec 2017 14:43:31 -0600 Subject: objtool: Fix seg fault caused by missing parameter Fix a seg fault when no parameter is provided to 'objtool orc'. 
Signed-off-by: Simon Ser Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/9172803ec7ebb72535bcd0b7f966ae96d515968e.1514666459.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/builtin-orc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 4c6b5c9ef073..91e8e19ff5e0 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -44,6 +44,9 @@ int cmd_orc(int argc, const char **argv) const char *objname; argc--; argv++; + if (argc <= 0) + usage_with_options(orc_usage, check_options); + if (!strncmp(argv[0], "gen", 3)) { argc = parse_options(argc, argv, check_options, orc_usage, 0); if (argc != 1) @@ -52,7 +55,6 @@ int cmd_orc(int argc, const char **argv) objname = argv[0]; return check(objname, no_fp, no_unreachable, true); - } if (!strcmp(argv[0], "dump")) { -- cgit v1.2.3 From ce90aaf5cde4ce057b297bb6c955caf16ef00ee6 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Sat, 30 Dec 2017 14:43:32 -0600 Subject: objtool: Fix seg fault with clang-compiled objects Fix a seg fault which happens when an input file provided to 'objtool orc generate' doesn't have a '.shstrtab' section (for instance, object files produced by clang don't have this section). 
Signed-off-by: Simon Ser Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c0f2231683e9bed40fac1f13ce2c33b8389854bc.1514666459.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/orc_gen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index e5ca31429c9b..e61fe703197b 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -165,6 +165,8 @@ int create_orc_sections(struct objtool_file *file) /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx); + if (!sec) + return -1; ip_relasec = elf_create_rela_section(file->elf, sec); if (!ip_relasec) -- cgit v1.2.3 From 322f8b8b340c824aef891342b0f5795d15e11562 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 30 Dec 2017 22:13:53 +0100 Subject: x86/smpboot: Remove stale TLB flush invocations smpboot_setup_warm_reset_vector() and smpboot_restore_warm_reset_vector() invoke local_flush_tlb() for no obvious reason. Digging in history revealed that the original code in the 2.1 era added those because the code manipulated a swapper_pg_dir pagetable entry. The pagetable manipulation was removed long ago in the 2.3 timeframe, but the TLB flush invocations stayed around forever. Remove them along with the pointless pr_debug()s which come from the same 2.1 change. 
Reported-by: Dominik Brodowski Signed-off-by: Thomas Gleixner Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Linus Torvalds Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171230211829.586548655@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 33d6000265aa..c3402fc30865 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -128,25 +128,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) spin_lock_irqsave(&rtc_lock, flags); CMOS_WRITE(0xa, 0xf); spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; - pr_debug("2.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; - pr_debug("3.\n"); } static inline void smpboot_restore_warm_reset_vector(void) { unsigned long flags; - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - /* * Paranoid: Set warm reset code and vector here back * to default values. -- cgit v1.2.3 From decab0888e6e14e11d53cefa85f8b3d3b45ce73c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 30 Dec 2017 22:13:54 +0100 Subject: x86/mm: Remove preempt_disable/enable() from __native_flush_tlb() The preempt_disable/enable() pair in __native_flush_tlb() was added in commit: 5cf0791da5c1 ("x86/mm: Disable preemption during CR3 read+write") ... to protect the UP variant of flush_tlb_mm_range(). That preempt_disable/enable() pair should have been added to the UP variant of flush_tlb_mm_range() instead. The UP variant was removed with commit: ce4a4e565f52 ("x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code") ... but the preempt_disable/enable() pair stayed around. 
The latest change to __native_flush_tlb() in commit: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches") ... added an access to a per CPU variable outside the preempt disabled regions, which makes no sense at all. __native_flush_tlb() must always be called with at least preemption disabled. Remove the preempt_disable/enable() pair and add a WARN_ON_ONCE() to catch bad callers independent of the smp_processor_id() debugging. Signed-off-by: Thomas Gleixner Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dominik Brodowski Cc: Linus Torvalds Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171230211829.679325424@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tlbflush.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index b519da4fc03c..f9b48ce152eb 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -345,15 +345,17 @@ static inline void invalidate_user_asid(u16 asid) */ static inline void __native_flush_tlb(void) { - invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* - * If current->mm == NULL then we borrow a mm which may change - * during a task switch and therefore we must not be preempted - * while we write CR3 back: + * Preemption or interrupts must be disabled to protect the access + * to the per CPU variable and to prevent being preempted between + * read_cr3() and write_cr3(). 
*/ - preempt_disable(); + WARN_ON_ONCE(preemptible()); + + invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); + + /* If current->mm == NULL then the read_cr3() "borrows" an mm */ native_write_cr3(__native_read_cr3()); - preempt_enable(); } /* -- cgit v1.2.3 From a62d69857aab4caa43049e72fe0ed5c4a60518dd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Dec 2017 11:24:34 +0100 Subject: x86/ldt: Plug memory leak in error path The error path in write_ldt() tries to free 'old_ldt' instead of the newly allocated 'new_ldt', resulting in a memory leak. It also misses to clean up a half populated LDT pagetable, which is not a leak as it gets cleaned up when the process exits. Free both the potentially half populated LDT pagetable and the newly allocated LDT struct. This can be done unconditionally because once an LDT is mapped subsequent maps will succeed, because the PTE page is already populated and the two LDTs fit into that single page. Reported-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dominik Brodowski Cc: Linus Torvalds Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on") Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1712311121340.1899@nanos Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 579cc4a66fdf..500e90e44f86 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -421,7 +421,13 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) */ error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0); if (error) { - free_ldt_struct(old_ldt); + /* + * This only can fail for the first LDT setup. If an LDT is + * already installed then the PTE page is already + * populated. Mop up a half populated page table. 
+ */ + free_ldt_pgtables(mm); + free_ldt_struct(new_ldt); goto out_unlock; } -- cgit v1.2.3 From 2f10a61cee8fdb9f8da90f5db687e1862b22cf06 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sun, 31 Dec 2017 16:18:56 +0100 Subject: xfrm: fix rcu usage in xfrm_get_type_offload request_module can sleep, thus we cannot hold rcu_read_lock() while calling it. The function also jumps back and takes rcu_read_lock() again (in xfrm_state_get_afinfo()), resulting in an imbalance. This codepath is triggered whenever a new offloaded state is created. Fixes: ffdb5211da1c ("xfrm: Auto-load xfrm offload modules") Reported-by: syzbot+ca425f44816d749e8eb49755567a75ee48cf4a30@syzkaller.appspotmail.com Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1e80f68e2266..429957412633 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -313,13 +313,14 @@ retry: if ((type && !try_module_get(type->owner))) type = NULL; + rcu_read_unlock(); + if (!type && try_load) { request_module("xfrm-offload-%d-%d", family, proto); try_load = 0; goto retry; } - rcu_read_unlock(); return type; } -- cgit v1.2.3 From 7f414195b0c3612acd12b4611a5fe75995cf10c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Dec 2017 16:52:15 +0100 Subject: x86/ldt: Make LDT pgtable free conditional Andy prefers to be paranoid about the pagetable free in the error path of write_ldt(). Make it conditional and warn whenever the installment of a secondary LDT fails. 
Requested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ldt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 500e90e44f86..26d713ecad34 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -426,7 +426,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) * already installed then the PTE page is already * populated. Mop up a half populated page table. */ - free_ldt_pgtables(mm); + if (!WARN_ON_ONCE(old_ldt)) + free_ldt_pgtables(mm); free_ldt_struct(new_ldt); goto out_unlock; } -- cgit v1.2.3 From c0b23903f5b077effec90769d365646a8c2faae0 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Mon, 25 Dec 2017 16:38:58 +0100 Subject: MAINTAINERS: mark arch/blackfin/ and its gubbins as orphaned The blackfin architecture has seen no maintainer action of any kind since April 2015. No new code, no pull requests, no acks to patches, no response to mails, nothing. The web site has an expired certificate (expiration Sep 2017, issued in 2013), the mailing list sees no answers either, with one exception: https://sourceforge.net/p/adi-buildroot/mailman/adi-buildroot-devel/ > > Steven is no longer working on this for ADI. Acked by me if this works. Thanks. > > Best regards, > Aaron Wu > Analog Devices Inc. But, Aaron doesn't seem to respond to queries either. 
Signed-off-by: Adam Borowski Acked-by: Linus Walleij Cc: Arnd Bergmann Signed-off-by: Linus Torvalds --- MAINTAINERS | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a6e86e20761e..2d0773007c89 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2621,24 +2621,22 @@ F: fs/bfs/ F: include/uapi/linux/bfs_fs.h BLACKFIN ARCHITECTURE -M: Steven Miao L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) T: git git://git.code.sf.net/p/adi-linux/code W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: arch/blackfin/ BLACKFIN EMAC DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/net/ethernet/adi/ BLACKFIN MEDIA DRIVER -M: Scott Jiang L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org/ -S: Supported +S: Orphan F: drivers/media/platform/blackfin/ F: drivers/media/i2c/adv7183* F: drivers/media/i2c/vs6624* @@ -2646,25 +2644,25 @@ F: drivers/media/i2c/vs6624* BLACKFIN RTC DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/rtc/rtc-bfin.c BLACKFIN SDH DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/mmc/host/bfin_sdh.c BLACKFIN SERIAL DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/tty/serial/bfin_uart.c BLACKFIN WATCHDOG DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/watchdog/bfin_wdt.c BLINKM RGB LED DRIVER -- cgit v1.2.3 From 30a7acd573899fd8b8ac39236eff6468b195ac7d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 Dec 2017 
14:47:43 -0800 Subject: Linux 4.15-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 92b74bcd3c2a..eb1f5973813e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 8ec426c7019ed9600d9dc0cf758445adcdbfc14e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:04 -0500 Subject: lustre: don't set f_version in ll_readdir f_version is only ever used by filesystem-specific code. Generic VFS code never uses it. Nothing in lustre ever looks at it, so just remove this. Signed-off-by: Jeff Layton Reviewed-by: Andreas Dilger --- drivers/staging/lustre/lustre/llite/dir.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 5b2e47c246f3..6f59045be0f9 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -369,8 +369,6 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx) } ctx->pos = pos; ll_finish_md_op_data(op_data); - filp->f_version = inode->i_version; - out: if (!rc) ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1); @@ -1678,7 +1676,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) else fd->lfd_pos = offset; file->f_pos = offset; - file->f_version = 0; } ret = offset; } -- cgit v1.2.3 From 7a11ac289c437cb06633620940b191a63dec1f4a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:05 -0500 Subject: ntfs: remove i_version handling NTFS keeps track of the i_version counter here, seemingly for no reason. It does not set the SB_I_VERSION flag so it'll never be incremented on write, and it doesn't increment it internally for metadata operations. 
Signed-off-by: Jeff Layton --- fs/ntfs/inode.c | 9 --------- fs/ntfs/mft.c | 6 ------ 2 files changed, 15 deletions(-) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 7c410f879412..1c1ee489284b 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -560,13 +560,6 @@ static int ntfs_read_locked_inode(struct inode *vi) ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); /* Setup the generic vfs inode parts now. */ - - /* - * This is for checking whether an inode has changed w.r.t. a file so - * that the file can be updated if necessary (compare with f_version). - */ - vi->i_version = 1; - vi->i_uid = vol->uid; vi->i_gid = vol->gid; vi->i_mode = 0; @@ -1240,7 +1233,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; set_nlink(vi, base_vi->i_nlink); @@ -1507,7 +1499,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) ni = NTFS_I(vi); base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; set_nlink(vi, base_vi->i_nlink); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index ee8392aee9f6..2831f495a674 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2641,12 +2641,6 @@ mft_rec_already_initialized: goto undo_mftbmp_alloc; } vi->i_ino = bit; - /* - * This is for checking whether an inode has changed w.r.t. a - * file so that the file can be updated if necessary (compare - * with f_version). - */ - vi->i_version = 1; /* The owner and group come from the ntfs volume. 
*/ vi->i_uid = vol->uid; -- cgit v1.2.3 From d1bf2d30728f310f72296b54f0651ecdb09cbb12 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sun, 5 Nov 2017 21:27:41 -0800 Subject: PM / devfreq: Propagate error from devfreq_add_device() Propagate the error of devfreq_add_device() in devm_devfreq_add_device() rather than statically returning ENOMEM. This makes it slightly faster to pinpoint the cause of a returned error. Fixes: 8cd84092d35e ("PM / devfreq: Add resource-managed function for devfreq device") Cc: stable@vger.kernel.org Acked-by: Chanwoo Choi Signed-off-by: Bjorn Andersson Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 78fb496ecb4e..99c4021fc33b 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -737,7 +737,7 @@ struct devfreq *devm_devfreq_add_device(struct device *dev, devfreq = devfreq_add_device(dev, profile, governor_name, data); if (IS_ERR(devfreq)) { devres_free(ptr); - return ERR_PTR(-ENOMEM); + return devfreq; } *ptr = devfreq; -- cgit v1.2.3 From 63f1e05f7fe9ca509c60154d6a833abf96eecdc9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 6 Dec 2017 14:20:15 -0600 Subject: PM / devfreq: Fix potential NULL pointer dereference in governor_store df->governor is being dereferenced before it is null checked, hence there is a potential null pointer dereference. Notice that df->governor is being null checked at line 1004: if (df->governor) {, which implies it might be null. Fix this by null checking df->governor before dereferencing it. Addresses-Coverity-ID: 1401988 ("Dereference before null check") Fixes: bcf23c79c4e4 ("PM / devfreq: Fix available_governor sysfs") Signed-off-by: Gustavo A. R. 
Silva Reviewed-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 99c4021fc33b..fe2af6aa88fc 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -996,7 +996,8 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr, if (df->governor == governor) { ret = 0; goto out; - } else if (df->governor->immutable || governor->immutable) { + } else if ((df->governor && df->governor->immutable) || + governor->immutable) { ret = -EINVAL; goto out; } -- cgit v1.2.3 From 4307413256ac1e09b8f53e8715af3df9e49beec3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Elio=20Petten=C3=B2?= Date: Fri, 29 Dec 2017 09:54:25 +0000 Subject: USB: serial: cp210x: add IDs for LifeScan OneTouch Verio IQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add IDs for the OneTouch Verio IQ that comes with an embedded USB-to-serial converter. 
Signed-off-by: Diego Elio Pettenò Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 7c6273bf5beb..38814225a816 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ + { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ -- cgit v1.2.3 From dc32b5c3e6e2ef29cef76d9ce1b92d394446150e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 Jan 2018 09:28:31 -0600 Subject: capabilities: fix buffer overread on very short xattr If userspace attempted to set a "security.capability" xattr shorter than 4 bytes (e.g. 'setfattr -n security.capability -v x file'), then cap_convert_nscap() read past the end of the buffer containing the xattr value because it accessed the ->magic_etc field without verifying that the xattr value is long enough to contain that field. Fix it by validating the xattr value size first. This bug was found using syzkaller with KASAN. 
The KASAN report was as follows (cleaned up slightly): BUG: KASAN: slab-out-of-bounds in cap_convert_nscap+0x514/0x630 security/commoncap.c:498 Read of size 4 at addr ffff88002d8741c0 by task syz-executor1/2852 CPU: 0 PID: 2852 Comm: syz-executor1 Not tainted 4.15.0-rc6-00200-gcc0aac99d977 #253 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0xe3/0x195 lib/dump_stack.c:53 print_address_description+0x73/0x260 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x235/0x350 mm/kasan/report.c:409 cap_convert_nscap+0x514/0x630 security/commoncap.c:498 setxattr+0x2bd/0x350 fs/xattr.c:446 path_setxattr+0x168/0x1b0 fs/xattr.c:472 SYSC_setxattr fs/xattr.c:487 [inline] SyS_setxattr+0x36/0x50 fs/xattr.c:483 entry_SYSCALL_64_fastpath+0x18/0x85 Fixes: 8db6c34f1dbc ("Introduce v3 namespaced file capabilities") Cc: # v4.14+ Signed-off-by: Eric Biggers Reviewed-by: Serge Hallyn Signed-off-by: James Morris --- security/commoncap.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index 4f8e09340956..48620c93d697 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -348,21 +348,18 @@ static __u32 sansflags(__u32 m) return m & ~VFS_CAP_FLAGS_EFFECTIVE; } -static bool is_v2header(size_t size, __le32 magic) +static bool is_v2header(size_t size, const struct vfs_cap_data *cap) { - __u32 m = le32_to_cpu(magic); if (size != XATTR_CAPS_SZ_2) return false; - return sansflags(m) == VFS_CAP_REVISION_2; + return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2; } -static bool is_v3header(size_t size, __le32 magic) +static bool is_v3header(size_t size, const struct vfs_cap_data *cap) { - __u32 m = le32_to_cpu(magic); - if (size != XATTR_CAPS_SZ_3) return false; - return sansflags(m) == VFS_CAP_REVISION_3; + return 
sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3; } /* @@ -405,7 +402,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, fs_ns = inode->i_sb->s_user_ns; cap = (struct vfs_cap_data *) tmpbuf; - if (is_v2header((size_t) ret, cap->magic_etc)) { + if (is_v2header((size_t) ret, cap)) { /* If this is sizeof(vfs_cap_data) then we're ok with the * on-disk value, so return that. */ if (alloc) @@ -413,7 +410,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, else kfree(tmpbuf); return ret; - } else if (!is_v3header((size_t) ret, cap->magic_etc)) { + } else if (!is_v3header((size_t) ret, cap)) { kfree(tmpbuf); return -EINVAL; } @@ -470,9 +467,9 @@ static kuid_t rootid_from_xattr(const void *value, size_t size, return make_kuid(task_ns, rootid); } -static bool validheader(size_t size, __le32 magic) +static bool validheader(size_t size, const struct vfs_cap_data *cap) { - return is_v2header(size, magic) || is_v3header(size, magic); + return is_v2header(size, cap) || is_v3header(size, cap); } /* @@ -495,7 +492,7 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size) if (!*ivalue) return -EINVAL; - if (!validheader(size, cap->magic_etc)) + if (!validheader(size, cap)) return -EINVAL; if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) return -EPERM; -- cgit v1.2.3 From 98801506552593c9b8ac11021b0cdad12cab4f6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: fscache: Fix the default for fscache_maybe_release_page() Fix the default for fscache_maybe_release_page() for when the cookie isn't valid or the page isn't cached. It mustn't return false as that indicates the page cannot yet be freed. 
The problem with the default is that if, say, there's no cache, but a network filesystem's pages are using up almost all the available memory, a system can OOM because the filesystem ->releasepage() op will not allow them to be released as fscache_maybe_release_page() incorrectly prevents it. This can be tested by writing a sequence of 512MiB files to an AFS mount. It does not affect NFS or CIFS because both of those wrap the call in a check of PG_fscache and it shouldn't bother Ceph as that only has PG_private set whilst writeback is in progress. This might be an issue for 9P, however. Note that the pages aren't entirely stuck. Removing a file or unmounting will clear things because that uses ->invalidatepage() instead. Fixes: 201a15428bd5 ("FS-Cache: Handle pages pending storage that get evicted under OOM conditions") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Jeff Layton Acked-by: Al Viro Tested-by: Marc Dionne cc: stable@vger.kernel.org # 2.6.32+ --- include/linux/fscache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4ff47d4a893..fe0c349684fa 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, { if (fscache_cookie_valid(cookie) && PageFsCache(page)) return __fscache_maybe_release_page(cookie, page, gfp); - return false; + return true; } /** -- cgit v1.2.3 From 7888da95832d50a87bbfdb9f40620ddc66f94b3c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: afs: Potential uninitialized variable in afs_extract_data() Smatch warns that: fs/afs/rxrpc.c:922 afs_extract_data() error: uninitialized symbol 'remote_abort'. Smatch is right that "remote_abort" might be uninitialized when we pass it to afs_set_call_complete(). I don't know if that function uses the uninitialized variable. 
Anyway, the comment for rxrpc_kernel_recv_data(), says that "*_abort should also be initialised to 0." and this patch does that. Signed-off-by: Dan Carpenter Signed-off-by: David Howells --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index ea1460b9b71a..e1126659f043 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count, { struct afs_net *net = call->net; enum afs_call_state state; - u32 remote_abort; + u32 remote_abort = 0; int ret; _enter("{%s,%zu},,%zu,%d", -- cgit v1.2.3 From 440fbc3a8a694467ba641234cedb96c28ab2d5fb Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: afs: Fix unlink Repeating creation and deletion of a file on an afs mount will run the box out of memory, e.g.: dd if=/dev/zero of=/afs/scratch/m0 bs=$((1024*1024)) count=512 rm /afs/scratch/m0 The problem seems to be that it's not properly decrementing the nlink count so that the inode can be scrapped. Note that this doesn't fix local creation followed by remote deletion. That's harder to handle and will require a separate patch as we're not told that the file has been deleted - only that the directory has changed. Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/dir.c | 37 +++++++++++++++++++++++++++++-------- fs/afs/inode.c | 4 ++++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ff8d5bf4354f..23c7f395d718 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -895,20 +895,38 @@ error: * However, if we didn't have a callback promise outstanding, or it was * outstanding on a different server, then it won't break it either... 
*/ -static int afs_dir_remove_link(struct dentry *dentry, struct key *key) +static int afs_dir_remove_link(struct dentry *dentry, struct key *key, + unsigned long d_version_before, + unsigned long d_version_after) { + bool dir_valid; int ret = 0; + /* There were no intervening changes on the server if the version + * number we got back was incremented by exactly 1. + */ + dir_valid = (d_version_after == d_version_before + 1); + if (d_really_is_positive(dentry)) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - kdebug("AFS_VNODE_DELETED"); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - - ret = afs_validate(vnode, key); - if (ret == -ESTALE) + if (dir_valid) { + drop_nlink(&vnode->vfs_inode); + if (vnode->vfs_inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } ret = 0; + } else { + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + kdebug("AFS_VNODE_DELETED"); + + ret = afs_validate(vnode, key); + if (ret == -ESTALE) + ret = 0; + } _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); } @@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) struct afs_fs_cursor fc; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct key *key; + unsigned long d_version = (unsigned long)dentry->d_fsdata; int ret; _enter("{%x:%u},{%pd}", @@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) afs_vnode_commit_status(&fc, dvnode, fc.cb_break); ret = afs_end_vnode_operation(&fc); if (ret == 0) - ret = afs_dir_remove_link(dentry, key); + ret = afs_dir_remove_link( + dentry, key, d_version, + (unsigned long)dvnode->status.data_version); } error_key: diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 3415eb7484f6..1e81864ef0b2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct 
key *key) } read_sequnlock_excl(&vnode->cb_lock); + + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + clear_nlink(&vnode->vfs_inode); + if (valid) goto valid; -- cgit v1.2.3 From afae457d874860a7e299d334f59eede5f3ad4b47 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: afs: Fix missing error handling in afs_write_end() afs_write_end() is missing page unlock and put if afs_fill_page() fails. Reported-by: Al Viro Signed-off-by: David Howells --- fs/afs/write.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index cb5f8a3df577..9370e2feb999 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping, ret = afs_fill_page(vnode, key, pos + copied, len - copied, page); if (ret < 0) - return ret; + goto out; } SetPageUptodate(page); } @@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping, set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); + ret = copied; + +out: unlock_page(page); put_page(page); - - return copied; + return ret; } /* -- cgit v1.2.3 From ecb101aed86156ec7cd71e5dca668e09146e6994 Mon Sep 17 00:00:00 2001 From: John Sperbeck Date: Sun, 31 Dec 2017 21:24:58 -0800 Subject: powerpc/mm: Fix SEGV on mapped region to return SEGV_ACCERR The recent refactoring of the powerpc page fault handler in commit c3350602e876 ("powerpc/mm: Make bad_area* helper functions") caused access to protected memory regions to indicate SEGV_MAPERR instead of the traditional SEGV_ACCERR in the si_code field of a user-space signal handler. This can confuse debug libraries that temporarily change the protection of memory regions, and expect to use SEGV_ACCERR as an indication to restore access to a region. This commit restores the previous behavior. 
The following program exhibits the issue: $ ./repro read || echo "FAILED" $ ./repro write || echo "FAILED" $ ./repro exec || echo "FAILED" #include #include #include #include #include #include #include static void segv_handler(int n, siginfo_t *info, void *arg) { _exit(info->si_code == SEGV_ACCERR ? 0 : 1); } int main(int argc, char **argv) { void *p = NULL; struct sigaction act = { .sa_sigaction = segv_handler, .sa_flags = SA_SIGINFO, }; assert(argc == 2); p = mmap(NULL, getpagesize(), (strcmp(argv[1], "write") == 0) ? PROT_READ : 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); assert(p != MAP_FAILED); assert(sigaction(SIGSEGV, &act, NULL) == 0); if (strcmp(argv[1], "read") == 0) printf("%c", *(unsigned char *)p); else if (strcmp(argv[1], "write") == 0) *(unsigned char *)p = 0; else if (strcmp(argv[1], "exec") == 0) ((void (*)(void))p)(); return 1; /* failed to generate SEGV */ } Fixes: c3350602e876 ("powerpc/mm: Make bad_area* helper functions") Cc: stable@vger.kernel.org # v4.14+ Signed-off-by: John Sperbeck Acked-by: Benjamin Herrenschmidt [mpe: Add commit references in change log] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4797d08581ce..6e1e39035380 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } +static noinline int bad_access(struct pt_regs *regs, unsigned long address) +{ + return __bad_area(regs, address, SEGV_ACCERR); +} + static int do_sigbus(struct pt_regs *regs, unsigned long address, unsigned int fault) { @@ -490,7 +495,7 @@ retry: good_area: if (unlikely(access_error(is_write, is_exec, vma))) - return bad_area(regs, address); + return bad_access(regs, address); /* * If for any reason at all we couldn't handle the fault, -- cgit v1.2.3 From 
32bb954dbf6db98562cb4477608dc546421caaf6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 2 Jan 2018 12:00:22 +0100 Subject: xtensa: shut up gcc-8 warnings Many uses of strncpy() on xtensa cause a warning like arch/xtensa/include/asm/string.h:56:42: warning: array subscript is above array bounds [-Warray-bounds] : "0" (__dest), "1" (__src), "r" (__src+__n) This avoids the warning by turning the pointer arithmetic into an integer operation that does not get checked the same way. Signed-off-by: Arnd Bergmann Signed-off-by: Max Filippov --- arch/xtensa/include/asm/string.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/include/asm/string.h b/arch/xtensa/include/asm/string.h index 586bad9fe187..89b51a0c752f 100644 --- a/arch/xtensa/include/asm/string.h +++ b/arch/xtensa/include/asm/string.h @@ -53,7 +53,7 @@ static inline char *strncpy(char *__dest, const char *__src, size_t __n) "bne %1, %5, 1b\n" "2:" : "=r" (__dest), "=r" (__src), "=&r" (__dummy) - : "0" (__dest), "1" (__src), "r" (__src+__n) + : "0" (__dest), "1" (__src), "r" ((uintptr_t)__src+__n) : "memory"); return __xdest; @@ -101,7 +101,7 @@ static inline int strncmp(const char *__cs, const char *__ct, size_t __n) "2:\n\t" "sub %2, %2, %3" : "=r" (__cs), "=r" (__ct), "=&r" (__res), "=&r" (__dummy) - : "0" (__cs), "1" (__ct), "r" (__cs+__n)); + : "0" (__cs), "1" (__ct), "r" ((uintptr_t)__cs+__n)); return __res; } -- cgit v1.2.3 From e0093a89f2386f12cc87047b43e93c3c6e15e94e Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 19 Dec 2017 20:35:20 -0800 Subject: drm/i915/psr: Fix register name mess up. Commit 77affa31722b ("drm/i915/psr: Fix compiler warnings for hsw_psr_disable()") swapped status and control registers while fixing indentation. The _ctl at the end of the status register name must have led to this.
Fixes: 77affa31722b ("drm/i915/psr: Fix compiler warnings for hsw_psr_disable()") References: https://www.mrc-cbu.cam.ac.uk/people/matt.davis/cmabridge/ Cc: Chris Wilson Cc: Rodrigo Vivi Signed-off-by: Dhinakaran Pandiyan Link: https://patchwork.freedesktop.org/patch/msgid/20171220043520.2599-1-dhinakaran.pandiyan@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit 14c6547d6df641d3e41fa4f4164f6e267ebfab89) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_psr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 6e3b430fccdc..55ea5eb3b7df 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -590,7 +590,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { - i915_reg_t psr_ctl; + i915_reg_t psr_status; u32 psr_status_mask; if (dev_priv->psr.aux_frame_sync) @@ -599,24 +599,24 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, 0); if (dev_priv->psr.psr2_support) { - psr_ctl = EDP_PSR2_CTL; + psr_status = EDP_PSR2_STATUS_CTL; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & + I915_WRITE(EDP_PSR2_CTL, + I915_READ(EDP_PSR2_CTL) & ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE)); } else { - psr_ctl = EDP_PSR_STATUS_CTL; + psr_status = EDP_PSR_STATUS_CTL; psr_status_mask = EDP_PSR_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & ~EDP_PSR_ENABLE); + I915_WRITE(EDP_PSR_CTL, + I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); } /* Wait till PSR is idle */ if (intel_wait_for_register(dev_priv, - psr_ctl, psr_status_mask, 0, + psr_status, psr_status_mask, 0, 2000)) DRM_ERROR("Timed out waiting for PSR Idle State\n"); -- cgit v1.2.3 From 3488d0237f6364614f0c59d6d784bb79b11eeb92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 8 Dec 
2017 23:37:36 +0200 Subject: drm/i915: Disable DC states around GMBUS on GLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent the DMC from destroying GMBUS transfers on GLK. GMBUS lives in PG1 so DC off is all we need. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171208213739.16388-1-ville.syrjala@linux.intel.com Reviewed-by: Dhinakaran Pandiyan (cherry picked from commit 156961ae7bdf6feb72778e8da83d321b273343fd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_runtime_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8af286c63d3b..9bf46ab211cb 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1786,6 +1786,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ -- cgit v1.2.3 From eda41bdc571e5c51d817c2e8b4578d34a9e383f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 13 Nov 2017 15:36:22 +0200 Subject: drm/i915: Put all non-blocking modesets onto an ordered wq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have plenty of global registers and whatnot programmed without any further locking by the modeset code. Currently non-blocking modesets are allowed to execute in parallel which could corrupt said registers. To avoid the problem let's run all non-blocking modesets on an ordered workqueue. We still put page flips etc.
to system_unbound_wq allowing page flips on one pipe to execute in parallel with page flips or a modeset on another pipe (assuming no known state is shared between them, at which point they would have been added to the same atomic commit and serialized that way). Blocking modesets are already serialized with each other by connection_mutex, and thus are safe. To serialize them with non-blocking modesets we just flush the workqueue before executing blocking modesets. Cc: Daniel Vetter Cc: Maarten Lankhorst Fixes: 94f050246b42 ("drm/i915: nonblocking commit") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171113133622.8593-1-ville.syrjala@linux.intel.com Acked-by: Daniel Vetter Reviewed-by: Maarten Lankhorst (cherry picked from commit 757fffcfdffb6c0dd46c1b264091c36b4e5a86ae) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/intel_display.c | 14 +++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 54b5d4c582b6..e143004e66d5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2368,6 +2368,9 @@ struct drm_i915_private { */ struct workqueue_struct *wq; + /* ordered wq for modesets */ + struct workqueue_struct *modeset_wq; + /* Display functions */ struct drm_i915_display_funcs display; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 30cf273d57aa..123585eeb87d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12544,11 +12544,15 @@ static int intel_atomic_commit(struct drm_device *dev, INIT_WORK(&state->commit_work, intel_atomic_commit_work); i915_sw_fence_commit(&intel_state->commit_ready); - if (nonblock) + if (nonblock && intel_state->modeset) { + queue_work(dev_priv->modeset_wq, &state->commit_work); + } else if (nonblock) { queue_work(system_unbound_wq,
&state->commit_work); - else + } else { + if (intel_state->modeset) + flush_workqueue(dev_priv->modeset_wq); intel_atomic_commit_tail(state); - + } return 0; } @@ -14462,6 +14466,8 @@ int intel_modeset_init(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; + dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + drm_mode_config_init(dev); dev->mode_config.min_width = 0; @@ -15270,6 +15276,8 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_cleanup_gt_powersave(dev_priv); intel_teardown_gmbus(dev_priv); + + destroy_workqueue(dev_priv->modeset_wq); } void intel_connector_attach_encoder(struct intel_connector *connector, -- cgit v1.2.3 From c1f08c419764439bfa2d3f33d2fdef9d7013fc47 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 3 Dec 2017 15:36:20 -0800 Subject: documentation/gpu/i915: fix docs build error after file rename Fix documentation build errors after intel_guc_loader.c was renamed to intel_guc_fw.c. Error: Cannot open file ../drivers/gpu/drm/i915/intel_guc_loader.c WARNING: kernel-doc '../scripts/kernel-doc -rst -enable-lineno -function GuC-specific firmware loader ../drivers/gpu/drm/i915/intel_guc_loader.c' failed with return code 1 Error: Cannot open file ../drivers/gpu/drm/i915/intel_guc_loader.c Error: Cannot open file ../drivers/gpu/drm/i915/intel_guc_loader.c WARNING: kernel-doc '../scripts/kernel-doc -rst -enable-lineno -internal ../drivers/gpu/drm/i915/intel_guc_loader.c' failed with return code 2 Fixes: e8668bbcb0f9 ("drm/i915/guc: Rename intel_guc_loader.c to intel_guc_fw.c") Signed-off-by: Randy Dunlap Cc: Michal Wajdeczko Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1b214f53-47f5-bef3-f58e-8136de5678ed@infradead.org (cherry picked from commit 006c23327f8de8575508c458131b304188d426f7) Signed-off-by: Jani Nikula --- Documentation/gpu/i915.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst 
index 2e7ee0313c1c..e21698e16534 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -341,10 +341,10 @@ GuC GuC-specific firmware loader ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c :doc: GuC-specific firmware loader -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c +.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c :internal: GuC-based command submission -- cgit v1.2.3 From df29c9db8ace4497a61f3b3d33c2b8a7fd4b7b8e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 4 Dec 2017 14:32:46 +0100 Subject: omapdrm/dss/hdmi4_cec: fix interrupt handling The omap4 CEC hardware cannot tell a Nack from a Low Drive from an Arbitration Lost error, so just report a Nack, which is almost certainly the reason for the error anyway. This also simplifies the implementation. The only three interrupts that need to be enabled are: Transmit Buffer Full/Empty Change event: triggered when the transmit finished successfully and cleared the buffer. Receiver FIFO Not Empty event: triggered when a message was received. Frame Retransmit Count Exceeded event: triggered when a transmit failed repeatedly, usually due to the message being Nacked. Other reasons are possible (Low Drive, Arbitration Lost) but there is no way to know. If this happens the TX buffer needs to be cleared manually. While testing various error conditions I noticed that the hardware can receive messages up to 18 bytes in total, which exceeds the legal maximum of 16. This could cause a buffer overflow, so we check for this and constrain the size to 16 bytes. The old incorrect interrupt handler could cause the CEC framework to enter into a bad state because it mis-detected the "Start Bit Irregularity event" as an ARB_LOST transmit error when it actually is a receive error which should be ignored. 
Signed-off-by: Hans Verkuil Reported-by: Henrik Austad Tested-by: Henrik Austad Tested-by: Hans Verkuil Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 46 +++++++-------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index e626eddf24d5..23db74ae1826 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -78,6 +78,8 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) /* then read the message */ msg.len = cnt & 0xf; + if (msg.len > CEC_MAX_MSG_SIZE - 2) + msg.len = CEC_MAX_MSG_SIZE - 2; msg.msg[0] = hdmi_read_reg(core->base, HDMI_CEC_RX_CMD_HEADER); msg.msg[1] = hdmi_read_reg(core->base, @@ -104,26 +106,6 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) } } -static void hdmi_cec_transmit_fifo_empty(struct hdmi_core_data *core, u32 stat1) -{ - if (stat1 & 2) { - u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); - - cec_transmit_done(core->adap, - CEC_TX_STATUS_NACK | - CEC_TX_STATUS_MAX_RETRIES, - 0, (dbg3 >> 4) & 7, 0, 0); - } else if (stat1 & 1) { - cec_transmit_done(core->adap, - CEC_TX_STATUS_ARB_LOST | - CEC_TX_STATUS_MAX_RETRIES, - 0, 0, 0, 0); - } else if (stat1 == 0) { - cec_transmit_done(core->adap, CEC_TX_STATUS_OK, - 0, 0, 0, 0); - } -} - void hdmi4_cec_irq(struct hdmi_core_data *core) { u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); @@ -132,27 +114,21 @@ void hdmi4_cec_irq(struct hdmi_core_data *core) hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_0, stat0); hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, stat1); - if (stat0 & 0x40) + if (stat0 & 0x20) { + cec_transmit_done(core->adap, CEC_TX_STATUS_OK, + 0, 0, 0, 0); REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); - else if (stat0 & 0x24) - hdmi_cec_transmit_fifo_empty(core, stat1); - if (stat1 & 2) { + } else if (stat1 & 0x02) { u32 dbg3 = 
hdmi_read_reg(core->base, HDMI_CEC_DBG_3); cec_transmit_done(core->adap, CEC_TX_STATUS_NACK | CEC_TX_STATUS_MAX_RETRIES, 0, (dbg3 >> 4) & 7, 0, 0); - } else if (stat1 & 1) { - cec_transmit_done(core->adap, - CEC_TX_STATUS_ARB_LOST | - CEC_TX_STATUS_MAX_RETRIES, - 0, 0, 0, 0); + REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); } if (stat0 & 0x02) hdmi_cec_received_msg(core); - if (stat1 & 0x3) - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); } static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) @@ -231,18 +207,14 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) /* * Enable CEC interrupts: * Transmit Buffer Full/Empty Change event - * Transmitter FIFO Empty event * Receiver FIFO Not Empty event */ - hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x26); + hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x22); /* * Enable CEC interrupts: - * RX FIFO Overrun Error event - * Short Pulse Detected event * Frame Retransmit Count Exceeded event - * Start Bit Irregularity event */ - hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x0f); + hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x02); /* cec calibration enable (self clearing) */ hdmi_write_reg(core->base, HDMI_CEC_SETUP, 0x03); -- cgit v1.2.3 From 8a9bd4f8ebc6800bfc0596e28631ff6809a2f615 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 6 Dec 2017 10:30:39 +0100 Subject: s390/dasd: fix wrongly assigned configuration data We store per path and per device configuration data to identify the path or device correctly. The per path configuration data might get mixed up if the original request gets into error recovery and is started with a random path mask. This would lead to a wrong identification of a path in case of a CUIR event for example. Fix by copying the path mask from the original request to the error recovery request in case it is a path verification request. 
Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_3990_erp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index c94b606e0df8..ee14d8e45c97 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -2803,6 +2803,16 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr) erp = dasd_3990_erp_handle_match_erp(cqr, erp); } + + /* + * For path verification work we need to stick with the path that was + * originally chosen so that the per path configuration data is + * assigned correctly. + */ + if (test_bit(DASD_CQR_VERIFY_PATH, &erp->flags) && cqr->lpm) { + erp->lpm = cqr->lpm; + } + if (device->features & DASD_FEATURE_ERPLOG) { /* print current erp_chain */ dev_err(&device->cdev->dev, -- cgit v1.2.3 From fe08f34d066f4404934a509b6806db1a4f700c86 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 1 Jan 2018 09:50:50 +0100 Subject: ALSA: pcm: Remove incorrect snd_BUG_ON() usages syzkaller triggered kernel warnings through PCM OSS emulation at closing a stream: WARNING: CPU: 0 PID: 3502 at sound/core/pcm_lib.c:1635 snd_pcm_hw_param_first+0x289/0x690 sound/core/pcm_lib.c:1635 Call Trace: .... snd_pcm_hw_param_near.constprop.27+0x78d/0x9a0 sound/core/oss/pcm_oss.c:457 snd_pcm_oss_change_params+0x17d3/0x3720 sound/core/oss/pcm_oss.c:969 snd_pcm_oss_make_ready+0xaa/0x130 sound/core/oss/pcm_oss.c:1128 snd_pcm_oss_sync+0x257/0x830 sound/core/oss/pcm_oss.c:1638 snd_pcm_oss_release+0x20b/0x280 sound/core/oss/pcm_oss.c:2431 __fput+0x327/0x7e0 fs/file_table.c:210 .... This happens while it tries to open and set up the aloop device concurrently. The warning above (invoked from snd_BUG_ON() macro) is to detect the unexpected logical error where snd_pcm_hw_refine() call shouldn't fail. The theory is true for the case where the hw_params config rules are static. 
But for an aloop device, the hw_params rule condition does vary dynamically depending on the connected target; when another device is opened and changes the parameters, the device connected in another side is also affected, and it caused the error from snd_pcm_hw_refine(). That is, the simplest "solution" for this is to remove the incorrect assumption of static rules, and treat such an error as a normal error path. As there are a couple of other places using snd_BUG_ON() incorrectly, this patch removes these spurious snd_BUG_ON() calls. Reported-by: syzbot+6f11c7e2a1b91d466432@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 1 - sound/core/pcm_lib.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index e49f448ee04f..ceaa51f76591 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -455,7 +455,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm, v = snd_pcm_hw_param_last(pcm, params, var, dir); else v = snd_pcm_hw_param_first(pcm, params, var, dir); - snd_BUG_ON(v < 0); return v; } diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 10e7ef7a8804..db7894bb028c 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1632,7 +1632,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm, return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); - if (snd_BUG_ON(err < 0)) + if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); @@ -1678,7 +1678,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm, return changed; if (params->rmask) { int err = snd_pcm_hw_refine(pcm, params); - if (snd_BUG_ON(err < 0)) + if (err < 0) return err; } return snd_pcm_hw_param_value(params, var, dir); -- cgit v1.2.3 From 4aac2caff30fdef1db8403af81e79807811d22ea Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 28 Dec 2017 03:46:48 +0000 Subject: xen/pvcalls: use 
GFP_ATOMIC under spin lock A spin lock is taken here so we should use GFP_ATOMIC. Fixes: 9774c6cca266 ("xen/pvcalls: implement accept command") Signed-off-by: Wei Yongjun Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/pvcalls-front.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index d1e1d8d2b9d5..4c789e61554b 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -805,7 +805,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) pvcalls_exit(); return ret; } - map2 = kzalloc(sizeof(*map2), GFP_KERNEL); + map2 = kzalloc(sizeof(*map2), GFP_ATOMIC); if (map2 == NULL) { clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); -- cgit v1.2.3 From af2e01da344e9f90e38d039c39385882d7364c0f Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Tue, 12 Dec 2017 12:38:37 +0100 Subject: docs: fix, intel_guc_loader.c has been moved to intel_guc_fw.c With commit d9e2e0143c the 'GuC-specific firmware loader' doc section was removed from intel_guc_loader.c without a replacement. So let's remove it from the Kernel-doc:: .. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c :doc: GuC-specific firmware loader With commit e8668bbcb0 intel_guc_loader.c was renamed to intel_guc_fw.c and, to name just one, intel_guc_init_hw() was renamed to intel_guc_fw_upload().
Since we get errors in the Sphinx build like: - Error: Cannot open file ./drivers/gpu/drm/i915/intel_guc_loader.c Change the kernel-doc directive from intel_guc_loader.c to intel_guc_fw.c Signed-off-by: Markus Heiser [danvet: Rebase onto the partial fix 006c23327f8d ("documentation/gpu/i915: fix docs build error after file rename")] Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1513078717-12373-1-git-send-email-markus.heiser@darmarit.de (cherry picked from commit 0132a1a5d44d2cd32a249dbe999a88c2134a6bd1) Signed-off-by: Jani Nikula --- Documentation/gpu/i915.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index e21698e16534..e94d3ac2bdd0 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -341,9 +341,6 @@ GuC GuC-specific firmware loader ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c - :doc: GuC-specific firmware loader - .. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c :internal: -- cgit v1.2.3 From 57d72e159b60456c8bb281736c02ddd3164037aa Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 14 Dec 2017 11:03:01 +0000 Subject: iommu/arm-smmu-v3: Don't free page table ops twice Kasan reports a double free when finalise_stage_fn fails: the io_pgtable ops are freed by arm_smmu_domain_finalise and then again by arm_smmu_domain_free. Prevent this by leaving pgtbl_ops empty on failure. 
Cc: Fixes: 48ec83bcbcf5 ("iommu/arm-smmu: Add initial driver support for ARM SMMUv3 devices") Reviewed-by: Robin Murphy Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f122071688fd..db4281d0e269 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1698,13 +1698,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_end = (1UL << ias) - 1; domain->geometry.force_aperture = true; - smmu_domain->pgtbl_ops = pgtbl_ops; ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); - if (ret < 0) + if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); + return ret; + } - return ret; + smmu_domain->pgtbl_ops = pgtbl_ops; + return 0; } static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) -- cgit v1.2.3 From 563b5cbe334e9503ab2b234e279d500fc4f76018 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jan 2018 12:33:14 +0000 Subject: iommu/arm-smmu-v3: Cope with duplicated Stream IDs For PCI devices behind an aliasing PCIe-to-PCI/X bridge, the bridge alias to DevFn 0.0 on the subordinate bus may match the original RID of the device, resulting in the same SID being present in the device's fwspec twice. This causes trouble later in arm_smmu_write_strtab_ent() when we wind up visiting the STE a second time and find it already live. Avoid the issue by giving arm_smmu_install_ste_for_dev() the cleverness to skip over duplicates. It seems mildly counterintuitive compared to preventing the duplicates from existing in the first place, but since the DT and ACPI probe paths build their fwspecs differently, this is actually the cleanest and most self-contained way to deal with it. 
Cc: Fixes: 8f78515425da ("iommu/arm-smmu: Implement of_xlate() for SMMUv3") Reported-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Tested-by: Jayachandran C. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index db4281d0e269..744592d330ca 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1733,7 +1733,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { - int i; + int i, j; struct arm_smmu_master_data *master = fwspec->iommu_priv; struct arm_smmu_device *smmu = master->smmu; @@ -1741,6 +1741,13 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) u32 sid = fwspec->ids[i]; __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); + /* Bridged PCI devices may end up with duplicated IDs */ + for (j = 0; j < i; j++) + if (fwspec->ids[j] == sid) + break; + if (j < i) + continue; + arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); } } -- cgit v1.2.3 From 55a5ec9b77106ffc05e8c40d7568432bf4696d7b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 2 Jan 2018 11:45:07 -0500 Subject: Revert "net: core: dev_get_valid_name is now the same as dev_alloc_name_ns" This reverts commit 87c320e51519a83c496ab7bfb4e96c8f9c001e89. Changing the error return code in some situations turns out to be harmful in practice. In particular Michael Ellerman reports that DHCP fails on his powerpc machines, and this revert gets things working again. Johannes Berg agrees that this revert is the best course of action for now. Fixes: 029b6d140550 ("Revert "net: core: maybe return -EEXIST in __dev_alloc_name"") Reported-by: Michael Ellerman Signed-off-by: David S. 
Miller --- net/core/dev.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 01ee854454a8..0e0ba36eeac9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name); int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { - return dev_alloc_name_ns(net, dev, name); + BUG_ON(!net); + + if (!dev_valid_name(name)) + return -EINVAL; + + if (strchr(name, '%')) + return dev_alloc_name_ns(net, dev, name); + else if (__dev_get_by_name(net, name)) + return -EEXIST; + else if (dev->name != name) + strlcpy(dev->name, name, IFNAMSIZ); + + return 0; } EXPORT_SYMBOL(dev_get_valid_name); -- cgit v1.2.3 From beed9263f4000c48a5c48912f26576f6fa091181 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 13 Dec 2017 13:50:07 +0200 Subject: btrfs: Fix flush bio leak Commit e0ae99941423 ("btrfs: preallocate device flush bio") reworked the way the flush bio is allocated and used. Concretely it allocates the bio in __alloc_device and then re-uses it multiple times with a very simple endio routine that just calls complete() without consuming a reference. Allocated bios by default come with a ref count of 1, which is then consumed by the endio routine (or not, in which case they should be bio_put by the caller). The way the implementation works now is that the flush bio has a refcount of 2 and we only ever bio_put it once, leaving it to hang indefinitely. Fix this by removing the extra bio_get in __alloc_device.
Fixes: e0ae99941423 ("btrfs: preallocate device flush bio") Signed-off-by: Nikolay Borisov Reviewed-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d48b24e54366..94d28f549837 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -237,7 +237,6 @@ static struct btrfs_device *__alloc_device(void) kfree(dev); return ERR_PTR(-ENOMEM); } - bio_get(dev->flush_bio); INIT_LIST_HEAD(&dev->dev_list); INIT_LIST_HEAD(&dev->dev_alloc_list); -- cgit v1.2.3 From ec35e48b286959991cdbb886f1bdeda4575c80b4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Dec 2017 11:58:27 -0800 Subject: btrfs: fix refcount_t usage when deleting btrfs_delayed_nodes refcounts have a generic implementation and an asm optimized one. The generic version has extra debugging to make sure that once a refcount goes to zero, refcount_inc won't increase it. The btrfs delayed inode code wasn't expecting this, and we're tripping over the warnings when the generic refcounts are used. We ended up with this race: Process A Process B btrfs_get_delayed_node() spin_lock(root->inode_lock) radix_tree_lookup() __btrfs_release_delayed_node() refcount_dec_and_test(&delayed_node->refs) our refcount is now zero refcount_add(2) <--- warning here, refcount unchanged spin_lock(root->inode_lock) radix_tree_delete() With the generic refcounts, we actually warn again when process B above tries to release his refcount because refcount_add() turned into a no-op. We saw this in production on older kernels without the asm optimized refcounts. The fix used here is to use refcount_inc_not_zero() to detect when the object is in the middle of being freed and return NULL. This is almost always the right answer anyway, since we usually end up pitching the delayed_node if it didn't have fresh data in it. 
This also changes __btrfs_release_delayed_node() to remove the extra check for zero refcounts before radix tree deletion. btrfs_get_delayed_node() was the only path that was allowing refcounts to go from zero to one. Fixes: 6de5f18e7b0da ("btrfs: fix refcount_t usage when deleting btrfs_delayed_node") CC: # 4.12+ Signed-off-by: Chris Mason Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 5d73f79ded8b..056276101c63 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( spin_lock(&root->inode_lock); node = radix_tree_lookup(&root->delayed_nodes_tree, ino); + if (node) { if (btrfs_inode->delayed_node) { refcount_inc(&node->refs); /* can be accessed */ @@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node( spin_unlock(&root->inode_lock); return node; } - btrfs_inode->delayed_node = node; - /* can be accessed and cached in the inode */ - refcount_add(2, &node->refs); + + /* + * It's possible that we're racing into the middle of removing + * this node from the radix tree. In this case, the refcount + * was zero and it should never go back to one. Just return + * NULL like it was never in the radix at all; our release + * function is in the process of removing it. + * + * Some implementations of refcount_inc refuse to bump the + * refcount once it has hit zero. If we don't do this dance + * here, refcount_inc() may decide to just WARN_ONCE() instead + * of actually bumping the refcount. + * + * If this node is properly in the radix, we want to bump the + * refcount twice, once for the inode and once for this get + * operation. 
+ */ + if (refcount_inc_not_zero(&node->refs)) { + refcount_inc(&node->refs); + btrfs_inode->delayed_node = node; + } else { + node = NULL; + } + spin_unlock(&root->inode_lock); return node; } @@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node( mutex_unlock(&delayed_node->mutex); if (refcount_dec_and_test(&delayed_node->refs)) { - bool free = false; struct btrfs_root *root = delayed_node->root; + spin_lock(&root->inode_lock); - if (refcount_read(&delayed_node->refs) == 0) { - radix_tree_delete(&root->delayed_nodes_tree, - delayed_node->inode_id); - free = true; - } + /* + * Once our refcount goes to zero, nobody is allowed to bump it + * back up. We can delete it now. + */ + ASSERT(refcount_read(&delayed_node->refs) == 0); + radix_tree_delete(&root->delayed_nodes_tree, + delayed_node->inode_id); spin_unlock(&root->inode_lock); - if (free) - kmem_cache_free(delayed_node_cache, delayed_node); + kmem_cache_free(delayed_node_cache, delayed_node); } } -- cgit v1.2.3 From 7a0a87160a1dc09220ec485b31d0f82f687a053f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 2 Jan 2018 11:00:57 +0100 Subject: ALSA: pcm: Set config update bits only when really changed The PCM config space refine codes touch the parameter rmask and cmask bits when the given config parameter is changed. But in most places it checks only whether the changed value is non-zero or not, and they don't consider whether a negative error value is returned. This will lead to the incorrect update bits set upon the error path. Fix the codes to check properly the return code whether it's really updated or an error. 
Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 10 +++++----- sound/core/pcm_lib.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index e49f448ee04f..5d131088ac7c 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -186,7 +186,7 @@ static int _snd_pcm_hw_param_mask(struct snd_pcm_hw_params *params, { int changed; changed = snd_mask_refine(hw_param_mask(params, var), val); - if (changed) { + if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } @@ -233,7 +233,7 @@ static int _snd_pcm_hw_param_min(struct snd_pcm_hw_params *params, val, open); else return -EINVAL; - if (changed) { + if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } @@ -294,7 +294,7 @@ static int _snd_pcm_hw_param_max(struct snd_pcm_hw_params *params, val, open); else return -EINVAL; - if (changed) { + if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } @@ -500,7 +500,7 @@ static int _snd_pcm_hw_param_set(struct snd_pcm_hw_params *params, } } else return -EINVAL; - if (changed) { + if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } @@ -540,7 +540,7 @@ static int _snd_pcm_hw_param_setinteger(struct snd_pcm_hw_params *params, { int changed; changed = snd_interval_setinteger(hw_param_interval(params, var)); - if (changed) { + if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 10e7ef7a8804..bfff8d001466 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1603,7 +1603,7 @@ static int _snd_pcm_hw_param_first(struct snd_pcm_hw_params *params, changed = snd_interval_refine_first(hw_param_interval(params, var)); else return -EINVAL; - if (changed) { + if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } @@ -1649,7 +1649,7 @@ static int _snd_pcm_hw_param_last(struct snd_pcm_hw_params *params, 
changed = snd_interval_refine_last(hw_param_interval(params, var)); else return -EINVAL; - if (changed) { + if (changed > 0) { params->cmask |= 1 << var; params->rmask |= 1 << var; } -- cgit v1.2.3 From 23263ec86a5f44312d2899323872468752324107 Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Mon, 25 Dec 2017 10:43:49 +0800 Subject: ip6_tunnel: disable dst caching if tunnel is dual-stack When an ip6_tunnel is in mode 'any', where the transport layer protocol can be either 4 or 41, dst_cache must be disabled. This is because xfrm policies might apply to only one of the two protocols. Caching dst would cause xfrm policies for one protocol incorrectly used for the other. Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 931c38f6ff4a..b263c809d8d4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1074,10 +1074,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); } - } else if (!(t->parms.flags & - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { - /* enable the cache only only if the routing decision does - * not depend on the current inner header value + } else if (t->parms.proto != 0 && !(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | + IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only if neither the outer protocol nor the + * routing decision depends on the current inner header value */ use_cache = true; } -- cgit v1.2.3 From 52a589d51f1008f62569bf89e95b26221ee76690 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Dec 2017 14:43:58 +0800 Subject: geneve: update skb dst pmtu on tx path Commit a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path") has fixed a performance issue caused by the change of lower dev's mtu for vxlan. 
The same thing needs to be done for geneve as well. Note that geneve cannot adjust its mtu according to lower dev's mtu when creating it. The performance is very low later when netperfing over it without fixing the mtu manually. This patch could also avoid this issue. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/geneve.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index b718a02a6bb6..0a48b3073d3d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -825,6 +825,13 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(rt)) return PTR_ERR(rt); + if (skb_dst(skb)) { + int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - + GENEVE_BASE_HLEN - info->options_len - 14; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + } + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); @@ -864,6 +871,13 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (IS_ERR(dst)) return PTR_ERR(dst); + if (skb_dst(skb)) { + int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - + GENEVE_BASE_HLEN - info->options_len - 14; + + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + } + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); -- cgit v1.2.3 From 2fa771be953a17f8e0a9c39103464c2574444c62 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Dec 2017 14:45:12 +0800 Subject: ip6_tunnel: allow ip6gre dev mtu to be set below 1280 Commit 582442d6d5bc ("ipv6: Allow the MTU of ipip6 tunnel to be set below 1280") fixed a mtu setting issue. It works for ipip6 tunnel. But ip6gre dev updates the mtu also with ip6_tnl_change_mtu.
Since the inner packet over ip6gre can be ipv4 and its mtu should also be allowed to be set below 1280, the same issue also exists on ip6gre. This patch is to fix it by simply changing to check if parms.proto is IPPROTO_IPV6 in ip6_tnl_change_mtu instead, to make ip6gre go to the 'else' branch. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b263c809d8d4..9a7cf355bc8c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1677,11 +1677,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); - if (tnl->parms.proto == IPPROTO_IPIP) { - if (new_mtu < ETH_MIN_MTU) + if (tnl->parms.proto == IPPROTO_IPV6) { + if (new_mtu < IPV6_MIN_MTU) return -EINVAL; } else { - if (new_mtu < IPV6_MIN_MTU) + if (new_mtu < ETH_MIN_MTU) return -EINVAL; } if (new_mtu > 0xFFF8 - dev->hard_header_len) -- cgit v1.2.3 From 8764a8267b128405cf383157d5e9a4a3735d2409 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Dec 2017 08:57:35 +0100 Subject: mlxsw: spectrum_router: Fix NULL pointer deref When we remove the neighbour associated with a nexthop we should always refuse to write the nexthop to the adjacency table. Regardless if it is already present in the table or not. Otherwise, we risk dereferencing the NULL pointer that was set instead of the neighbour. Fixes: a7ff87acd995 ("mlxsw: spectrum_router: Implement next-hop routing") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Signed-off-by: Jiri Pirko Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index be657b8533f0..434b3922b34f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3228,7 +3228,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, { if (!removing) nh->should_offload = 1; - else if (nh->offloaded) + else nh->should_offload = 0; nh->update = 1; } -- cgit v1.2.3 From 90045fc9c78855bdc625a0ab185d97b72a937613 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 25 Dec 2017 09:05:33 +0100 Subject: mlxsw: spectrum: Relax sanity checks during enslavement Since commit 25cc72a33835 ("mlxsw: spectrum: Forbid linking to devices that have uppers") the driver forbids enslavement to netdevs that already have uppers of their own, as this can result in various ordering problems. This requirement proved to be too strict for some users who need to be able to enslave ports to a bridge that already has uppers. In this case, we can allow the enslavement if the bridge is already known to us, as any configuration performed on top of the bridge was already reflected to the device. Fixes: 25cc72a33835 ("mlxsw: spectrum: Forbid linking to devices that have uppers") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Tested-by: Alexander Petrovskiy Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 11 +++++++++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 6 ++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9bd8d28de152..c3837ca7a705 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4376,7 +4376,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) { + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; @@ -4504,6 +4507,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct netdev_notifier_changeupper_info *info = ptr; struct netlink_ext_ack *extack; struct net_device *upper_dev; @@ -4520,7 +4524,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) { + if (netdev_has_any_upper_dev(upper_dev) && + (!netif_is_bridge_master(upper_dev) || + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, + upper_dev))) { NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 432ab9b12b7f..05ce1befd9b3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -365,6 +365,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, struct net_device *br_dev); +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev); /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7b8548e25ae7..593ad31be749 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -152,6 +152,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, return NULL; } +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); +} + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, struct net_device *br_dev) -- cgit v1.2.3 From 02a0d9216d4daf6a58d88642bd2da2c78c327552 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Tue, 2 Jan 2018 09:39:25 -0800 Subject: Input: xen-kbdfront - do not advertise multi-touch pressure support Some user-space applications expect multi-touch pressure on contact to be reported if it is advertised in device properties. Otherwise, such applications may treat reports not as actual touches, but hovering. Currently this is only advertised, but not reported. Fix this by not advertising that ABS_MT_PRESSURE is supported. 
Signed-off-by: Oleksandr Andrushchenko Signed-off-by: Andrii Chepurnyi Patchwork-Id: 10140017 Signed-off-by: Dmitry Torokhov --- drivers/input/misc/xen-kbdfront.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 6bf56bb5f8d9..d91f3b1c5375 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -326,8 +326,6 @@ static int xenkbd_probe(struct xenbus_device *dev, 0, width, 0, 0); input_set_abs_params(mtouch, ABS_MT_POSITION_Y, 0, height, 0, 0); - input_set_abs_params(mtouch, ABS_MT_PRESSURE, - 0, 255, 0, 0); ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT); if (ret) { -- cgit v1.2.3 From 5a371cf87e145b86efd32007e46146e78c1eff6d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 31 Dec 2017 15:33:14 +0200 Subject: IB/mlx4: Fix mlx4_ib_alloc_mr error flow ibmr.device is set only after ib_alloc_mr() completes successfully. Therefore, if mlx4_mr_enable() returns an error, the error flow unwinder calls mlx4_free_priv_pages(), which uses ibmr.device. This causes a NULL dereference oops; to fix it, the IB device should be set in the mr struct at an earlier stage (e.g. prior to calling mlx4_free_priv_pages()).
Fixes: 1b2cd0fc673c ("IB/mlx4: Support the new memory registration API") Signed-off-by: Nitzan Carmi Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 313bfb9ccb71..4975f3e6596e 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -642,7 +642,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, goto err_free_mr; mr->max_pages = max_num_sg; - err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) goto err_free_pl; @@ -653,6 +652,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, return &mr->ibmr; err_free_pl: + mr->ibmr.device = pd->device; mlx4_free_priv_pages(mr); err_free_mr: (void) mlx4_mr_free(dev->dev, &mr->mmr); -- cgit v1.2.3 From 16ba3defb8bd01a9464ba4820a487f5b196b455b Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sun, 31 Dec 2017 15:33:15 +0200 Subject: IB/ipoib: Fix race condition in neigh creation When using enhanced mode for IPoIB, two threads may execute xmit in parallel to two different TX queues while the target is the same. In this case, both of them will add the same neighbor to the path's neigh link list and we might see the following message: list_add double add: new=ffff88024767a348, prev=ffff88024767a348... WARNING: lib/list_debug.c:31__list_add_valid+0x4e/0x70 ipoib_start_xmit+0x477/0x680 [ib_ipoib] dev_hard_start_xmit+0xb9/0x3e0 sch_direct_xmit+0xf9/0x250 __qdisc_run+0x176/0x5d0 __dev_queue_xmit+0x1f5/0xb10 __dev_queue_xmit+0x55/0xb10 Analysis: Two SKB are scheduled to be transmitted from two cores. In ipoib_start_xmit, both gets NULL when calling ipoib_neigh_get. Two calls to neigh_add_path are made. One thread takes the spin-lock and calls ipoib_neigh_alloc which creates the neigh structure, then (after the __path_find) the neigh is added to the path's neigh link list. 
When the second thread enters the critical section it also calls ipoib_neigh_alloc but in this case it gets the already allocated ipoib_neigh structure, which is already linked to the path's neigh link list and adds it again to the list. Which beside of triggering the list, it creates a loop in the linked list. This loop leads to endless loop inside path_rec_completion. Solution: Check list_empty(&neigh->list) before adding to the list. Add a similar fix in "ipoib_multicast.c::ipoib_mcast_send" Fixes: b63b70d87741 ('IPoIB: Use a private hash table for path lookup in xmit path') Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 25 ++++++++++++++++++------- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 5 ++++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 12b7f911f0e5..8880351df179 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -902,8 +902,8 @@ static int path_rec_start(struct net_device *dev, return 0; } -static void neigh_add_path(struct sk_buff *skb, u8 *daddr, - struct net_device *dev) +static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr, + struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rdma_netdev *rn = netdev_priv(dev); @@ -917,7 +917,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, spin_unlock_irqrestore(&priv->lock, flags); ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); - return; + return NULL; + } + + /* To avoid race condition, make sure that the + * neigh will be added only once. 
+ */ + if (unlikely(!list_empty(&neigh->list))) { + spin_unlock_irqrestore(&priv->lock, flags); + return neigh; } path = __path_find(dev, daddr + 4); @@ -956,7 +964,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, path->ah->last_send = rn->send(dev, skb, path->ah->ah, IPOIB_QPN(daddr)); ipoib_neigh_put(neigh); - return; + return NULL; } } else { neigh->ah = NULL; @@ -973,7 +981,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, spin_unlock_irqrestore(&priv->lock, flags); ipoib_neigh_put(neigh); - return; + return NULL; err_path: ipoib_neigh_free(neigh); @@ -983,6 +991,8 @@ err_drop: spin_unlock_irqrestore(&priv->lock, flags); ipoib_neigh_put(neigh); + + return NULL; } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, @@ -1091,8 +1101,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) case htons(ETH_P_TIPC): neigh = ipoib_neigh_get(dev, phdr->hwaddr); if (unlikely(!neigh)) { - neigh_add_path(skb, phdr->hwaddr, dev); - return NETDEV_TX_OK; + neigh = neigh_add_path(skb, phdr->hwaddr, dev); + if (likely(!neigh)) + return NETDEV_TX_OK; } break; case htons(ETH_P_ARP): diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 93e149efc1f5..9b3f47ae2016 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -816,7 +816,10 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) spin_lock_irqsave(&priv->lock, flags); if (!neigh) { neigh = ipoib_neigh_alloc(daddr, dev); - if (neigh) { + /* Make sure that the neigh will be added only + * once to mcast list. 
+ */ + if (neigh && list_empty(&neigh->list)) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; list_add_tail(&neigh->list, &mcast->neigh_list); -- cgit v1.2.3 From 2196881566225f3c3428d1a5f847a992944daa5b Mon Sep 17 00:00:00 2001 From: Aliaksei Karaliou Date: Thu, 21 Dec 2017 13:18:26 -0800 Subject: xfs: quota: fix missed destroy of qi_tree_lock xfs_qm_destroy_quotainfo() does not destroy quotainfo->qi_tree_lock while destroys quotainfo->qi_quotaofflock. Signed-off-by: Aliaksei Karaliou Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_qm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ec952dfad359..d0053115427f 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -736,6 +736,7 @@ xfs_qm_destroy_quotainfo( IRELE(qi->qi_pquotaip); qi->qi_pquotaip = NULL; } + mutex_destroy(&qi->qi_tree_lock); mutex_destroy(&qi->qi_quotaofflock); kmem_free(qi); mp->m_quotainfo = NULL; -- cgit v1.2.3 From 3a3882ff26fbdbaf5f7e13f6a0bccfbf7121041d Mon Sep 17 00:00:00 2001 From: Aliaksei Karaliou Date: Thu, 21 Dec 2017 13:18:26 -0800 Subject: xfs: quota: check result of register_shrinker() xfs_qm_init_quotainfo() does not check result of register_shrinker() which was tagged as __must_check recently, reported by sparse. Signed-off-by: Aliaksei Karaliou [darrick: move xfs_qm_destroy_quotainos nearer xfs_qm_init_quotainos] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/xfs_qm.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index d0053115427f..b897b11afb2c 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -48,7 +48,7 @@ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); - +STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi); STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); /* * We use the batch lookup interface to iterate over the dquots as it @@ -695,9 +695,17 @@ xfs_qm_init_quotainfo( qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&qinf->qi_shrinker); + + error = register_shrinker(&qinf->qi_shrinker); + if (error) + goto out_free_inos; + return 0; +out_free_inos: + mutex_destroy(&qinf->qi_quotaofflock); + mutex_destroy(&qinf->qi_tree_lock); + xfs_qm_destroy_quotainos(qinf); out_free_lru: list_lru_destroy(&qinf->qi_lru); out_free_qinf: @@ -706,7 +714,6 @@ out_free_qinf: return error; } - /* * Gets called when unmounting a filesystem or when all quotas get * turned off. 
@@ -723,19 +730,7 @@ xfs_qm_destroy_quotainfo( unregister_shrinker(&qi->qi_shrinker); list_lru_destroy(&qi->qi_lru); - - if (qi->qi_uquotaip) { - IRELE(qi->qi_uquotaip); - qi->qi_uquotaip = NULL; /* paranoia */ - } - if (qi->qi_gquotaip) { - IRELE(qi->qi_gquotaip); - qi->qi_gquotaip = NULL; - } - if (qi->qi_pquotaip) { - IRELE(qi->qi_pquotaip); - qi->qi_pquotaip = NULL; - } + xfs_qm_destroy_quotainos(qi); mutex_destroy(&qi->qi_tree_lock); mutex_destroy(&qi->qi_quotaofflock); kmem_free(qi); @@ -1600,6 +1595,24 @@ error_rele: return error; } +STATIC void +xfs_qm_destroy_quotainos( + xfs_quotainfo_t *qi) +{ + if (qi->qi_uquotaip) { + IRELE(qi->qi_uquotaip); + qi->qi_uquotaip = NULL; /* paranoia */ + } + if (qi->qi_gquotaip) { + IRELE(qi->qi_gquotaip); + qi->qi_gquotaip = NULL; + } + if (qi->qi_pquotaip) { + IRELE(qi->qi_pquotaip); + qi->qi_pquotaip = NULL; + } +} + STATIC void xfs_qm_dqfree_one( struct xfs_dquot *dqp) -- cgit v1.2.3 From b4d8ad7fd3a18e6d92d4ebe858185c704604a57d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 22 Dec 2017 13:14:34 -0800 Subject: xfs: fix s_maxbytes overflow problems Fix some integer overflow problems if offset + count happen to be large enough to cause an integer overflow. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_aops.c | 4 ++-- fs/xfs/xfs_iomap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 21e2d70884e1..4fc526a27a94 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - count) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -1312,7 +1312,7 @@ xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - size) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7ab52a8bc0a9..66e1edbfb2b2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1006,7 +1006,7 @@ xfs_file_iomap_begin( } ASSERT(offset <= mp->m_super->s_maxbytes); - if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) + if (offset > mp->m_super->s_maxbytes - length) length = mp->m_super->s_maxbytes - offset; offset_fsb = XFS_B_TO_FSBT(mp, offset); end_fsb = XFS_B_TO_FSB(mp, offset + length); -- cgit v1.2.3 From 3bb23421a504f01551b7cb9dff0e41dbf16656b0 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 26 Dec 2017 07:48:51 +0200 Subject: net/sched: Fix update of lastuse in act modules implementing stats_update We need to update lastuse to the most recent value between what is already set and the new value. If HW matching fails, e.g. because of an issue, the stats are not updated but it could be that software did match and updated lastuse.
Fixes: 5712bf9c5c30 ("net/sched: act_mirred: Use passed lastuse argument") Fixes: 9fea47d93bcc ("net/sched: act_gact: Update statistics when offloaded to hardware") Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_gact.c | 2 +- net/sched/act_mirred.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e29a48ef7fc3..a0ac42b3ed06 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 8b3e59388480..08b61849c2a2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, struct tcf_t *tm = &m->tcf_tm; _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); - tm->lastuse = lastuse; + tm->lastuse = max_t(u64, tm->lastuse, lastuse); } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, -- cgit v1.2.3 From d02fd6e7d2933ede6478a15f9e4ce8a93845824e Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 26 Dec 2017 21:44:32 +0800 Subject: macvlan: Fix one possible double free Because the macvlan_uninit would free the macvlan port, so there is one double free case in macvlan_common_newlink. When the macvlan port is just created, then register_netdevice or netdev_upper_dev_link failed and they would invoke macvlan_uninit. Then it would reach the macvlan_port_destroy which triggers the double free. Signed-off-by: Gao Feng Signed-off-by: David S. 
Miller --- drivers/net/macvlan.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a178c5efd33e..a0f2be81d52e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1444,9 +1444,14 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, return 0; unregister_netdev: + /* macvlan_uninit would free the macvlan port */ unregister_netdevice(dev); + return err; destroy_macvlan_port: - if (create) + /* the macvlan port may be freed by macvlan_uninit when fail to register. + * so we destroy the macvlan port only when it's valid. + */ + if (create && macvlan_port_get_rtnl(dev)) macvlan_port_destroy(port->dev); return err; } -- cgit v1.2.3 From ac817f5ad066697e4d4d35ec68c974eba2c5f17a Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 26 Dec 2017 23:15:12 +0000 Subject: phylink: ensure we report link down when LOS asserted Although we disable the netdev carrier, we fail to report in the kernel log that the link went down. Fix this. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/phylink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 827f3f92560e..150cd95a6e1e 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1429,9 +1429,8 @@ static void phylink_sfp_link_down(void *upstream) WARN_ON(!lockdep_rtnl_is_held()); set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state); + queue_work(system_power_efficient_wq, &pl->resolve); flush_work(&pl->resolve); - - netif_carrier_off(pl->netdev); } static void phylink_sfp_link_up(void *upstream) -- cgit v1.2.3 From 0b2122e4934c7783d336397864e34ee53aad0965 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 26 Dec 2017 23:15:17 +0000 Subject: sfp: fix sfp-bus oops when removing socket/upstream When we remove a socket or upstream, and the other side isn't registered, we dereference a NULL pointer, causing a kernel oops. Fix this. Fixes: ce0aa27ff3f6 ("sfp: add sfp-bus to bridge between network devices and sfp cages") Signed-off-by: Russell King Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/sfp-bus.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 8a1b1f4c1b7c..ab64a142b832 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -356,7 +356,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream); void sfp_unregister_upstream(struct sfp_bus *bus) { rtnl_lock(); - sfp_unregister_bus(bus); + if (bus->sfp) + sfp_unregister_bus(bus); bus->upstream = NULL; bus->netdev = NULL; rtnl_unlock(); @@ -459,7 +460,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket); void sfp_unregister_socket(struct sfp_bus *bus) { rtnl_lock(); - sfp_unregister_bus(bus); + if (bus->netdev) + sfp_unregister_bus(bus); bus->sfp_dev = NULL; bus->sfp = NULL; bus->socket_ops = NULL; -- cgit v1.2.3 From 0b76aae741abb9d16d2c0e67f8b1e766576f897d Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Wed, 6 Dec 2017 02:26:29 +0530 Subject: e1000: fix disabling already-disabled warning This patch adds check so that driver does not disable already disabled device. [ 44.637743] advantechwdt: Unexpected close, not stopping watchdog! 
[ 44.997548] input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input6 [ 45.013419] e1000 0000:00:03.0: disabling already-disabled device [ 45.013447] ------------[ cut here ]------------ [ 45.014868] WARNING: CPU: 1 PID: 71 at drivers/pci/pci.c:1641 pci_disable_device+0xa1/0x105: pci_disable_device at drivers/pci/pci.c:1640 [ 45.016171] CPU: 1 PID: 71 Comm: rcu_perf_shutdo Not tainted 4.14.0-01330-g3c07399 #1 [ 45.017197] task: ffff88011bee9e40 task.stack: ffffc90000860000 [ 45.017987] RIP: 0010:pci_disable_device+0xa1/0x105: pci_disable_device at drivers/pci/pci.c:1640 [ 45.018603] RSP: 0000:ffffc90000863e30 EFLAGS: 00010286 [ 45.019282] RAX: 0000000000000035 RBX: ffff88013a230008 RCX: 0000000000000000 [ 45.020182] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000203 [ 45.021084] RBP: ffff88013a3f31e8 R08: 0000000000000001 R09: 0000000000000000 [ 45.021986] R10: ffffffff827ec29c R11: 0000000000000002 R12: 0000000000000001 [ 45.022946] R13: ffff88013a230008 R14: ffff880117802b20 R15: ffffc90000863e8f [ 45.023842] FS: 0000000000000000(0000) GS:ffff88013fd00000(0000) knlGS:0000000000000000 [ 45.024863] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 45.025583] CR2: ffffc900006d4000 CR3: 000000000220f000 CR4: 00000000000006a0 [ 45.026478] Call Trace: [ 45.026811] __e1000_shutdown+0x1d4/0x1e2: __e1000_shutdown at drivers/net/ethernet/intel/e1000/e1000_main.c:5162 [ 45.027344] ? rcu_perf_cleanup+0x2a1/0x2a1: rcu_perf_shutdown at kernel/rcu/rcuperf.c:627 [ 45.027883] e1000_shutdown+0x14/0x3a: e1000_shutdown at drivers/net/ethernet/intel/e1000/e1000_main.c:5235 [ 45.028351] device_shutdown+0x110/0x1aa: device_shutdown at drivers/base/core.c:2807 [ 45.028858] kernel_power_off+0x31/0x64: kernel_power_off at kernel/reboot.c:260 [ 45.029343] rcu_perf_shutdown+0x9b/0xa7: rcu_perf_shutdown at kernel/rcu/rcuperf.c:637 [ 45.029852] ? 
__wake_up_common_lock+0xa2/0xa2: autoremove_wake_function at kernel/sched/wait.c:376 [ 45.030414] kthread+0x126/0x12e: kthread at kernel/kthread.c:233 [ 45.030834] ? __kthread_bind_mask+0x8e/0x8e: kthread at kernel/kthread.c:190 [ 45.031399] ? ret_from_fork+0x1f/0x30: ret_from_fork at arch/x86/entry/entry_64.S:443 [ 45.031883] ? kernel_init+0xa/0xf5: kernel_init at init/main.c:997 [ 45.032325] ret_from_fork+0x1f/0x30: ret_from_fork at arch/x86/entry/entry_64.S:443 [ 45.032777] Code: 00 48 85 ed 75 07 48 8b ab a8 00 00 00 48 8d bb 98 00 00 00 e8 aa d1 11 00 48 89 ea 48 89 c6 48 c7 c7 d8 e4 0b 82 e8 55 7d da ff <0f> ff b9 01 00 00 00 31 d2 be 01 00 00 00 48 c7 c7 f0 b1 61 82 [ 45.035222] ---[ end trace c257137b1b1976ef ]--- [ 45.037838] ACPI: Preparing to enter system sleep state S5 Signed-off-by: Tushar Dave Tested-by: Fengguang Wu Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000.h | 3 ++- drivers/net/ethernet/intel/e1000/e1000_main.c | 27 ++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index d7bdea79e9fa..8fd2458060a0 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -331,7 +331,8 @@ struct e1000_adapter { enum e1000_state_t { __E1000_TESTING, __E1000_RESETTING, - __E1000_DOWN + __E1000_DOWN, + __E1000_DISABLED }; #undef pr_fmt diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 1982f7917a8d..3dd4aeb2706d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -945,7 +945,7 @@ static int e1000_init_hw_struct(struct e1000_adapter *adapter, static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; - struct e1000_adapter *adapter; + struct e1000_adapter *adapter = NULL; struct e1000_hw *hw; static int 
cards_found; @@ -955,6 +955,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 tmp = 0; u16 eeprom_apme_mask = E1000_EEPROM_APME; int bars, need_ioport; + bool disable_dev = false; /* do not allocate ioport bars when not needed */ need_ioport = e1000_is_need_ioport(pdev); @@ -1259,11 +1260,13 @@ err_mdio_ioremap: iounmap(hw->ce4100_gbe_mdio_base_virt); iounmap(hw->hw_addr); err_ioremap: + disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags); free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, bars); err_pci_reg: - pci_disable_device(pdev); + if (!adapter || disable_dev) + pci_disable_device(pdev); return err; } @@ -1281,6 +1284,7 @@ static void e1000_remove(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + bool disable_dev; e1000_down_and_stop(adapter); e1000_release_manageability(adapter); @@ -1299,9 +1303,11 @@ static void e1000_remove(struct pci_dev *pdev) iounmap(hw->flash_address); pci_release_selected_regions(pdev, adapter->bars); + disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags); free_netdev(netdev); - pci_disable_device(pdev); + if (disable_dev) + pci_disable_device(pdev); } /** @@ -5156,7 +5162,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) if (netif_running(netdev)) e1000_free_irq(adapter); - pci_disable_device(pdev); + if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) + pci_disable_device(pdev); return 0; } @@ -5200,6 +5207,10 @@ static int e1000_resume(struct pci_dev *pdev) pr_err("Cannot enable PCI device from suspend\n"); return err; } + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__E1000_DISABLED, &adapter->flags); pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); @@ -5274,7 +5285,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, if 
(netif_running(netdev)) e1000_down(adapter); - pci_disable_device(pdev); + + if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags)) + pci_disable_device(pdev); /* Request a slot slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -5302,6 +5315,10 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev) pr_err("Cannot re-enable PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT; } + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__E1000_DISABLED, &adapter->flags); pci_set_master(pdev); pci_enable_wake(pdev, PCI_D3hot, 0); -- cgit v1.2.3 From 4110e02eb45ea447ec6f5459c9934de0a273fb91 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 11 Dec 2017 16:26:40 +0900 Subject: e1000e: Fix e1000_check_for_copper_link_ich8lan return value. e1000e_check_for_copper_link() and e1000_check_for_copper_link_ich8lan() are the two functions that may be assigned to mac.ops.check_for_link when phy.media_type == e1000_media_type_copper. Commit 19110cfbb34d ("e1000e: Separate signaling for link check/link up") changed the meaning of the return value of check_for_link for copper media but only adjusted the first function. This patch adjusts the second function likewise. Reported-by: Christian Hesse Reported-by: Gabriel C Link: https://bugzilla.kernel.org/show_bug.cgi?id=198047 Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up") Signed-off-by: Benjamin Poirier Tested-by: Aaron Brown Tested-by: Christian Hesse Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index d6d4ed7acf03..31277d3bb7dc 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1367,6 +1367,9 @@ out: * Checks to see of the link status of the hardware has changed. 
If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) { @@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) -- cgit v1.2.3 From bd30ffc414e55194ed6149fad69a145550cb7c18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZ=20Lin=20=28=E6=9E=97=E4=B8=8A=E6=99=BA=29?= Date: Fri, 29 Dec 2017 17:02:17 +0800 Subject: NET: usb: qmi_wwan: add support for YUGA CLM920-NC5 PID 0x9625 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for PID 0x9625 of YUGA CLM920-NC5. YUGA CLM920-NC5 needs to enable QMI_WWAN_QUIRK_DTR before QMI operation. qmicli -d /dev/cdc-wdm0 -p --dms-get-revision [/dev/cdc-wdm0] Device revision retrieved: Revision: 'CLM920_NC5-V1 1 [Oct 23 2016 19:00:00]' Signed-off-by: SZ Lin (林上智) Acked-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 3000ddd1c7e2..728819feab44 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1100,6 +1100,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, + {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ -- cgit v1.2.3 From 807fc072991861ff0cd7ac44267ff1dd76ef316e Mon Sep 17 00:00:00 2001 From: Yue Hin Lau Date: Fri, 29 Dec 2017 11:11:18 +0000 Subject: drm/amd/display: call set csc_default if enable adjustment is false Fixes a greenish tint on RV displays. Signed-off-by: Yue Hin Lau Reviewed-by: Eric Bernstein Acked-by: Harry Wentland Signed-off-by: Alex Deucher [drake@endlessm.com: backport to 4.15] Signed-off-by: Daniel Drake Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h | 2 +- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c | 6 ++---- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 2 ++ drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h index a9782b1aba47..34daf895f848 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h @@ -1360,7 +1360,7 @@ void dpp1_cm_set_output_csc_adjustment( void dpp1_cm_set_output_csc_default( struct dpp *dpp_base, - const struct default_adjustment *default_adjust); + enum dc_color_space colorspace); void dpp1_cm_set_gamut_remap( struct dpp *dpp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 40627c244bf5..ed1216b53465 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -225,14 +225,13 @@ void dpp1_cm_set_gamut_remap( void dpp1_cm_set_output_csc_default( struct dpp *dpp_base, - const struct default_adjustment *default_adjust) + enum dc_color_space colorspace) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); uint32_t ocsc_mode = 0; - if (default_adjust != NULL) { - switch (default_adjust->out_color_space) { + switch (colorspace) { case COLOR_SPACE_SRGB: case COLOR_SPACE_2020_RGB_FULLRANGE: ocsc_mode = 0; @@ -253,7 +252,6 @@ void dpp1_cm_set_output_csc_default( case COLOR_SPACE_UNKNOWN: default: break; - } } REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 961ad5c3b454..05dc01e54531 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2097,6 +2097,8 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx, tbl_entry.color_space = color_space; //tbl_entry.regval = matrix; pipe_ctx->plane_res.dpp->funcs->opp_set_csc_adjustment(pipe_ctx->plane_res.dpp, &tbl_entry); + } else { + pipe_ctx->plane_res.dpp->funcs->opp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace); } } static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 83a68460edcd..9420dfb94d39 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -64,7 +64,7 @@ struct dpp_funcs { void (*opp_set_csc_default)( struct dpp *dpp, - const struct default_adjustment *default_adjust); + enum dc_color_space colorspace); void (*opp_set_csc_adjustment)( struct dpp *dpp, -- cgit 
v1.2.3 From 19d859a7205bc59ffc38303eb25ae394f61d21dc Mon Sep 17 00:00:00 2001 From: Xiongwei Song Date: Tue, 2 Jan 2018 21:24:55 +0800 Subject: drm/ttm: check the return value of kzalloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the function ttm_page_alloc_init, kzalloc call is made for variable _manager, we need to check its return value, it may return NULL. Signed-off-by: Xiongwei Song Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index b5ba6441489f..5d252fb27a82 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -1007,6 +1007,8 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) pr_info("Initializing pool allocator\n"); _manager = kzalloc(sizeof(*_manager), GFP_KERNEL); + if (!_manager) + return -ENOMEM; ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 0); -- cgit v1.2.3 From 0ae60d0c4f191c4241377cc3fc5931dc90ca3bbd Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Jan 2018 20:40:21 +0100 Subject: parisc: Show unhashed hardware inventory Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Signed-off-by: Helge Deller --- arch/parisc/kernel/drivers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index d8f77358e2ba..29b99b8964aa 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + printk(KERN_INFO "%d. 
%s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); -- cgit v1.2.3 From 63b2c373137b16d948b08cffacc6abfcf4cffea6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Jan 2018 20:42:59 +0100 Subject: parisc: Show initial kernel memory layout unhashed Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Signed-off-by: Helge Deller --- arch/parisc/mm/init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 13f7854e0d49..48f41399fc0b 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -631,11 +631,11 @@ void __init mem_init(void) mem_init_print_info(NULL); #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ printk("virtual kernel memory layout:\n" - " vmalloc : 0x%p - 0x%p (%4ld MB)\n" - " memory : 0x%p - 0x%p (%4ld MB)\n" - " .init : 0x%p - 0x%p (%4ld kB)\n" - " .data : 0x%p - 0x%p (%4ld kB)\n" - " .text : 0x%p - 0x%p (%4ld kB)\n", + " vmalloc : 0x%px - 0x%px (%4ld MB)\n" + " memory : 0x%px - 0x%px (%4ld MB)\n" + " .init : 0x%px - 0x%px (%4ld kB)\n" + " .data : 0x%px - 0x%px (%4ld kB)\n" + " .text : 0x%px - 0x%px (%4ld kB)\n", (void*)VMALLOC_START, (void*)VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, -- cgit v1.2.3 From 04903c06b4854d2e85f6e3c368d5d48c4ce55f09 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Jan 2018 20:45:42 +0100 Subject: parisc: Show unhashed HPA of Dino chip Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Signed-off-by: Helge Deller --- drivers/parisc/dino.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 0b3fb99d9b89..7390fb8ca9d1 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -303,7 +303,7 @@ static void dino_mask_irq(struct irq_data *d) struct dino_device *dino_dev = 
irq_data_get_irq_chip_data(d); int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS); - DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq); + DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq); /* Clear the matching bit in the IMR register */ dino_dev->imr &= ~(DINO_MASK_IRQ(local_irq)); @@ -316,7 +316,7 @@ static void dino_unmask_irq(struct irq_data *d) int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS); u32 tmp; - DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq); + DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq); /* ** clear pending IRQ bits @@ -396,7 +396,7 @@ ilr_again: if (mask) { if (--ilr_loop > 0) goto ilr_again; - printk(KERN_ERR "Dino 0x%p: stuck interrupt %d\n", + printk(KERN_ERR "Dino 0x%px: stuck interrupt %d\n", dino_dev->hba.base_addr, mask); return IRQ_NONE; } @@ -553,7 +553,7 @@ dino_fixup_bus(struct pci_bus *bus) struct pci_dev *dev; struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge)); - DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n", + DBG(KERN_WARNING "%s(0x%px) bus %d platform_data 0x%px\n", __func__, bus, bus->busn_res.start, bus->bridge->platform_data); @@ -854,7 +854,7 @@ static int __init dino_common_init(struct parisc_device *dev, res->flags = IORESOURCE_IO; /* do not mark it busy ! 
*/ if (request_resource(&ioport_resource, res) < 0) { printk(KERN_ERR "%s: request I/O Port region failed " - "0x%lx/%lx (hpa 0x%p)\n", + "0x%lx/%lx (hpa 0x%px)\n", name, (unsigned long)res->start, (unsigned long)res->end, dino_dev->hba.base_addr); return 1; -- cgit v1.2.3 From 28df2f83c39554d9e64cd9d2a93b8e28e24df5b7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Jan 2018 20:47:01 +0100 Subject: parisc: Show unhashed EISA EEPROM address Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Signed-off-by: Helge Deller --- drivers/parisc/eisa_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/eisa_eeprom.c b/drivers/parisc/eisa_eeprom.c index 4dd9b1308128..99a80da6fd2e 100644 --- a/drivers/parisc/eisa_eeprom.c +++ b/drivers/parisc/eisa_eeprom.c @@ -106,7 +106,7 @@ static int __init eisa_eeprom_init(void) return retval; } - printk(KERN_INFO "EISA EEPROM at 0x%p\n", eisa_eeprom_addr); + printk(KERN_INFO "EISA EEPROM at 0x%px\n", eisa_eeprom_addr); return 0; } -- cgit v1.2.3 From f8978bd95cf92f869f3d9b34c1b699f49253b8c6 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 1 Jan 2018 13:07:15 +0200 Subject: RDMA/netlink: Fix locking around __ib_get_device_by_index Holding locks is mandatory when calling __ib_device_get_by_index, otherwise there are races during the list iteration with device removal. Since the locks are static to device.c, __ib_device_get_by_index can never be called correctly by any user out side the file. Make the function static and provide a safe function that gets the correct locks and returns a kref'd pointer. Fix all callers. 
Fixes: e5c9469efcb1 ("RDMA/netlink: Add nldev device doit implementation") Fixes: c3f66f7b0052 ("RDMA/netlink: Implement nldev port doit callback") Fixes: 7d02f605f0dc ("RDMA/netlink: Add nldev port dumpit implementation") Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 2 +- drivers/infiniband/core/device.c | 18 ++++++++++++- drivers/infiniband/core/nldev.c | 54 ++++++++++++++++++++++++------------- 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a1d687a664f8..66f0268f37a6 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -314,7 +314,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, } #endif -struct ib_device *__ib_device_get_by_index(u32 ifindex); +struct ib_device *ib_device_get_by_index(u32 ifindex); /* RDMA device netlink */ void nldev_init(void); void nldev_exit(void); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 30914f3baa5f..465520627e4b 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -134,7 +134,7 @@ static int ib_device_check_mandatory(struct ib_device *device) return 0; } -struct ib_device *__ib_device_get_by_index(u32 index) +static struct ib_device *__ib_device_get_by_index(u32 index) { struct ib_device *device; @@ -145,6 +145,22 @@ struct ib_device *__ib_device_get_by_index(u32 index) return NULL; } +/* + * Caller is responsible to return refrerence count by calling put_device() + */ +struct ib_device *ib_device_get_by_index(u32 index) +{ + struct ib_device *device; + + down_read(&lists_rwsem); + device = __ib_device_get_by_index(index); + if (device) + get_device(&device->dev); + + up_read(&lists_rwsem); + return device; +} + static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; diff 
--git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 9a05245a1acf..0dcd1aa6f683 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -142,27 +142,34 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(index); + device = ib_device_get_by_index(index); if (!device) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!msg) { + err = -ENOMEM; + goto err; + } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); err = fill_dev_info(msg, device); - if (err) { - nlmsg_free(msg); - return err; - } + if (err) + goto err_free; nlmsg_end(msg, nlh); + put_device(&device->dev); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_free: + nlmsg_free(msg); +err: + put_device(&device->dev); + return err; } static int _nldev_get_dumpit(struct ib_device *device, @@ -220,31 +227,40 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(index); + device = ib_device_get_by_index(index); if (!device) return -EINVAL; port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); - if (!rdma_is_port_valid(device, port)) - return -EINVAL; + if (!rdma_is_port_valid(device, port)) { + err = -EINVAL; + goto err; + } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; + if (!msg) { + err = -ENOMEM; + goto err; + } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); err = fill_port_info(msg, device, port); - if (err) { - nlmsg_free(msg); - return err; - } + if (err) + goto err_free; nlmsg_end(msg, nlh); + put_device(&device->dev); return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); + +err_free: + 
nlmsg_free(msg); +err: + put_device(&device->dev); + return err; } static int nldev_port_get_dumpit(struct sk_buff *skb, @@ -265,7 +281,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, return -EINVAL; ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); - device = __ib_device_get_by_index(ifindex); + device = ib_device_get_by_index(ifindex); if (!device) return -EINVAL; @@ -299,7 +315,9 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, nlmsg_end(skb, nlh); } -out: cb->args[0] = idx; +out: + put_device(&device->dev); + cb->args[0] = idx; return skb->len; } -- cgit v1.2.3 From 88776c0e70be0290f8357019d844aae15edaa967 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 2 Jan 2018 20:36:44 +0100 Subject: parisc: Fix alignment of pa_tlb_lock in assembly on 32-bit SMP kernel Qemu for PARISC reported on a 32bit SMP parisc kernel strange failures about "Not-handled unaligned insn 0x0e8011d6 and 0x0c2011c9." Those opcodes evaluate to the ldcw() assembly instruction which requires (on 32bit) an alignment of 16 bytes to ensure atomicity. As it turns out, qemu is correct and in our assembly code in entry.S and pacache.S we don't pay attention to the required alignment. This patch fixes the problem by aligning the lock offset in assembly code in the same manner as we do in our C-code. Signed-off-by: Helge Deller Cc: # v4.0+ --- arch/parisc/include/asm/ldcw.h | 2 ++ arch/parisc/kernel/entry.S | 13 +++++++++++-- arch/parisc/kernel/pacache.S | 9 +++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index dd5a08aaa4da..3eb4bfc1fb36 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -12,6 +12,7 @@ for the semaphore. */ #define __PA_LDCW_ALIGNMENT 16 +#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ @@ -29,6 +30,7 @@ ldcd). 
*/ #define __PA_LDCW_ALIGNMENT 4 +#define __PA_LDCW_ALIGN_ORDER 2 #define __ldcw_align(a) (&(a)->slock) #define __LDCW "ldcw,co" diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index f3cecf5117cf..e95207c0565e 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,14 @@ #endif .import pa_tlb_lock,data + .macro load_pa_tlb_lock reg +#if __PA_LDCW_ALIGNMENT > 4 + load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg + depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg +#else + load32 PA(pa_tlb_lock), \reg +#endif + .endm /* space_to_prot macro creates a prot id from a space id */ @@ -457,7 +466,7 @@ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop @@ -480,7 +489,7 @@ /* Release pa_tlb_lock lock. */ .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp tlb_unlock0 \spc,\tmp #endif .endm diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index adf7187f8951..2d40c4ff3f69 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -36,6 +36,7 @@ #include #include #include +#include #include .text @@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local) .macro tlb_lock la,flags,tmp #ifdef CONFIG_SMP - ldil L%pa_tlb_lock,%r1 - ldo R%pa_tlb_lock(%r1),\la +#if __PA_LDCW_ALIGNMENT > 4 + load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la + depi 0,31,__PA_LDCW_ALIGN_ORDER, \la +#else + load32 pa_tlb_lock, \la +#endif rsm PSW_SM_I,\flags 1: LDCW 0(\la),\tmp cmpib,<>,n 0,\tmp,3f -- cgit v1.2.3 From ee249f271524d111aed8d6e7c61e220aa6b4d714 Mon Sep 17 00:00:00 2001 From: Lei YU Date: Mon, 13 Nov 2017 11:27:33 +0800 Subject: hwmon: Add W83773G driver Nuvoton W83773G is a hardware monitor IC providing one local temperature and two remote temperature 
sensors. Signed-off-by: Lei YU Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 10 ++ drivers/hwmon/Makefile | 1 + drivers/hwmon/w83773g.c | 329 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 340 insertions(+) create mode 100644 drivers/hwmon/w83773g.c diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 7ad017690e3a..530ff7c9234c 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1725,6 +1725,16 @@ config SENSORS_VT8231 This driver can also be built as a module. If so, the module will be called vt8231. +config SENSORS_W83773G + tristate "Nuvoton W83773G" + depends on I2C + help + If you say yes here you get support for the Nuvoton W83773G hardware + monitoring chip. + + This driver can also be built as a module. If so, the module + will be called w83773g. + config SENSORS_W83781D tristate "Winbond W83781D, W83782D, W83783S, Asus AS99127F" depends on I2C diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 0fe489fab663..f814b4ace138 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_SENSORS_ATK0110) += asus_atk0110.o # asb100, then w83781d go first, as they can override other drivers' addresses. obj-$(CONFIG_SENSORS_ASB100) += asb100.o obj-$(CONFIG_SENSORS_W83627HF) += w83627hf.o +obj-$(CONFIG_SENSORS_W83773G) += w83773g.o obj-$(CONFIG_SENSORS_W83792D) += w83792d.o obj-$(CONFIG_SENSORS_W83793) += w83793.o obj-$(CONFIG_SENSORS_W83795) += w83795.o diff --git a/drivers/hwmon/w83773g.c b/drivers/hwmon/w83773g.c new file mode 100644 index 000000000000..0b97c285b049 --- /dev/null +++ b/drivers/hwmon/w83773g.c @@ -0,0 +1,329 @@ +/* + * Copyright (C) 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Driver for the Nuvoton W83773G SMBus temperature sensor IC. + * Supported models: W83773G + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* W83773 has 3 channels */ +#define W83773_CHANNELS 3 + +/* The W83773 registers */ +#define W83773_CONVERSION_RATE_REG_READ 0x04 +#define W83773_CONVERSION_RATE_REG_WRITE 0x0A +#define W83773_MANUFACTURER_ID_REG 0xFE +#define W83773_LOCAL_TEMP 0x00 + +static const u8 W83773_STATUS[2] = { 0x02, 0x17 }; + +static const u8 W83773_TEMP_LSB[2] = { 0x10, 0x25 }; +static const u8 W83773_TEMP_MSB[2] = { 0x01, 0x24 }; + +static const u8 W83773_OFFSET_LSB[2] = { 0x12, 0x16 }; +static const u8 W83773_OFFSET_MSB[2] = { 0x11, 0x15 }; + +/* this is the number of sensors in the device */ +static const struct i2c_device_id w83773_id[] = { + { "w83773g" }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, w83773_id); + +static const struct of_device_id w83773_of_match[] = { + { + .compatible = "nuvoton,w83773g" + }, + { }, +}; +MODULE_DEVICE_TABLE(of, w83773_of_match); + +static inline long temp_of_local(s8 reg) +{ + return reg * 1000; +} + +static inline long temp_of_remote(s8 hb, u8 lb) +{ + return (hb << 3 | lb >> 5) * 125; +} + +static int get_local_temp(struct regmap *regmap, long *val) +{ + unsigned int regval; + int ret; + + ret = regmap_read(regmap, W83773_LOCAL_TEMP, &regval); + if (ret < 0) + return ret; + + *val = temp_of_local(regval); + return 0; +} + +static int get_remote_temp(struct regmap *regmap, int index, long *val) +{ + unsigned int regval_high; + unsigned int regval_low; + int ret; + + ret = regmap_read(regmap, W83773_TEMP_MSB[index], &regval_high); + if (ret < 0) + return ret; + + ret = regmap_read(regmap, W83773_TEMP_LSB[index], &regval_low); + if (ret < 0) + return ret; + + *val = temp_of_remote(regval_high, regval_low); + return 0; +} + +static int get_fault(struct regmap *regmap, int index, long *val) +{ + unsigned int regval; + int ret; + + ret = regmap_read(regmap, W83773_STATUS[index],
&regval); + if (ret < 0) + return ret; + + *val = (u8)regval & 0x04 >> 2; + return 0; +} + +static int get_offset(struct regmap *regmap, int index, long *val) +{ + unsigned int regval_high; + unsigned int regval_low; + int ret; + + ret = regmap_read(regmap, W83773_OFFSET_MSB[index], &regval_high); + if (ret < 0) + return ret; + + ret = regmap_read(regmap, W83773_OFFSET_LSB[index], &regval_low); + if (ret < 0) + return ret; + + *val = temp_of_remote(regval_high, regval_low); + return 0; +} + +static int set_offset(struct regmap *regmap, int index, long val) +{ + int ret; + u8 high_byte; + u8 low_byte; + + val = clamp_val(val, -127825, 127825); + /* offset value equals to (high_byte << 3 | low_byte >> 5) * 125 */ + val /= 125; + high_byte = val >> 3; + low_byte = (val & 0x07) << 5; + + ret = regmap_write(regmap, W83773_OFFSET_MSB[index], high_byte); + if (ret < 0) + return ret; + + return regmap_write(regmap, W83773_OFFSET_LSB[index], low_byte); +} + +static int get_update_interval(struct regmap *regmap, long *val) +{ + unsigned int regval; + int ret; + + ret = regmap_read(regmap, W83773_CONVERSION_RATE_REG_READ, &regval); + if (ret < 0) + return ret; + + *val = 16000 >> regval; + return 0; +} + +static int set_update_interval(struct regmap *regmap, long val) +{ + int rate; + + /* + * For valid rates, interval can be calculated as + * interval = (1 << (8 - rate)) * 62.5; + * Rounded rate is therefore + * rate = 8 - __fls(interval * 8 / (62.5 * 7)); + * Use clamp_val() to avoid overflows, and to ensure valid input + * for __fls.
+ */ + val = clamp_val(val, 62, 16000) * 10; + rate = 8 - __fls((val * 8 / (625 * 7))); + return regmap_write(regmap, W83773_CONVERSION_RATE_REG_WRITE, rate); +} + +static int w83773_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct regmap *regmap = dev_get_drvdata(dev); + + if (type == hwmon_chip) { + if (attr == hwmon_chip_update_interval) + return get_update_interval(regmap, val); + return -EOPNOTSUPP; + } + + switch (attr) { + case hwmon_temp_input: + if (channel == 0) + return get_local_temp(regmap, val); + return get_remote_temp(regmap, channel - 1, val); + case hwmon_temp_fault: + return get_fault(regmap, channel - 1, val); + case hwmon_temp_offset: + return get_offset(regmap, channel - 1, val); + default: + return -EOPNOTSUPP; + } +} + +static int w83773_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct regmap *regmap = dev_get_drvdata(dev); + + if (type == hwmon_chip && attr == hwmon_chip_update_interval) + return set_update_interval(regmap, val); + + if (type == hwmon_temp && attr == hwmon_temp_offset) + return set_offset(regmap, channel - 1, val); + + return -EOPNOTSUPP; +} + +static umode_t w83773_is_visible(const void *data, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + switch (type) { + case hwmon_chip: + switch (attr) { + case hwmon_chip_update_interval: + return 0644; + } + break; + case hwmon_temp: + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_fault: + return 0444; + case hwmon_temp_offset: + return 0644; + } + break; + default: + break; + } + return 0; +} + +static const u32 w83773_chip_config[] = { + HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL, + 0 +}; + +static const struct hwmon_channel_info w83773_chip = { + .type = hwmon_chip, + .config = w83773_chip_config, +}; + +static const u32 w83773_temp_config[] = { + HWMON_T_INPUT, + HWMON_T_INPUT | HWMON_T_FAULT | HWMON_T_OFFSET, + HWMON_T_INPUT | HWMON_T_FAULT | 
HWMON_T_OFFSET, + 0 +}; + +static const struct hwmon_channel_info w83773_temp = { + .type = hwmon_temp, + .config = w83773_temp_config, +}; + +static const struct hwmon_channel_info *w83773_info[] = { + &w83773_chip, + &w83773_temp, + NULL +}; + +static const struct hwmon_ops w83773_ops = { + .is_visible = w83773_is_visible, + .read = w83773_read, + .write = w83773_write, +}; + +static const struct hwmon_chip_info w83773_chip_info = { + .ops = &w83773_ops, + .info = w83773_info, +}; + +static const struct regmap_config w83773_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static int w83773_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct device *hwmon_dev; + struct regmap *regmap; + int ret; + + regmap = devm_regmap_init_i2c(client, &w83773_regmap_config); + if (IS_ERR(regmap)) { + dev_err(dev, "failed to allocate register map\n"); + return PTR_ERR(regmap); + } + + /* Set the conversion rate to 2 Hz */ + ret = regmap_write(regmap, W83773_CONVERSION_RATE_REG_WRITE, 0x05); + if (ret < 0) { + dev_err(&client->dev, "error writing config rate register\n"); + return ret; + } + + i2c_set_clientdata(client, regmap); + + hwmon_dev = devm_hwmon_device_register_with_info(dev, + client->name, + regmap, + &w83773_chip_info, + NULL); + return PTR_ERR_OR_ZERO(hwmon_dev); +} + +static struct i2c_driver w83773_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "w83773g", + .of_match_table = of_match_ptr(w83773_of_match), + }, + .probe = w83773_probe, + .id_table = w83773_id, +}; + +module_i2c_driver(w83773_driver); + +MODULE_AUTHOR("Lei YU "); +MODULE_DESCRIPTION("W83773G temperature sensor driver"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From ba3d8588442dbf954ff4aa77fbb16bfecc7e5b98 Mon Sep 17 00:00:00 2001 From: Lei YU Date: Mon, 13 Nov 2017 11:27:34 +0800 Subject: hwmon: (w83773g) Add documentation Add documentation for the w83773g driver. 
Signed-off-by: Lei YU Signed-off-by: Guenter Roeck --- Documentation/hwmon/w83773g | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Documentation/hwmon/w83773g diff --git a/Documentation/hwmon/w83773g b/Documentation/hwmon/w83773g new file mode 100644 index 000000000000..4cc6c0b8257f --- /dev/null +++ b/Documentation/hwmon/w83773g @@ -0,0 +1,33 @@ +Kernel driver w83773g +==================== + +Supported chips: + * Nuvoton W83773G + Prefix: 'w83773g' + Addresses scanned: I2C 0x4c and 0x4d + Datasheet: https://www.nuvoton.com/resource-files/W83773G_SG_DatasheetV1_2.pdf + +Authors: + Lei YU + +Description +----------- + +This driver implements support for Nuvoton W83773G temperature sensor +chip. This chip implements one local and two remote sensors. +The chip also features offsets for the two remote sensors which get added to +the input readings. The chip does all the scaling by itself and the driver +therefore reports true temperatures that don't need any user-space adjustments. +Temperature is measured in degrees Celsius. +The chip is wired over I2C/SMBus and specified over a temperature +range of -40 to +125 degrees Celsius (for local sensor) and -40 to +127 +degrees Celsius (for remote sensors). +Resolution for both the local and remote channels is 0.125 degree C. + +The chip supports only temperature measurement. The driver exports +the temperature values via the following sysfs files: + +temp[1-3]_input +temp[2-3]_fault +temp[2-3]_offset +update_interval -- cgit v1.2.3 From 6335d98abd942bfb28d908f712b7c7d73aaea871 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 19 Nov 2017 09:14:50 -0800 Subject: hwmon: Drop unnecessary 'default n' from Kconfig 'default n' is default, so there is no need to specify it explicitly. 
Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 530ff7c9234c..ef23553ff5cb 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -26,11 +26,9 @@ if HWMON config HWMON_VID tristate - default n config HWMON_DEBUG_CHIP bool "Hardware Monitoring Chip debugging messages" - default n help Say Y here if you want the I2C chip drivers to produce a bunch of debug messages to the system log. Select this if you are having @@ -42,7 +40,6 @@ comment "Native drivers" config SENSORS_AB8500 tristate "AB8500 thermal monitoring" depends on AB8500_GPADC && AB8500_BM - default n help If you say yes here you get support for the thermal sensor part of the AB8500 chip. The driver includes thermal management for @@ -302,7 +299,6 @@ config SENSORS_APPLESMC select NEW_LEDS select LEDS_CLASS select INPUT_POLLDEV - default n help This driver provides support for the Apple System Management Controller, which provides an accelerometer (Apple Sudden Motion @@ -678,7 +674,6 @@ config SENSORS_JC42 config SENSORS_POWR1220 tristate "Lattice POWR1220 Power Monitoring" depends on I2C - default n help If you say yes here you get access to the hardware monitoring functions of the Lattice POWR1220 isp Power Supply Monitoring, @@ -702,7 +697,6 @@ config SENSORS_LTC2945 tristate "Linear Technology LTC2945" depends on I2C select REGMAP_I2C - default n help If you say yes here you get support for Linear Technology LTC2945 I2C System Monitor. @@ -727,7 +721,6 @@ config SENSORS_LTC2990 config SENSORS_LTC4151 tristate "Linear Technology LTC4151" depends on I2C - default n help If you say yes here you get support for Linear Technology LTC4151 High Voltage I2C Current and Voltage Monitor interface. 
@@ -738,7 +731,6 @@ config SENSORS_LTC4151 config SENSORS_LTC4215 tristate "Linear Technology LTC4215" depends on I2C - default n help If you say yes here you get support for Linear Technology LTC4215 Hot Swap Controller I2C interface. @@ -750,7 +742,6 @@ config SENSORS_LTC4222 tristate "Linear Technology LTC4222" depends on I2C select REGMAP_I2C - default n help If you say yes here you get support for Linear Technology LTC4222 Dual Hot Swap Controller I2C interface. @@ -761,7 +752,6 @@ config SENSORS_LTC4222 config SENSORS_LTC4245 tristate "Linear Technology LTC4245" depends on I2C - default n help If you say yes here you get support for Linear Technology LTC4245 Multiple Supply Hot Swap Controller I2C interface. @@ -773,7 +763,6 @@ config SENSORS_LTC4260 tristate "Linear Technology LTC4260" depends on I2C select REGMAP_I2C - default n help If you say yes here you get support for Linear Technology LTC4260 Positive Voltage Hot Swap Controller I2C interface. @@ -784,7 +773,6 @@ config SENSORS_LTC4260 config SENSORS_LTC4261 tristate "Linear Technology LTC4261" depends on I2C - default n help If you say yes here you get support for Linear Technology LTC4261 Negative Voltage Hot Swap Controller I2C interface. @@ -1276,7 +1264,6 @@ config SENSORS_NSA320 config SENSORS_PCF8591 tristate "Philips PCF8591 ADC/DAC" depends on I2C - default n help If you say yes here you get support for Philips PCF8591 4-channel ADC, 1-channel DAC chips. 
@@ -1459,7 +1446,6 @@ config SENSORS_SMSC47B397 config SENSORS_SCH56XX_COMMON tristate - default n config SENSORS_SCH5627 tristate "SMSC SCH5627" @@ -1505,7 +1491,6 @@ config SENSORS_STTS751 config SENSORS_SMM665 tristate "Summit Microelectronics SMM665" depends on I2C - default n help If you say yes here you get support for the hardware monitoring features of the Summit Microelectronics SMM665/SMM665B Six-Channel @@ -1792,7 +1777,6 @@ config SENSORS_W83795 config SENSORS_W83795_FANCTRL bool "Include automatic fan control support (DANGEROUS)" depends on SENSORS_W83795 - default n help If you say yes here, support for automatic fan speed control will be included in the driver. -- cgit v1.2.3 From b9ccff233e5eb1ae6adc831c0aa1f456d0cbc5cb Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 22 Nov 2017 00:30:56 -0500 Subject: hwmon: (coretemp) deprecate pci_get_bus_and_slot() pci_get_bus_and_slot() is restrictive such that it assumes domain=0 as where a PCI device is present. This restricts the device drivers to be reused for other domain numbers. Use pci_get_domain_bus_and_slot() with a domain number of 0 where we can't extract the domain number. Other places, use the actual domain number from the device. Signed-off-by: Sinan Kaya Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index c13a4fd86b3c..4bdbf77f7197 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -246,7 +246,8 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) int err; u32 eax, edx; int i; - struct pci_dev *host_bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + u16 devfn = PCI_DEVFN(0, 0); + struct pci_dev *host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn); /* * Explicit tjmax table entries override heuristics. 
-- cgit v1.2.3 From 666c14906b496f148e437404283f6a6a84cee719 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 28 Nov 2017 18:33:06 -0800 Subject: hwmon: (pmbus/lm25066) Drop support for LM25063 LM25063 was never released. Drop support for it. Signed-off-by: Guenter Roeck --- Documentation/hwmon/lm25066 | 20 +------------ drivers/hwmon/pmbus/lm25066.c | 67 ++----------------------------------------- 2 files changed, 3 insertions(+), 84 deletions(-) diff --git a/Documentation/hwmon/lm25066 b/Documentation/hwmon/lm25066 index 3fa6bf820c88..51b32aa203a8 100644 --- a/Documentation/hwmon/lm25066 +++ b/Documentation/hwmon/lm25066 @@ -8,11 +8,6 @@ Supported chips: Datasheets: http://www.ti.com/lit/gpn/lm25056 http://www.ti.com/lit/gpn/lm25056a - * TI LM25063 - Prefix: 'lm25063' - Addresses scanned: - - Datasheet: - To be announced * National Semiconductor LM25066 Prefix: 'lm25066' Addresses scanned: - @@ -42,7 +37,7 @@ Description ----------- This driver supports hardware monitoring for National Semiconductor / TI LM25056, -LM25063, LM25066, LM5064, and LM5066/LM5066I Power Management, Monitoring, +LM25066, LM5064, and LM5066/LM5066I Power Management, Monitoring, Control, and Protection ICs. The driver is a client driver to the core PMBus driver. Please see @@ -74,12 +69,8 @@ in1_input Measured input voltage. in1_average Average measured input voltage. in1_min Minimum input voltage. in1_max Maximum input voltage. -in1_crit Critical high input voltage (LM25063 only). -in1_lcrit Critical low input voltage (LM25063 only). in1_min_alarm Input voltage low alarm. in1_max_alarm Input voltage high alarm. -in1_lcrit_alarm Input voltage critical low alarm (LM25063 only). -in1_crit_alarm Input voltage critical high alarm. (LM25063 only). in2_label "vmon" in2_input Measured voltage on VAUX pin @@ -94,16 +85,12 @@ in3_input Measured output voltage. in3_average Average measured output voltage. in3_min Minimum output voltage. in3_min_alarm Output voltage low alarm. 
-in3_highest Historical minimum output voltage (LM25063 only). -in3_lowest Historical maximum output voltage (LM25063 only). curr1_label "iin" curr1_input Measured input current. curr1_average Average measured input current. curr1_max Maximum input current. -curr1_crit Critical input current (LM25063 only). curr1_max_alarm Input current high alarm. -curr1_crit_alarm Input current critical high alarm (LM25063 only). power1_label "pin" power1_input Measured input power. @@ -113,11 +100,6 @@ power1_alarm Input power alarm power1_input_highest Historical maximum power. power1_reset_history Write any value to reset maximum power history. -power2_label "pout". LM25063 only. -power2_input Measured output power. -power2_max Maximum output power limit. -power2_crit Critical output power limit. - temp1_input Measured temperature. temp1_max Maximum temperature. temp1_crit Critical high temperature. diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c index 10d17fb8f283..53db78753a0d 100644 --- a/drivers/hwmon/pmbus/lm25066.c +++ b/drivers/hwmon/pmbus/lm25066.c @@ -1,5 +1,5 @@ /* - * Hardware monitoring driver for LM25056 / LM25063 / LM25066 / LM5064 / LM5066 + * Hardware monitoring driver for LM25056 / LM25066 / LM5064 / LM5066 * * Copyright (c) 2011 Ericsson AB. 
* Copyright (c) 2013 Guenter Roeck @@ -28,7 +28,7 @@ #include #include "pmbus.h" -enum chips { lm25056, lm25063, lm25066, lm5064, lm5066, lm5066i }; +enum chips { lm25056, lm25066, lm5064, lm5066, lm5066i }; #define LM25066_READ_VAUX 0xd0 #define LM25066_MFR_READ_IIN 0xd1 @@ -53,11 +53,6 @@ enum chips { lm25056, lm25063, lm25066, lm5064, lm5066, lm5066i }; #define LM25056_MFR_STS_VAUX_OV_WARN BIT(1) #define LM25056_MFR_STS_VAUX_UV_WARN BIT(0) -/* LM25063 only */ - -#define LM25063_READ_VOUT_MAX 0xe5 -#define LM25063_READ_VOUT_MIN 0xe6 - struct __coeff { short m, b, R; }; @@ -122,36 +117,6 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { .m = 16, }, }, - [lm25063] = { - [PSC_VOLTAGE_IN] = { - .m = 16000, - .R = -2, - }, - [PSC_VOLTAGE_OUT] = { - .m = 16000, - .R = -2, - }, - [PSC_CURRENT_IN] = { - .m = 10000, - .R = -2, - }, - [PSC_CURRENT_IN_L] = { - .m = 10000, - .R = -2, - }, - [PSC_POWER] = { - .m = 5000, - .R = -3, - }, - [PSC_POWER_L] = { - .m = 5000, - .R = -3, - }, - [PSC_TEMPERATURE] = { - .m = 15596, - .R = -3, - }, - }, [lm5064] = { [PSC_VOLTAGE_IN] = { .m = 4611, @@ -272,10 +237,6 @@ static int lm25066_read_word_data(struct i2c_client *client, int page, int reg) /* VIN: 6.14 mV VAUX: 293 uV LSB */ ret = DIV_ROUND_CLOSEST(ret * 293, 6140); break; - case lm25063: - /* VIN: 6.25 mV VAUX: 200.0 uV LSB */ - ret = DIV_ROUND_CLOSEST(ret * 20, 625); - break; case lm25066: /* VIN: 4.54 mV VAUX: 283.2 uV LSB */ ret = DIV_ROUND_CLOSEST(ret * 2832, 45400); @@ -330,24 +291,6 @@ static int lm25066_read_word_data(struct i2c_client *client, int page, int reg) return ret; } -static int lm25063_read_word_data(struct i2c_client *client, int page, int reg) -{ - int ret; - - switch (reg) { - case PMBUS_VIRT_READ_VOUT_MAX: - ret = pmbus_read_word_data(client, 0, LM25063_READ_VOUT_MAX); - break; - case PMBUS_VIRT_READ_VOUT_MIN: - ret = pmbus_read_word_data(client, 0, LM25063_READ_VOUT_MIN); - break; - default: - ret = lm25066_read_word_data(client, page, 
reg); - break; - } - return ret; -} - static int lm25056_read_word_data(struct i2c_client *client, int page, int reg) { int ret; @@ -502,11 +445,6 @@ static int lm25066_probe(struct i2c_client *client, info->read_word_data = lm25056_read_word_data; info->read_byte_data = lm25056_read_byte_data; data->rlimit = 0x0fff; - } else if (data->id == lm25063) { - info->func[0] |= PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT - | PMBUS_HAVE_POUT; - info->read_word_data = lm25063_read_word_data; - data->rlimit = 0xffff; } else { info->func[0] |= PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT; info->read_word_data = lm25066_read_word_data; @@ -543,7 +481,6 @@ static int lm25066_probe(struct i2c_client *client, static const struct i2c_device_id lm25066_id[] = { {"lm25056", lm25056}, - {"lm25063", lm25063}, {"lm25066", lm25066}, {"lm5064", lm5064}, {"lm5066", lm5066}, -- cgit v1.2.3 From d206636e7697f47332774f29b90b92f6047d265d Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 20 Nov 2017 15:12:03 +1030 Subject: hwmon: (pmbus) Add fan control support Expose fanX_target, pwmX and pwmX_enable hwmon sysfs attributes. Fans in a PMBus device are driven by the configuration of two registers, FAN_CONFIG_x_y and FAN_COMMAND_x: FAN_CONFIG_x_y dictates how the fan and the tacho operate (if installed), while FAN_COMMAND_x sets the desired fan rate. The unit of FAN_COMMAND_x is dependent on the operational fan mode, RPM or PWM percent duty, as determined by the corresponding configuration in FAN_CONFIG_x_y. The mapping of fanX_target, pwmX and pwmX_enable onto FAN_CONFIG_x_y and FAN_COMMAND_x is implemented with the addition of virtual registers to facilitate the necessary side-effects of each access: 1. PMBUS_VIRT_FAN_TARGET_x 2. PMBUS_VIRT_PWM_x 3. 
PMBUS_VIRT_PWM_ENABLE_x Some complexity arises with the fanX_target and pwmX attributes both mapping onto FAN_COMMAND_x: There is no general mapping between PWM percent duty and RPM, so we can't display values in either attribute in terms of the other (which in my mind is the intuitive, if impossible, behaviour). This problem also affects the pwmX_enable attribute which allows userspace to switch between full speed, manual PWM and a number of automatic control modes, possibly including a switch to RPM behaviour (e.g. automatically adjusting PWM duty to reach an RPM target, the behaviour of fanX_target). The next most intuitive behaviour is for fanX_target and pwmX to simply be independent, to retain their most recently set value even if that value is not active on the hardware (due to switching to the alternative control mode). This property of retaining the value independent of the hardware state has useful results for both userspace and the kernel: Userspace always sees a sensible value in the attribute (the last thing it was set to, as opposed to 0 or receiving an error on read), and the kernel can use the attributes as a value cache. This latter point eases the implementation of pwmX_enable, which can look up the associated pmbus_sensor object, take its cached value and apply it to hardware on changing control mode. This ensures we will not arbitrarily set a PWM value as an RPM value or vice versa, and we can assume that the RPM or PWM value set was sensible at least at some point in the past. Finally, the DIRECT mode coefficients of some controllers are different between RPM and PWM percent duty control modes, so PSC_PWM is introduced to capture the necessary coefficients. As pmbus core had no PWM support previously, PSC_FAN continues to be used to capture the RPM DIRECT coefficients, but in order to avoid falsely applying RPM scaling to PWM values I have introduced the PMBUS_HAVE_PWM12 and PMBUS_HAVE_PWM34 feature bits.
These feature bits allow drivers to explicitly declare PWM support in order to have the attributes exposed. Signed-off-by: Andrew Jeffery Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.h | 39 ++++++- drivers/hwmon/pmbus/pmbus_core.c | 238 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 259 insertions(+), 18 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index fa613bd209e3..b54d7604d3ef 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -190,6 +190,33 @@ enum pmbus_regs { PMBUS_VIRT_VMON_UV_FAULT_LIMIT, PMBUS_VIRT_VMON_OV_FAULT_LIMIT, PMBUS_VIRT_STATUS_VMON, + + /* + * RPM and PWM Fan control + * + * Drivers wanting to expose PWM control must define the behaviour of + * PMBUS_VIRT_PWM_[1-4] and PMBUS_VIRT_PWM_ENABLE_[1-4] in the + * {read,write}_word_data callback. + * + * pmbus core provides a default implementation for + * PMBUS_VIRT_FAN_TARGET_[1-4]. + * + * TARGET, PWM and PWM_ENABLE members must be defined sequentially; + * pmbus core uses the difference between the provided register and + * it's _1 counterpart to calculate the FAN/PWM ID. 
+ */ + PMBUS_VIRT_FAN_TARGET_1, + PMBUS_VIRT_FAN_TARGET_2, + PMBUS_VIRT_FAN_TARGET_3, + PMBUS_VIRT_FAN_TARGET_4, + PMBUS_VIRT_PWM_1, + PMBUS_VIRT_PWM_2, + PMBUS_VIRT_PWM_3, + PMBUS_VIRT_PWM_4, + PMBUS_VIRT_PWM_ENABLE_1, + PMBUS_VIRT_PWM_ENABLE_2, + PMBUS_VIRT_PWM_ENABLE_3, + PMBUS_VIRT_PWM_ENABLE_4, }; /* @@ -223,6 +250,8 @@ enum pmbus_regs { #define PB_FAN_1_RPM BIT(6) #define PB_FAN_1_INSTALLED BIT(7) +enum pmbus_fan_mode { percent = 0, rpm }; + /* * STATUS_BYTE, STATUS_WORD (lower) */ @@ -313,6 +342,7 @@ enum pmbus_sensor_classes { PSC_POWER, PSC_TEMPERATURE, PSC_FAN, + PSC_PWM, PSC_NUM_CLASSES /* Number of power sensor classes */ }; @@ -339,6 +369,8 @@ enum pmbus_sensor_classes { #define PMBUS_HAVE_STATUS_FAN34 BIT(17) #define PMBUS_HAVE_VMON BIT(18) #define PMBUS_HAVE_STATUS_VMON BIT(19) +#define PMBUS_HAVE_PWM12 BIT(20) +#define PMBUS_HAVE_PWM34 BIT(21) enum pmbus_data_format { linear = 0, direct, vid }; enum vrm_version { vr11 = 0, vr12, vr13 }; @@ -421,5 +453,10 @@ int pmbus_do_probe(struct i2c_client *client, const struct i2c_device_id *id, int pmbus_do_remove(struct i2c_client *client); const struct pmbus_driver_info *pmbus_get_driver_info(struct i2c_client *client); - +int pmbus_get_fan_rate_device(struct i2c_client *client, int page, int id, + enum pmbus_fan_mode mode); +int pmbus_get_fan_rate_cached(struct i2c_client *client, int page, int id, + enum pmbus_fan_mode mode); +int pmbus_update_fan(struct i2c_client *client, int page, int id, + u8 config, u8 mask, u16 command); #endif /* PMBUS_H */ diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index a139940cd991..fdd33857f117 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -65,6 +65,7 @@ struct pmbus_sensor { u16 reg; /* register */ enum pmbus_sensor_classes class; /* sensor class */ bool update; /* runtime sensor update needed */ + bool convert; /* Whether or not to apply linear/vid/direct */ int data; /* Sensor data. 
Negative if there was a read error */ }; @@ -129,6 +130,27 @@ struct pmbus_debugfs_entry { u8 reg; }; +static const int pmbus_fan_rpm_mask[] = { + PB_FAN_1_RPM, + PB_FAN_2_RPM, + PB_FAN_1_RPM, + PB_FAN_2_RPM, +}; + +static const int pmbus_fan_config_registers[] = { + PMBUS_FAN_CONFIG_12, + PMBUS_FAN_CONFIG_12, + PMBUS_FAN_CONFIG_34, + PMBUS_FAN_CONFIG_34 +}; + +static const int pmbus_fan_command_registers[] = { + PMBUS_FAN_COMMAND_1, + PMBUS_FAN_COMMAND_2, + PMBUS_FAN_COMMAND_3, + PMBUS_FAN_COMMAND_4, +}; + void pmbus_clear_cache(struct i2c_client *client) { struct pmbus_data *data = i2c_get_clientdata(client); @@ -198,6 +220,28 @@ int pmbus_write_word_data(struct i2c_client *client, int page, u8 reg, } EXPORT_SYMBOL_GPL(pmbus_write_word_data); + +static int pmbus_write_virt_reg(struct i2c_client *client, int page, int reg, + u16 word) +{ + int bit; + int id; + int rv; + + switch (reg) { + case PMBUS_VIRT_FAN_TARGET_1 ... PMBUS_VIRT_FAN_TARGET_4: + id = reg - PMBUS_VIRT_FAN_TARGET_1; + bit = pmbus_fan_rpm_mask[id]; + rv = pmbus_update_fan(client, page, id, bit, bit, word); + break; + default: + rv = -ENXIO; + break; + } + + return rv; +} + /* * _pmbus_write_word_data() is similar to pmbus_write_word_data(), but checks if * a device specific mapping function exists and calls it if necessary. 
@@ -214,11 +258,38 @@ static int _pmbus_write_word_data(struct i2c_client *client, int page, int reg, if (status != -ENODATA) return status; } + if (reg >= PMBUS_VIRT_BASE) - return -ENXIO; + return pmbus_write_virt_reg(client, page, reg, word); + return pmbus_write_word_data(client, page, reg, word); } +int pmbus_update_fan(struct i2c_client *client, int page, int id, + u8 config, u8 mask, u16 command) +{ + int from; + int rv; + u8 to; + + from = pmbus_read_byte_data(client, page, + pmbus_fan_config_registers[id]); + if (from < 0) + return from; + + to = (from & ~mask) | (config & mask); + if (to != from) { + rv = pmbus_write_byte_data(client, page, + pmbus_fan_config_registers[id], to); + if (rv < 0) + return rv; + } + + return _pmbus_write_word_data(client, page, + pmbus_fan_command_registers[id], command); +} +EXPORT_SYMBOL_GPL(pmbus_update_fan); + int pmbus_read_word_data(struct i2c_client *client, int page, u8 reg) { int rv; @@ -231,6 +302,24 @@ int pmbus_read_word_data(struct i2c_client *client, int page, u8 reg) } EXPORT_SYMBOL_GPL(pmbus_read_word_data); +static int pmbus_read_virt_reg(struct i2c_client *client, int page, int reg) +{ + int rv; + int id; + + switch (reg) { + case PMBUS_VIRT_FAN_TARGET_1 ... PMBUS_VIRT_FAN_TARGET_4: + id = reg - PMBUS_VIRT_FAN_TARGET_1; + rv = pmbus_get_fan_rate_device(client, page, id, rpm); + break; + default: + rv = -ENXIO; + break; + } + + return rv; +} + /* * _pmbus_read_word_data() is similar to pmbus_read_word_data(), but checks if * a device specific mapping function exists and calls it if necessary. 
@@ -246,8 +335,10 @@ static int _pmbus_read_word_data(struct i2c_client *client, int page, int reg) if (status != -ENODATA) return status; } + if (reg >= PMBUS_VIRT_BASE) - return -ENXIO; + return pmbus_read_virt_reg(client, page, reg); + return pmbus_read_word_data(client, page, reg); } @@ -312,6 +403,68 @@ static int _pmbus_read_byte_data(struct i2c_client *client, int page, int reg) return pmbus_read_byte_data(client, page, reg); } +static struct pmbus_sensor *pmbus_find_sensor(struct pmbus_data *data, int page, + int reg) +{ + struct pmbus_sensor *sensor; + + for (sensor = data->sensors; sensor; sensor = sensor->next) { + if (sensor->page == page && sensor->reg == reg) + return sensor; + } + + return ERR_PTR(-EINVAL); +} + +static int pmbus_get_fan_rate(struct i2c_client *client, int page, int id, + enum pmbus_fan_mode mode, + bool from_cache) +{ + struct pmbus_data *data = i2c_get_clientdata(client); + bool want_rpm, have_rpm; + struct pmbus_sensor *s; + int config; + int reg; + + want_rpm = (mode == rpm); + + if (from_cache) { + reg = want_rpm ? 
PMBUS_VIRT_FAN_TARGET_1 : PMBUS_VIRT_PWM_1; + s = pmbus_find_sensor(data, page, reg + id); + if (IS_ERR(s)) + return PTR_ERR(s); + + return s->data; + } + + config = pmbus_read_byte_data(client, page, + pmbus_fan_config_registers[id]); + if (config < 0) + return config; + + have_rpm = !!(config & pmbus_fan_rpm_mask[id]); + if (want_rpm == have_rpm) + return pmbus_read_word_data(client, page, + pmbus_fan_command_registers[id]); + + /* Can't sensibly map between RPM and PWM, just return zero */ + return 0; +} + +int pmbus_get_fan_rate_device(struct i2c_client *client, int page, int id, + enum pmbus_fan_mode mode) +{ + return pmbus_get_fan_rate(client, page, id, mode, false); +} +EXPORT_SYMBOL_GPL(pmbus_get_fan_rate_device); + +int pmbus_get_fan_rate_cached(struct i2c_client *client, int page, int id, + enum pmbus_fan_mode mode) +{ + return pmbus_get_fan_rate(client, page, id, mode, true); +} +EXPORT_SYMBOL_GPL(pmbus_get_fan_rate_cached); + static void pmbus_clear_fault_page(struct i2c_client *client, int page) { _pmbus_write_byte(client, page, PMBUS_CLEAR_FAULTS); @@ -513,7 +666,7 @@ static long pmbus_reg2data_direct(struct pmbus_data *data, /* X = 1/m * (Y * 10^-R - b) */ R = -R; /* scale result to milli-units for everything but fans */ - if (sensor->class != PSC_FAN) { + if (!(sensor->class == PSC_FAN || sensor->class == PSC_PWM)) { R += 3; b *= 1000; } @@ -568,6 +721,9 @@ static long pmbus_reg2data(struct pmbus_data *data, struct pmbus_sensor *sensor) { long val; + if (!sensor->convert) + return sensor->data; + switch (data->info->format[sensor->class]) { case direct: val = pmbus_reg2data_direct(data, sensor); @@ -672,7 +828,7 @@ static u16 pmbus_data2reg_direct(struct pmbus_data *data, } /* Calculate Y = (m * X + b) * 10^R */ - if (sensor->class != PSC_FAN) { + if (!(sensor->class == PSC_FAN || sensor->class == PSC_PWM)) { R -= 3; /* Adjust R and b for data in milli-units */ b *= 1000; } @@ -703,6 +859,9 @@ static u16 pmbus_data2reg(struct pmbus_data *data, { u16 
regval; + if (!sensor->convert) + return val; + switch (data->info->format[sensor->class]) { case direct: regval = pmbus_data2reg_direct(data, sensor, val); @@ -915,7 +1074,8 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data, const char *name, const char *type, int seq, int page, int reg, enum pmbus_sensor_classes class, - bool update, bool readonly) + bool update, bool readonly, + bool convert) { struct pmbus_sensor *sensor; struct device_attribute *a; @@ -925,12 +1085,18 @@ static struct pmbus_sensor *pmbus_add_sensor(struct pmbus_data *data, return NULL; a = &sensor->attribute; - snprintf(sensor->name, sizeof(sensor->name), "%s%d_%s", - name, seq, type); + if (type) + snprintf(sensor->name, sizeof(sensor->name), "%s%d_%s", + name, seq, type); + else + snprintf(sensor->name, sizeof(sensor->name), "%s%d", + name, seq); + sensor->page = page; sensor->reg = reg; sensor->class = class; sensor->update = update; + sensor->convert = convert; pmbus_dev_attr_init(a, sensor->name, readonly ? 
S_IRUGO : S_IRUGO | S_IWUSR, pmbus_show_sensor, pmbus_set_sensor); @@ -1029,7 +1195,7 @@ static int pmbus_add_limit_attrs(struct i2c_client *client, curr = pmbus_add_sensor(data, name, l->attr, index, page, l->reg, attr->class, attr->update || l->update, - false); + false, true); if (!curr) return -ENOMEM; if (l->sbit && (info->func[page] & attr->sfunc)) { @@ -1068,7 +1234,7 @@ static int pmbus_add_sensor_attrs_one(struct i2c_client *client, return ret; } base = pmbus_add_sensor(data, name, "input", index, page, attr->reg, - attr->class, true, true); + attr->class, true, true, true); if (!base) return -ENOMEM; if (attr->sfunc) { @@ -1592,13 +1758,6 @@ static const int pmbus_fan_registers[] = { PMBUS_READ_FAN_SPEED_4 }; -static const int pmbus_fan_config_registers[] = { - PMBUS_FAN_CONFIG_12, - PMBUS_FAN_CONFIG_12, - PMBUS_FAN_CONFIG_34, - PMBUS_FAN_CONFIG_34 -}; - static const int pmbus_fan_status_registers[] = { PMBUS_STATUS_FAN_12, PMBUS_STATUS_FAN_12, @@ -1621,6 +1780,42 @@ static const u32 pmbus_fan_status_flags[] = { }; /* Fans */ + +/* Precondition: FAN_CONFIG_x_y and FAN_COMMAND_x must exist for the fan ID */ +static int pmbus_add_fan_ctrl(struct i2c_client *client, + struct pmbus_data *data, int index, int page, int id, + u8 config) +{ + struct pmbus_sensor *sensor; + + sensor = pmbus_add_sensor(data, "fan", "target", index, page, + PMBUS_VIRT_FAN_TARGET_1 + id, PSC_FAN, + false, false, true); + + if (!sensor) + return -ENOMEM; + + if (!((data->info->func[page] & PMBUS_HAVE_PWM12) || + (data->info->func[page] & PMBUS_HAVE_PWM34))) + return 0; + + sensor = pmbus_add_sensor(data, "pwm", NULL, index, page, + PMBUS_VIRT_PWM_1 + id, PSC_PWM, + false, false, true); + + if (!sensor) + return -ENOMEM; + + sensor = pmbus_add_sensor(data, "pwm", "enable", index, page, + PMBUS_VIRT_PWM_ENABLE_1 + id, PSC_PWM, + true, false, false); + + if (!sensor) + return -ENOMEM; + + return 0; +} + static int pmbus_add_fan_attributes(struct i2c_client *client, struct pmbus_data 
*data) { @@ -1655,9 +1850,18 @@ static int pmbus_add_fan_attributes(struct i2c_client *client, if (pmbus_add_sensor(data, "fan", "input", index, page, pmbus_fan_registers[f], - PSC_FAN, true, true) == NULL) + PSC_FAN, true, true, true) == NULL) return -ENOMEM; + /* Fan control */ + if (pmbus_check_word_register(client, page, + pmbus_fan_command_registers[f])) { + ret = pmbus_add_fan_ctrl(client, data, index, + page, f, regval); + if (ret < 0) + return ret; + } + /* * Each fan status register covers multiple fans, * so we have to do some magic. -- cgit v1.2.3 From 56ad86b4b16e4b7154300d71f8e93cca64b98e92 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 20 Nov 2017 15:12:04 +1030 Subject: hwmon: (pmbus/max31785) Add fan control The implementation makes use of the new fan control virtual registers exposed by the pmbus core. It mixes use of the default implementations with some overrides via the read/write handlers to handle FAN_COMMAND_1 on the MAX31785, whose definition breaks the value range into various control bands dependent on RPM or PWM mode. Signed-off-by: Andrew Jeffery Signed-off-by: Guenter Roeck --- Documentation/hwmon/max31785 | 7 +++ drivers/hwmon/pmbus/max31785.c | 138 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 144 insertions(+), 1 deletion(-) diff --git a/Documentation/hwmon/max31785 b/Documentation/hwmon/max31785 index 45fb6093dec2..7b0a0a8cdb6b 100644 --- a/Documentation/hwmon/max31785 +++ b/Documentation/hwmon/max31785 @@ -32,6 +32,7 @@ Sysfs attributes fan[1-4]_alarm Fan alarm. fan[1-4]_fault Fan fault. fan[1-4]_input Fan RPM. 
+fan[1-4]_target Fan input target in[1-6]_crit Critical maximum output voltage in[1-6]_crit_alarm Output voltage critical high alarm @@ -44,6 +45,12 @@ in[1-6]_max_alarm Output voltage high alarm in[1-6]_min Minimum output voltage in[1-6]_min_alarm Output voltage low alarm +pwm[1-4] Fan target duty cycle (0..255) +pwm[1-4]_enable 0: Full-speed + 1: Manual PWM control + 2: Automatic PWM (tach-feedback RPM fan-control) + 3: Automatic closed-loop (temp-feedback fan-control) + temp[1-11]_crit Critical high temperature temp[1-11]_crit_alarm Chip temperature critical high alarm temp[1-11]_input Measured temperature diff --git a/drivers/hwmon/pmbus/max31785.c b/drivers/hwmon/pmbus/max31785.c index 9313849d5160..8706a696c89a 100644 --- a/drivers/hwmon/pmbus/max31785.c +++ b/drivers/hwmon/pmbus/max31785.c @@ -20,8 +20,136 @@ enum max31785_regs { #define MAX31785_NR_PAGES 23 +static int max31785_get_pwm(struct i2c_client *client, int page) +{ + int rv; + + rv = pmbus_get_fan_rate_device(client, page, 0, percent); + if (rv < 0) + return rv; + else if (rv >= 0x8000) + return 0; + else if (rv >= 0x2711) + return 0x2710; + + return rv; +} + +static int max31785_get_pwm_mode(struct i2c_client *client, int page) +{ + int config; + int command; + + config = pmbus_read_byte_data(client, page, PMBUS_FAN_CONFIG_12); + if (config < 0) + return config; + + command = pmbus_read_word_data(client, page, PMBUS_FAN_COMMAND_1); + if (command < 0) + return command; + + if (config & PB_FAN_1_RPM) + return (command >= 0x8000) ? 
3 : 2; + + if (command >= 0x8000) + return 3; + else if (command >= 0x2711) + return 0; + + return 1; +} + +static int max31785_read_word_data(struct i2c_client *client, int page, + int reg) +{ + int rv; + + switch (reg) { + case PMBUS_VIRT_PWM_1: + rv = max31785_get_pwm(client, page); + break; + case PMBUS_VIRT_PWM_ENABLE_1: + rv = max31785_get_pwm_mode(client, page); + break; + default: + rv = -ENODATA; + break; + } + + return rv; +} + +static inline u32 max31785_scale_pwm(u32 sensor_val) +{ + /* + * The datasheet describes the accepted value range for manual PWM as + * [0, 0x2710], while the hwmon pwmX sysfs interface accepts values in + * [0, 255]. The MAX31785 uses DIRECT mode to scale the FAN_COMMAND + * registers and in PWM mode the coefficients are m=1, b=0, R=2. The + * important observation here is that 0x2710 == 10000 == 100 * 100. + * + * R=2 (== 10^2 == 100) accounts for scaling the value provided at the + * sysfs interface into the required hardware resolution, but it does + * not yet yield a value that we can write to the device (this initial + * scaling is handled by pmbus_data2reg()). Multiplying by 100 below + * translates the parameter value into the percentage units required by + * PMBus, and then we scale back by 255 as required by the hwmon pwmX + * interface to yield the percentage value at the appropriate + * resolution for hardware. 
+ */ + return (sensor_val * 100) / 255; +} + +static int max31785_pwm_enable(struct i2c_client *client, int page, + u16 word) +{ + int config = 0; + int rate; + + switch (word) { + case 0: + rate = 0x7fff; + break; + case 1: + rate = pmbus_get_fan_rate_cached(client, page, 0, percent); + if (rate < 0) + return rate; + rate = max31785_scale_pwm(rate); + break; + case 2: + config = PB_FAN_1_RPM; + rate = pmbus_get_fan_rate_cached(client, page, 0, rpm); + if (rate < 0) + return rate; + break; + case 3: + rate = 0xffff; + break; + default: + return -EINVAL; + } + + return pmbus_update_fan(client, page, 0, config, PB_FAN_1_RPM, rate); +} + +static int max31785_write_word_data(struct i2c_client *client, int page, + int reg, u16 word) +{ + switch (reg) { + case PMBUS_VIRT_PWM_1: + return pmbus_update_fan(client, page, 0, 0, PB_FAN_1_RPM, + max31785_scale_pwm(word)); + case PMBUS_VIRT_PWM_ENABLE_1: + return max31785_pwm_enable(client, page, word); + default: + break; + } + + return -ENODATA; +} + #define MAX31785_FAN_FUNCS \ - (PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12) + (PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12 | PMBUS_HAVE_PWM12) #define MAX31785_TEMP_FUNCS \ (PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP) @@ -32,11 +160,19 @@ enum max31785_regs { static const struct pmbus_driver_info max31785_info = { .pages = MAX31785_NR_PAGES, + .write_word_data = max31785_write_word_data, + .read_word_data = max31785_read_word_data, + /* RPM */ .format[PSC_FAN] = direct, .m[PSC_FAN] = 1, .b[PSC_FAN] = 0, .R[PSC_FAN] = 0, + /* PWM */ + .format[PSC_PWM] = direct, + .m[PSC_PWM] = 1, + .b[PSC_PWM] = 0, + .R[PSC_PWM] = 2, .func[0] = MAX31785_FAN_FUNCS, .func[1] = MAX31785_FAN_FUNCS, .func[2] = MAX31785_FAN_FUNCS, -- cgit v1.2.3 From 464df6fa3766784b85b00d56cd4d7c706aee5375 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 20 Nov 2017 15:12:05 +1030 Subject: hwmon: (pmbus) Add virtual page config bit Some circumstances call for virtual pages, to expose multiple values packed into an 
extended PMBus register in a manner non-compliant with the PMBus standard. An example of this is the Maxim MAX31785 controller, which extends the READ_FAN_SPEED_1 PMBus register from two to four bytes to support tach readings for both rotors of a dual rotor fan. This extended register contains two word-sized values, one reporting the rate of the fastest rotor, the other the rate of the slowest. The concept of virtual pages aids this situation by mapping the page number onto the value to be selected from the vectored result. We should not try to set virtual pages on the device as such a page explicitly doesn't exist; add a flag so we can avoid doing so. Signed-off-by: Andrew Jeffery Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.h | 2 ++ drivers/hwmon/pmbus/pmbus_core.c | 27 ++++++++++++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index b54d7604d3ef..d39d506aa63e 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -372,6 +372,8 @@ enum pmbus_sensor_classes { #define PMBUS_HAVE_PWM12 BIT(20) #define PMBUS_HAVE_PWM34 BIT(21) +#define PMBUS_PAGE_VIRTUAL BIT(31) + enum pmbus_data_format { linear = 0, direct, vid }; enum vrm_version { vr11 = 0, vr12, vr13 }; diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index fdd33857f117..99ab39f19bf4 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -162,18 +162,27 @@ EXPORT_SYMBOL_GPL(pmbus_clear_cache); int pmbus_set_page(struct i2c_client *client, int page) { struct pmbus_data *data = i2c_get_clientdata(client); - int rv = 0; - int newpage; + int rv; + + if (page < 0 || page == data->currpage) + return 0; - if (page >= 0 && page != data->currpage) { + if (!(data->info->func[page] & PMBUS_PAGE_VIRTUAL)) { rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE, page); - newpage = i2c_smbus_read_byte_data(client, PMBUS_PAGE); - if (newpage 
!= page) - rv = -EIO; - else - data->currpage = page; + if (rv < 0) + return rv; + + rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE); + if (rv < 0) + return rv; + + if (rv != page) + return -EIO; } - return rv; + + data->currpage = page; + + return 0; } EXPORT_SYMBOL_GPL(pmbus_set_page); -- cgit v1.2.3 From cf583b4275761754638c946ff777546d1a9b6744 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Mon, 20 Nov 2017 15:12:06 +1030 Subject: hwmon: (pmbus/max31785) Add dual tachometer support The dual tachometer feature is implemented in hardware with a TACHSEL input to indicate the rotor under measurement, and exposed on the device by extending the READ_FAN_SPEED_1 word with two extra bytes*. The need to read the non-standard four-byte response leads to a cut-down implementation of i2c_smbus_xfer_emulated() included in the driver. Further, to expose the second rotor tachometer value to userspace the values are exposed through virtual pages. We re-route accesses to FAN_CONFIG_1_2 and READ_FAN_SPEED_1 on pages 23-28 (not defined by the hardware) to the same registers on pages 0-5, and with the latter command we extract the value from the second word of the four-byte response. * The documentation recommends the slower rotor be associated with TACHSEL=0, which corresponds to the first word of the response. The TACHSEL=0 measurement is used by the controller's closed-loop fan management to judge target fan rate. Signed-off-by: Andrew Jeffery Signed-off-by: Guenter Roeck --- Documentation/hwmon/max31785 | 8 ++- drivers/hwmon/pmbus/max31785.c | 147 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+), 3 deletions(-) diff --git a/Documentation/hwmon/max31785 b/Documentation/hwmon/max31785 index 7b0a0a8cdb6b..270c5f865261 100644 --- a/Documentation/hwmon/max31785 +++ b/Documentation/hwmon/max31785 @@ -17,8 +17,9 @@ management with temperature and remote voltage sensing. 
Various fan control features are provided, including PWM frequency control, temperature hysteresis, dual tachometer measurements, and fan health monitoring. -For dual rotor fan configuration, the MAX31785 exposes the slowest rotor of the -two in the fan[1-4]_input attributes. +For dual-rotor configurations the MAX31785A exposes the second rotor tachometer +readings in attributes fan[5-8]_input. By contrast the MAX31785 only exposes +the slowest rotor measurement, and does so in the fan[1-4]_input attributes. Usage Notes ----------- @@ -31,7 +32,8 @@ Sysfs attributes fan[1-4]_alarm Fan alarm. fan[1-4]_fault Fan fault. -fan[1-4]_input Fan RPM. +fan[1-8]_input Fan RPM. On the MAX31785A, inputs 5-8 correspond to the + second rotor of fans 1-4 fan[1-4]_target Fan input target in[1-6]_crit Critical maximum output voltage diff --git a/drivers/hwmon/pmbus/max31785.c b/drivers/hwmon/pmbus/max31785.c index 8706a696c89a..bffab449be39 100644 --- a/drivers/hwmon/pmbus/max31785.c +++ b/drivers/hwmon/pmbus/max31785.c @@ -16,9 +16,79 @@ enum max31785_regs { MFR_REVISION = 0x9b, + MFR_FAN_CONFIG = 0xf1, }; +#define MAX31785 0x3030 +#define MAX31785A 0x3040 + +#define MFR_FAN_CONFIG_DUAL_TACH BIT(12) + #define MAX31785_NR_PAGES 23 +#define MAX31785_NR_FAN_PAGES 6 + +static int max31785_read_byte_data(struct i2c_client *client, int page, + int reg) +{ + if (page < MAX31785_NR_PAGES) + return -ENODATA; + + switch (reg) { + case PMBUS_VOUT_MODE: + return -ENOTSUPP; + case PMBUS_FAN_CONFIG_12: + return pmbus_read_byte_data(client, page - MAX31785_NR_PAGES, + reg); + } + + return -ENODATA; +} + +static int max31785_write_byte(struct i2c_client *client, int page, u8 value) +{ + if (page < MAX31785_NR_PAGES) + return -ENODATA; + + return -ENOTSUPP; +} + +static int max31785_read_long_data(struct i2c_client *client, int page, + int reg, u32 *data) +{ + unsigned char cmdbuf[1]; + unsigned char rspbuf[4]; + int rc; + + struct i2c_msg msg[2] = { + { + .addr = client->addr, + .flags = 0, + .len 
= sizeof(cmdbuf), + .buf = cmdbuf, + }, + { + .addr = client->addr, + .flags = I2C_M_RD, + .len = sizeof(rspbuf), + .buf = rspbuf, + }, + }; + + cmdbuf[0] = reg; + + rc = pmbus_set_page(client, page); + if (rc < 0) + return rc; + + rc = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg)); + if (rc < 0) + return rc; + + *data = (rspbuf[0] << (0 * 8)) | (rspbuf[1] << (1 * 8)) | + (rspbuf[2] << (2 * 8)) | (rspbuf[3] << (3 * 8)); + + return rc; +} static int max31785_get_pwm(struct i2c_client *client, int page) { @@ -62,9 +132,30 @@ static int max31785_get_pwm_mode(struct i2c_client *client, int page) static int max31785_read_word_data(struct i2c_client *client, int page, int reg) { + u32 val; int rv; switch (reg) { + case PMBUS_READ_FAN_SPEED_1: + if (page < MAX31785_NR_PAGES) + return -ENODATA; + + rv = max31785_read_long_data(client, page - MAX31785_NR_PAGES, + reg, &val); + if (rv < 0) + return rv; + + rv = (val >> 16) & 0xffff; + break; + case PMBUS_FAN_COMMAND_1: + /* + * PMBUS_FAN_COMMAND_x is probed to judge whether or not to + * expose fan control registers. + * + * Don't expose fan_target attribute for virtual pages. + */ + rv = (page >= MAX31785_NR_PAGES) ? 
-ENOTSUPP : -ENODATA; + break; case PMBUS_VIRT_PWM_1: rv = max31785_get_pwm(client, page); break; @@ -157,11 +248,15 @@ static int max31785_write_word_data(struct i2c_client *client, int page, #define MAX31785_VOUT_FUNCS \ (PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT) +#define MAX37185_NUM_FAN_PAGES 6 + static const struct pmbus_driver_info max31785_info = { .pages = MAX31785_NR_PAGES, .write_word_data = max31785_write_word_data, + .read_byte_data = max31785_read_byte_data, .read_word_data = max31785_read_word_data, + .write_byte = max31785_write_byte, /* RPM */ .format[PSC_FAN] = direct, @@ -208,13 +303,46 @@ static const struct pmbus_driver_info max31785_info = { .func[22] = MAX31785_VOUT_FUNCS, }; +static int max31785_configure_dual_tach(struct i2c_client *client, + struct pmbus_driver_info *info) +{ + int ret; + int i; + + for (i = 0; i < MAX31785_NR_FAN_PAGES; i++) { + ret = i2c_smbus_write_byte_data(client, PMBUS_PAGE, i); + if (ret < 0) + return ret; + + ret = i2c_smbus_read_word_data(client, MFR_FAN_CONFIG); + if (ret < 0) + return ret; + + if (ret & MFR_FAN_CONFIG_DUAL_TACH) { + int virtual = MAX31785_NR_PAGES + i; + + info->pages = virtual + 1; + info->func[virtual] |= PMBUS_HAVE_FAN12; + info->func[virtual] |= PMBUS_PAGE_VIRTUAL; + } + } + + return 0; +} + static int max31785_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct device *dev = &client->dev; struct pmbus_driver_info *info; + bool dual_tach = false; s64 ret; + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA | + I2C_FUNC_SMBUS_WORD_DATA)) + return -ENODEV; + info = devm_kzalloc(dev, sizeof(struct pmbus_driver_info), GFP_KERNEL); if (!info) return -ENOMEM; @@ -225,6 +353,25 @@ static int max31785_probe(struct i2c_client *client, if (ret < 0) return ret; + ret = i2c_smbus_read_word_data(client, MFR_REVISION); + if (ret < 0) + return ret; + + if (ret == MAX31785A) { + dual_tach = true; + } else if (ret == MAX31785) { + if (!strcmp("max31785a", 
id->name)) + dev_warn(dev, "Expected max3175a, found max31785: cannot provide secondary tachometer readings\n"); + } else { + return -ENODEV; + } + + if (dual_tach) { + ret = max31785_configure_dual_tach(client, info); + if (ret < 0) + return ret; + } + return pmbus_do_probe(client, id, info); } -- cgit v1.2.3 From 5d389b125186cf254ad5b8015763ac07c151aea4 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Wed, 22 Nov 2017 16:32:15 +0100 Subject: hwmon: (ina2xx) Make calibration register value fixed Calibration register is used for calculating current register in hardware according to datasheet: current = shunt_volt * calib_register / 2048 (ina 226) current = shunt_volt * calib_register / 4096 (ina 219) Fix calib_register value to 2048 for ina226 and 4096 for ina 219 in order to avoid truncation error and provide best precision allowed by shunt_voltage measurement. Make current scale value follow changes of shunt_resistor from sysfs as calib_register value is now fixed. Power_lsb value should also follow shunt_resistor changes as stated in datasheet: power_lsb = 25 * current_lsb (ina 226) power_lsb = 20 * current_lsb (ina 219) Signed-off-by: Maciej Purski Signed-off-by: Guenter Roeck --- drivers/hwmon/ina2xx.c | 87 +++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index 62e38fa8cda2..e362a932fe8c 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -95,18 +95,20 @@ enum ina2xx_ids { ina219, ina226 }; struct ina2xx_config { u16 config_default; - int calibration_factor; + int calibration_value; int registers; int shunt_div; int bus_voltage_shift; int bus_voltage_lsb; /* uV */ - int power_lsb; /* uW */ + int power_lsb_factor; }; struct ina2xx_data { const struct ina2xx_config *config; long rshunt; + long current_lsb_uA; + long power_lsb_uW; struct mutex config_lock; struct regmap *regmap; @@ -116,21 +118,21 @@ struct ina2xx_data { static const 
struct ina2xx_config ina2xx_config[] = { [ina219] = { .config_default = INA219_CONFIG_DEFAULT, - .calibration_factor = 40960000, + .calibration_value = 4096, .registers = INA219_REGISTERS, .shunt_div = 100, .bus_voltage_shift = 3, .bus_voltage_lsb = 4000, - .power_lsb = 20000, + .power_lsb_factor = 20, }, [ina226] = { .config_default = INA226_CONFIG_DEFAULT, - .calibration_factor = 5120000, + .calibration_value = 2048, .registers = INA226_REGISTERS, .shunt_div = 400, .bus_voltage_shift = 0, .bus_voltage_lsb = 1250, - .power_lsb = 25000, + .power_lsb_factor = 25, }, }; @@ -169,12 +171,16 @@ static u16 ina226_interval_to_reg(int interval) return INA226_SHIFT_AVG(avg_bits); } +/* + * Calibration register is set to the best value, which eliminates + * truncation errors on calculating current register in hardware. + * According to datasheet (eq. 3) the best values are 2048 for + * ina226 and 4096 for ina219. They are hardcoded as calibration_value. + */ static int ina2xx_calibrate(struct ina2xx_data *data) { - u16 val = DIV_ROUND_CLOSEST(data->config->calibration_factor, - data->rshunt); - - return regmap_write(data->regmap, INA2XX_CALIBRATION, val); + return regmap_write(data->regmap, INA2XX_CALIBRATION, + data->config->calibration_value); } /* @@ -187,10 +193,6 @@ static int ina2xx_init(struct ina2xx_data *data) if (ret < 0) return ret; - /* - * Set current LSB to 1mA, shunt is in uOhms - * (equation 13 in datasheet). 
- */ return ina2xx_calibrate(data); } @@ -268,15 +270,15 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg, val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_POWER: - val = regval * data->config->power_lsb; + val = regval * data->power_lsb_uW; break; case INA2XX_CURRENT: - /* signed register, LSB=1mA (selected), in mA */ - val = (s16)regval; + /* signed register, result in mA */ + val = regval * data->current_lsb_uA; + val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_CALIBRATION: - val = DIV_ROUND_CLOSEST(data->config->calibration_factor, - regval); + val = regval; break; default: /* programmer goofed */ @@ -304,9 +306,32 @@ static ssize_t ina2xx_show_value(struct device *dev, ina2xx_get_value(data, attr->index, regval)); } -static ssize_t ina2xx_set_shunt(struct device *dev, - struct device_attribute *da, - const char *buf, size_t count) +/* + * In order to keep calibration register value fixed, the product + * of current_lsb and shunt_resistor should also be fixed and equal + * to shunt_voltage_lsb = 1 / shunt_div multiplied by 10^9 in order + * to keep the scale. + */ +static int ina2xx_set_shunt(struct ina2xx_data *data, long val) +{ + unsigned int dividend = DIV_ROUND_CLOSEST(1000000000, + data->config->shunt_div); + if (val <= 0 || val > dividend) + return -EINVAL; + + mutex_lock(&data->config_lock); + data->rshunt = val; + data->current_lsb_uA = DIV_ROUND_CLOSEST(dividend, val); + data->power_lsb_uW = data->config->power_lsb_factor * + data->current_lsb_uA; + mutex_unlock(&data->config_lock); + + return 0; +} + +static ssize_t ina2xx_store_shunt(struct device *dev, + struct device_attribute *da, + const char *buf, size_t count) { unsigned long val; int status; @@ -316,18 +341,9 @@ static ssize_t ina2xx_set_shunt(struct device *dev, if (status < 0) return status; - if (val == 0 || - /* Values greater than the calibration factor make no sense. 
*/ - val > data->config->calibration_factor) - return -EINVAL; - - mutex_lock(&data->config_lock); - data->rshunt = val; - status = ina2xx_calibrate(data); - mutex_unlock(&data->config_lock); + status = ina2xx_set_shunt(data, val); if (status < 0) return status; - return count; } @@ -387,7 +403,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, /* shunt resistance */ static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, - ina2xx_show_value, ina2xx_set_shunt, + ina2xx_show_value, ina2xx_store_shunt, INA2XX_CALIBRATION); /* update interval (ina226 only) */ @@ -448,10 +464,7 @@ static int ina2xx_probe(struct i2c_client *client, val = INA2XX_RSHUNT_DEFAULT; } - if (val <= 0 || val > data->config->calibration_factor) - return -ENODEV; - - data->rshunt = val; + ina2xx_set_shunt(data, val); ina2xx_regmap_config.max_register = data->config->registers; -- cgit v1.2.3 From 98b16a09861aa85d68853728515738b19a7cd8d4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 30 Nov 2017 19:12:57 +0100 Subject: hwmon: (max31785) Add OF device ID table The driver doesn't have a struct of_device_id table but supported devices are registered via Device Trees. This is working on the assumption that an I2C device registered via OF will always match a legacy I2C device ID and that the MODALIAS reported will always be of the form i2c:<device>. But this could change in the future so the correct approach is to have an OF device ID table if the devices are registered via OF. 
Before this patch: $ modinfo drivers/hwmon/pmbus/max31785.ko | grep alias alias: i2c:max31785a alias: i2c:max31785 After this patch: $ modinfo drivers/hwmon/pmbus/max31785.ko | grep alias alias: i2c:max31785a alias: i2c:max31785 alias: of:N*T*Cmaxim,max31785aC* alias: of:N*T*Cmaxim,max31785a alias: of:N*T*Cmaxim,max31785C* alias: of:N*T*Cmaxim,max31785 Signed-off-by: Javier Martinez Canillas Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/max31785.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hwmon/pmbus/max31785.c b/drivers/hwmon/pmbus/max31785.c index bffab449be39..c9dc8799b5e1 100644 --- a/drivers/hwmon/pmbus/max31785.c +++ b/drivers/hwmon/pmbus/max31785.c @@ -383,9 +383,18 @@ static const struct i2c_device_id max31785_id[] = { MODULE_DEVICE_TABLE(i2c, max31785_id); +static const struct of_device_id max31785_of_match[] = { + { .compatible = "maxim,max31785" }, + { .compatible = "maxim,max31785a" }, + { }, +}; + +MODULE_DEVICE_TABLE(of, max31785_of_match); + static struct i2c_driver max31785_driver = { .driver = { .name = "max31785", + .of_match_table = max31785_of_match, }, .probe = max31785_probe, .remove = pmbus_do_remove, -- cgit v1.2.3 From 3870945aeb0e94126b9ea1cbd4f2f50d99eb948d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 3 Dec 2017 15:15:00 -0800 Subject: hwmon: Fix parameter documentation sparse reports: drivers/hwmon/hwmon.c:681: warning: No description found for parameter 'chip' drivers/hwmon/hwmon.c:681: warning: Excess function parameter 'info' description in 'hwmon_device_register_with_info' drivers/hwmon/hwmon.c:789: warning: No description found for parameter 'chip' drivers/hwmon/hwmon.c:789: warning: No description found for parameter 'groups' drivers/hwmon/hwmon.c:789: warning: Excess function parameter 'info' description in 'devm_hwmon_device_register_with_info' Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index af5123042990..32083e452cde 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -678,7 +678,7 @@ EXPORT_SYMBOL_GPL(hwmon_device_register_with_groups); * @dev: the parent device * @name: hwmon name attribute * @drvdata: driver data to attach to created device - * @info: pointer to hwmon chip information + * @chip: pointer to hwmon chip information * @extra_groups: pointer to list of additional non-standard attribute groups * * hwmon_device_unregister() must be called when the device is no @@ -785,11 +785,11 @@ EXPORT_SYMBOL_GPL(devm_hwmon_device_register_with_groups); /** * devm_hwmon_device_register_with_info - register w/ hwmon - * @dev: the parent device - * @name: hwmon name attribute - * @drvdata: driver data to attach to created device - * @info: Pointer to hwmon chip information - * @groups - pointer to list of driver specific attribute groups + * @dev: the parent device + * @name: hwmon name attribute + * @drvdata: driver data to attach to created device + * @chip: pointer to hwmon chip information + * @groups: pointer to list of driver specific attribute groups * * Returns the pointer to the new device. The new device is automatically * unregistered with the parent device. 
-- cgit v1.2.3 From 679f50b8d1f80a7083444c5a8aa02281b9ff9cc1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 3 Dec 2017 15:16:52 -0800 Subject: hwmon: (sht21) Fix documentation of struct sht21 Sparse reports: drivers/hwmon/sht21.c:60: warning: No description found for parameter 'client' drivers/hwmon/sht21.c:60: warning: Excess struct member 'hwmon_dev' description in 'sht21' Signed-off-by: Guenter Roeck --- drivers/hwmon/sht21.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/sht21.c b/drivers/hwmon/sht21.c index 06706d288355..190e7b39ce32 100644 --- a/drivers/hwmon/sht21.c +++ b/drivers/hwmon/sht21.c @@ -41,7 +41,7 @@ /** * struct sht21 - SHT21 device specific data - * @hwmon_dev: device registered with hwmon + * @client: I2C client device * @lock: mutex to protect measurement values * @last_update: time of last update (jiffies) * @temperature: cached temperature measurement value -- cgit v1.2.3 From d5324e90957adb4f2ab144d14ed74853a8d9c525 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 3 Dec 2017 15:18:34 -0800 Subject: hwmon: (sht15) Fix parameter documentation of sht15_crc8() Sparse reports: drivers/hwmon/sht15.c:188: warning: No description found for parameter 'len' Signed-off-by: Guenter Roeck --- drivers/hwmon/sht15.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 25d28343ba93..2be77752cd56 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -179,6 +179,7 @@ struct sht15_data { * sht15_crc8() - compute crc8 * @data: sht15 specific data. * @value: sht15 retrieved data. + * @len: Length of retrieved data * * This implements section 2 of the CRC datasheet. 
*/ -- cgit v1.2.3 From 7f6d70cd478452d2534b6c33b752a8009783383a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 3 Dec 2017 15:22:26 -0800 Subject: hwmon: (iio_hwmon) Fix documentation of struct iio_hwmon_state Sparse reports: drivers/hwmon/iio_hwmon.c:36: warning: No description found for parameter 'groups' Signed-off-by: Guenter Roeck --- drivers/hwmon/iio_hwmon.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/iio_hwmon.c b/drivers/hwmon/iio_hwmon.c index f6a76679c650..5e5b32a1ec4b 100644 --- a/drivers/hwmon/iio_hwmon.c +++ b/drivers/hwmon/iio_hwmon.c @@ -23,7 +23,8 @@ * @channels: filled with array of channels from iio * @num_channels: number of channels in channels (saves counting twice) * @hwmon_dev: associated hwmon device - * @attr_group: the group of attributes + * @attr_group: the group of attributes + * @groups: null terminated array of attribute groups * @attrs: null terminated array of attribute pointers. */ struct iio_hwmon_state { -- cgit v1.2.3 From a7a9b15a35d2504aa83e140d4cf50c85ac01e965 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 3 Dec 2017 15:24:57 -0800 Subject: hwmon: (hih6130) Fix documentation of struct hih6130 Sparse reports: drivers/hwmon/hih6130.c:56: warning: No description found for parameter 'client' drivers/hwmon/hih6130.c:56: warning: Excess struct member 'hwmon_dev' description in 'hih6130' Signed-off-by: Guenter Roeck --- drivers/hwmon/hih6130.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/hih6130.c b/drivers/hwmon/hih6130.c index 7b73d2002d3e..0ae1ee1dbf76 100644 --- a/drivers/hwmon/hih6130.c +++ b/drivers/hwmon/hih6130.c @@ -37,7 +37,7 @@ /** * struct hih6130 - HIH-6130 device specific data - * @hwmon_dev: device registered with hwmon + * @client: pointer to I2C client device * @lock: mutex to protect measurement values * @valid: only false before first measurement is taken * @last_update: time of last update (jiffies) -- cgit v1.2.3 From 
571e3f3a8e6582ecce2c7fd5d0a9e2ae74cece02 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 3 Dec 2017 15:27:56 -0800 Subject: hwmon: (w83773g) Fix fault detection and reporting Smatch reports: drivers/hwmon/w83773g.c:105 get_fault() warn: shift has higher precedence than mask Code analysis shows that the code is indeed wrong. Fix it, and while we are at it, drop unnecessary typecast. Fixes: 86a10c802362 ("hwmon: Add W83773G driver") Cc: Lei YU Reviewed-by: Lei YU Signed-off-by: Guenter Roeck --- drivers/hwmon/w83773g.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/w83773g.c b/drivers/hwmon/w83773g.c index 0b97c285b049..e858093ac806 100644 --- a/drivers/hwmon/w83773g.c +++ b/drivers/hwmon/w83773g.c @@ -102,7 +102,7 @@ static int get_fault(struct regmap *regmap, int index, long *val) if (ret < 0) return ret; - *val = (u8)regval & 0x04 >> 2; + *val = (regval & 0x04) >> 2; return 0; } -- cgit v1.2.3 From eb6489b696ad22a8464e20502e18014434b4b0ea Mon Sep 17 00:00:00 2001 From: "Edward A. James" Date: Mon, 11 Dec 2017 15:32:49 -0600 Subject: hwmon: (pmbus) Export pmbus device debugfs directory entry Pmbus client drivers, if they want to use debugfs, should use the same root directory as the pmbus debugfs entries are using. Therefore, export the device dentry for the pmbus client. Signed-off-by: Edward A. 
James Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.h | 2 ++ drivers/hwmon/pmbus/pmbus_core.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index d39d506aa63e..1d24397d36ec 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -461,4 +461,6 @@ int pmbus_get_fan_rate_cached(struct i2c_client *client, int page, int id, enum pmbus_fan_mode mode); int pmbus_update_fan(struct i2c_client *client, int page, int id, u8 config, u8 mask, u16 command); +struct dentry *pmbus_get_debugfs_dir(struct i2c_client *client); + #endif /* PMBUS_H */ diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 99ab39f19bf4..f7c47d7994e7 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -2381,6 +2381,14 @@ int pmbus_do_remove(struct i2c_client *client) } EXPORT_SYMBOL_GPL(pmbus_do_remove); +struct dentry *pmbus_get_debugfs_dir(struct i2c_client *client) +{ + struct pmbus_data *data = i2c_get_clientdata(client); + + return data->debugfs; +} +EXPORT_SYMBOL_GPL(pmbus_get_debugfs_dir); + static int __init pmbus_core_init(void) { pmbus_debugfs_dir = debugfs_create_dir("pmbus", NULL); -- cgit v1.2.3 From d6bb645a1704cba3884bf03d5a8bd86915b5e650 Mon Sep 17 00:00:00 2001 From: "Edward A. James" Date: Mon, 11 Dec 2017 15:32:50 -0600 Subject: hwmon: (pmbus) cffps: Add debugfs entries Add debugfs entries for additional power supply data, including part number, serial number, FRU number, firmware revision, ccin, and the input history of the power supply. The input history is 10 minutes of input power data in the form of twenty 30-second packets. Each packet contains average and maximum power for that 30 second period. Signed-off-by: Edward A. 
James [groeck: Fixed endianness problem] Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/ibm-cffps.c | 202 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 201 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/ibm-cffps.c b/drivers/hwmon/pmbus/ibm-cffps.c index cb56da6834e5..de2547476253 100644 --- a/drivers/hwmon/pmbus/ibm-cffps.c +++ b/drivers/hwmon/pmbus/ibm-cffps.c @@ -8,12 +8,26 @@ */ #include +#include #include +#include #include +#include #include +#include #include "pmbus.h" +#define CFFPS_FRU_CMD 0x9A +#define CFFPS_PN_CMD 0x9B +#define CFFPS_SN_CMD 0x9E +#define CFFPS_CCIN_CMD 0xBD +#define CFFPS_FW_CMD_START 0xFA +#define CFFPS_FW_NUM_BYTES 4 + +#define CFFPS_INPUT_HISTORY_CMD 0xD6 +#define CFFPS_INPUT_HISTORY_SIZE 100 + /* STATUS_MFR_SPECIFIC bits */ #define CFFPS_MFR_FAN_FAULT BIT(0) #define CFFPS_MFR_THERMAL_FAULT BIT(1) @@ -24,6 +38,144 @@ #define CFFPS_MFR_VAUX_FAULT BIT(6) #define CFFPS_MFR_CURRENT_SHARE_WARNING BIT(7) +enum { + CFFPS_DEBUGFS_INPUT_HISTORY = 0, + CFFPS_DEBUGFS_FRU, + CFFPS_DEBUGFS_PN, + CFFPS_DEBUGFS_SN, + CFFPS_DEBUGFS_CCIN, + CFFPS_DEBUGFS_FW, + CFFPS_DEBUGFS_NUM_ENTRIES +}; + +struct ibm_cffps_input_history { + struct mutex update_lock; + unsigned long last_update; + + u8 byte_count; + u8 data[CFFPS_INPUT_HISTORY_SIZE]; +}; + +struct ibm_cffps { + struct i2c_client *client; + + struct ibm_cffps_input_history input_history; + + int debugfs_entries[CFFPS_DEBUGFS_NUM_ENTRIES]; +}; + +#define to_psu(x, y) container_of((x), struct ibm_cffps, debugfs_entries[(y)]) + +static ssize_t ibm_cffps_read_input_history(struct ibm_cffps *psu, + char __user *buf, size_t count, + loff_t *ppos) +{ + int rc; + u8 msgbuf0[1] = { CFFPS_INPUT_HISTORY_CMD }; + u8 msgbuf1[CFFPS_INPUT_HISTORY_SIZE + 1] = { 0 }; + struct i2c_msg msg[2] = { + { + .addr = psu->client->addr, + .flags = psu->client->flags, + .len = 1, + .buf = msgbuf0, + }, { + .addr = psu->client->addr, + .flags = psu->client->flags | I2C_M_RD, + .len = 
CFFPS_INPUT_HISTORY_SIZE + 1, + .buf = msgbuf1, + }, + }; + + if (!*ppos) { + mutex_lock(&psu->input_history.update_lock); + if (time_after(jiffies, psu->input_history.last_update + HZ)) { + /* + * Use a raw i2c transfer, since we need more bytes + * than Linux I2C supports through smbus xfr (only 32). + */ + rc = i2c_transfer(psu->client->adapter, msg, 2); + if (rc < 0) { + mutex_unlock(&psu->input_history.update_lock); + return rc; + } + + psu->input_history.byte_count = msgbuf1[0]; + memcpy(psu->input_history.data, &msgbuf1[1], + CFFPS_INPUT_HISTORY_SIZE); + psu->input_history.last_update = jiffies; + } + + mutex_unlock(&psu->input_history.update_lock); + } + + return simple_read_from_buffer(buf, count, ppos, + psu->input_history.data, + psu->input_history.byte_count); +} + +static ssize_t ibm_cffps_debugfs_op(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u8 cmd; + int i, rc; + int *idxp = file->private_data; + int idx = *idxp; + struct ibm_cffps *psu = to_psu(idxp, idx); + char data[I2C_SMBUS_BLOCK_MAX] = { 0 }; + + switch (idx) { + case CFFPS_DEBUGFS_INPUT_HISTORY: + return ibm_cffps_read_input_history(psu, buf, count, ppos); + case CFFPS_DEBUGFS_FRU: + cmd = CFFPS_FRU_CMD; + break; + case CFFPS_DEBUGFS_PN: + cmd = CFFPS_PN_CMD; + break; + case CFFPS_DEBUGFS_SN: + cmd = CFFPS_SN_CMD; + break; + case CFFPS_DEBUGFS_CCIN: + rc = i2c_smbus_read_word_swapped(psu->client, CFFPS_CCIN_CMD); + if (rc < 0) + return rc; + + rc = snprintf(data, 5, "%04X", rc); + goto done; + case CFFPS_DEBUGFS_FW: + for (i = 0; i < CFFPS_FW_NUM_BYTES; ++i) { + rc = i2c_smbus_read_byte_data(psu->client, + CFFPS_FW_CMD_START + i); + if (rc < 0) + return rc; + + snprintf(&data[i * 2], 3, "%02X", rc); + } + + rc = i * 2; + goto done; + default: + return -EINVAL; + } + + rc = i2c_smbus_read_block_data(psu->client, cmd, data); + if (rc < 0) + return rc; + +done: + data[rc] = '\n'; + rc += 2; + + return simple_read_from_buffer(buf, count, ppos, data, rc); +} + +static 
const struct file_operations ibm_cffps_fops = { + .llseek = noop_llseek, + .read = ibm_cffps_debugfs_op, + .open = simple_open, +}; + static int ibm_cffps_read_byte_data(struct i2c_client *client, int page, int reg) { @@ -119,7 +271,55 @@ static struct pmbus_driver_info ibm_cffps_info = { static int ibm_cffps_probe(struct i2c_client *client, const struct i2c_device_id *id) { - return pmbus_do_probe(client, id, &ibm_cffps_info); + int i, rc; + struct dentry *debugfs; + struct dentry *ibm_cffps_dir; + struct ibm_cffps *psu; + + rc = pmbus_do_probe(client, id, &ibm_cffps_info); + if (rc) + return rc; + + /* Don't fail the probe if we can't create debugfs */ + debugfs = pmbus_get_debugfs_dir(client); + if (!debugfs) + return 0; + + ibm_cffps_dir = debugfs_create_dir(client->name, debugfs); + if (!ibm_cffps_dir) + return 0; + + psu = devm_kzalloc(&client->dev, sizeof(*psu), GFP_KERNEL); + if (!psu) + return 0; + + psu->client = client; + mutex_init(&psu->input_history.update_lock); + psu->input_history.last_update = jiffies - HZ; + + for (i = 0; i < CFFPS_DEBUGFS_NUM_ENTRIES; ++i) + psu->debugfs_entries[i] = i; + + debugfs_create_file("input_history", 0444, ibm_cffps_dir, + &psu->debugfs_entries[CFFPS_DEBUGFS_INPUT_HISTORY], + &ibm_cffps_fops); + debugfs_create_file("fru", 0444, ibm_cffps_dir, + &psu->debugfs_entries[CFFPS_DEBUGFS_FRU], + &ibm_cffps_fops); + debugfs_create_file("part_number", 0444, ibm_cffps_dir, + &psu->debugfs_entries[CFFPS_DEBUGFS_PN], + &ibm_cffps_fops); + debugfs_create_file("serial_number", 0444, ibm_cffps_dir, + &psu->debugfs_entries[CFFPS_DEBUGFS_SN], + &ibm_cffps_fops); + debugfs_create_file("ccin", 0444, ibm_cffps_dir, + &psu->debugfs_entries[CFFPS_DEBUGFS_CCIN], + &ibm_cffps_fops); + debugfs_create_file("fw_version", 0444, ibm_cffps_dir, + &psu->debugfs_entries[CFFPS_DEBUGFS_FW], + &ibm_cffps_fops); + + return 0; } static const struct i2c_device_id ibm_cffps_id[] = { -- cgit v1.2.3 From 8c9e52705740b21c546907711630c389ef09715e Mon Sep 17 
00:00:00 2001 From: Joel Date: Sat, 23 Dec 2017 23:35:27 +1030 Subject: dt-bindings: hwmon: aspeed-pwm-tacho: Add reset node The device tree bindings are updated to document the resets phandle, and the example is updated to match what is expected for both the reset and clock phandle. Note that the bindings should have always had the reset controller, as the hardware is unusable without it. Acked-by: Rob Herring Signed-off-by: Joel Stanley Signed-off-by: Guenter Roeck --- .../devicetree/bindings/hwmon/aspeed-pwm-tacho.txt | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt index 367c8203213b..3ac02988a1a5 100644 --- a/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt +++ b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt @@ -22,8 +22,9 @@ Required properties for pwm-tacho node: - compatible : should be "aspeed,ast2400-pwm-tacho" for AST2400 and "aspeed,ast2500-pwm-tacho" for AST2500. 
-- clocks : a fixed clock providing input clock frequency(PWM - and Fan Tach clock) +- clocks : phandle to clock provider with the clock number in the second cell + +- resets : phandle to reset controller with the reset number in the second cell fan subnode format: =================== @@ -48,19 +49,14 @@ Required properties for each child node: Examples: -pwm_tacho_fixed_clk: fixedclk { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <24000000>; -}; - pwm_tacho: pwmtachocontroller@1e786000 { #address-cells = <1>; #size-cells = <1>; #cooling-cells = <2>; reg = <0x1E786000 0x1000>; compatible = "aspeed,ast2500-pwm-tacho"; - clocks = <&pwm_tacho_fixed_clk>; + clocks = <&syscon ASPEED_CLK_APB>; + resets = <&syscon ASPEED_RESET_PWM>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pwm0_default &pinctrl_pwm1_default>; -- cgit v1.2.3 From 18c514cc0e0278b7852a6741973a9523ad012700 Mon Sep 17 00:00:00 2001 From: Joel Date: Sat, 23 Dec 2017 23:35:28 +1030 Subject: hwmon: (aspeed-pwm-tacho) Deassert reset in probe The ASPEED SoC must deassert a reset in order to use the PWM/tach peripheral. 
Signed-off-by: Joel Stanley Signed-off-by: Guenter Roeck --- drivers/hwmon/aspeed-pwm-tacho.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index 63a95e23ca81..693a3d53cab5 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -181,6 +182,7 @@ struct aspeed_cooling_device { struct aspeed_pwm_tacho_data { struct regmap *regmap; + struct reset_control *rst; unsigned long clk_freq; bool pwm_present[8]; bool fan_tach_present[16]; @@ -905,6 +907,13 @@ static int aspeed_create_fan(struct device *dev, return 0; } +static void aspeed_pwm_tacho_remove(void *data) +{ + struct aspeed_pwm_tacho_data *priv = data; + + reset_control_assert(priv->rst); +} + static int aspeed_pwm_tacho_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -931,6 +940,19 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev) &aspeed_pwm_tacho_regmap_config); if (IS_ERR(priv->regmap)) return PTR_ERR(priv->regmap); + + priv->rst = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(priv->rst)) { + dev_err(dev, + "missing or invalid reset controller device tree entry"); + return PTR_ERR(priv->rst); + } + reset_control_deassert(priv->rst); + + ret = devm_add_action_or_reset(dev, aspeed_pwm_tacho_remove, priv); + if (ret) + return ret; + regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE, 0); regmap_write(priv->regmap, ASPEED_PTCR_TACH_SOURCE_EXT, 0); -- cgit v1.2.3 From d97c2e0d635e39b5b63784deb3212e846ebf76dc Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 26 Dec 2017 01:50:20 +0100 Subject: PM / wakeup: Drop redundant check from device_set_wakeup_enable() Since both device_wakeup_enable() and device_wakeup_disable() check if dev is not NULL and whether or not power.can_wakeup is set for it, device_set_wakeup_enable() doesn't have to do that, so drop that check from it. No intentional changes in functionality. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson --- drivers/base/power/wakeup.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index cb72965b3281..90c7212de087 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -464,9 +464,6 @@ EXPORT_SYMBOL_GPL(device_init_wakeup); */ int device_set_wakeup_enable(struct device *dev, bool enable) { - if (!dev || !dev->power.can_wakeup) - return -EINVAL; - return enable ? device_wakeup_enable(dev) : device_wakeup_disable(dev); } EXPORT_SYMBOL_GPL(device_set_wakeup_enable); -- cgit v1.2.3 From 9dbc64a5d5938b990a045509ff5356fc53e4abd4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Jan 2018 01:42:56 +0100 Subject: PM / wakeup: Drop redundant check from device_init_wakeup() Since device_wakeup_disable() checks the device's power.can_wakeup flag, device_init_wakeup() doesn't need to do that before calling it, so drop that redundant check from device_init_wakeup(). No intentional changes in functionality. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Ulf Hansson --- drivers/base/power/wakeup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 90c7212de087..b7b8b2fe89c6 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -448,9 +448,7 @@ int device_init_wakeup(struct device *dev, bool enable) device_set_wakeup_capable(dev, true); ret = device_wakeup_enable(dev); } else { - if (dev->power.can_wakeup) - device_wakeup_disable(dev); - + device_wakeup_disable(dev); device_set_wakeup_capable(dev, false); } -- cgit v1.2.3 From 4fa3061a6856cc72f3f984702145bb30f16ee40e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 Dec 2017 00:58:18 +0100 Subject: PM / core: Add helpers for subsystem callback selection Add helper routines to find and return a suitable subsystem callback during the "noirq" phases of system suspend/resume (or analogous) transitions as well as during the "late" phase of system suspend and the "early" phase of system resume (or analogous) transitions. The helpers will be called from additional sites going forward. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Geert Uytterhoeven --- drivers/base/power/main.c | 188 +++++++++++++++++++++++++++++++--------------- 1 file changed, 128 insertions(+), 60 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 6e8cc5de93fd..3c5fdf155c91 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -551,6 +551,35 @@ bool dev_pm_may_skip_resume(struct device *dev) return !dev->power.must_resume && pm_transition.event != PM_EVENT_RESTORE; } +static pm_callback_t dpm_subsys_resume_noirq_cb(struct device *dev, + pm_message_t state, + const char **info_p) +{ + pm_callback_t callback; + const char *info; + + if (dev->pm_domain) { + info = "noirq power domain "; + callback = pm_noirq_op(&dev->pm_domain->ops, state); + } else if (dev->type && dev->type->pm) { + info = "noirq type "; + callback = pm_noirq_op(dev->type->pm, state); + } else if (dev->class && dev->class->pm) { + info = "noirq class "; + callback = pm_noirq_op(dev->class->pm, state); + } else if (dev->bus && dev->bus->pm) { + info = "noirq bus "; + callback = pm_noirq_op(dev->bus->pm, state); + } else { + return NULL; + } + + if (info_p) + *info_p = info; + + return callback; +} + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. 
@@ -562,8 +591,8 @@ bool dev_pm_may_skip_resume(struct device *dev) */ static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) { - pm_callback_t callback = NULL; - const char *info = NULL; + pm_callback_t callback; + const char *info; int error = 0; TRACE_DEVICE(dev); @@ -577,19 +606,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn dpm_wait_for_superior(dev, async); - if (dev->pm_domain) { - info = "noirq power domain "; - callback = pm_noirq_op(&dev->pm_domain->ops, state); - } else if (dev->type && dev->type->pm) { - info = "noirq type "; - callback = pm_noirq_op(dev->type->pm, state); - } else if (dev->class && dev->class->pm) { - info = "noirq class "; - callback = pm_noirq_op(dev->class->pm, state); - } else if (dev->bus && dev->bus->pm) { - info = "noirq bus "; - callback = pm_noirq_op(dev->bus->pm, state); - } + callback = dpm_subsys_resume_noirq_cb(dev, state, &info); if (!callback && dev->driver && dev->driver->pm) { info = "noirq driver "; @@ -704,6 +721,35 @@ void dpm_resume_noirq(pm_message_t state) dpm_noirq_end(); } +static pm_callback_t dpm_subsys_resume_early_cb(struct device *dev, + pm_message_t state, + const char **info_p) +{ + pm_callback_t callback; + const char *info; + + if (dev->pm_domain) { + info = "early power domain "; + callback = pm_late_early_op(&dev->pm_domain->ops, state); + } else if (dev->type && dev->type->pm) { + info = "early type "; + callback = pm_late_early_op(dev->type->pm, state); + } else if (dev->class && dev->class->pm) { + info = "early class "; + callback = pm_late_early_op(dev->class->pm, state); + } else if (dev->bus && dev->bus->pm) { + info = "early bus "; + callback = pm_late_early_op(dev->bus->pm, state); + } else { + return NULL; + } + + if (info_p) + *info_p = info; + + return callback; +} + /** * device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. 
@@ -714,8 +760,8 @@ void dpm_resume_noirq(pm_message_t state) */ static int device_resume_early(struct device *dev, pm_message_t state, bool async) { - pm_callback_t callback = NULL; - const char *info = NULL; + pm_callback_t callback; + const char *info; int error = 0; TRACE_DEVICE(dev); @@ -729,19 +775,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn dpm_wait_for_superior(dev, async); - if (dev->pm_domain) { - info = "early power domain "; - callback = pm_late_early_op(&dev->pm_domain->ops, state); - } else if (dev->type && dev->type->pm) { - info = "early type "; - callback = pm_late_early_op(dev->type->pm, state); - } else if (dev->class && dev->class->pm) { - info = "early class "; - callback = pm_late_early_op(dev->class->pm, state); - } else if (dev->bus && dev->bus->pm) { - info = "early bus "; - callback = pm_late_early_op(dev->bus->pm, state); - } + callback = dpm_subsys_resume_early_cb(dev, state, &info); if (!callback && dev->driver && dev->driver->pm) { info = "early driver "; @@ -1128,6 +1162,35 @@ static void dpm_superior_set_must_resume(struct device *dev) device_links_read_unlock(idx); } +static pm_callback_t dpm_subsys_suspend_noirq_cb(struct device *dev, + pm_message_t state, + const char **info_p) +{ + pm_callback_t callback; + const char *info; + + if (dev->pm_domain) { + info = "noirq power domain "; + callback = pm_noirq_op(&dev->pm_domain->ops, state); + } else if (dev->type && dev->type->pm) { + info = "noirq type "; + callback = pm_noirq_op(dev->type->pm, state); + } else if (dev->class && dev->class->pm) { + info = "noirq class "; + callback = pm_noirq_op(dev->class->pm, state); + } else if (dev->bus && dev->bus->pm) { + info = "noirq bus "; + callback = pm_noirq_op(dev->bus->pm, state); + } else { + return NULL; + } + + if (info_p) + *info_p = info; + + return callback; +} + /** * __device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. 
@@ -1139,8 +1202,8 @@ static void dpm_superior_set_must_resume(struct device *dev) */ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async) { - pm_callback_t callback = NULL; - const char *info = NULL; + pm_callback_t callback; + const char *info; int error = 0; TRACE_DEVICE(dev); @@ -1159,19 +1222,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a if (dev->power.syscore || dev->power.direct_complete) goto Complete; - if (dev->pm_domain) { - info = "noirq power domain "; - callback = pm_noirq_op(&dev->pm_domain->ops, state); - } else if (dev->type && dev->type->pm) { - info = "noirq type "; - callback = pm_noirq_op(dev->type->pm, state); - } else if (dev->class && dev->class->pm) { - info = "noirq class "; - callback = pm_noirq_op(dev->class->pm, state); - } else if (dev->bus && dev->bus->pm) { - info = "noirq bus "; - callback = pm_noirq_op(dev->bus->pm, state); - } + callback = dpm_subsys_suspend_noirq_cb(dev, state, &info); if (!callback && dev->driver && dev->driver->pm) { info = "noirq driver "; @@ -1306,6 +1357,35 @@ int dpm_suspend_noirq(pm_message_t state) return ret; } +static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev, + pm_message_t state, + const char **info_p) +{ + pm_callback_t callback; + const char *info; + + if (dev->pm_domain) { + info = "late power domain "; + callback = pm_late_early_op(&dev->pm_domain->ops, state); + } else if (dev->type && dev->type->pm) { + info = "late type "; + callback = pm_late_early_op(dev->type->pm, state); + } else if (dev->class && dev->class->pm) { + info = "late class "; + callback = pm_late_early_op(dev->class->pm, state); + } else if (dev->bus && dev->bus->pm) { + info = "late bus "; + callback = pm_late_early_op(dev->bus->pm, state); + } else { + return NULL; + } + + if (info_p) + *info_p = info; + + return callback; +} + /** * __device_suspend_late - Execute a "late suspend" callback for given device. * @dev: Device to handle. 
@@ -1316,8 +1396,8 @@ int dpm_suspend_noirq(pm_message_t state) */ static int __device_suspend_late(struct device *dev, pm_message_t state, bool async) { - pm_callback_t callback = NULL; - const char *info = NULL; + pm_callback_t callback; + const char *info; int error = 0; TRACE_DEVICE(dev); @@ -1338,19 +1418,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as if (dev->power.syscore || dev->power.direct_complete) goto Complete; - if (dev->pm_domain) { - info = "late power domain "; - callback = pm_late_early_op(&dev->pm_domain->ops, state); - } else if (dev->type && dev->type->pm) { - info = "late type "; - callback = pm_late_early_op(dev->type->pm, state); - } else if (dev->class && dev->class->pm) { - info = "late class "; - callback = pm_late_early_op(dev->class->pm, state); - } else if (dev->bus && dev->bus->pm) { - info = "late bus "; - callback = pm_late_early_op(dev->bus->pm, state); - } + callback = dpm_subsys_suspend_late_cb(dev, state, &info); if (!callback && dev->driver && dev->driver->pm) { info = "late driver "; -- cgit v1.2.3 From 75e94645fc3b1007eacb4c7863059f8e8d098cda Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 Dec 2017 01:00:45 +0100 Subject: PM / core: Direct DPM_FLAG_SMART_SUSPEND optimization Make the PM core avoid invoking the "late" and "noirq" system-wide suspend (or analogous) callbacks provided by device drivers directly for devices with DPM_FLAG_SMART_SUSPEND set that are in runtime suspend during the "late" and "noirq" phases of system-wide suspend (or analogous) transitions. That is only done for devices without any middle-layer "late" and "noirq" suspend callbacks (to avoid confusing the middle layer if there is one). 
The underlying observation is that runtime PM is disabled for devices during the "late" and "noirq" system-wide suspend phases, so if they remain in runtime suspend from the "late" phase forward, it doesn't make sense to invoke the "late" and "noirq" callbacks provided by the drivers for them (arguably, the device is already suspended and in the right state). Thus, if the remaining driver suspend callbacks are to be invoked directly by the core, they can be skipped. This change really makes it possible for, say, platform device drivers to re-use runtime PM suspend and resume callbacks by pointing ->suspend_late and ->resume_early, respectively (and possibly the analogous hibernation-related callback pointers too), to them without adding any extra "is the device already suspended?" type of checks to the callback routines, as long as they will be invoked directly by the core. Signed-off-by: Rafael J. Wysocki --- Documentation/driver-api/pm/devices.rst | 18 +++---- drivers/base/power/main.c | 85 ++++++++++++++++++++++++++++++--- 2 files changed, 88 insertions(+), 15 deletions(-) diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index b0fe63c91f8d..07026811dcae 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -777,14 +777,16 @@ The driver can indicate that by setting ``DPM_FLAG_SMART_SUSPEND`` in runtime suspend at the beginning of the ``suspend_late`` phase of system-wide suspend (or in the ``poweroff_late`` phase of hibernation), when runtime PM has been disabled for it, under the assumption that its state should not change -after that point until the system-wide transition is over. 
If that happens, the -driver's system-wide resume callbacks, if present, may still be invoked during -the subsequent system-wide resume transition and the device's runtime power -management status may be set to "active" before enabling runtime PM for it, -so the driver must be prepared to cope with the invocation of its system-wide -resume callbacks back-to-back with its ``->runtime_suspend`` one (without the -intervening ``->runtime_resume`` and so on) and the final state of the device -must reflect the "active" status for runtime PM in that case. +after that point until the system-wide transition is over (the PM core itself +does that for devices whose "noirq", "late" and "early" system-wide PM callbacks +are executed directly by it). If that happens, the driver's system-wide resume +callbacks, if present, may still be invoked during the subsequent system-wide +resume transition and the device's runtime power management status may be set +to "active" before enabling runtime PM for it, so the driver must be prepared to +cope with the invocation of its system-wide resume callbacks back-to-back with +its ``->runtime_suspend`` one (without the intervening ``->runtime_resume`` and +so on) and the final state of the device must reflect the "active" runtime PM +status in that case. During system-wide resume from a sleep state it's easiest to put devices into the full-power state, as explained in :file:`Documentation/power/runtime_pm.txt`. diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 3c5fdf155c91..154f7b4db8d0 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -539,6 +539,24 @@ void dev_pm_skip_next_resume_phases(struct device *dev) dev->power.is_suspended = false; } +/** + * suspend_event - Return a "suspend" message for given "resume" one. + * @resume_msg: PM message representing a system-wide resume transition. 
+ */ +static pm_message_t suspend_event(pm_message_t resume_msg) +{ + switch (resume_msg.event) { + case PM_EVENT_RESUME: + return PMSG_SUSPEND; + case PM_EVENT_THAW: + case PM_EVENT_RESTORE: + return PMSG_FREEZE; + case PM_EVENT_RECOVER: + return PMSG_HIBERNATE; + } + return PMSG_ON; +} + /** * dev_pm_may_skip_resume - System-wide device resume optimization check. * @dev: Target device. @@ -580,6 +598,14 @@ static pm_callback_t dpm_subsys_resume_noirq_cb(struct device *dev, return callback; } +static pm_callback_t dpm_subsys_suspend_noirq_cb(struct device *dev, + pm_message_t state, + const char **info_p); + +static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev, + pm_message_t state, + const char **info_p); + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. @@ -607,13 +633,40 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn dpm_wait_for_superior(dev, async); callback = dpm_subsys_resume_noirq_cb(dev, state, &info); + if (callback) + goto Run; - if (!callback && dev->driver && dev->driver->pm) { + if (dev_pm_smart_suspend_and_suspended(dev)) { + pm_message_t suspend_msg = suspend_event(state); + + /* + * If "freeze" callbacks have been skipped during a transition + * related to hibernation, the subsequent "thaw" callbacks must + * be skipped too or bad things may happen. Otherwise, resume + * callbacks are going to be run for the device, so its runtime + * PM status must be changed to reflect the new state after the + * transition under way. 
+ */ + if (!dpm_subsys_suspend_late_cb(dev, suspend_msg, NULL) && + !dpm_subsys_suspend_noirq_cb(dev, suspend_msg, NULL)) { + if (state.event == PM_EVENT_THAW) { + dev_pm_skip_next_resume_phases(dev); + goto Skip; + } else { + pm_runtime_set_active(dev); + } + } + } + + if (dev->driver && dev->driver->pm) { info = "noirq driver "; callback = pm_noirq_op(dev->driver->pm, state); } +Run: error = dpm_run_callback(callback, dev, state, info); + +Skip: dev->power.is_noirq_suspended = false; if (dev_pm_may_skip_resume(dev)) { @@ -628,7 +681,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn dev_pm_skip_next_resume_phases(dev); } - Out: +Out: complete_all(&dev->power.completion); TRACE_RESUME(error); return error; @@ -1223,18 +1276,26 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a goto Complete; callback = dpm_subsys_suspend_noirq_cb(dev, state, &info); + if (callback) + goto Run; - if (!callback && dev->driver && dev->driver->pm) { + if (dev_pm_smart_suspend_and_suspended(dev) && + !dpm_subsys_suspend_late_cb(dev, state, NULL)) + goto Skip; + + if (dev->driver && dev->driver->pm) { info = "noirq driver "; callback = pm_noirq_op(dev->driver->pm, state); } +Run: error = dpm_run_callback(callback, dev, state, info); if (error) { async_error = error; goto Complete; } +Skip: dev->power.is_noirq_suspended = true; if (dev_pm_test_driver_flags(dev, DPM_FLAG_LEAVE_SUSPENDED)) { @@ -1419,17 +1480,27 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as goto Complete; callback = dpm_subsys_suspend_late_cb(dev, state, &info); + if (callback) + goto Run; - if (!callback && dev->driver && dev->driver->pm) { + if (dev_pm_smart_suspend_and_suspended(dev) && + !dpm_subsys_suspend_noirq_cb(dev, state, NULL)) + goto Skip; + + if (dev->driver && dev->driver->pm) { info = "late driver "; callback = pm_late_early_op(dev->driver->pm, state); } +Run: error = dpm_run_callback(callback, dev, 
state, info); - if (!error) - dev->power.is_late_suspended = true; - else + if (error) { async_error = error; + goto Complete; + } + +Skip: + dev->power.is_late_suspended = true; Complete: TRACE_SUSPEND(error); -- cgit v1.2.3 From 32bfa56ac158c1ebcc82df2518860f824be5e5be Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 Dec 2017 01:02:13 +0100 Subject: PM / core: Direct DPM_FLAG_LEAVE_SUSPENDED handling Make the PM core handle DPM_FLAG_LEAVE_SUSPENDED directly for devices whose "noirq", "late" and "early" driver callbacks are invoked directly by it. Namely, make it skip all of the system-wide resume callbacks for such devices with DPM_FLAG_LEAVE_SUSPENDED set if they are in runtime suspend during the "noirq" phase of system-wide suspend (or analogous) transitions or the system transition under way is a proper suspend (rather than anything related to hibernation) and the device's wakeup settings are compatible with runtime PM (that is, the device cannot generate wakeup signals at all or it is allowed to wake up the system from sleep). Signed-off-by: Rafael J. Wysocki --- Documentation/driver-api/pm/devices.rst | 9 ++++++ drivers/base/power/main.c | 51 ++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index 07026811dcae..1128705a5731 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -816,3 +816,12 @@ appropriate in its "noirq" resume callback, which is executed regardless of whether or not the device is left suspended, but the other resume callbacks (except for ``->complete``) will be skipped automatically by the PM core if the device really can be left in suspend. 
+ +For devices whose "noirq", "late" and "early" driver callbacks are invoked +directly by the PM core, all of the system-wide resume callbacks are skipped if +``DPM_FLAG_LEAVE_SUSPENDED`` is set and the device is in runtime suspend during +the ``suspend_noirq`` (or analogous) phase or the transition under way is a +proper system suspend (rather than anything related to hibernation) and the +device's wakeup settings are suitable for runtime PM (that is, it cannot +generate wakeup signals at all or it is allowed to wake up the system from +sleep). diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 154f7b4db8d0..70398e7b3569 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -619,6 +619,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn { pm_callback_t callback; const char *info; + bool skip_resume; int error = 0; TRACE_DEVICE(dev); @@ -632,10 +633,15 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn dpm_wait_for_superior(dev, async); + skip_resume = dev_pm_may_skip_resume(dev); + callback = dpm_subsys_resume_noirq_cb(dev, state, &info); if (callback) goto Run; + if (skip_resume) + goto Skip; + if (dev_pm_smart_suspend_and_suspended(dev)) { pm_message_t suspend_msg = suspend_event(state); @@ -650,7 +656,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn if (!dpm_subsys_suspend_late_cb(dev, suspend_msg, NULL) && !dpm_subsys_suspend_noirq_cb(dev, suspend_msg, NULL)) { if (state.event == PM_EVENT_THAW) { - dev_pm_skip_next_resume_phases(dev); + skip_resume = true; goto Skip; } else { pm_runtime_set_active(dev); @@ -669,7 +675,7 @@ Run: Skip: dev->power.is_noirq_suspended = false; - if (dev_pm_may_skip_resume(dev)) { + if (skip_resume) { /* * The device is going to be left in suspend, but it might not * have been in runtime suspend before the system suspended, so @@ -1244,6 +1250,32 @@ static pm_callback_t 
dpm_subsys_suspend_noirq_cb(struct device *dev, return callback; } +static bool device_must_resume(struct device *dev, pm_message_t state, + bool no_subsys_suspend_noirq) +{ + pm_message_t resume_msg = resume_event(state); + + /* + * If all of the device driver's "noirq", "late" and "early" callbacks + * are invoked directly by the core, the decision to allow the device to + * stay in suspend can be based on its current runtime PM status and its + * wakeup settings. + */ + if (no_subsys_suspend_noirq && + !dpm_subsys_suspend_late_cb(dev, state, NULL) && + !dpm_subsys_resume_early_cb(dev, resume_msg, NULL) && + !dpm_subsys_resume_noirq_cb(dev, resume_msg, NULL)) + return !pm_runtime_status_suspended(dev) && + (resume_msg.event != PM_EVENT_RESUME || + (device_can_wakeup(dev) && !device_may_wakeup(dev))); + + /* + * The only safe strategy here is to require that if the device may not + * be left in suspend, resume callbacks must be invoked for it. + */ + return !dev->power.may_skip_resume; +} + /** * __device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. 
@@ -1257,6 +1289,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a { pm_callback_t callback; const char *info; + bool no_subsys_cb = false; int error = 0; TRACE_DEVICE(dev); @@ -1279,8 +1312,9 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a if (callback) goto Run; - if (dev_pm_smart_suspend_and_suspended(dev) && - !dpm_subsys_suspend_late_cb(dev, state, NULL)) + no_subsys_cb = !dpm_subsys_suspend_late_cb(dev, state, NULL); + + if (dev_pm_smart_suspend_and_suspended(dev) && no_subsys_cb) goto Skip; if (dev->driver && dev->driver->pm) { @@ -1299,14 +1333,9 @@ Skip: dev->power.is_noirq_suspended = true; if (dev_pm_test_driver_flags(dev, DPM_FLAG_LEAVE_SUSPENDED)) { - /* - * The only safe strategy here is to require that if the device - * may not be left in suspend, resume callbacks must be invoked - * for it. - */ dev->power.must_resume = dev->power.must_resume || - !dev->power.may_skip_resume || - atomic_read(&dev->power.usage_count) > 1; + atomic_read(&dev->power.usage_count) > 1 || + device_must_resume(dev, state, no_subsys_cb); } else { dev->power.must_resume = true; } -- cgit v1.2.3 From 01e1429b877ece6576eb59b74f613b630f859478 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 23 Nov 2017 21:25:30 +0530 Subject: extcon: axp288:: Handle return value of platform_get_irq platform_get_irq() can fail here and we must check its return value. 
Signed-off-by: Arvind Yadav Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-axp288.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index 981fba56bc18..e16a7838cac3 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -301,6 +301,9 @@ static int axp288_extcon_probe(struct platform_device *pdev) for (i = 0; i < EXTCON_IRQ_END; i++) { pirq = platform_get_irq(pdev, i); + if (pirq < 0) + return pirq; + info->irq[i] = regmap_irq_get_virq(info->regmap_irqc, pirq); if (info->irq[i] < 0) { dev_err(&pdev->dev, -- cgit v1.2.3 From 10887fb0dbba483dd588f20e2929372093d49a69 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Dec 2017 13:36:13 +0100 Subject: extcon: axp288: Remove unused extcon_nb struct member Remove the unused extcon_nb struct member. Signed-off-by: Hans de Goede Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-axp288.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index e16a7838cac3..3bd27ebe2736 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -107,7 +107,6 @@ struct axp288_extcon_info { struct gpio_desc *gpio_mux_cntl; int irq[EXTCON_IRQ_END]; struct extcon_dev *edev; - struct notifier_block extcon_nb; unsigned int previous_cable; }; -- cgit v1.2.3 From 9bf317e900a19a857eb9921c9441a92e89f40415 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 31 Dec 2017 01:04:13 +0900 Subject: extcon: axp288: Remove unused platform data This is not used / set anywhere in the tree. 
Signed-off-by: Hans de Goede Reviewed-by: Chanwoo Choi Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-axp288.c | 35 +---------------------------------- include/linux/mfd/axp20x.h | 5 ----- 2 files changed, 1 insertion(+), 39 deletions(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index 3bd27ebe2736..1621f2f7f129 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -24,8 +24,6 @@ #include #include #include -#include -#include #include /* Power source status register */ @@ -79,11 +77,6 @@ enum axp288_extcon_reg { AXP288_BC_DET_STAT_REG = 0x2f, }; -enum axp288_mux_select { - EXTCON_GPIO_MUX_SEL_PMIC = 0, - EXTCON_GPIO_MUX_SEL_SOC, -}; - enum axp288_extcon_irq { VBUS_FALLING_IRQ = 0, VBUS_RISING_IRQ, @@ -104,7 +97,6 @@ struct axp288_extcon_info { struct device *dev; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irqc; - struct gpio_desc *gpio_mux_cntl; int irq[EXTCON_IRQ_END]; struct extcon_dev *edev; unsigned int previous_cable; @@ -196,15 +188,6 @@ static int axp288_handle_chrg_det_event(struct axp288_extcon_info *info) } no_vbus: - /* - * If VBUS is absent Connect D+/D- lines to PMIC for BC - * detection. Else connect them to SOC for USB communication. - */ - if (info->gpio_mux_cntl) - gpiod_set_value(info->gpio_mux_cntl, - vbus_attach ? 
EXTCON_GPIO_MUX_SEL_SOC - : EXTCON_GPIO_MUX_SEL_PMIC); - extcon_set_state_sync(info->edev, info->previous_cable, false); if (info->previous_cable == EXTCON_CHG_USB_SDP) extcon_set_state_sync(info->edev, EXTCON_USB, false); @@ -252,8 +235,7 @@ static int axp288_extcon_probe(struct platform_device *pdev) { struct axp288_extcon_info *info; struct axp20x_dev *axp20x = dev_get_drvdata(pdev->dev.parent); - struct axp288_extcon_pdata *pdata = pdev->dev.platform_data; - int ret, i, pirq, gpio; + int ret, i, pirq; info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); if (!info) @@ -263,8 +245,6 @@ static int axp288_extcon_probe(struct platform_device *pdev) info->regmap = axp20x->regmap; info->regmap_irqc = axp20x->regmap_irqc; info->previous_cable = EXTCON_NONE; - if (pdata) - info->gpio_mux_cntl = pdata->gpio_mux_cntl; platform_set_drvdata(pdev, info); @@ -285,19 +265,6 @@ static int axp288_extcon_probe(struct platform_device *pdev) return ret; } - /* Set up gpio control for USB Mux */ - if (info->gpio_mux_cntl) { - gpio = desc_to_gpio(info->gpio_mux_cntl); - ret = devm_gpio_request(&pdev->dev, gpio, "USB_MUX"); - if (ret < 0) { - dev_err(&pdev->dev, - "failed to request the gpio=%d\n", gpio); - return ret; - } - gpiod_direction_output(info->gpio_mux_cntl, - EXTCON_GPIO_MUX_SEL_PMIC); - } - for (i = 0; i < EXTCON_IRQ_END; i++) { pirq = platform_get_irq(pdev, i); if (pirq < 0) diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 78dc85365c4f..080798f17ece 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -645,11 +645,6 @@ struct axp20x_dev { const struct regmap_irq_chip *regmap_irq_chip; }; -struct axp288_extcon_pdata { - /* GPIO pin control to switch D+/D- lines b/w PMIC and SOC */ - struct gpio_desc *gpio_mux_cntl; -}; - /* generic helper function for reading 9-16 bit wide regs */ static inline int axp20x_read_variable_width(struct regmap *regmap, unsigned int reg, unsigned int width) -- cgit v1.2.3 From 
71891e2dab6b55a870f8f7735e44a2963860b5c6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 29 Dec 2017 10:02:52 -0800 Subject: ethtool: do not print warning for applications using legacy API In kernel log this message appears on every boot: "warning: `NetworkChangeNo' uses legacy ethtool link settings API, link modes are only partially reported" When ethtool link settings API changed, it started complaining about usages of old API. Ironically, the original patch was from google but the application using the legacy API is chrome. Linux ABI is fixed as much as possible. The kernel must not break it and should not complain about applications using legacy APIs. This patch just removes the warning since using legacy APIs in Linux is perfectly acceptable. Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") Signed-off-by: Stephen Hemminger Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- net/core/ethtool.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f8fcf450a36e..8225416911ae 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev, return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings); } -static void -warn_incomplete_ethtool_legacy_settings_conversion(const char *details) -{ - char name[sizeof(current->comm)]; - - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n", - get_task_comm(name, current), details); -} - /* Query device for its ethtool_cmd settings. 
* * Backward compatibility note: for compatibility with legacy ethtool, @@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) &link_ksettings); if (err < 0) return err; - if (!convert_link_ksettings_to_legacy_settings(&cmd, - &link_ksettings)) - warn_incomplete_ethtool_legacy_settings_conversion( - "link modes are only partially reported"); + convert_link_ksettings_to_legacy_settings(&cmd, + &link_ksettings); /* send a sensible cmd tag back to user */ cmd.cmd = ETHTOOL_GSET; -- cgit v1.2.3 From f9c935db8086231a35b7f5c2a53e3f1e10f388ee Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 29 Dec 2017 19:48:02 +0100 Subject: tipc: fix problems with multipoint-to-point flow control In commit 04d7b574b245 ("tipc: add multipoint-to-point flow control") we introduced a protocol for preventing buffer overflow when many group members try to simultaneously send messages to the same receiving member. Stress test of this mechanism has revealed a couple of related bugs: - When the receiving member receives an advertisement REMIT message from one of the senders, it will sometimes prematurely activate a pending member and send it the remitted advertisement, although the upper limit for active senders has been reached. This leads to accumulation of illegal advertisements, and eventually to messages being dropped because of receive buffer overflow. - When the receiving member leaves REMITTED state while a received message is being read, we miss to look at the pending queue, to activate the oldest pending peer. This leads to some pending senders being starved out, and never getting the opportunity to profit from the remitted advertisement. We fix the former in the function tipc_group_proto_rcv() by returning directly from the function once it becomes clear that the remitting peer cannot leave REMITTED state at that point. 
We fix the latter in the function tipc_group_update_rcv_win() by looking up and activating the longest pending peer when it becomes clear that the remitting peer now can leave REMITTED state. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/tipc/group.c b/net/tipc/group.c index 8e12ab55346b..5f4ffae807ee 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, static void tipc_group_decr_active(struct tipc_group *grp, struct tipc_member *m) { - if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING) + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING || + m->state == MBR_REMITTED) grp->active_cnt--; } @@ -562,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, int max_active = grp->max_active; int reclaim_limit = max_active * 3 / 4; int active_cnt = grp->active_cnt; - struct tipc_member *m, *rm; + struct tipc_member *m, *rm, *pm; m = tipc_group_find_member(grp, node, port); if (!m) @@ -605,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); } + grp->active_cnt--; + list_del_init(&m->list); + if (list_empty(&grp->pending)) + return; + + /* Set oldest pending member to active and advertise */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); break; case MBR_RECLAIMING: case MBR_DISCOVERED: @@ -742,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, if (!m || m->state != MBR_RECLAIMING) return; - list_del_init(&m->list); - grp->active_cnt--; remitted = msg_grp_remitted(hdr); /* 
Messages preceding the REMIT still in receive queue */ if (m->advertised > remitted) { m->state = MBR_REMITTED; in_flight = m->advertised - remitted; + m->advertised = ADV_IDLE + in_flight; + return; } /* All messages preceding the REMIT have been read */ if (m->advertised <= remitted) { @@ -761,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); m->advertised = ADV_IDLE + in_flight; + grp->active_cnt--; + list_del_init(&m->list); /* Set oldest pending member to active and advertise */ if (list_empty(&grp->pending)) -- cgit v1.2.3 From af1be2e21203867cb958aaceed5366e2e24b88e8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 8 Dec 2017 08:45:57 -0800 Subject: ARC: handle gcc generated __builtin_trap for older compiler ARC gcc prior to GNU 2018.03 release didn't have a target specific __builtin_trap() implementation, generating default abort() call. Implement the abort() call - emulating what newer gcc does for the same, as suggested by Arnd. Acked-by: Arnd Bergmann Signed-off-by: Vineet Gupta --- arch/arc/kernel/traps.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index 004f4e4a4c10..133a4dae41fe 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -161,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs) insterror_is_error(address, regs); } + +/* + * abort() call generated by older gcc for __builtin_trap() + */ +void abort(void) +{ + __asm__ __volatile__("trap_s 5\n"); +} -- cgit v1.2.3 From 2dc6e1a4883a3eba451c76b726c23a580ed05307 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 2 Jan 2018 19:53:11 +0100 Subject: ASoC: rt5645: set in2_diff flag for GPD win and pocket devices The GPD pocket has a differential signal microphone and needs in2_diff to be set to avoid getting a very noisy signal. 
Since the GPD pocket and win use the same DMI strings, they share their platform data-definition, so enabling in2_diff on the pocket also sets it on the GPD win. The GPD win has a normal microphone, but setting in2_diff there does not negatively impact the sound from the microphone. Signed-off-by: Hans de Goede Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index a1a7bb770745..6dd894c9f355 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3628,6 +3628,8 @@ static const struct rt5645_platform_data gpd_win_platform_data = { .jd_mode = 3, .inv_jd1_1 = true, .long_name = "gpd-win-pocket-rt5645", + /* The GPD pocket has a diff. mic, for the win this does not matter. */ + .in2_diff = true, }; static const struct dmi_system_id dmi_platform_gpd_win[] = { -- cgit v1.2.3 From 78f5605c0329f8b108a915a46032093628f6054b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 2 Jan 2018 19:53:12 +0100 Subject: ASoC: rt5645: cleanup DMI matching code Rather than doing a dmi_check_system() per possible system use an array with all known systems, with dmi_system_id.driver_data pointing to the platform-data for the matching system.
Signed-off-by: Hans de Goede Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 124 +++++++++++++++++++--------------------------- 1 file changed, 51 insertions(+), 73 deletions(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 6dd894c9f355..df6cd5bd6a9f 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3573,66 +3573,74 @@ static const struct acpi_device_id rt5645_acpi_match[] = { MODULE_DEVICE_TABLE(acpi, rt5645_acpi_match); #endif -static const struct rt5645_platform_data general_platform_data = { +static const struct rt5645_platform_data intel_braswell_platform_data = { .dmic1_data_pin = RT5645_DMIC1_DISABLE, .dmic2_data_pin = RT5645_DMIC_DATA_IN2P, .jd_mode = 3, }; -static const struct dmi_system_id dmi_platform_intel_braswell[] = { +static const struct rt5645_platform_data buddy_platform_data = { + .dmic1_data_pin = RT5645_DMIC_DATA_GPIO5, + .dmic2_data_pin = RT5645_DMIC_DATA_IN2P, + .jd_mode = 3, + .level_trigger_irq = true, +}; + +static const struct rt5645_platform_data gpd_win_platform_data = { + .jd_mode = 3, + .inv_jd1_1 = true, + .long_name = "gpd-win-pocket-rt5645", + /* The GPD pocket has a diff. mic, for the win this does not matter. 
*/ + .in2_diff = true, +}; + +static const struct rt5645_platform_data asus_t100ha_platform_data = { + .dmic1_data_pin = RT5645_DMIC_DATA_IN2N, + .dmic2_data_pin = RT5645_DMIC2_DISABLE, + .jd_mode = 3, + .inv_jd1_1 = true, +}; + +static const struct rt5645_platform_data jd_mode3_platform_data = { + .jd_mode = 3, +}; + +static const struct dmi_system_id dmi_platform_data[] = { + { + .ident = "Chrome Buddy", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Buddy"), + }, + .driver_data = (void *)&buddy_platform_data, + }, { .ident = "Intel Strago", .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "Strago"), }, + .driver_data = (void *)&intel_braswell_platform_data, }, { .ident = "Google Chrome", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), }, + .driver_data = (void *)&intel_braswell_platform_data, }, { .ident = "Google Setzer", .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"), }, + .driver_data = (void *)&intel_braswell_platform_data, }, { .ident = "Microsoft Surface 3", .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "Surface 3"), }, + .driver_data = (void *)&intel_braswell_platform_data, }, - { } -}; - -static const struct rt5645_platform_data buddy_platform_data = { - .dmic1_data_pin = RT5645_DMIC_DATA_GPIO5, - .dmic2_data_pin = RT5645_DMIC_DATA_IN2P, - .jd_mode = 3, - .level_trigger_irq = true, -}; - -static const struct dmi_system_id dmi_platform_intel_broadwell[] = { - { - .ident = "Chrome Buddy", - .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "Buddy"), - }, - }, - { } -}; - -static const struct rt5645_platform_data gpd_win_platform_data = { - .jd_mode = 3, - .inv_jd1_1 = true, - .long_name = "gpd-win-pocket-rt5645", - /* The GPD pocket has a diff. mic, for the win this does not matter. */ - .in2_diff = true, -}; - -static const struct dmi_system_id dmi_platform_gpd_win[] = { { /* * Match for the GPDwin which unfortunately uses somewhat @@ -3643,61 +3651,34 @@ static const struct dmi_system_id dmi_platform_gpd_win[] = { * the same default product_name. 
Also the GPDwin is the * only device to have both board_ and product_name not set. */ - .ident = "GPD Win", + .ident = "GPD Win / Pocket", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Default string"), DMI_MATCH(DMI_BOARD_SERIAL, "Default string"), DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), }, + .driver_data = (void *)&gpd_win_platform_data, }, - {} -}; - -static const struct rt5645_platform_data general_platform_data2 = { - .dmic1_data_pin = RT5645_DMIC_DATA_IN2N, - .dmic2_data_pin = RT5645_DMIC2_DISABLE, - .jd_mode = 3, - .inv_jd1_1 = true, -}; - -static const struct dmi_system_id dmi_platform_asus_t100ha[] = { { .ident = "ASUS T100HAN", .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "T100HAN"), }, + .driver_data = (void *)&asus_t100ha_platform_data, }, - { } -}; - -static const struct rt5645_platform_data minix_z83_4_platform_data = { - .jd_mode = 3, -}; - -static const struct dmi_system_id dmi_platform_minix_z83_4[] = { { .ident = "MINIX Z83-4", .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), }, + .driver_data = (void *)&jd_mode3_platform_data, }, { } }; -static bool rt5645_check_dp(struct device *dev) -{ - if (device_property_present(dev, "realtek,in2-differential") || - device_property_present(dev, "realtek,dmic1-data-pin") || - device_property_present(dev, "realtek,dmic2-data-pin") || - device_property_present(dev, "realtek,jd-mode")) - return true; - - return false; -} - static int rt5645_parse_dt(struct rt5645_priv *rt5645, struct device *dev) { rt5645->pdata.in2_diff = device_property_read_bool(dev, @@ -3716,6 +3697,7 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { struct rt5645_platform_data *pdata = dev_get_platdata(&i2c->dev); + const struct dmi_system_id *dmi_data; struct rt5645_priv *rt5645; int ret, i; unsigned int val; @@ -3729,20 +3711,16 @@ static int 
rt5645_i2c_probe(struct i2c_client *i2c, rt5645->i2c = i2c; i2c_set_clientdata(i2c, rt5645); + dmi_data = dmi_first_match(dmi_platform_data); + if (dmi_data) { + dev_info(&i2c->dev, "Detected %s platform\n", dmi_data->ident); + pdata = dmi_data->driver_data; + } + if (pdata) rt5645->pdata = *pdata; - else if (dmi_check_system(dmi_platform_intel_broadwell)) - rt5645->pdata = buddy_platform_data; - else if (rt5645_check_dp(&i2c->dev)) + else rt5645_parse_dt(rt5645, &i2c->dev); - else if (dmi_check_system(dmi_platform_intel_braswell)) - rt5645->pdata = general_platform_data; - else if (dmi_check_system(dmi_platform_gpd_win)) - rt5645->pdata = gpd_win_platform_data; - else if (dmi_check_system(dmi_platform_asus_t100ha)) - rt5645->pdata = general_platform_data2; - else if (dmi_check_system(dmi_platform_minix_z83_4)) - rt5645->pdata = minix_z83_4_platform_data; if (quirk != -1) { rt5645->pdata.in2_diff = QUIRK_IN2_DIFF(quirk); -- cgit v1.2.3 From a249a95667f4f814b9b15f4b59049ffe68b5677f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 2 Jan 2018 19:53:13 +0100 Subject: ASoC: rt5645: add platform data for the Teclast X80 Pro tablet The Teclast X80 Pro tablet needs jd_mode = 3 for headset jack detection. 
Signed-off-by: Hans de Goede Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index df6cd5bd6a9f..daf7b73ba415 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3676,6 +3676,14 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&jd_mode3_platform_data, }, + { + .ident = "Teclast X80 Pro", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_MATCH(DMI_PRODUCT_NAME, "X80 Pro"), + }, + .driver_data = (void *)&jd_mode3_platform_data, + }, { } }; -- cgit v1.2.3 From b70b309950418437bbd2a30afd169c4f09dee3e5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 2 Jan 2018 19:53:14 +0100 Subject: ASoC: Intel: cht_bsw_rt5645: Analog Mic support Various Cherry Trail boards with a rt5645 codec have an analog mic connected to IN2P + IN2N. The mic on these boards also needs micbias to be enabled; on some boards micbias1 is used and on others micbias2, so we enable both. This commit adds a new "Int Analog Mic" DAPM widget for this, so that we do not end up enabling micbias on boards with a digital mic which uses the already present "Int Mic" widget. Some existing UCM files already refer to "Int Mic" for their "Internal Analog Microphones" SectionDevice, but these don't work anyway since they enable the RECMIX BST1 Switch instead of the BST2 switch.
Signed-off-by: Hans de Goede Signed-off-by: Mark Brown --- sound/soc/intel/boards/cht_bsw_rt5645.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index 18d129caa974..f898ee140cdc 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -118,6 +118,7 @@ static const struct snd_soc_dapm_widget cht_dapm_widgets[] = { SND_SOC_DAPM_HP("Headphone", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_MIC("Int Mic", NULL), + SND_SOC_DAPM_MIC("Int Analog Mic", NULL), SND_SOC_DAPM_SPK("Ext Spk", NULL), SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, platform_clock_control, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), @@ -128,6 +129,8 @@ static const struct snd_soc_dapm_route cht_rt5645_audio_map[] = { {"IN1N", NULL, "Headset Mic"}, {"DMIC L1", NULL, "Int Mic"}, {"DMIC R1", NULL, "Int Mic"}, + {"IN2P", NULL, "Int Analog Mic"}, + {"IN2N", NULL, "Int Analog Mic"}, {"Headphone", NULL, "HPOL"}, {"Headphone", NULL, "HPOR"}, {"Ext Spk", NULL, "SPOL"}, @@ -135,6 +138,9 @@ static const struct snd_soc_dapm_route cht_rt5645_audio_map[] = { {"Headphone", NULL, "Platform Clock"}, {"Headset Mic", NULL, "Platform Clock"}, {"Int Mic", NULL, "Platform Clock"}, + {"Int Analog Mic", NULL, "Platform Clock"}, + {"Int Analog Mic", NULL, "micbias1"}, + {"Int Analog Mic", NULL, "micbias2"}, {"Ext Spk", NULL, "Platform Clock"}, }; @@ -189,6 +195,7 @@ static const struct snd_kcontrol_new cht_mc_controls[] = { SOC_DAPM_PIN_SWITCH("Headphone"), SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Int Mic"), + SOC_DAPM_PIN_SWITCH("Int Analog Mic"), SOC_DAPM_PIN_SWITCH("Ext Spk"), }; -- cgit v1.2.3 From 9e327ce71f3894e7e6b57f5c15a0dfa5be79f44e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 2 Jan 2018 14:27:59 +0100 Subject: spi: sirf: account for const type of of_device_id.data This driver creates various const structures that it stores in 
the data field of an of_device_id array. Adding const to the declaration of the location that receives the const value from the data field ensures that the compiler will continue to check that the value is not modified. Furthermore, the const-discarding cast on the extraction from the data field is no longer needed. Done using Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Mark Brown --- drivers/spi/spi-sirf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-sirf.c b/drivers/spi/spi-sirf.c index bbb1a275f718..f009d76f96b1 100644 --- a/drivers/spi/spi-sirf.c +++ b/drivers/spi/spi-sirf.c @@ -1072,7 +1072,7 @@ static int spi_sirfsoc_probe(struct platform_device *pdev) struct sirfsoc_spi *sspi; struct spi_master *master; struct resource *mem_res; - struct sirf_spi_comp_data *spi_comp_data; + const struct sirf_spi_comp_data *spi_comp_data; int irq; int ret; const struct of_device_id *match; @@ -1092,7 +1092,7 @@ static int spi_sirfsoc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, master); sspi = spi_master_get_devdata(master); sspi->fifo_full_offset = ilog2(sspi->fifo_size); - spi_comp_data = (struct sirf_spi_comp_data *)match->data; + spi_comp_data = match->data; sspi->regs = spi_comp_data->regs; sspi->type = spi_comp_data->type; sspi->fifo_level_chk_mask = (sspi->fifo_size / 4) - 1; -- cgit v1.2.3 From d5cc0a1fcbb5ddbef9fdd4c4a978da3254ddbf37 Mon Sep 17 00:00:00 2001 From: Pardha Saradhi K Date: Tue, 2 Jan 2018 14:59:57 +0530 Subject: ASoC: Intel: Skylake: Disable clock gating during firmware and library download During firmware and library download, sometimes it is observed that firmware and library download is timed-out resulting into probe failure. This patch disables dynamic clock gating while firmware and library download. 
Signed-off-by: Pardha Saradhi K Signed-off-by: Sanyog Kale Signed-off-by: Guneshwor Singh Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-messages.c | 4 ++++ sound/soc/intel/skylake/skl-pcm.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c index 4e63213a8d55..933c1fbb222f 100644 --- a/sound/soc/intel/skylake/skl-messages.c +++ b/sound/soc/intel/skylake/skl-messages.c @@ -417,7 +417,11 @@ int skl_resume_dsp(struct skl *skl) if (skl->skl_sst->is_first_boot == true) return 0; + /* disable dynamic clock gating during fw and lib download */ + ctx->enable_miscbdcge(ctx->dev, false); + ret = skl_dsp_wake(ctx->dsp); + ctx->enable_miscbdcge(ctx->dev, true); if (ret < 0) return ret; diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c index cc6535ab84d1..b45a9cd5f058 100644 --- a/sound/soc/intel/skylake/skl-pcm.c +++ b/sound/soc/intel/skylake/skl-pcm.c @@ -1342,7 +1342,11 @@ static int skl_platform_soc_probe(struct snd_soc_platform *platform) return -EIO; } + /* disable dynamic clock gating during fw and lib download */ + skl->skl_sst->enable_miscbdcge(platform->dev, false); + ret = ops->init_fw(platform->dev, skl->skl_sst); + skl->skl_sst->enable_miscbdcge(platform->dev, true); if (ret < 0) { dev_err(platform->dev, "Failed to boot first fw: %d\n", ret); return ret; -- cgit v1.2.3 From 3a41092709a14c8efc84571deacc95a24b7fd6b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 29 Dec 2017 14:48:02 +0100 Subject: spi: bcm53xx: simplify reading SPI data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit makes transfer function use spi_transfer_is_last to determine if currently processed transfer is the last one. Thanks to that we finally set hardware registers properly and it makes controller behave the way it's expected to. 
This allows simplifying read function which can now simply start reading from the slot 0 instead of the last saved offset. It has been successfully tested using spi_write_then_read. Moreover this change fixes handling messages with two writing transfers. It's important for SPI flash devices as their drivers commonly use one transfer for a command and another one for data. Signed-off-by: Rafał Miłecki Signed-off-by: Mark Brown --- drivers/spi/spi-bcm53xx.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/spi/spi-bcm53xx.c b/drivers/spi/spi-bcm53xx.c index 6e409eabe1c9..d02ceb7a29d1 100644 --- a/drivers/spi/spi-bcm53xx.c +++ b/drivers/spi/spi-bcm53xx.c @@ -27,8 +27,6 @@ struct bcm53xxspi { struct bcma_device *core; struct spi_master *master; void __iomem *mmio_base; - - size_t read_offset; bool bspi; /* Boot SPI mode with memory mapping */ }; @@ -172,8 +170,6 @@ static void bcm53xxspi_buf_write(struct bcm53xxspi *b53spi, u8 *w_buf, if (!cont) bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 0); - - b53spi->read_offset = len; } static void bcm53xxspi_buf_read(struct bcm53xxspi *b53spi, u8 *r_buf, @@ -182,10 +178,10 @@ static void bcm53xxspi_buf_read(struct bcm53xxspi *b53spi, u8 *r_buf, u32 tmp; int i; - for (i = 0; i < b53spi->read_offset + len; i++) { + for (i = 0; i < len; i++) { tmp = B53SPI_CDRAM_CONT | B53SPI_CDRAM_PCS_DISABLE_ALL | B53SPI_CDRAM_PCS_DSCK; - if (!cont && i == b53spi->read_offset + len - 1) + if (!cont && i == len - 1) tmp &= ~B53SPI_CDRAM_CONT; tmp &= ~0x1; /* Command Register File */ @@ -194,8 +190,7 @@ static void bcm53xxspi_buf_read(struct bcm53xxspi *b53spi, u8 *r_buf, /* Set queue pointers */ bcm53xxspi_write(b53spi, B53SPI_MSPI_NEWQP, 0); - bcm53xxspi_write(b53spi, B53SPI_MSPI_ENDQP, - b53spi->read_offset + len - 1); + bcm53xxspi_write(b53spi, B53SPI_MSPI_ENDQP, len - 1); if (cont) bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 1); @@ -214,13 +209,11 @@ static void 
bcm53xxspi_buf_read(struct bcm53xxspi *b53spi, u8 *r_buf, bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 0); for (i = 0; i < len; ++i) { - int offset = b53spi->read_offset + i; + u16 reg = B53SPI_MSPI_RXRAM + 4 * (1 + i * 2); /* Data stored in the transmit register file LSB */ - r_buf[i] = (u8)bcm53xxspi_read(b53spi, B53SPI_MSPI_RXRAM + 4 * (1 + offset * 2)); + r_buf[i] = (u8)bcm53xxspi_read(b53spi, reg); } - - b53spi->read_offset = 0; } static int bcm53xxspi_transfer_one(struct spi_master *master, @@ -238,7 +231,8 @@ static int bcm53xxspi_transfer_one(struct spi_master *master, left = t->len; while (left) { size_t to_write = min_t(size_t, 16, left); - bool cont = left - to_write > 0; + bool cont = !spi_transfer_is_last(master, t) || + left - to_write > 0; bcm53xxspi_buf_write(b53spi, buf, to_write, cont); left -= to_write; @@ -250,9 +244,9 @@ static int bcm53xxspi_transfer_one(struct spi_master *master, buf = (u8 *)t->rx_buf; left = t->len; while (left) { - size_t to_read = min_t(size_t, 16 - b53spi->read_offset, - left); - bool cont = left - to_read > 0; + size_t to_read = min_t(size_t, 16, left); + bool cont = !spi_transfer_is_last(master, t) || + left - to_read > 0; bcm53xxspi_buf_read(b53spi, buf, to_read, cont); left -= to_read; -- cgit v1.2.3 From fa32f7a3fedd800161ca529397da14faa6c47c67 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 1 Jan 2018 20:38:50 +0800 Subject: regulator: sc2731: Fix defines for SC2731_WR_UNLOCK and SC2731_PWR_WR_PROT_VALUE The defines for SC2731_WR_UNLOCK and SC2731_PWR_WR_PROT_VALUE make the regmap_write() call look strange because it takes the reg parameter first, then val. Based on Erick's suggestion, define SC2731_PWR_WR_PROT and SC2731_WR_UNLOCK_VALUE instead.
Signed-off-by: Axel Lin Reviewed-by: Erick Chen Signed-off-by: Mark Brown --- drivers/regulator/sc2731-regulator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/sc2731-regulator.c b/drivers/regulator/sc2731-regulator.c index 794fcd504b3d..eb2bdf060b7b 100644 --- a/drivers/regulator/sc2731-regulator.c +++ b/drivers/regulator/sc2731-regulator.c @@ -13,8 +13,8 @@ /* * SC2731 regulator lock register */ -#define SC2731_PWR_WR_PROT_VALUE 0xf0c -#define SC2731_WR_UNLOCK 0x6e7f +#define SC2731_PWR_WR_PROT 0xf0c +#define SC2731_WR_UNLOCK_VALUE 0x6e7f /* * SC2731 enable register @@ -203,8 +203,8 @@ static struct regulator_desc regulators[] = { static int sc2731_regulator_unlock(struct regmap *regmap) { - return regmap_write(regmap, SC2731_PWR_WR_PROT_VALUE, - SC2731_WR_UNLOCK); + return regmap_write(regmap, SC2731_PWR_WR_PROT, + SC2731_WR_UNLOCK_VALUE); } static int sc2731_regulator_probe(struct platform_device *pdev) -- cgit v1.2.3 From dd6bb9b16f23f9b95b77713c45bd6182336c5b2e Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Tue, 2 Jan 2018 19:47:18 +0800 Subject: ASoC: mediatek: fix error handling in mt2701_afe_pcm_dev_probe() Fix unbalanced error handling path which will get incorrect counts if probe failed. The .remove() should be adjusted accordingly. 
Signed-off-by: Ryder Lee Tested-by: Garlic Tseng Signed-off-by: Mark Brown --- sound/soc/mediatek/mt2701/mt2701-afe-pcm.c | 31 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c index 8fda182f849b..a7362d1cda1b 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c @@ -1590,12 +1590,16 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, afe); - pm_runtime_enable(&pdev->dev); - if (!pm_runtime_enabled(&pdev->dev)) - goto err_pm_disable; - pm_runtime_get_sync(&pdev->dev); - ret = snd_soc_register_platform(&pdev->dev, &mtk_afe_pcm_platform); + pm_runtime_enable(dev); + if (!pm_runtime_enabled(dev)) { + ret = mt2701_afe_runtime_resume(dev); + if (ret) + goto err_pm_disable; + } + pm_runtime_get_sync(dev); + + ret = snd_soc_register_platform(dev, &mtk_afe_pcm_platform); if (ret) { dev_warn(dev, "err_platform\n"); goto err_platform; @@ -1610,35 +1614,28 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) goto err_dai_component; } - mt2701_afe_runtime_resume(&pdev->dev); - return 0; err_dai_component: - snd_soc_unregister_component(&pdev->dev); - + snd_soc_unregister_platform(dev); err_platform: - snd_soc_unregister_platform(&pdev->dev); - + pm_runtime_put_sync(dev); err_pm_disable: - pm_runtime_disable(&pdev->dev); + pm_runtime_disable(dev); return ret; } static int mt2701_afe_pcm_dev_remove(struct platform_device *pdev) { - struct mtk_base_afe *afe = platform_get_drvdata(pdev); - + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt2701_afe_runtime_suspend(&pdev->dev); - pm_runtime_put_sync(&pdev->dev); snd_soc_unregister_component(&pdev->dev); snd_soc_unregister_platform(&pdev->dev); - /* disable afe clock */ - mt2701_afe_disable_clock(afe); + return 0; } -- cgit 
v1.2.3 From ac89c400ebb146604e718b3fa168c15592e73a8c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 2 Jan 2018 10:51:34 +0530 Subject: cpu_cooling: Remove static-power related documentation commit 84fe2cab4859 ("cpu_cooling: Drop static-power related stuff") removed support for static-power in kernel, but it missed reflecting the same in documentation. Remove the static power related documentation bits as well. Reported-by: Javi Merino Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/thermal/cpu-cooling-api.txt | 82 +------------------------------ 1 file changed, 2 insertions(+), 80 deletions(-) diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt index 7a1c89db0419..7df567eaea1a 100644 --- a/Documentation/thermal/cpu-cooling-api.txt +++ b/Documentation/thermal/cpu-cooling-api.txt @@ -44,16 +44,14 @@ the user. The registration APIs returns the cooling device pointer. 2. Power models The power API registration functions provide a simple power model for -CPUs. The current power is calculated as dynamic + (optionally) -static power. This power model requires that the operating-points of +CPUs. The current power is calculated as dynamic power (static power isn't +supported currently). This power model requires that the operating-points of the CPUs are registered using the kernel's opp library and the `cpufreq_frequency_table` is assigned to the `struct device` of the cpu. If you are using CONFIG_CPUFREQ_DT then the `cpufreq_frequency_table` should already be assigned to the cpu device. -2.1 Dynamic power - The dynamic power consumption of a processor depends on many factors. For a given processor implementation the primary factors are: @@ -92,79 +90,3 @@ mW/MHz/uVolt^2. Typical values for mobile CPUs might lie in range from 100 to 500. 
For reference, the approximate values for the SoC in ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and 140 for the Cortex-A53 cluster. - - -2.2 Static power - -Static leakage power consumption depends on a number of factors. For a -given circuit implementation the primary factors are: - -- Time the circuit spends in each 'power state' -- Temperature -- Operating voltage -- Process grade - -The time the circuit spends in each 'power state' for a given -evaluation period at first order means OFF or ON. However, -'retention' states can also be supported that reduce power during -inactive periods without loss of context. - -Note: The visibility of state entries to the OS can vary, according to -platform specifics, and this can then impact the accuracy of a model -based on OS state information alone. It might be possible in some -cases to extract more accurate information from system resources. - -The temperature, operating voltage and process 'grade' (slow to fast) -of the circuit are all significant factors in static leakage power -consumption. All of these have complex relationships to static power. - -Circuit implementation specific factors include the chosen silicon -process as well as the type, number and size of transistors in both -the logic gates and any RAM elements included. - -The static power consumption modelling must take into account the -power managed regions that are implemented. Taking the example of an -ARM processor cluster, the modelling would take into account whether -each CPU can be powered OFF separately or if only a single power -region is implemented for the complete cluster. - -In one view, there are others, a static power consumption model can -then start from a set of reference values for each power managed -region (e.g. CPU, Cluster/L2) in each state (e.g. ON, OFF) at an -arbitrary process grade, voltage and temperature point. 
These values -are then scaled for all of the following: the time in each state, the -process grade, the current temperature and the operating voltage. -However, since both implementation specific and complex relationships -dominate the estimate, the appropriate interface to the model from the -cpu cooling device is to provide a function callback that calculates -the static power in this platform. When registering the cpu cooling -device pass a function pointer that follows the `get_static_t` -prototype: - - int plat_get_static(cpumask_t *cpumask, int interval, - unsigned long voltage, u32 &power); - -`cpumask` is the cpumask of the cpus involved in the calculation. -`voltage` is the voltage at which they are operating. The function -should calculate the average static power for the last `interval` -milliseconds. It returns 0 on success, -E* on error. If it -succeeds, it should store the static power in `power`. Reading the -temperature of the cpus described by `cpumask` is left for -plat_get_static() to do as the platform knows best which thermal -sensor is closest to the cpu. - -If `plat_static_func` is NULL, static power is considered to be -negligible for this platform and only dynamic power is considered. - -The platform specific callback can then use any combination of tables -and/or equations to permute the estimated value. Process grade -information is not passed to the model since access to such data, from -on-chip measurement capability or manufacture time data, is platform -specific. - -Note: the significance of static power for CPUs in comparison to -dynamic power is highly dependent on implementation. Given the -potential complexity in implementation, the importance and accuracy of -its inclusion when using cpu cooling devices should be assessed on a -case by case basis. 
- -- cgit v1.2.3 From 230c08b2acf65863ac5905ea1fa93106bdd20af3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 2 Jan 2018 14:28:06 +0100 Subject: spi: spi-fsl-dspi: account for const type of of_device_id.data This driver creates a number of const structures that it stores in the data field of an of_device_id array. The data field of an of_device_id structure has type const void *, so there is no need for a const-discarding cast when putting const values into such a structure. Done using Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 02d3ed7f2558..0630962ce442 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -903,10 +903,9 @@ static irqreturn_t dspi_interrupt(int irq, void *dev_id) } static const struct of_device_id fsl_dspi_dt_ids[] = { - { .compatible = "fsl,vf610-dspi", .data = (void *)&vf610_data, }, - { .compatible = "fsl,ls1021a-v1.0-dspi", - .data = (void *)&ls1021a_v1_data, }, - { .compatible = "fsl,ls2085a-dspi", .data = (void *)&ls2085a_data, }, + { .compatible = "fsl,vf610-dspi", .data = &vf610_data, }, + { .compatible = "fsl,ls1021a-v1.0-dspi", .data = &ls1021a_v1_data, }, + { .compatible = "fsl,ls2085a-dspi", .data = &ls2085a_data, }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fsl_dspi_dt_ids); -- cgit v1.2.3 From b9f902b7fd800214b5598a636ceb74bfe2db63be Mon Sep 17 00:00:00 2001 From: Banajit Goswami Date: Sun, 31 Dec 2017 20:40:14 -0800 Subject: ASoC: change mask in snd_soc_get/put_volsw_sx to unsigned int If the result of (min + max) is negative in functions snd_soc_get_volsw_sx() or snd_soc_put_volsw_sx(), there will be an overflow for the variable 'mask'. 
UBSAN: Undefined behaviour in sound/soc/soc-ops.c:382:6 signed integer overflow: -2147483648 - 1 cannot be represented in type 'int' Fix this by updating the variable type of 'mask' to unsigned int. Signed-off-by: Banajit Goswami Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 500f98c730b9..7144a51ddfa9 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -378,7 +378,7 @@ int snd_soc_get_volsw_sx(struct snd_kcontrol *kcontrol, unsigned int rshift = mc->rshift; int max = mc->max; int min = mc->min; - int mask = (1 << (fls(min + max) - 1)) - 1; + unsigned int mask = (1 << (fls(min + max) - 1)) - 1; unsigned int val; int ret; @@ -423,7 +423,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, unsigned int rshift = mc->rshift; int max = mc->max; int min = mc->min; - int mask = (1 << (fls(min + max) - 1)) - 1; + unsigned int mask = (1 << (fls(min + max) - 1)) - 1; int err = 0; unsigned int val, val_mask, val2 = 0; -- cgit v1.2.3 From 835bcec5fdf3f9e880111b482177e7e70e3596da Mon Sep 17 00:00:00 2001 From: Dave Young Date: Tue, 2 Jan 2018 17:21:09 +0000 Subject: x86/efi: Fix kernel param add_efi_memmap regression 'add_efi_memmap' is an early param, but do_add_efi_memmap() has no chance to run because the code path is before parse_early_param(). I believe it worked when the param was introduced but probably later some other changes caused the wrong order and nobody noticed it. Move efi_memblock_x86_reserve_range() after parse_early_param() to fix it. 
Signed-off-by: Dave Young Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Cc: Bryan O'Donoghue Cc: Ge Song Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180102172110.17018-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8af2e8d0c0a1..145810b0edf6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -906,9 +906,6 @@ void __init setup_arch(char **cmdline_p) set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_64BIT, &efi.flags); } - - if (efi_enabled(EFI_BOOT)) - efi_memblock_x86_reserve_range(); #endif x86_init.oem.arch_setup(); @@ -962,6 +959,8 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + if (efi_enabled(EFI_BOOT)) + efi_memblock_x86_reserve_range(); #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux -- cgit v1.2.3 From f24c4d478013d82bd1b943df566fff3561d52864 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jan 2018 17:21:10 +0000 Subject: efi/capsule-loader: Reinstate virtual capsule mapping Commit: 82c3768b8d68 ("efi/capsule-loader: Use a cached copy of the capsule header") ... refactored the capsule loading code that maps the capsule header, to avoid having to map it several times. However, as it turns out, the vmap() call we ended up removing did not just map the header, but the entire capsule image, and dropping this virtual mapping breaks capsules that are processed by the firmware immediately (i.e., without a reboot). Unfortunately, that change was part of a larger refactor that allowed a quirk to be implemented for Quark, which has a non-standard memory layout for capsules, and we have slightly painted ourselves into a corner by allowing quirk code to mangle the capsule header and memory layout. 
So we need to fix this without breaking Quark. Fortunately, Quark does not appear to care about the virtual mapping, and so we can simply do a partial revert of commit: 2a457fb31df6 ("efi/capsule-loader: Use page addresses rather than struct page pointers") ... and create a vmap() mapping of the entire capsule (including header) based on the reinstated struct page array, unless running on Quark, in which case we pass the capsule header copy as before. Reported-by: Ge Song Tested-by: Bryan O'Donoghue Tested-by: Ge Song Signed-off-by: Ard Biesheuvel Cc: Cc: Dave Young Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 82c3768b8d68 ("efi/capsule-loader: Use a cached copy of the capsule header") Link: http://lkml.kernel.org/r/20180102172110.17018-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 13 +++++++++- drivers/firmware/efi/capsule-loader.c | 45 ++++++++++++++++++++++++++++------- include/linux/efi.h | 4 +++- 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 8a99a2e96537..5b513ccffde4 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, /* * Update the first page pointer to skip over the CSH header. */ - cap_info->pages[0] += csh->headersize; + cap_info->phys[0] += csh->headersize; + + /* + * cap_info->capsule should point at a virtual mapping of the entire + * capsule, starting at the capsule header. Our image has the Quark + * security header prepended, so we cannot rely on the default vmap() + * mapping created by the generic capsule code. + * Given that the Quark firmware does not appear to care about the + * virtual mapping, let's just point cap_info->capsule at our copy + * of the capsule header. 
+ */ + cap_info->capsule = &cap_info->header; return 1; } diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index ec8ac5c4dd84..055e2e8f985a 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -20,10 +20,6 @@ #define NO_FURTHER_WRITE_ACTION -1 -#ifndef phys_to_page -#define phys_to_page(x) pfn_to_page((x) >> PAGE_SHIFT) -#endif - /** * efi_free_all_buff_pages - free all previous allocated buffer pages * @cap_info: pointer to current instance of capsule_info structure @@ -35,7 +31,7 @@ static void efi_free_all_buff_pages(struct capsule_info *cap_info) { while (cap_info->index > 0) - __free_page(phys_to_page(cap_info->pages[--cap_info->index])); + __free_page(cap_info->pages[--cap_info->index]); cap_info->index = NO_FURTHER_WRITE_ACTION; } @@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info) cap_info->pages = temp_page; + temp_page = krealloc(cap_info->phys, + pages_needed * sizeof(phys_addr_t *), + GFP_KERNEL | __GFP_ZERO); + if (!temp_page) + return -ENOMEM; + + cap_info->phys = temp_page; + return 0; } @@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, **/ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) { + bool do_vunmap = false; int ret; - ret = efi_capsule_update(&cap_info->header, cap_info->pages); + /* + * cap_info->capsule may have been assigned already by a quirk + * handler, so only overwrite it if it is NULL + */ + if (!cap_info->capsule) { + cap_info->capsule = vmap(cap_info->pages, cap_info->index, + VM_MAP, PAGE_KERNEL); + if (!cap_info->capsule) + return -ENOMEM; + do_vunmap = true; + } + + ret = efi_capsule_update(cap_info->capsule, cap_info->phys); + if (do_vunmap) + vunmap(cap_info->capsule); if (ret) { pr_err("capsule update failed\n"); return ret; @@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, goto failed; } - 
cap_info->pages[cap_info->index++] = page_to_phys(page); + cap_info->pages[cap_info->index] = page; + cap_info->phys[cap_info->index] = page_to_phys(page); cap_info->page_bytes_remain = PAGE_SIZE; + cap_info->index++; } else { - page = phys_to_page(cap_info->pages[cap_info->index - 1]); + page = cap_info->pages[cap_info->index - 1]; } kbuff = kmap(page); @@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file) struct capsule_info *cap_info = file->private_data; kfree(cap_info->pages); + kfree(cap_info->phys); kfree(file->private_data); file->private_data = NULL; return 0; @@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } + cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + if (!cap_info->phys) { + kfree(cap_info->pages); + kfree(cap_info); + return -ENOMEM; + } + file->private_data = cap_info; return 0; diff --git a/include/linux/efi.h b/include/linux/efi.h index d813f7b04da7..29fdf8029cf6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -140,11 +140,13 @@ struct efi_boot_memmap { struct capsule_info { efi_capsule_header_t header; + efi_capsule_header_t *capsule; int reset_type; long index; size_t count; size_t total_size; - phys_addr_t *pages; + struct page **pages; + phys_addr_t *phys; size_t page_bytes_remain; }; -- cgit v1.2.3 From 81b60dbff04980a45b348c5b5eeca2713d4594ca Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 3 Jan 2018 09:44:17 +0000 Subject: MAINTAINERS: Remove Matt Fleming as EFI co-maintainer Instate Ard Biesheuvel as the sole EFI maintainer and leave other folks as maintainers for the EFI test driver and efivarfs file system. Also add Ard Biesheuvel as the EFI test driver and efivarfs maintainer. 
Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Ivan Hu Cc: Jeremy Kerr Cc: Linus Torvalds Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180103094417.6353-1-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- MAINTAINERS | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b46c9cea5ae5..95c3fa1f520f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5149,15 +5149,15 @@ F: sound/usb/misc/ua101.c EFI TEST DRIVER L: linux-efi@vger.kernel.org M: Ivan Hu -M: Matt Fleming +M: Ard Biesheuvel S: Maintained F: drivers/firmware/efi/test/ EFI VARIABLE FILESYSTEM M: Matthew Garrett M: Jeremy Kerr -M: Matt Fleming -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git +M: Ard Biesheuvel +T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git L: linux-efi@vger.kernel.org S: Maintained F: fs/efivarfs/ @@ -5318,7 +5318,6 @@ S: Supported F: security/integrity/evm/ EXTENSIBLE FIRMWARE INTERFACE (EFI) -M: Matt Fleming M: Ard Biesheuvel L: linux-efi@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git -- cgit v1.2.3 From 76ad9dffd91be11e51b847eb115d623b713a3bdc Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 2 Jan 2018 18:10:38 +0000 Subject: efi/capsule-loader: Fix pr_err() string to end with newline pr_err() messages should be terminated with a newline to avoid other messages being concatenated onto the end. 
Signed-off-by: Arvind Yadav Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Boyd Cc: Thomas Gleixner Cc: Tyler Baicar Cc: Vasyl Gomonovych Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180102181042.19074-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/capsule-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 055e2e8f985a..e456f4602df1 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -45,7 +45,7 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info) pages_needed = ALIGN(cap_info->total_size, PAGE_SIZE) / PAGE_SIZE; if (pages_needed == 0) { - pr_err("invalid capsule size"); + pr_err("invalid capsule size\n"); return -EINVAL; } -- cgit v1.2.3 From 1e9de1d2207d67b97bb0b62e38454b663d6542fa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jan 2018 18:10:39 +0000 Subject: arm64/efi: Ignore EFI_MEMORY_XP attribute if RP and/or WP are set The UEFI memory map is a bit vague about how to interpret the EFI_MEMORY_XP attribute when it is combined with EFI_MEMORY_RP and/or EFI_MEMORY_WP, which have retroactively been redefined as cacheability attributes rather than permission attributes. So let's ignore EFI_MEMORY_XP if _RP and/or _WP are also set. 
In this case, it is likely that they are being used to describe the capability of the region (i.e., whether it has the controls to reconfigure it as non-executable) rather than the nature of the contents of the region (i.e., whether it contains data that we will never attempt to execute) Reported-by: Stephen Boyd Tested-by: Stephen Boyd Signed-off-by: Ard Biesheuvel Cc: Arvind Yadav Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tyler Baicar Cc: Vasyl Gomonovych Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180102181042.19074-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/efi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 82cd07592519..f85ac58d08a3 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -48,7 +48,9 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) return pgprot_val(PAGE_KERNEL_ROX); /* RW- */ - if (attr & EFI_MEMORY_XP || type != EFI_RUNTIME_SERVICES_CODE) + if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) == + EFI_MEMORY_XP) || + type != EFI_RUNTIME_SERVICES_CODE) return pgprot_val(PAGE_KERNEL); /* RWX */ -- cgit v1.2.3 From 50342b2e498777df237a40a23eebc02f0935e636 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Tue, 2 Jan 2018 18:10:40 +0000 Subject: efi: Use PTR_ERR_OR_ZERO() Fix ptr_ret.cocci warnings: drivers/firmware/efi/efi.c:610:8-14: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Cc: Arvind Yadav Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Boyd Cc: Thomas Gleixner Cc: Tyler Baicar Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180102181042.19074-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- 
drivers/firmware/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 557a47829d03..8ce70c2e73d5 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -608,7 +608,7 @@ static int __init efi_load_efivars(void) return 0; pdev = platform_device_register_simple("efivars", 0, NULL, 0); - return IS_ERR(pdev) ? PTR_ERR(pdev) : 0; + return PTR_ERR_OR_ZERO(pdev); } device_initcall(efi_load_efivars); #endif -- cgit v1.2.3 From c6d8c8ef1d0d94fdae9f5d72982963db89f9cdad Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Tue, 2 Jan 2018 18:10:41 +0000 Subject: efi: Move ARM CPER code to new file The ARM CPER code is currently mixed in with the other CPER code. Move it to a new file to separate it from the rest of the CPER code. Signed-off-by: Tyler Baicar Signed-off-by: Ard Biesheuvel Cc: Arvind Yadav Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Boyd Cc: Thomas Gleixner Cc: Vasyl Gomonovych Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180102181042.19074-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/Kconfig | 5 ++ drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/cper-arm.c | 147 ++++++++++++++++++++++++++++++++++++++++ drivers/firmware/efi/cper.c | 122 +-------------------------------- include/linux/cper.h | 4 ++ 5 files changed, 160 insertions(+), 119 deletions(-) create mode 100644 drivers/firmware/efi/cper-arm.c diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 2b4c39fdfa91..aab108e82f78 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -166,6 +166,11 @@ endmenu config UEFI_CPER bool +config UEFI_CPER_ARM + bool + depends on UEFI_CPER && ( ARM || ARM64 ) + default y + config EFI_DEV_PATH_PARSER bool depends on ACPI diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 269501dfba53..a3e73d6e8a43 100644 --- 
a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -30,3 +30,4 @@ arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o obj-$(CONFIG_ARM) += $(arm-obj-y) obj-$(CONFIG_ARM64) += $(arm-obj-y) obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o +obj-$(CONFIG_UEFI_CPER_ARM) += cper-arm.o diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c new file mode 100644 index 000000000000..4afbfed52163 --- /dev/null +++ b/drivers/firmware/efi/cper-arm.c @@ -0,0 +1,147 @@ +/* + * UEFI Common Platform Error Record (CPER) support + * + * Copyright (C) 2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define INDENT_SP " " + +static const char * const arm_reg_ctx_strs[] = { + "AArch32 general purpose registers", + "AArch32 EL1 context registers", + "AArch32 EL2 context registers", + "AArch32 secure context registers", + "AArch64 general purpose registers", + "AArch64 EL1 context registers", + "AArch64 EL2 context registers", + "AArch64 EL3 context registers", + "Misc. 
system register structure", +}; + +void cper_print_proc_arm(const char *pfx, + const struct cper_sec_proc_arm *proc) +{ + int i, len, max_ctx_type; + struct cper_arm_err_info *err_info; + struct cper_arm_ctx_info *ctx_info; + char newpfx[64]; + + printk("%sMIDR: 0x%016llx\n", pfx, proc->midr); + + len = proc->section_length - (sizeof(*proc) + + proc->err_info_num * (sizeof(*err_info))); + if (len < 0) { + printk("%ssection length: %d\n", pfx, proc->section_length); + printk("%ssection length is too small\n", pfx); + printk("%sfirmware-generated error record is incorrect\n", pfx); + printk("%sERR_INFO_NUM is %d\n", pfx, proc->err_info_num); + return; + } + + if (proc->validation_bits & CPER_ARM_VALID_MPIDR) + printk("%sMultiprocessor Affinity Register (MPIDR): 0x%016llx\n", + pfx, proc->mpidr); + + if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL) + printk("%serror affinity level: %d\n", pfx, + proc->affinity_level); + + if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) { + printk("%srunning state: 0x%x\n", pfx, proc->running_state); + printk("%sPower State Coordination Interface state: %d\n", + pfx, proc->psci_state); + } + + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); + + err_info = (struct cper_arm_err_info *)(proc + 1); + for (i = 0; i < proc->err_info_num; i++) { + printk("%sError info structure %d:\n", pfx, i); + + printk("%snum errors: %d\n", pfx, err_info->multiple_error + 1); + + if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) { + if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST) + printk("%sfirst error captured\n", newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST) + printk("%slast error captured\n", newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED) + printk("%spropagated error captured\n", + newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW) + printk("%soverflow occurred, error info is incomplete\n", + newpfx); + } + + printk("%serror_type: %d, %s\n", newpfx, err_info->type, + 
err_info->type < ARRAY_SIZE(cper_proc_error_type_strs) ? + cper_proc_error_type_strs[err_info->type] : "unknown"); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) + printk("%serror_info: 0x%016llx\n", newpfx, + err_info->error_info); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR) + printk("%svirtual fault address: 0x%016llx\n", + newpfx, err_info->virt_fault_addr); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR) + printk("%sphysical fault address: 0x%016llx\n", + newpfx, err_info->physical_fault_addr); + err_info += 1; + } + + ctx_info = (struct cper_arm_ctx_info *)err_info; + max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1; + for (i = 0; i < proc->context_info_num; i++) { + int size = sizeof(*ctx_info) + ctx_info->size; + + printk("%sContext info structure %d:\n", pfx, i); + if (len < size) { + printk("%ssection length is too small\n", newpfx); + printk("%sfirmware-generated error record is incorrect\n", pfx); + return; + } + if (ctx_info->type > max_ctx_type) { + printk("%sInvalid context type: %d (max: %d)\n", + newpfx, ctx_info->type, max_ctx_type); + return; + } + printk("%sregister context type: %s\n", newpfx, + arm_reg_ctx_strs[ctx_info->type]); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, + (ctx_info + 1), ctx_info->size, 0); + len -= size; + ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + size); + } + + if (len > 0) { + printk("%sVendor specific error info has %u bytes:\n", pfx, + len); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, ctx_info, + len, true); + } +} diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index d2fcafcea07e..c165933ebf38 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -122,7 +122,7 @@ static const char * const proc_isa_strs[] = { "ARM A64", }; -static const char * const proc_error_type_strs[] = { +const char * const cper_proc_error_type_strs[] = { "cache error", "TLB error", "bus error", @@ -157,8 
+157,8 @@ static void cper_print_proc_generic(const char *pfx, if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); cper_print_bits(pfx, proc->proc_error_type, - proc_error_type_strs, - ARRAY_SIZE(proc_error_type_strs)); + cper_proc_error_type_strs, + ARRAY_SIZE(cper_proc_error_type_strs)); } if (proc->validation_bits & CPER_PROC_VALID_OPERATION) printk("%s""operation: %d, %s\n", pfx, proc->operation, @@ -188,122 +188,6 @@ static void cper_print_proc_generic(const char *pfx, printk("%s""IP: 0x%016llx\n", pfx, proc->ip); } -#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) -static const char * const arm_reg_ctx_strs[] = { - "AArch32 general purpose registers", - "AArch32 EL1 context registers", - "AArch32 EL2 context registers", - "AArch32 secure context registers", - "AArch64 general purpose registers", - "AArch64 EL1 context registers", - "AArch64 EL2 context registers", - "AArch64 EL3 context registers", - "Misc. system register structure", -}; - -static void cper_print_proc_arm(const char *pfx, - const struct cper_sec_proc_arm *proc) -{ - int i, len, max_ctx_type; - struct cper_arm_err_info *err_info; - struct cper_arm_ctx_info *ctx_info; - char newpfx[64]; - - printk("%sMIDR: 0x%016llx\n", pfx, proc->midr); - - len = proc->section_length - (sizeof(*proc) + - proc->err_info_num * (sizeof(*err_info))); - if (len < 0) { - printk("%ssection length: %d\n", pfx, proc->section_length); - printk("%ssection length is too small\n", pfx); - printk("%sfirmware-generated error record is incorrect\n", pfx); - printk("%sERR_INFO_NUM is %d\n", pfx, proc->err_info_num); - return; - } - - if (proc->validation_bits & CPER_ARM_VALID_MPIDR) - printk("%sMultiprocessor Affinity Register (MPIDR): 0x%016llx\n", - pfx, proc->mpidr); - - if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL) - printk("%serror affinity level: %d\n", pfx, - proc->affinity_level); - - if (proc->validation_bits & 
CPER_ARM_VALID_RUNNING_STATE) { - printk("%srunning state: 0x%x\n", pfx, proc->running_state); - printk("%sPower State Coordination Interface state: %d\n", - pfx, proc->psci_state); - } - - snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); - - err_info = (struct cper_arm_err_info *)(proc + 1); - for (i = 0; i < proc->err_info_num; i++) { - printk("%sError info structure %d:\n", pfx, i); - - printk("%snum errors: %d\n", pfx, err_info->multiple_error + 1); - - if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) { - if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST) - printk("%sfirst error captured\n", newpfx); - if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST) - printk("%slast error captured\n", newpfx); - if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED) - printk("%spropagated error captured\n", - newpfx); - if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW) - printk("%soverflow occurred, error info is incomplete\n", - newpfx); - } - - printk("%serror_type: %d, %s\n", newpfx, err_info->type, - err_info->type < ARRAY_SIZE(proc_error_type_strs) ? 
- proc_error_type_strs[err_info->type] : "unknown"); - if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) - printk("%serror_info: 0x%016llx\n", newpfx, - err_info->error_info); - if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR) - printk("%svirtual fault address: 0x%016llx\n", - newpfx, err_info->virt_fault_addr); - if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR) - printk("%sphysical fault address: 0x%016llx\n", - newpfx, err_info->physical_fault_addr); - err_info += 1; - } - - ctx_info = (struct cper_arm_ctx_info *)err_info; - max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1; - for (i = 0; i < proc->context_info_num; i++) { - int size = sizeof(*ctx_info) + ctx_info->size; - - printk("%sContext info structure %d:\n", pfx, i); - if (len < size) { - printk("%ssection length is too small\n", newpfx); - printk("%sfirmware-generated error record is incorrect\n", pfx); - return; - } - if (ctx_info->type > max_ctx_type) { - printk("%sInvalid context type: %d (max: %d)\n", - newpfx, ctx_info->type, max_ctx_type); - return; - } - printk("%sregister context type: %s\n", newpfx, - arm_reg_ctx_strs[ctx_info->type]); - print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, - (ctx_info + 1), ctx_info->size, 0); - len -= size; - ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + size); - } - - if (len > 0) { - printk("%sVendor specific error info has %u bytes:\n", pfx, - len); - print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, ctx_info, - len, true); - } -} -#endif - static const char * const mem_err_type_strs[] = { "unknown", "no error", diff --git a/include/linux/cper.h b/include/linux/cper.h index 723e952fde0d..3299e43c76eb 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -494,6 +494,8 @@ struct cper_sec_pcie { /* Reset to default packing */ #pragma pack() +extern const char * const cper_proc_error_type_strs[4]; + u64 cper_next_record_id(void); const char *cper_severity_str(unsigned int); const char 
*cper_mem_err_type_str(unsigned int); @@ -503,5 +505,7 @@ void cper_mem_err_pack(const struct cper_sec_mem_err *, struct cper_mem_err_compact *); const char *cper_mem_err_unpack(struct trace_seq *, struct cper_mem_err_compact *); +void cper_print_proc_arm(const char *pfx, + const struct cper_sec_proc_arm *proc); #endif -- cgit v1.2.3 From 301f55b1a9177132d2b9ce8a90bf0ae4b37bb850 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Tue, 2 Jan 2018 18:10:42 +0000 Subject: efi: Parse ARM error information value ARM errors just print out the error information value, then the value needs to be manually decoded as per the UEFI spec. Add decoding of the ARM error information value so that the kernel logs capture all of the valid information at first glance. ARM error information value decoding is captured in UEFI 2.7 spec tables 263-265. Signed-off-by: Tyler Baicar Signed-off-by: Ard Biesheuvel Cc: Arvind Yadav Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Boyd Cc: Thomas Gleixner Cc: Vasyl Gomonovych Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180102181042.19074-6-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/cper-arm.c | 213 +++++++++++++++++++++++++++++++++++++++- include/linux/cper.h | 44 +++++++++ 2 files changed, 255 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c index 4afbfed52163..698e5c8e0c8d 100644 --- a/drivers/firmware/efi/cper-arm.c +++ b/drivers/firmware/efi/cper-arm.c @@ -44,13 +44,218 @@ static const char * const arm_reg_ctx_strs[] = { "Misc. 
system register structure", }; +static const char * const arm_err_trans_type_strs[] = { + "Instruction", + "Data Access", + "Generic", +}; + +static const char * const arm_bus_err_op_strs[] = { + "Generic error (type cannot be determined)", + "Generic read (type of instruction or data request cannot be determined)", + "Generic write (type of instruction of data request cannot be determined)", + "Data read", + "Data write", + "Instruction fetch", + "Prefetch", +}; + +static const char * const arm_cache_err_op_strs[] = { + "Generic error (type cannot be determined)", + "Generic read (type of instruction or data request cannot be determined)", + "Generic write (type of instruction of data request cannot be determined)", + "Data read", + "Data write", + "Instruction fetch", + "Prefetch", + "Eviction", + "Snooping (processor initiated a cache snoop that resulted in an error)", + "Snooped (processor raised a cache error caused by another processor or device snooping its cache)", + "Management", +}; + +static const char * const arm_tlb_err_op_strs[] = { + "Generic error (type cannot be determined)", + "Generic read (type of instruction or data request cannot be determined)", + "Generic write (type of instruction of data request cannot be determined)", + "Data read", + "Data write", + "Instruction fetch", + "Prefetch", + "Local management operation (processor initiated a TLB management operation that resulted in an error)", + "External management operation (processor raised a TLB error caused by another processor or device broadcasting TLB operations)", +}; + +static const char * const arm_bus_err_part_type_strs[] = { + "Local processor originated request", + "Local processor responded to request", + "Local processor observed", + "Generic", +}; + +static const char * const arm_bus_err_addr_space_strs[] = { + "External Memory Access", + "Internal Memory Access", + "Unknown", + "Device Memory Access", +}; + +static void cper_print_arm_err_info(const char *pfx, u32 type, + 
u64 error_info) +{ + u8 trans_type, op_type, level, participation_type, address_space; + u16 mem_attributes; + bool proc_context_corrupt, corrected, precise_pc, restartable_pc; + bool time_out, access_mode; + + /* If the type is unknown, bail. */ + if (type > CPER_ARM_MAX_TYPE) + return; + + /* + * Vendor type errors have error information values that are vendor + * specific. + */ + if (type == CPER_ARM_VENDOR_ERROR) + return; + + if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) { + trans_type = ((error_info >> CPER_ARM_ERR_TRANSACTION_SHIFT) + & CPER_ARM_ERR_TRANSACTION_MASK); + if (trans_type < ARRAY_SIZE(arm_err_trans_type_strs)) { + printk("%stransaction type: %s\n", pfx, + arm_err_trans_type_strs[trans_type]); + } + } + + if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) { + op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT) + & CPER_ARM_ERR_OPERATION_MASK); + switch (type) { + case CPER_ARM_CACHE_ERROR: + if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) { + printk("%soperation type: %s\n", pfx, + arm_cache_err_op_strs[op_type]); + } + break; + case CPER_ARM_TLB_ERROR: + if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) { + printk("%soperation type: %s\n", pfx, + arm_tlb_err_op_strs[op_type]); + } + break; + case CPER_ARM_BUS_ERROR: + if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) { + printk("%soperation type: %s\n", pfx, + arm_bus_err_op_strs[op_type]); + } + break; + } + } + + if (error_info & CPER_ARM_ERR_VALID_LEVEL) { + level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT) + & CPER_ARM_ERR_LEVEL_MASK); + switch (type) { + case CPER_ARM_CACHE_ERROR: + printk("%scache level: %d\n", pfx, level); + break; + case CPER_ARM_TLB_ERROR: + printk("%sTLB level: %d\n", pfx, level); + break; + case CPER_ARM_BUS_ERROR: + printk("%saffinity level at which the bus error occurred: %d\n", + pfx, level); + break; + } + } + + if (error_info & CPER_ARM_ERR_VALID_PROC_CONTEXT_CORRUPT) { + proc_context_corrupt = ((error_info >> CPER_ARM_ERR_PC_CORRUPT_SHIFT) + & 
CPER_ARM_ERR_PC_CORRUPT_MASK); + if (proc_context_corrupt) + printk("%sprocessor context corrupted\n", pfx); + else + printk("%sprocessor context not corrupted\n", pfx); + } + + if (error_info & CPER_ARM_ERR_VALID_CORRECTED) { + corrected = ((error_info >> CPER_ARM_ERR_CORRECTED_SHIFT) + & CPER_ARM_ERR_CORRECTED_MASK); + if (corrected) + printk("%sthe error has been corrected\n", pfx); + else + printk("%sthe error has not been corrected\n", pfx); + } + + if (error_info & CPER_ARM_ERR_VALID_PRECISE_PC) { + precise_pc = ((error_info >> CPER_ARM_ERR_PRECISE_PC_SHIFT) + & CPER_ARM_ERR_PRECISE_PC_MASK); + if (precise_pc) + printk("%sPC is precise\n", pfx); + else + printk("%sPC is imprecise\n", pfx); + } + + if (error_info & CPER_ARM_ERR_VALID_RESTARTABLE_PC) { + restartable_pc = ((error_info >> CPER_ARM_ERR_RESTARTABLE_PC_SHIFT) + & CPER_ARM_ERR_RESTARTABLE_PC_MASK); + if (restartable_pc) + printk("%sProgram execution can be restarted reliably at the PC associated with the error.\n", pfx); + } + + /* The rest of the fields are specific to bus errors */ + if (type != CPER_ARM_BUS_ERROR) + return; + + if (error_info & CPER_ARM_ERR_VALID_PARTICIPATION_TYPE) { + participation_type = ((error_info >> CPER_ARM_ERR_PARTICIPATION_TYPE_SHIFT) + & CPER_ARM_ERR_PARTICIPATION_TYPE_MASK); + if (participation_type < ARRAY_SIZE(arm_bus_err_part_type_strs)) { + printk("%sparticipation type: %s\n", pfx, + arm_bus_err_part_type_strs[participation_type]); + } + } + + if (error_info & CPER_ARM_ERR_VALID_TIME_OUT) { + time_out = ((error_info >> CPER_ARM_ERR_TIME_OUT_SHIFT) + & CPER_ARM_ERR_TIME_OUT_MASK); + if (time_out) + printk("%srequest timed out\n", pfx); + } + + if (error_info & CPER_ARM_ERR_VALID_ADDRESS_SPACE) { + address_space = ((error_info >> CPER_ARM_ERR_ADDRESS_SPACE_SHIFT) + & CPER_ARM_ERR_ADDRESS_SPACE_MASK); + if (address_space < ARRAY_SIZE(arm_bus_err_addr_space_strs)) { + printk("%saddress space: %s\n", pfx, + arm_bus_err_addr_space_strs[address_space]); + } + } + + if 
(error_info & CPER_ARM_ERR_VALID_MEM_ATTRIBUTES) { + mem_attributes = ((error_info >> CPER_ARM_ERR_MEM_ATTRIBUTES_SHIFT) + & CPER_ARM_ERR_MEM_ATTRIBUTES_MASK); + printk("%smemory access attributes:0x%x\n", pfx, mem_attributes); + } + + if (error_info & CPER_ARM_ERR_VALID_ACCESS_MODE) { + access_mode = ((error_info >> CPER_ARM_ERR_ACCESS_MODE_SHIFT) + & CPER_ARM_ERR_ACCESS_MODE_MASK); + if (access_mode) + printk("%saccess mode: normal\n", pfx); + else + printk("%saccess mode: secure\n", pfx); + } +} + void cper_print_proc_arm(const char *pfx, const struct cper_sec_proc_arm *proc) { int i, len, max_ctx_type; struct cper_arm_err_info *err_info; struct cper_arm_ctx_info *ctx_info; - char newpfx[64]; + char newpfx[64], infopfx[64]; printk("%sMIDR: 0x%016llx\n", pfx, proc->midr); @@ -102,9 +307,13 @@ void cper_print_proc_arm(const char *pfx, printk("%serror_type: %d, %s\n", newpfx, err_info->type, err_info->type < ARRAY_SIZE(cper_proc_error_type_strs) ? cper_proc_error_type_strs[err_info->type] : "unknown"); - if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) + if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) { printk("%serror_info: 0x%016llx\n", newpfx, err_info->error_info); + snprintf(infopfx, sizeof(infopfx), "%s%s", newpfx, INDENT_SP); + cper_print_arm_err_info(infopfx, err_info->type, + err_info->error_info); + } if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR) printk("%svirtual fault address: 0x%016llx\n", newpfx, err_info->virt_fault_addr); diff --git a/include/linux/cper.h b/include/linux/cper.h index 3299e43c76eb..d14ef4e77c8a 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -275,6 +275,50 @@ enum { #define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2) #define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3) +#define CPER_ARM_CACHE_ERROR 0 +#define CPER_ARM_TLB_ERROR 1 +#define CPER_ARM_BUS_ERROR 2 +#define CPER_ARM_VENDOR_ERROR 3 +#define CPER_ARM_MAX_TYPE CPER_ARM_VENDOR_ERROR + +#define CPER_ARM_ERR_VALID_TRANSACTION_TYPE 
BIT(0) +#define CPER_ARM_ERR_VALID_OPERATION_TYPE BIT(1) +#define CPER_ARM_ERR_VALID_LEVEL BIT(2) +#define CPER_ARM_ERR_VALID_PROC_CONTEXT_CORRUPT BIT(3) +#define CPER_ARM_ERR_VALID_CORRECTED BIT(4) +#define CPER_ARM_ERR_VALID_PRECISE_PC BIT(5) +#define CPER_ARM_ERR_VALID_RESTARTABLE_PC BIT(6) +#define CPER_ARM_ERR_VALID_PARTICIPATION_TYPE BIT(7) +#define CPER_ARM_ERR_VALID_TIME_OUT BIT(8) +#define CPER_ARM_ERR_VALID_ADDRESS_SPACE BIT(9) +#define CPER_ARM_ERR_VALID_MEM_ATTRIBUTES BIT(10) +#define CPER_ARM_ERR_VALID_ACCESS_MODE BIT(11) + +#define CPER_ARM_ERR_TRANSACTION_SHIFT 16 +#define CPER_ARM_ERR_TRANSACTION_MASK GENMASK(1,0) +#define CPER_ARM_ERR_OPERATION_SHIFT 18 +#define CPER_ARM_ERR_OPERATION_MASK GENMASK(3,0) +#define CPER_ARM_ERR_LEVEL_SHIFT 22 +#define CPER_ARM_ERR_LEVEL_MASK GENMASK(2,0) +#define CPER_ARM_ERR_PC_CORRUPT_SHIFT 25 +#define CPER_ARM_ERR_PC_CORRUPT_MASK GENMASK(0,0) +#define CPER_ARM_ERR_CORRECTED_SHIFT 26 +#define CPER_ARM_ERR_CORRECTED_MASK GENMASK(0,0) +#define CPER_ARM_ERR_PRECISE_PC_SHIFT 27 +#define CPER_ARM_ERR_PRECISE_PC_MASK GENMASK(0,0) +#define CPER_ARM_ERR_RESTARTABLE_PC_SHIFT 28 +#define CPER_ARM_ERR_RESTARTABLE_PC_MASK GENMASK(0,0) +#define CPER_ARM_ERR_PARTICIPATION_TYPE_SHIFT 29 +#define CPER_ARM_ERR_PARTICIPATION_TYPE_MASK GENMASK(1,0) +#define CPER_ARM_ERR_TIME_OUT_SHIFT 31 +#define CPER_ARM_ERR_TIME_OUT_MASK GENMASK(0,0) +#define CPER_ARM_ERR_ADDRESS_SPACE_SHIFT 32 +#define CPER_ARM_ERR_ADDRESS_SPACE_MASK GENMASK(1,0) +#define CPER_ARM_ERR_MEM_ATTRIBUTES_SHIFT 34 +#define CPER_ARM_ERR_MEM_ATTRIBUTES_MASK GENMASK(8,0) +#define CPER_ARM_ERR_ACCESS_MODE_SHIFT 43 +#define CPER_ARM_ERR_ACCESS_MODE_MASK GENMASK(0,0) + /* * All tables and structs must be byte-packed to match CPER * specification, since the tables are provided by the system BIOS -- cgit v1.2.3 From 87faa0d9b43b4755ff6963a22d1fd1bee1aa3b39 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Jan 2018 15:18:44 +0100 Subject: x86/pti: Enable PTI by default 
This really wants to be enabled by default. Users who know what they are doing can disable it either in the config or on the kernel command line. Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index a623d13bf288..3d4debd0257e 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -56,6 +56,7 @@ config SECURITY_NETWORK config PAGE_TABLE_ISOLATION bool "Remove the kernel mapping in user mode" + default y depends on X86_64 && !UML help This feature reduces the number of hardware side channels by -- cgit v1.2.3 From 694d99d40972f12e59a3696effee8a376b79d7c8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 26 Dec 2017 23:43:54 -0600 Subject: x86/cpu, x86/pti: Do not enable PTI on AMD processors AMD processors are not subject to the types of attacks that the kernel page table isolation feature protects against. The AMD microarchitecture does not allow memory references, including speculative references, that access higher privileged data when running in a lesser privileged mode when that access would result in a page fault. Disable page table isolation by default on AMD processors by not setting the X86_BUG_CPU_INSECURE feature, which controls whether X86_FEATURE_PTI is set.
Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Dave Hansen Cc: Andy Lutomirski Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20171227054354.20369.94587.stgit@tlendack-t1.amdoffice.net --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f2a94dfb434e..b1be494ab4e8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -899,8 +899,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - /* Assume for now that ALL x86 CPUs are insecure */ - setup_force_cpu_bug(X86_BUG_CPU_INSECURE); + if (c->x86_vendor != X86_VENDOR_AMD) + setup_force_cpu_bug(X86_BUG_CPU_INSECURE); fpu__init_system(c); -- cgit v1.2.3 From 52994c256df36fda9a715697431cba9daecb6b11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Jan 2018 15:57:59 +0100 Subject: x86/pti: Make sure the user/kernel PTEs match Meelis reported that his K8 Athlon64 emits MCE warnings when PTI is enabled: [Hardware Error]: Error Addr: 0x0000ffff81e000e0 [Hardware Error]: MC1 Error: L1 TLB multimatch. [Hardware Error]: cache level: L1, tx: INSN The address is in the entry area, which is mapped into kernel _AND_ user space. That's special because we switch CR3 while we are executing there. User mapping: 0xffffffff81e00000-0xffffffff82000000 2M ro PSE GLB x pmd Kernel mapping: 0xffffffff81000000-0xffffffff82000000 16M ro PSE x pmd So the K8 is complaining that the TLB entries differ. They differ in the GLB bit. Drop the GLB bit when installing the user shared mapping. 
Fixes: 6dc72c3cbca0 ("x86/mm/pti: Share entry text PMD") Reported-by: Meelis Roos Signed-off-by: Thomas Gleixner Tested-by: Meelis Roos Cc: Borislav Petkov Cc: Tom Lendacky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031407180.1957@nanos --- arch/x86/mm/pti.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index bce8aea65606..2da28ba97508 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -367,7 +367,8 @@ static void __init pti_setup_espfix64(void) static void __init pti_clone_entry_text(void) { pti_clone_pmds((unsigned long) __entry_text_start, - (unsigned long) __irqentry_text_end, _PAGE_RW); + (unsigned long) __irqentry_text_end, + _PAGE_RW | _PAGE_GLOBAL); } /* -- cgit v1.2.3 From a9cdbe72c4e8bf3b38781c317a79326e2e1a230d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 31 Dec 2017 10:18:06 -0600 Subject: x86/dumpstack: Fix partial register dumps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The show_regs_safe() logic is wrong. When there's an iret stack frame, it prints the entire pt_regs -- most of which is random stack data -- instead of just the five registers at the end. show_regs_safe() is also poorly named: the on_stack() checks aren't for safety. Rename the function to show_regs_if_on_stack() and add a comment to explain why the checks are needed. These issues were introduced with the "partial register dump" feature of the following commit: b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully") That patch had gone through a few iterations of development, and the above issues were artifacts from a previous iteration of the patch where 'regs' pointed directly to the iret frame rather than to the (partially empty) pt_regs. 
Tested-by: Alexander Tsoy Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toralf Förster Cc: stable@vger.kernel.org Fixes: b02fcf9ba121 ("x86/unwinder: Handle stack overflows more gracefully") Link: http://lkml.kernel.org/r/5b05b8b344f59db2d3d50dbdeba92d60f2304c54.1514736742.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/unwind.h | 17 +++++++++++++---- arch/x86/kernel/dumpstack.c | 28 ++++++++++++++++++++-------- arch/x86/kernel/stacktrace.c | 2 +- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index c1688c2d0a12..1f86e1b0a5cd 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -56,18 +56,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) /* - * WARNING: The entire pt_regs may not be safe to dereference. In some cases, - * only the iret frame registers are accessible. Use with caution! + * If 'partial' returns true, only the iret frame registers are valid. 
*/ -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) { if (unwind_done(state)) return NULL; + if (partial) { +#ifdef CONFIG_UNWINDER_ORC + *partial = !state->full_regs; +#else + *partial = false; +#endif + } + return state->regs; } #else -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) { return NULL; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 5fa110699ed2..d0bb176a7261 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs) regs->sp, regs->flags); } -static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) +static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, + bool partial) { - if (on_stack(info, regs, sizeof(*regs))) + /* + * These on_stack() checks aren't strictly necessary: the unwind code + * has already validated the 'regs' pointer. The checks are done for + * ordering reasons: if the registers are on the next stack, we don't + * want to print them out yet. Otherwise they'll be shown as part of + * the wrong stack. Later, when show_trace_log_lvl() switches to the + * next stack, this function will be called again with the same regs so + * they can be printed in the right context. + */ + if (!partial && on_stack(info, regs, sizeof(*regs))) { __show_regs(regs, 0); - else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, - IRET_FRAME_SIZE)) { + + } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, + IRET_FRAME_SIZE)) { /* * When an interrupt or exception occurs in entry code, the * full pt_regs might not have been saved yet. 
In that case @@ -98,6 +109,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, struct stack_info stack_info = {0}; unsigned long visit_mask = 0; int graph_idx = 0; + bool partial; printk("%sCall Trace:\n", log_lvl); @@ -140,7 +152,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, printk("%s <%s>\n", log_lvl, stack_name); if (regs) - show_regs_safe(&stack_info, regs); + show_regs_if_on_stack(&stack_info, regs, partial); /* * Scan the stack, printing any text addresses we find. At the @@ -164,7 +176,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, /* * Don't print regs->ip again if it was already printed - * by show_regs_safe() below. + * by show_regs_if_on_stack(). */ if (regs && stack == &regs->ip) goto next; @@ -199,9 +211,9 @@ next: unwind_next_frame(&state); /* if the frame has entry regs, print them */ - regs = unwind_get_entry_regs(&state); + regs = unwind_get_entry_regs(&state, &partial); if (regs) - show_regs_safe(&stack_info, regs); + show_regs_if_on_stack(&stack_info, regs, partial); } if (stack_name) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 8dabd7bf1673..60244bfaf88f 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -98,7 +98,7 @@ static int __save_stack_trace_reliable(struct stack_trace *trace, for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); unwind_next_frame(&state)) { - regs = unwind_get_entry_regs(&state); + regs = unwind_get_entry_regs(&state, NULL); if (regs) { /* * Kernel mode registers on the stack indicate an -- cgit v1.2.3 From 3ffdeb1a02be3086f1411a15c5b9c481fa28e21f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sun, 31 Dec 2017 10:18:07 -0600 Subject: x86/dumpstack: Print registers for first stack frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the stack dump code, if the frame after the starting pt_regs is also a regs frame,
the registers don't get printed. Fix that. Reported-by: Andy Lutomirski Tested-by: Alexander Tsoy Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toralf Förster Cc: stable@vger.kernel.org Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack") Link: http://lkml.kernel.org/r/396f84491d2f0ef64eda4217a2165f5712f6a115.1514736742.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d0bb176a7261..afbecff161d1 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -115,6 +115,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unwind_start(&state, task, regs, stack); stack = stack ? : get_stack_pointer(task, regs); + regs = unwind_get_entry_regs(&state, &partial); /* * Iterate through the stacks, starting with the current stack pointer. @@ -132,7 +133,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - hardirq stack * - entry stack */ - for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; if (get_stack_info(stack, task, &stack_info, &visit_mask)) { -- cgit v1.2.3 From d8d99d8ed658a705909b07ba21b643c53851d70c Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Tue, 2 Jan 2018 19:47:19 +0800 Subject: ASoC: mediatek: rework clock functions for MT2701 Reworks clock part to make it more reasonable. The current changes are: - Replace regmap operations by CCF APIs. Doing so, we just need to handle the element clocks and can also get accurate information via CCF. - Rename clocks to make them more generic so that the future revisions of the IP can adapt gracefully. 
- Regroup 'aud_clks[]' by usage - the basic needs and I2S parts: The new code just keep the common clocks in array and let SoC self decide I2S numbers - If future chips have different sets of channels we will add a little more abstract here. Moreover, this patch moves I2S clocks to the struct mt2701_i2s_data so that we can easily manage them when calls .prepare() and .shutdown(). Signed-off-by: Ryder Lee Tested-by: Garlic Tseng Signed-off-by: Mark Brown --- sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c | 518 +++++++--------------- sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h | 15 +- sound/soc/mediatek/mt2701/mt2701-afe-common.h | 64 +-- sound/soc/mediatek/mt2701/mt2701-afe-pcm.c | 45 +- 4 files changed, 200 insertions(+), 442 deletions(-) diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c index affa7fb25dd9..75ccdca5811d 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c @@ -21,442 +21,256 @@ #include "mt2701-afe-common.h" #include "mt2701-afe-clock-ctrl.h" -static const char *aud_clks[MT2701_CLOCK_NUM] = { - [MT2701_AUD_INFRA_SYS_AUDIO] = "infra_sys_audio_clk", - [MT2701_AUD_AUD_MUX1_SEL] = "top_audio_mux1_sel", - [MT2701_AUD_AUD_MUX2_SEL] = "top_audio_mux2_sel", - [MT2701_AUD_AUD_MUX1_DIV] = "top_audio_mux1_div", - [MT2701_AUD_AUD_MUX2_DIV] = "top_audio_mux2_div", - [MT2701_AUD_AUD_48K_TIMING] = "top_audio_48k_timing", - [MT2701_AUD_AUD_44K_TIMING] = "top_audio_44k_timing", - [MT2701_AUD_AUDPLL_MUX_SEL] = "top_audpll_mux_sel", - [MT2701_AUD_APLL_SEL] = "top_apll_sel", - [MT2701_AUD_AUD1PLL_98M] = "top_aud1_pll_98M", - [MT2701_AUD_AUD2PLL_90M] = "top_aud2_pll_90M", - [MT2701_AUD_HADDS2PLL_98M] = "top_hadds2_pll_98M", - [MT2701_AUD_HADDS2PLL_294M] = "top_hadds2_pll_294M", - [MT2701_AUD_AUDPLL] = "top_audpll", - [MT2701_AUD_AUDPLL_D4] = "top_audpll_d4", - [MT2701_AUD_AUDPLL_D8] = "top_audpll_d8", - [MT2701_AUD_AUDPLL_D16] = 
"top_audpll_d16", - [MT2701_AUD_AUDPLL_D24] = "top_audpll_d24", - [MT2701_AUD_AUDINTBUS] = "top_audintbus_sel", - [MT2701_AUD_CLK_26M] = "clk_26m", - [MT2701_AUD_SYSPLL1_D4] = "top_syspll1_d4", - [MT2701_AUD_AUD_K1_SRC_SEL] = "top_aud_k1_src_sel", - [MT2701_AUD_AUD_K2_SRC_SEL] = "top_aud_k2_src_sel", - [MT2701_AUD_AUD_K3_SRC_SEL] = "top_aud_k3_src_sel", - [MT2701_AUD_AUD_K4_SRC_SEL] = "top_aud_k4_src_sel", - [MT2701_AUD_AUD_K5_SRC_SEL] = "top_aud_k5_src_sel", - [MT2701_AUD_AUD_K6_SRC_SEL] = "top_aud_k6_src_sel", - [MT2701_AUD_AUD_K1_SRC_DIV] = "top_aud_k1_src_div", - [MT2701_AUD_AUD_K2_SRC_DIV] = "top_aud_k2_src_div", - [MT2701_AUD_AUD_K3_SRC_DIV] = "top_aud_k3_src_div", - [MT2701_AUD_AUD_K4_SRC_DIV] = "top_aud_k4_src_div", - [MT2701_AUD_AUD_K5_SRC_DIV] = "top_aud_k5_src_div", - [MT2701_AUD_AUD_K6_SRC_DIV] = "top_aud_k6_src_div", - [MT2701_AUD_AUD_I2S1_MCLK] = "top_aud_i2s1_mclk", - [MT2701_AUD_AUD_I2S2_MCLK] = "top_aud_i2s2_mclk", - [MT2701_AUD_AUD_I2S3_MCLK] = "top_aud_i2s3_mclk", - [MT2701_AUD_AUD_I2S4_MCLK] = "top_aud_i2s4_mclk", - [MT2701_AUD_AUD_I2S5_MCLK] = "top_aud_i2s5_mclk", - [MT2701_AUD_AUD_I2S6_MCLK] = "top_aud_i2s6_mclk", - [MT2701_AUD_ASM_M_SEL] = "top_asm_m_sel", - [MT2701_AUD_ASM_H_SEL] = "top_asm_h_sel", - [MT2701_AUD_UNIVPLL2_D4] = "top_univpll2_d4", - [MT2701_AUD_UNIVPLL2_D2] = "top_univpll2_d2", - [MT2701_AUD_SYSPLL_D5] = "top_syspll_d5", +static const char *const base_clks[] = { + [MT2701_TOP_AUD_MCLK_SRC0] = "top_audio_mux1_sel", + [MT2701_TOP_AUD_MCLK_SRC1] = "top_audio_mux2_sel", + [MT2701_AUDSYS_AFE] = "audio_afe_pd", + [MT2701_AUDSYS_AFE_CONN] = "audio_afe_conn_pd", + [MT2701_AUDSYS_A1SYS] = "audio_a1sys_pd", + [MT2701_AUDSYS_A2SYS] = "audio_a2sys_pd", }; int mt2701_init_clock(struct mtk_base_afe *afe) { struct mt2701_afe_private *afe_priv = afe->platform_priv; - int i = 0; - - for (i = 0; i < MT2701_CLOCK_NUM; i++) { - afe_priv->clocks[i] = devm_clk_get(afe->dev, aud_clks[i]); - if (IS_ERR(afe_priv->clocks[i])) { - dev_warn(afe->dev, "%s 
devm_clk_get %s fail\n", - __func__, aud_clks[i]); - return PTR_ERR(aud_clks[i]); + int i; + + for (i = 0; i < MT2701_BASE_CLK_NUM; i++) { + afe_priv->base_ck[i] = devm_clk_get(afe->dev, base_clks[i]); + if (IS_ERR(afe_priv->base_ck[i])) { + dev_err(afe->dev, "failed to get %s\n", base_clks[i]); + return PTR_ERR(afe_priv->base_ck[i]); } } - return 0; -} + /* Get I2S related clocks */ + for (i = 0; i < MT2701_I2S_NUM; i++) { + struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[i]; + char name[13]; -int mt2701_afe_enable_clock(struct mtk_base_afe *afe) -{ - int ret = 0; + snprintf(name, sizeof(name), "i2s%d_src_sel", i); + i2s_path->sel_ck = devm_clk_get(afe->dev, name); + if (IS_ERR(i2s_path->sel_ck)) { + dev_err(afe->dev, "failed to get %s\n", name); + return PTR_ERR(i2s_path->sel_ck); + } - ret = mt2701_turn_on_a1sys_clock(afe); - if (ret) { - dev_err(afe->dev, "%s turn_on_a1sys_clock fail %d\n", - __func__, ret); - return ret; - } + snprintf(name, sizeof(name), "i2s%d_src_div", i); + i2s_path->div_ck = devm_clk_get(afe->dev, name); + if (IS_ERR(i2s_path->div_ck)) { + dev_err(afe->dev, "failed to get %s\n", name); + return PTR_ERR(i2s_path->div_ck); + } - ret = mt2701_turn_on_a2sys_clock(afe); - if (ret) { - dev_err(afe->dev, "%s turn_on_a2sys_clock fail %d\n", - __func__, ret); - mt2701_turn_off_a1sys_clock(afe); - return ret; - } + snprintf(name, sizeof(name), "i2s%d_mclk_en", i); + i2s_path->mclk_ck = devm_clk_get(afe->dev, name); + if (IS_ERR(i2s_path->mclk_ck)) { + dev_err(afe->dev, "failed to get %s\n", name); + return PTR_ERR(i2s_path->mclk_ck); + } - ret = mt2701_turn_on_afe_clock(afe); - if (ret) { - dev_err(afe->dev, "%s turn_on_afe_clock fail %d\n", - __func__, ret); - mt2701_turn_off_a1sys_clock(afe); - mt2701_turn_off_a2sys_clock(afe); - return ret; + snprintf(name, sizeof(name), "i2so%d_hop_ck", i); + i2s_path->hop_ck[I2S_OUT] = devm_clk_get(afe->dev, name); + if (IS_ERR(i2s_path->hop_ck[I2S_OUT])) { + dev_err(afe->dev, "failed to get %s\n", 
name); + return PTR_ERR(i2s_path->hop_ck[I2S_OUT]); + } + + snprintf(name, sizeof(name), "i2si%d_hop_ck", i); + i2s_path->hop_ck[I2S_IN] = devm_clk_get(afe->dev, name); + if (IS_ERR(i2s_path->hop_ck[I2S_IN])) { + dev_err(afe->dev, "failed to get %s\n", name); + return PTR_ERR(i2s_path->hop_ck[I2S_IN]); + } + + snprintf(name, sizeof(name), "asrc%d_out_ck", i); + i2s_path->asrco_ck = devm_clk_get(afe->dev, name); + if (IS_ERR(i2s_path->asrco_ck)) { + dev_err(afe->dev, "failed to get %s\n", name); + return PTR_ERR(i2s_path->asrco_ck); + } } - regmap_update_bits(afe->regmap, ASYS_TOP_CON, - AUDIO_TOP_CON0_A1SYS_A2SYS_ON, - AUDIO_TOP_CON0_A1SYS_A2SYS_ON); - regmap_update_bits(afe->regmap, AFE_DAC_CON0, - AFE_DAC_CON0_AFE_ON, - AFE_DAC_CON0_AFE_ON); - regmap_write(afe->regmap, PWR2_TOP_CON, - PWR2_TOP_CON_INIT_VAL); - regmap_write(afe->regmap, PWR1_ASM_CON1, - PWR1_ASM_CON1_INIT_VAL); - regmap_write(afe->regmap, PWR2_ASM_CON1, - PWR2_ASM_CON1_INIT_VAL); + /* Some platforms may support BT path */ + afe_priv->mrgif_ck = devm_clk_get(afe->dev, "audio_mrgif_pd"); + if (IS_ERR(afe_priv->mrgif_ck)) { + if (PTR_ERR(afe_priv->mrgif_ck) == -EPROBE_DEFER) + return -EPROBE_DEFER; - return 0; -} + afe_priv->mrgif_ck = NULL; + } -void mt2701_afe_disable_clock(struct mtk_base_afe *afe) -{ - mt2701_turn_off_afe_clock(afe); - mt2701_turn_off_a1sys_clock(afe); - mt2701_turn_off_a2sys_clock(afe); - regmap_update_bits(afe->regmap, ASYS_TOP_CON, - AUDIO_TOP_CON0_A1SYS_A2SYS_ON, 0); - regmap_update_bits(afe->regmap, AFE_DAC_CON0, - AFE_DAC_CON0_AFE_ON, 0); + return 0; } -int mt2701_turn_on_a1sys_clock(struct mtk_base_afe *afe) +int mt2701_afe_enable_i2s(struct mtk_base_afe *afe, int id, int dir) { struct mt2701_afe_private *afe_priv = afe->platform_priv; - int ret = 0; + struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[id]; + int ret; - /* Set Mux */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_AUD_MUX1_SEL]); + ret = clk_prepare_enable(i2s_path->asrco_ck); if (ret) { - 
dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_AUD_MUX1_SEL], ret); - goto A1SYS_CLK_AUD_MUX1_SEL_ERR; + dev_err(afe->dev, "failed to enable ASRC clock %d\n", ret); + return ret; } - ret = clk_set_parent(afe_priv->clocks[MT2701_AUD_AUD_MUX1_SEL], - afe_priv->clocks[MT2701_AUD_AUD1PLL_98M]); + ret = clk_prepare_enable(i2s_path->hop_ck[dir]); if (ret) { - dev_err(afe->dev, "%s clk_set_parent %s-%s fail %d\n", __func__, - aud_clks[MT2701_AUD_AUD_MUX1_SEL], - aud_clks[MT2701_AUD_AUD1PLL_98M], ret); - goto A1SYS_CLK_AUD_MUX1_SEL_ERR; + dev_err(afe->dev, "failed to enable I2S clock %d\n", ret); + goto err_hop_ck; } - /* Set Divider */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_AUD_MUX1_DIV]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, - aud_clks[MT2701_AUD_AUD_MUX1_DIV], - ret); - goto A1SYS_CLK_AUD_MUX1_DIV_ERR; - } + return 0; - ret = clk_set_rate(afe_priv->clocks[MT2701_AUD_AUD_MUX1_DIV], - MT2701_AUD_AUD_MUX1_DIV_RATE); - if (ret) { - dev_err(afe->dev, "%s clk_set_parent %s-%d fail %d\n", __func__, - aud_clks[MT2701_AUD_AUD_MUX1_DIV], - MT2701_AUD_AUD_MUX1_DIV_RATE, ret); - goto A1SYS_CLK_AUD_MUX1_DIV_ERR; - } +err_hop_ck: + clk_disable_unprepare(i2s_path->asrco_ck); - /* Enable clock gate */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_AUD_48K_TIMING]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_AUD_48K_TIMING], ret); - goto A1SYS_CLK_AUD_48K_ERR; - } + return ret; +} - /* Enable infra audio */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_INFRA_SYS_AUDIO], ret); - goto A1SYS_CLK_INFRA_ERR; - } +void mt2701_afe_disable_i2s(struct mtk_base_afe *afe, int id, int dir) +{ + struct mt2701_afe_private *afe_priv = afe->platform_priv; + struct mt2701_i2s_path *i2s_path = 
&afe_priv->i2s_path[id]; - return 0; + clk_disable_unprepare(i2s_path->hop_ck[dir]); + clk_disable_unprepare(i2s_path->asrco_ck); +} -A1SYS_CLK_INFRA_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); -A1SYS_CLK_AUD_48K_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_48K_TIMING]); -A1SYS_CLK_AUD_MUX1_DIV_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_MUX1_DIV]); -A1SYS_CLK_AUD_MUX1_SEL_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_MUX1_SEL]); +int mt2701_afe_enable_mclk(struct mtk_base_afe *afe, int id) +{ + struct mt2701_afe_private *afe_priv = afe->platform_priv; + struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[id]; - return ret; + return clk_prepare_enable(i2s_path->mclk_ck); } -void mt2701_turn_off_a1sys_clock(struct mtk_base_afe *afe) +void mt2701_afe_disable_mclk(struct mtk_base_afe *afe, int id) { struct mt2701_afe_private *afe_priv = afe->platform_priv; + struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[id]; - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_48K_TIMING]); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_MUX1_DIV]); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_MUX1_SEL]); + clk_disable_unprepare(i2s_path->mclk_ck); } -int mt2701_turn_on_a2sys_clock(struct mtk_base_afe *afe) +int mt2701_enable_btmrg_clk(struct mtk_base_afe *afe) { struct mt2701_afe_private *afe_priv = afe->platform_priv; - int ret = 0; - /* Set Mux */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_AUD_MUX2_SEL]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_AUD_MUX2_SEL], ret); - goto A2SYS_CLK_AUD_MUX2_SEL_ERR; - } + return clk_prepare_enable(afe_priv->mrgif_ck); +} - ret = clk_set_parent(afe_priv->clocks[MT2701_AUD_AUD_MUX2_SEL], - afe_priv->clocks[MT2701_AUD_AUD2PLL_90M]); - if (ret) { - dev_err(afe->dev, "%s clk_set_parent %s-%s fail 
%d\n", __func__, - aud_clks[MT2701_AUD_AUD_MUX2_SEL], - aud_clks[MT2701_AUD_AUD2PLL_90M], ret); - goto A2SYS_CLK_AUD_MUX2_SEL_ERR; - } +void mt2701_disable_btmrg_clk(struct mtk_base_afe *afe) +{ + struct mt2701_afe_private *afe_priv = afe->platform_priv; - /* Set Divider */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_AUD_MUX2_DIV]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_AUD_MUX2_DIV], ret); - goto A2SYS_CLK_AUD_MUX2_DIV_ERR; - } + clk_disable_unprepare(afe_priv->mrgif_ck); +} - ret = clk_set_rate(afe_priv->clocks[MT2701_AUD_AUD_MUX2_DIV], - MT2701_AUD_AUD_MUX2_DIV_RATE); - if (ret) { - dev_err(afe->dev, "%s clk_set_parent %s-%d fail %d\n", __func__, - aud_clks[MT2701_AUD_AUD_MUX2_DIV], - MT2701_AUD_AUD_MUX2_DIV_RATE, ret); - goto A2SYS_CLK_AUD_MUX2_DIV_ERR; - } +static int mt2701_afe_enable_audsys(struct mtk_base_afe *afe) +{ + struct mt2701_afe_private *afe_priv = afe->platform_priv; + int ret; - /* Enable clock gate */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_AUD_44K_TIMING]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_AUD_44K_TIMING], ret); - goto A2SYS_CLK_AUD_44K_ERR; - } + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_AUDSYS_AFE]); + if (ret) + return ret; - /* Enable infra audio */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_INFRA_SYS_AUDIO], ret); - goto A2SYS_CLK_INFRA_ERR; - } + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_AUDSYS_A1SYS]); + if (ret) + goto err_audio_a1sys; + + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_AUDSYS_A2SYS]); + if (ret) + goto err_audio_a2sys; + + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_AUDSYS_AFE_CONN]); + if (ret) + goto err_afe_conn; return 0; -A2SYS_CLK_INFRA_ERR: - 
clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); -A2SYS_CLK_AUD_44K_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_44K_TIMING]); -A2SYS_CLK_AUD_MUX2_DIV_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_MUX2_DIV]); -A2SYS_CLK_AUD_MUX2_SEL_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_MUX2_SEL]); +err_afe_conn: + clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_A2SYS]); +err_audio_a2sys: + clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_A1SYS]); +err_audio_a1sys: + clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_AFE]); return ret; } -void mt2701_turn_off_a2sys_clock(struct mtk_base_afe *afe) +static void mt2701_afe_disable_audsys(struct mtk_base_afe *afe) { struct mt2701_afe_private *afe_priv = afe->platform_priv; - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_44K_TIMING]); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_MUX2_DIV]); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUD_MUX2_SEL]); + clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_AFE_CONN]); + clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_A2SYS]); + clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_A1SYS]); + clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_AFE]); } -int mt2701_turn_on_afe_clock(struct mtk_base_afe *afe) +int mt2701_afe_enable_clock(struct mtk_base_afe *afe) { - struct mt2701_afe_private *afe_priv = afe->platform_priv; int ret; - /* enable INFRA_SYS */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_INFRA_SYS_AUDIO], ret); - goto AFE_AUD_INFRA_ERR; - } - - /* Set MT2701_AUD_AUDINTBUS to MT2701_AUD_SYSPLL1_D4 */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_AUDINTBUS]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, 
aud_clks[MT2701_AUD_AUDINTBUS], ret); - goto AFE_AUD_AUDINTBUS_ERR; - } - - ret = clk_set_parent(afe_priv->clocks[MT2701_AUD_AUDINTBUS], - afe_priv->clocks[MT2701_AUD_SYSPLL1_D4]); - if (ret) { - dev_err(afe->dev, "%s clk_set_parent %s-%s fail %d\n", __func__, - aud_clks[MT2701_AUD_AUDINTBUS], - aud_clks[MT2701_AUD_SYSPLL1_D4], ret); - goto AFE_AUD_AUDINTBUS_ERR; - } - - /* Set MT2701_AUD_ASM_H_SEL to MT2701_AUD_UNIVPLL2_D2 */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_ASM_H_SEL]); - if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_ASM_H_SEL], ret); - goto AFE_AUD_ASM_H_ERR; - } - - ret = clk_set_parent(afe_priv->clocks[MT2701_AUD_ASM_H_SEL], - afe_priv->clocks[MT2701_AUD_UNIVPLL2_D2]); - if (ret) { - dev_err(afe->dev, "%s clk_set_parent %s-%s fail %d\n", __func__, - aud_clks[MT2701_AUD_ASM_H_SEL], - aud_clks[MT2701_AUD_UNIVPLL2_D2], ret); - goto AFE_AUD_ASM_H_ERR; - } - - /* Set MT2701_AUD_ASM_M_SEL to MT2701_AUD_UNIVPLL2_D4 */ - ret = clk_prepare_enable(afe_priv->clocks[MT2701_AUD_ASM_M_SEL]); + /* Enable audio system */ + ret = mt2701_afe_enable_audsys(afe); if (ret) { - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[MT2701_AUD_ASM_M_SEL], ret); - goto AFE_AUD_ASM_M_ERR; + dev_err(afe->dev, "failed to enable audio system %d\n", ret); + return ret; } - ret = clk_set_parent(afe_priv->clocks[MT2701_AUD_ASM_M_SEL], - afe_priv->clocks[MT2701_AUD_UNIVPLL2_D4]); - if (ret) { - dev_err(afe->dev, "%s clk_set_parent %s-%s fail %d\n", __func__, - aud_clks[MT2701_AUD_ASM_M_SEL], - aud_clks[MT2701_AUD_UNIVPLL2_D4], ret); - goto AFE_AUD_ASM_M_ERR; - } + regmap_update_bits(afe->regmap, ASYS_TOP_CON, + AUDIO_TOP_CON0_A1SYS_A2SYS_ON, + AUDIO_TOP_CON0_A1SYS_A2SYS_ON); + regmap_update_bits(afe->regmap, AFE_DAC_CON0, + AFE_DAC_CON0_AFE_ON, + AFE_DAC_CON0_AFE_ON); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON0, - AUDIO_TOP_CON0_PDN_AFE, 0); - regmap_update_bits(afe->regmap, 
AUDIO_TOP_CON0, - AUDIO_TOP_CON0_PDN_APLL_CK, 0); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - AUDIO_TOP_CON4_PDN_A1SYS, 0); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - AUDIO_TOP_CON4_PDN_A2SYS, 0); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - AUDIO_TOP_CON4_PDN_AFE_CONN, 0); + /* Configure ASRC */ + regmap_write(afe->regmap, PWR1_ASM_CON1, PWR1_ASM_CON1_INIT_VAL); + regmap_write(afe->regmap, PWR2_ASM_CON1, PWR2_ASM_CON1_INIT_VAL); return 0; - -AFE_AUD_ASM_M_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_ASM_M_SEL]); -AFE_AUD_ASM_H_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_ASM_H_SEL]); -AFE_AUD_AUDINTBUS_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUDINTBUS]); -AFE_AUD_INFRA_ERR: - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); - - return ret; } -void mt2701_turn_off_afe_clock(struct mtk_base_afe *afe) +int mt2701_afe_disable_clock(struct mtk_base_afe *afe) { - struct mt2701_afe_private *afe_priv = afe->platform_priv; + regmap_update_bits(afe->regmap, ASYS_TOP_CON, + AUDIO_TOP_CON0_A1SYS_A2SYS_ON, 0); + regmap_update_bits(afe->regmap, AFE_DAC_CON0, + AFE_DAC_CON0_AFE_ON, 0); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_INFRA_SYS_AUDIO]); - - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_AUDINTBUS]); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_ASM_H_SEL]); - clk_disable_unprepare(afe_priv->clocks[MT2701_AUD_ASM_M_SEL]); - - regmap_update_bits(afe->regmap, AUDIO_TOP_CON0, - AUDIO_TOP_CON0_PDN_AFE, AUDIO_TOP_CON0_PDN_AFE); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON0, - AUDIO_TOP_CON0_PDN_APLL_CK, - AUDIO_TOP_CON0_PDN_APLL_CK); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - AUDIO_TOP_CON4_PDN_A1SYS, - AUDIO_TOP_CON4_PDN_A1SYS); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - AUDIO_TOP_CON4_PDN_A2SYS, - AUDIO_TOP_CON4_PDN_A2SYS); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - AUDIO_TOP_CON4_PDN_AFE_CONN, - AUDIO_TOP_CON4_PDN_AFE_CONN); + 
mt2701_afe_disable_audsys(afe); + + return 0; } void mt2701_mclk_configuration(struct mtk_base_afe *afe, int id, int domain, int mclk) { - struct mt2701_afe_private *afe_priv = afe->platform_priv; + struct mt2701_afe_private *priv = afe->platform_priv; + struct mt2701_i2s_path *i2s_path = &priv->i2s_path[id]; int ret; - int aud_src_div_id = MT2701_AUD_AUD_K1_SRC_DIV + id; - int aud_src_clk_id = MT2701_AUD_AUD_K1_SRC_SEL + id; - /* Set MCLK Kx_SRC_SEL(domain) */ - ret = clk_prepare_enable(afe_priv->clocks[aud_src_clk_id]); - if (ret) - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[aud_src_clk_id], ret); - - if (domain == 0) { - ret = clk_set_parent(afe_priv->clocks[aud_src_clk_id], - afe_priv->clocks[MT2701_AUD_AUD_MUX1_SEL]); - if (ret) - dev_err(afe->dev, "%s clk_set_parent %s-%s fail %d\n", - __func__, aud_clks[aud_src_clk_id], - aud_clks[MT2701_AUD_AUD_MUX1_SEL], ret); - } else { - ret = clk_set_parent(afe_priv->clocks[aud_src_clk_id], - afe_priv->clocks[MT2701_AUD_AUD_MUX2_SEL]); - if (ret) - dev_err(afe->dev, "%s clk_set_parent %s-%s fail %d\n", - __func__, aud_clks[aud_src_clk_id], - aud_clks[MT2701_AUD_AUD_MUX2_SEL], ret); - } - clk_disable_unprepare(afe_priv->clocks[aud_src_clk_id]); + /* Set mclk source */ + if (domain == 0) + ret = clk_set_parent(i2s_path->sel_ck, + priv->base_ck[MT2701_TOP_AUD_MCLK_SRC0]); + else + ret = clk_set_parent(i2s_path->sel_ck, + priv->base_ck[MT2701_TOP_AUD_MCLK_SRC1]); - /* Set MCLK Kx_SRC_DIV(divider) */ - ret = clk_prepare_enable(afe_priv->clocks[aud_src_div_id]); if (ret) - dev_err(afe->dev, "%s clk_prepare_enable %s fail %d\n", - __func__, aud_clks[aud_src_div_id], ret); + dev_err(afe->dev, "failed to set domain%d mclk source %d\n", + domain, ret); - ret = clk_set_rate(afe_priv->clocks[aud_src_div_id], mclk); + /* Set mclk divider */ + ret = clk_set_rate(i2s_path->div_ck, mclk); if (ret) - dev_err(afe->dev, "%s clk_set_rate %s-%d fail %d\n", __func__, - aud_clks[aud_src_div_id], mclk, ret); - 
clk_disable_unprepare(afe_priv->clocks[aud_src_div_id]); + dev_err(afe->dev, "failed to set mclk divider %d\n", ret); } MODULE_DESCRIPTION("MT2701 afe clock control"); diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h index 6497d570cf09..15417d9d6597 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h +++ b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h @@ -21,16 +21,15 @@ struct mtk_base_afe; int mt2701_init_clock(struct mtk_base_afe *afe); int mt2701_afe_enable_clock(struct mtk_base_afe *afe); -void mt2701_afe_disable_clock(struct mtk_base_afe *afe); +int mt2701_afe_disable_clock(struct mtk_base_afe *afe); -int mt2701_turn_on_a1sys_clock(struct mtk_base_afe *afe); -void mt2701_turn_off_a1sys_clock(struct mtk_base_afe *afe); +int mt2701_afe_enable_i2s(struct mtk_base_afe *afe, int id, int dir); +void mt2701_afe_disable_i2s(struct mtk_base_afe *afe, int id, int dir); +int mt2701_afe_enable_mclk(struct mtk_base_afe *afe, int id); +void mt2701_afe_disable_mclk(struct mtk_base_afe *afe, int id); -int mt2701_turn_on_a2sys_clock(struct mtk_base_afe *afe); -void mt2701_turn_off_a2sys_clock(struct mtk_base_afe *afe); - -int mt2701_turn_on_afe_clock(struct mtk_base_afe *afe); -void mt2701_turn_off_afe_clock(struct mtk_base_afe *afe); +int mt2701_enable_btmrg_clk(struct mtk_base_afe *afe); +void mt2701_disable_btmrg_clk(struct mtk_base_afe *afe); void mt2701_mclk_configuration(struct mtk_base_afe *afe, int id, int domain, int mclk); diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-common.h b/sound/soc/mediatek/mt2701/mt2701-afe-common.h index c19430e98adf..ce5bd4dc864d 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-common.h +++ b/sound/soc/mediatek/mt2701/mt2701-afe-common.h @@ -69,53 +69,14 @@ enum { MT2701_IRQ_ASYS_END, }; -/* 2701 clock def */ -enum audio_system_clock_type { - MT2701_AUD_INFRA_SYS_AUDIO, - MT2701_AUD_AUD_MUX1_SEL, - MT2701_AUD_AUD_MUX2_SEL, - MT2701_AUD_AUD_MUX1_DIV, - 
MT2701_AUD_AUD_MUX2_DIV, - MT2701_AUD_AUD_48K_TIMING, - MT2701_AUD_AUD_44K_TIMING, - MT2701_AUD_AUDPLL_MUX_SEL, - MT2701_AUD_APLL_SEL, - MT2701_AUD_AUD1PLL_98M, - MT2701_AUD_AUD2PLL_90M, - MT2701_AUD_HADDS2PLL_98M, - MT2701_AUD_HADDS2PLL_294M, - MT2701_AUD_AUDPLL, - MT2701_AUD_AUDPLL_D4, - MT2701_AUD_AUDPLL_D8, - MT2701_AUD_AUDPLL_D16, - MT2701_AUD_AUDPLL_D24, - MT2701_AUD_AUDINTBUS, - MT2701_AUD_CLK_26M, - MT2701_AUD_SYSPLL1_D4, - MT2701_AUD_AUD_K1_SRC_SEL, - MT2701_AUD_AUD_K2_SRC_SEL, - MT2701_AUD_AUD_K3_SRC_SEL, - MT2701_AUD_AUD_K4_SRC_SEL, - MT2701_AUD_AUD_K5_SRC_SEL, - MT2701_AUD_AUD_K6_SRC_SEL, - MT2701_AUD_AUD_K1_SRC_DIV, - MT2701_AUD_AUD_K2_SRC_DIV, - MT2701_AUD_AUD_K3_SRC_DIV, - MT2701_AUD_AUD_K4_SRC_DIV, - MT2701_AUD_AUD_K5_SRC_DIV, - MT2701_AUD_AUD_K6_SRC_DIV, - MT2701_AUD_AUD_I2S1_MCLK, - MT2701_AUD_AUD_I2S2_MCLK, - MT2701_AUD_AUD_I2S3_MCLK, - MT2701_AUD_AUD_I2S4_MCLK, - MT2701_AUD_AUD_I2S5_MCLK, - MT2701_AUD_AUD_I2S6_MCLK, - MT2701_AUD_ASM_M_SEL, - MT2701_AUD_ASM_H_SEL, - MT2701_AUD_UNIVPLL2_D4, - MT2701_AUD_UNIVPLL2_D2, - MT2701_AUD_SYSPLL_D5, - MT2701_CLOCK_NUM +enum audio_base_clock { + MT2701_TOP_AUD_MCLK_SRC0, + MT2701_TOP_AUD_MCLK_SRC1, + MT2701_AUDSYS_AFE, + MT2701_AUDSYS_AFE_CONN, + MT2701_AUDSYS_A1SYS, + MT2701_AUDSYS_A2SYS, + MT2701_BASE_CLK_NUM, }; static const unsigned int mt2701_afe_backup_list[] = { @@ -144,7 +105,6 @@ struct mtk_base_irq_data; struct mt2701_i2s_data { int i2s_ctrl_reg; - int i2s_pwn_shift; int i2s_asrc_fs_shift; int i2s_asrc_fs_mask; }; @@ -161,11 +121,17 @@ struct mt2701_i2s_path { int on[I2S_DIR_NUM]; int occupied[I2S_DIR_NUM]; const struct mt2701_i2s_data *i2s_data[2]; + struct clk *hop_ck[I2S_DIR_NUM]; + struct clk *sel_ck; + struct clk *div_ck; + struct clk *mclk_ck; + struct clk *asrco_ck; }; struct mt2701_afe_private { - struct clk *clocks[MT2701_CLOCK_NUM]; struct mt2701_i2s_path i2s_path[MT2701_I2S_NUM]; + struct clk *base_ck[MT2701_BASE_CLK_NUM]; + struct clk *mrgif_ck; bool mrg_enable[MT2701_STREAM_DIR_NUM]; 
}; diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c index a7362d1cda1b..33f809228f25 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c @@ -97,21 +97,12 @@ static int mt2701_afe_i2s_startup(struct snd_pcm_substream *substream, { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct mtk_base_afe *afe = snd_soc_platform_get_drvdata(rtd->platform); - struct mt2701_afe_private *afe_priv = afe->platform_priv; int i2s_num = mt2701_dai_num_to_i2s(afe, dai->id); - int clk_num = MT2701_AUD_AUD_I2S1_MCLK + i2s_num; - int ret = 0; if (i2s_num < 0) return i2s_num; - /* enable mclk */ - ret = clk_prepare_enable(afe_priv->clocks[clk_num]); - if (ret) - dev_err(afe->dev, "Failed to enable mclk for I2S: %d\n", - i2s_num); - - return ret; + return mt2701_afe_enable_mclk(afe, i2s_num); } static int mt2701_afe_i2s_path_shutdown(struct snd_pcm_substream *substream, @@ -151,9 +142,9 @@ static int mt2701_afe_i2s_path_shutdown(struct snd_pcm_substream *substream, /* disable i2s */ regmap_update_bits(afe->regmap, i2s_data->i2s_ctrl_reg, ASYS_I2S_CON_I2S_EN, 0); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - 1 << i2s_data->i2s_pwn_shift, - 1 << i2s_data->i2s_pwn_shift); + + mt2701_afe_disable_i2s(afe, i2s_num, stream_dir); + return 0; } @@ -165,7 +156,6 @@ static void mt2701_afe_i2s_shutdown(struct snd_pcm_substream *substream, struct mt2701_afe_private *afe_priv = afe->platform_priv; int i2s_num = mt2701_dai_num_to_i2s(afe, dai->id); struct mt2701_i2s_path *i2s_path; - int clk_num = MT2701_AUD_AUD_I2S1_MCLK + i2s_num; if (i2s_num < 0) return; @@ -185,7 +175,7 @@ static void mt2701_afe_i2s_shutdown(struct snd_pcm_substream *substream, I2S_UNSTART: /* disable mclk */ - clk_disable_unprepare(afe_priv->clocks[clk_num]); + mt2701_afe_disable_mclk(afe, i2s_num); } static int mt2701_i2s_path_prepare_enable(struct snd_pcm_substream *substream, @@ -251,9 +241,7 @@ static int 
mt2701_i2s_path_prepare_enable(struct snd_pcm_substream *substream, fs << i2s_data->i2s_asrc_fs_shift); /* enable i2s */ - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - 1 << i2s_data->i2s_pwn_shift, - 0 << i2s_data->i2s_pwn_shift); + mt2701_afe_enable_i2s(afe, i2s_num, stream_dir); /* reset i2s hw status before enable */ regmap_update_bits(afe->regmap, i2s_data->i2s_ctrl_reg, @@ -339,9 +327,11 @@ static int mt2701_btmrg_startup(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = substream->private_data; struct mtk_base_afe *afe = snd_soc_platform_get_drvdata(rtd->platform); struct mt2701_afe_private *afe_priv = afe->platform_priv; + int ret; - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - AUDIO_TOP_CON4_PDN_MRGIF, 0); + ret = mt2701_enable_btmrg_clk(afe); + if (ret) + return ret; afe_priv->mrg_enable[substream->stream] = 1; return 0; @@ -406,9 +396,7 @@ static void mt2701_btmrg_shutdown(struct snd_pcm_substream *substream, AFE_MRGIF_CON_MRG_EN, 0); regmap_update_bits(afe->regmap, AFE_MRGIF_CON, AFE_MRGIF_CON_MRG_I2S_EN, 0); - regmap_update_bits(afe->regmap, AUDIO_TOP_CON4, - AUDIO_TOP_CON4_PDN_MRGIF, - AUDIO_TOP_CON4_PDN_MRGIF); + mt2701_disable_btmrg_clk(afe); } afe_priv->mrg_enable[substream->stream] = 0; } @@ -1386,14 +1374,12 @@ static const struct mt2701_i2s_data mt2701_i2s_data[MT2701_I2S_NUM][2] = { { { .i2s_ctrl_reg = ASYS_I2SO1_CON, - .i2s_pwn_shift = 6, .i2s_asrc_fs_shift = 0, .i2s_asrc_fs_mask = 0x1f, }, { .i2s_ctrl_reg = ASYS_I2SIN1_CON, - .i2s_pwn_shift = 0, .i2s_asrc_fs_shift = 0, .i2s_asrc_fs_mask = 0x1f, @@ -1402,14 +1388,12 @@ static const struct mt2701_i2s_data mt2701_i2s_data[MT2701_I2S_NUM][2] = { { { .i2s_ctrl_reg = ASYS_I2SO2_CON, - .i2s_pwn_shift = 7, .i2s_asrc_fs_shift = 5, .i2s_asrc_fs_mask = 0x1f, }, { .i2s_ctrl_reg = ASYS_I2SIN2_CON, - .i2s_pwn_shift = 1, .i2s_asrc_fs_shift = 5, .i2s_asrc_fs_mask = 0x1f, @@ -1418,14 +1402,12 @@ static const struct mt2701_i2s_data mt2701_i2s_data[MT2701_I2S_NUM][2] = { { { 
.i2s_ctrl_reg = ASYS_I2SO3_CON, - .i2s_pwn_shift = 8, .i2s_asrc_fs_shift = 10, .i2s_asrc_fs_mask = 0x1f, }, { .i2s_ctrl_reg = ASYS_I2SIN3_CON, - .i2s_pwn_shift = 2, .i2s_asrc_fs_shift = 10, .i2s_asrc_fs_mask = 0x1f, @@ -1434,14 +1416,12 @@ static const struct mt2701_i2s_data mt2701_i2s_data[MT2701_I2S_NUM][2] = { { { .i2s_ctrl_reg = ASYS_I2SO4_CON, - .i2s_pwn_shift = 9, .i2s_asrc_fs_shift = 15, .i2s_asrc_fs_mask = 0x1f, }, { .i2s_ctrl_reg = ASYS_I2SIN4_CON, - .i2s_pwn_shift = 3, .i2s_asrc_fs_shift = 15, .i2s_asrc_fs_mask = 0x1f, @@ -1483,8 +1463,7 @@ static int mt2701_afe_runtime_suspend(struct device *dev) { struct mtk_base_afe *afe = dev_get_drvdata(dev); - mt2701_afe_disable_clock(afe); - return 0; + return mt2701_afe_disable_clock(afe); } static int mt2701_afe_runtime_resume(struct device *dev) -- cgit v1.2.3 From 600b2fd4f0f7ae5ebcb604c39c9a97e573f9d23e Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Tue, 2 Jan 2018 19:47:20 +0800 Subject: ASoC: mediatek: cleanup audio driver for MT2701 Cleanup unused code such as 'i2s_num' guard, headers, indentation and some defines. Signed-off-by: Ryder Lee Signed-off-by: Mark Brown --- sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c | 14 +--- sound/soc/mediatek/mt2701/mt2701-afe-common.h | 20 +---- sound/soc/mediatek/mt2701/mt2701-afe-pcm.c | 94 ++++------------------- sound/soc/mediatek/mt2701/mt2701-reg.h | 41 +--------- 4 files changed, 24 insertions(+), 145 deletions(-) diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c index 75ccdca5811d..56a057c78c9a 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c @@ -14,10 +14,6 @@ * GNU General Public License for more details. 
*/ -#include -#include -#include - #include "mt2701-afe-common.h" #include "mt2701-afe-clock-ctrl.h" @@ -223,8 +219,8 @@ int mt2701_afe_enable_clock(struct mtk_base_afe *afe) } regmap_update_bits(afe->regmap, ASYS_TOP_CON, - AUDIO_TOP_CON0_A1SYS_A2SYS_ON, - AUDIO_TOP_CON0_A1SYS_A2SYS_ON); + ASYS_TOP_CON_ASYS_TIMING_ON, + ASYS_TOP_CON_ASYS_TIMING_ON); regmap_update_bits(afe->regmap, AFE_DAC_CON0, AFE_DAC_CON0_AFE_ON, AFE_DAC_CON0_AFE_ON); @@ -239,7 +235,7 @@ int mt2701_afe_enable_clock(struct mtk_base_afe *afe) int mt2701_afe_disable_clock(struct mtk_base_afe *afe) { regmap_update_bits(afe->regmap, ASYS_TOP_CON, - AUDIO_TOP_CON0_A1SYS_A2SYS_ON, 0); + ASYS_TOP_CON_ASYS_TIMING_ON, 0); regmap_update_bits(afe->regmap, AFE_DAC_CON0, AFE_DAC_CON0_AFE_ON, 0); @@ -272,7 +268,3 @@ void mt2701_mclk_configuration(struct mtk_base_afe *afe, int id, int domain, if (ret) dev_err(afe->dev, "failed to set mclk divider %d\n", ret); } - -MODULE_DESCRIPTION("MT2701 afe clock control"); -MODULE_AUTHOR("Garlic Tseng "); -MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-common.h b/sound/soc/mediatek/mt2701/mt2701-afe-common.h index ce5bd4dc864d..9a2b301a4c21 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-common.h +++ b/sound/soc/mediatek/mt2701/mt2701-afe-common.h @@ -16,6 +16,7 @@ #ifndef _MT_2701_AFE_COMMON_H_ #define _MT_2701_AFE_COMMON_H_ + #include #include #include @@ -25,16 +26,7 @@ #define MT2701_STREAM_DIR_NUM (SNDRV_PCM_STREAM_LAST + 1) #define MT2701_PLL_DOMAIN_0_RATE 98304000 #define MT2701_PLL_DOMAIN_1_RATE 90316800 -#define MT2701_AUD_AUD_MUX1_DIV_RATE (MT2701_PLL_DOMAIN_0_RATE / 2) -#define MT2701_AUD_AUD_MUX2_DIV_RATE (MT2701_PLL_DOMAIN_1_RATE / 2) - -enum { - MT2701_I2S_1, - MT2701_I2S_2, - MT2701_I2S_3, - MT2701_I2S_4, - MT2701_I2S_NUM, -}; +#define MT2701_I2S_NUM 4 enum { MT2701_MEMIF_DL1, @@ -62,8 +54,7 @@ enum { }; enum { - MT2701_IRQ_ASYS_START, - MT2701_IRQ_ASYS_IRQ1 = MT2701_IRQ_ASYS_START, + MT2701_IRQ_ASYS_IRQ1, MT2701_IRQ_ASYS_IRQ2, 
MT2701_IRQ_ASYS_IRQ3, MT2701_IRQ_ASYS_END, @@ -100,9 +91,6 @@ static const unsigned int mt2701_afe_backup_list[] = { AFE_MEMIF_PBUF_SIZE, }; -struct snd_pcm_substream; -struct mtk_base_irq_data; - struct mt2701_i2s_data { int i2s_ctrl_reg; int i2s_asrc_fs_shift; @@ -120,7 +108,7 @@ struct mt2701_i2s_path { int mclk_rate; int on[I2S_DIR_NUM]; int occupied[I2S_DIR_NUM]; - const struct mt2701_i2s_data *i2s_data[2]; + const struct mt2701_i2s_data *i2s_data[I2S_DIR_NUM]; struct clk *hop_ck[I2S_DIR_NUM]; struct clk *sel_ck; struct clk *div_ck; diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c index 33f809228f25..0edadca12a5e 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c @@ -20,16 +20,12 @@ #include #include #include -#include #include "mt2701-afe-common.h" - #include "mt2701-afe-clock-ctrl.h" #include "../common/mtk-afe-platform-driver.h" #include "../common/mtk-afe-fe-dai.h" -#define AFE_IRQ_STATUS_BITS 0xff - static const struct snd_pcm_hardware mt2701_afe_hardware = { .info = SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_RESUME | SNDRV_PCM_INFO_MMAP_VALID, @@ -107,21 +103,16 @@ static int mt2701_afe_i2s_startup(struct snd_pcm_substream *substream, static int mt2701_afe_i2s_path_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai, + int i2s_num, int dir_invert) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct mtk_base_afe *afe = snd_soc_platform_get_drvdata(rtd->platform); struct mt2701_afe_private *afe_priv = afe->platform_priv; - int i2s_num = mt2701_dai_num_to_i2s(afe, dai->id); - struct mt2701_i2s_path *i2s_path; + struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[i2s_num]; const struct mt2701_i2s_data *i2s_data; int stream_dir = substream->stream; - if (i2s_num < 0) - return i2s_num; - - i2s_path = &afe_priv->i2s_path[i2s_num]; - if (dir_invert) { if (stream_dir == SNDRV_PCM_STREAM_PLAYBACK) 
stream_dir = SNDRV_PCM_STREAM_CAPTURE; @@ -167,11 +158,11 @@ static void mt2701_afe_i2s_shutdown(struct snd_pcm_substream *substream, else goto I2S_UNSTART; - mt2701_afe_i2s_path_shutdown(substream, dai, 0); + mt2701_afe_i2s_path_shutdown(substream, dai, i2s_num, 0); /* need to disable i2s-out path when disable i2s-in */ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) - mt2701_afe_i2s_path_shutdown(substream, dai, 1); + mt2701_afe_i2s_path_shutdown(substream, dai, i2s_num, 1); I2S_UNSTART: /* disable mclk */ @@ -180,24 +171,19 @@ I2S_UNSTART: static int mt2701_i2s_path_prepare_enable(struct snd_pcm_substream *substream, struct snd_soc_dai *dai, + int i2s_num, int dir_invert) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct mtk_base_afe *afe = snd_soc_platform_get_drvdata(rtd->platform); struct mt2701_afe_private *afe_priv = afe->platform_priv; - int i2s_num = mt2701_dai_num_to_i2s(afe, dai->id); - struct mt2701_i2s_path *i2s_path; + struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[i2s_num]; const struct mt2701_i2s_data *i2s_data; struct snd_pcm_runtime * const runtime = substream->runtime; int reg, fs, w_len = 1; /* now we support bck 64bits only */ int stream_dir = substream->stream; unsigned int mask = 0, val = 0; - if (i2s_num < 0) - return i2s_num; - - i2s_path = &afe_priv->i2s_path[i2s_num]; - if (dir_invert) { if (stream_dir == SNDRV_PCM_STREAM_PLAYBACK) stream_dir = SNDRV_PCM_STREAM_CAPTURE; @@ -288,13 +274,13 @@ static int mt2701_afe_i2s_prepare(struct snd_pcm_substream *substream, mt2701_mclk_configuration(afe, i2s_num, clk_domain, mclk_rate); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { - mt2701_i2s_path_prepare_enable(substream, dai, 0); + mt2701_i2s_path_prepare_enable(substream, dai, i2s_num, 0); } else { /* need to enable i2s-out path when enable i2s-in */ /* prepare for another direction "out" */ - mt2701_i2s_path_prepare_enable(substream, dai, 1); + mt2701_i2s_path_prepare_enable(substream, dai, i2s_num, 1); /* 
prepare for "in" */ - mt2701_i2s_path_prepare_enable(substream, dai, 0); + mt2701_i2s_path_prepare_enable(substream, dai, i2s_num, 0); } return 0; @@ -562,7 +548,6 @@ static const struct snd_soc_dai_ops mt2701_single_memif_dai_ops = { .hw_free = mtk_afe_fe_hw_free, .prepare = mtk_afe_fe_prepare, .trigger = mtk_afe_fe_trigger, - }; static const struct snd_soc_dai_ops mt2701_dlm_memif_dai_ops = { @@ -903,31 +888,6 @@ static const struct snd_kcontrol_new mt2701_afe_multi_ch_out_i2s4[] = { PWR2_TOP_CON, 19, 1, 0), }; -static const struct snd_kcontrol_new mt2701_afe_multi_ch_out_asrc0[] = { - SOC_DAPM_SINGLE_AUTODISABLE("Asrc0 out Switch", AUDIO_TOP_CON4, 14, 1, - 1), -}; - -static const struct snd_kcontrol_new mt2701_afe_multi_ch_out_asrc1[] = { - SOC_DAPM_SINGLE_AUTODISABLE("Asrc1 out Switch", AUDIO_TOP_CON4, 15, 1, - 1), -}; - -static const struct snd_kcontrol_new mt2701_afe_multi_ch_out_asrc2[] = { - SOC_DAPM_SINGLE_AUTODISABLE("Asrc2 out Switch", PWR2_TOP_CON, 6, 1, - 1), -}; - -static const struct snd_kcontrol_new mt2701_afe_multi_ch_out_asrc3[] = { - SOC_DAPM_SINGLE_AUTODISABLE("Asrc3 out Switch", PWR2_TOP_CON, 7, 1, - 1), -}; - -static const struct snd_kcontrol_new mt2701_afe_multi_ch_out_asrc4[] = { - SOC_DAPM_SINGLE_AUTODISABLE("Asrc4 out Switch", PWR2_TOP_CON, 8, 1, - 1), -}; - static const struct snd_soc_dapm_widget mt2701_afe_pcm_widgets[] = { /* inter-connections */ SND_SOC_DAPM_MIXER("I00", SND_SOC_NOPM, 0, 0, NULL, 0), @@ -987,19 +947,6 @@ static const struct snd_soc_dapm_widget mt2701_afe_pcm_widgets[] = { SND_SOC_DAPM_MIXER("I18I19", SND_SOC_NOPM, 0, 0, mt2701_afe_multi_ch_out_i2s3, ARRAY_SIZE(mt2701_afe_multi_ch_out_i2s3)), - - SND_SOC_DAPM_MIXER("ASRC_O0", SND_SOC_NOPM, 0, 0, - mt2701_afe_multi_ch_out_asrc0, - ARRAY_SIZE(mt2701_afe_multi_ch_out_asrc0)), - SND_SOC_DAPM_MIXER("ASRC_O1", SND_SOC_NOPM, 0, 0, - mt2701_afe_multi_ch_out_asrc1, - ARRAY_SIZE(mt2701_afe_multi_ch_out_asrc1)), - SND_SOC_DAPM_MIXER("ASRC_O2", SND_SOC_NOPM, 0, 0, - 
mt2701_afe_multi_ch_out_asrc2, - ARRAY_SIZE(mt2701_afe_multi_ch_out_asrc2)), - SND_SOC_DAPM_MIXER("ASRC_O3", SND_SOC_NOPM, 0, 0, - mt2701_afe_multi_ch_out_asrc3, - ARRAY_SIZE(mt2701_afe_multi_ch_out_asrc3)), }; static const struct snd_soc_dapm_route mt2701_afe_pcm_routes[] = { @@ -1009,7 +956,6 @@ static const struct snd_soc_dapm_route mt2701_afe_pcm_routes[] = { {"I2S0 Playback", NULL, "O15"}, {"I2S0 Playback", NULL, "O16"}, - {"I2S1 Playback", NULL, "O17"}, {"I2S1 Playback", NULL, "O18"}, {"I2S2 Playback", NULL, "O19"}, @@ -1026,7 +972,6 @@ static const struct snd_soc_dapm_route mt2701_afe_pcm_routes[] = { {"I00", NULL, "I2S0 Capture"}, {"I01", NULL, "I2S0 Capture"}, - {"I02", NULL, "I2S1 Capture"}, {"I03", NULL, "I2S1 Capture"}, /* I02,03 link to UL2, also need to open I2S0 */ @@ -1034,15 +979,10 @@ static const struct snd_soc_dapm_route mt2701_afe_pcm_routes[] = { {"I26", NULL, "BT Capture"}, - {"ASRC_O0", "Asrc0 out Switch", "DLM"}, - {"ASRC_O1", "Asrc1 out Switch", "DLM"}, - {"ASRC_O2", "Asrc2 out Switch", "DLM"}, - {"ASRC_O3", "Asrc3 out Switch", "DLM"}, - - {"I12I13", "Multich I2S0 Out Switch", "ASRC_O0"}, - {"I14I15", "Multich I2S1 Out Switch", "ASRC_O1"}, - {"I16I17", "Multich I2S2 Out Switch", "ASRC_O2"}, - {"I18I19", "Multich I2S3 Out Switch", "ASRC_O3"}, + {"I12I13", "Multich I2S0 Out Switch", "DLM"}, + {"I14I15", "Multich I2S1 Out Switch", "DLM"}, + {"I16I17", "Multich I2S2 Out Switch", "DLM"}, + {"I18I19", "Multich I2S3 Out Switch", "DLM"}, { "I12", NULL, "I12I13" }, { "I13", NULL, "I12I13" }, @@ -1067,7 +1007,6 @@ static const struct snd_soc_dapm_route mt2701_afe_pcm_routes[] = { { "O21", "I18 Switch", "I18" }, { "O22", "I19 Switch", "I19" }, { "O31", "I35 Switch", "I35" }, - }; static const struct snd_soc_component_driver mt2701_afe_pcm_dai_component = { @@ -1484,12 +1423,13 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) afe = devm_kzalloc(&pdev->dev, sizeof(*afe), GFP_KERNEL); if (!afe) return -ENOMEM; + afe->platform_priv = 
devm_kzalloc(&pdev->dev, sizeof(*afe_priv), GFP_KERNEL); if (!afe->platform_priv) return -ENOMEM; - afe_priv = afe->platform_priv; + afe_priv = afe->platform_priv; afe->dev = &pdev->dev; dev = afe->dev; @@ -1524,7 +1464,6 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) afe->memif_size = MT2701_MEMIF_NUM; afe->memif = devm_kcalloc(dev, afe->memif_size, sizeof(*afe->memif), GFP_KERNEL); - if (!afe->memif) return -ENOMEM; @@ -1537,7 +1476,6 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) afe->irqs_size = MT2701_IRQ_ASYS_END; afe->irqs = devm_kcalloc(dev, afe->irqs_size, sizeof(*afe->irqs), GFP_KERNEL); - if (!afe->irqs) return -ENOMEM; @@ -1555,7 +1493,6 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) afe->mtk_afe_hardware = &mt2701_afe_hardware; afe->memif_fs = mt2701_memif_fs; afe->irq_fs = mt2701_irq_fs; - afe->reg_back_up_list = mt2701_afe_backup_list; afe->reg_back_up_list_num = ARRAY_SIZE(mt2701_afe_backup_list); afe->runtime_resume = mt2701_afe_runtime_resume; @@ -1646,4 +1583,3 @@ module_platform_driver(mt2701_afe_pcm_driver); MODULE_DESCRIPTION("Mediatek ALSA SoC AFE platform driver for 2701"); MODULE_AUTHOR("Garlic Tseng "); MODULE_LICENSE("GPL v2"); - diff --git a/sound/soc/mediatek/mt2701/mt2701-reg.h b/sound/soc/mediatek/mt2701/mt2701-reg.h index bb62b1c55957..f17c76f37b5f 100644 --- a/sound/soc/mediatek/mt2701/mt2701-reg.h +++ b/sound/soc/mediatek/mt2701/mt2701-reg.h @@ -17,17 +17,6 @@ #ifndef _MT2701_REG_H_ #define _MT2701_REG_H_ -#include -#include -#include -#include -#include -#include -#include "mt2701-afe-common.h" - -/***************************************************************************** - * R E G I S T E R D E F I N I T I O N - *****************************************************************************/ #define AUDIO_TOP_CON0 0x0000 #define AUDIO_TOP_CON4 0x0010 #define AUDIO_TOP_CON5 0x0014 @@ -109,18 +98,6 @@ #define AFE_DAI_BASE 0x1370 #define AFE_DAI_CUR 0x137c -/* 
AUDIO_TOP_CON0 (0x0000) */ -#define AUDIO_TOP_CON0_A1SYS_A2SYS_ON (0x3 << 0) -#define AUDIO_TOP_CON0_PDN_AFE (0x1 << 2) -#define AUDIO_TOP_CON0_PDN_APLL_CK (0x1 << 23) - -/* AUDIO_TOP_CON4 (0x0010) */ -#define AUDIO_TOP_CON4_I2SO1_PWN (0x1 << 6) -#define AUDIO_TOP_CON4_PDN_A1SYS (0x1 << 21) -#define AUDIO_TOP_CON4_PDN_A2SYS (0x1 << 22) -#define AUDIO_TOP_CON4_PDN_AFE_CONN (0x1 << 23) -#define AUDIO_TOP_CON4_PDN_MRGIF (0x1 << 25) - /* AFE_DAIBT_CON0 (0x001c) */ #define AFE_DAIBT_CON0_DAIBT_EN (0x1 << 0) #define AFE_DAIBT_CON0_BT_FUNC_EN (0x1 << 1) @@ -137,22 +114,8 @@ #define AFE_MRGIF_CON_I2S_MODE_MASK (0xf << 20) #define AFE_MRGIF_CON_I2S_MODE_32K (0x4 << 20) -/* ASYS_I2SO1_CON (0x061c) */ -#define ASYS_I2SO1_CON_FS (0x1f << 8) -#define ASYS_I2SO1_CON_FS_SET(x) ((x) << 8) -#define ASYS_I2SO1_CON_MULTI_CH (0x1 << 16) -#define ASYS_I2SO1_CON_SIDEGEN (0x1 << 30) -#define ASYS_I2SO1_CON_I2S_EN (0x1 << 0) -/* 0:EIAJ 1:I2S */ -#define ASYS_I2SO1_CON_I2S_MODE (0x1 << 3) -#define ASYS_I2SO1_CON_WIDE_MODE (0x1 << 1) -#define ASYS_I2SO1_CON_WIDE_MODE_SET(x) ((x) << 1) - -/* PWR2_TOP_CON (0x0634) */ -#define PWR2_TOP_CON_INIT_VAL (0xffe1ffff) - -/* ASYS_IRQ_CLR (0x07c0) */ -#define ASYS_IRQ_CLR_ALL (0xffffffff) +/* ASYS_TOP_CON (0x0600) */ +#define ASYS_TOP_CON_ASYS_TIMING_ON (0x3 << 0) /* PWR2_ASM_CON1 (0x1070) */ #define PWR2_ASM_CON1_INIT_VAL (0x492492) -- cgit v1.2.3 From 20a1ea2222e7cbf96e9bf8579362e971491e6aea Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Jan 2018 16:38:46 +0100 Subject: ASoC: skl: Fix kernel warning due to zero NHTL entry I got the following kernel warning when loading snd-soc-skl module on Dell Latitude 7270 laptop: memremap attempted on mixed range 0x0000000000000000 size: 0x0 WARNING: CPU: 0 PID: 484 at kernel/memremap.c:98 memremap+0x8a/0x180 Call Trace: skl_nhlt_init+0x82/0xf0 [snd_soc_skl] skl_probe+0x2ee/0x7c0 [snd_soc_skl] .... 
It seems that a machine that doesn't support the SKL DSP gives an empty NHLT entry, and it triggers the warning. To avoid it, do the zero check before calling memremap(). Cc: Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-nhlt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index d14c50a60289..1ce414d86d8a 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -43,7 +43,8 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL); if (obj && obj->type == ACPI_TYPE_BUFFER) { nhlt_ptr = (struct nhlt_resource_desc *)obj->buffer.pointer; - nhlt_table = (struct nhlt_acpi_table *) + if (nhlt_ptr->length) + nhlt_table = (struct nhlt_acpi_table *) memremap(nhlt_ptr->min_addr, nhlt_ptr->length, MEMREMAP_WB); ACPI_FREE(obj); -- cgit v1.2.3 From c0bace798436bca0fdc221ff61143f1376a9c3de Mon Sep 17 00:00:00 2001 From: Felix Janda Date: Mon, 1 Jan 2018 19:33:20 +0100 Subject: uapi libc compat: add fallback for unsupported libcs libc-compat.h aims to prevent symbol collisions between uapi and libc headers for each supported libc. This requires continuous coordination between them. The goal of this commit is to improve the situation for libcs (such as musl) which are not yet supported and/or do not wish to be explicitly supported, while not affecting supported libcs. More precisely, with this commit, unsupported libcs can request the suppression of any specific uapi definition by defining the corresponding __UAPI_DEF_* macro as 0. This can fix symbol collisions for them, as long as the libc headers are included before the uapi headers. Inclusion in the other order is outside the scope of this commit.
All infrastructure in order to enable this fallback for unsupported libcs is already in place, except that libc-compat.h unconditionally defines all __UAPI_DEF_* macros to 1 for all unsupported libcs so that any previous definitions are ignored. In order to fix this, this commit merely makes these definitions conditional. This commit together with the musl libc commit http://git.musl-libc.org/cgit/musl/commit/?id=04983f2272382af92eb8f8838964ff944fbb8258 fixes for example the following compiler errors when linux/in6.h is included after musl's netinet/in.h: ./linux/in6.h:32:8: error: redefinition of 'struct in6_addr' ./linux/in6.h:49:8: error: redefinition of 'struct sockaddr_in6' ./linux/in6.h:59:8: error: redefinition of 'struct ipv6_mreq' The comments referencing glibc are still correct, but this file is not only used for glibc any more. Signed-off-by: Felix Janda Reviewed-by: Hauke Mehrtens Signed-off-by: David S. Miller --- include/uapi/linux/libc-compat.h | 55 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 282875cf8056..8254c937c9f4 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -168,46 +168,99 @@ /* If we did not see any headers from any supported C libraries, * or we are being included in the kernel, then define everything - * that we need. */ + * that we need. Check for previous __UAPI_* definitions to give + * unsupported C libraries a way to opt out of any kernel definition.
*/ #else /* !defined(__GLIBC__) */ /* Definitions for if.h */ +#ifndef __UAPI_DEF_IF_IFCONF #define __UAPI_DEF_IF_IFCONF 1 +#endif +#ifndef __UAPI_DEF_IF_IFMAP #define __UAPI_DEF_IF_IFMAP 1 +#endif +#ifndef __UAPI_DEF_IF_IFNAMSIZ #define __UAPI_DEF_IF_IFNAMSIZ 1 +#endif +#ifndef __UAPI_DEF_IF_IFREQ #define __UAPI_DEF_IF_IFREQ 1 +#endif /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1 +#endif /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */ +#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1 +#endif /* Definitions for in.h */ +#ifndef __UAPI_DEF_IN_ADDR #define __UAPI_DEF_IN_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN_IPPROTO #define __UAPI_DEF_IN_IPPROTO 1 +#endif +#ifndef __UAPI_DEF_IN_PKTINFO #define __UAPI_DEF_IN_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP_MREQ #define __UAPI_DEF_IP_MREQ 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN #define __UAPI_DEF_SOCKADDR_IN 1 +#endif +#ifndef __UAPI_DEF_IN_CLASS #define __UAPI_DEF_IN_CLASS 1 +#endif /* Definitions for in6.h */ +#ifndef __UAPI_DEF_IN6_ADDR #define __UAPI_DEF_IN6_ADDR 1 +#endif +#ifndef __UAPI_DEF_IN6_ADDR_ALT #define __UAPI_DEF_IN6_ADDR_ALT 1 +#endif +#ifndef __UAPI_DEF_SOCKADDR_IN6 #define __UAPI_DEF_SOCKADDR_IN6 1 +#endif +#ifndef __UAPI_DEF_IPV6_MREQ #define __UAPI_DEF_IPV6_MREQ 1 +#endif +#ifndef __UAPI_DEF_IPPROTO_V6 #define __UAPI_DEF_IPPROTO_V6 1 +#endif +#ifndef __UAPI_DEF_IPV6_OPTIONS #define __UAPI_DEF_IPV6_OPTIONS 1 +#endif +#ifndef __UAPI_DEF_IN6_PKTINFO #define __UAPI_DEF_IN6_PKTINFO 1 +#endif +#ifndef __UAPI_DEF_IP6_MTUINFO #define __UAPI_DEF_IP6_MTUINFO 1 +#endif /* Definitions for ipx.h */ +#ifndef __UAPI_DEF_SOCKADDR_IPX #define __UAPI_DEF_SOCKADDR_IPX 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION #define __UAPI_DEF_IPX_ROUTE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION #define 
__UAPI_DEF_IPX_INTERFACE_DEFINITION 1 +#endif +#ifndef __UAPI_DEF_IPX_CONFIG_DATA #define __UAPI_DEF_IPX_CONFIG_DATA 1 +#endif +#ifndef __UAPI_DEF_IPX_ROUTE_DEF #define __UAPI_DEF_IPX_ROUTE_DEF 1 +#endif /* Definitions for xattr.h */ +#ifndef __UAPI_DEF_XATTR #define __UAPI_DEF_XATTR 1 +#endif #endif /* __GLIBC__ */ -- cgit v1.2.3 From c095508770aebf1b9218e77026e48345d719b17c Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Tue, 2 Jan 2018 19:44:34 +0000 Subject: RDS: Heap OOB write in rds_message_alloc_sgs() When args->nr_local is 0, nr_pages also ends up being 0 due to the size calculation via rds_rm_size(), which is later used to allocate pages for DMA; this bug produces a heap out-of-bounds write access to a specific memory region. Signed-off-by: Mohamed Ghannam Signed-off-by: David S. Miller --- net/rds/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index bc2f1e0977d6..94729d9da437 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args) local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; + if (args->nr_local == 0) + return -EINVAL; + /* figure out the number of pages in the vector */ for (i = 0; i < args->nr_local; i++) { if (copy_from_user(&vec, &local_vec[i], -- cgit v1.2.3 From 0739fdfc0617a86781799d033e8fe758e8e48554 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Tue, 2 Jan 2018 19:47:21 +0800 Subject: ASoC: mediatek: update clock related properties of MT2701 AFE Add 'assigned-clocks*' properties, which are used to initialize the default domain sources of the audio system. We can now configure different sets of input clocks through DTS, so the driver no longer needs to care about that. Also change some 'clock-names' to make them more generic so that other chips can reuse them gracefully.
Signed-off-by: Ryder Lee Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/mt2701-afe-pcm.txt | 207 +++++++++------------ 1 file changed, 91 insertions(+), 116 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt b/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt index 77a57f84bed4..0450baad2813 100644 --- a/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt +++ b/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt @@ -6,51 +6,44 @@ Required properties: - interrupts: should contain AFE and ASYS interrupts - interrupt-names: should be "afe" and "asys" - power-domains: should define the power domain +- clocks: Must contain an entry for each entry in clock-names + See ../clocks/clock-bindings.txt for details - clock-names: should have these clock names: - "infra_sys_audio_clk", "top_audio_mux1_sel", "top_audio_mux2_sel", - "top_audio_mux1_div", - "top_audio_mux2_div", - "top_audio_48k_timing", - "top_audio_44k_timing", - "top_audpll_mux_sel", - "top_apll_sel", - "top_aud1_pll_98M", - "top_aud2_pll_90M", - "top_hadds2_pll_98M", - "top_hadds2_pll_294M", - "top_audpll", - "top_audpll_d4", - "top_audpll_d8", - "top_audpll_d16", - "top_audpll_d24", - "top_audintbus_sel", - "clk_26m", - "top_syspll1_d4", - "top_aud_k1_src_sel", - "top_aud_k2_src_sel", - "top_aud_k3_src_sel", - "top_aud_k4_src_sel", - "top_aud_k5_src_sel", - "top_aud_k6_src_sel", - "top_aud_k1_src_div", - "top_aud_k2_src_div", - "top_aud_k3_src_div", - "top_aud_k4_src_div", - "top_aud_k5_src_div", - "top_aud_k6_src_div", - "top_aud_i2s1_mclk", - "top_aud_i2s2_mclk", - "top_aud_i2s3_mclk", - "top_aud_i2s4_mclk", - "top_aud_i2s5_mclk", - "top_aud_i2s6_mclk", - "top_asm_m_sel", - "top_asm_h_sel", - "top_univpll2_d4", - "top_univpll2_d2", - "top_syspll_d5"; + "i2s0_src_sel", + "i2s1_src_sel", + "i2s2_src_sel", + "i2s3_src_sel", + "i2s0_src_div", + "i2s1_src_div", + "i2s2_src_div", + "i2s3_src_div", + "i2s0_mclk_en", + "i2s1_mclk_en", + 
"i2s2_mclk_en", + "i2s3_mclk_en", + "i2so0_hop_ck", + "i2so1_hop_ck", + "i2so2_hop_ck", + "i2so3_hop_ck", + "i2si0_hop_ck", + "i2si1_hop_ck", + "i2si2_hop_ck", + "i2si3_hop_ck", + "asrc0_out_ck", + "asrc1_out_ck", + "asrc2_out_ck", + "asrc3_out_ck", + "audio_afe_pd", + "audio_afe_conn_pd", + "audio_a1sys_pd", + "audio_a2sys_pd", + "audio_mrgif_pd"; +- assigned-clocks: list of input clocks and dividers for the audio system. + See ../clocks/clock-bindings.txt for details. +- assigned-clocks-parents: parent of input clocks of assigned clocks. +- assigned-clock-rates: list of clock frequencies of assigned clocks. Example: @@ -62,93 +55,75 @@ Example: ; interrupt-names = "afe", "asys"; power-domains = <&scpsys MT2701_POWER_DOMAIN_IFR_MSC>; - clocks = <&infracfg CLK_INFRA_AUDIO>, - <&topckgen CLK_TOP_AUD_MUX1_SEL>, + clocks = <&topckgen CLK_TOP_AUD_MUX1_SEL>, <&topckgen CLK_TOP_AUD_MUX2_SEL>, - <&topckgen CLK_TOP_AUD_MUX1_DIV>, - <&topckgen CLK_TOP_AUD_MUX2_DIV>, - <&topckgen CLK_TOP_AUD_48K_TIMING>, - <&topckgen CLK_TOP_AUD_44K_TIMING>, - <&topckgen CLK_TOP_AUDPLL_MUX_SEL>, - <&topckgen CLK_TOP_APLL_SEL>, - <&topckgen CLK_TOP_AUD1PLL_98M>, - <&topckgen CLK_TOP_AUD2PLL_90M>, - <&topckgen CLK_TOP_HADDS2PLL_98M>, - <&topckgen CLK_TOP_HADDS2PLL_294M>, - <&topckgen CLK_TOP_AUDPLL>, - <&topckgen CLK_TOP_AUDPLL_D4>, - <&topckgen CLK_TOP_AUDPLL_D8>, - <&topckgen CLK_TOP_AUDPLL_D16>, - <&topckgen CLK_TOP_AUDPLL_D24>, - <&topckgen CLK_TOP_AUDINTBUS_SEL>, - <&clk26m>, - <&topckgen CLK_TOP_SYSPLL1_D4>, <&topckgen CLK_TOP_AUD_K1_SRC_SEL>, <&topckgen CLK_TOP_AUD_K2_SRC_SEL>, <&topckgen CLK_TOP_AUD_K3_SRC_SEL>, <&topckgen CLK_TOP_AUD_K4_SRC_SEL>, - <&topckgen CLK_TOP_AUD_K5_SRC_SEL>, - <&topckgen CLK_TOP_AUD_K6_SRC_SEL>, <&topckgen CLK_TOP_AUD_K1_SRC_DIV>, <&topckgen CLK_TOP_AUD_K2_SRC_DIV>, <&topckgen CLK_TOP_AUD_K3_SRC_DIV>, <&topckgen CLK_TOP_AUD_K4_SRC_DIV>, - <&topckgen CLK_TOP_AUD_K5_SRC_DIV>, - <&topckgen CLK_TOP_AUD_K6_SRC_DIV>, <&topckgen CLK_TOP_AUD_I2S1_MCLK>, <&topckgen 
CLK_TOP_AUD_I2S2_MCLK>, <&topckgen CLK_TOP_AUD_I2S3_MCLK>, <&topckgen CLK_TOP_AUD_I2S4_MCLK>, - <&topckgen CLK_TOP_AUD_I2S5_MCLK>, - <&topckgen CLK_TOP_AUD_I2S6_MCLK>, - <&topckgen CLK_TOP_ASM_M_SEL>, - <&topckgen CLK_TOP_ASM_H_SEL>, - <&topckgen CLK_TOP_UNIVPLL2_D4>, - <&topckgen CLK_TOP_UNIVPLL2_D2>, - <&topckgen CLK_TOP_SYSPLL_D5>; + <&audiosys CLK_AUD_I2SO1>, + <&audiosys CLK_AUD_I2SO2>, + <&audiosys CLK_AUD_I2SO3>, + <&audiosys CLK_AUD_I2SO4>, + <&audiosys CLK_AUD_I2SIN1>, + <&audiosys CLK_AUD_I2SIN2>, + <&audiosys CLK_AUD_I2SIN3>, + <&audiosys CLK_AUD_I2SIN4>, + <&audiosys CLK_AUD_ASRCO1>, + <&audiosys CLK_AUD_ASRCO2>, + <&audiosys CLK_AUD_ASRCO3>, + <&audiosys CLK_AUD_ASRCO4>, + <&audiosys CLK_AUD_AFE>, + <&audiosys CLK_AUD_AFE_CONN>, + <&audiosys CLK_AUD_A1SYS>, + <&audiosys CLK_AUD_A2SYS>, + <&audiosys CLK_AUD_AFE_MRGIF>; - clock-names = "infra_sys_audio_clk", - "top_audio_mux1_sel", + clock-names = "top_audio_mux1_sel", "top_audio_mux2_sel", - "top_audio_mux1_div", - "top_audio_mux2_div", - "top_audio_48k_timing", - "top_audio_44k_timing", - "top_audpll_mux_sel", - "top_apll_sel", - "top_aud1_pll_98M", - "top_aud2_pll_90M", - "top_hadds2_pll_98M", - "top_hadds2_pll_294M", - "top_audpll", - "top_audpll_d4", - "top_audpll_d8", - "top_audpll_d16", - "top_audpll_d24", - "top_audintbus_sel", - "clk_26m", - "top_syspll1_d4", - "top_aud_k1_src_sel", - "top_aud_k2_src_sel", - "top_aud_k3_src_sel", - "top_aud_k4_src_sel", - "top_aud_k5_src_sel", - "top_aud_k6_src_sel", - "top_aud_k1_src_div", - "top_aud_k2_src_div", - "top_aud_k3_src_div", - "top_aud_k4_src_div", - "top_aud_k5_src_div", - "top_aud_k6_src_div", - "top_aud_i2s1_mclk", - "top_aud_i2s2_mclk", - "top_aud_i2s3_mclk", - "top_aud_i2s4_mclk", - "top_aud_i2s5_mclk", - "top_aud_i2s6_mclk", - "top_asm_m_sel", - "top_asm_h_sel", - "top_univpll2_d4", - "top_univpll2_d2", - "top_syspll_d5"; + "i2s0_src_sel", + "i2s1_src_sel", + "i2s2_src_sel", + "i2s3_src_sel", + "i2s0_src_div", + "i2s1_src_div", + 
"i2s2_src_div", + "i2s3_src_div", + "i2s0_mclk_en", + "i2s1_mclk_en", + "i2s2_mclk_en", + "i2s3_mclk_en", + "i2so0_hop_ck", + "i2so1_hop_ck", + "i2so2_hop_ck", + "i2so3_hop_ck", + "i2si0_hop_ck", + "i2si1_hop_ck", + "i2si2_hop_ck", + "i2si3_hop_ck", + "asrc0_out_ck", + "asrc1_out_ck", + "asrc2_out_ck", + "asrc3_out_ck", + "audio_afe_pd", + "audio_afe_conn_pd", + "audio_a1sys_pd", + "audio_a2sys_pd", + "audio_mrgif_pd"; + + assigned-clocks = <&topckgen CLK_TOP_AUD_MUX1_SEL>, + <&topckgen CLK_TOP_AUD_MUX2_SEL>, + <&topckgen CLK_TOP_AUD_MUX1_DIV>, + <&topckgen CLK_TOP_AUD_MUX2_DIV>; + assigned-clock-parents = <&topckgen CLK_TOP_AUD1PLL_98M>, + <&topckgen CLK_TOP_AUD2PLL_90M>; + assigned-clock-rates = <0>, <0>, <49152000>, <45158400>; }; -- cgit v1.2.3 From 79d0895140e937ba111e6420b4cd83ee75efa788 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 2 Jan 2018 19:44:37 -0200 Subject: sctp: fix error path in sctp_stream_init syzbot noticed a NULL pointer dereference panic in sctp_stream_free() which was caused by an incomplete error handling in sctp_stream_init(). By not clearing stream->outcnt, it made a for() in sctp_stream_free() think that it had elements to free, but not, leading to the panic. As suggested by Xin Long, this patch also simplifies the error path by moving it to the only if() that uses it. See-also: https://www.spinics.net/lists/netdev/msg473756.html See-also: https://www.spinics.net/lists/netdev/msg465024.html Reported-by: syzbot Fixes: f952be79cebd ("sctp: introduce struct sctp_stream_out_ext") Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. 
Miller --- net/sctp/stream.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 76ea66be0bbe..524dfeb94c41 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, sctp_stream_outq_migrate(stream, NULL, outcnt); sched->sched_all(stream); - i = sctp_stream_alloc_out(stream, outcnt, gfp); - if (i) - return i; + ret = sctp_stream_alloc_out(stream, outcnt, gfp); + if (ret) + goto out; stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) @@ -170,19 +170,17 @@ in: if (!incnt) goto out; - i = sctp_stream_alloc_in(stream, incnt, gfp); - if (i) { - ret = -ENOMEM; - goto free; + ret = sctp_stream_alloc_in(stream, incnt, gfp); + if (ret) { + sched->free(stream); + kfree(stream->out); + stream->out = NULL; + stream->outcnt = 0; + goto out; } stream->incnt = incnt; - goto out; -free: - sched->free(stream); - kfree(stream->out); - stream->out = NULL; out: return ret; } -- cgit v1.2.3 From f1c8d3720f2e6c8c2b209120678236debd0360e5 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 2 Jan 2018 14:05:19 -0800 Subject: vxlan: trivial indenting fix. Fix indentation of reserved_flags2 field in vxlanhdr_gpe. Fixes: e1e5314de08b ("vxlan: implement GPE") Signed-off-by: William Tu Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/net/vxlan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 13223396dc64..f96391e84a8a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -146,7 +146,7 @@ struct vxlanhdr_gpe { np_applied:1, instance_applied:1, version:2, -reserved_flags2:2; + reserved_flags2:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved_flags2:2, version:2, -- cgit v1.2.3 From 64e711ca59ef9b7873d77ef06bc174aa01af9115 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 17 Nov 2017 15:51:47 -0800 Subject: i40e: Remove UDP support for big buffer Since UDP based filters are not supported via big buffer cloud filters, remove UDP support. Also change a few return types to indicate unsupported vs invalid configuration. Signed-off-by: Amritha Nambiar Acked-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 321d8be80871..fffd4868defb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6038,8 +6038,8 @@ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi) /* Set Bit 7 to be valid */ mode = I40E_AQ_SET_SWITCH_BIT7_VALID; - /* Set L4type to both TCP and UDP support */ - mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH; + /* Set L4type for TCP support */ + mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP; /* Set cloud filter mode */ mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; @@ -6969,18 +6969,18 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, is_valid_ether_addr(filter->src_mac)) || (is_multicast_ether_addr(filter->dst_mac) && is_multicast_ether_addr(filter->src_mac))) - return -EINVAL; + return -EOPNOTSUPP; - /* Make sure port is specified, otherwise bail out, for channel - * 
specific cloud filter needs 'L4 port' to be non-zero + /* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP + * ports are not supported via big buffer now. */ - if (!filter->dst_port) - return -EINVAL; + if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP) + return -EOPNOTSUPP; /* adding filter using src_port/src_ip is not supported at this stage */ if (filter->src_port || filter->src_ipv4 || !ipv6_addr_any(&filter->ip.v6.src_ip6)) - return -EINVAL; + return -EOPNOTSUPP; /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter.element); @@ -6991,7 +6991,7 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, is_multicast_ether_addr(filter->src_mac)) { /* MAC + IP : unsupported mode */ if (filter->dst_ipv4) - return -EINVAL; + return -EOPNOTSUPP; /* since we validated that L4 port must be valid before * we get here, start with respective "flags" value -- cgit v1.2.3 From e90f686b4358d7d7e5dbaa48b8e78c9a4e41826e Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Wed, 3 Jan 2018 10:39:29 +0800 Subject: net: fec: restore dev_id in the cases of probe error The static variable dev_id is always incremented by one before the netdev is registered. The dev_id value should be restored in the case of a probe error. Signed-off-by: Fugang Duan Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/fec_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8184d2fca9be..6a4fc2b35488 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3556,6 +3556,7 @@ failed_phy: of_node_put(phy_node); failed_ioremap: free_netdev(ndev); + dev_id--; return ret; } -- cgit v1.2.3 From 3f38c683033a9a0a2738e7067f449deefabfa3ef Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Wed, 3 Jan 2018 10:39:30 +0800 Subject: net: fec: defer probe if regulator is not ready Defer probe if regulator is not ready. E.g. some regulator is fixed regulator controlled by i2c expander gpio, the i2c device may be probed after the driver, then it should handle the case of defer probe error. Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 6a4fc2b35488..19f198e22e15 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3469,6 +3469,10 @@ fec_probe(struct platform_device *pdev) goto failed_regulator; } } else { + if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto failed_regulator; + } fep->reg_phy = NULL; } -- cgit v1.2.3 From 248de22e638f10bd5bfc7624a357f940f66ba137 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 8 Dec 2017 10:55:04 -0800 Subject: i40e/i40evf: Account for frags split over multiple descriptors in check linearize The original code for __i40e_chk_linearize didn't take into account the fact that if a fragment is 16K in size or larger it has to be split over 2 descriptors and the smaller of those 2 descriptors will be on the trailing edge of the transmit. 
As a result we can get into situations where we didn't catch requests that could result in a Tx hang. This patch takes care of that by subtracting the length of all but the trailing edge of the stale fragment before we test for sum. By doing this we can guarantee that we have all cases covered, including the case of a fragment that spans multiple descriptors. We don't need to worry about checking the inner portions of this since 12K is the maximum aligned DMA size and that is larger than any MSS will ever be since the MTU limit for jumbos is something on the order of 9K. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 26 +++++++++++++++++++++++--- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 26 +++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 4566d66ffc7c..5bc2748ac468 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3047,10 +3047,30 @@ bool __i40e_chk_linearize(struct sk_buff *skb) /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ - stale = &skb_shinfo(skb)->frags[0]; - for (;;) { + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + sum += skb_frag_size(frag++); + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. 
+ */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + /* if sum is negative we failed to make sufficient progress */ if (sum < 0) return true; @@ -3058,7 +3078,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (!nr_frags--) break; - sum -= skb_frag_size(stale++); + sum -= stale_size; } return false; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 50864f99446d..1ba29bb85b67 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -2012,10 +2012,30 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) /* Walk through fragments adding latest fragment, testing it, and * then removing stale fragments from the sum. */ - stale = &skb_shinfo(skb)->frags[0]; - for (;;) { + for (stale = &skb_shinfo(skb)->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + sum += skb_frag_size(frag++); + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. 
+ */ + if (stale_size > I40E_MAX_DATA_PER_TXD) { + int align_pad = -(stale->page_offset) & + (I40E_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= I40E_MAX_DATA_PER_TXD_ALIGNED; + stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED; + } while (stale_size > I40E_MAX_DATA_PER_TXD); + } + /* if sum is negative we failed to make sufficient progress */ if (sum < 0) return true; @@ -2023,7 +2043,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (!nr_frags--) break; - sum -= skb_frag_size(stale++); + sum -= stale_size; } return false; -- cgit v1.2.3 From 458867b2ca0c987445c5d9adccd1642970e1ba07 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 20 Dec 2017 11:04:36 -0500 Subject: i40e: don't remove netdev->dev_addr when syncing uc list In some circumstances, such as with bridging, it is possible that the stack will add a devices own MAC address to its unicast address list. If, later, the stack deletes this address, then the i40e driver will receive a request to remove this address. The driver stores its current MAC address as part of the MAC/VLAN hash array, since it is convenient and matches exactly how the hardware expects to be told which traffic to receive. This causes a problem, since for more devices, the MAC address is stored separately, and requests to delete a unicast address should not have the ability to remove the filter for the MAC address. Fix this by forcing a check on every address sync to ensure we do not remove the device address. There is a very narrow possibility of a race between .set_mac and .set_rx_mode, if we don't change netdev->dev_addr before updating our internal MAC list in .set_mac. This might be possible if .set_rx_mode is going to remove MAC "XYZ" from the list, at the same time as .set_mac changes our dev_addr to MAC "XYZ", we might possibly queue a delete, then an add in .set_mac, then queue a delete in .set_rx_mode's dev_uc_sync and then update netdev->dev_addr. 
We can avoid this by moving the copy into dev_addr prior to the changes to the MAC filter list. A similar race on the other side does not cause problems, as if we're changing our MAC from A to B, and we race with .set_rx_mode, it could queue a delete from A, we'd update our address, and allow the delete. This seems like a race, but in reality we're about to queue a delete of A anyways, so it would not cause any issues. A race in the initialization code is unlikely because the netdevice has not yet been fully initialized and the stack should not be adding or removing addresses yet. Note that we don't (yet) need similar code for the VF driver because it does not make use of __dev_uc_sync and __dev_mc_sync, but instead rolls its own method for handling updates to the MAC/VLAN list, which already has code to protect against removal of the hardware address. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index fffd4868defb..9e4b78e447f8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1573,11 +1573,18 @@ static int i40e_set_mac(struct net_device *netdev, void *p) else netdev_info(netdev, "set new mac address %pM\n", addr->sa_data); + /* Copy the address first, so that we avoid a possible race with + * .set_rx_mode(). If we copy after changing the address in the filter + * list, we might open ourselves to a narrow race window where + * .set_rx_mode could delete our dev_addr filter and prevent traffic + * from passing.
+ */ + ether_addr_copy(netdev->dev_addr, addr->sa_data); + spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); i40e_add_mac_filter(vsi, addr->sa_data); spin_unlock_bh(&vsi->mac_filter_hash_lock); - ether_addr_copy(netdev->dev_addr, addr->sa_data); if (vsi->type == I40E_VSI_MAIN) { i40e_status ret; @@ -1923,6 +1930,14 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC/VLAN filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + i40e_del_mac_filter(vsi, addr); return 0; -- cgit v1.2.3 From 2ca69d73bc05a55edb95689d436ce87974a3162e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 22 Dec 2017 05:29:03 +0000 Subject: ASoC: rcar: tidyup simple-card example for CPU node commit a5702e1cb3c ("ASoC: rsnd: Drop unit-addresses without reg properties") modifies simple-card multi CPU nodes. But, naming of "cpu-x" breaks probing. Let's add reg = ; instead of renaming node. Reported-by: Hiroyuki Yokoyama CC: Geert Uytterhoeven Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/renesas,rsnd.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt index b3c28bdcc268..5bed9a595772 100644 --- a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt +++ b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt @@ -197,12 +197,17 @@ Ex) [MEM] -> [SRC2] -> [CTU03] -+ sound { + #address-cells = <1>; + #size-cells = <0>; + compatible = "simple-scu-audio-card"; ... 
- simple-audio-card,cpu-0 { + simple-audio-card,cpu@0 { + reg = <0>; sound-dai = <&rcar_sound 0>; }; - simple-audio-card,cpu-1 { + simple-audio-card,cpu@1 { + reg = <1>; sound-dai = <&rcar_sound 1>; }; simple-audio-card,codec { -- cgit v1.2.3 From bc4244c6e33f96b48c4986ce4653df4673c6a08e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 22 Dec 2017 12:45:16 +0100 Subject: i40e: flower: Fix return value for unsupported offload When filter configuration is not supported, drivers should return -EOPNOTSUPP so the core can react correctly. Fixes: 2f4b411a3d67 ("i40e: Enable cloud filters via tc-flower") Signed-off-by: Jiri Pirko Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9e4b78e447f8..42dcaefc4c19 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7371,7 +7371,7 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, if (tc < 0) { dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); - return -EINVAL; + return -EOPNOTSUPP; } if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || -- cgit v1.2.3 From 15962a18284552b5ec58982ff60a5e92e0c5c92b Mon Sep 17 00:00:00 2001 From: Arjun Vynipadath Date: Wed, 3 Jan 2018 11:44:07 +0530 Subject: cxgb4: Fix FW flash errors commit 96ac18f14a5a ("cxgb4: Add support for new flash parts") removed initialization of adapter->params.sf_fw_start causing issues while flashing firmware to card. We no longer need sf_fw_start in adapter->params as we already have macros defined for FW flash addresses. Fixes: 96ac18f14a5a ("cxgb4: Add support for new flash parts") Signed-off-by: Arjun Vynipadath Signed-off-by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 - drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 17 ++++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 6f9fa6e3c42a..d8424ed16c33 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -344,7 +344,6 @@ struct adapter_params { unsigned int sf_size; /* serial flash size in bytes */ unsigned int sf_nsec; /* # of flash sectors */ - unsigned int sf_fw_start; /* start of FW image in flash */ unsigned int fw_vers; /* firmware version */ unsigned int bs_vers; /* bootstrap version */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index f63210f15579..375ef86a84da 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2844,8 +2844,6 @@ enum { SF_RD_DATA_FAST = 0xb, /* read flash */ SF_RD_ID = 0x9f, /* read ID */ SF_ERASE_SECTOR = 0xd8, /* erase sector */ - - FW_MAX_SIZE = 16 * SF_SEC_SIZE, }; /** @@ -3558,8 +3556,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) const __be32 *p = (const __be32 *)fw_data; const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data; unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; - unsigned int fw_img_start = adap->params.sf_fw_start; - unsigned int fw_start_sec = fw_img_start / sf_sec_size; + unsigned int fw_start_sec = FLASH_FW_START_SEC; + unsigned int fw_size = FLASH_FW_MAX_SIZE; + unsigned int fw_start = FLASH_FW_START; if (!size) { dev_err(adap->pdev_dev, "FW image has no data\n"); @@ -3575,9 +3574,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) "FW image size differs from size in FW header\n"); return -EINVAL; } - if (size > FW_MAX_SIZE) { + if (size > fw_size) { dev_err(adap->pdev_dev, "FW image too large, max is %u 
bytes\n", - FW_MAX_SIZE); + fw_size); return -EFBIG; } if (!t4_fw_matches_chip(adap, hdr)) @@ -3604,11 +3603,11 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); - ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); if (ret) goto out; - addr = fw_img_start; + addr = fw_start; for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; @@ -3618,7 +3617,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) } ret = t4_write_flash(adap, - fw_img_start + offsetof(struct fw_hdr, fw_ver), + fw_start + offsetof(struct fw_hdr, fw_ver), sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); out: if (ret) -- cgit v1.2.3 From 7853b49ce8e0ef6364d24512b287463841d71bd3 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Wed, 3 Jan 2018 06:17:29 +0000 Subject: net: ena: unmask MSI-X only after device initialization is completed Under certain conditions MSI-X interrupt might arrive right after it was unmasked in ena_up(). There is a chance it would be processed by the driver before device ENA_FLAG_DEV_UP flag is set. In such a case the interrupt is ignored. ENA device operates in auto-masked mode, therefore ignoring interrupt leaves it masked for good. Moving unmask of interrupt to be the last step in ena_up(). Signed-off-by: Netanel Belgazal Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 97c5a89a9cf7..6fb28fd43eb3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1565,7 +1565,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) static int ena_up_complete(struct ena_adapter *adapter) { - int rc, i; + int rc; rc = ena_rss_configure(adapter); if (rc) @@ -1584,17 +1584,6 @@ static int ena_up_complete(struct ena_adapter *adapter) ena_napi_enable_all(adapter); - /* Enable completion queues interrupt */ - for (i = 0; i < adapter->num_queues; i++) - ena_unmask_interrupt(&adapter->tx_ring[i], - &adapter->rx_ring[i]); - - /* schedule napi in case we had pending packets - * from the last time we disable napi - */ - for (i = 0; i < adapter->num_queues; i++) - napi_schedule(&adapter->ena_napi[i].napi); - return 0; } @@ -1731,7 +1720,7 @@ create_err: static int ena_up(struct ena_adapter *adapter) { - int rc; + int rc, i; netdev_dbg(adapter->netdev, "%s\n", __func__); @@ -1774,6 +1763,17 @@ static int ena_up(struct ena_adapter *adapter) set_bit(ENA_FLAG_DEV_UP, &adapter->flags); + /* Enable completion queues interrupt */ + for (i = 0; i < adapter->num_queues; i++) + ena_unmask_interrupt(&adapter->tx_ring[i], + &adapter->rx_ring[i]); + + /* schedule napi in case we had pending packets + * from the last time we disable napi + */ + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); + return rc; err_up: -- cgit v1.2.3 From ee4552aaf3fef5345199b8a82e40be7245b289fb Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Wed, 3 Jan 2018 06:17:30 +0000 Subject: net: ena: fix error handling in ena_down() sequence ENA admin command queue errors are not handled as part of ena_down(). 
As a result, in case of error admin queue transitions to non-running state and aborts all subsequent commands including those coming from ena_up(). Reset scheduled by the driver from the timer service context would not proceed due to sharing rtnl with ena_up()/ena_down() Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6fb28fd43eb3..fbe21a817bd8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -75,6 +75,9 @@ static struct workqueue_struct *ena_wq; MODULE_DEVICE_TABLE(pci, ena_pci_tbl); static int ena_rss_init_default(struct ena_adapter *adapter); +static void check_for_admin_com_state(struct ena_adapter *adapter); +static void ena_destroy_device(struct ena_adapter *adapter); +static int ena_restore_device(struct ena_adapter *adapter); static void ena_tx_timeout(struct net_device *dev) { @@ -1884,6 +1887,17 @@ static int ena_close(struct net_device *netdev) if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) ena_down(adapter); + /* Check for device status and issue reset if needed*/ + check_for_admin_com_state(adapter); + if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + netif_err(adapter, ifdown, adapter->netdev, + "Destroy failure, restarting device\n"); + ena_dump_stats_to_dmesg(adapter); + /* rtnl lock already obtained in dev_ioctl() layer */ + ena_destroy_device(adapter); + ena_restore_device(adapter); + } + return 0; } @@ -2544,11 +2558,12 @@ static void ena_destroy_device(struct ena_adapter *adapter) ena_com_set_admin_running_state(ena_dev, false); - ena_close(netdev); + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); /* Before releasing the ENA resources, a device reset is required. 
* (to prevent the device from accessing them). - * In case the reset flag is set and the device is up, ena_close + * In case the reset flag is set and the device is up, ena_down() * already perform the reset, so it can be skipped. */ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) -- cgit v1.2.3 From 89434c3c35081439627baa2225622d5bd12242fe Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 3 Jan 2018 18:11:14 +0100 Subject: spi: sh-msiof: Fix timeout failures for TX-only DMA transfers When using RX (with or without TX), the DMA interrupt triggers completion when the RX FIFO has been emptied, i.e. after the full transfer has finished. However, when using TX without RX, the DMA interrupt triggers completion as soon as the DMA engine has filled the TX FIFO, i.e. before the full transfer has finished. Then sh_msiof_modify_ctr_wait() will spin until the transfer has really finished and the TFSE bit is cleared, for at most 1 ms. For slow speeds and/or large transfers, this may cause timeouts and transfer failures: spi_sh_msiof e6e10000.spi: failed to shut down hardware 74x164 spi2.0: SPI transfer failed: -110 spi_master spi2: failed to transfer one message from queue 74x164 spi2.0: Failed writing: -110 Fix this by waiting explicitly until the TX FIFO has been emptied. Based on a patch in the BSP by Hiromitsu Yamasaki. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index fcd261f98b9f..06bc4b170c47 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -784,11 +784,21 @@ static int sh_msiof_dma_once(struct sh_msiof_spi_priv *p, const void *tx, goto stop_dma; } - /* wait for tx fifo to be emptied / rx fifo to be filled */ + /* wait for tx/rx DMA completion */ ret = sh_msiof_wait_for_completion(p); if (ret) goto stop_reset; + if (!rx) { + reinit_completion(&p->done); + sh_msiof_write(p, IER, IER_TEOFE); + + /* wait for tx fifo to be emptied */ + ret = sh_msiof_wait_for_completion(p); + if (ret) + goto stop_reset; + } + /* clear status bits */ sh_msiof_reset_str(p); -- cgit v1.2.3 From e816c201aed5232171f8eb80b5d46ae6516683b9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 2 Jan 2018 15:21:33 -0800 Subject: exec: Weaken dumpability for secureexec This is a logical revert of commit e37fdb785a5f ("exec: Use secureexec for setting dumpability") This weakens dumpability back to checking only for uid/gid changes in current (which is useless), but userspace depends on dumpability not being tied to secureexec. https://bugzilla.redhat.com/show_bug.cgi?id=1528633 Reported-by: Tom Horsley Fixes: e37fdb785a5f ("exec: Use secureexec for setting dumpability") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Linus Torvalds --- fs/exec.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 5688b5e1b937..7eb8d21bcab9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1349,9 +1349,14 @@ void setup_new_exec(struct linux_binprm * bprm) current->sas_ss_sp = current->sas_ss_size = 0; - /* Figure out dumpability. */ + /* + * Figure out dumpability. Note that this checking only of current + * is wrong, but userspace depends on it. 
This should be testing + * bprm->secureexec instead. + */ if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || - bprm->secureexec) + !(uid_eq(current_euid(), current_uid()) && + gid_eq(current_egid(), current_gid()))) set_dumpable(current->mm, suid_dumpable); else set_dumpable(current->mm, SUID_DUMP_USER); -- cgit v1.2.3 From ee4aa8df70fa6d76bd776c025dc0d8d746c18317 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 3 Jan 2018 13:09:23 -0500 Subject: 3c59x: fix missing dma_mapping_error check and bad ring refill logic A few spots in 3c59x missed calls to dma_mapping_error checks, causing WARN_ONS to trigger. Clean those up. While we're at it, refactor the refill code a bit so that if skb allocation or dma mapping fails, we recycle the existing buffer. This prevents holes in the rx ring, and makes for much simpler logic. Note: This is compile only tested. Ted, if you could run this and confirm that it continues to work properly, I would appreciate it, as I currently don't have access to this hardware. Signed-off-by: Neil Horman CC: Steffen Klassert CC: "David S. Miller" Reported-by: tedheadster@gmail.com Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c59x.c | 90 +++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index f4e13a7014bd..36c8950dbd2d 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -602,7 +602,7 @@ struct vortex_private { struct sk_buff* rx_skbuff[RX_RING_SIZE]; struct sk_buff* tx_skbuff[TX_RING_SIZE]; unsigned int cur_rx, cur_tx; /* The next free ring entry */ - unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ + unsigned int dirty_tx; /* The ring entries to be free()ed. */ struct vortex_extra_stats xstats; /* NIC-specific extra stats */ struct sk_buff *tx_skb; /* Packet being eaten by bus master ctrl.
*/ dma_addr_t tx_skb_dma; /* Allocated DMA address for bus master ctrl DMA. */ @@ -618,7 +618,6 @@ struct vortex_private { /* The remainder are related to chip state, mostly media selection. */ struct timer_list timer; /* Media selection timer. */ - struct timer_list rx_oom_timer; /* Rx skb allocation retry timer */ int options; /* User-settable misc. driver options. */ unsigned int media_override:4, /* Passed-in media type. */ default_media:4, /* Read from the EEPROM/Wn3_Config. */ @@ -760,7 +759,6 @@ static void mdio_sync(struct vortex_private *vp, int bits); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *vp, int phy_id, int location, int value); static void vortex_timer(struct timer_list *t); -static void rx_oom_timer(struct timer_list *t); static netdev_tx_t vortex_start_xmit(struct sk_buff *skb, struct net_device *dev); static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb, @@ -1601,7 +1599,6 @@ vortex_up(struct net_device *dev) timer_setup(&vp->timer, vortex_timer, 0); mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait)); - timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0); if (vortex_debug > 1) pr_debug("%s: Initial media type %s.\n", @@ -1676,7 +1673,7 @@ vortex_up(struct net_device *dev) window_write16(vp, 0x0040, 4, Wn4_NetDiag); if (vp->full_bus_master_rx) { /* Boomerang bus master. */ - vp->cur_rx = vp->dirty_rx = 0; + vp->cur_rx = 0; /* Initialize the RxEarly register as recommended. */ iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD); iowrite32(0x0020, ioaddr + PktStatus); @@ -1729,6 +1726,7 @@ vortex_open(struct net_device *dev) struct vortex_private *vp = netdev_priv(dev); int i; int retval; + dma_addr_t dma; /* Use the now-standard shared IRQ implementation. */ if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ? @@ -1753,7 +1751,11 @@ vortex_open(struct net_device *dev) break; /* Bad news! 
*/ skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */ - vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); + dma = pci_map_single(VORTEX_PCI(vp), skb->data, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma)) + break; + vp->rx_ring[i].addr = cpu_to_le32(dma); } if (i != RX_RING_SIZE) { pr_emerg("%s: no memory for rx ring\n", dev->name); @@ -2067,6 +2069,12 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev) int len = (skb->len + 3) & ~3; vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) { + dev_kfree_skb_any(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + spin_lock_irq(&vp->window_lock); window_set(vp, 7); iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr); @@ -2593,7 +2601,7 @@ boomerang_rx(struct net_device *dev) int entry = vp->cur_rx % RX_RING_SIZE; void __iomem *ioaddr = vp->ioaddr; int rx_status; - int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx; + int rx_work_limit = RX_RING_SIZE; if (vortex_debug > 5) pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS)); @@ -2614,7 +2622,8 @@ boomerang_rx(struct net_device *dev) } else { /* The packet length: up to 4.5K!. */ int pkt_len = rx_status & 0x1fff; - struct sk_buff *skb; + struct sk_buff *skb, *newskb; + dma_addr_t newdma; dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr); if (vortex_debug > 4) @@ -2633,9 +2642,27 @@ boomerang_rx(struct net_device *dev) pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); vp->rx_copy++; } else { + /* Pre-allocate the replacement skb. 
If it or its + * mapping fails then recycle the buffer thats already + * in place + */ + newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ); + if (!newskb) { + dev->stats.rx_dropped++; + goto clear_complete; + } + newdma = pci_map_single(VORTEX_PCI(vp), newskb->data, + PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) { + dev->stats.rx_dropped++; + consume_skb(newskb); + goto clear_complete; + } + /* Pass up the skbuff already on the Rx ring. */ skb = vp->rx_skbuff[entry]; - vp->rx_skbuff[entry] = NULL; + vp->rx_skbuff[entry] = newskb; + vp->rx_ring[entry].addr = cpu_to_le32(newdma); skb_put(skb, pkt_len); pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); vp->rx_nocopy++; @@ -2653,55 +2680,15 @@ boomerang_rx(struct net_device *dev) netif_rx(skb); dev->stats.rx_packets++; } - entry = (++vp->cur_rx) % RX_RING_SIZE; - } - /* Refill the Rx ring buffers. */ - for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) { - struct sk_buff *skb; - entry = vp->dirty_rx % RX_RING_SIZE; - if (vp->rx_skbuff[entry] == NULL) { - skb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ); - if (skb == NULL) { - static unsigned long last_jif; - if (time_after(jiffies, last_jif + 10 * HZ)) { - pr_warn("%s: memory shortage\n", - dev->name); - last_jif = jiffies; - } - if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) - mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1)); - break; /* Bad news! */ - } - vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); - vp->rx_skbuff[entry] = skb; - } +clear_complete: vp->rx_ring[entry].status = 0; /* Clear complete bit. */ iowrite16(UpUnstall, ioaddr + EL3_CMD); + entry = (++vp->cur_rx) % RX_RING_SIZE; } return 0; } -/* - * If we've hit a total OOM refilling the Rx ring we poll once a second - * for some memory. Otherwise there is no way to restart the rx process. 
- */ -static void -rx_oom_timer(struct timer_list *t) -{ - struct vortex_private *vp = from_timer(vp, t, rx_oom_timer); - struct net_device *dev = vp->mii.dev; - - spin_lock_irq(&vp->lock); - if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */ - boomerang_rx(dev); - if (vortex_debug > 1) { - pr_debug("%s: rx_oom_timer %s\n", dev->name, - ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying"); - } - spin_unlock_irq(&vp->lock); -} - static void vortex_down(struct net_device *dev, int final_down) { @@ -2711,7 +2698,6 @@ vortex_down(struct net_device *dev, int final_down) netdev_reset_queue(dev); netif_stop_queue(dev); - del_timer_sync(&vp->rx_oom_timer); del_timer_sync(&vp->timer); /* Turn off statistics ASAP. We update dev->stats below. */ -- cgit v1.2.3 From d7732ba55c4b6a2da339bb12589c515830cfac2c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 3 Jan 2018 19:52:04 +0100 Subject: x86/pti: Switch to kernel CR3 at early in entry_SYSCALL_compat() The preparation for PTI which added CR3 switching to the entry code misplaced the CR3 switch in entry_SYSCALL_compat(). With PTI enabled the entry code tries to access a per cpu variable after switching to kernel GS. This fails because that variable is not mapped to user space. This results in a double fault and in the worst case a kernel crash. Move the switch ahead of the access and clobber RSP which has been saved already. 
Fixes: 8a09317b895f ("x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching") Reported-by: Lars Wendler Reported-by: Laura Abbott Signed-off-by: Thomas Gleixner Cc: Borislav Betkov Cc: Andy Lutomirski , Cc: Dave Hansen , Cc: Peter Zijlstra , Cc: Greg KH , , Cc: Boris Ostrovsky , Cc: Juergen Gross Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801031949200.1957@nanos --- arch/x86/entry/entry_64_compat.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 40f17009ec20..98d5358e4041 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -190,8 +190,13 @@ ENTRY(entry_SYSCALL_compat) /* Interrupts are off on entry. */ swapgs - /* Stash user ESP and switch to the kernel stack. */ + /* Stash user ESP */ movl %esp, %r8d + + /* Use %rsp as scratch reg. User ESP is stashed in r8 */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + + /* Switch to the kernel stack */ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* Construct struct pt_regs on stack */ @@ -219,12 +224,6 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq $0 /* pt_regs->r14 = 0 */ pushq $0 /* pt_regs->r15 = 0 */ - /* - * We just saved %rdi so it is safe to clobber. It is not - * preserved during the C calls inside TRACE_IRQS_OFF anyway. - */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi - /* * User mode is traced as though IRQs are on, and SYSENTER * turned them off. -- cgit v1.2.3 From 2fd9c41aea47f4ad071accf94b94f94f2c4d31eb Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 3 Jan 2018 12:39:52 -0800 Subject: x86/process: Define cpu_tss_rw in same section as declaration cpu_tss_rw is declared with DECLARE_PER_CPU_PAGE_ALIGNED but then defined with DEFINE_PER_CPU_SHARED_ALIGNED leading to section mismatch warnings. Use DEFINE_PER_CPU_PAGE_ALIGNED consistently. 
This is necessary because it's mapped to the cpu entry area and must be page aligned. [ tglx: Massaged changelog a bit ] Fixes: 1a935bc3d4ea ("x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct") Suggested-by: Thomas Gleixner Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Cc: thomas.lendacky@amd.com Cc: Borislav Petkov Cc: tklauser@distanz.ch Cc: minipli@googlemail.com Cc: me@kylehuey.com Cc: namit@vmware.com Cc: luto@kernel.org Cc: jpoimboe@redhat.com Cc: tj@kernel.org Cc: cl@linux.com Cc: bp@suse.de Cc: thgarnie@google.com Cc: kirill.shutemov@linux.intel.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180103203954.183360-1-ndesaulniers@google.com --- arch/x86/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 517415978409..3cb2486c47e4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -47,7 +47,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { +__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { .x86_tss = { /* * .sp0 is only used when entering ring 0 from a lower -- cgit v1.2.3 From ce9caf2f79a5aa170a4b6456a03db639eed9c988 Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Fri, 29 Dec 2017 17:05:43 +0100 Subject: drm/vc4: Move IRQ enable to PM path We were calling enable_irq on bind, where it was already enabled previously by the IRQ helper. Additionally, dev->irq is not set correctly until after postinstall and so was always zero here, triggering a warning in 4.15. Fix both by moving the enable to the power management resume path, where we know there was a previous disable invocation during suspend. 
Fixes: 253696ccd613 ("drm/vc4: Account for interrupts in flight") Signed-off-by: Stefan Schake Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1514563543-32511-1-git-send-email-stschake@gmail.com Tested-by: Stefan Wahren Reviewed-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_irq.c | 3 --- drivers/gpu/drm/vc4/vc4_v3d.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 26eddbb62893..3dd62d75f531 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -209,9 +209,6 @@ vc4_irq_postinstall(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - /* Undo the effects of a previous vc4_irq_uninstall. */ - enable_irq(dev->irq); - /* Enable both the render done and out of memory interrupts. */ V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 622cd43840b8..493f392b3a0a 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -327,6 +327,9 @@ static int vc4_v3d_runtime_resume(struct device *dev) return ret; vc4_v3d_init_hw(vc4->dev); + + /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */ + enable_irq(vc4->dev->irq); vc4_irq_postinstall(vc4->dev); return 0; -- cgit v1.2.3 From bec40c26041de61162f7be9d2ce548c756ce0f65 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 3 Jan 2018 13:39:15 -0800 Subject: IB/srpt: Disable RDMA access by the initiator With the SRP protocol all RDMA operations are initiated by the target. Since no RDMA operations are initiated by the initiator, do not grant the initiator permission to submit RDMA reads or writes to the target. 
Signed-off-by: Bart Van Assche Cc: Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 8a1bd354b1cc..7c4249038004 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1013,8 +1013,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) return -ENOMEM; attr->qp_state = IB_QPS_INIT; - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; attr->port_num = ch->sport->port; attr->pkey_index = 0; -- cgit v1.2.3 From a1ffa4670cb97ae3a4b3e8535d88be5f643f7c3b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 3 Jan 2018 13:39:16 -0800 Subject: IB/srpt: Fix ACL lookup during login Make sure that the initiator port GUID is stored in ch->ini_guid. Note: when initiating a connection sgid and dgid members in struct sa_path_rec represent the source and destination GIDs. When accepting a connection however sgid represents the destination GID and dgid the source GID. 
Fixes: commit 2bce1a6d2209 ("IB/srpt: Accept GUIDs as port names") Signed-off-by: Bart Van Assche Cc: Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 7c4249038004..bfa576aa9f03 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2077,7 +2077,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto destroy_ib; } - guid = (__be16 *)&param->primary_path->sgid.global.interface_id; + guid = (__be16 *)&param->primary_path->dgid.global.interface_id; snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x", be16_to_cpu(guid[0]), be16_to_cpu(guid[1]), be16_to_cpu(guid[2]), be16_to_cpu(guid[3])); -- cgit v1.2.3 From 121d760d0788f95619049c63449d977065cab69d Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Fri, 29 Dec 2017 02:50:08 +0800 Subject: drm/i915/gvt: Clear the shadow page table entry after post-sync A shadow page table entry needs to be cleared after being set as post-sync. This patch fixes the recent error reported in Win7-32 test.
Fixes: 2707e4446688 ("drm/i915/gvt: vGPU graphics memory virtualization") Signed-off-by: Zhi Wang CC: Stable Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 8e331142badb..64d67ff9bf08 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1359,12 +1359,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp, return ret; } else { if (!test_bit(index, spt->post_shadow_bitmap)) { + int type = spt->shadow_page.type; + ppgtt_get_shadow_entry(spt, &se, index); ret = ppgtt_handle_guest_entry_removal(gpt, &se, index); if (ret) return ret; + ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn); + ppgtt_set_shadow_entry(spt, &se, index); } - ppgtt_set_post_shadow(spt, index); } -- cgit v1.2.3 From 36464580e658019ac7be26a08c4679bee0454d2c Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 11 Dec 2017 21:39:10 +0100 Subject: dt-bindings/bcm2836-l1-intc: Add interrupt polarity support This increases the interrupt cells for the 1st level interrupt controller binding in order to describe the polarity like on the other ARM platforms. 
Reviewed-by: Rob Herring Signed-off-by: Stefan Wahren Signed-off-by: Marc Zyngier --- .../devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt index f320dcd6e69b..8ced1696c325 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2836-l1-intc.txt @@ -12,7 +12,7 @@ Required properties: registers - interrupt-controller: Identifies the node as an interrupt controller - #interrupt-cells: Specifies the number of cells needed to encode an - interrupt source. The value shall be 1 + interrupt source. The value shall be 2 Please refer to interrupts.txt in this directory for details of the common Interrupt Controllers bindings used by client devices. @@ -32,6 +32,6 @@ local_intc: local_intc { compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; interrupt-parent = <&local_intc>; }; -- cgit v1.2.3 From ad83c7cb2f37fad01f53a8748c6f8067acb2968d Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 11 Dec 2017 21:39:11 +0100 Subject: irqchip/irq-bcm2836: Add support for DT interrupt polarity In order to properly define the polarity of the per-cpu interrupts, we need support for a second property cell. But this must be optional to keep backward compatibility with old DT blobs.
Suggested-by: Marc Zyngier Signed-off-by: Stefan Wahren Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-bcm2836.c | 46 ++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index 667b9e14b032..dfe4a460340b 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -98,13 +98,35 @@ static struct irq_chip bcm2836_arm_irqchip_gpu = { .irq_unmask = bcm2836_arm_irqchip_unmask_gpu_irq, }; -static void bcm2836_arm_irqchip_register_irq(int hwirq, struct irq_chip *chip) -{ - int irq = irq_create_mapping(intc.domain, hwirq); +static int bcm2836_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + struct irq_chip *chip; + + switch (hw) { + case LOCAL_IRQ_CNTPSIRQ: + case LOCAL_IRQ_CNTPNSIRQ: + case LOCAL_IRQ_CNTHPIRQ: + case LOCAL_IRQ_CNTVIRQ: + chip = &bcm2836_arm_irqchip_timer; + break; + case LOCAL_IRQ_GPU_FAST: + chip = &bcm2836_arm_irqchip_gpu; + break; + case LOCAL_IRQ_PMU_FAST: + chip = &bcm2836_arm_irqchip_pmu; + break; + default: + pr_warn_once("Unexpected hw irq: %lu\n", hw); + return -EINVAL; + } irq_set_percpu_devid(irq); - irq_set_chip_and_handler(irq, chip, handle_percpu_devid_irq); + irq_domain_set_info(d, irq, hw, chip, d->host_data, + handle_percpu_devid_irq, NULL, NULL); irq_set_status_flags(irq, IRQ_NOAUTOEN); + + return 0; } static void @@ -165,7 +187,8 @@ static int bcm2836_cpu_dying(unsigned int cpu) #endif static const struct irq_domain_ops bcm2836_arm_irqchip_intc_ops = { - .xlate = irq_domain_xlate_onecell + .xlate = irq_domain_xlate_onetwocell, + .map = bcm2836_map, }; static void @@ -218,19 +241,6 @@ static int __init bcm2836_arm_irqchip_l1_intc_of_init(struct device_node *node, if (!intc.domain) panic("%pOF: unable to create IRQ domain\n", node); - bcm2836_arm_irqchip_register_irq(LOCAL_IRQ_CNTPSIRQ, - &bcm2836_arm_irqchip_timer); - bcm2836_arm_irqchip_register_irq(LOCAL_IRQ_CNTPNSIRQ, - 
&bcm2836_arm_irqchip_timer); - bcm2836_arm_irqchip_register_irq(LOCAL_IRQ_CNTHPIRQ, - &bcm2836_arm_irqchip_timer); - bcm2836_arm_irqchip_register_irq(LOCAL_IRQ_CNTVIRQ, - &bcm2836_arm_irqchip_timer); - bcm2836_arm_irqchip_register_irq(LOCAL_IRQ_GPU_FAST, - &bcm2836_arm_irqchip_gpu); - bcm2836_arm_irqchip_register_irq(LOCAL_IRQ_PMU_FAST, - &bcm2836_arm_irqchip_pmu); - bcm2836_arm_irqchip_smp_init(); set_handle_irq(bcm2836_arm_irqchip_handle_irq); -- cgit v1.2.3 From b12f5d0ffcdcd1dc9c732a5be72afdc6a7d627cf Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 11 Dec 2017 21:39:12 +0100 Subject: dt-bindings/bcm283x: Define polarity of per-cpu interrupts This patch defines the polarity of the per-cpu interrupts on BCM2836 and BCM2837 in order to avoid the warnings from ARM arch timer code: arch_timer: WARNING: Invalid trigger for IRQ19, assuming level low arch_timer: WARNING: Please fix your firmware arch_timer: cp15 timer(s) running at 19.20MHz (virt). Signed-off-by: Stefan Wahren Signed-off-by: Marc Zyngier --- arch/arm/boot/dts/bcm2836.dtsi | 14 +++++++------- arch/arm/boot/dts/bcm2837.dtsi | 12 ++++++------ arch/arm/boot/dts/bcm283x.dtsi | 1 + 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/arm/boot/dts/bcm2836.dtsi b/arch/arm/boot/dts/bcm2836.dtsi index 61e158003509..1dfd76442777 100644 --- a/arch/arm/boot/dts/bcm2836.dtsi +++ b/arch/arm/boot/dts/bcm2836.dtsi @@ -13,24 +13,24 @@ compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; interrupt-parent = <&local_intc>; }; arm-pmu { compatible = "arm,cortex-a7-pmu"; interrupt-parent = <&local_intc>; - interrupts = <9>; + interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; }; }; timer { compatible = "arm,armv7-timer"; interrupt-parent = <&local_intc>; - interrupts = <0>, // PHYS_SECURE_PPI - <1>, // PHYS_NONSECURE_PPI - <3>, // VIRT_PPI - <2>; // HYP_PPI + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>, // PHYS_SECURE_PPI + <1
IRQ_TYPE_LEVEL_HIGH>, // PHYS_NONSECURE_PPI + <3 IRQ_TYPE_LEVEL_HIGH>, // VIRT_PPI + <2 IRQ_TYPE_LEVEL_HIGH>; // HYP_PPI always-on; }; @@ -76,7 +76,7 @@ compatible = "brcm,bcm2836-armctrl-ic"; reg = <0x7e00b200 0x200>; interrupt-parent = <&local_intc>; - interrupts = <8>; + interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; }; &cpu_thermal { diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index bc1cca5cf43c..efa7d3387ab2 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -12,7 +12,7 @@ compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; interrupt-parent = <&local_intc>; }; }; @@ -20,10 +20,10 @@ timer { compatible = "arm,armv7-timer"; interrupt-parent = <&local_intc>; - interrupts = <0>, // PHYS_SECURE_PPI - <1>, // PHYS_NONSECURE_PPI - <3>, // VIRT_PPI - <2>; // HYP_PPI + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>, // PHYS_SECURE_PPI + <1 IRQ_TYPE_LEVEL_HIGH>, // PHYS_NONSECURE_PPI + <3 IRQ_TYPE_LEVEL_HIGH>, // VIRT_PPI + <2 IRQ_TYPE_LEVEL_HIGH>; // HYP_PPI always-on; }; @@ -73,7 +73,7 @@ compatible = "brcm,bcm2836-armctrl-ic"; reg = <0x7e00b200 0x200>; interrupt-parent = <&local_intc>; - interrupts = <8>; + interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; }; &cpu_thermal { diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index dcde93c85c2d..18db25a5a66e 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -2,6 +2,7 @@ #include #include #include +#include /* firmware-provided startup stubs live here, where the secondary CPUs are * spinning. -- cgit v1.2.3 From 404e6bea10662f0e142748353169d25378271e49 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 2 Jan 2018 11:47:19 +0000 Subject: irqchip/ompic: fix return value check in ompic_of_init() In case of error, the function ioremap() returns NULL pointer not ERR_PTR(). 
The IS_ERR() test in the return value check should be replaced with NULL test. Fixes: 9b54470afd83 ("irqchip: add initial support for ompic") Acked-by: Stafford Horne Signed-off-by: Wei Yongjun Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-ompic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-ompic.c b/drivers/irqchip/irq-ompic.c index cf6d0c455518..e66ef4373b1e 100644 --- a/drivers/irqchip/irq-ompic.c +++ b/drivers/irqchip/irq-ompic.c @@ -171,9 +171,9 @@ static int __init ompic_of_init(struct device_node *node, /* Setup the device */ ompic_base = ioremap(res.start, resource_size(&res)); - if (IS_ERR(ompic_base)) { + if (!ompic_base) { pr_err("ompic: unable to map registers"); - return PTR_ERR(ompic_base); + return -ENOMEM; } irq = irq_of_parse_and_map(node, 0); -- cgit v1.2.3 From c2ba80af4805543ace4928191d877ffe706087e1 Mon Sep 17 00:00:00 2001 From: Miodrag Dinic Date: Fri, 29 Dec 2017 16:41:45 +0100 Subject: dt-bindings/goldfish-pic: Add device tree binding for Goldfish PIC driver Add documentation for DT binding of Goldfish PIC driver. The compatible string used by OS for binding the driver is "google,goldfish-pic". 
Acked-by: Rob Herring Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Signed-off-by: Marc Zyngier --- .../interrupt-controller/google,goldfish-pic.txt | 30 ++++++++++++++++++++++ MAINTAINERS | 5 ++++ 2 files changed, 35 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt diff --git a/Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt b/Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt new file mode 100644 index 000000000000..35f752706e7d --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt @@ -0,0 +1,30 @@ +Android Goldfish PIC + +Android Goldfish programmable interrupt device used by Android +emulator. + +Required properties: + +- compatible : should contain "google,goldfish-pic" +- reg : +- interrupts : + +Example for mips when used in cascade mode: + + cpuintc { + #interrupt-cells = <0x1>; + #address-cells = <0>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + interrupt-controller@1f000000 { + compatible = "google,goldfish-pic"; + reg = <0x1f000000 0x1000>; + + interrupt-controller; + #interrupt-cells = <0x1>; + + interrupt-parent = <&cpuintc>; + interrupts = <0x2>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index b46c9cea5ae5..fe9a60ac81d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -867,6 +867,11 @@ S: Supported F: drivers/android/ F: drivers/staging/android/ +ANDROID GOLDFISH PIC DRIVER +M: Miodrag Dinic +S: Supported +F: Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt + ANDROID GOLDFISH RTC DRIVER M: Miodrag Dinic S: Supported -- cgit v1.2.3 From 4235ff50cf98dd42ba15175687570f9f03e124a1 Mon Sep 17 00:00:00 2001 From: Miodrag Dinic Date: Fri, 29 Dec 2017 16:41:46 +0100 Subject: irqchip/irq-goldfish-pic: Add Goldfish PIC driver Add device driver for a virtual programmable interrupt controller 
The virtual PIC is designed as a device tree-based interrupt controller. The compatible string used by OS for binding the driver is "google,goldfish-pic". Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Signed-off-by: Marc Zyngier --- MAINTAINERS | 1 + drivers/irqchip/Kconfig | 8 +++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-goldfish-pic.c | 139 +++++++++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+) create mode 100644 drivers/irqchip/irq-goldfish-pic.c diff --git a/MAINTAINERS b/MAINTAINERS index fe9a60ac81d8..f3b9b8775e41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -871,6 +871,7 @@ ANDROID GOLDFISH PIC DRIVER M: Miodrag Dinic S: Supported F: Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.txt +F: drivers/irqchip/irq-goldfish-pic.c ANDROID GOLDFISH RTC DRIVER M: Miodrag Dinic diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index c70476b34a53..d913aec85109 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -343,4 +343,12 @@ config MESON_IRQ_GPIO help Support Meson SoC Family GPIO Interrupt Multiplexer +config GOLDFISH_PIC + bool "Goldfish programmable interrupt controller" + depends on MIPS && (GOLDFISH || COMPILE_TEST) + select IRQ_DOMAIN + help + Say yes here to enable Goldfish interrupt controller driver used + for Goldfish based virtual platforms. 
+ endmenu diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index d2df34a54d38..d27e3e3619e0 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -84,3 +84,4 @@ obj-$(CONFIG_QCOM_IRQ_COMBINER) += qcom-irq-combiner.o obj-$(CONFIG_IRQ_UNIPHIER_AIDET) += irq-uniphier-aidet.o obj-$(CONFIG_ARCH_SYNQUACER) += irq-sni-exiu.o obj-$(CONFIG_MESON_IRQ_GPIO) += irq-meson-gpio.o +obj-$(CONFIG_GOLDFISH_PIC) += irq-goldfish-pic.o diff --git a/drivers/irqchip/irq-goldfish-pic.c b/drivers/irqchip/irq-goldfish-pic.c new file mode 100644 index 000000000000..2a92f03c73e4 --- /dev/null +++ b/drivers/irqchip/irq-goldfish-pic.c @@ -0,0 +1,139 @@ +/* + * Driver for MIPS Goldfish Programmable Interrupt Controller. + * + * Author: Miodrag Dinic + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#define GFPIC_NR_IRQS 32 + +/* 8..39 Cascaded Goldfish PIC interrupts */ +#define GFPIC_IRQ_BASE 8 + +#define GFPIC_REG_IRQ_PENDING 0x04 +#define GFPIC_REG_IRQ_DISABLE_ALL 0x08 +#define GFPIC_REG_IRQ_DISABLE 0x0c +#define GFPIC_REG_IRQ_ENABLE 0x10 + +struct goldfish_pic_data { + void __iomem *base; + struct irq_domain *irq_domain; +}; + +static void goldfish_pic_cascade(struct irq_desc *desc) +{ + struct goldfish_pic_data *gfpic = irq_desc_get_handler_data(desc); + struct irq_chip *host_chip = irq_desc_get_chip(desc); + u32 pending, hwirq, virq; + + chained_irq_enter(host_chip, desc); + + pending = readl(gfpic->base + GFPIC_REG_IRQ_PENDING); + while (pending) { + hwirq = __fls(pending); + virq = irq_linear_revmap(gfpic->irq_domain, hwirq); + generic_handle_irq(virq); + pending &= ~(1 << hwirq); + } + + chained_irq_exit(host_chip, desc); +} + +static const struct irq_domain_ops goldfish_irq_domain_ops = { + .xlate = irq_domain_xlate_onecell, +}; + +static int __init goldfish_pic_of_init(struct device_node *of_node, + struct device_node *parent) +{ + struct goldfish_pic_data *gfpic; + struct irq_chip_generic *gc; + struct irq_chip_type *ct; + unsigned int parent_irq; + int ret = 0; + + gfpic = kzalloc(sizeof(*gfpic), GFP_KERNEL); + if (!gfpic) { + ret = -ENOMEM; + goto out_err; + } + + parent_irq = irq_of_parse_and_map(of_node, 0); + if (!parent_irq) { + pr_err("Failed to map parent IRQ!\n"); + ret = -EINVAL; + goto out_free; + } + + gfpic->base = of_iomap(of_node, 0); + if (!gfpic->base) { + pr_err("Failed to map base address!\n"); + ret = -ENOMEM; + goto out_unmap_irq; + } + + /* Mask interrupts. 
*/ + writel(1, gfpic->base + GFPIC_REG_IRQ_DISABLE_ALL); + + gc = irq_alloc_generic_chip("GFPIC", 1, GFPIC_IRQ_BASE, gfpic->base, + handle_level_irq); + if (!gc) { + pr_err("Failed to allocate chip structures!\n"); + ret = -ENOMEM; + goto out_iounmap; + } + + ct = gc->chip_types; + ct->regs.enable = GFPIC_REG_IRQ_ENABLE; + ct->regs.disable = GFPIC_REG_IRQ_DISABLE; + ct->chip.irq_unmask = irq_gc_unmask_enable_reg; + ct->chip.irq_mask = irq_gc_mask_disable_reg; + + irq_setup_generic_chip(gc, IRQ_MSK(GFPIC_NR_IRQS), 0, + IRQ_NOPROBE | IRQ_LEVEL, 0); + + gfpic->irq_domain = irq_domain_add_legacy(of_node, GFPIC_NR_IRQS, + GFPIC_IRQ_BASE, 0, + &goldfish_irq_domain_ops, + NULL); + if (!gfpic->irq_domain) { + pr_err("Failed to add irqdomain!\n"); + ret = -ENOMEM; + goto out_destroy_generic_chip; + } + + irq_set_chained_handler_and_data(parent_irq, + goldfish_pic_cascade, gfpic); + + pr_info("Successfully registered.\n"); + return 0; + +out_destroy_generic_chip: + irq_destroy_generic_chip(gc, IRQ_MSK(GFPIC_NR_IRQS), + IRQ_NOPROBE | IRQ_LEVEL, 0); +out_iounmap: + iounmap(gfpic->base); +out_unmap_irq: + irq_dispose_mapping(parent_irq); +out_free: + kfree(gfpic); +out_err: + pr_err("Failed to initialize! (errno = %d)\n", ret); + return ret; +} + +IRQCHIP_DECLARE(google_gf_pic, "google,goldfish-pic", goldfish_pic_of_init); -- cgit v1.2.3 From ebe2f8718007d5a1238bb3cb8141b5bb2b4d5773 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Tue, 5 Dec 2017 13:16:21 -0600 Subject: irqchip/gic-v3: Fix the driver probe() fail due to disabled GICC entry The ACPI specification says OS shouldn't attempt to use GICC configuration parameters if the flag ACPI_MADT_ENABLED is cleared. The ARM64-SMP code skips the disabled GICC entries but not causing any issue. However the current GICv3 driver probe bails out causing kernel panic() instead of skipping the disabled GICC interfaces. 
This issue happens on systems where redistributor regions are not in the always-on power domain and one of the GICC interfaces is marked with ACPI_MADT_ENABLED=0. This patch does two things to fix the panic. - Don't return an error in gic_acpi_match_gicc() for a disabled GICC entry. - No need to keep GICR region information for a disabled GICC entry. The kernel crash was observed on a QDF2400 platform where a GICC entry is disabled. Kernel crash traces: Kernel panic - not syncing: No interrupt controller found. CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.5 #26 [] dump_backtrace+0x0/0x218 [] show_stack+0x14/0x20 [] dump_stack+0x98/0xb8 [] panic+0x118/0x26c [] init_IRQ+0x24/0x2c [] start_kernel+0x230/0x394 [] __primary_switched+0x64/0x6c ---[ end Kernel panic - not syncing: No interrupt controller found. Disabled GICC subtable example: Subtable Type : 0B [Generic Interrupt Controller] Length : 50 Reserved : 0000 CPU Interface Number : 0000003D Processor UID : 0000003D Flags (decoded below) : 00000000 Processor Enabled : 0 Performance Interrupt Trig Mode : 0 Virtual GIC Interrupt Trig Mode : 0 Parking Protocol Version : 00000000 Performance Interrupt : 00000017 Parked Address : 0000000000000000 Base Address : 0000000000000000 Virtual GIC Base Address : 0000000000000000 Hypervisor GIC Base Address : 0000000000000000 Virtual GIC Interrupt : 00000019 Redistributor Base Address : 0000FFFF88F40000 ARM MPIDR : 000000000000000D Efficiency Class : 00 Reserved : 000000 Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index b56c3e23f0af..a874777e9b9d 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1331,6 +1331,10 @@ gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header, u32 size = reg == GIC_PIDR2_ARCH_GICv4 ?
SZ_64K * 4 : SZ_64K * 2; void __iomem *redist_base; + /* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */ + if (!(gicc->flags & ACPI_MADT_ENABLED)) + return 0; + redist_base = ioremap(gicc->gicr_base_address, size); if (!redist_base) return -ENOMEM; @@ -1380,6 +1384,13 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) return 0; + /* + * It's perfectly valid firmware can pass disabled GICC entry, driver + * should not treat as errors, skip the entry instead of probe fail. + */ + if (!(gicc->flags & ACPI_MADT_ENABLED)) + return 0; + return -ENODEV; } -- cgit v1.2.3 From 2bd7b4aacdb6efa5ccd4749c365c171b884791d2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Jan 2018 23:49:18 +0100 Subject: mmc: s3cmci: mark debug_regs[] as static The global array clashes with a newly added symbol of the same name: drivers/staging/ccree/cc_debugfs.o:(.data+0x0): multiple definition of `debug_regs' drivers/mmc/host/s3cmci.o:(.data+0x70): first defined here We should fix both; this one addresses the s3cmci driver by removing the symbol from the global namespace. While at it, this separates the declaration from the type definition and makes the variable const.
Fixes: 9bdd203b4dc8 ("s3cmci: add debugfs support for examining driver and hardware state") Fixes: b3ec9a6736f2 ("staging: ccree: staging: ccree: replace sysfs by debugfs interface") Signed-off-by: Arnd Bergmann Signed-off-by: Ulf Hansson --- drivers/mmc/host/s3cmci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index f7f157a62a4a..555c7f133eb8 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1424,7 +1424,9 @@ static const struct file_operations s3cmci_fops_state = { struct s3cmci_reg { unsigned short addr; unsigned char *name; -} debug_regs[] = { +}; + +static const struct s3cmci_reg debug_regs[] = { DBG_REG(CON), DBG_REG(PRE), DBG_REG(CMDARG), @@ -1446,7 +1448,7 @@ struct s3cmci_reg { static int s3cmci_regs_show(struct seq_file *seq, void *v) { struct s3cmci_host *host = seq->private; - struct s3cmci_reg *rptr = debug_regs; + const struct s3cmci_reg *rptr = debug_regs; for (; rptr->name; rptr++) seq_printf(seq, "SDI%s\t=0x%08x\n", rptr->name, -- cgit v1.2.3 From d72d72cd33ad59134de203b2fc4e2e4cc81e72c5 Mon Sep 17 00:00:00 2001 From: Atul Garg Date: Wed, 3 Jan 2018 20:17:36 -0800 Subject: mmc:host:sdhci-pci:Addition of Arasan PCI Controller with integrated phy. The Arasan Controller is based on an FPGA platform and has an integrated phy with specific registers used during initialization and management of different modes. The phy and the controller are integrated and the registers are very specific to Arasan. Arasan, being an IP provider, licenses these IPs to various companies for integration of the IP in custom SOCs. The custom SOCs define their own register map depending on how bits are tied inside the SOC for phy registers and on the SOC memory plan, and hence will require their own platform drivers. If more details on phy registers are required, an interface document is hosted at https://arasan.com/NF/eMMC5.1 PHY Programming in Linux.pdf.
Signed-off-by: Atul Garg Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/Makefile | 2 +- drivers/mmc/host/sdhci-pci-arasan.c | 331 ++++++++++++++++++++++++++++++++++++ drivers/mmc/host/sdhci-pci-core.c | 4 +- drivers/mmc/host/sdhci-pci.h | 7 +- 4 files changed, 340 insertions(+), 4 deletions(-) create mode 100644 drivers/mmc/host/sdhci-pci-arasan.c diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 191a04010205..84cd1388abc3 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_MMC_MXC) += mxcmmc.o obj-$(CONFIG_MMC_MXS) += mxs-mmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o obj-$(CONFIG_MMC_SDHCI_PCI) += sdhci-pci.o -sdhci-pci-y += sdhci-pci-core.o sdhci-pci-o2micro.o +sdhci-pci-y += sdhci-pci-core.o sdhci-pci-o2micro.o sdhci-pci-arasan.o obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += sdhci-pci-data.o obj-$(CONFIG_MMC_SDHCI_ACPI) += sdhci-acpi.o obj-$(CONFIG_MMC_SDHCI_PXAV3) += sdhci-pxav3.o diff --git a/drivers/mmc/host/sdhci-pci-arasan.c b/drivers/mmc/host/sdhci-pci-arasan.c new file mode 100644 index 000000000000..499f3205ec5c --- /dev/null +++ b/drivers/mmc/host/sdhci-pci-arasan.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sdhci-pci-arasan.c - Driver for Arasan PCI Controller with + * integrated phy. + * + * Copyright (C) 2017 Arasan Chip Systems Inc. 
+ * + * Author: Atul Garg + */ + +#include +#include + +#include "sdhci.h" +#include "sdhci-pci.h" + +/* Extra registers for Arasan SD/SDIO/MMC Host Controller with PHY */ +#define PHY_ADDR_REG 0x300 +#define PHY_DAT_REG 0x304 + +#define PHY_WRITE BIT(8) +#define PHY_BUSY BIT(9) +#define DATA_MASK 0xFF + +/* PHY Specific Registers */ +#define DLL_STATUS 0x00 +#define IPAD_CTRL1 0x01 +#define IPAD_CTRL2 0x02 +#define IPAD_STS 0x03 +#define IOREN_CTRL1 0x06 +#define IOREN_CTRL2 0x07 +#define IOPU_CTRL1 0x08 +#define IOPU_CTRL2 0x09 +#define ITAP_DELAY 0x0C +#define OTAP_DELAY 0x0D +#define STRB_SEL 0x0E +#define CLKBUF_SEL 0x0F +#define MODE_CTRL 0x11 +#define DLL_TRIM 0x12 +#define CMD_CTRL 0x20 +#define DATA_CTRL 0x21 +#define STRB_CTRL 0x22 +#define CLK_CTRL 0x23 +#define PHY_CTRL 0x24 + +#define DLL_ENBL BIT(3) +#define RTRIM_EN BIT(1) +#define PDB_ENBL BIT(1) +#define RETB_ENBL BIT(6) +#define ODEN_CMD BIT(1) +#define ODEN_DAT 0xFF +#define REN_STRB BIT(0) +#define REN_CMND BIT(1) +#define REN_DATA 0xFF +#define PU_CMD BIT(1) +#define PU_DAT 0xFF +#define ITAPDLY_EN BIT(0) +#define OTAPDLY_EN BIT(0) +#define OD_REL_CMD BIT(1) +#define OD_REL_DAT 0xFF +#define DLLTRM_ICP 0x8 +#define PDB_CMND BIT(0) +#define PDB_DATA 0xFF +#define PDB_STRB BIT(0) +#define PDB_CLOCK BIT(0) +#define CALDONE_MASK 0x10 +#define DLL_RDY_MASK 0x10 +#define MAX_CLK_BUF 0x7 + +/* Mode Controls */ +#define ENHSTRB_MODE BIT(0) +#define HS400_MODE BIT(1) +#define LEGACY_MODE BIT(2) +#define DDR50_MODE BIT(3) + +/* + * Controller has no specific bits for HS200/HS. + * Used BIT(4), BIT(5) for software programming. 
+ */ +#define HS200_MODE BIT(4) +#define HISPD_MODE BIT(5) + +#define OTAPDLY(x) (((x) << 1) | OTAPDLY_EN) +#define ITAPDLY(x) (((x) << 1) | ITAPDLY_EN) +#define FREQSEL(x) (((x) << 5) | DLL_ENBL) +#define IOPAD(x, y) ((x) | ((y) << 2)) + +/* Arasan private data */ +struct arasan_host { + u32 chg_clk; +}; + +static int arasan_phy_addr_poll(struct sdhci_host *host, u32 offset, u32 mask) +{ + ktime_t timeout = ktime_add_us(ktime_get(), 100); + bool failed; + u8 val = 0; + + while (1) { + failed = ktime_after(ktime_get(), timeout); + val = sdhci_readw(host, PHY_ADDR_REG); + if (!(val & mask)) + return 0; + if (failed) + return -EBUSY; + } +} + +static int arasan_phy_write(struct sdhci_host *host, u8 data, u8 offset) +{ + sdhci_writew(host, data, PHY_DAT_REG); + sdhci_writew(host, (PHY_WRITE | offset), PHY_ADDR_REG); + return arasan_phy_addr_poll(host, PHY_ADDR_REG, PHY_BUSY); +} + +static int arasan_phy_read(struct sdhci_host *host, u8 offset, u8 *data) +{ + int ret; + + sdhci_writew(host, 0, PHY_DAT_REG); + sdhci_writew(host, offset, PHY_ADDR_REG); + ret = arasan_phy_addr_poll(host, PHY_ADDR_REG, PHY_BUSY); + + /* Masking valid data bits */ + *data = sdhci_readw(host, PHY_DAT_REG) & DATA_MASK; + return ret; +} + +static int arasan_phy_sts_poll(struct sdhci_host *host, u32 offset, u32 mask) +{ + int ret; + ktime_t timeout = ktime_add_us(ktime_get(), 100); + bool failed; + u8 val = 0; + + while (1) { + failed = ktime_after(ktime_get(), timeout); + ret = arasan_phy_read(host, offset, &val); + if (ret) + return -EBUSY; + else if (val & mask) + return 0; + if (failed) + return -EBUSY; + } +} + +/* Initialize the Arasan PHY */ +static int arasan_phy_init(struct sdhci_host *host) +{ + int ret; + u8 val; + + /* Program IOPADs and wait for calibration to be done */ + if (arasan_phy_read(host, IPAD_CTRL1, &val) || + arasan_phy_write(host, val | RETB_ENBL | PDB_ENBL, IPAD_CTRL1) || + arasan_phy_read(host, IPAD_CTRL2, &val) || + arasan_phy_write(host, val | RTRIM_EN, 
IPAD_CTRL2)) + return -EBUSY; + ret = arasan_phy_sts_poll(host, IPAD_STS, CALDONE_MASK); + if (ret) + return -EBUSY; + + /* Program CMD/Data lines */ + if (arasan_phy_read(host, IOREN_CTRL1, &val) || + arasan_phy_write(host, val | REN_CMND | REN_STRB, IOREN_CTRL1) || + arasan_phy_read(host, IOPU_CTRL1, &val) || + arasan_phy_write(host, val | PU_CMD, IOPU_CTRL1) || + arasan_phy_read(host, CMD_CTRL, &val) || + arasan_phy_write(host, val | PDB_CMND, CMD_CTRL) || + arasan_phy_read(host, IOREN_CTRL2, &val) || + arasan_phy_write(host, val | REN_DATA, IOREN_CTRL2) || + arasan_phy_read(host, IOPU_CTRL2, &val) || + arasan_phy_write(host, val | PU_DAT, IOPU_CTRL2) || + arasan_phy_read(host, DATA_CTRL, &val) || + arasan_phy_write(host, val | PDB_DATA, DATA_CTRL) || + arasan_phy_read(host, STRB_CTRL, &val) || + arasan_phy_write(host, val | PDB_STRB, STRB_CTRL) || + arasan_phy_read(host, CLK_CTRL, &val) || + arasan_phy_write(host, val | PDB_CLOCK, CLK_CTRL) || + arasan_phy_read(host, CLKBUF_SEL, &val) || + arasan_phy_write(host, val | MAX_CLK_BUF, CLKBUF_SEL) || + arasan_phy_write(host, LEGACY_MODE, MODE_CTRL)) + return -EBUSY; + return 0; +} + +/* Set Arasan PHY for different modes */ +static int arasan_phy_set(struct sdhci_host *host, u8 mode, u8 otap, + u8 drv_type, u8 itap, u8 trim, u8 clk) +{ + u8 val; + int ret; + + if (mode == HISPD_MODE || mode == HS200_MODE) + ret = arasan_phy_write(host, 0x0, MODE_CTRL); + else + ret = arasan_phy_write(host, mode, MODE_CTRL); + if (ret) + return ret; + if (mode == HS400_MODE || mode == HS200_MODE) { + ret = arasan_phy_read(host, IPAD_CTRL1, &val); + if (ret) + return ret; + ret = arasan_phy_write(host, IOPAD(val, drv_type), IPAD_CTRL1); + if (ret) + return ret; + } + if (mode == LEGACY_MODE) { + ret = arasan_phy_write(host, 0x0, OTAP_DELAY); + if (ret) + return ret; + ret = arasan_phy_write(host, 0x0, ITAP_DELAY); + } else { + ret = arasan_phy_write(host, OTAPDLY(otap), OTAP_DELAY); + if (ret) + return ret; + if (mode != HS200_MODE) + 
ret = arasan_phy_write(host, ITAPDLY(itap), ITAP_DELAY); + else + ret = arasan_phy_write(host, 0x0, ITAP_DELAY); + } + if (ret) + return ret; + if (mode != LEGACY_MODE) { + ret = arasan_phy_write(host, trim, DLL_TRIM); + if (ret) + return ret; + } + ret = arasan_phy_write(host, 0, DLL_STATUS); + if (ret) + return ret; + if (mode != LEGACY_MODE) { + ret = arasan_phy_write(host, FREQSEL(clk), DLL_STATUS); + if (ret) + return ret; + ret = arasan_phy_sts_poll(host, DLL_STATUS, DLL_RDY_MASK); + if (ret) + return -EBUSY; + } + return 0; +} + +static int arasan_select_phy_clock(struct sdhci_host *host) +{ + struct sdhci_pci_slot *slot = sdhci_priv(host); + struct arasan_host *arasan_host = sdhci_pci_priv(slot); + u8 clk; + + if (arasan_host->chg_clk == host->mmc->ios.clock) + return 0; + + arasan_host->chg_clk = host->mmc->ios.clock; + if (host->mmc->ios.clock == 200000000) + clk = 0x0; + else if (host->mmc->ios.clock == 100000000) + clk = 0x2; + else if (host->mmc->ios.clock == 50000000) + clk = 0x1; + else + clk = 0x0; + + if (host->mmc_host_ops.hs400_enhanced_strobe) { + arasan_phy_set(host, ENHSTRB_MODE, 1, 0x0, 0x0, + DLLTRM_ICP, clk); + } else { + switch (host->mmc->ios.timing) { + case MMC_TIMING_LEGACY: + arasan_phy_set(host, LEGACY_MODE, 0x0, 0x0, 0x0, + 0x0, 0x0); + break; + case MMC_TIMING_MMC_HS: + case MMC_TIMING_SD_HS: + arasan_phy_set(host, HISPD_MODE, 0x3, 0x0, 0x2, + DLLTRM_ICP, clk); + break; + case MMC_TIMING_MMC_HS200: + case MMC_TIMING_UHS_SDR104: + arasan_phy_set(host, HS200_MODE, 0x2, + host->mmc->ios.drv_type, 0x0, + DLLTRM_ICP, clk); + break; + case MMC_TIMING_MMC_DDR52: + case MMC_TIMING_UHS_DDR50: + arasan_phy_set(host, DDR50_MODE, 0x1, 0x0, + 0x0, DLLTRM_ICP, clk); + break; + case MMC_TIMING_MMC_HS400: + arasan_phy_set(host, HS400_MODE, 0x1, + host->mmc->ios.drv_type, 0xa, + DLLTRM_ICP, clk); + break; + default: + break; + } + } + return 0; +} + +static int arasan_pci_probe_slot(struct sdhci_pci_slot *slot) +{ + int err; + + 
slot->host->mmc->caps |= MMC_CAP_NONREMOVABLE | MMC_CAP_8_BIT_DATA; + err = arasan_phy_init(slot->host); + if (err) + return -ENODEV; + return 0; +} + +static void arasan_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) +{ + sdhci_set_clock(host, clock); + + /* Change phy settings for the new clock */ + arasan_select_phy_clock(host); +} + +static const struct sdhci_ops arasan_sdhci_pci_ops = { + .set_clock = arasan_sdhci_set_clock, + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, +}; + +const struct sdhci_pci_fixes sdhci_arasan = { + .probe_slot = arasan_pci_probe_slot, + .ops = &arasan_sdhci_pci_ops, + .priv_size = sizeof(struct arasan_host), +}; diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 110c634cfb43..c5b229b46314 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -35,7 +35,6 @@ #include "sdhci.h" #include "sdhci-pci.h" -static int sdhci_pci_enable_dma(struct sdhci_host *host); static void sdhci_pci_hw_reset(struct sdhci_host *host); #ifdef CONFIG_PM_SLEEP @@ -1459,6 +1458,7 @@ static const struct pci_device_id pci_ids[] = { SDHCI_PCI_DEVICE(O2, SDS1, o2), SDHCI_PCI_DEVICE(O2, SEABIRD0, o2), SDHCI_PCI_DEVICE(O2, SEABIRD1, o2), + SDHCI_PCI_DEVICE(ARASAN, PHY_EMMC, arasan), SDHCI_PCI_DEVICE_CLASS(AMD, SYSTEM_SDHCI, PCI_CLASS_MASK, amd), /* Generic SD host controller */ {PCI_DEVICE_CLASS(SYSTEM_SDHCI, PCI_CLASS_MASK)}, @@ -1473,7 +1473,7 @@ MODULE_DEVICE_TABLE(pci, pci_ids); * * \*****************************************************************************/ -static int sdhci_pci_enable_dma(struct sdhci_host *host) +int sdhci_pci_enable_dma(struct sdhci_host *host) { struct sdhci_pci_slot *slot; struct pci_dev *pdev; diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index 0056f08a29cc..5cbcdc448f98 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ 
b/drivers/mmc/host/sdhci-pci.h @@ -55,6 +55,9 @@ #define PCI_SUBDEVICE_ID_NI_7884 0x7884 +#define PCI_VENDOR_ID_ARASAN 0x16e6 +#define PCI_DEVICE_ID_ARASAN_PHY_EMMC 0x0670 + /* * PCI device class and mask */ @@ -170,11 +173,13 @@ static inline void *sdhci_pci_priv(struct sdhci_pci_slot *slot) #ifdef CONFIG_PM_SLEEP int sdhci_pci_resume_host(struct sdhci_pci_chip *chip); #endif - +int sdhci_pci_enable_dma(struct sdhci_host *host); int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot); int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip); #ifdef CONFIG_PM_SLEEP int sdhci_pci_o2_resume(struct sdhci_pci_chip *chip); #endif +extern const struct sdhci_pci_fixes sdhci_arasan; + #endif /* __SDHCI_PCI_H */ -- cgit v1.2.3 From bd70b19e9e7e87ad330d820386774f304e74d112 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 3 Jan 2018 13:39:01 +0100 Subject: ASoC: rt5645: change micbias widget type to supply. Register "micbias1" and "micbias2" to supply widgets as modern drivers do. This should not cause any (new) issues for existing users of the codec, since micbias support is broken anyways. Micbias support needs the RT5645_MICBIAS?_POW_CTRL_SEL bits in the RT5645_GEN_CTRL2 register to be updated when enabled/disabled which we currently do not do. The updating of these bits will be fixed in a follow-up commit. 
Signed-off-by: Bard Liao Signed-off-by: Hans de Goede Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index daf7b73ba415..6f6da0abe220 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -1980,10 +1980,10 @@ static const struct snd_soc_dapm_widget rt5645_dapm_widgets[] = { /* Input Side */ /* micbias */ - SND_SOC_DAPM_MICBIAS("micbias1", RT5645_PWR_ANLG2, - RT5645_PWR_MB1_BIT, 0), - SND_SOC_DAPM_MICBIAS("micbias2", RT5645_PWR_ANLG2, - RT5645_PWR_MB2_BIT, 0), + SND_SOC_DAPM_SUPPLY("micbias1", RT5645_PWR_ANLG2, + RT5645_PWR_MB1_BIT, 0, NULL, 0), + SND_SOC_DAPM_SUPPLY("micbias2", RT5645_PWR_ANLG2, + RT5645_PWR_MB2_BIT, 0, NULL, 0), /* Input Lines */ SND_SOC_DAPM_INPUT("DMIC L1"), SND_SOC_DAPM_INPUT("DMIC R1"), -- cgit v1.2.3 From e61f3f31e5830f027d8eedb742bc48a9b3173699 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 3 Jan 2018 13:39:02 +0100 Subject: ASoC: rt5645: add micbias power control select. We need to set a corresponding control bit before powering micbias up. 
Signed-off-by: Bard Liao [hdegoede@redhat.com: Remove 2 unused variable declarations] Signed-off-by: Hans de Goede Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 56 +++++++++++++++++++++++++++++++++++++++++++++-- sound/soc/codecs/rt5645.h | 6 +++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 6f6da0abe220..fbaf36aeb587 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -1943,6 +1943,56 @@ static int rt5650_hp_event(struct snd_soc_dapm_widget *w, return 0; } +static int rt5645_set_micbias1_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *k, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + snd_soc_update_bits(codec, RT5645_GEN_CTRL2, + RT5645_MICBIAS1_POW_CTRL_SEL_MASK, + RT5645_MICBIAS1_POW_CTRL_SEL_M); + break; + + case SND_SOC_DAPM_POST_PMD: + snd_soc_update_bits(codec, RT5645_GEN_CTRL2, + RT5645_MICBIAS1_POW_CTRL_SEL_MASK, + RT5645_MICBIAS1_POW_CTRL_SEL_A); + break; + + default: + return 0; + } + + return 0; +} + +static int rt5645_set_micbias2_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *k, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + snd_soc_update_bits(codec, RT5645_GEN_CTRL2, + RT5645_MICBIAS2_POW_CTRL_SEL_MASK, + RT5645_MICBIAS2_POW_CTRL_SEL_M); + break; + + case SND_SOC_DAPM_POST_PMD: + snd_soc_update_bits(codec, RT5645_GEN_CTRL2, + RT5645_MICBIAS2_POW_CTRL_SEL_MASK, + RT5645_MICBIAS2_POW_CTRL_SEL_A); + break; + + default: + return 0; + } + + return 0; +} + static const struct snd_soc_dapm_widget rt5645_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("LDO2", RT5645_PWR_MIXER, RT5645_PWR_LDO2_BIT, 0, NULL, 0), @@ -1981,9 +2031,11 @@ static const struct snd_soc_dapm_widget rt5645_dapm_widgets[] = { /* Input Side */ /* micbias */ SND_SOC_DAPM_SUPPLY("micbias1", 
RT5645_PWR_ANLG2, - RT5645_PWR_MB1_BIT, 0, NULL, 0), + RT5645_PWR_MB1_BIT, 0, rt5645_set_micbias1_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_SUPPLY("micbias2", RT5645_PWR_ANLG2, - RT5645_PWR_MB2_BIT, 0, NULL, 0), + RT5645_PWR_MB2_BIT, 0, rt5645_set_micbias2_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), /* Input Lines */ SND_SOC_DAPM_INPUT("DMIC L1"), SND_SOC_DAPM_INPUT("DMIC R1"), diff --git a/sound/soc/codecs/rt5645.h b/sound/soc/codecs/rt5645.h index cfc5f97549eb..940325b28c29 100644 --- a/sound/soc/codecs/rt5645.h +++ b/sound/soc/codecs/rt5645.h @@ -2117,6 +2117,12 @@ enum { #define RT5645_RXDC_SRC_STO (0x0 << 7) #define RT5645_RXDC_SRC_MONO (0x1 << 7) #define RT5645_RXDC_SRC_SFT (7) +#define RT5645_MICBIAS1_POW_CTRL_SEL_MASK (0x1 << 5) +#define RT5645_MICBIAS1_POW_CTRL_SEL_A (0x0 << 5) +#define RT5645_MICBIAS1_POW_CTRL_SEL_M (0x1 << 5) +#define RT5645_MICBIAS2_POW_CTRL_SEL_MASK (0x1 << 4) +#define RT5645_MICBIAS2_POW_CTRL_SEL_A (0x0 << 4) +#define RT5645_MICBIAS2_POW_CTRL_SEL_M (0x1 << 4) #define RT5645_RXDP2_SEL_MASK (0x1 << 3) #define RT5645_RXDP2_SEL_IF2 (0x0 << 3) #define RT5645_RXDP2_SEL_ADC (0x1 << 3) -- cgit v1.2.3 From 30414f3010aff95ffdb6bed7b9dce62cde94fdc7 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 2 Jan 2018 12:18:37 -0800 Subject: drm/i915: Apply Display WA #1183 on skl, kbl, and cfl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Display WA #1183 was recently added to workaround "Failures when enabling DPLL0 with eDP link rate 2.16 or 4.32 GHz and CD clock frequency 308.57 or 617.14 MHz (CDCLK_CTL CD Frequency Select 10b or 11b) used in this enabling or in previous enabling." This workaround was designed to minimize the impact only to save the bad case with that link rates. But HW engineers indicated that it should be safe to apply broadly, although they were expecting the DPLL0 link rate to be unchanged on runtime. 
We need to cover 2 cases: when we are in fact enabling DPLL0 and when we are just changing the frequency with small differences. This is based on previous patch by Rodrigo Vivi with suggestions from Ville Syrjälä. Cc: Arthur J Runyan Cc: Ville Syrjälä Cc: Rodrigo Vivi Cc: stable@vger.kernel.org Signed-off-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171204232210.4958-1-lucas.demarchi@intel.com (cherry picked from commit 53421c2fe99ce16838639ad89d772d914a119a49) [ Lucas: Backport to 4.15 adding back variable that has been removed on commits not meant to be backported ] Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180102201837.6812-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_cdclk.c | 35 ++++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 ++++++++++ 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3866c49bc390..333f40bc03bb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6977,6 +6977,7 @@ enum { #define RESET_PCH_HANDSHAKE_ENABLE (1<<4) #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) +#define SKL_SELECT_ALTERNATE_DC_EXIT (1<<30) #define MASK_WAKEMEM (1<<13) #define SKL_DFSM _MMIO(0x51000) @@ -8522,6 +8523,7 @@ enum skl_power_gate { #define BXT_CDCLK_CD2X_DIV_SEL_2 (2<<22) #define BXT_CDCLK_CD2X_DIV_SEL_4 (3<<22) #define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe)<<20) +#define CDCLK_DIVMUX_CD_OVERRIDE (1<<19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1<<16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index b2a6d62b71c0..60cf4e58389a 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -860,16 
+860,10 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) { - int min_cdclk = skl_calc_cdclk(0, vco); u32 val; WARN_ON(vco != 8100000 && vco != 8640000); - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - /* * We always enable DPLL0 with the lowest link rate possible, but still * taking into account the VCO required to operate the eDP panel at the @@ -923,7 +917,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; - u32 freq_select, pcu_ack; + u32 freq_select, pcu_ack, cdclk_ctl; int ret; WARN_ON((cdclk == 24000) != (vco == 0)); @@ -940,7 +934,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, return; } - /* set CDCLK_CTL */ + /* Choose frequency for this cdclk */ switch (cdclk) { case 450000: case 432000: @@ -968,10 +962,33 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, dev_priv->cdclk.hw.vco != vco) skl_dpll0_disable(dev_priv); + cdclk_ctl = I915_READ(CDCLK_CTL); + + if (dev_priv->cdclk.hw.vco != vco) { + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + } + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); + POSTING_READ(CDCLK_CTL); + if (dev_priv->cdclk.hw.vco != vco) skl_dpll0_enable(dev_priv, vco); - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + /* Wa Display #1183: skl,kbl,cfl */ 
+ cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 9bf46ab211cb..7e115f3927f6 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -598,6 +598,11 @@ void gen9_enable_dc5(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Enabling DC5\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5); } @@ -625,6 +630,11 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv) { DRM_DEBUG_KMS("Disabling DC6\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); } -- cgit v1.2.3 From 3522f867c13b63cf62acdf1b8ca5664c549a716a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 2 Jan 2018 16:26:31 +0100 Subject: ACPI: EC: Fix debugfs_create_*() usage acpi_ec.gpe is "unsigned long", hence treating it as "u32" would expose the wrong half on big-endian 64-bit systems. Fix this by changing its type to "u32" and removing the cast, as all other code already uses u32 or sometimes even only u8. Fixes: 1195a098168fcacf (ACPI: Provide /sys/kernel/debug/ec/...) Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/ec.c | 2 +- drivers/acpi/ec_sys.c | 2 +- drivers/acpi/internal.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 0252c9b9af3d..d9f38c645e4a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1516,7 +1516,7 @@ static int acpi_ec_setup(struct acpi_ec *ec, bool handle_events) } acpi_handle_info(ec->handle, - "GPE=0x%lx, EC_CMD/EC_SC=0x%lx, EC_DATA=0x%lx\n", + "GPE=0x%x, EC_CMD/EC_SC=0x%lx, EC_DATA=0x%lx\n", ec->gpe, ec->command_addr, ec->data_addr); return ret; } diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c index 6c7dd7af789e..dd70d6c2bca0 100644 --- a/drivers/acpi/ec_sys.c +++ b/drivers/acpi/ec_sys.c @@ -128,7 +128,7 @@ static int acpi_ec_add_debugfs(struct acpi_ec *ec, unsigned int ec_device_count) return -ENOMEM; } - if (!debugfs_create_x32("gpe", 0444, dev_dir, (u32 *)&first_ec->gpe)) + if (!debugfs_create_x32("gpe", 0444, dev_dir, &first_ec->gpe)) goto error; if (!debugfs_create_bool("use_global_lock", 0444, dev_dir, &first_ec->global_lock)) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 7f43423de43c..1d0a501bc7f0 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -159,7 +159,7 @@ static inline void acpi_early_processor_osc(void) {} -------------------------------------------------------------------------- */ struct acpi_ec { acpi_handle handle; - unsigned long gpe; + u32 gpe; unsigned long command_addr; unsigned long data_addr; bool global_lock; -- cgit v1.2.3 From 29f1b2b0fecfae69e31833836f1da3136696eee5 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 28 Dec 2017 22:11:36 -0500 Subject: posix-timers: Prevent UB from shifting negative signed value Shifting a negative signed number is undefined behavior. 
Looking at the macros MAKE_PROCESS_CPUCLOCK and FD_TO_CLOCKID, both contain the subexpression: (~(clockid_t) (pid) << 3) where clockid_t resolves to a signed int. Once the operand has been bitwise-negated, left-shifting it is undefined behavior if the value thus far is negative. It was further suggested to make these macros into inline functions. Suggested-by: Thomas Gleixner Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Cc: Dimitri Sivanich Cc: Frederic Weisbecker Cc: Al Viro Cc: linux-kselftest@vger.kernel.org Cc: Shuah Khan Cc: Deepa Dinamani Link: https://lkml.kernel.org/r/1514517100-18051-1-git-send-email-nick.desaulniers@gmail.com --- include/linux/posix-timers.h | 25 +++++++++++++++++++------ kernel/time/posix-clock.c | 2 +- kernel/time/posix-cpu-timers.c | 4 ++-- tools/testing/selftests/ptp/testptp.c | 4 +--- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 672c4f32311e..c85704fcdbd2 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -42,13 +42,26 @@ struct cpu_timer_list { #define CLOCKFD CPUCLOCK_MAX #define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK) -#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ - ((~(clockid_t) (pid) << 3) | (clockid_t) (clock)) -#define MAKE_THREAD_CPUCLOCK(tid, clock) \ - MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK) +static inline clockid_t make_process_cpuclock(const unsigned int pid, + const clockid_t clock) +{ + return ((~pid) << 3) | clock; +} +static inline clockid_t make_thread_cpuclock(const unsigned int tid, + const clockid_t clock) +{ + return make_process_cpuclock(tid, clock | CPUCLOCK_PERTHREAD_MASK); +} -#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) -#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3)) +static inline clockid_t fd_to_clockid(const int fd) +{ + return make_process_cpuclock((unsigned int) fd, CLOCKFD); +} + +static inline int 
clockid_to_fd(const clockid_t clk) +{ + return ~(clk >> 3); +} #define REQUEUE_PENDING 1 diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 17cdc554c9fe..cc91d90abd84 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -216,7 +216,7 @@ struct posix_clock_desc { static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd) { - struct file *fp = fget(CLOCKID_TO_FD(id)); + struct file *fp = fget(clockid_to_fd(id)); int err = -EINVAL; if (!fp) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 1f27887aa194..cef79ca5bbd5 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1363,8 +1363,8 @@ static long posix_cpu_nsleep_restart(struct restart_block *restart_block) return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t); } -#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) -#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) +#define PROCESS_CLOCK make_process_cpuclock(0, CPUCLOCK_SCHED) +#define THREAD_CLOCK make_thread_cpuclock(0, CPUCLOCK_SCHED) static int process_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 5d2eae16f7ee..a5d8f0ab0da0 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -60,9 +60,7 @@ static int clock_adjtime(clockid_t id, struct timex *tx) static clockid_t get_clockid(int fd) { #define CLOCKFD 3 -#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) - - return FD_TO_CLOCKID(fd); + return (((unsigned int) ~fd) << 3) | CLOCKFD; } static void handle_alarm(int s) -- cgit v1.2.3 From 3ea15452ee85754f70f3b9fa1f23165ef2e77ba7 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Wed, 3 Jan 2018 11:00:31 +0800 Subject: nl80211: Check for the required netlink attribute presence nl80211_nan_add_func() does not check if the required attribute 
NL80211_NAN_FUNC_FOLLOW_UP_DEST is present when processing NL80211_CMD_ADD_NAN_FUNCTION request. This request can be issued by users with CAP_NET_ADMIN privilege and may result in NULL dereference and a system crash. Add a check for the required attribute presence. Signed-off-by: Hao Chen Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 213d0c498c97..2b3dbcd40e46 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11361,7 +11361,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb, break; case NL80211_NAN_FUNC_FOLLOW_UP: if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || - !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { + !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] || + !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) { err = -EINVAL; goto out; } -- cgit v1.2.3 From 96365d9fdb2f0d81bfc010298289a8c168931cd0 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Thu, 4 Jan 2018 15:44:07 +0800 Subject: ASoC: mediatek: add some core clocks for MT2701 AFE Add three core clocks for MT2701 AFE. 
Signed-off-by: Ryder Lee Signed-off-by: Mark Brown --- sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c | 30 ++++++++++++++++++++++- sound/soc/mediatek/mt2701/mt2701-afe-common.h | 3 +++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c index 56a057c78c9a..949fc3a1d025 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c @@ -18,8 +18,11 @@ #include "mt2701-afe-clock-ctrl.h" static const char *const base_clks[] = { + [MT2701_INFRA_SYS_AUDIO] = "infra_sys_audio_clk", [MT2701_TOP_AUD_MCLK_SRC0] = "top_audio_mux1_sel", [MT2701_TOP_AUD_MCLK_SRC1] = "top_audio_mux2_sel", + [MT2701_TOP_AUD_A1SYS] = "top_audio_a1sys_hp", + [MT2701_TOP_AUD_A2SYS] = "top_audio_a2sys_hp", [MT2701_AUDSYS_AFE] = "audio_afe_pd", [MT2701_AUDSYS_AFE_CONN] = "audio_afe_conn_pd", [MT2701_AUDSYS_A1SYS] = "audio_a1sys_pd", @@ -169,10 +172,26 @@ static int mt2701_afe_enable_audsys(struct mtk_base_afe *afe) struct mt2701_afe_private *afe_priv = afe->platform_priv; int ret; - ret = clk_prepare_enable(afe_priv->base_ck[MT2701_AUDSYS_AFE]); + /* Enable infra clock gate */ + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_INFRA_SYS_AUDIO]); if (ret) return ret; + /* Enable top a1sys clock gate */ + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_TOP_AUD_A1SYS]); + if (ret) + goto err_a1sys; + + /* Enable top a2sys clock gate */ + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_TOP_AUD_A2SYS]); + if (ret) + goto err_a2sys; + + /* Internal clock gates */ + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_AUDSYS_AFE]); + if (ret) + goto err_afe; + ret = clk_prepare_enable(afe_priv->base_ck[MT2701_AUDSYS_A1SYS]); if (ret) goto err_audio_a1sys; @@ -193,6 +212,12 @@ err_audio_a2sys: clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_A1SYS]); err_audio_a1sys: clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_AFE]); 
+err_afe: + clk_disable_unprepare(afe_priv->base_ck[MT2701_TOP_AUD_A2SYS]); +err_a2sys: + clk_disable_unprepare(afe_priv->base_ck[MT2701_TOP_AUD_A1SYS]); +err_a1sys: + clk_disable_unprepare(afe_priv->base_ck[MT2701_INFRA_SYS_AUDIO]); return ret; } @@ -205,6 +230,9 @@ static void mt2701_afe_disable_audsys(struct mtk_base_afe *afe) clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_A2SYS]); clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_A1SYS]); clk_disable_unprepare(afe_priv->base_ck[MT2701_AUDSYS_AFE]); + clk_disable_unprepare(afe_priv->base_ck[MT2701_TOP_AUD_A1SYS]); + clk_disable_unprepare(afe_priv->base_ck[MT2701_TOP_AUD_A2SYS]); + clk_disable_unprepare(afe_priv->base_ck[MT2701_INFRA_SYS_AUDIO]); } int mt2701_afe_enable_clock(struct mtk_base_afe *afe) diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-common.h b/sound/soc/mediatek/mt2701/mt2701-afe-common.h index 9a2b301a4c21..ae8ddeacfbfe 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-common.h +++ b/sound/soc/mediatek/mt2701/mt2701-afe-common.h @@ -61,8 +61,11 @@ enum { }; enum audio_base_clock { + MT2701_INFRA_SYS_AUDIO, MT2701_TOP_AUD_MCLK_SRC0, MT2701_TOP_AUD_MCLK_SRC1, + MT2701_TOP_AUD_A1SYS, + MT2701_TOP_AUD_A2SYS, MT2701_AUDSYS_AFE, MT2701_AUDSYS_AFE_CONN, MT2701_AUDSYS_A1SYS, -- cgit v1.2.3 From dfa3cbb83e099d5ef9809b67ea3bff3a39dc2f06 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Thu, 4 Jan 2018 15:44:08 +0800 Subject: ASoC: mediatek: modify MT2701 AFE driver to adapt mfd device As the new MFD parent is in place, modify MT2701 AFE driver to adapt it. 
Signed-off-by: Ryder Lee Signed-off-by: Mark Brown --- sound/soc/mediatek/mt2701/mt2701-afe-pcm.c | 45 +++++++++++++----------------- sound/soc/mediatek/mt2701/mt2701-reg.h | 1 - 2 files changed, 20 insertions(+), 26 deletions(-) diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c index 0edadca12a5e..f0cd08fa5c5d 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -1368,14 +1369,6 @@ static const struct mt2701_i2s_data mt2701_i2s_data[MT2701_I2S_NUM][2] = { }, }; -static const struct regmap_config mt2701_afe_regmap_config = { - .reg_bits = 32, - .reg_stride = 4, - .val_bits = 32, - .max_register = AFE_END_ADDR, - .cache_type = REGCACHE_NONE, -}; - static irqreturn_t mt2701_asys_isr(int irq_id, void *dev) { int id; @@ -1414,9 +1407,9 @@ static int mt2701_afe_runtime_resume(struct device *dev) static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) { + struct snd_soc_component *component; struct mtk_base_afe *afe; struct mt2701_afe_private *afe_priv; - struct resource *res; struct device *dev; int i, irq_id, ret; @@ -1446,17 +1439,11 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) return ret; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - afe->base_addr = devm_ioremap_resource(&pdev->dev, res); - - if (IS_ERR(afe->base_addr)) - return PTR_ERR(afe->base_addr); - - afe->regmap = devm_regmap_init_mmio(&pdev->dev, afe->base_addr, - &mt2701_afe_regmap_config); - if (IS_ERR(afe->regmap)) - return PTR_ERR(afe->regmap); + afe->regmap = syscon_node_to_regmap(dev->parent->of_node); + if (!afe->regmap) { + dev_err(dev, "could not get regmap from parent\n"); + return -ENODEV; + } mutex_init(&afe->irq_alloc_lock); @@ -1490,6 +1477,12 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) = &mt2701_i2s_data[i][I2S_IN]; } + component = 
kzalloc(sizeof(*component), GFP_KERNEL); + if (!component) + return -ENOMEM; + + component->regmap = afe->regmap; + afe->mtk_afe_hardware = &mt2701_afe_hardware; afe->memif_fs = mt2701_memif_fs; afe->irq_fs = mt2701_irq_fs; @@ -1502,7 +1495,7 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) ret = mt2701_init_clock(afe); if (ret) { dev_err(dev, "init clock error\n"); - return ret; + goto err_init_clock; } platform_set_drvdata(pdev, afe); @@ -1521,10 +1514,10 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) goto err_platform; } - ret = snd_soc_register_component(&pdev->dev, - &mt2701_afe_pcm_dai_component, - mt2701_afe_pcm_dais, - ARRAY_SIZE(mt2701_afe_pcm_dais)); + ret = snd_soc_add_component(dev, component, + &mt2701_afe_pcm_dai_component, + mt2701_afe_pcm_dais, + ARRAY_SIZE(mt2701_afe_pcm_dais)); if (ret) { dev_warn(dev, "err_dai_component\n"); goto err_dai_component; @@ -1538,6 +1531,8 @@ err_platform: pm_runtime_put_sync(dev); err_pm_disable: pm_runtime_disable(dev); +err_init_clock: + kfree(component); return ret; } diff --git a/sound/soc/mediatek/mt2701/mt2701-reg.h b/sound/soc/mediatek/mt2701/mt2701-reg.h index f17c76f37b5f..18e676974f22 100644 --- a/sound/soc/mediatek/mt2701/mt2701-reg.h +++ b/sound/soc/mediatek/mt2701/mt2701-reg.h @@ -145,5 +145,4 @@ #define ASYS_I2S_CON_WIDE_MODE_SET(x) ((x) << 1) #define ASYS_I2S_IN_PHASE_FIX (0x1 << 31) -#define AFE_END_ADDR 0x15e0 #endif -- cgit v1.2.3 From 7f12a56367bf526afde7e81820a8c7d97e75ed10 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Thu, 4 Jan 2018 15:44:09 +0800 Subject: ASoC: mediatek: update MT2701 AFE documentation to adapt mfd device As the new MFD parent is in place, modify MT2701 AFE documentation to adapt it. Also add three core clocks in example. 
Signed-off-by: Ryder Lee Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/mt2701-afe-pcm.txt | 171 +++++++++++---------- 1 file changed, 93 insertions(+), 78 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt b/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt index 0450baad2813..6df87b97f7cb 100644 --- a/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt +++ b/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt @@ -2,15 +2,17 @@ Mediatek AFE PCM controller for mt2701 Required properties: - compatible = "mediatek,mt2701-audio"; -- reg: register location and size - interrupts: should contain AFE and ASYS interrupts - interrupt-names: should be "afe" and "asys" - power-domains: should define the power domain - clocks: Must contain an entry for each entry in clock-names See ../clocks/clock-bindings.txt for details - clock-names: should have these clock names: + "infra_sys_audio_clk", "top_audio_mux1_sel", "top_audio_mux2_sel", + "top_audio_a1sys_hp", + "top_audio_a2sys_hp", "i2s0_src_sel", "i2s1_src_sel", "i2s2_src_sel", @@ -45,85 +47,98 @@ Required properties: - assigned-clocks-parents: parent of input clocks of assigned clocks. - assigned-clock-rates: list of clock frequencies of assigned clocks. +Must be a subnode of MediaTek audsys device tree node. +See ../arm/mediatek/mediatek,audsys.txt for details about the parent node. 
+ Example: - afe: mt2701-afe-pcm@11220000 { - compatible = "mediatek,mt2701-audio"; - reg = <0 0x11220000 0 0x2000>, - <0 0x112A0000 0 0x20000>; - interrupts = , - ; - interrupt-names = "afe", "asys"; - power-domains = <&scpsys MT2701_POWER_DOMAIN_IFR_MSC>; - clocks = <&topckgen CLK_TOP_AUD_MUX1_SEL>, - <&topckgen CLK_TOP_AUD_MUX2_SEL>, - <&topckgen CLK_TOP_AUD_K1_SRC_SEL>, - <&topckgen CLK_TOP_AUD_K2_SRC_SEL>, - <&topckgen CLK_TOP_AUD_K3_SRC_SEL>, - <&topckgen CLK_TOP_AUD_K4_SRC_SEL>, - <&topckgen CLK_TOP_AUD_K1_SRC_DIV>, - <&topckgen CLK_TOP_AUD_K2_SRC_DIV>, - <&topckgen CLK_TOP_AUD_K3_SRC_DIV>, - <&topckgen CLK_TOP_AUD_K4_SRC_DIV>, - <&topckgen CLK_TOP_AUD_I2S1_MCLK>, - <&topckgen CLK_TOP_AUD_I2S2_MCLK>, - <&topckgen CLK_TOP_AUD_I2S3_MCLK>, - <&topckgen CLK_TOP_AUD_I2S4_MCLK>, - <&audiosys CLK_AUD_I2SO1>, - <&audiosys CLK_AUD_I2SO2>, - <&audiosys CLK_AUD_I2SO3>, - <&audiosys CLK_AUD_I2SO4>, - <&audiosys CLK_AUD_I2SIN1>, - <&audiosys CLK_AUD_I2SIN2>, - <&audiosys CLK_AUD_I2SIN3>, - <&audiosys CLK_AUD_I2SIN4>, - <&audiosys CLK_AUD_ASRCO1>, - <&audiosys CLK_AUD_ASRCO2>, - <&audiosys CLK_AUD_ASRCO3>, - <&audiosys CLK_AUD_ASRCO4>, - <&audiosys CLK_AUD_AFE>, - <&audiosys CLK_AUD_AFE_CONN>, - <&audiosys CLK_AUD_A1SYS>, - <&audiosys CLK_AUD_A2SYS>, - <&audiosys CLK_AUD_AFE_MRGIF>; + audsys: audio-subsystem@11220000 { + compatible = "mediatek,mt2701-audsys", "syscon", "simple-mfd"; + ... 
+ + afe: audio-controller { + compatible = "mediatek,mt2701-audio"; + interrupts = , + ; + interrupt-names = "afe", "asys"; + power-domains = <&scpsys MT2701_POWER_DOMAIN_IFR_MSC>; + + clocks = <&infracfg CLK_INFRA_AUDIO>, + <&topckgen CLK_TOP_AUD_MUX1_SEL>, + <&topckgen CLK_TOP_AUD_MUX2_SEL>, + <&topckgen CLK_TOP_AUD_48K_TIMING>, + <&topckgen CLK_TOP_AUD_44K_TIMING>, + <&topckgen CLK_TOP_AUD_K1_SRC_SEL>, + <&topckgen CLK_TOP_AUD_K2_SRC_SEL>, + <&topckgen CLK_TOP_AUD_K3_SRC_SEL>, + <&topckgen CLK_TOP_AUD_K4_SRC_SEL>, + <&topckgen CLK_TOP_AUD_K1_SRC_DIV>, + <&topckgen CLK_TOP_AUD_K2_SRC_DIV>, + <&topckgen CLK_TOP_AUD_K3_SRC_DIV>, + <&topckgen CLK_TOP_AUD_K4_SRC_DIV>, + <&topckgen CLK_TOP_AUD_I2S1_MCLK>, + <&topckgen CLK_TOP_AUD_I2S2_MCLK>, + <&topckgen CLK_TOP_AUD_I2S3_MCLK>, + <&topckgen CLK_TOP_AUD_I2S4_MCLK>, + <&audsys CLK_AUD_I2SO1>, + <&audsys CLK_AUD_I2SO2>, + <&audsys CLK_AUD_I2SO3>, + <&audsys CLK_AUD_I2SO4>, + <&audsys CLK_AUD_I2SIN1>, + <&audsys CLK_AUD_I2SIN2>, + <&audsys CLK_AUD_I2SIN3>, + <&audsys CLK_AUD_I2SIN4>, + <&audsys CLK_AUD_ASRCO1>, + <&audsys CLK_AUD_ASRCO2>, + <&audsys CLK_AUD_ASRCO3>, + <&audsys CLK_AUD_ASRCO4>, + <&audsys CLK_AUD_AFE>, + <&audsys CLK_AUD_AFE_CONN>, + <&audsys CLK_AUD_A1SYS>, + <&audsys CLK_AUD_A2SYS>, + <&audsys CLK_AUD_AFE_MRGIF>; - clock-names = "top_audio_mux1_sel", - "top_audio_mux2_sel", - "i2s0_src_sel", - "i2s1_src_sel", - "i2s2_src_sel", - "i2s3_src_sel", - "i2s0_src_div", - "i2s1_src_div", - "i2s2_src_div", - "i2s3_src_div", - "i2s0_mclk_en", - "i2s1_mclk_en", - "i2s2_mclk_en", - "i2s3_mclk_en", - "i2so0_hop_ck", - "i2so1_hop_ck", - "i2so2_hop_ck", - "i2so3_hop_ck", - "i2si0_hop_ck", - "i2si1_hop_ck", - "i2si2_hop_ck", - "i2si3_hop_ck", - "asrc0_out_ck", - "asrc1_out_ck", - "asrc2_out_ck", - "asrc3_out_ck", - "audio_afe_pd", - "audio_afe_conn_pd", - "audio_a1sys_pd", - "audio_a2sys_pd", - "audio_mrgif_pd"; + clock-names = "infra_sys_audio_clk", + "top_audio_mux1_sel", + "top_audio_mux2_sel", + 
"top_audio_a1sys_hp", + "top_audio_a2sys_hp", + "i2s0_src_sel", + "i2s1_src_sel", + "i2s2_src_sel", + "i2s3_src_sel", + "i2s0_src_div", + "i2s1_src_div", + "i2s2_src_div", + "i2s3_src_div", + "i2s0_mclk_en", + "i2s1_mclk_en", + "i2s2_mclk_en", + "i2s3_mclk_en", + "i2so0_hop_ck", + "i2so1_hop_ck", + "i2so2_hop_ck", + "i2so3_hop_ck", + "i2si0_hop_ck", + "i2si1_hop_ck", + "i2si2_hop_ck", + "i2si3_hop_ck", + "asrc0_out_ck", + "asrc1_out_ck", + "asrc2_out_ck", + "asrc3_out_ck", + "audio_afe_pd", + "audio_afe_conn_pd", + "audio_a1sys_pd", + "audio_a2sys_pd", + "audio_mrgif_pd"; - assigned-clocks = <&topckgen CLK_TOP_AUD_MUX1_SEL>, - <&topckgen CLK_TOP_AUD_MUX2_SEL>, - <&topckgen CLK_TOP_AUD_MUX1_DIV>, - <&topckgen CLK_TOP_AUD_MUX2_DIV>; - assigned-clock-parents = <&topckgen CLK_TOP_AUD1PLL_98M>, - <&topckgen CLK_TOP_AUD2PLL_90M>; - assigned-clock-rates = <0>, <0>, <49152000>, <45158400>; + assigned-clocks = <&topckgen CLK_TOP_AUD_MUX1_SEL>, + <&topckgen CLK_TOP_AUD_MUX2_SEL>, + <&topckgen CLK_TOP_AUD_MUX1_DIV>, + <&topckgen CLK_TOP_AUD_MUX2_DIV>; + assigned-clock-parents = <&topckgen CLK_TOP_AUD1PLL_98M>, + <&topckgen CLK_TOP_AUD2PLL_90M>; + assigned-clock-rates = <0>, <0>, <49152000>, <45158400>; + }; }; -- cgit v1.2.3 From 736a80bbfda709fb3631f5f62056f250a38e5804 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Jan 2018 15:51:53 +0100 Subject: mac80211: mesh: drop frames appearing to be from us If there are multiple mesh stations with the same MAC address, they will both get confused and start throwing warnings. Obviously in this case nothing can actually work anyway, so just drop frames that look like they're from ourselves early on. 
Reported-by: Gui Iribarren Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 70e9d2ca8bbe..4daafb07602f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) } return true; case NL80211_IFTYPE_MESH_POINT: + if (ether_addr_equal(sdata->vif.addr, hdr->addr2)) + return false; if (multicast) return true; return ether_addr_equal(sdata->vif.addr, hdr->addr1); -- cgit v1.2.3 From d14ac576d10f865970bb1324d337e5e24d79aaf4 Mon Sep 17 00:00:00 2001 From: Christian Holl Date: Wed, 3 Jan 2018 19:53:02 +0100 Subject: USB: serial: cp210x: add new device ID ELV ALC 8xxx This adds the ELV ALC 8xxx Battery Charging device to the list of USB IDs of drivers/usb/serial/cp210x.c Signed-off-by: Christian Holl Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 38814225a816..06d502b3e913 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -175,6 +175,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */ { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ -- cgit v1.2.3 From 54e98b5d663fcd8e3279c2391537b1a1f7bfe344 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 3 Jan 2018 22:02:29 -0800 Subject: net: dsa: b53: Turn off Broadcom tags for more switches Models such as BCM5395/97/98 and BCM53125/24/53115 and compatible require that we turn on 
managed mode to actually act on Broadcom tags, otherwise they just pass them through on ingress (host -> switch) and don't insert them in egress (switch -> host). Turning on managed mode is simple, but requires us to properly support ARL misses on multicast addresses which is a much more involved set of changes not suitable for a bug fix for this release. Reported-by: Jochen Friedrich Fixes: 7edc58d614d4 ("net: dsa: b53: Turn on Broadcom tags") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index f5a8dd96fd75..4498ab897d94 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1500,10 +1500,13 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, { struct b53_device *dev = ds->priv; - /* Older models support a different tag format that we do not - * support in net/dsa/tag_brcm.c yet. + /* Older models (5325, 5365) support a different tag format that we do + * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed + * mode to be turned on which means we need to specifically manage ARL + * misses on multicast addresses (TBD). */ - if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port)) + if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) || + !b53_can_enable_brcm_tags(ds, port)) return DSA_TAG_PROTO_NONE; /* Broadcom BCM58xx chips have a flow accelerator on Port 8 -- cgit v1.2.3 From b4c2951a4833e66f1bbfe65ddcd4fdcdfafe5e8f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 2 Dec 2017 18:48:52 +0100 Subject: can: vxcan: improve handling of missing peer name attribute Picking up the patch from Serhey Popovych (commit 191cdb3822e5df6b3c8, "veth: Be more robust on network device creation when no attributes"). 
When the peer name attribute is not provided, the former implementation tries to register the given device name twice ... which leads to -EEXIST. If only one device name is given, apply an automatically generated and valid name for the peer. Cc: Serhey Popovych Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/vxcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index 8404e8852a0f..b4c4a2c76437 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, tbp = peer_tb; } - if (tbp[IFLA_IFNAME]) { + if (ifmp && tbp[IFLA_IFNAME]) { nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { -- cgit v1.2.3 From d5b42e6607661b198d8b26a0c30969605b1bf5c7 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Wed, 13 Dec 2017 19:52:23 +0100 Subject: can: gs_usb: fix return value of the "set_bittiming" callback The "set_bittiming" callback treats a positive return value as error! For that reason "can_changelink()" will quit silently after setting the bittiming values without processing ctrlmode, restart-ms, etc. Signed-off-by: Wolfgang Grandegger Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 68ac3e88a8ce..8bf80ad9dc44 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)", rc); - return rc; + return (rc > 0) ? 
0 : rc; } static void gs_usb_xmit_callback(struct urb *urb) -- cgit v1.2.3 From 13454c14550065fcc1705d6bd4ee6d40e057099f Mon Sep 17 00:00:00 2001 From: Luu An Phu Date: Tue, 2 Jan 2018 10:44:18 +0700 Subject: can: flex_can: Correct the checking for frame length in flexcan_start_xmit() The flexcan_start_xmit() function compares the frame length with the data register length to decide whether to write frame content into the data[0] and data[1] registers. The data register length is 4 bytes and the maximum frame length is 8 bytes. Fix the check that compares the frame length with 3: because the register length is 4, the data[1] register only needs to be written when the frame is longer than 4 bytes. Signed-off-by: Luu An Phu Reviewed-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 0626dcfd1f3d..760d2c07e3a2 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -526,7 +526,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) data = be32_to_cpup((__be32 *)&cf->data[0]); flexcan_write(data, &priv->tx_mb->data[0]); } - if (cf->can_dlc > 3) { + if (cf->can_dlc > 4) { data = be32_to_cpup((__be32 *)&cf->data[4]); flexcan_write(data, &priv->tx_mb->data[1]); } -- cgit v1.2.3 From 6ebc5e8fe85286c7392f1777a3dba9e1fd6d0253 Mon Sep 17 00:00:00 2001 From: Martin Lederhilger Date: Thu, 21 Dec 2017 14:42:44 +0000 Subject: can: ems_usb: improve error reporting for error warning and error passive This patch adds the missing CAN_ERR_CRTL to cf->can_id in case of CAN_STATE_ERROR_WARNING or CAN_STATE_ERROR_PASSIVE Signed-off-by: Martin Lederhilger Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ems_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b00358297424..12ff0020ecd6 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -395,6 +395,7 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct 
ems_cpc_msg *msg) if (dev->can.state == CAN_STATE_ERROR_WARNING || dev->can.state == CAN_STATE_ERROR_PASSIVE) { + cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (txerr > rxerr) ? CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE; } -- cgit v1.2.3 From 6708913750344a900f2e73bfe4a4d6dbbce4fe8d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Jan 2018 16:39:27 +0100 Subject: ALSA: pcm: Add missing error checks in OSS emulation plugin builder In the OSS emulation plugin builder where the frame size is parsed in the plugin chain, some places miss the possible errors returned from the plugin src_ or dst_frames callback. This patch papers over such places. Cc: Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_plugin.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index cadc93792868..85a56af104bd 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -592,18 +592,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st snd_pcm_sframes_t frames = size; plugin = snd_pcm_plug_first(plug); - while (plugin && frames > 0) { + while (plugin) { + if (frames <= 0) + return frames; if ((next = plugin->next) != NULL) { snd_pcm_sframes_t frames1 = frames; - if (plugin->dst_frames) + if (plugin->dst_frames) { frames1 = plugin->dst_frames(plugin, frames); + if (frames1 <= 0) + return frames1; + } if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) { return err; } if (err != frames1) { frames = err; - if (plugin->src_frames) + if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames1); + if (frames <= 0) + return frames; + } } } else dst_channels = NULL; -- cgit v1.2.3 From 06e7e776ca4d36547e503279aeff996cbb292c16 Mon Sep 17 00:00:00 2001 From: Ben Seri Date: Fri, 8 Dec 2017 15:14:47 +0100 Subject: Bluetooth: Prevent stack info leak from the EFS element. 
In the function l2cap_parse_conf_rsp and in the function l2cap_parse_conf_req the following variable is declared without initialization: struct l2cap_conf_efs efs; In addition, when parsing input configuration parameters in both of these functions, the switch case for handling EFS elements may skip the memcpy call that will write to the efs variable: ... case L2CAP_CONF_EFS: if (olen == sizeof(efs)) memcpy(&efs, (void *)val, olen); ... The olen in the above if is attacker controlled, and regardless of that if, in both of these functions the efs variable would eventually be added to the outgoing configuration request that is being built: l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), (unsigned long) &efs); So by sending a configuration request, or response, that contains an L2CAP_CONF_EFS element, but with an element length that is not sizeof(efs) - the memcpy to the uninitialized efs variable can be avoided, and the uninitialized variable would be returned to the attacker (16 bytes). 
This issue has been assigned CVE-2017-1000410 Cc: Marcel Holtmann Cc: Gustavo Padovan Cc: Johan Hedberg Cc: stable Signed-off-by: Ben Seri Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 43ba91c440bc..fc6615d59165 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data break; case L2CAP_CONF_EFS: - remote_efs = 1; - if (olen == sizeof(efs)) + if (olen == sizeof(efs)) { + remote_efs = 1; memcpy(&efs, (void *) val, olen); + } break; case L2CAP_CONF_EWS: @@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) + if (olen == sizeof(efs)) { memcpy(&efs, (void *)val, olen); - if (chan->local_stype != L2CAP_SERV_NOTRAFIC && - efs.stype != L2CAP_SERV_NOTRAFIC && - efs.stype != chan->local_stype) - return -ECONNREFUSED; + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs, endptr - ptr); + } break; case L2CAP_CONF_FCS: -- cgit v1.2.3 From b78d830f0049ef1966dc1e0ebd1ec2a594e2cf25 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 19:23:46 -0700 Subject: usbip: fix vudc_rx: harden CMD_SUBMIT path to handle malicious input Harden CMD_SUBMIT path to handle malicious input that could trigger large memory allocations. Add checks to validate transfer_buffer_length and number_of_packets to protect against bad input requesting for unbounded memory allocations. 
Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_rx.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c index df1e30989148..1e8a23d92cb4 100644 --- a/drivers/usb/usbip/vudc_rx.c +++ b/drivers/usb/usbip/vudc_rx.c @@ -120,6 +120,25 @@ static int v_recv_cmd_submit(struct vudc *udc, urb_p->new = 1; urb_p->seqnum = pdu->base.seqnum; + if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) { + /* validate packet size and number of packets */ + unsigned int maxp, packets, bytes; + + maxp = usb_endpoint_maxp(urb_p->ep->desc); + maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc); + bytes = pdu->u.cmd_submit.transfer_buffer_length; + packets = DIV_ROUND_UP(bytes, maxp); + + if (pdu->u.cmd_submit.number_of_packets < 0 || + pdu->u.cmd_submit.number_of_packets > packets) { + dev_err(&udc->gadget.dev, + "CMD_SUBMIT: isoc invalid num packets %d\n", + pdu->u.cmd_submit.number_of_packets); + ret = -EMSGSIZE; + goto free_urbp; + } + } + ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type); if (ret) { usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC); -- cgit v1.2.3 From e1346fd87c71a1f61de1fe476ec8df1425ac931c Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 17:00:06 -0700 Subject: usbip: remove kernel addresses from usb device and urb debug msgs usbip_dump_usb_device() and usbip_dump_urb() print kernel addresses. Remove kernel addresses from usb device and urb debug msgs and improve the message content. Instead of printing parent device and bus addresses, print parent device and bus names. 
Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/usbip_common.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 7b219d9109b4..ee2bbce24584 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -91,7 +91,7 @@ static void usbip_dump_usb_device(struct usb_device *udev) dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)", udev->devnum, udev->devpath, usb_speed_string(udev->speed)); - pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport); + pr_debug("tt hub ttport %d\n", udev->ttport); dev_dbg(dev, " "); for (i = 0; i < 16; i++) @@ -124,12 +124,8 @@ static void usbip_dump_usb_device(struct usb_device *udev) } pr_debug("\n"); - dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus); - - dev_dbg(dev, - "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n", - &udev->descriptor, udev->config, - udev->actconfig, udev->rawdescriptors); + dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev), + udev->bus->bus_name); dev_dbg(dev, "have_langid %d, string_langid %d\n", udev->have_langid, udev->string_langid); @@ -237,9 +233,6 @@ void usbip_dump_urb(struct urb *urb) dev = &urb->dev->dev; - dev_dbg(dev, " urb :%p\n", urb); - dev_dbg(dev, " dev :%p\n", urb->dev); - usbip_dump_usb_device(urb->dev); dev_dbg(dev, " pipe :%08x ", urb->pipe); @@ -248,11 +241,9 @@ void usbip_dump_urb(struct urb *urb) dev_dbg(dev, " status :%d\n", urb->status); dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags); - dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer); dev_dbg(dev, " transfer_buffer_length:%d\n", urb->transfer_buffer_length); dev_dbg(dev, " actual_length :%d\n", urb->actual_length); - dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet); if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL) usbip_dump_usb_ctrlrequest( @@ -262,8 +253,6 @@ void 
usbip_dump_urb(struct urb *urb) dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets); dev_dbg(dev, " interval :%d\n", urb->interval); dev_dbg(dev, " error_count :%d\n", urb->error_count); - dev_dbg(dev, " context :%p\n", urb->context); - dev_dbg(dev, " complete :%p\n", urb->complete); } EXPORT_SYMBOL_GPL(usbip_dump_urb); -- cgit v1.2.3 From 5fd77a3a0e408c23ab4002a57db980e46bc16e72 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 22 Dec 2017 19:23:47 -0700 Subject: usbip: vudc_tx: fix v_send_ret_submit() vulnerability to null xfer buffer v_send_ret_submit() handles urb with a null transfer_buffer, when it replays a packet with potential malicious data that could contain a null buffer. Add a check for the condition when actual_length > 0 and transfer_buffer is null. Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vudc_tx.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c index 1440ae0919ec..3ccb17c3e840 100644 --- a/drivers/usb/usbip/vudc_tx.c +++ b/drivers/usb/usbip/vudc_tx.c @@ -85,6 +85,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p) memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); + if (urb->actual_length > 0 && !urb->transfer_buffer) { + dev_err(&udc->gadget.dev, + "urb: actual_length %d transfer_buffer null\n", + urb->actual_length); + return -1; + } + if (urb_p->type == USB_ENDPOINT_XFER_ISOC) iovnum = 2 + urb->number_of_packets; else @@ -100,8 +107,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p) /* 1. 
setup usbip_header */ setup_ret_submit_pdu(&pdu_header, urb_p); - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n", - pdu_header.base.seqnum, urb); + usbip_dbg_stub_tx("setup txdata seqnum: %d\n", + pdu_header.base.seqnum); usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; -- cgit v1.2.3 From abb62c46d4949d44979fa647740feff3f7538799 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 29 Dec 2017 21:15:54 +0900 Subject: arm64: dts: uniphier: fix gpio-ranges property of PXs3 SoC This is probably a copy-paste mistake. The gpio-ranges of PXs3 is different from that of LD20. Fixes: 277b51e7050f ("arm64: dts: uniphier: add GPIO controller nodes") Signed-off-by: Masahiro Yamada Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index 48e733136db4..0ac2ace82435 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -198,8 +198,8 @@ gpio-controller; #gpio-cells = <2>; gpio-ranges = <&pinctrl 0 0 0>, - <&pinctrl 96 0 0>, - <&pinctrl 160 0 0>; + <&pinctrl 104 0 0>, + <&pinctrl 168 0 0>; gpio-ranges-group-names = "gpio_range0", "gpio_range1", "gpio_range2"; -- cgit v1.2.3 From 0856655a25476d4431005e39d606e349050066b0 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 11 Dec 2017 09:52:22 +0100 Subject: wcn36xx: Fix dynamic power saving Since the driver does not report the hardware dynamic power saving cap, it is up to mac80211 to manage the power saving timeout and state machine, using the ieee80211 config callback to report PS changes. This patch enables/disables PS mode according to the new configuration. Remove the old behaviour of enabling PS mode in a static way; this makes the device unusable when power save is enabled, since the device is forced to PS regardless of RX/TX traffic.
Acked-by: Bjorn Andersson Signed-off-by: Loic Poulain Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wcn36xx/main.c | 23 ++++++++++++----------- drivers/net/wireless/ath/wcn36xx/pmc.c | 6 ++++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index f7d228b5ba93..987f1252a3cf 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -384,6 +384,18 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed) } } + if (changed & IEEE80211_CONF_CHANGE_PS) { + list_for_each_entry(tmp, &wcn->vif_list, list) { + vif = wcn36xx_priv_to_vif(tmp); + if (hw->conf.flags & IEEE80211_CONF_PS) { + if (vif->bss_conf.ps) /* ps allowed ? */ + wcn36xx_pmc_enter_bmps_state(wcn, vif); + } else { + wcn36xx_pmc_exit_bmps_state(wcn, vif); + } + } + } + mutex_unlock(&wcn->conf_mutex); return 0; @@ -747,17 +759,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw, vif_priv->dtim_period = bss_conf->dtim_period; } - if (changed & BSS_CHANGED_PS) { - wcn36xx_dbg(WCN36XX_DBG_MAC, - "mac bss PS set %d\n", - bss_conf->ps); - if (bss_conf->ps) { - wcn36xx_pmc_enter_bmps_state(wcn, vif); - } else { - wcn36xx_pmc_exit_bmps_state(wcn, vif); - } - } - if (changed & BSS_CHANGED_BSSID) { wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n", bss_conf->bssid); diff --git a/drivers/net/wireless/ath/wcn36xx/pmc.c b/drivers/net/wireless/ath/wcn36xx/pmc.c index 589fe5f70971..1976b80c235f 100644 --- a/drivers/net/wireless/ath/wcn36xx/pmc.c +++ b/drivers/net/wireless/ath/wcn36xx/pmc.c @@ -45,8 +45,10 @@ int wcn36xx_pmc_exit_bmps_state(struct wcn36xx *wcn, struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif); if (WCN36XX_BMPS != vif_priv->pw_state) { - wcn36xx_err("Not in BMPS mode, no need to exit from BMPS mode!\n"); - return -EINVAL; + /* Unbalanced call or last BMPS enter failed */ + wcn36xx_dbg(WCN36XX_DBG_PMC, + "Not in BMPS mode, 
no need to exit\n"); + return -EALREADY; } wcn36xx_smd_exit_bmps(wcn, vif); vif_priv->pw_state = WCN36XX_FULL_POWER; -- cgit v1.2.3 From bab4a10f0dc745b3c07acb8fa5fbc4337e140f58 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 3 Jan 2018 10:38:24 -0800 Subject: ASoC: Added device tree binding for max98373 amplifier Signed-off-by: Ryan Lee Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/max98373.txt | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/max98373.txt diff --git a/Documentation/devicetree/bindings/sound/max98373.txt b/Documentation/devicetree/bindings/sound/max98373.txt new file mode 100644 index 000000000000..456cb1c59353 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/max98373.txt @@ -0,0 +1,40 @@ +Maxim Integrated MAX98373 Speaker Amplifier + +This device supports I2C. + +Required properties: + + - compatible : "maxim,max98373" + + - reg : the I2C address of the device. + +Optional properties: + + - maxim,vmon-slot-no : slot number used to send voltage information + or in interleave mode this will be used as + interleave slot. + slot range : 0 ~ 15, Default : 0 + + - maxim,imon-slot-no : slot number used to send current information + slot range : 0 ~ 15, Default : 0 + + - maxim,spkfb-slot-no : slot number used to send speaker feedback information + slot range : 0 ~ 15, Default : 0 + + - maxim,interleave-mode : For cases where a single combined channel + for the I/V sense data is not sufficient, the device can also be configured + to share a single data output channel on alternating frames. + In this configuration, the current and voltage data will be frame interleaved + on a single output channel.
+ Boolean, define to enable the interleave mode, Default : false + +Example: + +codec: max98373@31 { + compatible = "maxim,max98373"; + reg = <0x31>; + maxim,vmon-slot-no = <0>; + maxim,imon-slot-no = <1>; + maxim,spkfb-slot-no = <2>; + maxim,interleave-mode; +}; -- cgit v1.2.3 From 2f3d24a1355ad32845300dfd0a375c361be7ab38 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 3 Jan 2018 10:39:17 -0800 Subject: ASoC: max98373: Added Amplifier Driver Signed-off-by: Ryan Lee Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 5 + sound/soc/codecs/Makefile | 2 + sound/soc/codecs/max98373.c | 971 ++++++++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/max98373.h | 212 ++++++++++ 4 files changed, 1190 insertions(+) create mode 100644 sound/soc/codecs/max98373.c create mode 100644 sound/soc/codecs/max98373.h diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index a42ddbc93f3d..80af1f4d3097 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -95,6 +95,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_MAX98925 if I2C select SND_SOC_MAX98926 if I2C select SND_SOC_MAX98927 if I2C + select SND_SOC_MAX98373 if I2C select SND_SOC_MAX9850 if I2C select SND_SOC_MAX9860 if I2C select SND_SOC_MAX9768 if I2C @@ -623,6 +624,10 @@ config SND_SOC_MAX98927 tristate "Maxim Integrated MAX98927 Speaker Amplifier" depends on I2C +config SND_SOC_MAX98373 + tristate "Maxim Integrated MAX98373 Speaker Amplifier" + depends on I2C + config SND_SOC_MAX9850 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 0001069ce2a7..31a620b5e8a3 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -90,6 +90,7 @@ snd-soc-max9867-objs := max9867.o snd-soc-max98925-objs := max98925.o snd-soc-max98926-objs := max98926.o snd-soc-max98927-objs := max98927.o +snd-soc-max98373-objs := max98373.o snd-soc-max9850-objs := max9850.o snd-soc-max9860-objs := max9860.o snd-soc-mc13783-objs := mc13783.o @@ -330,6 +331,7 @@ 
obj-$(CONFIG_SND_SOC_MAX9867) += snd-soc-max9867.o obj-$(CONFIG_SND_SOC_MAX98925) += snd-soc-max98925.o obj-$(CONFIG_SND_SOC_MAX98926) += snd-soc-max98926.o obj-$(CONFIG_SND_SOC_MAX98927) += snd-soc-max98927.o +obj-$(CONFIG_SND_SOC_MAX98373) += snd-soc-max98373.o obj-$(CONFIG_SND_SOC_MAX9850) += snd-soc-max9850.o obj-$(CONFIG_SND_SOC_MAX9860) += snd-soc-max9860.o obj-$(CONFIG_SND_SOC_MC13783) += snd-soc-mc13783.o diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c new file mode 100644 index 000000000000..9af0d985d6e9 --- /dev/null +++ b/sound/soc/codecs/max98373.c @@ -0,0 +1,971 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017, Maxim Integrated */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "max98373.h" + +static struct reg_default max98373_reg[] = { + {MAX98373_R2000_SW_RESET, 0x00}, + {MAX98373_R2001_INT_RAW1, 0x00}, + {MAX98373_R2002_INT_RAW2, 0x00}, + {MAX98373_R2003_INT_RAW3, 0x00}, + {MAX98373_R2004_INT_STATE1, 0x00}, + {MAX98373_R2005_INT_STATE2, 0x00}, + {MAX98373_R2006_INT_STATE3, 0x00}, + {MAX98373_R2007_INT_FLAG1, 0x00}, + {MAX98373_R2008_INT_FLAG2, 0x00}, + {MAX98373_R2009_INT_FLAG3, 0x00}, + {MAX98373_R200A_INT_EN1, 0x00}, + {MAX98373_R200B_INT_EN2, 0x00}, + {MAX98373_R200C_INT_EN3, 0x00}, + {MAX98373_R200D_INT_FLAG_CLR1, 0x00}, + {MAX98373_R200E_INT_FLAG_CLR2, 0x00}, + {MAX98373_R200F_INT_FLAG_CLR3, 0x00}, + {MAX98373_R2010_IRQ_CTRL, 0x00}, + {MAX98373_R2014_THERM_WARN_THRESH, 0x10}, + {MAX98373_R2015_THERM_SHDN_THRESH, 0x27}, + {MAX98373_R2016_THERM_HYSTERESIS, 0x01}, + {MAX98373_R2017_THERM_FOLDBACK_SET, 0xC0}, + {MAX98373_R2018_THERM_FOLDBACK_EN, 0x00}, + {MAX98373_R201E_PIN_DRIVE_STRENGTH, 0x55}, + {MAX98373_R2020_PCM_TX_HIZ_EN_1, 0xFE}, + {MAX98373_R2021_PCM_TX_HIZ_EN_2, 0xFF}, + {MAX98373_R2022_PCM_TX_SRC_1, 0x00}, + {MAX98373_R2023_PCM_TX_SRC_2, 0x00}, + {MAX98373_R2024_PCM_DATA_FMT_CFG, 0xC0}, + 
{MAX98373_R2025_AUDIO_IF_MODE, 0x00}, + {MAX98373_R2026_PCM_CLOCK_RATIO, 0x04}, + {MAX98373_R2027_PCM_SR_SETUP_1, 0x08}, + {MAX98373_R2028_PCM_SR_SETUP_2, 0x88}, + {MAX98373_R2029_PCM_TO_SPK_MONO_MIX_1, 0x00}, + {MAX98373_R202A_PCM_TO_SPK_MONO_MIX_2, 0x00}, + {MAX98373_R202B_PCM_RX_EN, 0x00}, + {MAX98373_R202C_PCM_TX_EN, 0x00}, + {MAX98373_R202E_ICC_RX_CH_EN_1, 0x00}, + {MAX98373_R202F_ICC_RX_CH_EN_2, 0x00}, + {MAX98373_R2030_ICC_TX_HIZ_EN_1, 0xFF}, + {MAX98373_R2031_ICC_TX_HIZ_EN_2, 0xFF}, + {MAX98373_R2032_ICC_LINK_EN_CFG, 0x30}, + {MAX98373_R2034_ICC_TX_CNTL, 0x00}, + {MAX98373_R2035_ICC_TX_EN, 0x00}, + {MAX98373_R2036_SOUNDWIRE_CTRL, 0x05}, + {MAX98373_R203D_AMP_DIG_VOL_CTRL, 0x00}, + {MAX98373_R203E_AMP_PATH_GAIN, 0x08}, + {MAX98373_R203F_AMP_DSP_CFG, 0x02}, + {MAX98373_R2040_TONE_GEN_CFG, 0x00}, + {MAX98373_R2041_AMP_CFG, 0x03}, + {MAX98373_R2042_AMP_EDGE_RATE_CFG, 0x00}, + {MAX98373_R2043_AMP_EN, 0x00}, + {MAX98373_R2046_IV_SENSE_ADC_DSP_CFG, 0x04}, + {MAX98373_R2047_IV_SENSE_ADC_EN, 0x00}, + {MAX98373_R2051_MEAS_ADC_SAMPLING_RATE, 0x00}, + {MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG, 0x00}, + {MAX98373_R2053_MEAS_ADC_THERM_FLT_CFG, 0x00}, + {MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, 0x00}, + {MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, 0x00}, + {MAX98373_R2056_MEAS_ADC_PVDD_CH_EN, 0x00}, + {MAX98373_R2090_BDE_LVL_HOLD, 0x00}, + {MAX98373_R2091_BDE_GAIN_ATK_REL_RATE, 0x00}, + {MAX98373_R2092_BDE_CLIPPER_MODE, 0x00}, + {MAX98373_R2097_BDE_L1_THRESH, 0x00}, + {MAX98373_R2098_BDE_L2_THRESH, 0x00}, + {MAX98373_R2099_BDE_L3_THRESH, 0x00}, + {MAX98373_R209A_BDE_L4_THRESH, 0x00}, + {MAX98373_R209B_BDE_THRESH_HYST, 0x00}, + {MAX98373_R20A8_BDE_L1_CFG_1, 0x00}, + {MAX98373_R20A9_BDE_L1_CFG_2, 0x00}, + {MAX98373_R20AA_BDE_L1_CFG_3, 0x00}, + {MAX98373_R20AB_BDE_L2_CFG_1, 0x00}, + {MAX98373_R20AC_BDE_L2_CFG_2, 0x00}, + {MAX98373_R20AD_BDE_L2_CFG_3, 0x00}, + {MAX98373_R20AE_BDE_L3_CFG_1, 0x00}, + {MAX98373_R20AF_BDE_L3_CFG_2, 0x00}, + {MAX98373_R20B0_BDE_L3_CFG_3, 0x00}, + 
{MAX98373_R20B1_BDE_L4_CFG_1, 0x00}, + {MAX98373_R20B2_BDE_L4_CFG_2, 0x00}, + {MAX98373_R20B3_BDE_L4_CFG_3, 0x00}, + {MAX98373_R20B4_BDE_INFINITE_HOLD_RELEASE, 0x00}, + {MAX98373_R20B5_BDE_EN, 0x00}, + {MAX98373_R20B6_BDE_CUR_STATE_READBACK, 0x00}, + {MAX98373_R20D1_DHT_CFG, 0x01}, + {MAX98373_R20D2_DHT_ATTACK_CFG, 0x02}, + {MAX98373_R20D3_DHT_RELEASE_CFG, 0x03}, + {MAX98373_R20D4_DHT_EN, 0x00}, + {MAX98373_R20E0_LIMITER_THRESH_CFG, 0x00}, + {MAX98373_R20E1_LIMITER_ATK_REL_RATES, 0x00}, + {MAX98373_R20E2_LIMITER_EN, 0x00}, + {MAX98373_R20FE_DEVICE_AUTO_RESTART_CFG, 0x00}, + {MAX98373_R20FF_GLOBAL_SHDN, 0x00}, + {MAX98373_R21FF_REV_ID, 0x42}, +}; + +static int max98373_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct max98373_priv *max98373 = snd_soc_codec_get_drvdata(codec); + unsigned int format = 0; + unsigned int invert = 0; + + dev_dbg(codec->dev, "%s: fmt 0x%08X\n", __func__, fmt); + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + invert = MAX98373_PCM_MODE_CFG_PCM_BCLKEDGE; + break; + default: + dev_err(codec->dev, "DAI invert mode unsupported\n"); + return -EINVAL; + } + + regmap_update_bits(max98373->regmap, + MAX98373_R2026_PCM_CLOCK_RATIO, + MAX98373_PCM_MODE_CFG_PCM_BCLKEDGE, + invert); + + /* interface format */ + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + format = MAX98373_PCM_FORMAT_I2S; + break; + case SND_SOC_DAIFMT_LEFT_J: + format = MAX98373_PCM_FORMAT_LJ; + break; + case SND_SOC_DAIFMT_DSP_A: + format = MAX98373_PCM_FORMAT_TDM_MODE1; + break; + case SND_SOC_DAIFMT_DSP_B: + format = MAX98373_PCM_FORMAT_TDM_MODE0; + break; + default: + return -EINVAL; + } + + regmap_update_bits(max98373->regmap, + MAX98373_R2024_PCM_DATA_FMT_CFG, + MAX98373_PCM_MODE_CFG_FORMAT_MASK, + format << MAX98373_PCM_MODE_CFG_FORMAT_SHIFT); + + return 0; +} + +/* BCLKs per LRCLK */ +static const int 
bclk_sel_table[] = { + 32, 48, 64, 96, 128, 192, 256, 384, 512, 320, +}; + +static int max98373_get_bclk_sel(int bclk) +{ + int i; + /* match BCLKs per LRCLK */ + for (i = 0; i < ARRAY_SIZE(bclk_sel_table); i++) { + if (bclk_sel_table[i] == bclk) + return i + 2; + } + return 0; +} +static int max98373_set_clock(struct snd_soc_codec *codec, + struct snd_pcm_hw_params *params) +{ + struct max98373_priv *max98373 = snd_soc_codec_get_drvdata(codec); + /* BCLK/LRCLK ratio calculation */ + int blr_clk_ratio = params_channels(params) * max98373->ch_size; + int value; + + if (!max98373->tdm_mode) { + /* BCLK configuration */ + value = max98373_get_bclk_sel(blr_clk_ratio); + if (!value) { + dev_err(codec->dev, "format unsupported %d\n", + params_format(params)); + return -EINVAL; + } + + regmap_update_bits(max98373->regmap, + MAX98373_R2026_PCM_CLOCK_RATIO, + MAX98373_PCM_CLK_SETUP_BSEL_MASK, + value); + } + return 0; +} + +static int max98373_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + struct max98373_priv *max98373 = snd_soc_codec_get_drvdata(codec); + unsigned int sampling_rate = 0; + unsigned int chan_sz = 0; + + /* pcm mode configuration */ + switch (snd_pcm_format_width(params_format(params))) { + case 16: + chan_sz = MAX98373_PCM_MODE_CFG_CHANSZ_16; + break; + case 24: + chan_sz = MAX98373_PCM_MODE_CFG_CHANSZ_24; + break; + case 32: + chan_sz = MAX98373_PCM_MODE_CFG_CHANSZ_32; + break; + default: + dev_err(codec->dev, "format unsupported %d\n", + params_format(params)); + goto err; + } + + max98373->ch_size = snd_pcm_format_width(params_format(params)); + + regmap_update_bits(max98373->regmap, + MAX98373_R2024_PCM_DATA_FMT_CFG, + MAX98373_PCM_MODE_CFG_CHANSZ_MASK, chan_sz); + + dev_dbg(codec->dev, "format supported %d", + params_format(params)); + + /* sampling rate configuration */ + switch (params_rate(params)) { + case 8000: + sampling_rate = 
MAX98373_PCM_SR_SET1_SR_8000; + break; + case 11025: + sampling_rate = MAX98373_PCM_SR_SET1_SR_11025; + break; + case 12000: + sampling_rate = MAX98373_PCM_SR_SET1_SR_12000; + break; + case 16000: + sampling_rate = MAX98373_PCM_SR_SET1_SR_16000; + break; + case 22050: + sampling_rate = MAX98373_PCM_SR_SET1_SR_22050; + break; + case 24000: + sampling_rate = MAX98373_PCM_SR_SET1_SR_24000; + break; + case 32000: + sampling_rate = MAX98373_PCM_SR_SET1_SR_32000; + break; + case 44100: + sampling_rate = MAX98373_PCM_SR_SET1_SR_44100; + break; + case 48000: + sampling_rate = MAX98373_PCM_SR_SET1_SR_48000; + break; + default: + dev_err(codec->dev, "rate %d not supported\n", + params_rate(params)); + goto err; + } + /* set DAI_SR to correct LRCLK frequency */ + regmap_update_bits(max98373->regmap, + MAX98373_R2027_PCM_SR_SETUP_1, + MAX98373_PCM_SR_SET1_SR_MASK, + sampling_rate); + regmap_update_bits(max98373->regmap, + MAX98373_R2028_PCM_SR_SETUP_2, + MAX98373_PCM_SR_SET2_SR_MASK, + sampling_rate << MAX98373_PCM_SR_SET2_SR_SHIFT); + + /* set sampling rate of IV */ + if (max98373->interleave_mode && + sampling_rate > MAX98373_PCM_SR_SET1_SR_16000) + regmap_update_bits(max98373->regmap, + MAX98373_R2028_PCM_SR_SETUP_2, + MAX98373_PCM_SR_SET2_IVADC_SR_MASK, + sampling_rate - 3); + else + regmap_update_bits(max98373->regmap, + MAX98373_R2028_PCM_SR_SETUP_2, + MAX98373_PCM_SR_SET2_IVADC_SR_MASK, + sampling_rate); + + return max98373_set_clock(codec, params); +err: + return -EINVAL; +} + +static int max98373_dai_tdm_slot(struct snd_soc_dai *dai, + unsigned int tx_mask, unsigned int rx_mask, + int slots, int slot_width) +{ + struct snd_soc_codec *codec = dai->codec; + struct max98373_priv *max98373 = snd_soc_codec_get_drvdata(codec); + int bsel = 0; + unsigned int chan_sz = 0; + unsigned int mask; + int x, slot_found; + + max98373->tdm_mode = true; + + /* BCLK configuration */ + bsel = max98373_get_bclk_sel(slots * slot_width); + if (bsel == 0) { + dev_err(codec->dev, "BCLK %d not 
supported\n", + slots * slot_width); + return -EINVAL; + } + + regmap_update_bits(max98373->regmap, + MAX98373_R2026_PCM_CLOCK_RATIO, + MAX98373_PCM_CLK_SETUP_BSEL_MASK, + bsel); + + /* Channel size configuration */ + switch (slot_width) { + case 16: + chan_sz = MAX98373_PCM_MODE_CFG_CHANSZ_16; + break; + case 24: + chan_sz = MAX98373_PCM_MODE_CFG_CHANSZ_24; + break; + case 32: + chan_sz = MAX98373_PCM_MODE_CFG_CHANSZ_32; + break; + default: + dev_err(codec->dev, "format unsupported %d\n", + slot_width); + return -EINVAL; + } + + regmap_update_bits(max98373->regmap, + MAX98373_R2024_PCM_DATA_FMT_CFG, + MAX98373_PCM_MODE_CFG_CHANSZ_MASK, chan_sz); + + /* Rx slot configuration */ + slot_found = 0; + mask = rx_mask; + for (x = 0 ; x < 16 ; x++, mask >>= 1) { + if (mask & 0x1) { + if (slot_found == 0) + regmap_update_bits(max98373->regmap, + MAX98373_R2029_PCM_TO_SPK_MONO_MIX_1, + MAX98373_PCM_TO_SPK_CH0_SRC_MASK, x); + else + regmap_write(max98373->regmap, + MAX98373_R202A_PCM_TO_SPK_MONO_MIX_2, + x); + slot_found++; + if (slot_found > 1) + break; + } + } + + /* Tx slot Hi-Z configuration */ + regmap_write(max98373->regmap, + MAX98373_R2020_PCM_TX_HIZ_EN_1, + ~tx_mask & 0xFF); + regmap_write(max98373->regmap, + MAX98373_R2021_PCM_TX_HIZ_EN_2, + (~tx_mask & 0xFF00) >> 8); + + return 0; +} + +#define MAX98373_RATES SNDRV_PCM_RATE_8000_96000 + +#define MAX98373_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) + +static const struct snd_soc_dai_ops max98373_dai_ops = { + .set_fmt = max98373_dai_set_fmt, + .hw_params = max98373_dai_hw_params, + .set_tdm_slot = max98373_dai_tdm_slot, +}; + +static int max98373_dac_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct max98373_priv *max98373 = snd_soc_codec_get_drvdata(codec); + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + regmap_update_bits(max98373->regmap, + 
MAX98373_R20FF_GLOBAL_SHDN, + MAX98373_GLOBAL_EN_MASK, 1); + break; + case SND_SOC_DAPM_POST_PMD: + regmap_update_bits(max98373->regmap, + MAX98373_R20FF_GLOBAL_SHDN, + MAX98373_GLOBAL_EN_MASK, 0); + max98373->tdm_mode = 0; + break; + default: + return 0; + } + return 0; +} + +static const char * const max98373_switch_text[] = { + "Left", "Right", "LeftRight"}; + +static const struct soc_enum dai_sel_enum = + SOC_ENUM_SINGLE(MAX98373_R2029_PCM_TO_SPK_MONO_MIX_1, + MAX98373_PCM_TO_SPK_MONOMIX_CFG_SHIFT, + 3, max98373_switch_text); + +static const struct snd_kcontrol_new max98373_dai_controls = + SOC_DAPM_ENUM("DAI Sel", dai_sel_enum); + +static const struct snd_kcontrol_new max98373_vi_control = + SOC_DAPM_SINGLE("Switch", MAX98373_R202C_PCM_TX_EN, 0, 1, 0); + +static const struct snd_kcontrol_new max98373_spkfb_control = + SOC_DAPM_SINGLE("Switch", MAX98373_R2043_AMP_EN, 1, 1, 0); + +static const struct snd_soc_dapm_widget max98373_dapm_widgets[] = { +SND_SOC_DAPM_DAC_E("Amp Enable", "HiFi Playback", + MAX98373_R202B_PCM_RX_EN, 0, 0, max98373_dac_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), +SND_SOC_DAPM_MUX("DAI Sel Mux", SND_SOC_NOPM, 0, 0, + &max98373_dai_controls), +SND_SOC_DAPM_OUTPUT("BE_OUT"), +SND_SOC_DAPM_AIF_OUT("Voltage Sense", "HiFi Capture", 0, + MAX98373_R2047_IV_SENSE_ADC_EN, 0, 0), +SND_SOC_DAPM_AIF_OUT("Current Sense", "HiFi Capture", 0, + MAX98373_R2047_IV_SENSE_ADC_EN, 1, 0), +SND_SOC_DAPM_AIF_OUT("Speaker FB Sense", "HiFi Capture", 0, + SND_SOC_NOPM, 0, 0), +SND_SOC_DAPM_SWITCH("VI Sense", SND_SOC_NOPM, 0, 0, + &max98373_vi_control), +SND_SOC_DAPM_SWITCH("SpkFB Sense", SND_SOC_NOPM, 0, 0, + &max98373_spkfb_control), +SND_SOC_DAPM_SIGGEN("VMON"), +SND_SOC_DAPM_SIGGEN("IMON"), +SND_SOC_DAPM_SIGGEN("FBMON"), +}; + +static DECLARE_TLV_DB_SCALE(max98373_digital_tlv, 0, -50, 0); +static const DECLARE_TLV_DB_RANGE(max98373_spk_tlv, + 0, 8, TLV_DB_SCALE_ITEM(0, 50, 0), + 9, 10, TLV_DB_SCALE_ITEM(500, 100, 0), +); +static const 
DECLARE_TLV_DB_RANGE(max98373_spkgain_max_tlv, + 0, 9, TLV_DB_SCALE_ITEM(800, 100, 0), +); +static const DECLARE_TLV_DB_RANGE(max98373_dht_step_size_tlv, + 0, 1, TLV_DB_SCALE_ITEM(25, 25, 0), + 2, 4, TLV_DB_SCALE_ITEM(100, 100, 0), +); +static const DECLARE_TLV_DB_RANGE(max98373_dht_spkgain_min_tlv, + 0, 9, TLV_DB_SCALE_ITEM(800, 100, 0), +); +static const DECLARE_TLV_DB_RANGE(max98373_dht_rotation_point_tlv, + 0, 1, TLV_DB_SCALE_ITEM(-50, -50, 0), + 2, 7, TLV_DB_SCALE_ITEM(-200, -100, 0), + 8, 9, TLV_DB_SCALE_ITEM(-1000, -200, 0), + 10, 11, TLV_DB_SCALE_ITEM(-1500, -300, 0), + 12, 13, TLV_DB_SCALE_ITEM(-2000, -200, 0), + 14, 15, TLV_DB_SCALE_ITEM(-2500, -500, 0), +); +static const DECLARE_TLV_DB_RANGE(max98373_limiter_thresh_tlv, + 0, 15, TLV_DB_SCALE_ITEM(0, -100, 0), +); + +static const DECLARE_TLV_DB_RANGE(max98373_bde_gain_tlv, + 0, 60, TLV_DB_SCALE_ITEM(0, -25, 0), +); + +static bool max98373_readable_register(struct device *dev, unsigned int reg) +{ + switch (reg) { + case MAX98373_R2001_INT_RAW1 ... MAX98373_R200C_INT_EN3: + case MAX98373_R2010_IRQ_CTRL: + case MAX98373_R2014_THERM_WARN_THRESH + ... MAX98373_R2018_THERM_FOLDBACK_EN: + case MAX98373_R201E_PIN_DRIVE_STRENGTH + ... MAX98373_R2036_SOUNDWIRE_CTRL: + case MAX98373_R203D_AMP_DIG_VOL_CTRL ... MAX98373_R2043_AMP_EN: + case MAX98373_R2046_IV_SENSE_ADC_DSP_CFG + ... MAX98373_R2047_IV_SENSE_ADC_EN: + case MAX98373_R2051_MEAS_ADC_SAMPLING_RATE + ... MAX98373_R2056_MEAS_ADC_PVDD_CH_EN: + case MAX98373_R2090_BDE_LVL_HOLD ... MAX98373_R2092_BDE_CLIPPER_MODE: + case MAX98373_R2097_BDE_L1_THRESH + ... MAX98373_R209B_BDE_THRESH_HYST: + case MAX98373_R20A8_BDE_L1_CFG_1 ... MAX98373_R20B3_BDE_L4_CFG_3: + case MAX98373_R20B5_BDE_EN ... MAX98373_R20B6_BDE_CUR_STATE_READBACK: + case MAX98373_R20D1_DHT_CFG ... MAX98373_R20D4_DHT_EN: + case MAX98373_R20E0_LIMITER_THRESH_CFG ... MAX98373_R20E2_LIMITER_EN: + case MAX98373_R20FE_DEVICE_AUTO_RESTART_CFG + ... 
MAX98373_R20FF_GLOBAL_SHDN: + case MAX98373_R21FF_REV_ID: + return true; + default: + return false; + } +}; + +static bool max98373_volatile_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case MAX98373_R2000_SW_RESET ... MAX98373_R2009_INT_FLAG3: + case MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK: + case MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK: + case MAX98373_R20B6_BDE_CUR_STATE_READBACK: + case MAX98373_R21FF_REV_ID: + return true; + default: + return false; + } +} + +static const char * const max98373_output_voltage_lvl_text[] = { + "5.43V", "6.09V", "6.83V", "7.67V", "8.60V", + "9.65V", "10.83V", "12.15V", "13.63V", "15.29V" +}; + +static SOC_ENUM_SINGLE_DECL(max98373_out_volt_enum, + MAX98373_R203E_AMP_PATH_GAIN, 0, + max98373_output_voltage_lvl_text); + +static const char * const max98373_dht_attack_rate_text[] = { + "17.5us", "35us", "70us", "140us", + "280us", "560us", "1120us", "2240us" +}; + +static SOC_ENUM_SINGLE_DECL(max98373_dht_attack_rate_enum, + MAX98373_R20D2_DHT_ATTACK_CFG, 0, + max98373_dht_attack_rate_text); + +static const char * const max98373_dht_release_rate_text[] = { + "45ms", "225ms", "450ms", "1150ms", + "2250ms", "3100ms", "4500ms", "6750ms" +}; + +static SOC_ENUM_SINGLE_DECL(max98373_dht_release_rate_enum, + MAX98373_R20D3_DHT_RELEASE_CFG, 0, + max98373_dht_release_rate_text); + +static const char * const max98373_limiter_attack_rate_text[] = { + "10us", "20us", "40us", "80us", + "160us", "320us", "640us", "1.28ms", + "2.56ms", "5.12ms", "10.24ms", "20.48ms", + "40.96ms", "81.92ms", "16.384ms", "32.768ms" +}; + +static SOC_ENUM_SINGLE_DECL(max98373_limiter_attack_rate_enum, + MAX98373_R20E1_LIMITER_ATK_REL_RATES, 4, + max98373_limiter_attack_rate_text); + +static const char * const max98373_limiter_release_rate_text[] = { + "40us", "80us", "160us", "320us", + "640us", "1.28ms", "2.56ms", "5.120ms", + "10.24ms", "20.48ms", "40.96ms", "81.92ms", + "163.84ms", "327.68ms", "655.36ms", "1310.72ms" +}; + +static 
SOC_ENUM_SINGLE_DECL(max98373_limiter_release_rate_enum, + MAX98373_R20E1_LIMITER_ATK_REL_RATES, 0, + max98373_limiter_release_rate_text); + +static const char * const max98373_ADC_samplerate_text[] = { + "333kHz", "192kHz", "64kHz", "48kHz" +}; + +static SOC_ENUM_SINGLE_DECL(max98373_adc_samplerate_enum, + MAX98373_R2051_MEAS_ADC_SAMPLING_RATE, 0, + max98373_ADC_samplerate_text); + +static const struct snd_kcontrol_new max98373_snd_controls[] = { +SOC_SINGLE("Digital Vol Sel Switch", MAX98373_R203F_AMP_DSP_CFG, + MAX98373_AMP_VOL_SEL_SHIFT, 1, 0), +SOC_SINGLE("Volume Location Switch", MAX98373_R203F_AMP_DSP_CFG, + MAX98373_AMP_VOL_SEL_SHIFT, 1, 0), +SOC_SINGLE("Ramp Up Switch", MAX98373_R203F_AMP_DSP_CFG, + MAX98373_AMP_DSP_CFG_RMP_UP_SHIFT, 1, 0), +SOC_SINGLE("Ramp Down Switch", MAX98373_R203F_AMP_DSP_CFG, + MAX98373_AMP_DSP_CFG_RMP_DN_SHIFT, 1, 0), +SOC_SINGLE("CLK Monitor Switch", MAX98373_R20FE_DEVICE_AUTO_RESTART_CFG, + MAX98373_CLOCK_MON_SHIFT, 1, 0), +SOC_SINGLE("Dither Switch", MAX98373_R203F_AMP_DSP_CFG, + MAX98373_AMP_DSP_CFG_DITH_SHIFT, 1, 0), +SOC_SINGLE("DC Blocker Switch", MAX98373_R203F_AMP_DSP_CFG, + MAX98373_AMP_DSP_CFG_DCBLK_SHIFT, 1, 0), +SOC_SINGLE_TLV("Digital Volume", MAX98373_R203D_AMP_DIG_VOL_CTRL, + 0, 0x7F, 0, max98373_digital_tlv), +SOC_SINGLE_TLV("Speaker Volume", MAX98373_R203E_AMP_PATH_GAIN, + MAX98373_SPK_DIGI_GAIN_SHIFT, 10, 0, max98373_spk_tlv), +SOC_SINGLE_TLV("FS Max Volume", MAX98373_R203E_AMP_PATH_GAIN, + MAX98373_FS_GAIN_MAX_SHIFT, 9, 0, max98373_spkgain_max_tlv), +SOC_ENUM("Output Voltage", max98373_out_volt_enum), +/* Dynamic Headroom Tracking */ +SOC_SINGLE("DHT Switch", MAX98373_R20D4_DHT_EN, + MAX98373_DHT_EN_SHIFT, 1, 0), +SOC_SINGLE_TLV("DHT Gain Min", MAX98373_R20D1_DHT_CFG, + MAX98373_DHT_SPK_GAIN_MIN_SHIFT, 9, 0, max98373_dht_spkgain_min_tlv), +SOC_SINGLE_TLV("DHT Rot Pnt", MAX98373_R20D1_DHT_CFG, + MAX98373_DHT_ROT_PNT_SHIFT, 15, 0, max98373_dht_rotation_point_tlv), +SOC_SINGLE_TLV("DHT Attack Step", 
MAX98373_R20D2_DHT_ATTACK_CFG, + MAX98373_DHT_ATTACK_STEP_SHIFT, 4, 0, max98373_dht_step_size_tlv), +SOC_SINGLE_TLV("DHT Release Step", MAX98373_R20D3_DHT_RELEASE_CFG, + MAX98373_DHT_RELEASE_STEP_SHIFT, 4, 0, max98373_dht_step_size_tlv), +SOC_ENUM("DHT Attack Rate", max98373_dht_attack_rate_enum), +SOC_ENUM("DHT Release Rate", max98373_dht_release_rate_enum), +/* ADC configuration */ +SOC_SINGLE("ADC PVDD CH Switch", MAX98373_R2056_MEAS_ADC_PVDD_CH_EN, 0, 1, 0), +SOC_SINGLE("ADC PVDD FLT Switch", MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG, + MAX98373_FLT_EN_SHIFT, 1, 0), +SOC_SINGLE("ADC TEMP FLT Switch", MAX98373_R2053_MEAS_ADC_THERM_FLT_CFG, + MAX98373_FLT_EN_SHIFT, 1, 0), +SOC_SINGLE("ADC PVDD", MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, 0, 0xFF, 0), +SOC_SINGLE("ADC TEMP", MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, 0, 0xFF, 0), +SOC_SINGLE("ADC PVDD FLT Coeff", MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG, + 0, 0x3, 0), +SOC_SINGLE("ADC TEMP FLT Coeff", MAX98373_R2053_MEAS_ADC_THERM_FLT_CFG, + 0, 0x3, 0), +SOC_ENUM("ADC SampleRate", max98373_adc_samplerate_enum), +/* Brownout Detection Engine */ +SOC_SINGLE("BDE Switch", MAX98373_R20B5_BDE_EN, MAX98373_BDE_EN_SHIFT, 1, 0), +SOC_SINGLE("BDE LVL4 Mute Switch", MAX98373_R20B2_BDE_L4_CFG_2, + MAX98373_LVL4_MUTE_EN_SHIFT, 1, 0), +SOC_SINGLE("BDE LVL4 Hold Switch", MAX98373_R20B2_BDE_L4_CFG_2, + MAX98373_LVL4_HOLD_EN_SHIFT, 1, 0), +SOC_SINGLE("BDE LVL1 Thresh", MAX98373_R2097_BDE_L1_THRESH, 0, 0xFF, 0), +SOC_SINGLE("BDE LVL2 Thresh", MAX98373_R2098_BDE_L2_THRESH, 0, 0xFF, 0), +SOC_SINGLE("BDE LVL3 Thresh", MAX98373_R2099_BDE_L3_THRESH, 0, 0xFF, 0), +SOC_SINGLE("BDE LVL4 Thresh", MAX98373_R209A_BDE_L4_THRESH, 0, 0xFF, 0), +SOC_SINGLE("BDE Active Level", MAX98373_R20B6_BDE_CUR_STATE_READBACK, 0, 8, 0), +SOC_SINGLE("BDE Clip Mode Switch", MAX98373_R2092_BDE_CLIPPER_MODE, 0, 1, 0), +SOC_SINGLE("BDE Thresh Hysteresis", MAX98373_R209B_BDE_THRESH_HYST, 0, 0xFF, 0), +SOC_SINGLE("BDE Hold Time", MAX98373_R2090_BDE_LVL_HOLD, 0, 0xFF, 0), 
+SOC_SINGLE("BDE Attack Rate", MAX98373_R2091_BDE_GAIN_ATK_REL_RATE, 4, 0xF, 0), +SOC_SINGLE("BDE Release Rate", MAX98373_R2091_BDE_GAIN_ATK_REL_RATE, 0, 0xF, 0), +SOC_SINGLE_TLV("BDE LVL1 Clip Thresh", MAX98373_R20A9_BDE_L1_CFG_2, + 0, 0x3C, 0, max98373_bde_gain_tlv), +SOC_SINGLE_TLV("BDE LVL2 Clip Thresh", MAX98373_R20AC_BDE_L2_CFG_2, + 0, 0x3C, 0, max98373_bde_gain_tlv), +SOC_SINGLE_TLV("BDE LVL3 Clip Thresh", MAX98373_R20AF_BDE_L3_CFG_2, + 0, 0x3C, 0, max98373_bde_gain_tlv), +SOC_SINGLE_TLV("BDE LVL4 Clip Thresh", MAX98373_R20B2_BDE_L4_CFG_2, + 0, 0x3C, 0, max98373_bde_gain_tlv), +SOC_SINGLE_TLV("BDE LVL1 Clip Gain Reduct", MAX98373_R20AA_BDE_L1_CFG_3, + 0, 0x3C, 0, max98373_bde_gain_tlv), +SOC_SINGLE_TLV("BDE LVL2 Clip Gain Reduct", MAX98373_R20AD_BDE_L2_CFG_3, + 0, 0x3C, 0, max98373_bde_gain_tlv), +SOC_SINGLE_TLV("BDE LVL3 Clip Gain Reduct", MAX98373_R20B0_BDE_L3_CFG_3, + 0, 0x3C, 0, max98373_bde_gain_tlv), +SOC_SINGLE_TLV("BDE LVL4 Clip Gain Reduct", MAX98373_R20B3_BDE_L4_CFG_3, + 0, 0x3C, 0, max98373_bde_gain_tlv), +SOC_SINGLE_TLV("BDE LVL1 Limiter Thresh", MAX98373_R20A8_BDE_L1_CFG_1, + 0, 0xF, 0, max98373_limiter_thresh_tlv), +SOC_SINGLE_TLV("BDE LVL2 Limiter Thresh", MAX98373_R20AB_BDE_L2_CFG_1, + 0, 0xF, 0, max98373_limiter_thresh_tlv), +SOC_SINGLE_TLV("BDE LVL3 Limiter Thresh", MAX98373_R20AE_BDE_L3_CFG_1, + 0, 0xF, 0, max98373_limiter_thresh_tlv), +SOC_SINGLE_TLV("BDE LVL4 Limiter Thresh", MAX98373_R20B1_BDE_L4_CFG_1, + 0, 0xF, 0, max98373_limiter_thresh_tlv), +/* Limiter */ +SOC_SINGLE("Limiter Switch", MAX98373_R20E2_LIMITER_EN, + MAX98373_LIMITER_EN_SHIFT, 1, 0), +SOC_SINGLE("Limiter Src Switch", MAX98373_R20E0_LIMITER_THRESH_CFG, + MAX98373_LIMITER_THRESH_SRC_SHIFT, 1, 0), +SOC_SINGLE_TLV("Limiter Thresh", MAX98373_R20E0_LIMITER_THRESH_CFG, + MAX98373_LIMITER_THRESH_SHIFT, 15, 0, max98373_limiter_thresh_tlv), +SOC_ENUM("Limiter Attack Rate", max98373_limiter_attack_rate_enum), +SOC_ENUM("Limiter Release Rate", max98373_limiter_release_rate_enum), 
+}; + +static const struct snd_soc_dapm_route max98373_audio_map[] = { + /* Plabyack */ + {"DAI Sel Mux", "Left", "Amp Enable"}, + {"DAI Sel Mux", "Right", "Amp Enable"}, + {"DAI Sel Mux", "LeftRight", "Amp Enable"}, + {"BE_OUT", NULL, "DAI Sel Mux"}, + /* Capture */ + { "VI Sense", "Switch", "VMON" }, + { "VI Sense", "Switch", "IMON" }, + { "SpkFB Sense", "Switch", "FBMON" }, + { "Voltage Sense", NULL, "VI Sense" }, + { "Current Sense", NULL, "VI Sense" }, + { "Speaker FB Sense", NULL, "SpkFB Sense" }, +}; + +static struct snd_soc_dai_driver max98373_dai[] = { + { + .name = "max98373-aif1", + .playback = { + .stream_name = "HiFi Playback", + .channels_min = 1, + .channels_max = 2, + .rates = MAX98373_RATES, + .formats = MAX98373_FORMATS, + }, + .capture = { + .stream_name = "HiFi Capture", + .channels_min = 1, + .channels_max = 2, + .rates = MAX98373_RATES, + .formats = MAX98373_FORMATS, + }, + .ops = &max98373_dai_ops, + } +}; + +static int max98373_probe(struct snd_soc_codec *codec) +{ + struct max98373_priv *max98373 = snd_soc_codec_get_drvdata(codec); + + codec->control_data = max98373->regmap; + + /* Software Reset */ + regmap_write(max98373->regmap, + MAX98373_R2000_SW_RESET, MAX98373_SOFT_RESET); + + /* IV default slot configuration */ + regmap_write(max98373->regmap, + MAX98373_R2020_PCM_TX_HIZ_EN_1, + 0xFF); + regmap_write(max98373->regmap, + MAX98373_R2021_PCM_TX_HIZ_EN_2, + 0xFF); + /* L/R mix configuration */ + regmap_write(max98373->regmap, + MAX98373_R2029_PCM_TO_SPK_MONO_MIX_1, + 0x80); + regmap_write(max98373->regmap, + MAX98373_R202A_PCM_TO_SPK_MONO_MIX_2, + 0x1); + /* Set inital volume (0dB) */ + regmap_write(max98373->regmap, + MAX98373_R203D_AMP_DIG_VOL_CTRL, + 0x00); + regmap_write(max98373->regmap, + MAX98373_R203E_AMP_PATH_GAIN, + 0x00); + /* Enable DC blocker */ + regmap_write(max98373->regmap, + MAX98373_R203F_AMP_DSP_CFG, + 0x3); + /* Enable IMON VMON DC blocker */ + regmap_write(max98373->regmap, + MAX98373_R2046_IV_SENSE_ADC_DSP_CFG, + 
0x7); + /* voltage, current slot configuration */ + regmap_write(max98373->regmap, + MAX98373_R2022_PCM_TX_SRC_1, + (max98373->i_slot << MAX98373_PCM_TX_CH_SRC_A_I_SHIFT | + max98373->v_slot) & 0xFF); + if (max98373->v_slot < 8) + regmap_update_bits(max98373->regmap, + MAX98373_R2020_PCM_TX_HIZ_EN_1, + 1 << max98373->v_slot, 0); + else + regmap_update_bits(max98373->regmap, + MAX98373_R2021_PCM_TX_HIZ_EN_2, + 1 << (max98373->v_slot - 8), 0); + + if (max98373->i_slot < 8) + regmap_update_bits(max98373->regmap, + MAX98373_R2020_PCM_TX_HIZ_EN_1, + 1 << max98373->i_slot, 0); + else + regmap_update_bits(max98373->regmap, + MAX98373_R2021_PCM_TX_HIZ_EN_2, + 1 << (max98373->i_slot - 8), 0); + + /* speaker feedback slot configuration */ + regmap_write(max98373->regmap, + MAX98373_R2023_PCM_TX_SRC_2, + max98373->spkfb_slot & 0xFF); + + /* Set interleave mode */ + if (max98373->interleave_mode) + regmap_update_bits(max98373->regmap, + MAX98373_R2024_PCM_DATA_FMT_CFG, + MAX98373_PCM_TX_CH_INTERLEAVE_MASK, + MAX98373_PCM_TX_CH_INTERLEAVE_MASK); + + /* Speaker enable */ + regmap_update_bits(max98373->regmap, + MAX98373_R2043_AMP_EN, + MAX98373_SPK_EN_MASK, 1); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int max98373_suspend(struct device *dev) +{ + struct max98373_priv *max98373 = dev_get_drvdata(dev); + + regcache_cache_only(max98373->regmap, true); + regcache_mark_dirty(max98373->regmap); + return 0; +} +static int max98373_resume(struct device *dev) +{ + struct max98373_priv *max98373 = dev_get_drvdata(dev); + + regmap_write(max98373->regmap, + MAX98373_R2000_SW_RESET, MAX98373_SOFT_RESET); + regcache_cache_only(max98373->regmap, false); + regcache_sync(max98373->regmap); + return 0; +} +#endif + +static const struct dev_pm_ops max98373_pm = { + SET_SYSTEM_SLEEP_PM_OPS(max98373_suspend, max98373_resume) +}; + +static const struct snd_soc_codec_driver soc_codec_dev_max98373 = { + .probe = max98373_probe, + .component_driver = { + .controls = max98373_snd_controls, + 
.num_controls = ARRAY_SIZE(max98373_snd_controls), + .dapm_widgets = max98373_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(max98373_dapm_widgets), + .dapm_routes = max98373_audio_map, + .num_dapm_routes = ARRAY_SIZE(max98373_audio_map), + }, +}; + +static const struct regmap_config max98373_regmap = { + .reg_bits = 16, + .val_bits = 8, + .max_register = MAX98373_R21FF_REV_ID, + .reg_defaults = max98373_reg, + .num_reg_defaults = ARRAY_SIZE(max98373_reg), + .readable_reg = max98373_readable_register, + .volatile_reg = max98373_volatile_reg, + .cache_type = REGCACHE_RBTREE, +}; + +static void max98373_slot_config(struct i2c_client *i2c, + struct max98373_priv *max98373) +{ + int value; + struct device *dev = &i2c->dev; + + if (!device_property_read_u32(dev, "maxim,vmon-slot-no", &value)) + max98373->v_slot = value & 0xF; + else + max98373->v_slot = 0; + + if (!device_property_read_u32(dev, "maxim,imon-slot-no", &value)) + max98373->i_slot = value & 0xF; + else + max98373->i_slot = 1; + + if (!device_property_read_u32(dev, "maxim,spkfb-slot-no", &value)) + max98373->spkfb_slot = value & 0xF; + else + max98373->spkfb_slot = 2; +} + +static int max98373_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + + int ret = 0; + int reg = 0; + struct max98373_priv *max98373 = NULL; + + max98373 = devm_kzalloc(&i2c->dev, sizeof(*max98373), GFP_KERNEL); + + if (!max98373) { + ret = -ENOMEM; + return ret; + } + i2c_set_clientdata(i2c, max98373); + + /* update interleave mode info */ + if (device_property_read_bool(&i2c->dev, "maxim,interleave_mode")) + max98373->interleave_mode = 1; + else + max98373->interleave_mode = 0; + + + /* regmap initialization */ + max98373->regmap + = devm_regmap_init_i2c(i2c, &max98373_regmap); + if (IS_ERR(max98373->regmap)) { + ret = PTR_ERR(max98373->regmap); + dev_err(&i2c->dev, + "Failed to allocate regmap: %d\n", ret); + return ret; + } + + /* Check Revision ID */ + ret = regmap_read(max98373->regmap, + 
MAX98373_R21FF_REV_ID, &reg); + if (ret < 0) { + dev_err(&i2c->dev, + "Failed to read: 0x%02X\n", MAX98373_R21FF_REV_ID); + return ret; + } + dev_info(&i2c->dev, "MAX98373 revisionID: 0x%02X\n", reg); + + /* voltage/current slot configuration */ + max98373_slot_config(i2c, max98373); + + /* codec registeration */ + ret = snd_soc_register_codec(&i2c->dev, &soc_codec_dev_max98373, + max98373_dai, ARRAY_SIZE(max98373_dai)); + if (ret < 0) + dev_err(&i2c->dev, "Failed to register codec: %d\n", ret); + + return ret; +} + +static int max98373_i2c_remove(struct i2c_client *client) +{ + snd_soc_unregister_codec(&client->dev); + return 0; +} + +static const struct i2c_device_id max98373_i2c_id[] = { + { "max98373", 0}, + { }, +}; + +MODULE_DEVICE_TABLE(i2c, max98373_i2c_id); + +#if defined(CONFIG_OF) +static const struct of_device_id max98373_of_match[] = { + { .compatible = "maxim,max98373", }, + { } +}; +MODULE_DEVICE_TABLE(of, max98373_of_match); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id max98373_acpi_match[] = { + { "MX98373", 0 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, max98373_acpi_match); +#endif + +static struct i2c_driver max98373_i2c_driver = { + .driver = { + .name = "max98373", + .of_match_table = of_match_ptr(max98373_of_match), + .acpi_match_table = ACPI_PTR(max98373_acpi_match), + .pm = &max98373_pm, + }, + .probe = max98373_i2c_probe, + .remove = max98373_i2c_remove, + .id_table = max98373_i2c_id, +}; + +module_i2c_driver(max98373_i2c_driver) + +MODULE_DESCRIPTION("ALSA SoC MAX98373 driver"); +MODULE_AUTHOR("Ryan Lee "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/max98373.h b/sound/soc/codecs/max98373.h new file mode 100644 index 000000000000..d0b359d0cf8c --- /dev/null +++ b/sound/soc/codecs/max98373.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2017, Maxim Integrated */ +#ifndef _MAX98373_H +#define _MAX98373_H + +#define MAX98373_R2000_SW_RESET 0x2000 +#define MAX98373_R2001_INT_RAW1 0x2001
+#define MAX98373_R2002_INT_RAW2 0x2002 +#define MAX98373_R2003_INT_RAW3 0x2003 +#define MAX98373_R2004_INT_STATE1 0x2004 +#define MAX98373_R2005_INT_STATE2 0x2005 +#define MAX98373_R2006_INT_STATE3 0x2006 +#define MAX98373_R2007_INT_FLAG1 0x2007 +#define MAX98373_R2008_INT_FLAG2 0x2008 +#define MAX98373_R2009_INT_FLAG3 0x2009 +#define MAX98373_R200A_INT_EN1 0x200A +#define MAX98373_R200B_INT_EN2 0x200B +#define MAX98373_R200C_INT_EN3 0x200C +#define MAX98373_R200D_INT_FLAG_CLR1 0x200D +#define MAX98373_R200E_INT_FLAG_CLR2 0x200E +#define MAX98373_R200F_INT_FLAG_CLR3 0x200F +#define MAX98373_R2010_IRQ_CTRL 0x2010 +#define MAX98373_R2014_THERM_WARN_THRESH 0x2014 +#define MAX98373_R2015_THERM_SHDN_THRESH 0x2015 +#define MAX98373_R2016_THERM_HYSTERESIS 0x2016 +#define MAX98373_R2017_THERM_FOLDBACK_SET 0x2017 +#define MAX98373_R2018_THERM_FOLDBACK_EN 0x2018 +#define MAX98373_R201E_PIN_DRIVE_STRENGTH 0x201E +#define MAX98373_R2020_PCM_TX_HIZ_EN_1 0x2020 +#define MAX98373_R2021_PCM_TX_HIZ_EN_2 0x2021 +#define MAX98373_R2022_PCM_TX_SRC_1 0x2022 +#define MAX98373_R2023_PCM_TX_SRC_2 0x2023 +#define MAX98373_R2024_PCM_DATA_FMT_CFG 0x2024 +#define MAX98373_R2025_AUDIO_IF_MODE 0x2025 +#define MAX98373_R2026_PCM_CLOCK_RATIO 0x2026 +#define MAX98373_R2027_PCM_SR_SETUP_1 0x2027 +#define MAX98373_R2028_PCM_SR_SETUP_2 0x2028 +#define MAX98373_R2029_PCM_TO_SPK_MONO_MIX_1 0x2029 +#define MAX98373_R202A_PCM_TO_SPK_MONO_MIX_2 0x202A +#define MAX98373_R202B_PCM_RX_EN 0x202B +#define MAX98373_R202C_PCM_TX_EN 0x202C +#define MAX98373_R202E_ICC_RX_CH_EN_1 0x202E +#define MAX98373_R202F_ICC_RX_CH_EN_2 0x202F +#define MAX98373_R2030_ICC_TX_HIZ_EN_1 0x2030 +#define MAX98373_R2031_ICC_TX_HIZ_EN_2 0x2031 +#define MAX98373_R2032_ICC_LINK_EN_CFG 0x2032 +#define MAX98373_R2034_ICC_TX_CNTL 0x2034 +#define MAX98373_R2035_ICC_TX_EN 0x2035 +#define MAX98373_R2036_SOUNDWIRE_CTRL 0x2036 +#define MAX98373_R203D_AMP_DIG_VOL_CTRL 0x203D +#define MAX98373_R203E_AMP_PATH_GAIN 0x203E +#define 
MAX98373_R203F_AMP_DSP_CFG 0x203F +#define MAX98373_R2040_TONE_GEN_CFG 0x2040 +#define MAX98373_R2041_AMP_CFG 0x2041 +#define MAX98373_R2042_AMP_EDGE_RATE_CFG 0x2042 +#define MAX98373_R2043_AMP_EN 0x2043 +#define MAX98373_R2046_IV_SENSE_ADC_DSP_CFG 0x2046 +#define MAX98373_R2047_IV_SENSE_ADC_EN 0x2047 +#define MAX98373_R2051_MEAS_ADC_SAMPLING_RATE 0x2051 +#define MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG 0x2052 +#define MAX98373_R2053_MEAS_ADC_THERM_FLT_CFG 0x2053 +#define MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK 0x2054 +#define MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK 0x2055 +#define MAX98373_R2056_MEAS_ADC_PVDD_CH_EN 0x2056 +#define MAX98373_R2090_BDE_LVL_HOLD 0x2090 +#define MAX98373_R2091_BDE_GAIN_ATK_REL_RATE 0x2091 +#define MAX98373_R2092_BDE_CLIPPER_MODE 0x2092 +#define MAX98373_R2097_BDE_L1_THRESH 0x2097 +#define MAX98373_R2098_BDE_L2_THRESH 0x2098 +#define MAX98373_R2099_BDE_L3_THRESH 0x2099 +#define MAX98373_R209A_BDE_L4_THRESH 0x209A +#define MAX98373_R209B_BDE_THRESH_HYST 0x209B +#define MAX98373_R20A8_BDE_L1_CFG_1 0x20A8 +#define MAX98373_R20A9_BDE_L1_CFG_2 0x20A9 +#define MAX98373_R20AA_BDE_L1_CFG_3 0x20AA +#define MAX98373_R20AB_BDE_L2_CFG_1 0x20AB +#define MAX98373_R20AC_BDE_L2_CFG_2 0x20AC +#define MAX98373_R20AD_BDE_L2_CFG_3 0x20AD +#define MAX98373_R20AE_BDE_L3_CFG_1 0x20AE +#define MAX98373_R20AF_BDE_L3_CFG_2 0x20AF +#define MAX98373_R20B0_BDE_L3_CFG_3 0x20B0 +#define MAX98373_R20B1_BDE_L4_CFG_1 0x20B1 +#define MAX98373_R20B2_BDE_L4_CFG_2 0x20B2 +#define MAX98373_R20B3_BDE_L4_CFG_3 0x20B3 +#define MAX98373_R20B4_BDE_INFINITE_HOLD_RELEASE 0x20B4 +#define MAX98373_R20B5_BDE_EN 0x20B5 +#define MAX98373_R20B6_BDE_CUR_STATE_READBACK 0x20B6 +#define MAX98373_R20D1_DHT_CFG 0x20D1 +#define MAX98373_R20D2_DHT_ATTACK_CFG 0x20D2 +#define MAX98373_R20D3_DHT_RELEASE_CFG 0x20D3 +#define MAX98373_R20D4_DHT_EN 0x20D4 +#define MAX98373_R20E0_LIMITER_THRESH_CFG 0x20E0 +#define MAX98373_R20E1_LIMITER_ATK_REL_RATES 0x20E1 +#define MAX98373_R20E2_LIMITER_EN 0x20E2 
+#define MAX98373_R20FE_DEVICE_AUTO_RESTART_CFG 0x20FE +#define MAX98373_R20FF_GLOBAL_SHDN 0x20FF +#define MAX98373_R21FF_REV_ID 0x21FF + +/* MAX98373_R2022_PCM_TX_SRC_1 */ +#define MAX98373_PCM_TX_CH_SRC_A_V_SHIFT (0) +#define MAX98373_PCM_TX_CH_SRC_A_I_SHIFT (4) + +/* MAX98373_R2024_PCM_DATA_FMT_CFG */ +#define MAX98373_PCM_MODE_CFG_FORMAT_MASK (0x7 << 3) +#define MAX98373_PCM_MODE_CFG_FORMAT_SHIFT (3) +#define MAX98373_PCM_TX_CH_INTERLEAVE_MASK (0x1 << 2) +#define MAX98373_PCM_FORMAT_I2S (0x0 << 0) +#define MAX98373_PCM_FORMAT_LJ (0x1 << 0) +#define MAX98373_PCM_FORMAT_TDM_MODE0 (0x3 << 0) +#define MAX98373_PCM_FORMAT_TDM_MODE1 (0x4 << 0) +#define MAX98373_PCM_FORMAT_TDM_MODE2 (0x5 << 0) +#define MAX98373_PCM_MODE_CFG_CHANSZ_MASK (0x3 << 6) +#define MAX98373_PCM_MODE_CFG_CHANSZ_16 (0x1 << 6) +#define MAX98373_PCM_MODE_CFG_CHANSZ_24 (0x2 << 6) +#define MAX98373_PCM_MODE_CFG_CHANSZ_32 (0x3 << 6) + +/* MAX98373_R2026_PCM_CLOCK_RATIO */ +#define MAX98373_PCM_MODE_CFG_PCM_BCLKEDGE (0x1 << 4) +#define MAX98373_PCM_CLK_SETUP_BSEL_MASK (0xF << 0) + +/* MAX98373_R2027_PCM_SR_SETUP_1 */ +#define MAX98373_PCM_SR_SET1_SR_MASK (0xF << 0) +#define MAX98373_PCM_SR_SET1_SR_8000 (0x0 << 0) +#define MAX98373_PCM_SR_SET1_SR_11025 (0x1 << 0) +#define MAX98373_PCM_SR_SET1_SR_12000 (0x2 << 0) +#define MAX98373_PCM_SR_SET1_SR_16000 (0x3 << 0) +#define MAX98373_PCM_SR_SET1_SR_22050 (0x4 << 0) +#define MAX98373_PCM_SR_SET1_SR_24000 (0x5 << 0) +#define MAX98373_PCM_SR_SET1_SR_32000 (0x6 << 0) +#define MAX98373_PCM_SR_SET1_SR_44100 (0x7 << 0) +#define MAX98373_PCM_SR_SET1_SR_48000 (0x8 << 0) + +/* MAX98373_R2028_PCM_SR_SETUP_2 */ +#define MAX98373_PCM_SR_SET2_SR_MASK (0xF << 4) +#define MAX98373_PCM_SR_SET2_SR_SHIFT (4) +#define MAX98373_PCM_SR_SET2_IVADC_SR_MASK (0xF << 0) + +/* MAX98373_R2029_PCM_TO_SPK_MONO_MIX_1 */ +#define MAX98373_PCM_TO_SPK_MONOMIX_CFG_MASK (0x3 << 6) +#define MAX98373_PCM_TO_SPK_MONOMIX_CFG_SHIFT (6) +#define MAX98373_PCM_TO_SPK_CH0_SRC_MASK (0xF << 0) + +/* 
MAX98373_R203E_AMP_PATH_GAIN */ +#define MAX98373_SPK_DIGI_GAIN_MASK (0xF << 4) +#define MAX98373_SPK_DIGI_GAIN_SHIFT (4) +#define MAX98373_FS_GAIN_MAX_MASK (0xF << 0) +#define MAX98373_FS_GAIN_MAX_SHIFT (0) + +/* MAX98373_R203F_AMP_DSP_CFG */ +#define MAX98373_AMP_DSP_CFG_DCBLK_SHIFT (0) +#define MAX98373_AMP_DSP_CFG_DITH_SHIFT (1) +#define MAX98373_AMP_DSP_CFG_RMP_UP_SHIFT (2) +#define MAX98373_AMP_DSP_CFG_RMP_DN_SHIFT (3) +#define MAX98373_AMP_DSP_CFG_DAC_INV_SHIFT (5) +#define MAX98373_AMP_VOL_SEL_SHIFT (7) + +/* MAX98373_R2043_AMP_EN */ +#define MAX98373_SPKFB_EN_MASK (0x1 << 1) +#define MAX98373_SPK_EN_MASK (0x1 << 0) +#define MAX98373_SPKFB_EN_SHIFT (1) + +/*MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG */ +#define MAX98373_FLT_EN_SHIFT (4) + +/* MAX98373_R20B2_BDE_L4_CFG_2 */ +#define MAX98373_LVL4_MUTE_EN_SHIFT (7) +#define MAX98373_LVL4_HOLD_EN_SHIFT (6) + +/* MAX98373_R20B5_BDE_EN */ +#define MAX98373_BDE_EN_SHIFT (0) + +/* MAX98373_R20D1_DHT_CFG */ +#define MAX98373_DHT_SPK_GAIN_MIN_SHIFT (4) +#define MAX98373_DHT_ROT_PNT_SHIFT (0) + +/* MAX98373_R20D2_DHT_ATTACK_CFG */ +#define MAX98373_DHT_ATTACK_STEP_SHIFT (3) +#define MAX98373_DHT_ATTACK_RATE_SHIFT (0) + +/* MAX98373_R20D3_DHT_RELEASE_CFG */ +#define MAX98373_DHT_RELEASE_STEP_SHIFT (3) +#define MAX98373_DHT_RELEASE_RATE_SHIFT (0) + +/* MAX98373_R20D4_DHT_EN */ +#define MAX98373_DHT_EN_SHIFT (0) + +/* MAX98373_R20E0_LIMITER_THRESH_CFG */ +#define MAX98373_LIMITER_THRESH_SHIFT (2) +#define MAX98373_LIMITER_THRESH_SRC_SHIFT (0) + +/* MAX98373_R20E2_LIMITER_EN */ +#define MAX98373_LIMITER_EN_SHIFT (0) + +/* MAX98373_R20FE_DEVICE_AUTO_RESTART_CFG */ +#define MAX98373_CLOCK_MON_SHIFT (0) + +/* MAX98373_R20FF_GLOBAL_SHDN */ +#define MAX98373_GLOBAL_EN_MASK (0x1 << 0) + +/* MAX98373_R2000_SW_RESET */ +#define MAX98373_SOFT_RESET (0x1 << 0) + +struct max98373_priv { + struct regmap *regmap; + unsigned int v_slot; + unsigned int i_slot; + unsigned int spkfb_slot; + bool interleave_mode; + unsigned int ch_size; + bool 
tdm_mode; +}; +#endif -- cgit v1.2.3 From ef42e3557f66bd236bfc157c1f02de5abfd01236 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Jan 2018 15:24:57 -0200 Subject: ASoC: simple-card: Pass 'reg' property in the examples Since unit addresses are passed to simple-audio-card,dai-link a corresponding 'reg' property is needed, otherwise dtc complains (when building with W=1) in case someone copies the bindings example into a real dts file: Warning (unit_address_vs_reg): Node /sound-digital/simple-audio-card,dai-link@0 has a unit name, but no reg property Improve the example by passing the correct 'reg' properties. Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/simple-card.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/simple-card.txt b/Documentation/devicetree/bindings/sound/simple-card.txt index 166f2290233b..17c13e74667d 100644 --- a/Documentation/devicetree/bindings/sound/simple-card.txt +++ b/Documentation/devicetree/bindings/sound/simple-card.txt @@ -140,6 +140,7 @@ sound { simple-audio-card,name = "Cubox Audio"; simple-audio-card,dai-link@0 { /* I2S - HDMI */ + reg = <0>; format = "i2s"; cpu { sound-dai = <&audio1 0>; @@ -150,6 +151,7 @@ sound { }; simple-audio-card,dai-link@1 { /* S/PDIF - HDMI */ + reg = <1>; cpu { sound-dai = <&audio1 1>; }; @@ -159,6 +161,7 @@ sound { }; simple-audio-card,dai-link@2 { /* S/PDIF - S/PDIF */ + reg = <2>; cpu { sound-dai = <&audio1 1>; }; -- cgit v1.2.3 From fb32dd3abf7a8fc13271d0d1c45ffc66df28dd15 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 2 Jan 2018 20:14:42 -0800 Subject: MAINTAINERS: Update my email address. Signed-off-by: Pravin Shelar Signed-off-by: David S. 
Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a6e86e20761e..1e6872b4c6e2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10137,7 +10137,7 @@ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* OPENVSWITCH -M: Pravin Shelar +M: Pravin B Shelar L: netdev@vger.kernel.org L: dev@openvswitch.org W: http://openvswitch.org -- cgit v1.2.3 From f428fe4a04cc339166c8bbd489789760de3a0cee Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 2 Jan 2018 23:27:33 -0800 Subject: rtnetlink: give a user socket to get_target_net() This function is used from two places: rtnl_dump_ifinfo and rtnl_getlink. In rtnl_getlink(), we give a request skb into get_target_net(), but in rtnl_dump_ifinfo, we give a response skb into get_target_net(). The problem here is that NETLINK_CB() isn't initialized for the response skb. In both cases we can get a user socket and give it instead of skb into get_target_net(). This bug was found by syzkaller with this call-trace: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Modules linked in: CPU: 1 PID: 3149 Comm: syzkaller140561 Not tainted 4.15.0-rc4-mm1+ #47 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__netlink_ns_capable+0x8b/0x120 net/netlink/af_netlink.c:868 RSP: 0018:ffff8801c880f348 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8443f900 RDX: 000000000000007b RSI: ffffffff86510f40 RDI: 00000000000003d8 RBP: ffff8801c880f360 R08: 0000000000000000 R09: 1ffff10039101e4f R10: 0000000000000000 R11: 0000000000000001 R12: ffffffff86510f40 R13: 000000000000000c R14: 0000000000000004 R15: 0000000000000011 FS: 0000000001a1a880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020151000 CR3: 00000001c9511005 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 
0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: netlink_ns_capable+0x26/0x30 net/netlink/af_netlink.c:886 get_target_net+0x9d/0x120 net/core/rtnetlink.c:1765 rtnl_dump_ifinfo+0x2e5/0xee0 net/core/rtnetlink.c:1806 netlink_dump+0x48c/0xce0 net/netlink/af_netlink.c:2222 __netlink_dump_start+0x4f0/0x6d0 net/netlink/af_netlink.c:2319 netlink_dump_start include/linux/netlink.h:214 [inline] rtnetlink_rcv_msg+0x7f0/0xb10 net/core/rtnetlink.c:4485 netlink_rcv_skb+0x21e/0x460 net/netlink/af_netlink.c:2441 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4540 netlink_unicast_kernel net/netlink/af_netlink.c:1308 [inline] netlink_unicast+0x4be/0x6a0 net/netlink/af_netlink.c:1334 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1897 Cc: Jiri Benc Fixes: 79e1ad148c84 ("rtnetlink: use netnsid to query interface") Signed-off-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dabba2a91fc8..778d7f03404a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev, return false; } -static struct net *get_target_net(struct sk_buff *skb, int netnsid) +static struct net *get_target_net(struct sock *sk, int netnsid) { struct net *net; - net = get_net_ns_by_id(sock_net(skb->sk), netnsid); + net = get_net_ns_by_id(sock_net(sk), netnsid); if (!net) return ERR_PTR(-EINVAL); /* For now, the caller is required to have CAP_NET_ADMIN in * the user namespace owning the target net ns. 
*/ - if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) { put_net(net); return ERR_PTR(-EACCES); } @@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) ifla_policy, NULL) >= 0) { if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); - tgt_net = get_target_net(skb, netnsid); + tgt_net = get_target_net(skb->sk, netnsid); if (IS_ERR(tgt_net)) { tgt_net = net; netnsid = -1; @@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); - tgt_net = get_target_net(skb, netnsid); + tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid); if (IS_ERR(tgt_net)) return PTR_ERR(tgt_net); } -- cgit v1.2.3 From 879626e3a52630316d817cbda7cec9a5446d1d82 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 3 Jan 2018 16:46:29 +0100 Subject: net: stmmac: enable EEE in MII, GMII or RGMII only Note in the databook - Section 4.4 - EEE : " The EEE feature is not supported when the MAC is configured to use the TBI, RTBI, SMII, RMII or SGMII single PHY interface. Even if the MAC supports multiple PHY interfaces, you should activate the EEE mode only when the MAC is operating with GMII, MII, or RGMII interface." Applying this restriction solves a stability issue observed on Amlogic gxl platforms operating with RMII interface and the internal PHY. Fixes: 83bf79b6bb64 ("stmmac: disable at run-time the EEE if not supported") Signed-off-by: Jerome Brunet Tested-by: Arnaud Patard Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 337d53d12e94..c0af0bc4e714 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) bool stmmac_eee_init(struct stmmac_priv *priv) { struct net_device *ndev = priv->dev; + int interface = priv->plat->interface; unsigned long flags; bool ret = false; + if ((interface != PHY_INTERFACE_MODE_MII) && + (interface != PHY_INTERFACE_MODE_GMII) && + !phy_interface_mode_is_rgmii(interface)) + goto out; + /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. */ -- cgit v1.2.3 From dfe8266b8dd10e12a731c985b725fcf7f0e537f0 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 3 Jan 2018 20:09:49 +0300 Subject: sh_eth: fix TSU resource handling When switching the driver to the managed device API, I managed to break the case of a dual Ether devices sharing a single TSU: the 2nd Ether port wouldn't probe. Iwamatsu-san has tried to fix this but his patch was buggy and he then dropped the ball... The solution is to limit calling devm_request_mem_region() to the first of the two ports sharing the same TSU, so devm_ioremap_resource() can't be used anymore for the TSU resource... Fixes: d5e07e69218f ("sh_eth: use managed device API") Reported-by: Nobuhiro Iwamatsu Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 75323000c364..1bdd67a8a869 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3225,10 +3225,29 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* ioremap the TSU registers */ if (mdp->cd->tsu) { struct resource *rtsu; + rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu); - if (IS_ERR(mdp->tsu_addr)) { - ret = PTR_ERR(mdp->tsu_addr); + if (!rtsu) { + dev_err(&pdev->dev, "no TSU resource\n"); + ret = -ENODEV; + goto out_release; + } + /* We can only request the TSU region for the first port + * of the two sharing this TSU for the probe to succeed... + */ + if (devno % 2 == 0 && + !devm_request_mem_region(&pdev->dev, rtsu->start, + resource_size(rtsu), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "can't request TSU resource.\n"); + ret = -EBUSY; + goto out_release; + } + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start, + resource_size(rtsu)); + if (!mdp->tsu_addr) { + dev_err(&pdev->dev, "TSU region ioremap() failed.\n"); + ret = -ENOMEM; goto out_release; } mdp->port = devno % 2; -- cgit v1.2.3 From 7d11f77f84b27cef452cee332f4e469503084737 Mon Sep 17 00:00:00 2001 From: Mohamed Ghannam Date: Wed, 3 Jan 2018 21:06:06 +0000 Subject: RDS: null pointer dereference in rds_atomic_free_op set rm->atomic.op_active to 0 when rds_pin_pages() fails or the user supplied address is invalid, this prevents a NULL pointer usage in rds_atomic_free_op() Signed-off-by: Mohamed Ghannam Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/rdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 94729d9da437..634cfcb7bba6 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -877,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm, err: if (page) put_page(page); + rm->atomic.op_active = 0; kfree(rm->atomic.op_notifier); return ret; -- cgit v1.2.3 From 7bbfe00e025240505db3e04c3b296d7c023b2a26 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 3 Jan 2018 14:11:59 -0800 Subject: ipv6: fix general protection fault in fib6_add() In fib6_add(), pn could be NULL if fib6_add_1() failed to return a fib6 node. Checking pn != fn before accessing pn->leaf makes sure pn is not NULL. This fixes the following GPF reported by syzkaller: general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 3201 Comm: syzkaller001778 Not tainted 4.15.0-rc5+ #151 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:fib6_add+0x736/0x15a0 net/ipv6/ip6_fib.c:1244 RSP: 0018:ffff8801c7626a70 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000020 RCX: ffffffff84794465 RDX: 0000000000000004 RSI: ffff8801d38935f0 RDI: 0000000000000282 RBP: ffff8801c7626da0 R08: 1ffff10038ec4c35 R09: 0000000000000000 R10: ffff8801c7626c68 R11: 0000000000000000 R12: 00000000fffffffe R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000009 FS: 0000000000000000(0000) GS:ffff8801db200000(0063) knlGS:0000000009b70840 CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 CR2: 0000000020be1000 CR3: 00000001d585a006 CR4: 00000000001606f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __ip6_ins_rt+0x6c/0x90 net/ipv6/route.c:1006 ip6_route_multipath_add+0xd14/0x16c0 net/ipv6/route.c:3833 inet6_rtm_newroute+0xdc/0x160 net/ipv6/route.c:3957 rtnetlink_rcv_msg+0x733/0x1020 
net/core/rtnetlink.c:4411 netlink_rcv_skb+0x21e/0x460 net/netlink/af_netlink.c:2408 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4423 netlink_unicast_kernel net/netlink/af_netlink.c:1275 [inline] netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1301 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1864 sock_sendmsg_nosec net/socket.c:636 [inline] sock_sendmsg+0xca/0x110 net/socket.c:646 sock_write_iter+0x31a/0x5d0 net/socket.c:915 call_write_iter include/linux/fs.h:1772 [inline] do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653 do_iter_write+0x154/0x540 fs/read_write.c:932 compat_writev+0x225/0x420 fs/read_write.c:1246 do_compat_writev+0x115/0x220 fs/read_write.c:1267 C_SYSC_writev fs/read_write.c:1278 [inline] compat_SyS_writev+0x26/0x30 fs/read_write.c:1274 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline] do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389 entry_SYSENTER_compat+0x54/0x63 arch/x86/entry/entry_64_compat.S:125 Reported-by: syzbot Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Signed-off-by: Wei Wang Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f5285f4e1d08..d11a5578e4f8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1241,23 +1241,28 @@ out: * If fib6_add_1 has cleared the old leaf pointer in the * super-tree leaf node we have to find a new one for it. 
*/ - struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, - lockdep_is_held(&table->tb6_lock)); - if (pn != fn && pn_leaf == rt) { - pn_leaf = NULL; - RCU_INIT_POINTER(pn->leaf, NULL); - atomic_dec(&rt->rt6i_ref); - } - if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn_leaf = fib6_find_prefix(info->nl_net, table, pn); -#if RT6_DEBUG >= 2 - if (!pn_leaf) { - WARN_ON(!pn_leaf); - pn_leaf = info->nl_net->ipv6.ip6_null_entry; + if (pn != fn) { + struct rt6_info *pn_leaf = + rcu_dereference_protected(pn->leaf, + lockdep_is_held(&table->tb6_lock)); + if (pn_leaf == rt) { + pn_leaf = NULL; + RCU_INIT_POINTER(pn->leaf, NULL); + atomic_dec(&rt->rt6i_ref); } + if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { + pn_leaf = fib6_find_prefix(info->nl_net, table, + pn); +#if RT6_DEBUG >= 2 + if (!pn_leaf) { + WARN_ON(!pn_leaf); + pn_leaf = + info->nl_net->ipv6.ip6_null_entry; + } #endif - atomic_inc(&pn_leaf->rt6i_ref); - rcu_assign_pointer(pn->leaf, pn_leaf); + atomic_inc(&pn_leaf->rt6i_ref); + rcu_assign_pointer(pn->leaf, pn_leaf); + } } #endif goto failure; -- cgit v1.2.3 From 6926e041a8920c8ec27e4e155efa760aa01551fd Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 3 Jan 2018 23:14:21 +0100 Subject: uapi/if_ether.h: prevent redefinition of struct ethhdr Musl provides its own ethhdr struct definition. Add a guard to prevent its definition if the appropriate musl header has already been included. glibc does not implement this header, but when glibc implements it, they can just define __UAPI_DEF_ETHHDR 0 to make it work with the kernel. Signed-off-by: Hauke Mehrtens Signed-off-by: David S. 
Miller --- include/uapi/linux/if_ether.h | 3 +++ include/uapi/linux/libc-compat.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 3ee3bf7c8526..144de4d2f385 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -23,6 +23,7 @@ #define _UAPI_LINUX_IF_ETHER_H #include +#include /* * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble @@ -149,11 +150,13 @@ * This is an Ethernet frame header. */ +#if __UAPI_DEF_ETHHDR struct ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ __be16 h_proto; /* packet type ID field */ } __attribute__((packed)); +#endif #endif /* _UAPI_LINUX_IF_ETHER_H */ diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 8254c937c9f4..fc29efaa918c 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -264,4 +264,10 @@ #endif /* __GLIBC__ */ +/* Definitions for if_ether.h */ +/* allow libcs like musl to deactivate this, glibc does not implement this. */ +#ifndef __UAPI_DEF_ETHHDR +#define __UAPI_DEF_ETHHDR 1 +#endif + #endif /* _UAPI_LIBC_COMPAT_H */ -- cgit v1.2.3 From f5a40711fa58f1c109165a4fec6078bf2dfd2bdc Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 28 Dec 2017 19:06:20 +0300 Subject: x86/mm: Set MODULES_END to 0xffffffffff000000 Since f06bdd4001c2 ("x86/mm: Adapt MODULES_END based on fixmap section size") kasan_mem_to_shadow(MODULES_END) could be not aligned to a page boundary. So passing page unaligned address to kasan_populate_zero_shadow() have two possible effects: 1) It may leave one page hole in supposed to be populated area. 
After commit 21506525fb8d ("x86/kasan/64: Teach KASAN about the cpu_entry_area") that hole happens to be in the shadow covering the fixmap area and leads to a crash: BUG: unable to handle kernel paging request at fffffbffffe8ee04 RIP: 0010:check_memory_region+0x5c/0x190 Call Trace: memcpy+0x1f/0x50 ghes_copy_tofrom_phys+0xab/0x180 ghes_read_estatus+0xfb/0x280 ghes_notify_nmi+0x2b2/0x410 nmi_handle+0x115/0x2c0 default_do_nmi+0x57/0x110 do_nmi+0xf8/0x150 end_repeat_nmi+0x1a/0x1e Note, the crash likely disappeared after commit 92a0f81d8957, which changed the kasan_populate_zero_shadow() call back to the way it was before commit 21506525fb8d. 2) An attempt to load a module near MODULES_END will fail, because __vmalloc_node_range() called from kasan_module_alloc() will hit the WARN_ON(!pte_none(*pte)) in vmap_pte_range() and bail out with an error. To fix this we need to make kasan_mem_to_shadow(MODULES_END) page aligned, which means that MODULES_END should be 8*PAGE_SIZE aligned. The whole point of commit f06bdd4001c2 was to move MODULES_END down if NR_CPUS is big, so the cpu_entry_area takes a lot of space. But since 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") the cpu_entry_area is no longer in the fixmap, so we could just set MODULES_END to a fixed 8*PAGE_SIZE aligned address. 
Fixes: f06bdd4001c2 ("x86/mm: Adapt MODULES_END based on fixmap section size") Reported-by: Jakub Kicinski Signed-off-by: Andrey Ryabinin Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: Thomas Garnier Link: https://lkml.kernel.org/r/20171228160620.23818-1-aryabinin@virtuozzo.com --- Documentation/x86/x86_64/mm.txt | 5 +---- arch/x86/include/asm/pgtable_64_types.h | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index ad41b3813f0a..ddd5ffd31bd0 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -43,7 +43,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space +ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space [fixmap start] - ffffffffff5fffff kernel-internal fixmap range ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole @@ -67,9 +67,6 @@ memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available during EFI runtime calls. -The module mapping space size changes based on the CONFIG requirements for the -following fixmap section. - Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index b97a539bcdee..6233e5595389 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -104,7 +104,7 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) +#define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) #define ESPFIX_PGD_ENTRY _AC(-2, UL) -- cgit v1.2.3 From f2078904810373211fb15f91888fba14c01a4acc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Jan 2018 13:01:40 +0100 Subject: x86/mm: Map cpu_entry_area at the same place on 4/5 level There is no reason for 4 and 5 level pagetables to have a different layout. It just makes determining vaddr_end for KASLR harder than necessary. Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Benjamin Gilbert Cc: Greg Kroah-Hartman Cc: stable Cc: Dave Hansen Cc: Peter Zijlstra Cc: Thomas Garnier , Cc: Alexander Kuleshov Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos --- Documentation/x86/x86_64/mm.txt | 7 ++++--- arch/x86/include/asm/pgtable_64_types.h | 4 ++-- arch/x86/mm/dump_pagetables.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index ddd5ffd31bd0..f7dabe1f01e9 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,8 +12,8 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... 
-fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI -fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping +fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping +fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space @@ -37,7 +37,8 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) ... unused hole ... -fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping +fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping +... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6233e5595389..61b4b60bdc13 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -88,7 +88,7 @@ typedef struct { pteval_t pte; } pte_t; # define VMALLOC_SIZE_TB _AC(32, UL) # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -# define LDT_PGD_ENTRY _AC(-4, UL) +# define LDT_PGD_ENTRY _AC(-3, UL) # define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif @@ -110,7 +110,7 @@ typedef struct { pteval_t pte; } pte_t; #define ESPFIX_PGD_ENTRY _AC(-2, UL) #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) -#define CPU_ENTRY_AREA_PGD _AC(-3, UL) +#define CPU_ENTRY_AREA_PGD _AC(-4, UL) #define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) #define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index f56902c1f04b..2a4849e92831 100644 --- 
a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -61,10 +61,10 @@ enum address_markers_idx { KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif + CPU_ENTRY_AREA_NR, #if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL) LDT_NR, #endif - CPU_ENTRY_AREA_NR, #ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, #endif -- cgit v1.2.3 From 1dddd25125112ba49706518ac9077a1026a18f37 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Jan 2018 12:32:03 +0100 Subject: x86/kaslr: Fix the vaddr_end mess vaddr_end for KASLR is only documented in the KASLR code itself and is adjusted depending on config options. So it's not surprising that a change of the memory layout causes KASLR to have the wrong vaddr_end. This can map arbitrary stuff into other areas causing hard to understand problems. Remove the whole ifdef magic and define the start of the cpu_entry_area to be the end of the KASLR vaddr range. Add documentation to that effect. Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap") Reported-by: Benjamin Gilbert Signed-off-by: Thomas Gleixner Tested-by: Benjamin Gilbert Cc: Andy Lutomirski Cc: Greg Kroah-Hartman Cc: stable Cc: Dave Hansen Cc: Peter Zijlstra Cc: Thomas Garnier , Cc: Alexander Kuleshov Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801041320360.1771@nanos --- Documentation/x86/x86_64/mm.txt | 6 ++++++ arch/x86/include/asm/pgtable_64_types.h | 8 +++++++- arch/x86/mm/kaslr.c | 32 +++++++++----------------------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index f7dabe1f01e9..ea91cb61a602 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... 
+ vaddr_end for KASLR fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks @@ -37,6 +38,7 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) ... unused hole ... + vaddr_end for KASLR fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping ... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks @@ -71,3 +73,7 @@ during EFI runtime calls. Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. + +Be very careful vs. KASLR when changing anything here. The KASLR address +range must not overlap with anything except the KASAN shadow area, which is +correct as KASAN disables KASLR. diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 61b4b60bdc13..6b8f73dcbc2c 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -75,7 +75,13 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ +/* + * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * + * Be very careful vs. KASLR when changing anything here. The KASLR address + * range must not overlap with anything except the KASAN shadow area, which + * is correct as KASAN disables KASLR. 
+ */ #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) #ifdef CONFIG_X86_5LEVEL diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 879ef930e2c2..aedebd2ebf1e 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -34,25 +34,14 @@ #define TB_SHIFT 40 /* - * Virtual address start and end range for randomization. The end changes base - * on configuration to have the highest amount of space for randomization. - * It increases the possible random position for each randomized region. + * Virtual address start and end range for randomization. * - * You need to add an if/def entry if you introduce a new memory region - * compatible with KASLR. Your entry must be in logical order with memory - * layout. For example, ESPFIX is before EFI because its virtual address is - * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to - * ensure that this order is correct and won't be changed. + * The end address could depend on more configuration options to make the + * highest amount of space for randomization available, but that's too hard + * to keep straight and caused issues already. */ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; - -#if defined(CONFIG_X86_ESPFIX64) -static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; -#elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_END; -#else -static const unsigned long vaddr_end = __START_KERNEL_map; -#endif +static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; @@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void) unsigned long remain_entropy; /* - * All these BUILD_BUG_ON checks ensures the memory layout is - * consistent with the vaddr_start/vaddr_end variables. + * These BUILD_BUG_ON checks ensure the memory layout is consistent + * with the vaddr_start/vaddr_end variables. These checks are very + * limited.... 
*/ BUILD_BUG_ON(vaddr_start >= vaddr_end); - BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_END); - BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || - IS_ENABLED(CONFIG_EFI)) && - vaddr_end >= __START_KERNEL_map); + BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); if (!kaslr_memory_enabled()) -- cgit v1.2.3 From 42f3bdc5dd962a5958bc024c1e1444248a6b8b4a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Jan 2018 18:07:12 +0100 Subject: x86/events/intel/ds: Use the proper cache flush method for mapping ds buffers Thomas reported the following warning: BUG: using smp_processor_id() in preemptible [00000000] code: ovsdb-server/4498 caller is native_flush_tlb_single+0x57/0xc0 native_flush_tlb_single+0x57/0xc0 __set_pte_vaddr+0x2d/0x40 set_pte_vaddr+0x2f/0x40 cea_set_pte+0x30/0x40 ds_update_cea.constprop.4+0x4d/0x70 reserve_ds_buffers+0x159/0x410 x86_reserve_hardware+0x150/0x160 x86_pmu_event_init+0x3e/0x1f0 perf_try_init_event+0x69/0x80 perf_event_alloc+0x652/0x740 SyS_perf_event_open+0x3f6/0xd60 do_syscall_64+0x5c/0x190 set_pte_vaddr is used to map the ds buffers into the cpu entry area, but there are two problems with that: 1) The resulting flush is not supposed to be called in preemptible context 2) The cpu entry area is supposed to be per CPU, but the debug store buffers are mapped for all CPUs so these mappings need to be flushed globally. Add the necessary preemption protection across the mapping code and flush TLBs globally. 
Fixes: c1961a4631da ("x86/events/intel/ds: Map debug buffers in cpu_entry_area") Reported-by: Thomas Zeitlhofer Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Tested-by: Thomas Zeitlhofer Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180104170712.GB3040@hirez.programming.kicks-ass.net --- arch/x86/events/intel/ds.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8f0aace08b87..8156e47da7ba 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -5,6 +5,7 @@ #include #include +#include #include #include "../perf_event.h" @@ -283,20 +284,35 @@ static DEFINE_PER_CPU(void *, insn_buffer); static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot) { + unsigned long start = (unsigned long)cea; phys_addr_t pa; size_t msz = 0; pa = virt_to_phys(addr); + + preempt_disable(); for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE) cea_set_pte(cea, pa, prot); + + /* + * This is a cross-CPU update of the cpu_entry_area, we must shoot down + * all TLB entries for it. + */ + flush_tlb_kernel_range(start, start + size); + preempt_enable(); } static void ds_clear_cea(void *cea, size_t size) { + unsigned long start = (unsigned long)cea; size_t msz = 0; + preempt_disable(); for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE) cea_set_pte(cea, 0, PAGE_NONE); + + flush_tlb_kernel_range(start, start + size); + preempt_enable(); } static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) -- cgit v1.2.3 From 1e5476815fd7f98b888e01a0f9522b63085f96c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Jan 2018 22:19:04 +0100 Subject: x86/tlb: Drop the _GPL from the cpu_tlbstate export The recent changes for PTI touch cpu_tlbstate from various tlb_flush inlines. 
cpu_tlbstate is exported as GPL symbol, so this causes a regression when building out of tree drivers for certain graphics cards. Aside from that the export was wrong since it was introduced as it should have been EXPORT_PER_CPU_SYMBOL_GPL(). Use the correct PER_CPU export and drop the _GPL to restore the previous state which allows users to utilize the cards they paid for. As always I'm really thrilled to make this kind of change to support the #friends (or however the hot hashtag of today is spelled) from that closet sauce graphics corp. Fixes: 1e02ce4cccdc ("x86: Store a per-cpu shadow copy of CR4") Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches") Reported-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: stable@vger.kernel.org --- arch/x86/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 80259ad8c386..6b462a472a7b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -870,7 +870,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { .next_asid = 1, .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; -EXPORT_SYMBOL_GPL(cpu_tlbstate); +EXPORT_PER_CPU_SYMBOL(cpu_tlbstate); void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { -- cgit v1.2.3 From 56d03d7b6edaff4b0e60349fc72056d3696ca492 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 3 Jan 2018 15:06:20 -0800 Subject: ACPICA: Debug output, no functional change ACPICA commit 04fffc50a131662f57a41ca517c75d32d2479a1c Fix use of return macros and other debug output changes. Link: https://github.com/acpica/acpica/commit/04fffc50 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/hwvalid.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c index 3094cec4eab4..d1679035d5f3 100644 --- a/drivers/acpi/acpica/hwvalid.c +++ b/drivers/acpi/acpica/hwvalid.c @@ -128,14 +128,14 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width) acpi_io_address last_address; const struct acpi_port_info *port_info; - ACPI_FUNCTION_NAME(hw_validate_io_request); + ACPI_FUNCTION_TRACE(hw_validate_io_request); /* Supported widths are 8/16/32 */ if ((bit_width != 8) && (bit_width != 16) && (bit_width != 32)) { ACPI_ERROR((AE_INFO, "Bad BitWidth parameter: %8.8X", bit_width)); - return (AE_BAD_PARAMETER); + return_ACPI_STATUS(AE_BAD_PARAMETER); } port_info = acpi_protected_ports; @@ -153,13 +153,13 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width) ACPI_ERROR((AE_INFO, "Illegal I/O port address/length above 64K: %8.8X%8.8X/0x%X", ACPI_FORMAT_UINT64(address), byte_width)); - return (AE_LIMIT); + return_ACPI_STATUS(AE_LIMIT); } /* Exit if requested address is not within the protected port table */ if (address > acpi_protected_ports[ACPI_PORT_INFO_ENTRIES - 1].end) { - return (AE_OK); + return_ACPI_STATUS(AE_OK); } /* Check request against the list of protected I/O ports */ @@ -180,8 +180,8 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width) /* Port illegality may depend on the _OSI calls made by the BIOS */ if (acpi_gbl_osi_data >= port_info->osi_dependency) { - ACPI_DEBUG_PRINT((ACPI_DB_IO, - "Denied AML access to port 0x%8.8X%8.8X/%X (%s 0x%.4X-0x%.4X)", + ACPI_DEBUG_PRINT((ACPI_DB_VALUES, + "Denied AML access to port 0x%8.8X%8.8X/%X (%s 0x%.4X-0x%.4X)\n", ACPI_FORMAT_UINT64(address), byte_width, port_info->name, port_info->start, @@ -198,7 +198,7 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width) } } - return (AE_OK); + return_ACPI_STATUS(AE_OK); } 
/****************************************************************************** -- cgit v1.2.3 From 6be2d72b186497079aeb63d5730cb84393566fd3 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 3 Jan 2018 15:06:21 -0800 Subject: ACPICA: Update for a few debug output statements ACPICA commit 900e96a9c6c6d67c2e18e8c2576dc4742221fc71 Implement a very small indent for trace output. Link: https://github.com/acpica/acpica/commit/900e96a9 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/exdump.c | 4 ++-- drivers/acpi/acpica/utdebug.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c index 83398dc4b7c2..ccdd2a417aa1 100644 --- a/drivers/acpi/acpica/exdump.c +++ b/drivers/acpi/acpica/exdump.c @@ -904,7 +904,7 @@ void acpi_ex_dump_operands(union acpi_operand_object **operands, const char *opcode_name, u32 num_operands) { - ACPI_FUNCTION_NAME(ex_dump_operands); + ACPI_FUNCTION_TRACE(ex_dump_operands); if (!opcode_name) { opcode_name = "UNKNOWN"; @@ -928,7 +928,7 @@ acpi_ex_dump_operands(union acpi_operand_object **operands, ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "**** End operand dump for [%s]\n", opcode_name)); - return; + return_VOID; } /******************************************************************************* diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c index 615a885e2ca3..77d8c9054b8e 100644 --- a/drivers/acpi/acpica/utdebug.c +++ b/drivers/acpi/acpica/utdebug.c @@ -163,6 +163,7 @@ acpi_debug_print(u32 requested_debug_level, { acpi_thread_id thread_id; va_list args; + int fill_count; /* Check if debug output enabled */ @@ -202,10 +203,21 @@ acpi_debug_print(u32 requested_debug_level, acpi_os_printf("[%u] ", (u32)thread_id); } - acpi_os_printf("[%02ld] ", acpi_gbl_nesting_level); -#endif + fill_count = 48 - acpi_gbl_nesting_level - + strlen(acpi_ut_trim_function_name(function_name)); + if 
(fill_count < 0) { + fill_count = 0; + } + acpi_os_printf("[%02ld] %*s", + acpi_gbl_nesting_level, acpi_gbl_nesting_level, " "); + acpi_os_printf("%s%*s: ", + acpi_ut_trim_function_name(function_name), fill_count, + " "); + +#else acpi_os_printf("%-22.22s: ", acpi_ut_trim_function_name(function_name)); +#endif va_start(args, format); acpi_os_vprintf(format, args); -- cgit v1.2.3 From 896bece7eccb0e756cf91ac92479d764a8a28f5b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 3 Jan 2018 15:06:22 -0800 Subject: ACPICA: Fix a regression in the acpi_evaluate_object_type() interface ACPICA commit 9ab548ef154b992208524d61770caca90a9762be The optional Pathname parameter inadvertently became required. Introduced in April 2017. Link: https://github.com/acpica/acpica/commit/9ab548ef Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/nsxfeval.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index 783f4c838aee..9b51f65823b2 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -61,10 +61,10 @@ static void acpi_ns_resolve_references(struct acpi_evaluate_info *info); * * PARAMETERS: handle - Object handle (optional) * pathname - Object pathname (optional) - * external_params - List of parameters to pass to method, + * external_params - List of parameters to pass to a method, * terminated by NULL. May be NULL * if no parameters are being passed. - * return_buffer - Where to put method's return value (if + * return_buffer - Where to put the object's return value (if * any). If NULL, no value is returned. 
* return_type - Expected type of return object * @@ -100,13 +100,14 @@ acpi_evaluate_object_typed(acpi_handle handle, free_buffer_on_error = TRUE; } + /* Get a handle here, in order to build an error message if needed */ + + target_handle = handle; if (pathname) { status = acpi_get_handle(handle, pathname, &target_handle); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } - } else { - target_handle = handle; } full_pathname = acpi_ns_get_external_pathname(target_handle); -- cgit v1.2.3 From 6e875fa0480c16a881564a718686d517ae1ad3ad Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Wed, 3 Jan 2018 15:06:23 -0800 Subject: ACPICA: Debugger: fix slight indentation issue ACPICA commit c75af007d35c0afe8791ac39b7749c7442f49912 The %*s format specifier prints a string with a width indicated by an integer. In the case of acpi_os_printf() ("%*s", acpi_gbl_nesting_level, " "), a single space is printed to the console when acpi_gbl_nesting_level is 0 or 1. This change increments acpi_gbl_nesting_level so that there is one space printed when acpi_gbl_nesting_level is 0 and two spaces printed when acpi_gbl_nesting_level is 1. Link: https://github.com/acpica/acpica/commit/c75af007 Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/utdebug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c index 77d8c9054b8e..d4dfdbb539ee 100644 --- a/drivers/acpi/acpica/utdebug.c +++ b/drivers/acpi/acpica/utdebug.c @@ -210,7 +210,7 @@ acpi_debug_print(u32 requested_debug_level, } acpi_os_printf("[%02ld] %*s", - acpi_gbl_nesting_level, acpi_gbl_nesting_level, " "); + acpi_gbl_nesting_level, acpi_gbl_nesting_level + 1, " "); acpi_os_printf("%s%*s: ", acpi_ut_trim_function_name(function_name), fill_count, " "); -- cgit v1.2.3 From 2cb0ba70fbbf36f34c2c31fabe19a267ac4a14a3 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 3 Jan 2018 15:06:24 -0800 Subject: ACPICA: Cleanup the global variables and update comments ACPICA commit 8519ba376636565350c3fa0db5621c61d34c34b2 Mostly cleanup/reformatting. Some restructuring. No functional change. Link: https://github.com/acpica/acpica/commit/8519ba37 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/acglobal.h | 82 ++++++++++++++---------------------------- drivers/acpi/acpica/utdebug.c | 2 ++ 2 files changed, 29 insertions(+), 55 deletions(-) diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 95eed442703f..0c609f803ee1 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -46,7 +46,7 @@ /***************************************************************************** * - * Globals related to the ACPI tables + * Globals related to the incoming ACPI tables * ****************************************************************************/ @@ -87,7 +87,7 @@ ACPI_GLOBAL(u8, acpi_gbl_integer_nybble_width); /***************************************************************************** * - * Mutual exclusion within ACPICA subsystem + * Mutual exclusion within the ACPICA subsystem * ****************************************************************************/ @@ -167,7 +167,7 @@ ACPI_GLOBAL(u8, acpi_gbl_next_owner_id_offset); ACPI_INIT_GLOBAL(u8, acpi_gbl_namespace_initialized, FALSE); -/* Misc */ +/* Miscellaneous */ ACPI_GLOBAL(u32, acpi_gbl_original_mode); ACPI_GLOBAL(u32, acpi_gbl_ns_lookup_count); @@ -191,10 +191,9 @@ extern const char acpi_gbl_lower_hex_digits[]; extern const char acpi_gbl_upper_hex_digits[]; extern const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES]; -#ifdef ACPI_DBG_TRACK_ALLOCATIONS - /* Lists for tracking memory allocations (debug only) */ +#ifdef ACPI_DBG_TRACK_ALLOCATIONS ACPI_GLOBAL(struct acpi_memory_list *, acpi_gbl_global_list); ACPI_GLOBAL(struct acpi_memory_list *, acpi_gbl_ns_node_list); ACPI_GLOBAL(u8, acpi_gbl_display_final_mem_stats); @@ -203,7 +202,7 @@ ACPI_GLOBAL(u8, acpi_gbl_disable_mem_tracking); /***************************************************************************** * - * Namespace globals + * ACPI Namespace * ****************************************************************************/ @@ -234,15 +233,20 @@ 
ACPI_INIT_GLOBAL(u32, acpi_gbl_nesting_level, 0); /***************************************************************************** * - * Interpreter globals + * Interpreter/Parser globals * ****************************************************************************/ -ACPI_GLOBAL(struct acpi_thread_state *, acpi_gbl_current_walk_list); - /* Control method single step flag */ ACPI_GLOBAL(u8, acpi_gbl_cm_single_step); +ACPI_GLOBAL(struct acpi_thread_state *, acpi_gbl_current_walk_list); +ACPI_INIT_GLOBAL(union acpi_parse_object, *acpi_gbl_current_scope, NULL); + +/* ASL/ASL+ converter */ + +ACPI_INIT_GLOBAL(u8, gbl_capture_comments, FALSE); +ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_last_list_head, NULL); /***************************************************************************** * @@ -252,7 +256,6 @@ ACPI_GLOBAL(u8, acpi_gbl_cm_single_step); extern struct acpi_bit_register_info acpi_gbl_bit_register_info[ACPI_NUM_BITREG]; - ACPI_GLOBAL(u8, acpi_gbl_sleep_type_a); ACPI_GLOBAL(u8, acpi_gbl_sleep_type_b); @@ -263,7 +266,6 @@ ACPI_GLOBAL(u8, acpi_gbl_sleep_type_b); ****************************************************************************/ #if (!ACPI_REDUCED_HARDWARE) - ACPI_GLOBAL(u8, acpi_gbl_all_gpes_initialized); ACPI_GLOBAL(struct acpi_gpe_xrupt_info *, acpi_gbl_gpe_xrupt_list_head); ACPI_GLOBAL(struct acpi_gpe_block_info *, @@ -272,10 +274,8 @@ ACPI_GLOBAL(acpi_gbl_event_handler, acpi_gbl_global_event_handler); ACPI_GLOBAL(void *, acpi_gbl_global_event_handler_context); ACPI_GLOBAL(struct acpi_fixed_event_handler, acpi_gbl_fixed_event_handlers[ACPI_NUM_FIXED_EVENTS]); - extern struct acpi_fixed_event_info acpi_gbl_fixed_event_info[ACPI_NUM_FIXED_EVENTS]; - #endif /* !ACPI_REDUCED_HARDWARE */ /***************************************************************************** @@ -291,14 +291,14 @@ ACPI_GLOBAL(u32, acpi_gpe_count); ACPI_GLOBAL(u32, acpi_sci_count); ACPI_GLOBAL(u32, acpi_fixed_event_count[ACPI_NUM_FIXED_EVENTS]); -/* Support for dynamic 
control method tracing mechanism */ +/* Dynamic control method tracing mechanism */ ACPI_GLOBAL(u32, acpi_gbl_original_dbg_level); ACPI_GLOBAL(u32, acpi_gbl_original_dbg_layer); /***************************************************************************** * - * Debugger and Disassembler globals + * Debugger and Disassembler * ****************************************************************************/ @@ -326,7 +326,6 @@ ACPI_GLOBAL(struct acpi_external_file *, acpi_gbl_external_file_list); #endif #ifdef ACPI_DEBUGGER - ACPI_INIT_GLOBAL(u8, acpi_gbl_abort_method, FALSE); ACPI_INIT_GLOBAL(acpi_thread_id, acpi_gbl_db_thread_id, ACPI_INVALID_THREAD_ID); @@ -340,7 +339,6 @@ ACPI_GLOBAL(u32, acpi_gbl_db_console_debug_level); ACPI_GLOBAL(struct acpi_namespace_node *, acpi_gbl_db_scope_node); ACPI_GLOBAL(u8, acpi_gbl_db_terminate_loop); ACPI_GLOBAL(u8, acpi_gbl_db_threads_terminated); - ACPI_GLOBAL(char *, acpi_gbl_db_args[ACPI_DEBUGGER_MAX_ARGS]); ACPI_GLOBAL(acpi_object_type, acpi_gbl_db_arg_types[ACPI_DEBUGGER_MAX_ARGS]); @@ -350,32 +348,33 @@ ACPI_GLOBAL(char, acpi_gbl_db_parsed_buf[ACPI_DB_LINE_BUFFER_SIZE]); ACPI_GLOBAL(char, acpi_gbl_db_scope_buf[ACPI_DB_LINE_BUFFER_SIZE]); ACPI_GLOBAL(char, acpi_gbl_db_debug_filename[ACPI_DB_LINE_BUFFER_SIZE]); -/* - * Statistic globals - */ +/* Statistics globals */ + ACPI_GLOBAL(u16, acpi_gbl_obj_type_count[ACPI_TOTAL_TYPES]); ACPI_GLOBAL(u16, acpi_gbl_node_type_count[ACPI_TOTAL_TYPES]); ACPI_GLOBAL(u16, acpi_gbl_obj_type_count_misc); ACPI_GLOBAL(u16, acpi_gbl_node_type_count_misc); ACPI_GLOBAL(u32, acpi_gbl_num_nodes); ACPI_GLOBAL(u32, acpi_gbl_num_objects); - #endif /* ACPI_DEBUGGER */ #if defined (ACPI_DISASSEMBLER) || defined (ACPI_ASL_COMPILER) - ACPI_GLOBAL(const char, *acpi_gbl_pld_panel_list[]); ACPI_GLOBAL(const char, *acpi_gbl_pld_vertical_position_list[]); ACPI_GLOBAL(const char, *acpi_gbl_pld_horizontal_position_list[]); ACPI_GLOBAL(const char, *acpi_gbl_pld_shape_list[]); - ACPI_INIT_GLOBAL(u8, 
acpi_gbl_disasm_flag, FALSE); - #endif -/* - * Meant for the -ca option. - */ +/***************************************************************************** + * + * ACPICA application-specific globals + * + ****************************************************************************/ + +/* ASL-to-ASL+ conversion utility (implemented within the iASL compiler) */ + +#ifdef ACPI_ASL_COMPILER ACPI_INIT_GLOBAL(char *, acpi_gbl_current_inline_comment, NULL); ACPI_INIT_GLOBAL(char *, acpi_gbl_current_end_node_comment, NULL); ACPI_INIT_GLOBAL(char *, acpi_gbl_current_open_brace_comment, NULL); @@ -386,23 +385,18 @@ ACPI_INIT_GLOBAL(char *, acpi_gbl_current_filename, NULL); ACPI_INIT_GLOBAL(char *, acpi_gbl_current_parent_filename, NULL); ACPI_INIT_GLOBAL(char *, acpi_gbl_current_include_filename, NULL); -ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_last_list_head, NULL); - ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_def_blk_comment_list_head, NULL); ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_def_blk_comment_list_tail, NULL); - ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_reg_comment_list_head, NULL); ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_reg_comment_list_tail, NULL); - ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_inc_comment_list_head, NULL); ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_inc_comment_list_tail, NULL); - ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_head, NULL); ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_tail, @@ -410,30 +404,18 @@ ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_end_blk_comment_list_tail, ACPI_INIT_GLOBAL(struct acpi_comment_addr_node, *acpi_gbl_comment_addr_list_head, NULL); - -ACPI_INIT_GLOBAL(union acpi_parse_object, *acpi_gbl_current_scope, NULL); - ACPI_INIT_GLOBAL(struct acpi_file_node, *acpi_gbl_file_tree_root, NULL); ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_reg_comment_cache); ACPI_GLOBAL(acpi_cache_t *, 
acpi_gbl_comment_addr_cache); ACPI_GLOBAL(acpi_cache_t *, acpi_gbl_file_cache); -ACPI_INIT_GLOBAL(u8, gbl_capture_comments, FALSE); - ACPI_INIT_GLOBAL(u8, acpi_gbl_debug_asl_conversion, FALSE); ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_conv_debug_file, NULL); - ACPI_GLOBAL(char, acpi_gbl_table_sig[4]); - -/***************************************************************************** - * - * Application globals - * - ****************************************************************************/ +#endif #ifdef ACPI_APPLICATION - ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_debug_file, NULL); ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_output_file, NULL); ACPI_INIT_GLOBAL(u8, acpi_gbl_debug_timeout, FALSE); @@ -442,16 +424,6 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_debug_timeout, FALSE); ACPI_GLOBAL(acpi_spinlock, acpi_gbl_print_lock); /* For print buffer */ ACPI_GLOBAL(char, acpi_gbl_print_buffer[1024]); - #endif /* ACPI_APPLICATION */ -/***************************************************************************** - * - * Info/help support - * - ****************************************************************************/ - -extern const struct ah_predefined_name asl_predefined_info[]; -extern const struct ah_device_id asl_device_ids[]; - #endif /* __ACGLOBAL_H__ */ diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c index d4dfdbb539ee..cff7154b7fee 100644 --- a/drivers/acpi/acpica/utdebug.c +++ b/drivers/acpi/acpica/utdebug.c @@ -163,7 +163,9 @@ acpi_debug_print(u32 requested_debug_level, { acpi_thread_id thread_id; va_list args; +#ifdef ACPI_APPLICATION int fill_count; +#endif /* Check if debug output enabled */ -- cgit v1.2.3 From ee68d4773ed36e3f79860bbcad0a4ddd8d58f393 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 4 Jan 2018 13:41:27 -0800 Subject: ACPICA: Create and deploy safe version of strncpy ACPICA commit 64ad9c69a1bd534a466e060a33c0bbf5fc9e189c acpi_ut_safe_strncpy - copy and terminate string. 
Strncpy is not guaranteed to terminate the copied string if the input is longer than the length of the target. No functional change. Link: https://github.com/acpica/acpica/commit/64ad9c69 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acutils.h | 4 +++- drivers/acpi/acpica/dbfileio.c | 4 ++-- drivers/acpi/acpica/psutils.c | 8 +++++--- drivers/acpi/acpica/utnonansi.c | 11 ++++++++++- drivers/acpi/acpica/uttrack.c | 4 ++-- 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index 8bb46d8623ca..b6b29d717824 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -638,9 +638,11 @@ void ut_convert_backslashes(char *pathname); void acpi_ut_repair_name(char *name); -#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION) +#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION) || defined (ACPI_DEBUG_OUTPUT) u8 acpi_ut_safe_strcpy(char *dest, acpi_size dest_size, char *source); +void acpi_ut_safe_strncpy(char *dest, char *source, acpi_size dest_size); + u8 acpi_ut_safe_strcat(char *dest, acpi_size dest_size, char *source); u8 diff --git a/drivers/acpi/acpica/dbfileio.c b/drivers/acpi/acpica/dbfileio.c index 4d81ea291d93..cf9607945704 100644 --- a/drivers/acpi/acpica/dbfileio.c +++ b/drivers/acpi/acpica/dbfileio.c @@ -99,8 +99,8 @@ void acpi_db_open_debug_file(char *name) } acpi_os_printf("Debug output file %s opened\n", name); - strncpy(acpi_gbl_db_debug_filename, name, - sizeof(acpi_gbl_db_debug_filename)); + acpi_ut_safe_strncpy(acpi_gbl_db_debug_filename, name, + sizeof(acpi_gbl_db_debug_filename)); acpi_gbl_db_output_to_file = TRUE; } #endif diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c index e15b636b1d4b..8bd7d01039cc 100644 --- a/drivers/acpi/acpica/psutils.c +++ b/drivers/acpi/acpica/psutils.c @@ -94,9 +94,11 @@ void acpi_ps_init_op(union acpi_parse_object *op, u16 opcode) 
op->common.descriptor_type = ACPI_DESC_TYPE_PARSER; op->common.aml_opcode = opcode; - ACPI_DISASM_ONLY_MEMBERS(strncpy(op->common.aml_op_name, - (acpi_ps_get_opcode_info(opcode))-> - name, sizeof(op->common.aml_op_name))); + ACPI_DISASM_ONLY_MEMBERS(acpi_ut_safe_strncpy(op->common.aml_op_name, + (acpi_ps_get_opcode_info + (opcode))->name, + sizeof(op->common. + aml_op_name))); } /******************************************************************************* diff --git a/drivers/acpi/acpica/utnonansi.c b/drivers/acpi/acpica/utnonansi.c index 792664982ea3..33a0970646df 100644 --- a/drivers/acpi/acpica/utnonansi.c +++ b/drivers/acpi/acpica/utnonansi.c @@ -140,7 +140,7 @@ int acpi_ut_stricmp(char *string1, char *string2) return (c1 - c2); } -#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION) +#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION) || defined (ACPI_DEBUG_OUTPUT) /******************************************************************************* * * FUNCTION: acpi_ut_safe_strcpy, acpi_ut_safe_strcat, acpi_ut_safe_strncat @@ -199,4 +199,13 @@ acpi_ut_safe_strncat(char *dest, strncat(dest, source, max_transfer_length); return (FALSE); } + +void acpi_ut_safe_strncpy(char *dest, char *source, acpi_size dest_size) +{ + /* Always terminate destination string */ + + strncpy(dest, source, dest_size); + dest[dest_size - 1] = 0; +} + #endif diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c index 28a302eb2015..633b4e2c669f 100644 --- a/drivers/acpi/acpica/uttrack.c +++ b/drivers/acpi/acpica/uttrack.c @@ -402,8 +402,8 @@ acpi_ut_track_allocation(struct acpi_debug_mem_block *allocation, allocation->component = component; allocation->line = line; - strncpy(allocation->module, module, ACPI_MAX_MODULE_NAME); - allocation->module[ACPI_MAX_MODULE_NAME - 1] = 0; + acpi_ut_safe_strncpy(allocation->module, (char *)module, + ACPI_MAX_MODULE_NAME); if (!element) { -- cgit v1.2.3 From ee174d3594c2fe997f45cd0969a33e0413913706 Mon Sep 17 00:00:00 
2001 From: Bob Moore Date: Wed, 3 Jan 2018 15:06:26 -0800 Subject: ACPICA: Rename a global variable, no functional change ACPICA commit ab9c83985e8b2b25dc1c173b753280a8d04922b5 Rename to add the standard prefix for globals. Link: https://github.com/acpica/acpica/commit/ab9c8398 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acglobal.h | 2 +- drivers/acpi/acpica/psutils.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 0c609f803ee1..45ef3f5dc9ad 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -245,7 +245,7 @@ ACPI_INIT_GLOBAL(union acpi_parse_object, *acpi_gbl_current_scope, NULL); /* ASL/ASL+ converter */ -ACPI_INIT_GLOBAL(u8, gbl_capture_comments, FALSE); +ACPI_INIT_GLOBAL(u8, acpi_gbl_capture_comments, FALSE); ACPI_INIT_GLOBAL(struct acpi_comment_node, *acpi_gbl_last_list_head, NULL); /***************************************************************************** diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c index 8bd7d01039cc..cd59dfe6a47d 100644 --- a/drivers/acpi/acpica/psutils.c +++ b/drivers/acpi/acpica/psutils.c @@ -161,7 +161,7 @@ union acpi_parse_object *acpi_ps_alloc_op(u16 opcode, u8 *aml) acpi_gbl_current_scope = op; } - if (gbl_capture_comments) { + if (acpi_gbl_capture_comments) { ASL_CV_TRANSFER_COMMENTS(op); } } -- cgit v1.2.3 From 87cdece169db3b839316bd8a39e438c8730d657c Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Wed, 3 Jan 2018 15:06:27 -0800 Subject: ACPICA: DT compiler: prevent error if optional field at the end of table is not present ACPICA commit a7f73af9003bf4e730db5a133300c01ba7992a01 The data table compiler throws under the following conditions: 1.) there is a table with a last field that is optional 2.) if the optional field is not present 3.) 
the optional field is the last line of the data table A change was made to dt_compile_table to return an AE_EOF under these conditions. This AE_EOF means that we are at the end of the file. The caller to dt_compile_table() is responsible for handling this case. For DBG2 table, we will complete the compilation of this subtable. For other tables, this could be different. Link: https://github.com/acpica/acpica/commit/a7f73af9 Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- include/acpi/acexcep.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index e1f9fe47f69e..3c46f0ef5f7a 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -130,8 +130,9 @@ struct acpi_exception_info { #define AE_HEX_OVERFLOW EXCEP_ENV (0x0020) #define AE_DECIMAL_OVERFLOW EXCEP_ENV (0x0021) #define AE_OCTAL_OVERFLOW EXCEP_ENV (0x0022) +#define AE_END_OF_TABLE EXCEP_ENV (0x0023) -#define AE_CODE_ENV_MAX 0x0022 +#define AE_CODE_ENV_MAX 0x0023 /* * Programmer exceptions @@ -275,7 +276,8 @@ static const struct acpi_exception_info acpi_gbl_exception_names_env[] = { EXCEP_TXT("AE_DECIMAL_OVERFLOW", "Overflow during ASCII decimal-to-binary conversion"), EXCEP_TXT("AE_OCTAL_OVERFLOW", - "Overflow during ASCII octal-to-binary conversion") + "Overflow during ASCII octal-to-binary conversion"), + EXCEP_TXT("AE_END_OF_TABLE", "Reached the end of table") }; static const struct acpi_exception_info acpi_gbl_exception_names_pgm[] = { -- cgit v1.2.3 From bc4d413a819f9d0764a80a55875a5d7e1f4efed4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 3 Jan 2018 15:06:28 -0800 Subject: ACPICA: Recognize the Windows 10 version 1607 and 1703 OSI strings ACPICA commit 35a4a3ea723b3066f575e63e5f0116f7ce65e713 The public Microsoft document listing recognized OSI strings [1] shows that these two strings were introduced. 
version 1607 / Anniversary Update / "Redstone 1" version 1703 / Creators Update / "Redstone 2" [1] http://download.microsoft.com/download/7/e/7/7e7662cf-cbea-470b-a97e-ce7ce0d98dc2/winacpi_osi.docx Link: https://github.com/acpica/acpica/commit/35a4a3ea Signed-off-by: Mario Limonciello Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/utosi.c | 2 ++ include/acpi/actypes.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c index 3175b133c0e4..f6b8dd24b006 100644 --- a/drivers/acpi/acpica/utosi.c +++ b/drivers/acpi/acpica/utosi.c @@ -101,6 +101,8 @@ static struct acpi_interface_info acpi_default_supported_interfaces[] = { {"Windows 2012", NULL, 0, ACPI_OSI_WIN_8}, /* Windows 8 and Server 2012 - Added 08/2012 */ {"Windows 2013", NULL, 0, ACPI_OSI_WIN_8}, /* Windows 8.1 and Server 2012 R2 - Added 01/2014 */ {"Windows 2015", NULL, 0, ACPI_OSI_WIN_10}, /* Windows 10 - Added 03/2015 */ + {"Windows 2016", NULL, 0, ACPI_OSI_WIN_10_RS1}, /* Windows 10 version 1607 - Added 12/2017 */ + {"Windows 2017", NULL, 0, ACPI_OSI_WIN_10_RS2}, /* Windows 10 version 1703 - Added 12/2017 */ /* Feature Group Strings */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index ddde2790a54a..31f1be74dd16 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1301,6 +1301,8 @@ typedef enum { #define ACPI_OSI_WIN_7 0x0B #define ACPI_OSI_WIN_8 0x0C #define ACPI_OSI_WIN_10 0x0D +#define ACPI_OSI_WIN_10_RS1 0x0E +#define ACPI_OSI_WIN_10_RS2 0x0F /* Definitions of getopt */ -- cgit v1.2.3 From e7b2005c608aa3dc00cd00a5001415ae04382d13 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 3 Jan 2018 15:06:29 -0800 Subject: ACPICA: Fix a couple memory leaks during package object resolution ACPICA commit 69d4415360446b4a1826dab76ba0cd6d24710ddd A couple memory leaks during resolution of individual package elements. 
Link: https://github.com/acpica/acpica/commit/69d44153 Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dspkginit.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/acpica/dspkginit.c b/drivers/acpi/acpica/dspkginit.c index 6d487edfe2de..5a602b75084e 100644 --- a/drivers/acpi/acpica/dspkginit.c +++ b/drivers/acpi/acpica/dspkginit.c @@ -297,8 +297,10 @@ acpi_ds_init_package_element(u8 object_type, { union acpi_operand_object **element_ptr; + ACPI_FUNCTION_TRACE(ds_init_package_element); + if (!source_object) { - return (AE_OK); + return_ACPI_STATUS(AE_OK); } /* @@ -329,7 +331,7 @@ acpi_ds_init_package_element(u8 object_type, source_object->package.flags |= AOPOBJ_DATA_VALID; } - return (AE_OK); + return_ACPI_STATUS(AE_OK); } /******************************************************************************* @@ -352,6 +354,7 @@ acpi_ds_resolve_package_element(union acpi_operand_object **element_ptr) union acpi_generic_state scope_info; union acpi_operand_object *element = *element_ptr; struct acpi_namespace_node *resolved_node; + struct acpi_namespace_node *original_node; char *external_path = NULL; acpi_object_type type; @@ -441,6 +444,7 @@ acpi_ds_resolve_package_element(union acpi_operand_object **element_ptr) * will remain as named references. This behavior is not described * in the ACPI spec, but it appears to be an oversight. 
*/ + original_node = resolved_node; status = acpi_ex_resolve_node_to_value(&resolved_node, NULL); if (ACPI_FAILURE(status)) { return_VOID; @@ -468,26 +472,27 @@ acpi_ds_resolve_package_element(union acpi_operand_object **element_ptr) */ case ACPI_TYPE_DEVICE: case ACPI_TYPE_THERMAL: - - /* TBD: This may not be necesssary */ - - acpi_ut_add_reference(resolved_node->object); + case ACPI_TYPE_METHOD: break; case ACPI_TYPE_MUTEX: - case ACPI_TYPE_METHOD: case ACPI_TYPE_POWER: case ACPI_TYPE_PROCESSOR: case ACPI_TYPE_EVENT: case ACPI_TYPE_REGION: + /* acpi_ex_resolve_node_to_value gave these an extra reference */ + + acpi_ut_remove_reference(original_node->object); break; default: /* * For all other types - the node was resolved to an actual - * operand object with a value, return the object + * operand object with a value, return the object. Remove + * a reference on the existing object. */ + acpi_ut_remove_reference(element); *element_ptr = (union acpi_operand_object *)resolved_node; break; } -- cgit v1.2.3 From a8c314bee4587526b29f949387c58551a9a74fcb Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Wed, 3 Jan 2018 15:06:30 -0800 Subject: ACPICA: trivial style fix, no functional change ACPICA commit 83f3375d6dcb3af812c91aaf47abcac9fc330527 This adds a semi-colon at the end of a macro call so that it can be processed correctly with source code formatting tools. Link: https://github.com/acpica/acpica/commit/83f3375d Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpica/exdump.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c index ccdd2a417aa1..b2ff61bdb9a8 100644 --- a/drivers/acpi/acpica/exdump.c +++ b/drivers/acpi/acpica/exdump.c @@ -617,10 +617,11 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth) u32 length; u32 index; - ACPI_FUNCTION_NAME(ex_dump_operand) + ACPI_FUNCTION_NAME(ex_dump_operand); - /* Check if debug output enabled */ - if (!ACPI_IS_DEBUG_ENABLED(ACPI_LV_EXEC, _COMPONENT)) { + /* Check if debug output enabled */ + + if (!ACPI_IS_DEBUG_ENABLED(ACPI_LV_EXEC, _COMPONENT)) { return; } -- cgit v1.2.3 From 3bd93bd76ba6b5588a1b2b9f4f451952c7d8994d Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 3 Jan 2018 15:06:31 -0800 Subject: ACPICA: Update version to 20171215 ACPICA commit 8b38d88e4a7151a7fc9451ac2e51c945c60b913b Version 20171215. Link: https://github.com/acpica/acpica/commit/8b38d88e Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. 
Wysocki --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index e02610adc07d..c589c3e12d90 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -46,7 +46,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20171110 +#define ACPI_CA_VERSION 0x20171215 #include #include -- cgit v1.2.3 From e8c24773d6b2cd9bc8b36bd6e60beff599be14be Mon Sep 17 00:00:00 2001 From: Dave Young Date: Thu, 4 Jan 2018 16:17:45 -0800 Subject: mm: check pfn_valid first in zero_resv_unavail With latest kernel I get below bug while testing kdump: BUG: unable to handle kernel paging request at ffffea00034b1040 IP: zero_resv_unavail+0xbd/0x126 PGD 37b98067 P4D 37b98067 PUD 37b97067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.15.0-rc1+ #316 Hardware name: LENOVO 20ARS1BJ02/20ARS1BJ02, BIOS GJET92WW (2.42 ) 03/03/2017 task: ffffffff81a0e4c0 task.stack: ffffffff81a00000 RIP: 0010:zero_resv_unavail+0xbd/0x126 RSP: 0000:ffffffff81a03d88 EFLAGS: 00010006 RAX: 0000000000000000 RBX: ffffea00034b1040 RCX: 0000000000000010 RDX: 0000000000000000 RSI: 0000000000000092 RDI: ffffea00034b1040 RBP: 00000000000d2c41 R08: 00000000000000c0 R09: 0000000000000a0d R10: 0000000000000002 R11: 0000000000007f01 R12: ffffffff81a03d90 R13: ffffea0000000000 R14: 0000000000000063 R15: 0000000000000062 FS: 0000000000000000(0000) GS:ffffffff81c73000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffea00034b1040 CR3: 0000000037609000 CR4: 00000000000606b0 Call Trace: ? free_area_init_nodes+0x640/0x664 ? zone_sizes_init+0x58/0x72 ? setup_arch+0xb50/0xc6c ? start_kernel+0x64/0x43d ? 
secondary_startup_64+0xa5/0xb0 Code: c1 e8 0c 48 39 d8 76 27 48 89 de 48 c1 e3 06 48 c7 c7 7a 87 79 81 e8 b0 c0 3e ff 4c 01 eb b9 10 00 00 00 31 c0 48 89 df 49 ff c6 ab eb bc 6a 00 49 c7 c0 f0 93 d1 81 31 d2 83 ce ff 41 54 49 RIP: zero_resv_unavail+0xbd/0x126 RSP: ffffffff81a03d88 CR2: ffffea00034b1040 ---[ end trace f5ba9e8f73c7ee26 ]--- This is introduced by commit a4a3ede2132a ("mm: zero reserved and unavailable struct pages"). The reason is that some efi reserved boot ranges are not reported in E820 ram. In my case it is a bgrt buffer: efi: mem00: [Boot Data |RUN| | | | | | | |WB|WT|WC|UC] range=[0x00000000d2c41000-0x00000000d2c85fff] (0MB) Using "add_efi_memmap" can work around the problem together with another fix: http://lkml.kernel.org/r/20171130052327.GA3500@dhcp-128-65.nay.redhat.com In zero_resv_unavail it would be better to check pfn_valid first before zeroing the page struct. This fixes the problem and potential other similar problems. Also, as Pavel Tatashin suggested, check pfn_valid at the beginning of the section. The range is backed by real memory. The memory range is efi "Boot Service Data", which means that after ExitBootServices() these ranges can be used as system ram. But some of them need to be reserved, for example the bgrt image address in an acpi table: if the image memory is freed then kexec reboot will fail because kexec inherits the same acpi table to initialize the driver.
Link: http://lkml.kernel.org/r/20171201095048.GA3084@dhcp-128-65.nay.redhat.com Fixes: a4a3ede2132a ("mm: zero reserved and unavailable struct pages") Signed-off-by: Dave Young Cc: Michal Hocko Cc: Pavel Tatashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7e5e775e97f4..76c9688b6a0a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6260,6 +6260,8 @@ void __paginginit zero_resv_unavail(void) pgcnt = 0; for_each_resv_unavail_range(i, &start, &end) { for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) { + if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) + continue; mm_zero_struct_page(pfn_to_page(pfn)); pgcnt++; } -- cgit v1.2.3 From 4d9570158b6260f449e317a5f9ed030c2504a615 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jan 2018 16:17:49 -0800 Subject: kernel/acct.c: fix the acct->needcheck check in check_free_space() As Tsukada explains, the time_is_before_jiffies(acct->needcheck) check is very wrong, we need time_is_after_jiffies() to make sys_acct() work. Ignoring the overflows, the code should "goto out" if needcheck > jiffies, while currently it checks "needcheck < jiffies" and thus in the likely case check_free_space() does nothing until jiffies overflow. In particular this means that sys_acct() is simply broken, acct_on() sets acct->needcheck = jiffies and expects that check_free_space() should set acct->active = 1 after the free-space check, but this won't happen if jiffies increments in between. This was broken by commit 32dc73086015 ("get rid of timer in kern/acct.c") in 2011, then another (correct) commit 795a2f22a8ea ("acct() should honour the limits from the very beginning") made the problem more visible. 
Link: http://lkml.kernel.org/r/20171213133940.GA6554@redhat.com Fixes: 32dc73086015 ("get rid of timer in kern/acct.c") Reported-by: TSUKADA Koutaro Suggested-by: TSUKADA Koutaro Signed-off-by: Oleg Nesterov Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/acct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/acct.c b/kernel/acct.c index d15c0ee4d955..addf7732fb56 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -102,7 +102,7 @@ static int check_free_space(struct bsd_acct_struct *acct) { struct kstatfs sbuf; - if (time_is_before_jiffies(acct->needcheck)) + if (time_is_after_jiffies(acct->needcheck)) goto out; /* May block */ -- cgit v1.2.3 From 4991c09c7c812dba13ea9be79a68b4565bb1fa4e Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 4 Jan 2018 16:17:52 -0800 Subject: mm/mprotect: add a cond_resched() inside change_pmd_range() While testing on a large CPU system, detected the following RCU stall many times over the span of the workload. This problem is solved by adding a cond_resched() in the change_pmd_range() function. 
INFO: rcu_sched detected stalls on CPUs/tasks: 154-....: (670 ticks this GP) idle=022/140000000000000/0 softirq=2825/2825 fqs=612 (detected by 955, t=6002 jiffies, g=4486, c=4485, q=90864) Sending NMI from CPU 955 to CPUs 154: NMI backtrace for cpu 154 CPU: 154 PID: 147071 Comm: workload Not tainted 4.15.0-rc3+ #3 NIP: c0000000000b3f64 LR: c0000000000b33d4 CTR: 000000000000aa18 REGS: 00000000a4b0fb44 TRAP: 0501 Not tainted (4.15.0-rc3+) MSR: 8000000000009033 CR: 22422082 XER: 00000000 CFAR: 00000000006cf8f0 SOFTE: 1 GPR00: 0010000000000000 c00003ef9b1cb8c0 c0000000010cc600 0000000000000000 GPR04: 8e0000018c32b200 40017b3858fd6e00 8e0000018c32b208 40017b3858fd6e00 GPR08: 8e0000018c32b210 40017b3858fd6e00 8e0000018c32b218 40017b3858fd6e00 GPR12: ffffffffffffffff c00000000fb25100 NIP [c0000000000b3f64] plpar_hcall9+0x44/0x7c LR [c0000000000b33d4] pSeries_lpar_flush_hash_range+0x384/0x420 Call Trace: flush_hash_range+0x48/0x100 __flush_tlb_pending+0x44/0xd0 hpte_need_flush+0x408/0x470 change_protection_range+0xaac/0xf10 change_prot_numa+0x30/0xb0 task_numa_work+0x2d0/0x3e0 task_work_run+0x130/0x190 do_notify_resume+0x118/0x120 ret_from_except_lite+0x70/0x74 Instruction dump: 60000000 f8810028 7ca42b78 7cc53378 7ce63b78 7d074378 7d284b78 7d495378 e9410060 e9610068 e9810070 44000022 <7d806378> e9810028 f88c0000 f8ac0008 Link: http://lkml.kernel.org/r/20171214140551.5794-1-khandual@linux.vnet.ibm.com Signed-off-by: Anshuman Khandual Suggested-by: Nicholas Piggin Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mprotect.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index ec39f730a0bf..58b629bb70de 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -166,7 +166,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, next = pmd_addr_end(addr, end); if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) && pmd_none_or_clear_bad(pmd)) - 
continue; + goto next; /* invoke the mmu notifier if the pmd is populated */ if (!mni_start) { @@ -188,7 +188,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, } /* huge pmd was handled */ - continue; + goto next; } } /* fall through, the trans huge pmd just split */ @@ -196,6 +196,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, this_pages = change_pte_range(vma, pmd, addr, next, newprot, dirty_accountable, prot_numa); pages += this_pages; +next: + cond_resched(); } while (pmd++, addr = next, addr != end); if (mni_start) -- cgit v1.2.3 From dc8635b78cd8669c37e230058d18c33af7451ab1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 4 Jan 2018 16:17:56 -0800 Subject: kernel/exit.c: export abort() to modules gcc -fisolate-erroneous-paths-dereference can generate calls to abort() from modular code too. [arnd@arndb.de: drop duplicate exports of abort()] Link: http://lkml.kernel.org/r/20180102103311.706364-1-arnd@arndb.de Reported-by: Vineet Gupta Cc: Sudip Mukherjee Cc: Arnd Bergmann Cc: Alexey Brodkin Cc: Russell King Cc: Jose Abreu Signed-off-by: Andrew Morton Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 1 - arch/m32r/kernel/traps.c | 1 - arch/unicore32/kernel/traps.c | 1 - kernel/exit.c | 1 + 4 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5cf04888c581..3e26c6f7a191 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -793,7 +793,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index cb79fba79d43..b88a8dd14933 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -122,7 +122,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void 
__init trap_init(void) { diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 5f25b39f04d4..c4ac6043ebb0 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -298,7 +298,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/kernel/exit.c b/kernel/exit.c index df0c91d5606c..995453d9fb55 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1763,3 +1763,4 @@ __weak void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } +EXPORT_SYMBOL(abort); -- cgit v1.2.3 From 152a2d199e1385c6ccef17c24555103b30447c91 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 4 Jan 2018 16:17:59 -0800 Subject: mm/debug.c: provide useful debugging information for VM_BUG With the recent addition of hashed kernel pointers, places which need to produce useful debug output have to specify %px, not %p. This patch fixes all the VM debug to use %px. This is appropriate because it's debug output that the user should never be able to trigger, and kernel developers need to see the actual pointers. Link: http://lkml.kernel.org/r/20171219133236.GE13680@bombadil.infradead.org Signed-off-by: Matthew Wilcox Acked-by: Michal Hocko Cc: "Tobin C. Harding" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index d947f3e03b0d..56e2d9125ea5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -50,7 +50,7 @@ void __dump_page(struct page *page, const char *reason) */ int mapcount = PageSlab(page) ? 
0 : page_mapcount(page); - pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", + pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx", page, page_ref_count(page), mapcount, page->mapping, page_to_pgoff(page)); if (PageCompound(page)) @@ -69,7 +69,7 @@ void __dump_page(struct page *page, const char *reason) #ifdef CONFIG_MEMCG if (page->mem_cgroup) - pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup); + pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup); #endif } @@ -84,10 +84,10 @@ EXPORT_SYMBOL(dump_page); void dump_vma(const struct vm_area_struct *vma) { - pr_emerg("vma %p start %p end %p\n" - "next %p prev %p mm %p\n" - "prot %lx anon_vma %p vm_ops %p\n" - "pgoff %lx file %p private_data %p\n" + pr_emerg("vma %px start %px end %px\n" + "next %px prev %px mm %px\n" + "prot %lx anon_vma %px vm_ops %px\n" + "pgoff %lx file %px private_data %px\n" "flags: %#lx(%pGv)\n", vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next, vma->vm_prev, vma->vm_mm, @@ -100,27 +100,27 @@ EXPORT_SYMBOL(dump_vma); void dump_mm(const struct mm_struct *mm) { - pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n" + pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n" #ifdef CONFIG_MMU - "get_unmapped_area %p\n" + "get_unmapped_area %px\n" #endif "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" - "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" + "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n" "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n" "start_code %lx end_code %lx start_data %lx end_data %lx\n" "start_brk %lx brk %lx start_stack %lx\n" "arg_start %lx arg_end %lx env_start %lx env_end %lx\n" - "binfmt %p flags %lx core_state %p\n" + "binfmt %px flags %lx core_state %px\n" #ifdef CONFIG_AIO - "ioctx_table %p\n" + "ioctx_table %px\n" #endif #ifdef CONFIG_MEMCG - "owner %p " + "owner %px " #endif - "exe_file %p\n" + "exe_file %px\n" #ifdef 
CONFIG_MMU_NOTIFIER - "mmu_notifier_mm %p\n" + "mmu_notifier_mm %px\n" #endif #ifdef CONFIG_NUMA_BALANCING "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" -- cgit v1.2.3 From cdc346b36e1dfec201b24eddb7bdbcff6727db04 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 4 Jan 2018 16:18:02 -0800 Subject: mm/zsmalloc.c: include fs.h `struct file_system_type' and alloc_anon_inode() function are defined in fs.h, include it directly. Link: http://lkml.kernel.org/r/20171219104219.3017-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 685049a9048d..683c0651098c 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -53,6 +53,7 @@ #include #include #include +#include #define ZSPAGE_MAGIC 0x58 -- cgit v1.2.3 From d09cfbbfa0f761a97687828b5afb27b56cbf2e19 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 4 Jan 2018 16:18:06 -0800 Subject: mm/sparse.c: wrong allocation for mem_section In commit 83e3c48729d9 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y") mem_section is allocated at runtime to save memory. It allocates the first dimension of array with sizeof(struct mem_section). It costs extra memory, should be sizeof(struct mem_section *). Fix it. Link: http://lkml.kernel.org/r/1513932498-20350-1-git-send-email-bhe@redhat.com Fixes: 83e3c48729 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y") Signed-off-by: Baoquan He Tested-by: Dave Young Acked-by: Kirill A. Shutemov Cc: Kirill A. 
Shutemov Cc: Ingo Molnar Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Atsushi Kumagai Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/sparse.c b/mm/sparse.c index 7a5dacaa06e3..2609aba121e8 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -211,7 +211,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) if (unlikely(!mem_section)) { unsigned long size, align; - size = sizeof(struct mem_section) * NR_SECTION_ROOTS; + size = sizeof(struct mem_section*) * NR_SECTION_ROOTS; align = 1 << (INTERNODE_CACHE_SHIFT); mem_section = memblock_virt_alloc(size, align); } -- cgit v1.2.3 From 0cbb4b4f4c44f54af268969b18d8deda63aded59 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 4 Jan 2018 16:18:09 -0800 Subject: userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails The previous fix in commit 384632e67e08 ("userfaultfd: non-cooperative: fix fork use after free") corrected the refcounting in case of UFFD_EVENT_FORK failure for the fork userfault paths. That still didn't clear the vma->vm_userfaultfd_ctx of the vmas that were set to point to the aborted new uffd ctx earlier in dup_userfaultfd. 
Link: http://lkml.kernel.org/r/20171223002505.593-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: syzbot Reviewed-by: Mike Rapoport Cc: Eric Biggers Cc: Dmitry Vyukov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index ac9a4e65ca49..41a75f9f23fd 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -570,11 +570,14 @@ out: static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, struct userfaultfd_wait_queue *ewq) { + struct userfaultfd_ctx *release_new_ctx; + if (WARN_ON_ONCE(current->flags & PF_EXITING)) goto out; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); + release_new_ctx = NULL; spin_lock(&ctx->event_wqh.lock); /* @@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, new = (struct userfaultfd_ctx *) (unsigned long) ewq->msg.arg.reserved.reserved1; - - userfaultfd_ctx_put(new); + release_new_ctx = new; } break; } @@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, __set_current_state(TASK_RUNNING); spin_unlock(&ctx->event_wqh.lock); + if (release_new_ctx) { + struct vm_area_struct *vma; + struct mm_struct *mm = release_new_ctx->mm; + + /* the various vma->vm_userfaultfd_ctx still points to it */ + down_write(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) + vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; + up_write(&mm->mmap_sem); + + userfaultfd_ctx_put(release_new_ctx); + } + /* * ctx may go away after this if the userfault pseudo fd is * already released. 
-- cgit v1.2.3 From 9a0e7120109632910e77295ce6fc512c16cd367b Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Thu, 4 Jan 2018 16:18:12 -0800 Subject: mailmap: update Mark Yao's email address Change the previous employers email addresses to the current email address. Link: http://lkml.kernel.org/r/20171229121726.31589-1-jeffy.chen@rock-chips.com Signed-off-by: Jeffy Chen Acked-by: Martin Kepplinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 1469ff0d3f4d..e18cab73e209 100644 --- a/.mailmap +++ b/.mailmap @@ -107,6 +107,7 @@ Linus Lüssing Maciej W. Rozycki Marcin Nowakowski Mark Brown +Mark Yao Martin Kepplinger Martin Kepplinger Matthieu CASTET -- cgit v1.2.3 From 9a00674213a3f00394f4e3221b88f2d21fc05789 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 14:30:19 -0600 Subject: crypto: algapi - fix NULL dereference in crypto_remove_spawns() syzkaller triggered a NULL pointer dereference in crypto_remove_spawns() via a program that repeatedly and concurrently requests AEADs "authenc(cmac(des3_ede-asm),pcbc-aes-aesni)" and hashes "cmac(des3_ede)" through AF_ALG, where the hashes are requested as "untested" (CRYPTO_ALG_TESTED is set in ->salg_mask but clear in ->salg_feat; this causes the template to be instantiated for every request). Although AF_ALG users really shouldn't be able to request an "untested" algorithm, the NULL pointer dereference is actually caused by a longstanding race condition where crypto_remove_spawns() can encounter an instance which has had spawn(s) "grabbed" but hasn't yet been registered, resulting in ->cra_users still being NULL. We probably should properly initialize ->cra_users earlier, but that would require updating many templates individually. For now just fix the bug in a simple way that can easily be backported: make crypto_remove_spawns() treat a NULL ->cra_users list as empty. 
Reported-by: syzbot Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/algapi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crypto/algapi.c b/crypto/algapi.c index 60d7366ed343..9a636f961572 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, spawn->alg = NULL; spawns = &inst->alg.cra_users; + + /* + * We may encounter an unregistered instance here, since + * an instance's spawns are set up prior to the instance + * being registered. An unregistered instance will have + * NULL ->cra_users.next, since ->cra_users isn't + * properly initialized until registration. But an + * unregistered instance cannot have any users, so treat + * it the same as ->cra_users being empty. + */ + if (spawns->next == NULL) + break; } } while ((spawns = crypto_more_spawns(alg, &stack, &top, &secondary_spawns))); -- cgit v1.2.3 From d16b46e4fd8bc6063624605f25b8c0835bb1fbe3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Jan 2018 22:25:07 +1100 Subject: xfrm: Use __skb_queue_tail in xfrm_trans_queue We do not need locking in xfrm_trans_queue because it is designed to use per-CPU buffers. However, the original code incorrectly used skb_queue_tail which takes the lock. This patch switches it to __skb_queue_tail instead. 
Reported-and-tested-by: Artem Savkov Fixes: acf568ee859f ("xfrm: Reinject transport-mode packets...") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 3f6f6f8c9fa5..5b2409746ae0 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -518,7 +518,7 @@ int xfrm_trans_queue(struct sk_buff *skb, return -ENOBUFS; XFRM_TRANS_SKB_CB(skb)->finish = finish; - skb_queue_tail(&trans->queue, skb); + __skb_queue_tail(&trans->queue, skb); tasklet_schedule(&trans->tasklet); return 0; } -- cgit v1.2.3 From 56aeb07c914a616ab84357d34f8414a69b140cdf Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 4 Jan 2018 17:53:12 +0100 Subject: ARM: dts: kirkwood: fix pin-muxing of MPP7 on OpenBlocks A7 MPP7 is currently muxed as "gpio", but this function doesn't exist for MPP7, only "gpo" is available. This causes the following error: kirkwood-pinctrl f1010000.pin-controller: unsupported function gpio on pin mpp7 pinctrl core: failed to register map default (6): invalid type given kirkwood-pinctrl f1010000.pin-controller: error claiming hogs: -22 kirkwood-pinctrl f1010000.pin-controller: could not claim hogs: -22 kirkwood-pinctrl f1010000.pin-controller: unable to register pinctrl driver kirkwood-pinctrl: probe of f1010000.pin-controller failed with error -22 So the pinctrl driver is not probed, all device drivers (including the UART driver) do a -EPROBE_DEFER, and therefore the system doesn't really boot (well, it boots, but with no UART, and no devices that require pin-muxing). Back when the Device Tree file for this board was introduced, the definition was already wrong. The pinctrl driver also always described as "gpo" this function for MPP7. However, between Linux 4.10 and 4.11, a hog pin failing to be muxed was turned from a simple warning to a hard error that caused the entire pinctrl driver probe to bail out. 
This is probably the result of commit 6118714275f0a ("pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable()"). This commit fixes the Device Tree to use the proper "gpo" function for MPP7, which fixes the boot of OpenBlocks A7, which was broken since Linux 4.11. Fixes: f24b56cbcd9d ("ARM: kirkwood: add support for OpenBlocks A7 platform") Cc: Signed-off-by: Thomas Petazzoni Reviewed-by: Andrew Lunn Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/kirkwood-openblocks_a7.dts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts index cf2f5240e176..27cc913ca0f5 100644 --- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts +++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts @@ -53,7 +53,8 @@ }; pinctrl: pin-controller@10000 { - pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>; + pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header + &pmx_gpio_header_gpo>; pinctrl-names = "default"; pmx_uart0: pmx-uart0 { @@ -85,11 +86,16 @@ * ground. */ pmx_gpio_header: pmx-gpio-header { - marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28", + marvell,pins = "mpp17", "mpp29", "mpp28", "mpp35", "mpp34", "mpp40"; marvell,function = "gpio"; }; + pmx_gpio_header_gpo: pxm-gpio-header-gpo { + marvell,pins = "mpp7"; + marvell,function = "gpo"; + }; + pmx_gpio_init: pmx-init { marvell,pins = "mpp38"; marvell,function = "gpio"; -- cgit v1.2.3 From 107b7d9fa94c4692d9104243f0e793e2a4e1366e Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 3 Jan 2018 07:32:45 -0500 Subject: mfd: rtsx: Release IRQ during shutdown 'Commit cc27b735ad3a ("PCI/portdrv: Turn off PCIe services during shutdown")' revealed a resource leak in rtsx_pci driver during shutdown. 
Issue shows up as a warning during shutdown as follows: remove_proc_entry: removing non-empty directory 'irq/17', leaking at least 'rtsx_pci' WARNING: CPU: 0 PID: 1578 at fs/proc/generic.c:572 remove_proc_entry+0x11d/0x130 Modules linked in ... Call Trace: unregister_irq_proc free_desc irq_free_descs mp_unmap_irq acpi_unregister_gsi_apic acpi_pci_irq_disable do_pci_disable_device pci_disable_device device_shutdown kernel_restart Sys_reboot Even though rtsx_pci driver implements a shutdown callback, it is not releasing the interrupt that it registered during probe. This is causing the ACPI layer to complain that the shared IRQ is in use while freeing IRQ. This code releases the IRQ to prevent resource leak and eliminate the warning. Fixes: cc27b735ad3a ("PCI/portdrv: Turn off PCIe services during shutdown") Link: https://bugzilla.kernel.org/show_bug.cgi?id=198141 Reported-by: Chris Clayton Signed-off-by: Sinan Kaya Reviewed-by: Rafael J. Wysocki Signed-off-by: Lee Jones --- drivers/mfd/rtsx_pcr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 590fb9aad77d..c3ed885c155c 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -1543,6 +1543,9 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev) rtsx_pci_power_off(pcr, HOST_ENTER_S1); pci_disable_device(pcidev); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + pci_disable_msi(pcr->pci); } #else /* CONFIG_PM */ -- cgit v1.2.3 From d43c17daf2771f703bd92a78b34c7b0f3dcc9576 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 5 Jan 2018 12:18:07 +0200 Subject: ASoC: davinci-mcasp: Add rule to constrain the minimum period size The minimum period size (in frames) must be not lower than the FIFO size of McASP and in general too small period size would easily result underrun in applications as eDMA - the most common DMA servicing McASP have support for limited number of periods. 
Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown --- sound/soc/davinci/davinci-mcasp.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 804c6f2bcf21..03ba218160ca 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1242,6 +1242,20 @@ static int davinci_mcasp_hw_rule_format(struct snd_pcm_hw_params *params, return snd_mask_refine(fmt, &nfmt); } +static int davinci_mcasp_hw_rule_min_periodsize( + struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) +{ + struct snd_interval *period_size = hw_param_interval(params, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE); + struct snd_interval frames; + + snd_interval_any(&frames); + frames.min = 64; + frames.integer = 1; + + return snd_interval_refine(period_size, &frames); +} + static int davinci_mcasp_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { @@ -1333,6 +1347,11 @@ static int davinci_mcasp_startup(struct snd_pcm_substream *substream, return ret; } + snd_pcm_hw_rule_add(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + davinci_mcasp_hw_rule_min_periodsize, NULL, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); + return 0; } -- cgit v1.2.3 From 943309d4aad6732b905f3f500e6e17e33c211494 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 4 Jan 2018 09:19:13 +0200 Subject: iwlwifi: pcie: fix DMA memory mapping / unmapping 22000 devices (previously referenced as A000) can support short transmit queues. This means that we have less DMA descriptors (TFD) for those shorter queues. Previous devices must still have 256 TFDs for each queue even if those 256 TFDs point to fewer buffers. When I introduced support for the short queues for 22000 I broke older devices by assuming that they can also have less TFDs in their queues. 
This led to several problems: 1) the payload of the commands weren't unmapped properly which caused the SWIOTLB to complain at some point. 2) the hardware could get confused and we get hardware crashes. The corresponding bugzilla entries are: https://bugzilla.kernel.org/show_bug.cgi?id=198201 https://bugzilla.kernel.org/show_bug.cgi?id=198265 Cc: stable@vger.kernel.org # 4.14+ Fixes: 4ecab5616023 ("iwlwifi: pcie: support short Tx queues for A000 device family") Reviewed-by: Sharon, Sara Signed-off-by: Emmanuel Grumbach Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 10 +++++++--- drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 11 +++-------- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 8 ++++---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index d749abeca3ae..403e65c309d0 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -670,11 +670,15 @@ static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index) return index & (q->n_window - 1); } -static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie, +static inline void *iwl_pcie_get_tfd(struct iwl_trans *trans, struct iwl_txq *txq, int idx) { - return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq, - idx); + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + if (trans->cfg->use_tfh) + idx = iwl_pcie_get_cmd_index(txq, idx); + + return txq->tfds + trans_pcie->tfd_size * idx; } static inline void iwl_enable_rfkill_int(struct iwl_trans *trans) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 16b345f54ff0..6d0a907d5ba5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -171,8 +171,6 @@ static 
void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans, static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and * idx is bounded by n_window */ @@ -181,7 +179,7 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) lockdep_assert_held(&txq->lock); iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta, - iwl_pcie_get_tfd(trans_pcie, txq, idx)); + iwl_pcie_get_tfd(trans, txq, idx)); /* free SKB */ if (txq->entries) { @@ -364,11 +362,9 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *out_meta) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr); - struct iwl_tfh_tfd *tfd = - iwl_pcie_get_tfd(trans_pcie, txq, idx); + struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx); dma_addr_t tb_phys; bool amsdu; int i, len, tb1_len, tb2_len, hdr_len; @@ -565,8 +561,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans, u8 group_id = iwl_cmd_groupid(cmd->id); const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD]; u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD]; - struct iwl_tfh_tfd *tfd = - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr); + struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr); memset(tfd, 0, sizeof(*tfd)); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index fed6d842a5e1..3f85713c41dc 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -373,7 +373,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int i, num_tbs; - void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index); + void *tfd 
= iwl_pcie_get_tfd(trans, txq, index); /* Sanity check on number of chunks */ num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); @@ -2018,7 +2018,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, } trace_iwlwifi_dev_tx(trans->dev, skb, - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), + iwl_pcie_get_tfd(trans, txq, txq->write_ptr), trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, hdr_len); @@ -2092,7 +2092,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, IEEE80211_CCMP_HDR_LEN : 0; trace_iwlwifi_dev_tx(trans->dev, skb, - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), + iwl_pcie_get_tfd(trans, txq, txq->write_ptr), trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0); @@ -2425,7 +2425,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr, IWL_FIRST_TB_SIZE); - tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr); + tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr); /* Set up entry for this TFD in Tx byte-count array */ iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len), iwl_pcie_tfd_get_num_tbs(trans, tfd)); -- cgit v1.2.3 From ee1f4a7dafa997816ff3de96155c6f3edc21c1e6 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 13 Dec 2017 12:27:39 +0530 Subject: powernv-cpufreq: Add helper to extract pstate from PMSR On POWERNV platform, the fields for pstates in the Power Management Status Register (PMSR) and the Power Management Control Register (PMCR) are 8-bits wide. On POWER8 the pstates are negatively numbered while on POWER9 they are positively numbered. The device-tree exports pstates as 32-bit entries. The device-tree implementation sign-extends the 8-bit pstate values to obtain the corresponding 32-bit entry. 
Eg: On POWER8, a pstate value 0x82 [-126] is represented in the device-tree as 0xffffff82 while on POWER9, the same value 0x82 [130] is represented in the device-tree as 0x00000082. The powernv-cpufreq driver implementation represents pstates using the integer type. In multiple places in the driver, the code interprets the pstates extracted from the PMSR as a signed byte and assigns it to an integer variable to get the sign-extension. On POWER9 platforms which have greater than 128 pstates, this results in the driver performing incorrect sign-extension, and thereby treating a legitimate pstate (say 130) as an invalid pstate (since it is interpreted as -126). This patch fixes the issue by implementing a helper function to extract Pstates from PMSR register, and correctly sign-extend it to be consistent with the values provided by the device-tree. Signed-off-by: Gautham R. Shenoy Acked-by: Balbir Singh Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index b6d7c4c98d0a..f46b60fb3084 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -41,11 +41,9 @@ #define POWERNV_MAX_PSTATES 256 #define PMSR_PSAFE_ENABLE (1UL << 30) #define PMSR_SPR_EM_DISABLE (1UL << 31) -#define PMSR_MAX(x) ((x >> 32) & 0xFF) +#define MAX_PSTATE_SHIFT 32 #define LPSTATE_SHIFT 48 #define GPSTATE_SHIFT 56 -#define GET_LPSTATE(x) (((x) >> LPSTATE_SHIFT) & 0xFF) -#define GET_GPSTATE(x) (((x) >> GPSTATE_SHIFT) & 0xFF) #define MAX_RAMP_DOWN_TIME 5120 /* @@ -94,6 +92,7 @@ struct global_pstate_info { }; static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; +u32 pstate_sign_prefix; static bool rebooting, throttled, occ_reset; static const char * const throttle_reason[] = { @@ -148,6 +147,20 @@ static struct powernv_pstate_info { bool wof_enabled; }
powernv_pstate_info; +static inline int extract_pstate(u64 pmsr_val, unsigned int shift) +{ + int ret = ((pmsr_val >> shift) & 0xFF); + + if (!ret) + return ret; + + return (pstate_sign_prefix | ret); +} + +#define extract_local_pstate(x) extract_pstate(x, LPSTATE_SHIFT) +#define extract_global_pstate(x) extract_pstate(x, GPSTATE_SHIFT) +#define extract_max_pstate(x) extract_pstate(x, MAX_PSTATE_SHIFT) + /* Use following macros for conversions between pstate_id and index */ static inline int idx_to_pstate(unsigned int i) { @@ -278,6 +291,9 @@ next: powernv_pstate_info.nr_pstates = nr_pstates; pr_debug("NR PStates %d\n", nr_pstates); + + pstate_sign_prefix = pstate_min & ~0xFF; + for (i = 0; i < nr_pstates; i++) { u32 id = be32_to_cpu(pstate_ids[i]); u32 freq = be32_to_cpu(pstate_freqs[i]); @@ -438,17 +454,10 @@ struct powernv_smp_call_data { static void powernv_read_cpu_freq(void *arg) { unsigned long pmspr_val; - s8 local_pstate_id; struct powernv_smp_call_data *freq_data = arg; pmspr_val = get_pmspr(SPRN_PMSR); - - /* - * The local pstate id corresponds bits 48..55 in the PMSR. - * Note: Watch out for the sign! - */ - local_pstate_id = (pmspr_val >> 48) & 0xFF; - freq_data->pstate_id = local_pstate_id; + freq_data->pstate_id = extract_local_pstate(pmspr_val); freq_data->freq = pstate_id_to_freq(freq_data->pstate_id); pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n", @@ -522,7 +531,7 @@ static void powernv_cpufreq_throttle_check(void *data) chip = this_cpu_read(chip_info); /* Check for Pmax Capping */ - pmsr_pmax = (s8)PMSR_MAX(pmsr); + pmsr_pmax = extract_max_pstate(pmsr); pmsr_pmax_idx = pstate_to_idx(pmsr_pmax); if (pmsr_pmax_idx != powernv_pstate_info.max) { if (chip->throttled) @@ -645,8 +654,8 @@ void gpstate_timer_handler(struct timer_list *t) * value. Hence, read from PMCR to get correct data. 
*/ val = get_pmspr(SPRN_PMCR); - freq_data.gpstate_id = (s8)GET_GPSTATE(val); - freq_data.pstate_id = (s8)GET_LPSTATE(val); + freq_data.gpstate_id = extract_global_pstate(val); + freq_data.pstate_id = extract_local_pstate(val); if (freq_data.gpstate_id == freq_data.pstate_id) { reset_gpstates(policy); spin_unlock(&gpstates->gpstate_lock); -- cgit v1.2.3 From 332f0a01f0dd669dcd208e4e9666d80dffd62e7b Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 13 Dec 2017 12:27:40 +0530 Subject: powernv-cpufreq: Fix pstate_to_idx() to handle non-continguous pstates The code in powernv-cpufreq makes the following two assumptions which are not guaranteed by the device-tree bindings: 1) Pstate ids are contiguous: This is used in pstate_to_idx() to obtain the reverse map from a pstate to its corresponding entry into the cpufreq frequency table. 2) Every Pstate should always lie between the max and the min pstates that are explicitly reported in the device tree: This is used to determine whether a pstate reported by the PMSR is out of bounds. Both these assumptions are unwarranted and can change on future platforms. In this patch, we maintain the reverse map from a pstate to its index in the cpufreq frequency table and use this in pstate_to_idx(). This does away with assumption (1) mentioned above, and will work with non-contiguous pstate ids. If no entry exists for a particular pstate, then such a pstate is treated as being out of bounds. This gets rid of assumption (2). On all the existing platforms, where the pstates are 8-bit long values, the new implementation of pstate_to_idx() takes constant time. Signed-off-by: Gautham R. Shenoy Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 85 +++++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 22 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index f46b60fb3084..8e3dbcaee286 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,8 @@ #include #include -#define POWERNV_MAX_PSTATES 256 +#define POWERNV_MAX_PSTATES_ORDER 8 +#define POWERNV_MAX_PSTATES (1UL << (POWERNV_MAX_PSTATES_ORDER)) #define PMSR_PSAFE_ENABLE (1UL << 30) #define PMSR_SPR_EM_DISABLE (1UL << 31) #define MAX_PSTATE_SHIFT 32 @@ -92,6 +94,27 @@ struct global_pstate_info { }; static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; + +DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); +/** + * struct pstate_idx_revmap_data: Entry in the hashmap pstate_revmap + * indexed by a function of pstate id. + * + * @pstate_id: pstate id for this entry. + * + * @cpufreq_table_idx: Index into the powernv_freqs + * cpufreq_frequency_table for frequency + * corresponding to pstate_id. + * + * @hentry: hlist_node that hooks this entry into the pstate_revmap + * hashtable + */ +struct pstate_idx_revmap_data { + int pstate_id; + unsigned int cpufreq_table_idx; + struct hlist_node hentry; +}; + u32 pstate_sign_prefix; static bool rebooting, throttled, occ_reset; @@ -161,39 +184,47 @@ static inline int extract_pstate(u64 pmsr_val, unsigned int shift) #define extract_global_pstate(x) extract_pstate(x, GPSTATE_SHIFT) #define extract_max_pstate(x) extract_pstate(x, MAX_PSTATE_SHIFT) -/* Use following macros for conversions between pstate_id and index */ +/* Use following functions for conversions between pstate_id and index */ + +/** + * idx_to_pstate : Returns the pstate id corresponding to the + * frequency in the cpufreq frequency table + * powernv_freqs indexed by @i. 
+ * + * If @i is out of bound, this will return the pstate + * corresponding to the nominal frequency. + */ static inline int idx_to_pstate(unsigned int i) { if (unlikely(i >= powernv_pstate_info.nr_pstates)) { - pr_warn_once("index %u is out of bound\n", i); + pr_warn_once("idx_to_pstate: index %u is out of bound\n", i); return powernv_freqs[powernv_pstate_info.nominal].driver_data; } return powernv_freqs[i].driver_data; } -static inline unsigned int pstate_to_idx(int pstate) +/** + * pstate_to_idx : Returns the index in the cpufreq frequencytable + * powernv_freqs for the frequency whose corresponding + * pstate id is @pstate. + * + * If no frequency corresponding to @pstate is found, + * this will return the index of the nominal + * frequency. + */ +static unsigned int pstate_to_idx(int pstate) { - int min = powernv_freqs[powernv_pstate_info.min].driver_data; - int max = powernv_freqs[powernv_pstate_info.max].driver_data; + unsigned int key = pstate % POWERNV_MAX_PSTATES; + struct pstate_idx_revmap_data *revmap_data; - if (min > 0) { - if (unlikely((pstate < max) || (pstate > min))) { - pr_warn_once("pstate %d is out of bound\n", pstate); - return powernv_pstate_info.nominal; - } - } else { - if (unlikely((pstate > max) || (pstate < min))) { - pr_warn_once("pstate %d is out of bound\n", pstate); - return powernv_pstate_info.nominal; - } + hash_for_each_possible(pstate_revmap, revmap_data, hentry, key) { + if (revmap_data->pstate_id == pstate) + return revmap_data->cpufreq_table_idx; } - /* - * abs() is deliberately used so that is works with - * both monotonically increasing and decreasing - * pstate values - */ - return abs(pstate - idx_to_pstate(powernv_pstate_info.max)); + + pr_warn_once("pstate_to_idx: pstate %d not found\n", pstate); + return powernv_pstate_info.nominal; } static inline void reset_gpstates(struct cpufreq_policy *policy) @@ -297,11 +328,21 @@ next: for (i = 0; i < nr_pstates; i++) { u32 id = be32_to_cpu(pstate_ids[i]); u32 freq = 
be32_to_cpu(pstate_freqs[i]); + struct pstate_idx_revmap_data *revmap_data; + unsigned int key; pr_debug("PState id %d freq %d MHz\n", id, freq); powernv_freqs[i].frequency = freq * 1000; /* kHz */ powernv_freqs[i].driver_data = id; + revmap_data = (struct pstate_idx_revmap_data *) + kmalloc(sizeof(*revmap_data), GFP_KERNEL); + + revmap_data->pstate_id = id; + revmap_data->cpufreq_table_idx = i; + key = id % POWERNV_MAX_PSTATES; + hash_add(pstate_revmap, &revmap_data->hentry, key); + if (id == pstate_max) powernv_pstate_info.max = i; else if (id == pstate_nominal) -- cgit v1.2.3 From 967b87fd81d513de7aac247320f02b1645f9ca64 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 13 Dec 2017 12:27:41 +0530 Subject: powernv-cpufreq: Treat pstates as opaque 8-bit values On POWER8 and POWER9, the PMSR and the PMCR registers define pstates to be 8-bit wide values. The device-tree exports pstates as 32-bit wide values of which the lower byte is the actual pstate. The current implementation in the kernel treats pstates as integer type, since it used to use the sign of the pstate for performing some boundary-checks. This is no longer required after the patch "powernv-cpufreq: Fix pstate_to_idx() to handle non-continguous pstates". So, in this patch, we modify the powernv-cpufreq driver to uniformly treat pstates as opaque 8-bit values obtained from the device-tree or the PMCR. This simplifies the extract_pstate() helper function since we no longer need to worry about sign extension. Signed-off-by: Gautham R. Shenoy Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 47 ++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 8e3dbcaee286..8a4e2ce0804c 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -110,12 +110,11 @@ DEFINE_HASHTABLE(pstate_revmap, POWERNV_MAX_PSTATES_ORDER); * hashtable */ struct pstate_idx_revmap_data { - int pstate_id; + u8 pstate_id; unsigned int cpufreq_table_idx; struct hlist_node hentry; }; -u32 pstate_sign_prefix; static bool rebooting, throttled, occ_reset; static const char * const throttle_reason[] = { @@ -170,14 +169,9 @@ static struct powernv_pstate_info { bool wof_enabled; } powernv_pstate_info; -static inline int extract_pstate(u64 pmsr_val, unsigned int shift) +static inline u8 extract_pstate(u64 pmsr_val, unsigned int shift) { - int ret = ((pmsr_val >> shift) & 0xFF); - - if (!ret) - return ret; - - return (pstate_sign_prefix | ret); + return ((pmsr_val >> shift) & 0xFF); } #define extract_local_pstate(x) extract_pstate(x, LPSTATE_SHIFT) @@ -194,7 +188,7 @@ static inline int extract_pstate(u64 pmsr_val, unsigned int shift) * If @i is out of bound, this will return the pstate * corresponding to the nominal frequency. */ -static inline int idx_to_pstate(unsigned int i) +static inline u8 idx_to_pstate(unsigned int i) { if (unlikely(i >= powernv_pstate_info.nr_pstates)) { pr_warn_once("idx_to_pstate: index %u is out of bound\n", i); @@ -213,7 +207,7 @@ static inline int idx_to_pstate(unsigned int i) * this will return the index of the nominal * frequency. 
*/ -static unsigned int pstate_to_idx(int pstate) +static unsigned int pstate_to_idx(u8 pstate) { unsigned int key = pstate % POWERNV_MAX_PSTATES; struct pstate_idx_revmap_data *revmap_data; @@ -223,7 +217,7 @@ static unsigned int pstate_to_idx(int pstate) return revmap_data->cpufreq_table_idx; } - pr_warn_once("pstate_to_idx: pstate %d not found\n", pstate); + pr_warn_once("pstate_to_idx: pstate 0x%x not found\n", pstate); return powernv_pstate_info.nominal; } @@ -291,7 +285,7 @@ static int init_powernv_pstates(void) powernv_pstate_info.wof_enabled = true; next: - pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min, + pr_info("cpufreq pstate min 0x%x nominal 0x%x max 0x%x\n", pstate_min, pstate_nominal, pstate_max); pr_info("Workload Optimized Frequency is %s in the platform\n", (powernv_pstate_info.wof_enabled) ? "enabled" : "disabled"); @@ -323,8 +317,6 @@ next: powernv_pstate_info.nr_pstates = nr_pstates; pr_debug("NR PStates %d\n", nr_pstates); - pstate_sign_prefix = pstate_min & ~0xFF; - for (i = 0; i < nr_pstates; i++) { u32 id = be32_to_cpu(pstate_ids[i]); u32 freq = be32_to_cpu(pstate_freqs[i]); @@ -333,14 +325,14 @@ next: pr_debug("PState id %d freq %d MHz\n", id, freq); powernv_freqs[i].frequency = freq * 1000; /* kHz */ - powernv_freqs[i].driver_data = id; + powernv_freqs[i].driver_data = id & 0xFF; revmap_data = (struct pstate_idx_revmap_data *) kmalloc(sizeof(*revmap_data), GFP_KERNEL); - revmap_data->pstate_id = id; + revmap_data->pstate_id = id & 0xFF; revmap_data->cpufreq_table_idx = i; - key = id % POWERNV_MAX_PSTATES; + key = (revmap_data->pstate_id) % POWERNV_MAX_PSTATES; hash_add(pstate_revmap, &revmap_data->hentry, key); if (id == pstate_max) @@ -364,14 +356,13 @@ next: } /* Returns the CPU frequency corresponding to the pstate_id. 
*/ -static unsigned int pstate_id_to_freq(int pstate_id) +static unsigned int pstate_id_to_freq(u8 pstate_id) { int i; i = pstate_to_idx(pstate_id); if (i >= powernv_pstate_info.nr_pstates || i < 0) { - pr_warn("PState id %d outside of PState table, " - "reporting nominal id %d instead\n", + pr_warn("PState id 0x%x outside of PState table, reporting nominal id 0x%x instead\n", pstate_id, idx_to_pstate(powernv_pstate_info.nominal)); i = powernv_pstate_info.nominal; } @@ -477,8 +468,8 @@ static inline void set_pmspr(unsigned long sprn, unsigned long val) */ struct powernv_smp_call_data { unsigned int freq; - int pstate_id; - int gpstate_id; + u8 pstate_id; + u8 gpstate_id; }; /* @@ -501,9 +492,9 @@ static void powernv_read_cpu_freq(void *arg) freq_data->pstate_id = extract_local_pstate(pmspr_val); freq_data->freq = pstate_id_to_freq(freq_data->pstate_id); - pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n", - raw_smp_processor_id(), pmspr_val, freq_data->pstate_id, - freq_data->freq); + pr_debug("cpu %d pmsr %016lX pstate_id 0x%x frequency %d kHz\n", + raw_smp_processor_id(), pmspr_val, freq_data->pstate_id, + freq_data->freq); } /* @@ -565,7 +556,7 @@ static void powernv_cpufreq_throttle_check(void *data) struct chip *chip; unsigned int cpu = smp_processor_id(); unsigned long pmsr; - int pmsr_pmax; + u8 pmsr_pmax; unsigned int pmsr_pmax_idx; pmsr = get_pmspr(SPRN_PMSR); @@ -579,7 +570,7 @@ static void powernv_cpufreq_throttle_check(void *data) goto next; chip->throttled = true; if (pmsr_pmax_idx > powernv_pstate_info.nominal) { - pr_warn_once("CPU %d on Chip %u has Pmax(%d) reduced below nominal frequency(%d)\n", + pr_warn_once("CPU %d on Chip %u has Pmax(0x%x) reduced below that of nominal frequency(0x%x)\n", cpu, chip->id, pmsr_pmax, idx_to_pstate(powernv_pstate_info.nominal)); chip->throttle_sub_turbo++; -- cgit v1.2.3 From d476ec4f7f5aba47b0a570cbf659d7330d7e71cf Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 4 Jan 2018 08:53:54 +0530 
Subject: cpufreq: stats: Change return type of cpufreq_stats_update() as void It always returns 0 and none of its callers check its return value. Make it return void. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_stats.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 1e55b5790853..1572129844a5 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -27,7 +27,7 @@ struct cpufreq_stats { unsigned int *trans_table; }; -static int cpufreq_stats_update(struct cpufreq_stats *stats) +static void cpufreq_stats_update(struct cpufreq_stats *stats) { unsigned long long cur_time = get_jiffies_64(); @@ -35,7 +35,6 @@ static int cpufreq_stats_update(struct cpufreq_stats *stats) stats->time_in_state[stats->last_index] += cur_time - stats->last_time; stats->last_time = cur_time; spin_unlock(&cpufreq_stats_lock); - return 0; } static void cpufreq_stats_clear_table(struct cpufreq_stats *stats) -- cgit v1.2.3 From 5a2772a82034722b4d4c7a2d4bfd07939ee46926 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 2 Jan 2018 14:27:57 +0100 Subject: PM / AVS: rockchip-io: account for const type of of_device_id.data This driver creates a number of const structures that it stores in the data field of an of_device_id array. The data field of an of_device_id structure has type const void *, so there is no need for a const-discarding cast when putting const values into such a structure. Furthermore, adding const to the declaration of the location that receives a const value from such a field ensures that the compiler will continue to check that the value is not modified. The const-discarding cast on the extraction from the data field is thus no longer needed. Done using Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Rafael J. 
Wysocki --- drivers/power/avs/rockchip-io-domain.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c index 75f63e38a8d1..ed2b109ae8fc 100644 --- a/drivers/power/avs/rockchip-io-domain.c +++ b/drivers/power/avs/rockchip-io-domain.c @@ -76,7 +76,7 @@ struct rockchip_iodomain_supply { struct rockchip_iodomain { struct device *dev; struct regmap *grf; - struct rockchip_iodomain_soc_data *soc_data; + const struct rockchip_iodomain_soc_data *soc_data; struct rockchip_iodomain_supply supplies[MAX_SUPPLIES]; }; @@ -382,43 +382,43 @@ static const struct rockchip_iodomain_soc_data soc_data_rv1108_pmu = { static const struct of_device_id rockchip_iodomain_match[] = { { .compatible = "rockchip,rk3188-io-voltage-domain", - .data = (void *)&soc_data_rk3188 + .data = &soc_data_rk3188 }, { .compatible = "rockchip,rk3228-io-voltage-domain", - .data = (void *)&soc_data_rk3228 + .data = &soc_data_rk3228 }, { .compatible = "rockchip,rk3288-io-voltage-domain", - .data = (void *)&soc_data_rk3288 + .data = &soc_data_rk3288 }, { .compatible = "rockchip,rk3328-io-voltage-domain", - .data = (void *)&soc_data_rk3328 + .data = &soc_data_rk3328 }, { .compatible = "rockchip,rk3368-io-voltage-domain", - .data = (void *)&soc_data_rk3368 + .data = &soc_data_rk3368 }, { .compatible = "rockchip,rk3368-pmu-io-voltage-domain", - .data = (void *)&soc_data_rk3368_pmu + .data = &soc_data_rk3368_pmu }, { .compatible = "rockchip,rk3399-io-voltage-domain", - .data = (void *)&soc_data_rk3399 + .data = &soc_data_rk3399 }, { .compatible = "rockchip,rk3399-pmu-io-voltage-domain", - .data = (void *)&soc_data_rk3399_pmu + .data = &soc_data_rk3399_pmu }, { .compatible = "rockchip,rv1108-io-voltage-domain", - .data = (void *)&soc_data_rv1108 + .data = &soc_data_rv1108 }, { .compatible = "rockchip,rv1108-pmu-io-voltage-domain", - .data = (void *)&soc_data_rv1108_pmu + .data = 
&soc_data_rv1108_pmu }, { /* sentinel */ }, }; @@ -443,7 +443,7 @@ static int rockchip_iodomain_probe(struct platform_device *pdev) platform_set_drvdata(pdev, iod); match = of_match_node(rockchip_iodomain_match, np); - iod->soc_data = (struct rockchip_iodomain_soc_data *)match->data; + iod->soc_data = match->data; parent = pdev->dev.parent; if (parent && parent->of_node) { -- cgit v1.2.3 From ba6c29592545635f7c476cbef0db0c4f39495f23 Mon Sep 17 00:00:00 2001 From: Steven Eckhoff Date: Tue, 19 Dec 2017 14:54:25 -0600 Subject: ASoC: TSCS42xx: Add support for Tempo Semiconductor's TSCS42xx audio CODEC Currently there is no support for TSCS42xx audio CODECs. Add support for TSCS42xx audio CODECs. Reviewed-by: Charles Keepax Acked-by: Philippe Ombredanne Signed-off-by: Steven Eckhoff Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/tscs42xx.txt | 16 + .../devicetree/bindings/vendor-prefixes.txt | 1 + MAINTAINERS | 7 + sound/soc/codecs/Kconfig | 8 + sound/soc/codecs/Makefile | 2 + sound/soc/codecs/tscs42xx.c | 1456 +++++++++++ sound/soc/codecs/tscs42xx.h | 2693 ++++++++++++++++++++ 7 files changed, 4183 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/tscs42xx.txt create mode 100644 sound/soc/codecs/tscs42xx.c create mode 100644 sound/soc/codecs/tscs42xx.h diff --git a/Documentation/devicetree/bindings/sound/tscs42xx.txt b/Documentation/devicetree/bindings/sound/tscs42xx.txt new file mode 100644 index 000000000000..2ac2f0996697 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/tscs42xx.txt @@ -0,0 +1,16 @@ +TSCS42XX Audio CODEC + +Required Properties: + + - compatible : "tempo,tscs42A1" for analog mic + "tempo,tscs42A2" for digital mic + + - reg : <0x71> for analog mic + <0x69> for digital mic + +Example: + +wookie: codec@69 { + compatible = "tempo,tscs42A2"; + reg = <0x69>; +}; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 
0994bdd82cd3..f776fb804a8c 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -347,6 +347,7 @@ tcg Trusted Computing Group tcl Toby Churchill Ltd. technexion TechNexion technologic Technologic Systems +tempo Tempo Semiconductor terasic Terasic Inc. thine THine Electronics, Inc. ti Texas Instruments diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..8254a90d1cbd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13831,6 +13831,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git S: Maintained K: ^Subject:.*(?i)trivial +TEMPO SEMICONDUCTOR DRIVERS +M: Steven Eckhoff +S: Maintained +F: sound/soc/codecs/tscs*.c +F: sound/soc/codecs/tscs*.h +F: Documentation/devicetree/bindings/sound/tscs*.txt + TTY LAYER M: Greg Kroah-Hartman M: Jiri Slaby diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index a42ddbc93f3d..fe3bff2f4238 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -158,6 +158,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_TLV320AIC3X if I2C select SND_SOC_TPA6130A2 if I2C select SND_SOC_TLV320DAC33 if I2C + select SND_SOC_TSCS42XX if I2C select SND_SOC_TS3A227E if I2C select SND_SOC_TWL4030 if TWL4030_CORE select SND_SOC_TWL6040 if TWL6040_CORE @@ -933,6 +934,13 @@ config SND_SOC_TS3A227E tristate "TI Headset/Mic detect and keypress chip" depends on I2C +config SND_SOC_TSCS42XX + tristate "Tempo Semiconductor TSCS42xx CODEC" + depends on I2C + select REGMAP_I2C + help + Add support for Tempo Semiconductor's TSCS42xx audio CODEC. 
+ config SND_SOC_TWL4030 select MFD_TWL4030_AUDIO tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 0001069ce2a7..ded86bceca37 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -167,6 +167,7 @@ snd-soc-tlv320aic32x4-i2c-objs := tlv320aic32x4-i2c.o snd-soc-tlv320aic32x4-spi-objs := tlv320aic32x4-spi.o snd-soc-tlv320aic3x-objs := tlv320aic3x.o snd-soc-tlv320dac33-objs := tlv320dac33.o +snd-soc-tscs42xx-objs := tscs42xx.o snd-soc-ts3a227e-objs := ts3a227e.o snd-soc-twl4030-objs := twl4030.o snd-soc-twl6040-objs := twl6040.o @@ -406,6 +407,7 @@ obj-$(CONFIG_SND_SOC_TLV320AIC32X4_I2C) += snd-soc-tlv320aic32x4-i2c.o obj-$(CONFIG_SND_SOC_TLV320AIC32X4_SPI) += snd-soc-tlv320aic32x4-spi.o obj-$(CONFIG_SND_SOC_TLV320AIC3X) += snd-soc-tlv320aic3x.o obj-$(CONFIG_SND_SOC_TLV320DAC33) += snd-soc-tlv320dac33.o +obj-$(CONFIG_SND_SOC_TSCS42XX) += snd-soc-tscs42xx.o obj-$(CONFIG_SND_SOC_TS3A227E) += snd-soc-ts3a227e.o obj-$(CONFIG_SND_SOC_TWL4030) += snd-soc-twl4030.o obj-$(CONFIG_SND_SOC_TWL6040) += snd-soc-twl6040.o diff --git a/sound/soc/codecs/tscs42xx.c b/sound/soc/codecs/tscs42xx.c new file mode 100644 index 000000000000..eedd600875e5 --- /dev/null +++ b/sound/soc/codecs/tscs42xx.c @@ -0,0 +1,1456 @@ +// SPDX-License-Identifier: GPL-2.0 +// tscs42xx.c -- TSCS42xx ALSA SoC Audio driver +// Copyright 2017 Tempo Semiconductor, Inc. 
+// Author: Steven Eckhoff + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tscs42xx.h" + +#define COEFF_SIZE 3 +#define BIQUAD_COEFF_COUNT 5 +#define BIQUAD_SIZE (COEFF_SIZE * BIQUAD_COEFF_COUNT) + +#define COEFF_RAM_MAX_ADDR 0xcd +#define COEFF_RAM_COEFF_COUNT (COEFF_RAM_MAX_ADDR + 1) +#define COEFF_RAM_SIZE (COEFF_SIZE * COEFF_RAM_COEFF_COUNT) + +struct tscs42xx { + + int bclk_ratio; + int samplerate; + unsigned int blrcm; + struct mutex audio_params_lock; + + u8 coeff_ram[COEFF_RAM_SIZE]; + bool coeff_ram_synced; + struct mutex coeff_ram_lock; + + struct mutex pll_lock; + + struct regmap *regmap; + + struct device *dev; +}; + +struct coeff_ram_ctl { + unsigned int addr; + struct soc_bytes_ext bytes_ext; +}; + +static bool tscs42xx_volatile(struct device *dev, unsigned int reg) +{ + switch (reg) { + case R_DACCRWRL: + case R_DACCRWRM: + case R_DACCRWRH: + case R_DACCRRDL: + case R_DACCRRDM: + case R_DACCRRDH: + case R_DACCRSTAT: + case R_DACCRADDR: + case R_PLLCTL0: + return true; + default: + return false; + }; +} + +static bool tscs42xx_precious(struct device *dev, unsigned int reg) +{ + switch (reg) { + case R_DACCRWRL: + case R_DACCRWRM: + case R_DACCRWRH: + case R_DACCRRDL: + case R_DACCRRDM: + case R_DACCRRDH: + return true; + default: + return false; + }; +} + +static const struct regmap_config tscs42xx_regmap = { + .reg_bits = 8, + .val_bits = 8, + + .volatile_reg = tscs42xx_volatile, + .precious_reg = tscs42xx_precious, + .max_register = R_DACMBCREL3H, + + .cache_type = REGCACHE_RBTREE, + .can_multi_write = true, +}; + +#define MAX_PLL_LOCK_20MS_WAITS 1 +static bool plls_locked(struct snd_soc_codec *codec) +{ + int ret; + int count = MAX_PLL_LOCK_20MS_WAITS; + + do { + ret = snd_soc_read(codec, R_PLLCTL0); + if (ret < 0) { + dev_err(codec->dev, + "Failed to read PLL lock status (%d)\n", ret); + return false; + } else if (ret > 0) { + return true; + } + msleep(20); + } while (count--); + + return false; +} + 
+static int sample_rate_to_pll_freq_out(int sample_rate) +{ + switch (sample_rate) { + case 11025: + case 22050: + case 44100: + case 88200: + return 112896000; + case 8000: + case 16000: + case 32000: + case 48000: + case 96000: + return 122880000; + default: + return -EINVAL; + } +} + +#define DACCRSTAT_MAX_TRYS 10 +static int write_coeff_ram(struct snd_soc_codec *codec, u8 *coeff_ram, + unsigned int addr, unsigned int coeff_cnt) +{ + struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); + int cnt; + int trys; + int ret; + + for (cnt = 0; cnt < coeff_cnt; cnt++, addr++) { + + for (trys = 0; trys < DACCRSTAT_MAX_TRYS; trys++) { + ret = snd_soc_read(codec, R_DACCRSTAT); + if (ret < 0) { + dev_err(codec->dev, + "Failed to read stat (%d)\n", ret); + return ret; + } + if (!ret) + break; + } + + if (trys == DACCRSTAT_MAX_TRYS) { + ret = -EIO; + dev_err(codec->dev, + "dac coefficient write error (%d)\n", ret); + return ret; + } + + ret = regmap_write(tscs42xx->regmap, R_DACCRADDR, addr); + if (ret < 0) { + dev_err(codec->dev, + "Failed to write dac ram address (%d)\n", ret); + return ret; + } + + ret = regmap_bulk_write(tscs42xx->regmap, R_DACCRWRL, + &coeff_ram[addr * COEFF_SIZE], + COEFF_SIZE); + if (ret < 0) { + dev_err(codec->dev, + "Failed to write dac ram (%d)\n", ret); + return ret; + } + } + + return 0; +} + +static int power_up_audio_plls(struct snd_soc_codec *codec) +{ + struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); + int freq_out; + int ret; + unsigned int mask; + unsigned int val; + + freq_out = sample_rate_to_pll_freq_out(tscs42xx->samplerate); + switch (freq_out) { + case 122880000: /* 48k */ + mask = RM_PLLCTL1C_PDB_PLL1; + val = RV_PLLCTL1C_PDB_PLL1_ENABLE; + break; + case 112896000: /* 44.1k */ + mask = RM_PLLCTL1C_PDB_PLL2; + val = RV_PLLCTL1C_PDB_PLL2_ENABLE; + break; + default: + ret = -EINVAL; + dev_err(codec->dev, "Unrecognized PLL output freq (%d)\n", ret); + return ret; + } + + mutex_lock(&tscs42xx->pll_lock); + + ret = 
snd_soc_update_bits(codec, R_PLLCTL1C, mask, val); + if (ret < 0) { + dev_err(codec->dev, "Failed to turn PLL on (%d)\n", ret); + goto exit; + } + + if (!plls_locked(codec)) { + dev_err(codec->dev, "Failed to lock plls\n"); + ret = -ENOMSG; + goto exit; + } + + ret = 0; +exit: + mutex_unlock(&tscs42xx->pll_lock); + + return ret; +} + +static int power_down_audio_plls(struct snd_soc_codec *codec) +{ + struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); + int ret; + + mutex_lock(&tscs42xx->pll_lock); + + ret = snd_soc_update_bits(codec, R_PLLCTL1C, + RM_PLLCTL1C_PDB_PLL1, + RV_PLLCTL1C_PDB_PLL1_DISABLE); + if (ret < 0) { + dev_err(codec->dev, "Failed to turn PLL off (%d)\n", ret); + goto exit; + } + ret = snd_soc_update_bits(codec, R_PLLCTL1C, + RM_PLLCTL1C_PDB_PLL2, + RV_PLLCTL1C_PDB_PLL2_DISABLE); + if (ret < 0) { + dev_err(codec->dev, "Failed to turn PLL off (%d)\n", ret); + goto exit; + } + + ret = 0; +exit: + mutex_unlock(&tscs42xx->pll_lock); + + return ret; +} + +static int coeff_ram_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); + struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); + struct coeff_ram_ctl *ctl = + (struct coeff_ram_ctl *)kcontrol->private_value; + struct soc_bytes_ext *params = &ctl->bytes_ext; + + mutex_lock(&tscs42xx->coeff_ram_lock); + + memcpy(ucontrol->value.bytes.data, + &tscs42xx->coeff_ram[ctl->addr * COEFF_SIZE], params->max); + + mutex_unlock(&tscs42xx->coeff_ram_lock); + + return 0; +} + +static int coeff_ram_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); + struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); + struct coeff_ram_ctl *ctl = + (struct coeff_ram_ctl *)kcontrol->private_value; + struct soc_bytes_ext *params = &ctl->bytes_ext; + unsigned int coeff_cnt = params->max / COEFF_SIZE; + int ret; + + 
mutex_lock(&tscs42xx->coeff_ram_lock); + + tscs42xx->coeff_ram_synced = false; + + memcpy(&tscs42xx->coeff_ram[ctl->addr * COEFF_SIZE], + ucontrol->value.bytes.data, params->max); + + mutex_lock(&tscs42xx->pll_lock); + + if (plls_locked(codec)) { + ret = write_coeff_ram(codec, tscs42xx->coeff_ram, + ctl->addr, coeff_cnt); + if (ret < 0) { + dev_err(codec->dev, + "Failed to flush coeff ram cache (%d)\n", ret); + goto exit; + } + tscs42xx->coeff_ram_synced = true; + } + + ret = 0; +exit: + mutex_unlock(&tscs42xx->pll_lock); + + mutex_unlock(&tscs42xx->coeff_ram_lock); + + return ret; +} + +/* Input L Capture Route */ +static char const * const input_select_text[] = { + "Line 1", "Line 2", "Line 3", "D2S" +}; + +static const struct soc_enum left_input_select_enum = +SOC_ENUM_SINGLE(R_INSELL, FB_INSELL, ARRAY_SIZE(input_select_text), + input_select_text); + +static const struct snd_kcontrol_new left_input_select = +SOC_DAPM_ENUM("LEFT_INPUT_SELECT_ENUM", left_input_select_enum); + +/* Input R Capture Route */ +static const struct soc_enum right_input_select_enum = +SOC_ENUM_SINGLE(R_INSELR, FB_INSELR, ARRAY_SIZE(input_select_text), + input_select_text); + +static const struct snd_kcontrol_new right_input_select = +SOC_DAPM_ENUM("RIGHT_INPUT_SELECT_ENUM", right_input_select_enum); + +/* Input Channel Mapping */ +static char const * const ch_map_select_text[] = { + "Normal", "Left to Right", "Right to Left", "Swap" +}; + +static const struct soc_enum ch_map_select_enum = +SOC_ENUM_SINGLE(R_AIC2, FB_AIC2_ADCDSEL, ARRAY_SIZE(ch_map_select_text), + ch_map_select_text); + +static int dapm_vref_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + msleep(20); + return 0; +} + +static int dapm_micb_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + msleep(20); + return 0; +} + +int pll_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = 
snd_soc_dapm_to_codec(w->dapm); + int ret; + + if (SND_SOC_DAPM_EVENT_ON(event)) + ret = power_up_audio_plls(codec); + else + ret = power_down_audio_plls(codec); + + return ret; +} + +int dac_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); + struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); + int ret; + + mutex_lock(&tscs42xx->coeff_ram_lock); + + if (tscs42xx->coeff_ram_synced == false) { + ret = write_coeff_ram(codec, tscs42xx->coeff_ram, 0x00, + COEFF_RAM_COEFF_COUNT); + if (ret < 0) + goto exit; + tscs42xx->coeff_ram_synced = true; + } + + ret = 0; +exit: + mutex_unlock(&tscs42xx->coeff_ram_lock); + + return ret; +} + +static const struct snd_soc_dapm_widget tscs42xx_dapm_widgets[] = { + /* Vref */ + SND_SOC_DAPM_SUPPLY_S("Vref", 1, R_PWRM2, FB_PWRM2_VREF, 0, + dapm_vref_event, SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_PRE_PMD), + + /* PLL */ + SND_SOC_DAPM_SUPPLY("PLL", SND_SOC_NOPM, 0, 0, pll_event, + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + + /* Headphone */ + SND_SOC_DAPM_DAC_E("DAC L", "HiFi Playback", R_PWRM2, FB_PWRM2_HPL, 0, + dac_event, SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_DAC_E("DAC R", "HiFi Playback", R_PWRM2, FB_PWRM2_HPR, 0, + dac_event, SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_OUTPUT("Headphone L"), + SND_SOC_DAPM_OUTPUT("Headphone R"), + + /* Speaker */ + SND_SOC_DAPM_DAC_E("ClassD L", "HiFi Playback", + R_PWRM2, FB_PWRM2_SPKL, 0, + dac_event, SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_DAC_E("ClassD R", "HiFi Playback", + R_PWRM2, FB_PWRM2_SPKR, 0, + dac_event, SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_OUTPUT("Speaker L"), + SND_SOC_DAPM_OUTPUT("Speaker R"), + + /* Capture */ + SND_SOC_DAPM_PGA("Analog In PGA L", R_PWRM1, FB_PWRM1_PGAL, 0, NULL, 0), + SND_SOC_DAPM_PGA("Analog In PGA R", R_PWRM1, FB_PWRM1_PGAR, 0, NULL, 0), + SND_SOC_DAPM_PGA("Analog Boost L", R_PWRM1, FB_PWRM1_BSTL, 0, NULL, 0), + SND_SOC_DAPM_PGA("Analog Boost R", R_PWRM1, 
FB_PWRM1_BSTR, 0, NULL, 0), + SND_SOC_DAPM_PGA("ADC Mute", R_CNVRTR0, FB_CNVRTR0_HPOR, true, NULL, 0), + SND_SOC_DAPM_ADC("ADC L", "HiFi Capture", R_PWRM1, FB_PWRM1_ADCL, 0), + SND_SOC_DAPM_ADC("ADC R", "HiFi Capture", R_PWRM1, FB_PWRM1_ADCR, 0), + + /* Capture Input */ + SND_SOC_DAPM_MUX("Input L Capture Route", R_PWRM2, + FB_PWRM2_INSELL, 0, &left_input_select), + SND_SOC_DAPM_MUX("Input R Capture Route", R_PWRM2, + FB_PWRM2_INSELR, 0, &right_input_select), + + /* Digital Mic */ + SND_SOC_DAPM_SUPPLY_S("Digital Mic Enable", 2, R_DMICCTL, + FB_DMICCTL_DMICEN, 0, NULL, + SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_PRE_PMD), + + /* Analog Mic */ + SND_SOC_DAPM_SUPPLY_S("Mic Bias", 2, R_PWRM1, FB_PWRM1_MICB, + 0, dapm_micb_event, SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_PRE_PMD), + + /* Line In */ + SND_SOC_DAPM_INPUT("Line In 1 L"), + SND_SOC_DAPM_INPUT("Line In 1 R"), + SND_SOC_DAPM_INPUT("Line In 2 L"), + SND_SOC_DAPM_INPUT("Line In 2 R"), + SND_SOC_DAPM_INPUT("Line In 3 L"), + SND_SOC_DAPM_INPUT("Line In 3 R"), +}; + +static const struct snd_soc_dapm_route tscs42xx_intercon[] = { + {"DAC L", NULL, "PLL"}, + {"DAC R", NULL, "PLL"}, + {"DAC L", NULL, "Vref"}, + {"DAC R", NULL, "Vref"}, + {"Headphone L", NULL, "DAC L"}, + {"Headphone R", NULL, "DAC R"}, + + {"ClassD L", NULL, "PLL"}, + {"ClassD R", NULL, "PLL"}, + {"ClassD L", NULL, "Vref"}, + {"ClassD R", NULL, "Vref"}, + {"Speaker L", NULL, "ClassD L"}, + {"Speaker R", NULL, "ClassD R"}, + + {"Input L Capture Route", NULL, "Vref"}, + {"Input R Capture Route", NULL, "Vref"}, + + {"Mic Bias", NULL, "Vref"}, + + {"Input L Capture Route", "Line 1", "Line In 1 L"}, + {"Input R Capture Route", "Line 1", "Line In 1 R"}, + {"Input L Capture Route", "Line 2", "Line In 2 L"}, + {"Input R Capture Route", "Line 2", "Line In 2 R"}, + {"Input L Capture Route", "Line 3", "Line In 3 L"}, + {"Input R Capture Route", "Line 3", "Line In 3 R"}, + + {"Analog In PGA L", NULL, "Input L Capture Route"}, + {"Analog In PGA R", NULL, "Input R Capture 
Route"}, + {"Analog Boost L", NULL, "Analog In PGA L"}, + {"Analog Boost R", NULL, "Analog In PGA R"}, + {"ADC Mute", NULL, "Analog Boost L"}, + {"ADC Mute", NULL, "Analog Boost R"}, + {"ADC L", NULL, "PLL"}, + {"ADC R", NULL, "PLL"}, + {"ADC L", NULL, "ADC Mute"}, + {"ADC R", NULL, "ADC Mute"}, +}; + +/************ + * CONTROLS * + ************/ + +static char const * const eq_band_enable_text[] = { + "Prescale only", + "Band1", + "Band1:2", + "Band1:3", + "Band1:4", + "Band1:5", + "Band1:6", +}; + +static char const * const level_detection_text[] = { + "Average", + "Peak", +}; + +static char const * const level_detection_window_text[] = { + "512 Samples", + "64 Samples", +}; + +static char const * const compressor_ratio_text[] = { + "Reserved", "1.5:1", "2:1", "3:1", "4:1", "5:1", "6:1", + "7:1", "8:1", "9:1", "10:1", "11:1", "12:1", "13:1", "14:1", + "15:1", "16:1", "17:1", "18:1", "19:1", "20:1", +}; + +static DECLARE_TLV_DB_SCALE(hpvol_scale, -8850, 75, 0); +static DECLARE_TLV_DB_SCALE(spkvol_scale, -7725, 75, 0); +static DECLARE_TLV_DB_SCALE(dacvol_scale, -9563, 38, 0); +static DECLARE_TLV_DB_SCALE(adcvol_scale, -7125, 38, 0); +static DECLARE_TLV_DB_SCALE(invol_scale, -1725, 75, 0); +static DECLARE_TLV_DB_SCALE(mic_boost_scale, 0, 1000, 0); +static DECLARE_TLV_DB_MINMAX(mugain_scale, 0, 4650); +static DECLARE_TLV_DB_MINMAX(compth_scale, -9562, 0); + +static const struct soc_enum eq1_band_enable_enum = + SOC_ENUM_SINGLE(R_CONFIG1, FB_CONFIG1_EQ1_BE, + ARRAY_SIZE(eq_band_enable_text), eq_band_enable_text); + +static const struct soc_enum eq2_band_enable_enum = + SOC_ENUM_SINGLE(R_CONFIG1, FB_CONFIG1_EQ2_BE, + ARRAY_SIZE(eq_band_enable_text), eq_band_enable_text); + +static const struct soc_enum cle_level_detection_enum = + SOC_ENUM_SINGLE(R_CLECTL, FB_CLECTL_LVL_MODE, + ARRAY_SIZE(level_detection_text), + level_detection_text); + +static const struct soc_enum cle_level_detection_window_enum = + SOC_ENUM_SINGLE(R_CLECTL, FB_CLECTL_WINDOWSEL, + 
ARRAY_SIZE(level_detection_window_text), + level_detection_window_text); + +static const struct soc_enum mbc_level_detection_enums[] = { + SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_LVLMODE1, + ARRAY_SIZE(level_detection_text), + level_detection_text), + SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_LVLMODE2, + ARRAY_SIZE(level_detection_text), + level_detection_text), + SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_LVLMODE3, + ARRAY_SIZE(level_detection_text), + level_detection_text), +}; + +static const struct soc_enum mbc_level_detection_window_enums[] = { + SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_WINSEL1, + ARRAY_SIZE(level_detection_window_text), + level_detection_window_text), + SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_WINSEL2, + ARRAY_SIZE(level_detection_window_text), + level_detection_window_text), + SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_WINSEL3, + ARRAY_SIZE(level_detection_window_text), + level_detection_window_text), +}; + +static const struct soc_enum compressor_ratio_enum = + SOC_ENUM_SINGLE(R_CMPRAT, FB_CMPRAT, + ARRAY_SIZE(compressor_ratio_text), compressor_ratio_text); + +static const struct soc_enum dac_mbc1_compressor_ratio_enum = + SOC_ENUM_SINGLE(R_DACMBCRAT1, FB_DACMBCRAT1_RATIO, + ARRAY_SIZE(compressor_ratio_text), compressor_ratio_text); + +static const struct soc_enum dac_mbc2_compressor_ratio_enum = + SOC_ENUM_SINGLE(R_DACMBCRAT2, FB_DACMBCRAT2_RATIO, + ARRAY_SIZE(compressor_ratio_text), compressor_ratio_text); + +static const struct soc_enum dac_mbc3_compressor_ratio_enum = + SOC_ENUM_SINGLE(R_DACMBCRAT3, FB_DACMBCRAT3_RATIO, + ARRAY_SIZE(compressor_ratio_text), compressor_ratio_text); + +static int bytes_info_ext(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *ucontrol) +{ + struct coeff_ram_ctl *ctl = + (struct coeff_ram_ctl *)kcontrol->private_value; + struct soc_bytes_ext *params = &ctl->bytes_ext; + + ucontrol->type = SNDRV_CTL_ELEM_TYPE_BYTES; + ucontrol->count = params->max; + + return 0; +} + +#define COEFF_RAM_CTL(xname, 
xcount, xaddr) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ + .info = bytes_info_ext, \ + .get = coeff_ram_get, .put = coeff_ram_put, \ + .private_value = (unsigned long)&(struct coeff_ram_ctl) { \ + .addr = xaddr, \ + .bytes_ext = {.max = xcount, }, \ + } \ +} + +static const struct snd_kcontrol_new tscs42xx_snd_controls[] = { + /* Volumes */ + SOC_DOUBLE_R_TLV("Headphone Playback Volume", R_HPVOLL, R_HPVOLR, + FB_HPVOLL, 0x7F, 0, hpvol_scale), + SOC_DOUBLE_R_TLV("Speaker Playback Volume", R_SPKVOLL, R_SPKVOLR, + FB_SPKVOLL, 0x7F, 0, spkvol_scale), + SOC_DOUBLE_R_TLV("Master Playback Volume", R_DACVOLL, R_DACVOLR, + FB_DACVOLL, 0xFF, 0, dacvol_scale), + SOC_DOUBLE_R_TLV("PCM Capture Volume", R_ADCVOLL, R_ADCVOLR, + FB_ADCVOLL, 0xFF, 0, adcvol_scale), + SOC_DOUBLE_R_TLV("Master Capture Volume", R_INVOLL, R_INVOLR, + FB_INVOLL, 0x3F, 0, invol_scale), + + /* INSEL */ + SOC_DOUBLE_R_TLV("Mic Boost Capture Volume", R_INSELL, R_INSELR, + FB_INSELL_MICBSTL, FV_INSELL_MICBSTL_30DB, + 0, mic_boost_scale), + + /* Input Channel Map */ + SOC_ENUM("Input Channel Map Switch", ch_map_select_enum), + + /* Coefficient Ram */ + COEFF_RAM_CTL("Cascade1L BiQuad1", BIQUAD_SIZE, 0x00), + COEFF_RAM_CTL("Cascade1L BiQuad2", BIQUAD_SIZE, 0x05), + COEFF_RAM_CTL("Cascade1L BiQuad3", BIQUAD_SIZE, 0x0a), + COEFF_RAM_CTL("Cascade1L BiQuad4", BIQUAD_SIZE, 0x0f), + COEFF_RAM_CTL("Cascade1L BiQuad5", BIQUAD_SIZE, 0x14), + COEFF_RAM_CTL("Cascade1L BiQuad6", BIQUAD_SIZE, 0x19), + + COEFF_RAM_CTL("Cascade1R BiQuad1", BIQUAD_SIZE, 0x20), + COEFF_RAM_CTL("Cascade1R BiQuad2", BIQUAD_SIZE, 0x25), + COEFF_RAM_CTL("Cascade1R BiQuad3", BIQUAD_SIZE, 0x2a), + COEFF_RAM_CTL("Cascade1R BiQuad4", BIQUAD_SIZE, 0x2f), + COEFF_RAM_CTL("Cascade1R BiQuad5", BIQUAD_SIZE, 0x34), + COEFF_RAM_CTL("Cascade1R BiQuad6", BIQUAD_SIZE, 0x39), + + COEFF_RAM_CTL("Cascade1L Prescale", COEFF_SIZE, 0x1f), + COEFF_RAM_CTL("Cascade1R Prescale", COEFF_SIZE, 0x3f), + + COEFF_RAM_CTL("Cascade2L BiQuad1", BIQUAD_SIZE, 
0x40), + COEFF_RAM_CTL("Cascade2L BiQuad2", BIQUAD_SIZE, 0x45), + COEFF_RAM_CTL("Cascade2L BiQuad3", BIQUAD_SIZE, 0x4a), + COEFF_RAM_CTL("Cascade2L BiQuad4", BIQUAD_SIZE, 0x4f), + COEFF_RAM_CTL("Cascade2L BiQuad5", BIQUAD_SIZE, 0x54), + COEFF_RAM_CTL("Cascade2L BiQuad6", BIQUAD_SIZE, 0x59), + + COEFF_RAM_CTL("Cascade2R BiQuad1", BIQUAD_SIZE, 0x60), + COEFF_RAM_CTL("Cascade2R BiQuad2", BIQUAD_SIZE, 0x65), + COEFF_RAM_CTL("Cascade2R BiQuad3", BIQUAD_SIZE, 0x6a), + COEFF_RAM_CTL("Cascade2R BiQuad4", BIQUAD_SIZE, 0x6f), + COEFF_RAM_CTL("Cascade2R BiQuad5", BIQUAD_SIZE, 0x74), + COEFF_RAM_CTL("Cascade2R BiQuad6", BIQUAD_SIZE, 0x79), + + COEFF_RAM_CTL("Cascade2L Prescale", COEFF_SIZE, 0x5f), + COEFF_RAM_CTL("Cascade2R Prescale", COEFF_SIZE, 0x7f), + + COEFF_RAM_CTL("Bass Extraction BiQuad1", BIQUAD_SIZE, 0x80), + COEFF_RAM_CTL("Bass Extraction BiQuad2", BIQUAD_SIZE, 0x85), + + COEFF_RAM_CTL("Bass Non Linear Function 1", COEFF_SIZE, 0x8a), + COEFF_RAM_CTL("Bass Non Linear Function 2", COEFF_SIZE, 0x8b), + + COEFF_RAM_CTL("Bass Limiter BiQuad", BIQUAD_SIZE, 0x8c), + + COEFF_RAM_CTL("Bass Cut Off BiQuad", BIQUAD_SIZE, 0x91), + + COEFF_RAM_CTL("Bass Mix", COEFF_SIZE, 0x96), + + COEFF_RAM_CTL("Treb Extraction BiQuad1", BIQUAD_SIZE, 0x97), + COEFF_RAM_CTL("Treb Extraction BiQuad2", BIQUAD_SIZE, 0x9c), + + COEFF_RAM_CTL("Treb Non Linear Function 1", COEFF_SIZE, 0xa1), + COEFF_RAM_CTL("Treb Non Linear Function 2", COEFF_SIZE, 0xa2), + + COEFF_RAM_CTL("Treb Limiter BiQuad", BIQUAD_SIZE, 0xa3), + + COEFF_RAM_CTL("Treb Cut Off BiQuad", BIQUAD_SIZE, 0xa8), + + COEFF_RAM_CTL("Treb Mix", COEFF_SIZE, 0xad), + + COEFF_RAM_CTL("3D", COEFF_SIZE, 0xae), + + COEFF_RAM_CTL("3D Mix", COEFF_SIZE, 0xaf), + + COEFF_RAM_CTL("MBC1 BiQuad1", BIQUAD_SIZE, 0xb0), + COEFF_RAM_CTL("MBC1 BiQuad2", BIQUAD_SIZE, 0xb5), + + COEFF_RAM_CTL("MBC2 BiQuad1", BIQUAD_SIZE, 0xba), + COEFF_RAM_CTL("MBC2 BiQuad2", BIQUAD_SIZE, 0xbf), + + COEFF_RAM_CTL("MBC3 BiQuad1", BIQUAD_SIZE, 0xc4), + COEFF_RAM_CTL("MBC3 
BiQuad2", BIQUAD_SIZE, 0xc9), + + /* EQ */ + SOC_SINGLE("EQ1 Switch", R_CONFIG1, FB_CONFIG1_EQ1_EN, 1, 0), + SOC_SINGLE("EQ2 Switch", R_CONFIG1, FB_CONFIG1_EQ2_EN, 1, 0), + SOC_ENUM("EQ1 Band Enable Switch", eq1_band_enable_enum), + SOC_ENUM("EQ2 Band Enable Switch", eq2_band_enable_enum), + + /* CLE */ + SOC_ENUM("CLE Level Detect Switch", + cle_level_detection_enum), + SOC_ENUM("CLE Level Detect Win Switch", + cle_level_detection_window_enum), + SOC_SINGLE("Expander Switch", + R_CLECTL, FB_CLECTL_EXP_EN, 1, 0), + SOC_SINGLE("Limiter Switch", + R_CLECTL, FB_CLECTL_LIMIT_EN, 1, 0), + SOC_SINGLE("Comp Switch", + R_CLECTL, FB_CLECTL_COMP_EN, 1, 0), + SOC_SINGLE_TLV("CLE Make-Up Gain Playback Volume", + R_MUGAIN, FB_MUGAIN_CLEMUG, 0x1f, 0, mugain_scale), + SOC_SINGLE_TLV("Comp Thresh Playback Volume", + R_COMPTH, FB_COMPTH, 0xff, 0, compth_scale), + SOC_ENUM("Comp Ratio Switch", compressor_ratio_enum), + SND_SOC_BYTES("Comp Atk Time", R_CATKTCL, 2), + + /* Effects */ + SOC_SINGLE("3D Switch", R_FXCTL, FB_FXCTL_3DEN, 1, 0), + SOC_SINGLE("Treble Switch", R_FXCTL, FB_FXCTL_TEEN, 1, 0), + SOC_SINGLE("Treble Bypass Switch", R_FXCTL, FB_FXCTL_TNLFBYPASS, 1, 0), + SOC_SINGLE("Bass Switch", R_FXCTL, FB_FXCTL_BEEN, 1, 0), + SOC_SINGLE("Bass Bypass Switch", R_FXCTL, FB_FXCTL_BNLFBYPASS, 1, 0), + + /* MBC */ + SOC_SINGLE("MBC Band1 Switch", R_DACMBCEN, FB_DACMBCEN_MBCEN1, 1, 0), + SOC_SINGLE("MBC Band2 Switch", R_DACMBCEN, FB_DACMBCEN_MBCEN2, 1, 0), + SOC_SINGLE("MBC Band3 Switch", R_DACMBCEN, FB_DACMBCEN_MBCEN3, 1, 0), + SOC_ENUM("MBC Band1 Level Detect Switch", + mbc_level_detection_enums[0]), + SOC_ENUM("MBC Band2 Level Detect Switch", + mbc_level_detection_enums[1]), + SOC_ENUM("MBC Band3 Level Detect Switch", + mbc_level_detection_enums[2]), + SOC_ENUM("MBC Band1 Level Detect Win Switch", + mbc_level_detection_window_enums[0]), + SOC_ENUM("MBC Band2 Level Detect Win Switch", + mbc_level_detection_window_enums[1]), + SOC_ENUM("MBC Band3 Level Detect Win Switch", + 
mbc_level_detection_window_enums[2]), + + SOC_SINGLE("MBC1 Phase Invert", R_DACMBCMUG1, FB_DACMBCMUG1_PHASE, + 1, 0), + SOC_SINGLE_TLV("DAC MBC1 Make-Up Gain Playback Volume", + R_DACMBCMUG1, FB_DACMBCMUG1_MUGAIN, 0x1f, 0, mugain_scale), + SOC_SINGLE_TLV("DAC MBC1 Comp Thresh Playback Volume", + R_DACMBCTHR1, FB_DACMBCTHR1_THRESH, 0xff, 0, compth_scale), + SOC_ENUM("DAC MBC1 Comp Ratio Switch", + dac_mbc1_compressor_ratio_enum), + SND_SOC_BYTES("DAC MBC1 Comp Atk Time", R_DACMBCATK1L, 2), + SND_SOC_BYTES("DAC MBC1 Comp Rel Time Const", + R_DACMBCREL1L, 2), + + SOC_SINGLE("MBC2 Phase Invert", R_DACMBCMUG2, FB_DACMBCMUG2_PHASE, + 1, 0), + SOC_SINGLE_TLV("DAC MBC2 Make-Up Gain Playback Volume", + R_DACMBCMUG2, FB_DACMBCMUG2_MUGAIN, 0x1f, 0, mugain_scale), + SOC_SINGLE_TLV("DAC MBC2 Comp Thresh Playback Volume", + R_DACMBCTHR2, FB_DACMBCTHR2_THRESH, 0xff, 0, compth_scale), + SOC_ENUM("DAC MBC2 Comp Ratio Switch", + dac_mbc2_compressor_ratio_enum), + SND_SOC_BYTES("DAC MBC2 Comp Atk Time", R_DACMBCATK2L, 2), + SND_SOC_BYTES("DAC MBC2 Comp Rel Time Const", + R_DACMBCREL2L, 2), + + SOC_SINGLE("MBC3 Phase Invert", R_DACMBCMUG3, FB_DACMBCMUG3_PHASE, + 1, 0), + SOC_SINGLE_TLV("DAC MBC3 Make-Up Gain Playback Volume", + R_DACMBCMUG3, FB_DACMBCMUG3_MUGAIN, 0x1f, 0, mugain_scale), + SOC_SINGLE_TLV("DAC MBC3 Comp Thresh Playback Volume", + R_DACMBCTHR3, FB_DACMBCTHR3_THRESH, 0xff, 0, compth_scale), + SOC_ENUM("DAC MBC3 Comp Ratio Switch", + dac_mbc3_compressor_ratio_enum), + SND_SOC_BYTES("DAC MBC3 Comp Atk Time", R_DACMBCATK3L, 2), + SND_SOC_BYTES("DAC MBC3 Comp Rel Time Const", + R_DACMBCREL3L, 2), +}; + +static int setup_sample_format(struct snd_soc_codec *codec, + snd_pcm_format_t format) +{ + unsigned int width; + int ret; + + switch (format) { + case SNDRV_PCM_FORMAT_S16_LE: + width = RV_AIC1_WL_16; + break; + case SNDRV_PCM_FORMAT_S20_3LE: + width = RV_AIC1_WL_20; + break; + case SNDRV_PCM_FORMAT_S24_LE: + width = RV_AIC1_WL_24; + break; + case SNDRV_PCM_FORMAT_S32_LE: + 
width = RV_AIC1_WL_32; + break; + default: + ret = -EINVAL; + dev_err(codec->dev, "Unsupported format width (%d)\n", ret); + return ret; + } + ret = snd_soc_update_bits(codec, R_AIC1, RM_AIC1_WL, width); + if (ret < 0) { + dev_err(codec->dev, "Failed to set sample width (%d)\n", ret); + return ret; + } + + return 0; +} + +static int setup_sample_rate(struct snd_soc_codec *codec, unsigned int rate) +{ + struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); + unsigned int br, bm; + int ret; + + switch (rate) { + case 8000: + br = RV_DACSR_DBR_32; + bm = RV_DACSR_DBM_PT25; + break; + case 16000: + br = RV_DACSR_DBR_32; + bm = RV_DACSR_DBM_PT5; + break; + case 24000: + br = RV_DACSR_DBR_48; + bm = RV_DACSR_DBM_PT5; + break; + case 32000: + br = RV_DACSR_DBR_32; + bm = RV_DACSR_DBM_1; + break; + case 48000: + br = RV_DACSR_DBR_48; + bm = RV_DACSR_DBM_1; + break; + case 96000: + br = RV_DACSR_DBR_48; + bm = RV_DACSR_DBM_2; + break; + case 11025: + br = RV_DACSR_DBR_44_1; + bm = RV_DACSR_DBM_PT25; + break; + case 22050: + br = RV_DACSR_DBR_44_1; + bm = RV_DACSR_DBM_PT5; + break; + case 44100: + br = RV_DACSR_DBR_44_1; + bm = RV_DACSR_DBM_1; + break; + case 88200: + br = RV_DACSR_DBR_44_1; + bm = RV_DACSR_DBM_2; + break; + default: + dev_err(codec->dev, "Unsupported sample rate %d\n", rate); + return -EINVAL; + } + + /* DAC and ADC share bit and frame clock */ + ret = snd_soc_update_bits(codec, R_DACSR, RM_DACSR_DBR, br); + if (ret < 0) { + dev_err(codec->dev, "Failed to update register (%d)\n", ret); + return ret; + } + ret = snd_soc_update_bits(codec, R_DACSR, RM_DACSR_DBM, bm); + if (ret < 0) { + dev_err(codec->dev, "Failed to update register (%d)\n", ret); + return ret; + } + ret = snd_soc_update_bits(codec, R_ADCSR, RM_DACSR_DBR, br); + if (ret < 0) { + dev_err(codec->dev, "Failed to update register (%d)\n", ret); + return ret; + } + ret = snd_soc_update_bits(codec, R_ADCSR, RM_DACSR_DBM, bm); + if (ret < 0) { + dev_err(codec->dev, "Failed to update register 
(%d)\n", ret); + return ret; + } + + mutex_lock(&tscs42xx->audio_params_lock); + + tscs42xx->samplerate = rate; + + mutex_unlock(&tscs42xx->audio_params_lock); + + return 0; +} + +struct reg_setting { + unsigned int addr; + unsigned int val; + unsigned int mask; +}; + +#define PLL_REG_SETTINGS_COUNT 13 +struct pll_ctl { + int input_freq; + struct reg_setting settings[PLL_REG_SETTINGS_COUNT]; +}; + +#define PLL_CTL(f, rt, rd, r1b_l, r9, ra, rb, \ + rc, r12, r1b_h, re, rf, r10, r11) \ + { \ + .input_freq = f, \ + .settings = { \ + {R_TIMEBASE, rt, 0xFF}, \ + {R_PLLCTLD, rd, 0xFF}, \ + {R_PLLCTL1B, r1b_l, 0x0F}, \ + {R_PLLCTL9, r9, 0xFF}, \ + {R_PLLCTLA, ra, 0xFF}, \ + {R_PLLCTLB, rb, 0xFF}, \ + {R_PLLCTLC, rc, 0xFF}, \ + {R_PLLCTL12, r12, 0xFF}, \ + {R_PLLCTL1B, r1b_h, 0xF0}, \ + {R_PLLCTLE, re, 0xFF}, \ + {R_PLLCTLF, rf, 0xFF}, \ + {R_PLLCTL10, r10, 0xFF}, \ + {R_PLLCTL11, r11, 0xFF}, \ + }, \ + } + +static const struct pll_ctl pll_ctls[] = { + PLL_CTL(1411200, 0x05, + 0x39, 0x04, 0x07, 0x02, 0xC3, 0x04, + 0x1B, 0x10, 0x03, 0x03, 0xD0, 0x02), + PLL_CTL(1536000, 0x05, + 0x1A, 0x04, 0x02, 0x03, 0xE0, 0x01, + 0x1A, 0x10, 0x02, 0x03, 0xB9, 0x01), + PLL_CTL(2822400, 0x0A, + 0x23, 0x04, 0x07, 0x04, 0xC3, 0x04, + 0x22, 0x10, 0x05, 0x03, 0x58, 0x02), + PLL_CTL(3072000, 0x0B, + 0x22, 0x04, 0x07, 0x03, 0x48, 0x03, + 0x1A, 0x10, 0x04, 0x03, 0xB9, 0x01), + PLL_CTL(5644800, 0x15, + 0x23, 0x04, 0x0E, 0x04, 0xC3, 0x04, + 0x1A, 0x10, 0x08, 0x03, 0xE0, 0x01), + PLL_CTL(6144000, 0x17, + 0x1A, 0x04, 0x08, 0x03, 0xE0, 0x01, + 0x1A, 0x10, 0x08, 0x03, 0xB9, 0x01), + PLL_CTL(12000000, 0x2E, + 0x1B, 0x04, 0x19, 0x03, 0x00, 0x03, + 0x2A, 0x10, 0x19, 0x05, 0x98, 0x04), + PLL_CTL(19200000, 0x4A, + 0x13, 0x04, 0x14, 0x03, 0x80, 0x01, + 0x1A, 0x10, 0x19, 0x03, 0xB9, 0x01), + PLL_CTL(22000000, 0x55, + 0x2A, 0x04, 0x37, 0x05, 0x00, 0x06, + 0x22, 0x10, 0x26, 0x03, 0x49, 0x02), + PLL_CTL(22579200, 0x57, + 0x22, 0x04, 0x31, 0x03, 0x20, 0x03, + 0x1A, 0x10, 0x1D, 0x03, 0xB3, 0x01), + PLL_CTL(24000000, 
0x5D, + 0x13, 0x04, 0x19, 0x03, 0x80, 0x01, + 0x1B, 0x10, 0x19, 0x05, 0x4C, 0x02), + PLL_CTL(24576000, 0x5F, + 0x13, 0x04, 0x1D, 0x03, 0xB3, 0x01, + 0x22, 0x10, 0x40, 0x03, 0x72, 0x03), + PLL_CTL(27000000, 0x68, + 0x22, 0x04, 0x4B, 0x03, 0x00, 0x04, + 0x2A, 0x10, 0x7D, 0x03, 0x20, 0x06), + PLL_CTL(36000000, 0x8C, + 0x1B, 0x04, 0x4B, 0x03, 0x00, 0x03, + 0x2A, 0x10, 0x7D, 0x03, 0x98, 0x04), + PLL_CTL(25000000, 0x61, + 0x1B, 0x04, 0x37, 0x03, 0x2B, 0x03, + 0x1A, 0x10, 0x2A, 0x03, 0x39, 0x02), + PLL_CTL(26000000, 0x65, + 0x23, 0x04, 0x41, 0x05, 0x00, 0x06, + 0x1A, 0x10, 0x26, 0x03, 0xEF, 0x01), + PLL_CTL(12288000, 0x2F, + 0x1A, 0x04, 0x12, 0x03, 0x1C, 0x02, + 0x22, 0x10, 0x20, 0x03, 0x72, 0x03), + PLL_CTL(40000000, 0x9B, + 0x22, 0x08, 0x7D, 0x03, 0x80, 0x04, + 0x23, 0x10, 0x7D, 0x05, 0xE4, 0x06), + PLL_CTL(512000, 0x01, + 0x22, 0x04, 0x01, 0x03, 0xD0, 0x02, + 0x1B, 0x10, 0x01, 0x04, 0x72, 0x03), + PLL_CTL(705600, 0x02, + 0x22, 0x04, 0x02, 0x03, 0x15, 0x04, + 0x22, 0x10, 0x01, 0x04, 0x80, 0x02), + PLL_CTL(1024000, 0x03, + 0x22, 0x04, 0x02, 0x03, 0xD0, 0x02, + 0x1B, 0x10, 0x02, 0x04, 0x72, 0x03), + PLL_CTL(2048000, 0x07, + 0x22, 0x04, 0x04, 0x03, 0xD0, 0x02, + 0x1B, 0x10, 0x04, 0x04, 0x72, 0x03), + PLL_CTL(2400000, 0x08, + 0x22, 0x04, 0x05, 0x03, 0x00, 0x03, + 0x23, 0x10, 0x05, 0x05, 0x98, 0x04), +}; + +static const struct pll_ctl *get_pll_ctl(int input_freq) +{ + int i; + const struct pll_ctl *pll_ctl = NULL; + + for (i = 0; i < ARRAY_SIZE(pll_ctls); ++i) + if (input_freq == pll_ctls[i].input_freq) { + pll_ctl = &pll_ctls[i]; + break; + } + + return pll_ctl; +} + +static int set_pll_ctl_from_input_freq(struct snd_soc_codec *codec, + const int input_freq) +{ + int ret; + int i; + const struct pll_ctl *pll_ctl; + + pll_ctl = get_pll_ctl(input_freq); + if (!pll_ctl) { + ret = -EINVAL; + dev_err(codec->dev, "No PLL input entry for %d (%d)\n", + input_freq, ret); + return ret; + } + + for (i = 0; i < PLL_REG_SETTINGS_COUNT; ++i) { + ret = snd_soc_update_bits(codec, + 
pll_ctl->settings[i].addr, + pll_ctl->settings[i].mask, + pll_ctl->settings[i].val); + if (ret < 0) { + dev_err(codec->dev, "Failed to set pll ctl (%d)\n", + ret); + return ret; + } + } + + return 0; +} + +static int tscs42xx_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *codec_dai) +{ + struct snd_soc_codec *codec = codec_dai->codec; + int ret; + + ret = setup_sample_format(codec, params_format(params)); + if (ret < 0) { + dev_err(codec->dev, "Failed to setup sample format (%d)\n", + ret); + return ret; + } + + ret = setup_sample_rate(codec, params_rate(params)); + if (ret < 0) { + dev_err(codec->dev, "Failed to setup sample rate (%d)\n", ret); + return ret; + } + + return 0; +} + +static inline int dac_mute(struct snd_soc_codec *codec) +{ + int ret; + + ret = snd_soc_update_bits(codec, R_CNVRTR1, RM_CNVRTR1_DACMU, + RV_CNVRTR1_DACMU_ENABLE); + if (ret < 0) { + dev_err(codec->dev, "Failed to mute DAC (%d)\n", + ret); + return ret; + } + + return 0; +} + +static inline int dac_unmute(struct snd_soc_codec *codec) +{ + int ret; + + ret = snd_soc_update_bits(codec, R_CNVRTR1, RM_CNVRTR1_DACMU, + RV_CNVRTR1_DACMU_DISABLE); + if (ret < 0) { + dev_err(codec->dev, "Failed to unmute DAC (%d)\n", + ret); + return ret; + } + + return 0; +} + +static inline int adc_mute(struct snd_soc_codec *codec) +{ + int ret; + + ret = snd_soc_update_bits(codec, R_CNVRTR0, RM_CNVRTR0_ADCMU, + RV_CNVRTR0_ADCMU_ENABLE); + if (ret < 0) { + dev_err(codec->dev, "Failed to mute ADC (%d)\n", + ret); + return ret; + } + + return 0; +} + +static inline int adc_unmute(struct snd_soc_codec *codec) +{ + int ret; + + ret = snd_soc_update_bits(codec, R_CNVRTR0, RM_CNVRTR0_ADCMU, + RV_CNVRTR0_ADCMU_DISABLE); + if (ret < 0) { + dev_err(codec->dev, "Failed to unmute ADC (%d)\n", + ret); + return ret; + } + + return 0; +} + +static int tscs42xx_mute_stream(struct snd_soc_dai *dai, int mute, int stream) +{ + struct snd_soc_codec *codec = dai->codec; + 
int ret; + + if (mute) + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + ret = dac_mute(codec); + else + ret = adc_mute(codec); + else + if (stream == SNDRV_PCM_STREAM_PLAYBACK) + ret = dac_unmute(codec); + else + ret = adc_unmute(codec); + + return ret; +} + +static int tscs42xx_set_dai_fmt(struct snd_soc_dai *codec_dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + int ret; + + /* Slave mode not supported since it needs always-on frame clock */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + ret = snd_soc_update_bits(codec, R_AIC1, RM_AIC1_MS, + RV_AIC1_MS_MASTER); + if (ret < 0) { + dev_err(codec->dev, + "Failed to set codec DAI master (%d)\n", ret); + return ret; + } + break; + default: + ret = -EINVAL; + dev_err(codec->dev, "Unsupported format (%d)\n", ret); + return ret; + } + + return 0; +} + +static int tscs42xx_set_dai_bclk_ratio(struct snd_soc_dai *codec_dai, + unsigned int ratio) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); + unsigned int value; + int ret = 0; + + switch (ratio) { + case 32: + value = RV_DACSR_DBCM_32; + break; + case 40: + value = RV_DACSR_DBCM_40; + break; + case 64: + value = RV_DACSR_DBCM_64; + break; + default: + dev_err(codec->dev, "Unsupported bclk ratio (%d)\n", ret); + return -EINVAL; + } + + ret = snd_soc_update_bits(codec, R_DACSR, RM_DACSR_DBCM, value); + if (ret < 0) { + dev_err(codec->dev, "Failed to set DAC BCLK ratio (%d)\n", ret); + return ret; + } + ret = snd_soc_update_bits(codec, R_ADCSR, RM_ADCSR_ABCM, value); + if (ret < 0) { + dev_err(codec->dev, "Failed to set ADC BCLK ratio (%d)\n", ret); + return ret; + } + + mutex_lock(&tscs42xx->audio_params_lock); + + tscs42xx->bclk_ratio = ratio; + + mutex_unlock(&tscs42xx->audio_params_lock); + + return 0; +} + +static int tscs42xx_set_dai_sysclk(struct snd_soc_dai *codec_dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec 
*codec = codec_dai->codec; + int ret; + + switch (clk_id) { + case TSCS42XX_PLL_SRC_XTAL: + case TSCS42XX_PLL_SRC_MCLK1: + ret = snd_soc_write(codec, R_PLLREFSEL, + RV_PLLREFSEL_PLL1_REF_SEL_XTAL_MCLK1 | + RV_PLLREFSEL_PLL2_REF_SEL_XTAL_MCLK1); + if (ret < 0) { + dev_err(codec->dev, + "Failed to set pll reference input (%d)\n", + ret); + return ret; + } + break; + case TSCS42XX_PLL_SRC_MCLK2: + ret = snd_soc_write(codec, R_PLLREFSEL, + RV_PLLREFSEL_PLL1_REF_SEL_MCLK2 | + RV_PLLREFSEL_PLL2_REF_SEL_MCLK2); + if (ret < 0) { + dev_err(codec->dev, + "Failed to set PLL reference (%d)\n", ret); + return ret; + } + break; + default: + dev_err(codec->dev, "pll src is unsupported\n"); + return -EINVAL; + } + + ret = set_pll_ctl_from_input_freq(codec, freq); + if (ret < 0) { + dev_err(codec->dev, + "Failed to setup PLL input freq (%d)\n", ret); + return ret; + } + + return 0; +} + +static const struct snd_soc_dai_ops tscs42xx_dai_ops = { + .hw_params = tscs42xx_hw_params, + .mute_stream = tscs42xx_mute_stream, + .set_fmt = tscs42xx_set_dai_fmt, + .set_bclk_ratio = tscs42xx_set_dai_bclk_ratio, + .set_sysclk = tscs42xx_set_dai_sysclk, +}; + +static int part_is_valid(struct tscs42xx *tscs42xx) +{ + int val; + int ret; + unsigned int reg; + + ret = regmap_read(tscs42xx->regmap, R_DEVIDH, ®); + if (ret < 0) + return ret; + + val = reg << 8; + ret = regmap_read(tscs42xx->regmap, R_DEVIDL, ®); + if (ret < 0) + return ret; + + val |= reg; + + switch (val) { + case 0x4A74: + case 0x4A73: + return true; + default: + return false; + }; +} + +static struct snd_soc_codec_driver soc_codec_dev_tscs42xx = { + .component_driver = { + .dapm_widgets = tscs42xx_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(tscs42xx_dapm_widgets), + .dapm_routes = tscs42xx_intercon, + .num_dapm_routes = ARRAY_SIZE(tscs42xx_intercon), + .controls = tscs42xx_snd_controls, + .num_controls = ARRAY_SIZE(tscs42xx_snd_controls), + }, +}; + +static inline void init_coeff_ram_cache(struct tscs42xx *tscs42xx) +{ + const 
u8 norm_addrs[] = { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x19, 0x1f, + 0x20, 0x25, 0x2a, 0x2f, 0x34, 0x39, 0x3f, 0x40, 0x45, 0x4a, + 0x4f, 0x54, 0x59, 0x5f, 0x60, 0x65, 0x6a, 0x6f, 0x74, 0x79, + 0x7f, 0x80, 0x85, 0x8c, 0x91, 0x96, 0x97, 0x9c, 0xa3, 0xa8, + 0xad, 0xaf, 0xb0, 0xb5, 0xba, 0xbf, 0xc4, 0xc9, }; + u8 *coeff_ram = tscs42xx->coeff_ram; + int i; + + for (i = 0; i < ARRAY_SIZE(norm_addrs); i++) + coeff_ram[((norm_addrs[i] + 1) * COEFF_SIZE) - 1] = 0x40; +} + +#define TSCS42XX_RATES SNDRV_PCM_RATE_8000_96000 + +#define TSCS42XX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE \ + | SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S32_LE) + +static struct snd_soc_dai_driver tscs42xx_dai = { + .name = "tscs42xx-HiFi", + .playback = { + .stream_name = "HiFi Playback", + .channels_min = 2, + .channels_max = 2, + .rates = TSCS42XX_RATES, + .formats = TSCS42XX_FORMATS,}, + .capture = { + .stream_name = "HiFi Capture", + .channels_min = 2, + .channels_max = 2, + .rates = TSCS42XX_RATES, + .formats = TSCS42XX_FORMATS,}, + .ops = &tscs42xx_dai_ops, + .symmetric_rates = 1, + .symmetric_channels = 1, + .symmetric_samplebits = 1, +}; + +static const struct reg_sequence tscs42xx_patch[] = { + { R_AIC2, RV_AIC2_BLRCM_DAC_BCLK_LRCLK_SHARED }, +}; + +static int tscs42xx_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct tscs42xx *tscs42xx; + int ret = 0; + + tscs42xx = devm_kzalloc(&i2c->dev, sizeof(*tscs42xx), GFP_KERNEL); + if (!tscs42xx) { + ret = -ENOMEM; + dev_err(&i2c->dev, + "Failed to allocate memory for data (%d)\n", ret); + return ret; + } + i2c_set_clientdata(i2c, tscs42xx); + tscs42xx->dev = &i2c->dev; + + tscs42xx->regmap = devm_regmap_init_i2c(i2c, &tscs42xx_regmap); + if (IS_ERR(tscs42xx->regmap)) { + ret = PTR_ERR(tscs42xx->regmap); + dev_err(tscs42xx->dev, "Failed to allocate regmap (%d)\n", ret); + return ret; + } + + init_coeff_ram_cache(tscs42xx); + + ret = part_is_valid(tscs42xx); + if (ret <= 0) { + dev_err(tscs42xx->dev, "No 
valid part (%d)\n", ret); + ret = -ENODEV; + return ret; + } + + ret = regmap_write(tscs42xx->regmap, R_RESET, RV_RESET_ENABLE); + if (ret < 0) { + dev_err(tscs42xx->dev, "Failed to reset device (%d)\n", ret); + return ret; + } + + ret = regmap_register_patch(tscs42xx->regmap, tscs42xx_patch, + ARRAY_SIZE(tscs42xx_patch)); + if (ret < 0) { + dev_err(tscs42xx->dev, "Failed to apply patch (%d)\n", ret); + return ret; + } + + mutex_init(&tscs42xx->audio_params_lock); + mutex_init(&tscs42xx->coeff_ram_lock); + mutex_init(&tscs42xx->pll_lock); + + ret = snd_soc_register_codec(tscs42xx->dev, &soc_codec_dev_tscs42xx, + &tscs42xx_dai, 1); + if (ret) { + dev_err(tscs42xx->dev, "Failed to register codec (%d)\n", ret); + return ret; + } + + return 0; +} + +static int tscs42xx_i2c_remove(struct i2c_client *client) +{ + snd_soc_unregister_codec(&client->dev); + + return 0; +} + +static const struct i2c_device_id tscs42xx_i2c_id[] = { + { "tscs42A1", 0 }, + { "tscs42A2", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, tscs42xx_i2c_id); + +static const struct of_device_id tscs42xx_of_match[] = { + { .compatible = "tempo,tscs42A1", }, + { .compatible = "tempo,tscs42A2", }, + { } +}; +MODULE_DEVICE_TABLE(of, tscs42xx_of_match); + +static struct i2c_driver tscs42xx_i2c_driver = { + .driver = { + .name = "tscs42xx", + .owner = THIS_MODULE, + .of_match_table = tscs42xx_of_match, + }, + .probe = tscs42xx_i2c_probe, + .remove = tscs42xx_i2c_remove, + .id_table = tscs42xx_i2c_id, +}; + +module_i2c_driver(tscs42xx_i2c_driver); + +MODULE_AUTHOR("Tempo Semiconductor + +#ifndef __WOOKIE_H__ +#define __WOOKIE_H__ + +enum { + TSCS42XX_PLL_SRC_NONE, + TSCS42XX_PLL_SRC_XTAL, + TSCS42XX_PLL_SRC_MCLK1, + TSCS42XX_PLL_SRC_MCLK2, +}; + +#define R_HPVOLL 0x0 +#define R_HPVOLR 0x1 +#define R_SPKVOLL 0x2 +#define R_SPKVOLR 0x3 +#define R_DACVOLL 0x4 +#define R_DACVOLR 0x5 +#define R_ADCVOLL 0x6 +#define R_ADCVOLR 0x7 +#define R_INVOLL 0x8 +#define R_INVOLR 0x9 +#define R_INMODE 0x0B +#define R_INSELL 0x0C 
+#define R_INSELR 0x0D +#define R_AIC1 0x13 +#define R_AIC2 0x14 +#define R_CNVRTR0 0x16 +#define R_ADCSR 0x17 +#define R_CNVRTR1 0x18 +#define R_DACSR 0x19 +#define R_PWRM1 0x1A +#define R_PWRM2 0x1B +#define R_CONFIG0 0x1F +#define R_CONFIG1 0x20 +#define R_DMICCTL 0x24 +#define R_CLECTL 0x25 +#define R_MUGAIN 0x26 +#define R_COMPTH 0x27 +#define R_CMPRAT 0x28 +#define R_CATKTCL 0x29 +#define R_CATKTCH 0x2A +#define R_CRELTCL 0x2B +#define R_CRELTCH 0x2C +#define R_LIMTH 0x2D +#define R_LIMTGT 0x2E +#define R_LATKTCL 0x2F +#define R_LATKTCH 0x30 +#define R_LRELTCL 0x31 +#define R_LRELTCH 0x32 +#define R_EXPTH 0x33 +#define R_EXPRAT 0x34 +#define R_XATKTCL 0x35 +#define R_XATKTCH 0x36 +#define R_XRELTCL 0x37 +#define R_XRELTCH 0x38 +#define R_FXCTL 0x39 +#define R_DACCRWRL 0x3A +#define R_DACCRWRM 0x3B +#define R_DACCRWRH 0x3C +#define R_DACCRRDL 0x3D +#define R_DACCRRDM 0x3E +#define R_DACCRRDH 0x3F +#define R_DACCRADDR 0x40 +#define R_DCOFSEL 0x41 +#define R_PLLCTL9 0x4E +#define R_PLLCTLA 0x4F +#define R_PLLCTLB 0x50 +#define R_PLLCTLC 0x51 +#define R_PLLCTLD 0x52 +#define R_PLLCTLE 0x53 +#define R_PLLCTLF 0x54 +#define R_PLLCTL10 0x55 +#define R_PLLCTL11 0x56 +#define R_PLLCTL12 0x57 +#define R_PLLCTL1B 0x60 +#define R_PLLCTL1C 0x61 +#define R_TIMEBASE 0x77 +#define R_DEVIDL 0x7D +#define R_DEVIDH 0x7E +#define R_RESET 0x80 +#define R_DACCRSTAT 0x8A +#define R_PLLCTL0 0x8E +#define R_PLLREFSEL 0x8F +#define R_DACMBCEN 0xC7 +#define R_DACMBCCTL 0xC8 +#define R_DACMBCMUG1 0xC9 +#define R_DACMBCTHR1 0xCA +#define R_DACMBCRAT1 0xCB +#define R_DACMBCATK1L 0xCC +#define R_DACMBCATK1H 0xCD +#define R_DACMBCREL1L 0xCE +#define R_DACMBCREL1H 0xCF +#define R_DACMBCMUG2 0xD0 +#define R_DACMBCTHR2 0xD1 +#define R_DACMBCRAT2 0xD2 +#define R_DACMBCATK2L 0xD3 +#define R_DACMBCATK2H 0xD4 +#define R_DACMBCREL2L 0xD5 +#define R_DACMBCREL2H 0xD6 +#define R_DACMBCMUG3 0xD7 +#define R_DACMBCTHR3 0xD8 +#define R_DACMBCRAT3 0xD9 +#define R_DACMBCATK3L 0xDA +#define R_DACMBCATK3H 
0xDB +#define R_DACMBCREL3L 0xDC +#define R_DACMBCREL3H 0xDD + +/* Helpers */ +#define RM(m, b) ((m)<<(b)) +#define RV(v, b) ((v)<<(b)) + +/**************************** + * R_HPVOLL (0x0) * + ****************************/ + +/* Field Offsets */ +#define FB_HPVOLL 0 + +/* Field Masks */ +#define FM_HPVOLL 0X7F + +/* Field Values */ +#define FV_HPVOLL_P6DB 0x7F +#define FV_HPVOLL_N88PT5DB 0x1 +#define FV_HPVOLL_MUTE 0x0 + +/* Register Masks */ +#define RM_HPVOLL RM(FM_HPVOLL, FB_HPVOLL) + +/* Register Values */ +#define RV_HPVOLL_P6DB RV(FV_HPVOLL_P6DB, FB_HPVOLL) +#define RV_HPVOLL_N88PT5DB RV(FV_HPVOLL_N88PT5DB, FB_HPVOLL) +#define RV_HPVOLL_MUTE RV(FV_HPVOLL_MUTE, FB_HPVOLL) + +/**************************** + * R_HPVOLR (0x1) * + ****************************/ + +/* Field Offsets */ +#define FB_HPVOLR 0 + +/* Field Masks */ +#define FM_HPVOLR 0X7F + +/* Field Values */ +#define FV_HPVOLR_P6DB 0x7F +#define FV_HPVOLR_N88PT5DB 0x1 +#define FV_HPVOLR_MUTE 0x0 + +/* Register Masks */ +#define RM_HPVOLR RM(FM_HPVOLR, FB_HPVOLR) + +/* Register Values */ +#define RV_HPVOLR_P6DB RV(FV_HPVOLR_P6DB, FB_HPVOLR) +#define RV_HPVOLR_N88PT5DB RV(FV_HPVOLR_N88PT5DB, FB_HPVOLR) +#define RV_HPVOLR_MUTE RV(FV_HPVOLR_MUTE, FB_HPVOLR) + +/***************************** + * R_SPKVOLL (0x2) * + *****************************/ + +/* Field Offsets */ +#define FB_SPKVOLL 0 + +/* Field Masks */ +#define FM_SPKVOLL 0X7F + +/* Field Values */ +#define FV_SPKVOLL_P12DB 0x7F +#define FV_SPKVOLL_N77PT25DB 0x8 +#define FV_SPKVOLL_MUTE 0x0 + +/* Register Masks */ +#define RM_SPKVOLL RM(FM_SPKVOLL, FB_SPKVOLL) + +/* Register Values */ +#define RV_SPKVOLL_P12DB RV(FV_SPKVOLL_P12DB, FB_SPKVOLL) +#define RV_SPKVOLL_N77PT25DB \ + RV(FV_SPKVOLL_N77PT25DB, FB_SPKVOLL) + +#define RV_SPKVOLL_MUTE RV(FV_SPKVOLL_MUTE, FB_SPKVOLL) + +/***************************** + * R_SPKVOLR (0x3) * + *****************************/ + +/* Field Offsets */ +#define FB_SPKVOLR 0 + +/* Field Masks */ +#define FM_SPKVOLR 0X7F + 
+/* Field Values */ +#define FV_SPKVOLR_P12DB 0x7F +#define FV_SPKVOLR_N77PT25DB 0x8 +#define FV_SPKVOLR_MUTE 0x0 + +/* Register Masks */ +#define RM_SPKVOLR RM(FM_SPKVOLR, FB_SPKVOLR) + +/* Register Values */ +#define RV_SPKVOLR_P12DB RV(FV_SPKVOLR_P12DB, FB_SPKVOLR) +#define RV_SPKVOLR_N77PT25DB \ + RV(FV_SPKVOLR_N77PT25DB, FB_SPKVOLR) + +#define RV_SPKVOLR_MUTE RV(FV_SPKVOLR_MUTE, FB_SPKVOLR) + +/***************************** + * R_DACVOLL (0x4) * + *****************************/ + +/* Field Offsets */ +#define FB_DACVOLL 0 + +/* Field Masks */ +#define FM_DACVOLL 0XFF + +/* Field Values */ +#define FV_DACVOLL_0DB 0xFF +#define FV_DACVOLL_N95PT625DB 0x1 +#define FV_DACVOLL_MUTE 0x0 + +/* Register Masks */ +#define RM_DACVOLL RM(FM_DACVOLL, FB_DACVOLL) + +/* Register Values */ +#define RV_DACVOLL_0DB RV(FV_DACVOLL_0DB, FB_DACVOLL) +#define RV_DACVOLL_N95PT625DB \ + RV(FV_DACVOLL_N95PT625DB, FB_DACVOLL) + +#define RV_DACVOLL_MUTE RV(FV_DACVOLL_MUTE, FB_DACVOLL) + +/***************************** + * R_DACVOLR (0x5) * + *****************************/ + +/* Field Offsets */ +#define FB_DACVOLR 0 + +/* Field Masks */ +#define FM_DACVOLR 0XFF + +/* Field Values */ +#define FV_DACVOLR_0DB 0xFF +#define FV_DACVOLR_N95PT625DB 0x1 +#define FV_DACVOLR_MUTE 0x0 + +/* Register Masks */ +#define RM_DACVOLR RM(FM_DACVOLR, FB_DACVOLR) + +/* Register Values */ +#define RV_DACVOLR_0DB RV(FV_DACVOLR_0DB, FB_DACVOLR) +#define RV_DACVOLR_N95PT625DB \ + RV(FV_DACVOLR_N95PT625DB, FB_DACVOLR) + +#define RV_DACVOLR_MUTE RV(FV_DACVOLR_MUTE, FB_DACVOLR) + +/***************************** + * R_ADCVOLL (0x6) * + *****************************/ + +/* Field Offsets */ +#define FB_ADCVOLL 0 + +/* Field Masks */ +#define FM_ADCVOLL 0XFF + +/* Field Values */ +#define FV_ADCVOLL_P24DB 0xFF +#define FV_ADCVOLL_N71PT25DB 0x1 +#define FV_ADCVOLL_MUTE 0x0 + +/* Register Masks */ +#define RM_ADCVOLL RM(FM_ADCVOLL, FB_ADCVOLL) + +/* Register Values */ +#define RV_ADCVOLL_P24DB RV(FV_ADCVOLL_P24DB, 
FB_ADCVOLL) +#define RV_ADCVOLL_N71PT25DB \ + RV(FV_ADCVOLL_N71PT25DB, FB_ADCVOLL) + +#define RV_ADCVOLL_MUTE RV(FV_ADCVOLL_MUTE, FB_ADCVOLL) + +/***************************** + * R_ADCVOLR (0x7) * + *****************************/ + +/* Field Offsets */ +#define FB_ADCVOLR 0 + +/* Field Masks */ +#define FM_ADCVOLR 0XFF + +/* Field Values */ +#define FV_ADCVOLR_P24DB 0xFF +#define FV_ADCVOLR_N71PT25DB 0x1 +#define FV_ADCVOLR_MUTE 0x0 + +/* Register Masks */ +#define RM_ADCVOLR RM(FM_ADCVOLR, FB_ADCVOLR) + +/* Register Values */ +#define RV_ADCVOLR_P24DB RV(FV_ADCVOLR_P24DB, FB_ADCVOLR) +#define RV_ADCVOLR_N71PT25DB \ + RV(FV_ADCVOLR_N71PT25DB, FB_ADCVOLR) + +#define RV_ADCVOLR_MUTE RV(FV_ADCVOLR_MUTE, FB_ADCVOLR) + +/**************************** + * R_INVOLL (0x8) * + ****************************/ + +/* Field Offsets */ +#define FB_INVOLL_INMUTEL 7 +#define FB_INVOLL_IZCL 6 +#define FB_INVOLL 0 + +/* Field Masks */ +#define FM_INVOLL_INMUTEL 0X1 +#define FM_INVOLL_IZCL 0X1 +#define FM_INVOLL 0X3F + +/* Field Values */ +#define FV_INVOLL_INMUTEL_ENABLE 0x1 +#define FV_INVOLL_INMUTEL_DISABLE 0x0 +#define FV_INVOLL_IZCL_ENABLE 0x1 +#define FV_INVOLL_IZCL_DISABLE 0x0 +#define FV_INVOLL_P30DB 0x3F +#define FV_INVOLL_N17PT25DB 0x0 + +/* Register Masks */ +#define RM_INVOLL_INMUTEL \ + RM(FM_INVOLL_INMUTEL, FB_INVOLL_INMUTEL) + +#define RM_INVOLL_IZCL RM(FM_INVOLL_IZCL, FB_INVOLL_IZCL) +#define RM_INVOLL RM(FM_INVOLL, FB_INVOLL) + +/* Register Values */ +#define RV_INVOLL_INMUTEL_ENABLE \ + RV(FV_INVOLL_INMUTEL_ENABLE, FB_INVOLL_INMUTEL) + +#define RV_INVOLL_INMUTEL_DISABLE \ + RV(FV_INVOLL_INMUTEL_DISABLE, FB_INVOLL_INMUTEL) + +#define RV_INVOLL_IZCL_ENABLE \ + RV(FV_INVOLL_IZCL_ENABLE, FB_INVOLL_IZCL) + +#define RV_INVOLL_IZCL_DISABLE \ + RV(FV_INVOLL_IZCL_DISABLE, FB_INVOLL_IZCL) + +#define RV_INVOLL_P30DB RV(FV_INVOLL_P30DB, FB_INVOLL) +#define RV_INVOLL_N17PT25DB RV(FV_INVOLL_N17PT25DB, FB_INVOLL) + +/**************************** + * R_INVOLR (0x9) * + 
****************************/ + +/* Field Offsets */ +#define FB_INVOLR_INMUTER 7 +#define FB_INVOLR_IZCR 6 +#define FB_INVOLR 0 + +/* Field Masks */ +#define FM_INVOLR_INMUTER 0X1 +#define FM_INVOLR_IZCR 0X1 +#define FM_INVOLR 0X3F + +/* Field Values */ +#define FV_INVOLR_INMUTER_ENABLE 0x1 +#define FV_INVOLR_INMUTER_DISABLE 0x0 +#define FV_INVOLR_IZCR_ENABLE 0x1 +#define FV_INVOLR_IZCR_DISABLE 0x0 +#define FV_INVOLR_P30DB 0x3F +#define FV_INVOLR_N17PT25DB 0x0 + +/* Register Masks */ +#define RM_INVOLR_INMUTER \ + RM(FM_INVOLR_INMUTER, FB_INVOLR_INMUTER) + +#define RM_INVOLR_IZCR RM(FM_INVOLR_IZCR, FB_INVOLR_IZCR) +#define RM_INVOLR RM(FM_INVOLR, FB_INVOLR) + +/* Register Values */ +#define RV_INVOLR_INMUTER_ENABLE \ + RV(FV_INVOLR_INMUTER_ENABLE, FB_INVOLR_INMUTER) + +#define RV_INVOLR_INMUTER_DISABLE \ + RV(FV_INVOLR_INMUTER_DISABLE, FB_INVOLR_INMUTER) + +#define RV_INVOLR_IZCR_ENABLE \ + RV(FV_INVOLR_IZCR_ENABLE, FB_INVOLR_IZCR) + +#define RV_INVOLR_IZCR_DISABLE \ + RV(FV_INVOLR_IZCR_DISABLE, FB_INVOLR_IZCR) + +#define RV_INVOLR_P30DB RV(FV_INVOLR_P30DB, FB_INVOLR) +#define RV_INVOLR_N17PT25DB RV(FV_INVOLR_N17PT25DB, FB_INVOLR) + +/***************************** + * R_INMODE (0x0B) * + *****************************/ + +/* Field Offsets */ +#define FB_INMODE_DS 0 + +/* Field Masks */ +#define FM_INMODE_DS 0X1 + +/* Field Values */ +#define FV_INMODE_DS_LRIN1 0x0 +#define FV_INMODE_DS_LRIN2 0x1 + +/* Register Masks */ +#define RM_INMODE_DS RM(FM_INMODE_DS, FB_INMODE_DS) + +/* Register Values */ +#define RV_INMODE_DS_LRIN1 \ + RV(FV_INMODE_DS_LRIN1, FB_INMODE_DS) + +#define RV_INMODE_DS_LRIN2 \ + RV(FV_INMODE_DS_LRIN2, FB_INMODE_DS) + + +/***************************** + * R_INSELL (0x0C) * + *****************************/ + +/* Field Offsets */ +#define FB_INSELL 6 +#define FB_INSELL_MICBSTL 4 + +/* Field Masks */ +#define FM_INSELL 0X3 +#define FM_INSELL_MICBSTL 0X3 + +/* Field Values */ +#define FV_INSELL_IN1 0x0 +#define FV_INSELL_IN2 0x1 +#define FV_INSELL_IN3 
0x2 +#define FV_INSELL_D2S 0x3 +#define FV_INSELL_MICBSTL_OFF 0x0 +#define FV_INSELL_MICBSTL_10DB 0x1 +#define FV_INSELL_MICBSTL_20DB 0x2 +#define FV_INSELL_MICBSTL_30DB 0x3 + +/* Register Masks */ +#define RM_INSELL RM(FM_INSELL, FB_INSELL) +#define RM_INSELL_MICBSTL \ + RM(FM_INSELL_MICBSTL, FB_INSELL_MICBSTL) + + +/* Register Values */ +#define RV_INSELL_IN1 RV(FV_INSELL_IN1, FB_INSELL) +#define RV_INSELL_IN2 RV(FV_INSELL_IN2, FB_INSELL) +#define RV_INSELL_IN3 RV(FV_INSELL_IN3, FB_INSELL) +#define RV_INSELL_D2S RV(FV_INSELL_D2S, FB_INSELL) +#define RV_INSELL_MICBSTL_OFF \ + RV(FV_INSELL_MICBSTL_OFF, FB_INSELL_MICBSTL) + +#define RV_INSELL_MICBSTL_10DB \ + RV(FV_INSELL_MICBSTL_10DB, FB_INSELL_MICBSTL) + +#define RV_INSELL_MICBSTL_20DB \ + RV(FV_INSELL_MICBSTL_20DB, FB_INSELL_MICBSTL) + +#define RV_INSELL_MICBSTL_30DB \ + RV(FV_INSELL_MICBSTL_30DB, FB_INSELL_MICBSTL) + + +/***************************** + * R_INSELR (0x0D) * + *****************************/ + +/* Field Offsets */ +#define FB_INSELR 6 +#define FB_INSELR_MICBSTR 4 + +/* Field Masks */ +#define FM_INSELR 0X3 +#define FM_INSELR_MICBSTR 0X3 + +/* Field Values */ +#define FV_INSELR_IN1 0x0 +#define FV_INSELR_IN2 0x1 +#define FV_INSELR_IN3 0x2 +#define FV_INSELR_D2S 0x3 +#define FV_INSELR_MICBSTR_OFF 0x0 +#define FV_INSELR_MICBSTR_10DB 0x1 +#define FV_INSELR_MICBSTR_20DB 0x2 +#define FV_INSELR_MICBSTR_30DB 0x3 + +/* Register Masks */ +#define RM_INSELR RM(FM_INSELR, FB_INSELR) +#define RM_INSELR_MICBSTR \ + RM(FM_INSELR_MICBSTR, FB_INSELR_MICBSTR) + + +/* Register Values */ +#define RV_INSELR_IN1 RV(FV_INSELR_IN1, FB_INSELR) +#define RV_INSELR_IN2 RV(FV_INSELR_IN2, FB_INSELR) +#define RV_INSELR_IN3 RV(FV_INSELR_IN3, FB_INSELR) +#define RV_INSELR_D2S RV(FV_INSELR_D2S, FB_INSELR) +#define RV_INSELR_MICBSTR_OFF \ + RV(FV_INSELR_MICBSTR_OFF, FB_INSELR_MICBSTR) + +#define RV_INSELR_MICBSTR_10DB \ + RV(FV_INSELR_MICBSTR_10DB, FB_INSELR_MICBSTR) + +#define RV_INSELR_MICBSTR_20DB \ + RV(FV_INSELR_MICBSTR_20DB, 
FB_INSELR_MICBSTR) + +#define RV_INSELR_MICBSTR_30DB \ + RV(FV_INSELR_MICBSTR_30DB, FB_INSELR_MICBSTR) + + +/*************************** + * R_AIC1 (0x13) * + ***************************/ + +/* Field Offsets */ +#define FB_AIC1_BCLKINV 6 +#define FB_AIC1_MS 5 +#define FB_AIC1_LRP 4 +#define FB_AIC1_WL 2 +#define FB_AIC1_FORMAT 0 + +/* Field Masks */ +#define FM_AIC1_BCLKINV 0X1 +#define FM_AIC1_MS 0X1 +#define FM_AIC1_LRP 0X1 +#define FM_AIC1_WL 0X3 +#define FM_AIC1_FORMAT 0X3 + +/* Field Values */ +#define FV_AIC1_BCLKINV_ENABLE 0x1 +#define FV_AIC1_BCLKINV_DISABLE 0x0 +#define FV_AIC1_MS_MASTER 0x1 +#define FV_AIC1_MS_SLAVE 0x0 +#define FV_AIC1_LRP_INVERT 0x1 +#define FV_AIC1_LRP_NORMAL 0x0 +#define FV_AIC1_WL_16 0x0 +#define FV_AIC1_WL_20 0x1 +#define FV_AIC1_WL_24 0x2 +#define FV_AIC1_WL_32 0x3 +#define FV_AIC1_FORMAT_RIGHT 0x0 +#define FV_AIC1_FORMAT_LEFT 0x1 +#define FV_AIC1_FORMAT_I2S 0x2 + +/* Register Masks */ +#define RM_AIC1_BCLKINV \ + RM(FM_AIC1_BCLKINV, FB_AIC1_BCLKINV) + +#define RM_AIC1_MS RM(FM_AIC1_MS, FB_AIC1_MS) +#define RM_AIC1_LRP RM(FM_AIC1_LRP, FB_AIC1_LRP) +#define RM_AIC1_WL RM(FM_AIC1_WL, FB_AIC1_WL) +#define RM_AIC1_FORMAT RM(FM_AIC1_FORMAT, FB_AIC1_FORMAT) + +/* Register Values */ +#define RV_AIC1_BCLKINV_ENABLE \ + RV(FV_AIC1_BCLKINV_ENABLE, FB_AIC1_BCLKINV) + +#define RV_AIC1_BCLKINV_DISABLE \ + RV(FV_AIC1_BCLKINV_DISABLE, FB_AIC1_BCLKINV) + +#define RV_AIC1_MS_MASTER RV(FV_AIC1_MS_MASTER, FB_AIC1_MS) +#define RV_AIC1_MS_SLAVE RV(FV_AIC1_MS_SLAVE, FB_AIC1_MS) +#define RV_AIC1_LRP_INVERT \ + RV(FV_AIC1_LRP_INVERT, FB_AIC1_LRP) + +#define RV_AIC1_LRP_NORMAL \ + RV(FV_AIC1_LRP_NORMAL, FB_AIC1_LRP) + +#define RV_AIC1_WL_16 RV(FV_AIC1_WL_16, FB_AIC1_WL) +#define RV_AIC1_WL_20 RV(FV_AIC1_WL_20, FB_AIC1_WL) +#define RV_AIC1_WL_24 RV(FV_AIC1_WL_24, FB_AIC1_WL) +#define RV_AIC1_WL_32 RV(FV_AIC1_WL_32, FB_AIC1_WL) +#define RV_AIC1_FORMAT_RIGHT \ + RV(FV_AIC1_FORMAT_RIGHT, FB_AIC1_FORMAT) + +#define RV_AIC1_FORMAT_LEFT \ + 
RV(FV_AIC1_FORMAT_LEFT, FB_AIC1_FORMAT) + +#define RV_AIC1_FORMAT_I2S \ + RV(FV_AIC1_FORMAT_I2S, FB_AIC1_FORMAT) + + +/*************************** + * R_AIC2 (0x14) * + ***************************/ + +/* Field Offsets */ +#define FB_AIC2_DACDSEL 6 +#define FB_AIC2_ADCDSEL 4 +#define FB_AIC2_TRI 3 +#define FB_AIC2_BLRCM 0 + +/* Field Masks */ +#define FM_AIC2_DACDSEL 0X3 +#define FM_AIC2_ADCDSEL 0X3 +#define FM_AIC2_TRI 0X1 +#define FM_AIC2_BLRCM 0X7 + +/* Field Values */ +#define FV_AIC2_BLRCM_DAC_BCLK_LRCLK_SHARED 0x3 + +/* Register Masks */ +#define RM_AIC2_DACDSEL \ + RM(FM_AIC2_DACDSEL, FB_AIC2_DACDSEL) + +#define RM_AIC2_ADCDSEL \ + RM(FM_AIC2_ADCDSEL, FB_AIC2_ADCDSEL) + +#define RM_AIC2_TRI RM(FM_AIC2_TRI, FB_AIC2_TRI) +#define RM_AIC2_BLRCM RM(FM_AIC2_BLRCM, FB_AIC2_BLRCM) + +/* Register Values */ +#define RV_AIC2_BLRCM_DAC_BCLK_LRCLK_SHARED \ + RV(FV_AIC2_BLRCM_DAC_BCLK_LRCLK_SHARED, FB_AIC2_BLRCM) + + +/****************************** + * R_CNVRTR0 (0x16) * + ******************************/ + +/* Field Offsets */ +#define FB_CNVRTR0_ADCPOLR 7 +#define FB_CNVRTR0_ADCPOLL 6 +#define FB_CNVRTR0_AMONOMIX 4 +#define FB_CNVRTR0_ADCMU 3 +#define FB_CNVRTR0_HPOR 2 +#define FB_CNVRTR0_ADCHPDR 1 +#define FB_CNVRTR0_ADCHPDL 0 + +/* Field Masks */ +#define FM_CNVRTR0_ADCPOLR 0X1 +#define FM_CNVRTR0_ADCPOLL 0X1 +#define FM_CNVRTR0_AMONOMIX 0X3 +#define FM_CNVRTR0_ADCMU 0X1 +#define FM_CNVRTR0_HPOR 0X1 +#define FM_CNVRTR0_ADCHPDR 0X1 +#define FM_CNVRTR0_ADCHPDL 0X1 + +/* Field Values */ +#define FV_CNVRTR0_ADCPOLR_INVERT 0x1 +#define FV_CNVRTR0_ADCPOLR_NORMAL 0x0 +#define FV_CNVRTR0_ADCPOLL_INVERT 0x1 +#define FV_CNVRTR0_ADCPOLL_NORMAL 0x0 +#define FV_CNVRTR0_ADCMU_ENABLE 0x1 +#define FV_CNVRTR0_ADCMU_DISABLE 0x0 +#define FV_CNVRTR0_ADCHPDR_ENABLE 0x1 +#define FV_CNVRTR0_ADCHPDR_DISABLE 0x0 +#define FV_CNVRTR0_ADCHPDL_ENABLE 0x1 +#define FV_CNVRTR0_ADCHPDL_DISABLE 0x0 + +/* Register Masks */ +#define RM_CNVRTR0_ADCPOLR \ + RM(FM_CNVRTR0_ADCPOLR, FB_CNVRTR0_ADCPOLR) + 
+/*
+ * R_CNVRTR0 register masks and values (continued).
+ *
+ * Naming used throughout: FB_* is a field offset, FM_* a field mask and
+ * FV_* a field value, all unshifted; RM_* and RV_* pass them to RM()/RV().
+ * RM()/RV() are defined outside this hunk -- presumably they shift the
+ * mask/value left by the field offset; confirm against their definitions.
+ */
+#define RM_CNVRTR0_ADCPOLL \
+	RM(FM_CNVRTR0_ADCPOLL, FB_CNVRTR0_ADCPOLL)
+#define RM_CNVRTR0_AMONOMIX \
+	RM(FM_CNVRTR0_AMONOMIX, FB_CNVRTR0_AMONOMIX)
+#define RM_CNVRTR0_ADCMU \
+	RM(FM_CNVRTR0_ADCMU, FB_CNVRTR0_ADCMU)
+#define RM_CNVRTR0_HPOR \
+	RM(FM_CNVRTR0_HPOR, FB_CNVRTR0_HPOR)
+#define RM_CNVRTR0_ADCHPDR \
+	RM(FM_CNVRTR0_ADCHPDR, FB_CNVRTR0_ADCHPDR)
+#define RM_CNVRTR0_ADCHPDL \
+	RM(FM_CNVRTR0_ADCHPDL, FB_CNVRTR0_ADCHPDL)
+
+/* R_CNVRTR0 register values */
+#define RV_CNVRTR0_ADCPOLR_INVERT \
+	RV(FV_CNVRTR0_ADCPOLR_INVERT, FB_CNVRTR0_ADCPOLR)
+#define RV_CNVRTR0_ADCPOLR_NORMAL \
+	RV(FV_CNVRTR0_ADCPOLR_NORMAL, FB_CNVRTR0_ADCPOLR)
+#define RV_CNVRTR0_ADCPOLL_INVERT \
+	RV(FV_CNVRTR0_ADCPOLL_INVERT, FB_CNVRTR0_ADCPOLL)
+#define RV_CNVRTR0_ADCPOLL_NORMAL \
+	RV(FV_CNVRTR0_ADCPOLL_NORMAL, FB_CNVRTR0_ADCPOLL)
+#define RV_CNVRTR0_ADCMU_ENABLE \
+	RV(FV_CNVRTR0_ADCMU_ENABLE, FB_CNVRTR0_ADCMU)
+#define RV_CNVRTR0_ADCMU_DISABLE \
+	RV(FV_CNVRTR0_ADCMU_DISABLE, FB_CNVRTR0_ADCMU)
+#define RV_CNVRTR0_ADCHPDR_ENABLE \
+	RV(FV_CNVRTR0_ADCHPDR_ENABLE, FB_CNVRTR0_ADCHPDR)
+#define RV_CNVRTR0_ADCHPDR_DISABLE \
+	RV(FV_CNVRTR0_ADCHPDR_DISABLE, FB_CNVRTR0_ADCHPDR)
+#define RV_CNVRTR0_ADCHPDL_ENABLE \
+	RV(FV_CNVRTR0_ADCHPDL_ENABLE, FB_CNVRTR0_ADCHPDL)
+#define RV_CNVRTR0_ADCHPDL_DISABLE \
+	RV(FV_CNVRTR0_ADCHPDL_DISABLE, FB_CNVRTR0_ADCHPDL)
+
+/*
+ * R_ADCSR (0x17)
+ *
+ * Layout implied by the offsets and masks below:
+ *   ABCM bits 7:6 | ABR bits 4:3 | ABM bits 2:0
+ */
+
+/* Field offsets */
+#define FB_ADCSR_ABCM	6
+#define FB_ADCSR_ABR	3
+#define FB_ADCSR_ABM	0
+
+/* Field masks (unshifted) */
+#define FM_ADCSR_ABCM	0x3
+#define FM_ADCSR_ABR	0x3
+#define FM_ADCSR_ABM	0x7
+
+/* Field values (unshifted) */
+#define FV_ADCSR_ABCM_AUTO	0x0
+#define FV_ADCSR_ABCM_32	0x1
+#define FV_ADCSR_ABCM_40	0x2
+#define FV_ADCSR_ABCM_64	0x3
+#define FV_ADCSR_ABR_32	0x0
+#define FV_ADCSR_ABR_44_1	0x1
+#define FV_ADCSR_ABR_48	0x2
+#define FV_ADCSR_ABM_PT25	0x0
+#define FV_ADCSR_ABM_PT5	0x1
+#define FV_ADCSR_ABM_1	0x2
+#define FV_ADCSR_ABM_2	0x3
+
+/* Register masks */
+#define RM_ADCSR_ABCM RM(FM_ADCSR_ABCM, FB_ADCSR_ABCM)
+#define RM_ADCSR_ABR RM(FM_ADCSR_ABR, FB_ADCSR_ABR)
+#define RM_ADCSR_ABM RM(FM_ADCSR_ABM, FB_ADCSR_ABM)
+
+/*
+ * Register values.
+ *
+ * ABR encoding 0x3 has no named field value, so there is deliberately no
+ * RV_ADCSR_ABR_* macro for it: every RV_* here must expand to an FV_* that
+ * is actually defined above.
+ */
+#define RV_ADCSR_ABCM_AUTO \
+	RV(FV_ADCSR_ABCM_AUTO, FB_ADCSR_ABCM)
+#define RV_ADCSR_ABCM_32 \
+	RV(FV_ADCSR_ABCM_32, FB_ADCSR_ABCM)
+#define RV_ADCSR_ABCM_40 \
+	RV(FV_ADCSR_ABCM_40, FB_ADCSR_ABCM)
+#define RV_ADCSR_ABCM_64 \
+	RV(FV_ADCSR_ABCM_64, FB_ADCSR_ABCM)
+#define RV_ADCSR_ABR_32 RV(FV_ADCSR_ABR_32, FB_ADCSR_ABR)
+#define RV_ADCSR_ABR_44_1 \
+	RV(FV_ADCSR_ABR_44_1, FB_ADCSR_ABR)
+#define RV_ADCSR_ABR_48 RV(FV_ADCSR_ABR_48, FB_ADCSR_ABR)
+#define RV_ADCSR_ABM_PT25 \
+	RV(FV_ADCSR_ABM_PT25, FB_ADCSR_ABM)
+#define RV_ADCSR_ABM_PT5 RV(FV_ADCSR_ABM_PT5, FB_ADCSR_ABM)
+#define RV_ADCSR_ABM_1 RV(FV_ADCSR_ABM_1, FB_ADCSR_ABM)
+#define RV_ADCSR_ABM_2 RV(FV_ADCSR_ABM_2, FB_ADCSR_ABM)
+
+/*
+ * R_CNVRTR1 (0x18)
+ *
+ * Layout implied by the offsets and masks below:
+ *   DACPOLR bit 7 | DACPOLL bit 6 | DMONOMIX bits 5:4 | DACMU bit 3 |
+ *   DEEMPH bit 2 | DACDITH bits 1:0
+ * DEEMPH and DACDITH have masks only; no named values are provided.
+ */
+
+/* Field offsets */
+#define FB_CNVRTR1_DACPOLR	7
+#define FB_CNVRTR1_DACPOLL	6
+#define FB_CNVRTR1_DMONOMIX	4
+#define FB_CNVRTR1_DACMU	3
+#define FB_CNVRTR1_DEEMPH	2
+#define FB_CNVRTR1_DACDITH	0
+
+/* Field masks (unshifted) */
+#define FM_CNVRTR1_DACPOLR	0x1
+#define FM_CNVRTR1_DACPOLL	0x1
+#define FM_CNVRTR1_DMONOMIX	0x3
+#define FM_CNVRTR1_DACMU	0x1
+#define FM_CNVRTR1_DEEMPH	0x1
+#define FM_CNVRTR1_DACDITH	0x3
+
+/* Field values (unshifted) */
+#define FV_CNVRTR1_DACPOLR_INVERT	0x1
+#define FV_CNVRTR1_DACPOLR_NORMAL	0x0
+#define FV_CNVRTR1_DACPOLL_INVERT	0x1
+#define FV_CNVRTR1_DACPOLL_NORMAL	0x0
+#define FV_CNVRTR1_DMONOMIX_ENABLE	0x1
+#define FV_CNVRTR1_DMONOMIX_DISABLE	0x0
+#define FV_CNVRTR1_DACMU_ENABLE	0x1
+#define FV_CNVRTR1_DACMU_DISABLE	0x0
+
+/* Register masks */
+#define RM_CNVRTR1_DACPOLR \
+	RM(FM_CNVRTR1_DACPOLR, FB_CNVRTR1_DACPOLR)
+#define RM_CNVRTR1_DACPOLL \
+	RM(FM_CNVRTR1_DACPOLL, FB_CNVRTR1_DACPOLL)
+#define RM_CNVRTR1_DMONOMIX \
+	RM(FM_CNVRTR1_DMONOMIX, FB_CNVRTR1_DMONOMIX)
+#define RM_CNVRTR1_DACMU \
+	RM(FM_CNVRTR1_DACMU, FB_CNVRTR1_DACMU)
+#define RM_CNVRTR1_DEEMPH \
+	RM(FM_CNVRTR1_DEEMPH, FB_CNVRTR1_DEEMPH)
+#define RM_CNVRTR1_DACDITH \
+	RM(FM_CNVRTR1_DACDITH, FB_CNVRTR1_DACDITH)
+
+/* Register values */
+#define RV_CNVRTR1_DACPOLR_INVERT \
+	RV(FV_CNVRTR1_DACPOLR_INVERT, FB_CNVRTR1_DACPOLR)
+#define RV_CNVRTR1_DACPOLR_NORMAL \
+	RV(FV_CNVRTR1_DACPOLR_NORMAL, FB_CNVRTR1_DACPOLR)
+#define RV_CNVRTR1_DACPOLL_INVERT \
+	RV(FV_CNVRTR1_DACPOLL_INVERT, FB_CNVRTR1_DACPOLL)
+#define RV_CNVRTR1_DACPOLL_NORMAL \
+	RV(FV_CNVRTR1_DACPOLL_NORMAL, FB_CNVRTR1_DACPOLL)
+#define RV_CNVRTR1_DMONOMIX_ENABLE \
+	RV(FV_CNVRTR1_DMONOMIX_ENABLE, FB_CNVRTR1_DMONOMIX)
+#define RV_CNVRTR1_DMONOMIX_DISABLE \
+	RV(FV_CNVRTR1_DMONOMIX_DISABLE, FB_CNVRTR1_DMONOMIX)
+#define RV_CNVRTR1_DACMU_ENABLE \
+	RV(FV_CNVRTR1_DACMU_ENABLE, FB_CNVRTR1_DACMU)
+#define RV_CNVRTR1_DACMU_DISABLE \
+	RV(FV_CNVRTR1_DACMU_DISABLE, FB_CNVRTR1_DACMU)
+
+/*
+ * R_DACSR (0x19)
+ *
+ * Layout implied by the offsets and masks below (mirrors R_ADCSR):
+ *   DBCM bits 7:6 | DBR bits 4:3 | DBM bits 2:0
+ */
+
+/* Field offsets */
+#define FB_DACSR_DBCM	6
+#define FB_DACSR_DBR	3
+#define FB_DACSR_DBM	0
+
+/* Field masks (unshifted) */
+#define FM_DACSR_DBCM	0x3
+#define FM_DACSR_DBR	0x3
+#define FM_DACSR_DBM	0x7
+
+/* Field values (unshifted) */
+#define FV_DACSR_DBCM_AUTO	0x0
+#define FV_DACSR_DBCM_32	0x1
+#define FV_DACSR_DBCM_40	0x2
+#define FV_DACSR_DBCM_64	0x3
+#define FV_DACSR_DBR_32	0x0
+#define FV_DACSR_DBR_44_1	0x1
+#define FV_DACSR_DBR_48	0x2
+#define FV_DACSR_DBM_PT25	0x0
+#define FV_DACSR_DBM_PT5	0x1
+#define FV_DACSR_DBM_1	0x2
+#define FV_DACSR_DBM_2	0x3
+
+/* Register masks */
+#define RM_DACSR_DBCM RM(FM_DACSR_DBCM, FB_DACSR_DBCM)
+#define RM_DACSR_DBR RM(FM_DACSR_DBR, FB_DACSR_DBR)
+#define RM_DACSR_DBM RM(FM_DACSR_DBM, FB_DACSR_DBM)
+
+/* Register values */
+#define RV_DACSR_DBCM_AUTO \
+	RV(FV_DACSR_DBCM_AUTO, FB_DACSR_DBCM)
+#define RV_DACSR_DBCM_32 \
+	RV(FV_DACSR_DBCM_32, FB_DACSR_DBCM)
+#define RV_DACSR_DBCM_40 \
+	RV(FV_DACSR_DBCM_40, FB_DACSR_DBCM)
+#define RV_DACSR_DBCM_64 \
+	RV(FV_DACSR_DBCM_64, FB_DACSR_DBCM)
+#define RV_DACSR_DBR_32 RV(FV_DACSR_DBR_32, FB_DACSR_DBR)
+#define RV_DACSR_DBR_44_1 \
+	RV(FV_DACSR_DBR_44_1, FB_DACSR_DBR)
+#define RV_DACSR_DBR_48 RV(FV_DACSR_DBR_48, FB_DACSR_DBR)
+#define RV_DACSR_DBM_PT25 \
+	RV(FV_DACSR_DBM_PT25, FB_DACSR_DBM)
+#define RV_DACSR_DBM_PT5 RV(FV_DACSR_DBM_PT5, FB_DACSR_DBM)
+#define RV_DACSR_DBM_1 RV(FV_DACSR_DBM_1, FB_DACSR_DBM)
+#define RV_DACSR_DBM_2 RV(FV_DACSR_DBM_2, FB_DACSR_DBM)
+
+/*
+ * R_PWRM1 (0x1A)
+ *
+ * Eight single-bit fields, one per bit:
+ *   BSTL 7 | BSTR 6 | PGAL 5 | PGAR 4 | ADCL 3 | ADCR 2 | MICB 1 | DIGENB 0
+ */
+
+/* Field offsets */
+#define FB_PWRM1_BSTL	7
+#define FB_PWRM1_BSTR	6
+#define FB_PWRM1_PGAL	5
+#define FB_PWRM1_PGAR	4
+#define FB_PWRM1_ADCL	3
+#define FB_PWRM1_ADCR	2
+#define FB_PWRM1_MICB	1
+#define FB_PWRM1_DIGENB	0
+
+/* Field masks (unshifted) */
+#define FM_PWRM1_BSTL	0x1
+#define FM_PWRM1_BSTR	0x1
+#define FM_PWRM1_PGAL	0x1
+#define FM_PWRM1_PGAR	0x1
+#define FM_PWRM1_ADCL	0x1
+#define FM_PWRM1_ADCR	0x1
+#define FM_PWRM1_MICB	0x1
+#define FM_PWRM1_DIGENB	0x1
+
+/*
+ * Field values (unshifted).
+ * DIGENB is the one field whose polarity is reversed: DISABLE is 0x1 and
+ * ENABLE is 0x0. NOTE(review): presumably an active-low enable -- confirm
+ * against the datasheet.
+ */
+#define FV_PWRM1_BSTL_ENABLE	0x1
+#define FV_PWRM1_BSTL_DISABLE	0x0
+#define FV_PWRM1_BSTR_ENABLE	0x1
+#define FV_PWRM1_BSTR_DISABLE	0x0
+#define FV_PWRM1_PGAL_ENABLE	0x1
+#define FV_PWRM1_PGAL_DISABLE	0x0
+#define FV_PWRM1_PGAR_ENABLE	0x1
+#define FV_PWRM1_PGAR_DISABLE	0x0
+#define FV_PWRM1_ADCL_ENABLE	0x1
+#define FV_PWRM1_ADCL_DISABLE	0x0
+#define FV_PWRM1_ADCR_ENABLE	0x1
+#define FV_PWRM1_ADCR_DISABLE	0x0
+#define FV_PWRM1_MICB_ENABLE	0x1
+#define FV_PWRM1_MICB_DISABLE	0x0
+#define FV_PWRM1_DIGENB_DISABLE	0x1
+#define FV_PWRM1_DIGENB_ENABLE	0x0
+
+/* Register masks */
+#define RM_PWRM1_BSTL RM(FM_PWRM1_BSTL, FB_PWRM1_BSTL)
+#define RM_PWRM1_BSTR RM(FM_PWRM1_BSTR, FB_PWRM1_BSTR)
+#define RM_PWRM1_PGAL RM(FM_PWRM1_PGAL, FB_PWRM1_PGAL)
+#define RM_PWRM1_PGAR RM(FM_PWRM1_PGAR, FB_PWRM1_PGAR)
+#define RM_PWRM1_ADCL RM(FM_PWRM1_ADCL, FB_PWRM1_ADCL)
+#define RM_PWRM1_ADCR RM(FM_PWRM1_ADCR, FB_PWRM1_ADCR)
+#define RM_PWRM1_MICB RM(FM_PWRM1_MICB, FB_PWRM1_MICB)
+#define RM_PWRM1_DIGENB \
+	RM(FM_PWRM1_DIGENB, FB_PWRM1_DIGENB)
+
+/* Register values */
+#define RV_PWRM1_BSTL_ENABLE \
+	RV(FV_PWRM1_BSTL_ENABLE, FB_PWRM1_BSTL)
+#define RV_PWRM1_BSTL_DISABLE \
+	RV(FV_PWRM1_BSTL_DISABLE, FB_PWRM1_BSTL)
+#define RV_PWRM1_BSTR_ENABLE \
+	RV(FV_PWRM1_BSTR_ENABLE, FB_PWRM1_BSTR)
+#define RV_PWRM1_BSTR_DISABLE \
+	RV(FV_PWRM1_BSTR_DISABLE, FB_PWRM1_BSTR)
+#define RV_PWRM1_PGAL_ENABLE \
+	RV(FV_PWRM1_PGAL_ENABLE, FB_PWRM1_PGAL)
+#define RV_PWRM1_PGAL_DISABLE \
+	RV(FV_PWRM1_PGAL_DISABLE, FB_PWRM1_PGAL)
+#define RV_PWRM1_PGAR_ENABLE \
+	RV(FV_PWRM1_PGAR_ENABLE, FB_PWRM1_PGAR)
+#define RV_PWRM1_PGAR_DISABLE \
+	RV(FV_PWRM1_PGAR_DISABLE, FB_PWRM1_PGAR)
+#define RV_PWRM1_ADCL_ENABLE \
+	RV(FV_PWRM1_ADCL_ENABLE, FB_PWRM1_ADCL)
+#define RV_PWRM1_ADCL_DISABLE \
+	RV(FV_PWRM1_ADCL_DISABLE, FB_PWRM1_ADCL)
+#define RV_PWRM1_ADCR_ENABLE \
+	RV(FV_PWRM1_ADCR_ENABLE, FB_PWRM1_ADCR)
+#define RV_PWRM1_ADCR_DISABLE \
+	RV(FV_PWRM1_ADCR_DISABLE, FB_PWRM1_ADCR)
+#define RV_PWRM1_MICB_ENABLE \
+	RV(FV_PWRM1_MICB_ENABLE, FB_PWRM1_MICB)
+#define RV_PWRM1_MICB_DISABLE \
+	RV(FV_PWRM1_MICB_DISABLE, FB_PWRM1_MICB)
+#define RV_PWRM1_DIGENB_DISABLE \
+	RV(FV_PWRM1_DIGENB_DISABLE, FB_PWRM1_DIGENB)
+#define RV_PWRM1_DIGENB_ENABLE \
+	RV(FV_PWRM1_DIGENB_ENABLE, FB_PWRM1_DIGENB)
+
+/*
+ * R_PWRM2 (0x1B)
+ *
+ * Eight single-bit fields, one per bit:
+ *   D2S 7 | HPL 6 | HPR 5 | SPKL 4 | SPKR 3 | INSELL 2 | INSELR 1 | VREF 0
+ */
+
+/* Field offsets */
+#define FB_PWRM2_D2S	7
+#define FB_PWRM2_HPL	6
+#define FB_PWRM2_HPR	5
+#define FB_PWRM2_SPKL	4
+#define FB_PWRM2_SPKR	3
+#define FB_PWRM2_INSELL	2
+#define FB_PWRM2_INSELR	1
+#define FB_PWRM2_VREF	0
+
+/* Field masks (unshifted) */
+#define FM_PWRM2_D2S	0x1
+#define FM_PWRM2_HPL	0x1
+#define FM_PWRM2_HPR	0x1
+#define FM_PWRM2_SPKL	0x1
+#define FM_PWRM2_SPKR	0x1
+#define FM_PWRM2_INSELL	0x1
+#define FM_PWRM2_INSELR	0x1
+#define FM_PWRM2_VREF	0x1
+
+/* Field values (unshifted) */
+#define FV_PWRM2_D2S_ENABLE	0x1
+#define FV_PWRM2_D2S_DISABLE	0x0
+#define FV_PWRM2_HPL_ENABLE	0x1
+#define FV_PWRM2_HPL_DISABLE	0x0
+#define FV_PWRM2_HPR_ENABLE	0x1
+#define FV_PWRM2_HPR_DISABLE	0x0
+#define FV_PWRM2_SPKL_ENABLE	0x1
+#define FV_PWRM2_SPKL_DISABLE	0x0
+#define FV_PWRM2_SPKR_ENABLE	0x1
+#define FV_PWRM2_SPKR_DISABLE	0x0
+#define FV_PWRM2_INSELL_ENABLE	0x1
+#define FV_PWRM2_INSELL_DISABLE	0x0
+#define FV_PWRM2_INSELR_ENABLE	0x1
+#define FV_PWRM2_INSELR_DISABLE	0x0
+#define FV_PWRM2_VREF_ENABLE	0x1
+#define FV_PWRM2_VREF_DISABLE	0x0
+
+/* Register masks */
+#define RM_PWRM2_D2S RM(FM_PWRM2_D2S, FB_PWRM2_D2S)
+#define RM_PWRM2_HPL RM(FM_PWRM2_HPL, FB_PWRM2_HPL)
+#define RM_PWRM2_HPR RM(FM_PWRM2_HPR, FB_PWRM2_HPR)
+#define RM_PWRM2_SPKL RM(FM_PWRM2_SPKL, FB_PWRM2_SPKL)
+#define RM_PWRM2_SPKR RM(FM_PWRM2_SPKR, FB_PWRM2_SPKR)
+#define RM_PWRM2_INSELL \
+	RM(FM_PWRM2_INSELL, FB_PWRM2_INSELL)
+#define RM_PWRM2_INSELR \
+	RM(FM_PWRM2_INSELR, FB_PWRM2_INSELR)
+#define RM_PWRM2_VREF RM(FM_PWRM2_VREF, FB_PWRM2_VREF)
+
+/* Register values */
+#define RV_PWRM2_D2S_ENABLE \
+	RV(FV_PWRM2_D2S_ENABLE, FB_PWRM2_D2S)
+#define RV_PWRM2_D2S_DISABLE \
+	RV(FV_PWRM2_D2S_DISABLE, FB_PWRM2_D2S)
+#define RV_PWRM2_HPL_ENABLE \
+	RV(FV_PWRM2_HPL_ENABLE, FB_PWRM2_HPL)
+#define RV_PWRM2_HPL_DISABLE \
+	RV(FV_PWRM2_HPL_DISABLE, FB_PWRM2_HPL)
+#define RV_PWRM2_HPR_ENABLE \
+	RV(FV_PWRM2_HPR_ENABLE, FB_PWRM2_HPR)
+#define RV_PWRM2_HPR_DISABLE \
+	RV(FV_PWRM2_HPR_DISABLE, FB_PWRM2_HPR)
+#define RV_PWRM2_SPKL_ENABLE \
+	RV(FV_PWRM2_SPKL_ENABLE, FB_PWRM2_SPKL)
+#define RV_PWRM2_SPKL_DISABLE \
+	RV(FV_PWRM2_SPKL_DISABLE, FB_PWRM2_SPKL)
+#define RV_PWRM2_SPKR_ENABLE \
+	RV(FV_PWRM2_SPKR_ENABLE, FB_PWRM2_SPKR)
+#define RV_PWRM2_SPKR_DISABLE \
+	RV(FV_PWRM2_SPKR_DISABLE, FB_PWRM2_SPKR)
+#define RV_PWRM2_INSELL_ENABLE \
+	RV(FV_PWRM2_INSELL_ENABLE, FB_PWRM2_INSELL)
+
+/*
+ * R_PWRM2 register values (continued).
+ * RV()/RM() are defined outside this hunk; presumably they shift a field
+ * value/mask left by the field offset -- confirm against their definitions.
+ */
+#define RV_PWRM2_INSELL_DISABLE \
+	RV(FV_PWRM2_INSELL_DISABLE, FB_PWRM2_INSELL)
+#define RV_PWRM2_INSELR_ENABLE \
+	RV(FV_PWRM2_INSELR_ENABLE, FB_PWRM2_INSELR)
+#define RV_PWRM2_INSELR_DISABLE \
+	RV(FV_PWRM2_INSELR_DISABLE, FB_PWRM2_INSELR)
+#define RV_PWRM2_VREF_ENABLE \
+	RV(FV_PWRM2_VREF_ENABLE, FB_PWRM2_VREF)
+#define RV_PWRM2_VREF_DISABLE \
+	RV(FV_PWRM2_VREF_DISABLE, FB_PWRM2_VREF)
+
+/*
+ * R_CONFIG0 (0x1F)
+ *
+ * Layout implied by the offsets and masks below:
+ *   ASDM bits 7:6 | DSDM bits 5:4 | DC_BYPASS bit 1 | SD_FORCE_ON bit 0
+ * ASDM/DSDM name encodings 0x1..0x3 only; 0x0 has no named value.
+ */
+
+/* Field offsets */
+#define FB_CONFIG0_ASDM	6
+#define FB_CONFIG0_DSDM	4
+#define FB_CONFIG0_DC_BYPASS	1
+#define FB_CONFIG0_SD_FORCE_ON	0
+
+/* Field masks (unshifted) */
+#define FM_CONFIG0_ASDM	0x3
+#define FM_CONFIG0_DSDM	0x3
+#define FM_CONFIG0_DC_BYPASS	0x1
+#define FM_CONFIG0_SD_FORCE_ON	0x1
+
+/* Field values (unshifted) */
+#define FV_CONFIG0_ASDM_HALF	0x1
+#define FV_CONFIG0_ASDM_FULL	0x2
+#define FV_CONFIG0_ASDM_AUTO	0x3
+#define FV_CONFIG0_DSDM_HALF	0x1
+#define FV_CONFIG0_DSDM_FULL	0x2
+#define FV_CONFIG0_DSDM_AUTO	0x3
+#define FV_CONFIG0_DC_BYPASS_ENABLE	0x1
+#define FV_CONFIG0_DC_BYPASS_DISABLE	0x0
+#define FV_CONFIG0_SD_FORCE_ON_ENABLE	0x1
+#define FV_CONFIG0_SD_FORCE_ON_DISABLE	0x0
+
+/* Register masks */
+#define RM_CONFIG0_ASDM \
+	RM(FM_CONFIG0_ASDM, FB_CONFIG0_ASDM)
+#define RM_CONFIG0_DSDM \
+	RM(FM_CONFIG0_DSDM, FB_CONFIG0_DSDM)
+#define RM_CONFIG0_DC_BYPASS \
+	RM(FM_CONFIG0_DC_BYPASS, FB_CONFIG0_DC_BYPASS)
+#define RM_CONFIG0_SD_FORCE_ON \
+	RM(FM_CONFIG0_SD_FORCE_ON, FB_CONFIG0_SD_FORCE_ON)
+
+/* Register values */
+#define RV_CONFIG0_ASDM_HALF \
+	RV(FV_CONFIG0_ASDM_HALF, FB_CONFIG0_ASDM)
+#define RV_CONFIG0_ASDM_FULL \
+	RV(FV_CONFIG0_ASDM_FULL, FB_CONFIG0_ASDM)
+#define RV_CONFIG0_ASDM_AUTO \
+	RV(FV_CONFIG0_ASDM_AUTO, FB_CONFIG0_ASDM)
+#define RV_CONFIG0_DSDM_HALF \
+	RV(FV_CONFIG0_DSDM_HALF, FB_CONFIG0_DSDM)
+#define RV_CONFIG0_DSDM_FULL \
+	RV(FV_CONFIG0_DSDM_FULL, FB_CONFIG0_DSDM)
+#define RV_CONFIG0_DSDM_AUTO \
+	RV(FV_CONFIG0_DSDM_AUTO, FB_CONFIG0_DSDM)
+#define RV_CONFIG0_DC_BYPASS_ENABLE \
+	RV(FV_CONFIG0_DC_BYPASS_ENABLE, FB_CONFIG0_DC_BYPASS)
+#define RV_CONFIG0_DC_BYPASS_DISABLE \
+	RV(FV_CONFIG0_DC_BYPASS_DISABLE, FB_CONFIG0_DC_BYPASS)
+#define RV_CONFIG0_SD_FORCE_ON_ENABLE \
+	RV(FV_CONFIG0_SD_FORCE_ON_ENABLE, FB_CONFIG0_SD_FORCE_ON)
+#define RV_CONFIG0_SD_FORCE_ON_DISABLE \
+	RV(FV_CONFIG0_SD_FORCE_ON_DISABLE, FB_CONFIG0_SD_FORCE_ON)
+
+/*
+ * R_CONFIG1 (0x20)
+ *
+ * Layout implied by the offsets and masks below:
+ *   EQ2_EN bit 7 | EQ2_BE bits 6:4 | EQ1_EN bit 3 | EQ1_BE bits 2:0
+ * Both *_BE fields name encodings 0x0..0x6; 0x7 has no named value.
+ */
+
+/* Field offsets */
+#define FB_CONFIG1_EQ2_EN	7
+#define FB_CONFIG1_EQ2_BE	4
+#define FB_CONFIG1_EQ1_EN	3
+#define FB_CONFIG1_EQ1_BE	0
+
+/* Field masks (unshifted) */
+#define FM_CONFIG1_EQ2_EN	0x1
+#define FM_CONFIG1_EQ2_BE	0x7
+#define FM_CONFIG1_EQ1_EN	0x1
+#define FM_CONFIG1_EQ1_BE	0x7
+
+/* Field values (unshifted) */
+#define FV_CONFIG1_EQ2_EN_ENABLE	0x1
+#define FV_CONFIG1_EQ2_EN_DISABLE	0x0
+#define FV_CONFIG1_EQ2_BE_PRE	0x0
+#define FV_CONFIG1_EQ2_BE_PRE_EQ_0	0x1
+#define FV_CONFIG1_EQ2_BE_PRE_EQ0_1	0x2
+#define FV_CONFIG1_EQ2_BE_PRE_EQ0_2	0x3
+#define FV_CONFIG1_EQ2_BE_PRE_EQ0_3	0x4
+#define FV_CONFIG1_EQ2_BE_PRE_EQ0_4	0x5
+#define FV_CONFIG1_EQ2_BE_PRE_EQ0_5	0x6
+#define FV_CONFIG1_EQ1_EN_ENABLE	0x1
+#define FV_CONFIG1_EQ1_EN_DISABLE	0x0
+#define FV_CONFIG1_EQ1_BE_PRE	0x0
+#define FV_CONFIG1_EQ1_BE_PRE_EQ_0	0x1
+#define FV_CONFIG1_EQ1_BE_PRE_EQ0_1	0x2
+#define FV_CONFIG1_EQ1_BE_PRE_EQ0_2	0x3
+#define FV_CONFIG1_EQ1_BE_PRE_EQ0_3	0x4
+#define FV_CONFIG1_EQ1_BE_PRE_EQ0_4	0x5
+#define FV_CONFIG1_EQ1_BE_PRE_EQ0_5	0x6
+
+/* Register masks */
+#define RM_CONFIG1_EQ2_EN \
+	RM(FM_CONFIG1_EQ2_EN, FB_CONFIG1_EQ2_EN)
+#define RM_CONFIG1_EQ2_BE \
+	RM(FM_CONFIG1_EQ2_BE, FB_CONFIG1_EQ2_BE)
+#define RM_CONFIG1_EQ1_EN \
+	RM(FM_CONFIG1_EQ1_EN, FB_CONFIG1_EQ1_EN)
+#define RM_CONFIG1_EQ1_BE \
+	RM(FM_CONFIG1_EQ1_BE, FB_CONFIG1_EQ1_BE)
+
+/* Register values */
+#define RV_CONFIG1_EQ2_EN_ENABLE \
+	RV(FV_CONFIG1_EQ2_EN_ENABLE, FB_CONFIG1_EQ2_EN)
+#define RV_CONFIG1_EQ2_EN_DISABLE \
+	RV(FV_CONFIG1_EQ2_EN_DISABLE, FB_CONFIG1_EQ2_EN)
+#define RV_CONFIG1_EQ2_BE_PRE \
+	RV(FV_CONFIG1_EQ2_BE_PRE, FB_CONFIG1_EQ2_BE)
+#define RV_CONFIG1_EQ2_BE_PRE_EQ_0 \
+	RV(FV_CONFIG1_EQ2_BE_PRE_EQ_0, FB_CONFIG1_EQ2_BE)
+#define RV_CONFIG1_EQ2_BE_PRE_EQ0_1 \
+	RV(FV_CONFIG1_EQ2_BE_PRE_EQ0_1, FB_CONFIG1_EQ2_BE)
+#define RV_CONFIG1_EQ2_BE_PRE_EQ0_2 \
+	RV(FV_CONFIG1_EQ2_BE_PRE_EQ0_2, FB_CONFIG1_EQ2_BE)
+#define RV_CONFIG1_EQ2_BE_PRE_EQ0_3 \
+	RV(FV_CONFIG1_EQ2_BE_PRE_EQ0_3, FB_CONFIG1_EQ2_BE)
+#define RV_CONFIG1_EQ2_BE_PRE_EQ0_4 \
+	RV(FV_CONFIG1_EQ2_BE_PRE_EQ0_4, FB_CONFIG1_EQ2_BE)
+#define RV_CONFIG1_EQ2_BE_PRE_EQ0_5 \
+	RV(FV_CONFIG1_EQ2_BE_PRE_EQ0_5, FB_CONFIG1_EQ2_BE)
+#define RV_CONFIG1_EQ1_EN_ENABLE \
+	RV(FV_CONFIG1_EQ1_EN_ENABLE, FB_CONFIG1_EQ1_EN)
+#define RV_CONFIG1_EQ1_EN_DISABLE \
+	RV(FV_CONFIG1_EQ1_EN_DISABLE, FB_CONFIG1_EQ1_EN)
+#define RV_CONFIG1_EQ1_BE_PRE \
+	RV(FV_CONFIG1_EQ1_BE_PRE, FB_CONFIG1_EQ1_BE)
+#define RV_CONFIG1_EQ1_BE_PRE_EQ_0 \
+	RV(FV_CONFIG1_EQ1_BE_PRE_EQ_0, FB_CONFIG1_EQ1_BE)
+#define RV_CONFIG1_EQ1_BE_PRE_EQ0_1 \
+	RV(FV_CONFIG1_EQ1_BE_PRE_EQ0_1, FB_CONFIG1_EQ1_BE)
+#define RV_CONFIG1_EQ1_BE_PRE_EQ0_2 \
+	RV(FV_CONFIG1_EQ1_BE_PRE_EQ0_2, FB_CONFIG1_EQ1_BE)
+#define RV_CONFIG1_EQ1_BE_PRE_EQ0_3 \
+	RV(FV_CONFIG1_EQ1_BE_PRE_EQ0_3, FB_CONFIG1_EQ1_BE)
+#define RV_CONFIG1_EQ1_BE_PRE_EQ0_4 \
+	RV(FV_CONFIG1_EQ1_BE_PRE_EQ0_4, FB_CONFIG1_EQ1_BE)
+#define RV_CONFIG1_EQ1_BE_PRE_EQ0_5 \
+	RV(FV_CONFIG1_EQ1_BE_PRE_EQ0_5, FB_CONFIG1_EQ1_BE)
+
+/*
+ * R_DMICCTL (0x24)
+ *
+ * Layout implied by the offsets and masks below:
+ *   DMICEN bit 7 | DMONO bit 4 | DMPHADJ bits 3:2 | DMRATE bits 1:0
+ * DMPHADJ and DMRATE have masks only; no named values are provided.
+ */
+
+/* Field offsets */
+#define FB_DMICCTL_DMICEN	7
+#define FB_DMICCTL_DMONO	4
+#define FB_DMICCTL_DMPHADJ	2
+#define FB_DMICCTL_DMRATE	0
+
+/* Field masks (unshifted) */
+#define FM_DMICCTL_DMICEN	0x1
+#define FM_DMICCTL_DMONO	0x1
+#define FM_DMICCTL_DMPHADJ	0x3
+#define FM_DMICCTL_DMRATE	0x3
+
+/* Field values (unshifted) */
+#define FV_DMICCTL_DMICEN_ENABLE	0x1
+#define FV_DMICCTL_DMICEN_DISABLE	0x0
+#define FV_DMICCTL_DMONO_STEREO	0x0
+#define FV_DMICCTL_DMONO_MONO	0x1
+
+/* Register masks */
+#define RM_DMICCTL_DMICEN \
+	RM(FM_DMICCTL_DMICEN, FB_DMICCTL_DMICEN)
+#define RM_DMICCTL_DMONO \
+	RM(FM_DMICCTL_DMONO, FB_DMICCTL_DMONO)
+#define RM_DMICCTL_DMPHADJ \
+	RM(FM_DMICCTL_DMPHADJ, FB_DMICCTL_DMPHADJ)
+#define RM_DMICCTL_DMRATE \
+	RM(FM_DMICCTL_DMRATE, FB_DMICCTL_DMRATE)
+
+/* Register values */
+#define RV_DMICCTL_DMICEN_ENABLE \
+	RV(FV_DMICCTL_DMICEN_ENABLE, FB_DMICCTL_DMICEN)
+#define RV_DMICCTL_DMICEN_DISABLE \
+	RV(FV_DMICCTL_DMICEN_DISABLE, FB_DMICCTL_DMICEN)
+#define RV_DMICCTL_DMONO_STEREO \
+	RV(FV_DMICCTL_DMONO_STEREO, FB_DMICCTL_DMONO)
+#define RV_DMICCTL_DMONO_MONO \
+	RV(FV_DMICCTL_DMONO_MONO, FB_DMICCTL_DMONO)
+
+/*
+ * R_CLECTL (0x25)
+ *
+ * Five single-bit fields:
+ *   LVL_MODE 4 | WINDOWSEL 3 | EXP_EN 2 | LIMIT_EN 1 | COMP_EN 0
+ */
+
+/* Field offsets */
+#define FB_CLECTL_LVL_MODE	4
+#define FB_CLECTL_WINDOWSEL	3
+#define FB_CLECTL_EXP_EN	2
+#define FB_CLECTL_LIMIT_EN	1
+#define FB_CLECTL_COMP_EN	0
+
+/* Field masks (unshifted) */
+#define FM_CLECTL_LVL_MODE	0x1
+#define FM_CLECTL_WINDOWSEL	0x1
+#define FM_CLECTL_EXP_EN	0x1
+#define FM_CLECTL_LIMIT_EN	0x1
+#define FM_CLECTL_COMP_EN	0x1
+
+/* Field values (unshifted) */
+#define FV_CLECTL_LVL_MODE_AVG	0x0
+#define FV_CLECTL_LVL_MODE_PEAK	0x1
+#define FV_CLECTL_WINDOWSEL_512	0x0
+#define FV_CLECTL_WINDOWSEL_64	0x1
+#define FV_CLECTL_EXP_EN_ENABLE	0x1
+#define FV_CLECTL_EXP_EN_DISABLE	0x0
+#define FV_CLECTL_LIMIT_EN_ENABLE	0x1
+#define FV_CLECTL_LIMIT_EN_DISABLE	0x0
+#define FV_CLECTL_COMP_EN_ENABLE	0x1
+#define FV_CLECTL_COMP_EN_DISABLE	0x0
+
+/* Register masks */
+#define RM_CLECTL_LVL_MODE \
+	RM(FM_CLECTL_LVL_MODE, FB_CLECTL_LVL_MODE)
+#define RM_CLECTL_WINDOWSEL \
+	RM(FM_CLECTL_WINDOWSEL, FB_CLECTL_WINDOWSEL)
+#define RM_CLECTL_EXP_EN \
+	RM(FM_CLECTL_EXP_EN, FB_CLECTL_EXP_EN)
+#define RM_CLECTL_LIMIT_EN \
+	RM(FM_CLECTL_LIMIT_EN, FB_CLECTL_LIMIT_EN)
+#define RM_CLECTL_COMP_EN \
+	RM(FM_CLECTL_COMP_EN, FB_CLECTL_COMP_EN)
+
+/* Register values */
+#define RV_CLECTL_LVL_MODE_AVG \
+	RV(FV_CLECTL_LVL_MODE_AVG, FB_CLECTL_LVL_MODE)
+#define RV_CLECTL_LVL_MODE_PEAK \
+	RV(FV_CLECTL_LVL_MODE_PEAK, FB_CLECTL_LVL_MODE)
+#define RV_CLECTL_WINDOWSEL_512 \
+	RV(FV_CLECTL_WINDOWSEL_512, FB_CLECTL_WINDOWSEL)
+#define RV_CLECTL_WINDOWSEL_64 \
+	RV(FV_CLECTL_WINDOWSEL_64, FB_CLECTL_WINDOWSEL)
+#define RV_CLECTL_EXP_EN_ENABLE \
+	RV(FV_CLECTL_EXP_EN_ENABLE, FB_CLECTL_EXP_EN)
+#define RV_CLECTL_EXP_EN_DISABLE \
+	RV(FV_CLECTL_EXP_EN_DISABLE, FB_CLECTL_EXP_EN)
+#define RV_CLECTL_LIMIT_EN_ENABLE \
+	RV(FV_CLECTL_LIMIT_EN_ENABLE, FB_CLECTL_LIMIT_EN)
+#define RV_CLECTL_LIMIT_EN_DISABLE \
+	RV(FV_CLECTL_LIMIT_EN_DISABLE, FB_CLECTL_LIMIT_EN)
+#define RV_CLECTL_COMP_EN_ENABLE \
+	RV(FV_CLECTL_COMP_EN_ENABLE, FB_CLECTL_COMP_EN)
+#define RV_CLECTL_COMP_EN_DISABLE \
+	RV(FV_CLECTL_COMP_EN_DISABLE, FB_CLECTL_COMP_EN)
+
+/*
+ * R_MUGAIN (0x26)
+ *
+ * Single field CLEMUG in bits 4:0; the two named values are the ends of
+ * its range (0x0 and 0x1F).
+ */
+
+/* Field offsets */
+#define FB_MUGAIN_CLEMUG	0
+
+/* Field masks (unshifted) */
+#define FM_MUGAIN_CLEMUG	0x1F
+
+/* Field values (unshifted) */
+#define FV_MUGAIN_CLEMUG_46PT5DB	0x1F
+#define FV_MUGAIN_CLEMUG_0DB	0x0
+
+/* Register masks */
+#define RM_MUGAIN_CLEMUG \
+	RM(FM_MUGAIN_CLEMUG, FB_MUGAIN_CLEMUG)
+
+/* Register values */
+#define RV_MUGAIN_CLEMUG_46PT5DB \
+	RV(FV_MUGAIN_CLEMUG_46PT5DB, FB_MUGAIN_CLEMUG)
+#define RV_MUGAIN_CLEMUG_0DB \
+	RV(FV_MUGAIN_CLEMUG_0DB, FB_MUGAIN_CLEMUG)
+
+/*
+ * R_COMPTH (0x27)
+ *
+ * One full-width field in bits 7:0; the two named values are the ends of
+ * its range (0x0 and 0xFF).
+ */
+
+/* Field offsets */
+#define FB_COMPTH	0
+
+/* Field masks (unshifted) */
+#define FM_COMPTH	0xFF
+
+/* Field values (unshifted) */
+#define FV_COMPTH_0DB	0xFF
+#define FV_COMPTH_N95PT625DB	0x0
+
+/* Register masks */
+#define RM_COMPTH RM(FM_COMPTH, FB_COMPTH)
+
+/* Register values */
+#define RV_COMPTH_0DB RV(FV_COMPTH_0DB, FB_COMPTH)
+#define RV_COMPTH_N95PT625DB \
+	RV(FV_COMPTH_N95PT625DB, FB_COMPTH)
+
+ +/***************************** + * R_CMPRAT (0x28) * + *****************************/ + +/* Field Offsets */ +#define FB_CMPRAT 0 + +/* Field Masks */ +#define FM_CMPRAT 0X1F + +/* Register Masks */ +#define RM_CMPRAT RM(FM_CMPRAT, FB_CMPRAT) + +/****************************** + * R_CATKTCL (0x29) * + ******************************/ + +/* Field Offsets */ +#define FB_CATKTCL 0 + +/* Field Masks */ +#define FM_CATKTCL 0XFF + +/* Register Masks */ +#define RM_CATKTCL RM(FM_CATKTCL, FB_CATKTCL) + +/****************************** + * R_CATKTCH (0x2A) * + ******************************/ + +/* Field Offsets */ +#define FB_CATKTCH 0 + +/* Field Masks */ +#define FM_CATKTCH 0XFF + +/* Register Masks */ +#define RM_CATKTCH RM(FM_CATKTCH, FB_CATKTCH) + +/****************************** + * R_CRELTCL (0x2B) * + ******************************/ + +/* Field Offsets */ +#define FB_CRELTCL 0 + +/* Field Masks */ +#define FM_CRELTCL 0XFF + +/* Register Masks */ +#define RM_CRELTCL RM(FM_CRELTCL, FB_CRELTCL) + +/****************************** + * R_CRELTCH (0x2C) * + ******************************/ + +/* Field Offsets */ +#define FB_CRELTCH 0 + +/* Field Masks */ +#define FM_CRELTCH 0XFF + +/* Register Masks */ +#define RM_CRELTCH RM(FM_CRELTCH, FB_CRELTCH) + +/**************************** + * R_LIMTH (0x2D) * + ****************************/ + +/* Field Offsets */ +#define FB_LIMTH 0 + +/* Field Masks */ +#define FM_LIMTH 0XFF + +/* Field Values */ +#define FV_LIMTH_0DB 0xFF +#define FV_LIMTH_N95PT625DB 0x0 + +/* Register Masks */ +#define RM_LIMTH RM(FM_LIMTH, FB_LIMTH) + +/* Register Values */ +#define RV_LIMTH_0DB RV(FV_LIMTH_0DB, FB_LIMTH) +#define RV_LIMTH_N95PT625DB RV(FV_LIMTH_N95PT625DB, FB_LIMTH) + +/***************************** + * R_LIMTGT (0x2E) * + *****************************/ + +/* Field Offsets */ +#define FB_LIMTGT 0 + +/* Field Masks */ +#define FM_LIMTGT 0XFF + +/* Field Values */ +#define FV_LIMTGT_0DB 0xFF +#define FV_LIMTGT_N95PT625DB 0x0 + +/* Register 
Masks */ +#define RM_LIMTGT RM(FM_LIMTGT, FB_LIMTGT) + +/* Register Values */ +#define RV_LIMTGT_0DB RV(FV_LIMTGT_0DB, FB_LIMTGT) +#define RV_LIMTGT_N95PT625DB \ + RV(FV_LIMTGT_N95PT625DB, FB_LIMTGT) + + +/****************************** + * R_LATKTCL (0x2F) * + ******************************/ + +/* Field Offsets */ +#define FB_LATKTCL 0 + +/* Field Masks */ +#define FM_LATKTCL 0XFF + +/* Register Masks */ +#define RM_LATKTCL RM(FM_LATKTCL, FB_LATKTCL) + +/****************************** + * R_LATKTCH (0x30) * + ******************************/ + +/* Field Offsets */ +#define FB_LATKTCH 0 + +/* Field Masks */ +#define FM_LATKTCH 0XFF + +/* Register Masks */ +#define RM_LATKTCH RM(FM_LATKTCH, FB_LATKTCH) + +/****************************** + * R_LRELTCL (0x31) * + ******************************/ + +/* Field Offsets */ +#define FB_LRELTCL 0 + +/* Field Masks */ +#define FM_LRELTCL 0XFF + +/* Register Masks */ +#define RM_LRELTCL RM(FM_LRELTCL, FB_LRELTCL) + +/****************************** + * R_LRELTCH (0x32) * + ******************************/ + +/* Field Offsets */ +#define FB_LRELTCH 0 + +/* Field Masks */ +#define FM_LRELTCH 0XFF + +/* Register Masks */ +#define RM_LRELTCH RM(FM_LRELTCH, FB_LRELTCH) + +/**************************** + * R_EXPTH (0x33) * + ****************************/ + +/* Field Offsets */ +#define FB_EXPTH 0 + +/* Field Masks */ +#define FM_EXPTH 0XFF + +/* Field Values */ +#define FV_EXPTH_0DB 0xFF +#define FV_EXPTH_N95PT625DB 0x0 + +/* Register Masks */ +#define RM_EXPTH RM(FM_EXPTH, FB_EXPTH) + +/* Register Values */ +#define RV_EXPTH_0DB RV(FV_EXPTH_0DB, FB_EXPTH) +#define RV_EXPTH_N95PT625DB RV(FV_EXPTH_N95PT625DB, FB_EXPTH) + +/***************************** + * R_EXPRAT (0x34) * + *****************************/ + +/* Field Offsets */ +#define FB_EXPRAT 0 + +/* Field Masks */ +#define FM_EXPRAT 0X7 + +/* Register Masks */ +#define RM_EXPRAT RM(FM_EXPRAT, FB_EXPRAT) + +/****************************** + * R_XATKTCL (0x35) * + 
******************************/ + +/* Field Offsets */ +#define FB_XATKTCL 0 + +/* Field Masks */ +#define FM_XATKTCL 0XFF + +/* Register Masks */ +#define RM_XATKTCL RM(FM_XATKTCL, FB_XATKTCL) + +/****************************** + * R_XATKTCH (0x36) * + ******************************/ + +/* Field Offsets */ +#define FB_XATKTCH 0 + +/* Field Masks */ +#define FM_XATKTCH 0XFF + +/* Register Masks */ +#define RM_XATKTCH RM(FM_XATKTCH, FB_XATKTCH) + +/****************************** + * R_XRELTCL (0x37) * + ******************************/ + +/* Field Offsets */ +#define FB_XRELTCL 0 + +/* Field Masks */ +#define FM_XRELTCL 0XFF + +/* Register Masks */ +#define RM_XRELTCL RM(FM_XRELTCL, FB_XRELTCL) + +/****************************** + * R_XRELTCH (0x38) * + ******************************/ + +/* Field Offsets */ +#define FB_XRELTCH 0 + +/* Field Masks */ +#define FM_XRELTCH 0XFF + +/* Register Masks */ +#define RM_XRELTCH RM(FM_XRELTCH, FB_XRELTCH) + +/**************************** + * R_FXCTL (0x39) * + ****************************/ + +/* Field Offsets */ +#define FB_FXCTL_3DEN 4 +#define FB_FXCTL_TEEN 3 +#define FB_FXCTL_TNLFBYPASS 2 +#define FB_FXCTL_BEEN 1 +#define FB_FXCTL_BNLFBYPASS 0 + +/* Field Masks */ +#define FM_FXCTL_3DEN 0X1 +#define FM_FXCTL_TEEN 0X1 +#define FM_FXCTL_TNLFBYPASS 0X1 +#define FM_FXCTL_BEEN 0X1 +#define FM_FXCTL_BNLFBYPASS 0X1 + +/* Field Values */ +#define FV_FXCTL_3DEN_ENABLE 0x1 +#define FV_FXCTL_3DEN_DISABLE 0x0 +#define FV_FXCTL_TEEN_ENABLE 0x1 +#define FV_FXCTL_TEEN_DISABLE 0x0 +#define FV_FXCTL_TNLFBYPASS_ENABLE 0x1 +#define FV_FXCTL_TNLFBYPASS_DISABLE 0x0 +#define FV_FXCTL_BEEN_ENABLE 0x1 +#define FV_FXCTL_BEEN_DISABLE 0x0 +#define FV_FXCTL_BNLFBYPASS_ENABLE 0x1 +#define FV_FXCTL_BNLFBYPASS_DISABLE 0x0 + +/* Register Masks */ +#define RM_FXCTL_3DEN RM(FM_FXCTL_3DEN, FB_FXCTL_3DEN) +#define RM_FXCTL_TEEN RM(FM_FXCTL_TEEN, FB_FXCTL_TEEN) +#define RM_FXCTL_TNLFBYPASS \ + RM(FM_FXCTL_TNLFBYPASS, FB_FXCTL_TNLFBYPASS) + +#define RM_FXCTL_BEEN 
RM(FM_FXCTL_BEEN, FB_FXCTL_BEEN) +#define RM_FXCTL_BNLFBYPASS \ + RM(FM_FXCTL_BNLFBYPASS, FB_FXCTL_BNLFBYPASS) + + +/* Register Values */ +#define RV_FXCTL_3DEN_ENABLE \ + RV(FV_FXCTL_3DEN_ENABLE, FB_FXCTL_3DEN) + +#define RV_FXCTL_3DEN_DISABLE \ + RV(FV_FXCTL_3DEN_DISABLE, FB_FXCTL_3DEN) + +#define RV_FXCTL_TEEN_ENABLE \ + RV(FV_FXCTL_TEEN_ENABLE, FB_FXCTL_TEEN) + +#define RV_FXCTL_TEEN_DISABLE \ + RV(FV_FXCTL_TEEN_DISABLE, FB_FXCTL_TEEN) + +#define RV_FXCTL_TNLFBYPASS_ENABLE \ + RV(FV_FXCTL_TNLFBYPASS_ENABLE, FB_FXCTL_TNLFBYPASS) + +#define RV_FXCTL_TNLFBYPASS_DISABLE \ + RV(FV_FXCTL_TNLFBYPASS_DISABLE, FB_FXCTL_TNLFBYPASS) + +#define RV_FXCTL_BEEN_ENABLE \ + RV(FV_FXCTL_BEEN_ENABLE, FB_FXCTL_BEEN) + +#define RV_FXCTL_BEEN_DISABLE \ + RV(FV_FXCTL_BEEN_DISABLE, FB_FXCTL_BEEN) + +#define RV_FXCTL_BNLFBYPASS_ENABLE \ + RV(FV_FXCTL_BNLFBYPASS_ENABLE, FB_FXCTL_BNLFBYPASS) + +#define RV_FXCTL_BNLFBYPASS_DISABLE \ + RV(FV_FXCTL_BNLFBYPASS_DISABLE, FB_FXCTL_BNLFBYPASS) + + +/******************************* + * R_DACCRWRL (0x3A) * + *******************************/ + +/* Field Offsets */ +#define FB_DACCRWRL_DACCRWDL 0 + +/* Field Masks */ +#define FM_DACCRWRL_DACCRWDL 0XFF + +/* Register Masks */ +#define RM_DACCRWRL_DACCRWDL \ + RM(FM_DACCRWRL_DACCRWDL, FB_DACCRWRL_DACCRWDL) + + +/******************************* + * R_DACCRWRM (0x3B) * + *******************************/ + +/* Field Offsets */ +#define FB_DACCRWRM_DACCRWDM 0 + +/* Field Masks */ +#define FM_DACCRWRM_DACCRWDM 0XFF + +/* Register Masks */ +#define RM_DACCRWRM_DACCRWDM \ + RM(FM_DACCRWRM_DACCRWDM, FB_DACCRWRM_DACCRWDM) + + +/******************************* + * R_DACCRWRH (0x3C) * + *******************************/ + +/* Field Offsets */ +#define FB_DACCRWRH_DACCRWDH 0 + +/* Field Masks */ +#define FM_DACCRWRH_DACCRWDH 0XFF + +/* Register Masks */ +#define RM_DACCRWRH_DACCRWDH \ + RM(FM_DACCRWRH_DACCRWDH, FB_DACCRWRH_DACCRWDH) + + +/******************************* + * R_DACCRRDL (0x3D) * + 
*******************************/ + +/* Field Offsets */ +#define FB_DACCRRDL 0 + +/* Field Masks */ +#define FM_DACCRRDL 0XFF + +/* Register Masks */ +#define RM_DACCRRDL RM(FM_DACCRRDL, FB_DACCRRDL) + +/******************************* + * R_DACCRRDM (0x3E) * + *******************************/ + +/* Field Offsets */ +#define FB_DACCRRDM 0 + +/* Field Masks */ +#define FM_DACCRRDM 0XFF + +/* Register Masks */ +#define RM_DACCRRDM RM(FM_DACCRRDM, FB_DACCRRDM) + +/******************************* + * R_DACCRRDH (0x3F) * + *******************************/ + +/* Field Offsets */ +#define FB_DACCRRDH 0 + +/* Field Masks */ +#define FM_DACCRRDH 0XFF + +/* Register Masks */ +#define RM_DACCRRDH RM(FM_DACCRRDH, FB_DACCRRDH) + +/******************************** + * R_DACCRADDR (0x40) * + ********************************/ + +/* Field Offsets */ +#define FB_DACCRADDR_DACCRADD 0 + +/* Field Masks */ +#define FM_DACCRADDR_DACCRADD 0XFF + +/* Register Masks */ +#define RM_DACCRADDR_DACCRADD \ + RM(FM_DACCRADDR_DACCRADD, FB_DACCRADDR_DACCRADD) + + +/****************************** + * R_DCOFSEL (0x41) * + ******************************/ + +/* Field Offsets */ +#define FB_DCOFSEL_DC_COEF_SEL 0 + +/* Field Masks */ +#define FM_DCOFSEL_DC_COEF_SEL 0X7 + +/* Field Values */ +#define FV_DCOFSEL_DC_COEF_SEL_2_N8 0x0 +#define FV_DCOFSEL_DC_COEF_SEL_2_N9 0x1 +#define FV_DCOFSEL_DC_COEF_SEL_2_N10 0x2 +#define FV_DCOFSEL_DC_COEF_SEL_2_N11 0x3 +#define FV_DCOFSEL_DC_COEF_SEL_2_N12 0x4 +#define FV_DCOFSEL_DC_COEF_SEL_2_N13 0x5 +#define FV_DCOFSEL_DC_COEF_SEL_2_N14 0x6 +#define FV_DCOFSEL_DC_COEF_SEL_2_N15 0x7 + +/* Register Masks */ +#define RM_DCOFSEL_DC_COEF_SEL \ + RM(FM_DCOFSEL_DC_COEF_SEL, FB_DCOFSEL_DC_COEF_SEL) + + +/* Register Values */ +#define RV_DCOFSEL_DC_COEF_SEL_2_N8 \ + RV(FV_DCOFSEL_DC_COEF_SEL_2_N8, FB_DCOFSEL_DC_COEF_SEL) + +#define RV_DCOFSEL_DC_COEF_SEL_2_N9 \ + RV(FV_DCOFSEL_DC_COEF_SEL_2_N9, FB_DCOFSEL_DC_COEF_SEL) + +#define RV_DCOFSEL_DC_COEF_SEL_2_N10 \ + 
RV(FV_DCOFSEL_DC_COEF_SEL_2_N10, FB_DCOFSEL_DC_COEF_SEL) + +#define RV_DCOFSEL_DC_COEF_SEL_2_N11 \ + RV(FV_DCOFSEL_DC_COEF_SEL_2_N11, FB_DCOFSEL_DC_COEF_SEL) + +#define RV_DCOFSEL_DC_COEF_SEL_2_N12 \ + RV(FV_DCOFSEL_DC_COEF_SEL_2_N12, FB_DCOFSEL_DC_COEF_SEL) + +#define RV_DCOFSEL_DC_COEF_SEL_2_N13 \ + RV(FV_DCOFSEL_DC_COEF_SEL_2_N13, FB_DCOFSEL_DC_COEF_SEL) + +#define RV_DCOFSEL_DC_COEF_SEL_2_N14 \ + RV(FV_DCOFSEL_DC_COEF_SEL_2_N14, FB_DCOFSEL_DC_COEF_SEL) + +#define RV_DCOFSEL_DC_COEF_SEL_2_N15 \ + RV(FV_DCOFSEL_DC_COEF_SEL_2_N15, FB_DCOFSEL_DC_COEF_SEL) + + +/****************************** + * R_PLLCTL9 (0x4E) * + ******************************/ + +/* Field Offsets */ +#define FB_PLLCTL9_REFDIV_PLL1 0 + +/* Field Masks */ +#define FM_PLLCTL9_REFDIV_PLL1 0XFF + +/* Register Masks */ +#define RM_PLLCTL9_REFDIV_PLL1 \ + RM(FM_PLLCTL9_REFDIV_PLL1, FB_PLLCTL9_REFDIV_PLL1) + + +/****************************** + * R_PLLCTLA (0x4F) * + ******************************/ + +/* Field Offsets */ +#define FB_PLLCTLA_OUTDIV_PLL1 0 + +/* Field Masks */ +#define FM_PLLCTLA_OUTDIV_PLL1 0XFF + +/* Register Masks */ +#define RM_PLLCTLA_OUTDIV_PLL1 \ + RM(FM_PLLCTLA_OUTDIV_PLL1, FB_PLLCTLA_OUTDIV_PLL1) + + +/****************************** + * R_PLLCTLB (0x50) * + ******************************/ + +/* Field Offsets */ +#define FB_PLLCTLB_FBDIV_PLL1L 0 + +/* Field Masks */ +#define FM_PLLCTLB_FBDIV_PLL1L 0XFF + +/* Register Masks */ +#define RM_PLLCTLB_FBDIV_PLL1L \ + RM(FM_PLLCTLB_FBDIV_PLL1L, FB_PLLCTLB_FBDIV_PLL1L) + + +/****************************** + * R_PLLCTLC (0x51) * + ******************************/ + +/* Field Offsets */ +#define FB_PLLCTLC_FBDIV_PLL1H 0 + +/* Field Masks */ +#define FM_PLLCTLC_FBDIV_PLL1H 0X7 + +/* Register Masks */ +#define RM_PLLCTLC_FBDIV_PLL1H \ + RM(FM_PLLCTLC_FBDIV_PLL1H, FB_PLLCTLC_FBDIV_PLL1H) + + +/****************************** + * R_PLLCTLD (0x52) * + ******************************/ + +/* Field Offsets */ +#define FB_PLLCTLD_RZ_PLL1 3 +#define 
FB_PLLCTLD_CP_PLL1 0 + +/* Field Masks */ +#define FM_PLLCTLD_RZ_PLL1 0X7 +#define FM_PLLCTLD_CP_PLL1 0X7 + +/* Register Masks */ +#define RM_PLLCTLD_RZ_PLL1 \ + RM(FM_PLLCTLD_RZ_PLL1, FB_PLLCTLD_RZ_PLL1) + +#define RM_PLLCTLD_CP_PLL1 \ + RM(FM_PLLCTLD_CP_PLL1, FB_PLLCTLD_CP_PLL1) + + +/****************************** + * R_PLLCTLE (0x53) * + ******************************/ + +/* Field Offsets */ +#define FB_PLLCTLE_REFDIV_PLL2 0 + +/* Field Masks */ +#define FM_PLLCTLE_REFDIV_PLL2 0XFF + +/* Register Masks */ +#define RM_PLLCTLE_REFDIV_PLL2 \ + RM(FM_PLLCTLE_REFDIV_PLL2, FB_PLLCTLE_REFDIV_PLL2) + + +/****************************** + * R_PLLCTLF (0x54) * + ******************************/ + +/* Field Offsets */ +#define FB_PLLCTLF_OUTDIV_PLL2 0 + +/* Field Masks */ +#define FM_PLLCTLF_OUTDIV_PLL2 0XFF + +/* Register Masks */ +#define RM_PLLCTLF_OUTDIV_PLL2 \ + RM(FM_PLLCTLF_OUTDIV_PLL2, FB_PLLCTLF_OUTDIV_PLL2) + + +/******************************* + * R_PLLCTL10 (0x55) * + *******************************/ + +/* Field Offsets */ +#define FB_PLLCTL10_FBDIV_PLL2L 0 + +/* Field Masks */ +#define FM_PLLCTL10_FBDIV_PLL2L 0XFF + +/* Register Masks */ +#define RM_PLLCTL10_FBDIV_PLL2L \ + RM(FM_PLLCTL10_FBDIV_PLL2L, FB_PLLCTL10_FBDIV_PLL2L) + + +/******************************* + * R_PLLCTL11 (0x56) * + *******************************/ + +/* Field Offsets */ +#define FB_PLLCTL11_FBDIV_PLL2H 0 + +/* Field Masks */ +#define FM_PLLCTL11_FBDIV_PLL2H 0X7 + +/* Register Masks */ +#define RM_PLLCTL11_FBDIV_PLL2H \ + RM(FM_PLLCTL11_FBDIV_PLL2H, FB_PLLCTL11_FBDIV_PLL2H) + + +/******************************* + * R_PLLCTL12 (0x57) * + *******************************/ + +/* Field Offsets */ +#define FB_PLLCTL12_RZ_PLL2 3 +#define FB_PLLCTL12_CP_PLL2 0 + +/* Field Masks */ +#define FM_PLLCTL12_RZ_PLL2 0X7 +#define FM_PLLCTL12_CP_PLL2 0X7 + +/* Register Masks */ +#define RM_PLLCTL12_RZ_PLL2 \ + RM(FM_PLLCTL12_RZ_PLL2, FB_PLLCTL12_RZ_PLL2) + +#define RM_PLLCTL12_CP_PLL2 \ + 
RM(FM_PLLCTL12_CP_PLL2, FB_PLLCTL12_CP_PLL2) + + +/******************************* + * R_PLLCTL1B (0x60) * + *******************************/ + +/* Field Offsets */ +#define FB_PLLCTL1B_VCOI_PLL2 4 +#define FB_PLLCTL1B_VCOI_PLL1 2 + +/* Field Masks */ +#define FM_PLLCTL1B_VCOI_PLL2 0X3 +#define FM_PLLCTL1B_VCOI_PLL1 0X3 + +/* Register Masks */ +#define RM_PLLCTL1B_VCOI_PLL2 \ + RM(FM_PLLCTL1B_VCOI_PLL2, FB_PLLCTL1B_VCOI_PLL2) + +#define RM_PLLCTL1B_VCOI_PLL1 \ + RM(FM_PLLCTL1B_VCOI_PLL1, FB_PLLCTL1B_VCOI_PLL1) + + +/******************************* + * R_PLLCTL1C (0x61) * + *******************************/ + +/* Field Offsets */ +#define FB_PLLCTL1C_PDB_PLL2 2 +#define FB_PLLCTL1C_PDB_PLL1 1 + +/* Field Masks */ +#define FM_PLLCTL1C_PDB_PLL2 0X1 +#define FM_PLLCTL1C_PDB_PLL1 0X1 + +/* Field Values */ +#define FV_PLLCTL1C_PDB_PLL2_ENABLE 0x1 +#define FV_PLLCTL1C_PDB_PLL2_DISABLE 0x0 +#define FV_PLLCTL1C_PDB_PLL1_ENABLE 0x1 +#define FV_PLLCTL1C_PDB_PLL1_DISABLE 0x0 + +/* Register Masks */ +#define RM_PLLCTL1C_PDB_PLL2 \ + RM(FM_PLLCTL1C_PDB_PLL2, FB_PLLCTL1C_PDB_PLL2) + +#define RM_PLLCTL1C_PDB_PLL1 \ + RM(FM_PLLCTL1C_PDB_PLL1, FB_PLLCTL1C_PDB_PLL1) + + +/* Register Values */ +#define RV_PLLCTL1C_PDB_PLL2_ENABLE \ + RV(FV_PLLCTL1C_PDB_PLL2_ENABLE, FB_PLLCTL1C_PDB_PLL2) + +#define RV_PLLCTL1C_PDB_PLL2_DISABLE \ + RV(FV_PLLCTL1C_PDB_PLL2_DISABLE, FB_PLLCTL1C_PDB_PLL2) + +#define RV_PLLCTL1C_PDB_PLL1_ENABLE \ + RV(FV_PLLCTL1C_PDB_PLL1_ENABLE, FB_PLLCTL1C_PDB_PLL1) + +#define RV_PLLCTL1C_PDB_PLL1_DISABLE \ + RV(FV_PLLCTL1C_PDB_PLL1_DISABLE, FB_PLLCTL1C_PDB_PLL1) + + +/******************************* + * R_TIMEBASE (0x77) * + *******************************/ + +/* Field Offsets */ +#define FB_TIMEBASE_DIVIDER 0 + +/* Field Masks */ +#define FM_TIMEBASE_DIVIDER 0XFF + +/* Register Masks */ +#define RM_TIMEBASE_DIVIDER \ + RM(FM_TIMEBASE_DIVIDER, FB_TIMEBASE_DIVIDER) + + +/***************************** + * R_DEVIDL (0x7D) * + *****************************/ + +/* Field 
Offsets */ +#define FB_DEVIDL_DIDL 0 + +/* Field Masks */ +#define FM_DEVIDL_DIDL 0XFF + +/* Register Masks */ +#define RM_DEVIDL_DIDL RM(FM_DEVIDL_DIDL, FB_DEVIDL_DIDL) + +/***************************** + * R_DEVIDH (0x7E) * + *****************************/ + +/* Field Offsets */ +#define FB_DEVIDH_DIDH 0 + +/* Field Masks */ +#define FM_DEVIDH_DIDH 0XFF + +/* Register Masks */ +#define RM_DEVIDH_DIDH RM(FM_DEVIDH_DIDH, FB_DEVIDH_DIDH) + +/**************************** + * R_RESET (0x80) * + ****************************/ + +/* Field Offsets */ +#define FB_RESET 0 + +/* Field Masks */ +#define FM_RESET 0XFF + +/* Field Values */ +#define FV_RESET_ENABLE 0x85 + +/* Register Masks */ +#define RM_RESET RM(FM_RESET, FB_RESET) + +/* Register Values */ +#define RV_RESET_ENABLE RV(FV_RESET_ENABLE, FB_RESET) + +/******************************** + * R_DACCRSTAT (0x8A) * + ********************************/ + +/* Field Offsets */ +#define FB_DACCRSTAT_DACCR_BUSY 7 + +/* Field Masks */ +#define FM_DACCRSTAT_DACCR_BUSY 0X1 + +/* Register Masks */ +#define RM_DACCRSTAT_DACCR_BUSY \ + RM(FM_DACCRSTAT_DACCR_BUSY, FB_DACCRSTAT_DACCR_BUSY) + + +/****************************** + * R_PLLCTL0 (0x8E) * + ******************************/ + +/* Field Offsets */ +#define FB_PLLCTL0_PLL2_LOCK 1 +#define FB_PLLCTL0_PLL1_LOCK 0 + +/* Field Masks */ +#define FM_PLLCTL0_PLL2_LOCK 0X1 +#define FM_PLLCTL0_PLL1_LOCK 0X1 + +/* Register Masks */ +#define RM_PLLCTL0_PLL2_LOCK \ + RM(FM_PLLCTL0_PLL2_LOCK, FB_PLLCTL0_PLL2_LOCK) + +#define RM_PLLCTL0_PLL1_LOCK \ + RM(FM_PLLCTL0_PLL1_LOCK, FB_PLLCTL0_PLL1_LOCK) + + +/******************************** + * R_PLLREFSEL (0x8F) * + ********************************/ + +/* Field Offsets */ +#define FB_PLLREFSEL_PLL2_REF_SEL 4 +#define FB_PLLREFSEL_PLL1_REF_SEL 0 + +/* Field Masks */ +#define FM_PLLREFSEL_PLL2_REF_SEL 0X7 +#define FM_PLLREFSEL_PLL1_REF_SEL 0X7 + +/* Field Values */ +#define FV_PLLREFSEL_PLL2_REF_SEL_XTAL_MCLK1 0x0 +#define 
FV_PLLREFSEL_PLL2_REF_SEL_MCLK2 0x1 +#define FV_PLLREFSEL_PLL1_REF_SEL_XTAL_MCLK1 0x0 +#define FV_PLLREFSEL_PLL1_REF_SEL_MCLK2 0x1 + +/* Register Masks */ +#define RM_PLLREFSEL_PLL2_REF_SEL \ + RM(FM_PLLREFSEL_PLL2_REF_SEL, FB_PLLREFSEL_PLL2_REF_SEL) + +#define RM_PLLREFSEL_PLL1_REF_SEL \ + RM(FM_PLLREFSEL_PLL1_REF_SEL, FB_PLLREFSEL_PLL1_REF_SEL) + + +/* Register Values */ +#define RV_PLLREFSEL_PLL2_REF_SEL_XTAL_MCLK1 \ + RV(FV_PLLREFSEL_PLL2_REF_SEL_XTAL_MCLK1, FB_PLLREFSEL_PLL2_REF_SEL) + +#define RV_PLLREFSEL_PLL2_REF_SEL_MCLK2 \ + RV(FV_PLLREFSEL_PLL2_REF_SEL_MCLK2, FB_PLLREFSEL_PLL2_REF_SEL) + +#define RV_PLLREFSEL_PLL1_REF_SEL_XTAL_MCLK1 \ + RV(FV_PLLREFSEL_PLL1_REF_SEL_XTAL_MCLK1, FB_PLLREFSEL_PLL1_REF_SEL) + +#define RV_PLLREFSEL_PLL1_REF_SEL_MCLK2 \ + RV(FV_PLLREFSEL_PLL1_REF_SEL_MCLK2, FB_PLLREFSEL_PLL1_REF_SEL) + + +/******************************* + * R_DACMBCEN (0xC7) * + *******************************/ + +/* Field Offsets */ +#define FB_DACMBCEN_MBCEN3 2 +#define FB_DACMBCEN_MBCEN2 1 +#define FB_DACMBCEN_MBCEN1 0 + +/* Field Masks */ +#define FM_DACMBCEN_MBCEN3 0X1 +#define FM_DACMBCEN_MBCEN2 0X1 +#define FM_DACMBCEN_MBCEN1 0X1 + +/* Register Masks */ +#define RM_DACMBCEN_MBCEN3 \ + RM(FM_DACMBCEN_MBCEN3, FB_DACMBCEN_MBCEN3) + +#define RM_DACMBCEN_MBCEN2 \ + RM(FM_DACMBCEN_MBCEN2, FB_DACMBCEN_MBCEN2) + +#define RM_DACMBCEN_MBCEN1 \ + RM(FM_DACMBCEN_MBCEN1, FB_DACMBCEN_MBCEN1) + + +/******************************** + * R_DACMBCCTL (0xC8) * + ********************************/ + +/* Field Offsets */ +#define FB_DACMBCCTL_LVLMODE3 5 +#define FB_DACMBCCTL_WINSEL3 4 +#define FB_DACMBCCTL_LVLMODE2 3 +#define FB_DACMBCCTL_WINSEL2 2 +#define FB_DACMBCCTL_LVLMODE1 1 +#define FB_DACMBCCTL_WINSEL1 0 + +/* Field Masks */ +#define FM_DACMBCCTL_LVLMODE3 0X1 +#define FM_DACMBCCTL_WINSEL3 0X1 +#define FM_DACMBCCTL_LVLMODE2 0X1 +#define FM_DACMBCCTL_WINSEL2 0X1 +#define FM_DACMBCCTL_LVLMODE1 0X1 +#define FM_DACMBCCTL_WINSEL1 0X1 + +/* Register Masks */ +#define 
RM_DACMBCCTL_LVLMODE3 \ + RM(FM_DACMBCCTL_LVLMODE3, FB_DACMBCCTL_LVLMODE3) + +#define RM_DACMBCCTL_WINSEL3 \ + RM(FM_DACMBCCTL_WINSEL3, FB_DACMBCCTL_WINSEL3) + +#define RM_DACMBCCTL_LVLMODE2 \ + RM(FM_DACMBCCTL_LVLMODE2, FB_DACMBCCTL_LVLMODE2) + +#define RM_DACMBCCTL_WINSEL2 \ + RM(FM_DACMBCCTL_WINSEL2, FB_DACMBCCTL_WINSEL2) + +#define RM_DACMBCCTL_LVLMODE1 \ + RM(FM_DACMBCCTL_LVLMODE1, FB_DACMBCCTL_LVLMODE1) + +#define RM_DACMBCCTL_WINSEL1 \ + RM(FM_DACMBCCTL_WINSEL1, FB_DACMBCCTL_WINSEL1) + + +/********************************* + * R_DACMBCMUG1 (0xC9) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCMUG1_PHASE 5 +#define FB_DACMBCMUG1_MUGAIN 0 + +/* Field Masks */ +#define FM_DACMBCMUG1_PHASE 0X1 +#define FM_DACMBCMUG1_MUGAIN 0X1F + +/* Register Masks */ +#define RM_DACMBCMUG1_PHASE \ + RM(FM_DACMBCMUG1_PHASE, FB_DACMBCMUG1_PHASE) + +#define RM_DACMBCMUG1_MUGAIN \ + RM(FM_DACMBCMUG1_MUGAIN, FB_DACMBCMUG1_MUGAIN) + + +/********************************* + * R_DACMBCTHR1 (0xCA) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCTHR1_THRESH 0 + +/* Field Masks */ +#define FM_DACMBCTHR1_THRESH 0XFF + +/* Register Masks */ +#define RM_DACMBCTHR1_THRESH \ + RM(FM_DACMBCTHR1_THRESH, FB_DACMBCTHR1_THRESH) + + +/********************************* + * R_DACMBCRAT1 (0xCB) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCRAT1_RATIO 0 + +/* Field Masks */ +#define FM_DACMBCRAT1_RATIO 0X1F + +/* Register Masks */ +#define RM_DACMBCRAT1_RATIO \ + RM(FM_DACMBCRAT1_RATIO, FB_DACMBCRAT1_RATIO) + + +/********************************** + * R_DACMBCATK1L (0xCC) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCATK1L_TCATKL 0 + +/* Field Masks */ +#define FM_DACMBCATK1L_TCATKL 0XFF + +/* Register Masks */ +#define RM_DACMBCATK1L_TCATKL \ + RM(FM_DACMBCATK1L_TCATKL, FB_DACMBCATK1L_TCATKL) + + +/********************************** + * R_DACMBCATK1H (0xCD) * + 
**********************************/ + +/* Field Offsets */ +#define FB_DACMBCATK1H_TCATKH 0 + +/* Field Masks */ +#define FM_DACMBCATK1H_TCATKH 0XFF + +/* Register Masks */ +#define RM_DACMBCATK1H_TCATKH \ + RM(FM_DACMBCATK1H_TCATKH, FB_DACMBCATK1H_TCATKH) + + +/********************************** + * R_DACMBCREL1L (0xCE) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCREL1L_TCRELL 0 + +/* Field Masks */ +#define FM_DACMBCREL1L_TCRELL 0XFF + +/* Register Masks */ +#define RM_DACMBCREL1L_TCRELL \ + RM(FM_DACMBCREL1L_TCRELL, FB_DACMBCREL1L_TCRELL) + + +/********************************** + * R_DACMBCREL1H (0xCF) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCREL1H_TCRELH 0 + +/* Field Masks */ +#define FM_DACMBCREL1H_TCRELH 0XFF + +/* Register Masks */ +#define RM_DACMBCREL1H_TCRELH \ + RM(FM_DACMBCREL1H_TCRELH, FB_DACMBCREL1H_TCRELH) + + +/********************************* + * R_DACMBCMUG2 (0xD0) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCMUG2_PHASE 5 +#define FB_DACMBCMUG2_MUGAIN 0 + +/* Field Masks */ +#define FM_DACMBCMUG2_PHASE 0X1 +#define FM_DACMBCMUG2_MUGAIN 0X1F + +/* Register Masks */ +#define RM_DACMBCMUG2_PHASE \ + RM(FM_DACMBCMUG2_PHASE, FB_DACMBCMUG2_PHASE) + +#define RM_DACMBCMUG2_MUGAIN \ + RM(FM_DACMBCMUG2_MUGAIN, FB_DACMBCMUG2_MUGAIN) + + +/********************************* + * R_DACMBCTHR2 (0xD1) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCTHR2_THRESH 0 + +/* Field Masks */ +#define FM_DACMBCTHR2_THRESH 0XFF + +/* Register Masks */ +#define RM_DACMBCTHR2_THRESH \ + RM(FM_DACMBCTHR2_THRESH, FB_DACMBCTHR2_THRESH) + + +/********************************* + * R_DACMBCRAT2 (0xD2) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCRAT2_RATIO 0 + +/* Field Masks */ +#define FM_DACMBCRAT2_RATIO 0X1F + +/* Register Masks */ +#define RM_DACMBCRAT2_RATIO \ + RM(FM_DACMBCRAT2_RATIO, 
FB_DACMBCRAT2_RATIO) + + +/********************************** + * R_DACMBCATK2L (0xD3) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCATK2L_TCATKL 0 + +/* Field Masks */ +#define FM_DACMBCATK2L_TCATKL 0XFF + +/* Register Masks */ +#define RM_DACMBCATK2L_TCATKL \ + RM(FM_DACMBCATK2L_TCATKL, FB_DACMBCATK2L_TCATKL) + + +/********************************** + * R_DACMBCATK2H (0xD4) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCATK2H_TCATKH 0 + +/* Field Masks */ +#define FM_DACMBCATK2H_TCATKH 0XFF + +/* Register Masks */ +#define RM_DACMBCATK2H_TCATKH \ + RM(FM_DACMBCATK2H_TCATKH, FB_DACMBCATK2H_TCATKH) + + +/********************************** + * R_DACMBCREL2L (0xD5) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCREL2L_TCRELL 0 + +/* Field Masks */ +#define FM_DACMBCREL2L_TCRELL 0XFF + +/* Register Masks */ +#define RM_DACMBCREL2L_TCRELL \ + RM(FM_DACMBCREL2L_TCRELL, FB_DACMBCREL2L_TCRELL) + + +/********************************** + * R_DACMBCREL2H (0xD6) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCREL2H_TCRELH 0 + +/* Field Masks */ +#define FM_DACMBCREL2H_TCRELH 0XFF + +/* Register Masks */ +#define RM_DACMBCREL2H_TCRELH \ + RM(FM_DACMBCREL2H_TCRELH, FB_DACMBCREL2H_TCRELH) + + +/********************************* + * R_DACMBCMUG3 (0xD7) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCMUG3_PHASE 5 +#define FB_DACMBCMUG3_MUGAIN 0 + +/* Field Masks */ +#define FM_DACMBCMUG3_PHASE 0X1 +#define FM_DACMBCMUG3_MUGAIN 0X1F + +/* Register Masks */ +#define RM_DACMBCMUG3_PHASE \ + RM(FM_DACMBCMUG3_PHASE, FB_DACMBCMUG3_PHASE) + +#define RM_DACMBCMUG3_MUGAIN \ + RM(FM_DACMBCMUG3_MUGAIN, FB_DACMBCMUG3_MUGAIN) + + +/********************************* + * R_DACMBCTHR3 (0xD8) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCTHR3_THRESH 0 + +/* Field Masks */ +#define 
FM_DACMBCTHR3_THRESH 0XFF + +/* Register Masks */ +#define RM_DACMBCTHR3_THRESH \ + RM(FM_DACMBCTHR3_THRESH, FB_DACMBCTHR3_THRESH) + + +/********************************* + * R_DACMBCRAT3 (0xD9) * + *********************************/ + +/* Field Offsets */ +#define FB_DACMBCRAT3_RATIO 0 + +/* Field Masks */ +#define FM_DACMBCRAT3_RATIO 0X1F + +/* Register Masks */ +#define RM_DACMBCRAT3_RATIO \ + RM(FM_DACMBCRAT3_RATIO, FB_DACMBCRAT3_RATIO) + + +/********************************** + * R_DACMBCATK3L (0xDA) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCATK3L_TCATKL 0 + +/* Field Masks */ +#define FM_DACMBCATK3L_TCATKL 0XFF + +/* Register Masks */ +#define RM_DACMBCATK3L_TCATKL \ + RM(FM_DACMBCATK3L_TCATKL, FB_DACMBCATK3L_TCATKL) + + +/********************************** + * R_DACMBCATK3H (0xDB) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCATK3H_TCATKH 0 + +/* Field Masks */ +#define FM_DACMBCATK3H_TCATKH 0XFF + +/* Register Masks */ +#define RM_DACMBCATK3H_TCATKH \ + RM(FM_DACMBCATK3H_TCATKH, FB_DACMBCATK3H_TCATKH) + + +/********************************** + * R_DACMBCREL3L (0xDC) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCREL3L_TCRELL 0 + +/* Field Masks */ +#define FM_DACMBCREL3L_TCRELL 0XFF + +/* Register Masks */ +#define RM_DACMBCREL3L_TCRELL \ + RM(FM_DACMBCREL3L_TCRELL, FB_DACMBCREL3L_TCRELL) + + +/********************************** + * R_DACMBCREL3H (0xDD) * + **********************************/ + +/* Field Offsets */ +#define FB_DACMBCREL3H_TCRELH 0 + +/* Field Masks */ +#define FM_DACMBCREL3H_TCRELH 0XFF + +/* Register Masks */ +#define RM_DACMBCREL3H_TCRELH \ + RM(FM_DACMBCREL3H_TCRELH, FB_DACMBCREL3H_TCRELH) + + +#endif /* __WOOKIE_H__ */ -- cgit v1.2.3 From a9889ed62d06ec76f41492ebdc6cc6538e761e3e Mon Sep 17 00:00:00 2001 From: Radu Pirea Date: Tue, 19 Dec 2017 17:17:59 +0200 Subject: spi: atmel: Implements transfers with bounce buffer This patch 
enables SPI DMA transfers for Atmel SAM9 SoCs and implements a bounce buffer for transfers which have vmalloc allocated buffers. Those buffers are not cache coherent even if they have been transformed into sg lists. UBIFS is affected by this cache coherency issue. In this patch I also reverted "spi: atmel: fix corrupted data issue on SAM9 family SoCs"(7094576ccdc3acfe1e06a1e2ab547add375baf7f). Signed-off-by: Radu Pirea Acked-by: Nicolas Ferre Signed-off-by: Mark Brown --- drivers/spi/spi-atmel.c | 113 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 84 insertions(+), 29 deletions(-) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 669470971023..4a11fc0d4136 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -291,6 +291,10 @@ struct atmel_spi { struct spi_transfer *current_transfer; int current_remaining_bytes; int done_status; + dma_addr_t dma_addr_rx_bbuf; + dma_addr_t dma_addr_tx_bbuf; + void *addr_rx_bbuf; + void *addr_tx_bbuf; struct completion xfer_completion; @@ -436,6 +440,11 @@ static void atmel_spi_unlock(struct atmel_spi *as) __releases(&as->lock) spin_unlock_irqrestore(&as->lock, as->flags); } +static inline bool atmel_spi_is_vmalloc_xfer(struct spi_transfer *xfer) +{ + return is_vmalloc_addr(xfer->tx_buf) || is_vmalloc_addr(xfer->rx_buf); +} + static inline bool atmel_spi_use_dma(struct atmel_spi *as, struct spi_transfer *xfer) { @@ -448,7 +457,12 @@ static bool atmel_spi_can_dma(struct spi_master *master, { struct atmel_spi *as = spi_master_get_devdata(master); - return atmel_spi_use_dma(as, xfer); + if (IS_ENABLED(CONFIG_SOC_SAM_V4_V5)) + return atmel_spi_use_dma(as, xfer) && + !atmel_spi_is_vmalloc_xfer(xfer); + else + return atmel_spi_use_dma(as, xfer); + } static int atmel_spi_dma_slave_config(struct atmel_spi *as, @@ -594,6 +608,11 @@ static void dma_callback(void *data) struct spi_master *master = data; struct atmel_spi *as = spi_master_get_devdata(master); + if 
(is_vmalloc_addr(as->current_transfer->rx_buf) && + IS_ENABLED(CONFIG_SOC_SAM_V4_V5)) { + memcpy(as->current_transfer->rx_buf, as->addr_rx_bbuf, + as->current_transfer->len); + } complete(&as->xfer_completion); } @@ -744,17 +763,41 @@ static int atmel_spi_next_xfer_dma_submit(struct spi_master *master, goto err_exit; /* Send both scatterlists */ - rxdesc = dmaengine_prep_slave_sg(rxchan, - xfer->rx_sg.sgl, xfer->rx_sg.nents, - DMA_FROM_DEVICE, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (atmel_spi_is_vmalloc_xfer(xfer) && + IS_ENABLED(CONFIG_SOC_SAM_V4_V5)) { + rxdesc = dmaengine_prep_slave_single(rxchan, + as->dma_addr_rx_bbuf, + xfer->len, + DMA_FROM_DEVICE, + DMA_PREP_INTERRUPT | + DMA_CTRL_ACK); + } else { + rxdesc = dmaengine_prep_slave_sg(rxchan, + xfer->rx_sg.sgl, + xfer->rx_sg.nents, + DMA_FROM_DEVICE, + DMA_PREP_INTERRUPT | + DMA_CTRL_ACK); + } if (!rxdesc) goto err_dma; - txdesc = dmaengine_prep_slave_sg(txchan, - xfer->tx_sg.sgl, xfer->tx_sg.nents, - DMA_TO_DEVICE, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (atmel_spi_is_vmalloc_xfer(xfer) && + IS_ENABLED(CONFIG_SOC_SAM_V4_V5)) { + memcpy(as->addr_tx_bbuf, xfer->tx_buf, xfer->len); + txdesc = dmaengine_prep_slave_single(txchan, + as->dma_addr_tx_bbuf, + xfer->len, DMA_TO_DEVICE, + DMA_PREP_INTERRUPT | + DMA_CTRL_ACK); + } else { + txdesc = dmaengine_prep_slave_sg(txchan, + xfer->tx_sg.sgl, + xfer->tx_sg.nents, + DMA_TO_DEVICE, + DMA_PREP_INTERRUPT | + DMA_CTRL_ACK); + } if (!txdesc) goto err_dma; @@ -1426,27 +1469,7 @@ static void atmel_get_caps(struct atmel_spi *as) as->caps.is_spi2 = version > 0x121; as->caps.has_wdrbt = version >= 0x210; -#ifdef CONFIG_SOC_SAM_V4_V5 - /* - * Atmel SoCs based on ARM9 (SAM9x) cores should not use spi_map_buf() - * since this later function tries to map buffers with dma_map_sg() - * even if they have not been allocated inside DMA-safe areas. - * On SoCs based on Cortex A5 (SAMA5Dx), it works anyway because for - * those ARM cores, the data cache follows the PIPT model. 
- * Also the L2 cache controller of SAMA5D2 uses the PIPT model too. - * In case of PIPT caches, there cannot be cache aliases. - * However on ARM9 cores, the data cache follows the VIVT model, hence - * the cache aliases issue can occur when buffers are allocated from - * DMA-unsafe areas, by vmalloc() for instance, where cache coherency is - * not taken into account or at least not handled completely (cache - * lines of aliases are not invalidated). - * This is not a theorical issue: it was reproduced when trying to mount - * a UBI file-system on a at91sam9g35ek board. - */ - as->caps.has_dma_support = false; -#else as->caps.has_dma_support = version >= 0x212; -#endif as->caps.has_pdc_support = version < 0x212; } @@ -1592,6 +1615,30 @@ static int atmel_spi_probe(struct platform_device *pdev) as->use_pdc = true; } + if (IS_ENABLED(CONFIG_SOC_SAM_V4_V5)) { + as->addr_rx_bbuf = dma_alloc_coherent(&pdev->dev, + SPI_MAX_DMA_XFER, + &as->dma_addr_rx_bbuf, + GFP_KERNEL | GFP_DMA); + if (!as->addr_rx_bbuf) { + as->use_dma = false; + } else { + as->addr_tx_bbuf = dma_alloc_coherent(&pdev->dev, + SPI_MAX_DMA_XFER, + &as->dma_addr_tx_bbuf, + GFP_KERNEL | GFP_DMA); + if (!as->addr_tx_bbuf) { + as->use_dma = false; + dma_free_coherent(&pdev->dev, SPI_MAX_DMA_XFER, + as->addr_rx_bbuf, + as->dma_addr_rx_bbuf); + } + } + if (!as->use_dma) + dev_info(master->dev.parent, + " can not allocate dma coherent memory\n"); + } + if (as->caps.has_dma_support && !as->use_dma) dev_info(&pdev->dev, "Atmel SPI Controller using PIO only\n"); @@ -1664,6 +1711,14 @@ static int atmel_spi_remove(struct platform_device *pdev) if (as->use_dma) { atmel_spi_stop_dma(master); atmel_spi_release_dma(master); + if (IS_ENABLED(CONFIG_SOC_SAM_V4_V5)) { + dma_free_coherent(&pdev->dev, SPI_MAX_DMA_XFER, + as->addr_tx_bbuf, + as->dma_addr_tx_bbuf); + dma_free_coherent(&pdev->dev, SPI_MAX_DMA_XFER, + as->addr_rx_bbuf, + as->dma_addr_rx_bbuf); + } } spin_lock_irq(&as->lock); -- cgit v1.2.3 From 
b9e705ef7cfaf22db0daab91ad3cd33b0fa32eb9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 4 Jan 2018 14:37:05 +0000 Subject: x86/alternatives: Add missing '\n' at end of ALTERNATIVE inline asm Where an ALTERNATIVE is used in the middle of an inline asm block, this would otherwise lead to the following instruction being appended directly to the trailing ".popsection", and a failed compile. Fixes: 9cebed423c84 ("x86, alternative: Use .pushsection/.popsection") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: ak@linux.intel.com Cc: Tim Chen Cc: Peter Zijlstra Cc: Paul Turner Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180104143710.8961-8-dwmw@amazon.co.uk --- arch/x86/include/asm/alternative.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index dbfd0854651f..cf5961ca8677 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".popsection" + ".popsection\n" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR_2(oldinstr, 1, 2) \ @@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".popsection" + ".popsection\n" /* * Alternative instructions for different CPU types or capabilities. 
-- cgit v1.2.3 From 3cfd68b5ba8737d28bfcf9b6487ea4d9216b8504 Mon Sep 17 00:00:00 2001 From: gaurav jindal Date: Fri, 5 Jan 2018 14:01:30 +0100 Subject: cpuidle: Avoid NULL argument in cpuidle_switch_governor() Checks if the new governor is NULL before updating the cpuidle_curr_governor. Signed-off-by: gaurav jindal [ rjw : Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governor.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 4e78263e34a4..5d359aff3cc5 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -36,14 +36,15 @@ static struct cpuidle_governor * __cpuidle_find_governor(const char *str) /** * cpuidle_switch_governor - changes the governor * @gov: the new target governor - * - * NOTE: "gov" can be NULL to specify disabled * Must be called with cpuidle_lock acquired. */ int cpuidle_switch_governor(struct cpuidle_governor *gov) { struct cpuidle_device *dev; + if (!gov) + return -EINVAL; + if (gov == cpuidle_curr_governor) return 0; -- cgit v1.2.3 From a89bca278220a4ea57ea5e57a037262f258c7d72 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 2 Jan 2018 16:08:36 +0200 Subject: ACPI / x86: boot: Propagate error code in acpi_gsi_to_irq() acpi_get_override_irq() followed by acpi_register_gsi() returns negative error code on failure. Propagate it from acpi_gsi_to_irq() to callers. Signed-off-by: Andy Shevchenko [ rjw : Subject/changelog ] Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/boot.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4bf004bab4b2..ec3a286163c3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -619,17 +619,17 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) } rc = acpi_get_override_irq(gsi, &trigger, &polarity); - if (rc == 0) { - trigger = trigger ? 
ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; - polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; - irq = acpi_register_gsi(NULL, gsi, trigger, polarity); - if (irq >= 0) { - *irqp = irq; - return 0; - } - } + if (rc) + return rc; - return -1; + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; + irq = acpi_register_gsi(NULL, gsi, trigger, polarity); + if (irq < 0) + return irq; + + *irqp = irq; + return 0; } EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); -- cgit v1.2.3 From bdbc98abb3aa323f6323b11db39c740e6f8fc5b1 Mon Sep 17 00:00:00 2001 From: Rainer Fiebig Date: Fri, 22 Dec 2017 11:13:59 +0100 Subject: PM: hibernate: Do not subtract NR_FILE_MAPPED in minimum_image_size() s2disk/s2both may fail unnecessarily and erratically if NR_FILE_MAPPED is high - for instance when using VMs with VirtualBox and perhaps VMware Player. In those situations s2disk becomes unreliable and therefore unusable. A typical scenario is: user issues a s2disk and it fails. User issues a second s2disk immediately after that and it succeeds. And user wonders why. The problem is caused by minimum_image_size() in snapshot.c. The value it returns is roughly 100% too high because NR_FILE_MAPPED is subtracted in its calculation. Eventually the number of preallocated image pages is falsely too low. This doesn't matter as long as NR_FILE_MAPPED-values are in a normal range or in 32bit-environments as the code allows for allocation of additional pages from highmem. But with the high values generated by VirtualBox-VMs (a 2-GB-VM causes NR_FILE_MAPPED go up by 2 GB) it may lead to failure in 64bit-systems. Not subtracting NR_FILE_MAPPED in minimum_image_size() solves the problem. I've done at least hundreds of successful s2both/s2disk now on an x86_64 system (with and without VirtualBox) which gives me some confidence that this is right. It has turned s2disk/s2both from unusable into 100% reliable. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=97201 Signed-off-by: Rainer Fiebig Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index bce0464524d8..3d37c279c090 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1645,8 +1645,7 @@ static unsigned long free_unnecessary_pages(void) * [number of saveable pages] - [number of pages that can be freed in theory] * * where the second term is the sum of (1) reclaimable slab pages, (2) active - * and (3) inactive anonymous pages, (4) active and (5) inactive file pages, - * minus mapped file pages. + * and (3) inactive anonymous pages, (4) active and (5) inactive file pages. */ static unsigned long minimum_image_size(unsigned long saveable) { @@ -1656,8 +1655,7 @@ static unsigned long minimum_image_size(unsigned long saveable) + global_node_page_state(NR_ACTIVE_ANON) + global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_FILE) - + global_node_page_state(NR_INACTIVE_FILE) - - global_node_page_state(NR_FILE_MAPPED); + + global_node_page_state(NR_INACTIVE_FILE); return saveable <= size ? 0 : saveable - size; } -- cgit v1.2.3 From 7669b122085018c1f64720d11c24ae6d2549193d Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 15 Dec 2017 13:46:57 +0100 Subject: ARM: dts: da850-lcdk: Remove leading 0x and 0s from unit address Improve the DTS files by removing all the leading "0x" and zeros to fix the following dtc warnings: Warning (unit_address_format): Node /XXX unit name should not have leading "0x" and Warning (unit_address_format): Node /XXX unit name should not have leading 0s Converted using the following command: find . 
-type f \( -iname *.dts -o -iname *.dtsi \) -exec sed -i -e "s/@\([0-9a-fA-FxX\.;:#]+\)\s*{/@\L\1 {/g" -e "s/@0x\(.*\) {/@\1 {/g" -e "s/@0+\(.*\) {/@\1 {/g" {} +^C For simplicity, two sed expressions were used to solve each warning separately. To make the regex expression more robust a few other issues were resolved, namely setting unit-address to lower case, and adding a whitespace before the opening curly brace: https://elinux.org/Device_Tree_Linux#Linux_conventions As a side effect, this will also solve the warning: Warning (simple_bus_reg): Node /XXX@ simple-bus unit address format error, expected "" This is a follow up to commit 4c9847b7375a ("dt-bindings: Remove leading 0x from bindings notation") Reported-by: David Daney Suggested-by: Rob Herring Signed-off-by: Mathieu Malaterre Signed-off-by: Sekhar Nori --- arch/arm/boot/dts/da850-lcdk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index eed89e659143..a1f4d6d5a569 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -293,12 +293,12 @@ label = "u-boot env"; reg = <0 0x020000>; }; - partition@0x020000 { + partition@20000 { /* The LCDK defaults to booting from this partition */ label = "u-boot"; reg = <0x020000 0x080000>; }; - partition@0x0a0000 { + partition@a0000 { label = "free space"; reg = <0x0a0000 0>; }; -- cgit v1.2.3 From de791821c295cc61419a06fe5562288417d1bc58 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 5 Jan 2018 15:27:34 +0100 Subject: x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN Use the name associated with the particular attack which needs page table isolation for mitigation.
Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Alan Cox Cc: Jiri Koshina Cc: Linus Torvalds Cc: Tim Chen Cc: Andi Lutomirski Cc: Andi Kleen Cc: Peter Zijlstra Cc: Paul Turner Cc: Tom Lendacky Cc: Greg KH Cc: Dave Hansen Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801051525300.1724@nanos --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/mm/pti.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 07cdd1715705..21ac898df2d8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -341,6 +341,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b1be494ab4e8..2d3bd2215e5b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -900,7 +900,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); if (c->x86_vendor != X86_VENDOR_AMD) - setup_force_cpu_bug(X86_BUG_CPU_INSECURE); + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); fpu__init_system(c); diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 2da28ba97508..43d4a4a29037 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -56,13 +56,13 @@ static void __init pti_print_if_insecure(const char *reason) { - if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) + if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) 
pr_info("%s\n", reason); } static void __init pti_print_if_secure(const char *reason) { - if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) pr_info("%s\n", reason); } @@ -96,7 +96,7 @@ void __init pti_check_boottime_disable(void) } autosel: - if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) return; enable: setup_force_cpu_cap(X86_FEATURE_PTI); -- cgit v1.2.3 From fb51f1cd06f9ced7b7085a2a4636375d520431ca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Jan 2018 15:16:30 +0100 Subject: ALSA: pcm: Workaround for weird PulseAudio behavior on rewind error The commit 9027c4639ef1 ("ALSA: pcm: Call ack() whenever appl_ptr is updated") introduced the possible error code returned from the PCM rewind ioctl. Basically the change was for handling the indirect PCM more correctly, but ironically, it caused rather a side-effect: PulseAudio gets pissed off when receiving an error from rewind, throws everything away and stops processing further, resulting in the silence. It's clearly a failure in the application side, so the best would be to fix that bug in PA. OTOH, PA is mostly the only user of the rewind feature, so it's not good to slap the sole customer. This patch tries to mitigate the situation: instead of returning an error, now the rewind ioctl returns zero when the driver can't rewind. It indicates that no rewind was performed, so the behavior is consistent, at least. Fixes: 9027c4639ef1 ("ALSA: pcm: Call ack() whenever appl_ptr is updated") Cc: Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a4d92e46c459..f08772568c17 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -2580,7 +2580,7 @@ static snd_pcm_sframes_t forward_appl_ptr(struct snd_pcm_substream *substream, return ret < 0 ? 
ret : frames; } -/* decrease the appl_ptr; returns the processed frames or a negative error */ +/* decrease the appl_ptr; returns the processed frames or zero for error */ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream, snd_pcm_uframes_t frames, snd_pcm_sframes_t avail) @@ -2597,7 +2597,12 @@ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream, if (appl_ptr < 0) appl_ptr += runtime->boundary; ret = pcm_lib_apply_appl_ptr(substream, appl_ptr); - return ret < 0 ? ret : frames; + /* NOTE: we return zero for errors because PulseAudio gets depressed + * upon receiving an error from rewind ioctl and stops processing + * any longer. Returning zero means that no rewind is done, so + * it's not absolutely wrong to answer like that. + */ + return ret < 0 ? 0 : frames; } static snd_pcm_sframes_t snd_pcm_playback_rewind(struct snd_pcm_substream *substream, -- cgit v1.2.3 From 9685347aa0a5c2869058ca6ab79fd8e93084a67f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jan 2018 16:09:47 +0100 Subject: ALSA: aloop: Release cable upon open error path The aloop runtime object and its assignment in the cable are left even when opening a substream fails. This doesn't mean any memory leak, but it still keeps the invalid pointer that may be referred by the another side of the cable spontaneously, which is a potential Oops cause. Clean up the cable assignment and the empty cable upon the error path properly. 
Fixes: 597603d615d2 ("ALSA: introduce the snd-aloop module for the PCM loopback") Cc: Signed-off-by: Takashi Iwai --- sound/drivers/aloop.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index afac886ffa28..8b6a39cb7f06 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -658,12 +658,31 @@ static int rule_channels(struct snd_pcm_hw_params *params, return snd_interval_refine(hw_param_interval(params, rule->var), &t); } +static void free_cable(struct snd_pcm_substream *substream) +{ + struct loopback *loopback = substream->private_data; + int dev = get_cable_index(substream); + struct loopback_cable *cable; + + cable = loopback->cables[substream->number][dev]; + if (!cable) + return; + if (cable->streams[!substream->stream]) { + /* other stream is still alive */ + cable->streams[substream->stream] = NULL; + } else { + /* free the cable */ + loopback->cables[substream->number][dev] = NULL; + kfree(cable); + } +} + static int loopback_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm; - struct loopback_cable *cable; + struct loopback_cable *cable = NULL; int err = 0; int dev = get_cable_index(substream); @@ -681,7 +700,6 @@ static int loopback_open(struct snd_pcm_substream *substream) if (!cable) { cable = kzalloc(sizeof(*cable), GFP_KERNEL); if (!cable) { - kfree(dpcm); err = -ENOMEM; goto unlock; } @@ -723,6 +741,10 @@ static int loopback_open(struct snd_pcm_substream *substream) else runtime->hw = cable->hw; unlock: + if (err < 0) { + free_cable(substream); + kfree(dpcm); + } mutex_unlock(&loopback->cable_lock); return err; } @@ -731,20 +753,10 @@ static int loopback_close(struct snd_pcm_substream *substream) { struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm = substream->runtime->private_data; 
- struct loopback_cable *cable; - int dev = get_cable_index(substream); loopback_timer_stop(dpcm); mutex_lock(&loopback->cable_lock); - cable = loopback->cables[substream->number][dev]; - if (cable->streams[!substream->stream]) { - /* other stream is still alive */ - cable->streams[substream->stream] = NULL; - } else { - /* free the cable */ - loopback->cables[substream->number][dev] = NULL; - kfree(cable); - } + free_cable(substream); mutex_unlock(&loopback->cable_lock); return 0; } -- cgit v1.2.3 From 913a9500b94566351e8f920e7f2501c8124205b1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 00:09:06 -0700 Subject: blk-mq: remove confusing comment of blk_mq_sched_dispatch_requests Commit de1482974080 ("blk-mq: introduce .get_budget and .put_budget in blk_mq_ops") changes the function to return bool type, and then commit 1f460b63d4b3 ("blk-mq: don't restart queue when .get_budget returns BLK_STS_RESOURCE") changes it back to void, but the comment remains. Signed-off-by: Liu Bo Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c117bd8fd1f6..2ff7cf0cbf73 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -172,7 +172,6 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) WRITE_ONCE(hctx->dispatch_from, ctx); } -/* return true if hw queue need to be run again */ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; -- cgit v1.2.3 From b088b53e20c7d09b5ab84c5688e609f478e5c417 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jan 2018 16:15:33 +0100 Subject: ALSA: aloop: Fix inconsistent format due to incomplete rule The extra hw constraint rule for the formats the aloop driver introduced has a slight flaw, where it doesn't return a positive value when the mask got changed. It came from the fact that it's basically a copy&paste from snd_hw_constraint_mask64(). 
The original code is supposed to be a single-shot and it modifies the mask bits only once and never after, while what we need for aloop is the dynamic hw rule that limits the mask bits. This difference results in an inconsistent state, as the hw_refine doesn't apply the dependencies fully. The worse and surprising result is that it causes a crash in OSS emulation when multiple full-duplex reads/writes are performed concurrently (I leave why it triggers Oops to readers as a homework). For fixing this, replace a few open-codes with the standard snd_mask_*() macros. Reported-by: syzbot+3902b5220e8ca27889ca@syzkaller.appspotmail.com Fixes: b1c73fc8e697 ("ALSA: snd-aloop: Fix hw_params restrictions and checking") Cc: Signed-off-by: Takashi Iwai --- sound/drivers/aloop.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 8b6a39cb7f06..006521db487d 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -622,14 +623,12 @@ static int rule_format(struct snd_pcm_hw_params *params, { struct snd_pcm_hardware *hw = rule->private; - struct snd_mask *maskp = hw_param_mask(params, rule->var); + struct snd_mask m; - maskp->bits[0] &= (u_int32_t)hw->formats; - maskp->bits[1] &= (u_int32_t)(hw->formats >> 32); - memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */ - if (! maskp->bits[0] && !
maskp->bits[1]) - return -EINVAL; - return 0; + snd_mask_none(&m); + m.bits[0] = (u_int32_t)hw->formats; + m.bits[1] = (u_int32_t)(hw->formats >> 32); + return snd_mask_refine(hw_param_mask(params, rule->var), &m); } static int rule_rate(struct snd_pcm_hw_params *params, -- cgit v1.2.3 From 898dfe4687f460ba337a01c11549f87269a13fa2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Jan 2018 17:38:54 +0100 Subject: ALSA: aloop: Fix racy hw constraints adjustment The aloop driver tries to update the hw constraints of the connected target on the cable of the opened PCM substream. This is done by adding the extra hw constraints rules referring to the substream runtime->hw fields, while the other substream may update the runtime hw of another side on the fly. This is, however, racy and may result in inconsistent values when both PCM streams perform the prepare concurrently. One of the reasons is that it overwrites the other's runtime->hw field; which is not only racy but also broken when it's called before the open of another side finishes. And, since the reference to runtime->hw isn't protected, the concurrent write may give the partial value update and become inconsistent. This patch is an attempt to fix and clean up: - The prepare doesn't change the runtime->hw of other side any longer, but only updates the cable->hw that is referred commonly. - The extra rules refer to the loopback_pcm object instead of the runtime->hw. The actual hw is deduced from cable->hw. - The extra rules take the cable_lock to protect against the race.
Fixes: b1c73fc8e697 ("ALSA: snd-aloop: Fix hw_params restrictions and checking") Cc: Signed-off-by: Takashi Iwai --- sound/drivers/aloop.c | 51 +++++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c index 006521db487d..0333143a1fa7 100644 --- a/sound/drivers/aloop.c +++ b/sound/drivers/aloop.c @@ -306,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd) return 0; } -static void params_change_substream(struct loopback_pcm *dpcm, - struct snd_pcm_runtime *runtime) -{ - struct snd_pcm_runtime *dst_runtime; - - if (dpcm == NULL || dpcm->substream == NULL) - return; - dst_runtime = dpcm->substream->runtime; - if (dst_runtime == NULL) - return; - dst_runtime->hw = dpcm->cable->hw; -} - static void params_change(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; @@ -330,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream) cable->hw.rate_max = runtime->rate; cable->hw.channels_min = runtime->channels; cable->hw.channels_max = runtime->channels; - params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK], - runtime); - params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE], - runtime); } static int loopback_prepare(struct snd_pcm_substream *substream) @@ -621,24 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream) static int rule_format(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_mask m; snd_mask_none(&m); - m.bits[0] = (u_int32_t)hw->formats; - m.bits[1] = (u_int32_t)(hw->formats >> 32); + mutex_lock(&dpcm->loopback->cable_lock); + m.bits[0] = (u_int32_t)cable->hw.formats; + m.bits[1] = (u_int32_t)(cable->hw.formats >> 32); + 
mutex_unlock(&dpcm->loopback->cable_lock); return snd_mask_refine(hw_param_mask(params, rule->var), &m); } static int rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_interval t; - t.min = hw->rate_min; - t.max = hw->rate_max; + mutex_lock(&dpcm->loopback->cable_lock); + t.min = cable->hw.rate_min; + t.max = cable->hw.rate_max; + mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); @@ -647,11 +635,14 @@ static int rule_rate(struct snd_pcm_hw_params *params, static int rule_channels(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { - struct snd_pcm_hardware *hw = rule->private; + struct loopback_pcm *dpcm = rule->private; + struct loopback_cable *cable = dpcm->cable; struct snd_interval t; - t.min = hw->channels_min; - t.max = hw->channels_max; + mutex_lock(&dpcm->loopback->cable_lock); + t.min = cable->hw.channels_min; + t.max = cable->hw.channels_max; + mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); @@ -716,19 +707,19 @@ static int loopback_open(struct snd_pcm_substream *substream) /* are cached -> they do not reflect the actual state */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, - rule_format, &runtime->hw, + rule_format, dpcm, SNDRV_PCM_HW_PARAM_FORMAT, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, - rule_rate, &runtime->hw, + rule_rate, dpcm, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, - rule_channels, &runtime->hw, + rule_channels, dpcm, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) goto unlock; -- cgit v1.2.3 From 
0cb5b30698fdc8f6b4646012e3acb4ddce430788 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 3 Jan 2018 14:31:38 -0800 Subject: kvm: vmx: Scrub hardware GPRs at VM-exit Guest GPR values are live in the hardware GPRs at VM-exit. Do not leave any guest values in hardware GPRs after the guest GPR values are saved to the vcpu_vmx structure. This is a partial mitigation for CVE 2017-5715 and CVE 2017-5753. Specifically, it defeats the Project Zero PoC for CVE 2017-5715. Suggested-by: Eric Northup Signed-off-by: Jim Mattson Reviewed-by: Eric Northup Reviewed-by: Benjamin Serebrin Reviewed-by: Andrew Honig [Paolo: Add AMD bits, Signed-off-by: Tom Lendacky ] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 19 +++++++++++++++++++ arch/x86/kvm/vmx.c | 14 +++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index eb714f1cdf7e..bb31c801f1fc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4985,6 +4985,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%[svm]) \n\t" "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" +#endif + /* + * Clear host registers marked as clobbered to prevent + * speculative use. 
+ */ + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" +#ifdef CONFIG_X86_64 + "xor %%r8, %%r8 \n\t" + "xor %%r9, %%r9 \n\t" + "xor %%r10, %%r10 \n\t" + "xor %%r11, %%r11 \n\t" + "xor %%r12, %%r12 \n\t" + "xor %%r13, %%r13 \n\t" + "xor %%r14, %%r14 \n\t" + "xor %%r15, %%r15 \n\t" #endif "pop %%" _ASM_BP : diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8eba631c4dbd..c1e7ed371259 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9415,6 +9415,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" + "setbe %c[fail](%0)\n\t" "mov %%" _ASM_AX ", %c[rax](%0) \n\t" "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" __ASM_SIZE(pop) " %c[rcx](%0) \n\t" @@ -9431,12 +9432,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif "mov %%cr2, %%" _ASM_AX " \n\t" "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + "xor %%eax, %%eax \n\t" + "xor %%ebx, %%ebx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" -- cgit v1.2.3 From 74ede5af27c7fb91b18d8386128486290b8d44be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 5 Jan 2018 14:15:57 +0100 Subject: null_blk: remove lightnvm support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With rrpc to be 
removed, the null_blk lightnvm support is no longer functional. Remove the lightnvm implementation and maybe add it to another module in the future if someone takes on the challenge. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 220 +---------------------------------------------- 1 file changed, 3 insertions(+), 217 deletions(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index ccb9975a97fa..1e1981c6c557 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -107,7 +106,6 @@ struct nullb_device { unsigned int hw_queue_depth; /* queue depth */ unsigned int index; /* index of the disk, only valid with a disk */ unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */ - bool use_lightnvm; /* register as a LightNVM device */ bool blocking; /* blocking blk-mq device */ bool use_per_node_hctx; /* use per-node allocation for hardware context */ bool power; /* power on/off the device */ @@ -121,7 +119,6 @@ struct nullb { unsigned int index; struct request_queue *q; struct gendisk *disk; - struct nvm_dev *ndev; struct blk_mq_tag_set *tag_set; struct blk_mq_tag_set __tag_set; unsigned int queue_depth; @@ -139,7 +136,6 @@ static LIST_HEAD(nullb_list); static struct mutex lock; static int null_major; static DEFINE_IDA(nullb_indexes); -static struct kmem_cache *ppa_cache; static struct blk_mq_tag_set tag_set; enum { @@ -208,10 +204,6 @@ static int nr_devices = 1; module_param(nr_devices, int, S_IRUGO); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); -static bool g_use_lightnvm; -module_param_named(use_lightnvm, g_use_lightnvm, bool, S_IRUGO); -MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device"); - static bool g_blocking; module_param_named(blocking, g_blocking, bool, S_IRUGO); MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); @@ -345,7 +337,6 @@ NULLB_DEVICE_ATTR(blocksize, 
uint); NULLB_DEVICE_ATTR(irqmode, uint); NULLB_DEVICE_ATTR(hw_queue_depth, uint); NULLB_DEVICE_ATTR(index, uint); -NULLB_DEVICE_ATTR(use_lightnvm, bool); NULLB_DEVICE_ATTR(blocking, bool); NULLB_DEVICE_ATTR(use_per_node_hctx, bool); NULLB_DEVICE_ATTR(memory_backed, bool); @@ -455,7 +446,6 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_irqmode, &nullb_device_attr_hw_queue_depth, &nullb_device_attr_index, - &nullb_device_attr_use_lightnvm, &nullb_device_attr_blocking, &nullb_device_attr_use_per_node_hctx, &nullb_device_attr_power, @@ -573,7 +563,6 @@ static struct nullb_device *null_alloc_dev(void) dev->blocksize = g_bs; dev->irqmode = g_irqmode; dev->hw_queue_depth = g_hw_queue_depth; - dev->use_lightnvm = g_use_lightnvm; dev->blocking = g_blocking; dev->use_per_node_hctx = g_use_per_node_hctx; return dev; @@ -1423,170 +1412,6 @@ static void cleanup_queues(struct nullb *nullb) kfree(nullb->queues); } -#ifdef CONFIG_NVM - -static void null_lnvm_end_io(struct request *rq, blk_status_t status) -{ - struct nvm_rq *rqd = rq->end_io_data; - - /* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */ - rqd->error = status ? -EIO : 0; - nvm_end_io(rqd); - - blk_put_request(rq); -} - -static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) -{ - struct request_queue *q = dev->q; - struct request *rq; - struct bio *bio = rqd->bio; - - rq = blk_mq_alloc_request(q, - op_is_write(bio_op(bio)) ? 
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); - if (IS_ERR(rq)) - return -ENOMEM; - - blk_init_request_from_bio(rq, bio); - - rq->end_io_data = rqd; - - blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io); - - return 0; -} - -static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) -{ - struct nullb *nullb = dev->q->queuedata; - sector_t size = (sector_t)nullb->dev->size * 1024 * 1024ULL; - sector_t blksize; - struct nvm_id_group *grp; - - id->ver_id = 0x1; - id->vmnt = 0; - id->cap = 0x2; - id->dom = 0x1; - - id->ppaf.blk_offset = 0; - id->ppaf.blk_len = 16; - id->ppaf.pg_offset = 16; - id->ppaf.pg_len = 16; - id->ppaf.sect_offset = 32; - id->ppaf.sect_len = 8; - id->ppaf.pln_offset = 40; - id->ppaf.pln_len = 8; - id->ppaf.lun_offset = 48; - id->ppaf.lun_len = 8; - id->ppaf.ch_offset = 56; - id->ppaf.ch_len = 8; - - sector_div(size, nullb->dev->blocksize); /* convert size to pages */ - size >>= 8; /* concert size to pgs pr blk */ - grp = &id->grp; - grp->mtype = 0; - grp->fmtype = 0; - grp->num_ch = 1; - grp->num_pg = 256; - blksize = size; - size >>= 16; - grp->num_lun = size + 1; - sector_div(blksize, grp->num_lun); - grp->num_blk = blksize; - grp->num_pln = 1; - - grp->fpg_sz = nullb->dev->blocksize; - grp->csecs = nullb->dev->blocksize; - grp->trdt = 25000; - grp->trdm = 25000; - grp->tprt = 500000; - grp->tprm = 500000; - grp->tbet = 1500000; - grp->tbem = 1500000; - grp->mpos = 0x010101; /* single plane rwe */ - grp->cpar = nullb->dev->hw_queue_depth; - - return 0; -} - -static void *null_lnvm_create_dma_pool(struct nvm_dev *dev, char *name) -{ - mempool_t *virtmem_pool; - - virtmem_pool = mempool_create_slab_pool(64, ppa_cache); - if (!virtmem_pool) { - pr_err("null_blk: Unable to create virtual memory pool\n"); - return NULL; - } - - return virtmem_pool; -} - -static void null_lnvm_destroy_dma_pool(void *pool) -{ - mempool_destroy(pool); -} - -static void *null_lnvm_dev_dma_alloc(struct nvm_dev *dev, void *pool, - gfp_t mem_flags, dma_addr_t *dma_handler) 
-{ - return mempool_alloc(pool, mem_flags); -} - -static void null_lnvm_dev_dma_free(void *pool, void *entry, - dma_addr_t dma_handler) -{ - mempool_free(entry, pool); -} - -static struct nvm_dev_ops null_lnvm_dev_ops = { - .identity = null_lnvm_id, - .submit_io = null_lnvm_submit_io, - - .create_dma_pool = null_lnvm_create_dma_pool, - .destroy_dma_pool = null_lnvm_destroy_dma_pool, - .dev_dma_alloc = null_lnvm_dev_dma_alloc, - .dev_dma_free = null_lnvm_dev_dma_free, - - /* Simulate nvme protocol restriction */ - .max_phys_sect = 64, -}; - -static int null_nvm_register(struct nullb *nullb) -{ - struct nvm_dev *dev; - int rv; - - dev = nvm_alloc_dev(0); - if (!dev) - return -ENOMEM; - - dev->q = nullb->q; - memcpy(dev->name, nullb->disk_name, DISK_NAME_LEN); - dev->ops = &null_lnvm_dev_ops; - - rv = nvm_register(dev); - if (rv) { - kfree(dev); - return rv; - } - nullb->ndev = dev; - return 0; -} - -static void null_nvm_unregister(struct nullb *nullb) -{ - nvm_unregister(nullb->ndev); -} -#else -static int null_nvm_register(struct nullb *nullb) -{ - pr_err("null_blk: CONFIG_NVM needs to be enabled for LightNVM\n"); - return -EINVAL; -} -static void null_nvm_unregister(struct nullb *nullb) {} -#endif /* CONFIG_NVM */ - static void null_del_dev(struct nullb *nullb) { struct nullb_device *dev = nullb->dev; @@ -1595,10 +1420,7 @@ static void null_del_dev(struct nullb *nullb) list_del_init(&nullb->list); - if (dev->use_lightnvm) - null_nvm_unregister(nullb); - else - del_gendisk(nullb->disk); + del_gendisk(nullb->disk); if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) { hrtimer_cancel(&nullb->bw_timer); @@ -1610,8 +1432,7 @@ static void null_del_dev(struct nullb *nullb) if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); - if (!dev->use_lightnvm) - put_disk(nullb->disk); + put_disk(nullb->disk); cleanup_queues(nullb); if (null_cache_active(nullb)) null_free_device_storage(nullb->dev, true); @@ -1775,11 
+1596,6 @@ static void null_validate_conf(struct nullb_device *dev) { dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); - if (dev->use_lightnvm && dev->blocksize != 4096) - dev->blocksize = 4096; - - if (dev->use_lightnvm && dev->queue_mode != NULL_Q_MQ) - dev->queue_mode = NULL_Q_MQ; if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) { if (dev->submit_queues != nr_online_nodes) @@ -1895,11 +1711,7 @@ static int null_add_dev(struct nullb_device *dev) sprintf(nullb->disk_name, "nullb%d", nullb->index); - if (dev->use_lightnvm) - rv = null_nvm_register(nullb); - else - rv = null_gendisk_register(nullb); - + rv = null_gendisk_register(nullb); if (rv) goto out_cleanup_blk_queue; @@ -1938,18 +1750,6 @@ static int __init null_init(void) g_bs = PAGE_SIZE; } - if (g_use_lightnvm && g_bs != 4096) { - pr_warn("null_blk: LightNVM only supports 4k block size\n"); - pr_warn("null_blk: defaults block size to 4k\n"); - g_bs = 4096; - } - - if (g_use_lightnvm && g_queue_mode != NULL_Q_MQ) { - pr_warn("null_blk: LightNVM only supported for blk-mq\n"); - pr_warn("null_blk: defaults queue mode to blk-mq\n"); - g_queue_mode = NULL_Q_MQ; - } - if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) { if (g_submit_queues != nr_online_nodes) { pr_warn("null_blk: submit_queues param is set to %u.\n", @@ -1982,16 +1782,6 @@ static int __init null_init(void) goto err_conf; } - if (g_use_lightnvm) { - ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64), - 0, 0, NULL); - if (!ppa_cache) { - pr_err("null_blk: unable to create ppa cache\n"); - ret = -ENOMEM; - goto err_ppa; - } - } - for (i = 0; i < nr_devices; i++) { dev = null_alloc_dev(); if (!dev) { @@ -2015,8 +1805,6 @@ err_dev: null_del_dev(nullb); null_free_dev(dev); } - kmem_cache_destroy(ppa_cache); -err_ppa: unregister_blkdev(null_major, "nullb"); err_conf: configfs_unregister_subsystem(&nullb_subsys); @@ -2047,8 +1835,6 @@ static void __exit 
null_exit(void) if (g_queue_mode == NULL_Q_MQ && shared_tags) blk_mq_free_tag_set(&tag_set); - - kmem_cache_destroy(ppa_cache); } module_init(null_init); -- cgit v1.2.3 From aba203d155c1d58c871c7ad4bea320fb6ff5e68f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 5 Jan 2018 14:15:58 +0100 Subject: lightnvm: remove rrpc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hybrid mode for 1.2 revision was deprecated, and have no users. Remove to make it easier to move to the 2.0 revision. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/Kconfig | 7 - drivers/lightnvm/Makefile | 1 - drivers/lightnvm/rrpc.c | 1625 --------------------------------------------- drivers/lightnvm/rrpc.h | 290 -------- 4 files changed, 1923 deletions(-) delete mode 100644 drivers/lightnvm/rrpc.c delete mode 100644 drivers/lightnvm/rrpc.h diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 2a953efec4e1..10c08982185a 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -27,13 +27,6 @@ config NVM_DEBUG It is required to create/remove targets without IOCTLs. -config NVM_RRPC - tristate "Round-robin Hybrid Open-Channel SSD target" - ---help--- - Allows an open-channel SSD to be exposed as a block device to the - host. The target is implemented using a linear mapping table and - cost-based garbage collection. It is optimized for 4K IO sizes. 
- config NVM_PBLK tristate "Physical Block Device Open-Channel SSD target" ---help--- diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile index 2c3fd9d2c08c..97d9d7c71550 100644 --- a/drivers/lightnvm/Makefile +++ b/drivers/lightnvm/Makefile @@ -4,7 +4,6 @@ # obj-$(CONFIG_NVM) := core.o -obj-$(CONFIG_NVM_RRPC) += rrpc.o obj-$(CONFIG_NVM_PBLK) += pblk.o pblk-y := pblk-init.o pblk-core.o pblk-rb.o \ pblk-write.o pblk-cache.o pblk-read.o \ diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c deleted file mode 100644 index 0993c14be860..000000000000 --- a/drivers/lightnvm/rrpc.c +++ /dev/null @@ -1,1625 +0,0 @@ -/* - * Copyright (C) 2015 IT University of Copenhagen - * Initial release: Matias Bjorling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs. 
- */ - -#include "rrpc.h" - -static struct kmem_cache *rrpc_gcb_cache, *rrpc_rq_cache; -static DECLARE_RWSEM(rrpc_lock); - -static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, - struct nvm_rq *rqd, unsigned long flags); - -#define rrpc_for_each_lun(rrpc, rlun, i) \ - for ((i) = 0, rlun = &(rrpc)->luns[0]; \ - (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)]) - -static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_block *rblk = a->rblk; - unsigned int pg_offset; - - lockdep_assert_held(&rrpc->rev_lock); - - if (a->addr == ADDR_EMPTY || !rblk) - return; - - spin_lock(&rblk->lock); - - div_u64_rem(a->addr, dev->geo.sec_per_blk, &pg_offset); - WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages)); - rblk->nr_invalid_pages++; - - spin_unlock(&rblk->lock); - - rrpc->rev_trans_map[a->addr].addr = ADDR_EMPTY; -} - -static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba, - unsigned int len) -{ - sector_t i; - - spin_lock(&rrpc->rev_lock); - for (i = slba; i < slba + len; i++) { - struct rrpc_addr *gp = &rrpc->trans_map[i]; - - rrpc_page_invalidate(rrpc, gp); - gp->rblk = NULL; - } - spin_unlock(&rrpc->rev_lock); -} - -static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc, - sector_t laddr, unsigned int pages) -{ - struct nvm_rq *rqd; - struct rrpc_inflight_rq *inf; - - rqd = mempool_alloc(rrpc->rq_pool, GFP_ATOMIC); - if (!rqd) - return ERR_PTR(-ENOMEM); - - inf = rrpc_get_inflight_rq(rqd); - if (rrpc_lock_laddr(rrpc, laddr, pages, inf)) { - mempool_free(rqd, rrpc->rq_pool); - return NULL; - } - - return rqd; -} - -static void rrpc_inflight_laddr_release(struct rrpc *rrpc, struct nvm_rq *rqd) -{ - struct rrpc_inflight_rq *inf = rrpc_get_inflight_rq(rqd); - - rrpc_unlock_laddr(rrpc, inf); - - mempool_free(rqd, rrpc->rq_pool); -} - -static void rrpc_discard(struct rrpc *rrpc, struct bio *bio) -{ - sector_t slba = bio->bi_iter.bi_sector / NR_PHY_IN_LOG; - 
sector_t len = bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE; - struct nvm_rq *rqd; - - while (1) { - rqd = rrpc_inflight_laddr_acquire(rrpc, slba, len); - if (rqd) - break; - - schedule(); - } - - if (IS_ERR(rqd)) { - pr_err("rrpc: unable to acquire inflight IO\n"); - bio_io_error(bio); - return; - } - - rrpc_invalidate_range(rrpc, slba, len); - rrpc_inflight_laddr_release(rrpc, rqd); -} - -static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - - return (rblk->next_page == dev->geo.sec_per_blk); -} - -/* Calculate relative addr for the given block, considering instantiated LUNs */ -static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_lun *rlun = rblk->rlun; - - return rlun->id * dev->geo.sec_per_blk; -} - -static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_tgt_dev *dev, - struct rrpc_addr *gp) -{ - struct rrpc_block *rblk = gp->rblk; - struct rrpc_lun *rlun = rblk->rlun; - u64 addr = gp->addr; - struct ppa_addr paddr; - - paddr.ppa = addr; - paddr = rrpc_linear_to_generic_addr(&dev->geo, paddr); - paddr.g.ch = rlun->bppa.g.ch; - paddr.g.lun = rlun->bppa.g.lun; - paddr.g.blk = rblk->id; - - return paddr; -} - -/* requires lun->lock taken */ -static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *new_rblk, - struct rrpc_block **cur_rblk) -{ - struct rrpc *rrpc = rlun->rrpc; - - if (*cur_rblk) { - spin_lock(&(*cur_rblk)->lock); - WARN_ON(!block_is_full(rrpc, *cur_rblk)); - spin_unlock(&(*cur_rblk)->lock); - } - *cur_rblk = new_rblk; -} - -static struct rrpc_block *__rrpc_get_blk(struct rrpc *rrpc, - struct rrpc_lun *rlun) -{ - struct rrpc_block *rblk = NULL; - - if (list_empty(&rlun->free_list)) - goto out; - - rblk = list_first_entry(&rlun->free_list, struct rrpc_block, list); - - list_move_tail(&rblk->list, &rlun->used_list); - rblk->state = NVM_BLK_ST_TGT; - rlun->nr_free_blocks--; - -out: - return rblk; -} - 
-static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun, - unsigned long flags) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_block *rblk; - int is_gc = flags & NVM_IOTYPE_GC; - - spin_lock(&rlun->lock); - if (!is_gc && rlun->nr_free_blocks < rlun->reserved_blocks) { - pr_err("nvm: rrpc: cannot give block to non GC request\n"); - spin_unlock(&rlun->lock); - return NULL; - } - - rblk = __rrpc_get_blk(rrpc, rlun); - if (!rblk) { - pr_err("nvm: rrpc: cannot get new block\n"); - spin_unlock(&rlun->lock); - return NULL; - } - spin_unlock(&rlun->lock); - - bitmap_zero(rblk->invalid_pages, dev->geo.sec_per_blk); - rblk->next_page = 0; - rblk->nr_invalid_pages = 0; - atomic_set(&rblk->data_cmnt_size, 0); - - return rblk; -} - -static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - struct rrpc_lun *rlun = rblk->rlun; - - spin_lock(&rlun->lock); - if (rblk->state & NVM_BLK_ST_TGT) { - list_move_tail(&rblk->list, &rlun->free_list); - rlun->nr_free_blocks++; - rblk->state = NVM_BLK_ST_FREE; - } else if (rblk->state & NVM_BLK_ST_BAD) { - list_move_tail(&rblk->list, &rlun->bb_list); - rblk->state = NVM_BLK_ST_BAD; - } else { - WARN_ON_ONCE(1); - pr_err("rrpc: erroneous type (ch:%d,lun:%d,blk%d-> %u)\n", - rlun->bppa.g.ch, rlun->bppa.g.lun, - rblk->id, rblk->state); - list_move_tail(&rblk->list, &rlun->bb_list); - } - spin_unlock(&rlun->lock); -} - -static void rrpc_put_blks(struct rrpc *rrpc) -{ - struct rrpc_lun *rlun; - int i; - - for (i = 0; i < rrpc->nr_luns; i++) { - rlun = &rrpc->luns[i]; - if (rlun->cur) - rrpc_put_blk(rrpc, rlun->cur); - if (rlun->gc_cur) - rrpc_put_blk(rrpc, rlun->gc_cur); - } -} - -static struct rrpc_lun *get_next_lun(struct rrpc *rrpc) -{ - int next = atomic_inc_return(&rrpc->next_lun); - - return &rrpc->luns[next % rrpc->nr_luns]; -} - -static void rrpc_gc_kick(struct rrpc *rrpc) -{ - struct rrpc_lun *rlun; - unsigned int i; - - for (i = 0; i < rrpc->nr_luns; i++) { - rlun = &rrpc->luns[i]; - 
queue_work(rrpc->krqd_wq, &rlun->ws_gc); - } -} - -/* - * timed GC every interval. - */ -static void rrpc_gc_timer(struct timer_list *t) -{ - struct rrpc *rrpc = from_timer(rrpc, t, gc_timer); - - rrpc_gc_kick(rrpc); - mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10)); -} - -static void rrpc_end_sync_bio(struct bio *bio) -{ - struct completion *waiting = bio->bi_private; - - if (bio->bi_status) - pr_err("nvm: gc request failed (%u).\n", bio->bi_status); - - complete(waiting); -} - -/* - * rrpc_move_valid_pages -- migrate live data off the block - * @rrpc: the 'rrpc' structure - * @block: the block from which to migrate live pages - * - * Description: - * GC algorithms may call this function to migrate remaining live - * pages off the block prior to erasing it. This function blocks - * further execution until the operation is complete. - */ -static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct request_queue *q = dev->q; - struct rrpc_rev_addr *rev; - struct nvm_rq *rqd; - struct bio *bio; - struct page *page; - int slot; - int nr_sec_per_blk = dev->geo.sec_per_blk; - u64 phys_addr; - DECLARE_COMPLETION_ONSTACK(wait); - - if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk)) - return 0; - - bio = bio_alloc(GFP_NOIO, 1); - if (!bio) { - pr_err("nvm: could not alloc bio to gc\n"); - return -ENOMEM; - } - - page = mempool_alloc(rrpc->page_pool, GFP_NOIO); - - while ((slot = find_first_zero_bit(rblk->invalid_pages, - nr_sec_per_blk)) < nr_sec_per_blk) { - - /* Lock laddr */ - phys_addr = rrpc_blk_to_ppa(rrpc, rblk) + slot; - -try: - spin_lock(&rrpc->rev_lock); - /* Get logical address from physical to logical table */ - rev = &rrpc->rev_trans_map[phys_addr]; - /* already updated by previous regular write */ - if (rev->addr == ADDR_EMPTY) { - spin_unlock(&rrpc->rev_lock); - continue; - } - - rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1); - if (IS_ERR_OR_NULL(rqd)) { - 
spin_unlock(&rrpc->rev_lock); - schedule(); - goto try; - } - - spin_unlock(&rrpc->rev_lock); - - /* Perform read to do GC */ - bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr); - bio_set_op_attrs(bio, REQ_OP_READ, 0); - bio->bi_private = &wait; - bio->bi_end_io = rrpc_end_sync_bio; - - /* TODO: may fail when EXP_PG_SIZE > PAGE_SIZE */ - bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0); - - if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) { - pr_err("rrpc: gc read failed.\n"); - rrpc_inflight_laddr_release(rrpc, rqd); - goto finished; - } - wait_for_completion_io(&wait); - if (bio->bi_status) { - rrpc_inflight_laddr_release(rrpc, rqd); - goto finished; - } - - bio_reset(bio); - reinit_completion(&wait); - - bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr); - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - bio->bi_private = &wait; - bio->bi_end_io = rrpc_end_sync_bio; - - bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0); - - /* turn the command around and write the data back to a new - * address - */ - if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) { - pr_err("rrpc: gc write failed.\n"); - rrpc_inflight_laddr_release(rrpc, rqd); - goto finished; - } - wait_for_completion_io(&wait); - - rrpc_inflight_laddr_release(rrpc, rqd); - if (bio->bi_status) - goto finished; - - bio_reset(bio); - } - -finished: - mempool_free(page, rrpc->page_pool); - bio_put(bio); - - if (!bitmap_full(rblk->invalid_pages, nr_sec_per_blk)) { - pr_err("nvm: failed to garbage collect block\n"); - return -EIO; - } - - return 0; -} - -static void rrpc_block_gc(struct work_struct *work) -{ - struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc, - ws_gc); - struct rrpc *rrpc = gcb->rrpc; - struct rrpc_block *rblk = gcb->rblk; - struct rrpc_lun *rlun = rblk->rlun; - struct ppa_addr ppa; - - mempool_free(gcb, rrpc->gcb_pool); - pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' being reclaimed\n", - rlun->bppa.g.ch, rlun->bppa.g.lun, - rblk->id); - - if 
(rrpc_move_valid_pages(rrpc, rblk)) - goto put_back; - - ppa.ppa = 0; - ppa.g.ch = rlun->bppa.g.ch; - ppa.g.lun = rlun->bppa.g.lun; - ppa.g.blk = rblk->id; - - if (nvm_erase_sync(rrpc->dev, &ppa, 1)) - goto put_back; - - rrpc_put_blk(rrpc, rblk); - - return; - -put_back: - spin_lock(&rlun->lock); - list_add_tail(&rblk->prio, &rlun->prio_list); - spin_unlock(&rlun->lock); -} - -/* the block with highest number of invalid pages, will be in the beginning - * of the list - */ -static struct rrpc_block *rblk_max_invalid(struct rrpc_block *ra, - struct rrpc_block *rb) -{ - if (ra->nr_invalid_pages == rb->nr_invalid_pages) - return ra; - - return (ra->nr_invalid_pages < rb->nr_invalid_pages) ? rb : ra; -} - -/* linearly find the block with highest number of invalid pages - * requires lun->lock - */ -static struct rrpc_block *block_prio_find_max(struct rrpc_lun *rlun) -{ - struct list_head *prio_list = &rlun->prio_list; - struct rrpc_block *rblk, *max; - - BUG_ON(list_empty(prio_list)); - - max = list_first_entry(prio_list, struct rrpc_block, prio); - list_for_each_entry(rblk, prio_list, prio) - max = rblk_max_invalid(max, rblk); - - return max; -} - -static void rrpc_lun_gc(struct work_struct *work) -{ - struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc); - struct rrpc *rrpc = rlun->rrpc; - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_block_gc *gcb; - unsigned int nr_blocks_need; - - nr_blocks_need = dev->geo.blks_per_lun / GC_LIMIT_INVERSE; - - if (nr_blocks_need < rrpc->nr_luns) - nr_blocks_need = rrpc->nr_luns; - - spin_lock(&rlun->lock); - while (nr_blocks_need > rlun->nr_free_blocks && - !list_empty(&rlun->prio_list)) { - struct rrpc_block *rblk = block_prio_find_max(rlun); - - if (!rblk->nr_invalid_pages) - break; - - gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC); - if (!gcb) - break; - - list_del_init(&rblk->prio); - - WARN_ON(!block_is_full(rrpc, rblk)); - - pr_debug("rrpc: selected block 'ch:%d,lun:%d,blk:%d' for GC\n", - rlun->bppa.g.ch, 
rlun->bppa.g.lun, - rblk->id); - - gcb->rrpc = rrpc; - gcb->rblk = rblk; - INIT_WORK(&gcb->ws_gc, rrpc_block_gc); - - queue_work(rrpc->kgc_wq, &gcb->ws_gc); - - nr_blocks_need--; - } - spin_unlock(&rlun->lock); - - /* TODO: Hint that request queue can be started again */ -} - -static void rrpc_gc_queue(struct work_struct *work) -{ - struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc, - ws_gc); - struct rrpc *rrpc = gcb->rrpc; - struct rrpc_block *rblk = gcb->rblk; - struct rrpc_lun *rlun = rblk->rlun; - - spin_lock(&rlun->lock); - list_add_tail(&rblk->prio, &rlun->prio_list); - spin_unlock(&rlun->lock); - - mempool_free(gcb, rrpc->gcb_pool); - pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' full, allow GC (sched)\n", - rlun->bppa.g.ch, rlun->bppa.g.lun, - rblk->id); -} - -static const struct block_device_operations rrpc_fops = { - .owner = THIS_MODULE, -}; - -static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc) -{ - unsigned int i; - struct rrpc_lun *rlun, *max_free; - - if (!is_gc) - return get_next_lun(rrpc); - - /* during GC, we don't care about RR, instead we want to make - * sure that we maintain evenness between the block luns. - */ - max_free = &rrpc->luns[0]; - /* prevent GC-ing lun from devouring pages of a lun with - * little free blocks. We don't take the lock as we only need an - * estimate. 
- */ - rrpc_for_each_lun(rrpc, rlun, i) { - if (rlun->nr_free_blocks > max_free->nr_free_blocks) - max_free = rlun; - } - - return max_free; -} - -static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr, - struct rrpc_block *rblk, u64 paddr) -{ - struct rrpc_addr *gp; - struct rrpc_rev_addr *rev; - - BUG_ON(laddr >= rrpc->nr_sects); - - gp = &rrpc->trans_map[laddr]; - spin_lock(&rrpc->rev_lock); - if (gp->rblk) - rrpc_page_invalidate(rrpc, gp); - - gp->addr = paddr; - gp->rblk = rblk; - - rev = &rrpc->rev_trans_map[gp->addr]; - rev->addr = laddr; - spin_unlock(&rrpc->rev_lock); - - return gp; -} - -static u64 rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - u64 addr = ADDR_EMPTY; - - spin_lock(&rblk->lock); - if (block_is_full(rrpc, rblk)) - goto out; - - addr = rblk->next_page; - - rblk->next_page++; -out: - spin_unlock(&rblk->lock); - return addr; -} - -/* Map logical address to a physical page. The mapping implements a round robin - * approach and allocates a page from the next lun available. - * - * Returns rrpc_addr with the physical address and block. Returns NULL if no - * blocks in the next rlun are available. - */ -static struct ppa_addr rrpc_map_page(struct rrpc *rrpc, sector_t laddr, - int is_gc) -{ - struct nvm_tgt_dev *tgt_dev = rrpc->dev; - struct rrpc_lun *rlun; - struct rrpc_block *rblk, **cur_rblk; - struct rrpc_addr *p; - struct ppa_addr ppa; - u64 paddr; - int gc_force = 0; - - ppa.ppa = ADDR_EMPTY; - rlun = rrpc_get_lun_rr(rrpc, is_gc); - - if (!is_gc && rlun->nr_free_blocks < rrpc->nr_luns * 4) - return ppa; - - /* - * page allocation steps: - * 1. Try to allocate new page from current rblk - * 2a. If succeed, proceed to map it in and return - * 2b. If fail, first try to allocate a new block from media manger, - * and then retry step 1. Retry until the normal block pool is - * exhausted. - * 3. If exhausted, and garbage collector is requesting the block, - * go to the reserved block and retry step 1. 
- * In the case that this fails as well, or it is not GC - * requesting, report not able to retrieve a block and let the - * caller handle further processing. - */ - - spin_lock(&rlun->lock); - cur_rblk = &rlun->cur; - rblk = rlun->cur; -retry: - paddr = rrpc_alloc_addr(rrpc, rblk); - - if (paddr != ADDR_EMPTY) - goto done; - - if (!list_empty(&rlun->wblk_list)) { -new_blk: - rblk = list_first_entry(&rlun->wblk_list, struct rrpc_block, - prio); - rrpc_set_lun_cur(rlun, rblk, cur_rblk); - list_del(&rblk->prio); - goto retry; - } - spin_unlock(&rlun->lock); - - rblk = rrpc_get_blk(rrpc, rlun, gc_force); - if (rblk) { - spin_lock(&rlun->lock); - list_add_tail(&rblk->prio, &rlun->wblk_list); - /* - * another thread might already have added a new block, - * Therefore, make sure that one is used, instead of the - * one just added. - */ - goto new_blk; - } - - if (unlikely(is_gc) && !gc_force) { - /* retry from emergency gc block */ - cur_rblk = &rlun->gc_cur; - rblk = rlun->gc_cur; - gc_force = 1; - spin_lock(&rlun->lock); - goto retry; - } - - pr_err("rrpc: failed to allocate new block\n"); - return ppa; -done: - spin_unlock(&rlun->lock); - p = rrpc_update_map(rrpc, laddr, rblk, paddr); - if (!p) - return ppa; - - /* return global address */ - return rrpc_ppa_to_gaddr(tgt_dev, p); -} - -static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - struct rrpc_block_gc *gcb; - - gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC); - if (!gcb) { - pr_err("rrpc: unable to queue block for gc."); - return; - } - - gcb->rrpc = rrpc; - gcb->rblk = rblk; - - INIT_WORK(&gcb->ws_gc, rrpc_gc_queue); - queue_work(rrpc->kgc_wq, &gcb->ws_gc); -} - -static struct rrpc_lun *rrpc_ppa_to_lun(struct rrpc *rrpc, struct ppa_addr p) -{ - struct rrpc_lun *rlun = NULL; - int i; - - for (i = 0; i < rrpc->nr_luns; i++) { - if (rrpc->luns[i].bppa.g.ch == p.g.ch && - rrpc->luns[i].bppa.g.lun == p.g.lun) { - rlun = &rrpc->luns[i]; - break; - } - } - - return rlun; -} - -static void 
__rrpc_mark_bad_block(struct rrpc *rrpc, struct ppa_addr ppa) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_lun *rlun; - struct rrpc_block *rblk; - - rlun = rrpc_ppa_to_lun(rrpc, ppa); - rblk = &rlun->blocks[ppa.g.blk]; - rblk->state = NVM_BLK_ST_BAD; - - nvm_set_tgt_bb_tbl(dev, &ppa, 1, NVM_BLK_T_GRWN_BAD); -} - -static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd) -{ - void *comp_bits = &rqd->ppa_status; - struct ppa_addr ppa, prev_ppa; - int nr_ppas = rqd->nr_ppas; - int bit; - - if (rqd->nr_ppas == 1) - __rrpc_mark_bad_block(rrpc, rqd->ppa_addr); - - ppa_set_empty(&prev_ppa); - bit = -1; - while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) { - ppa = rqd->ppa_list[bit]; - if (ppa_cmp_blk(ppa, prev_ppa)) - continue; - - __rrpc_mark_bad_block(rrpc, ppa); - } -} - -static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd, - sector_t laddr, uint8_t npages) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_addr *p; - struct rrpc_block *rblk; - int cmnt_size, i; - - for (i = 0; i < npages; i++) { - p = &rrpc->trans_map[laddr + i]; - rblk = p->rblk; - - cmnt_size = atomic_inc_return(&rblk->data_cmnt_size); - if (unlikely(cmnt_size == dev->geo.sec_per_blk)) - rrpc_run_gc(rrpc, rblk); - } -} - -static void rrpc_end_io(struct nvm_rq *rqd) -{ - struct rrpc *rrpc = rqd->private; - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); - uint8_t npages = rqd->nr_ppas; - sector_t laddr = rrpc_get_laddr(rqd->bio) - npages; - - if (bio_data_dir(rqd->bio) == WRITE) { - if (rqd->error == NVM_RSP_ERR_FAILWRITE) - rrpc_mark_bad_block(rrpc, rqd); - - rrpc_end_io_write(rrpc, rrqd, laddr, npages); - } - - bio_put(rqd->bio); - - if (rrqd->flags & NVM_IOTYPE_GC) - return; - - rrpc_unlock_rq(rrpc, rqd); - - if (npages > 1) - nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list); - - mempool_free(rqd, rrpc->rq_pool); -} - -static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio 
*bio, - struct nvm_rq *rqd, unsigned long flags, int npages) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); - struct rrpc_addr *gp; - sector_t laddr = rrpc_get_laddr(bio); - int is_gc = flags & NVM_IOTYPE_GC; - int i; - - if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) { - nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list); - return NVM_IO_REQUEUE; - } - - for (i = 0; i < npages; i++) { - /* We assume that mapping occurs at 4KB granularity */ - BUG_ON(!(laddr + i < rrpc->nr_sects)); - gp = &rrpc->trans_map[laddr + i]; - - if (gp->rblk) { - rqd->ppa_list[i] = rrpc_ppa_to_gaddr(dev, gp); - } else { - BUG_ON(is_gc); - rrpc_unlock_laddr(rrpc, r); - nvm_dev_dma_free(dev->parent, rqd->ppa_list, - rqd->dma_ppa_list); - return NVM_IO_DONE; - } - } - - rqd->opcode = NVM_OP_HBREAD; - - return NVM_IO_OK; -} - -static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, - unsigned long flags) -{ - int is_gc = flags & NVM_IOTYPE_GC; - sector_t laddr = rrpc_get_laddr(bio); - struct rrpc_addr *gp; - - if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) - return NVM_IO_REQUEUE; - - BUG_ON(!(laddr < rrpc->nr_sects)); - gp = &rrpc->trans_map[laddr]; - - if (gp->rblk) { - rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp); - } else { - BUG_ON(is_gc); - rrpc_unlock_rq(rrpc, rqd); - return NVM_IO_DONE; - } - - rqd->opcode = NVM_OP_HBREAD; - - return NVM_IO_OK; -} - -static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio, - struct nvm_rq *rqd, unsigned long flags, int npages) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); - struct ppa_addr p; - sector_t laddr = rrpc_get_laddr(bio); - int is_gc = flags & NVM_IOTYPE_GC; - int i; - - if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) { - nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list); - return NVM_IO_REQUEUE; - } - - for (i = 0; i < npages; i++) { - /* We assume that mapping occurs at 4KB 
granularity */ - p = rrpc_map_page(rrpc, laddr + i, is_gc); - if (p.ppa == ADDR_EMPTY) { - BUG_ON(is_gc); - rrpc_unlock_laddr(rrpc, r); - nvm_dev_dma_free(dev->parent, rqd->ppa_list, - rqd->dma_ppa_list); - rrpc_gc_kick(rrpc); - return NVM_IO_REQUEUE; - } - - rqd->ppa_list[i] = p; - } - - rqd->opcode = NVM_OP_HBWRITE; - - return NVM_IO_OK; -} - -static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio, - struct nvm_rq *rqd, unsigned long flags) -{ - struct ppa_addr p; - int is_gc = flags & NVM_IOTYPE_GC; - sector_t laddr = rrpc_get_laddr(bio); - - if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) - return NVM_IO_REQUEUE; - - p = rrpc_map_page(rrpc, laddr, is_gc); - if (p.ppa == ADDR_EMPTY) { - BUG_ON(is_gc); - rrpc_unlock_rq(rrpc, rqd); - rrpc_gc_kick(rrpc); - return NVM_IO_REQUEUE; - } - - rqd->ppa_addr = p; - rqd->opcode = NVM_OP_HBWRITE; - - return NVM_IO_OK; -} - -static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio, - struct nvm_rq *rqd, unsigned long flags, uint8_t npages) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - - if (npages > 1) { - rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, - &rqd->dma_ppa_list); - if (!rqd->ppa_list) { - pr_err("rrpc: not able to allocate ppa list\n"); - return NVM_IO_ERR; - } - - if (bio_op(bio) == REQ_OP_WRITE) - return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags, - npages); - - return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages); - } - - if (bio_op(bio) == REQ_OP_WRITE) - return rrpc_write_rq(rrpc, bio, rqd, flags); - - return rrpc_read_rq(rrpc, bio, rqd, flags); -} - -static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, - struct nvm_rq *rqd, unsigned long flags) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_rq *rrq = nvm_rq_to_pdu(rqd); - uint8_t nr_pages = rrpc_get_pages(bio); - int bio_size = bio_sectors(bio) << 9; - int err; - - if (bio_size < dev->geo.sec_size) - return NVM_IO_ERR; - else if (bio_size > dev->geo.max_rq_size) - return NVM_IO_ERR; - - err = rrpc_setup_rq(rrpc, bio, 
rqd, flags, nr_pages); - if (err) - return err; - - bio_get(bio); - rqd->bio = bio; - rqd->private = rrpc; - rqd->nr_ppas = nr_pages; - rqd->end_io = rrpc_end_io; - rrq->flags = flags; - - err = nvm_submit_io(dev, rqd); - if (err) { - pr_err("rrpc: I/O submission failed: %d\n", err); - bio_put(bio); - if (!(flags & NVM_IOTYPE_GC)) { - rrpc_unlock_rq(rrpc, rqd); - if (rqd->nr_ppas > 1) - nvm_dev_dma_free(dev->parent, rqd->ppa_list, - rqd->dma_ppa_list); - } - return NVM_IO_ERR; - } - - return NVM_IO_OK; -} - -static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio) -{ - struct rrpc *rrpc = q->queuedata; - struct nvm_rq *rqd; - int err; - - blk_queue_split(q, &bio); - - if (bio_op(bio) == REQ_OP_DISCARD) { - rrpc_discard(rrpc, bio); - return BLK_QC_T_NONE; - } - - rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL); - memset(rqd, 0, sizeof(struct nvm_rq)); - - err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE); - switch (err) { - case NVM_IO_OK: - return BLK_QC_T_NONE; - case NVM_IO_ERR: - bio_io_error(bio); - break; - case NVM_IO_DONE: - bio_endio(bio); - break; - case NVM_IO_REQUEUE: - spin_lock(&rrpc->bio_lock); - bio_list_add(&rrpc->requeue_bios, bio); - spin_unlock(&rrpc->bio_lock); - queue_work(rrpc->kgc_wq, &rrpc->ws_requeue); - break; - } - - mempool_free(rqd, rrpc->rq_pool); - return BLK_QC_T_NONE; -} - -static void rrpc_requeue(struct work_struct *work) -{ - struct rrpc *rrpc = container_of(work, struct rrpc, ws_requeue); - struct bio_list bios; - struct bio *bio; - - bio_list_init(&bios); - - spin_lock(&rrpc->bio_lock); - bio_list_merge(&bios, &rrpc->requeue_bios); - bio_list_init(&rrpc->requeue_bios); - spin_unlock(&rrpc->bio_lock); - - while ((bio = bio_list_pop(&bios))) - rrpc_make_rq(rrpc->disk->queue, bio); -} - -static void rrpc_gc_free(struct rrpc *rrpc) -{ - if (rrpc->krqd_wq) - destroy_workqueue(rrpc->krqd_wq); - - if (rrpc->kgc_wq) - destroy_workqueue(rrpc->kgc_wq); -} - -static int rrpc_gc_init(struct rrpc *rrpc) -{ - rrpc->krqd_wq = 
alloc_workqueue("rrpc-lun", WQ_MEM_RECLAIM|WQ_UNBOUND, - rrpc->nr_luns); - if (!rrpc->krqd_wq) - return -ENOMEM; - - rrpc->kgc_wq = alloc_workqueue("rrpc-bg", WQ_MEM_RECLAIM, 1); - if (!rrpc->kgc_wq) - return -ENOMEM; - - timer_setup(&rrpc->gc_timer, rrpc_gc_timer, 0); - - return 0; -} - -static void rrpc_map_free(struct rrpc *rrpc) -{ - vfree(rrpc->rev_trans_map); - vfree(rrpc->trans_map); -} - -static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) -{ - struct rrpc *rrpc = (struct rrpc *)private; - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_addr *addr = rrpc->trans_map + slba; - struct rrpc_rev_addr *raddr = rrpc->rev_trans_map; - struct rrpc_lun *rlun; - struct rrpc_block *rblk; - u64 i; - - for (i = 0; i < nlb; i++) { - struct ppa_addr gaddr; - u64 pba = le64_to_cpu(entries[i]); - unsigned int mod; - - /* LNVM treats address-spaces as silos, LBA and PBA are - * equally large and zero-indexed. - */ - if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) { - pr_err("nvm: L2P data entry is out of bounds!\n"); - pr_err("nvm: Maybe loaded an old target L2P\n"); - return -EINVAL; - } - - /* Address zero is a special one. The first page on a disk is - * protected. As it often holds internal device boot - * information. - */ - if (!pba) - continue; - - div_u64_rem(pba, rrpc->nr_sects, &mod); - - gaddr = rrpc_recov_addr(dev, pba); - rlun = rrpc_ppa_to_lun(rrpc, gaddr); - if (!rlun) { - pr_err("rrpc: l2p corruption on lba %llu\n", - slba + i); - return -EINVAL; - } - - rblk = &rlun->blocks[gaddr.g.blk]; - if (!rblk->state) { - /* at this point, we don't know anything about the - * block. It's up to the FTL on top to re-etablish the - * block state. The block is assumed to be open. 
- */ - list_move_tail(&rblk->list, &rlun->used_list); - rblk->state = NVM_BLK_ST_TGT; - rlun->nr_free_blocks--; - } - - addr[i].addr = pba; - addr[i].rblk = rblk; - raddr[mod].addr = slba + i; - } - - return 0; -} - -static int rrpc_map_init(struct rrpc *rrpc) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - sector_t i; - int ret; - - rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects); - if (!rrpc->trans_map) - return -ENOMEM; - - rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) - * rrpc->nr_sects); - if (!rrpc->rev_trans_map) - return -ENOMEM; - - for (i = 0; i < rrpc->nr_sects; i++) { - struct rrpc_addr *p = &rrpc->trans_map[i]; - struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i]; - - p->addr = ADDR_EMPTY; - r->addr = ADDR_EMPTY; - } - - /* Bring up the mapping table from device */ - ret = nvm_get_l2p_tbl(dev, rrpc->soffset, rrpc->nr_sects, - rrpc_l2p_update, rrpc); - if (ret) { - pr_err("nvm: rrpc: could not read L2P table.\n"); - return -EINVAL; - } - - return 0; -} - -/* Minimum pages needed within a lun */ -#define PAGE_POOL_SIZE 16 -#define ADDR_POOL_SIZE 64 - -static int rrpc_core_init(struct rrpc *rrpc) -{ - down_write(&rrpc_lock); - if (!rrpc_gcb_cache) { - rrpc_gcb_cache = kmem_cache_create("rrpc_gcb", - sizeof(struct rrpc_block_gc), 0, 0, NULL); - if (!rrpc_gcb_cache) { - up_write(&rrpc_lock); - return -ENOMEM; - } - - rrpc_rq_cache = kmem_cache_create("rrpc_rq", - sizeof(struct nvm_rq) + sizeof(struct rrpc_rq), - 0, 0, NULL); - if (!rrpc_rq_cache) { - kmem_cache_destroy(rrpc_gcb_cache); - up_write(&rrpc_lock); - return -ENOMEM; - } - } - up_write(&rrpc_lock); - - rrpc->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); - if (!rrpc->page_pool) - return -ENOMEM; - - rrpc->gcb_pool = mempool_create_slab_pool(rrpc->dev->geo.nr_luns, - rrpc_gcb_cache); - if (!rrpc->gcb_pool) - return -ENOMEM; - - rrpc->rq_pool = mempool_create_slab_pool(64, rrpc_rq_cache); - if (!rrpc->rq_pool) - return -ENOMEM; - - 
spin_lock_init(&rrpc->inflights.lock); - INIT_LIST_HEAD(&rrpc->inflights.reqs); - - return 0; -} - -static void rrpc_core_free(struct rrpc *rrpc) -{ - mempool_destroy(rrpc->page_pool); - mempool_destroy(rrpc->gcb_pool); - mempool_destroy(rrpc->rq_pool); -} - -static void rrpc_luns_free(struct rrpc *rrpc) -{ - struct rrpc_lun *rlun; - int i; - - if (!rrpc->luns) - return; - - for (i = 0; i < rrpc->nr_luns; i++) { - rlun = &rrpc->luns[i]; - vfree(rlun->blocks); - } - - kfree(rrpc->luns); -} - -static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun) -{ - struct nvm_geo *geo = &dev->geo; - struct rrpc_block *rblk; - struct ppa_addr ppa; - u8 *blks; - int nr_blks; - int i; - int ret; - - if (!dev->parent->ops->get_bb_tbl) - return 0; - - nr_blks = geo->blks_per_lun * geo->plane_mode; - blks = kmalloc(nr_blks, GFP_KERNEL); - if (!blks) - return -ENOMEM; - - ppa.ppa = 0; - ppa.g.ch = rlun->bppa.g.ch; - ppa.g.lun = rlun->bppa.g.lun; - - ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); - if (ret) { - pr_err("rrpc: could not get BB table\n"); - goto out; - } - - nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); - if (nr_blks < 0) { - ret = nr_blks; - goto out; - } - - for (i = 0; i < nr_blks; i++) { - if (blks[i] == NVM_BLK_T_FREE) - continue; - - rblk = &rlun->blocks[i]; - list_move_tail(&rblk->list, &rlun->bb_list); - rblk->state = NVM_BLK_ST_BAD; - rlun->nr_free_blocks--; - } - -out: - kfree(blks); - return ret; -} - -static void rrpc_set_lun_ppa(struct rrpc_lun *rlun, struct ppa_addr ppa) -{ - rlun->bppa.ppa = 0; - rlun->bppa.g.ch = ppa.g.ch; - rlun->bppa.g.lun = ppa.g.lun; -} - -static int rrpc_luns_init(struct rrpc *rrpc, struct ppa_addr *luns) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct nvm_geo *geo = &dev->geo; - struct rrpc_lun *rlun; - int i, j, ret = -EINVAL; - - if (geo->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) { - pr_err("rrpc: number of pages per block too high."); - return -EINVAL; - } - - 
spin_lock_init(&rrpc->rev_lock); - - rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun), - GFP_KERNEL); - if (!rrpc->luns) - return -ENOMEM; - - /* 1:1 mapping */ - for (i = 0; i < rrpc->nr_luns; i++) { - rlun = &rrpc->luns[i]; - rlun->id = i; - rrpc_set_lun_ppa(rlun, luns[i]); - rlun->blocks = vzalloc(sizeof(struct rrpc_block) * - geo->blks_per_lun); - if (!rlun->blocks) { - ret = -ENOMEM; - goto err; - } - - INIT_LIST_HEAD(&rlun->free_list); - INIT_LIST_HEAD(&rlun->used_list); - INIT_LIST_HEAD(&rlun->bb_list); - - for (j = 0; j < geo->blks_per_lun; j++) { - struct rrpc_block *rblk = &rlun->blocks[j]; - - rblk->id = j; - rblk->rlun = rlun; - rblk->state = NVM_BLK_T_FREE; - INIT_LIST_HEAD(&rblk->prio); - INIT_LIST_HEAD(&rblk->list); - spin_lock_init(&rblk->lock); - - list_add_tail(&rblk->list, &rlun->free_list); - } - - rlun->rrpc = rrpc; - rlun->nr_free_blocks = geo->blks_per_lun; - rlun->reserved_blocks = 2; /* for GC only */ - - INIT_LIST_HEAD(&rlun->prio_list); - INIT_LIST_HEAD(&rlun->wblk_list); - - INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); - spin_lock_init(&rlun->lock); - - if (rrpc_bb_discovery(dev, rlun)) - goto err; - - } - - return 0; -err: - return ret; -} - -/* returns 0 on success and stores the beginning address in *begin */ -static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - sector_t size = rrpc->nr_sects * dev->geo.sec_size; - int ret; - - size >>= 9; - - ret = nvm_get_area(dev, begin, size); - if (!ret) - *begin >>= (ilog2(dev->geo.sec_size) - 9); - - return ret; -} - -static void rrpc_area_free(struct rrpc *rrpc) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - sector_t begin = rrpc->soffset << (ilog2(dev->geo.sec_size) - 9); - - nvm_put_area(dev, begin); -} - -static void rrpc_free(struct rrpc *rrpc) -{ - rrpc_gc_free(rrpc); - rrpc_map_free(rrpc); - rrpc_core_free(rrpc); - rrpc_luns_free(rrpc); - rrpc_area_free(rrpc); - - kfree(rrpc); -} - -static void rrpc_exit(void *private) -{ - struct 
rrpc *rrpc = private; - - del_timer(&rrpc->gc_timer); - - flush_workqueue(rrpc->krqd_wq); - flush_workqueue(rrpc->kgc_wq); - - rrpc_free(rrpc); -} - -static sector_t rrpc_capacity(void *private) -{ - struct rrpc *rrpc = private; - struct nvm_tgt_dev *dev = rrpc->dev; - sector_t reserved, provisioned; - - /* cur, gc, and two emergency blocks for each lun */ - reserved = rrpc->nr_luns * dev->geo.sec_per_blk * 4; - provisioned = rrpc->nr_sects - reserved; - - if (reserved > rrpc->nr_sects) { - pr_err("rrpc: not enough space available to expose storage.\n"); - return 0; - } - - sector_div(provisioned, 10); - return provisioned * 9 * NR_PHY_IN_LOG; -} - -/* - * Looks up the logical address from reverse trans map and check if its valid by - * comparing the logical to physical address with the physical address. - * Returns 0 on free, otherwise 1 if in use - */ -static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - int offset; - struct rrpc_addr *laddr; - u64 bpaddr, paddr, pladdr; - - bpaddr = block_to_rel_addr(rrpc, rblk); - for (offset = 0; offset < dev->geo.sec_per_blk; offset++) { - paddr = bpaddr + offset; - - pladdr = rrpc->rev_trans_map[paddr].addr; - if (pladdr == ADDR_EMPTY) - continue; - - laddr = &rrpc->trans_map[pladdr]; - - if (paddr == laddr->addr) { - laddr->rblk = rblk; - } else { - set_bit(offset, rblk->invalid_pages); - rblk->nr_invalid_pages++; - } - } -} - -static int rrpc_blocks_init(struct rrpc *rrpc) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct rrpc_lun *rlun; - struct rrpc_block *rblk; - int lun_iter, blk_iter; - - for (lun_iter = 0; lun_iter < rrpc->nr_luns; lun_iter++) { - rlun = &rrpc->luns[lun_iter]; - - for (blk_iter = 0; blk_iter < dev->geo.blks_per_lun; - blk_iter++) { - rblk = &rlun->blocks[blk_iter]; - rrpc_block_map_update(rrpc, rblk); - } - } - - return 0; -} - -static int rrpc_luns_configure(struct rrpc *rrpc) -{ - struct rrpc_lun *rlun; - struct rrpc_block *rblk; 
- int i; - - for (i = 0; i < rrpc->nr_luns; i++) { - rlun = &rrpc->luns[i]; - - rblk = rrpc_get_blk(rrpc, rlun, 0); - if (!rblk) - goto err; - rrpc_set_lun_cur(rlun, rblk, &rlun->cur); - - /* Emergency gc block */ - rblk = rrpc_get_blk(rrpc, rlun, 1); - if (!rblk) - goto err; - rrpc_set_lun_cur(rlun, rblk, &rlun->gc_cur); - } - - return 0; -err: - rrpc_put_blks(rrpc); - return -EINVAL; -} - -static struct nvm_tgt_type tt_rrpc; - -static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, - int flags) -{ - struct request_queue *bqueue = dev->q; - struct request_queue *tqueue = tdisk->queue; - struct nvm_geo *geo = &dev->geo; - struct rrpc *rrpc; - sector_t soffset; - int ret; - - if (!(dev->identity.dom & NVM_RSP_L2P)) { - pr_err("nvm: rrpc: device does not support l2p (%x)\n", - dev->identity.dom); - return ERR_PTR(-EINVAL); - } - - rrpc = kzalloc(sizeof(struct rrpc), GFP_KERNEL); - if (!rrpc) - return ERR_PTR(-ENOMEM); - - rrpc->dev = dev; - rrpc->disk = tdisk; - - bio_list_init(&rrpc->requeue_bios); - spin_lock_init(&rrpc->bio_lock); - INIT_WORK(&rrpc->ws_requeue, rrpc_requeue); - - rrpc->nr_luns = geo->nr_luns; - rrpc->nr_sects = (unsigned long long)geo->sec_per_lun * rrpc->nr_luns; - - /* simple round-robin strategy */ - atomic_set(&rrpc->next_lun, -1); - - ret = rrpc_area_init(rrpc, &soffset); - if (ret < 0) { - pr_err("nvm: rrpc: could not initialize area\n"); - return ERR_PTR(ret); - } - rrpc->soffset = soffset; - - ret = rrpc_luns_init(rrpc, dev->luns); - if (ret) { - pr_err("nvm: rrpc: could not initialize luns\n"); - goto err; - } - - ret = rrpc_core_init(rrpc); - if (ret) { - pr_err("nvm: rrpc: could not initialize core\n"); - goto err; - } - - ret = rrpc_map_init(rrpc); - if (ret) { - pr_err("nvm: rrpc: could not initialize maps\n"); - goto err; - } - - ret = rrpc_blocks_init(rrpc); - if (ret) { - pr_err("nvm: rrpc: could not initialize state for blocks\n"); - goto err; - } - - ret = rrpc_luns_configure(rrpc); - if (ret) { - pr_err("nvm: 
rrpc: not enough blocks available in LUNs.\n"); - goto err; - } - - ret = rrpc_gc_init(rrpc); - if (ret) { - pr_err("nvm: rrpc: could not initialize gc\n"); - goto err; - } - - /* inherit the size from the underlying device */ - blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue)); - blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); - - pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n", - rrpc->nr_luns, (unsigned long long)rrpc->nr_sects); - - mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10)); - - return rrpc; -err: - rrpc_free(rrpc); - return ERR_PTR(ret); -} - -/* round robin, page-based FTL, and cost-based GC */ -static struct nvm_tgt_type tt_rrpc = { - .name = "rrpc", - .version = {1, 0, 0}, - - .make_rq = rrpc_make_rq, - .capacity = rrpc_capacity, - - .init = rrpc_init, - .exit = rrpc_exit, -}; - -static int __init rrpc_module_init(void) -{ - return nvm_register_tgt_type(&tt_rrpc); -} - -static void rrpc_module_exit(void) -{ - nvm_unregister_tgt_type(&tt_rrpc); -} - -module_init(rrpc_module_init); -module_exit(rrpc_module_exit); -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Block-Device Target for Open-Channel SSDs"); diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h deleted file mode 100644 index fdb6ff902903..000000000000 --- a/drivers/lightnvm/rrpc.h +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright (C) 2015 IT University of Copenhagen - * Initial release: Matias Bjorling - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs. - */ - -#ifndef RRPC_H_ -#define RRPC_H_ - -#include -#include -#include -#include -#include -#include - -#include - -/* Run only GC if less than 1/X blocks are free */ -#define GC_LIMIT_INVERSE 10 -#define GC_TIME_SECS 100 - -#define RRPC_SECTOR (512) -#define RRPC_EXPOSED_PAGE_SIZE (4096) - -#define NR_PHY_IN_LOG (RRPC_EXPOSED_PAGE_SIZE / RRPC_SECTOR) - -struct rrpc_inflight { - struct list_head reqs; - spinlock_t lock; -}; - -struct rrpc_inflight_rq { - struct list_head list; - sector_t l_start; - sector_t l_end; -}; - -struct rrpc_rq { - struct rrpc_inflight_rq inflight_rq; - unsigned long flags; -}; - -struct rrpc_block { - int id; /* id inside of LUN */ - struct rrpc_lun *rlun; - - struct list_head prio; /* LUN CG list */ - struct list_head list; /* LUN free, used, bb list */ - -#define MAX_INVALID_PAGES_STORAGE 8 - /* Bitmap for invalid page intries */ - unsigned long invalid_pages[MAX_INVALID_PAGES_STORAGE]; - /* points to the next writable page within a block */ - unsigned int next_page; - /* number of pages that are invalid, wrt host page size */ - unsigned int nr_invalid_pages; - - int state; - - spinlock_t lock; - atomic_t data_cmnt_size; /* data pages committed to stable storage */ -}; - -struct rrpc_lun { - struct rrpc *rrpc; - - int id; - struct ppa_addr bppa; - - struct rrpc_block *cur, *gc_cur; - struct rrpc_block *blocks; /* Reference to block allocation */ - - struct list_head prio_list; /* Blocks that may be GC'ed */ - struct list_head wblk_list; /* Queued blocks to be written to */ - - /* lun block lists */ - struct list_head used_list; /* In-use blocks */ - struct list_head free_list; /* Not used blocks i.e. released - * and ready for use - */ - struct list_head bb_list; /* Bad blocks. 
Mutually exclusive with - * free_list and used_list - */ - unsigned int nr_free_blocks; /* Number of unused blocks */ - - struct work_struct ws_gc; - - int reserved_blocks; - - spinlock_t lock; -}; - -struct rrpc { - struct nvm_tgt_dev *dev; - struct gendisk *disk; - - sector_t soffset; /* logical sector offset */ - - int nr_luns; - struct rrpc_lun *luns; - - /* calculated values */ - unsigned long long nr_sects; - - /* Write strategy variables. Move these into each for structure for each - * strategy - */ - atomic_t next_lun; /* Whenever a page is written, this is updated - * to point to the next write lun - */ - - spinlock_t bio_lock; - struct bio_list requeue_bios; - struct work_struct ws_requeue; - - /* Simple translation map of logical addresses to physical addresses. - * The logical addresses is known by the host system, while the physical - * addresses are used when writing to the disk block device. - */ - struct rrpc_addr *trans_map; - /* also store a reverse map for garbage collection */ - struct rrpc_rev_addr *rev_trans_map; - spinlock_t rev_lock; - - struct rrpc_inflight inflights; - - mempool_t *addr_pool; - mempool_t *page_pool; - mempool_t *gcb_pool; - mempool_t *rq_pool; - - struct timer_list gc_timer; - struct workqueue_struct *krqd_wq; - struct workqueue_struct *kgc_wq; -}; - -struct rrpc_block_gc { - struct rrpc *rrpc; - struct rrpc_block *rblk; - struct work_struct ws_gc; -}; - -/* Logical to physical mapping */ -struct rrpc_addr { - u64 addr; - struct rrpc_block *rblk; -}; - -/* Physical to logical mapping */ -struct rrpc_rev_addr { - u64 addr; -}; - -static inline struct ppa_addr rrpc_linear_to_generic_addr(struct nvm_geo *geo, - struct ppa_addr r) -{ - struct ppa_addr l; - int secs, pgs; - sector_t ppa = r.ppa; - - l.ppa = 0; - - div_u64_rem(ppa, geo->sec_per_pg, &secs); - l.g.sec = secs; - - sector_div(ppa, geo->sec_per_pg); - div_u64_rem(ppa, geo->pgs_per_blk, &pgs); - l.g.pg = pgs; - - return l; -} - -static inline struct ppa_addr 
rrpc_recov_addr(struct nvm_tgt_dev *dev, u64 pba) -{ - return linear_to_generic_addr(&dev->geo, pba); -} - -static inline u64 rrpc_blk_to_ppa(struct rrpc *rrpc, struct rrpc_block *rblk) -{ - struct nvm_tgt_dev *dev = rrpc->dev; - struct nvm_geo *geo = &dev->geo; - struct rrpc_lun *rlun = rblk->rlun; - - return (rlun->id * geo->sec_per_lun) + (rblk->id * geo->sec_per_blk); -} - -static inline sector_t rrpc_get_laddr(struct bio *bio) -{ - return bio->bi_iter.bi_sector / NR_PHY_IN_LOG; -} - -static inline unsigned int rrpc_get_pages(struct bio *bio) -{ - return bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE; -} - -static inline sector_t rrpc_get_sector(sector_t laddr) -{ - return laddr * NR_PHY_IN_LOG; -} - -static inline int request_intersects(struct rrpc_inflight_rq *r, - sector_t laddr_start, sector_t laddr_end) -{ - return (laddr_end >= r->l_start) && (laddr_start <= r->l_end); -} - -static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, - unsigned int pages, struct rrpc_inflight_rq *r) -{ - sector_t laddr_end = laddr + pages - 1; - struct rrpc_inflight_rq *rtmp; - - WARN_ON(irqs_disabled()); - - spin_lock_irq(&rrpc->inflights.lock); - list_for_each_entry(rtmp, &rrpc->inflights.reqs, list) { - if (unlikely(request_intersects(rtmp, laddr, laddr_end))) { - /* existing, overlapping request, come back later */ - spin_unlock_irq(&rrpc->inflights.lock); - return 1; - } - } - - r->l_start = laddr; - r->l_end = laddr_end; - - list_add_tail(&r->list, &rrpc->inflights.reqs); - spin_unlock_irq(&rrpc->inflights.lock); - return 0; -} - -static inline int rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, - unsigned int pages, - struct rrpc_inflight_rq *r) -{ - BUG_ON((laddr + pages) > rrpc->nr_sects); - - return __rrpc_lock_laddr(rrpc, laddr, pages, r); -} - -static inline struct rrpc_inflight_rq *rrpc_get_inflight_rq(struct nvm_rq *rqd) -{ - struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); - - return &rrqd->inflight_rq; -} - -static inline int rrpc_lock_rq(struct rrpc 
*rrpc, struct bio *bio, - struct nvm_rq *rqd) -{ - sector_t laddr = rrpc_get_laddr(bio); - unsigned int pages = rrpc_get_pages(bio); - struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); - - return rrpc_lock_laddr(rrpc, laddr, pages, r); -} - -static inline void rrpc_unlock_laddr(struct rrpc *rrpc, - struct rrpc_inflight_rq *r) -{ - unsigned long flags; - - spin_lock_irqsave(&rrpc->inflights.lock, flags); - list_del_init(&r->list); - spin_unlock_irqrestore(&rrpc->inflights.lock, flags); -} - -static inline void rrpc_unlock_rq(struct rrpc *rrpc, struct nvm_rq *rqd) -{ - struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); - uint8_t pages = rqd->nr_ppas; - - BUG_ON((r->l_start + pages) > rrpc->nr_sects); - - rrpc_unlock_laddr(rrpc, r); -} - -#endif /* RRPC_H_ */ -- cgit v1.2.3 From 26f76dce60d28028e5c1fbbc39e771366a27671f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 5 Jan 2018 14:15:59 +0100 Subject: lightnvm: use internal pblk methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that rrpc has been removed, the only users of the ppa helpers is pblk. However, pblk already defines similar functions. Switch pblk to use the internal ones, and remove the generic ppa helpers. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-map.c | 2 +- drivers/lightnvm/pblk-write.c | 4 ++-- include/linux/lightnvm.h | 19 ------------------- 3 files changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 6f3ecde2140f..7445e6430c52 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -146,7 +146,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, return; /* Erase blocks that are bad in this line but might not be in next */ - if (unlikely(ppa_empty(*erase_ppa)) && + if (unlikely(pblk_ppa_empty(*erase_ppa)) && bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) { int bit = -1; diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 6c1cafafef53..6c30b7a6e559 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -439,7 +439,7 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd) struct pblk_line *meta_line; int err; - ppa_set_empty(&erase_ppa); + pblk_ppa_set_empty(&erase_ppa); /* Assign lbas to ppas and populate request structure */ err = pblk_setup_w_rq(pblk, rqd, &erase_ppa); @@ -457,7 +457,7 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd) return NVM_IO_ERR; } - if (!ppa_empty(erase_ppa)) { + if (!pblk_ppa_empty(erase_ppa)) { /* Submit erase for next data line */ if (pblk_blk_erase_async(pblk, erase_ppa)) { struct pblk_line *e_line = pblk_line_get_erase(pblk); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2d1d9de06728..14e274b7d094 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -418,25 +418,6 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev, return l; } -static inline int ppa_empty(struct ppa_addr ppa_addr) -{ - return (ppa_addr.ppa == ADDR_EMPTY); -} - -static inline void ppa_set_empty(struct ppa_addr *ppa_addr) -{ - ppa_addr->ppa = ADDR_EMPTY; 
-} - -static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) -{ - if (ppa_empty(ppa1) || ppa_empty(ppa2)) - return 0; - - return ((ppa1.g.ch == ppa2.g.ch) && (ppa1.g.lun == ppa2.g.lun) && - (ppa1.g.blk == ppa2.g.blk)); -} - typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, -- cgit v1.2.3 From e3e13bcc14717800e3e3239ca3faac24f2f04575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 5 Jan 2018 14:16:00 +0100 Subject: lightnvm: remove hybrid ocssd 1.2 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that rrpc have been removed. Also remove the hybrid 1.2 support from the core. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 141 ------------------------------------------- drivers/nvme/host/lightnvm.c | 96 ----------------------------- include/linux/lightnvm.h | 43 ------------- 3 files changed, 280 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 83249b43dd06..390d5efd6287 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -45,12 +45,6 @@ struct nvm_dev_map { int nr_chnls; }; -struct nvm_area { - struct list_head list; - sector_t begin; - sector_t end; /* end is excluded */ -}; - static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) { struct nvm_target *tgt; @@ -524,35 +518,6 @@ static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas); } -void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, - int len) -{ - struct nvm_geo *geo = &dev->geo; - struct nvm_dev_map *dev_rmap = dev->rmap; - u64 i; - - for (i = 0; i < len; i++) { - struct nvm_ch_map *ch_rmap; - int *lun_roffs; - struct ppa_addr gaddr; - u64 pba = 
le64_to_cpu(entries[i]); - u64 diff; - - if (!pba) - continue; - - gaddr = linear_to_generic_addr(geo, pba); - ch_rmap = &dev_rmap->chnls[gaddr.g.ch]; - lun_roffs = ch_rmap->lun_offs; - - diff = ((ch_rmap->ch_off * geo->luns_per_chnl) + - (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun; - - entries[i] -= cpu_to_le64(diff); - } -} -EXPORT_SYMBOL(nvm_part_to_tgt); - int nvm_register_tgt_type(struct nvm_tgt_type *tt) { int ret = 0; @@ -726,112 +691,6 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_submit_io_sync); -int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, - int nr_ppas) -{ - struct nvm_geo *geo = &tgt_dev->geo; - struct nvm_rq rqd; - int ret; - - memset(&rqd, 0, sizeof(struct nvm_rq)); - - rqd.opcode = NVM_OP_ERASE; - rqd.flags = geo->plane_mode >> 1; - - ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); - if (ret) - return ret; - - ret = nvm_submit_io_sync(tgt_dev, &rqd); - if (ret) { - pr_err("rrpr: erase I/O submission failed: %d\n", ret); - goto free_ppa_list; - } - -free_ppa_list: - nvm_free_rqd_ppalist(tgt_dev, &rqd); - - return ret; -} -EXPORT_SYMBOL(nvm_erase_sync); - -int nvm_get_l2p_tbl(struct nvm_tgt_dev *tgt_dev, u64 slba, u32 nlb, - nvm_l2p_update_fn *update_l2p, void *priv) -{ - struct nvm_dev *dev = tgt_dev->parent; - - if (!dev->ops->get_l2p_tbl) - return 0; - - return dev->ops->get_l2p_tbl(dev, slba, nlb, update_l2p, priv); -} -EXPORT_SYMBOL(nvm_get_l2p_tbl); - -int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_geo *geo = &dev->geo; - struct nvm_area *area, *prev, *next; - sector_t begin = 0; - sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9; - - if (len > max_sectors) - return -EINVAL; - - area = kmalloc(sizeof(struct nvm_area), GFP_KERNEL); - if (!area) - return -ENOMEM; - - prev = NULL; - - spin_lock(&dev->lock); - list_for_each_entry(next, &dev->area_list, list) { - if 
(begin + len > next->begin) { - begin = next->end; - prev = next; - continue; - } - break; - } - - if ((begin + len) > max_sectors) { - spin_unlock(&dev->lock); - kfree(area); - return -EINVAL; - } - - area->begin = *lba = begin; - area->end = begin + len; - - if (prev) /* insert into sorted order */ - list_add(&area->list, &prev->list); - else - list_add(&area->list, &dev->area_list); - spin_unlock(&dev->lock); - - return 0; -} -EXPORT_SYMBOL(nvm_get_area); - -void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_area *area; - - spin_lock(&dev->lock); - list_for_each_entry(area, &dev->area_list, list) { - if (area->begin != begin) - continue; - - list_del(&area->list); - spin_unlock(&dev->lock); - kfree(area); - return; - } - spin_unlock(&dev->lock); -} -EXPORT_SYMBOL(nvm_put_area); - void nvm_end_io(struct nvm_rq *rqd) { struct nvm_tgt_dev *tgt_dev = rqd->dev; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index ba3d7f3349e5..26f7eccc1684 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -31,27 +31,10 @@ enum nvme_nvm_admin_opcode { nvme_nvm_admin_identity = 0xe2, - nvme_nvm_admin_get_l2p_tbl = 0xea, nvme_nvm_admin_get_bb_tbl = 0xf2, nvme_nvm_admin_set_bb_tbl = 0xf1, }; -struct nvme_nvm_hb_rw { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 slba; -}; - struct nvme_nvm_ph_rw { __u8 opcode; __u8 flags; @@ -80,19 +63,6 @@ struct nvme_nvm_identity { __u32 rsvd11[5]; }; -struct nvme_nvm_l2ptbl { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __le32 cdw2[4]; - __le64 prp1; - __le64 prp2; - __le64 slba; - __le32 nlb; - __le16 cdw14[6]; -}; - struct nvme_nvm_getbbtbl { __u8 opcode; __u8 flags; @@ -139,9 +109,7 @@ struct nvme_nvm_command { union { struct nvme_common_command 
common; struct nvme_nvm_identity identity; - struct nvme_nvm_hb_rw hb_rw; struct nvme_nvm_ph_rw ph_rw; - struct nvme_nvm_l2ptbl l2p; struct nvme_nvm_getbbtbl get_bb; struct nvme_nvm_setbbtbl set_bb; struct nvme_nvm_erase_blk erase; @@ -234,11 +202,9 @@ struct nvme_nvm_bb_tbl { static inline void _nvme_nvm_check_size(void) { BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); @@ -332,62 +298,6 @@ out: return ret; } -static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, - nvm_l2p_update_fn *update_l2p, void *priv) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_command c = {}; - u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9; - u32 nlb_pr_rq = len / sizeof(u64); - u64 cmd_slba = slba; - void *entries; - int ret = 0; - - c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl; - c.l2p.nsid = cpu_to_le32(ns->head->ns_id); - entries = kmalloc(len, GFP_KERNEL); - if (!entries) - return -ENOMEM; - - while (nlb) { - u32 cmd_nlb = min(nlb_pr_rq, nlb); - u64 elba = slba + cmd_nlb; - - c.l2p.slba = cpu_to_le64(cmd_slba); - c.l2p.nlb = cpu_to_le32(cmd_nlb); - - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, - (struct nvme_command *)&c, entries, len); - if (ret) { - dev_err(ns->ctrl->device, - "L2P table transfer failed (%d)\n", ret); - ret = -EIO; - goto out; - } - - if (unlikely(elba > nvmdev->total_secs)) { - pr_err("nvm: L2P data from device is out of bounds!\n"); - ret = -EINVAL; - goto out; - } - - /* Transform physical address to target address space */ - nvm_part_to_tgt(nvmdev, entries, cmd_nlb); - - if 
(update_l2p(cmd_slba, cmd_nlb, entries, priv)) { - ret = -EINTR; - goto out; - } - - cmd_slba += cmd_nlb; - nlb -= cmd_nlb; - } - -out: - kfree(entries); - return ret; -} - static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, u8 *blks) { @@ -474,10 +384,6 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list); c->ph_rw.control = cpu_to_le16(rqd->flags); c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1); - - if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD) - c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns, - rqd->bio->bi_iter.bi_sector)); } static void nvme_nvm_end_io(struct request *rq, blk_status_t status) @@ -597,8 +503,6 @@ static void nvme_nvm_dev_dma_free(void *pool, void *addr, static struct nvm_dev_ops nvme_nvm_dev_ops = { .identity = nvme_nvm_identity, - .get_l2p_tbl = nvme_nvm_get_l2p_tbl, - .get_bb_tbl = nvme_nvm_get_bb_tbl, .set_bb_tbl = nvme_nvm_set_bb_tbl, diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 14e274b7d094..97ceb841e9a0 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -50,10 +50,7 @@ struct nvm_id; struct nvm_dev; struct nvm_tgt_dev; -typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); -typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, - nvm_l2p_update_fn *, void *); typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); @@ -66,7 +63,6 @@ typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); struct nvm_dev_ops { nvm_id_fn *identity; - nvm_get_l2p_tbl_fn *get_l2p_tbl; nvm_op_bb_tbl_fn *get_bb_tbl; nvm_op_set_bb_fn *set_bb_tbl; @@ -112,8 +108,6 @@ enum { NVM_RSP_WARN_HIGHECC = 0x4700, /* Device opcodes */ - NVM_OP_HBREAD = 0x02, - 
NVM_OP_HBWRITE = 0x81, NVM_OP_PWRITE = 0x91, NVM_OP_PREAD = 0x92, NVM_OP_ERASE = 0x90, @@ -346,36 +340,6 @@ struct nvm_dev { struct list_head targets; }; -static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, - u64 pba) -{ - struct ppa_addr l; - int secs, pgs, blks, luns; - sector_t ppa = pba; - - l.ppa = 0; - - div_u64_rem(ppa, geo->sec_per_pg, &secs); - l.g.sec = secs; - - sector_div(ppa, geo->sec_per_pg); - div_u64_rem(ppa, geo->pgs_per_blk, &pgs); - l.g.pg = pgs; - - sector_div(ppa, geo->pgs_per_blk); - div_u64_rem(ppa, geo->blks_per_lun, &blks); - l.g.blk = blks; - - sector_div(ppa, geo->blks_per_lun); - div_u64_rem(ppa, geo->luns_per_chnl, &luns); - l.g.lun = luns; - - sector_div(ppa, geo->luns_per_chnl); - l.g.ch = ppa; - - return l; -} - static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, struct ppa_addr r) { @@ -462,17 +426,10 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *); -extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); -extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, - void *); -extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); -extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); extern void nvm_end_io(struct nvm_rq *); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); -extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int); - #else /* CONFIG_NVM */ struct nvm_dev_ops; -- cgit v1.2.3 From 98281a90acc04d8a10407dabd2e397e4312b80c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:01 +0100 Subject: lightnvm: remove unnecessary field from nvm_rq MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the wait field in nvm_rq. It is not used anymore, as targets rely on the functionality provided by the LightNVM subsystem when sending sync I/O. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 97ceb841e9a0..07cdb05a9a87 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -233,7 +233,6 @@ struct nvm_rq { void *meta_list; dma_addr_t dma_meta_list; - struct completion *wait; nvm_end_io_fn *end_io; uint8_t opcode; -- cgit v1.2.3 From bb27aa9ecd1f72e68b0fa2dffeb45bee3b1cb5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 5 Jan 2018 14:16:02 +0100 Subject: lightnvm: remove lower page tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lower page table is unused. Page tables reported by 1.2 devices all report a sequential 1:1 page mapping. This is also not used going forward with the 2.0 revision. 
Signed-off-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 67 -------------------------------------------- drivers/nvme/host/lightnvm.c | 14 --------- include/linux/lightnvm.h | 6 ---- 3 files changed, 87 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 390d5efd6287..52059dd0ed18 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -751,53 +751,6 @@ int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, } EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); -static int nvm_init_slc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp) -{ - struct nvm_geo *geo = &dev->geo; - int i; - - dev->lps_per_blk = geo->pgs_per_blk; - dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL); - if (!dev->lptbl) - return -ENOMEM; - - /* Just a linear array */ - for (i = 0; i < dev->lps_per_blk; i++) - dev->lptbl[i] = i; - - return 0; -} - -static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp) -{ - int i, p; - struct nvm_id_lp_mlc *mlc = &grp->lptbl.mlc; - - if (!mlc->num_pairs) - return 0; - - dev->lps_per_blk = mlc->num_pairs; - dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL); - if (!dev->lptbl) - return -ENOMEM; - - /* The lower page table encoding consists of a list of bytes, where each - * has a lower and an upper half. 
The first half byte maintains the - * increment value and every value after is an offset added to the - * previous incrementation value - */ - dev->lptbl[0] = mlc->pairs[0] & 0xF; - for (i = 1; i < dev->lps_per_blk; i++) { - p = mlc->pairs[i >> 1]; - if (i & 0x1) /* upper */ - dev->lptbl[i] = dev->lptbl[i - 1] + ((p & 0xF0) >> 4); - else /* lower */ - dev->lptbl[i] = dev->lptbl[i - 1] + (p & 0xF); - } - - return 0; -} - static int nvm_core_init(struct nvm_dev *dev) { struct nvm_id *id = &dev->identity; @@ -846,25 +799,6 @@ static int nvm_core_init(struct nvm_dev *dev) if (!dev->lun_map) return -ENOMEM; - switch (grp->fmtype) { - case NVM_ID_FMTYPE_SLC: - if (nvm_init_slc_tbl(dev, grp)) { - ret = -ENOMEM; - goto err_fmtype; - } - break; - case NVM_ID_FMTYPE_MLC: - if (nvm_init_mlc_tbl(dev, grp)) { - ret = -ENOMEM; - goto err_fmtype; - } - break; - default: - pr_err("nvm: flash type not supported\n"); - ret = -EINVAL; - goto err_fmtype; - } - INIT_LIST_HEAD(&dev->area_list); INIT_LIST_HEAD(&dev->targets); mutex_init(&dev->mlock); @@ -890,7 +824,6 @@ static void nvm_free(struct nvm_dev *dev) dev->ops->destroy_dma_pool(dev->dma_pool); nvm_unregister_map(dev); - kfree(dev->lptbl); kfree(dev->lun_map); kfree(dev); } diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 26f7eccc1684..15bf243f6096 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -246,20 +246,6 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) dst->cpar = le16_to_cpu(src->cpar); - if (dst->fmtype == NVM_ID_FMTYPE_MLC) { - memcpy(dst->lptbl.id, src->lptbl.id, 8); - dst->lptbl.mlc.num_pairs = - le16_to_cpu(src->lptbl.mlc.num_pairs); - - if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) { - pr_err("nvm: number of MLC pairs not supported\n"); - return -EINVAL; - } - - memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, - dst->lptbl.mlc.num_pairs); - } - return 0; } diff --git a/include/linux/lightnvm.h 
b/include/linux/lightnvm.h index 07cdb05a9a87..a5d8e0cbbb46 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -174,8 +174,6 @@ struct nvm_id_group { u32 mpos; u32 mccap; u16 cpar; - - struct nvm_id_lp_tbl lptbl; }; struct nvm_addr_format { @@ -313,10 +311,6 @@ struct nvm_dev { /* Device information */ struct nvm_geo geo; - /* lower page table */ - int lps_per_blk; - int *lptbl; - unsigned long total_secs; unsigned long *lun_map; -- cgit v1.2.3 From fae7fae4077c24dc2be720b9f21f53adea98d7dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 5 Jan 2018 14:16:03 +0100 Subject: lightnvm: make geometry structures 2.0 ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for the 2.0 revision by adapting the geometry structures to coexist with the 1.2 revision. Signed-off-by: Matias Bjørling Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 91 +++++++++++++++++++--------------------- drivers/lightnvm/pblk-core.c | 6 +-- drivers/lightnvm/pblk-init.c | 62 ++++++++++++++------------- drivers/lightnvm/pblk-recovery.c | 2 +- drivers/lightnvm/pblk-sysfs.c | 6 +-- drivers/lightnvm/pblk.h | 8 ++-- drivers/nvme/host/lightnvm.c | 79 +++++++++++++++++++++------------- include/linux/lightnvm.h | 52 ++++++++++++++--------- 8 files changed, 170 insertions(+), 136 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 52059dd0ed18..6d6d2c12ff5b 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -98,7 +98,7 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear) if (clear) { for (j = 0; j < ch_map->nr_luns; j++) { int lun = j + lun_offs[j]; - int lunid = (ch * dev->geo.luns_per_chnl) + lun; + int lunid = (ch * dev->geo.nr_luns) + lun; WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); @@ -124,10 +124,10 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct 
nvm_dev *dev, struct ppa_addr *luns; int nr_luns = lun_end - lun_begin + 1; int luns_left = nr_luns; - int nr_chnls = nr_luns / dev->geo.luns_per_chnl; - int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl; - int bch = lun_begin / dev->geo.luns_per_chnl; - int blun = lun_begin % dev->geo.luns_per_chnl; + int nr_chnls = nr_luns / dev->geo.nr_luns; + int nr_chnls_mod = nr_luns % dev->geo.nr_luns; + int bch = lun_begin / dev->geo.nr_luns; + int blun = lun_begin % dev->geo.nr_luns; int lunid = 0; int lun_balanced = 1; int prev_nr_luns; @@ -148,15 +148,15 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, if (!luns) goto err_luns; - prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ? - dev->geo.luns_per_chnl : luns_left; + prev_nr_luns = (luns_left > dev->geo.nr_luns) ? + dev->geo.nr_luns : luns_left; for (i = 0; i < nr_chnls; i++) { struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; int *lun_roffs = ch_rmap->lun_offs; struct nvm_ch_map *ch_map = &dev_map->chnls[i]; int *lun_offs; - int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ? - dev->geo.luns_per_chnl : luns_left; + int luns_in_chnl = (luns_left > dev->geo.nr_luns) ? + dev->geo.nr_luns : luns_left; if (lun_balanced && prev_nr_luns != luns_in_chnl) lun_balanced = 0; @@ -193,8 +193,8 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); /* Target device only owns a portion of the physical device */ tgt_dev->geo.nr_chnls = nr_chnls; - tgt_dev->geo.nr_luns = nr_luns; - tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1; + tgt_dev->geo.all_luns = nr_luns; + tgt_dev->geo.nr_luns = (lun_balanced) ? 
prev_nr_luns : -1; tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; tgt_dev->q = dev->q; tgt_dev->map = dev_map; @@ -414,7 +414,7 @@ static int nvm_register_map(struct nvm_dev *dev) for (i = 0; i < dev->geo.nr_chnls; i++) { struct nvm_ch_map *ch_rmap; int *lun_roffs; - int luns_in_chnl = dev->geo.luns_per_chnl; + int luns_in_chnl = dev->geo.nr_luns; ch_rmap = &rmap->chnls[i]; @@ -717,10 +717,10 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) struct nvm_geo *geo = &dev->geo; int blk, offset, pl, blktype; - if (nr_blks != geo->blks_per_lun * geo->plane_mode) + if (nr_blks != geo->nr_chks * geo->plane_mode) return -EINVAL; - for (blk = 0; blk < geo->blks_per_lun; blk++) { + for (blk = 0; blk < geo->nr_chks; blk++) { offset = blk * geo->plane_mode; blktype = blks[offset]; @@ -736,7 +736,7 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) blks[blk] = blktype; } - return geo->blks_per_lun; + return geo->nr_chks; } EXPORT_SYMBOL(nvm_bb_tbl_fold); @@ -758,43 +758,40 @@ static int nvm_core_init(struct nvm_dev *dev) struct nvm_geo *geo = &dev->geo; int ret; + memcpy(&geo->ppaf, &id->ppaf, sizeof(struct nvm_addr_format)); + + if (grp->mtype != 0) { + pr_err("nvm: memory type not supported\n"); + return -EINVAL; + } + /* Whole device values */ geo->nr_chnls = grp->num_ch; - geo->luns_per_chnl = grp->num_lun; - - /* Generic device values */ - geo->pgs_per_blk = grp->num_pg; - geo->blks_per_lun = grp->num_blk; - geo->nr_planes = grp->num_pln; - geo->fpg_size = grp->fpg_sz; - geo->pfpg_size = grp->fpg_sz * grp->num_pln; + geo->nr_luns = grp->num_lun; + + /* Generic device geometry values */ + geo->ws_min = grp->ws_min; + geo->ws_opt = grp->ws_opt; + geo->ws_seq = grp->ws_seq; + geo->ws_per_chk = grp->ws_per_chk; + geo->nr_chks = grp->num_chk; geo->sec_size = grp->csecs; geo->oob_size = grp->sos; - geo->sec_per_pg = grp->fpg_sz / grp->csecs; geo->mccap = grp->mccap; - memcpy(&geo->ppaf, &id->ppaf, sizeof(struct nvm_addr_format)); - - 
geo->plane_mode = NVM_PLANE_SINGLE; geo->max_rq_size = dev->ops->max_phys_sect * geo->sec_size; - if (grp->mpos & 0x020202) - geo->plane_mode = NVM_PLANE_DOUBLE; - if (grp->mpos & 0x040404) - geo->plane_mode = NVM_PLANE_QUAD; - - if (grp->mtype != 0) { - pr_err("nvm: memory type not supported\n"); - return -EINVAL; - } + geo->sec_per_chk = grp->clba; + geo->sec_per_lun = geo->sec_per_chk * geo->nr_chks; + geo->all_luns = geo->nr_luns * geo->nr_chnls; - /* calculated values */ + /* 1.2 spec device geometry values */ + geo->plane_mode = 1 << geo->ws_seq; + geo->nr_planes = geo->ws_opt / geo->ws_min; + geo->sec_per_pg = geo->ws_min; geo->sec_per_pl = geo->sec_per_pg * geo->nr_planes; - geo->sec_per_blk = geo->sec_per_pl * geo->pgs_per_blk; - geo->sec_per_lun = geo->sec_per_blk * geo->blks_per_lun; - geo->nr_luns = geo->luns_per_chnl * geo->nr_chnls; - dev->total_secs = geo->nr_luns * geo->sec_per_lun; - dev->lun_map = kcalloc(BITS_TO_LONGS(geo->nr_luns), + dev->total_secs = geo->all_luns * geo->sec_per_lun; + dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns), sizeof(unsigned long), GFP_KERNEL); if (!dev->lun_map) return -ENOMEM; @@ -854,8 +851,8 @@ static int nvm_init(struct nvm_dev *dev) pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n", dev->name, geo->sec_per_pg, geo->nr_planes, - geo->pgs_per_blk, geo->blks_per_lun, - geo->nr_luns, geo->nr_chnls); + geo->ws_per_chk, geo->nr_chks, + geo->all_luns, geo->nr_chnls); return 0; err: pr_err("nvm: failed to initialize nvm\n"); @@ -946,12 +943,12 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) if (s->lun_begin == -1 && s->lun_end == -1) { s->lun_begin = 0; - s->lun_end = dev->geo.nr_luns - 1; + s->lun_end = dev->geo.all_luns - 1; } - if (s->lun_begin > s->lun_end || s->lun_end >= dev->geo.nr_luns) { + if (s->lun_begin > s->lun_end || s->lun_end >= dev->geo.all_luns) { pr_err("nvm: lun out of bound (%u:%u > %u)\n", - s->lun_begin, s->lun_end, dev->geo.nr_luns - 1); + s->lun_begin, s->lun_end, 
dev->geo.all_luns - 1); return -EINVAL; } diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 76516ee84e9a..0849046b2a7a 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -979,7 +979,7 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, /* Start metadata */ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr); - smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns); + smeta_buf->window_wr_lun = cpu_to_le32(geo->all_luns); /* Fill metadata among lines */ if (cur) { @@ -1032,7 +1032,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, lm->sec_per_line); bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux, lm->sec_per_line); - line->sec_in_line -= geo->sec_per_blk; + line->sec_in_line -= geo->sec_per_chk; if (bit >= lm->emeta_bb) nr_bb++; } @@ -1746,7 +1746,7 @@ void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_lun *rlun; - int nr_luns = geo->nr_luns; + int nr_luns = geo->all_luns; int bit = -1; while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) { diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 695826a06b5d..d13bb51f0e2f 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -169,8 +169,8 @@ static int pblk_set_ppaf(struct pblk *pblk) } ppaf.ch_len = power_len; - power_len = get_count_order(geo->luns_per_chnl); - if (1 << power_len != geo->luns_per_chnl) { + power_len = get_count_order(geo->nr_luns); + if (1 << power_len != geo->nr_luns) { pr_err("pblk: supports only power-of-two LUN config.\n"); return -EINVAL; } @@ -254,7 +254,7 @@ static int pblk_core_init(struct pblk *pblk) struct nvm_geo *geo = &dev->geo; pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * - geo->nr_planes * geo->nr_luns; + geo->nr_planes * geo->all_luns; if (pblk_init_global_caches(pblk)) return 
-ENOMEM; @@ -270,21 +270,22 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->gen_ws_pool) goto free_page_bio_pool; - pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); + pblk->rec_pool = mempool_create_slab_pool(geo->all_luns, + pblk_rec_cache); if (!pblk->rec_pool) goto free_gen_ws_pool; - pblk->r_rq_pool = mempool_create_slab_pool(geo->nr_luns, + pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns, pblk_g_rq_cache); if (!pblk->r_rq_pool) goto free_rec_pool; - pblk->e_rq_pool = mempool_create_slab_pool(geo->nr_luns, + pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns, pblk_g_rq_cache); if (!pblk->e_rq_pool) goto free_r_rq_pool; - pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns, + pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns, pblk_w_rq_cache); if (!pblk->w_rq_pool) goto free_e_rq_pool; @@ -409,7 +410,7 @@ static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun) u8 *blks; int nr_blks, ret; - nr_blks = geo->blks_per_lun * geo->plane_mode; + nr_blks = geo->nr_chks * geo->plane_mode; blks = kmalloc(nr_blks, GFP_KERNEL); if (!blks) return -ENOMEM; @@ -482,20 +483,21 @@ static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) int i, ret; /* TODO: Implement unbalanced LUN support */ - if (geo->luns_per_chnl < 0) { + if (geo->nr_luns < 0) { pr_err("pblk: unbalanced LUN config.\n"); return -EINVAL; } - pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL); + pblk->luns = kcalloc(geo->all_luns, sizeof(struct pblk_lun), + GFP_KERNEL); if (!pblk->luns) return -ENOMEM; - for (i = 0; i < geo->nr_luns; i++) { + for (i = 0; i < geo->all_luns; i++) { /* Stripe across channels */ int ch = i % geo->nr_chnls; int lun_raw = i / geo->nr_chnls; - int lunid = lun_raw + ch * geo->luns_per_chnl; + int lunid = lun_raw + ch * geo->nr_luns; rlun = &pblk->luns[i]; rlun->bppa = luns[lunid]; @@ -590,8 +592,8 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) * on 
user capacity consider only provisioned blocks */ pblk->rl.total_blocks = nr_free_blks; - pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk; - pblk->capacity = provisioned * geo->sec_per_blk; + pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk; + pblk->capacity = provisioned * geo->sec_per_chk; atomic_set(&pblk->rl.free_blocks, nr_free_blks); } @@ -683,7 +685,7 @@ static int pblk_lines_init(struct pblk *pblk) int i, ret; pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE); - max_write_ppas = pblk->min_write_pgs * geo->nr_luns; + max_write_ppas = pblk->min_write_pgs * geo->all_luns; pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ? max_write_ppas : nvm_max_phys_sects(dev); pblk_set_sec_per_write(pblk, pblk->min_write_pgs); @@ -693,26 +695,26 @@ static int pblk_lines_init(struct pblk *pblk) return -EINVAL; } - div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod); + div_u64_rem(geo->sec_per_chk, pblk->min_write_pgs, &mod); if (mod) { pr_err("pblk: bad configuration of sectors/pages\n"); return -EINVAL; } - l_mg->nr_lines = geo->blks_per_lun; + l_mg->nr_lines = geo->nr_chks; l_mg->log_line = l_mg->data_line = NULL; l_mg->l_seq_nr = l_mg->d_seq_nr = 0; l_mg->nr_free_lines = 0; bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); - lm->sec_per_line = geo->sec_per_blk * geo->nr_luns; - lm->blk_per_line = geo->nr_luns; - lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); + lm->sec_per_line = geo->sec_per_chk * geo->all_luns; + lm->blk_per_line = geo->all_luns; + lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long); lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); - lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); + lm->lun_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long); lm->mid_thrs = lm->sec_per_line / 2; lm->high_thrs = lm->sec_per_line / 4; - lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs; + lm->meta_distance = (geo->all_luns / 2) * 
pblk->min_write_pgs; /* Calculate necessary pages for smeta. See comment over struct * line_smeta definition @@ -742,12 +744,12 @@ add_emeta_page: goto add_emeta_page; } - lm->emeta_bb = geo->nr_luns > i ? geo->nr_luns - i : 0; + lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0; lm->min_blk_line = 1; - if (geo->nr_luns > 1) + if (geo->all_luns > 1) lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + - lm->emeta_sec[0], geo->sec_per_blk); + lm->emeta_sec[0], geo->sec_per_chk); if (lm->min_blk_line > lm->blk_per_line) { pr_err("pblk: config. not supported. Min. LUN in line:%d\n", @@ -772,7 +774,7 @@ add_emeta_page: goto fail_free_bb_template; } - bb_distance = (geo->nr_luns) * geo->sec_per_pl; + bb_distance = (geo->all_luns) * geo->sec_per_pl; for (i = 0; i < lm->sec_per_line; i += bb_distance) bitmap_set(l_mg->bb_template, i, geo->sec_per_pl); @@ -844,7 +846,7 @@ add_emeta_page: pblk_set_provision(pblk, nr_free_blks); /* Cleanup per-LUN bad block lists - managed within lines on run-time */ - for (i = 0; i < geo->nr_luns; i++) + for (i = 0; i < geo->all_luns; i++) kfree(pblk->luns[i].bb_list); return 0; @@ -858,7 +860,7 @@ fail_free_bb_template: fail_free_meta: pblk_line_meta_free(pblk); fail: - for (i = 0; i < geo->nr_luns; i++) + for (i = 0; i < geo->all_luns; i++) kfree(pblk->luns[i].bb_list); return ret; @@ -1041,13 +1043,13 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, blk_queue_write_cache(tqueue, true, false); - tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size; + tqueue->limits.discard_granularity = geo->sec_per_chk * geo->sec_size; tqueue->limits.discard_alignment = 0; blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n", - geo->nr_luns, pblk->l_mg.nr_lines, + geo->all_luns, pblk->l_mg.nr_lines, (unsigned long long)pblk->rl.nr_secs, pblk->rwb.nr_entries); diff --git 
a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index eadb3eb5d4dc..ceec12d26643 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -188,7 +188,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line) int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] - - nr_bb * geo->sec_per_blk; + nr_bb * geo->sec_per_chk; } struct pblk_recov_alloc { diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index cd49e8875d4e..5cee2ac49c72 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -28,7 +28,7 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) ssize_t sz = 0; int i; - for (i = 0; i < geo->nr_luns; i++) { + for (i = 0; i < geo->all_luns; i++) { int active = 1; rlun = &pblk->luns[i]; @@ -238,7 +238,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) sz = snprintf(page, PAGE_SIZE - sz, "line: nluns:%d, nblks:%d, nsecs:%d\n", - geo->nr_luns, lm->blk_per_line, lm->sec_per_line); + geo->all_luns, lm->blk_per_line, lm->sec_per_line); sz += snprintf(page + sz, PAGE_SIZE - sz, "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n", @@ -287,7 +287,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page) "blk_line:%d, sec_line:%d, sec_blk:%d\n", lm->blk_per_line, lm->sec_per_line, - geo->sec_per_blk); + geo->sec_per_chk); return sz; } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 59a64d461a5d..c150728c3b49 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -907,7 +907,7 @@ static inline int pblk_pad_distance(struct pblk *pblk) struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - return NVM_MEM_PAGE_WRITE * geo->nr_luns * geo->sec_per_pl; + return NVM_MEM_PAGE_WRITE * geo->all_luns * geo->sec_per_pl; } static inline int pblk_dev_ppa_to_line(struct ppa_addr p) @@ 
-1212,10 +1212,10 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, if (!ppa->c.is_cached && ppa->g.ch < geo->nr_chnls && - ppa->g.lun < geo->luns_per_chnl && + ppa->g.lun < geo->nr_luns && ppa->g.pl < geo->nr_planes && - ppa->g.blk < geo->blks_per_lun && - ppa->g.pg < geo->pgs_per_blk && + ppa->g.blk < geo->nr_chks && + ppa->g.pg < geo->ws_per_chk && ppa->g.sec < geo->sec_per_pg) continue; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 15bf243f6096..50ef71ee3d86 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -135,7 +135,7 @@ struct nvme_nvm_id_group { __u8 num_lun; __u8 num_pln; __u8 rsvd1; - __le16 num_blk; + __le16 num_chk; __le16 num_pg; __le16 fpg_sz; __le16 csecs; @@ -215,36 +215,57 @@ static inline void _nvme_nvm_check_size(void) static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) { struct nvme_nvm_id_group *src; - struct nvm_id_group *dst; + struct nvm_id_group *grp; + int sec_per_pg, sec_per_pl, pg_per_blk; if (nvme_nvm_id->cgrps != 1) return -EINVAL; src = &nvme_nvm_id->groups[0]; - dst = &nvm_id->grp; - - dst->mtype = src->mtype; - dst->fmtype = src->fmtype; - dst->num_ch = src->num_ch; - dst->num_lun = src->num_lun; - dst->num_pln = src->num_pln; - - dst->num_pg = le16_to_cpu(src->num_pg); - dst->num_blk = le16_to_cpu(src->num_blk); - dst->fpg_sz = le16_to_cpu(src->fpg_sz); - dst->csecs = le16_to_cpu(src->csecs); - dst->sos = le16_to_cpu(src->sos); - - dst->trdt = le32_to_cpu(src->trdt); - dst->trdm = le32_to_cpu(src->trdm); - dst->tprt = le32_to_cpu(src->tprt); - dst->tprm = le32_to_cpu(src->tprm); - dst->tbet = le32_to_cpu(src->tbet); - dst->tbem = le32_to_cpu(src->tbem); - dst->mpos = le32_to_cpu(src->mpos); - dst->mccap = le32_to_cpu(src->mccap); - - dst->cpar = le16_to_cpu(src->cpar); + grp = &nvm_id->grp; + + grp->mtype = src->mtype; + grp->fmtype = src->fmtype; + + grp->num_ch = src->num_ch; + grp->num_lun = src->num_lun; + + 
grp->num_chk = le16_to_cpu(src->num_chk); + grp->csecs = le16_to_cpu(src->csecs); + grp->sos = le16_to_cpu(src->sos); + + pg_per_blk = le16_to_cpu(src->num_pg); + sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs; + sec_per_pl = sec_per_pg * src->num_pln; + grp->clba = sec_per_pl * pg_per_blk; + grp->ws_per_chk = pg_per_blk; + + grp->mpos = le32_to_cpu(src->mpos); + grp->cpar = le16_to_cpu(src->cpar); + grp->mccap = le32_to_cpu(src->mccap); + + grp->ws_opt = grp->ws_min = sec_per_pg; + grp->ws_seq = NVM_IO_SNGL_ACCESS; + + if (grp->mpos & 0x020202) { + grp->ws_seq = NVM_IO_DUAL_ACCESS; + grp->ws_opt <<= 1; + } else if (grp->mpos & 0x040404) { + grp->ws_seq = NVM_IO_QUAD_ACCESS; + grp->ws_opt <<= 2; + } + + grp->trdt = le32_to_cpu(src->trdt); + grp->trdm = le32_to_cpu(src->trdm); + grp->tprt = le32_to_cpu(src->tprt); + grp->tprm = le32_to_cpu(src->tprm); + grp->tbet = le32_to_cpu(src->tbet); + grp->tbem = le32_to_cpu(src->tbem); + + /* 1.2 compatibility */ + grp->num_pln = src->num_pln; + grp->num_pg = le16_to_cpu(src->num_pg); + grp->fpg_sz = le16_to_cpu(src->fpg_sz); return 0; } @@ -293,7 +314,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_nvm_command c = {}; struct nvme_nvm_bb_tbl *bb_tbl; - int nr_blks = geo->blks_per_lun * geo->plane_mode; + int nr_blks = geo->nr_chks * geo->plane_mode; int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; int ret = 0; @@ -334,7 +355,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, goto out; } - memcpy(blks, bb_tbl->blk, geo->blks_per_lun * geo->plane_mode); + memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode); out: kfree(bb_tbl); return ret; @@ -773,7 +794,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev, } else if (strcmp(attr->name, "num_planes") == 0) { return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln); } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ - return scnprintf(page, 
PAGE_SIZE, "%u\n", grp->num_blk); + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk); } else if (strcmp(attr->name, "num_pages") == 0) { return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg); } else if (strcmp(attr->name, "page_size") == 0) { diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index a5d8e0cbbb46..8e43bfebd38d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -159,12 +159,16 @@ struct nvm_id_group { u8 fmtype; u8 num_ch; u8 num_lun; - u8 num_pln; - u16 num_blk; - u16 num_pg; - u16 fpg_sz; + u16 num_chk; + u16 clba; u16 csecs; u16 sos; + + u16 ws_min; + u16 ws_opt; + u16 ws_seq; + u16 ws_per_chk; + u32 trdt; u32 trdm; u32 tprt; @@ -174,6 +178,11 @@ struct nvm_id_group { u32 mpos; u32 mccap; u16 cpar; + + /* 1.2 compatibility */ + u8 num_pln; + u16 num_pg; + u16 fpg_sz; }; struct nvm_addr_format { @@ -259,31 +268,36 @@ enum { NVM_BLK_ST_BAD = 0x8, /* Bad block */ }; + /* Device generic information */ struct nvm_geo { + /* generic geometry */ int nr_chnls; - int nr_luns; - int luns_per_chnl; /* -1 if channels are not symmetric */ - int nr_planes; - int sec_per_pg; /* only sectors for a single page */ - int pgs_per_blk; - int blks_per_lun; - int fpg_size; - int pfpg_size; /* size of buffer if all pages are to be read */ + int all_luns; /* across channels */ + int nr_luns; /* per channel */ + int nr_chks; /* per lun */ + int sec_size; int oob_size; int mccap; - struct nvm_addr_format ppaf; - /* Calculated/Cached values. These do not reflect the actual usable - * blocks at run-time. 
- */ + int sec_per_chk; + int sec_per_lun; + + int ws_min; + int ws_opt; + int ws_seq; + int ws_per_chk; + int max_rq_size; - int plane_mode; /* drive device in single, double or quad mode */ + struct nvm_addr_format ppaf; + + /* Legacy 1.2 specific geometry */ + int plane_mode; /* drive device in single, double or quad mode */ + int nr_planes; + int sec_per_pg; /* only sectors for a single page */ int sec_per_pl; /* all sectors across planes */ - int sec_per_blk; - int sec_per_lun; }; /* sub-device structure */ -- cgit v1.2.3 From e29c80e6dd70d60de5db305eecf1aecf707b02e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:04 +0100 Subject: lightnvm: refactor target type lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor target type lookup to use/not use locks explicitly instead of using a hidden parameter to make the function locking. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 6d6d2c12ff5b..5c2d0f3a830b 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -220,21 +220,25 @@ static const struct block_device_operations nvm_fops = { .owner = THIS_MODULE, }; -static struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) +static struct nvm_tgt_type *__nvm_find_target_type(const char *name) { - struct nvm_tgt_type *tmp, *tt = NULL; + struct nvm_tgt_type *tt; - if (lock) - down_write(&nvm_tgtt_lock); + list_for_each_entry(tt, &nvm_tgt_types, list) + if (!strcmp(name, tt->name)) + return tt; - list_for_each_entry(tmp, &nvm_tgt_types, list) - if (!strcmp(name, tmp->name)) { - tt = tmp; - break; - } + return NULL; +} + +static struct nvm_tgt_type *nvm_find_target_type(const char *name) +{ + struct nvm_tgt_type *tt; + + 
down_write(&nvm_tgtt_lock); + tt = __nvm_find_target_type(name); + up_write(&nvm_tgtt_lock); - if (lock) - up_write(&nvm_tgtt_lock); return tt; } @@ -249,7 +253,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) void *targetdata; int ret; - tt = nvm_find_target_type(create->tgttype, 1); + tt = nvm_find_target_type(create->tgttype); if (!tt) { pr_err("nvm: target type %s not found\n", create->tgttype); return -EINVAL; @@ -523,7 +527,7 @@ int nvm_register_tgt_type(struct nvm_tgt_type *tt) int ret = 0; down_write(&nvm_tgtt_lock); - if (nvm_find_target_type(tt->name, 0)) + if (__nvm_find_target_type(tt->name)) ret = -EEXIST; else list_add(&tt->list, &nvm_tgt_types); -- cgit v1.2.3 From bd77b23b40370f0f37b6457a8d2a4ed2f2ba22c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:05 +0100 Subject: lightnvm: guarantee target unique name across devs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now, target unique naming is only guaranteed per device. This is ok from a lightnvm perspective, but not from a sysfs one, since groups will collide regardless of the underlying device. Check that names are unique across all lightnvm-capable devices. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 5c2d0f3a830b..d5f231c9339e 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -56,6 +56,30 @@ static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) return NULL; } +static bool nvm_target_exists(const char *name) +{ + struct nvm_dev *dev; + struct nvm_target *tgt; + bool ret = false; + + down_write(&nvm_lock); + list_for_each_entry(dev, &nvm_devices, devices) { + mutex_lock(&dev->mlock); + list_for_each_entry(tgt, &dev->targets, list) { + if (!strcmp(name, tgt->disk->disk_name)) { + ret = true; + mutex_unlock(&dev->mlock); + goto out; + } + } + mutex_unlock(&dev->mlock); + } + +out: + up_write(&nvm_lock); + return ret; +} + static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end) { int i; @@ -259,14 +283,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return -EINVAL; } - mutex_lock(&dev->mlock); - t = nvm_find_target(dev, create->tgtname); - if (t) { - pr_err("nvm: target name already exists.\n"); - mutex_unlock(&dev->mlock); + if (nvm_target_exists(create->tgtname)) { + pr_err("nvm: target name already exists (%s)\n", + create->tgtname); return -EINVAL; } - mutex_unlock(&dev->mlock); ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end); if (ret) -- cgit v1.2.3 From b1bcfda10549c6f887e4360a7691021808206c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:06 +0100 Subject: lightnvm: pblk: compress and reorder helper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Through time, we have generated some redundant helper functions. Refactor them to eliminate redundant and unnecessary code. 
Also, reorder them to improve readability Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 24 ++++---- drivers/lightnvm/pblk-rb.c | 2 +- drivers/lightnvm/pblk-read.c | 4 +- drivers/lightnvm/pblk-recovery.c | 20 +++---- drivers/lightnvm/pblk.h | 119 +++++++++++++++------------------------ 5 files changed, 71 insertions(+), 98 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 0849046b2a7a..54d0cef7116e 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -32,8 +32,8 @@ static void pblk_line_mark_bb(struct work_struct *work) struct pblk_line *line; int pos; - line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)]; - pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa); + line = &pblk->lines[pblk_ppa_to_line(*ppa)]; + pos = pblk_ppa_to_pos(&dev->geo, *ppa); pr_err("pblk: failed to mark bb, line:%d, pos:%d\n", line->id, pos); @@ -48,7 +48,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int pos = pblk_dev_ppa_to_pos(geo, *ppa); + int pos = pblk_ppa_to_pos(geo, *ppa); pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos); atomic_long_inc(&pblk->erase_failed); @@ -66,7 +66,7 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) { struct pblk_line *line; - line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)]; + line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)]; atomic_dec(&line->left_seblks); if (rqd->error) { @@ -144,7 +144,7 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa) BUG_ON(pblk_ppa_empty(ppa)); #endif - line_id = pblk_tgt_ppa_to_line(ppa); + line_id = pblk_ppa_to_line(ppa); line = &pblk->lines[line_id]; paddr = pblk_dev_ppa_to_line_addr(pblk, ppa); @@ -650,7 +650,7 @@ next_rq: } else { for (i = 0; i < rqd.nr_ppas; ) { struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id); - int pos = 
pblk_dev_ppa_to_pos(geo, ppa); + int pos = pblk_ppa_to_pos(geo, ppa); int read_type = PBLK_READ_RANDOM; if (pblk_io_aligned(pblk, rq_ppas)) @@ -668,7 +668,7 @@ next_rq: } ppa = addr_to_gen_ppa(pblk, paddr, id); - pos = pblk_dev_ppa_to_pos(geo, ppa); + pos = pblk_ppa_to_pos(geo, ppa); } if (pblk_boundary_paddr_checks(pblk, paddr + min)) { @@ -854,8 +854,8 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) struct nvm_geo *geo = &dev->geo; pr_err("pblk: could not sync erase line:%d,blk:%d\n", - pblk_dev_ppa_to_line(ppa), - pblk_dev_ppa_to_pos(geo, ppa)); + pblk_ppa_to_line(ppa), + pblk_ppa_to_pos(geo, ppa)); rqd.error = ret; goto out; @@ -1561,8 +1561,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) struct nvm_geo *geo = &dev->geo; pr_err("pblk: could not async erase line:%d,blk:%d\n", - pblk_dev_ppa_to_line(ppa), - pblk_dev_ppa_to_pos(geo, ppa)); + pblk_ppa_to_line(ppa), + pblk_ppa_to_pos(geo, ppa)); } return err; @@ -1884,7 +1884,7 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, /* If the L2P entry maps to a line, the reference is valid */ if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { - int line_id = pblk_dev_ppa_to_line(ppa); + int line_id = pblk_ppa_to_line(ppa); struct pblk_line *line = &pblk->lines[line_id]; kref_get(&line->ref); diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index b8f78e401482..62db40845bfd 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -226,7 +226,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, entry->cacheline); - line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)]; + line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)]; kref_put(&line->ref, pblk_line_put); clean_wctx(w_ctx); rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1); diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 
ca79d8fb3e60..0fe0c040f359 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -141,7 +141,7 @@ static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) struct ppa_addr ppa = ppa_list[i]; struct pblk_line *line; - line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + line = &pblk->lines[pblk_ppa_to_line(ppa)]; kref_put(&line->ref, pblk_line_put_wq); } } @@ -270,7 +270,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, i = 0; hole = find_first_zero_bit(read_bitmap, nr_secs); do { - int line_id = pblk_dev_ppa_to_line(rqd->ppa_list[i]); + int line_id = pblk_ppa_to_line(rqd->ppa_list[i]); struct pblk_line *line = &pblk->lines[line_id]; kref_put(&line->ref, pblk_line_put); diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index ceec12d26643..1b272ae8a315 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -149,7 +149,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) struct ppa_addr ppa; int pos; - ppa = addr_to_pblk_ppa(pblk, i, line->id); + ppa = addr_to_gen_ppa(pblk, i, line->id); pos = pblk_ppa_to_pos(geo, ppa); /* Do not update bad blocks */ @@ -263,12 +263,12 @@ next_read_rq: int pos; ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); - pos = pblk_dev_ppa_to_pos(geo, ppa); + pos = pblk_ppa_to_pos(geo, ppa); while (test_bit(pos, line->blk_bitmap)) { r_ptr_int += pblk->min_write_pgs; ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); - pos = pblk_dev_ppa_to_pos(geo, ppa); + pos = pblk_ppa_to_pos(geo, ppa); } for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++) @@ -411,12 +411,12 @@ next_pad_rq: int pos; w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); - ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id); + ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); pos = pblk_ppa_to_pos(geo, ppa); while (test_bit(pos, line->blk_bitmap)) { w_ptr += pblk->min_write_pgs; - ppa = addr_to_pblk_ppa(pblk, 
w_ptr, line->id); + ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); pos = pblk_ppa_to_pos(geo, ppa); } @@ -541,12 +541,12 @@ next_rq: w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_dev_ppa_to_pos(geo, ppa); + pos = pblk_ppa_to_pos(geo, ppa); while (test_bit(pos, line->blk_bitmap)) { w_ptr += pblk->min_write_pgs; ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); - pos = pblk_dev_ppa_to_pos(geo, ppa); + pos = pblk_ppa_to_pos(geo, ppa); } for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) @@ -672,12 +672,12 @@ next_rq: paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); ppa = addr_to_gen_ppa(pblk, paddr, line->id); - pos = pblk_dev_ppa_to_pos(geo, ppa); + pos = pblk_ppa_to_pos(geo, ppa); while (test_bit(pos, line->blk_bitmap)) { paddr += pblk->min_write_pgs; ppa = addr_to_gen_ppa(pblk, paddr, line->id); - pos = pblk_dev_ppa_to_pos(geo, ppa); + pos = pblk_ppa_to_pos(geo, ppa); } for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++) @@ -817,7 +817,7 @@ static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line) while (emeta_secs) { emeta_start--; - ppa = addr_to_pblk_ppa(pblk, emeta_start, line->id); + ppa = addr_to_gen_ppa(pblk, emeta_start, line->id); pos = pblk_ppa_to_pos(geo, ppa); if (!test_bit(pos, line->blk_bitmap)) emeta_secs--; diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index c150728c3b49..d68a94dca731 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -910,25 +910,44 @@ static inline int pblk_pad_distance(struct pblk *pblk) return NVM_MEM_PAGE_WRITE * geo->all_luns * geo->sec_per_pl; } -static inline int pblk_dev_ppa_to_line(struct ppa_addr p) +static inline int pblk_ppa_to_line(struct ppa_addr p) { return p.g.blk; } -static inline int pblk_tgt_ppa_to_line(struct ppa_addr p) +static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) { - return p.g.blk; + return p.g.lun * geo->nr_chnls + p.g.ch; } -static inline int 
pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) +static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, + u64 line_id) { - return p.g.lun * geo->nr_chnls + p.g.ch; + struct ppa_addr ppa; + + ppa.ppa = 0; + ppa.g.blk = line_id; + ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset; + ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset; + ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset; + ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset; + ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset; + + return ppa; } -/* A block within a line corresponds to the lun */ -static inline int pblk_dev_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) +static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, + struct ppa_addr p) { - return p.g.lun * geo->nr_chnls + p.g.ch; + u64 paddr; + + paddr = (u64)p.g.pg << pblk->ppaf.pg_offset; + paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset; + paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset; + paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset; + paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset; + + return paddr; } static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) @@ -960,24 +979,6 @@ static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) return ppa64; } -static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, - sector_t lba) -{ - struct ppa_addr ppa; - - if (pblk->ppaf_bitsize < 32) { - u32 *map = (u32 *)pblk->trans_map; - - ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); - } else { - struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map; - - ppa = map[lba]; - } - - return ppa; -} - static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) { u32 ppa32 = 0; @@ -999,33 +1000,36 @@ static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) return ppa32; } -static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, - struct 
ppa_addr ppa) +static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, + sector_t lba) { + struct ppa_addr ppa; + if (pblk->ppaf_bitsize < 32) { u32 *map = (u32 *)pblk->trans_map; - map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); + ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); } else { - u64 *map = (u64 *)pblk->trans_map; + struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map; - map[lba] = ppa.ppa; + ppa = map[lba]; } + + return ppa; } -static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, - struct ppa_addr p) +static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa) { - u64 paddr; + if (pblk->ppaf_bitsize < 32) { + u32 *map = (u32 *)pblk->trans_map; - paddr = 0; - paddr |= (u64)p.g.pg << pblk->ppaf.pg_offset; - paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset; - paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset; - paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset; - paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset; + map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); + } else { + u64 *map = (u64 *)pblk->trans_map; - return paddr; + map[lba] = ppa.ppa; + } } static inline int pblk_ppa_empty(struct ppa_addr ppa_addr) @@ -1066,32 +1070,6 @@ static inline struct ppa_addr pblk_cacheline_to_addr(int addr) return p; } -static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, - u64 line_id) -{ - struct ppa_addr ppa; - - ppa.ppa = 0; - ppa.g.blk = line_id; - ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset; - ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset; - ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset; - ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset; - ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset; - - return ppa; -} - -static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr, - u64 line_id) -{ - struct ppa_addr ppa; - - ppa = addr_to_gen_ppa(pblk, paddr, line_id); - - return ppa; -} - static inline u32 
pblk_calc_meta_header_crc(struct pblk *pblk, struct line_header *header) { @@ -1245,7 +1223,7 @@ static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) for (i = 0; i < rqd->nr_ppas; i++) { ppa = ppa_list[i]; - line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + line = &pblk->lines[pblk_ppa_to_line(ppa)]; spin_lock(&line->lock); if (line->state != PBLK_LINESTATE_OPEN) { @@ -1288,11 +1266,6 @@ static inline unsigned int pblk_get_secs(struct bio *bio) return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE; } -static inline sector_t pblk_get_sector(sector_t lba) -{ - return lba * NR_PHY_IN_LOG; -} - static inline void pblk_setup_uuid(struct pblk *pblk) { uuid_le uuid; -- cgit v1.2.3 From d6d3ec2a3be37ca5309013b46ede37b2aa09ced1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:07 +0100 Subject: lightnvm: pblk: remove pblk_for_each_lun helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index d68a94dca731..1dbb0bf4d9a3 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -51,10 +51,6 @@ #define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR) -#define pblk_for_each_lun(pblk, rlun, i) \ - for ((i) = 0, rlun = &(pblk)->luns[0]; \ - (i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)]) - /* Static pool sizes */ #define PBLK_GEN_WS_POOL_SIZE (2) -- cgit v1.2.3 From 06bc072b3fb1d08898b96118ab428ea33a8da0a6 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 5 Jan 2018 14:16:08 +0100 Subject: lightnvm: pblk: refactor emeta consistency check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently pblk_recov_get_lba_list does two separate things: it checks the consistency of the emeta and
extracts the lba list. This patch separates the consistency check to make the code easier to read and to prepare for version checks of the line emeta persistent data format version. Signed-off-by: Hans Holmberg Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 9 ++++++++- drivers/lightnvm/pblk-recovery.c | 15 ++++++++++----- drivers/lightnvm/pblk.h | 2 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 9c8e114c8a54..cec9a56dda14 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -169,7 +169,14 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) * the line untouched. TODO: Implement a recovery routine that scans and * moves all sectors on the line. */ - lba_list = pblk_recov_get_lba_list(pblk, emeta_buf); + + ret = pblk_recov_check_emeta(pblk, emeta_buf); + if (ret) { + pr_err("pblk: inconsistent emeta (line %d)\n", line->id); + goto fail_free_emeta; + } + + lba_list = emeta_to_lbas(pblk, emeta_buf); if (!lba_list) { pr_err("pblk: could not interpret emeta (line %d)\n", line->id); goto fail_free_emeta; diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 1b272ae8a315..39a2e193edcc 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -111,18 +111,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, return 0; } -__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf) +int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf) { u32 crc; crc = pblk_calc_emeta_crc(pblk, emeta_buf); if (le32_to_cpu(emeta_buf->crc) != crc) - return NULL; + return 1; if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) - return NULL; + return 1; - return emeta_to_lbas(pblk, emeta_buf); + return 0; } static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct 
pblk_line *line) @@ -137,7 +137,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) u64 nr_valid_lbas, nr_lbas = 0; u64 i; - lba_list = pblk_recov_get_lba_list(pblk, emeta_buf); + lba_list = emeta_to_lbas(pblk, emeta_buf); if (!lba_list) return 1; @@ -938,6 +938,11 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) goto next; } + if (pblk_recov_check_emeta(pblk, line->emeta->buf)) { + pblk_recov_l2p_from_oob(pblk, line); + goto next; + } + if (pblk_recov_l2p_from_emeta(pblk, line)) pblk_recov_l2p_from_oob(pblk, line); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 1dbb0bf4d9a3..8851b18bb099 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -808,7 +808,7 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); void pblk_submit_rec(struct work_struct *work); struct pblk_line *pblk_recov_l2p(struct pblk *pblk); int pblk_recov_pad(struct pblk *pblk); -__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta); +int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta); int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, struct pblk_rec_ctx *recovery, u64 *comp_bits, unsigned int comp); -- cgit v1.2.3 From 8154d296d904a6627d69878ab217ef6928335c2d Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 5 Jan 2018 14:16:09 +0100 Subject: lightnvm: pblk: rename sync_point to flush_point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sync point is a really confusing name for keeping track of the last entry that needs to be flushed so change the name to flush_point instead.
Signed-off-by: Hans Holmberg Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-rb.c | 61 ++++++++++++++++++++++--------------------- drivers/lightnvm/pblk-write.c | 2 +- drivers/lightnvm/pblk.h | 6 ++--- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 62db40845bfd..941842e321ea 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -54,7 +54,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, rb->seg_size = (1 << power_seg_sz); rb->nr_entries = (1 << power_size); rb->mem = rb->subm = rb->sync = rb->l2p_update = 0; - rb->sync_point = EMPTY_ENTRY; + rb->flush_point = EMPTY_ENTRY; spin_lock_init(&rb->w_lock); spin_lock_init(&rb->s_lock); @@ -112,7 +112,7 @@ int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base, up_write(&pblk_rb_lock); #ifdef CONFIG_NVM_DEBUG - atomic_set(&rb->inflight_sync_point, 0); + atomic_set(&rb->inflight_flush_point, 0); #endif /* @@ -349,26 +349,26 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, smp_store_release(&entry->w_ctx.flags, flags); } -static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio, +static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, unsigned int pos) { struct pblk_rb_entry *entry; - unsigned int subm, sync_point; + unsigned int subm, flush_point; subm = READ_ONCE(rb->subm); #ifdef CONFIG_NVM_DEBUG - atomic_inc(&rb->inflight_sync_point); + atomic_inc(&rb->inflight_flush_point); #endif if (pos == subm) return 0; - sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); - entry = &rb->entries[sync_point]; + flush_point = (pos == 0) ? 
(rb->nr_entries - 1) : (pos - 1); + entry = &rb->entries[flush_point]; - /* Protect syncs */ - smp_store_release(&rb->sync_point, sync_point); + /* Protect flush points */ + smp_store_release(&rb->flush_point, flush_point); if (!bio) return 0; @@ -416,7 +416,7 @@ void pblk_rb_flush(struct pblk_rb *rb) struct pblk *pblk = container_of(rb, struct pblk, rwb); unsigned int mem = READ_ONCE(rb->mem); - if (pblk_rb_sync_point_set(rb, NULL, mem)) + if (pblk_rb_flush_point_set(rb, NULL, mem)) return; pblk_write_should_kick(pblk); @@ -440,7 +440,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, #ifdef CONFIG_NVM_DEBUG atomic_long_inc(&pblk->nr_flush); #endif - if (pblk_rb_sync_point_set(&pblk->rwb, bio, mem)) + if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem)) *io_ret = NVM_IO_OK; } @@ -607,17 +607,18 @@ try: } if (flags & PBLK_FLUSH_ENTRY) { - unsigned int sync_point; + unsigned int flush_point; - sync_point = READ_ONCE(rb->sync_point); - if (sync_point == pos) { - /* Protect syncs */ - smp_store_release(&rb->sync_point, EMPTY_ENTRY); + flush_point = READ_ONCE(rb->flush_point); + if (flush_point == pos) { + /* Protect flush points */ + smp_store_release(&rb->flush_point, + EMPTY_ENTRY); } flags &= ~PBLK_FLUSH_ENTRY; #ifdef CONFIG_NVM_DEBUG - atomic_dec(&rb->inflight_sync_point); + atomic_dec(&rb->inflight_flush_point); #endif } @@ -746,20 +747,20 @@ unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) return sync; } -unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb) +unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb) { - unsigned int subm, sync_point; + unsigned int subm, flush_point; unsigned int count; - /* Protect syncs */ - sync_point = smp_load_acquire(&rb->sync_point); - if (sync_point == EMPTY_ENTRY) + /* Protect flush points */ + flush_point = smp_load_acquire(&rb->flush_point); + if (flush_point == EMPTY_ENTRY) return 0; subm = READ_ONCE(rb->subm); /* The sync point itself counts as a 
sector to sync */ - count = pblk_rb_ring_count(sync_point, subm, rb->nr_entries) + 1; + count = pblk_rb_ring_count(flush_point, subm, rb->nr_entries) + 1; return count; } @@ -801,7 +802,7 @@ int pblk_rb_tear_down_check(struct pblk_rb *rb) if ((rb->mem == rb->subm) && (rb->subm == rb->sync) && (rb->sync == rb->l2p_update) && - (rb->sync_point == EMPTY_ENTRY)) { + (rb->flush_point == EMPTY_ENTRY)) { goto out; } @@ -848,7 +849,7 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf) queued_entries++; spin_unlock_irq(&rb->s_lock); - if (rb->sync_point != EMPTY_ENTRY) + if (rb->flush_point != EMPTY_ENTRY) offset = scnprintf(buf, PAGE_SIZE, "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n", rb->nr_entries, @@ -857,14 +858,14 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf) rb->sync, rb->l2p_update, #ifdef CONFIG_NVM_DEBUG - atomic_read(&rb->inflight_sync_point), + atomic_read(&rb->inflight_flush_point), #else 0, #endif - rb->sync_point, + rb->flush_point, pblk_rb_read_count(rb), pblk_rb_space(rb), - pblk_rb_sync_point_count(rb), + pblk_rb_flush_point_count(rb), queued_entries); else offset = scnprintf(buf, PAGE_SIZE, @@ -875,13 +876,13 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf) rb->sync, rb->l2p_update, #ifdef CONFIG_NVM_DEBUG - atomic_read(&rb->inflight_sync_point), + atomic_read(&rb->inflight_flush_point), #else 0, #endif pblk_rb_read_count(rb), pblk_rb_space(rb), - pblk_rb_sync_point_count(rb), + pblk_rb_flush_point_count(rb), queued_entries); return offset; diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 6c30b7a6e559..018af87cadee 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -508,7 +508,7 @@ static int pblk_submit_write(struct pblk *pblk) if (!secs_avail) return 1; - secs_to_flush = pblk_rb_sync_point_count(&pblk->rwb); + secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb); if (!secs_to_flush && secs_avail < pblk->min_write_pgs) return 1; diff --git a/drivers/lightnvm/pblk.h 
b/drivers/lightnvm/pblk.h index 8851b18bb099..5f7cd6faa8c7 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -166,7 +166,7 @@ struct pblk_rb { * the last submitted entry that has * been successfully persisted to media */ - unsigned int sync_point; /* Sync point - last entry that must be + unsigned int flush_point; /* Sync point - last entry that must be * flushed to the media. Used with * REQ_FLUSH and REQ_FUA */ @@ -189,7 +189,7 @@ struct pblk_rb { spinlock_t s_lock; /* Sync lock */ #ifdef CONFIG_NVM_DEBUG - atomic_t inflight_sync_point; /* Not served REQ_FLUSH | REQ_FUA */ + atomic_t inflight_flush_point; /* Not served REQ_FLUSH | REQ_FUA */ #endif }; @@ -687,7 +687,7 @@ unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries); struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb, struct ppa_addr *ppa); void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags); -unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb); +unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb); unsigned int pblk_rb_read_count(struct pblk_rb *rb); unsigned int pblk_rb_sync_count(struct pblk_rb *rb); -- cgit v1.2.3 From 533657c190e5a94e585e08d1c4bdd2295c76391a Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 5 Jan 2018 14:16:10 +0100 Subject: lightnvm: pblk: clear flush point on completed writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move completion of syncs and clearing of flush points to the write completion path - this ensures that the data has been committed to the media before completing bios containing syncs.
Signed-off-by: Hans Holmberg Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-rb.c | 58 +++++++++++++++++++++---------------------- drivers/lightnvm/pblk-write.c | 17 ++++++++++++- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 941842e321ea..672ef8c47892 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -353,17 +353,17 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, unsigned int pos) { struct pblk_rb_entry *entry; - unsigned int subm, flush_point; + unsigned int sync, flush_point; - subm = READ_ONCE(rb->subm); + sync = READ_ONCE(rb->sync); + + if (pos == sync) + return 0; #ifdef CONFIG_NVM_DEBUG atomic_inc(&rb->inflight_flush_point); #endif - if (pos == subm) - return 0; - flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); entry = &rb->entries[flush_point]; @@ -606,22 +606,6 @@ try: return NVM_IO_ERR; } - if (flags & PBLK_FLUSH_ENTRY) { - unsigned int flush_point; - - flush_point = READ_ONCE(rb->flush_point); - if (flush_point == pos) { - /* Protect flush points */ - smp_store_release(&rb->flush_point, - EMPTY_ENTRY); - } - - flags &= ~PBLK_FLUSH_ENTRY; -#ifdef CONFIG_NVM_DEBUG - atomic_dec(&rb->inflight_flush_point); -#endif - } - flags &= ~PBLK_WRITTEN_DATA; flags |= PBLK_SUBMITTED_ENTRY; @@ -731,15 +715,24 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags) unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) { - unsigned int sync; - unsigned int i; - + unsigned int sync, flush_point; lockdep_assert_held(&rb->s_lock); sync = READ_ONCE(rb->sync); + flush_point = READ_ONCE(rb->flush_point); - for (i = 0; i < nr_entries; i++) - sync = (sync + 1) & (rb->nr_entries - 1); + if (flush_point != EMPTY_ENTRY) { + unsigned int secs_to_flush; + + secs_to_flush = pblk_rb_ring_count(flush_point, sync, + rb->nr_entries); + if 
(secs_to_flush < nr_entries) { + /* Protect flush points */ + smp_store_release(&rb->flush_point, EMPTY_ENTRY); + } + } + + sync = (sync + nr_entries) & (rb->nr_entries - 1); /* Protect from counts */ smp_store_release(&rb->sync, sync); @@ -747,22 +740,27 @@ unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries) return sync; } +/* Calculate how many sectors to submit up to the current flush point. */ unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb) { - unsigned int subm, flush_point; - unsigned int count; + unsigned int subm, sync, flush_point; + unsigned int submitted, to_flush; /* Protect flush points */ flush_point = smp_load_acquire(&rb->flush_point); if (flush_point == EMPTY_ENTRY) return 0; + /* Protect syncs */ + sync = smp_load_acquire(&rb->sync); + subm = READ_ONCE(rb->subm); + submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries); /* The sync point itself counts as a sector to sync */ - count = pblk_rb_ring_count(flush_point, subm, rb->nr_entries) + 1; + to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1; - return count; + return (submitted < to_flush) ? (to_flush - submitted) : 0; } /* diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 018af87cadee..aae86ed60b98 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -21,13 +21,28 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx) { struct bio *original_bio; + struct pblk_rb *rwb = &pblk->rwb; unsigned long ret; int i; for (i = 0; i < c_ctx->nr_valid; i++) { struct pblk_w_ctx *w_ctx; + int pos = c_ctx->sentry + i; + int flags; + + w_ctx = pblk_rb_w_ctx(rwb, pos); + flags = READ_ONCE(w_ctx->flags); + + if (flags & PBLK_FLUSH_ENTRY) { + flags &= ~PBLK_FLUSH_ENTRY; + /* Release flags on context. 
Protect from writes */ + smp_store_release(&w_ctx->flags, flags); + +#ifdef CONFIG_NVM_DEBUG + atomic_dec(&rwb->inflight_flush_point); +#endif + } - w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i); while ((original_bio = bio_list_pop(&w_ctx->bios))) bio_endio(original_bio); } -- cgit v1.2.3 From b36bbf9d4f731269b4fe88e1060af1e8c28b7972 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 5 Jan 2018 14:16:11 +0100 Subject: lightnvm: pblk: prevent premature sync point resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unless we protect flush pointer updates with a lock, we risk resetting new flush points before we've synced all sectors up to that point. This patch protects new flush points with the same spin lock that is being held when advancing the sync pointer and resetting completed flush points. Signed-off-by: Hans Holmberg Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-rb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 672ef8c47892..ec8fc314646b 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -367,17 +367,17 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); entry = &rb->entries[flush_point]; + pblk_rb_sync_init(rb, NULL); + /* Protect flush points */ smp_store_release(&rb->flush_point, flush_point); - if (!bio) - return 0; + if (bio) + bio_list_add(&entry->w_ctx.bios, bio); - spin_lock_irq(&rb->s_lock); - bio_list_add(&entry->w_ctx.bios, bio); - spin_unlock_irq(&rb->s_lock); + pblk_rb_sync_end(rb, NULL); - return 1; + return bio ? 
1 : 0; } static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries, -- cgit v1.2.3 From aed49e195af6b621d59b3e7f85aa5c5a71e90050 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 5 Jan 2018 14:16:12 +0100 Subject: lightnvm: pblk: remove pblk_gc_stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk_gc_stop just sets pblk->gc->gc_active to zero, ignoring the flush parameter. This is plain confusing, so remove the function and set the gc active flag at the call points instead. Signed-off-by: Hans Holmberg Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-gc.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index cec9a56dda14..3d899383666e 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -526,22 +526,12 @@ void pblk_gc_should_start(struct pblk *pblk) } } -/* - * If flush_wq == 1 then no lock should be held by the caller since - * flush_workqueue can sleep - */ -static void pblk_gc_stop(struct pblk *pblk, int flush_wq) -{ - pblk->gc.gc_active = 0; - pr_debug("pblk: gc stop\n"); -} - void pblk_gc_should_stop(struct pblk *pblk) { struct pblk_gc *gc = &pblk->gc; if (gc->gc_active && !gc->gc_forced) - pblk_gc_stop(pblk, 0); + gc->gc_active = 0; } void pblk_gc_should_kick(struct pblk *pblk) @@ -667,7 +657,7 @@ void pblk_gc_exit(struct pblk *pblk) gc->gc_enabled = 0; del_timer_sync(&gc->gc_timer); - pblk_gc_stop(pblk, 1); + gc->gc_active = 0; if (gc->gc_ts) kthread_stop(gc->gc_ts); -- cgit v1.2.3 From a7689938ef4c8678ff78c44d11e4b0b897244714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:13 +0100 Subject: lightnvm: pblk: use exact free block counter in RL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now, pblk's rate-limiter has used a 
heuristic to reserve space for GC I/O given that the over-provision area was fixed. In preparation for allowing to define the over-provision area on target creation, define a dedicated free_block counter in the rate-limiter to track the number of blocks being used for user data. Signed-off-by: Javier González Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 19 +++++--------- drivers/lightnvm/pblk-init.c | 18 +++++++++++--- drivers/lightnvm/pblk-recovery.c | 4 +-- drivers/lightnvm/pblk-rl.c | 54 +++++++++++++++++++++++++++------------- drivers/lightnvm/pblk-sysfs.c | 9 ++++--- drivers/lightnvm/pblk.h | 15 ++++++----- 6 files changed, 73 insertions(+), 46 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 54d0cef7116e..5ec7ad68ff38 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1145,7 +1145,7 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) } spin_unlock(&l_mg->free_lock); - pblk_rl_free_lines_dec(&pblk->rl, line); + pblk_rl_free_lines_dec(&pblk->rl, line, true); if (!pblk_line_init_bb(pblk, line, 0)) { list_add(&line->list, &l_mg->free_list); @@ -1233,7 +1233,7 @@ retry: l_mg->data_line = retry_line; spin_unlock(&l_mg->free_lock); - pblk_rl_free_lines_dec(&pblk->rl, retry_line); + pblk_rl_free_lines_dec(&pblk->rl, line, false); if (pblk_line_erase(pblk, retry_line)) goto retry; @@ -1252,7 +1252,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *line; - int is_next = 0; spin_lock(&l_mg->free_lock); line = pblk_line_get(pblk); @@ -1280,7 +1279,6 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) } else { l_mg->data_next->seq_nr = l_mg->d_seq_nr++; l_mg->data_next->type = PBLK_LINETYPE_DATA; - is_next = 1; } spin_unlock(&l_mg->free_lock); @@ -1290,10 +1288,6 @@ struct pblk_line *pblk_line_get_first_data(struct 
pblk *pblk) return NULL; } - pblk_rl_free_lines_dec(&pblk->rl, line); - if (is_next) - pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); - retry_setup: if (!pblk_line_init_metadata(pblk, line, NULL)) { line = pblk_line_retry(pblk, line); @@ -1311,6 +1305,8 @@ retry_setup: goto retry_setup; } + pblk_rl_free_lines_dec(&pblk->rl, line, true); + return line; } @@ -1395,7 +1391,6 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk) struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *cur, *new = NULL; unsigned int left_seblks; - int is_next = 0; cur = l_mg->data_line; new = l_mg->data_next; @@ -1444,6 +1439,8 @@ retry_setup: goto retry_setup; } + pblk_rl_free_lines_dec(&pblk->rl, new, true); + /* Allocate next line for preparation */ spin_lock(&l_mg->free_lock); l_mg->data_next = pblk_line_get(pblk); @@ -1457,13 +1454,9 @@ retry_setup: } else { l_mg->data_next->seq_nr = l_mg->d_seq_nr++; l_mg->data_next->type = PBLK_LINETYPE_DATA; - is_next = 1; } spin_unlock(&l_mg->free_lock); - if (is_next) - pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); - out: return new; } diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index d13bb51f0e2f..c8a718249e26 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -579,22 +579,34 @@ static unsigned int calc_emeta_len(struct pblk *pblk) static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) { struct nvm_tgt_dev *dev = pblk->dev; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; struct nvm_geo *geo = &dev->geo; sector_t provisioned; + int sec_meta, blk_meta; - pblk->over_pct = 20; + pblk->op = 20; provisioned = nr_free_blks; - provisioned *= (100 - pblk->over_pct); + provisioned *= (100 - pblk->op); sector_div(provisioned, 100); + pblk->op_blks = nr_free_blks - provisioned; + /* Internally pblk manages all free blocks, but all calculations based * on user capacity consider only provisioned blocks */ 
pblk->rl.total_blocks = nr_free_blks; pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk; - pblk->capacity = provisioned * geo->sec_per_chk; + + /* Consider sectors used for metadata */ + sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; + blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk); + + pblk->capacity = (provisioned - blk_meta) * geo->sec_per_chk; + atomic_set(&pblk->rl.free_blocks, nr_free_blks); + atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); } static int pblk_lines_alloc_metadata(struct pblk *pblk) diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 39a2e193edcc..fd3803656e68 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -989,10 +989,8 @@ next: } spin_unlock(&l_mg->free_lock); - if (is_next) { + if (is_next) pblk_line_erase(pblk, l_mg->data_next); - pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); - } out: if (found_lines != recovered_lines) diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index dacc71922260..0d457b162f23 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -89,17 +89,15 @@ unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) return atomic_read(&rl->free_blocks); } -/* - * We check for (i) the number of free blocks in the current LUN and (ii) the - * total number of free blocks in the pblk instance. This is to even out the - * number of free blocks on each LUN when GC kicks in. - * - * Only the total number of free blocks is used to configure the rate limiter. 
- */ -void pblk_rl_update_rates(struct pblk_rl *rl) +unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl) +{ + return atomic_read(&rl->free_user_blocks); +} + +static void __pblk_rl_update_rates(struct pblk_rl *rl, + unsigned long free_blocks) { struct pblk *pblk = container_of(rl, struct pblk, rl); - unsigned long free_blocks = pblk_rl_nr_free_blks(rl); int max = rl->rb_budget; if (free_blocks >= rl->high) { @@ -132,20 +130,37 @@ void pblk_rl_update_rates(struct pblk_rl *rl) pblk_gc_should_stop(pblk); } +void pblk_rl_update_rates(struct pblk_rl *rl) +{ + __pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl)); +} + void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) { int blk_in_line = atomic_read(&line->blk_in_line); + int free_blocks; atomic_add(blk_in_line, &rl->free_blocks); - pblk_rl_update_rates(rl); + free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks); + + __pblk_rl_update_rates(rl, free_blocks); } -void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) +void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, + bool used) { int blk_in_line = atomic_read(&line->blk_in_line); + int free_blocks; atomic_sub(blk_in_line, &rl->free_blocks); - pblk_rl_update_rates(rl); + + if (used) + free_blocks = atomic_sub_return(blk_in_line, + &rl->free_user_blocks); + else + free_blocks = atomic_read(&rl->free_user_blocks); + + __pblk_rl_update_rates(rl, free_blocks); } int pblk_rl_high_thrs(struct pblk_rl *rl) @@ -174,16 +189,21 @@ void pblk_rl_free(struct pblk_rl *rl) void pblk_rl_init(struct pblk_rl *rl, int budget) { struct pblk *pblk = container_of(rl, struct pblk, rl); + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE; + int sec_meta, blk_meta; + unsigned int rb_windows; - rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS; - 
rl->high_pw = get_count_order(rl->high); + /* Consider sectors used for metadata */ + sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; + blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk); - rl->low = rl->total_blocks / PBLK_USER_LOW_THRS; - if (rl->low < min_blocks) - rl->low = min_blocks; + rl->high = pblk->op_blks - blk_meta - lm->blk_per_line; + rl->high_pw = get_count_order(rl->high); rl->rsv_blocks = min_blocks; diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 5cee2ac49c72..620bab853579 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -49,11 +49,12 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) { - int free_blocks, total_blocks; + int free_blocks, free_user_blocks, total_blocks; int rb_user_max, rb_user_cnt; int rb_gc_max, rb_gc_cnt, rb_budget, rb_state; - free_blocks = atomic_read(&pblk->rl.free_blocks); + free_blocks = pblk_rl_nr_free_blks(&pblk->rl); + free_user_blocks = pblk_rl_nr_user_free_blks(&pblk->rl); rb_user_max = pblk->rl.rb_user_max; rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt); rb_gc_max = pblk->rl.rb_gc_max; @@ -64,16 +65,16 @@ static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) total_blocks = pblk->rl.total_blocks; return snprintf(page, PAGE_SIZE, - "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n", + "u:%u/%u,gc:%u/%u(%u)(stop:<%u,full:>%u,free:%d/%d/%d)-%d\n", rb_user_cnt, rb_user_max, rb_gc_cnt, rb_gc_max, rb_state, rb_budget, - pblk->rl.low, pblk->rl.high, free_blocks, + free_user_blocks, total_blocks, READ_ONCE(pblk->rl.rb_user_active)); } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 5f7cd6faa8c7..1e719d4181ce 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -252,9 +252,6 @@ struct pblk_rl { unsigned int high; /* Upper threshold for rate limiter (free run - * user I/O rate limiter */ - 
unsigned int low; /* Lower threshold for rate limiter (user I/O - * rate limiter - stall) - */ unsigned int high_pw; /* High rounded up as a power of 2 */ #define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */ @@ -288,7 +285,9 @@ struct pblk_rl { unsigned long long nr_secs; unsigned long total_blocks; - atomic_t free_blocks; + + atomic_t free_blocks; /* Total number of free blocks (+ OP) */ + atomic_t free_user_blocks; /* Number of user free blocks (no OP) */ }; #define PBLK_LINE_EMPTY (~0U) @@ -579,7 +578,9 @@ struct pblk { */ sector_t capacity; /* Device capacity when bad blocks are subtracted */ - int over_pct; /* Percentage of device used for over-provisioning */ + + int op; /* Percentage of device used for over-provisioning */ + int op_blks; /* Number of blocks used for over-provisioning */ /* pblk provisioning values. Used by rate limiter */ struct pblk_rl rl; @@ -839,6 +840,7 @@ void pblk_rl_free(struct pblk_rl *rl); void pblk_rl_update_rates(struct pblk_rl *rl); int pblk_rl_high_thrs(struct pblk_rl *rl); unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); +unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl); int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries); void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); @@ -847,7 +849,8 @@ void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); int pblk_rl_max_io(struct pblk_rl *rl); void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); -void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); +void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line, + bool used); int pblk_rl_is_limit(struct pblk_rl *rl); /* -- cgit v1.2.3 From e53927393b9987b7c986b6364c27111077f0ea3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:14 +0100 Subject: lightnvm: set target 
over-provision on create ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow to set the over-provision percentage on target creation. In case that the value is not provided, fall back to the default value set by the target. In pblk, set the default OP to 11% of the total size of the device Signed-off-by: Javier González Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 106 +++++++++++++++++++++++++++++++++--------- drivers/lightnvm/pblk-init.c | 5 +- drivers/lightnvm/pblk.h | 2 + include/linux/lightnvm.h | 6 +++ include/uapi/linux/lightnvm.h | 9 ++++ 5 files changed, 104 insertions(+), 24 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index d5f231c9339e..dcc9e621e651 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -140,7 +140,8 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear) } static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, - int lun_begin, int lun_end) + u16 lun_begin, u16 lun_end, + u16 op) { struct nvm_tgt_dev *tgt_dev = NULL; struct nvm_dev_map *dev_rmap = dev->rmap; @@ -219,6 +220,7 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, tgt_dev->geo.nr_chnls = nr_chnls; tgt_dev->geo.all_luns = nr_luns; tgt_dev->geo.nr_luns = (lun_balanced) ? 
prev_nr_luns : -1; + tgt_dev->geo.op = op; tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; tgt_dev->q = dev->q; tgt_dev->map = dev_map; @@ -266,9 +268,57 @@ static struct nvm_tgt_type *nvm_find_target_type(const char *name) return tt; } +static int nvm_config_check_luns(struct nvm_geo *geo, int lun_begin, + int lun_end) +{ + if (lun_begin > lun_end || lun_end >= geo->all_luns) { + pr_err("nvm: lun out of bound (%u:%u > %u)\n", + lun_begin, lun_end, geo->all_luns - 1); + return -EINVAL; + } + + return 0; +} + +static int __nvm_config_simple(struct nvm_dev *dev, + struct nvm_ioctl_create_simple *s) +{ + struct nvm_geo *geo = &dev->geo; + + if (s->lun_begin == -1 && s->lun_end == -1) { + s->lun_begin = 0; + s->lun_end = geo->all_luns - 1; + } + + return nvm_config_check_luns(geo, s->lun_begin, s->lun_end); +} + +static int __nvm_config_extended(struct nvm_dev *dev, + struct nvm_ioctl_create_extended *e) +{ + struct nvm_geo *geo = &dev->geo; + + if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) { + e->lun_begin = 0; + e->lun_end = dev->geo.all_luns - 1; + } + + /* op not set falls into target's default */ + if (e->op == 0xFFFF) + e->op = NVM_TARGET_DEFAULT_OP; + + if (e->op < NVM_TARGET_MIN_OP || + e->op > NVM_TARGET_MAX_OP) { + pr_err("nvm: invalid over provisioning value\n"); + return -EINVAL; + } + + return nvm_config_check_luns(geo, e->lun_begin, e->lun_end); +} + static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) { - struct nvm_ioctl_create_simple *s = &create->conf.s; + struct nvm_ioctl_create_extended e; struct request_queue *tqueue; struct gendisk *tdisk; struct nvm_tgt_type *tt; @@ -277,6 +327,28 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) void *targetdata; int ret; + switch (create->conf.type) { + case NVM_CONFIG_TYPE_SIMPLE: + ret = __nvm_config_simple(dev, &create->conf.s); + if (ret) + return ret; + + e.lun_begin = create->conf.s.lun_begin; + e.lun_end = create->conf.s.lun_end; 
+ e.op = NVM_TARGET_DEFAULT_OP; + break; + case NVM_CONFIG_TYPE_EXTENDED: + ret = __nvm_config_extended(dev, &create->conf.e); + if (ret) + return ret; + + e = create->conf.e; + break; + default: + pr_err("nvm: config type not valid\n"); + return -EINVAL; + } + tt = nvm_find_target_type(create->tgttype); if (!tt) { pr_err("nvm: target type %s not found\n", create->tgttype); @@ -289,7 +361,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) return -EINVAL; } - ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end); + ret = nvm_reserve_luns(dev, e.lun_begin, e.lun_end); if (ret) return ret; @@ -299,7 +371,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_reserve; } - tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end); + tgt_dev = nvm_create_tgt_dev(dev, e.lun_begin, e.lun_end, e.op); if (!tgt_dev) { pr_err("nvm: could not create target device\n"); ret = -ENOMEM; @@ -369,7 +441,7 @@ err_dev: err_t: kfree(t); err_reserve: - nvm_release_luns_err(dev, s->lun_begin, s->lun_end); + nvm_release_luns_err(dev, e.lun_begin, e.lun_end); return ret; } @@ -949,7 +1021,6 @@ EXPORT_SYMBOL(nvm_unregister); static int __nvm_configure_create(struct nvm_ioctl_create *create) { struct nvm_dev *dev; - struct nvm_ioctl_create_simple *s; down_write(&nvm_lock); dev = nvm_find_nvm_dev(create->dev); @@ -960,23 +1031,6 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) return -EINVAL; } - if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { - pr_err("nvm: config type not valid\n"); - return -EINVAL; - } - s = &create->conf.s; - - if (s->lun_begin == -1 && s->lun_end == -1) { - s->lun_begin = 0; - s->lun_end = dev->geo.all_luns - 1; - } - - if (s->lun_begin > s->lun_end || s->lun_end >= dev->geo.all_luns) { - pr_err("nvm: lun out of bound (%u:%u > %u)\n", - s->lun_begin, s->lun_end, dev->geo.all_luns - 1); - return -EINVAL; - } - return nvm_create_tgt(dev, create); } @@ -1076,6 +1130,12 @@ 
static long nvm_ioctl_dev_create(struct file *file, void __user *arg) if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create))) return -EFAULT; + if (create.conf.type == NVM_CONFIG_TYPE_EXTENDED && + create.conf.e.rsv != 0) { + pr_err("nvm: reserved config field in use\n"); + return -EINVAL; + } + create.dev[DISK_NAME_LEN - 1] = '\0'; create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0'; create.tgtname[DISK_NAME_LEN - 1] = '\0'; diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index c8a718249e26..533f6908e238 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -585,7 +585,10 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) sector_t provisioned; int sec_meta, blk_meta; - pblk->op = 20; + if (geo->op == NVM_TARGET_DEFAULT_OP) + pblk->op = PBLK_DEFAULT_OP; + else + pblk->op = geo->op; provisioned = nr_free_blks; provisioned *= (100 - pblk->op); diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 1e719d4181ce..19e622c65e92 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -54,6 +54,8 @@ /* Static pool sizes */ #define PBLK_GEN_WS_POOL_SIZE (2) +#define PBLK_DEFAULT_OP (11) + enum { PBLK_READ = READ, PBLK_WRITE = WRITE,/* Write from write buffer */ diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 8e43bfebd38d..7f4b60abdf27 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -218,6 +218,10 @@ struct nvm_target { #define ADDR_EMPTY (~0ULL) +#define NVM_TARGET_DEFAULT_OP (101) +#define NVM_TARGET_MIN_OP (3) +#define NVM_TARGET_MAX_OP (80) + #define NVM_VERSION_MAJOR 1 #define NVM_VERSION_MINOR 0 #define NVM_VERSION_PATCH 0 @@ -291,6 +295,8 @@ struct nvm_geo { int max_rq_size; + int op; + struct nvm_addr_format ppaf; /* Legacy 1.2 specific geometry */ diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index 42d1a434af29..f9a1be7fc696 100644 --- a/include/uapi/linux/lightnvm.h +++ 
b/include/uapi/linux/lightnvm.h @@ -75,14 +75,23 @@ struct nvm_ioctl_create_simple { __u32 lun_end; }; +struct nvm_ioctl_create_extended { + __u16 lun_begin; + __u16 lun_end; + __u16 op; + __u16 rsv; +}; + enum { NVM_CONFIG_TYPE_SIMPLE = 0, + NVM_CONFIG_TYPE_EXTENDED = 1, }; struct nvm_ioctl_create_conf { __u32 type; union { struct nvm_ioctl_create_simple s; + struct nvm_ioctl_create_extended e; }; }; -- cgit v1.2.3 From 5d201f07204893c02ef85d562bfcc71299f06f60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:15 +0100 Subject: lightnvm: pblk: ignore high ecc errors on recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On recovery, do not stop L2P recovery if reads report high ECC error as the data is still available. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-recovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index fd3803656e68..1d5e961bf5e0 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -288,7 +288,7 @@ next_read_rq: /* At this point, the read should not fail. If it does, it is a problem * we cannot recover from here. Need FTL log. */ - if (rqd->error) { + if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) { pr_err("pblk: L2P recovery failed (%d)\n", rqd->error); return -EINTR; } -- cgit v1.2.3 From 8f554597e00abe04fd1f37c351b38aff33c37fc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:16 +0100 Subject: lightnvm: pblk: do not log recovery read errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On scan recovery, reads can fail. This happens because the first page for each line is read in order to determine if the line has been used (and thus needs to be recovered), or not.
This can lead to "empty page" read errors. Since these errors are normal, do not log them, as they are confusing when reviewing the logs. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-core.c | 6 +++--- drivers/lightnvm/pblk.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 5ec7ad68ff38..0487b9340c1d 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -742,7 +742,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, cmd_op = NVM_OP_PWRITE; flags = pblk_set_progr_mode(pblk, PBLK_WRITE); lba_list = emeta_to_lbas(pblk, line->emeta->buf); - } else if (dir == PBLK_READ) { + } else if (dir == PBLK_READ_RECOV || dir == PBLK_READ) { bio_op = REQ_OP_READ; cmd_op = NVM_OP_PREAD; flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -802,7 +802,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, if (rqd.error) { if (dir == PBLK_WRITE) pblk_log_write_err(pblk, &rqd); - else + else if (dir == PBLK_READ) pblk_log_read_err(pblk, &rqd); } @@ -816,7 +816,7 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line) { u64 bpaddr = pblk_line_smeta_start(pblk, line); - return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ); + return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ_RECOV); } int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 19e622c65e92..93ec4fd2c26e 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -60,6 +60,7 @@ enum { PBLK_READ = READ, PBLK_WRITE = WRITE,/* Write from write buffer */ PBLK_WRITE_INT, /* Internal write - no write buffer */ + PBLK_READ_RECOV, /* Recovery read - errors allowed */ PBLK_ERASE, }; -- cgit v1.2.3 From cc4f5ba1fb6ebe977fb44293abebb79af77556f0 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:17 +0100 Subject: lightnvm: pblk: ensure kthread alloc. before kicking it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating the write thread, ensure that the kthread has been created before initializing the timer responsible for kicking it. Otherwise, if the kthread creation fails or gets killed from user space, we risk kicking an empty thread structure. Also, since the kthread creation can be interrupted from user space, adapt the error path to not report an error when this happens, since it is intentional that the instance creation is aborted. Signed-off-by: Javier González Updated source to reflect the new timer_setup API. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 533f6908e238..7e11926830db 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -883,15 +883,19 @@ fail: static int pblk_writer_init(struct pblk *pblk) { - timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0); - mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100)); - pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t"); if (IS_ERR(pblk->writer_ts)) { - pr_err("pblk: could not allocate writer kthread\n"); - return PTR_ERR(pblk->writer_ts); + int err = PTR_ERR(pblk->writer_ts); + + if (err != -EINTR) + pr_err("pblk: could not allocate writer kthread (%d)\n", + err); + return err; } + timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0); + mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100)); + return 0; } @@ -1042,7 +1046,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, ret = pblk_writer_init(pblk); if (ret) { - pr_err("pblk: could not initialize write thread\n"); + if (ret != -EINTR) + pr_err("pblk: could
not initialize write thread\n"); goto fail_free_lines; } -- cgit v1.2.3 From c6847e4e359f01745bad5aea0fa20a0c5edfdc51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:18 +0100 Subject: lightnvm: pblk: free write buffer on init failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the way we free the write buffer to ensure that all entries get freed in case of an error on the init sequence. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 7e11926830db..8c40bc358b71 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -355,6 +355,8 @@ static void pblk_core_free(struct pblk *pblk) mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); + pblk_rwb_free(pblk); + pblk_free_global_caches(pblk); } @@ -931,7 +933,6 @@ static void pblk_tear_down(struct pblk *pblk) pblk_pipeline_stop(pblk); pblk_writer_stop(pblk); pblk_rb_sync_l2p(&pblk->rwb); - pblk_rwb_free(pblk); pblk_rl_free(&pblk->rl); pr_debug("pblk: consistent tear down\n"); -- cgit v1.2.3 From 30d82a8631428709c436ea5568bf68fe7cfaf8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:19 +0100 Subject: lightnvm: pblk: print instance name on instance info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the instance name to the information printed out on target creation. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 8c40bc358b71..93d671ca518e 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1069,7 +1069,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); - pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n", + pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n", + tdisk->disk_name, geo->all_luns, pblk->l_mg.nr_lines, (unsigned long long)pblk->rl.nr_secs, pblk->rwb.nr_entries); -- cgit v1.2.3 From 998ba62973132e886b0411f00bc66fbb31507e84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 5 Jan 2018 14:16:20 +0100 Subject: lightnvm: pblk: add iostat support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since pblk registers its own block device, the iostat accounting is not automatically done for us. Therefore, add the necessary accounting logic to satisfy the iostat interface. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-cache.c | 5 +++++ drivers/lightnvm/pblk-read.c | 31 +++++++++++++++++++------------ drivers/lightnvm/pblk.h | 1 + 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index 0d227ef7d1b9..000fcad38136 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -19,12 +19,16 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags) { + struct request_queue *q = pblk->dev->q; struct pblk_w_ctx w_ctx; sector_t lba = pblk_get_lba(bio); + unsigned long start_time = jiffies; unsigned int bpos, pos; int nr_entries = pblk_get_secs(bio); int i, ret; + generic_start_io_acct(q, WRITE, bio_sectors(bio), &pblk->disk->part0); + /* Update the write buffer head (mem) with the entries that we can * write. The write in itself cannot fail, so there is no need to * rollback from here on. 
@@ -67,6 +71,7 @@ retry: pblk_rl_inserted(&pblk->rl, nr_entries); out: + generic_end_io_acct(q, WRITE, &pblk->disk->part0, start_time); pblk_write_should_kick(pblk); return ret; } diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 0fe0c040f359..2f761283f43e 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -158,8 +158,12 @@ static void pblk_end_user_read(struct bio *bio) static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, bool put_line) { + struct nvm_tgt_dev *dev = pblk->dev; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct bio *bio = rqd->bio; + unsigned long start_time = r_ctx->start_time; + + generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time); if (rqd->error) pblk_log_read_err(pblk, rqd); @@ -193,9 +197,9 @@ static void pblk_end_io_read(struct nvm_rq *rqd) __pblk_end_io_read(pblk, rqd, true); } -static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, - unsigned int bio_init_idx, - unsigned long *read_bitmap) +static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, + unsigned int bio_init_idx, + unsigned long *read_bitmap) { struct bio *new_bio, *bio = rqd->bio; struct pblk_sec_meta *meta_list = rqd->meta_list; @@ -306,6 +310,8 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, return NVM_IO_OK; err: + pr_err("pblk: failed to perform partial read\n"); + /* Free allocated pages in new bio */ pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); __pblk_end_io_read(pblk, rqd, false); @@ -357,6 +363,7 @@ retry: int pblk_submit_read(struct pblk *pblk, struct bio *bio) { struct nvm_tgt_dev *dev = pblk->dev; + struct request_queue *q = dev->q; sector_t blba = pblk_get_lba(bio); unsigned int nr_secs = pblk_get_secs(bio); struct pblk_g_ctx *r_ctx; @@ -372,6 +379,8 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) return NVM_IO_ERR; } + generic_start_io_acct(q, READ, bio_sectors(bio), 
&pblk->disk->part0); + bitmap_zero(&read_bitmap, nr_secs); rqd = pblk_alloc_rqd(pblk, PBLK_READ); @@ -383,6 +392,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd->end_io = pblk_end_io_read; r_ctx = nvm_rq_to_pdu(rqd); + r_ctx->start_time = jiffies; r_ctx->lba = blba; /* Save the index for this bio's start. This is needed in case @@ -422,7 +432,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set); if (!int_bio) { pr_err("pblk: could not clone read bio\n"); - return NVM_IO_ERR; + goto fail_end_io; } rqd->bio = int_bio; @@ -433,7 +443,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) pr_err("pblk: read IO submission failed\n"); if (int_bio) bio_put(int_bio); - return ret; + goto fail_end_io; } return NVM_IO_OK; @@ -442,17 +452,14 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) /* The read bio request could be partially filled by the write buffer, * but there are some holes that need to be read from the drive. 
*/ - ret = pblk_fill_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap); - if (ret) { - pr_err("pblk: failed to perform partial read\n"); - return ret; - } - - return NVM_IO_OK; + return pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap); fail_rqd_free: pblk_free_rqd(pblk, rqd, PBLK_READ); return ret; +fail_end_io: + __pblk_end_io_read(pblk, rqd, false); + return ret; } static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 93ec4fd2c26e..8af374ee54c5 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -113,6 +113,7 @@ struct pblk_c_ctx { /* read context */ struct pblk_g_ctx { void *private; + unsigned long start_time; u64 lba; }; -- cgit v1.2.3 From 8b7bc849889d36572af5c6e3c3b2ad4b19a81be7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 5 Jan 2018 14:16:21 +0100 Subject: lightnvm: pblk: refactor pblk_ppa_comp function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shorten function to simply return the value of the if statement. 
Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 8af374ee54c5..8c357fb6538e 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -1047,10 +1047,7 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr) static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa) { - if (lppa.ppa == rppa.ppa) - return true; - - return false; + return (lppa.ppa == rppa.ppa); } static inline int pblk_addr_in_cache(struct ppa_addr ppa) -- cgit v1.2.3 From e3af9f7c6ece29fdb7fe0aeb83ac5d3077a06edb Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 25 Jul 2017 16:51:20 +0200 Subject: ARM64: dts: marvell: armada-cp110: Fix clock resources for various node On the CP modules we found on Armada 7K/8K, many IP blocks actually also need a "functional" clock (from the bus). This patch adds them, which fixes some issues that hang the kernel: if the Ethernet and sdhci drivers are built as modules and sdhci is loaded first, then the kernel hangs.
Fixes: bb16ea1742c8 ("mmc: sdhci-xenon: Fix clock resource by adding an optional bus clock") Cc: stable@vger.kernel.org Reported-by: Riku Voipio Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi | 13 ++++++++----- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 9 ++++++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index e3b64d03fbd8..9c7724e82aff 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -63,8 +63,10 @@ cpm_ethernet: ethernet@0 { compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; - clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, <&cpm_clk 1 5>; - clock-names = "pp_clk", "gop_clk", "mg_clk"; + clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, + <&cpm_clk 1 5>, <&cpm_clk 1 18>; + clock-names = "pp_clk", "gop_clk", + "mg_clk","axi_clk"; marvell,system-controller = <&cpm_syscon0>; status = "disabled"; dma-coherent; @@ -155,7 +157,8 @@ #size-cells = <0>; compatible = "marvell,orion-mdio"; reg = <0x12a200 0x10>; - clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>; + clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>, + <&cpm_clk 1 6>, <&cpm_clk 1 18>; status = "disabled"; }; @@ -338,8 +341,8 @@ compatible = "marvell,armada-cp110-sdhci"; reg = <0x780000 0x300>; interrupts = ; - clock-names = "core"; - clocks = <&cpm_clk 1 4>; + clock-names = "core","axi"; + clocks = <&cpm_clk 1 4>, <&cpm_clk 1 18>; dma-coherent; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 0d51096c69f8..87ac68b2cf37 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -63,8 +63,10 @@ cps_ethernet: ethernet@0 { compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 
0xb000>; - clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, <&cps_clk 1 5>; - clock-names = "pp_clk", "gop_clk", "mg_clk"; + clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, + <&cps_clk 1 5>, <&cps_clk 1 18>; + clock-names = "pp_clk", "gop_clk", + "mg_clk", "axi_clk"; marvell,system-controller = <&cps_syscon0>; status = "disabled"; dma-coherent; @@ -155,7 +157,8 @@ #size-cells = <0>; compatible = "marvell,orion-mdio"; reg = <0x12a200 0x10>; - clocks = <&cps_clk 1 9>, <&cps_clk 1 5>; + clocks = <&cps_clk 1 9>, <&cps_clk 1 5>, + <&cps_clk 1 6>, <&cps_clk 1 18>; status = "disabled"; }; -- cgit v1.2.3 From 5a0ec388ef0f6e33841aeb810d7fa23f049ec4cd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 Jan 2018 11:39:47 -0800 Subject: pktcdvd: Fix pkt_setup_dev() error path Commit 523e1d399ce0 ("block: make gendisk hold a reference to its queue") modified add_disk() and disk_release() but did not update any of the error paths that trigger a put_disk() call after disk->queue has been assigned. That introduced the following behavior in the pktcdvd driver if pkt_new_dev() fails: Kernel BUG at 00000000e98fd882 [verbose debug info unavailable] Since disk_release() calls blk_put_queue() anyway if disk->queue != NULL, fix this by removing the blk_cleanup_queue() call from the pkt_setup_dev() error path. Fixes: commit 523e1d399ce0 ("block: make gendisk hold a reference to its queue") Signed-off-by: Bart Van Assche Cc: Tejun Heo Cc: Maciej S. 
Szmigiero Cc: # v3.2 Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 67974796c350..2659b2534073 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2745,7 +2745,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) - goto out_new_dev; + goto out_mem2; /* inherit events of the host device */ disk->events = pd->bdev->bd_disk->events; @@ -2763,8 +2763,6 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) mutex_unlock(&ctl_mutex); return 0; -out_new_dev: - blk_cleanup_queue(disk->queue); out_mem2: put_disk(disk); out_mem: -- cgit v1.2.3 From 882d4171a8950646413b1a3cbe0e4a6a612fe82e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 2 Jan 2018 11:39:48 -0800 Subject: pktcdvd: Fix a recently introduced NULL pointer dereference Call bdev_get_queue(bdev) after bdev->bd_disk has been initialized instead of just before that pointer has been initialized. This patch avoids that the following command pktsetup 1 /dev/sr0 triggers the following kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000548 IP: pkt_setup_dev+0x2db/0x670 [pktcdvd] CPU: 2 PID: 724 Comm: pktsetup Not tainted 4.15.0-rc4-dbg+ #1 Call Trace: pkt_ctl_ioctl+0xce/0x1c0 [pktcdvd] do_vfs_ioctl+0x8e/0x670 SyS_ioctl+0x3c/0x70 entry_SYSCALL_64_fastpath+0x23/0x9a Reported-by: Maciej S. Szmigiero Fixes: commit ca18d6f769d2 ("block: Make most scsi_req_init() calls implicit") Signed-off-by: Bart Van Assche Tested-by: Maciej S. Szmigiero Cc: Maciej S. 
Szmigiero Cc: # v4.13 Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 2659b2534073..531a0915066b 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2579,14 +2579,14 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) bdev = bdget(dev); if (!bdev) return -ENOMEM; + ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); + if (ret) + return ret; if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) { WARN_ONCE(true, "Attempt to register a non-SCSI queue\n"); - bdput(bdev); + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); return -EINVAL; } - ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); - if (ret) - return ret; /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); -- cgit v1.2.3 From 454be724f6f99cc7e7bbf15067128be9868186c6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 30 Nov 2017 07:56:35 +0800 Subject: block: drain queue before waiting for q_usage_counter becoming zero Now we track legacy requests with .q_usage_counter in commit 055f6e18e08f ("block: Make q_usage_counter also track legacy requests"), but that commit never runs and drains legacy queue before waiting for this counter becoming zero, then IO hang is caused in the test of pulling disk during IO. This patch fixes the issue by draining requests before waiting for q_usage_counter becoming zero, both Mauricio and chenxiang reported this issue, and observed that it can be fixed by this patch. 
Link: https://marc.info/?l=linux-block&m=151192424731797&w=2 Fixes: 055f6e18e08f("block: Make q_usage_counter also track legacy requests") Cc: Wen Xiong Tested-by: "chenxiang (M)" Tested-by: Mauricio Faria de Oliveira Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 9 +++++++-- block/blk-mq.c | 2 ++ block/blk.h | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index b8881750a3ac..3ba4326a63b5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -562,6 +562,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) } } +void blk_drain_queue(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + __blk_drain_queue(q, true); + spin_unlock_irq(q->queue_lock); +} + /** * blk_queue_bypass_start - enter queue bypass mode * @q: queue of interest @@ -689,8 +696,6 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); spin_lock_irq(lock); - if (!q->mq_ops) - __blk_drain_queue(q, true); queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); diff --git a/block/blk-mq.c b/block/blk-mq.c index 11097477eeab..3d3797327491 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -161,6 +161,8 @@ void blk_freeze_queue(struct request_queue *q) * exported to drivers as the only user for unfreeze is blk_mq. 
*/ blk_freeze_queue_start(q); + if (!q->mq_ops) + blk_drain_queue(q); blk_mq_freeze_queue_wait(q); } diff --git a/block/blk.h b/block/blk.h index 3f1446937aec..442098aa9463 100644 --- a/block/blk.h +++ b/block/blk.h @@ -330,4 +330,6 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) } #endif /* CONFIG_BOUNCE */ +extern void blk_drain_queue(struct request_queue *q); + #endif /* BLK_INTERNAL_H */ -- cgit v1.2.3 From d1616f07e8f1a4a490d1791316d4a68906b284aa Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Thu, 4 Jan 2018 10:47:20 +0800 Subject: net: fec: free/restore resource in related probe error pathes Fixes in probe error path: - Restore dev_id before failed_ioremap path. Fixes: ("net: fec: restore dev_id in the cases of probe error") - Call of_node_put(phy_node) before failed_phy path. Fixes: ("net: fec: Support phys probed from devicetree and fixed-link") Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 19f198e22e15..a74300a4459c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3556,11 +3556,11 @@ failed_clk_ipg: failed_clk: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); -failed_phy: of_node_put(phy_node); +failed_phy: + dev_id--; failed_ioremap: free_netdev(ndev); - dev_id--; return ret; } -- cgit v1.2.3 From 6cc77e9cb08041627fe1d32ac3a743249deb8167 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Dec 2017 15:43:38 +0900 Subject: block: introduce zoned block devices zone write locking Components relying only on the request_queue structure for accessing block devices (e.g. I/O schedulers) have a limited knowledged of the device characteristics. 
In particular, the device capacity cannot be easily discovered, which for a zoned block device also results in the inability to easily know the number of zones of the device (the zone size is indicated by the chunk_sectors field of the queue limits). Introduce the nr_zones field to the request_queue structure to simplify access to this information. Also, add the bitmap seq_zones_bitmap which indicates which zones of the device are sequential zones (write preferred or write required) and the bitmap seq_zones_wlock which indicates if a zone is write locked, that is, if a write request targeting a zone was dispatched to the device. These fields are initialized by the low level block device driver (sd.c for ZBC/ZAC disks). They are not initialized by stacking drivers (device mappers) handling zoned block devices (e.g. dm-linear). Using this, I/O schedulers can introduce zone write locking to control request dispatching to a zoned block device and avoid write request reordering by limiting to at most a single write request per zone outside of the scheduler at any time. Based on previous patches from Damien Le Moal. Signed-off-by: Christoph Hellwig [Damien] * Fixed comments and indentation in blkdev.h * Changed helper functions * Fixed this commit message Signed-off-by: Damien Le Moal Reviewed-by: Martin K.
Petersen Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + block/blk-zoned.c | 42 +++++++++++++++++++ include/linux/blkdev.h | 111 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index b8881750a3ac..e6e5bbc4c366 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1641,6 +1641,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) lockdep_assert_held(q->queue_lock); + blk_req_zone_write_unlock(req); blk_pm_put_request(req); elv_completed_request(q, req); diff --git a/block/blk-zoned.c b/block/blk-zoned.c index ff57fb51b338..acb7252c7e81 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -21,6 +21,48 @@ static inline sector_t blk_zone_start(struct request_queue *q, return sector & ~zone_mask; } +/* + * Return true if a request is a write requests that needs zone write locking. + */ +bool blk_req_needs_zone_write_lock(struct request *rq) +{ + if (!rq->q->seq_zones_wlock) + return false; + + if (blk_rq_is_passthrough(rq)) + return false; + + switch (req_op(rq)) { + case REQ_OP_WRITE_ZEROES: + case REQ_OP_WRITE_SAME: + case REQ_OP_WRITE: + return blk_rq_zone_is_seq(rq); + default: + return false; + } +} +EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock); + +void __blk_req_zone_write_lock(struct request *rq) +{ + if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq), + rq->q->seq_zones_wlock))) + return; + + WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED); + rq->rq_flags |= RQF_ZONE_WRITE_LOCKED; +} +EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock); + +void __blk_req_zone_write_unlock(struct request *rq) +{ + rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED; + if (rq->q->seq_zones_wlock) + WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq), + rq->q->seq_zones_wlock)); +} +EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock); + /* * Check that a zone report belongs to the partition. 
* If yes, fix its start sector and write pointer, copy it in the diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8089ca17db9a..46e606f5b44b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -121,6 +121,8 @@ typedef __u32 __bitwise req_flags_t; /* Look at ->special_vec for the actual data payload instead of the bio chain. */ #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) +/* The per-zone write lock is held for this request */ +#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ @@ -546,6 +548,22 @@ struct request_queue { struct queue_limits limits; + /* + * Zoned block device information for request dispatch control. + * nr_zones is the total number of zones of the device. This is always + * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones + * bits which indicates if a zone is conventional (bit clear) or + * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones + * bits which indicates if a zone is write locked, that is, if a write + * request targeting the zone was dispatched. All three fields are + * initialized by the low level device driver (e.g. scsi/sd.c). + * Stacking drivers (device mappers) may or may not initialize + * these fields. + */ + unsigned int nr_zones; + unsigned long *seq_zones_bitmap; + unsigned long *seq_zones_wlock; + /* * sg stuff */ @@ -790,6 +808,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q) return blk_queue_is_zoned(q) ? 
q->limits.chunk_sectors : 0; } +static inline unsigned int blk_queue_nr_zones(struct request_queue *q) +{ + return q->nr_zones; +} + +static inline unsigned int blk_queue_zone_no(struct request_queue *q, + sector_t sector) +{ + if (!blk_queue_is_zoned(q)) + return 0; + return sector >> ilog2(q->limits.chunk_sectors); +} + +static inline bool blk_queue_zone_is_seq(struct request_queue *q, + sector_t sector) +{ + if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap) + return false; + return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap); +} + static inline bool rq_is_sync(struct request *rq) { return op_is_sync(rq->cmd_flags); @@ -1029,6 +1068,16 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +static inline unsigned int blk_rq_zone_no(struct request *rq) +{ + return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); +} + +static inline unsigned int blk_rq_zone_is_seq(struct request *rq) +{ + return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); +} + /* * Some commands like WRITE SAME have a payload or data transfer size which * is different from the size of the request. 
Any driver that supports such @@ -1578,7 +1627,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev) if (q) return blk_queue_zone_sectors(q); + return 0; +} +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return blk_queue_nr_zones(q); return 0; } @@ -1954,6 +2011,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); + +#ifdef CONFIG_BLK_DEV_ZONED +bool blk_req_needs_zone_write_lock(struct request *rq); +void __blk_req_zone_write_lock(struct request *rq); +void __blk_req_zone_write_unlock(struct request *rq); + +static inline void blk_req_zone_write_lock(struct request *rq) +{ + if (blk_req_needs_zone_write_lock(rq)) + __blk_req_zone_write_lock(rq); +} + +static inline void blk_req_zone_write_unlock(struct request *rq) +{ + if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) + __blk_req_zone_write_unlock(rq); +} + +static inline bool blk_req_zone_is_write_locked(struct request *rq) +{ + return rq->q->seq_zones_wlock && + test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); +} + +static inline bool blk_req_can_dispatch_to_zone(struct request *rq) +{ + if (!blk_req_needs_zone_write_lock(rq)) + return true; + return !blk_req_zone_is_write_locked(rq); +} +#else +static inline bool blk_req_needs_zone_write_lock(struct request *rq) +{ + return false; +} + +static inline void blk_req_zone_write_lock(struct request *rq) +{ +} + +static inline void blk_req_zone_write_unlock(struct request *rq) +{ +} +static inline bool blk_req_zone_is_write_locked(struct request *rq) +{ + return false; +} + +static inline bool blk_req_can_dispatch_to_zone(struct request *rq) +{ + return true; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + #else /* CONFIG_BLOCK */ struct block_device; -- cgit 
v1.2.3 From bf09ce56f0e654b94d980b9aa89e3fce78887e01 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 21 Dec 2017 15:43:39 +0900 Subject: mq-deadline: Introduce dispatch helpers Avoid directly referencing the next_rq and fifo_list arrays using the helper functions deadline_next_request() and deadline_fifo_request() to facilitate changes in the dispatch request selection in __dd_dispatch_request() for zoned block devices. Signed-off-by: Damien Le Moal Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/mq-deadline.c | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 0179e484ec98..8bd6db9e69c7 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -191,6 +191,35 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) return 0; } +/* + * For the specified data direction, return the next request to + * dispatch using arrival ordered lists. + */ +static struct request * +deadline_fifo_request(struct deadline_data *dd, int data_dir) +{ + if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) + return NULL; + + if (list_empty(&dd->fifo_list[data_dir])) + return NULL; + + return rq_entry_fifo(dd->fifo_list[data_dir].next); +} + +/* + * For the specified data direction, return the next request to + * dispatch using sector position sorted lists. 
+ */ +static struct request * +deadline_next_request(struct deadline_data *dd, int data_dir) +{ + if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) + return NULL; + + return dd->next_rq[data_dir]; +} + /* * deadline_dispatch_requests selects the best request according to * read/write expire, fifo_batch, etc @@ -198,7 +227,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx) { struct deadline_data *dd = hctx->queue->elevator->elevator_data; - struct request *rq; + struct request *rq, *next_rq; bool reads, writes; int data_dir; @@ -214,10 +243,9 @@ static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx) /* * batches are currently reads XOR writes */ - if (dd->next_rq[WRITE]) - rq = dd->next_rq[WRITE]; - else - rq = dd->next_rq[READ]; + rq = deadline_next_request(dd, WRITE); + if (!rq) + rq = deadline_next_request(dd, READ); if (rq && dd->batching < dd->fifo_batch) /* we have a next request are still entitled to batch */ @@ -260,19 +288,20 @@ dispatch_find_request: /* * we are not running a batch, find best request for selected data_dir */ - if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) { + next_rq = deadline_next_request(dd, data_dir); + if (deadline_check_fifo(dd, data_dir) || !next_rq) { /* * A deadline has expired, the last request was in the other * direction, or we have run out of higher-sectored requests. * Start again from the request with the earliest expiry time. */ - rq = rq_entry_fifo(dd->fifo_list[data_dir].next); + rq = deadline_fifo_request(dd, data_dir); } else { /* * The last req was the same dir and we have a next request in * sort order. No expired requests so continue on from here. 
*/ - rq = dd->next_rq[data_dir]; + rq = next_rq; } dd->batching = 0; -- cgit v1.2.3 From 5700f69178e91a6b21250049b86148ed5e9550c1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 21 Dec 2017 15:43:40 +0900 Subject: mq-deadline: Introduce zone locking support Introduce zone write locking to avoid write request reordering with zoned block devices. This is achieved using a finer selection of the next request to dispatch: 1) Any non-write request is always allowed to proceed. 2) Any write to a conventional zone is always allowed to proceed. 3) For a write to a sequential zone, the zone lock is first checked. a) If the zone is not locked, the write is allowed to proceed after its target zone is locked. b) If the zone is locked, the write request is skipped and the next request in the dispatch queue is tested (back to step 1). For a write request that has locked its target zone, the zone is unlocked either when the request completes with a call to the method dd_completed_request() or when the request is requeued using dd_insert_request(). Requests targeting a locked zone are always left in the scheduler queue to preserve the LBA ordering for write requests. If no write request can be dispatched, allow reads to be dispatched even if the write batch is not done. If the device used is not a zoned block device, or if zoned block device support is disabled, this patch does not modify mq-deadline behavior. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Signed-off-by: Jens Axboe --- block/mq-deadline.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 8bd6db9e69c7..d56972e8ebda 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -59,6 +59,7 @@ struct deadline_data { int front_merges; spinlock_t lock; + spinlock_t zone_lock; struct list_head dispatch; }; @@ -198,13 +199,33 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) static struct request * deadline_fifo_request(struct deadline_data *dd, int data_dir) { + struct request *rq; + unsigned long flags; + if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) return NULL; if (list_empty(&dd->fifo_list[data_dir])) return NULL; - return rq_entry_fifo(dd->fifo_list[data_dir].next); + rq = rq_entry_fifo(dd->fifo_list[data_dir].next); + if (data_dir == READ || !blk_queue_is_zoned(rq->q)) + return rq; + + /* + * Look for a write request that can be dispatched, that is one with + * an unlocked target zone. + */ + spin_lock_irqsave(&dd->zone_lock, flags); + list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) { + if (blk_req_can_dispatch_to_zone(rq)) + goto out; + } + rq = NULL; +out: + spin_unlock_irqrestore(&dd->zone_lock, flags); + + return rq; } /* @@ -214,10 +235,32 @@ deadline_fifo_request(struct deadline_data *dd, int data_dir) static struct request * deadline_next_request(struct deadline_data *dd, int data_dir) { + struct request *rq; + unsigned long flags; + if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) return NULL; - return dd->next_rq[data_dir]; + rq = dd->next_rq[data_dir]; + if (!rq) + return NULL; + + if (data_dir == READ || !blk_queue_is_zoned(rq->q)) + return rq; + + /* + * Look for a write request that can be dispatched, that is one with + * an unlocked target zone. 
+ */ + spin_lock_irqsave(&dd->zone_lock, flags); + while (rq) { + if (blk_req_can_dispatch_to_zone(rq)) + break; + rq = deadline_latter_request(rq); + } + spin_unlock_irqrestore(&dd->zone_lock, flags); + + return rq; } /* @@ -259,7 +302,8 @@ static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx) if (reads) { BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ])); - if (writes && (dd->starved++ >= dd->writes_starved)) + if (deadline_fifo_request(dd, WRITE) && + (dd->starved++ >= dd->writes_starved)) goto dispatch_writes; data_dir = READ; @@ -304,6 +348,13 @@ dispatch_find_request: rq = next_rq; } + /* + * For a zoned block device, if we only have writes queued and none of + * them can be dispatched, rq will be NULL. + */ + if (!rq) + return NULL; + dd->batching = 0; dispatch_request: @@ -313,6 +364,10 @@ dispatch_request: dd->batching++; deadline_move_request(dd, rq); done: + /* + * If the request needs its target zone locked, do it. + */ + blk_req_zone_write_lock(rq); rq->rq_flags |= RQF_STARTED; return rq; } @@ -368,6 +423,7 @@ static int dd_init_queue(struct request_queue *q, struct elevator_type *e) dd->front_merges = 1; dd->fifo_batch = fifo_batch; spin_lock_init(&dd->lock); + spin_lock_init(&dd->zone_lock); INIT_LIST_HEAD(&dd->dispatch); q->elevator = eq; @@ -424,6 +480,12 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, struct deadline_data *dd = q->elevator->elevator_data; const int data_dir = rq_data_dir(rq); + /* + * This may be a requeue of a write request that has locked its + * target zone. If it is the case, this releases the zone lock. + */ + blk_req_zone_write_unlock(rq); + if (blk_mq_sched_try_insert_merge(q, rq)) return; @@ -468,6 +530,26 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, spin_unlock(&dd->lock); } +/* + * For zoned block devices, write unlock the target zone of + * completed write requests. 
Do this while holding the zone lock + * spinlock so that the zone is never unlocked while deadline_fifo_request() + * or deadline_next_request() are executing. + */ +static void dd_completed_request(struct request *rq) +{ + struct request_queue *q = rq->q; + + if (blk_queue_is_zoned(q)) { + struct deadline_data *dd = q->elevator->elevator_data; + unsigned long flags; + + spin_lock_irqsave(&dd->zone_lock, flags); + blk_req_zone_write_unlock(rq); + spin_unlock_irqrestore(&dd->zone_lock, flags); + } +} + static bool dd_has_work(struct blk_mq_hw_ctx *hctx) { struct deadline_data *dd = hctx->queue->elevator->elevator_data; @@ -669,6 +751,7 @@ static struct elevator_type mq_deadline = { .ops.mq = { .insert_requests = dd_insert_requests, .dispatch_request = dd_dispatch_request, + .completed_request = dd_completed_request, .next_request = elv_rb_latter_request, .former_request = elv_rb_former_request, .bio_merge = dd_bio_merge, -- cgit v1.2.3 From c117bac70133dbff9ed7fcbd91ef82b4ee518797 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 21 Dec 2017 15:43:41 +0900 Subject: deadline-iosched: Introduce dispatch helpers Avoid directly referencing the next_rq and fifo_list arrays using the helper functions deadline_next_request() and deadline_fifo_request() to facilitate changes in the dispatch request selection in deadline_dispatch_requests() for zoned block devices. While at it, also remove the unnecessary forward declaration of the function deadline_move_request(). Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Signed-off-by: Jens Axboe --- block/deadline-iosched.c | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index b83f77460d28..81e3f0897457 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -50,8 +50,6 @@ struct deadline_data { int front_merges; }; -static void deadline_move_request(struct deadline_data *, struct request *); - static inline struct rb_root * deadline_rb_root(struct deadline_data *dd, struct request *rq) { @@ -230,6 +228,35 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) return 0; } +/* + * For the specified data direction, return the next request to dispatch using + * arrival ordered lists. + */ +static struct request * +deadline_fifo_request(struct deadline_data *dd, int data_dir) +{ + if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) + return NULL; + + if (list_empty(&dd->fifo_list[data_dir])) + return NULL; + + return rq_entry_fifo(dd->fifo_list[data_dir].next); +} + +/* + * For the specified data direction, return the next request to dispatch using + * sector position sorted lists. 
+ */ +static struct request * +deadline_next_request(struct deadline_data *dd, int data_dir) +{ + if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) + return NULL; + + return dd->next_rq[data_dir]; +} + /* * deadline_dispatch_requests selects the best request according to * read/write expire, fifo_batch, etc @@ -239,16 +266,15 @@ static int deadline_dispatch_requests(struct request_queue *q, int force) struct deadline_data *dd = q->elevator->elevator_data; const int reads = !list_empty(&dd->fifo_list[READ]); const int writes = !list_empty(&dd->fifo_list[WRITE]); - struct request *rq; + struct request *rq, *next_rq; int data_dir; /* * batches are currently reads XOR writes */ - if (dd->next_rq[WRITE]) - rq = dd->next_rq[WRITE]; - else - rq = dd->next_rq[READ]; + rq = deadline_next_request(dd, WRITE); + if (!rq) + rq = deadline_next_request(dd, READ); if (rq && dd->batching < dd->fifo_batch) /* we have a next request are still entitled to batch */ @@ -291,19 +317,20 @@ dispatch_find_request: /* * we are not running a batch, find best request for selected data_dir */ - if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) { + next_rq = deadline_next_request(dd, data_dir); + if (deadline_check_fifo(dd, data_dir) || !next_rq) { /* * A deadline has expired, the last request was in the other * direction, or we have run out of higher-sectored requests. * Start again from the request with the earliest expiry time. */ - rq = rq_entry_fifo(dd->fifo_list[data_dir].next); + rq = deadline_fifo_request(dd, data_dir); } else { /* * The last req was the same dir and we have a next request in * sort order. No expired requests so continue on from here. 
*/ - rq = dd->next_rq[data_dir]; + rq = next_rq; } dd->batching = 0; -- cgit v1.2.3 From 8dc8146f9c92c17caa3c50f979d351c87ed372f8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 21 Dec 2017 15:43:42 +0900 Subject: deadline-iosched: Introduce zone locking support Introduce zone write locking to avoid write request reordering with zoned block devices. This is achieved using a finer selection of the next request to dispatch: 1) Any non-write request is always allowed to proceed. 2) Any write to a conventional zone is always allowed to proceed. 3) For a write to a sequential zone, the zone lock is first checked. a) If the zone is not locked, the write is allowed to proceed after its target zone is locked. b) If the zone is locked, the write request is skipped and the next request in the dispatch queue is tested (back to step 1). For a write request that has locked its target zone, the zone is unlocked either when the request completes and the method deadline_completed_request() is called, or when the request is requeued using the method deadline_add_request(). Requests targeting a locked zone are always left in the scheduler queue to preserve the initial write order. If no write request can be dispatched, allow reads to be dispatched even if the write batch is not done. If the device used is not a zoned block device, or if zoned block device support is disabled, this patch does not modify deadline behavior. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K.
Petersen Signed-off-by: Jens Axboe --- block/deadline-iosched.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 81e3f0897457..9de9f156e203 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -98,6 +98,12 @@ deadline_add_request(struct request_queue *q, struct request *rq) struct deadline_data *dd = q->elevator->elevator_data; const int data_dir = rq_data_dir(rq); + /* + * This may be a requeue of a write request that has locked its + * target zone. If it is the case, this releases the zone lock. + */ + blk_req_zone_write_unlock(rq); + deadline_add_rq_rb(dd, rq); /* @@ -188,6 +194,12 @@ deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq) { struct request_queue *q = rq->q; + /* + * For a zoned block device, write requests must write lock their + * target zone. + */ + blk_req_zone_write_lock(rq); + deadline_remove_request(q, rq); elv_dispatch_add_tail(q, rq); } @@ -235,13 +247,28 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) static struct request * deadline_fifo_request(struct deadline_data *dd, int data_dir) { + struct request *rq; + if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) return NULL; if (list_empty(&dd->fifo_list[data_dir])) return NULL; - return rq_entry_fifo(dd->fifo_list[data_dir].next); + rq = rq_entry_fifo(dd->fifo_list[data_dir].next); + if (data_dir == READ || !blk_queue_is_zoned(rq->q)) + return rq; + + /* + * Look for a write request that can be dispatched, that is one with + * an unlocked target zone. 
+ */ + list_for_each_entry(rq, &dd->fifo_list[WRITE], queuelist) { + if (blk_req_can_dispatch_to_zone(rq)) + return rq; + } + + return NULL; } /* @@ -251,10 +278,29 @@ deadline_fifo_request(struct deadline_data *dd, int data_dir) static struct request * deadline_next_request(struct deadline_data *dd, int data_dir) { + struct request *rq; + if (WARN_ON_ONCE(data_dir != READ && data_dir != WRITE)) return NULL; - return dd->next_rq[data_dir]; + rq = dd->next_rq[data_dir]; + if (!rq) + return NULL; + + if (data_dir == READ || !blk_queue_is_zoned(rq->q)) + return rq; + + /* + * Look for a write request that can be dispatched, that is one with + * an unlocked target zone. + */ + while (rq) { + if (blk_req_can_dispatch_to_zone(rq)) + return rq; + rq = deadline_latter_request(rq); + } + + return NULL; } /* @@ -288,7 +334,8 @@ static int deadline_dispatch_requests(struct request_queue *q, int force) if (reads) { BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ])); - if (writes && (dd->starved++ >= dd->writes_starved)) + if (deadline_fifo_request(dd, WRITE) && + (dd->starved++ >= dd->writes_starved)) goto dispatch_writes; data_dir = READ; @@ -333,6 +380,13 @@ dispatch_find_request: rq = next_rq; } + /* + * For a zoned block device, if we only have writes queued and none of + * them can be dispatched, rq will be NULL. + */ + if (!rq) + return 0; + dd->batching = 0; dispatch_request: @@ -345,6 +399,16 @@ dispatch_request: return 1; } +/* + * For zoned block devices, write unlock the target zone of completed + * write requests. 
+ */ +static void +deadline_completed_request(struct request_queue *q, struct request *rq) +{ + blk_req_zone_write_unlock(rq); +} + static void deadline_exit_queue(struct elevator_queue *e) { struct deadline_data *dd = e->elevator_data; @@ -466,6 +530,7 @@ static struct elevator_type iosched_deadline = { .elevator_merged_fn = deadline_merged_request, .elevator_merge_req_fn = deadline_merged_requests, .elevator_dispatch_fn = deadline_dispatch_requests, + .elevator_completed_req_fn = deadline_completed_request, .elevator_add_req_fn = deadline_add_request, .elevator_former_req_fn = elv_rb_former_request, .elevator_latter_req_fn = elv_rb_latter_request, -- cgit v1.2.3 From f0ba5ea2fe45c0ad24a7dedae84a97f7aa046494 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 20 Dec 2017 17:27:36 +0100 Subject: block, bfq: increase threshold to deem I/O as random If two processes do I/O close to each other, i.e., are cooperating processes in BFQ (and CFQ'S) nomenclature, then BFQ merges their associated bfq_queues, so as to get sequential I/O from the union of the I/O requests of the processes, and thus reach a higher throughput. A merged queue is then split if its I/O stops being sequential. In this respect, BFQ deems the I/O of a bfq_queue as (mostly) sequential only if less than 4 I/O requests are random, out of the last 32 requests inserted into the queue. Unfortunately, extensive testing (with the interleaved_io benchmark of the S suite [1], and with real applications spawning cooperating processes) has clearly shown that, with such a low threshold, only a rather low I/O throughput may be reached when several cooperating processes do I/O. In particular, the outcome of each test run was bimodal: if queue merging occurred and was stable during the test, then the throughput was close to the peak rate of the storage device, otherwise the throughput was arbitrarily low (usually around 1/10 of the peak rate with a rotational device). 
The probability to get the unlucky outcomes grew with the number of cooperating processes: it was already significant with 5 processes, and close to one with 7 or more processes. The cause of the low throughput in the unlucky runs was that the merged queues containing the I/O of these cooperating processes were soon split, because they contained more random I/O requests than those tolerated by the 4/32 threshold, but - that I/O would have however allowed the storage device to reach peak throughput or almost peak throughput; - in contrast, the I/O of these processes, if served individually (from separate queues) yielded a rather low throughput. So we repeated our tests with increasing values of the threshold, until we found the minimum value (19) for which we obtained maximum throughput, reliably, with at least up to 9 cooperating processes. Then we checked that the use of that higher threshold value did not cause any regression for any other benchmark in the suite [1]. This commit raises the threshold to such a higher value. 
[1] https://github.com/Algodev-github/S Signed-off-by: Angelo Ruocco Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index bcb6d21baf12..0f48583b9380 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -178,7 +178,7 @@ static struct kmem_cache *bfq_pool; #define BFQQ_SEEK_THR (sector_t)(8 * 100) #define BFQQ_SECT_THR_NONROT (sector_t)(2 * 32) #define BFQQ_CLOSE_THR (sector_t)(8 * 1024) -#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) +#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19) /* Min number of samples required to perform peak-rate update */ #define BFQ_RATE_MIN_SAMPLES 32 -- cgit v1.2.3 From 05e90283561648301e30232fe0c91bd345ceba03 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 20 Dec 2017 12:38:31 +0100 Subject: block, bfq: add missing rq_pos_tree update on rq removal If two processes do I/O close to each other, then BFQ merges the bfq_queues associated with these processes, to get a more sequential I/O, and thus a higher throughput. In this respect, to detect whether two processes are doing I/O close to each other, BFQ keeps a list of the head-of-line I/O requests of all active bfq_queues. The list is ordered by initial sectors, and implemented through a red-black tree (rq_pos_tree). Unfortunately, the update of the rq_pos_tree was incomplete, because the tree was not updated on the removal of the head-of-line I/O request of a bfq_queue, in case the queue did not remain empty. This commit adds the missing update. 
Signed-off-by: Paolo Valente Signed-off-by: Angelo Ruocco Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0f48583b9380..fa395a260a23 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1627,6 +1627,8 @@ static void bfq_remove_request(struct request_queue *q, rb_erase(&bfqq->pos_node, bfqq->pos_root); bfqq->pos_root = NULL; } + } else { + bfq_pos_tree_add_move(bfqd, bfqq); } if (rq->cmd_flags & REQ_META) -- cgit v1.2.3 From 1be6e8a964ee9aa8d4daac523ce29e5f486dd756 Mon Sep 17 00:00:00 2001 From: Angelo Ruocco Date: Wed, 20 Dec 2017 12:38:32 +0100 Subject: block, bfq: check low_latency flag in bfq_bfqq_save_state() A just-created bfq_queue will certainly be deemed as interactive on the arrival of its first I/O request, if the low_latency flag is set. Yet, if the queue is merged with another queue on the arrival of its first I/O request, it will not have the chance to be flagged as interactive. Nevertheless, if the queue is then split soon enough, it has to be flagged as interactive after the split. To handle this early-merge scenario correctly, BFQ saves the state of the queue, on the merge, as if the latter had already been deemed interactive. So, if the queue is split soon, it will get weight-raised, because the previous state of the queue is resumed on the split. Unfortunately, in the act of saving the state of the newly-created queue, BFQ doesn't check whether the low_latency flag is set, and this causes early-merged queues to be then weight-raised, on queue splits, even if low_latency is off. This commit addresses this problem by adding the missing check. 
Signed-off-by: Angelo Ruocco Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index fa395a260a23..2cf395daee80 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2064,7 +2064,8 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); if (unlikely(bfq_bfqq_just_created(bfqq) && - !bfq_bfqq_in_large_burst(bfqq))) { + !bfq_bfqq_in_large_burst(bfqq) && + bfqq->bfqd->low_latency)) { /* * bfqq being merged right after being created: bfqq * would have deserved interactive weight raising, but -- cgit v1.2.3 From 7b8fa3b900a087bc03b11329a92398fde563ba37 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 20 Dec 2017 12:38:33 +0100 Subject: block, bfq: let a queue be merged only shortly after starting I/O In BFQ and CFQ, two processes are said to be cooperating if they do I/O in such a way that the union of their I/O requests yields a sequential I/O pattern. To get such a sequential I/O pattern out of the non-sequential pattern of each cooperating process, BFQ and CFQ merge the queues associated with these processes. In more detail, cooperating processes, and thus their associated queues, usually start, or restart, to do I/O shortly after each other. This is the case, e.g., for the I/O threads of KVM/QEMU and of the dump utility. Basing on this assumption, this commit allows a bfq_queue to be merged only during a short time interval (100ms) after it starts, or re-starts, to do I/O. This filtering provides two important benefits. First, it greatly reduces the probability that two non-cooperating processes have their queues merged by mistake, if they just happen to do I/O close to each other for a short time interval. These spurious merges cause loss of service guarantees. 
A low-weight bfq_queue may unjustly get more than its expected share of the throughput: if such a low-weight queue is merged with a high-weight queue, then the I/O for the low-weight queue is served as if the queue had a high weight. This may damage other high-weight queues unexpectedly. For instance, because of this issue, lxterminal occasionally took 7.5 seconds to start, instead of 6.5 seconds, when some sequential readers and writers did I/O in the background on a FUJITSU MHX2300BT HDD. The reason is that the bfq_queues associated with some of the readers or the writers were merged with the high-weight queues of some processes that had to do some urgent but little I/O. The readers then exploited the inherited high weight for all or most of their I/O, during the start-up of terminal. The filtering introduced by this commit eliminated any outlier caused by spurious queue merges in our start-up time tests. This filtering also provides a little boost of the throughput sustainable by BFQ: 3-4%, depending on the CPU. The reason is that, once a bfq_queue cannot be merged any longer, this commit makes BFQ stop updating the data needed to handle merging for the queue. Signed-off-by: Paolo Valente Signed-off-by: Angelo Ruocco Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 57 ++++++++++++++++++++++++++++++++++++++++++----------- block/bfq-iosched.h | 2 ++ block/bfq-wf2q.c | 4 ++++ 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 2cf395daee80..7066d90f09df 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -166,6 +166,20 @@ static const int bfq_async_charge_factor = 10; /* Default timeout values, in jiffies, approximating CFQ defaults. */ const int bfq_timeout = HZ / 8; +/* + * Time limit for merging (see comments in bfq_setup_cooperator). 
Set + * to the slowest value that, in our tests, proved to be effective in + * removing false positives, while not causing true positives to miss + * queue merging. + * + * As can be deduced from the low time limit below, queue merging, if + * successful, happens at the very beggining of the I/O of the involved + * cooperating processes, as a consequence of the arrival of the very + * first requests from each cooperator. After that, there is very + * little chance to find cooperators. + */ +static const unsigned long bfq_merge_time_limit = HZ/10; + static struct kmem_cache *bfq_pool; /* Below this threshold (in ns), we consider thinktime immediate. */ @@ -444,6 +458,13 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, return bfqq; } +static bool bfq_too_late_for_merging(struct bfq_queue *bfqq) +{ + return bfqq->service_from_backlogged > 0 && + time_is_before_jiffies(bfqq->first_IO_time + + bfq_merge_time_limit); +} + void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct rb_node **p, *parent; @@ -454,6 +475,14 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->pos_root = NULL; } + /* + * bfqq cannot be merged any longer (see comments in + * bfq_setup_cooperator): no point in adding bfqq into the + * position tree. 
+ */ + if (bfq_too_late_for_merging(bfqq)) + return; + if (bfq_class_idle(bfqq)) return; if (!bfqq->next_rq) @@ -1935,6 +1964,9 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) { + if (bfq_too_late_for_merging(new_bfqq)) + return false; + if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) || (bfqq->ioprio_class != new_bfqq->ioprio_class)) return false; @@ -2003,6 +2035,20 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; + /* + * Prevent bfqq from being merged if it has been created too + * long ago. The idea is that true cooperating processes, and + * thus their associated bfq_queues, are supposed to be + * created shortly after each other. This is the case, e.g., + * for KVM/QEMU and dump I/O threads. Basing on this + * assumption, the following filtering greatly reduces the + * probability that two non-cooperating processes, which just + * happen to do close I/O for some short time interval, have + * their queues merged by mistake. + */ + if (bfq_too_late_for_merging(bfqq)) + return NULL; + if (bfqq->new_bfqq) return bfqq->new_bfqq; @@ -3002,17 +3048,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, */ slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta); - /* - * Increase service_from_backlogged before next statement, - * because the possible next invocation of - * bfq_bfqq_charge_time would likely inflate - * entity->service. In contrast, service_from_backlogged must - * contain real service, to enable the soft real-time - * heuristic to correctly compute the bandwidth consumed by - * bfqq. 
- */ - bfqq->service_from_backlogged += entity->service; - /* * As above explained, charge slow (typically seeky) and * timed-out queues with the time and not the service diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 91c4390903a1..5d47b58d5fc8 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -344,6 +344,8 @@ struct bfq_queue { unsigned long wr_start_at_switch_to_srt; unsigned long split_time; /* time of last split */ + + unsigned long first_IO_time; /* time of first I/O for this queue */ }; /** diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index e495d3f9b4b0..4456eda34e48 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -835,6 +835,10 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served) struct bfq_entity *entity = &bfqq->entity; struct bfq_service_tree *st; + if (!bfqq->service_from_backlogged) + bfqq->first_IO_time = jiffies; + + bfqq->service_from_backlogged += served; for_each_entity(entity) { st = bfq_entity_service_tree(entity); -- cgit v1.2.3 From 4403e4e467c365b4189e3e3d3ad35cf67b8c36ed Mon Sep 17 00:00:00 2001 From: Angelo Ruocco Date: Wed, 20 Dec 2017 12:38:34 +0100 Subject: block, bfq: remove superfluous check in queue-merging setup When two or more processes do I/O in a way that their requests are sequential in respect to one another, BFQ merges the bfq_queues associated with the processes. This way the overall I/O pattern becomes sequential, and thus there is a boost in throughput. These cooperating processes usually start or restart to do I/O shortly after each other. So, in order to avoid merging non-cooperating processes, BFQ ensures that none of these queues has been in weight raising for too long. In this respect, from commit "block, bfq-sq, bfq-mq: let a queue be merged only shortly after being created", BFQ checks whether any queue (and not only weight-raised ones) is doing I/O continuously from too long to be merged.
This new additional check makes the first one useless: a queue doing I/O from long enough, if being weight-raised, is also a queue in weight raising for too long to be merged. Accordingly, this commit removes the first check. Signed-off-by: Angelo Ruocco Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 7066d90f09df..9625550b2f85 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -1990,20 +1990,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, return true; } -/* - * If this function returns true, then bfqq cannot be merged. The idea - * is that true cooperation happens very early after processes start - * to do I/O. Usually, late cooperations are just accidental false - * positives. In case bfqq is weight-raised, such false positives - * would evidently degrade latency guarantees for bfqq. - */ -static bool wr_from_too_long(struct bfq_queue *bfqq) -{ - return bfqq->wr_coeff > 1 && - time_is_before_jiffies(bfqq->last_wr_start_finish + - msecs_to_jiffies(100)); -} - /* * Attempt to schedule a merge of bfqq with the currently in-service * queue or with a close queue among the scheduled queues. Return @@ -2017,11 +2003,6 @@ static bool wr_from_too_long(struct bfq_queue *bfqq) * to maintain. Besides, in such a critical condition as an out of memory, * the benefits of queue merging may be little relevant, or even negligible. * - * Weight-raised queues can be merged only if their weight-raising - * period has just started. In fact cooperating processes are usually - * started together. Thus, with this filter we avoid false positives - * that would jeopardize low-latency guarantees. 
- * * WARNING: queue merging may impair fairness among non-weight raised * queues, for at least two reasons: 1) the original weight of a * merged queue may change during the merged state, 2) even being the @@ -2052,9 +2033,7 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfqq->new_bfqq) return bfqq->new_bfqq; - if (!io_struct || - wr_from_too_long(bfqq) || - unlikely(bfqq == &bfqd->oom_bfqq)) + if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL; /* If there is only one backlogged queue, don't search. */ @@ -2063,12 +2042,9 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, in_service_bfqq = bfqd->in_service_queue; - if (!in_service_bfqq || in_service_bfqq == bfqq - || wr_from_too_long(in_service_bfqq) || - unlikely(in_service_bfqq == &bfqd->oom_bfqq)) - goto check_scheduled; - - if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && + if (in_service_bfqq && in_service_bfqq != bfqq && + likely(in_service_bfqq != &bfqd->oom_bfqq) && + bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && bfqq->entity.parent == in_service_bfqq->entity.parent && bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) { new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); @@ -2080,12 +2056,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, * queues. The only thing we need is that the bio/request is not * NULL, as we need it to establish whether a cooperator exists. 
*/ -check_scheduled: new_bfqq = bfq_find_close_cooperator(bfqd, bfqq, bfq_io_struct_pos(io_struct, request)); - if (new_bfqq && !wr_from_too_long(new_bfqq) && - likely(new_bfqq != &bfqd->oom_bfqq) && + if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) && bfq_may_be_close_cooperator(bfqq, new_bfqq)) return bfq_setup_merge(bfqq, new_bfqq); -- cgit v1.2.3 From a34b024448eb71b0e51ad011fa1862236e366034 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 15 Dec 2017 07:23:12 +0100 Subject: block, bfq: consider also past I/O in soft real-time detection BFQ privileges the I/O of soft real-time applications, such as video players, to guarantee to these applications a high bandwidth and a low latency. In this respect, it is not easy to correctly detect when an application is soft real-time. A particularly nasty false positive is that of an I/O-bound application that occasionally happens to meet all requirements to be deemed as soft real-time. After being detected as soft real-time, such an application monopolizes the device. Fortunately, BFQ will realize soon that the application is actually not soft real-time and suspend every privilege. Yet, the application may happen again to be wrongly detected as soft real-time, and so on. As highlighted by our tests, this problem causes BFQ to occasionally fail to guarantee a high responsiveness, in the presence of heavy background I/O workloads. The reason is that the background workload happens to be detected as soft real-time, more or less frequently, during the execution of the interactive task under test. To give an idea, because of this problem, Libreoffice Writer occasionally takes 8 seconds, instead of 3, to start up, if there are sequential reads and writes in the background, on a Kingston SSDNow V300. This commit addresses this issue by leveraging the following facts.
The reason why some applications are detected as soft real-time despite all BFQ checks to avoid false positives, is simply that, during high CPU or storage-device load, I/O-bound applications may happen to do I/O slowly enough to meet all soft real-time requirements, and pass all BFQ extra checks. Yet, this happens only for limited time periods: slow-speed time intervals are usually interspersed between other time intervals during which these applications do I/O at a very high speed. To exploit these facts, this commit introduces a little change, in the detection of soft real-time behavior, to systematically consider also the recent past: the higher the speed was in the recent past, the later next I/O should arrive for the application to be considered as soft real-time. At the beginning of a slow-speed interval, the minimum arrival time allowed for the next I/O usually happens to still be so high, to fall *after* the end of the slow-speed period itself. As a consequence, the application does not risk to be deemed as soft real-time during the slow-speed interval. Then, during the next high-speed interval, the application cannot, evidently, be deemed as soft real-time (exactly because of its speed), and so on. This extra filtering proved to be rather effective: in the above test, the frequency of false positives became so low that the start-up time was 3 seconds in all iterations (apart from occasional outliers, caused by page-cache-management issues, which are out of the scope of this commit, and cannot be solved by an I/O scheduler). 
Tested-by: Lee Tibbert Signed-off-by: Paolo Valente Signed-off-by: Angelo Ruocco Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 115 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 34 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9625550b2f85..e33c5c4c9856 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2940,45 +2940,87 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, * whereas soft_rt_next_start is set to infinity for applications that do * not. * - * Unfortunately, even a greedy application may happen to behave in an - * isochronous way if the CPU load is high. In fact, the application may - * stop issuing requests while the CPUs are busy serving other processes, - * then restart, then stop again for a while, and so on. In addition, if - * the disk achieves a low enough throughput with the request pattern - * issued by the application (e.g., because the request pattern is random - * and/or the device is slow), then the application may meet the above - * bandwidth requirement too. To prevent such a greedy application to be - * deemed as soft real-time, a further rule is used in the computation of - * soft_rt_next_start: soft_rt_next_start must be higher than the current - * time plus the maximum time for which the arrival of a request is waited - * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. - * This filters out greedy applications, as the latter issue instead their - * next request as soon as possible after the last one has been completed - * (in contrast, when a batch of requests is completed, a soft real-time - * application spends some time processing data). + * Unfortunately, even a greedy (i.e., I/O-bound) application may + * happen to meet, occasionally or systematically, both the above + * bandwidth and isochrony requirements. This may happen at least in + * the following circumstances. First, if the CPU load is high. 
The + * application may stop issuing requests while the CPUs are busy + * serving other processes, then restart, then stop again for a while, + * and so on. The other circumstances are related to the storage + * device: the storage device is highly loaded or reaches a low-enough + * throughput with the I/O of the application (e.g., because the I/O + * is random and/or the device is slow). In all these cases, the + * I/O of the application may be simply slowed down enough to meet + * the bandwidth and isochrony requirements. To reduce the probability + * that greedy applications are deemed as soft real-time in these + * corner cases, a further rule is used in the computation of + * soft_rt_next_start: the return value of this function is forced to + * be higher than the maximum between the following two quantities. * - * Unfortunately, the last filter may easily generate false positives if - * only bfqd->bfq_slice_idle is used as a reference time interval and one - * or both the following cases occur: - * 1) HZ is so low that the duration of a jiffy is comparable to or higher - * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with - * HZ=100. + * (a) Current time plus: (1) the maximum time for which the arrival + * of a request is waited for when a sync queue becomes idle, + * namely bfqd->bfq_slice_idle, and (2) a few extra jiffies. We + * postpone for a moment the reason for adding a few extra + * jiffies; we get back to it after next item (b). Lower-bounding + * the return value of this function with the current time plus + * bfqd->bfq_slice_idle tends to filter out greedy applications, + * because the latter issue their next request as soon as possible + * after the last one has been completed. In contrast, a soft + * real-time application spends some time processing data, after a + * batch of its requests has been completed. + * + * (b) Current value of bfqq->soft_rt_next_start. 
As pointed out + * above, greedy applications may happen to meet both the + * bandwidth and isochrony requirements under heavy CPU or + * storage-device load. In more detail, in these scenarios, these + * applications happen, only for limited time periods, to do I/O + * slowly enough to meet all the requirements described so far, + * including the filtering in above item (a). These slow-speed + * time intervals are usually interspersed between other time + * intervals during which these applications do I/O at a very high + * speed. Fortunately, exactly because of the high speed of the + * I/O in the high-speed intervals, the values returned by this + * function happen to be so high, near the end of any such + * high-speed interval, to be likely to fall *after* the end of + * the low-speed time interval that follows. These high values are + * stored in bfqq->soft_rt_next_start after each invocation of + * this function. As a consequence, if the last value of + * bfqq->soft_rt_next_start is constantly used to lower-bound the + * next value that this function may return, then, from the very + * beginning of a low-speed interval, bfqq->soft_rt_next_start is + * likely to be constantly kept so high that any I/O request + * issued during the low-speed interval is considered as arriving + * to soon for the application to be deemed as soft + * real-time. Then, in the high-speed interval that follows, the + * application will not be deemed as soft real-time, just because + * it will do I/O at a high speed. And so on. + * + * Getting back to the filtering in item (a), in the following two + * cases this filtering might be easily passed by a greedy + * application, if the reference quantity was just + * bfqd->bfq_slice_idle: + * 1) HZ is so low that the duration of a jiffy is comparable to or + * higher than bfqd->bfq_slice_idle. This happens, e.g., on slow + * devices with HZ=100. 
The time granularity may be so coarse + * that the approximation, in jiffies, of bfqd->bfq_slice_idle + * is rather lower than the exact value. * 2) jiffies, instead of increasing at a constant rate, may stop increasing * for a while, then suddenly 'jump' by several units to recover the lost * increments. This seems to happen, e.g., inside virtual machines. - * To address this issue, we do not use as a reference time interval just - * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In - * particular we add the minimum number of jiffies for which the filter - * seems to be quite precise also in embedded systems and KVM/QEMU virtual - * machines. + * To address this issue, in the filtering in (a) we do not use as a + * reference time interval just bfqd->bfq_slice_idle, but + * bfqd->bfq_slice_idle plus a few jiffies. In particular, we add the + * minimum number of jiffies for which the filter seems to be quite + * precise also in embedded systems and KVM/QEMU virtual machines. */ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, struct bfq_queue *bfqq) { - return max(bfqq->last_idle_bklogged + - HZ * bfqq->service_from_backlogged / - bfqd->bfq_wr_max_softrt_rate, - jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); + return max3(bfqq->soft_rt_next_start, + bfqq->last_idle_bklogged + + HZ * bfqq->service_from_backlogged / + bfqd->bfq_wr_max_softrt_rate, + jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); } /** @@ -4014,10 +4056,15 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfqq->split_time = bfq_smallest_from_now(); /* - * Set to the value for which bfqq will not be deemed as - * soft rt when it becomes backlogged. + * To not forget the possibly high bandwidth consumed by a + * process/queue in the recent past, + * bfq_bfqq_softrt_next_start() returns a value at least equal + * to the current value of bfqq->soft_rt_next_start (see + * comments on bfq_bfqq_softrt_next_start). 
Set + * soft_rt_next_start to now, to mean that bfqq has consumed + * no bandwidth so far. */ - bfqq->soft_rt_next_start = bfq_greatest_from_now(); + bfqq->soft_rt_next_start = jiffies; /* first request is almost certainly seeky */ bfqq->seek_history = 1; -- cgit v1.2.3 From 9b25bd0368d562d1929059e8eb9de4102567b923 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Mon, 4 Dec 2017 11:42:05 +0100 Subject: block, bfq: remove batches of confusing ifdefs Commit a33801e8b473 ("block, bfq: move debug blkio stats behind CONFIG_DEBUG_BLK_CGROUP") introduced two batches of confusing ifdefs: one reported in [1], plus a similar one in another function. This commit removes both batches, in the way suggested in [1]. [1] https://www.spinics.net/lists/linux-block/msg20043.html Fixes: a33801e8b473 ("block, bfq: move debug blkio stats behind CONFIG_DEBUG_BLK_CGROUP") Reported-by: Linus Torvalds Tested-by: Luca Miccio Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 127 +++++++++++++++++++++++++++++----------------------- 1 file changed, 72 insertions(+), 55 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e33c5c4c9856..7bd789da7a29 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3743,35 +3743,16 @@ exit: return rq; } -static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) -{ - struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; - struct request *rq; #if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) - struct bfq_queue *in_serv_queue, *bfqq; - bool waiting_rq, idle_timer_disabled; -#endif - - spin_lock_irq(&bfqd->lock); - -#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) - in_serv_queue = bfqd->in_service_queue; - waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); - - rq = __bfq_dispatch_request(hctx); - - idle_timer_disabled = - waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); - -#else - rq = 
__bfq_dispatch_request(hctx); -#endif - spin_unlock_irq(&bfqd->lock); +static void bfq_update_dispatch_stats(struct request_queue *q, + struct request *rq, + struct bfq_queue *in_serv_queue, + bool idle_timer_disabled) +{ + struct bfq_queue *bfqq = rq ? RQ_BFQQ(rq) : NULL; -#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) - bfqq = rq ? RQ_BFQQ(rq) : NULL; if (!idle_timer_disabled && !bfqq) - return rq; + return; /* * rq and bfqq are guaranteed to exist until this function @@ -3786,7 +3767,7 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) * In addition, the following queue lock guarantees that * bfqq_group(bfqq) exists as well. */ - spin_lock_irq(hctx->queue->queue_lock); + spin_lock_irq(q->queue_lock); if (idle_timer_disabled) /* * Since the idle timer has been disabled, @@ -3805,9 +3786,37 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) bfqg_stats_set_start_empty_time(bfqg); bfqg_stats_update_io_remove(bfqg, rq->cmd_flags); } - spin_unlock_irq(hctx->queue->queue_lock); + spin_unlock_irq(q->queue_lock); +} +#else +static inline void bfq_update_dispatch_stats(struct request_queue *q, + struct request *rq, + struct bfq_queue *in_serv_queue, + bool idle_timer_disabled) {} #endif +static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) +{ + struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; + struct request *rq; + struct bfq_queue *in_serv_queue; + bool waiting_rq, idle_timer_disabled; + + spin_lock_irq(&bfqd->lock); + + in_serv_queue = bfqd->in_service_queue; + waiting_rq = in_serv_queue && bfq_bfqq_wait_request(in_serv_queue); + + rq = __bfq_dispatch_request(hctx); + + idle_timer_disabled = + waiting_rq && !bfq_bfqq_wait_request(in_serv_queue); + + spin_unlock_irq(&bfqd->lock); + + bfq_update_dispatch_stats(hctx->queue, rq, in_serv_queue, + idle_timer_disabled); + return rq; } @@ -4335,16 +4344,46 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct 
request *rq) return idle_timer_disabled; } +#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) +static void bfq_update_insert_stats(struct request_queue *q, + struct bfq_queue *bfqq, + bool idle_timer_disabled, + unsigned int cmd_flags) +{ + if (!bfqq) + return; + + /* + * bfqq still exists, because it can disappear only after + * either it is merged with another queue, or the process it + * is associated with exits. But both actions must be taken by + * the same process currently executing this flow of + * instructions. + * + * In addition, the following queue lock guarantees that + * bfqq_group(bfqq) exists as well. + */ + spin_lock_irq(q->queue_lock); + bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); + if (idle_timer_disabled) + bfqg_stats_update_idle_time(bfqq_group(bfqq)); + spin_unlock_irq(q->queue_lock); +} +#else +static inline void bfq_update_insert_stats(struct request_queue *q, + struct bfq_queue *bfqq, + bool idle_timer_disabled, + unsigned int cmd_flags) {} +#endif + static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) { struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; -#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) struct bfq_queue *bfqq = RQ_BFQQ(rq); bool idle_timer_disabled = false; unsigned int cmd_flags; -#endif spin_lock_irq(&bfqd->lock); if (blk_mq_sched_try_insert_merge(q, rq)) { @@ -4363,7 +4402,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, else list_add_tail(&rq->queuelist, &bfqd->dispatch); } else { -#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) idle_timer_disabled = __bfq_insert_request(bfqd, rq); /* * Update bfqq, because, if a queue merge has occurred @@ -4371,9 +4409,6 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * redirected into a new queue. 
*/ bfqq = RQ_BFQQ(rq); -#else - __bfq_insert_request(bfqd, rq); -#endif if (rq_mergeable(rq)) { elv_rqhash_add(q, rq); @@ -4382,35 +4417,17 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, } } -#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) /* * Cache cmd_flags before releasing scheduler lock, because rq * may disappear afterwards (for example, because of a request * merge). */ cmd_flags = rq->cmd_flags; -#endif + spin_unlock_irq(&bfqd->lock); -#if defined(CONFIG_BFQ_GROUP_IOSCHED) && defined(CONFIG_DEBUG_BLK_CGROUP) - if (!bfqq) - return; - /* - * bfqq still exists, because it can disappear only after - * either it is merged with another queue, or the process it - * is associated with exits. But both actions must be taken by - * the same process currently executing this flow of - * instruction. - * - * In addition, the following queue lock guarantees that - * bfqq_group(bfqq) exists as well. - */ - spin_lock_irq(q->queue_lock); - bfqg_stats_update_io_add(bfqq_group(bfqq), bfqq, cmd_flags); - if (idle_timer_disabled) - bfqg_stats_update_idle_time(bfqq_group(bfqq)); - spin_unlock_irq(q->queue_lock); -#endif + bfq_update_insert_stats(q, bfqq, idle_timer_disabled, + cmd_flags); } static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, -- cgit v1.2.3 From 040ee69226f8a96b7943645d68f41d5d44b5ff7d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 Dec 2017 20:20:38 -0500 Subject: fix "netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1'" Descriptor table is a shared object; it's not a place where you can stick temporary references to files, especially when we don't need an opened file at all. 
Cc: stable@vger.kernel.org # v4.14 Fixes: 98589a0998b8 ("netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1'") Signed-off-by: Al Viro --- include/linux/bpf.h | 10 ++++++++++ kernel/bpf/inode.c | 40 +++++++++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 2 +- net/netfilter/xt_bpf.c | 14 ++------------ 4 files changed, 52 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e55e4255a210..b63a592ad29d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -419,6 +419,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) attr->numa_node : NUMA_NO_NODE; } +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -506,6 +508,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, { return 0; } + +static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -514,6 +522,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, return bpf_prog_get_type_dev(ufd, type, false); } +bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); + int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 01aaef1a77c5..5bb5e49ef4c3 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -368,7 +368,45 @@ out: putname(pname); return ret; } -EXPORT_SYMBOL_GPL(bpf_obj_get_user); + +static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) +{ + struct bpf_prog *prog; + int ret = inode_permission(inode, MAY_READ | MAY_WRITE); + if (ret) + return ERR_PTR(ret); + + if (inode->i_op == &bpf_map_iops) + return ERR_PTR(-EINVAL); + if (inode->i_op != 
&bpf_prog_iops) + return ERR_PTR(-EACCES); + + prog = inode->i_private; + + ret = security_bpf_prog(prog); + if (ret < 0) + return ERR_PTR(ret); + + if (!bpf_prog_get_ok(prog, &type, false)) + return ERR_PTR(-EINVAL); + + return bpf_prog_inc(prog); +} + +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) +{ + struct bpf_prog *prog; + struct path path; + int ret = kern_path(name, LOOKUP_FOLLOW, &path); + if (ret) + return ERR_PTR(ret); + prog = __get_prog_inode(d_backing_inode(path.dentry), type); + if (!IS_ERR(prog)) + touch_atime(&path); + path_put(&path); + return prog; +} +EXPORT_SYMBOL(bpf_prog_get_type_path); static void bpf_evict_inode(struct inode *inode) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2c4cfeaa8d5e..5cb783fc8224 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1057,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) } EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); -static bool bpf_prog_get_ok(struct bpf_prog *prog, +bool bpf_prog_get_ok(struct bpf_prog *prog, enum bpf_prog_type *attach_type, bool attach_drv) { /* not an attachment, just a refcount inc, always allow */ diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 041da0d9c06f..fa2ca0a13619 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -52,18 +52,8 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) { - mm_segment_t oldfs = get_fs(); - int retval, fd; - - set_fs(KERNEL_DS); - fd = bpf_obj_get_user(path, 0); - set_fs(oldfs); - if (fd < 0) - return fd; - - retval = __bpf_mt_check_fd(fd, ret); - sys_close(fd); - return retval; + *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER); + return PTR_ERR_OR_ZERO(*ret); } static int bpf_mt_check(const struct xt_mtchk_param *par) -- cgit v1.2.3 From 9059a3493efea6492451430c7e2fa0af799a2abb Mon Sep 17 00:00:00 2001 From: Nicolas Pitre 
Date: Thu, 16 Nov 2017 20:06:39 -0500 Subject: kconfig: fix relational operators for bool and tristate symbols Since commit 31847b67bec0 ("kconfig: allow use of relations other than (in)equality") it is possible to use relational operators in Kconfig statements. However, those operators give unexpected results when applied to bool/tristate values: (n < y) = y (correct) (m < y) = y (correct) (n < m) = n (wrong) This happens because relational operators process bool and tristate symbols as strings and m sorts before n. It makes little sense to do a lexicographical compare on bool and tristate values though. Documentation/kbuild/kconfig-language.txt states that expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2 respectively for calculations). Let's make it so for relational comparisons with bool/tristate expressions as well and document them. If at least one symbol is an actual string then the lexicographical compare works just as before. Signed-off-by: Nicolas Pitre Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig-language.txt | 23 +++++++++++++++-------- scripts/kconfig/expr.c | 5 ++++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt index 262722d8867b..c4a293a03c33 100644 --- a/Documentation/kbuild/kconfig-language.txt +++ b/Documentation/kbuild/kconfig-language.txt @@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax: <expr> ::= <symbol> (1) <symbol> '=' <symbol> (2) <symbol> '!=' <symbol> (3) - '(' <expr> ')' (4) - '!' <expr> (5) - <expr> '&&' <expr> (6) - <expr> '||' <expr> (7) + <symbol> '<' <symbol> (4) + <symbol> '>' <symbol> (4) + <symbol> '<=' <symbol> (4) + <symbol> '>=' <symbol> (4) + '(' <expr> ')' (5) + '!' <expr> (6) + <expr> '&&' <expr> (7) + <expr> '||' <expr> (8) Expressions are listed in decreasing order of precedence. @@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence. otherwise 'n'. (3) If the values of both symbols are equal, it returns 'n', otherwise 'y'. -(4) Returns the value of the expression.
Used to override precedence. -(5) Returns the result of (2-/expr/). -(6) Returns the result of min(/expr/, /expr/). -(7) Returns the result of max(/expr/, /expr/). +(4) If value of <symbol> is respectively lower, greater, lower-or-equal, + or greater-or-equal than value of <symbol>, it returns 'y', + otherwise 'n'. +(5) Returns the value of the expression. Used to override precedence. +(6) Returns the result of (2-/expr/). +(7) Returns the result of min(/expr/, /expr/). +(8) Returns the result of max(/expr/, /expr/). An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2 respectively for calculations). A menu entry becomes visible when its diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index cbf4996dd9c1..8cee597d33a5 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -893,7 +893,10 @@ static enum string_value_kind expr_parse_string(const char *str, switch (type) { case S_BOOLEAN: case S_TRISTATE: - return k_string; + val->s = !strcmp(str, "n") ? 0 : + !strcmp(str, "m") ? 1 : + !strcmp(str, "y") ? 2 : -1; + return k_signed; case S_INT: val->s = strtoll(str, &tail, 10); kind = k_signed; -- cgit v1.2.3 From 5133550296d43236439494aa955bfb765a89f615 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 4 Jan 2018 21:06:49 +0300 Subject: sh_eth: fix SH7757 GEther initialization Renesas SH7757 has 2 Fast and 2 Gigabit Ether controllers, while the 'sh_eth' driver can only reset and initialize TSU of the first controller pair. Shimoda-san tried to solve that adding the 'needs_init' member to the 'struct sh_eth_plat_data', however the platform code still never sets this flag. I think that we can infer this information from the 'devno' variable (set to 'platform_device::id') and reset/init the Ether controller pair only for an even 'devno'; therefore 'sh_eth_plat_data::needs_init' can be removed... Fixes: 150647fb2c31 ("net: sh_eth: change the condition of initialization") Signed-off-by: Sergei Shtylyov Signed-off-by: David S.
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- include/linux/sh_eth.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 1bdd67a8a869..f21c1db91c3f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3254,8 +3254,8 @@ static int sh_eth_drv_probe(struct platform_device *pdev) ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER; } - /* initialize first or needed device */ - if (!devno || pd->needs_init) { + /* Need to init only the first port of the two sharing a TSU */ + if (devno % 2 == 0) { if (mdp->cd->chip_reset) mdp->cd->chip_reset(ndev); diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index ff3642d267f7..94081e9a5010 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -17,7 +17,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif -- cgit v1.2.3 From 5b9f57cf47b87f07210875d6a24776b4496b818d Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 7 Dec 2017 00:28:27 -0800 Subject: apparmor: fix regression in mount mediation when feature set is pinned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the mount code was refactored for Labels it was not correctly updated to check whether policy supported mediation of the mount class. This causes a regression when the kernel feature set is reported as supporting mount and policy is pinned to a feature set that does not support mount mediation. 
BugLink: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=882697#41 Fixes: 2ea3ffb7782a ("apparmor: add mount mediation") Reported-by: Fabian Grünbichler Cc: Stable Signed-off-by: John Johansen --- security/apparmor/mount.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c index ed9b4d0f9f7e..8c558cbce930 100644 --- a/security/apparmor/mount.c +++ b/security/apparmor/mount.c @@ -329,6 +329,9 @@ static int match_mnt_path_str(struct aa_profile *profile, AA_BUG(!mntpath); AA_BUG(!buffer); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer, &mntpnt, &info, profile->disconnected); if (error) @@ -380,6 +383,9 @@ static int match_mnt(struct aa_profile *profile, const struct path *path, AA_BUG(!profile); AA_BUG(devpath && !devbuffer); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + if (devpath) { error = aa_path_name(devpath, path_flags(profile, devpath), devbuffer, &devname, &info, @@ -558,6 +564,9 @@ static int profile_umount(struct aa_profile *profile, struct path *path, AA_BUG(!profile); AA_BUG(!path); + if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) + return 0; + error = aa_path_name(path, path_flags(profile, path), buffer, &name, &info, profile->disconnected); if (error) @@ -613,7 +622,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile, AA_BUG(!new_path); AA_BUG(!old_path); - if (profile_unconfined(profile)) + if (profile_unconfined(profile) || + !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT)) return aa_get_newest_label(&profile->label); error = aa_path_name(old_path, path_flags(profile, old_path), -- cgit v1.2.3 From ca47480921587ae30417dd234a9f79af188e3666 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 5 Jan 2018 14:27:58 -0800 Subject: xtensa: fix futex_atomic_cmpxchg_inatomic Return 0 if the operation was successful, not the userspace memory value. 
Check that userspace value equals passed oldval, not itself. Don't update *uval if the value wasn't read from userspace memory. This fixes process hang due to infinite loop in futex_lock_pi. It also fixes a bunch of glibc tests nptl/tst-mutexpi*. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/include/asm/futex.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h index eaaf1ebcc7a4..5bfbc1c401d4 100644 --- a/arch/xtensa/include/asm/futex.h +++ b/arch/xtensa/include/asm/futex.h @@ -92,7 +92,6 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int ret = 0; - u32 prev; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; @@ -103,26 +102,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __asm__ __volatile__ ( " # futex_atomic_cmpxchg_inatomic\n" - "1: l32i %1, %3, 0\n" - " mov %0, %5\n" - " wsr %1, scompare1\n" - "2: s32c1i %0, %3, 0\n" - "3:\n" + " wsr %5, scompare1\n" + "1: s32c1i %1, %4, 0\n" + " s32i %1, %6, 0\n" + "2:\n" " .section .fixup,\"ax\"\n" " .align 4\n" - "4: .long 3b\n" - "5: l32r %1, 4b\n" - " movi %0, %6\n" + "3: .long 2b\n" + "4: l32r %1, 3b\n" + " movi %0, %7\n" " jx %1\n" " .previous\n" " .section __ex_table,\"a\"\n" - " .long 1b,5b,2b,5b\n" + " .long 1b,4b\n" " .previous\n" - : "+r" (ret), "=&r" (prev), "+m" (*uaddr) - : "r" (uaddr), "r" (oldval), "r" (newval), "I" (-EFAULT) + : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval) + : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT) : "memory"); - *uval = prev; return ret; } -- cgit v1.2.3 From bdae44705c0d5b751fbd79bc4a169905b25ed335 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Thu, 4 Jan 2018 00:31:55 +0800 Subject: ARM: dts: sun[47]i: Fix display backend 1 output to TCON0 remote endpoint There is a copy-paste error in the display pipeline device tree graph. 
The remote endpoint of the display backend 1's output to TCON0 points to the wrong endpoint. This will result in the driver incorrectly parsing the relationship of the components. Reported-by: Andrea Venturi Fixes: 0df4cf33a594 ("ARM: dts: sun4i: Add device nodes for display pipelines") Fixes: 5b92b29bed45 ("ARM: dts: sun7i: Add device nodes for display pipelines") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard --- arch/arm/boot/dts/sun4i-a10.dtsi | 2 +- arch/arm/boot/dts/sun7i-a20.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 5840f5c75c3b..4f2f2eea0755 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -1104,7 +1104,7 @@ be1_out_tcon0: endpoint@0 { reg = <0>; - remote-endpoint = <&tcon1_in_be0>; + remote-endpoint = <&tcon0_in_be1>; }; be1_out_tcon1: endpoint@1 { diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 59655e42e4b0..bd0cd3204273 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -1354,7 +1354,7 @@ be1_out_tcon0: endpoint@0 { reg = <0>; - remote-endpoint = <&tcon1_in_be0>; + remote-endpoint = <&tcon0_in_be1>; }; be1_out_tcon1: endpoint@1 { -- cgit v1.2.3 From d787b8b3509a78b1bf922cc3a1061711074847a8 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 22 Dec 2017 18:12:41 +0100 Subject: mtd: nand: Fix unfinished comment in nand_init_data_interface() Give an unfinished comment a meaning. 
Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index ab8ad9e8a8d8..96c97588e1ba 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1284,7 +1284,10 @@ static int nand_init_data_interface(struct nand_chip *chip) if (ret) continue; - /* Pass -1 to only */ + /* + * Pass NAND_DATA_IFACE_CHECK_ONLY to only check if the + * controller supports the requested timings. + */ ret = chip->setup_data_interface(mtd, NAND_DATA_IFACE_CHECK_ONLY, &chip->data_interface); -- cgit v1.2.3 From 039b4377e5621a2cc197a7aff1e06db432e4dcc2 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 5 Jan 2018 18:02:56 -0200 Subject: mtd: nand: brcmnand: Add a NULL check for devm_kasprintf() devm_kasprintf() may fail, so we should better add a NULL check and propagate an error on failure. Signed-off-by: Fabio Estevam Signed-off-by: Boris Brezillon --- drivers/mtd/nand/brcmnand/brcmnand.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index 54842512edb1..e0797abb1ebd 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -2237,6 +2237,9 @@ static int brcmnand_init_cs(struct brcmnand_host *host, struct device_node *dn) nand_set_controller_data(chip, host); mtd->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "brcmnand.%d", host->cs); + if (!mtd->name) + return -ENOMEM; + mtd->owner = THIS_MODULE; mtd->dev.parent = &pdev->dev; -- cgit v1.2.3 From 7729bebc619307a0233c86f8585a4bf3eadc7ce4 Mon Sep 17 00:00:00 2001 From: Valentin Ilie Date: Fri, 5 Jan 2018 23:12:59 +0000 Subject: ia64, sched/cputime: Fix build error if CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y Remove the extra parenthesis. 
This bug was introduced by: e2339a4caa5e: ("ia64: Convert vtime to use nsec units directly") Signed-off-by: Valentin Ilie Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: fenghua.yu@intel.com Cc: linux-ia64@vger.kernel.org Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/1515193979-24873-1-git-send-email-valentin.ilie@gmail.com Signed-off-by: Ingo Molnar --- arch/ia64/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index c6ecb97151a2..9025699049ca 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk) } if (ti->softirq_time) { - delta = cycle_to_nsec(ti->softirq_time)); + delta = cycle_to_nsec(ti->softirq_time); account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ); } -- cgit v1.2.3 From 9ae21dd66b970b5e3192a636353d75ede0529338 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 5 Jan 2018 08:18:52 -0800 Subject: perf/x86/msr: Add support for MSR_IA32_THERM_STATUS This patch adds support for the Digital Readout provided by the IA32_THERM_STATUS MSR (0x19C) on Intel X86 processors. The readout shows the number of degrees Celsius to the TCC (critical temperature) supported by the processor. Thus, the larger, the better. The perf_event support is provided via the msr PMU. The new logical event is called cpu_thermal_margin. It comes with a unit and snapshot files. The event shows the current temperature distance (margin). It is not an accumulating event. The unit is degrees C. The event is provided per logical CPU to make things simpler but it is the same for both hyper-threads sharing a physical core. $ perf stat -I 1000 -a -A -e msr/cpu_thermal_margin/ This will print the temperature for all logical CPUs.
time CPU counts unit events 1.000123741 CPU0 38 C msr/cpu_thermal_margin/ 1.000161837 CPU1 37 C msr/cpu_thermal_margin/ 1.000187906 CPU2 36 C msr/cpu_thermal_margin/ 1.000189046 CPU3 39 C msr/cpu_thermal_margin/ 1.000283044 CPU4 40 C msr/cpu_thermal_margin/ 1.000344297 CPU5 40 C msr/cpu_thermal_margin/ 1.000365832 CPU6 39 C msr/cpu_thermal_margin/ ... In case the temperature margin cannot be read, the reported value would be -1. Works on all processors supporting the Digital Readout (dtherm in cpuinfo) Signed-off-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/1515169132-3980-1-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 14efaa0e8684..0be15b9b2376 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -10,7 +10,9 @@ enum perf_msr_id { PERF_MSR_SMI = 4, PERF_MSR_PTSC = 5, PERF_MSR_IRPERF = 6, - + PERF_MSR_THERM = 7, + PERF_MSR_THERM_SNAP = 8, + PERF_MSR_THERM_UNIT = 9, PERF_MSR_EVENT_MAX, }; @@ -29,6 +31,12 @@ static bool test_irperf(int idx) return boot_cpu_has(X86_FEATURE_IRPERF); } +static bool test_therm_status(int idx) +{ + return boot_cpu_has(X86_FEATURE_DTHERM); +} + + static bool test_intel(int idx) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || @@ -102,6 +110,9 @@ PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03"); PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04"); PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05"); PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06"); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07"); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1"); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, 
evattr_therm_unit, "C"); static struct perf_msr msr[] = { [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, @@ -111,6 +122,9 @@ static struct perf_msr msr[] = { [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, + [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, }, + [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, }, + [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, }, }; static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { @@ -193,10 +207,15 @@ again: goto again; delta = now - prev; - if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) + if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) { delta = sign_extend64(delta, 31); - - local64_add(delta, &event->count); + local64_add(delta, &event->count); + } else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) { + /* if valid, extract digital readout, other set to -1 */ + now = now & (1ULL << 31) ? 
(now >> 16) & 0x3f : -1; + local64_set(&event->count, now); + } else + local64_add(delta, &event->count); } static void msr_event_start(struct perf_event *event, int flags) -- cgit v1.2.3 From 9128d3ed9de3882c83b927eb553d5d44c84505f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Jan 2018 08:18:52 -0800 Subject: perf/x86/msr: Clean up the code Recent changes made a bit of an inconsistent mess out of arch/x86/events/msr.c, fix it: - re-align the initialization tables to be vertically aligned and readable again - harmonize comment style in terms of punctuation, capitalization and spelling - use curly braces for multi-condition branches - remove extra newlines - simplify the code a bit Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/1515169132-3980-1-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 61 ++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 0be15b9b2376..18e2628e2d8f 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -36,7 +36,6 @@ static bool test_therm_status(int idx) return boot_cpu_has(X86_FEATURE_DTHERM); } - static bool test_intel(int idx) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || @@ -103,28 +102,28 @@ struct perf_msr { bool (*test)(int idx); }; -PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00"); -PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01"); -PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02"); -PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03"); -PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04"); -PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05"); -PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06"); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin, 
evattr_therm, "event=0x07"); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1"); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C"); +PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" ); +PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" ); +PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" ); +PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" ); +PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" ); +PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" ); +PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" ); static struct perf_msr msr[] = { - [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, - [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, - [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, - [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, - [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, - [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, - [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, - [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, }, - [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, }, - [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, }, + [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, + [PERF_MSR_IRPERF] = { 
MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, + [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, }, + [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, }, + [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, }, }; static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { @@ -175,9 +174,9 @@ static int msr_event_init(struct perf_event *event) if (!msr[cfg].attr) return -EINVAL; - event->hw.idx = -1; - event->hw.event_base = msr[cfg].msr; - event->hw.config = cfg; + event->hw.idx = -1; + event->hw.event_base = msr[cfg].msr; + event->hw.config = cfg; return 0; } @@ -198,7 +197,7 @@ static void msr_event_update(struct perf_event *event) u64 prev, now; s64 delta; - /* Careful, an NMI might modify the previous event value. */ + /* Careful, an NMI might modify the previous event value: */ again: prev = local64_read(&event->hw.prev_count); now = msr_read_counter(event); @@ -211,18 +210,18 @@ again: delta = sign_extend64(delta, 31); local64_add(delta, &event->count); } else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) { - /* if valid, extract digital readout, other set to -1 */ + /* If valid, extract digital readout, otherwise set to -1: */ now = now & (1ULL << 31) ? 
(now >> 16) & 0x3f : -1; local64_set(&event->count, now); - } else + } else { local64_add(delta, &event->count); + } } static void msr_event_start(struct perf_event *event, int flags) { - u64 now; + u64 now = msr_read_counter(event); - now = msr_read_counter(event); local64_set(&event->hw.prev_count, now); } @@ -269,9 +268,7 @@ static int __init msr_init(void) for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) { u64 val; - /* - * Virt sucks arse; you cannot tell if a R/O MSR is present :/ - */ + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) msr[i].attr = NULL; } -- cgit v1.2.3 From 310d82784fb4d60c80569f5ca9f53a7f3bf1d477 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 5 Jan 2018 21:55:38 +0100 Subject: parisc: qemu idle sleep support Add qemu idle sleep support when running under qemu with SeaBIOS PDC firmware. Like the power architecture we use the "or" assembler instructions, which translate to nops on real hardware, to indicate that qemu shall idle sleep. Signed-off-by: Helge Deller Cc: Richard Henderson CC: stable@vger.kernel.org # v4.9+ --- arch/parisc/kernel/process.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 30f92391a93e..cad3e8661cd6 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -183,6 +184,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) return 1; } +/* + * Idle thread support + * + * Detect when running on QEMU with SeaBIOS PDC Firmware and let + * QEMU idle the host too. + */ + +int running_on_qemu __read_mostly; + +void __cpuidle arch_cpu_idle_dead(void) +{ + /* nop on real hardware, qemu will offline CPU. 
*/ + asm volatile("or %%r31,%%r31,%%r31\n":::); +} + +void __cpuidle arch_cpu_idle(void) +{ + local_irq_enable(); + + /* nop on real hardware, qemu will idle sleep. */ + asm volatile("or %%r10,%%r10,%%r10\n":::); +} + +static int __init parisc_idle_init(void) +{ + const char *marker; + + /* check QEMU/SeaBIOS marker in PAGE0 */ + marker = (char *) &PAGE0->pad0; + running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); + + if (!running_on_qemu) + cpu_idle_poll_ctrl(1); + + return 0; +} +arch_initcall(parisc_idle_init); + /* * Copy architecture-specific thread state */ -- cgit v1.2.3 From b94b7373317164402ff7728d10f7023127a02b60 Mon Sep 17 00:00:00 2001 From: Jia Zhang Date: Mon, 1 Jan 2018 10:04:47 +0800 Subject: x86/microcode/intel: Extend BDW late-loading with a revision check Instead of blacklisting all model 79 CPUs when attempting a late microcode loading, limit that only to CPUs with microcode revisions < 0x0b000021 because only on those late loading may cause a system hang. For such processors either: a) a BIOS update which might contain a newer microcode revision or b) the early microcode loading method should be considered. Processors with revisions 0x0b000021 or higher will not experience such hangs. For more details, see erratum BDF90 in document #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family Specification Update) from September 2017. [ bp: Heavily massage commit message and pr_* statements. 
] Fixes: 723f2828a98c ("x86/microcode/intel: Disable late loading on model 79") Signed-off-by: Jia Zhang Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Tony Luck Cc: x86-ml Cc: # v4.14 Link: http://lkml.kernel.org/r/1514772287-92959-1-git-send-email-qianyue.zj@alibaba-inc.com --- arch/x86/kernel/cpu/microcode/intel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 8ccdca6d3f9e..d9e460fc7a3b 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { - pr_err_once("late loading on model 79 is disabled.\n"); + /* + * Late loading on model 79 with microcode revision less than 0x0b000021 + * may result in a system hang. This behavior is documented in item + * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). + */ + if (c->x86 == 6 && + c->x86_model == INTEL_FAM6_BROADWELL_X && + c->x86_mask == 0x01 && + c->microcode < 0x0b000021) { + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); return true; } -- cgit v1.2.3 From 911c3a30cab8269239d24e68df4adf9f7f9e2a01 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 20 Dec 2017 05:45:36 +0000 Subject: mtd: sharpslpart: make local function sharpsl_nand_cleanup_ftl() static Fixes the following sparse warnings: drivers/mtd/parsers/sharpslpart.c:222:6: warning: symbol 'sharpsl_nand_cleanup_ftl' was not declared. Should it be static? 
Signed-off-by: Wei Yongjun Acked-by: Andrea Adami Signed-off-by: Boris Brezillon --- drivers/mtd/parsers/sharpslpart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/sharpslpart.c b/drivers/mtd/parsers/sharpslpart.c index 0ddb79ac390d..8893dc82a5c8 100644 --- a/drivers/mtd/parsers/sharpslpart.c +++ b/drivers/mtd/parsers/sharpslpart.c @@ -219,7 +219,7 @@ exit: return ret; } -void sharpsl_nand_cleanup_ftl(struct sharpsl_ftl *ftl) +static void sharpsl_nand_cleanup_ftl(struct sharpsl_ftl *ftl) { kfree(ftl->log2phy); } -- cgit v1.2.3 From 33f45c44d68b3593826524ba6d02bd9cce9e101e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 15 Dec 2017 13:39:51 +0100 Subject: mtd: Do not allow MTD devices with inconsistent erase properties When mtd->erasesize is 0 or mtd->_erase is NULL, that means the device does not support the erase operation, which in turn means it should have the MTD_NO_ERASE flag set. Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal --- drivers/mtd/mtdcore.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index f80e911b8843..642c35dde686 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -503,6 +503,11 @@ int add_mtd_device(struct mtd_info *mtd) return -EEXIST; BUG_ON(mtd->writesize == 0); + + if (WARN_ON((!mtd->erasesize || !mtd->_erase) && + !(mtd->flags & MTD_NO_ERASE))) + return -EINVAL; + mutex_lock(&mtd_table_mutex); i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); -- cgit v1.2.3 From f72071b892d6f5eccf90756f3c12b1422bd4b474 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 15 Dec 2017 13:39:52 +0100 Subject: mtd: Add an helper to make erase request aligned on ->erasesize There's currently nothing forcing alignment of einfo->addr and einfo->len on mtd->erasesize. 
Since we don't know if automatically aligning those fields in mtd_erase() will hurt some drivers, we add a helper function to let drivers that need such an alignment explicitly ask for it. Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal --- include/linux/mtd/mtd.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index cd55bf14ad51..205ededccc60 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -489,6 +489,34 @@ static inline uint32_t mtd_mod_by_eb(uint64_t sz, struct mtd_info *mtd) return do_div(sz, mtd->erasesize); } +/** + * mtd_align_erase_req - Adjust an erase request to align things on eraseblock + * boundaries. + * @mtd: the MTD device this erase request applies on + * @req: the erase request to adjust + * + * This function will adjust @req->addr and @req->len to align them on + * @mtd->erasesize. Of course we expect @mtd->erasesize to be != 0. + */ +static inline void mtd_align_erase_req(struct mtd_info *mtd, + struct erase_info *req) +{ + u32 mod; + + if (WARN_ON(!mtd->erasesize)) + return; + + mod = mtd_mod_by_eb(req->addr, mtd); + if (mod) { + req->addr -= mod; + req->len += mod; + } + + mod = mtd_mod_by_eb(req->addr + req->len, mtd); + if (mod) + req->len += mtd->erasesize - mod; +} + static inline uint32_t mtd_div_by_ws(uint64_t sz, struct mtd_info *mtd) { if (mtd->writesize_shift) -- cgit v1.2.3 From 86292abc5af206f64192a0b60da06fd604debdc0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:03 +0800 Subject: block: introduce bio helpers for converting to multipage bvec The following helpers are introduced for converting current users of direct access to bvec table, and prepares for supporting multipage bvec: bio_pages_all() bio_first_bvec_all() bio_first_page_all() bio_last_bvec_all() All are named as bio_*_all() to follow bio_for_each_segment_all(), they can only be used on bio of !bio_flagged(bio, BIO_CLONED), that means
the whole bvec table is covered. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/linux/bio.h b/include/linux/bio.h index 82f0c8fd7be8..435ddf04e889 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -300,6 +300,29 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) bv->bv_len = iter.bi_bvec_done; } +static inline unsigned bio_pages_all(struct bio *bio) +{ + WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); + return bio->bi_vcnt; +} + +static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) +{ + WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); + return bio->bi_io_vec; +} + +static inline struct page *bio_first_page_all(struct bio *bio) +{ + return bio_first_bvec_all(bio)->bv_page; +} + +static inline struct bio_vec *bio_last_bvec_all(struct bio *bio) +{ + WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); + return &bio->bi_io_vec[bio->bi_vcnt - 1]; +} + enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ -- cgit v1.2.3 From 263663cd3c4fbfc40cb7504c4be2dadbc0992cc1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:04 +0800 Subject: block: convert to bio_first_bvec_all & bio_first_page_all This patch converts to bio_first_bvec_all() & bio_first_page_all() for retrieving the 1st bvec/page, and prepares for supporting multipage bvec. 
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_bitmap.c | 2 +- drivers/block/zram/zram_drv.c | 2 +- drivers/md/bcache/super.c | 8 ++++---- fs/btrfs/compression.c | 2 +- fs/btrfs/inode.c | 4 ++-- fs/f2fs/data.c | 2 +- kernel/power/swap.c | 2 +- mm/page_io.c | 4 ++-- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index bd97908c766f..9f4e6f502b84 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -953,7 +953,7 @@ static void drbd_bm_endio(struct bio *bio) struct drbd_bm_aio_ctx *ctx = bio->bi_private; struct drbd_device *device = ctx->device; struct drbd_bitmap *b = device->bitmap; - unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); + unsigned int idx = bm_page_to_idx(bio_first_page_all(bio)); if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 && !bm_test_page_unchanged(b->bm_pages[idx])) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d70eba30003a..0afa6c8c3857 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -430,7 +430,7 @@ static void put_entry_bdev(struct zram *zram, unsigned long entry) static void zram_page_end_io(struct bio *bio) { - struct page *page = bio->bi_io_vec[0].bv_page; + struct page *page = bio_first_page_all(bio); page_endio(page, op_is_write(bio_op(bio)), blk_status_to_errno(bio->bi_status)); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index b4d28928dec5..8399fe0651f2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -211,7 +211,7 @@ static void write_bdev_super_endio(struct bio *bio) static void __write_super(struct cache_sb *sb, struct bio *bio) { - struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page); + struct cache_sb *out = page_address(bio_first_page_all(bio)); unsigned i; bio->bi_iter.bi_sector = SB_SECTOR; @@ -1166,7 +1166,7 @@ static void register_bdev(struct 
cache_sb *sb, struct page *sb_page, dc->bdev->bd_holder = dc; bio_init(&dc->sb_bio, dc->sb_bio.bi_inline_vecs, 1); - dc->sb_bio.bi_io_vec[0].bv_page = sb_page; + bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page; get_page(sb_page); if (cached_dev_init(dc, sb->block_size << 9)) @@ -1810,7 +1810,7 @@ void bch_cache_release(struct kobject *kobj) free_fifo(&ca->free[i]); if (ca->sb_bio.bi_inline_vecs[0].bv_page) - put_page(ca->sb_bio.bi_io_vec[0].bv_page); + put_page(bio_first_page_all(&ca->sb_bio)); if (!IS_ERR_OR_NULL(ca->bdev)) blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); @@ -1864,7 +1864,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, ca->bdev->bd_holder = ca; bio_init(&ca->sb_bio, ca->sb_bio.bi_inline_vecs, 1); - ca->sb_bio.bi_io_vec[0].bv_page = sb_page; + bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page; get_page(sb_page); if (blk_queue_discard(bdev_get_queue(ca->bdev))) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 5982c8a71f02..38a6b091bc25 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -563,7 +563,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, /* we need the actual starting offset of this extent in the file */ read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, - page_offset(bio->bi_io_vec->bv_page), + page_offset(bio_first_page_all(bio)), PAGE_SIZE); read_unlock(&em_tree->lock); if (!em) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e1a7f3cb5be9..4d5cb6e93c80 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8074,7 +8074,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) ASSERT(bio->bi_vcnt == 1); io_tree = &BTRFS_I(inode)->io_tree; failure_tree = &BTRFS_I(inode)->io_failure_tree; - ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode)); + ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode)); done->uptodate = 1; ASSERT(!bio_flagged(bio, BIO_CLONED)); @@ -8164,7 +8164,7 @@ 
static void btrfs_retry_endio(struct bio *bio) uptodate = 1; ASSERT(bio->bi_vcnt == 1); - ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode)); + ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode)); io_tree = &BTRFS_I(inode)->io_tree; failure_tree = &BTRFS_I(inode)->io_failure_tree; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 516fa0d3ff9c..455f086cce3d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -56,7 +56,7 @@ static void f2fs_read_end_io(struct bio *bio) int i; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { + if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) { f2fs_show_injection_info(FAULT_IO); bio->bi_status = BLK_STS_IOERR; } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 293ead59eccc..96c736313faa 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -240,7 +240,7 @@ static void hib_init_batch(struct hib_bio_batch *hb) static void hib_end_io(struct bio *bio) { struct hib_bio_batch *hb = bio->bi_private; - struct page *page = bio->bi_io_vec[0].bv_page; + struct page *page = bio_first_page_all(bio); if (bio->bi_status) { pr_alert("Read-error on swap-device (%u:%u:%Lu)\n", diff --git a/mm/page_io.c b/mm/page_io.c index e93f1a4cacd7..b41cf9644585 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -50,7 +50,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, void end_swap_bio_write(struct bio *bio) { - struct page *page = bio->bi_io_vec[0].bv_page; + struct page *page = bio_first_page_all(bio); if (bio->bi_status) { SetPageError(page); @@ -122,7 +122,7 @@ static void swap_slot_free_notify(struct page *page) static void end_swap_bio_read(struct bio *bio) { - struct page *page = bio->bi_io_vec[0].bv_page; + struct page *page = bio_first_page_all(bio); struct task_struct *waiter = bio->bi_private; if (bio->bi_status) { -- cgit v1.2.3 From c45a8f2def865e0d75b45618aef2963e15725cc4 Mon Sep 17 00:00:00 2001 From: Ming 
Lei Date: Mon, 18 Dec 2017 20:22:05 +0800 Subject: fs: convert to bio_last_bvec_all() This patch converts 3 users to bio_last_bvec_all(), so that we can go ahead and convert to multipage bvec. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/btrfs/compression.c | 2 +- fs/btrfs/extent_io.c | 2 +- fs/buffer.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 38a6b091bc25..75610d23d197 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -411,7 +411,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, static u64 bio_end_offset(struct bio *bio) { - struct bio_vec *last = &bio->bi_io_vec[bio->bi_vcnt - 1]; + struct bio_vec *last = bio_last_bvec_all(bio); return page_offset(last->bv_page) + last->bv_len + last->bv_offset; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 012d63870b99..69cd63d4503d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2724,7 +2724,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, unsigned long bio_flags) { blk_status_t ret = 0; - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct bio_vec *bvec = bio_last_bvec_all(bio); struct page *page = bvec->bv_page; struct extent_io_tree *tree = bio->bi_private; u64 start; diff --git a/fs/buffer.c b/fs/buffer.c index 0736a6a2e2f0..8b26295a56fe 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3014,7 +3014,7 @@ static void end_bio_bh_io_sync(struct bio *bio) void guard_bio_eod(int op, struct bio *bio) { sector_t maxsector; - struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; + struct bio_vec *bvec = bio_last_bvec_all(bio); unsigned truncated_bytes; struct hd_struct *part; -- cgit v1.2.3 From 7891f05cbf4944a5436491d66de2be7533089aea Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:06 +0800 Subject: block: bounce: avoid direct access to bvec table We will support multipage bvecs in the 
future, so change to iterator way for getting bv_page of bvec from original bio. Cc: Matthew Wilcox Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/bounce.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/block/bounce.c b/block/bounce.c index fceb1a96480b..0274c31d6c05 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -137,21 +137,20 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from) static void bounce_end_io(struct bio *bio, mempool_t *pool) { struct bio *bio_orig = bio->bi_private; - struct bio_vec *bvec, *org_vec; + struct bio_vec *bvec, orig_vec; int i; - int start = bio_orig->bi_iter.bi_idx; + struct bvec_iter orig_iter = bio_orig->bi_iter; /* * free up bounce indirect pages used */ bio_for_each_segment_all(bvec, bio, i) { - org_vec = bio_orig->bi_io_vec + i + start; - - if (bvec->bv_page == org_vec->bv_page) - continue; - - dec_zone_page_state(bvec->bv_page, NR_BOUNCE); - mempool_free(bvec->bv_page, pool); + orig_vec = bio_iter_iovec(bio_orig, orig_iter); + if (bvec->bv_page != orig_vec.bv_page) { + dec_zone_page_state(bvec->bv_page, NR_BOUNCE); + mempool_free(bvec->bv_page, pool); + } + bio_advance_iter(bio_orig, &orig_iter, orig_vec.bv_len); } bio_orig->bi_status = bio->bi_status; -- cgit v1.2.3 From 3c892a098b0bfa3e571f1f0d2a7e72fbaeea691a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:07 +0800 Subject: block: bounce: don't access bio->bi_io_vec in copy_to_high_bio_irq Firstly this patch introduces BVEC_ITER_ALL_INIT for iterating one bio from start to end. As we need to support multipage bvecs, don't access bio->bi_io_vec in copy_to_high_bio_irq(), and just use the standard iterator for that. 
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/bounce.c | 16 +++++++++++----- include/linux/bvec.h | 9 +++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/block/bounce.c b/block/bounce.c index 0274c31d6c05..c35a3d7f0528 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -113,24 +113,30 @@ int init_emergency_isa_pool(void) static void copy_to_high_bio_irq(struct bio *to, struct bio *from) { unsigned char *vfrom; - struct bio_vec tovec, *fromvec = from->bi_io_vec; + struct bio_vec tovec, fromvec; struct bvec_iter iter; + /* + * The bio of @from is created by bounce, so we can iterate + * its bvec from start to end, but the @from->bi_iter can't be + * trusted because it might be changed by splitting. + */ + struct bvec_iter from_iter = BVEC_ITER_ALL_INIT; bio_for_each_segment(tovec, to, iter) { - if (tovec.bv_page != fromvec->bv_page) { + fromvec = bio_iter_iovec(from, from_iter); + if (tovec.bv_page != fromvec.bv_page) { /* * fromvec->bv_offset and fromvec->bv_len might have * been modified by the block layer, so use the original * copy, bounce_copy_vec already uses tovec->bv_len */ - vfrom = page_address(fromvec->bv_page) + + vfrom = page_address(fromvec.bv_page) + tovec.bv_offset; bounce_copy_vec(&tovec, vfrom); flush_dcache_page(tovec.bv_page); } - - fromvec++; + bio_advance_iter(from, &from_iter, tovec.bv_len); } } diff --git a/include/linux/bvec.h b/include/linux/bvec.h index ec8a4d7af6bd..fe7a22dd133b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -125,4 +125,13 @@ static inline bool bvec_iter_rewind(const struct bio_vec *bv, ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) +/* for iterating one bio from start to end */ +#define BVEC_ITER_ALL_INIT (struct bvec_iter) \ +{ \ + .bi_sector = 0, \ + .bi_size = UINT_MAX, \ + .bi_idx = 0, \ + .bi_bvec_done = 0, \ +} + #endif /* __LINUX_BVEC_ITER_H */ -- cgit v1.2.3 From 8f50e358153dd68182c714626be4a90b64179cf4 Mon 
Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:08 +0800 Subject: dm: limit the max bio size as BIO_MAX_PAGES * PAGE_SIZE For BIO based DM, some targets aren't ready for dealing with bigger incoming bio than 1Mbyte, such as crypt target. Cc: Mike Snitzer Cc:dm-devel@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/md/dm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index de17b7193299..7475739fee49 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -920,7 +920,15 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) return -EINVAL; } - ti->max_io_len = (uint32_t) len; + /* + * BIO based queue uses its own splitting. When multipage bvecs + * is switched on, size of the incoming bio may be too big to + * be handled in some targets, such as crypt. + * + * When these targets are ready for the big bio, we can remove + * the limit. + */ + ti->max_io_len = min_t(uint32_t, len, BIO_MAX_PAGES * PAGE_SIZE); return 0; } -- cgit v1.2.3 From c2421edf5f9151d0eb28affbf76e9e4f8ddd03c6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:09 +0800 Subject: bcache: comment on direct access to bvec table All direct access to bvec table are safe even after multipage bvec is supported. 
Cc: linux-bcache@vger.kernel.org Acked-by: Coly Li Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 1 + drivers/md/bcache/util.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 81e8dc3dbe5e..02a4cf646fdc 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -432,6 +432,7 @@ static void do_btree_node_write(struct btree *b) continue_at(cl, btree_node_write_done, NULL); } else { + /* No problem for multipage bvec since the bio is just allocated */ b->bio->bi_vcnt = 0; bch_bio_map(b->bio, i); diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index e548b8b51322..61813d230015 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -249,6 +249,13 @@ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) : 0; } +/* + * Generally it isn't good to access .bi_io_vec and .bi_vcnt directly, + * the preferred way is bio_add_page, but in this case, bch_bio_map() + * supposes that the bvec table is empty, so it is safe to access + * .bi_vcnt & .bi_io_vec in this way even after multipage bvec is + * supported. + */ void bch_bio_map(struct bio *bio, void *base) { size_t size = bio->bi_iter.bi_size; -- cgit v1.2.3 From 25d8be77e19224d8f21b363d77b5283c5dc21a57 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:10 +0800 Subject: block: move bio_alloc_pages() to bcache bcache is the only user of bio_alloc_pages(), so move this function into bcache, and avoid it being misused in the future. Also rename it to bch_bio_alloc_pages() since it is bcache only.
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/bio.c | 28 ---------------------------- drivers/md/bcache/btree.c | 2 +- drivers/md/bcache/debug.c | 2 +- drivers/md/bcache/movinggc.c | 2 +- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/util.c | 27 +++++++++++++++++++++++++++ drivers/md/bcache/util.h | 1 + drivers/md/bcache/writeback.c | 2 +- include/linux/bio.h | 1 - 9 files changed, 33 insertions(+), 34 deletions(-) diff --git a/block/bio.c b/block/bio.c index 8bfdea58159b..fe1efbeaf4aa 100644 --- a/block/bio.c +++ b/block/bio.c @@ -968,34 +968,6 @@ void bio_advance(struct bio *bio, unsigned bytes) } EXPORT_SYMBOL(bio_advance); -/** - * bio_alloc_pages - allocates a single page for each bvec in a bio - * @bio: bio to allocate pages for - * @gfp_mask: flags for allocation - * - * Allocates pages up to @bio->bi_vcnt. - * - * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are - * freed. - */ -int bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) -{ - int i; - struct bio_vec *bv; - - bio_for_each_segment_all(bv, bio, i) { - bv->bv_page = alloc_page(gfp_mask); - if (!bv->bv_page) { - while (--bv >= bio->bi_io_vec) - __free_page(bv->bv_page); - return -ENOMEM; - } - } - - return 0; -} -EXPORT_SYMBOL(bio_alloc_pages); - /** * bio_copy_data - copy contents of data buffers from one chain of bios to * another diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 02a4cf646fdc..ebb1874218e7 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -419,7 +419,7 @@ static void do_btree_node_write(struct btree *b) SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_sector_offset(&b->keys, i)); - if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) { + if (!bch_bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) { int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 
c7a02c4900da..879ab21074c6 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -116,7 +116,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) return; check->bi_opf = REQ_OP_READ; - if (bio_alloc_pages(check, GFP_NOIO)) + if (bch_bio_alloc_pages(check, GFP_NOIO)) goto out_put; submit_bio_wait(check); diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index d50c1c97da68..a24c3a95b2c0 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -162,7 +162,7 @@ static void read_moving(struct cache_set *c) bio_set_op_attrs(bio, REQ_OP_READ, 0); bio->bi_end_io = read_moving_endio; - if (bio_alloc_pages(bio, GFP_KERNEL)) + if (bch_bio_alloc_pages(bio, GFP_KERNEL)) goto err; trace_bcache_gc_copy(&w->key); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 643c3021624f..c493fb947dc9 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -841,7 +841,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, cache_bio->bi_private = &s->cl; bch_bio_map(cache_bio, NULL); - if (bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO)) + if (bch_bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO)) goto out_put; if (reada) diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index 61813d230015..a23cd6a14b74 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -283,6 +283,33 @@ start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset, } } +/** + * bch_bio_alloc_pages - allocates a single page for each bvec in a bio + * @bio: bio to allocate pages for + * @gfp_mask: flags for allocation + * + * Allocates pages up to @bio->bi_vcnt. + * + * Returns 0 on success, -ENOMEM on failure. On failure, any allocated pages are + * freed. 
+ */ +int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask) +{ + int i; + struct bio_vec *bv; + + bio_for_each_segment_all(bv, bio, i) { + bv->bv_page = alloc_page(gfp_mask); + if (!bv->bv_page) { + while (--bv >= bio->bi_io_vec) + __free_page(bv->bv_page); + return -ENOMEM; + } + } + + return 0; +} + /* * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any * use permitted, subject to terms of PostgreSQL license; see.) diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index ed5e8a412eb8..4df4c5c1cab2 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -558,6 +558,7 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) } void bch_bio_map(struct bio *bio, void *base); +int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask); static inline sector_t bdev_sectors(struct block_device *bdev) { diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 56a37884ca8b..1ac2af6128b1 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -278,7 +278,7 @@ static void read_dirty(struct cached_dev *dc) bio_set_dev(&io->bio, PTR_CACHE(dc->disk.c, &w->key, 0)->bdev); io->bio.bi_end_io = read_dirty_endio; - if (bio_alloc_pages(&io->bio, GFP_KERNEL)) + if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) goto err_free; trace_bcache_writeback(&w->key); diff --git a/include/linux/bio.h b/include/linux/bio.h index 435ddf04e889..367a979fd4a6 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -500,7 +500,6 @@ static inline void bio_flush_dcache_pages(struct bio *bi) #endif extern void bio_copy_data(struct bio *dst, struct bio *src); -extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern void bio_free_pages(struct bio *bio); extern struct bio *bio_copy_user_iov(struct request_queue *, -- cgit v1.2.3 From a0b60d725e54f1caba4f5dc0dfef68040bcf9a8e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:11 +0800 Subject: btrfs: avoid access 
to .bi_vcnt directly BTRFS uses bio->bi_vcnt to figure out page numbers; this approach is no longer valid once we start enabling multipage bvecs. Use bio_pages_all() to do that instead. Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: linux-btrfs@vger.kernel.org Acked-by: David Sterba Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/btrfs/extent_io.c | 9 +++++---- fs/btrfs/extent_io.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 69cd63d4503d..d43360b33ef6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2257,7 +2257,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, return 0; } -bool btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, +bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages, struct io_failure_record *failrec, int failed_mirror) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -2281,7 +2281,7 @@ bool btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, * a) deliver good data to the caller * b) correct the bad sectors on disk */ - if (failed_bio->bi_vcnt > 1) { + if (failed_bio_pages > 1) { /* * to fulfill b), we need to know the exact failing sectors, as * we don't want to rewrite any more than the failed ones.
thus, @@ -2374,6 +2374,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, int read_mode = 0; blk_status_t status; int ret; + unsigned failed_bio_pages = bio_pages_all(failed_bio); BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -2381,13 +2382,13 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, if (ret) return ret; - if (!btrfs_check_repairable(inode, failed_bio, failrec, + if (!btrfs_check_repairable(inode, failed_bio_pages, failrec, failed_mirror)) { free_io_failure(failure_tree, tree, failrec); return -EIO; } - if (failed_bio->bi_vcnt > 1) + if (failed_bio_pages > 1) read_mode |= REQ_FAILFAST_DEV; phy_offset >>= inode->i_sb->s_blocksize_bits; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 93dcae0c3183..20854d63c75b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -540,7 +540,7 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end); int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, struct io_failure_record **failrec_ret); -bool btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, +bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages, struct io_failure_record *failrec, int fail_mirror); struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec, -- cgit v1.2.3 From c16a8ac3c021f454550f851f5e0772b29a5125b0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:12 +0800 Subject: btrfs: avoid accessing bvec table directly for a cloned bio Commit 17347cec15f919901c90(Btrfs: change how we iterate bios in endio) mentioned that for dio the submitted bio may be fast cloned, we can't access the bvec table directly for a cloned bio, so use bio_get_first_bvec() to retrieve the 1st bvec. 
Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: linux-btrfs@vger.kernel.org Cc: Liu Bo Reviewed-by: Liu Bo Acked-by: David Sterba Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4d5cb6e93c80..cb1e2d201434 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8015,6 +8015,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, int segs; int ret; blk_status_t status; + struct bio_vec bvec; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -8030,8 +8031,9 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio, } segs = bio_segments(failed_bio); + bio_get_first_bvec(failed_bio, &bvec); if (segs > 1 || - (failed_bio->bi_io_vec->bv_len > btrfs_inode_sectorsize(inode))) + (bvec.bv_len > btrfs_inode_sectorsize(inode))) read_mode |= REQ_FAILFAST_DEV; isector = start - btrfs_io_bio(failed_bio)->logical; -- cgit v1.2.3 From 92681eca6104d2ec2fdf9fc65f529deb226ab0a1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:13 +0800 Subject: dm-crypt: don't clear bvec->bv_page in crypt_free_buffer_pages() The bio is always freed after running crypt_free_buffer_pages(), so it isn't necessary to clear bv->bv_page.
Cc: Mike Snitzer Cc: dm-devel@redhat.com Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/md/dm-crypt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9fc12f556534..48332666fc38 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1446,7 +1446,6 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) bio_for_each_segment_all(bv, clone, i) { BUG_ON(!bv->bv_page); mempool_free(bv->bv_page, cc->page_pool); - bv->bv_page = NULL; } } -- cgit v1.2.3 From 6a501bf0807b5dc024fe52a4f956800a352c39ab Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:14 +0800 Subject: blk-merge: compute bio->bi_seg_front_size efficiently It is enough to check and compute bio->bi_seg_front_size just after the 1st segment is found, but the current code checks that for each bvec, which is inefficient. This patch follows the way in __blk_recalc_rq_segments() for computing bio->bi_seg_front_size, and it is more efficient and the code becomes more readable too.
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-merge.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index f5dedd57dff6..a476337a8ff4 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -146,22 +146,21 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, bvprvp = &bvprv; sectors += bv.bv_len >> 9; - if (nsegs == 1 && seg_size > front_seg_size) - front_seg_size = seg_size; continue; } new_segment: if (nsegs == queue_max_segments(q)) goto split; + if (nsegs == 1 && seg_size > front_seg_size) + front_seg_size = seg_size; + nsegs++; bvprv = bv; bvprvp = &bvprv; seg_size = bv.bv_len; sectors += bv.bv_len >> 9; - if (nsegs == 1 && seg_size > front_seg_size) - front_seg_size = seg_size; } do_split = false; @@ -174,6 +173,8 @@ split: bio = new; } + if (nsegs == 1 && seg_size > front_seg_size) + front_seg_size = seg_size; bio->bi_seg_front_size = front_seg_size; if (seg_size > bio->bi_seg_back_size) bio->bi_seg_back_size = seg_size; -- cgit v1.2.3 From a2d37968d784363842f87820a21e106741d28004 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:15 +0800 Subject: block: blk-merge: try to make front segments in full size When merging one bvec into a segment, if the bvec is too big to merge, the current policy is to move the whole bvec into another new segment. This patchset changes the policy to try to maximize the size of front segments; that is, in the above situation, part of the bvec is merged into the current segment, and the remainder is put into the next segment. This patch prepares for multipage bvec support, where this case can be quite common, so we should try to make front segments full size.
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-merge.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index a476337a8ff4..ca2e7aec8e77 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -109,6 +109,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, bool do_split = true; struct bio *new = NULL; const unsigned max_sectors = get_max_io_size(q, bio); + unsigned advance = 0; bio_for_each_segment(bv, bio, iter) { /* @@ -134,12 +135,32 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, } if (bvprvp && blk_queue_cluster(q)) { - if (seg_size + bv.bv_len > queue_max_segment_size(q)) - goto new_segment; if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) goto new_segment; + if (seg_size + bv.bv_len > queue_max_segment_size(q)) { + /* + * One assumption is that initial value of + * @seg_size(equals to bv.bv_len) won't be + * bigger than max segment size, but this + * becomes false after multipage bvecs. + */ + advance = queue_max_segment_size(q) - seg_size; + + if (advance > 0) { + seg_size += advance; + sectors += advance >> 9; + bv.bv_len -= advance; + bv.bv_offset += advance; + } + + /* + * Still need to put remainder of current + * bvec into a new segment. 
+ */ + goto new_segment; + } seg_size += bv.bv_len; bvprv = bv; @@ -161,6 +182,12 @@ new_segment: seg_size = bv.bv_len; sectors += bv.bv_len >> 9; + /* restore the bvec for iterator */ + if (advance) { + bv.bv_len += advance; + bv.bv_offset -= advance; + advance = 0; + } } do_split = false; @@ -361,16 +388,29 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, { int nbytes = bvec->bv_len; + unsigned advance = 0; if (*sg && *cluster) { - if ((*sg)->length + nbytes > queue_max_segment_size(q)) - goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) goto new_segment; + /* + * try best to merge part of the bvec into previous + * segment and follow same policy with + * blk_bio_segment_split() + */ + if ((*sg)->length + nbytes > queue_max_segment_size(q)) { + advance = queue_max_segment_size(q) - (*sg)->length; + if (advance) { + (*sg)->length += advance; + bvec->bv_offset += advance; + bvec->bv_len -= advance; + } + goto new_segment; + } + (*sg)->length += nbytes; } else { new_segment: @@ -393,6 +433,10 @@ new_segment: sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); (*nsegs)++; + + /* for making iterator happy */ + bvec->bv_offset -= advance; + bvec->bv_len += advance; } *bvprv = *bvec; } -- cgit v1.2.3 From cf8c0c6a3830583bd0e7c94933e155bf97cd162b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 20:22:16 +0800 Subject: block: blk-merge: remove unnecessary check In this case, 'sectors' can't be zero at all, so remove the check and let the bio be split. 
Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-merge.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index ca2e7aec8e77..446f63e076aa 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -129,9 +129,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, nsegs++; sectors = max_sectors; } - if (sectors) - goto split; - /* Make this single bvec as the 1st segment */ + goto split; } if (bvprvp && blk_queue_cluster(q)) { -- cgit v1.2.3 From 91f7b74aca363da5f294c9a24e870ae6ba3a27d3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Dec 2017 13:11:17 +0100 Subject: DAC960: split up ioctl function to reduce stack size When CONFIG_KASAN is set, all the local variables in this function are allocated on the stack together, leading to a warning about possible kernel stack overflow: drivers/block/DAC960.c: In function 'DAC960_gam_ioctl': drivers/block/DAC960.c:7061:1: error: the frame size of 2240 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] By splitting up the function into smaller chunks, we can avoid that and make the code slightly more readable at the same time. The coding style in this file is completely nonstandard, and I chose to not touch that at all, leaving the unconventional indentation unchanged to make it easier to review the diff. Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/block/DAC960.c | 160 +++++++++++++++++++++++++++---------------------- 1 file changed, 90 insertions(+), 70 deletions(-) diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 442e777bdfb2..728075214959 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -6619,43 +6619,27 @@ static void DAC960_DestroyProcEntries(DAC960_Controller_T *Controller) #ifdef DAC960_GAM_MINOR -/* - * DAC960_gam_ioctl is the ioctl function for performing RAID operations.
-*/ - -static long DAC960_gam_ioctl(struct file *file, unsigned int Request, - unsigned long Argument) +static long DAC960_gam_get_controller_info(DAC960_ControllerInfo_T __user *UserSpaceControllerInfo) { - long ErrorCode = 0; - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - - mutex_lock(&DAC960_mutex); - switch (Request) - { - case DAC960_IOCTL_GET_CONTROLLER_COUNT: - ErrorCode = DAC960_ControllerCount; - break; - case DAC960_IOCTL_GET_CONTROLLER_INFO: - { - DAC960_ControllerInfo_T __user *UserSpaceControllerInfo = - (DAC960_ControllerInfo_T __user *) Argument; DAC960_ControllerInfo_T ControllerInfo; DAC960_Controller_T *Controller; int ControllerNumber; + long ErrorCode; + if (UserSpaceControllerInfo == NULL) ErrorCode = -EINVAL; else ErrorCode = get_user(ControllerNumber, &UserSpaceControllerInfo->ControllerNumber); if (ErrorCode != 0) - break; + goto out; ErrorCode = -ENXIO; if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) { - break; + goto out; } Controller = DAC960_Controllers[ControllerNumber]; if (Controller == NULL) - break; + goto out; memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T)); ControllerInfo.ControllerNumber = ControllerNumber; ControllerInfo.FirmwareType = Controller->FirmwareType; @@ -6670,12 +6654,12 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, strcpy(ControllerInfo.FirmwareVersion, Controller->FirmwareVersion); ErrorCode = (copy_to_user(UserSpaceControllerInfo, &ControllerInfo, sizeof(DAC960_ControllerInfo_T)) ? 
-EFAULT : 0); - break; - } - case DAC960_IOCTL_V1_EXECUTE_COMMAND: - { - DAC960_V1_UserCommand_T __user *UserSpaceUserCommand = - (DAC960_V1_UserCommand_T __user *) Argument; +out: + return ErrorCode; +} + +static long DAC960_gam_v1_execute_command(DAC960_V1_UserCommand_T __user *UserSpaceUserCommand) +{ DAC960_V1_UserCommand_T UserCommand; DAC960_Controller_T *Controller; DAC960_Command_T *Command = NULL; @@ -6688,39 +6672,41 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, int ControllerNumber, DataTransferLength; unsigned char *DataTransferBuffer = NULL; dma_addr_t DataTransferBufferDMA; + long ErrorCode; + if (UserSpaceUserCommand == NULL) { ErrorCode = -EINVAL; - break; + goto out; } if (copy_from_user(&UserCommand, UserSpaceUserCommand, sizeof(DAC960_V1_UserCommand_T))) { ErrorCode = -EFAULT; - break; + goto out; } ControllerNumber = UserCommand.ControllerNumber; ErrorCode = -ENXIO; if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) - break; + goto out; Controller = DAC960_Controllers[ControllerNumber]; if (Controller == NULL) - break; + goto out; ErrorCode = -EINVAL; if (Controller->FirmwareType != DAC960_V1_Controller) - break; + goto out; CommandOpcode = UserCommand.CommandMailbox.Common.CommandOpcode; DataTransferLength = UserCommand.DataTransferLength; if (CommandOpcode & 0x80) - break; + goto out; if (CommandOpcode == DAC960_V1_DCDB) { if (copy_from_user(&DCDB, UserCommand.DCDB, sizeof(DAC960_V1_DCDB_T))) { ErrorCode = -EFAULT; - break; + goto out; } if (DCDB.Channel >= DAC960_V1_MaxChannels) - break; + goto out; if (!((DataTransferLength == 0 && DCDB.Direction == DAC960_V1_DCDB_NoDataTransfer) || @@ -6730,15 +6716,15 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, (DataTransferLength < 0 && DCDB.Direction == DAC960_V1_DCDB_DataTransferSystemToDevice))) - break; + goto out; if (((DCDB.TransferLengthHigh4 << 16) | DCDB.TransferLength) != abs(DataTransferLength)) - break; + goto out; 
DCDB_IOBUF = pci_alloc_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T), &DCDB_IOBUFDMA); if (DCDB_IOBUF == NULL) { ErrorCode = -ENOMEM; - break; + goto out; } } ErrorCode = -ENOMEM; @@ -6748,19 +6734,19 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, DataTransferLength, &DataTransferBufferDMA); if (DataTransferBuffer == NULL) - break; + goto out; } else if (DataTransferLength < 0) { DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, -DataTransferLength, &DataTransferBufferDMA); if (DataTransferBuffer == NULL) - break; + goto out; if (copy_from_user(DataTransferBuffer, UserCommand.DataTransferBuffer, -DataTransferLength)) { ErrorCode = -EFAULT; - break; + goto out; } } if (CommandOpcode == DAC960_V1_DCDB) @@ -6837,12 +6823,12 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, if (DCDB_IOBUF != NULL) pci_free_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T), DCDB_IOBUF, DCDB_IOBUFDMA); - break; - } - case DAC960_IOCTL_V2_EXECUTE_COMMAND: - { - DAC960_V2_UserCommand_T __user *UserSpaceUserCommand = - (DAC960_V2_UserCommand_T __user *) Argument; + out: + return ErrorCode; +} + +static long DAC960_gam_v2_execute_command(DAC960_V2_UserCommand_T __user *UserSpaceUserCommand) +{ DAC960_V2_UserCommand_T UserCommand; DAC960_Controller_T *Controller; DAC960_Command_T *Command = NULL; @@ -6855,26 +6841,26 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, dma_addr_t DataTransferBufferDMA; unsigned char *RequestSenseBuffer = NULL; dma_addr_t RequestSenseBufferDMA; + long ErrorCode = -EINVAL; - ErrorCode = -EINVAL; if (UserSpaceUserCommand == NULL) - break; + goto out; if (copy_from_user(&UserCommand, UserSpaceUserCommand, sizeof(DAC960_V2_UserCommand_T))) { ErrorCode = -EFAULT; - break; + goto out; } ErrorCode = -ENXIO; ControllerNumber = UserCommand.ControllerNumber; if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) - break; + goto out; Controller = 
DAC960_Controllers[ControllerNumber]; if (Controller == NULL) - break; + goto out; if (Controller->FirmwareType != DAC960_V2_Controller){ ErrorCode = -EINVAL; - break; + goto out; } DataTransferLength = UserCommand.DataTransferLength; ErrorCode = -ENOMEM; @@ -6884,14 +6870,14 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, DataTransferLength, &DataTransferBufferDMA); if (DataTransferBuffer == NULL) - break; + goto out; } else if (DataTransferLength < 0) { DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice, -DataTransferLength, &DataTransferBufferDMA); if (DataTransferBuffer == NULL) - break; + goto out; if (copy_from_user(DataTransferBuffer, UserCommand.DataTransferBuffer, -DataTransferLength)) { @@ -7001,42 +6987,44 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, if (RequestSenseBuffer != NULL) pci_free_consistent(Controller->PCIDevice, RequestSenseLength, RequestSenseBuffer, RequestSenseBufferDMA); - break; - } - case DAC960_IOCTL_V2_GET_HEALTH_STATUS: - { - DAC960_V2_GetHealthStatus_T __user *UserSpaceGetHealthStatus = - (DAC960_V2_GetHealthStatus_T __user *) Argument; +out: + return ErrorCode; +} + +static long DAC960_gam_v2_get_health_status(DAC960_V2_GetHealthStatus_T __user *UserSpaceGetHealthStatus) +{ DAC960_V2_GetHealthStatus_T GetHealthStatus; DAC960_V2_HealthStatusBuffer_T HealthStatusBuffer; DAC960_Controller_T *Controller; int ControllerNumber; + long ErrorCode; + if (UserSpaceGetHealthStatus == NULL) { ErrorCode = -EINVAL; - break; + goto out; } if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus, sizeof(DAC960_V2_GetHealthStatus_T))) { ErrorCode = -EFAULT; - break; + goto out; } ErrorCode = -ENXIO; ControllerNumber = GetHealthStatus.ControllerNumber; if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) - break; + goto out; Controller = DAC960_Controllers[ControllerNumber]; if (Controller == NULL) - break; + goto out; if (Controller->FirmwareType != 
DAC960_V2_Controller) { ErrorCode = -EINVAL; - break; + goto out; } if (copy_from_user(&HealthStatusBuffer, GetHealthStatus.HealthStatusBuffer, sizeof(DAC960_V2_HealthStatusBuffer_T))) { ErrorCode = -EFAULT; - break; + goto out; } ErrorCode = wait_event_interruptible_timeout(Controller->HealthStatusWaitQueue, !(Controller->V2.HealthStatusBuffer->StatusChangeCounter @@ -7046,7 +7034,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, DAC960_MonitoringTimerInterval); if (ErrorCode == -ERESTARTSYS) { ErrorCode = -EINTR; - break; + goto out; } if (copy_to_user(GetHealthStatus.HealthStatusBuffer, Controller->V2.HealthStatusBuffer, @@ -7054,7 +7042,39 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, ErrorCode = -EFAULT; else ErrorCode = 0; - } + +out: + return ErrorCode; +} + +/* + * DAC960_gam_ioctl is the ioctl function for performing RAID operations. +*/ + +static long DAC960_gam_ioctl(struct file *file, unsigned int Request, + unsigned long Argument) +{ + long ErrorCode = 0; + void __user *argp = (void __user *)Argument; + if (!capable(CAP_SYS_ADMIN)) return -EACCES; + + mutex_lock(&DAC960_mutex); + switch (Request) + { + case DAC960_IOCTL_GET_CONTROLLER_COUNT: + ErrorCode = DAC960_ControllerCount; + break; + case DAC960_IOCTL_GET_CONTROLLER_INFO: + ErrorCode = DAC960_gam_get_controller_info(argp); + break; + case DAC960_IOCTL_V1_EXECUTE_COMMAND: + ErrorCode = DAC960_gam_v1_execute_command(argp); + break; + case DAC960_IOCTL_V2_EXECUTE_COMMAND: + ErrorCode = DAC960_gam_v2_execute_command(argp); + break; + case DAC960_IOCTL_V2_GET_HEALTH_STATUS: + ErrorCode = DAC960_gam_v2_get_health_status(argp); break; default: ErrorCode = -ENOTTY; -- cgit v1.2.3 From bbbc3c1cfaf6900d24e3c9fcaac25d267ad2bc40 Mon Sep 17 00:00:00 2001 From: Wang Long Date: Tue, 5 Dec 2017 07:23:19 -0500 Subject: writeback: update comment in inode_io_list_move_locked The @head can be wb->b_dirty_time, so update the comment. 
Acked-by: Tejun Heo Signed-off-by: Wang Long Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index cea4836385b7..d4d04fee568a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -126,7 +126,7 @@ static void wb_io_lists_depopulated(struct bdi_writeback *wb) * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list * @inode: inode to be moved * @wb: target bdi_writeback - * @head: one of @wb->b_{dirty|io|more_io} + * @head: one of @wb->b_{dirty|io|more_io|dirty_time} * * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io. * Returns %true if @inode is the first occupant of the !dirty_time IO -- cgit v1.2.3 From e80a0af4759a164214f02da157a3800753ce135f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 08:26:46 -0800 Subject: lib/scatterlist: Introduce sgl_alloc() and sgl_free() Many kernel drivers contain code that allocates and frees both a scatterlist and the pages that populate that scatterlist. Introduce functions in lib/scatterlist.c that perform these tasks instead of duplicating this functionality in multiple drivers. Only include these functions in the build if CONFIG_SGL_ALLOC=y to avoid that the kernel size increases if this functionality is not used. 
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/scatterlist.h | 10 +++++ lib/Kconfig | 4 ++ lib/scatterlist.c | 105 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b7c83254c566..b8a7c1d1dbe3 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -276,6 +276,16 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask); +#ifdef CONFIG_SGL_ALLOC +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p); +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p); +void sgl_free_order(struct scatterlist *sgl, int order); +void sgl_free(struct scatterlist *sgl); +#endif /* CONFIG_SGL_ALLOC */ + size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip, bool to_buffer); diff --git a/lib/Kconfig b/lib/Kconfig index c5e84fbcb30b..4dd5c11366f9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -409,6 +409,10 @@ config HAS_DMA depends on !NO_DMA default y +config SGL_ALLOC + bool + default n + config DMA_NOOP_OPS bool depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7c1c55f7daaa..9afc9b432083 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -474,6 +474,111 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +#ifdef CONFIG_SGL_ALLOC + +/** + * sgl_alloc_order - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist. 
Must be at least one + * @order: Second argument for alloc_pages() + * @chainable: Whether or not to allocate an extra element in the scatterlist + * for scatterlist chaining purposes + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist that have pages + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p) +{ + struct scatterlist *sgl, *sg; + struct page *page; + unsigned int nent, nalloc; + u32 elem_len; + + nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order); + /* Check for integer overflow */ + if (length > (nent << (PAGE_SHIFT + order))) + return NULL; + nalloc = nent; + if (chainable) { + /* Check for integer overflow */ + if (nalloc + 1 < nalloc) + return NULL; + nalloc++; + } + sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), + (gfp & ~GFP_DMA) | __GFP_ZERO); + if (!sgl) + return NULL; + + sg_init_table(sgl, nent); + sg = sgl; + while (length) { + elem_len = min_t(u64, length, PAGE_SIZE << order); + page = alloc_pages(gfp, order); + if (!page) { + sgl_free(sgl); + return NULL; + } + + sg_set_page(sg, page, elem_len, 0); + length -= elem_len; + sg = sg_next(sg); + } + WARN_ON_ONCE(sg); + if (nent_p) + *nent_p = nent; + return sgl; +} +EXPORT_SYMBOL(sgl_alloc_order); + +/** + * sgl_alloc - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. 
+ */ +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p) +{ + return sgl_alloc_order(length, 0, false, gfp, nent_p); +} +EXPORT_SYMBOL(sgl_alloc); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + struct scatterlist *sg; + struct page *page; + + for (sg = sgl; sg; sg = sg_next(sg)) { + page = sg_page(sg); + if (page) + __free_pages(page, order); + } + kfree(sgl); +} +EXPORT_SYMBOL(sgl_free_order); + +/** + * sgl_free - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + */ +void sgl_free(struct scatterlist *sgl) +{ + sgl_free_order(sgl, 0); +} +EXPORT_SYMBOL(sgl_free); + +#endif /* CONFIG_SGL_ALLOC */ + void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset) -- cgit v1.2.3 From 8cd579d2794b90f810e534e75783ba78cdc91a07 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 08:26:47 -0800 Subject: crypto: scompress - use sgl_alloc() and sgl_free() Use the sgl_alloc() and sgl_free() functions instead of open coding these functions. 
Signed-off-by: Bart Van Assche Acked-by: Ard Biesheuvel Cc: Herbert Xu Signed-off-by: Jens Axboe --- crypto/Kconfig | 1 + crypto/scompress.c | 51 ++------------------------------------------------- 2 files changed, 3 insertions(+), 49 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index f7911963bb79..20360e040425 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -106,6 +106,7 @@ config CRYPTO_KPP config CRYPTO_ACOMP2 tristate select CRYPTO_ALGAPI2 + select SGL_ALLOC config CRYPTO_ACOMP tristate diff --git a/crypto/scompress.c b/crypto/scompress.c index 2075e2c4e7df..968bbcf65c94 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -140,53 +140,6 @@ static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) return ret; } -static void crypto_scomp_sg_free(struct scatterlist *sgl) -{ - int i, n; - struct page *page; - - if (!sgl) - return; - - n = sg_nents(sgl); - for_each_sg(sgl, sgl, n, i) { - page = sg_page(sgl); - if (page) - __free_page(page); - } - - kfree(sgl); -} - -static struct scatterlist *crypto_scomp_sg_alloc(size_t size, gfp_t gfp) -{ - struct scatterlist *sgl; - struct page *page; - int i, n; - - n = ((size - 1) >> PAGE_SHIFT) + 1; - - sgl = kmalloc_array(n, sizeof(struct scatterlist), gfp); - if (!sgl) - return NULL; - - sg_init_table(sgl, n); - - for (i = 0; i < n; i++) { - page = alloc_page(gfp); - if (!page) - goto err; - sg_set_page(sgl + i, page, PAGE_SIZE, 0); - } - - return sgl; - -err: - sg_mark_end(sgl + i); - crypto_scomp_sg_free(sgl); - return NULL; -} - static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); @@ -220,7 +173,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) scratch_dst, &req->dlen, *ctx); if (!ret) { if (!req->dst) { - req->dst = crypto_scomp_sg_alloc(req->dlen, GFP_ATOMIC); + req->dst = sgl_alloc(req->dlen, GFP_ATOMIC, NULL); if (!req->dst) goto out; } @@ -274,7 +227,7 @@ int crypto_init_scomp_ops_async(struct crypto_tfm 
*tfm) crt->compress = scomp_acomp_compress; crt->decompress = scomp_acomp_decompress; - crt->dst_free = crypto_scomp_sg_free; + crt->dst_free = sgl_free; crt->reqsize = sizeof(void *); return 0; -- cgit v1.2.3 From 4442b56fb5151e9a7e21c0f73aba5a071f559dce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 08:26:48 -0800 Subject: nvmet/fc: Use sgl_alloc() and sgl_free() Use the sgl_alloc() and sgl_free() functions instead of open coding these functions. Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Cc: Keith Busch Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/Kconfig | 1 + drivers/nvme/target/fc.c | 36 ++---------------------------------- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 03e4ab65fe77..4d9715630e21 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -39,6 +39,7 @@ config NVME_TARGET_FC tristate "NVMe over Fabrics FC target driver" depends on NVME_TARGET depends on HAS_DMA + select SGL_ALLOC help This enables the NVMe FC target support, which allows exporting NVMe devices over FC. 
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 5fd86039e353..840d1a39de33 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1697,31 +1697,12 @@ static int nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod) { struct scatterlist *sg; - struct page *page; unsigned int nent; - u32 page_len, length; - int i = 0; - length = fod->req.transfer_len; - nent = DIV_ROUND_UP(length, PAGE_SIZE); - sg = kmalloc_array(nent, sizeof(struct scatterlist), GFP_KERNEL); + sg = sgl_alloc(fod->req.transfer_len, GFP_KERNEL, &nent); if (!sg) goto out; - sg_init_table(sg, nent); - - while (length) { - page_len = min_t(u32, length, PAGE_SIZE); - - page = alloc_page(GFP_KERNEL); - if (!page) - goto out_free_pages; - - sg_set_page(&sg[i], page, page_len, 0); - length -= page_len; - i++; - } - fod->data_sg = sg; fod->data_sg_cnt = nent; fod->data_sg_cnt = fc_dma_map_sg(fod->tgtport->dev, sg, nent, @@ -1731,14 +1712,6 @@ nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod) return 0; -out_free_pages: - while (i > 0) { - i--; - __free_page(sg_page(&sg[i])); - } - kfree(sg); - fod->data_sg = NULL; - fod->data_sg_cnt = 0; out: return NVME_SC_INTERNAL; } @@ -1746,18 +1719,13 @@ out: static void nvmet_fc_free_tgt_pgs(struct nvmet_fc_fcp_iod *fod) { - struct scatterlist *sg; - int count; - if (!fod->data_sg || !fod->data_sg_cnt) return; fc_dma_unmap_sg(fod->tgtport->dev, fod->data_sg, fod->data_sg_cnt, ((fod->io_dir == NVMET_FCP_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE)); - for_each_sg(fod->data_sg, sg, fod->data_sg_cnt, count) - __free_page(sg_page(sg)); - kfree(fod->data_sg); + sgl_free(fod->data_sg); fod->data_sg = NULL; fod->data_sg_cnt = 0; } -- cgit v1.2.3 From 68c6e9cd2fa4f0109364834475628b4b1dd12257 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 08:26:49 -0800 Subject: nvmet/rdma: Use sgl_alloc() and sgl_free() Use the sgl_alloc() and sgl_free() functions instead of open coding these functions. 
Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Cc: Keith Busch Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/target/Kconfig | 1 + drivers/nvme/target/rdma.c | 63 +++------------------------------------------ 2 files changed, 5 insertions(+), 59 deletions(-) diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 4d9715630e21..5f4f8b16685f 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -29,6 +29,7 @@ config NVME_TARGET_RDMA tristate "NVMe over Fabrics RDMA target support" depends on INFINIBAND depends on NVME_TARGET + select SGL_ALLOC help This enables the NVMe RDMA target support, which allows exporting NVMe devices over RDMA. diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 49912909c298..0e4c15754c58 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -185,59 +185,6 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags); } -static void nvmet_rdma_free_sgl(struct scatterlist *sgl, unsigned int nents) -{ - struct scatterlist *sg; - int count; - - if (!sgl || !nents) - return; - - for_each_sg(sgl, sg, nents, count) - __free_page(sg_page(sg)); - kfree(sgl); -} - -static int nvmet_rdma_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, - u32 length) -{ - struct scatterlist *sg; - struct page *page; - unsigned int nent; - int i = 0; - - nent = DIV_ROUND_UP(length, PAGE_SIZE); - sg = kmalloc_array(nent, sizeof(struct scatterlist), GFP_KERNEL); - if (!sg) - goto out; - - sg_init_table(sg, nent); - - while (length) { - u32 page_len = min_t(u32, length, PAGE_SIZE); - - page = alloc_page(GFP_KERNEL); - if (!page) - goto out_free_pages; - - sg_set_page(&sg[i], page, page_len, 0); - length -= page_len; - i++; - } - *sgl = sg; - *nents = nent; - return 0; - -out_free_pages: - while (i > 0) { - i--; - __free_page(sg_page(&sg[i])); - } - 
kfree(sg); -out: - return NVME_SC_INTERNAL; -} - static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev, struct nvmet_rdma_cmd *c, bool admin) { @@ -484,7 +431,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) } if (rsp->req.sg != &rsp->cmd->inline_sg) - nvmet_rdma_free_sgl(rsp->req.sg, rsp->req.sg_cnt); + sgl_free(rsp->req.sg); if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list))) nvmet_rdma_process_wr_wait_list(queue); @@ -621,16 +568,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, u32 len = get_unaligned_le24(sgl->length); u32 key = get_unaligned_le32(sgl->key); int ret; - u16 status; /* no data command? */ if (!len) return 0; - status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt, - len); - if (status) - return status; + rsp->req.sg = sgl_alloc(len, GFP_KERNEL, &rsp->req.sg_cnt); + if (!rsp->req.sg) + return NVME_SC_INTERNAL; ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, -- cgit v1.2.3 From 14db49172649aac001fd77a3fd53d12c6df22daf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 08:26:50 -0800 Subject: target: Use sgl_alloc_order() and sgl_free() Use the sgl_alloc_order() and sgl_free() functions instead of open coding these functions. Signed-off-by: Bart Van Assche Acked-by: Nicholas A. Bellinger Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/target/Kconfig | 1 + drivers/target/target_core_transport.c | 46 +++------------------------------- 2 files changed, 5 insertions(+), 42 deletions(-) diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig index e2bc99980f75..4c44d7bed01a 100644 --- a/drivers/target/Kconfig +++ b/drivers/target/Kconfig @@ -5,6 +5,7 @@ menuconfig TARGET_CORE select CONFIGFS_FS select CRC_T10DIF select BLK_SCSI_REQUEST # only for scsi_command_size_tbl.. 
+ select SGL_ALLOC default n help Say Y or M here to enable the TCM Storage Engine and ConfigFS enabled diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 58caacd54a3b..a001ba711cca 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2300,13 +2300,7 @@ queue_full: void target_free_sgl(struct scatterlist *sgl, int nents) { - struct scatterlist *sg; - int count; - - for_each_sg(sgl, sg, nents, count) - __free_page(sg_page(sg)); - - kfree(sgl); + sgl_free(sgl); } EXPORT_SYMBOL(target_free_sgl); @@ -2414,42 +2408,10 @@ int target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length, bool zero_page, bool chainable) { - struct scatterlist *sg; - struct page *page; - gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0; - unsigned int nalloc, nent; - int i = 0; - - nalloc = nent = DIV_ROUND_UP(length, PAGE_SIZE); - if (chainable) - nalloc++; - sg = kmalloc_array(nalloc, sizeof(struct scatterlist), GFP_KERNEL); - if (!sg) - return -ENOMEM; + gfp_t gfp = GFP_KERNEL | (zero_page ? __GFP_ZERO : 0); - sg_init_table(sg, nalloc); - - while (length) { - u32 page_len = min_t(u32, length, PAGE_SIZE); - page = alloc_page(GFP_KERNEL | zero_flag); - if (!page) - goto out; - - sg_set_page(&sg[i], page, page_len, 0); - length -= page_len; - i++; - } - *sgl = sg; - *nents = nent; - return 0; - -out: - while (i > 0) { - i--; - __free_page(sg_page(&sg[i])); - } - kfree(sg); - return -ENOMEM; + *sgl = sgl_alloc_order(length, 0, chainable, gfp, nents); + return *sgl ? 
0 : -ENOMEM; } EXPORT_SYMBOL(target_alloc_sgl); -- cgit v1.2.3 From ca11f209a4c88743fb4b652fd812470e6fecc598 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 6 Jan 2018 09:23:11 -0700 Subject: mq-deadline: make it clear that __dd_dispatch_request() works on all hw queues Don't pass in the hardware queue to __dd_dispatch_request(), since it leads the reader to believe that we are returning a request for that specific hardware queue. That's not how mq-deadline works, the state for determining which request to serve next is shared across all hardware queues for a device. Reviewed-by: Omar Sandoval Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/mq-deadline.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index d56972e8ebda..c56f211c8440 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -267,9 +267,8 @@ deadline_next_request(struct deadline_data *dd, int data_dir) * deadline_dispatch_requests selects the best request according to * read/write expire, fifo_batch, etc */ -static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx) +static struct request *__dd_dispatch_request(struct deadline_data *dd) { - struct deadline_data *dd = hctx->queue->elevator->elevator_data; struct request *rq, *next_rq; bool reads, writes; int data_dir; @@ -372,13 +371,19 @@ done: return rq; } +/* + * One confusing aspect here is that we get called for a specific + * hardware queue, but we may return a request that is for a + * different hardware queue. This is because mq-deadline has shared + * state for all hardware queues, in terms of sorting, FIFOs, etc.
+ */ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) { struct deadline_data *dd = hctx->queue->elevator->elevator_data; struct request *rq; spin_lock(&dd->lock); - rq = __dd_dispatch_request(hctx); + rq = __dd_dispatch_request(dd); spin_unlock(&dd->lock); return rq; -- cgit v1.2.3 From c2856ae2f315d754a0b6a268e4c6745b332b42e7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 6 Jan 2018 16:27:37 +0800 Subject: blk-mq: quiesce queue before freeing queue After queue is frozen, dispatch still may happen, for example: 1) requests are submitted from several contexts 2) requests from all these contexts are inserted to queue, but may dispatch to LLD in one of these paths, but other paths still need to move on even all these requests are completed (that means blk_mq_freeze_queue_wait() returns at that time) 3) dispatch after queue freezing still moves on and causes use-after-free, because request queue is freed This patch quiesces queue after it is frozen, and makes sure all in-progress dispatch are completed. This patch fixes the following kernel crash when running heavy IOs vs.
deleting device: [ 36.719251] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 36.720318] IP: kyber_has_work+0x14/0x40 [ 36.720847] PGD 254bf5067 P4D 254bf5067 PUD 255e6a067 PMD 0 [ 36.721584] Oops: 0000 [#1] PREEMPT SMP [ 36.722105] Dumping ftrace buffer: [ 36.722570] (ftrace buffer empty) [ 36.723057] Modules linked in: scsi_debug ebtable_filter ebtables ip6table_filter ip6_tables tcm_loop iscsi_target_mod target_core_file target_core_iblock target_core_pscsi target_core_mod xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c bridge stp llc fuse iptable_filter ip_tables sd_mod sg btrfs xor zstd_decompress zstd_compress xxhash raid6_pq mptsas mptscsih bcache crc32c_intel ahci mptbase libahci serio_raw scsi_transport_sas nvme libata shpchp lpc_ich virtio_scsi nvme_core binfmt_misc dm_mod iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi null_blk configs [ 36.733438] CPU: 2 PID: 2374 Comm: fio Not tainted 4.15.0-rc2.blk_mq_quiesce+ #714 [ 36.735143] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.9.3-1.fc25 04/01/2014 [ 36.736688] RIP: 0010:kyber_has_work+0x14/0x40 [ 36.737515] RSP: 0018:ffffc9000209bca0 EFLAGS: 00010202 [ 36.738431] RAX: 0000000000000008 RBX: ffff88025578bfc8 RCX: ffff880257bf4ed0 [ 36.739581] RDX: 0000000000000038 RSI: ffffffff81a98c6d RDI: ffff88025578bfc8 [ 36.740730] RBP: ffff880253cebfc8 R08: ffffc9000209bda0 R09: ffff8802554f3480 [ 36.741885] R10: ffffc9000209be60 R11: ffff880263f72538 R12: ffff88025573e9e8 [ 36.743036] R13: ffff88025578bfd0 R14: 0000000000000001 R15: 0000000000000000 [ 36.744189] FS: 00007f9b9bee67c0(0000) GS:ffff88027fc80000(0000) knlGS:0000000000000000 [ 36.746617] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.748483] CR2: 0000000000000008 CR3: 0000000254bf4001 CR4: 00000000003606e0 [ 36.750164] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 36.751455] 
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 36.752796] Call Trace: [ 36.753992] blk_mq_do_dispatch_sched+0x7f/0xe0 [ 36.755110] blk_mq_sched_dispatch_requests+0x119/0x190 [ 36.756179] __blk_mq_run_hw_queue+0x83/0x90 [ 36.757144] __blk_mq_delay_run_hw_queue+0xaf/0x110 [ 36.758046] blk_mq_run_hw_queue+0x24/0x70 [ 36.758845] blk_mq_flush_plug_list+0x1e7/0x270 [ 36.759676] blk_flush_plug_list+0xd6/0x240 [ 36.760463] blk_finish_plug+0x27/0x40 [ 36.761195] do_io_submit+0x19b/0x780 [ 36.761921] ? entry_SYSCALL_64_fastpath+0x1a/0x7d [ 36.762788] entry_SYSCALL_64_fastpath+0x1a/0x7d [ 36.763639] RIP: 0033:0x7f9b9699f697 [ 36.764352] RSP: 002b:00007ffc10f991b8 EFLAGS: 00000206 ORIG_RAX: 00000000000000d1 [ 36.765773] RAX: ffffffffffffffda RBX: 00000000008f6f00 RCX: 00007f9b9699f697 [ 36.766965] RDX: 0000000000a5e6c0 RSI: 0000000000000001 RDI: 00007f9b8462a000 [ 36.768377] RBP: 0000000000000000 R08: 0000000000000001 R09: 00000000008f6420 [ 36.769649] R10: 00007f9b846e5000 R11: 0000000000000206 R12: 00007f9b795d6a70 [ 36.770807] R13: 00007f9b795e4140 R14: 00007f9b795e3fe0 R15: 0000000100000000 [ 36.771955] Code: 83 c7 10 e9 3f 68 d1 ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 97 b0 00 00 00 48 8d 42 08 48 83 c2 38 <48> 3b 00 74 06 b8 01 00 00 00 c3 48 3b 40 08 75 f4 48 83 c0 10 [ 36.775004] RIP: kyber_has_work+0x14/0x40 RSP: ffffc9000209bca0 [ 36.776012] CR2: 0000000000000008 [ 36.776690] ---[ end trace 4045cbce364ff2a4 ]--- [ 36.777527] Kernel panic - not syncing: Fatal exception [ 36.778526] Dumping ftrace buffer: [ 36.779313] (ftrace buffer empty) [ 36.780081] Kernel Offset: disabled [ 36.780877] ---[ end Kernel panic - not syncing: Fatal exception Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Tested-by: Yi Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index e6e5bbc4c366..2e0d041e2daf 100644 
--- a/block/blk-core.c +++ b/block/blk-core.c @@ -694,6 +694,15 @@ void blk_cleanup_queue(struct request_queue *q) queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); + /* + * make sure all in-progress dispatch are completed because + * blk_freeze_queue() can only complete all requests, and + * dispatch may still be in-progress since we dispatch requests + * from more than one contexts + */ + if (q->mq_ops) + blk_mq_quiesce_queue(q); + /* for synchronous bio-based driver finish in-flight integrity i/o */ blk_flush_integrity(); -- cgit v1.2.3 From 24f5a90f0d13a97b51aa79f468143fafea4246bb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 6 Jan 2018 16:27:38 +0800 Subject: blk-mq: quiesce queue during switching io sched and updating nr_requests Dispatch may still be in-progress after queue is frozen, so we have to quiesce queue before switching IO scheduler and updating nr_requests. Also when switching io schedulers, blk_mq_run_hw_queue() may still be called somewhere(such as from nvme_reset_work()), and io scheduler's per-hctx data may not be setup yet, so cause oops even inside blk_mq_hctx_has_pending(), such as it can be run just between: ret = e->ops.mq.init_sched(q, e); AND ret = e->ops.mq.init_hctx(hctx, i) inside blk_mq_init_sched(). This reverts commit 7a148c2fcff8330(block: don't call blk_mq_quiesce_queue() after queue is frozen) basically, and makes sure blk_mq_hctx_has_pending won't be called if queue is quiesced. 
Reviewed-by: Christoph Hellwig Fixes: 7a148c2fcff83309(block: don't call blk_mq_quiesce_queue() after queue is frozen) Reported-by: Yi Zhang Tested-by: Yi Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 27 ++++++++++++++++++++++++++- block/elevator.c | 2 ++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 11097477eeab..1c66c319325c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1285,7 +1285,30 @@ EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { - if (blk_mq_hctx_has_pending(hctx)) { + int srcu_idx; + bool need_run; + + /* + * When queue is quiesced, we may be switching io scheduler, or + * updating nr_hw_queues, or other things, and we can't run queue + * any more, even __blk_mq_hctx_has_pending() can't be called safely. + * + * And queue will be rerun in blk_mq_unquiesce_queue() if it is + * quiesced. + */ + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { + rcu_read_lock(); + need_run = !blk_queue_quiesced(hctx->queue) && + blk_mq_hctx_has_pending(hctx); + rcu_read_unlock(); + } else { + srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); + need_run = !blk_queue_quiesced(hctx->queue) && + blk_mq_hctx_has_pending(hctx); + srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); + } + + if (need_run) { __blk_mq_delay_run_hw_queue(hctx, async, 0); return true; } @@ -2710,6 +2733,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return -EINVAL; blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); ret = 0; queue_for_each_hw_ctx(q, hctx, i) { @@ -2733,6 +2757,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) if (!ret) q->nr_requests = nr; + blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); return ret; diff --git a/block/elevator.c b/block/elevator.c index 7bda083d5968..138faeb08a7c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -968,6 +968,7 @@ static int elevator_switch_mq(struct 
request_queue *q, int ret; blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); if (q->elevator) { if (q->elevator->registered) @@ -994,6 +995,7 @@ static int elevator_switch_mq(struct request_queue *q, blk_add_trace_msg(q, "elv switch: none"); out: + blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q); return ret; } -- cgit v1.2.3 From 7d4901a90d02500c8011472a060f9b2e60e6e605 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 6 Jan 2018 16:27:39 +0800 Subject: blk-mq: avoid to map CPU into stale hw queue blk_mq_pci_map_queues() may not map one CPU into any hw queue, but its previous map isn't cleared yet, and may point to one stale hw queue index. This patch fixes the following issue by clearing the mapping table before setting it up in blk_mq_pci_map_queues(). This patches fixes this following issue reported by Zhang Yi: [ 101.202734] BUG: unable to handle kernel NULL pointer dereference at 0000000094d3013f [ 101.211487] IP: blk_mq_map_swqueue+0xbc/0x200 [ 101.216346] PGD 0 P4D 0 [ 101.219171] Oops: 0000 [#1] SMP [ 101.222674] Modules linked in: sunrpc ipmi_ssif vfat fat intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_cstate intel_uncore mxm_wmi intel_rapl_perf iTCO_wdt ipmi_si ipmi_devintf pcspkr iTCO_vendor_support sg dcdbas ipmi_msghandler wmi mei_me lpc_ich shpchp mei acpi_power_meter dm_multipath ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci crc32c_intel libata tg3 nvme nvme_core megaraid_sas ptp i2c_core pps_core dm_mirror dm_region_hash dm_log dm_mod [ 101.284881] CPU: 0 PID: 504 Comm: kworker/u25:5 Not tainted 4.15.0-rc2 #1 [ 101.292455] Hardware name: Dell Inc. 
PowerEdge R730xd/072T6D, BIOS 2.5.5 08/16/2017 [ 101.301001] Workqueue: nvme-wq nvme_reset_work [nvme] [ 101.306636] task: 00000000f2c53190 task.stack: 000000002da874f9 [ 101.313241] RIP: 0010:blk_mq_map_swqueue+0xbc/0x200 [ 101.318681] RSP: 0018:ffffc9000234fd70 EFLAGS: 00010282 [ 101.324511] RAX: ffff88047ffc9480 RBX: ffff88047e130850 RCX: 0000000000000000 [ 101.332471] RDX: ffffe8ffffd40580 RSI: ffff88047e509b40 RDI: ffff88046f37a008 [ 101.340432] RBP: 000000000000000b R08: ffff88046f37a008 R09: 0000000011f94280 [ 101.348392] R10: ffff88047ffd4d00 R11: 0000000000000000 R12: ffff88046f37a008 [ 101.356353] R13: ffff88047e130f38 R14: 000000000000000b R15: ffff88046f37a558 [ 101.364314] FS: 0000000000000000(0000) GS:ffff880277c00000(0000) knlGS:0000000000000000 [ 101.373342] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 101.379753] CR2: 0000000000000098 CR3: 000000047f409004 CR4: 00000000001606f0 [ 101.387714] Call Trace: [ 101.390445] blk_mq_update_nr_hw_queues+0xbf/0x130 [ 101.395791] nvme_reset_work+0x6f4/0xc06 [nvme] [ 101.400848] ? pick_next_task_fair+0x290/0x5f0 [ 101.405807] ? __switch_to+0x1f5/0x430 [ 101.409988] ? put_prev_entity+0x2f/0xd0 [ 101.414365] process_one_work+0x141/0x340 [ 101.418836] worker_thread+0x47/0x3e0 [ 101.422921] kthread+0xf5/0x130 [ 101.426424] ? rescuer_thread+0x380/0x380 [ 101.430896] ? 
kthread_associate_blkcg+0x90/0x90 [ 101.436048] ret_from_fork+0x1f/0x30 [ 101.440034] Code: 48 83 3c ca 00 0f 84 2b 01 00 00 48 63 cd 48 8b 93 10 01 00 00 8b 0c 88 48 8b 83 20 01 00 00 4a 03 14 f5 60 04 af 81 48 8b 0c c8 <48> 8b 81 98 00 00 00 f0 4c 0f ab 30 8b 81 f8 00 00 00 89 42 44 [ 101.461116] RIP: blk_mq_map_swqueue+0xbc/0x200 RSP: ffffc9000234fd70 [ 101.468205] CR2: 0000000000000098 [ 101.471907] ---[ end trace 5fe710f98228a3ca ]--- [ 101.482489] Kernel panic - not syncing: Fatal exception [ 101.488505] Kernel Offset: disabled [ 101.497752] ---[ end Kernel panic - not syncing: Fatal exception Reviewed-by: Christoph Hellwig Suggested-by: Christoph Hellwig Reported-by: Yi Zhang Tested-by: Yi Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 1c66c319325c..dd21051fb251 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2622,9 +2622,27 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) { - if (set->ops->map_queues) + if (set->ops->map_queues) { + int cpu; + /* + * transport .map_queues is usually done in the following + * way: + * + * for (queue = 0; queue < set->nr_hw_queues; queue++) { + * mask = get_cpu_mask(queue) + * for_each_cpu(cpu, mask) + * set->mq_map[cpu] = queue; + * } + * + * When we need to remap, the table has to be cleared for + * killing stale mapping since one CPU may not be mapped + * to any hw queue. 
+ */ + for_each_possible_cpu(cpu) + set->mq_map[cpu] = 0; + return set->ops->map_queues(set); - else + } else return blk_mq_map_queues(set); } -- cgit v1.2.3 From fb350e0ad99359768e1e80b4784692031ec340e4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 6 Jan 2018 16:27:40 +0800 Subject: blk-mq: fix race between updating nr_hw_queues and switching io sched In both elevator_switch_mq() and blk_mq_update_nr_hw_queues(), sched tags can be allocated, and q->nr_hw_queues is used, and race is inevitable, for example: blk_mq_init_sched() may trigger use-after-free on hctx, which is freed in blk_mq_realloc_hw_ctxs() when nr_hw_queues is decreased. This patch fixes the race by holding q->sysfs_lock. Reviewed-by: Christoph Hellwig Reported-by: Yi Zhang Tested-by: Yi Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index dd21051fb251..111e1aa5562f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2407,6 +2407,9 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx; blk_mq_sysfs_unregister(q); + + /* protect against switching io scheduler */ + mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { int node; @@ -2451,6 +2454,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, } } q->nr_hw_queues = i; + mutex_unlock(&q->sysfs_lock); blk_mq_sysfs_register(q); } -- cgit v1.2.3 From ae6650163c66a7eff1acd6eb8b0f752dcfa8eba5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 5 Jan 2018 16:26:00 -0800 Subject: loop: fix concurrent lo_open/lo_release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 范龙飞 reports that KASAN can report a use-after-free in __lock_acquire. The reason is due to insufficient serialization in lo_release(), which will continue to use the loop device even after it has decremented the lo_refcnt to zero.
In the meantime, another process can come in, open the loop device again as it is being shut down. Confusion ensues. Reported-by: 范龙飞 Signed-off-by: Linus Torvalds Signed-off-by: Jens Axboe --- drivers/block/loop.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index bc8e61506968..d5fe720cf149 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1581,9 +1581,8 @@ out: return err; } -static void lo_release(struct gendisk *disk, fmode_t mode) +static void __lo_release(struct loop_device *lo) { - struct loop_device *lo = disk->private_data; int err; if (atomic_dec_return(&lo->lo_refcnt)) @@ -1610,6 +1609,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) mutex_unlock(&lo->lo_ctl_mutex); } +static void lo_release(struct gendisk *disk, fmode_t mode) +{ + mutex_lock(&loop_index_mutex); + __lo_release(disk->private_data); + mutex_unlock(&loop_index_mutex); +} + static const struct block_device_operations lo_fops = { .owner = THIS_MODULE, .open = lo_open, -- cgit v1.2.3 From de53c3786a3ce162a1c815d0c04c766c23ec9c0a Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 5 Jan 2018 22:35:41 +0100 Subject: x86/pti: Unbreak EFI old_memmap EFI_OLD_MEMMAP's efi_call_phys_prolog() calls set_pgd() with swapper PGD that has PAGE_USER set, which makes PTI set NX on it, and therefore EFI can't execute its code. Fix that by forcefully clearing _PAGE_NX from the PGD (this can't be done by the pgprot API). _PAGE_NX will be automatically reintroduced in efi_call_phys_epilog(), as _set_pgd() will again notice that this is _PAGE_USER, and set _PAGE_NX on it.
Tested-by: Dimitri Sivanich Signed-off-by: Jiri Kosina Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Acked-by: Dave Hansen Cc: Andrea Arcangeli Cc: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/nycvar.YFH.7.76.1801052215460.11852@cbobk.fhfr.pm --- arch/x86/platform/efi/efi_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39c4b35ac7a4..61975b6bcb1a 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -134,7 +134,9 @@ pgd_t * __init efi_call_phys_prolog(void) pud[j] = *pud_offset(p4d_k, vaddr); } } + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; } + out: __flush_tlb_all(); -- cgit v1.2.3 From 01c9b17bf673b05bb401b76ec763e9730ccf1376 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 5 Jan 2018 09:44:36 -0800 Subject: x86/Documentation: Add PTI description Add some details about how PTI works, what some of the downsides are, and how to debug it when things go wrong. Also document the kernel parameter: 'pti/nopti'. 
Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Randy Dunlap Reviewed-by: Kees Cook Cc: Moritz Lipp Cc: Daniel Gruss Cc: Michael Schwarz Cc: Richard Fellner Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Hugh Dickins Cc: Andi Lutomirsky Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180105174436.1BC6FA2B@viggo.jf.intel.com --- Documentation/admin-guide/kernel-parameters.txt | 21 ++- Documentation/x86/pti.txt | 186 ++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 7 deletions(-) create mode 100644 Documentation/x86/pti.txt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 520fdec15bbb..905991745d26 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2685,8 +2685,6 @@ steal time is computed, but won't influence scheduler behaviour - nopti [X86-64] Disable kernel page table isolation - nolapic [X86-32,APIC] Do not enable or use the local APIC. nolapic_timer [X86-32,APIC] Do not use the local APIC timer. @@ -3255,11 +3253,20 @@ pt. [PARIDE] See Documentation/blockdev/paride.txt. - pti= [X86_64] - Control user/kernel address space isolation: - on - enable - off - disable - auto - default setting + pti= [X86_64] Control Page Table Isolation of user and + kernel address spaces. Disabling this feature + removes hardening, but improves performance of + system calls and interrupts. + + on - unconditionally enable + off - unconditionally disable + auto - kernel detects whether your CPU model is + vulnerable to issues that PTI mitigates + + Not specifying this option is equivalent to pti=auto. + + nopti [X86_64] + Equivalent to pti=off pty.legacy_count= [KNL] Number of legacy pty's. 
Overwrites compiled-in diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt new file mode 100644 index 000000000000..d11eff61fc9a --- /dev/null +++ b/Documentation/x86/pti.txt @@ -0,0 +1,186 @@ +Overview +======== + +Page Table Isolation (pti, previously known as KAISER[1]) is a +countermeasure against attacks on the shared user/kernel address +space such as the "Meltdown" approach[2]. + +To mitigate this class of attacks, we create an independent set of +page tables for use only when running userspace applications. When +the kernel is entered via syscalls, interrupts or exceptions, the +page tables are switched to the full "kernel" copy. When the system +switches back to user mode, the user copy is used again. + +The userspace page tables contain only a minimal amount of kernel +data: only what is needed to enter/exit the kernel such as the +entry/exit functions themselves and the interrupt descriptor table +(IDT). There are a few strictly unnecessary things that get mapped +such as the first C function when entering an interrupt (see +comments in pti.c). + +This approach helps to ensure that side-channel attacks leveraging +the paging structures do not function when PTI is enabled. It can be +enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time. +Once enabled at compile-time, it can be disabled at boot with the +'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt). + +Page Table Management +===================== + +When PTI is enabled, the kernel manages two sets of page tables. +The first set is very similar to the single set which is present in +kernels without PTI. This includes a complete mapping of userspace +that the kernel can use for things like copy_to_user(). + +Although _complete_, the user portion of the kernel page tables is +crippled by setting the NX bit in the top level. This ensures +that any missed kernel->user CR3 switch will immediately crash +userspace upon executing its first instruction. 
+ +The userspace page tables map only the kernel data needed to enter +and exit the kernel. This data is entirely contained in the 'struct +cpu_entry_area' structure which is placed in the fixmap which gives +each CPU's copy of the area a compile-time-fixed virtual address. + +For new userspace mappings, the kernel makes the entries in its +page tables like normal. The only difference is when the kernel +makes entries in the top (PGD) level. In addition to setting the +entry in the main kernel PGD, a copy of the entry is made in the +userspace page tables' PGD. + +This sharing at the PGD level also inherently shares all the lower +layers of the page tables. This leaves a single, shared set of +userspace page tables to manage. One PTE to lock, one set of +accessed bits, dirty bits, etc... + +Overhead +======== + +Protection against side-channel attacks is important. But, +this protection comes at a cost: + +1. Increased Memory Use + a. Each process now needs an order-1 PGD instead of order-0. + (Consumes an additional 4k per process). + b. The 'cpu_entry_area' structure must be 2MB in size and 2MB + aligned so that it can be mapped by setting a single PMD + entry. This consumes nearly 2MB of RAM once the kernel + is decompressed, but no space in the kernel image itself. + +2. Runtime Cost + a. CR3 manipulation to switch between the page table copies + must be done at interrupt, syscall, and exception entry + and exit (it can be skipped when the kernel is interrupted, + though.) Moves to CR3 are on the order of a hundred + cycles, and are required at every entry and exit. + b. A "trampoline" must be used for SYSCALL entry. This + trampoline depends on a smaller set of resources than the + non-PTI SYSCALL entry code, so requires mapping fewer + things into the userspace page tables. The downside is + that stacks must be switched at entry time. + c. Global pages are disabled for all kernel structures not + mapped into both kernel and userspace page tables.
This + feature of the MMU allows different processes to share TLB + entries mapping the kernel. Losing the feature means more + TLB misses after a context switch. The actual loss of + performance is very small, however, never exceeding 1%. + d. Process Context IDentifiers (PCID) is a CPU feature that + allows us to skip flushing the entire TLB when switching page + tables by setting a special bit in CR3 when the page tables + are changed. This makes switching the page tables (at context + switch, or kernel entry/exit) cheaper. But, on systems with + PCID support, the context switch code must flush both the user + and kernel entries out of the TLB. The user PCID TLB flush is + deferred until the exit to userspace, minimizing the cost. + See intel.com/sdm for the gory PCID/INVPCID details. + e. The userspace page tables must be populated for each new + process. Even without PTI, the shared kernel mappings + are created by copying top-level (PGD) entries into each + new process. But, with PTI, there are now *two* kernel + mappings: one in the kernel page tables that maps everything + and one for the entry/exit structures. At fork(), we need to + copy both. + f. In addition to the fork()-time copying, there must also + be an update to the userspace PGD any time a set_pgd() is done + on a PGD used to map userspace. This ensures that the kernel + and userspace copies always map the same userspace + memory. + g. On systems without PCID support, each CR3 write flushes + the entire TLB. That means that each syscall, interrupt + or exception flushes the TLB. + h. INVPCID is a TLB-flushing instruction which allows flushing + of TLB entries for non-current PCIDs. Some systems support + PCIDs, but do not support INVPCID. On these systems, addresses + can only be flushed from the TLB for the current PCID. When + flushing a kernel address, we need to flush all PCIDs, so a + single kernel address flush will require a TLB-flushing CR3 + write upon the next use of every PCID. 
+ +Possible Future Work +==================== +1. We can be more careful about not actually writing to CR3 + unless its value is actually changed. +2. Allow PTI to be enabled/disabled at runtime in addition to the + boot-time switching. + +Testing +======== + +To test stability of PTI, the following test procedure is recommended, +ideally doing all of these in parallel: + +1. Set CONFIG_DEBUG_ENTRY=y +2. Run several copies of all of the tools/testing/selftests/x86/ tests + (excluding MPX and protection_keys) in a loop on multiple CPUs for + several minutes. These tests frequently uncover corner cases in the + kernel entry code. In general, old kernels might cause these tests + themselves to crash, but they should never crash the kernel. +3. Run the 'perf' tool in a mode (top or record) that generates many + frequent performance monitoring non-maskable interrupts (see "NMI" + in /proc/interrupts). This exercises the NMI entry/exit code which + is known to trigger bugs in code paths that did not expect to be + interrupted, including nested NMIs. Using "-c" boosts the rate of + NMIs, and using two -c with separate counters encourages nested NMIs + and less deterministic behavior. + + while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done + +4. Launch a KVM virtual machine. +5. Run 32-bit binaries on systems supporting the SYSCALL instruction. + This has been a lightly-tested code path and needs extra scrutiny. + +Debugging +========= + +Bugs in PTI cause a few different signatures of crashes +that are worth noting here. + + * Failures of the selftests/x86 code. Usually a bug in one of the + more obscure corners of entry_64.S + * Crashes in early boot, especially around CPU bringup. Bugs + in the trampoline code or mappings cause these. + * Crashes at the first interrupt. Caused by bugs in entry_64.S, + like screwing up a page table switch. Also caused by + incorrectly mapping the IRQ handler entry code. + * Crashes at the first NMI. 
The NMI code is separate from main + interrupt handlers and can have bugs that do not affect + normal interrupts. Also caused by incorrectly mapping NMI + code. NMIs that interrupt the entry code must be very + careful and can be the cause of crashes that show up when + running perf. + * Kernel crashes at the first exit to userspace. entry_64.S + bugs, or failing to map some of the exit code. + * Crashes at first interrupt that interrupts userspace. The paths + in entry_64.S that return to userspace are sometimes separate + from the ones that return to the kernel. + * Double faults: overflowing the kernel stack because of page + faults upon page faults. Caused by touching non-pti-mapped + data in the entry code, or forgetting to switch to kernel + CR3 before calling into C functions which are not pti-mapped. + * Userspace segfaults early in boot, sometimes manifesting + as mount(8) failing to mount the rootfs. These have + tended to be TLB invalidation issues. Usually invalidating + the wrong PCID, or otherwise missing an invalidation. + +1. https://gruss.cc/files/kaiser.pdf +2. https://meltdownattack.com/meltdown.pdf -- cgit v1.2.3 From 069f05346d01e7298939f16533953cdf52370be3 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 5 Jan 2018 18:02:55 -0200 Subject: mtd: nand: qcom: Add a NULL check for devm_kasprintf() devm_kasprintf() may fail, so we should better add a NULL check and propagate an error on failure. 
Signed-off-by: Fabio Estevam Signed-off-by: Boris Brezillon --- drivers/mtd/nand/qcom_nandc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c index 245d0f39e0aa..6be555806eca 100644 --- a/drivers/mtd/nand/qcom_nandc.c +++ b/drivers/mtd/nand/qcom_nandc.c @@ -2639,6 +2639,9 @@ static int qcom_nand_host_init(struct qcom_nand_controller *nandc, nand_set_flash_node(chip, dn); mtd->name = devm_kasprintf(dev, GFP_KERNEL, "qcom_nand.%d", host->cs); + if (!mtd->name) + return -ENOMEM; + mtd->owner = THIS_MODULE; mtd->dev.parent = dev; -- cgit v1.2.3 From 99c6fa2511d8a683e61468be91b83f85452115fa Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 6 Jan 2018 11:49:23 +0000 Subject: x86/cpufeatures: Add X86_BUG_SPECTRE_V[12] Add the bug bits for spectre v1/2 and force them unconditionally for all cpus. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1515239374-23361-2-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/common.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 21ac898df2d8..1641c2f96363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -342,5 +342,7 @@ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional 
branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2d3bd2215e5b..372ba3fb400f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (c->x86_vendor != X86_VENDOR_AMD) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + fpu__init_system(c); #ifdef CONFIG_X86_32 -- cgit v1.2.3 From fee4380f368e84ed216b62ccd2fbc4126f2bf40b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 18 Dec 2017 11:32:45 +0100 Subject: mtd: nand: pxa3xx: Fix READOOB implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current driver, OOB bytes are accessed in raw mode, and when a page access is done with NDCR_SPARE_EN set and NDCR_ECC_EN cleared, the driver must read the whole spare area (64 bytes in case of a 2k page, 16 bytes for a 512 page). The driver was only reading the free OOB bytes, which was leaving some unread data in the FIFO and was somehow leading to a timeout. We could patch the driver to read ->spare_size + ->ecc_size instead of just ->spare_size when READOOB is requested, but we'd better make in-band and OOB accesses consistent. Since the driver is always accessing in-band data in non-raw mode (with the ECC engine enabled), we should also access OOB data in this mode. That's particularly useful when using the BCH engine because in this mode the free OOB bytes are also ECC protected. 
Fixes: 43bcfd2bb24a ("mtd: nand: pxa3xx: Add driver-specific ECC BCH support") Cc: stable@vger.kernel.org Reported-by: Sean Nyekjær Tested-by: Willy Tarreau Signed-off-by: Boris Brezillon Acked-by: Ezequiel Garcia Tested-by: Sean Nyekjaer Acked-by: Robert Jarzmik Signed-off-by: Richard Weinberger --- drivers/mtd/nand/pxa3xx_nand.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 90b9a9ccbe60..9285f60e5783 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -963,6 +963,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command) switch (command) { case NAND_CMD_READ0: + case NAND_CMD_READOOB: case NAND_CMD_PAGEPROG: info->use_ecc = 1; break; -- cgit v1.2.3 From 5731a879d03bdaa00265f8ebc32dfd0e65d25276 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 4 Jan 2018 20:02:09 -0800 Subject: bpf: sockmap missing NULL psock check Add psock NULL check to handle a racing sock event that can get the sk_callback_lock before this case but after xchg happens causing the refcnt to hit zero and sock user data (psock) to be null and queued for garbage collection. Also add a comment in the code because this is a bit subtle and not obvious in my opinion. 
Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 5ee2e41893d9..1712d319c2d8 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map) write_lock_bh(&sock->sk_callback_lock); psock = smap_psock_sk(sock); - smap_list_remove(psock, &stab->sock_map[i]); - smap_release_sock(psock, sock); + /* This check handles a racing sock event that can get the + * sk_callback_lock before this case but after xchg happens + * causing the refcnt to hit zero and sock user data (psock) + * to be null and queued for garbage collection. + */ + if (likely(psock)) { + smap_list_remove(psock, &stab->sock_map[i]); + smap_release_sock(psock, sock); + } write_unlock_bh(&sock->sk_callback_lock); } rcu_read_unlock(); -- cgit v1.2.3 From 2b36047e7889b7efee22c11e17f035f721855731 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 5 Jan 2018 15:02:00 -0800 Subject: selftests/bpf: fix test_align since commit 82abbf8d2fc4 the verifier rejects the bit-wise arithmetic on pointers earlier. The test 'dubious pointer arithmetic' now has less output to match on. Adjust it. 
Fixes: 82abbf8d2fc4 ("bpf: do not allow root to mangle valid pointers") Reported-by: kernel test robot Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_align.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 8591c89c0828..471bbbdb94db 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -474,27 +474,7 @@ static struct bpf_align_test tests[] = { .result = REJECT, .matches = { {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - /* ptr & 0x40 == either 0 or 0x40 */ - {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"}, - /* ptr << 2 == unknown, (4n) */ - {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, - /* (4n) + 14 == (4n+2). We blow our bounds, because - * the add could overflow. - */ - {8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, - /* Checked s>=0 */ - {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, - /* packet pointer + nonnegative (4n+2) */ - {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, - {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, - /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. - * We checked the bounds, but it might have been able - * to overflow if the packet pointer started in the - * upper half of the address space. - * So we did not get a 'range' on R6, and the access - * attempt will fail. 
- */ - {16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* R5 bitwise operator &= on pointer prohibited */ } }, { -- cgit v1.2.3 From 7b6af2c53192f1766892ef40c8f48a413509ed72 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 3 Jan 2018 21:13:45 +0100 Subject: leds: core: Fix regression caused by commit 2b83ff96f51d Commit 2b83ff96f51d ("led: core: Fix brightness setting when setting delay_off=0") replaced del_timer_sync(&led_cdev->blink_timer) with led_stop_software_blink() in led_blink_set(), which additionally clears LED_BLINK_SW flag as well as zeroes blink_delay_on and blink_delay_off properties of the struct led_classdev. Cleansing of the latter ones wasn't required to fix the original issue but wasn't considered harmful. It nonetheless turned out to be so in case when pointer to one or both props is passed to led_blink_set() like in the ledtrig-timer.c. In such cases zeroes are passed later in delay_on and/or delay_off arguments to led_blink_setup(), which results either in stopping the software blinking or setting blinking frequency always to 1Hz. Avoid using led_stop_software_blink() and add a single call required to clear LED_BLINK_SW flag, which was the only needed modification to fix the original issue. 
Fixes 2b83ff96f51d ("led: core: Fix brightness setting when setting delay_off=0") Signed-off-by: Jacek Anaszewski --- drivers/leds/led-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index f3654fd2eaf3..ede4fa0ac2cc 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -186,8 +186,9 @@ void led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off) { - led_stop_software_blink(led_cdev); + del_timer_sync(&led_cdev->blink_timer); + clear_bit(LED_BLINK_SW, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags); clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags); -- cgit v1.2.3 From e4b580bc04af6b3408a99113d2d69f9dd268eafa Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Fri, 29 Dec 2017 14:41:02 +0530 Subject: mtd: spi-nor: cadence-quadspi: Refactor indirect read/write sequence. Move configuring of indirect read/write start address to cqspi_indirect_*_execute() function and rename cqspi_indirect_*_setup() function. This will help to reuse cqspi_indirect_*_setup() function for supporting direct access mode. 
Signed-off-by: Vignesh R Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/cadence-quadspi.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index 75a2bc447a99..c7bf4d523f9c 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -450,8 +450,7 @@ static int cqspi_command_write_addr(struct spi_nor *nor, return cqspi_exec_flash_cmd(cqspi, reg); } -static int cqspi_indirect_read_setup(struct spi_nor *nor, - const unsigned int from_addr) +static int cqspi_read_setup(struct spi_nor *nor) { struct cqspi_flash_pdata *f_pdata = nor->priv; struct cqspi_st *cqspi = f_pdata->cqspi; @@ -459,8 +458,6 @@ static int cqspi_indirect_read_setup(struct spi_nor *nor, unsigned int dummy_clk = 0; unsigned int reg; - writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR); - reg = nor->read_opcode << CQSPI_REG_RD_INSTR_OPCODE_LSB; reg |= cqspi_calc_rdreg(nor, nor->read_opcode); @@ -493,8 +490,8 @@ static int cqspi_indirect_read_setup(struct spi_nor *nor, return 0; } -static int cqspi_indirect_read_execute(struct spi_nor *nor, - u8 *rxbuf, const unsigned n_rx) +static int cqspi_indirect_read_execute(struct spi_nor *nor, u8 *rxbuf, + loff_t from_addr, const size_t n_rx) { struct cqspi_flash_pdata *f_pdata = nor->priv; struct cqspi_st *cqspi = f_pdata->cqspi; @@ -504,6 +501,7 @@ static int cqspi_indirect_read_execute(struct spi_nor *nor, unsigned int bytes_to_read = 0; int ret = 0; + writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR); writel(remaining, reg_base + CQSPI_REG_INDIRECTRDBYTES); /* Clear all interrupts. 
*/ @@ -570,8 +568,7 @@ failrd: return ret; } -static int cqspi_indirect_write_setup(struct spi_nor *nor, - const unsigned int to_addr) +static int cqspi_write_setup(struct spi_nor *nor) { unsigned int reg; struct cqspi_flash_pdata *f_pdata = nor->priv; @@ -584,8 +581,6 @@ static int cqspi_indirect_write_setup(struct spi_nor *nor, reg = cqspi_calc_rdreg(nor, nor->program_opcode); writel(reg, reg_base + CQSPI_REG_RD_INSTR); - writel(to_addr, reg_base + CQSPI_REG_INDIRECTWRSTARTADDR); - reg = readl(reg_base + CQSPI_REG_SIZE); reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK; reg |= (nor->addr_width - 1); @@ -593,8 +588,8 @@ static int cqspi_indirect_write_setup(struct spi_nor *nor, return 0; } -static int cqspi_indirect_write_execute(struct spi_nor *nor, - const u8 *txbuf, const unsigned n_tx) +static int cqspi_indirect_write_execute(struct spi_nor *nor, loff_t to_addr, + const u8 *txbuf, const size_t n_tx) { const unsigned int page_size = nor->page_size; struct cqspi_flash_pdata *f_pdata = nor->priv; @@ -604,6 +599,7 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, unsigned int write_bytes; int ret; + writel(to_addr, reg_base + CQSPI_REG_INDIRECTWRSTARTADDR); writel(remaining, reg_base + CQSPI_REG_INDIRECTWRBYTES); /* Clear all interrupts. 
*/ @@ -900,11 +896,11 @@ static ssize_t cqspi_write(struct spi_nor *nor, loff_t to, if (ret) return ret; - ret = cqspi_indirect_write_setup(nor, to); + ret = cqspi_write_setup(nor); if (ret) return ret; - ret = cqspi_indirect_write_execute(nor, buf, len); + ret = cqspi_indirect_write_execute(nor, to, buf, len); if (ret) return ret; @@ -920,11 +916,11 @@ static ssize_t cqspi_read(struct spi_nor *nor, loff_t from, if (ret) return ret; - ret = cqspi_indirect_read_setup(nor, from); + ret = cqspi_read_setup(nor); if (ret) return ret; - ret = cqspi_indirect_read_execute(nor, buf, len); + ret = cqspi_indirect_read_execute(nor, buf, from, len); if (ret) return ret; -- cgit v1.2.3 From a27f2eaf2b275758d269ba519833df53c6181878 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Fri, 29 Dec 2017 14:41:03 +0530 Subject: mtd: spi-nor: cadence-quadspi: Add support for direct access mode Cadence QSPI controller provides direct access mode through which flash can be accessed in a memory-mapped IO mode. This enables read/write to flash using memcpy*() functions. This mode provides higher throughput for both read/write operations when compared to current indirect mode of operation. This patch therefore adds support to use QSPI in direct mode. If the window reserved in SoC's memory map for MMIO access is smaller than the flash size (like on most SoCFPGA variants), then the driver falls back to indirect mode of operation. On TI's 66AK2G SoC, with ARM running at 600MHz and QSPI at 96MHz switching to direct mode improves read throughput from 3MB/s to 8MB/s. 
Signed-off-by: Vignesh R Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/cadence-quadspi.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index c7bf4d523f9c..4b8e9183489a 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -58,6 +58,7 @@ struct cqspi_flash_pdata { u8 data_width; u8 cs; bool registered; + bool use_direct_mode; }; struct cqspi_st { @@ -68,6 +69,7 @@ struct cqspi_st { void __iomem *iobase; void __iomem *ahb_base; + resource_size_t ahb_size; struct completion transfer_complete; struct mutex bus_mutex; @@ -103,6 +105,7 @@ struct cqspi_st { /* Register map */ #define CQSPI_REG_CONFIG 0x00 #define CQSPI_REG_CONFIG_ENABLE_MASK BIT(0) +#define CQSPI_REG_CONFIG_ENB_DIR_ACC_CTRL BIT(7) #define CQSPI_REG_CONFIG_DECODE_MASK BIT(9) #define CQSPI_REG_CONFIG_CHIPSELECT_LSB 10 #define CQSPI_REG_CONFIG_DMA_MASK BIT(15) @@ -890,6 +893,8 @@ static int cqspi_set_protocol(struct spi_nor *nor, const int read) static ssize_t cqspi_write(struct spi_nor *nor, loff_t to, size_t len, const u_char *buf) { + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; int ret; ret = cqspi_set_protocol(nor, 0); @@ -900,7 +905,10 @@ static ssize_t cqspi_write(struct spi_nor *nor, loff_t to, if (ret) return ret; - ret = cqspi_indirect_write_execute(nor, to, buf, len); + if (f_pdata->use_direct_mode) + memcpy_toio(cqspi->ahb_base + to, buf, len); + else + ret = cqspi_indirect_write_execute(nor, to, buf, len); if (ret) return ret; @@ -910,6 +918,8 @@ static ssize_t cqspi_write(struct spi_nor *nor, loff_t to, static ssize_t cqspi_read(struct spi_nor *nor, loff_t from, size_t len, u_char *buf) { + struct cqspi_flash_pdata *f_pdata = nor->priv; + struct cqspi_st *cqspi = f_pdata->cqspi; int ret; ret = cqspi_set_protocol(nor, 1); @@ -920,7 +930,10 @@ static ssize_t 
cqspi_read(struct spi_nor *nor, loff_t from, if (ret) return ret; - ret = cqspi_indirect_read_execute(nor, buf, from, len); + if (f_pdata->use_direct_mode) + memcpy_fromio(buf, cqspi->ahb_base + from, len); + else + ret = cqspi_indirect_read_execute(nor, buf, from, len); if (ret) return ret; @@ -1055,6 +1068,8 @@ static int cqspi_of_get_pdata(struct platform_device *pdev) static void cqspi_controller_init(struct cqspi_st *cqspi) { + u32 reg; + cqspi_controller_enable(cqspi, 0); /* Configure the remap address register, no remap */ @@ -1077,6 +1092,11 @@ static void cqspi_controller_init(struct cqspi_st *cqspi) writel(cqspi->fifo_depth * cqspi->fifo_width / 8, cqspi->iobase + CQSPI_REG_INDIRECTWRWATERMARK); + /* Enable Direct Access Controller */ + reg = readl(cqspi->iobase + CQSPI_REG_CONFIG); + reg |= CQSPI_REG_CONFIG_ENB_DIR_ACC_CTRL; + writel(reg, cqspi->iobase + CQSPI_REG_CONFIG); + cqspi_controller_enable(cqspi, 1); } @@ -1152,6 +1172,12 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np) goto err; f_pdata->registered = true; + + if (mtd->size <= cqspi->ahb_size) { + f_pdata->use_direct_mode = true; + dev_dbg(nor->dev, "using direct mode for %s\n", + mtd->name); + } } return 0; @@ -1211,6 +1237,7 @@ static int cqspi_probe(struct platform_device *pdev) dev_err(dev, "Cannot remap AHB address.\n"); return PTR_ERR(cqspi->ahb_base); } + cqspi->ahb_size = resource_size(res_ahb); init_completion(&cqspi->transfer_complete); -- cgit v1.2.3 From 2167d6d7a96a35614be64769f4d36d6eddc1860f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 2 Jan 2018 14:28:05 +0100 Subject: mtd: fsl-quadspi: account for const type of of_device_id.data This driver creates a number of const structures that it stores in the data field of an of_device_id array. The data field of an of_device_id structure has type const void *, so there is no need for a const-discarding cast when putting const values into such a structure. Done using Coccinelle. 
Signed-off-by: Julia Lawall Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/fsl-quadspi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c index f17d22435bfc..2901c7bd9e30 100644 --- a/drivers/mtd/spi-nor/fsl-quadspi.c +++ b/drivers/mtd/spi-nor/fsl-quadspi.c @@ -801,10 +801,10 @@ static int fsl_qspi_nor_setup_last(struct fsl_qspi *q) } static const struct of_device_id fsl_qspi_dt_ids[] = { - { .compatible = "fsl,vf610-qspi", .data = (void *)&vybrid_data, }, - { .compatible = "fsl,imx6sx-qspi", .data = (void *)&imx6sx_data, }, - { .compatible = "fsl,imx7d-qspi", .data = (void *)&imx7d_data, }, - { .compatible = "fsl,imx6ul-qspi", .data = (void *)&imx6ul_data, }, + { .compatible = "fsl,vf610-qspi", .data = &vybrid_data, }, + { .compatible = "fsl,imx6sx-qspi", .data = &imx6sx_data, }, + { .compatible = "fsl,imx7d-qspi", .data = &imx7d_data, }, + { .compatible = "fsl,imx6ul-qspi", .data = &imx6ul_data, }, { .compatible = "fsl,ls1021a-qspi", .data = (void *)&ls1021a_data, }, { /* sentinel */ } }; -- cgit v1.2.3 From a6e4836d6991b86736ed00835ffac94cc2ec5158 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 4 Jan 2018 12:07:44 +0300 Subject: spi-nor: intel-spi: Remove unused preopcodes field This field is not used in the driver anymore so remove it. Signed-off-by: Mika Westerberg Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/intel-spi.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c index ef034d898a23..699951523179 100644 --- a/drivers/mtd/spi-nor/intel-spi.c +++ b/drivers/mtd/spi-nor/intel-spi.c @@ -138,7 +138,6 @@ * @erase_64k: 64k erase supported * @opcodes: Opcodes which are supported. This are programmed by BIOS * before it locks down the controller. - * @preopcodes: Preopcodes which are supported. 
*/ struct intel_spi { struct device *dev; @@ -155,7 +154,6 @@ struct intel_spi { bool swseq_erase; bool erase_64k; u8 opcodes[8]; - u8 preopcodes[2]; }; static bool writeable; @@ -400,10 +398,6 @@ static int intel_spi_init(struct intel_spi *ispi) ispi->opcodes[i] = opmenu0 >> i * 8; ispi->opcodes[i + 4] = opmenu1 >> i * 8; } - - val = readl(ispi->sregs + PREOP_OPTYPE); - ispi->preopcodes[0] = val; - ispi->preopcodes[1] = val >> 8; } } -- cgit v1.2.3 From b2cd1df66037e7c4697c7e40496bf7e4a5e16a2d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 7 Jan 2018 14:22:41 -0800 Subject: Linux 4.15-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index eb1f5973813e..eb59638035dd 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 33c57c0d3c67f51f491a9d27108f7e97adc03d96 Mon Sep 17 00:00:00 2001 From: Karsten Merker Date: Thu, 4 Jan 2018 23:37:02 +0100 Subject: RISC-V: Add a basic defconfig This patch provides a basic defconfig for the RISC-V architecture that enables enough kernel features to run a basic Linux distribution on qemu's "virt" board for native software development. Features include: - serial console - virtio block and network device support - VFAT and ext2/3/4 filesystem support - NFS client and NFS rootfs support - an assortment of other kernel features required for running systemd It also enables a number of drivers for physical hardware that target the "SiFive U500" SoC and the corresponding development platform. 
These include: - PCIe host controller support for the FPGA-based U500 development platform (PCIE_XILINX) - USB host controller support (OHCI/EHCI/XHCI) - USB HID (keyboard/mouse) support - USB mass storage support (bulk and UAS) - SATA support (AHCI) - ethernet drivers (MACB for a SoC-internal MAC block, microsemi ethernet phy, E1000E and R8169 for PCIe-connected external devices) - DRM and framebuffer console support for PCIe-connected Radeon graphics chips Signed-off-by: Karsten Merker Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 75 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index e69de29bb2d1..47dacf06c679 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -0,0 +1,75 @@ +CONFIG_SMP=y +CONFIG_PCI=y +CONFIG_PCIE_XILINX=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_BPF_SYSCALL=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NETLINK_DIAG=y +CONFIG_DEVTMPFS=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y +CONFIG_MACB=y +CONFIG_E1000E=y +CONFIG_R8169=y +CONFIG_MICROSEMI_PHY=y +CONFIG_INPUT_MOUSEDEV=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_PTP_1588_CLOCK is not set +CONFIG_DRM=y +CONFIG_DRM_RADEON=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_PLATFORM=y +CONFIG_USB_EHCI_HCD=y 
+CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_UAS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_RAS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_AUTOFS4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y +# CONFIG_RCU_TRACE is not set +CONFIG_CRYPTO_USER_API_HASH=y -- cgit v1.2.3 From 9e49a4ed072ab67b17238c5a45d7cba7f848659e Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 26 Dec 2017 19:11:22 -0800 Subject: RISC-V: Make __NR_riscv_flush_icache visible to userspace We were hoping to avoid making this visible to userspace, but it looks like we're going to have to because QEMU's user-mode emulation doesn't want to emulate a vDSO. Having vDSO-only system calls was a bit unorthodox anyway, so I think in this case it's OK to just make the actual system call number public. This patch simply makes the definition of __NR_riscv_flush_icache available to userspace, which results in the deletion of the now empty vdso-syscalls.h. Changes since v1: * I've moved the definition into uapi/asm/syscalls.h rather than uapi/asm/unistd.h. This allows me to keep asm/unistd.h, so we can keep the syscall table macros sane. * As a side effect of the above, this no longer disables all system calls on RISC-V. Whoops! 
Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/unistd.h | 1 + arch/riscv/include/asm/vdso-syscalls.h | 28 ---------------------------- arch/riscv/include/uapi/asm/syscalls.h | 26 ++++++++++++++++++++++++++ arch/riscv/kernel/syscall_table.c | 1 - arch/riscv/kernel/vdso/flush_icache.S | 1 - 5 files changed, 27 insertions(+), 30 deletions(-) delete mode 100644 arch/riscv/include/asm/vdso-syscalls.h create mode 100644 arch/riscv/include/uapi/asm/syscalls.h diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index 9f250ed007cd..2f704a5c4196 100644 --- a/arch/riscv/include/asm/unistd.h +++ b/arch/riscv/include/asm/unistd.h @@ -14,3 +14,4 @@ #define __ARCH_HAVE_MMU #define __ARCH_WANT_SYS_CLONE #include +#include diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h deleted file mode 100644 index a2ccf1894929..000000000000 --- a/arch/riscv/include/asm/vdso-syscalls.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2017 SiFive - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _ASM_RISCV_VDSO_SYSCALLS_H -#define _ASM_RISCV_VDSO_SYSCALLS_H - -#ifdef CONFIG_SMP - -/* These syscalls are only used by the vDSO and are not in the uapi. 
*/ -#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) -__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) - -#endif - -#endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/syscalls.h new file mode 100644 index 000000000000..818655b0d535 --- /dev/null +++ b/arch/riscv/include/uapi/asm/syscalls.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2017 SiFive + */ + +#ifndef _ASM__UAPI__SYSCALLS_H +#define _ASM__UAPI__SYSCALLS_H + +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * __NR_riscv_flush_icache is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +#endif diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index a5bd6401f95e..ade52b903a43 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -23,5 +23,4 @@ void *sys_call_table[__NR_syscalls] = { [0 ... 
__NR_syscalls - 1] = sys_ni_syscall, #include -#include }; diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S index b0fbad74e873..023e4d4aef58 100644 --- a/arch/riscv/kernel/vdso/flush_icache.S +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -13,7 +13,6 @@ #include #include -#include .text /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ -- cgit v1.2.3 From c163fb38ca34694b0cce99bb5604257bc29bf200 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 4 Jan 2018 18:35:02 +0100 Subject: riscv: remove CONFIG_MMU ifdefs The RISC-V port doesn't support a nommu mode, so there is no reason to provide some code only under a CONFIG_MMU ifdef. Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 4 ---- arch/riscv/include/asm/pgtable.h | 4 ---- arch/riscv/include/asm/tlbflush.h | 4 ---- arch/riscv/include/asm/uaccess.h | 12 ------------ 4 files changed, 24 deletions(-) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index a82ce599b639..b269451e7e85 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -21,8 +21,6 @@ #include -#ifdef CONFIG_MMU - extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); /* @@ -36,8 +34,6 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); extern void iounmap(volatile void __iomem *addr); -#endif /* CONFIG_MMU */ - /* Generic IO read/write. These perform native-endian accesses. 
*/ #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 2cbd92ed1629..16301966d65b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -20,8 +20,6 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_MMU - /* Page Upper Directory not used in RISC-V */ #include #include @@ -413,8 +411,6 @@ static inline void pgtable_cache_init(void) /* No page table caches to initialize */ } -#endif /* CONFIG_MMU */ - #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 715b0f10af58..7b9c24ebdf52 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -15,8 +15,6 @@ #ifndef _ASM_RISCV_TLBFLUSH_H #define _ASM_RISCV_TLBFLUSH_H -#ifdef CONFIG_MMU - #include /* @@ -64,6 +62,4 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } -#endif /* CONFIG_MMU */ - #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 27b90d64814b..14b0b22fb578 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -127,7 +127,6 @@ extern int fixup_exception(struct pt_regs *state); * call. 
*/ -#ifdef CONFIG_MMU #define __get_user_asm(insn, x, ptr, err) \ do { \ uintptr_t __tmp; \ @@ -153,13 +152,11 @@ do { \ __disable_user_access(); \ (x) = __x; \ } while (0) -#endif /* CONFIG_MMU */ #ifdef CONFIG_64BIT #define __get_user_8(x, ptr, err) \ __get_user_asm("ld", x, ptr, err) #else /* !CONFIG_64BIT */ -#ifdef CONFIG_MMU #define __get_user_8(x, ptr, err) \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ @@ -193,7 +190,6 @@ do { \ (x) = (__typeof__(x))((__typeof__((x)-(x)))( \ (((u64)__hi << 32) | __lo))); \ } while (0) -#endif /* CONFIG_MMU */ #endif /* CONFIG_64BIT */ @@ -267,8 +263,6 @@ do { \ ((x) = 0, -EFAULT); \ }) - -#ifdef CONFIG_MMU #define __put_user_asm(insn, x, ptr, err) \ do { \ uintptr_t __tmp; \ @@ -292,14 +286,11 @@ do { \ : "rJ" (__x), "i" (-EFAULT)); \ __disable_user_access(); \ } while (0) -#endif /* CONFIG_MMU */ - #ifdef CONFIG_64BIT #define __put_user_8(x, ptr, err) \ __put_user_asm("sd", x, ptr, err) #else /* !CONFIG_64BIT */ -#ifdef CONFIG_MMU #define __put_user_8(x, ptr, err) \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ @@ -329,7 +320,6 @@ do { \ : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \ __disable_user_access(); \ } while (0) -#endif /* CONFIG_MMU */ #endif /* CONFIG_64BIT */ @@ -438,7 +428,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) * will set "err" to -EFAULT, while successful accesses return the previous * value. 
*/ -#ifdef CONFIG_MMU #define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ @@ -508,6 +497,5 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) (err) = __err; \ __ret; \ }) -#endif /* CONFIG_MMU */ #endif /* _ASM_RISCV_UACCESS_H */ -- cgit v1.2.3 From 1125203c13b9da32125e171b4bd75e93d4918ddd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 4 Jan 2018 18:35:03 +0100 Subject: riscv: rename SR_* constants to match the spec Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/csr.h | 8 ++++---- arch/riscv/include/asm/irqflags.h | 10 +++++----- arch/riscv/include/asm/ptrace.h | 2 +- arch/riscv/kernel/entry.S | 8 ++++---- arch/riscv/kernel/process.c | 4 ++-- arch/riscv/mm/fault.c | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 0d64bc9f4f91..3c7a2c97e377 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -17,10 +17,10 @@ #include /* Status register flags */ -#define SR_IE _AC(0x00000002, UL) /* Interrupt Enable */ -#define SR_PIE _AC(0x00000020, UL) /* Previous IE */ -#define SR_PS _AC(0x00000100, UL) /* Previously Supervisor */ -#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */ +#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */ +#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */ +#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */ +#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */ #define SR_FS _AC(0x00006000, UL) /* Floating-point Status */ #define SR_FS_OFF _AC(0x00000000, UL) diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h index 6fdc860d7f84..07a3c6d5706f 100644 --- a/arch/riscv/include/asm/irqflags.h +++ b/arch/riscv/include/asm/irqflags.h @@ -27,25 +27,25 @@ static inline unsigned long 
arch_local_save_flags(void) /* unconditionally enable interrupts */ static inline void arch_local_irq_enable(void) { - csr_set(sstatus, SR_IE); + csr_set(sstatus, SR_SIE); } /* unconditionally disable interrupts */ static inline void arch_local_irq_disable(void) { - csr_clear(sstatus, SR_IE); + csr_clear(sstatus, SR_SIE); } /* get status and disable interrupts */ static inline unsigned long arch_local_irq_save(void) { - return csr_read_clear(sstatus, SR_IE); + return csr_read_clear(sstatus, SR_SIE); } /* test flags */ static inline int arch_irqs_disabled_flags(unsigned long flags) { - return !(flags & SR_IE); + return !(flags & SR_SIE); } /* test hardware interrupt enable bit */ @@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void) /* set interrupt enabled status */ static inline void arch_local_irq_restore(unsigned long flags) { - csr_set(sstatus, flags & SR_IE); + csr_set(sstatus, flags & SR_SIE); } #endif /* _ASM_RISCV_IRQFLAGS_H */ diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 93b8956e25e4..2c5df945d43c 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -66,7 +66,7 @@ struct pt_regs { #define REG_FMT "%08lx" #endif -#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0) +#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0) /* Helpers for working with the instruction pointer */ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 20ee86f782a9..7404ec222406 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -196,7 +196,7 @@ handle_syscall: addi s2, s2, 0x4 REG_S s2, PT_SEPC(sp) /* System calls run with interrupts enabled */ - csrs sstatus, SR_IE + csrs sstatus, SR_SIE /* Trace syscalls, but only if requested by the user. 
*/ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_TRACE @@ -224,8 +224,8 @@ ret_from_syscall: ret_from_exception: REG_L s0, PT_SSTATUS(sp) - csrc sstatus, SR_IE - andi s0, s0, SR_PS + csrc sstatus, SR_SIE + andi s0, s0, SR_SPP bnez s0, restore_all resume_userspace: @@ -255,7 +255,7 @@ work_pending: bnez s1, work_resched work_notifysig: /* Handle pending signals and notify-resume requests */ - csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */ + csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */ move a0, sp /* pt_regs */ move a1, s0 /* current_thread_info->flags */ tail do_notify_resume diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 0d90dcc1fbd3..d74d4adf2d54 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -76,7 +76,7 @@ void show_regs(struct pt_regs *regs) void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL; + regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL; regs->sepc = pc; regs->sp = sp; set_fs(USER_DS); @@ -110,7 +110,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, const register unsigned long gp __asm__ ("gp"); memset(childregs, 0, sizeof(struct pt_regs)); childregs->gp = gp; - childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */ + childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */ p->thread.ra = (unsigned long)ret_from_kernel_thread; p->thread.s[0] = usp; /* fn */ diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index df2ca3c65048..0713f3c67ab4 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -63,7 +63,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs) goto vmalloc_fault; /* Enable interrupts if they were enabled in the parent context. 
*/ - if (likely(regs->sstatus & SR_PIE)) + if (likely(regs->sstatus & SR_SPIE)) local_irq_enable(); /* -- cgit v1.2.3 From e2d5915293ffdff977ddcfc12b817b08c53ffa7a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 8 Jan 2018 14:54:32 +1100 Subject: powerpc/pseries: Make RAS IRQ explicitly dependent on DLPAR WQ The hotplug code uses its own workqueue to handle IRQ requests (pseries_hp_wq), however that workqueue is initialized after init_ras_IRQ(). That can lead to a kernel panic if any hotplug interrupts fire after init_ras_IRQ() but before pseries_hp_wq is initialised. eg: UDP-Lite hash table entries: 2048 (order: 0, 65536 bytes) NET: Registered protocol family 1 Unpacking initramfs... (qemu) object_add memory-backend-ram,id=mem1,size=10G (qemu) device_add pc-dimm,id=dimm1,memdev=mem1 Unable to handle kernel paging request for data at address 0xf94d03007c421378 Faulting instruction address: 0xc00000000012d744 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2-ziviani+ #26 task: (ptrval) task.stack: (ptrval) NIP: c00000000012d744 LR: c00000000012d744 CTR: 0000000000000000 REGS: (ptrval) TRAP: 0380 Not tainted (4.15.0-rc2-ziviani+) MSR: 8000000000009033 CR: 28088042 XER: 20040000 CFAR: c00000000012d3c4 SOFTE: 0 ... NIP [c00000000012d744] __queue_work+0xd4/0x5c0 LR [c00000000012d744] __queue_work+0xd4/0x5c0 Call Trace: [c0000000fffefb90] [c00000000012d744] __queue_work+0xd4/0x5c0 (unreliable) [c0000000fffefc70] [c00000000012dce4] queue_work_on+0xb4/0xf0 This commit makes the RAS IRQ registration explicitly dependent on the creation of the pseries_hp_wq. 
Reported-by: Min Deng Reported-by: Daniel Henrique Barboza Tested-by: Jose Ricardo Ziviani Signed-off-by: Michael Ellerman Reviewed-by: David Gibson --- arch/powerpc/platforms/pseries/dlpar.c | 21 ++++++++++++++++++--- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/ras.c | 3 ++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 6e35780c5962..a0b20c03f078 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr, static CLASS_ATTR_RW(dlpar); -static int __init pseries_dlpar_init(void) +int __init dlpar_workqueue_init(void) { + if (pseries_hp_wq) + return 0; + pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", - WQ_UNBOUND, 1); + WQ_UNBOUND, 1); + + return pseries_hp_wq ? 0 : -ENOMEM; +} + +static int __init dlpar_sysfs_init(void) +{ + int rc; + + rc = dlpar_workqueue_init(); + if (rc) + return rc; + return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); } -machine_device_initcall(pseries, pseries_dlpar_init); +machine_device_initcall(pseries, dlpar_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4470a3194311..1ae1d9f4dbe9 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void) return CMO_PageSize; } +int dlpar_workqueue_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 4923ffe230cf..81d8614e7379 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void) /* Hotplug Events */ np = 
of_find_node_by_path("/event-sources/hot-plug-events"); if (np != NULL) { - request_event_sources_irqs(np, ras_hotplug_interrupt, + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, "RAS_HOTPLUG"); of_node_put(np); } -- cgit v1.2.3 From 65e7439204b57b7a7f6e4694f9e2a9adde5e77ed Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 21 Dec 2017 10:29:32 +0800 Subject: drm/i915/gvt: Fix stack-out-of-bounds bug in cmd parser for_each_set_bit() only accepts variable of type unsigned long, and we can not cast it from smaller types. [ 16.499365] ================================================================== [ 16.506655] BUG: KASAN: stack-out-of-bounds in find_first_bit+0x1d/0x70 [ 16.513313] Read of size 8 at addr ffff8803616cf510 by task systemd-udevd/180 [ 16.521998] CPU: 0 PID: 180 Comm: systemd-udevd Tainted: G U O 4.15.0-rc3+ #14 [ 16.530317] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.2.8 01/26/2016 [ 16.537760] Call Trace: [ 16.540230] dump_stack+0x7c/0xbb [ 16.543569] print_address_description+0x6b/0x290 [ 16.548306] kasan_report+0x28a/0x370 [ 16.551993] ? find_first_bit+0x1d/0x70 [ 16.555858] find_first_bit+0x1d/0x70 [ 16.559625] intel_gvt_init_cmd_parser+0x127/0x3c0 [i915] [ 16.565060] ? __lock_is_held+0x8f/0xf0 [ 16.568990] ? intel_gvt_clean_cmd_parser+0x10/0x10 [i915] [ 16.574514] ? __hrtimer_init+0x5d/0xb0 [ 16.578445] intel_gvt_init_device+0x2c3/0x690 [i915] [ 16.583537] ? unregister_module_notifier+0x20/0x20 [ 16.588515] intel_gvt_init+0x89/0x100 [i915] [ 16.592962] i915_driver_load+0x1992/0x1c70 [i915] [ 16.597846] ? __i915_printk+0x210/0x210 [i915] [ 16.602410] ? wait_for_completion+0x280/0x280 [ 16.606883] ? lock_downgrade+0x2c0/0x2c0 [ 16.610923] ? __pm_runtime_resume+0x46/0x90 [ 16.615238] ? acpi_dev_found+0x76/0x80 [ 16.619162] ? i915_pci_remove+0x30/0x30 [i915] [ 16.623733] local_pci_probe+0x74/0xe0 [ 16.627518] pci_device_probe+0x208/0x310 [ 16.631561] ? pci_device_remove+0x100/0x100 [ 16.635871] ? 
__list_add_valid+0x29/0xa0 [ 16.639919] driver_probe_device+0x40b/0x6b0 [ 16.644223] ? driver_probe_device+0x6b0/0x6b0 [ 16.648696] __driver_attach+0x11d/0x130 [ 16.652649] bus_for_each_dev+0xe7/0x160 [ 16.656600] ? subsys_dev_iter_exit+0x10/0x10 [ 16.660987] ? __list_add_valid+0x29/0xa0 [ 16.665028] bus_add_driver+0x31d/0x3a0 [ 16.668893] driver_register+0xc6/0x170 [ 16.672758] ? 0xffffffffc0ad8000 [ 16.676108] do_one_initcall+0x9c/0x206 [ 16.679984] ? initcall_blacklisted+0x150/0x150 [ 16.684545] ? do_init_module+0x35/0x33b [ 16.688494] ? kasan_unpoison_shadow+0x31/0x40 [ 16.692968] ? kasan_kmalloc+0xa6/0xd0 [ 16.696743] ? do_init_module+0x35/0x33b [ 16.700694] ? kasan_unpoison_shadow+0x31/0x40 [ 16.705168] ? __asan_register_globals+0x82/0xa0 [ 16.709819] do_init_module+0xe7/0x33b [ 16.713597] load_module+0x4481/0x4ce0 [ 16.717397] ? module_frob_arch_sections+0x20/0x20 [ 16.722228] ? vfs_read+0x13b/0x190 [ 16.725742] ? kernel_read+0x74/0xa0 [ 16.729351] ? get_user_arg_ptr.isra.17+0x70/0x70 [ 16.734099] ? SYSC_finit_module+0x175/0x1b0 [ 16.738399] SYSC_finit_module+0x175/0x1b0 [ 16.742524] ? SYSC_init_module+0x1e0/0x1e0 [ 16.746741] ? __fget+0x157/0x240 [ 16.750090] ? 
trace_hardirqs_on_thunk+0x1a/0x1c [ 16.754747] entry_SYSCALL_64_fastpath+0x23/0x9a [ 16.759397] RIP: 0033:0x7f8fbc837499 [ 16.762996] RSP: 002b:00007ffead76c138 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 16.770618] RAX: ffffffffffffffda RBX: 0000000000000012 RCX: 00007f8fbc837499 [ 16.777800] RDX: 0000000000000000 RSI: 000056484e67b080 RDI: 0000000000000012 [ 16.784979] RBP: 00007ffead76b140 R08: 0000000000000000 R09: 0000000000000021 [ 16.792164] R10: 0000000000000012 R11: 0000000000000246 R12: 000056484e67b460 [ 16.799345] R13: 00007ffead76b120 R14: 0000000000000005 R15: 0000000000000000 [ 16.808052] The buggy address belongs to the page: [ 16.812876] page:00000000dc4b8c1e count:0 mapcount:0 mapping: (null) index:0x0 [ 16.820934] flags: 0x17ffffc0000000() [ 16.824621] raw: 0017ffffc0000000 0000000000000000 0000000000000000 00000000ffffffff [ 16.832416] raw: ffffea000d85b3e0 ffffea000d85b3e0 0000000000000000 0000000000000000 [ 16.840208] page dumped because: kasan: bad access detected [ 16.847318] Memory state around the buggy address: [ 16.852143] ffff8803616cf400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 16.859427] ffff8803616cf480: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 [ 16.866708] >ffff8803616cf500: f1 f1 04 f4 f4 f4 f3 f3 f3 f3 00 00 00 00 00 00 [ 16.873988] ^ [ 16.877770] ffff8803616cf580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 16.885042] ffff8803616cf600: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 [ 16.892312] ================================================================== Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 701a3c6f1669..9d12090939e3 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2777,12 +2777,12 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct 
intel_shadow_wa_ctx *wa_ctx) } static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt, - unsigned int opcode, int rings) + unsigned int opcode, unsigned long rings) { struct cmd_info *info = NULL; unsigned int ring; - for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) { + for_each_set_bit(ring, &rings, I915_NUM_ENGINES) { info = find_cmd_entry(gvt, opcode, ring); if (info) break; -- cgit v1.2.3 From bcfd09f7837f5240c30fd2f52ee7293516641faa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 5 Jan 2018 22:12:32 +1100 Subject: xfrm: Return error on unknown encap_type in init_state Currently esp will happily create an xfrm state with an unknown encap type for IPv4, without setting the necessary state parameters. This patch fixes it by returning -EINVAL. There is a similar problem in IPv6 where if the mode is unknown we will skip initialisation while returning zero. However, this is harmless as the mode has already been checked further up the stack. This patch removes this anomaly by aligning the IPv6 behaviour with IPv4 and treating unknown modes (which cannot actually happen) as transport mode. 
Fixes: 38320c70d282 ("[IPSEC]: Use crypto_aead and authenc in ESP") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 1 + net/ipv6/esp6.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d57aa64fa7c7..61fe6e4d23fc 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -981,6 +981,7 @@ static int esp_init_state(struct xfrm_state *x) switch (encap->encap_type) { default: + err = -EINVAL; goto error; case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index a902ff8f59be..1a7f00cd4803 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -890,13 +890,12 @@ static int esp6_init_state(struct xfrm_state *x) x->props.header_len += IPV4_BEET_PHMAXLEN + (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); break; + default: case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); break; - default: - goto error; } align = ALIGN(crypto_aead_blocksize(aead), 4); -- cgit v1.2.3 From b1bdcb59b64f806ef08d25a85c39ffb3ad841ce6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 6 Jan 2018 01:13:08 +0100 Subject: xfrm: don't call xfrm_policy_cache_flush while holding spinlock xfrm_policy_cache_flush can sleep, so it cannot be called while holding a spinlock. We could release the lock first, but I don't see why we need to invoke this function here in first place, the packet path won't reuse an xdst entry unless its still valid. While at it, add an annotation to xfrm_policy_cache_flush, it would have probably caught this bug sooner. 
Fixes: ec30d78c14a813 ("xfrm: add xdst pcpu cache") Reported-by: syzbot+e149f7d1328c26f9c12f@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2ef6db98e9ba..bc5eae12fb09 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -975,8 +975,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) } if (!cnt) err = -ESRCH; - else - xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@ -1744,6 +1742,8 @@ void xfrm_policy_cache_flush(void) bool found = 0; int cpu; + might_sleep(); + local_bh_disable(); rcu_read_lock(); for_each_possible_cpu(cpu) { -- cgit v1.2.3 From 2023b0524a6310e9ea80daf085f51c71bff9289f Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Thu, 28 Dec 2017 09:27:41 +0100 Subject: backlight: tdo24m: Fix the SPI CS between transfers Currently the LCD display (TD035S) on the cm-x300 platform is broken and remains blank. The TD0245S specification requires that the chipselect is toggled between commands sent to the panel. This was also the purpose of the former patch of commit f64dcac0b124 ("backlight: tdo24m: ensure chip select changes between transfers"). Unfortunately, the "cs_change" field of a SPI transfer is misleading. Its true meaning is that for a SPI message holding multiple transfers, the chip select is toggled between each transfer, but for the last transfer it remains asserted. In this driver, all the SPI messages contain exactly one transfer, which means that each transfer is the last of its message, and as a consequence the chip select is never toggled. Actually, there was a second bug hiding the first one, hence the problem was not seen until v4.6. This problem was fixed by commit a52db659c79c ("spi: pxa2xx: Fix cs_change management") for PXA based boards.
This fix makes the TD035S work again on a cm-x300 board. The same applies to other PXA boards, ie. corgi and tosa. Fixes: a52db659c79c ("spi: pxa2xx: Fix cs_change management") Reported-by: Andrea Adami Signed-off-by: Robert Jarzmik Acked-by: Daniel Thompson Signed-off-by: Lee Jones --- drivers/video/backlight/corgi_lcd.c | 2 +- drivers/video/backlight/tdo24m.c | 2 +- drivers/video/backlight/tosa_lcd.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c index d7c239ea3d09..f5574060f9c8 100644 --- a/drivers/video/backlight/corgi_lcd.c +++ b/drivers/video/backlight/corgi_lcd.c @@ -177,7 +177,7 @@ static int corgi_ssp_lcdtg_send(struct corgi_lcd *lcd, int adrs, uint8_t data) struct spi_message msg; struct spi_transfer xfer = { .len = 1, - .cs_change = 1, + .cs_change = 0, .tx_buf = lcd->buf, }; diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c index eab1f842f9c0..e4bd63e9db6b 100644 --- a/drivers/video/backlight/tdo24m.c +++ b/drivers/video/backlight/tdo24m.c @@ -369,7 +369,7 @@ static int tdo24m_probe(struct spi_device *spi) spi_message_init(m); - x->cs_change = 1; + x->cs_change = 0; x->tx_buf = &lcd->buf[0]; spi_message_add_tail(x, m); diff --git a/drivers/video/backlight/tosa_lcd.c b/drivers/video/backlight/tosa_lcd.c index 6a41ea92737a..4dc5ee8debeb 100644 --- a/drivers/video/backlight/tosa_lcd.c +++ b/drivers/video/backlight/tosa_lcd.c @@ -49,7 +49,7 @@ static int tosa_tg_send(struct spi_device *spi, int adrs, uint8_t data) struct spi_message msg; struct spi_transfer xfer = { .len = 1, - .cs_change = 1, + .cs_change = 0, .tx_buf = buf, }; -- cgit v1.2.3 From da911b1f5e98f21b20aa042748dfe73e36322fb1 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 5 Jan 2018 16:50:08 +0800 Subject: ALSA: hda/realtek - update ALC225 depop optimize Add ALC225 its own depop functions for alc_init and alc_shutup. 
Add depop optimize step for headset mode functions. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 101 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b2037131eed9..440972975bd4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3166,6 +3166,93 @@ static void alc256_shutup(struct hda_codec *codec) snd_hda_shutup_pins(codec); } +static void alc225_init(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + bool hp1_pin_sense, hp2_pin_sense; + + if (!hp_pin) + return; + + msleep(30); + + hp1_pin_sense = snd_hda_jack_detect(codec, hp_pin); + hp2_pin_sense = snd_hda_jack_detect(codec, 0x16); + + if (hp1_pin_sense || hp2_pin_sense) + msleep(2); + + alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x16, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + if (hp1_pin_sense || hp2_pin_sense) + msleep(85); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x16, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + + if (hp1_pin_sense || hp2_pin_sense) + msleep(100); + + alc_update_coef_idx(codec, 0x4a, 3 << 10, 0); + alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */ +} + +static void alc225_shutup(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + hda_nid_t hp_pin = spec->gen.autocfg.hp_pins[0]; + bool hp1_pin_sense, hp2_pin_sense; + + if (!hp_pin) { + alc269_shutup(codec); + return; + } + + /* 3k pull low control for Headset jack. 
*/ + alc_update_coef_idx(codec, 0x4a, 0, 3 << 10); + + hp1_pin_sense = snd_hda_jack_detect(codec, hp_pin); + hp2_pin_sense = snd_hda_jack_detect(codec, 0x16); + + if (hp1_pin_sense || hp2_pin_sense) + msleep(2); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x16, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + + if (hp1_pin_sense || hp2_pin_sense) + msleep(85); + + if (hp1_pin_sense) + snd_hda_codec_write(codec, hp_pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + if (hp2_pin_sense) + snd_hda_codec_write(codec, 0x16, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + + if (hp1_pin_sense || hp2_pin_sense) + msleep(100); + + alc_auto_setup_eapd(codec, false); + snd_hda_shutup_pins(codec); +} + static void alc_default_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; @@ -4569,6 +4656,12 @@ static void alc_determine_headset_type(struct hda_codec *codec) case 0x10ec0225: case 0x10ec0295: case 0x10ec0299: + snd_hda_codec_write(codec, 0x21, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); + msleep(80); + snd_hda_codec_write(codec, 0x21, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); + alc_process_coef_fw(codec, alc225_pre_hsmode); alc_update_coef_idx(codec, 0x67, 0xf000, 0x1000); val = alc_read_coef_idx(codec, 0x45); @@ -4588,6 +4681,12 @@ static void alc_determine_headset_type(struct hda_codec *codec) alc_update_coef_idx(codec, 0x4a, 7<<6, 7<<6); alc_update_coef_idx(codec, 0x4a, 3<<4, 3<<4); alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); + + snd_hda_codec_write(codec, 0x21, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + msleep(80); + snd_hda_codec_write(codec, 0x21, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); break; case 0x10ec0867: is_ctia = true; @@ -6925,6 +7024,8 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0295: case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; + spec->shutup = alc225_shutup; + spec->init_hook = 
alc225_init; spec->gen.mixer_nid = 0; /* no loopback on ALC225, ALC295 and ALC299 */ break; case 0x10ec0234: -- cgit v1.2.3 From 6b018235b4daabae96d855219fae59c3fb8be417 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Fri, 5 Jan 2018 12:44:06 -0500 Subject: nvme-fabrics: initialize default host->id in nvmf_host_default() The field was uninitialized before use. Signed-off-by: Ewan D. Milne Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 76b4fe6816a0..894c2ccb3891 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -74,6 +74,7 @@ static struct nvmf_host *nvmf_host_default(void) return NULL; kref_init(&host->ref); + uuid_gen(&host->id); snprintf(host->nqn, NVMF_NQN_SIZE, "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id); -- cgit v1.2.3 From cb4876e8ce1c6d78306c206df1970748ebb89025 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Nov 2017 23:29:05 +0200 Subject: nvmet-rdma: removed queue cleanup from module exit We already do that when we are notified in device removal which is triggered when unregistering as an ib client. 
Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 0e4c15754c58..454a5dce81f8 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1503,25 +1503,9 @@ err_ib_client: static void __exit nvmet_rdma_exit(void) { - struct nvmet_rdma_queue *queue; - nvmet_unregister_transport(&nvmet_rdma_ops); - - flush_scheduled_work(); - - mutex_lock(&nvmet_rdma_queue_mutex); - while ((queue = list_first_entry_or_null(&nvmet_rdma_queue_list, - struct nvmet_rdma_queue, queue_list))) { - list_del_init(&queue->queue_list); - - mutex_unlock(&nvmet_rdma_queue_mutex); - __nvmet_rdma_queue_disconnect(queue); - mutex_lock(&nvmet_rdma_queue_mutex); - } - mutex_unlock(&nvmet_rdma_queue_mutex); - - flush_scheduled_work(); ib_unregister_client(&nvmet_rdma_ib_client); + WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); ida_destroy(&nvmet_rdma_queue_ida); } -- cgit v1.2.3 From 424125a09db7a207ab53876db50a6198ca88518f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 4 Dec 2017 10:47:10 +0200 Subject: nvmet-rdma: lowering log level for chatty debug messages It is a bit chatty to report on every deleted queue, so keep it for debug purposes only. 
Signed-off-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 454a5dce81f8..978e169c11bf 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -921,7 +921,7 @@ static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) { - pr_info("freeing queue %d\n", queue->idx); + pr_debug("freeing queue %d\n", queue->idx); nvmet_sq_destroy(&queue->nvme_sq); -- cgit v1.2.3 From 6a1c57acab85e2e7a18827b43710b4e16c11148d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 4 Dec 2017 10:47:09 +0200 Subject: nvmet: lower log level for each queue creation It is a bit chatty to report on each queue, log it only for debug purposes. Signed-off-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fabrics-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index db3bf6b8bf9e..19e9e42ae943 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -225,7 +225,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out_ctrl_put; } - pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); + pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); out: kfree(d); -- cgit v1.2.3 From 278e096063f1914fccfc77a617be9fc8dbb31b0e Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 16:47:30 -0800 Subject: nvme_fcloop: fix abort race condition A test case revealed a race condition of an i/o completing on a thread parallel to the delete_association generating the aborts for the outstanding ios on the controller. 
The i/o completion was freeing the target fcloop context, thus the abort task referenced the just-freed memory. Correct by clearing the target/initiator cross pointers in the io completion and abort tasks before calling the callbacks. On aborts that detect already finished io's, ensure the complete context is called. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 7b75d9de55ab..3eb2a0733f46 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -370,6 +370,7 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) spin_lock(&tfcp_req->reqlock); fcpreq = tfcp_req->fcpreq; + tfcp_req->fcpreq = NULL; spin_unlock(&tfcp_req->reqlock); if (tport->remoteport && fcpreq) { @@ -611,11 +612,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, if (!tfcp_req) /* abort has already been called */ - return; - - if (rport->targetport) - nvmet_fc_rcv_fcp_abort(rport->targetport, - &tfcp_req->tgt_fcp_req); + goto finish; /* break initiator/target relationship for io */ spin_lock(&tfcp_req->reqlock); @@ -623,6 +620,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, tfcp_req->fcpreq = NULL; spin_unlock(&tfcp_req->reqlock); + if (rport->targetport) + nvmet_fc_rcv_fcp_abort(rport->targetport, + &tfcp_req->tgt_fcp_req); + +finish: /* post the aborted io completion */ fcpreq->status = -ECANCELED; schedule_work(&inireq->iniwork); -- cgit v1.2.3 From 6fda20283e55b9d288cd56822ce39fc8e64f2208 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 16:47:31 -0800 Subject: nvme_fcloop: disassocate local port structs The current fcloop driver gets its lport structure from the private area co-allocated with the fc_localport. 
All is fine except the teardown path, which wants to wait on the completion, which is marked complete by the delete_localport callback performed after unregister_localport. The issue is, the nvme_fc transport frees the localport structure immediately after delete_localport is called, meaning the original routine is trying to wait on a complete that was just freed. Change such that a lport struct is allocated coincident with the addition and registration of a localport. The private area of the localport now contains just a backpointer to the real lport struct. Now, the completion can be waited for, and after completing, the new structure can be kfree'd. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 3eb2a0733f46..c0080f6ab2f5 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -204,6 +204,10 @@ struct fcloop_lport { struct completion unreg_done; }; +struct fcloop_lport_priv { + struct fcloop_lport *lport; +}; + struct fcloop_rport { struct nvme_fc_remote_port *remoteport; struct nvmet_fc_target_port *targetport; @@ -659,7 +663,8 @@ fcloop_nport_get(struct fcloop_nport *nport) static void fcloop_localport_delete(struct nvme_fc_local_port *localport) { - struct fcloop_lport *lport = localport->private; + struct fcloop_lport_priv *lport_priv = localport->private; + struct fcloop_lport *lport = lport_priv->lport; /* release any threads waiting for the unreg to complete */ complete(&lport->unreg_done); @@ -699,7 +704,7 @@ static struct nvme_fc_port_template fctemplate = { .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* sizes of additional private data for data structures */ - .local_priv_sz = sizeof(struct fcloop_lport), + .local_priv_sz = sizeof(struct fcloop_lport_priv), .remote_priv_sz = 
sizeof(struct fcloop_rport), .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), @@ -730,11 +735,17 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr, struct fcloop_ctrl_options *opts; struct nvme_fc_local_port *localport; struct fcloop_lport *lport; - int ret; + struct fcloop_lport_priv *lport_priv; + unsigned long flags; + int ret = -ENOMEM; + + lport = kzalloc(sizeof(*lport), GFP_KERNEL); + if (!lport) + return -ENOMEM; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) - return -ENOMEM; + goto out_free_lport; ret = fcloop_parse_options(opts, buf); if (ret) @@ -754,23 +765,25 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr, ret = nvme_fc_register_localport(&pinfo, &fctemplate, NULL, &localport); if (!ret) { - unsigned long flags; - /* success */ - lport = localport->private; + lport_priv = localport->private; + lport_priv->lport = lport; + lport->localport = localport; INIT_LIST_HEAD(&lport->lport_list); spin_lock_irqsave(&fcloop_lock, flags); list_add_tail(&lport->lport_list, &fcloop_lports); spin_unlock_irqrestore(&fcloop_lock, flags); - - /* mark all of the input buffer consumed */ - ret = count; } out_free_opts: kfree(opts); +out_free_lport: + /* free only if we're going to fail */ + if (ret) + kfree(lport); + return ret ? ret : count; } @@ -792,6 +805,8 @@ __wait_localport_unreg(struct fcloop_lport *lport) wait_for_completion(&lport->unreg_done); + kfree(lport); + return ret; } -- cgit v1.2.3 From 24431d60d3fbfd4c8c05e1828e5d9b35db4fd81c Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 16:47:32 -0800 Subject: nvme_fcloop: rework to remove xxx_IN_ISR feature flags The existing fcloop driver expects the target side upcalls to the transport to context switch, thus the calls into the nvmet layer are not done in the calling context of the host/initiator down calls. The xxx_IN_ISR feature flags are used to select this logic. 
The xxx_IN_ISR feature flags should go away in the nvmet_fc transport as no other lldd utilizes them. Both Broadcom and Cavium lldds have their own non-ISR deferred handlers thus the nvmet calls can be made directly. This patch converts the paths that make the target upcalls (command receive, abort receive) such that they schedule a work item rather than expecting the transport to schedule the work item. The patch also cleans up the following: - The completion path from target to host scheduled a host work element called "work". Rename it "tio_done_work" for code clarity. - The abort io path called a iniwork item to call the host side io done. This is no longer needed as the abort routine can make the same call. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 98 ++++++++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 35 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c0080f6ab2f5..c5015199c031 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -249,14 +249,15 @@ struct fcloop_fcpreq { u16 status; bool active; bool aborted; - struct work_struct work; + struct work_struct fcp_rcv_work; + struct work_struct abort_rcv_work; + struct work_struct tio_done_work; struct nvmefc_tgt_fcp_req tgt_fcp_req; }; struct fcloop_ini_fcpreq { struct nvmefc_fcp_req *fcpreq; struct fcloop_fcpreq *tfcp_req; - struct work_struct iniwork; }; static inline struct fcloop_lsreq * @@ -347,17 +348,58 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, return 0; } -/* - * FCP IO operation done by initiator abort. - * call back up initiator "done" flows. 
- */ static void -fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work) +fcloop_fcp_recv_work(struct work_struct *work) +{ + struct fcloop_fcpreq *tfcp_req = + container_of(work, struct fcloop_fcpreq, fcp_rcv_work); + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct fcloop_ini_fcpreq *inireq = NULL; + int ret = 0; + + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req, + fcpreq->cmdaddr, fcpreq->cmdlen); + if (ret) { + inireq = fcpreq->private; + inireq->tfcp_req = NULL; + + fcpreq->status = tfcp_req->status; + fcpreq->done(fcpreq); + } +} + +static void +fcloop_call_host_done(struct nvmefc_fcp_req *fcpreq, + struct fcloop_fcpreq *tfcp_req, int status) +{ + struct fcloop_ini_fcpreq *inireq = NULL; + + if (fcpreq) { + inireq = fcpreq->private; + inireq->tfcp_req = NULL; + + fcpreq->status = status; + fcpreq->done(fcpreq); + } +} + +static void +fcloop_fcp_abort_recv_work(struct work_struct *work) { - struct fcloop_ini_fcpreq *inireq = - container_of(work, struct fcloop_ini_fcpreq, iniwork); + struct fcloop_fcpreq *tfcp_req = + container_of(work, struct fcloop_fcpreq, abort_rcv_work); + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + + if (tfcp_req->tport->targetport) + nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req); + + spin_lock(&tfcp_req->reqlock); + tfcp_req->fcpreq = NULL; + spin_unlock(&tfcp_req->reqlock); - inireq->fcpreq->done(inireq->fcpreq); + fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); } /* @@ -368,8 +410,7 @@ static void fcloop_tgt_fcprqst_done_work(struct work_struct *work) { struct fcloop_fcpreq *tfcp_req = - container_of(work, struct fcloop_fcpreq, work); - struct fcloop_tport *tport = tfcp_req->tport; + container_of(work, struct fcloop_fcpreq, tio_done_work); struct nvmefc_fcp_req *fcpreq; spin_lock(&tfcp_req->reqlock); @@ -377,10 +418,7 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) tfcp_req->fcpreq = NULL; spin_unlock(&tfcp_req->reqlock); - if 
(tport->remoteport && fcpreq) { - fcpreq->status = tfcp_req->status; - fcpreq->done(fcpreq); - } + fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status); kfree(tfcp_req); } @@ -395,7 +433,6 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, struct fcloop_rport *rport = remoteport->private; struct fcloop_ini_fcpreq *inireq = fcpreq->private; struct fcloop_fcpreq *tfcp_req; - int ret = 0; if (!rport->targetport) return -ECONNREFUSED; @@ -406,16 +443,16 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, inireq->fcpreq = fcpreq; inireq->tfcp_req = tfcp_req; - INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work); tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; spin_lock_init(&tfcp_req->reqlock); - INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); + INIT_WORK(&tfcp_req->fcp_rcv_work, fcloop_fcp_recv_work); + INIT_WORK(&tfcp_req->abort_rcv_work, fcloop_fcp_abort_recv_work); + INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); - ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, - fcpreq->cmdaddr, fcpreq->cmdlen); + schedule_work(&tfcp_req->fcp_rcv_work); - return ret; + return 0; } static void @@ -594,7 +631,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - schedule_work(&tfcp_req->work); + schedule_work(&tfcp_req->tio_done_work); } static void @@ -610,13 +647,12 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { - struct fcloop_rport *rport = remoteport->private; struct fcloop_ini_fcpreq *inireq = fcpreq->private; struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; if (!tfcp_req) /* abort has already been called */ - goto finish; + return; /* break initiator/target relationship for io */ spin_lock(&tfcp_req->reqlock); @@ -624,14 +660,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, tfcp_req->fcpreq = NULL; 
spin_unlock(&tfcp_req->reqlock); - if (rport->targetport) - nvmet_fc_rcv_fcp_abort(rport->targetport, - &tfcp_req->tgt_fcp_req); - -finish: - /* post the aborted io completion */ - fcpreq->status = -ECANCELED; - schedule_work(&inireq->iniwork); + WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); } static void @@ -721,8 +750,7 @@ static struct nvmet_fc_target_template tgttemplate = { .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ - .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | - NVMET_FCTGTFEAT_OPDONE_IN_ISR, + .target_features = 0, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), }; -- cgit v1.2.3 From b6f807738b5e3a24eda3ea6864abc18d10279e69 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 16:47:33 -0800 Subject: nvme_fcloop: refactor host/target io job access The split between what the host accesses on its flows vs what the target side accesses was flawed. Abort handling didn't properly clear initiator vs target structure cross-reference and locks weren't used for synchronization. Thus, there were issues of freeing structures too soon and access after free. A couple of these existed pre the IN_ISR mods, but when the target upcalls were converted to work items, thus adding delays between the 2 sides of accesses, the problems became pronounced. Resolve by: - tracking io state mainly in the tgt-side io structure. - make the tgt-side io structure released by reference not by code flow. - when changing initiator structures, use locks for synchronization - aborts are clearly tracked for which side saw the abort, and after seeing the abort, cross-references are cleared under lock. 
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 147 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 125 insertions(+), 22 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c5015199c031..9f8a6726df91 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -242,13 +242,22 @@ struct fcloop_lsreq { int status; }; +enum { + INI_IO_START = 0, + INI_IO_ACTIVE = 1, + INI_IO_ABORTED = 2, + INI_IO_COMPLETED = 3, +}; + struct fcloop_fcpreq { struct fcloop_tport *tport; struct nvmefc_fcp_req *fcpreq; spinlock_t reqlock; u16 status; + u32 inistate; bool active; bool aborted; + struct kref ref; struct work_struct fcp_rcv_work; struct work_struct abort_rcv_work; struct work_struct tio_done_work; @@ -258,6 +267,7 @@ struct fcloop_fcpreq { struct fcloop_ini_fcpreq { struct nvmefc_fcp_req *fcpreq; struct fcloop_fcpreq *tfcp_req; + spinlock_t inilock; }; static inline struct fcloop_lsreq * @@ -349,24 +359,24 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, } static void -fcloop_fcp_recv_work(struct work_struct *work) +fcloop_tfcp_req_free(struct kref *ref) { struct fcloop_fcpreq *tfcp_req = - container_of(work, struct fcloop_fcpreq, fcp_rcv_work); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; - struct fcloop_ini_fcpreq *inireq = NULL; - int ret = 0; + container_of(ref, struct fcloop_fcpreq, ref); - ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, - &tfcp_req->tgt_fcp_req, - fcpreq->cmdaddr, fcpreq->cmdlen); - if (ret) { - inireq = fcpreq->private; - inireq->tfcp_req = NULL; + kfree(tfcp_req); +} - fcpreq->status = tfcp_req->status; - fcpreq->done(fcpreq); - } +static void +fcloop_tfcp_req_put(struct fcloop_fcpreq *tfcp_req) +{ + kref_put(&tfcp_req->ref, fcloop_tfcp_req_free); +} + +static int +fcloop_tfcp_req_get(struct fcloop_fcpreq *tfcp_req) +{ + return kref_get_unless_zero(&tfcp_req->ref); } static void @@ -377,11 +387,52 @@ 
fcloop_call_host_done(struct nvmefc_fcp_req *fcpreq, if (fcpreq) { inireq = fcpreq->private; + spin_lock(&inireq->inilock); inireq->tfcp_req = NULL; + spin_unlock(&inireq->inilock); fcpreq->status = status; fcpreq->done(fcpreq); } + + /* release original io reference on tgt struct */ + fcloop_tfcp_req_put(tfcp_req); +} + +static void +fcloop_fcp_recv_work(struct work_struct *work) +{ + struct fcloop_fcpreq *tfcp_req = + container_of(work, struct fcloop_fcpreq, fcp_rcv_work); + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + int ret = 0; + bool aborted = false; + + spin_lock(&tfcp_req->reqlock); + switch (tfcp_req->inistate) { + case INI_IO_START: + tfcp_req->inistate = INI_IO_ACTIVE; + break; + case INI_IO_ABORTED: + aborted = true; + break; + default: + spin_unlock(&tfcp_req->reqlock); + WARN_ON(1); + return; + } + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(aborted)) + ret = -ECANCELED; + else + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req, + fcpreq->cmdaddr, fcpreq->cmdlen); + if (ret) + fcloop_call_host_done(fcpreq, tfcp_req, ret); + + return; } static void @@ -389,7 +440,29 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) { struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, abort_rcv_work); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; + bool completed = false; + + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + switch (tfcp_req->inistate) { + case INI_IO_ABORTED: + break; + case INI_IO_COMPLETED: + completed = true; + break; + default: + spin_unlock(&tfcp_req->reqlock); + WARN_ON(1); + return; + } + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(completed)) { + /* remove reference taken in original abort downcall */ + fcloop_tfcp_req_put(tfcp_req); + return; + } if (tfcp_req->tport->targetport) nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, @@ -400,6 +473,7 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) 
spin_unlock(&tfcp_req->reqlock); fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); + /* call_host_done releases reference for abort downcall */ } /* @@ -415,12 +489,10 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) spin_lock(&tfcp_req->reqlock); fcpreq = tfcp_req->fcpreq; - tfcp_req->fcpreq = NULL; + tfcp_req->inistate = INI_IO_COMPLETED; spin_unlock(&tfcp_req->reqlock); fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status); - - kfree(tfcp_req); } @@ -443,12 +515,16 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, inireq->fcpreq = fcpreq; inireq->tfcp_req = tfcp_req; + spin_lock_init(&inireq->inilock); + tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; + tfcp_req->inistate = INI_IO_START; spin_lock_init(&tfcp_req->reqlock); INIT_WORK(&tfcp_req->fcp_rcv_work, fcloop_fcp_recv_work); INIT_WORK(&tfcp_req->abort_rcv_work, fcloop_fcp_abort_recv_work); INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); + kref_init(&tfcp_req->ref); schedule_work(&tfcp_req->fcp_rcv_work); @@ -648,7 +724,14 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, struct nvmefc_fcp_req *fcpreq) { struct fcloop_ini_fcpreq *inireq = fcpreq->private; - struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; + struct fcloop_fcpreq *tfcp_req; + bool abortio = true; + + spin_lock(&inireq->inilock); + tfcp_req = inireq->tfcp_req; + if (tfcp_req) + fcloop_tfcp_req_get(tfcp_req); + spin_unlock(&inireq->inilock); if (!tfcp_req) /* abort has already been called */ @@ -656,11 +739,31 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, /* break initiator/target relationship for io */ spin_lock(&tfcp_req->reqlock); - inireq->tfcp_req = NULL; - tfcp_req->fcpreq = NULL; + switch (tfcp_req->inistate) { + case INI_IO_START: + case INI_IO_ACTIVE: + tfcp_req->inistate = INI_IO_ABORTED; + break; + case INI_IO_COMPLETED: + abortio = false; + break; + default: + spin_unlock(&tfcp_req->reqlock); + WARN_ON(1); + return; + } 
spin_unlock(&tfcp_req->reqlock); - WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); + if (abortio) + /* leave the reference while the work item is scheduled */ + WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); + else { + /* + * as the io has already had the done callback made, + * nothing more to do. So release the reference taken above + */ + fcloop_tfcp_req_put(tfcp_req); + } } static void -- cgit v1.2.3 From 9ce1f2e12e017607fe17a67cea79ebcf0184e5b3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 29 Nov 2017 15:11:55 -0800 Subject: nvmet-fc: cleanup nvmet add_port/remove_port The current fc transport add_port routine validates that there is a matching port to the target port config. It then takes a reference on the targetport. The del_port removes the reference. Unfortunately, if the LLDD undergoes a hw reset or driver unload and wants to unreg the targetport, due to the reference, the targetport effectively can't be removed. It requires the admin to remove the port from the nvmet config first, which calls the del_port. Note: it appears nvmetcli clear skips over the del_port call (I'm not attempting to change that). There's no real reason to take the reference. With FC, there is nothing to enable or disable as the presence of the FC targetport implicitly means it's enabled, and removal of the targetport means it's disabled. Change add_port to simply validate and change remove_port to a noop. No references are taken on the targetport.
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 840d1a39de33..9b39a6cb1935 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2490,14 +2490,8 @@ nvmet_fc_add_port(struct nvmet_port *port) list_for_each_entry(tgtport, &nvmet_fc_target_list, tgt_list) { if ((tgtport->fc_target_port.node_name == traddr.nn) && (tgtport->fc_target_port.port_name == traddr.pn)) { - /* a FC port can only be 1 nvmet port id */ - if (!tgtport->port) { - tgtport->port = port; - port->priv = tgtport; - nvmet_fc_tgtport_get(tgtport); - ret = 0; - } else - ret = -EALREADY; + tgtport->port = port; + ret = 0; break; } } @@ -2508,19 +2502,7 @@ nvmet_fc_add_port(struct nvmet_port *port) static void nvmet_fc_remove_port(struct nvmet_port *port) { - struct nvmet_fc_tgtport *tgtport = port->priv; - unsigned long flags; - bool matched = false; - - spin_lock_irqsave(&nvmet_fc_tgtlock, flags); - if (tgtport->port == port) { - matched = true; - tgtport->port = NULL; - } - spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); - - if (matched) - nvmet_fc_tgtport_put(tgtport); + /* nothing to do */ } static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { -- cgit v1.2.3 From 0de5cd367c6aa2a31a1c931628f778f79f8ef22e Mon Sep 17 00:00:00 2001 From: Roy Shterman Date: Mon, 25 Dec 2017 14:18:30 +0200 Subject: nvme-fabrics: protect against module unload during create_ctrl NVMe transport driver module unload may (and usually does) trigger iteration over the active controllers and deletes them all (sometimes under a mutex). However, a controller can be created concurrently with module unload which can lead to leakage of resources (most importantly char device node leakage) in case the controller creation occurred after the unload delete and drain sequence.
To protect against this, we take a module reference to guarantee that the nvme transport driver is not unloaded while creating a controller. Signed-off-by: Roy Shterman Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 17 +++++++++++++---- drivers/nvme/host/fabrics.h | 2 ++ drivers/nvme/host/fc.c | 1 + drivers/nvme/host/rdma.c | 1 + drivers/nvme/target/loop.c | 1 + 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 76b4fe6816a0..2f68befd31bf 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -492,7 +492,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect); */ int nvmf_register_transport(struct nvmf_transport_ops *ops) { - if (!ops->create_ctrl) + if (!ops->create_ctrl || !ops->module) return -EINVAL; down_write(&nvmf_transports_rwsem); @@ -868,32 +868,41 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_unlock; } + if (!try_module_get(ops->module)) { + ret = -EBUSY; + goto out_unlock; + } + ret = nvmf_check_required_opts(opts, ops->required_opts); if (ret) - goto out_unlock; + goto out_module_put; ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS | ops->allowed_opts | ops->required_opts); if (ret) - goto out_unlock; + goto out_module_put; ctrl = ops->create_ctrl(dev, opts); if (IS_ERR(ctrl)) { ret = PTR_ERR(ctrl); - goto out_unlock; + goto out_module_put; } if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) { dev_warn(ctrl->device, "controller returned incorrect NQN: \"%s\".\n", ctrl->subsys->subnqn); + module_put(ops->module); up_read(&nvmf_transports_rwsem); nvme_delete_ctrl_sync(ctrl); return ERR_PTR(-EINVAL); } + module_put(ops->module); up_read(&nvmf_transports_rwsem); return ctrl; +out_module_put: + module_put(ops->module); out_unlock: up_read(&nvmf_transports_rwsem); out_free_opts: diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 
9ba614953607..25b19f722f5b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -108,6 +108,7 @@ struct nvmf_ctrl_options { * fabric implementation of NVMe fabrics. * @entry: Used by the fabrics library to add the new * registration entry to its linked-list internal tree. + * @module: Transport module reference * @name: Name of the NVMe fabric driver implementation. * @required_opts: sysfs command-line options that must be specified * when adding a new NVMe controller. @@ -126,6 +127,7 @@ struct nvmf_ctrl_options { */ struct nvmf_transport_ops { struct list_head entry; + struct module *module; const char *name; int required_opts; int allowed_opts; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 0a8af4daef89..2a7a9a75105d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3381,6 +3381,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) static struct nvmf_transport_ops nvme_fc_transport = { .name = "fc", + .module = THIS_MODULE, .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_fc_create_ctrl, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 37af56596be6..75d6956eb380 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2006,6 +2006,7 @@ out_free_ctrl: static struct nvmf_transport_ops nvme_rdma_transport = { .name = "rdma", + .module = THIS_MODULE, .required_opts = NVMF_OPT_TRADDR, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 1e21b286f299..fdfcc961029f 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -686,6 +686,7 @@ static struct nvmet_fabrics_ops nvme_loop_ops = { static struct nvmf_transport_ops nvme_loop_transport = { .name = "loop", + .module = THIS_MODULE, .create_ctrl = 
nvme_loop_create_ctrl, }; -- cgit v1.2.3 From 6fbcde6691b514faa963c60f5537332530f1bf0a Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Tue, 5 Dec 2017 05:23:54 +0900 Subject: nvme-pci: remove an unnecessary initialization in HMB code The local variable __size__ will be set a bit later in a for-loop. Remove the explicit initialization at the beginning of this function. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f5800c3c9082..35331fa0013c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1770,7 +1770,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, dma_addr_t descs_dma; int i = 0; void **bufs; - u64 size = 0, tmp; + u64 size, tmp; tmp = (preferred + chunk_size - 1); do_div(tmp, chunk_size); @@ -1853,7 +1853,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) u64 preferred = (u64)dev->ctrl.hmpre * 4096; u64 min = (u64)dev->ctrl.hmmin * 4096; u32 enable_bits = NVME_HOST_MEM_ENABLE; - int ret = 0; + int ret; preferred = min(preferred, max); if (min > max) { -- cgit v1.2.3 From eca19dc1d84d924544dda0c8d2fd4bb4131affeb Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Mon, 13 Nov 2017 12:29:40 +0000 Subject: nvmet: fix error flow in nvmet_alloc_ctrl() Remove the allocated id on error. 
Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b54748ad5f48..07eb45d32a7a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -830,7 +830,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, /* Don't accept keep-alive timeout for discovery controllers */ if (kato) { status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; - goto out_free_sqs; + goto out_remove_ida; } /* @@ -860,6 +860,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, *ctrlp = ctrl; return 0; +out_remove_ida: + ida_simple_remove(&cntlid_ida, ctrl->cntlid); out_free_sqs: kfree(ctrl->sqs); out_free_cqs: -- cgit v1.2.3 From 6b1943af3f4329c814ec7a651121746d08e6c9ee Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Mon, 13 Nov 2017 12:29:41 +0000 Subject: nvmet: rearrange nvmet_ctrl_free() Make it symmetric to nvmet_alloc_ctrl(). 
Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 07eb45d32a7a..7282ea8d3b96 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -879,21 +879,22 @@ static void nvmet_ctrl_free(struct kref *ref) struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref); struct nvmet_subsys *subsys = ctrl->subsys; - nvmet_stop_keep_alive_timer(ctrl); - mutex_lock(&subsys->lock); list_del(&ctrl->subsys_entry); mutex_unlock(&subsys->lock); + nvmet_stop_keep_alive_timer(ctrl); + flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fatal_err_work); ida_simple_remove(&cntlid_ida, ctrl->cntlid); - nvmet_subsys_put(subsys); kfree(ctrl->sqs); kfree(ctrl->cqs); kfree(ctrl); + + nvmet_subsys_put(subsys); } void nvmet_ctrl_put(struct nvmet_ctrl *ctrl) -- cgit v1.2.3 From 4caff8fc19f10ffb06f095a9cf5a9e755377112e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 31 Dec 2017 14:01:19 +0200 Subject: nvme-pci: don't open-code nvme_reset_ctrl Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 35331fa0013c..add7b18d825d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2498,10 +2498,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release_pools; - nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING); dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); - queue_work(nvme_wq, &dev->ctrl.reset_work); + nvme_reset_ctrl(&dev->ctrl); + return 0; release_pools: -- cgit v1.2.3 From 1a3838d732eaae47385490de88d978d4132d3d84 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 31 Dec 2017 15:33:27 +0200 
Subject: nvme: modify the debug level for setting shutdown timeout When an NVMe controller reports RTD3 Entry Latency larger than the value of shutdown_timeout module parameter, we update the shutdown_timeout accordingly to honor RTD3 Entry Latency. Use an informational debug level instead of a warning level for it. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f837d666cbd4..2a69d735efbc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2252,7 +2252,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) shutdown_timeout, 60); if (ctrl->shutdown_timeout != shutdown_timeout) - dev_warn(ctrl->device, + dev_info(ctrl->device, "Shutdown timeout set to %u seconds\n", ctrl->shutdown_timeout); } else -- cgit v1.2.3 From 2b1b7e784a63f5ded4dda804e05e3f34b3880b25 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Sat, 6 Jan 2018 08:01:58 +0800 Subject: nvme-pci: fix NULL pointer reference in nvme_alloc_ns When the io queues setup or tagset allocation failed, ctrl.tagset is NULL. But the scan work will still be queued and executed, then panic comes up due to NULL pointer reference of ctrl.tagset. To fix this, add a new ctrl state NVME_CTRL_ADMIN_ONLY to indicate that only the admin queue is live. When the io queues setup or tagset allocation fails, the ctrl enters this state and scan work will not be started. But async event work and nvme dev ioctl will still be available. This will be helpful to do further investigation and recovery.
Suggested-by: Sagi Grimberg Signed-off-by: Jianchao Wang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 25 ++++++++++++++++++++++--- drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 30 +++++++++++++++++++++--------- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2a69d735efbc..609307ca9e4d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -232,6 +232,15 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, old_state = ctrl->state; switch (new_state) { + case NVME_CTRL_ADMIN_ONLY: + switch (old_state) { + case NVME_CTRL_RESETTING: + changed = true; + /* FALLTHRU */ + default: + break; + } + break; case NVME_CTRL_LIVE: switch (old_state) { case NVME_CTRL_NEW: @@ -247,6 +256,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (old_state) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: changed = true; /* FALLTHRU */ default: @@ -266,6 +276,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_DELETING: switch (old_state) { case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: case NVME_CTRL_RESETTING: case NVME_CTRL_RECONNECTING: changed = true; @@ -2336,8 +2347,14 @@ static int nvme_dev_open(struct inode *inode, struct file *file) struct nvme_ctrl *ctrl = container_of(inode->i_cdev, struct nvme_ctrl, cdev); - if (ctrl->state != NVME_CTRL_LIVE) + switch (ctrl->state) { + case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: + break; + default: return -EWOULDBLOCK; + } + file->private_data = ctrl; return 0; } @@ -2601,6 +2618,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, static const char *const state_name[] = { [NVME_CTRL_NEW] = "new", [NVME_CTRL_LIVE] = "live", + [NVME_CTRL_ADMIN_ONLY] = "only-admin", [NVME_CTRL_RESETTING] = "resetting", [NVME_CTRL_RECONNECTING]= "reconnecting", [NVME_CTRL_DELETING] = "deleting", @@ -3073,6 +3091,8 @@ static void nvme_scan_work(struct work_struct *work) 
if (ctrl->state != NVME_CTRL_LIVE) return; + WARN_ON_ONCE(!ctrl->tagset); + if (nvme_identify_ctrl(ctrl, &id)) return; @@ -3093,8 +3113,7 @@ static void nvme_scan_work(struct work_struct *work) void nvme_queue_scan(struct nvme_ctrl *ctrl) { /* - * Do not queue new scan work when a controller is reset during - * removal. + * Only new queue scan work when admin and IO queues are both alive */ if (ctrl->state == NVME_CTRL_LIVE) queue_work(nvme_wq, &ctrl->scan_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ea1aa5283e8e..eecf71ce6e75 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -119,6 +119,7 @@ static inline struct nvme_request *nvme_req(struct request *req) enum nvme_ctrl_state { NVME_CTRL_NEW, NVME_CTRL_LIVE, + NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */ NVME_CTRL_RESETTING, NVME_CTRL_RECONNECTING, NVME_CTRL_DELETING, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index add7b18d825d..62119078c2bf 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2035,13 +2035,12 @@ static void nvme_disable_io_queues(struct nvme_dev *dev, int queues) } /* - * Return: error value if an error occurred setting up the queues or calling - * Identify Device. 0 if these succeeded, even if adding some of the - * namespaces failed. At the moment, these failures are silent. TBD which - * failures should be reported. 
+ * return error value only when tagset allocation failed */ static int nvme_dev_add(struct nvme_dev *dev) { + int ret; + if (!dev->ctrl.tagset) { dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; @@ -2057,8 +2056,12 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; dev->tagset.driver_data = dev; - if (blk_mq_alloc_tag_set(&dev->tagset)) - return 0; + ret = blk_mq_alloc_tag_set(&dev->tagset); + if (ret) { + dev_warn(dev->ctrl.device, + "IO queues tagset allocation failed %d\n", ret); + return ret; + } dev->ctrl.tagset = &dev->tagset; nvme_dbbuf_set(dev); @@ -2291,6 +2294,7 @@ static void nvme_reset_work(struct work_struct *work) container_of(work, struct nvme_dev, ctrl.reset_work); bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result = -ENODEV; + enum nvme_ctrl_state new_state = NVME_CTRL_LIVE; if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) goto out; @@ -2354,15 +2358,23 @@ static void nvme_reset_work(struct work_struct *work) dev_warn(dev->ctrl.device, "IO queues not created\n"); nvme_kill_queues(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl); + new_state = NVME_CTRL_ADMIN_ONLY; } else { nvme_start_queues(&dev->ctrl); nvme_wait_freeze(&dev->ctrl); - nvme_dev_add(dev); + /* hit this only when allocate tagset fails */ + if (nvme_dev_add(dev)) + new_state = NVME_CTRL_ADMIN_ONLY; nvme_unfreeze(&dev->ctrl); } - if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { - dev_warn(dev->ctrl.device, "failed to mark controller live\n"); + /* + * If only admin queue live, keep it to do further investigation or + * recovery. 
+ */ + if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) { + dev_warn(dev->ctrl.device, + "failed to mark controller state %d\n", new_state); goto out; } -- cgit v1.2.3 From 0d85adb5fbd33daf81276d6fa0f990136eb4bf29 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 3 Jan 2018 15:39:58 -0800 Subject: lib/crc-ccitt: Add CCITT-FALSE CRC16 variant In support of a soon to be published MFD driver using serdev to talk to a supervisory processor that uses the CCITT-FALSE CRC16 variant in its protocol, this patch was tested successfully on an i.MX6 ARM platform. Link: http://lkml.kernel.org/r/20170413142932.27287-1-andrew.smirnov@gmail.com Signed-off-by: Andrey Vostrikov Signed-off-by: Andrey Smirnov Tested-by: Chris Healy Signed-off-by: Andrew Morton Signed-off-by: Lee Jones --- include/linux/crc-ccitt.h | 7 ++++++ lib/crc-ccitt.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/include/linux/crc-ccitt.h b/include/linux/crc-ccitt.h index cd4f420231ba..72c92c396bb8 100644 --- a/include/linux/crc-ccitt.h +++ b/include/linux/crc-ccitt.h @@ -5,12 +5,19 @@ #include extern u16 const crc_ccitt_table[256]; +extern u16 const crc_ccitt_false_table[256]; extern u16 crc_ccitt(u16 crc, const u8 *buffer, size_t len); +extern u16 crc_ccitt_false(u16 crc, const u8 *buffer, size_t len); static inline u16 crc_ccitt_byte(u16 crc, const u8 c) { return (crc >> 8) ^ crc_ccitt_table[(crc ^ c) & 0xff]; } +static inline u16 crc_ccitt_false_byte(u16 crc, const u8 c) +{ + return (crc << 8) ^ crc_ccitt_false_table[(crc >> 8) ^ c]; +} + #endif /* _LINUX_CRC_CCITT_H */ diff --git a/lib/crc-ccitt.c b/lib/crc-ccitt.c index 7f6dd68d2d09..d873b34039ff 100644 --- a/lib/crc-ccitt.c +++ b/lib/crc-ccitt.c @@ -51,8 +51,49 @@ u16 const crc_ccitt_table[256] = { }; EXPORT_SYMBOL(crc_ccitt_table); +/* + * Similar table to calculate CRC16 variant known as CRC-CCITT-FALSE + * Reflected bits order, does not augment final value. 
+ */ +u16 const crc_ccitt_false_table[256] = { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, + 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, + 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, + 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, + 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, + 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, + 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, + 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, + 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, + 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, + 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, + 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, + 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, + 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, + 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, + 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, + 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, + 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, + 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, + 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, + 0x7C26, 0x6C07, 0x5C64, 0x4C45, 
0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, + 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, + 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 +}; +EXPORT_SYMBOL(crc_ccitt_false_table); + /** - * crc_ccitt - recompute the CRC for the data buffer + * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data + * buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer @@ -65,5 +106,20 @@ u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) } EXPORT_SYMBOL(crc_ccitt); +/** + * crc_ccitt_false - recompute the CRC (CRC-CCITT-FALSE variant) + * for the data buffer + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + */ +u16 crc_ccitt_false(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc_ccitt_false_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc_ccitt_false); + MODULE_DESCRIPTION("CRC-CCITT calculations"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c5ff7de262b6e92ec88a20ea0a0244c29ccdc764 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 20 Dec 2017 22:51:14 -0800 Subject: serdev: Make .remove in struct serdev_device_driver optional Using devres infrastructure it is possible to write a serdev driver that doesn't have any code that needs to be called as a part of .remove. Add code to make .remove optional. 
Acked-by: Philippe Ombredanne Acked-by: Pavel Machek Acked-by: Rob Herring Reviewed-by: Sebastian Reichel Reviewed-by: Guenter Roeck Signed-off-by: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/tty/serdev/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index 1bef39828ca7..34050b439c1f 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -268,8 +268,8 @@ static int serdev_drv_probe(struct device *dev) static int serdev_drv_remove(struct device *dev) { const struct serdev_device_driver *sdrv = to_serdev_device_driver(dev->driver); - - sdrv->remove(to_serdev_device(dev)); + if (sdrv->remove) + sdrv->remove(to_serdev_device(dev)); return 0; } -- cgit v1.2.3 From 2cb67d20cdea1f4da60bbe073d32bd2ba11c22ad Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 20 Dec 2017 22:51:15 -0800 Subject: serdev: Introduce devm_serdev_device_open() Add code implementing managed version of serdev_device_open() for serdev device drivers that "open" the device during driver's lifecycle only once (e.g. opened in .probe() and closed in .remove()). 
Acked-by: Philippe Ombredanne Acked-by: Pavel Machek Acked-by: Rob Herring Reviewed-by: Sebastian Reichel Reviewed-by: Guenter Roeck Signed-off-by: Andrey Smirnov Signed-off-by: Lee Jones --- Documentation/driver-model/devres.txt | 3 +++ drivers/tty/serdev/core.c | 27 +++++++++++++++++++++++++++ include/linux/serdev.h | 1 + 3 files changed, 31 insertions(+) diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index c180045eb43b..7c1bb3d0c222 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -384,6 +384,9 @@ RESET devm_reset_control_get() devm_reset_controller_register() +SERDEV + devm_serdev_device_open() + SLAVE DMA ENGINE devm_acpi_dma_controller_register() diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index 34050b439c1f..28133dbd2808 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -132,6 +132,33 @@ void serdev_device_close(struct serdev_device *serdev) } EXPORT_SYMBOL_GPL(serdev_device_close); +static void devm_serdev_device_release(struct device *dev, void *dr) +{ + serdev_device_close(*(struct serdev_device **)dr); +} + +int devm_serdev_device_open(struct device *dev, struct serdev_device *serdev) +{ + struct serdev_device **dr; + int ret; + + dr = devres_alloc(devm_serdev_device_release, sizeof(*dr), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + ret = serdev_device_open(serdev); + if (ret) { + devres_free(dr); + return ret; + } + + *dr = serdev; + devres_add(dev, dr); + + return 0; +} +EXPORT_SYMBOL_GPL(devm_serdev_device_open); + void serdev_device_write_wakeup(struct serdev_device *serdev) { complete(&serdev->write_comp); diff --git a/include/linux/serdev.h b/include/linux/serdev.h index e69402d4a8ae..9929063bd45d 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -193,6 +193,7 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, int serdev_device_open(struct serdev_device *); 
void serdev_device_close(struct serdev_device *); +int devm_serdev_device_open(struct device *, struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); -- cgit v1.2.3 From 538ee27290fa277f82159f61da1c5f95f6d631e2 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 20 Dec 2017 22:51:16 -0800 Subject: mfd: Add driver for RAVE Supervisory Processor Add a driver for RAVE Supervisory Processor, an MCU implementing various bits of housekeeping functionality (watchdogging, backlight control, LED control, etc) on RAVE family of products by Zodiac Inflight Innovations. This driver implements core MFD/serdev device as well as communication subroutines necessary for commanding the device. Signed-off-by: Andrey Smirnov Acked-by: Philippe Ombredanne Acked-by: Pavel Machek Reviewed-by: Guenter Roeck Reviewed-by: Andy Shevchenko Tested-by: Chris Healy Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 8 + drivers/mfd/Makefile | 2 + drivers/mfd/rave-sp.c | 710 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/rave-sp.h | 60 ++++ 4 files changed, 780 insertions(+) create mode 100644 drivers/mfd/rave-sp.c create mode 100644 include/linux/mfd/rave-sp.h diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 1d20a800e967..ec90d408bfa9 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1859,5 +1859,13 @@ config MFD_VEXPRESS_SYSREG System Registers are the platform configuration block on the ARM Ltd. Versatile Express board. +config RAVE_SP_CORE + tristate "RAVE SP MCU core driver" + depends on SERIAL_DEV_BUS + select CRC_CCITT + help + Select this to get support for the Supervisory Processor + device found on several devices in RAVE line of hardware. 
+ endmenu endif diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index d9474ade32e6..61abc297b97c 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -230,3 +230,5 @@ obj-$(CONFIG_MFD_STM32_LPTIMER) += stm32-lptimer.o obj-$(CONFIG_MFD_STM32_TIMERS) += stm32-timers.o obj-$(CONFIG_MFD_MXS_LRADC) += mxs-lradc.o obj-$(CONFIG_MFD_SC27XX_PMIC) += sprd-sc27xx-spi.o +obj-$(CONFIG_RAVE_SP_CORE) += rave-sp.o + diff --git a/drivers/mfd/rave-sp.c b/drivers/mfd/rave-sp.c new file mode 100644 index 000000000000..5c858e784a89 --- /dev/null +++ b/drivers/mfd/rave-sp.c @@ -0,0 +1,710 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Multifunction core driver for Zodiac Inflight Innovations RAVE + * Supervisory Processor(SP) MCU that is connected via dedicated UART + * port + * + * Copyright (C) 2017 Zodiac Inflight Innovations + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * UART protocol using following entities: + * - message to MCU => ACK response + * - event from MCU => event ACK + * + * Frame structure: + * + * Where: + * - STX - is start of transmission character + * - ETX - end of transmission + * - DATA - payload + * - CHECKSUM - checksum calculated on + * + * If or contain one of control characters, then it is + * escaped using control code. Added does not participate in + * checksum calculation. 
+ */ +#define RAVE_SP_STX 0x02 +#define RAVE_SP_ETX 0x03 +#define RAVE_SP_DLE 0x10 + +#define RAVE_SP_MAX_DATA_SIZE 64 +#define RAVE_SP_CHECKSUM_SIZE 2 /* Worst case scenario on RDU2 */ +/* + * We don't store STX, ETX and unescaped bytes, so Rx is only + * DATA + CSUM + */ +#define RAVE_SP_RX_BUFFER_SIZE \ + (RAVE_SP_MAX_DATA_SIZE + RAVE_SP_CHECKSUM_SIZE) + +#define RAVE_SP_STX_ETX_SIZE 2 +/* + * For Tx we have to have space for everything, STX, EXT and + * potentially stuffed DATA + CSUM data + csum + */ +#define RAVE_SP_TX_BUFFER_SIZE \ + (RAVE_SP_STX_ETX_SIZE + 2 * RAVE_SP_RX_BUFFER_SIZE) + +#define RAVE_SP_BOOT_SOURCE_GET 0 +#define RAVE_SP_BOOT_SOURCE_SET 1 + +#define RAVE_SP_RDU2_BOARD_TYPE_RMB 0 +#define RAVE_SP_RDU2_BOARD_TYPE_DEB 1 + +#define RAVE_SP_BOOT_SOURCE_SD 0 +#define RAVE_SP_BOOT_SOURCE_EMMC 1 +#define RAVE_SP_BOOT_SOURCE_NOR 2 + +/** + * enum rave_sp_deframer_state - Possible state for de-framer + * + * @RAVE_SP_EXPECT_SOF: Scanning input for start-of-frame marker + * @RAVE_SP_EXPECT_DATA: Got start of frame marker, collecting frame + * @RAVE_SP_EXPECT_ESCAPED_DATA: Got escape character, collecting escaped byte + */ +enum rave_sp_deframer_state { + RAVE_SP_EXPECT_SOF, + RAVE_SP_EXPECT_DATA, + RAVE_SP_EXPECT_ESCAPED_DATA, +}; + +/** + * struct rave_sp_deframer - Device protocol deframer + * + * @state: Current state of the deframer + * @data: Buffer used to collect deframed data + * @length: Number of bytes de-framed so far + */ +struct rave_sp_deframer { + enum rave_sp_deframer_state state; + unsigned char data[RAVE_SP_RX_BUFFER_SIZE]; + size_t length; +}; + +/** + * struct rave_sp_reply - Reply as per RAVE device protocol + * + * @length: Expected reply length + * @data: Buffer to store reply payload in + * @code: Expected reply code + * @ackid: Expected reply ACK ID + * @completion: Successful reply reception completion + */ +struct rave_sp_reply { + size_t length; + void *data; + u8 code; + u8 ackid; + struct completion received; +}; + +/** + 
* struct rave_sp_checksum - Variant specific checksum implementation details + * + * @length: Caculated checksum length + * @subroutine: Utilized checksum algorithm implementation + */ +struct rave_sp_checksum { + size_t length; + void (*subroutine)(const u8 *, size_t, u8 *); +}; + +/** + * struct rave_sp_variant_cmds - Variant specific command routines + * + * @translate: Generic to variant specific command mapping routine + * + */ +struct rave_sp_variant_cmds { + int (*translate)(enum rave_sp_command); +}; + +/** + * struct rave_sp_variant - RAVE supervisory processor core variant + * + * @checksum: Variant specific checksum implementation + * @cmd: Variant specific command pointer table + * + */ +struct rave_sp_variant { + const struct rave_sp_checksum *checksum; + struct rave_sp_variant_cmds cmd; +}; + +/** + * struct rave_sp - RAVE supervisory processor core + * + * @serdev: Pointer to underlying serdev + * @deframer: Stored state of the protocol deframer + * @ackid: ACK ID used in last reply sent to the device + * @bus_lock: Lock to serialize access to the device + * @reply_lock: Lock protecting @reply + * @reply: Pointer to memory to store reply payload + * + * @variant: Device variant specific information + * @event_notifier_list: Input event notification chain + * + */ +struct rave_sp { + struct serdev_device *serdev; + struct rave_sp_deframer deframer; + atomic_t ackid; + struct mutex bus_lock; + struct mutex reply_lock; + struct rave_sp_reply *reply; + + const struct rave_sp_variant *variant; + struct blocking_notifier_head event_notifier_list; +}; + +static bool rave_sp_id_is_event(u8 code) +{ + return (code & 0xF0) == RAVE_SP_EVNT_BASE; +} + +static void rave_sp_unregister_event_notifier(struct device *dev, void *res) +{ + struct rave_sp *sp = dev_get_drvdata(dev->parent); + struct notifier_block *nb = *(struct notifier_block **)res; + struct blocking_notifier_head *bnh = &sp->event_notifier_list; + + WARN_ON(blocking_notifier_chain_unregister(bnh, 
nb)); +} + +int devm_rave_sp_register_event_notifier(struct device *dev, + struct notifier_block *nb) +{ + struct rave_sp *sp = dev_get_drvdata(dev->parent); + struct notifier_block **rcnb; + int ret; + + rcnb = devres_alloc(rave_sp_unregister_event_notifier, + sizeof(*rcnb), GFP_KERNEL); + if (!rcnb) + return -ENOMEM; + + ret = blocking_notifier_chain_register(&sp->event_notifier_list, nb); + if (!ret) { + *rcnb = nb; + devres_add(dev, rcnb); + } else { + devres_free(rcnb); + } + + return ret; +} +EXPORT_SYMBOL_GPL(devm_rave_sp_register_event_notifier); + +static void csum_8b2c(const u8 *buf, size_t size, u8 *crc) +{ + *crc = *buf++; + size--; + + while (size--) + *crc += *buf++; + + *crc = 1 + ~(*crc); +} + +static void csum_ccitt(const u8 *buf, size_t size, u8 *crc) +{ + const u16 calculated = crc_ccitt_false(0xffff, buf, size); + + /* + * While the rest of the wire protocol is little-endian, + * CCITT-16 CRC in RDU2 device is sent out in big-endian order. + */ + put_unaligned_be16(calculated, crc); +} + +static void *stuff(unsigned char *dest, const unsigned char *src, size_t n) +{ + while (n--) { + const unsigned char byte = *src++; + + switch (byte) { + case RAVE_SP_STX: + case RAVE_SP_ETX: + case RAVE_SP_DLE: + *dest++ = RAVE_SP_DLE; + /* FALLTHROUGH */ + default: + *dest++ = byte; + } + } + + return dest; +} + +static int rave_sp_write(struct rave_sp *sp, const u8 *data, u8 data_size) +{ + const size_t checksum_length = sp->variant->checksum->length; + unsigned char frame[RAVE_SP_TX_BUFFER_SIZE]; + unsigned char crc[RAVE_SP_CHECKSUM_SIZE]; + unsigned char *dest = frame; + size_t length; + + if (WARN_ON(checksum_length > sizeof(crc))) + return -ENOMEM; + + if (WARN_ON(data_size > sizeof(frame))) + return -ENOMEM; + + sp->variant->checksum->subroutine(data, data_size, crc); + + *dest++ = RAVE_SP_STX; + dest = stuff(dest, data, data_size); + dest = stuff(dest, crc, checksum_length); + *dest++ = RAVE_SP_ETX; + + length = dest - frame; + + 
print_hex_dump(KERN_DEBUG, "rave-sp tx: ", DUMP_PREFIX_NONE, + 16, 1, frame, length, false); + + return serdev_device_write(sp->serdev, frame, length, HZ); +} + +static u8 rave_sp_reply_code(u8 command) +{ + /* + * There isn't a single rule that describes command code -> + * ACK code transformation, but, going through various + * versions of ICDs, there appear to be three distinct groups + * that can be described by simple transformation. + */ + switch (command) { + case 0xA0 ... 0xBE: + /* + * Commands implemented by firmware found in RDU1 and + * older devices all seem to obey the following rule + */ + return command + 0x20; + case 0xE0 ... 0xEF: + /* + * Events emitted by all versions of the firmare use + * least significant bit to get an ACK code + */ + return command | 0x01; + default: + /* + * Commands implemented by firmware found in RDU2 are + * similar to "old" commands, but they use slightly + * different offset + */ + return command + 0x40; + } +} + +int rave_sp_exec(struct rave_sp *sp, + void *__data, size_t data_size, + void *reply_data, size_t reply_data_size) +{ + struct rave_sp_reply reply = { + .data = reply_data, + .length = reply_data_size, + .received = COMPLETION_INITIALIZER_ONSTACK(reply.received), + }; + unsigned char *data = __data; + int command, ret = 0; + u8 ackid; + + command = sp->variant->cmd.translate(data[0]); + if (command < 0) + return command; + + ackid = atomic_inc_return(&sp->ackid); + reply.ackid = ackid; + reply.code = rave_sp_reply_code((u8)command), + + mutex_lock(&sp->bus_lock); + + mutex_lock(&sp->reply_lock); + sp->reply = &reply; + mutex_unlock(&sp->reply_lock); + + data[0] = command; + data[1] = ackid; + + rave_sp_write(sp, data, data_size); + + if (!wait_for_completion_timeout(&reply.received, HZ)) { + dev_err(&sp->serdev->dev, "Command timeout\n"); + ret = -ETIMEDOUT; + + mutex_lock(&sp->reply_lock); + sp->reply = NULL; + mutex_unlock(&sp->reply_lock); + } + + mutex_unlock(&sp->bus_lock); + return ret; +} 
+EXPORT_SYMBOL_GPL(rave_sp_exec); + +static void rave_sp_receive_event(struct rave_sp *sp, + const unsigned char *data, size_t length) +{ + u8 cmd[] = { + [0] = rave_sp_reply_code(data[0]), + [1] = data[1], + }; + + rave_sp_write(sp, cmd, sizeof(cmd)); + + blocking_notifier_call_chain(&sp->event_notifier_list, + rave_sp_action_pack(data[0], data[2]), + NULL); +} + +static void rave_sp_receive_reply(struct rave_sp *sp, + const unsigned char *data, size_t length) +{ + struct device *dev = &sp->serdev->dev; + struct rave_sp_reply *reply; + const size_t payload_length = length - 2; + + mutex_lock(&sp->reply_lock); + reply = sp->reply; + + if (reply) { + if (reply->code == data[0] && reply->ackid == data[1] && + payload_length >= reply->length) { + /* + * We are relying on memcpy(dst, src, 0) to be a no-op + * when handling commands that have a no-payload reply + */ + memcpy(reply->data, &data[2], reply->length); + complete(&reply->received); + sp->reply = NULL; + } else { + dev_err(dev, "Ignoring incorrect reply\n"); + dev_dbg(dev, "Code: expected = 0x%08x received = 0x%08x\n", + reply->code, data[0]); + dev_dbg(dev, "ACK ID: expected = 0x%08x received = 0x%08x\n", + reply->ackid, data[1]); + dev_dbg(dev, "Length: expected = %zu received = %zu\n", + reply->length, payload_length); + } + } + + mutex_unlock(&sp->reply_lock); +} + +static void rave_sp_receive_frame(struct rave_sp *sp, + const unsigned char *data, + size_t length) +{ + const size_t checksum_length = sp->variant->checksum->length; + const size_t payload_length = length - checksum_length; + const u8 *crc_reported = &data[payload_length]; + struct device *dev = &sp->serdev->dev; + u8 crc_calculated[checksum_length]; + + print_hex_dump(KERN_DEBUG, "rave-sp rx: ", DUMP_PREFIX_NONE, + 16, 1, data, length, false); + + if (unlikely(length <= checksum_length)) { + dev_warn(dev, "Dropping short frame\n"); + return; + } + + sp->variant->checksum->subroutine(data, payload_length, + crc_calculated); + + if 
(memcmp(crc_calculated, crc_reported, checksum_length)) { + dev_warn(dev, "Dropping bad frame\n"); + return; + } + + if (rave_sp_id_is_event(data[0])) + rave_sp_receive_event(sp, data, length); + else + rave_sp_receive_reply(sp, data, length); +} + +static int rave_sp_receive_buf(struct serdev_device *serdev, + const unsigned char *buf, size_t size) +{ + struct device *dev = &serdev->dev; + struct rave_sp *sp = dev_get_drvdata(dev); + struct rave_sp_deframer *deframer = &sp->deframer; + const unsigned char *src = buf; + const unsigned char *end = buf + size; + + while (src < end) { + const unsigned char byte = *src++; + + switch (deframer->state) { + case RAVE_SP_EXPECT_SOF: + if (byte == RAVE_SP_STX) + deframer->state = RAVE_SP_EXPECT_DATA; + break; + + case RAVE_SP_EXPECT_DATA: + /* + * Treat special byte values first + */ + switch (byte) { + case RAVE_SP_ETX: + rave_sp_receive_frame(sp, + deframer->data, + deframer->length); + /* + * Once we extracted a complete frame + * out of a stream, we call it done + * and proceed to bailing out while + * resetting the framer to initial + * state, regardless if we've consumed + * all of the stream or not. + */ + goto reset_framer; + case RAVE_SP_STX: + dev_warn(dev, "Bad frame: STX before ETX\n"); + /* + * If we encounter second "start of + * the frame" marker before seeing + * corresponding "end of frame", we + * reset the framer and ignore both: + * frame started by first SOF and + * frame started by current SOF. + * + * NOTE: The above means that only the + * frame started by third SOF, sent + * after this one will have a chance + * to get throught. + */ + goto reset_framer; + case RAVE_SP_DLE: + deframer->state = RAVE_SP_EXPECT_ESCAPED_DATA; + /* + * If we encounter escape sequence we + * need to skip it and collect the + * byte that follows. We do it by + * forcing the next iteration of the + * encompassing while loop. 
+ */ + continue; + } + /* + * For the rest of the bytes, that are not + * speical snoflakes, we do the same thing + * that we do to escaped data - collect it in + * deframer buffer + */ + + /* FALLTHROUGH */ + + case RAVE_SP_EXPECT_ESCAPED_DATA: + deframer->data[deframer->length++] = byte; + + if (deframer->length == sizeof(deframer->data)) { + dev_warn(dev, "Bad frame: Too long\n"); + /* + * If the amount of data we've + * accumulated for current frame so + * far starts to exceed the capacity + * of deframer's buffer, there's + * nothing else we can do but to + * discard that data and start + * assemblying a new frame again + */ + goto reset_framer; + } + + /* + * We've extracted out special byte, now we + * can go back to regular data collecting + */ + deframer->state = RAVE_SP_EXPECT_DATA; + break; + } + } + + /* + * The only way to get out of the above loop and end up here + * is throught consuming all of the supplied data, so here we + * report that we processed it all. + */ + return size; + +reset_framer: + /* + * NOTE: A number of codepaths that will drop us here will do + * so before consuming all 'size' bytes of the data passed by + * serdev layer. We rely on the fact that serdev layer will + * re-execute this handler with the remainder of the Rx bytes + * once we report actual number of bytes that we processed. + */ + deframer->state = RAVE_SP_EXPECT_SOF; + deframer->length = 0; + + return src - buf; +} + +static int rave_sp_rdu1_cmd_translate(enum rave_sp_command command) +{ + if (command >= RAVE_SP_CMD_STATUS && + command <= RAVE_SP_CMD_CONTROL_EVENTS) + return command; + + return -EINVAL; +} + +static int rave_sp_rdu2_cmd_translate(enum rave_sp_command command) +{ + if (command >= RAVE_SP_CMD_GET_FIRMWARE_VERSION && + command <= RAVE_SP_CMD_GET_GPIO_STATE) + return command; + + if (command == RAVE_SP_CMD_REQ_COPPER_REV) { + /* + * As per RDU2 ICD 3.4.47 CMD_GET_COPPER_REV code is + * different from that for RDU1 and it is set to 0x28. 
+ */ + return 0x28; + } + + return rave_sp_rdu1_cmd_translate(command); +} + +static int rave_sp_default_cmd_translate(enum rave_sp_command command) +{ + /* + * All of the following command codes were taken from "Table : + * Communications Protocol Message Types" in section 3.3 + * "MESSAGE TYPES" of Rave PIC24 ICD. + */ + switch (command) { + case RAVE_SP_CMD_GET_FIRMWARE_VERSION: + return 0x11; + case RAVE_SP_CMD_GET_BOOTLOADER_VERSION: + return 0x12; + case RAVE_SP_CMD_BOOT_SOURCE: + return 0x14; + case RAVE_SP_CMD_SW_WDT: + return 0x1C; + case RAVE_SP_CMD_RESET: + return 0x1E; + case RAVE_SP_CMD_RESET_REASON: + return 0x1F; + default: + return -EINVAL; + } +} + +static const struct rave_sp_checksum rave_sp_checksum_8b2c = { + .length = 1, + .subroutine = csum_8b2c, +}; + +static const struct rave_sp_checksum rave_sp_checksum_ccitt = { + .length = 2, + .subroutine = csum_ccitt, +}; + +static const struct rave_sp_variant rave_sp_legacy = { + .checksum = &rave_sp_checksum_8b2c, + .cmd = { + .translate = rave_sp_default_cmd_translate, + }, +}; + +static const struct rave_sp_variant rave_sp_rdu1 = { + .checksum = &rave_sp_checksum_8b2c, + .cmd = { + .translate = rave_sp_rdu1_cmd_translate, + }, +}; + +static const struct rave_sp_variant rave_sp_rdu2 = { + .checksum = &rave_sp_checksum_ccitt, + .cmd = { + .translate = rave_sp_rdu2_cmd_translate, + }, +}; + +static const struct of_device_id rave_sp_dt_ids[] = { + { .compatible = "zii,rave-sp-niu", .data = &rave_sp_legacy }, + { .compatible = "zii,rave-sp-mezz", .data = &rave_sp_legacy }, + { .compatible = "zii,rave-sp-esb", .data = &rave_sp_legacy }, + { .compatible = "zii,rave-sp-rdu1", .data = &rave_sp_rdu1 }, + { .compatible = "zii,rave-sp-rdu2", .data = &rave_sp_rdu2 }, + { /* sentinel */ } +}; + +static const struct serdev_device_ops rave_sp_serdev_device_ops = { + .receive_buf = rave_sp_receive_buf, + .write_wakeup = serdev_device_write_wakeup, +}; + +static int rave_sp_probe(struct serdev_device *serdev) +{ + 
struct device *dev = &serdev->dev; + struct rave_sp *sp; + u32 baud; + int ret; + + if (of_property_read_u32(dev->of_node, "current-speed", &baud)) { + dev_err(dev, + "'current-speed' is not specified in device node\n"); + return -EINVAL; + } + + sp = devm_kzalloc(dev, sizeof(*sp), GFP_KERNEL); + if (!sp) + return -ENOMEM; + + sp->serdev = serdev; + dev_set_drvdata(dev, sp); + + sp->variant = of_device_get_match_data(dev); + if (!sp->variant) + return -ENODEV; + + mutex_init(&sp->bus_lock); + mutex_init(&sp->reply_lock); + BLOCKING_INIT_NOTIFIER_HEAD(&sp->event_notifier_list); + + serdev_device_set_client_ops(serdev, &rave_sp_serdev_device_ops); + ret = devm_serdev_device_open(dev, serdev); + if (ret) + return ret; + + serdev_device_set_baudrate(serdev, baud); + + return devm_of_platform_populate(dev); +} + +MODULE_DEVICE_TABLE(of, rave_sp_dt_ids); + +static struct serdev_device_driver rave_sp_drv = { + .probe = rave_sp_probe, + .driver = { + .name = "rave-sp", + .of_match_table = rave_sp_dt_ids, + }, +}; +module_serdev_device_driver(rave_sp_drv); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Andrey Vostrikov "); +MODULE_AUTHOR("Nikita Yushchenko "); +MODULE_AUTHOR("Andrey Smirnov "); +MODULE_DESCRIPTION("RAVE SP core driver"); diff --git a/include/linux/mfd/rave-sp.h b/include/linux/mfd/rave-sp.h new file mode 100644 index 000000000000..796fb9794c9e --- /dev/null +++ b/include/linux/mfd/rave-sp.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +/* + * Core definitions for RAVE SP MFD driver. 
+ * + * Copyright (C) 2017 Zodiac Inflight Innovations + */ + +#ifndef _LINUX_RAVE_SP_H_ +#define _LINUX_RAVE_SP_H_ + +#include + +enum rave_sp_command { + RAVE_SP_CMD_GET_FIRMWARE_VERSION = 0x20, + RAVE_SP_CMD_GET_BOOTLOADER_VERSION = 0x21, + RAVE_SP_CMD_BOOT_SOURCE = 0x26, + RAVE_SP_CMD_GET_BOARD_COPPER_REV = 0x2B, + RAVE_SP_CMD_GET_GPIO_STATE = 0x2F, + + RAVE_SP_CMD_STATUS = 0xA0, + RAVE_SP_CMD_SW_WDT = 0xA1, + RAVE_SP_CMD_PET_WDT = 0xA2, + RAVE_SP_CMD_RESET = 0xA7, + RAVE_SP_CMD_RESET_REASON = 0xA8, + + RAVE_SP_CMD_REQ_COPPER_REV = 0xB6, + RAVE_SP_CMD_GET_I2C_DEVICE_STATUS = 0xBA, + RAVE_SP_CMD_GET_SP_SILICON_REV = 0xB9, + RAVE_SP_CMD_CONTROL_EVENTS = 0xBB, + + RAVE_SP_EVNT_BASE = 0xE0, +}; + +struct rave_sp; + +static inline unsigned long rave_sp_action_pack(u8 event, u8 value) +{ + return ((unsigned long)value << 8) | event; +} + +static inline u8 rave_sp_action_unpack_event(unsigned long action) +{ + return action; +} + +static inline u8 rave_sp_action_unpack_value(unsigned long action) +{ + return action >> 8; +} + +int rave_sp_exec(struct rave_sp *sp, + void *__data, size_t data_size, + void *reply_data, size_t reply_data_size); + +struct device; +int devm_rave_sp_register_event_notifier(struct device *dev, + struct notifier_block *nb); + +#endif /* _LINUX_RAVE_SP_H_ */ -- cgit v1.2.3 From c3bb333457218ca4ed9553be47c0f567b4ef8a38 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 20 Dec 2017 22:51:17 -0800 Subject: watchdog: Add RAVE SP watchdog driver This driver provides access to RAVE SP watchdog functionality. 
Acked-by: Philippe Ombredanne Acked-by: Pavel Machek Reviewed-by: Guenter Roeck Signed-off-by: Nikita Yushchenko Signed-off-by: Andrey Smirnov Signed-off-by: Lee Jones --- drivers/watchdog/Kconfig | 7 + drivers/watchdog/Makefile | 1 + drivers/watchdog/rave-sp-wdt.c | 337 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 345 insertions(+) create mode 100644 drivers/watchdog/rave-sp-wdt.c diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index ca200d1f310a..5bf613d3b7d6 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -223,6 +223,13 @@ config ZIIRAVE_WATCHDOG To compile this driver as a module, choose M here: the module will be called ziirave_wdt. +config RAVE_SP_WATCHDOG + tristate "RAVE SP Watchdog timer" + depends on RAVE_SP_CORE + select WATCHDOG_CORE + help + Support for the watchdog on RAVE SP device. + # ALPHA Architecture # ARM Architecture diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 715a21078e0c..135c5e81f25e 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -224,3 +224,4 @@ obj-$(CONFIG_MAX77620_WATCHDOG) += max77620_wdt.o obj-$(CONFIG_ZIIRAVE_WATCHDOG) += ziirave_wdt.o obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o obj-$(CONFIG_MENF21BMC_WATCHDOG) += menf21bmc_wdt.o +obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o diff --git a/drivers/watchdog/rave-sp-wdt.c b/drivers/watchdog/rave-sp-wdt.c new file mode 100644 index 000000000000..35db173252f9 --- /dev/null +++ b/drivers/watchdog/rave-sp-wdt.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Driver for watchdog aspect of for Zodiac Inflight Innovations RAVE + * Supervisory Processor(SP) MCU + * + * Copyright (C) 2017 Zodiac Inflight Innovation + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + RAVE_SP_RESET_BYTE = 1, + RAVE_SP_RESET_REASON_NORMAL = 0, + RAVE_SP_RESET_DELAY_MS = 500, +}; + +/** + * struct rave_sp_wdt_variant - 
RAVE SP watchdog variant + * + * @max_timeout: Largest possible watchdog timeout setting + * @min_timeout: Smallest possible watchdog timeout setting + * + * @configure: Function to send configuration command + * @restart: Function to send "restart" command + */ +struct rave_sp_wdt_variant { + unsigned int max_timeout; + unsigned int min_timeout; + + int (*configure)(struct watchdog_device *, bool); + int (*restart)(struct watchdog_device *); +}; + +/** + * struct rave_sp_wdt - RAVE SP watchdog + * + * @wdd: Underlying watchdog device + * @sp: Pointer to parent RAVE SP device + * @variant: Device specific variant information + * @reboot_notifier: Reboot notifier implementing machine reset + */ +struct rave_sp_wdt { + struct watchdog_device wdd; + struct rave_sp *sp; + const struct rave_sp_wdt_variant *variant; + struct notifier_block reboot_notifier; +}; + +static struct rave_sp_wdt *to_rave_sp_wdt(struct watchdog_device *wdd) +{ + return container_of(wdd, struct rave_sp_wdt, wdd); +} + +static int rave_sp_wdt_exec(struct watchdog_device *wdd, void *data, + size_t data_size) +{ + return rave_sp_exec(to_rave_sp_wdt(wdd)->sp, + data, data_size, NULL, 0); +} + +static int rave_sp_wdt_legacy_configure(struct watchdog_device *wdd, bool on) +{ + u8 cmd[] = { + [0] = RAVE_SP_CMD_SW_WDT, + [1] = 0, + [2] = 0, + [3] = on, + [4] = on ? 
wdd->timeout : 0, + }; + + return rave_sp_wdt_exec(wdd, cmd, sizeof(cmd)); +} + +static int rave_sp_wdt_rdu_configure(struct watchdog_device *wdd, bool on) +{ + u8 cmd[] = { + [0] = RAVE_SP_CMD_SW_WDT, + [1] = 0, + [2] = on, + [3] = (u8)wdd->timeout, + [4] = (u8)(wdd->timeout >> 8), + }; + + return rave_sp_wdt_exec(wdd, cmd, sizeof(cmd)); +} + +/** + * rave_sp_wdt_configure - Configure watchdog device + * + * @wdd: Device to configure + * @on: Desired state of the watchdog timer (ON/OFF) + * + * This function configures two aspects of the watchdog timer: + * + * - Wheither it is ON or OFF + * - Its timeout duration + * + * with first aspect specified via function argument and second via + * the value of 'wdd->timeout'. + */ +static int rave_sp_wdt_configure(struct watchdog_device *wdd, bool on) +{ + return to_rave_sp_wdt(wdd)->variant->configure(wdd, on); +} + +static int rave_sp_wdt_legacy_restart(struct watchdog_device *wdd) +{ + u8 cmd[] = { + [0] = RAVE_SP_CMD_RESET, + [1] = 0, + [2] = RAVE_SP_RESET_BYTE + }; + + return rave_sp_wdt_exec(wdd, cmd, sizeof(cmd)); +} + +static int rave_sp_wdt_rdu_restart(struct watchdog_device *wdd) +{ + u8 cmd[] = { + [0] = RAVE_SP_CMD_RESET, + [1] = 0, + [2] = RAVE_SP_RESET_BYTE, + [3] = RAVE_SP_RESET_REASON_NORMAL + }; + + return rave_sp_wdt_exec(wdd, cmd, sizeof(cmd)); +} + +static int rave_sp_wdt_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + /* + * Restart handler is called in atomic context which means we + * can't communicate to SP via UART. Luckily for use SP will + * wait 500ms before actually resetting us, so we ask it to do + * so here and let the rest of the system go on wrapping + * things up. 
+ */ + if (action == SYS_DOWN || action == SYS_HALT) { + struct rave_sp_wdt *sp_wd = + container_of(nb, struct rave_sp_wdt, reboot_notifier); + + const int ret = sp_wd->variant->restart(&sp_wd->wdd); + + if (ret < 0) + dev_err(sp_wd->wdd.parent, + "Failed to issue restart command (%d)", ret); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static int rave_sp_wdt_restart(struct watchdog_device *wdd, + unsigned long action, void *data) +{ + /* + * The actual work was done by reboot notifier above. SP + * firmware waits 500 ms before issuing reset, so let's hang + * here for twice that delay and hopefuly we'd never reach + * the return statement. + */ + mdelay(2 * RAVE_SP_RESET_DELAY_MS); + + return -EIO; +} + +static int rave_sp_wdt_start(struct watchdog_device *wdd) +{ + int ret; + + ret = rave_sp_wdt_configure(wdd, true); + if (!ret) + set_bit(WDOG_HW_RUNNING, &wdd->status); + + return ret; +} + +static int rave_sp_wdt_stop(struct watchdog_device *wdd) +{ + return rave_sp_wdt_configure(wdd, false); +} + +static int rave_sp_wdt_set_timeout(struct watchdog_device *wdd, + unsigned int timeout) +{ + wdd->timeout = timeout; + + return rave_sp_wdt_configure(wdd, watchdog_active(wdd)); +} + +static int rave_sp_wdt_ping(struct watchdog_device *wdd) +{ + u8 cmd[] = { + [0] = RAVE_SP_CMD_PET_WDT, + [1] = 0, + }; + + return rave_sp_wdt_exec(wdd, cmd, sizeof(cmd)); +} + +static const struct watchdog_info rave_sp_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, + .identity = "RAVE SP Watchdog", +}; + +static const struct watchdog_ops rave_sp_wdt_ops = { + .owner = THIS_MODULE, + .start = rave_sp_wdt_start, + .stop = rave_sp_wdt_stop, + .ping = rave_sp_wdt_ping, + .set_timeout = rave_sp_wdt_set_timeout, + .restart = rave_sp_wdt_restart, +}; + +static const struct rave_sp_wdt_variant rave_sp_wdt_legacy = { + .max_timeout = 255, + .min_timeout = 1, + .configure = rave_sp_wdt_legacy_configure, + .restart = rave_sp_wdt_legacy_restart, +}; + 
+static const struct rave_sp_wdt_variant rave_sp_wdt_rdu = { + .max_timeout = 180, + .min_timeout = 60, + .configure = rave_sp_wdt_rdu_configure, + .restart = rave_sp_wdt_rdu_restart, +}; + +static const struct of_device_id rave_sp_wdt_of_match[] = { + { + .compatible = "zii,rave-sp-watchdog-legacy", + .data = &rave_sp_wdt_legacy, + }, + { + .compatible = "zii,rave-sp-watchdog", + .data = &rave_sp_wdt_rdu, + }, + { /* sentinel */ } +}; + +static int rave_sp_wdt_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct watchdog_device *wdd; + struct rave_sp_wdt *sp_wd; + struct nvmem_cell *cell; + __le16 timeout = 0; + int ret; + + sp_wd = devm_kzalloc(dev, sizeof(*sp_wd), GFP_KERNEL); + if (!sp_wd) + return -ENOMEM; + + sp_wd->variant = of_device_get_match_data(dev); + sp_wd->sp = dev_get_drvdata(dev->parent); + + wdd = &sp_wd->wdd; + wdd->parent = dev; + wdd->info = &rave_sp_wdt_info; + wdd->ops = &rave_sp_wdt_ops; + wdd->min_timeout = sp_wd->variant->min_timeout; + wdd->max_timeout = sp_wd->variant->max_timeout; + wdd->status = WATCHDOG_NOWAYOUT_INIT_STATUS; + wdd->timeout = 60; + + cell = nvmem_cell_get(dev, "wdt-timeout"); + if (!IS_ERR(cell)) { + size_t len; + void *value = nvmem_cell_read(cell, &len); + + if (!IS_ERR(value)) { + memcpy(&timeout, value, min(len, sizeof(timeout))); + kfree(value); + } + nvmem_cell_put(cell); + } + watchdog_init_timeout(wdd, le16_to_cpu(timeout), dev); + watchdog_set_restart_priority(wdd, 255); + watchdog_stop_on_unregister(wdd); + + sp_wd->reboot_notifier.notifier_call = rave_sp_wdt_reboot_notifier; + ret = devm_register_reboot_notifier(dev, &sp_wd->reboot_notifier); + if (ret) { + dev_err(dev, "Failed to register reboot notifier\n"); + return ret; + } + + /* + * We don't know if watchdog is running now. 
To be sure, let's + * start it and depend on watchdog core to ping it + */ + wdd->max_hw_heartbeat_ms = wdd->max_timeout * 1000; + ret = rave_sp_wdt_start(wdd); + if (ret) { + dev_err(dev, "Watchdog didn't start\n"); + return ret; + } + + ret = devm_watchdog_register_device(dev, wdd); + if (ret) { + dev_err(dev, "Failed to register watchdog device\n"); + rave_sp_wdt_stop(wdd); + return ret; + } + + return 0; +} + +static struct platform_driver rave_sp_wdt_driver = { + .probe = rave_sp_wdt_probe, + .driver = { + .name = KBUILD_MODNAME, + .of_match_table = rave_sp_wdt_of_match, + }, +}; + +module_platform_driver(rave_sp_wdt_driver); + +MODULE_DEVICE_TABLE(of, rave_sp_wdt_of_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Andrey Vostrikov "); +MODULE_AUTHOR("Nikita Yushchenko "); +MODULE_AUTHOR("Andrey Smirnov "); +MODULE_DESCRIPTION("RAVE SP Watchdog driver"); +MODULE_ALIAS("platform:rave-sp-watchdog"); -- cgit v1.2.3 From a8b751eb709ecefe0a81a0112d9c01b2181a3efa Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Wed, 20 Dec 2017 22:51:18 -0800 Subject: dt-bindings: watchdog: Add bindings for RAVE SP watchdog driver Add Device Tree bindings for RAVE SP watchdog driver - an MFD cell of parent RAVE SP driver (documented in Documentation/devicetree/bindings/mfd/zii,rave-sp.txt).
Acked-by: Philippe Ombredanne Acked-by: Pavel Machek Acked-by: Rob Herring Reviewed-by: Guenter Roeck Signed-off-by: Nikita Yushchenko Signed-off-by: Andrey Smirnov Signed-off-by: Lee Jones --- .../bindings/watchdog/zii,rave-sp-wdt.txt | 39 ++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt diff --git a/Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt b/Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt new file mode 100644 index 000000000000..3de96186e92e --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/zii,rave-sp-wdt.txt @@ -0,0 +1,39 @@ +Zodiac Inflight Innovations RAVE Supervisory Processor Watchdog Bindings + +RAVE SP watchdog device is a "MFD cell" device corresponding to +watchdog functionality of RAVE Supervisory Processor. It is expected +that its Device Tree node is specified as a child of the node +corresponding to the parent RAVE SP device (as documented in +Documentation/devicetree/bindings/mfd/zii,rave-sp.txt) + +Required properties: + +- compatible: Depending on wire protocol implemented by RAVE SP + firmware, should be one of: + - "zii,rave-sp-watchdog" + - "zii,rave-sp-watchdog-legacy" + +Optional properties: + +- wdt-timeout: Two byte nvmem cell specified as per + Documentation/devicetree/bindings/nvmem/nvmem.txt + +Example: + + rave-sp { + compatible = "zii,rave-sp-rdu1"; + current-speed = <38400>; + + eeprom { + wdt_timeout: wdt-timeout@8E { + reg = <0x8E 2>; + }; + }; + + watchdog { + compatible = "zii,rave-sp-watchdog"; + nvmem-cells = <&wdt_timeout>; + nvmem-cell-names = "wdt-timeout"; + }; + } + -- cgit v1.2.3 From 87590ce6e373d1a5401f6539f0c59ef92dd924a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 7 Jan 2018 22:48:00 +0100 Subject: sysfs/cpu: Add vulnerability folder As the meltdown/spectre problem affects several CPU architectures, it makes sense to have common way to express whether a system is 
affected by a particular vulnerability or not. If affected the way to express the mitigation should be common as well. Create /sys/devices/system/cpu/vulnerabilities folder and files for meltdown, spectre_v1 and spectre_v2. Allow architectures to override the show function. Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Will Deacon Cc: Dave Hansen Cc: Linus Torvalds Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180107214913.096657732@linutronix.de --- Documentation/ABI/testing/sysfs-devices-system-cpu | 16 ++++++++ drivers/base/Kconfig | 3 ++ drivers/base/cpu.c | 48 ++++++++++++++++++++++ include/linux/cpu.h | 7 ++++ 4 files changed, 74 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index f3d5817c4ef0..bd3a88e16d8b 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -373,3 +373,19 @@ Contact: Linux kernel mailing list Description: information about CPUs heterogeneity. cpu_capacity: capacity of cpu#. + +What: /sys/devices/system/cpu/vulnerabilities + /sys/devices/system/cpu/vulnerabilities/meltdown + /sys/devices/system/cpu/vulnerabilities/spectre_v1 + /sys/devices/system/cpu/vulnerabilities/spectre_v2 +Date: Januar 2018 +Contact: Linux kernel mailing list +Description: Information about CPU vulnerabilities + + The files are named after the code names of CPU + vulnerabilities. The output of those files reflects the + state of the CPUs in the system. 
Possible output values: + + "Not affected" CPU is not affected by the vulnerability + "Vulnerable" CPU is affected and no mitigation in effect + "Mitigation: $M" CPU is affetcted and mitigation $M is in effect diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 2f6614c9a229..37a71fd9043f 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES config GENERIC_CPU_AUTOPROBE bool +config GENERIC_CPU_VULNERABILITIES + bool + config SOC_BUS bool select GLOB diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 321cd7b4d817..825964efda1d 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -501,10 +501,58 @@ static void __init cpu_dev_register_generic(void) #endif } +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES + +ssize_t __weak cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); + +static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, + &dev_attr_spectre_v1.attr, + &dev_attr_spectre_v2.attr, + NULL +}; + +static const struct attribute_group cpu_root_vulnerabilities_group = { + .name = "vulnerabilities", + .attrs = cpu_root_vulnerabilities_attrs, +}; + +static void __init cpu_register_vulnerabilities(void) +{ + if (sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpu_root_vulnerabilities_group)) + pr_err("Unable to register CPU vulnerabilities\n"); +} + +#else +static inline void 
cpu_register_vulnerabilities(void) { } +#endif + void __init cpu_dev_init(void) { if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) panic("Failed to register CPU subsystem"); cpu_dev_register_generic(); + cpu_register_vulnerabilities(); } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 938ea8ae0ba4..c816e6f2730c 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -47,6 +47,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); extern int cpu_add_dev_attr_group(struct attribute_group *attrs); extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); +extern ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf); + extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, -- cgit v1.2.3 From 61dc0f555b5c761cdafb0ba5bd41ecf22d68a4c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 7 Jan 2018 22:48:01 +0100 Subject: x86/cpu: Implement CPU vulnerabilites sysfs functions Implement the CPU vulnerability show functions for meltdown, spectre_v1 and spectre_v2.
Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Cc: Will Deacon Cc: Dave Hansen Cc: Linus Torvalds Cc: Borislav Petkov Cc: David Woodhouse Link: https://lkml.kernel.org/r/20180107214913.177414879@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cd5199de231e..e23d21ac745a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -89,6 +89,7 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ba0b2424c9b0..76ad6cb44b40 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include #include @@ -60,3 +61,31 @@ void __init check_bugs(void) set_memory_4k((unsigned long)__va(0), 1); #endif } + +#ifdef CONFIG_SYSFS +ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return sprintf(buf, "Not affected\n"); + if (boot_cpu_has(X86_FEATURE_PTI)) + return sprintf(buf, "Mitigation: PTI\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} +#endif -- cgit v1.2.3 From 7066d746d1aa63c2e5d656eeb5c4b0bfad334c7a Mon Sep 17 00:00:00 
2001 From: David Howells Date: Mon, 8 Jan 2018 10:39:35 +0000 Subject: cris: Make THREAD_SIZE available to vmlinux.lds Make THREAD_SIZE available to vmlinux.lds on cris by moving it to asm/thread_info.h and including that from the linker script. This allows init_stack to be allocated in the linker script in a subsequent patch. Reported-by: Guenter Roeck Signed-off-by: David Howells Tested-by: Guenter Roeck cc: Mikael Starvik cc: Jesper Nilsson cc: linux-cris-kernel@axis.com --- arch/cris/include/asm/processor.h | 7 ------- arch/cris/include/asm/thread_info.h | 7 +++++++ arch/cris/kernel/vmlinux.lds.S | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 124dd5ec7f65..b50907799cb2 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -26,13 +26,6 @@ struct task_struct; */ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) -/* THREAD_SIZE is the size of the thread_info/kernel_stack combo. - * normally, the stack is found by doing something like p + THREAD_SIZE - * in CRIS, a page is 8192 bytes, which seems like a sane size - */ -#define THREAD_SIZE PAGE_SIZE -#define THREAD_SIZE_ORDER (0) - /* * At user->kernel entry, the pt_regs struct is stacked on the top of the kernel-stack. * This macro allows us to find those regs for a task. diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 472830c90997..108f77081a3c 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -20,6 +20,13 @@ #endif +/* THREAD_SIZE is the size of the thread_info/kernel_stack combo. 
+ * normally, the stack is found by doing something like p + THREAD_SIZE + * in CRIS, a page is 8192 bytes, which seems like a sane size + */ +#define THREAD_SIZE PAGE_SIZE +#define THREAD_SIZE_ORDER (0) + /* * low level task data that entry.S needs immediate access to * - this struct should fit entirely inside of one cache line diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 6d1dbc1ba767..9b232e0f673e 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -11,6 +11,7 @@ #include #include +#include #ifdef CONFIG_ETRAX_VMEM_SIZE #define __CONFIG_ETRAX_VMEM_SIZE CONFIG_ETRAX_VMEM_SIZE -- cgit v1.2.3 From f058aa3faba60cf61b573bd519366c809b6f2727 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 29 Oct 2017 11:31:23 +0530 Subject: mfd: pcf50633: Fix spelling mistake: 'Falied' -> 'Failed' Trivial fix to spelling mistakes in 'pcf50633_client_dev_register'. Signed-off-by: Arvind Yadav Signed-off-by: Lee Jones --- drivers/mfd/pcf50633-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c index 6155d123a84e..f952dff6765f 100644 --- a/drivers/mfd/pcf50633-core.c +++ b/drivers/mfd/pcf50633-core.c @@ -149,7 +149,7 @@ pcf50633_client_dev_register(struct pcf50633 *pcf, const char *name, *pdev = platform_device_alloc(name, -1); if (!*pdev) { - dev_err(pcf->dev, "Falied to allocate %s\n", name); + dev_err(pcf->dev, "Failed to allocate %s\n", name); return; } -- cgit v1.2.3 From e368866ea7a88f5ea16544c4e037b22d993dc3df Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 29 Oct 2017 12:58:51 +0000 Subject: mfd: ti_am335x_tscadc: Remove redundant assignment to node Node is being initialized a value that is never read, it is being written over a few statements into the function with the return value from call to of_get_child_by_name. Hence this initialization can be removed. 
Cleans up clang warning: drivers/mfd/ti_am335x_tscadc.c:127:22: warning: Value stored to 'node' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: Lee Jones --- drivers/mfd/ti_am335x_tscadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 0f3fab47fe48..3cd958a31f36 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -124,7 +124,7 @@ static int ti_tscadc_probe(struct platform_device *pdev) struct ti_tscadc_dev *tscadc; struct resource *res; struct clk *clk; - struct device_node *node = pdev->dev.of_node; + struct device_node *node; struct mfd_cell *cell; struct property *prop; const __be32 *cur; -- cgit v1.2.3 From 0a5d79bfc0154bb587d5a439dc0e439d4d72f25b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 27 Oct 2017 17:20:45 +0200 Subject: mfd: ab8500-debugfs: Use common error handling code in ab8500_print_modem_registers() Add jump targets so that two error messages are stored only once at the end of this function implementation. This issue was detected by using the Coccinelle software. 
Signed-off-by: Markus Elfring Acked-by: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/ab8500-debugfs.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index c1c815241e02..37f39b2a1aa1 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1620,18 +1620,15 @@ static int ab8500_print_modem_registers(struct seq_file *s, void *p) err = abx500_get_register_interruptible(dev, AB8500_REGU_CTRL1, AB8500_SUPPLY_CONTROL_REG, &orig_value); - if (err < 0) { - dev_err(dev, "ab->read fail %d\n", err); - return err; - } + if (err < 0) + goto report_read_failure; + /* Config 1 will allow APE side to read SIM registers */ err = abx500_set_register_interruptible(dev, AB8500_REGU_CTRL1, AB8500_SUPPLY_CONTROL_REG, AB8500_SUPPLY_CONTROL_CONFIG_1); - if (err < 0) { - dev_err(dev, "ab->write fail %d\n", err); - return err; - } + if (err < 0) + goto report_write_failure; seq_printf(s, " bank 0x%02X:\n", bank); @@ -1641,19 +1638,25 @@ static int ab8500_print_modem_registers(struct seq_file *s, void *p) for (reg = AB8500_FIRST_SIM_REG; reg <= last_sim_reg; reg++) { err = abx500_get_register_interruptible(dev, bank, reg, &value); - if (err < 0) { - dev_err(dev, "ab->read fail %d\n", err); - return err; - } + if (err < 0) + goto report_read_failure; + seq_printf(s, " [0x%02X/0x%02X]: 0x%02X\n", bank, reg, value); } err = abx500_set_register_interruptible(dev, AB8500_REGU_CTRL1, AB8500_SUPPLY_CONTROL_REG, orig_value); - if (err < 0) { - dev_err(dev, "ab->write fail %d\n", err); - return err; - } + if (err < 0) + goto report_write_failure; + return 0; + +report_read_failure: + dev_err(dev, "ab->read fail %d\n", err); + return err; + +report_write_failure: + dev_err(dev, "ab->write fail %d\n", err); + return err; } static int ab8500_modem_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From 572ff4d560be3784205b224cd67d6715620092d7 Mon 
Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 24 Oct 2017 13:51:36 +0530 Subject: mfd: palmas: Assign the right powerhold mask for tps65917 The powerhold mask for TPS65917 is different when compared to the other palmas versions. Hence assign the right mask that enables power off of tps65917 pmic correctly. Signed-off-by: Keerthy Signed-off-by: Lee Jones --- drivers/mfd/palmas.c | 10 +++++++++- include/linux/mfd/palmas.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c index 3922a93f9f92..663a2398b6b1 100644 --- a/drivers/mfd/palmas.c +++ b/drivers/mfd/palmas.c @@ -430,6 +430,7 @@ static void palmas_power_off(void) { unsigned int addr; int ret, slave; + u8 powerhold_mask; struct device_node *np = palmas_dev->dev->of_node; if (of_property_read_bool(np, "ti,palmas-override-powerhold")) { @@ -437,8 +438,15 @@ static void palmas_power_off(void) PALMAS_PRIMARY_SECONDARY_PAD2); slave = PALMAS_BASE_TO_SLAVE(PALMAS_PU_PD_OD_BASE); + if (of_device_is_compatible(np, "ti,tps65917")) + powerhold_mask = + TPS65917_PRIMARY_SECONDARY_PAD2_GPIO_5_MASK; + else + powerhold_mask = + PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_MASK; + ret = regmap_update_bits(palmas_dev->regmap[slave], addr, - PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_MASK, 0); + powerhold_mask, 0); if (ret) dev_err(palmas_dev->dev, "Unable to write PRIMARY_SECONDARY_PAD2 %d\n", diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index 3c8568aa82a5..75e5c8ff85fc 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -3733,6 +3733,9 @@ enum usb_irq_events { #define TPS65917_REGEN3_CTRL_MODE_ACTIVE 0x01 #define TPS65917_REGEN3_CTRL_MODE_ACTIVE_SHIFT 0x00 +/* POWERHOLD Mask field for PRIMARY_SECONDARY_PAD2 register */ +#define TPS65917_PRIMARY_SECONDARY_PAD2_GPIO_5_MASK 0xC + /* Registers for function RESOURCE */ #define TPS65917_REGEN1_CTRL 0x2 #define TPS65917_PLLEN_CTRL 0x3 -- cgit v1.2.3 From
d501ff903bdd735b215bded10958d8f794ca0339 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 14 Nov 2017 14:43:28 +0000 Subject: mfd: cros ec: spi: Simplify delay handling between SPI messages The EC SPI driver prevents SPI transfers being issued too rapidly by keeping track of the time the last transfer was issued via the 'last_transfer_ns' variable. Previously, if the 'last_transfer_ns' variable was zero, this indicated that no previous transfer had been sent and that no delay was needed. However, the EC SPI driver has been updated to always initialise the 'last_transfer_ns' variable during probe and therefore, it is no longer necessary to test if it is zero. Remove the code that checks if this variable is zero. Signed-off-by: Jon Hunter Reviewed-by: Brian Norris Reviewed-by: Douglas Anderson Acked-by: Benson Leung Signed-off-by: Lee Jones --- drivers/mfd/cros_ec_spi.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/mfd/cros_ec_spi.c b/drivers/mfd/cros_ec_spi.c index c9714072e224..54cb760342f5 100644 --- a/drivers/mfd/cros_ec_spi.c +++ b/drivers/mfd/cros_ec_spi.c @@ -72,8 +72,7 @@ * struct cros_ec_spi - information about a SPI-connected EC * * @spi: SPI device we are connected to - * @last_transfer_ns: time that we last finished a transfer, or 0 if there - * if no record + * @last_transfer_ns: time that we last finished a transfer. * @start_of_msg_delay: used to set the delay_usecs on the spi_transfer that * is sent when we want to turn on CS at the start of a transaction.
* @end_of_msg_delay: used to set the delay_usecs on the spi_transfer that @@ -378,18 +377,15 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, u8 *rx_buf; u8 sum; int ret = 0, final_ret; + unsigned long delay; len = cros_ec_prepare_tx(ec_dev, ec_msg); dev_dbg(ec_dev->dev, "prepared, len=%d\n", len); /* If it's too soon to do another transaction, wait */ - if (ec_spi->last_transfer_ns) { - unsigned long delay; /* The delay completed so far */ - - delay = ktime_get_ns() - ec_spi->last_transfer_ns; - if (delay < EC_SPI_RECOVERY_TIME_NS) - ndelay(EC_SPI_RECOVERY_TIME_NS - delay); - } + delay = ktime_get_ns() - ec_spi->last_transfer_ns; + if (delay < EC_SPI_RECOVERY_TIME_NS) + ndelay(EC_SPI_RECOVERY_TIME_NS - delay); rx_buf = kzalloc(len, GFP_KERNEL); if (!rx_buf) @@ -510,18 +506,15 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, u8 *rx_buf; int sum; int ret = 0, final_ret; + unsigned long delay; len = cros_ec_prepare_tx(ec_dev, ec_msg); dev_dbg(ec_dev->dev, "prepared, len=%d\n", len); /* If it's too soon to do another transaction, wait */ - if (ec_spi->last_transfer_ns) { - unsigned long delay; /* The delay completed so far */ - - delay = ktime_get_ns() - ec_spi->last_transfer_ns; - if (delay < EC_SPI_RECOVERY_TIME_NS) - ndelay(EC_SPI_RECOVERY_TIME_NS - delay); - } + delay = ktime_get_ns() - ec_spi->last_transfer_ns; + if (delay < EC_SPI_RECOVERY_TIME_NS) + ndelay(EC_SPI_RECOVERY_TIME_NS - delay); rx_buf = kzalloc(len, GFP_KERNEL); if (!rx_buf) -- cgit v1.2.3 From 299fad6b9b6e4b50929861c701af64a36cde0f31 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 7 Nov 2017 17:14:12 +0900 Subject: mfd: tmio: Move register macros to tmio_core.c These registers are only used in drivers/mfd/tmio_core.c Signed-off-by: Masahiro Yamada Acked-by: Wolfram Sang Signed-off-by: Lee Jones --- drivers/mfd/tmio_core.c | 20 ++++++++++++++++++++ include/linux/mfd/tmio.h | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff 
--git a/drivers/mfd/tmio_core.c b/drivers/mfd/tmio_core.c index 83af78c1b0eb..ebf54cc28f7a 100644 --- a/drivers/mfd/tmio_core.c +++ b/drivers/mfd/tmio_core.c @@ -9,6 +9,26 @@ #include #include +#define CNF_CMD 0x04 +#define CNF_CTL_BASE 0x10 +#define CNF_INT_PIN 0x3d +#define CNF_STOP_CLK_CTL 0x40 +#define CNF_GCLK_CTL 0x41 +#define CNF_SD_CLK_MODE 0x42 +#define CNF_PIN_STATUS 0x44 +#define CNF_PWR_CTL_1 0x48 +#define CNF_PWR_CTL_2 0x49 +#define CNF_PWR_CTL_3 0x4a +#define CNF_CARD_DETECT_MODE 0x4c +#define CNF_SD_SLOT 0x50 +#define CNF_EXT_GCLK_CTL_1 0xf0 +#define CNF_EXT_GCLK_CTL_2 0xf1 +#define CNF_EXT_GCLK_CTL_3 0xf9 +#define CNF_SD_LED_EN_1 0xfa +#define CNF_SD_LED_EN_2 0xfe + +#define SDCREN 0x2 /* Enable access to MMC CTL regs. (flag in COMMAND_REG)*/ + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base) { /* Enable the MMC/SD Control registers */ diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index e1cfe9194129..396a103c8bc6 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -25,26 +25,6 @@ writew((val) >> 16, (addr) + 2); \ } while (0) -#define CNF_CMD 0x04 -#define CNF_CTL_BASE 0x10 -#define CNF_INT_PIN 0x3d -#define CNF_STOP_CLK_CTL 0x40 -#define CNF_GCLK_CTL 0x41 -#define CNF_SD_CLK_MODE 0x42 -#define CNF_PIN_STATUS 0x44 -#define CNF_PWR_CTL_1 0x48 -#define CNF_PWR_CTL_2 0x49 -#define CNF_PWR_CTL_3 0x4a -#define CNF_CARD_DETECT_MODE 0x4c -#define CNF_SD_SLOT 0x50 -#define CNF_EXT_GCLK_CTL_1 0xf0 -#define CNF_EXT_GCLK_CTL_2 0xf1 -#define CNF_EXT_GCLK_CTL_3 0xf9 -#define CNF_SD_LED_EN_1 0xfa -#define CNF_SD_LED_EN_2 0xfe - -#define SDCREN 0x2 /* Enable access to MMC CTL regs. 
(flag in COMMAND_REG)*/ - #define sd_config_write8(base, shift, reg, val) \ tmio_iowrite8((val), (base) + ((reg) << (shift))) #define sd_config_write16(base, shift, reg, val) \ -- cgit v1.2.3 From 263a7c57db8d59f7fdea39ea525f48d3d42ba697 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Mon, 20 Nov 2017 22:02:55 +0100 Subject: mfd: kempld-core: Use resource_size function on resource object drivers/mfd/kempld-core.c:461:13-16: WARNING: Suspicious code. resource_size is maybe missing with ioport Generated by: scripts/coccinelle/api/resource_size.cocci Signed-off-by: Vasyl Gomonovych Signed-off-by: Lee Jones --- drivers/mfd/kempld-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c index 55d824b3a808..390b27cb2c2e 100644 --- a/drivers/mfd/kempld-core.c +++ b/drivers/mfd/kempld-core.c @@ -458,7 +458,7 @@ static int kempld_probe(struct platform_device *pdev) return -EINVAL; pld->io_base = devm_ioport_map(dev, ioport->start, - ioport->end - ioport->start); + resource_size(ioport)); if (!pld->io_base) return -ENOMEM; -- cgit v1.2.3 From a23670df27829558965a3b96a43bd67ed3064988 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 29 Nov 2017 09:00:43 +0100 Subject: mfd: pm8xxx: Make eligible for COMPILE_TEST This should be enabled so that we get full compile coverage of the PM8xxx MFD core with the different subdrivers. Tested on the build servers. 
Suggested-by: Jonathan Cameron Signed-off-by: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index fb884fab0b8d..b860eb5aa194 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -887,7 +887,7 @@ config UCB1400_CORE config MFD_PM8XXX tristate "Qualcomm PM8xxx PMIC chips driver" - depends on (ARM || HEXAGON) + depends on (ARM || HEXAGON || COMPILE_TEST) select IRQ_DOMAIN select MFD_CORE select REGMAP -- cgit v1.2.3 From 2bb3253c30d517f3d574610ed0523a469f58562d Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Tue, 5 Dec 2017 15:46:47 +0100 Subject: mfd: axp20x: Add pinctrl cell for AXP813 As GPIO/pinctrl driver now supports AXP813, add a cell for it. Signed-off-by: Quentin Schulz Acked-by: Maxime Ripard Acked-by: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/axp20x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index 2468b431bb22..d8c92fbbd170 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -878,6 +878,9 @@ static struct mfd_cell axp813_cells[] = { .resources = axp803_pek_resources, }, { .name = "axp20x-regulator", + }, { + .name = "axp20x-gpio", + .of_compatible = "x-powers,axp813-gpio", } }; -- cgit v1.2.3 From fa93f5b7aac54f08dea386fa4d79aa29bf54370e Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Tue, 5 Dec 2017 16:24:18 +0100 Subject: mfd: stm32: Adopt SPDX identifier Add SPDX identifier Signed-off-by: Benjamin Gaignard Acked-by: Philippe Ombredanne Signed-off-by: Lee Jones --- drivers/mfd/stm32-lptimer.c | 6 +----- drivers/mfd/stm32-timers.c | 4 +--- include/linux/mfd/stm32-lptimer.h | 6 +----- include/linux/mfd/stm32-timers.h | 4 +--- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/mfd/stm32-lptimer.c b/drivers/mfd/stm32-lptimer.c index 075330a25f61..a00f99f36559 100644 --- a/drivers/mfd/stm32-lptimer.c +++ 
b/drivers/mfd/stm32-lptimer.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * STM32 Low-Power Timer parent driver. - * * Copyright (C) STMicroelectronics 2017 - * * Author: Fabrice Gasnier - * * Inspired by Benjamin Gaignard's stm32-timers driver - * - * License terms: GNU General Public License (GPL), version 2 */ #include diff --git a/drivers/mfd/stm32-timers.c b/drivers/mfd/stm32-timers.c index a6675a449409..1d347e5dfa79 100644 --- a/drivers/mfd/stm32-timers.c +++ b/drivers/mfd/stm32-timers.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) STMicroelectronics 2016 - * * Author: Benjamin Gaignard - * - * License terms: GNU General Public License (GPL), version 2 */ #include diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h index 77c7cf40d9b4..605f62264825 100644 --- a/include/linux/mfd/stm32-lptimer.h +++ b/include/linux/mfd/stm32-lptimer.h @@ -1,13 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * STM32 Low-Power Timer parent driver. 
- * * Copyright (C) STMicroelectronics 2017 - * * Author: Fabrice Gasnier - * * Inspired by Benjamin Gaignard's stm32-timers driver - * - * License terms: GNU General Public License (GPL), version 2 */ #ifndef _LINUX_STM32_LPTIMER_H_ diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index ce7346e7f77a..2aadab6f34a1 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -1,9 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) STMicroelectronics 2016 - * * Author: Benjamin Gaignard - * - * License terms: GNU General Public License (GPL), version 2 */ #ifndef _LINUX_STM32_GPTIMER_H_ -- cgit v1.2.3 From 39ce6133b9a83489e87aad733f4cd4222ee7f431 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 1 Dec 2017 19:51:26 -0200 Subject: dt-bindings: mfd: mc13xxx: Add the unit address to sysled As the 'reg' property is mandatory in the subnodes, improve the example by adding the unit address to the sysled node. This prevents the following build warning with W=1: Node /soc/aips@70000000/spba@70000000/ecspi@70010000/pmic@0/leds/sysled0 has a reg or ranges property, but no unit name Signed-off-by: Fabio Estevam Reviewed-by: Rob Herring Signed-off-by: Lee Jones --- Documentation/devicetree/bindings/mfd/mc13xxx.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mfd/mc13xxx.txt b/Documentation/devicetree/bindings/mfd/mc13xxx.txt index ac235fe385fc..8261ea73278a 100644 --- a/Documentation/devicetree/bindings/mfd/mc13xxx.txt +++ b/Documentation/devicetree/bindings/mfd/mc13xxx.txt @@ -130,7 +130,7 @@ ecspi@70010000 { /* ECSPI1 */ #size-cells = <0>; led-control = <0x000 0x000 0x0e0 0x000>; - sysled { + sysled@3 { reg = <3>; label = "system:red:live"; linux,default-trigger = "heartbeat"; -- cgit v1.2.3 From 63fb9cb51da257e48af9db6bbfe4ace8ade04ff3 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 22:23:09 +0530 Subject: mfd: Remove 
duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. Signed-off-by: Pravin Shedge Signed-off-by: Lee Jones --- drivers/mfd/intel_soc_pmic_core.c | 1 - drivers/mfd/max77843.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/mfd/intel_soc_pmic_core.c b/drivers/mfd/intel_soc_pmic_core.c index 36adf9e8153e..274306d98ac1 100644 --- a/drivers/mfd/intel_soc_pmic_core.c +++ b/drivers/mfd/intel_soc_pmic_core.c @@ -16,7 +16,6 @@ * Author: Zhu, Lejun */ -#include #include #include #include diff --git a/drivers/mfd/max77843.c b/drivers/mfd/max77843.c index dc5caeaaa6a1..da9612dbb222 100644 --- a/drivers/mfd/max77843.c +++ b/drivers/mfd/max77843.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 7fdec11015c374a6f0b29ceccf35f559c2208042 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Tue, 12 Dec 2017 17:21:19 +0100 Subject: atmel_flexcom: Support resuming after a chip reset The controller used by a flexcom module is configured at boot, and left alone after this. In the suspend mode called "backup with self-refresh" available on SAMA5D2, the chip will resume with most of its registers reset. In this case, we need to restore the state of the flexcom driver on resume. 
Signed-off-by: Romain Izard Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Tested-by: Nicolas Ferre Signed-off-by: Lee Jones --- drivers/mfd/atmel-flexcom.c | 63 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/drivers/mfd/atmel-flexcom.c b/drivers/mfd/atmel-flexcom.c index 064bde9cff5a..f684a93a3340 100644 --- a/drivers/mfd/atmel-flexcom.c +++ b/drivers/mfd/atmel-flexcom.c @@ -39,34 +39,43 @@ #define FLEX_MR_OPMODE(opmode) (((opmode) << FLEX_MR_OPMODE_OFFSET) & \ FLEX_MR_OPMODE_MASK) +struct atmel_flexcom { + void __iomem *base; + u32 opmode; + struct clk *clk; +}; static int atmel_flexcom_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; - struct clk *clk; struct resource *res; - void __iomem *base; - u32 opmode; + struct atmel_flexcom *ddata; int err; - err = of_property_read_u32(np, "atmel,flexcom-mode", &opmode); + ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + + platform_set_drvdata(pdev, ddata); + + err = of_property_read_u32(np, "atmel,flexcom-mode", &ddata->opmode); if (err) return err; - if (opmode < ATMEL_FLEXCOM_MODE_USART || - opmode > ATMEL_FLEXCOM_MODE_TWI) + if (ddata->opmode < ATMEL_FLEXCOM_MODE_USART || + ddata->opmode > ATMEL_FLEXCOM_MODE_TWI) return -EINVAL; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); + ddata->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ddata->base)) + return PTR_ERR(ddata->base); - clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(clk)) - return PTR_ERR(clk); + ddata->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(ddata->clk)) + return PTR_ERR(ddata->clk); - err = clk_prepare_enable(clk); + err = clk_prepare_enable(ddata->clk); if (err) return err; @@ -76,9 +85,9 @@ static int atmel_flexcom_probe(struct platform_device *pdev) * inaccessible and are read as zero. 
Also the external I/O lines of the * Flexcom are muxed to reach the selected device. */ - writel(FLEX_MR_OPMODE(opmode), base + FLEX_MR); + writel(FLEX_MR_OPMODE(ddata->opmode), ddata->base + FLEX_MR); - clk_disable_unprepare(clk); + clk_disable_unprepare(ddata->clk); return devm_of_platform_populate(&pdev->dev); } @@ -89,10 +98,34 @@ static const struct of_device_id atmel_flexcom_of_match[] = { }; MODULE_DEVICE_TABLE(of, atmel_flexcom_of_match); +#ifdef CONFIG_PM_SLEEP +static int atmel_flexcom_resume(struct device *dev) +{ + struct atmel_flexcom *ddata = dev_get_drvdata(dev); + int err; + u32 val; + + err = clk_prepare_enable(ddata->clk); + if (err) + return err; + + val = FLEX_MR_OPMODE(ddata->opmode), + writel(val, ddata->base + FLEX_MR); + + clk_disable_unprepare(ddata->clk); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(atmel_flexcom_pm_ops, NULL, + atmel_flexcom_resume); + static struct platform_driver atmel_flexcom_driver = { .probe = atmel_flexcom_probe, .driver = { .name = "atmel_flexcom", + .pm = &atmel_flexcom_pm_ops, .of_match_table = atmel_flexcom_of_match, }, }; -- cgit v1.2.3 From 156d07050b34605dafc8a6bf493d69b2b998d239 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 14 Dec 2017 12:51:21 +0200 Subject: mfd: ab8500: Introduce DEFINE_SHOW_ATTRIBUTE() macro This macro deduplicates a lot of similar code in the ab8500-debugfs.c module. Targeting to be moved to seq_file.h eventually. 
Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Lee Jones --- drivers/mfd/ab8500-debugfs.c | 406 +++++++------------------------------------ 1 file changed, 62 insertions(+), 344 deletions(-) diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index 37f39b2a1aa1..1afa27de7191 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1258,6 +1258,19 @@ static struct ab8500_prcmu_ranges ab8540_debug_ranges[AB8500_NUM_BANKS] = { }, }; +#define DEFINE_SHOW_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} \ static irqreturn_t ab8500_debug_handler(int irq, void *data) { @@ -1318,7 +1331,7 @@ static int ab8500_registers_print(struct device *dev, u32 bank, return 0; } -static int ab8500_print_bank_registers(struct seq_file *s, void *p) +static int ab8500_bank_registers_show(struct seq_file *s, void *p) { struct device *dev = s->private; u32 bank = debug_bank; @@ -1330,18 +1343,7 @@ static int ab8500_print_bank_registers(struct seq_file *s, void *p) return ab8500_registers_print(dev, bank, s); } -static int ab8500_registers_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_print_bank_registers, inode->i_private); -} - -static const struct file_operations ab8500_registers_fops = { - .open = ab8500_registers_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_bank_registers); static int ab8500_print_all_banks(struct seq_file *s, void *p) { @@ -1528,7 +1530,7 @@ void ab8500_debug_register_interrupt(int line) num_interrupts[line]++; } -static int ab8500_interrupts_print(struct seq_file 
*s, void *p) +static int ab8500_interrupts_show(struct seq_file *s, void *p) { int line; @@ -1557,10 +1559,7 @@ static int ab8500_interrupts_print(struct seq_file *s, void *p) return 0; } -static int ab8500_interrupts_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_interrupts_print, inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8500_interrupts); /* * - HWREG DB8500 formated routines @@ -1603,7 +1602,7 @@ static int ab8500_hwreg_open(struct inode *inode, struct file *file) #define AB8500_LAST_SIM_REG 0x8B #define AB8505_LAST_SIM_REG 0x8C -static int ab8500_print_modem_registers(struct seq_file *s, void *p) +static int ab8500_modem_show(struct seq_file *s, void *p) { struct device *dev = s->private; struct ab8500 *ab8500; @@ -1659,21 +1658,9 @@ report_write_failure: return err; } -static int ab8500_modem_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_print_modem_registers, - inode->i_private); -} - -static const struct file_operations ab8500_modem_fops = { - .open = ab8500_modem_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_modem); -static int ab8500_gpadc_bat_ctrl_print(struct seq_file *s, void *p) +static int ab8500_gpadc_bat_ctrl_show(struct seq_file *s, void *p) { int bat_ctrl_raw; int bat_ctrl_convert; @@ -1690,21 +1677,9 @@ static int ab8500_gpadc_bat_ctrl_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_bat_ctrl_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_gpadc_bat_ctrl_print, - inode->i_private); -} - -static const struct file_operations ab8500_gpadc_bat_ctrl_fops = { - .open = ab8500_gpadc_bat_ctrl_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_bat_ctrl); -static int ab8500_gpadc_btemp_ball_print(struct seq_file *s, void *p) +static int 
ab8500_gpadc_btemp_ball_show(struct seq_file *s, void *p) { int btemp_ball_raw; int btemp_ball_convert; @@ -1721,22 +1696,9 @@ static int ab8500_gpadc_btemp_ball_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_btemp_ball_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8500_gpadc_btemp_ball_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_btemp_ball); -static const struct file_operations ab8500_gpadc_btemp_ball_fops = { - .open = ab8500_gpadc_btemp_ball_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8500_gpadc_main_charger_v_print(struct seq_file *s, void *p) +static int ab8500_gpadc_main_charger_v_show(struct seq_file *s, void *p) { int main_charger_v_raw; int main_charger_v_convert; @@ -1753,22 +1715,9 @@ static int ab8500_gpadc_main_charger_v_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_main_charger_v_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8500_gpadc_main_charger_v_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_main_charger_v); -static const struct file_operations ab8500_gpadc_main_charger_v_fops = { - .open = ab8500_gpadc_main_charger_v_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8500_gpadc_acc_detect1_print(struct seq_file *s, void *p) +static int ab8500_gpadc_acc_detect1_show(struct seq_file *s, void *p) { int acc_detect1_raw; int acc_detect1_convert; @@ -1785,22 +1734,9 @@ static int ab8500_gpadc_acc_detect1_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_acc_detect1_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8500_gpadc_acc_detect1_print, - inode->i_private); -} - -static const struct file_operations ab8500_gpadc_acc_detect1_fops = { - .open = ab8500_gpadc_acc_detect1_open, - .read = seq_read, 
- .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_acc_detect1); -static int ab8500_gpadc_acc_detect2_print(struct seq_file *s, void *p) +static int ab8500_gpadc_acc_detect2_show(struct seq_file *s, void *p) { int acc_detect2_raw; int acc_detect2_convert; @@ -1817,22 +1753,9 @@ static int ab8500_gpadc_acc_detect2_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_acc_detect2_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8500_gpadc_acc_detect2_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_acc_detect2); -static const struct file_operations ab8500_gpadc_acc_detect2_fops = { - .open = ab8500_gpadc_acc_detect2_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8500_gpadc_aux1_print(struct seq_file *s, void *p) +static int ab8500_gpadc_aux1_show(struct seq_file *s, void *p) { int aux1_raw; int aux1_convert; @@ -1849,20 +1772,9 @@ static int ab8500_gpadc_aux1_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_aux1_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_gpadc_aux1_print, inode->i_private); -} - -static const struct file_operations ab8500_gpadc_aux1_fops = { - .open = ab8500_gpadc_aux1_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_aux1); -static int ab8500_gpadc_aux2_print(struct seq_file *s, void *p) +static int ab8500_gpadc_aux2_show(struct seq_file *s, void *p) { int aux2_raw; int aux2_convert; @@ -1879,20 +1791,9 @@ static int ab8500_gpadc_aux2_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_aux2_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_gpadc_aux2_print, inode->i_private); -} - -static const struct file_operations ab8500_gpadc_aux2_fops = { - 
.open = ab8500_gpadc_aux2_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_aux2); -static int ab8500_gpadc_main_bat_v_print(struct seq_file *s, void *p) +static int ab8500_gpadc_main_bat_v_show(struct seq_file *s, void *p) { int main_bat_v_raw; int main_bat_v_convert; @@ -1909,22 +1810,9 @@ static int ab8500_gpadc_main_bat_v_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_main_bat_v_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8500_gpadc_main_bat_v_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_main_bat_v); -static const struct file_operations ab8500_gpadc_main_bat_v_fops = { - .open = ab8500_gpadc_main_bat_v_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8500_gpadc_vbus_v_print(struct seq_file *s, void *p) +static int ab8500_gpadc_vbus_v_show(struct seq_file *s, void *p) { int vbus_v_raw; int vbus_v_convert; @@ -1941,20 +1829,9 @@ static int ab8500_gpadc_vbus_v_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_vbus_v_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_gpadc_vbus_v_print, inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_vbus_v); -static const struct file_operations ab8500_gpadc_vbus_v_fops = { - .open = ab8500_gpadc_vbus_v_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8500_gpadc_main_charger_c_print(struct seq_file *s, void *p) +static int ab8500_gpadc_main_charger_c_show(struct seq_file *s, void *p) { int main_charger_c_raw; int main_charger_c_convert; @@ -1971,22 +1848,9 @@ static int ab8500_gpadc_main_charger_c_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_main_charger_c_open(struct inode *inode, - struct file *file) -{ - return 
single_open(file, ab8500_gpadc_main_charger_c_print, - inode->i_private); -} - -static const struct file_operations ab8500_gpadc_main_charger_c_fops = { - .open = ab8500_gpadc_main_charger_c_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_main_charger_c); -static int ab8500_gpadc_usb_charger_c_print(struct seq_file *s, void *p) +static int ab8500_gpadc_usb_charger_c_show(struct seq_file *s, void *p) { int usb_charger_c_raw; int usb_charger_c_convert; @@ -2003,22 +1867,9 @@ static int ab8500_gpadc_usb_charger_c_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_usb_charger_c_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8500_gpadc_usb_charger_c_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_usb_charger_c); -static const struct file_operations ab8500_gpadc_usb_charger_c_fops = { - .open = ab8500_gpadc_usb_charger_c_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8500_gpadc_bk_bat_v_print(struct seq_file *s, void *p) +static int ab8500_gpadc_bk_bat_v_show(struct seq_file *s, void *p) { int bk_bat_v_raw; int bk_bat_v_convert; @@ -2035,21 +1886,9 @@ static int ab8500_gpadc_bk_bat_v_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_bk_bat_v_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_gpadc_bk_bat_v_print, - inode->i_private); -} - -static const struct file_operations ab8500_gpadc_bk_bat_v_fops = { - .open = ab8500_gpadc_bk_bat_v_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_bk_bat_v); -static int ab8500_gpadc_die_temp_print(struct seq_file *s, void *p) +static int ab8500_gpadc_die_temp_show(struct seq_file *s, void *p) { int die_temp_raw; int die_temp_convert; @@ -2066,21 +1905,9 
@@ static int ab8500_gpadc_die_temp_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_die_temp_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_gpadc_die_temp_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_die_temp); -static const struct file_operations ab8500_gpadc_die_temp_fops = { - .open = ab8500_gpadc_die_temp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8500_gpadc_usb_id_print(struct seq_file *s, void *p) +static int ab8500_gpadc_usb_id_show(struct seq_file *s, void *p) { int usb_id_raw; int usb_id_convert; @@ -2097,20 +1924,9 @@ static int ab8500_gpadc_usb_id_print(struct seq_file *s, void *p) return 0; } -static int ab8500_gpadc_usb_id_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8500_gpadc_usb_id_print, inode->i_private); -} - -static const struct file_operations ab8500_gpadc_usb_id_fops = { - .open = ab8500_gpadc_usb_id_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8500_gpadc_usb_id); -static int ab8540_gpadc_xtal_temp_print(struct seq_file *s, void *p) +static int ab8540_gpadc_xtal_temp_show(struct seq_file *s, void *p) { int xtal_temp_raw; int xtal_temp_convert; @@ -2127,21 +1943,9 @@ static int ab8540_gpadc_xtal_temp_print(struct seq_file *s, void *p) return 0; } -static int ab8540_gpadc_xtal_temp_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8540_gpadc_xtal_temp_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8540_gpadc_xtal_temp); -static const struct file_operations ab8540_gpadc_xtal_temp_fops = { - .open = ab8540_gpadc_xtal_temp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8540_gpadc_vbat_true_meas_print(struct seq_file *s, void *p) +static int 
ab8540_gpadc_vbat_true_meas_show(struct seq_file *s, void *p) { int vbat_true_meas_raw; int vbat_true_meas_convert; @@ -2159,22 +1963,9 @@ static int ab8540_gpadc_vbat_true_meas_print(struct seq_file *s, void *p) return 0; } -static int ab8540_gpadc_vbat_true_meas_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8540_gpadc_vbat_true_meas_print, - inode->i_private); -} - -static const struct file_operations ab8540_gpadc_vbat_true_meas_fops = { - .open = ab8540_gpadc_vbat_true_meas_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8540_gpadc_vbat_true_meas); -static int ab8540_gpadc_bat_ctrl_and_ibat_print(struct seq_file *s, void *p) +static int ab8540_gpadc_bat_ctrl_and_ibat_show(struct seq_file *s, void *p) { int bat_ctrl_raw; int bat_ctrl_convert; @@ -2200,22 +1991,9 @@ static int ab8540_gpadc_bat_ctrl_and_ibat_print(struct seq_file *s, void *p) return 0; } -static int ab8540_gpadc_bat_ctrl_and_ibat_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8540_gpadc_bat_ctrl_and_ibat_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8540_gpadc_bat_ctrl_and_ibat); -static const struct file_operations ab8540_gpadc_bat_ctrl_and_ibat_fops = { - .open = ab8540_gpadc_bat_ctrl_and_ibat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8540_gpadc_vbat_meas_and_ibat_print(struct seq_file *s, void *p) +static int ab8540_gpadc_vbat_meas_and_ibat_show(struct seq_file *s, void *p) { int vbat_meas_raw; int vbat_meas_convert; @@ -2240,23 +2018,9 @@ static int ab8540_gpadc_vbat_meas_and_ibat_print(struct seq_file *s, void *p) return 0; } -static int ab8540_gpadc_vbat_meas_and_ibat_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8540_gpadc_vbat_meas_and_ibat_print, - inode->i_private); -} 
+DEFINE_SHOW_ATTRIBUTE(ab8540_gpadc_vbat_meas_and_ibat); -static const struct file_operations ab8540_gpadc_vbat_meas_and_ibat_fops = { - .open = ab8540_gpadc_vbat_meas_and_ibat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, - void *p) +static int ab8540_gpadc_vbat_true_meas_and_ibat_show(struct seq_file *s, void *p) { int vbat_true_meas_raw; int vbat_true_meas_convert; @@ -2282,23 +2046,9 @@ static int ab8540_gpadc_vbat_true_meas_and_ibat_print(struct seq_file *s, return 0; } -static int ab8540_gpadc_vbat_true_meas_and_ibat_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8540_gpadc_vbat_true_meas_and_ibat_print, - inode->i_private); -} - -static const struct file_operations -ab8540_gpadc_vbat_true_meas_and_ibat_fops = { - .open = ab8540_gpadc_vbat_true_meas_and_ibat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8540_gpadc_vbat_true_meas_and_ibat); -static int ab8540_gpadc_bat_temp_and_ibat_print(struct seq_file *s, void *p) +static int ab8540_gpadc_bat_temp_and_ibat_show(struct seq_file *s, void *p) { int bat_temp_raw; int bat_temp_convert; @@ -2323,22 +2073,9 @@ static int ab8540_gpadc_bat_temp_and_ibat_print(struct seq_file *s, void *p) return 0; } -static int ab8540_gpadc_bat_temp_and_ibat_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ab8540_gpadc_bat_temp_and_ibat_print, - inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(ab8540_gpadc_bat_temp_and_ibat); -static const struct file_operations ab8540_gpadc_bat_temp_and_ibat_fops = { - .open = ab8540_gpadc_bat_temp_and_ibat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - -static int ab8540_gpadc_otp_cal_print(struct seq_file *s, void *p) +static int ab8540_gpadc_otp_calib_show(struct 
seq_file *s, void *p) { struct ab8500_gpadc *gpadc; u16 vmain_l, vmain_h, btemp_l, btemp_h; @@ -2362,18 +2099,7 @@ static int ab8540_gpadc_otp_cal_print(struct seq_file *s, void *p) return 0; } -static int ab8540_gpadc_otp_cal_open(struct inode *inode, struct file *file) -{ - return single_open(file, ab8540_gpadc_otp_cal_print, inode->i_private); -} - -static const struct file_operations ab8540_gpadc_otp_calib_fops = { - .open = ab8540_gpadc_otp_cal_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; +DEFINE_SHOW_ATTRIBUTE(ab8540_gpadc_otp_calib); static int ab8500_gpadc_avg_sample_print(struct seq_file *s, void *p) { @@ -2906,14 +2632,6 @@ static const struct file_operations ab8500_val_fops = { .owner = THIS_MODULE, }; -static const struct file_operations ab8500_interrupts_fops = { - .open = ab8500_interrupts_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .owner = THIS_MODULE, -}; - static const struct file_operations ab8500_subscribe_fops = { .open = ab8500_subscribe_unsubscribe_open, .write = ab8500_subscribe_write, @@ -3000,7 +2718,7 @@ static int ab8500_debug_probe(struct platform_device *plf) goto err; file = debugfs_create_file("all-bank-registers", S_IRUGO, ab8500_dir, - &plf->dev, &ab8500_registers_fops); + &plf->dev, &ab8500_bank_registers_fops); if (!file) goto err; -- cgit v1.2.3 From 0c384fc8620f3cfd886ba969200b79a3563a92df Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Dec 2017 13:35:09 +0100 Subject: mfd: axp20x: Mark axp288 CHRG_BAK_CTRL register volatile The input current limit bits get updated by the charger detection logic, so we should not cache the contents of this register. 
Signed-off-by: Hans de Goede Signed-off-by: Lee Jones --- drivers/mfd/axp20x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index d8c92fbbd170..e94c72c2faa2 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -129,6 +129,7 @@ static const struct regmap_range axp288_volatile_ranges[] = { regmap_reg_range(AXP20X_PWR_INPUT_STATUS, AXP288_POWER_REASON), regmap_reg_range(AXP288_BC_GLOBAL, AXP288_BC_GLOBAL), regmap_reg_range(AXP288_BC_DET_STAT, AXP288_BC_DET_STAT), + regmap_reg_range(AXP20X_CHRG_BAK_CTRL, AXP20X_CHRG_BAK_CTRL), regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IPSOUT_V_HIGH_L), regmap_reg_range(AXP20X_TIMER_CTRL, AXP20X_TIMER_CTRL), regmap_reg_range(AXP22X_GPIO_STATE, AXP22X_GPIO_STATE), -- cgit v1.2.3 From 0f89ffefa4e122e7e9bc1c2d716c6052b4601b76 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 4 Jan 2018 12:20:18 +0300 Subject: mfd: lpc_ich: Do not touch SPI-NOR write protection bit on Apollo Lake Just to be on the safe side, don't touch the bit. If write access to the flash chip is needed, the BIOS needs to enable it explicitly. 
Signed-off-by: Mika Westerberg Signed-off-by: Lee Jones --- drivers/mfd/lpc_ich.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c index cf1120abbf52..53dc1a43472c 100644 --- a/drivers/mfd/lpc_ich.c +++ b/drivers/mfd/lpc_ich.c @@ -1143,11 +1143,6 @@ static int lpc_ich_init_spi(struct pci_dev *dev) res->end = res->start + SPIBASE_APL_SZ - 1; pci_bus_read_config_dword(bus, spi, BCR, &bcr); - if (!(bcr & BCR_WPD)) { - bcr |= BCR_WPD; - pci_bus_write_config_dword(bus, spi, BCR, bcr); - pci_bus_read_config_dword(bus, spi, BCR, &bcr); - } info->writeable = !!(bcr & BCR_WPD); } -- cgit v1.2.3 From d593574aff0ab846136190b1729c151c736727ec Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 7 Jan 2018 15:05:49 +0100 Subject: spi: imx: do not access registers while clocks disabled Since clocks are disabled except during message transfer, clocks are also disabled when spi_imx_remove gets called. Accessing registers leads to a freeze at least on an i.MX 6ULL. Enable clocks before accessing the MXC_CSPICTRL register.
Fixes: 9e556dcc55774 ("spi: spi-imx: only enable the clocks when we start to transfer a message") Signed-off-by: Stefan Agner Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-imx.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 79ddefe4180d..40390d31a93b 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1668,12 +1668,23 @@ static int spi_imx_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct spi_imx_data *spi_imx = spi_master_get_devdata(master); + int ret; spi_bitbang_stop(&spi_imx->bitbang); + ret = clk_enable(spi_imx->clk_per); + if (ret) + return ret; + + ret = clk_enable(spi_imx->clk_ipg); + if (ret) { + clk_disable(spi_imx->clk_per); + return ret; + } + writel(0, spi_imx->base + MXC_CSPICTRL); - clk_unprepare(spi_imx->clk_ipg); - clk_unprepare(spi_imx->clk_per); + clk_disable_unprepare(spi_imx->clk_ipg); + clk_disable_unprepare(spi_imx->clk_per); spi_imx_sdma_exit(spi_imx); spi_master_put(master); -- cgit v1.2.3 From a5a86a7f87d7b684f0369e1f207bb294cfa58dde Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 7 Jan 2018 09:05:50 -0800 Subject: ASoC: fsl_ssi: Fix build error powerpc:mpc85xx_defconfig fails to build with the following errors. 
sound/soc/fsl/fsl_dma.c: In function 'fsl_soc_dma_probe': sound/soc/fsl/fsl_dma.c:916:34: error: 'CCSR_SSI_STX0' undeclared sound/soc/fsl/fsl_dma.c:917:34: error: 'CCSR_SSI_SRX0' undeclared Fixes: a818aa5f967b ("ASoC: fsl_ssi: Rename registers and fields macros") Signed-off-by: Guenter Roeck Acked-by: Nicolin Chen Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c index 0c11f434a374..8c2981b70f64 100644 --- a/sound/soc/fsl/fsl_dma.c +++ b/sound/soc/fsl/fsl_dma.c @@ -913,8 +913,8 @@ static int fsl_soc_dma_probe(struct platform_device *pdev) dma->dai.pcm_free = fsl_dma_free_dma_buffers; /* Store the SSI-specific information that we need */ - dma->ssi_stx_phys = res.start + CCSR_SSI_STX0; - dma->ssi_srx_phys = res.start + CCSR_SSI_SRX0; + dma->ssi_stx_phys = res.start + REG_SSI_STX0; + dma->ssi_srx_phys = res.start + REG_SSI_SRX0; iprop = of_get_property(ssi_np, "fsl,fifo-depth", NULL); if (iprop) -- cgit v1.2.3 From 7c22ce6e218403fc8746f7915b5a391d8227f7e1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 8 Jan 2018 15:50:59 +0530 Subject: regmap: Add SoundWire bus support SoundWire bus provides sdw_read() and sdw_write() APIs for Slave devices to program the registers. Provide support in regmap for SoundWire bus. 
Signed-off-by: Hardik T Shah Signed-off-by: Sanyog Kale Reviewed-by: Philippe Ombredanne Acked-by: Pierre-Louis Bossart Reviewed-by: Takashi Iwai Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- drivers/base/regmap/Kconfig | 4 ++ drivers/base/regmap/Makefile | 1 + drivers/base/regmap/regmap-sdw.c | 88 ++++++++++++++++++++++++++++++++++++++++ include/linux/regmap.h | 37 +++++++++++++++++ 4 files changed, 130 insertions(+) create mode 100644 drivers/base/regmap/regmap-sdw.c diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig index 0368fd7b3a41..067073e4beb1 100644 --- a/drivers/base/regmap/Kconfig +++ b/drivers/base/regmap/Kconfig @@ -37,3 +37,7 @@ config REGMAP_MMIO config REGMAP_IRQ bool + +config REGMAP_SOUNDWIRE + tristate + depends on SOUNDWIRE_BUS diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile index 0d298c446108..22d263cca395 100644 --- a/drivers/base/regmap/Makefile +++ b/drivers/base/regmap/Makefile @@ -13,3 +13,4 @@ obj-$(CONFIG_REGMAP_SPMI) += regmap-spmi.o obj-$(CONFIG_REGMAP_MMIO) += regmap-mmio.o obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o obj-$(CONFIG_REGMAP_W1) += regmap-w1.o +obj-$(CONFIG_REGMAP_SOUNDWIRE) += regmap-sdw.o diff --git a/drivers/base/regmap/regmap-sdw.c b/drivers/base/regmap/regmap-sdw.c new file mode 100644 index 000000000000..50a66382d87d --- /dev/null +++ b/drivers/base/regmap/regmap-sdw.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2015-17 Intel Corporation. 
+ +#include +#include +#include +#include +#include "internal.h" + +static int regmap_sdw_write(void *context, unsigned int reg, unsigned int val) +{ + struct device *dev = context; + struct sdw_slave *slave = dev_to_sdw_dev(dev); + + return sdw_write(slave, reg, val); +} + +static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val) +{ + struct device *dev = context; + struct sdw_slave *slave = dev_to_sdw_dev(dev); + int read; + + read = sdw_read(slave, reg); + if (read < 0) + return read; + + *val = read; + return 0; +} + +static struct regmap_bus regmap_sdw = { + .reg_read = regmap_sdw_read, + .reg_write = regmap_sdw_write, + .reg_format_endian_default = REGMAP_ENDIAN_LITTLE, + .val_format_endian_default = REGMAP_ENDIAN_LITTLE, +}; + +static int regmap_sdw_config_check(const struct regmap_config *config) +{ + /* All register are 8-bits wide as per MIPI Soundwire 1.0 Spec */ + if (config->val_bits != 8) + return -ENOTSUPP; + + /* Registers are 32 bits wide */ + if (config->reg_bits != 32) + return -ENOTSUPP; + + if (config->pad_bits != 0) + return -ENOTSUPP; + + return 0; +} + +struct regmap *__regmap_init_sdw(struct sdw_slave *sdw, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) +{ + int ret; + + ret = regmap_sdw_config_check(config); + if (ret) + return ERR_PTR(ret); + + return __regmap_init(&sdw->dev, &regmap_sdw, + &sdw->dev, config, lock_key, lock_name); +} +EXPORT_SYMBOL_GPL(__regmap_init_sdw); + +struct regmap *__devm_regmap_init_sdw(struct sdw_slave *sdw, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) +{ + int ret; + + ret = regmap_sdw_config_check(config); + if (ret) + return ERR_PTR(ret); + + return __devm_regmap_init(&sdw->dev, &regmap_sdw, + &sdw->dev, config, lock_key, lock_name); +} +EXPORT_SYMBOL_GPL(__devm_regmap_init_sdw); + +MODULE_DESCRIPTION("Regmap SoundWire Module"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regmap.h
b/include/linux/regmap.h index c78e0057df66..eab9c0a29837 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -30,6 +30,7 @@ struct regmap; struct regmap_range_cfg; struct regmap_field; struct snd_ac97; +struct sdw_slave; /* An enum of all the supported cache types */ enum regcache_type { @@ -526,6 +527,10 @@ struct regmap *__regmap_init_ac97(struct snd_ac97 *ac97, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__regmap_init_sdw(struct sdw_slave *sdw, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__devm_regmap_init(struct device *dev, const struct regmap_bus *bus, @@ -563,6 +568,10 @@ struct regmap *__devm_regmap_init_ac97(struct snd_ac97 *ac97, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__devm_regmap_init_sdw(struct sdw_slave *sdw, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); /* * Wrapper for regmap_init macros to include a unique lockdep key and name @@ -711,6 +720,20 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, ac97, config) bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); +/** + * regmap_init_sdw() - Initialise register map + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. 
+ */ +#define regmap_init_sdw(sdw, config) \ + __regmap_lockdep_wrapper(__regmap_init_sdw, #config, \ + sdw, config) + + /** * devm_regmap_init() - Initialise managed register map * @@ -841,6 +864,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__devm_regmap_init_ac97, #config, \ ac97, config) +/** + * devm_regmap_init_sdw() - Initialise managed register map + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sdw(sdw, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sdw, #config, \ + sdw, config) + void regmap_exit(struct regmap *map); int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config); -- cgit v1.2.3 From 3e8052d90d24320a1edb556c20523f3b17195985 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 8 Jan 2018 02:15:01 +0000 Subject: ASoC: mediatek: mt2701: fix return value check in mt2701_afe_pcm_dev_probe() In case of error, the function syscon_node_to_regmap() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). 
Fixes: dfa3cbb83e09 ("ASoC: mediatek: modify MT2701 AFE driver to adapt mfd device") Signed-off-by: Wei Yongjun Signed-off-by: Mark Brown --- sound/soc/mediatek/mt2701/mt2701-afe-pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c index f0cd08fa5c5d..5bc4e00a4a29 100644 --- a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c +++ b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c @@ -1440,9 +1440,9 @@ static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev) } afe->regmap = syscon_node_to_regmap(dev->parent->of_node); - if (!afe->regmap) { + if (IS_ERR(afe->regmap)) { dev_err(dev, "could not get regmap from parent\n"); - return -ENODEV; + return PTR_ERR(afe->regmap); } mutex_init(&afe->irq_alloc_lock); -- cgit v1.2.3 From db51707b9c9aeedd310ebce60f15d5bb006567e0 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 5 Jan 2018 14:12:42 -0800 Subject: ASoC: rockchip: i2s: Support mono capture The Rockchip I2S controller only allows to configure even numbers of capture channels. It is still possible to capture monophonic audio by using dual-channel mode and ignoring the 'data' from the second channel. 
Signed-off-by: Matthias Kaehlcke Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_i2s.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 908211e1d6fc..cc22ab3d10dd 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -328,6 +328,7 @@ static int rockchip_i2s_hw_params(struct snd_pcm_substream *substream, val |= I2S_CHN_4; break; case 2: + case 1: val |= I2S_CHN_2; break; default: @@ -460,7 +461,7 @@ static struct snd_soc_dai_driver rockchip_i2s_dai = { }, .capture = { .stream_name = "Capture", - .channels_min = 2, + .channels_min = 1, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_192000, .formats = (SNDRV_PCM_FMTBIT_S8 | @@ -654,7 +655,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev) } if (!of_property_read_u32(node, "rockchip,capture-channels", &val)) { - if (val >= 2 && val <= 8) + if (val >= 1 && val <= 8) soc_dai->capture.channels_max = val; } -- cgit v1.2.3 From 46318b9784fb7b8363cc67bec24796b15dbba1e9 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Sun, 7 Jan 2018 17:22:33 -0600 Subject: regcache: flat: Un-inline index lookup from cache access This makes the code slightly more readable and allows for cleaner addition of functionality in later patches. Signed-off-by: Andrew F. 
Davis Signed-off-by: Mark Brown --- drivers/base/regmap/regcache-flat.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/base/regmap/regcache-flat.c b/drivers/base/regmap/regcache-flat.c index 4d2e50bfc726..bc6cd88b8cc6 100644 --- a/drivers/base/regmap/regcache-flat.c +++ b/drivers/base/regmap/regcache-flat.c @@ -37,9 +37,12 @@ static int regcache_flat_init(struct regmap *map) cache = map->cache; - for (i = 0; i < map->num_reg_defaults; i++) - cache[regcache_flat_get_index(map, map->reg_defaults[i].reg)] = - map->reg_defaults[i].def; + for (i = 0; i < map->num_reg_defaults; i++) { + unsigned int reg = map->reg_defaults[i].reg; + unsigned int index = regcache_flat_get_index(map, reg); + + cache[index] = map->reg_defaults[i].def; + } return 0; } @@ -56,8 +59,9 @@ static int regcache_flat_read(struct regmap *map, unsigned int reg, unsigned int *value) { unsigned int *cache = map->cache; + unsigned int index = regcache_flat_get_index(map, reg); - *value = cache[regcache_flat_get_index(map, reg)]; + *value = cache[index]; return 0; } @@ -66,8 +70,9 @@ static int regcache_flat_write(struct regmap *map, unsigned int reg, unsigned int value) { unsigned int *cache = map->cache; + unsigned int index = regcache_flat_get_index(map, reg); - cache[regcache_flat_get_index(map, reg)] = value; + cache[index] = value; return 0; } -- cgit v1.2.3 From 9bf485c955bcb707a5e679bcc74075deb0bc8531 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Sun, 7 Jan 2018 17:19:09 -0600 Subject: regmap: Allow empty read/write_flag_mask All zero read and write masks in the regmap config are used to signal no special mask is needed and the bus defaults are used. In some devices all zero read/write masks are the special mask and bus defaults should not be used. To signal this a new variable is added. For example SPI often sets bit 7 in address to signal to the device a read is requested. 
On TI AFE44xx parts with SPI interfaces no bit needs to be set as registers are either read or write only and the operation can be determined from the address only. For this case both masks must be zero to not affect the address. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 4 +++- include/linux/regmap.h | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 8d516a9bfc01..025c62358bd6 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -769,7 +769,9 @@ struct regmap *__regmap_init(struct device *dev, INIT_LIST_HEAD(&map->async_free); init_waitqueue_head(&map->async_waitq); - if (config->read_flag_mask || config->write_flag_mask) { + if (config->read_flag_mask || + config->write_flag_mask || + config->zero_flag_mask) { map->read_flag_mask = config->read_flag_mask; map->write_flag_mask = config->write_flag_mask; } else if (bus) { diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 15eddc1353ba..f8bff272c429 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,7 +296,10 @@ typedef void (*regmap_unlock)(void *); * a read. * @write_flag_mask: Mask to be set in the top bytes of the register when doing * a write. If both read_flag_mask and write_flag_mask are - * empty the regmap_bus default masks are used. + * empty and zero_flag_mask is not set the regmap_bus default + * masks are used. + * @zero_flag_mask: If set, read_flag_mask and write_flag_mask are used even + * if they are both empty. * @use_single_rw: If set, converts the bulk read and write operations into * a series of single read and write operations. This is useful * for device that does not support bulk read and write.
@@ -355,6 +358,7 @@ struct regmap_config { unsigned long read_flag_mask; unsigned long write_flag_mask; + bool zero_flag_mask; bool use_single_rw; bool can_multi_write; -- cgit v1.2.3 From 5c256045b87b8aa8e5bc9d2e2fdc0802351c1f99 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 5 Jan 2018 14:55:33 -0600 Subject: ASoC: acpi: fix machine driver selection based on quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACPI/machine-driver code refactoring introduced in 4.13 introduced a regression for cases where we need a DMI-based quirk to select the machine driver (the BIOS reports an invalid HID). The fix is just to make sure the results of the quirk are actually used. Fixes: 54746dabf770 ('ASoC: Improve machine driver selection based on quirk data') Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=96691 Tested-by: Nicole Færber Signed-off-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/soc-acpi.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c index f21df28bc28e..d4dd2efea45e 100644 --- a/sound/soc/soc-acpi.c +++ b/sound/soc/soc-acpi.c @@ -84,11 +84,9 @@ snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines) for (mach = machines; mach->id[0]; mach++) { if (snd_soc_acpi_check_hid(mach->id) == true) { - if (mach->machine_quirk == NULL) - return mach; - - if (mach->machine_quirk(mach) != NULL) - return mach; + if (mach->machine_quirk) + mach = mach->machine_quirk(mach); + return mach; } } return NULL; -- cgit v1.2.3 From 6703c9771d83ebe092b0d49cb0609a3f9d8b4ff7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Jan 2018 14:59:21 -0300 Subject: perf test bpf: Improve message about expected samples When failing on one of the BPF tests we were just stating: BPF filter result incorrect Add some more info to help figuring out the problem: BPF filter result incorrect, 
expected 56, got 0 samples This came out while investigating this failure, first seen after updating the kernel to the 4.15.0-rc6 tag: [root@jouet ~]# perf test bpf 39: BPF filter : 39.1: Basic BPF filtering : FAILED! 39.2: BPF pinning : Skip 39.3: BPF prologue generation: Skip 39.4: BPF relocation checker : Skip [root@jouet ~]# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-403npu7daupv6b2bmxliv5pk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index c433dd30975a..057c6b8fdb53 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -190,7 +190,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), } if (count != expect) { - pr_debug("BPF filter result incorrect\n"); + pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); goto out_delete_evlist; } -- cgit v1.2.3 From 13cb2d0f513cf35bf74484a392f745f9e9c5a3f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Jan 2018 15:18:07 -0300 Subject: perf test bpf: Use designated struct field initializers To follow standard practice in the kernel sources, documenting the initialization better and helping quickly finding the value for some field in a struct with many entries. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-syn3hz9hz7ukxlxbx5x6hv20@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 59 +++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 057c6b8fdb53..0512f1b5bfdb 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -63,46 +63,41 @@ static struct { bool pin; } bpf_testcase_table[] = { { - LLVM_TESTCASE_BASE, - "Basic BPF filtering", - "[basic_bpf_test]", - "fix 'perf test LLVM' first", - "load bpf object failed", - &epoll_wait_loop, - (NR_ITERS + 1) / 2, - false, + .prog_id = LLVM_TESTCASE_BASE, + .desc = "Basic BPF filtering", + .name = "[basic_bpf_test]", + .msg_compile_fail = "fix 'perf test LLVM' first", + .msg_load_fail = "load bpf object failed", + .target_func = &epoll_wait_loop, + .expect_result = (NR_ITERS + 1) / 2, }, { - LLVM_TESTCASE_BASE, - "BPF pinning", - "[bpf_pinning]", - "fix kbuild first", - "check your vmlinux setting?", - &epoll_wait_loop, - (NR_ITERS + 1) / 2, - true, + .prog_id = LLVM_TESTCASE_BASE, + .desc = "BPF pinning", + .name = "[bpf_pinning]", + .msg_compile_fail = "fix kbuild first", + .msg_load_fail = "check your vmlinux setting?", + .target_func = &epoll_wait_loop, + .expect_result = (NR_ITERS + 1) / 2, + .pin = true, }, #ifdef HAVE_BPF_PROLOGUE { - LLVM_TESTCASE_BPF_PROLOGUE, - "BPF prologue generation", - "[bpf_prologue_test]", - "fix kbuild first", - "check your vmlinux setting?", - &llseek_loop, - (NR_ITERS + 1) / 4, - false, + .prog_id = LLVM_TESTCASE_BPF_PROLOGUE, + .desc = "BPF prologue generation", + .name = "[bpf_prologue_test]", + .msg_compile_fail = "fix kbuild first", + .msg_load_fail = "check your vmlinux setting?", + .target_func = &llseek_loop, + .expect_result = (NR_ITERS + 1) / 4, }, #endif { - LLVM_TESTCASE_BPF_RELOCATION, - "BPF relocation 
checker", - "[bpf_relocation_test]", - "fix 'perf test LLVM' first", - "libbpf error when dealing with relocation", - NULL, - 0, - false, + .prog_id = LLVM_TESTCASE_BPF_RELOCATION, + .desc = "BPF relocation checker", + .name = "[bpf_relocation_test]", + .msg_compile_fail = "fix 'perf test LLVM' first", + .msg_load_fail = "libbpf error when dealing with relocation", }, }; -- cgit v1.2.3 From e0337f4f9aff60a19079b0f224136bb03877db58 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 4 Jan 2018 12:43:32 -0300 Subject: perf test bpf: Hook on epoll_pwait() The 'perf test bpf' was hooking a eBPF program on the SyS_epoll_wait() kernel function, that was what the epoll_wait() glibc function ended up calling, but since at least glibc 2.26, the one that comes with, for instance, Fedora 27, glibc ends up calling SyS_epoll_pwait() when epoll_wait() is used, causing this 'perf test' entry to fail. So switch to using epoll_pwait() and hook the eBPF program to the SyS_epoll_pwait() kernel function to make it work on a wider range of glibc and kernel versions. 
Tested-by: Wang Nan Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-zynvquy63er8s5mrgsz65pto@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-example.c | 4 ++-- tools/perf/tests/bpf.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index 268e5f8e4aa2..e4123c1b0e88 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -31,8 +31,8 @@ struct bpf_map_def SEC("maps") flip_table = { .max_entries = 1, }; -SEC("func=SyS_epoll_wait") -int bpf_func__SyS_epoll_wait(void *ctx) +SEC("func=SyS_epoll_pwait") +int bpf_func__SyS_epoll_pwait(void *ctx) { int ind =0; int *flag = bpf_map_lookup_elem(&flip_table, &ind); diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 0512f1b5bfdb..8e709c9d512c 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -19,13 +19,13 @@ #ifdef HAVE_LIBBPF_SUPPORT -static int epoll_wait_loop(void) +static int epoll_pwait_loop(void) { int i; /* Should fail NR_ITERS times */ for (i = 0; i < NR_ITERS; i++) - epoll_wait(-(i + 1), NULL, 0, 0); + epoll_pwait(-(i + 1), NULL, 0, 0, NULL); return 0; } @@ -68,7 +68,7 @@ static struct { .name = "[basic_bpf_test]", .msg_compile_fail = "fix 'perf test LLVM' first", .msg_load_fail = "load bpf object failed", - .target_func = &epoll_wait_loop, + .target_func = &epoll_pwait_loop, .expect_result = (NR_ITERS + 1) / 2, }, { @@ -77,7 +77,7 @@ static struct { .name = "[bpf_pinning]", .msg_compile_fail = "fix kbuild first", .msg_load_fail = "check your vmlinux setting?", - .target_func = &epoll_wait_loop, + .target_func = &epoll_pwait_loop, .expect_result = (NR_ITERS + 1) / 2, .pin = true, }, -- cgit v1.2.3 From 44df1afdb174fd6038e419f80efd914c0b5f2f85 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Dec 2017 01:50:40 +0000 Subject: perf tools: Fix compile 
error with libunwind x86 Fix a compile error: ... CC util/libunwind/x86_32.o In file included from util/libunwind/x86_32.c:33:0: util/libunwind/../../arch/x86/util/unwind-libunwind.c: In function 'libunwind__x86_reg_id': util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: error: 'EINVAL' undeclared (first use in this function) return -EINVAL; ^ util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: note: each undeclared identifier is reported only once for each function it appears in mv: cannot stat 'util/libunwind/.x86_32.o.tmp': No such file or directory make[4]: *** [util/libunwind/x86_32.o] Error 1 make[3]: *** [util] Error 2 make[2]: *** [libperf-in.o] Error 2 make[1]: *** [sub-make] Error 2 make: *** [all] Error 2 It happens when libunwind-x86 feature is detected. Signed-off-by: Wang Nan Link: http://lkml.kernel.org/r/20171206015040.114574-1-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/unwind-libunwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index 9c917f80c906..05920e3edf7a 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#ifndef REMOTE_UNWIND_LIBUNWIND #include +#ifndef REMOTE_UNWIND_LIBUNWIND #include #include "perf_regs.h" #include "../../util/unwind.h" -- cgit v1.2.3 From 935f5a9d4500020879858c9224c98dfabf16101d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sat, 30 Dec 2017 00:26:52 +0800 Subject: perf report: Fix a wrong offset issue when using /proc/kcore When a valid vmlinux is not found, 'perf report' falls back to look at /proc/kcore. In this case, it will report the impossible large offset. 
For example: # perf record -b -e cycles:k find /etc/ > /dev/null # perf report --stdio --branch-history 22.77% _vm_normal_page+18446603336221188162 | ---page_remove_rmap +18446603336221188324 page_remove_rmap +18446603336221188487 (cycles:5) unlock_page_memcg +18446603336221188096 page_remove_rmap +18446603336221188327 (cycles:1) The issue is that the value passed to the 'addr' parameter of __get_srcline() is the objdump address, so it is not correct to calculate the offset using 'addr - sym->start'. This patch adds a new parameter 'ip' to __get_srcline(), which is not converted to an objdump address. With this patch, the perf report output is: 22.77% _vm_normal_page+66 | ---page_remove_rmap +228 page_remove_rmap +391 (cycles:5) unlock_page_memcg +0 page_remove_rmap +231 (cycles:1) page_remove_rmap +236 Committer testing: Make sure you get any valid vmlinux out of the way, using '-v' on the 'perf report' case and deleting it from places where perf searches them, like your kernel build dir and the build-id cache, in ~/.debug/.
Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1514564812-17344-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- tools/perf/util/machine.c | 2 +- tools/perf/util/map.c | 2 +- tools/perf/util/sort.c | 16 ++++++++++------ tools/perf/util/srcline.c | 9 +++++---- tools/perf/util/srcline.h | 5 +++-- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 68e687d1bf99..28b233c3dcbe 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1960,7 +1960,8 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, if (percent_max <= 0.5) continue; - al->path = get_srcline(map->dso, start + al->offset, NULL, false, true); + al->path = get_srcline(map->dso, start + al->offset, NULL, + false, true, start + al->offset); insert_source_line(&tmp_root, al); } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 64d255f6a537..b05a67464c03 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1726,7 +1726,7 @@ static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip) bool show_addr = callchain_param.key == CCKEY_ADDRESS; srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), - sym, show_sym, show_addr); + sym, show_sym, show_addr, ip); srcline__tree_insert(&map->dso->srclines, ip, srcline); } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6d40efd74402..8fe57031e1a8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -419,7 +419,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, map__rip_2objdump(map, addr), NULL, - true, true); + true, true, addr); if (srcline != 
SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a00eacdf02ed..211e7f326b9f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -336,7 +336,7 @@ char *hist_entry__get_srcline(struct hist_entry *he) return SRCLINE_UNKNOWN; return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true, true); + he->ms.sym, true, true, he->ip); } static int64_t @@ -380,7 +380,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->from.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->from.al_addr); } if (!right->branch_info->srcline_from) { struct map *map = right->branch_info->from.map; @@ -391,7 +392,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->from.al_addr), right->branch_info->from.sym, - true, true); + true, true, + right->branch_info->from.al_addr); } return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -423,7 +425,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->to.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->to.al_addr); } if (!right->branch_info->srcline_to) { struct map *map = right->branch_info->to.map; @@ -434,7 +437,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->to.al_addr), right->branch_info->to.sym, - true, true); + true, true, + right->branch_info->to.al_addr); } return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -465,7 +469,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e) return no_srcfile; sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), - e->ms.sym, false, true, true); + 
e->ms.sym, false, true, true, e->ip); if (!strcmp(sf, SRCLINE_UNKNOWN)) return no_srcfile; p = strchr(sf, ':'); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index d19f05c56de6..3c21fd059b64 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -496,7 +496,8 @@ out: #define A2L_FAIL_LIMIT 123 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines) + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip) { char *file = NULL; unsigned line = 0; @@ -536,7 +537,7 @@ out: if (sym) { if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", - addr - sym->start) < 0) + ip - sym->start) < 0) return SRCLINE_UNKNOWN; } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) return SRCLINE_UNKNOWN; @@ -550,9 +551,9 @@ void free_srcline(char *srcline) } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr) + bool show_sym, bool show_addr, u64 ip) { - return __get_srcline(dso, addr, sym, show_sym, show_addr, false); + return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip); } struct srcline_node { diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 847b7086182c..b2bb5502fd62 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -11,9 +11,10 @@ struct symbol; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr); + bool show_sym, bool show_addr, u64 ip); char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines); + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip); void free_srcline(char *srcline); /* insert the srcline into the DSO, which will take ownership */ -- cgit v1.2.3 From 40c39e3046411f84bab82f66783ff3593e2bcd9b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 26 Dec 2017 18:42:43 
+0800 Subject: perf report: Fix a no annotate browser displayed issue When enabling '-b' option in perf record, for example, perf record -b ... perf report and then browsing the annotate browser from perf report (press 'A'), it would fail (annotate browser can't be displayed). It's because the '.add_entry_cb' op of struct report is overwritten by hist_iter__branch_callback() in builtin-report.c. But this function doesn't do something like mapping symbols and sources. So next, do_annotate() will return directly. notes = symbol__annotation(act->ms.sym); if (!notes->src) return 0; This patch adds the lost code to hist_iter__branch_callback (refer to hist_iter__report_callback). v2: Fix a crash bug when perform 'perf report --stdio'. The reason is that we init the symbol annotation only in browser mode, it doesn't allocate/init resources for stdio mode. So now in hist_iter__branch_callback(), it will return directly if it's not in browser mode. Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1514284963-18587-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index eb9ce6327e71..07827cd51480 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -162,12 +162,28 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, struct hist_entry *he = iter->he; struct report *rep = arg; struct branch_info *bi; + struct perf_sample *sample = iter->sample; + struct perf_evsel *evsel = iter->evsel; + int err; + + if (!ui__has_annotation()) + return 0; + + hist__account_cycles(sample->branch_stack, al, sample, + rep->nonany_branch_mode); bi = he->branch_info; + err = addr_map_symbol__inc_samples(&bi->from, sample, 
evsel->idx); + if (err) + goto out; + + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + branch_type_count(&rep->brtype_stat, &bi->flags, bi->from.addr, bi->to.addr); - return 0; +out: + return err; } static int process_sample_event(struct perf_tool *tool, -- cgit v1.2.3 From 29159a4ed7044c52e3e2cf1a9fb55cec4745c60b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Jan 2018 13:58:31 +0100 Subject: ALSA: pcm: Abort properly at pending signal in OSS read/write loops The loops for read and write in PCM OSS emulation have no proper check of pending signals, and they keep processing even after user tries to break. This results in a very long delay, often seen as RCU stall when a huge unprocessed bytes remain queued. The bug could be easily triggered by syzkaller. As a simple workaround, this patch adds the proper check of pending signals and aborts the loop appropriately. Reported-by: syzbot+993cb4cfcbbff3947c21@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index ceaa51f76591..e317964bd2ea 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1381,6 +1381,10 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha tmp != runtime->oss.period_bytes) break; } + if (signal_pending(current)) { + tmp = -ERESTARTSYS; + goto err; + } } mutex_unlock(&runtime->oss.params_lock); return xfer; @@ -1466,6 +1470,10 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use bytes -= tmp; xfer += tmp; } + if (signal_pending(current)) { + tmp = -ERESTARTSYS; + goto err; + } } mutex_unlock(&runtime->oss.params_lock); return xfer; -- cgit v1.2.3 From 6011518db3bd04c80cd3ce3e6aea1c399739adb4 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:41 +0800 Subject: perf header: Add infrastructure to record first and last sample time perf 
report/script/... have a --time option to limit the time range of output. That's very useful to slice large traces, e.g. when processing the output of perf script for some analysis. But right now --time only supports absolute time. Also there is no fast way to get the start/end times of a given trace except for looking at it. This makes it hard to e.g. only decode the first half of the trace, which is useful for parallelization of scripts Another problem is that perf records are variable size and there is no synchronization mechanism. So the only way to find the last sample reliably would be to walk all samples. But we want to avoid that in perf report/... because it is already quite expensive. That is why storing the first sample time and last sample time in perf record is better. This patch creates a new header feature type HEADER_SAMPLE_TIME and related ops. Save the first sample time and the last sample time to the feature section in perf file header. That will be done when, for instance, processing build-ids, where we already have to process all samples to create the build-id table, take advantage of that to further amortize that processing by storing HEADER_SAMPLE_TIME to make 'perf report/script' faster when using --time. Committer testing: After this patch is applied the header is written with zeroes, we need the next patch, for "perf record" to actually write the timestamps: # perf report -D | grep PERF_RECORD_SAMPLE\( 22501155244406 0x44f0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 25016/25016: 0xffffffffa21be8c5 period: 1 addr: 0 22501155793625 0x4a30 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 25016/25016: 0xffffffffa21ffd50 period: 2828043 addr: 0 # perf report --header | grep "time of " # time of first sample : 0.000000 # time of last sample : 0.000000 # Changelog: v7: 1. Rebase to latest perf/core branch. 2. Add following clarification in patch description according to Arnaldo's suggestion. 
"That will be done when, for instance, processing build-ids, where we already have to process all samples to create the build-id table, take advantage of that to further amortize that processing by storing HEADER_SAMPLE_TIME to make 'perf report/script' faster when using --time." v4: Use perf script time style for timestamp printing. Also add with the printing of sample duration. v3: Remove the definitions of first_sample_time/last_sample_time from perf_session. Just define them in perf_evlist Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.data-file-format.txt | 4 ++ tools/perf/util/evlist.h | 2 + tools/perf/util/header.c | 60 ++++++++++++++++++++++ tools/perf/util/header.h | 1 + 4 files changed, 67 insertions(+) diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 15e8b48077ba..f7d85e89a98a 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -261,6 +261,10 @@ struct { struct perf_header_string map; }[number_of_cache_levels]; + HEADER_SAMPLE_TIME = 21, + +Two uint64_t for the time of first sample and the time of last sample. 
+ other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 75160666d305..e7fbca69cbac 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -50,6 +50,8 @@ struct perf_evlist { struct perf_evsel *selected; struct events_stats stats; struct perf_env *env; + u64 first_sample_time; + u64 last_sample_time; }; struct perf_evsel_str_handler { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ca73aa7be708..a326e0d8b5b6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "evlist.h" #include "evsel.h" @@ -35,6 +36,7 @@ #include #include "asm/bug.h" #include "tool.h" +#include "time-utils.h" #include "sane_ctype.h" @@ -1180,6 +1182,20 @@ static int write_stat(struct feat_fd *ff __maybe_unused, return 0; } +static int write_sample_time(struct feat_fd *ff, + struct perf_evlist *evlist) +{ + int ret; + + ret = do_write(ff, &evlist->first_sample_time, + sizeof(evlist->first_sample_time)); + if (ret < 0) + return ret; + + return do_write(ff, &evlist->last_sample_time, + sizeof(evlist->last_sample_time)); +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1505,6 +1521,28 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp) } } +static void print_sample_time(struct feat_fd *ff, FILE *fp) +{ + struct perf_session *session; + char time_buf[32]; + double d; + + session = container_of(ff->ph, struct perf_session, header); + + timestamp__scnprintf_usec(session->evlist->first_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of first sample : %s\n", time_buf); + + timestamp__scnprintf_usec(session->evlist->last_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of last sample : %s\n", time_buf); + + d = (double)(session->evlist->last_sample_time - + 
session->evlist->first_sample_time) / NSEC_PER_MSEC; + + fprintf(fp, "# sample duration : %10.3f ms\n", d); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -2146,6 +2184,27 @@ out_free_caches: return -1; } +static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) +{ + struct perf_session *session; + u64 first_sample_time, last_sample_time; + int ret; + + session = container_of(ff->ph, struct perf_session, header); + + ret = do_read_u64(ff, &first_sample_time); + if (ret) + return -1; + + ret = do_read_u64(ff, &last_sample_time); + if (ret) + return -1; + + session->evlist->first_sample_time = first_sample_time; + session->evlist->last_sample_time = last_sample_time; + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2203,6 +2262,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(AUXTRACE, auxtrace, false), FEAT_OPN(STAT, stat, false), FEAT_OPN(CACHE, cache, true), + FEAT_OPR(SAMPLE_TIME, sample_time, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 317fb901e47f..f28aaaa3a440 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -35,6 +35,7 @@ enum { HEADER_AUXTRACE, HEADER_STAT, HEADER_CACHE, + HEADER_SAMPLE_TIME, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; -- cgit v1.2.3 From 68588baf8d01826673f2874f434123029e519052 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:42 +0800 Subject: perf record: Record the first and last sample time in the header In the default 'perf record' configuration, all samples are processed, to create the HEADER_BUILD_ID table. So it's very easy to get the first/last samples and save the time to perf file header via the function write_sample_time(). 
Later, at post processing time, perf report/script will fetch the time from perf file header. Committer testing: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.099 MB perf.data (1101 samples) ] [root@jouet home]# perf report --header | grep "time of " # time of first sample : 22947.909226 # time of last sample : 22948.910704 # # perf report -D | grep PERF_RECORD_SAMPLE\( 0 22947909226101 0x20bb68 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa21b1af3 period: 1 addr: 0 0 22947909229928 0x20bb98 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa200d204 period: 1 addr: 0 3 22948910397351 0x219360 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 28251/28251: 0xffffffffa22071d8 period: 169518 addr: 0 0 22948910652380 0x20f120 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa2856816 period: 198807 addr: 0 2 22948910704034 0x2172d0 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa2856816 period: 88111 addr: 0 # Changelog: v7: Just update the patch description according to Arnaldo's suggestion. v6: Currently '--buildid-all' is not enabled at default. So the walking on all samples is the default operation. There is no big overhead to calculate the timestamp boundary in process_sample_event handler once we already go through all samples. So the timestamp boundary calculation is enabled by default when '--buildid-all' is not enabled. While if '--buildid-all' is enabled, we creates a new option "--timestamp-boundary" for user to decide if it enables the timestamp boundary calculation. v5: There is an issue that the sample walking can only work when '--buildid-all' is not enabled. So we need to let the walking be able to work even if '--buildid-all' is enabled and let the processing skips the dso hit marking for this case. At first, I want to provide a new option "--record-time-boundaries". While after consideration, I think a new option is not very necessary. 
v3: Remove the definitions of first_sample_time and last_sample_time from struct record and directly save them in perf_evlist. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5a626ef666c2..3eea6de35a38 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -430,6 +430,9 @@ Configure all used events to run in user space. --timestamp-filename Append timestamp to output file name. +--timestamp-boundary:: +Record timestamp boundary (time of first/last samples). + --switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one based on 'mode' value: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 50385d89c497..65681a1a292a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -78,6 +78,7 @@ struct record { bool no_buildid_cache_set; bool buildid_all; bool timestamp_filename; + bool timestamp_boundary; struct switch_output switch_output; unsigned long long samples; }; @@ -409,8 +410,15 @@ static int process_sample_event(struct perf_tool *tool, { struct record *rec = container_of(tool, struct record, tool); - rec->samples++; + if (rec->evlist->first_sample_time == 0) + rec->evlist->first_sample_time = sample->time; + + rec->evlist->last_sample_time = sample->time; + if (rec->buildid_all) + return 0; + + rec->samples++; return build_id__mark_dso_hit(tool, event, sample, evsel, machine); } @@ -435,9 +443,11 @@ static int process_buildids(struct record *rec) 
/* * If --buildid-all is given, it marks all DSO regardless of hits, - * so no need to process samples. + * so no need to process samples. But if timestamp_boundary is enabled, + * it still needs to walk on all samples to get the timestamps of + * first/last samples. */ - if (rec->buildid_all) + if (rec->buildid_all && !rec->timestamp_boundary) rec->tool.sample = NULL; return perf_session__process_events(session); @@ -1621,6 +1631,8 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), + OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, + "Record timestamp boundary (time of first/last samples)"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, &record.switch_output.set, "signal,size,time", "Switch output when receive SIGUSR2 or cross size,time threshold", -- cgit v1.2.3 From 13a70f350665580708ab11f725d3578eaacbf2d0 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:43 +0800 Subject: perf tools: Create function to parse time percent Current perf report/script/... have a --time option to limit the time range of output. But right now it only supports absolute time, add support for time percentage. For example: 1. Select the second 10% time slice perf report --time 10%/2 2. Select from 0% to 10% time slice perf report --time 0%-10% It also support the multiple time ranges. 3. Select the first and second 10% time slices perf report --time 10%/1,10%/2 4. Select from 0% to 10% and 30% to 40% slices perf report --time 0%-10%,30%-40% Changelog: v4: An issue is found. Following passes. perf script --time 10%/10x12321xsdfdasfdsafdsafdsa Now it uses strtol to replace atoi. Committer notes: This just puts in place the infrastructure, so the examples in this cset comment will only work later, after more patches in this series are applied. 
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 205 ++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/time-utils.h | 3 + 2 files changed, 196 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 81927d027417..61c46022de0b 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "perf.h" #include "debug.h" @@ -60,11 +61,10 @@ static int parse_timestr_sec_nsec(struct perf_time_interval *ptime, return 0; } -int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +static int split_start_end(char **start, char **end, const char *ostr, char ch) { char *start_str, *end_str; char *d, *str; - int rc = 0; if (ostr == NULL || *ostr == '\0') return 0; @@ -74,25 +74,35 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) if (str == NULL) return -ENOMEM; - ptime->start = 0; - ptime->end = 0; - - /* str has the format: , - * variations: , - * , - * , - */ start_str = str; - d = strchr(start_str, ','); + d = strchr(start_str, ch); if (d) { *d = '\0'; ++d; } end_str = d; + *start = start_str; + *end = end_str; + + return 0; +} + +int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +{ + char *start_str = NULL, *end_str; + int rc; + + rc = split_start_end(&start_str, &end_str, ostr, ','); + if (rc || !start_str) + return rc; + + ptime->start = 0; + ptime->end = 0; + rc = parse_timestr_sec_nsec(ptime, start_str, end_str); - free(str); + free(start_str); /* make sure end time is after start time if it was given */ if (rc == 0 && ptime->end && ptime->end < ptime->start) @@ -104,6 +114,177 @@ int perf_time__parse_str(struct perf_time_interval 
*ptime, const char *ostr) return rc; } +static int parse_percent(double *pcnt, char *str) +{ + char *c; + + c = strchr(str, '%'); + if (c) + *c = '\0'; + else + return -1; + + *pcnt = atof(str) / 100.0; + + return 0; +} + +static int percent_slash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *p, *end_str; + double pcnt, start_pcnt, end_pcnt; + u64 total = end - start; + int i; + + /* + * Example: + * 10%/2: select the second 10% slice and the third 10% slice + */ + + /* We can modify this string since the original one is copied */ + p = strchr(str, '/'); + if (!p) + return -1; + + *p = '\0'; + if (parse_percent(&pcnt, str) < 0) + return -1; + + p++; + i = (int)strtol(p, &end_str, 10); + if (*end_str) + return -1; + + if (pcnt <= 0.0) + return -1; + + start_pcnt = pcnt * (i - 1); + end_pcnt = pcnt * i; + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +static int percent_dash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *start_str = NULL, *end_str; + double start_pcnt, end_pcnt; + u64 total = end - start; + int ret; + + /* + * Example: 0%-10% + */ + + ret = split_start_end(&start_str, &end_str, str, '-'); + if (ret || !start_str) + return ret; + + if ((parse_percent(&start_pcnt, start_str) != 0) || + (parse_percent(&end_pcnt, end_str) != 0)) { + free(start_str); + return -1; + } + + free(start_str); + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0 || + start_pcnt > end_pcnt) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +typedef int (*time_pecent_split)(char *, struct perf_time_interval *, + u64 start, u64 end); + +static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, + 
const char *ostr, u64 start, u64 end, + time_pecent_split func) +{ + char *str, *p1, *p2; + int len, ret, i = 0; + + str = strdup(ostr); + if (str == NULL) + return -ENOMEM; + + len = strlen(str); + p1 = str; + + while (p1 < str + len) { + if (i >= num) { + free(str); + return -1; + } + + p2 = strchr(p1, ','); + if (p2) + *p2 = '\0'; + + ret = (func)(p1, &ptime_buf[i], start, end); + if (ret < 0) { + free(str); + return -1; + } + + pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start); + pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end); + + i++; + + if (p2) + p1 = p2 + 1; + else + break; + } + + free(str); + return i; +} + +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end) +{ + char *c; + + /* + * ostr example: + * 10%/2,10%/3: select the second 10% slice and the third 10% slice + * 0%-10%,30%-40%: multiple time range + */ + + memset(ptime_buf, 0, sizeof(*ptime_buf) * num); + + c = strchr(ostr, '/'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_slash_split); + } + + c = strchr(ostr, '-'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_dash_split); + } + + return -1; +} + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) { /* if time is not set don't drop sample */ diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 15b475c50ccf..23087231785a 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -13,6 +13,9 @@ int parse_nsec_time(const char *str, u64 *ptime); int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end); + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); -- cgit v1.2.3 From 
0dd6d272d39c7c1fe2f4253197b505f2b66538ee Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 23 Dec 2017 21:50:13 -0500 Subject: x86/xen/time: fix section mismatch for xen_init_time_ops() The header declares this function as __init but is defined in __ref section. Signed-off-by: Nick Desaulniers Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/xen/xen-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f96dbedb33d4..1a7a9469e5a7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -71,7 +71,7 @@ u64 xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); void xen_save_time_memory_area(void); void xen_restore_time_memory_area(void); -void __init xen_init_time_ops(void); +void __ref xen_init_time_ops(void); void __init xen_hvm_init_time_ops(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); -- cgit v1.2.3 From 9a9b8b4b2271e763c1600311a3d4ecc2ac359b55 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:44 +0800 Subject: perf tools: Create function to perform multiple time range checking Previous patch supports the multiple time range. For example, select the first and second 10% time slices. perf report --time 10%/1,10%/2 We need a function to check if a timestamp is in the ranges of [0, 10%) and [10%, 20%]. Note that it includes the last element in [10%, 20%] but it doesn't include the last element in [0, 10%). It's to avoid the overlap. This patch implements a new function perf_time__ranges_skip_sample for this checking. Change log: v4: Let perf_time__ranges_skip_sample be compatible with perf_time__skip_sample when only one time range.
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/time-utils.h | 3 +++ 2 files changed, 31 insertions(+) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 61c46022de0b..3f7f18f06982 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -300,6 +300,34 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) return false; } +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp) +{ + struct perf_time_interval *ptime; + int i; + + if ((timestamp == 0) || (num == 0)) + return false; + + if (num == 1) + return perf_time__skip_sample(&ptime_buf[0], timestamp); + + /* + * start/end of multiple time ranges must be valid. + */ + for (i = 0; i < num; i++) { + ptime = &ptime_buf[i]; + + if (timestamp >= ptime->start && + ((timestamp < ptime->end && i < num - 1) || + (timestamp <= ptime->end && i == num - 1))) { + break; + } + } + + return (i == num) ? 
true : false; +} + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) { u64 sec = timestamp / NSEC_PER_SEC; diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 23087231785a..34d5eba26bf5 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -18,6 +18,9 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp); + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); int fetch_current_timestamp(char *buf, size_t sz); -- cgit v1.2.3 From 66a640e7823da803fdb68d5d88f7a8fbd11c29e6 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 6 Jan 2018 13:39:48 -0800 Subject: x86: xen: remove the use of VLAIS Variable Length Arrays In Structs (VLAIS) is not supported by Clang, and frowned upon by others. https://lkml.org/lkml/2013/9/23/500 Here, the VLAIS was used because the size of the bitmap returned from xen_mc_entry() depended on possibly (based on kernel configuration) runtime sized data. Rather than declaring args as a VLAIS then calling sizeof on *args, we calculate the appropriate sizeof args manually. Further, we can get rid of the #ifdef's and rely on num_possible_cpus() (thanks to a helpful checkpatch warning from an earlier version of this patch). 
Suggested-by: Juergen Gross Signed-off-by: Nick Desaulniers Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/xen/mmu_pv.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 7118f776cd49..aa701d2a5023 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1339,20 +1339,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, { struct { struct mmuext_op op; -#ifdef CONFIG_SMP - DECLARE_BITMAP(mask, num_processors); -#else DECLARE_BITMAP(mask, NR_CPUS); -#endif } *args; struct multicall_space mcs; + const size_t mc_entry_size = sizeof(args->op) + + sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus()); trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end); if (cpumask_empty(cpus)) return; /* nothing to do */ - mcs = xen_mc_entry(sizeof(*args)); + mcs = xen_mc_entry(mc_entry_size); args = mcs.args; args->op.arg2.vcpumask = to_cpumask(args->mask); -- cgit v1.2.3 From 5b969bc766807e5c2f184d1d6f97b8471de946f1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:45 +0800 Subject: perf report: Support time percent and multiple time ranges perf report has a --time option to limit the time range of output. It only supports absolute time. Now this option is extended to support multiple time ranges and support the percent of time. For example: 1. Select the first and second 10% time slices: perf report --time 10%/1,10%/2 2. Select from 0% to 10% and 30% to 40% slices: perf report --time 0%-10%,30%-40% Changelog: v6: Fix the merge issue with latest perf/core branch. No functional changes. v5: Add checking of first/last sample time to detect if it's recorded in perf.data. If it's not recorded, returns error message to user. 
v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample v3: Since the definitions of first_sample_time/last_sample_time are moved from perf_session to perf_evlist so change the related code. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-6-git-send-email-yao.jin@linux.intel.com [ Add missing colons at end of examples in the man page ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 20 ++++++++++++++++++++ tools/perf/builtin-report.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ddde2b54af57..1e02c4e1a81f 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -402,6 +402,26 @@ OPTIONS stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. + Also support time percent with multiple time range. Time string is + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + + For example: + Select the second 10% time slice: + + perf report --time 10%/2 + + Select from 0% to 10% time slice: + + perf report --time 0%-10% + + Select the first and second 10% time slices: + + perf report --time 10%/1,10%/2 + + Select from 0% to 10% and 30% to 40% slices: + + perf report --time 0%-10%,30%-40% + --itrace:: Options for decoding instruction tracing data. 
The options are: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 07827cd51480..770bf8a614f2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -52,6 +52,8 @@ #include #include +#define PTIME_RANGE_MAX 10 + struct report { struct perf_tool tool; struct perf_session *session; @@ -69,7 +71,8 @@ struct report { const char *cpu_list; const char *symbol_filter_str; const char *time_str; - struct perf_time_interval ptime; + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + int range_num; float min_percent; u64 nr_entries; u64 queue_size; @@ -202,8 +205,10 @@ static int process_sample_event(struct perf_tool *tool, }; int ret = 0; - if (perf_time__skip_sample(&rep->ptime, sample->time)) + if (perf_time__ranges_skip_sample(rep->ptime_range, rep->range_num, + sample->time)) { return 0; + } if (machine__resolve(machine, &al, sample) < 0) { pr_debug("problem processing %d event, skipping it.\n", @@ -1093,9 +1098,25 @@ repeat: if (symbol__init(&session->header.env) < 0) goto error; - if (perf_time__parse_str(&report.ptime, report.time_str) != 0) { - pr_err("Invalid time string\n"); - return -EINVAL; + if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { + if (session->evlist->first_sample_time == 0 && + session->evlist->last_sample_time == 0) { + pr_err("No first/last sample time in perf data\n"); + return -EINVAL; + } + + report.range_num = perf_time__percent_parse_str( + report.ptime_range, PTIME_RANGE_MAX, + report.time_str, + session->evlist->first_sample_time, + session->evlist->last_sample_time); + + if (report.range_num < 0) { + pr_err("Invalid time string\n"); + return -EINVAL; + } + } else { + report.range_num = 1; } sort__setup_elide(stdout); -- cgit v1.2.3 From 2ab046cd01e33a854798a3e245c9e3f32b950a7d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:46 +0800 Subject: perf script: Support time percent and multiple time ranges perf script has a --time option to limit the 
time range of output. It only supports absolute time. Now this option is extended to support multiple time ranges and support the percent of time. For example: 1. Select the first and second 10% time slices: perf script --time 10%/1,10%/2 2. Select from 0% to 10% and 30% to 40% slices: perf script --time 0%-10%,30%-40% Changelog: v6: Fix the merge issue with latest perf/core branch. No functional changes. v5: Add checking of first/last sample time to detect if it's recorded in perf.data. If it's not recorded, returns error message to user. v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample v3: Since the definitions of first_sample_time/last_sample_time are moved from perf_session to perf_evlist so change the related code. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-7-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 16 +++++++++++++++ tools/perf/builtin-script.c | 34 ++++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 974ceb12c7f3..7b622a812a72 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -329,6 +329,22 @@ include::itrace.txt[] stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. + Also support time percent with multipe time range. Time string is + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. 
+ + For example: + Select the second 10% time slice + perf script --time 10%/2 + + Select from 0% to 10% time slice + perf script --time 0%-10% + + Select the first and second 10% time slices + perf script --time 10%/1,10%/2 + + Select from 0% to 10% and 30% to 40% slices + perf script --time 0%-10%,30%-40% + --max-blocks:: Set the maximum number of program blocks to print with brstackasm for each sample. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 77e47cf39f2c..330dcd9b9b8f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1436,6 +1436,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } +#define PTIME_RANGE_MAX 10 + struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -1449,7 +1451,8 @@ struct perf_script { struct thread_map *threads; int name_width; const char *time_str; - struct perf_time_interval ptime; + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + int range_num; }; static int perf_evlist__max_name_len(struct perf_evlist *evlist) @@ -1734,8 +1737,10 @@ static int process_sample_event(struct perf_tool *tool, struct perf_script *scr = container_of(tool, struct perf_script, tool); struct addr_location al; - if (perf_time__skip_sample(&scr->ptime, sample->time)) + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { return 0; + } if (debug_mode) { if (sample->time < last_timestamp) { @@ -3360,10 +3365,27 @@ int cmd_script(int argc, const char **argv) goto out_delete; /* needs to be parsed after looking up reference time */ - if (perf_time__parse_str(&script.ptime, script.time_str) != 0) { - pr_err("Invalid time string\n"); - err = -EINVAL; - goto out_delete; + if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { + if (session->evlist->first_sample_time == 0 && + session->evlist->last_sample_time == 0) { + pr_err("No first/last sample time in perf data\n"); + err = -EINVAL; + goto 
out_delete; + } + + script.range_num = perf_time__percent_parse_str( + script.ptime_range, PTIME_RANGE_MAX, + script.time_str, + session->evlist->first_sample_time, + session->evlist->last_sample_time); + + if (script.range_num < 0) { + pr_err("Invalid time string\n"); + err = -EINVAL; + goto out_delete; + } + } else { + script.range_num = 1; } err = __cmd_script(&script); -- cgit v1.2.3 From 24787afbcd0127859394eb9230659ee6d5dc4644 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:45 +0100 Subject: perf tools: Enable LIBBABELTRACE by default There's no reason anymore to treat babel trace in a special way, because a) we no longer display its state b) the needed babeltrace library is now out and well adopted among distros. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- tools/perf/Makefile.perf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f050f38d8fa3..12dec6ea5ed2 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -780,7 +780,7 @@ else NO_PERF_READ_VDSOX32 := 1 endif -ifdef LIBBABELTRACE +ifndef NO_LIBBABELTRACE $(call feature_check,libbabeltrace) ifeq ($(feature-libbabeltrace), 1) CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 68cf1360a3f3..9fdefd748e2e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -77,7 +77,7 @@ include ../scripts/utilities.mak # # Define NO_ZLIB if you do not want to support compressed kernel modules # -# Define LIBBABELTRACE if you DO want libbabeltrace support +# Define NO_LIBBABELTRACE if you do not want libbabeltrace support # for CTF data format. 
# # Define NO_LZMA if you do not want to support compressed (xz) kernel modules -- cgit v1.2.3 From db9fc765e8f4d0144d13cdfa4be32d81eae01710 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:46 +0100 Subject: perf tools: Display perf_event_attr::namespaces debug info Display namespaces bit in -vv debug display: $ perf record -vv --namespaces ... ... perf_event_attr: size 112 ... namespaces 1 Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a4d256ea0dc4..c435b2444153 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1577,6 +1577,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(use_clockid, p_unsigned); PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRf(write_backward, p_unsigned); + PRINT_ATTRf(namespaces, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); -- cgit v1.2.3 From 313ccb96159489eabdbdcf4deb34e7fbac17557d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:47 +0100 Subject: perf: Allocate context task_ctx_data for child event Currently we use perf_event_context::task_ctx_data to save and restore the LBR status when the task is scheduled out and in. We don't allocate it for child contexts, which results in shorter task's LBR stack, because we don't save the history from previous run and start over every time we schedule the task in. 
I made a test to generate samples with LBR call stack and got higher numbers on bigger chain depths: before: after: LBR call chain: nr: 1 60561 498127 LBR call chain: nr: 2 0 0 LBR call chain: nr: 3 107030 2172 LBR call chain: nr: 4 466685 62758 LBR call chain: nr: 5 2307319 878046 LBR call chain: nr: 6 48713 495218 LBR call chain: nr: 7 1040 4551 LBR call chain: nr: 8 481 172 LBR call chain: nr: 9 878 120 LBR call chain: nr: 10 2377 6698 LBR call chain: nr: 11 28830 151487 LBR call chain: nr: 12 29347 339867 LBR call chain: nr: 13 4 22 LBR call chain: nr: 14 3 53 Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Fixes: 4af57ef28c2c ("perf: Add pmu specific data for perf task context") Link: http://lkml.kernel.org/r/20180107160356.28203-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4df5b695bf0d..55fb648a32b0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10703,6 +10703,19 @@ inherit_event(struct perf_event *parent_event, if (IS_ERR(child_event)) return child_event; + + if ((child_event->attach_state & PERF_ATTACH_TASK_DATA) && + !child_ctx->task_ctx_data) { + struct pmu *pmu = child_event->pmu; + + child_ctx->task_ctx_data = kzalloc(pmu->task_ctx_size, + GFP_KERNEL); + if (!child_ctx->task_ctx_data) { + free_event(child_event); + return NULL; + } + } + /* * is_orphaned_event() and list_add_tail(&parent_event->child_list) * must be under the same lock in order to serialize against @@ -10713,6 +10726,7 @@ inherit_event(struct perf_event *parent_event, if (is_orphaned_event(parent_event) || !atomic_long_inc_not_zero(&parent_event->refcount)) { mutex_unlock(&parent_event->child_mutex); + /* task_ctx_data is freed with child_ctx */ free_event(child_event); return NULL; } -- cgit v1.2.3 From 
81df978c49379481716aef591de77313c286d747 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:48 +0100 Subject: perf: Add sample_id to PERF_RECORD_ITRACE_START event comment Adding missing sample_id line into PERF_RECORD_ITRACE_START event comment. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-5-jolsa@kernel.org [ Update the tools/include/uapi/linux copy ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 1 + tools/include/uapi/linux/perf_event.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b9a4953018ed..8bb66e8da945 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -864,6 +864,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b9a4953018ed..8bb66e8da945 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -864,6 +864,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, -- cgit v1.2.3 From 8cf7e0e22414f5acf85ecb7cd0d4482e6c9696ae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:49 +0100 Subject: perf: Make perf_callchain function static And move it to core.c, because there's no caller of this function other than the one in core.c Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/callchain.c | 15 --------------- kernel/events/core.c | 16 
++++++++++++++++ kernel/events/internal.h | 4 ---- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 1b2be63c8528..772a43fea825 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -178,21 +178,6 @@ put_callchain_entry(int rctx) put_recursion_context(this_cpu_ptr(callchain_recursion), rctx); } -struct perf_callchain_entry * -perf_callchain(struct perf_event *event, struct pt_regs *regs) -{ - bool kernel = !event->attr.exclude_callchain_kernel; - bool user = !event->attr.exclude_callchain_user; - /* Disallow cross-task user callchains. */ - bool crosstask = event->ctx->task && event->ctx->task != current; - const u32 max_stack = event->attr.sample_max_stack; - - if (!kernel && !user) - return NULL; - - return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true); -} - struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark) diff --git a/kernel/events/core.c b/kernel/events/core.c index 55fb648a32b0..5fc1ded4b450 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5980,6 +5980,22 @@ static u64 perf_virt_to_phys(u64 virt) return phys_addr; } +static struct perf_callchain_entry * +perf_callchain(struct perf_event *event, struct pt_regs *regs) +{ + bool kernel = !event->attr.exclude_callchain_kernel; + bool user = !event->attr.exclude_callchain_user; + /* Disallow cross-task user callchains. 
*/ + bool crosstask = event->ctx->task && event->ctx->task != current; + const u32 max_stack = event->attr.sample_max_stack; + + if (!kernel && !user) + return NULL; + + return get_perf_callchain(regs, 0, kernel, user, + max_stack, crosstask, true); +} + void perf_prepare_sample(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 09b1537ae06c..6dc725a7e7bc 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -201,10 +201,6 @@ arch_perf_out_copy_user(void *dst, const void *src, unsigned long n) DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) -/* Callchain handling */ -extern struct perf_callchain_entry * -perf_callchain(struct perf_event *event, struct pt_regs *regs); - static inline int get_recursion_context(int *recursion) { int rctx; -- cgit v1.2.3 From 99e818cc88889a2fa2f483b91b372c47b94b7c98 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:50 +0100 Subject: perf: Return empty callchain instead of NULL It simplifies the code a bit, because we dump the callchain Link: http://lkml.kernel.org/n/tip-uqp7qd6aif47g39glnbu95yl@git.kernel.org even if it's empty. With 'empty' callchain we can remove all the NULL-checking code paths. 
Original-patch-from: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20180107160356.28203-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5fc1ded4b450..4e1a1bf8d867 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5815,19 +5815,11 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_read(handle, event); if (sample_type & PERF_SAMPLE_CALLCHAIN) { - if (data->callchain) { - int size = 1; - - if (data->callchain) - size += data->callchain->nr; - - size *= sizeof(u64); + int size = 1; - __output_copy(handle, data->callchain, size); - } else { - u64 nr = 0; - perf_output_put(handle, nr); - } + size += data->callchain->nr; + size *= sizeof(u64); + __output_copy(handle, data->callchain, size); } if (sample_type & PERF_SAMPLE_RAW) { @@ -5980,6 +5972,8 @@ static u64 perf_virt_to_phys(u64 virt) return phys_addr; } +static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; + static struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { @@ -5988,12 +5982,14 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) /* Disallow cross-task user callchains. 
*/ bool crosstask = event->ctx->task && event->ctx->task != current; const u32 max_stack = event->attr.sample_max_stack; + struct perf_callchain_entry *callchain; if (!kernel && !user) - return NULL; + return &__empty_callchain; - return get_perf_callchain(regs, 0, kernel, user, - max_stack, crosstask, true); + callchain = get_perf_callchain(regs, 0, kernel, user, + max_stack, crosstask, true); + return callchain ?: &__empty_callchain; } void perf_prepare_sample(struct perf_event_header *header, @@ -6018,9 +6014,7 @@ void perf_prepare_sample(struct perf_event_header *header, int size = 1; data->callchain = perf_callchain(event, regs); - - if (data->callchain) - size += data->callchain->nr; + size += data->callchain->nr; header->size += size * sizeof(u64); } -- cgit v1.2.3 From 972c14884728bf5f69ec69cfb1beeec1a9cd29ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:51 +0100 Subject: perf: Update PERF_RECORD_MISC_* comment for perf_event_header::misc bit 13 The perf_event_header::misc bit 13 is shared on different events and next patch is adding yet another bit 13 user. Updating the comment to make it more structured and clear which events use bit 13. 
Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20180107160356.28203-8-jolsa@kernel.org [ Update the tools/include/uapi/linux copy ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 9 ++++++--- tools/include/uapi/linux/perf_event.h | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 8bb66e8da945..c77c9a2ebbbb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 8bb66e8da945..c77c9a2ebbbb 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. 
+ * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) -- cgit v1.2.3 From 78b5d705b51bc175fb67e4f5209f55e5ec581eec Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 14 Dec 2017 15:28:27 +0900 Subject: spi: s3c64xx: add SPDX identifier Replace the original license statement with the SPDX identifier. Signed-off-by: Andi Shyti Reviewed-by: Krzysztof Kozlowski Acked-by: Philippe Ombredanne Signed-off-by: Mark Brown --- drivers/spi/spi-s3c64xx.c | 18 ++++-------------- include/linux/platform_data/spi-s3c64xx.h | 6 ++---- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index de7df20f8712..baa3a9fa2638 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -1,17 +1,7 @@ -/* - * Copyright (C) 2009 Samsung Electronics Ltd. - * Jaswinder Singh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright (c) 2009 Samsung Electronics Co., Ltd. 
+// Jaswinder Singh #include #include diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index da79774078a7..773daf7915a3 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -1,10 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + /* * Copyright (C) 2009 Samsung Electronics Ltd. * Jaswinder Singh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __SPI_S3C64XX_H -- cgit v1.2.3 From 28a0b39877f5ed64ae9fadf95dddb90999309dee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:52 +0100 Subject: perf script: Add support to display sample misc field Adding support to display sample misc field in form of letter for each bit: # perf script -F +misc ... sched-messaging 1414 K 28690.636582: 4590 cycles ... sched-messaging 1407 U 28690.636600: 325620 cycles ... sched-messaging 1414 K 28690.636608: 19473 cycles ... 
misc field __________/ The misc bits are assigned to following letters: PERF_RECORD_MISC_KERNEL K PERF_RECORD_MISC_USER U PERF_RECORD_MISC_HYPERVISOR H PERF_RECORD_MISC_GUEST_KERNEL G PERF_RECORD_MISC_GUEST_USER g PERF_RECORD_MISC_MMAP_DATA* M PERF_RECORD_MISC_COMM_EXEC E PERF_RECORD_MISC_SWITCH_OUT S Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 20 ++++++++- tools/perf/builtin-script.c | 74 +++++++++++++++++++++++++++----- tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 1 + 4 files changed, 84 insertions(+), 12 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 7b622a812a72..93ae8d60e3d3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric. + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -225,6 +225,24 @@ OPTIONS that the metric computed is averaged over the whole sampling period, not just for the sample point. 
+ For sample events it's possible to display misc field with -F +misc option, + following letters are displayed for each bit: + + PERF_RECORD_MISC_KERNEL K + PERF_RECORD_MISC_USER U + PERF_RECORD_MISC_HYPERVISOR H + PERF_RECORD_MISC_GUEST_KERNEL G + PERF_RECORD_MISC_GUEST_USER g + PERF_RECORD_MISC_MMAP_DATA* M + PERF_RECORD_MISC_COMM_EXEC E + PERF_RECORD_MISC_SWITCH_OUT S + + $ perf script -F +misc ... + sched-messaging 1414 K 28690.636582: 4590 cycles ... + sched-messaging 1407 U 28690.636600: 325620 cycles ... + sched-messaging 1414 K 28690.636608: 19473 cycles ... + misc field ___________/ + -k:: --vmlinux=:: vmlinux pathname diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 330dcd9b9b8f..bb603495cf4a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -93,6 +93,7 @@ enum perf_output_field { PERF_OUTPUT_PHYS_ADDR = 1U << 26, PERF_OUTPUT_UREGS = 1U << 27, PERF_OUTPUT_METRIC = 1U << 28, + PERF_OUTPUT_MISC = 1U << 29, }; struct output_option { @@ -128,6 +129,7 @@ struct output_option { {.str = "synth", .field = PERF_OUTPUT_SYNTH}, {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, {.str = "metric", .field = PERF_OUTPUT_METRIC}, + {.str = "misc", .field = PERF_OUTPUT_MISC}, }; enum { @@ -594,7 +596,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample, static int perf_sample__fprintf_start(struct perf_sample *sample, struct thread *thread, - struct perf_evsel *evsel, FILE *fp) + struct perf_evsel *evsel, + u32 type, FILE *fp) { struct perf_event_attr *attr = &evsel->attr; unsigned long secs; @@ -624,6 +627,47 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, printed += fprintf(fp, "[%03d] ", sample->cpu); } + if (PRINT_FIELD(MISC)) { + int ret = 0; + + #define has(m) \ + (sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m + + if (has(KERNEL)) + ret += fprintf(fp, "K"); + if (has(USER)) + ret += fprintf(fp, "U"); + if (has(HYPERVISOR)) + ret += fprintf(fp, "H"); + if 
(has(GUEST_KERNEL)) + ret += fprintf(fp, "G"); + if (has(GUEST_USER)) + ret += fprintf(fp, "g"); + + switch (type) { + case PERF_RECORD_MMAP: + case PERF_RECORD_MMAP2: + if (has(MMAP_DATA)) + ret += fprintf(fp, "M"); + break; + case PERF_RECORD_COMM: + if (has(COMM_EXEC)) + ret += fprintf(fp, "E"); + break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + if (has(SWITCH_OUT)) + ret += fprintf(fp, "S"); + default: + break; + } + + #undef has + + ret += fprintf(fp, "%*s", 6 - ret, " "); + printed += ret; + } + if (PRINT_FIELD(TIME)) { nsecs = sample->time; secs = nsecs / NSEC_PER_SEC; @@ -1502,7 +1546,7 @@ static void script_print_metric(void *ctx, const char *color, if (!fmt) return; perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, - mctx->fp); + PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); if (color) color_fprintf(mctx->fp, color, fmt, val); @@ -1516,7 +1560,7 @@ static void script_new_line(void *ctx) struct metric_ctx *mctx = ctx; perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, - mctx->fp); + PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); } @@ -1584,7 +1628,8 @@ static void process_event(struct perf_script *script, ++es->samples; - perf_sample__fprintf_start(sample, thread, evsel, fp); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SAMPLE, fp); if (PRINT_FIELD(PERIOD)) fprintf(fp, "%10" PRIu64 " ", sample->period); @@ -1833,7 +1878,8 @@ static int process_comm_event(struct perf_tool *tool, sample->tid = event->comm.tid; sample->pid = event->comm.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_COMM, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1868,7 +1914,8 @@ static int process_namespaces_event(struct perf_tool *tool, sample->tid = event->namespaces.tid; sample->pid = event->namespaces.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + 
perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_NAMESPACES, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1901,7 +1948,8 @@ static int process_fork_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_FORK, stdout); perf_event__fprintf(event, stdout); thread__put(thread); @@ -1930,7 +1978,8 @@ static int process_exit_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_EXIT, stdout); perf_event__fprintf(event, stdout); if (perf_event__process_exit(tool, event, sample, machine) < 0) @@ -1965,7 +2014,8 @@ static int process_mmap_event(struct perf_tool *tool, sample->tid = event->mmap.tid; sample->pid = event->mmap.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -1996,7 +2046,8 @@ static int process_mmap2_event(struct perf_tool *tool, sample->tid = event->mmap2.tid; sample->pid = event->mmap2.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP2, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -2022,7 +2073,8 @@ static int process_switch_event(struct perf_tool *tool, return -1; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SWITCH, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1ae95efbfb95..e5fbd6dd1b01 100644 --- a/tools/perf/util/event.h +++ 
b/tools/perf/util/event.h @@ -205,6 +205,7 @@ struct perf_sample { u32 flags; u16 insn_len; u8 cpumode; + u16 misc; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c435b2444153..d934f04e3110 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2042,6 +2042,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + data->misc = event->header.misc; data->id = -1ULL; data->data_src = PERF_MEM_DATA_SRC_NONE; -- cgit v1.2.3 From 900498a34a3ac9c611e9b425094c8106bdd7dc1c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 8 Jan 2018 14:03:53 +0100 Subject: ALSA: pcm: Allow aborting mutex lock at OSS read/write loops PCM OSS read/write loops keep taking the mutex lock for the whole read/write, and this might take very long when the exceptionally high amount of data is given. Also, since it invokes with mutex_lock(), the concurrent read/write becomes unbreakable. This patch tries to address these issues by replacing mutex_lock() with mutex_lock_interruptible(), and also splits / re-takes the lock at each read/write period chunk, so that it can switch the context more finely if requested. 
Cc: Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index e317964bd2ea..c2db7e905f7d 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1334,8 +1334,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) return tmp; - mutex_lock(&runtime->oss.params_lock); while (bytes > 0) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + tmp = -ERESTARTSYS; + break; + } if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { tmp = bytes; if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) @@ -1379,18 +1382,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha xfer += tmp; if ((substream->f_flags & O_NONBLOCK) != 0 && tmp != runtime->oss.period_bytes) - break; + tmp = -EAGAIN; } + err: + mutex_unlock(&runtime->oss.params_lock); + if (tmp < 0) + break; if (signal_pending(current)) { tmp = -ERESTARTSYS; - goto err; + break; } + tmp = 0; } - mutex_unlock(&runtime->oss.params_lock); - return xfer; - - err: - mutex_unlock(&runtime->oss.params_lock); return xfer > 0 ? 
(snd_pcm_sframes_t)xfer : tmp; } @@ -1438,8 +1441,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) return tmp; - mutex_lock(&runtime->oss.params_lock); while (bytes > 0) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + tmp = -ERESTARTSYS; + break; + } if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { if (runtime->oss.buffer_used == 0) { tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); @@ -1470,16 +1476,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use bytes -= tmp; xfer += tmp; } + err: + mutex_unlock(&runtime->oss.params_lock); + if (tmp < 0) + break; if (signal_pending(current)) { tmp = -ERESTARTSYS; - goto err; + break; } + tmp = 0; } - mutex_unlock(&runtime->oss.params_lock); - return xfer; - - err: - mutex_unlock(&runtime->oss.params_lock); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } -- cgit v1.2.3 From 0d5ea120abc020fada1f7cb019ec37f13162e7af Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 5 Jan 2018 14:55:34 -0600 Subject: ASoC: Replace snd_soc_acpi_check_hid with acpi_dev_present Replace snd_soc_acpi_check_hid() with the generic acpi_dev_present() and remove the now unused snd_soc_acpi_check_hid function. This should have no functional change. 
Signed-off-by: Jeremy Cline Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 3 --- sound/soc/soc-acpi.c | 32 ++------------------------------ 2 files changed, 2 insertions(+), 33 deletions(-) diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index a7d8d335b043..057805489af3 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -49,9 +49,6 @@ snd_soc_acpi_find_package_from_hid(const u8 hid[ACPI_ID_LEN], struct snd_soc_acpi_mach * snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines); -/* acpi check hid */ -bool snd_soc_acpi_check_hid(const u8 hid[ACPI_ID_LEN]); - /** * snd_soc_acpi_mach: ACPI-based machine descriptor. Most of the fields are * related to the hardware, except for the firmware and topology file names. diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c index d4dd2efea45e..7f43c9bf3d09 100644 --- a/sound/soc/soc-acpi.c +++ b/sound/soc/soc-acpi.c @@ -49,41 +49,13 @@ const char *snd_soc_acpi_find_name_from_hid(const u8 hid[ACPI_ID_LEN]) } EXPORT_SYMBOL_GPL(snd_soc_acpi_find_name_from_hid); -static acpi_status snd_soc_acpi_mach_match(acpi_handle handle, u32 level, - void *context, void **ret) -{ - unsigned long long sta; - acpi_status status; - - *(bool *)context = true; - status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); - if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_PRESENT)) - *(bool *)context = false; - - return AE_OK; -} - -bool snd_soc_acpi_check_hid(const u8 hid[ACPI_ID_LEN]) -{ - acpi_status status; - bool found = false; - - status = acpi_get_devices(hid, snd_soc_acpi_mach_match, &found, NULL); - - if (ACPI_FAILURE(status)) - return false; - - return found; -} -EXPORT_SYMBOL_GPL(snd_soc_acpi_check_hid); - struct snd_soc_acpi_mach * snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines) { struct snd_soc_acpi_mach *mach; for (mach = machines; mach->id[0]; mach++) { - if (snd_soc_acpi_check_hid(mach->id) == 
true) { + if (acpi_dev_present(mach->id, NULL, -1)) { if (mach->machine_quirk) mach = mach->machine_quirk(mach); return mach; @@ -161,7 +133,7 @@ struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg) return mach; for (i = 0; i < codec_list->num_codecs; i++) { - if (snd_soc_acpi_check_hid(codec_list->codecs[i]) != true) + if (!acpi_dev_present(codec_list->codecs[i], NULL, -1)) return NULL; } -- cgit v1.2.3 From 845ab40092601630ec9eb58398a53b9a87b6900b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 4 Jan 2018 16:35:52 -0600 Subject: ASoC: acpi: add missing includes for non-ACPI platforms 0-day reports compilation issues with non-ACPI platforms. In file included from sound/soc/soc-acpi.c:17:0: >> include/sound/soc-acpi.h:36:46: error: 'ACPI_ID_LEN' undeclared here (not in a function); did you mean 'ACPI_FILE'? snd_soc_acpi_find_name_from_hid(const u8 hid[ACPI_ID_LEN]) sound/soc/soc-acpi.c: At top level: >> sound/soc/soc-acpi.c:174:16: error: expected declaration specifiers or '...' before string constant MODULE_LICENSE("GPL v2"); Add missing include files. 
Fixes: 7feb2f786a46 ("ASoC: move ACPI common code out of Intel/sst tree") Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- include/sound/soc-acpi-intel-match.h | 1 + include/sound/soc-acpi.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/sound/soc-acpi-intel-match.h b/include/sound/soc-acpi-intel-match.h index 1a9191cd4bb3..9da6388c20a1 100644 --- a/include/sound/soc-acpi-intel-match.h +++ b/include/sound/soc-acpi-intel-match.h @@ -16,6 +16,7 @@ #ifndef __LINUX_SND_SOC_ACPI_INTEL_MATCH_H #define __LINUX_SND_SOC_ACPI_INTEL_MATCH_H +#include #include #include diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index a7d8d335b043..a93436089bf5 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -17,6 +17,7 @@ #include #include +#include struct snd_soc_acpi_package_context { char *name; /* package name */ -- cgit v1.2.3 From 85088c4a0f65f0be25a98164ec6bca02ac5cad04 Mon Sep 17 00:00:00 2001 From: Nitzan Carmi Date: Thu, 4 Jan 2018 17:56:13 +0200 Subject: nvme: take refcount on transport module The block device is backed by the transport so we must ensure that the transport driver will not be removed until all references are released. Otherwise, we might end up referencing freed memory. 
Reviewed-by: Max Gurtovoy Signed-off-by: Nitzan Carmi Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 609307ca9e4d..c8bcfe64e976 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1228,16 +1228,27 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) #ifdef CONFIG_NVME_MULTIPATH /* should never be called due to GENHD_FL_HIDDEN */ if (WARN_ON_ONCE(ns->head->disk)) - return -ENXIO; + goto fail; #endif if (!kref_get_unless_zero(&ns->kref)) - return -ENXIO; + goto fail; + if (!try_module_get(ns->ctrl->ops->module)) + goto fail_put_ns; + return 0; + +fail_put_ns: + nvme_put_ns(ns); +fail: + return -ENXIO; } static void nvme_release(struct gendisk *disk, fmode_t mode) { - nvme_put_ns(disk->private_data); + struct nvme_ns *ns = disk->private_data; + + module_put(ns->ctrl->ops->module); + nvme_put_ns(ns); } static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) -- cgit v1.2.3 From b837b28394fb76993c28bb242db7061ee0417da6 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 4 Jan 2018 17:56:14 +0200 Subject: nvme: fix subsystem multiple controllers support check There is a problem when another module (e.g. nvmet) takes a reference on the nvme block device and the physical nvme drive is removed. In that case nvme_free_ctrl() will not be called and the controller state will be "deleting" or "dead" unless nvmet module releases the block device. Later on, the same nvme drive probes back and nvme_init_subsystem() will be called and fail due to duplicate subnqn (if the nvme device doesn't support subsystem with multiple controllers). This will cause a probe failure. 
This commit changes the check of multiple controllers support at nvme_init_subsystem() by not counting all the controllers at "dead" or "deleting" state (this is safe because controllers at this state will never be active again). Fixes: ab9e00cc72fa ("nvme: track subsystems") Reviewed-by: Max Gurtovoy Signed-off-by: Israel Rukshin Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c8bcfe64e976..2bcd49584f71 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2069,6 +2069,22 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; +static int nvme_active_ctrls(struct nvme_subsystem *subsys) +{ + int count = 0; + struct nvme_ctrl *ctrl; + + mutex_lock(&subsys->lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { + if (ctrl->state != NVME_CTRL_DELETING && + ctrl->state != NVME_CTRL_DEAD) + count++; + } + mutex_unlock(&subsys->lock); + + return count; +} + static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { struct nvme_subsystem *subsys, *found; @@ -2107,7 +2123,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) * Verify that the subsystem actually supports multiple * controllers, else bail out. */ - if (!(id->cmic & (1 << 1))) { + if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { dev_err(ctrl->device, "ignoring ctrl due to duplicate subnqn (%s).\n", found->subnqn); -- cgit v1.2.3 From eaadb1caa966a91128297b754e90b7c92b350a00 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Jan 2018 21:18:24 +0100 Subject: ASoC: Intel: sst: Fix the return value of 'sst_send_byte_stream_mrfld()' In some error handling paths, an error code is assigned to 'ret'. However, the function always returns 0. Fix it and return the error code if such an error path is taken.
Fixes: 3d9ff34622ba ("ASoC: Intel: sst: add stream operations") Signed-off-by: Christophe JAILLET Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst/sst_stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/atom/sst/sst_stream.c b/sound/soc/intel/atom/sst/sst_stream.c index 65e257b17a7e..20f5066fefb9 100644 --- a/sound/soc/intel/atom/sst/sst_stream.c +++ b/sound/soc/intel/atom/sst/sst_stream.c @@ -220,7 +220,7 @@ int sst_send_byte_stream_mrfld(struct intel_sst_drv *sst_drv_ctx, sst_free_block(sst_drv_ctx, block); out: test_and_clear_bit(pvt_id, &sst_drv_ctx->pvt_id); - return 0; + return ret; } /* -- cgit v1.2.3 From 25f3fd043ec1f60f3955f9d7277d97e2f9c1612c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Jan 2018 21:18:55 +0100 Subject: ASoC: Intel: sst: Fix some style This patch fixes 3 small issues: - missing 2nd '*' at the beginning of a doxygen comment - extra space after a '\n' in a dev_dbg message - extra tab before a 'return" statement Signed-off-by: Christophe JAILLET Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst/sst_stream.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/atom/sst/sst_stream.c b/sound/soc/intel/atom/sst/sst_stream.c index 20f5066fefb9..7ee6aeb7e0af 100644 --- a/sound/soc/intel/atom/sst/sst_stream.c +++ b/sound/soc/intel/atom/sst/sst_stream.c @@ -223,7 +223,7 @@ out: return ret; } -/* +/** * sst_pause_stream - Send msg for a pausing stream * @str_id: stream ID * @@ -261,7 +261,7 @@ int sst_pause_stream(struct intel_sst_drv *sst_drv_ctx, int str_id) } } else { retval = -EBADRQC; - dev_dbg(sst_drv_ctx->dev, "SST DBG:BADRQC for stream\n "); + dev_dbg(sst_drv_ctx->dev, "SST DBG:BADRQC for stream\n"); } return retval; @@ -284,7 +284,7 @@ int sst_resume_stream(struct intel_sst_drv *sst_drv_ctx, int str_id) if (!str_info) return -EINVAL; if (str_info->status == STREAM_RUNNING) - return 0; + return 0; if (str_info->status == STREAM_PAUSED) { 
retval = sst_prepare_and_post_msg(sst_drv_ctx, str_info->task_id, IPC_CMD, IPC_IA_RESUME_STREAM_MRFLD, -- cgit v1.2.3 From 56e49aa41da204f8582816c3f2572862c71adc90 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 5 Jan 2018 13:20:21 -0600 Subject: ASoC: Intel: bytcr_rt5651: fix Kiano DMI quirk The current code doesn't enable the MCLK which reduces audio quality (PLL driven from BCLK), fix the quirk Tested-by: Carlo Caione Signed-off-by: Pierre-Louis Bossart Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5651.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index 488ec48f296a..de064f0f7b08 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -264,7 +264,8 @@ static const struct dmi_system_id byt_rt5651_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "KIANO"), DMI_MATCH(DMI_PRODUCT_NAME, "KIANO SlimNote 14.2"), }, - .driver_data = (void *)(BYT_RT5651_IN1_IN2_MAP), + .driver_data = (void *)(BYT_RT5651_MCLK_EN | + BYT_RT5651_IN1_IN2_MAP), }, {} }; -- cgit v1.2.3 From 60e3b52e9354550c28090237b083b20bbabed598 Mon Sep 17 00:00:00 2001 From: Keyon Jie Date: Fri, 5 Jan 2018 13:20:18 -0600 Subject: ASoC: Intel: bytcr-rt5651: add quirk for IN3P which may also be used On Minnowboard Max with Realtek rt5651 eval board, the IN3P is connected to Headset Mic. Here add and select it for Minnowboard Max.
Signed-off-by: Keyon Jie Signed-off-by: Pierre-Louis Bossart Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5651.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index de064f0f7b08..a958ed37569f 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -39,6 +39,7 @@ enum { BYT_RT5651_IN1_MAP, BYT_RT5651_IN2_MAP, BYT_RT5651_IN1_IN2_MAP, + BYT_RT5651_IN3_MAP, }; #define BYT_RT5651_MAP(quirk) ((quirk) & GENMASK(7, 0)) @@ -63,6 +64,8 @@ static void log_quirks(struct device *dev) dev_info(dev, "quirk IN1_MAP enabled"); if (BYT_RT5651_MAP(byt_rt5651_quirk) == BYT_RT5651_IN2_MAP) dev_info(dev, "quirk IN2_MAP enabled"); + if (BYT_RT5651_MAP(byt_rt5651_quirk) == BYT_RT5651_IN3_MAP) + dev_info(dev, "quirk IN3_MAP enabled"); if (byt_rt5651_quirk & BYT_RT5651_DMIC_EN) dev_info(dev, "quirk DMIC enabled"); if (byt_rt5651_quirk & BYT_RT5651_MCLK_EN) @@ -179,6 +182,12 @@ static const struct snd_soc_dapm_route byt_rt5651_intmic_in1_in2_map[] = { {"IN3P", NULL, "Headset Mic"}, }; +static const struct snd_soc_dapm_route byt_rt5651_intmic_in3_map[] = { + {"Internal Mic", NULL, "micbias1"}, + {"IN3P", NULL, "Headset Mic"}, + {"IN1P", NULL, "Internal Mic"}, +}; + static const struct snd_kcontrol_new byt_rt5651_controls[] = { SOC_DAPM_PIN_SWITCH("Headphone"), SOC_DAPM_PIN_SWITCH("Headset Mic"), @@ -255,8 +264,7 @@ static const struct dmi_system_id byt_rt5651_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "Circuitco"), DMI_MATCH(DMI_PRODUCT_NAME, "Minnowboard Max B3 PLATFORM"), }, - .driver_data = (void *)(BYT_RT5651_DMIC_MAP | - BYT_RT5651_DMIC_EN), + .driver_data = (void *)(BYT_RT5651_IN3_MAP), }, { .callback = byt_rt5651_quirk_cb, @@ -294,6 +302,10 @@ static int byt_rt5651_init(struct snd_soc_pcm_runtime *runtime) custom_map = byt_rt5651_intmic_in1_in2_map; num_routes = 
ARRAY_SIZE(byt_rt5651_intmic_in1_in2_map); break; + case BYT_RT5651_IN3_MAP: + custom_map = byt_rt5651_intmic_in3_map; + num_routes = ARRAY_SIZE(byt_rt5651_intmic_in3_map); + break; default: custom_map = byt_rt5651_intmic_dmic_map; num_routes = ARRAY_SIZE(byt_rt5651_intmic_dmic_map); -- cgit v1.2.3 From 416f2b51119b8cdd899b226e4cf683d000797a8b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 5 Jan 2018 13:20:20 -0600 Subject: ASoC: Intel: bytcr-rt5651: enable MinnowBoard Turbot quirks Define DMI quirk for rt5651 eval board connected to MinnowBoard Turbot. The only difference with a MinnowBoard MAX is that the MCLK pin is enabled on the LSE connector Signed-off-by: Pierre-Louis Bossart Tested-by: Keqiao.Zhang Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5651.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index a958ed37569f..dfe6435f8ed0 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -266,6 +266,15 @@ static const struct dmi_system_id byt_rt5651_quirk_table[] = { }, .driver_data = (void *)(BYT_RT5651_IN3_MAP), }, + { + .callback = byt_rt5651_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ADI"), + DMI_MATCH(DMI_PRODUCT_NAME, "Minnowboard Turbot"), + }, + .driver_data = (void *)(BYT_RT5651_MCLK_EN | + BYT_RT5651_IN3_MAP), + }, { .callback = byt_rt5651_quirk_cb, .matches = { -- cgit v1.2.3 From ea39bdcf22b084c6e6db0078f3140f0655a1e572 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 5 Jan 2018 13:20:23 -0600 Subject: ASoC: Intel: bytcr-rt5651: add support for Line In Add the DAPM widgets and routes. 
Tested with MinnowMax Turbot + rt5651 eval board with Speaker (LineOut) -> LineIn loopback Thanks to Bard Liao @ Realtek for providing the 0dB settings "IN Capture Volume" = 23 "ADC Capture Volume" = 47 "OUT Playback Volume" = 31 "DAC1 Playback Volume" = 175 Signed-off-by: Pierre-Louis Bossart Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5651.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index dfe6435f8ed0..22c9cc5d135e 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -131,6 +131,7 @@ static const struct snd_soc_dapm_widget byt_rt5651_widgets[] = { SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_MIC("Internal Mic", NULL), SND_SOC_DAPM_SPK("Speaker", NULL), + SND_SOC_DAPM_LINE("Line In", NULL), SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, platform_clock_control, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), @@ -142,6 +143,7 @@ static const struct snd_soc_dapm_route byt_rt5651_audio_map[] = { {"Headset Mic", NULL, "Platform Clock"}, {"Internal Mic", NULL, "Platform Clock"}, {"Speaker", NULL, "Platform Clock"}, + {"Line In", NULL, "Platform Clock"}, {"AIF1 Playback", NULL, "ssp2 Tx"}, {"ssp2 Tx", NULL, "codec_out0"}, @@ -155,6 +157,9 @@ static const struct snd_soc_dapm_route byt_rt5651_audio_map[] = { {"Headphone", NULL, "HPOR"}, {"Speaker", NULL, "LOUTL"}, {"Speaker", NULL, "LOUTR"}, + {"IN2P", NULL, "Line In"}, + {"IN2N", NULL, "Line In"}, + }; static const struct snd_soc_dapm_route byt_rt5651_intmic_dmic_map[] = { @@ -193,6 +198,7 @@ static const struct snd_kcontrol_new byt_rt5651_controls[] = { SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Internal Mic"), SOC_DAPM_PIN_SWITCH("Speaker"), + SOC_DAPM_PIN_SWITCH("Line In"), }; static struct snd_soc_jack_pin bytcr_jack_pins[] = { -- cgit v1.2.3 From c66234cfedfc3e6e3b62563a5f2c1562be09a35d Mon Sep 17 00:00:00 2001 
From: John Keeping Date: Mon, 8 Jan 2018 16:01:04 +0000 Subject: ASoC: rockchip: i2s: fix playback after runtime resume When restoring registers during runtime resume, we must not write to I2S_TXDR which is the transmit FIFO as this queues up a sample to be output and pushes all of the output channels down by one. This can be demonstrated with the speaker-test utility: for i in a b c; do speaker-test -c 2 -s 1; done which should play a test through the left speaker three times but if the I2S hardware starts runtime suspended the first sample will be played through the right speaker. Fix this by marking I2S_TXDR as volatile (which also requires marking it as readable, even though it technically isn't). This seems to be the most robust fix, the alternative of giving I2S_TXDR a default value is more fragile since it does not prevent regcache writing to the register in all circumstances. While here, also fix the configuration of I2S_RXDR and I2S_FIFOLR; these are not writable so they do not suffer from the same problem as I2S_TXDR but reading from I2S_RXDR does suffer from a similar problem.
Fixes: f0447f6cbb20 ("ASoC: rockchip: i2s: restore register during runtime_suspend/resume cycle", 2016-09-07) Signed-off-by: John Keeping Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/rockchip/rockchip_i2s.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 908211e1d6fc..eb27f6c24bf7 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -504,6 +504,7 @@ static bool rockchip_i2s_rd_reg(struct device *dev, unsigned int reg) case I2S_INTCR: case I2S_XFER: case I2S_CLR: + case I2S_TXDR: case I2S_RXDR: case I2S_FIFOLR: case I2S_INTSR: @@ -518,6 +519,9 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) switch (reg) { case I2S_INTSR: case I2S_CLR: + case I2S_FIFOLR: + case I2S_TXDR: + case I2S_RXDR: return true; default: return false; @@ -527,6 +531,8 @@ static bool rockchip_i2s_volatile_reg(struct device *dev, unsigned int reg) static bool rockchip_i2s_precious_reg(struct device *dev, unsigned int reg) { switch (reg) { + case I2S_RXDR: + return true; default: return false; } -- cgit v1.2.3 From c6059879be298cccda52f77bf019a7a99eb13e78 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 4 Jan 2018 16:35:53 -0600 Subject: ASoC: Intel: Fix Kconfig with top-level selector Follow network example suggested by Linus, move Intel definitions in if/endif block and clarify in help text which options distro configurations should enable - everything except legacy Baytrail stuff and NOCODEC (test only) To avoid user confusion, machine drivers are handled with a submenu made dependent on this top-level selector. There should be no functionality change - except that sound capabilities are restored when using older configs without any user selection. Note that the SND_SOC_ACPI_INTEL_MATCH config is currently filtered out by the top-level selector. 
This will change in the near future to allow for this option to be selected by both SST and SOF drivers (simplification with submenu for machine drivers by Vinod Koul) Fixes: f6a118a800e3 ("ASoC: Intel: clarify Kconfig dependencies") Reported-by: Linus Torvalds Signed-off-by: Pierre-Louis Bossart Signed-off-by: Vinod Koul Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown --- sound/soc/intel/Kconfig | 66 +++++++++++++++++++---------- sound/soc/intel/Makefile | 2 +- sound/soc/intel/boards/Kconfig | 94 +++++++++++++++++++++++------------------- 3 files changed, 97 insertions(+), 65 deletions(-) diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index 7b49d04e3c60..b827d3b70095 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -1,3 +1,19 @@ +config SND_SOC_INTEL_SST_TOPLEVEL + bool "Intel ASoC SST drivers" + default y + depends on X86 || COMPILE_TEST + select SND_SOC_INTEL_MACH + help + Intel ASoC SST Platform Drivers. If you have a Intel machine that + has an audio controller with a DSP and I2S or DMIC port, then + enable this option by saying Y + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Intel SST drivers. + +if SND_SOC_INTEL_SST_TOPLEVEL + config SND_SST_IPC tristate @@ -11,9 +27,6 @@ config SND_SST_IPC_ACPI select SND_SOC_INTEL_SST select IOSF_MBI -config SND_SOC_INTEL_COMMON - tristate - config SND_SOC_INTEL_SST tristate select SND_SOC_INTEL_SST_ACPI if ACPI @@ -25,47 +38,56 @@ config SND_SOC_INTEL_SST_FIRMWARE config SND_SOC_INTEL_SST_ACPI tristate -config SND_SOC_ACPI_INTEL_MATCH - tristate - select SND_SOC_ACPI if ACPI - -config SND_SOC_INTEL_SST_TOPLEVEL - tristate "Intel ASoC SST drivers" - depends on X86 || COMPILE_TEST - select SND_SOC_INTEL_MACH - select SND_SOC_INTEL_COMMON - help - Intel ASoC Audio Drivers. 
If you have a Intel machine that - has audio controller with a DSP and I2S or DMIC port, then - enable this option by saying Y or M - If unsure select "N". - config SND_SOC_INTEL_HASWELL tristate "Intel ASoC SST driver for Haswell/Broadwell" - depends on SND_SOC_INTEL_SST_TOPLEVEL && SND_DMA_SGBUF + depends on SND_DMA_SGBUF depends on DMADEVICES select SND_SOC_INTEL_SST select SND_SOC_INTEL_SST_FIRMWARE + select SND_SOC_ACPI_INTEL_MATCH + help + If you have a Intel Haswell or Broadwell platform connected to + an I2S codec, then enable this option by saying Y or m. This is + typically used for Chromebooks. This is a recommended option. config SND_SOC_INTEL_BAYTRAIL tristate "Intel ASoC SST driver for Baytrail (legacy)" - depends on SND_SOC_INTEL_SST_TOPLEVEL depends on DMADEVICES select SND_SOC_INTEL_SST select SND_SOC_INTEL_SST_FIRMWARE + select SND_SOC_ACPI_INTEL_MATCH + help + If you have a Intel Baytrail platform connected to an I2S codec, + then enable this option by saying Y or m. This was typically used + for Baytrail Chromebooks but this option is now deprecated and is + not recommended, use SND_SST_ATOM_HIFI2_PLATFORM instead. config SND_SST_ATOM_HIFI2_PLATFORM tristate "Intel ASoC SST driver for HiFi2 platforms (*field, *trail)" - depends on SND_SOC_INTEL_SST_TOPLEVEL && X86 + depends on X86 select SND_SOC_COMPRESS + select SND_SOC_ACPI_INTEL_MATCH config SND_SOC_INTEL_SKYLAKE tristate "Intel ASoC SST driver for SKL/BXT/KBL/GLK/CNL" - depends on SND_SOC_INTEL_SST_TOPLEVEL && PCI && ACPI + depends on PCI && ACPI select SND_HDA_EXT_CORE select SND_HDA_DSP_LOADER select SND_SOC_TOPOLOGY select SND_SOC_INTEL_SST + select SND_SOC_ACPI_INTEL_MATCH + help + If you have a Intel Skylake/Broxton/ApolloLake/KabyLake/ + GeminiLake or CannonLake platform with the DSP enabled in the BIOS + then enable this option by saying Y or m. 
+ +config SND_SOC_ACPI_INTEL_MATCH + tristate + select SND_SOC_ACPI if ACPI + # this option controls the compilation of ACPI matching tables and + # helpers and is not meant to be selected by the user. + +endif ## SND_SOC_INTEL_SST_TOPLEVEL # ASoC codec drivers source "sound/soc/intel/boards/Kconfig" diff --git a/sound/soc/intel/Makefile b/sound/soc/intel/Makefile index b973d457e834..8160520fd74c 100644 --- a/sound/soc/intel/Makefile +++ b/sound/soc/intel/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Core support -obj-$(CONFIG_SND_SOC_INTEL_COMMON) += common/ +obj-$(CONFIG_SND_SOC) += common/ # Platform Support obj-$(CONFIG_SND_SOC_INTEL_HASWELL) += haswell/ diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 6f754708a48c..08481882c240 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -1,7 +1,14 @@ -config SND_SOC_INTEL_MACH - tristate "Intel Audio machine drivers" +menuconfig SND_SOC_INTEL_MACH + bool "Intel Machine drivers" depends on SND_SOC_INTEL_SST_TOPLEVEL - select SND_SOC_ACPI_INTEL_MATCH if ACPI + help + Intel ASoC Machine Drivers. If you have a Intel machine that + has an audio controller with a DSP and I2S or DMIC port, then + enable this option by saying Y + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Intel ASoC machine drivers. if SND_SOC_INTEL_MACH @@ -17,103 +24,106 @@ config SND_MFLD_MACHINE Say Y if you have such a device. If unsure select "N". +if SND_SOC_INTEL_HASWELL + config SND_SOC_INTEL_HASWELL_MACH tristate "ASoC Audio DSP support for Intel Haswell Lynxpoint" depends on X86_INTEL_LPSS && I2C && I2C_DESIGNWARE_PLATFORM - depends on SND_SOC_INTEL_HASWELL select SND_SOC_RT5640 help This adds support for the Lynxpoint Audio DSP on Intel(R) Haswell - Ultrabook platforms. - Say Y if you have such a device. + Ultrabook platforms. This is a recommended option. 
+ Say Y or m if you have such a device. If unsure select "N". config SND_SOC_INTEL_BDW_RT5677_MACH tristate "ASoC Audio driver for Intel Broadwell with RT5677 codec" depends on X86_INTEL_LPSS && GPIOLIB && I2C - depends on SND_SOC_INTEL_HASWELL select SND_SOC_RT5677 help This adds support for Intel Broadwell platform based boards with - the RT5677 audio codec. + the RT5677 audio codec. This is a recommended option. + Say Y or m if you have such a device. + If unsure select "N". config SND_SOC_INTEL_BROADWELL_MACH tristate "ASoC Audio DSP support for Intel Broadwell Wildcatpoint" depends on X86_INTEL_LPSS && I2C && I2C_DESIGNWARE_PLATFORM - depends on SND_SOC_INTEL_HASWELL select SND_SOC_RT286 help This adds support for the Wilcatpoint Audio DSP on Intel(R) Broadwell Ultrabook platforms. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". +endif + +if SND_SOC_INTEL_BAYTRAIL config SND_SOC_INTEL_BYT_MAX98090_MACH tristate "ASoC Audio driver for Intel Baytrail with MAX98090 codec" depends on X86_INTEL_LPSS && I2C - depends on SND_SST_IPC_ACPI = n - depends on SND_SOC_INTEL_BAYTRAIL select SND_SOC_MAX98090 help This adds audio driver for Intel Baytrail platform based boards - with the MAX98090 audio codec. + with the MAX98090 audio codec. This driver is deprecated, use + SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH instead for better + functionality. config SND_SOC_INTEL_BYT_RT5640_MACH tristate "ASoC Audio driver for Intel Baytrail with RT5640 codec" depends on X86_INTEL_LPSS && I2C - depends on SND_SST_IPC_ACPI = n - depends on SND_SOC_INTEL_BAYTRAIL select SND_SOC_RT5640 help This adds audio driver for Intel Baytrail platform based boards with the RT5640 audio codec. This driver is deprecated, use SND_SOC_INTEL_BYTCR_RT5640_MACH instead for better functionality. 
+endif + +if SND_SST_ATOM_HIFI2_PLATFORM + config SND_SOC_INTEL_BYTCR_RT5640_MACH tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5640 codec" depends on X86 && I2C && ACPI select SND_SOC_RT5640 - depends on SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR platforms with RT5640 audio codec. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_BYTCR_RT5651_MACH tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5651 codec" depends on X86 && I2C && ACPI select SND_SOC_RT5651 - depends on SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR platforms with RT5651 audio codec. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_CHT_BSW_RT5672_MACH tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5672 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_RT5670 - depends on SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell platforms with RT5672 audio codec. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_CHT_BSW_RT5645_MACH tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5645/5650 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_RT5645 - depends on SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell platforms with RT5645/5650 audio codec. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". 
config SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH @@ -121,63 +131,67 @@ config SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_MAX98090 select SND_SOC_TS3A227E - depends on SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell platforms with MAX98090 audio codec it also can support TI jack chip as aux device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_BYT_CHT_DA7213_MACH tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail with DA7212/7213 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_DA7213 - depends on SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Baytrail & CherryTrail platforms with DA7212/7213 audio codec. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_BYT_CHT_ES8316_MACH tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail with ES8316 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_ES8316 - depends on SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Baytrail & Cherrytrail platforms with ES8316 audio codec. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail platform with no codec (MinnowBoard MAX, Up)" depends on X86_INTEL_LPSS && I2C && ACPI - depends on SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for the MinnowBoard Max or Up boards and provides access to I2S signals on the Low-Speed - connector + connector. This is not a recommended option outside of these cases. + It is not intended to be enabled by distros by default. 
+ Say Y or m if you have such a device. + If unsure select "N". +endif + +if SND_SOC_INTEL_SKYLAKE + config SND_SOC_INTEL_SKL_RT286_MACH tristate "ASoC Audio driver for SKL with RT286 I2S mode" depends on X86 && ACPI && I2C - depends on SND_SOC_INTEL_SKYLAKE select SND_SOC_RT286 select SND_SOC_DMIC select SND_SOC_HDAC_HDMI help This adds support for ASoC machine driver for Skylake platforms with RT286 I2S audio codec. - Say Y if you have such a device. + Say Y or m if you have such a device. If unsure select "N". config SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH tristate "ASoC Audio driver for SKL with NAU88L25 and SSM4567 in I2S Mode" depends on X86_INTEL_LPSS && I2C - depends on SND_SOC_INTEL_SKYLAKE select SND_SOC_NAU8825 select SND_SOC_SSM4567 select SND_SOC_DMIC @@ -185,13 +199,12 @@ config SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH help This adds support for ASoC Onboard Codec I2S machine driver. This will create an alsa sound card for NAU88L25 + SSM4567. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH tristate "ASoC Audio driver for SKL with NAU88L25 and MAX98357A in I2S Mode" depends on X86_INTEL_LPSS && I2C - depends on SND_SOC_INTEL_SKYLAKE select SND_SOC_NAU8825 select SND_SOC_MAX98357A select SND_SOC_DMIC @@ -199,13 +212,12 @@ config SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH help This adds support for ASoC Onboard Codec I2S machine driver. This will create an alsa sound card for NAU88L25 + MAX98357A. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". 
config SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH tristate "ASoC Audio driver for Broxton with DA7219 and MAX98357A in I2S Mode" depends on X86 && ACPI && I2C - depends on SND_SOC_INTEL_SKYLAKE select SND_SOC_DA7219 select SND_SOC_MAX98357A select SND_SOC_DMIC @@ -214,13 +226,12 @@ config SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH help This adds support for ASoC machine driver for Broxton-P platforms with DA7219 + MAX98357A I2S audio codec. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_BXT_RT298_MACH tristate "ASoC Audio driver for Broxton with RT298 I2S mode" depends on X86 && ACPI && I2C - depends on SND_SOC_INTEL_SKYLAKE select SND_SOC_RT298 select SND_SOC_DMIC select SND_SOC_HDAC_HDMI @@ -228,14 +239,13 @@ config SND_SOC_INTEL_BXT_RT298_MACH help This adds support for ASoC machine driver for Broxton platforms with RT286 I2S audio codec. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". config SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH tristate "ASoC Audio driver for KBL with RT5663 and MAX98927 in I2S Mode" depends on X86_INTEL_LPSS && I2C select SND_SOC_INTEL_SST - depends on SND_SOC_INTEL_SKYLAKE select SND_SOC_RT5663 select SND_SOC_MAX98927 select SND_SOC_DMIC @@ -243,14 +253,13 @@ config SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH help This adds support for ASoC Onboard Codec I2S machine driver. This will create an alsa sound card for RT5663 + MAX98927. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". 
config SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH tristate "ASoC Audio driver for KBL with RT5663, RT5514 and MAX98927 in I2S Mode" depends on X86_INTEL_LPSS && I2C && SPI select SND_SOC_INTEL_SST - depends on SND_SOC_INTEL_SKYLAKE select SND_SOC_RT5663 select SND_SOC_RT5514 select SND_SOC_RT5514_SPI @@ -259,7 +268,8 @@ config SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH help This adds support for ASoC Onboard Codec I2S machine driver. This will create an alsa sound card for RT5663 + RT5514 + MAX98927. - Say Y if you have such a device. + Say Y or m if you have such a device. This is a recommended option. If unsure select "N". - endif + +endif ## SND_SOC_INTEL_MACH -- cgit v1.2.3 From 4772c16ede522d46219a59646503d2020841a6f4 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 4 Jan 2018 16:35:54 -0600 Subject: ASoC: Intel: Kconfig: Simplify-clarify ACPI/PCI dependencies PCI/ACPI selections should not happen in Kconfig for machine drivers, move to SOC selections. Add distinction between PCI and ACPI HiFi2 platforms and help text. There should be no functionality change. The PCI-based platforms may be removed at some point since Medfield is not really supported by anyone, and with Edison now defunct support for Merrifield/Edison is to be determined. The dependency on SND_DMA_SGBUF for Haswell is not clear at this point and may have to be further updated. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/Kconfig | 27 +++++++++++++++++++++++---- sound/soc/intel/boards/Kconfig | 14 ++++---------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index b827d3b70095..16374576cb6e 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -41,7 +41,7 @@ config SND_SOC_INTEL_SST_ACPI config SND_SOC_INTEL_HASWELL tristate "Intel ASoC SST driver for Haswell/Broadwell" depends on SND_DMA_SGBUF - depends on DMADEVICES + depends on DMADEVICES && ACPI select SND_SOC_INTEL_SST select SND_SOC_INTEL_SST_FIRMWARE select SND_SOC_ACPI_INTEL_MATCH @@ -52,7 +52,7 @@ config SND_SOC_INTEL_HASWELL config SND_SOC_INTEL_BAYTRAIL tristate "Intel ASoC SST driver for Baytrail (legacy)" - depends on DMADEVICES + depends on DMADEVICES && ACPI select SND_SOC_INTEL_SST select SND_SOC_INTEL_SST_FIRMWARE select SND_SOC_ACPI_INTEL_MATCH @@ -62,11 +62,30 @@ config SND_SOC_INTEL_BAYTRAIL for Baytrail Chromebooks but this option is now deprecated and is not recommended, use SND_SST_ATOM_HIFI2_PLATFORM instead. +config SND_SST_ATOM_HIFI2_PLATFORM_PCI + tristate "Intel ASoC SST driver for PCI HiFi2 platforms (Medfield, Merrifield)" + depends on X86 && PCI + select SND_SST_IPC_PCI + select SND_SOC_COMPRESS + select SND_SOC_INTEL_COMMON + help + If you have a Intel Medfield or Merrifield/Edison platform, then + enable this option by saying Y or m. 
Distros will typically not + enable this option: Medfield devices are not available to + developers and while Merrifield/Edison can run a mainline kernel with + limited functionality it will require a firmware file which + is not in the standard firmware tree + config SND_SST_ATOM_HIFI2_PLATFORM - tristate "Intel ASoC SST driver for HiFi2 platforms (*field, *trail)" - depends on X86 + tristate "Intel ASoC SST driver for ACPI HiFi2 platforms (Baytrail, Cherrytrail)" + depends on X86 && ACPI + select SND_SST_IPC_ACPI select SND_SOC_COMPRESS select SND_SOC_ACPI_INTEL_MATCH + help + If you have a Intel Baytrail or Cherrytrail platform with an I2S + codec, then enable this option by saying Y or m. This is a + recommended option config SND_SOC_INTEL_SKYLAKE tristate "Intel ASoC SST driver for SKL/BXT/KBL/GLK/CNL" diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 08481882c240..e926f9747232 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -12,18 +12,20 @@ menuconfig SND_SOC_INTEL_MACH if SND_SOC_INTEL_MACH +if SND_SST_ATOM_HIFI2_PLATFORM_PCI + config SND_MFLD_MACHINE tristate "SOC Machine Audio driver for Intel Medfield MID platform" depends on INTEL_SCU_IPC select SND_SOC_SN95031 - depends on SND_SST_ATOM_HIFI2_PLATFORM - select SND_SST_IPC_PCI help This adds support for ASoC machine driver for Intel(R) MID Medfield platform used as alsa device in audio substem in Intel(R) MID devices Say Y if you have such a device. If unsure select "N". +endif + if SND_SOC_INTEL_HASWELL config SND_SOC_INTEL_HASWELL_MACH @@ -86,7 +88,6 @@ config SND_SOC_INTEL_BYTCR_RT5640_MACH tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5640 codec" depends on X86 && I2C && ACPI select SND_SOC_RT5640 - select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR platforms with RT5640 audio codec. 
@@ -97,7 +98,6 @@ config SND_SOC_INTEL_BYTCR_RT5651_MACH tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5651 codec" depends on X86 && I2C && ACPI select SND_SOC_RT5651 - select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR platforms with RT5651 audio codec. @@ -108,7 +108,6 @@ config SND_SOC_INTEL_CHT_BSW_RT5672_MACH tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5672 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_RT5670 - select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell platforms with RT5672 audio codec. @@ -119,7 +118,6 @@ config SND_SOC_INTEL_CHT_BSW_RT5645_MACH tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5645/5650 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_RT5645 - select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell platforms with RT5645/5650 audio codec. @@ -131,7 +129,6 @@ config SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_MAX98090 select SND_SOC_TS3A227E - select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell platforms with MAX98090 audio codec it also can support TI jack chip as aux device. @@ -142,7 +139,6 @@ config SND_SOC_INTEL_BYT_CHT_DA7213_MACH tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail with DA7212/7213 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_DA7213 - select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Baytrail & CherryTrail platforms with DA7212/7213 audio codec. 
@@ -153,7 +149,6 @@ config SND_SOC_INTEL_BYT_CHT_ES8316_MACH tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail with ES8316 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_ES8316 - select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for Intel(R) Baytrail & Cherrytrail platforms with ES8316 audio codec. @@ -163,7 +158,6 @@ config SND_SOC_INTEL_BYT_CHT_ES8316_MACH config SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail platform with no codec (MinnowBoard MAX, Up)" depends on X86_INTEL_LPSS && I2C && ACPI - select SND_SST_IPC_ACPI help This adds support for ASoC machine driver for the MinnowBoard Max or Up boards and provides access to I2S signals on the Low-Speed -- cgit v1.2.3 From def2c4284fd6b3402265ee050d769897e3331521 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 4 Jan 2018 16:35:55 -0600 Subject: ASoC: Intel: document what Kconfig options do Document in comments what the options are supposed to mean, before clean-up in next patch. No functionality change here. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/Kconfig | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index 16374576cb6e..01b75df6b118 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -16,16 +16,27 @@ if SND_SOC_INTEL_SST_TOPLEVEL config SND_SST_IPC tristate + # This option controls the IPC core for HiFi2 platforms config SND_SST_IPC_PCI tristate select SND_SST_IPC + # This option controls the PCI-based IPC for HiFi2 platforms + # (Medfield, Merrifield). 
config SND_SST_IPC_ACPI tristate select SND_SST_IPC select SND_SOC_INTEL_SST select IOSF_MBI + # This option controls the ACPI-based IPC for HiFi2 platforms + # (Baytrail, Cherrytrail) + +config SND_SOC_INTEL_SST_ACPI + tristate + # This option controls ACPI-based probing on + # Haswell/Broadwell/Baytrail legacy and will be set + # when these platforms are enabled config SND_SOC_INTEL_SST tristate @@ -34,9 +45,9 @@ config SND_SOC_INTEL_SST config SND_SOC_INTEL_SST_FIRMWARE tristate select DW_DMAC_CORE - -config SND_SOC_INTEL_SST_ACPI - tristate + # This option controls firmware download on + # Haswell/Broadwell/Baytrail legacy and will be set + # when these platforms are enabled config SND_SOC_INTEL_HASWELL tristate "Intel ASoC SST driver for Haswell/Broadwell" -- cgit v1.2.3 From f3f2bb7a0ebf9d83229810f69a53fee2c0441b2c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 4 Jan 2018 16:35:56 -0600 Subject: ASoC: Intel: Fix nested/unnecessary Kconfig dependencies This patch fixes a number of issues: 1. IOSF_MBI is only needed for byt-cr detection, which is only supported on Baytrail/Cherrytrail, move to HiFi2 config 2. SND_SOC_INTEL_SST should not select SND_SOC_INTEL_SST_ACPI, the latter config is only valid for Haswell/Baytrail legacy but not needed by Skylake 3. SND_SST_IPC_ACPI, used only by the atom/sst driver, should not select SND_SOC_INTEL_SST, none of the code under common/sst*.c is used This nesting of configs really makes no sense, it's easier to maintain if for each platform one can control what is strictly required. Compiled-tested with each of Haswell, Baytrail legacy, HiFi2, SKL cases selected independently. 
0-day and explicit randconfig tests did not report additional issues and no functionality loss was observed in Intel tests on HIFI2 and SKYLAKE platforms Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index 01b75df6b118..ca0d6eb95d11 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -27,8 +27,6 @@ config SND_SST_IPC_PCI config SND_SST_IPC_ACPI tristate select SND_SST_IPC - select SND_SOC_INTEL_SST - select IOSF_MBI # This option controls the ACPI-based IPC for HiFi2 platforms # (Baytrail, Cherrytrail) @@ -40,7 +38,6 @@ config SND_SOC_INTEL_SST_ACPI config SND_SOC_INTEL_SST tristate - select SND_SOC_INTEL_SST_ACPI if ACPI config SND_SOC_INTEL_SST_FIRMWARE tristate @@ -54,6 +51,7 @@ config SND_SOC_INTEL_HASWELL depends on SND_DMA_SGBUF depends on DMADEVICES && ACPI select SND_SOC_INTEL_SST + select SND_SOC_INTEL_SST_ACPI select SND_SOC_INTEL_SST_FIRMWARE select SND_SOC_ACPI_INTEL_MATCH help @@ -65,6 +63,7 @@ config SND_SOC_INTEL_BAYTRAIL tristate "Intel ASoC SST driver for Baytrail (legacy)" depends on DMADEVICES && ACPI select SND_SOC_INTEL_SST + select SND_SOC_INTEL_SST_ACPI select SND_SOC_INTEL_SST_FIRMWARE select SND_SOC_ACPI_INTEL_MATCH help @@ -93,6 +92,7 @@ config SND_SST_ATOM_HIFI2_PLATFORM select SND_SST_IPC_ACPI select SND_SOC_COMPRESS select SND_SOC_ACPI_INTEL_MATCH + select IOSF_MBI help If you have a Intel Baytrail or Cherrytrail platform with an I2S codec, then enable this option by saying Y or m. This is a -- cgit v1.2.3 From 043f5a0b8d6e4b9cb373978ca1883fe16287abfd Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 4 Jan 2018 16:35:57 -0600 Subject: ASoC: Intel: boards: align Kconfig dependencies for Haswell/Broadwell Make sure that the same I2C/I2C_DESIGNWARE_PLATFORM are selected. 
The latter might actually need to be moved to the SOC side of things, it really has no place in a machine driver dependency Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index e926f9747232..358f8f33adc4 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -40,7 +40,7 @@ config SND_SOC_INTEL_HASWELL_MACH config SND_SOC_INTEL_BDW_RT5677_MACH tristate "ASoC Audio driver for Intel Broadwell with RT5677 codec" - depends on X86_INTEL_LPSS && GPIOLIB && I2C + depends on X86_INTEL_LPSS && I2C && I2C_DESIGNWARE_PLATFORM && GPIOLIB select SND_SOC_RT5677 help This adds support for Intel Broadwell platform based boards with -- cgit v1.2.3 From 99644597d71333866070f0223bffa62c03f36587 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 4 Jan 2018 16:35:58 -0600 Subject: ASoC: Intel: boards: align Kconfig configurations for HiFi2 Make sure all the configs are aligned Also add the missing dependencies on SOC_ACPI stuff used to fix DAI names based on HID and fix a couple of indentation issues Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/Kconfig | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 358f8f33adc4..08c482cc02f7 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -85,29 +85,32 @@ endif if SND_SST_ATOM_HIFI2_PLATFORM config SND_SOC_INTEL_BYTCR_RT5640_MACH - tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5640 codec" - depends on X86 && I2C && ACPI + tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5640 codec" + depends 
on X86_INTEL_LPSS && I2C && ACPI + select SND_SOC_ACPI select SND_SOC_RT5640 help - This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR - platforms with RT5640 audio codec. - Say Y or m if you have such a device. This is a recommended option. - If unsure select "N". + This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR + platforms with RT5640 audio codec. + Say Y or m if you have such a device. This is a recommended option. + If unsure select "N". config SND_SOC_INTEL_BYTCR_RT5651_MACH - tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5651 codec" - depends on X86 && I2C && ACPI + tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5651 codec" + depends on X86_INTEL_LPSS && I2C && ACPI + select SND_SOC_ACPI select SND_SOC_RT5651 help - This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR - platforms with RT5651 audio codec. - Say Y or m if you have such a device. This is a recommended option. - If unsure select "N". + This adds support for ASoC machine driver for Intel(R) Baytrail and Baytrail-CR + platforms with RT5651 audio codec. + Say Y or m if you have such a device. This is a recommended option. + If unsure select "N". config SND_SOC_INTEL_CHT_BSW_RT5672_MACH - tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5672 codec" + tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5672 codec" depends on X86_INTEL_LPSS && I2C && ACPI - select SND_SOC_RT5670 + select SND_SOC_ACPI + select SND_SOC_RT5670 help This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell platforms with RT5672 audio codec. 
@@ -117,6 +120,7 @@ config SND_SOC_INTEL_CHT_BSW_RT5672_MACH config SND_SOC_INTEL_CHT_BSW_RT5645_MACH tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5645/5650 codec" depends on X86_INTEL_LPSS && I2C && ACPI + select SND_SOC_ACPI select SND_SOC_RT5645 help This adds support for ASoC machine driver for Intel(R) Cherrytrail & Braswell @@ -138,6 +142,7 @@ config SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH config SND_SOC_INTEL_BYT_CHT_DA7213_MACH tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail with DA7212/7213 codec" depends on X86_INTEL_LPSS && I2C && ACPI + select SND_SOC_ACPI select SND_SOC_DA7213 help This adds support for ASoC machine driver for Intel(R) Baytrail & CherryTrail -- cgit v1.2.3 From 969eaef7710400e39be13190bace40910555d426 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 4 Jan 2018 16:35:59 -0600 Subject: ASoC: Intel: boards: align/fix SKL/BXT/KBL Kconfigs No reason why SND_SOC_INTEL_SST should be set here. Also make sure same dependencies are used everywhere (only last one has SPI in addition). 
Replace X86_INTEL_LPSS by MFD_INTEL_LPSS since the former makes no sense for Skylake+ devices Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/Kconfig | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 08c482cc02f7..063611ad641c 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -178,7 +178,7 @@ if SND_SOC_INTEL_SKYLAKE config SND_SOC_INTEL_SKL_RT286_MACH tristate "ASoC Audio driver for SKL with RT286 I2S mode" - depends on X86 && ACPI && I2C + depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_RT286 select SND_SOC_DMIC select SND_SOC_HDAC_HDMI @@ -190,7 +190,7 @@ config SND_SOC_INTEL_SKL_RT286_MACH config SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH tristate "ASoC Audio driver for SKL with NAU88L25 and SSM4567 in I2S Mode" - depends on X86_INTEL_LPSS && I2C + depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_NAU8825 select SND_SOC_SSM4567 select SND_SOC_DMIC @@ -203,7 +203,7 @@ config SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH config SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH tristate "ASoC Audio driver for SKL with NAU88L25 and MAX98357A in I2S Mode" - depends on X86_INTEL_LPSS && I2C + depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_NAU8825 select SND_SOC_MAX98357A select SND_SOC_DMIC @@ -216,7 +216,7 @@ config SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH config SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH tristate "ASoC Audio driver for Broxton with DA7219 and MAX98357A in I2S Mode" - depends on X86 && ACPI && I2C + depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_DA7219 select SND_SOC_MAX98357A select SND_SOC_DMIC @@ -230,7 +230,7 @@ config SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH config SND_SOC_INTEL_BXT_RT298_MACH tristate "ASoC Audio driver for Broxton with RT298 I2S mode" - depends on X86 && ACPI && I2C + depends on 
MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_RT298 select SND_SOC_DMIC select SND_SOC_HDAC_HDMI @@ -243,8 +243,7 @@ config SND_SOC_INTEL_BXT_RT298_MACH config SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH tristate "ASoC Audio driver for KBL with RT5663 and MAX98927 in I2S Mode" - depends on X86_INTEL_LPSS && I2C - select SND_SOC_INTEL_SST + depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_RT5663 select SND_SOC_MAX98927 select SND_SOC_DMIC @@ -257,8 +256,8 @@ config SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH config SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH tristate "ASoC Audio driver for KBL with RT5663, RT5514 and MAX98927 in I2S Mode" - depends on X86_INTEL_LPSS && I2C && SPI - select SND_SOC_INTEL_SST + depends on MFD_INTEL_LPSS && I2C && ACPI + depends on SPI select SND_SOC_RT5663 select SND_SOC_RT5514 select SND_SOC_RT5514_SPI -- cgit v1.2.3 From 89671061116f83bd8b8b01d1c9620a26b221725f Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 4 Jan 2018 16:36:00 -0600 Subject: ASoC: Intel: kconfig: add some comments for if symbols Help in finding matching "if" endings by commenting the "endif". Signed-off-by: Vinod Koul Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown --- sound/soc/intel/boards/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 063611ad641c..e1b6addcd13d 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -24,7 +24,7 @@ config SND_MFLD_MACHINE Say Y if you have such a device. If unsure select "N". -endif +endif ## SND_SST_ATOM_HIFI2_PLATFORM_PCI if SND_SOC_INTEL_HASWELL @@ -57,7 +57,7 @@ config SND_SOC_INTEL_BROADWELL_MACH Ultrabook platforms. Say Y or m if you have such a device. This is a recommended option. If unsure select "N". 
-endif +endif ## SND_SOC_INTEL_HASWELL if SND_SOC_INTEL_BAYTRAIL @@ -80,7 +80,7 @@ config SND_SOC_INTEL_BYT_RT5640_MACH with the RT5640 audio codec. This driver is deprecated, use SND_SOC_INTEL_BYTCR_RT5640_MACH instead for better functionality. -endif +endif ## SND_SOC_INTEL_BAYTRAIL if SND_SST_ATOM_HIFI2_PLATFORM @@ -172,7 +172,7 @@ config SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH If unsure select "N". -endif +endif ## SND_SST_ATOM_HIFI2_PLATFORM if SND_SOC_INTEL_SKYLAKE @@ -268,6 +268,6 @@ config SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH create an alsa sound card for RT5663 + RT5514 + MAX98927. Say Y or m if you have such a device. This is a recommended option. If unsure select "N". -endif +endif ## SND_SOC_INTEL_SKYLAKE endif ## SND_SOC_INTEL_MACH -- cgit v1.2.3 From 8fca15839ce5c473c57356abe36e166367e6c6ef Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 4 Jan 2018 16:36:01 -0600 Subject: ASoC: Intel: kconfig: drop boiler plate text from config items Drop "Intel ASoC SST driver for " platforms and "SOC Machine Audio driver for Intel" for machines.. Signed-off-by: Vinod Koul Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Signed-off-by: Mark Brown --- sound/soc/intel/Kconfig | 10 +++++----- sound/soc/intel/boards/Kconfig | 42 +++++++++++++++++++++--------------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index ca0d6eb95d11..b0bd1938b71e 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -47,7 +47,7 @@ config SND_SOC_INTEL_SST_FIRMWARE # when these platforms are enabled config SND_SOC_INTEL_HASWELL - tristate "Intel ASoC SST driver for Haswell/Broadwell" + tristate "Haswell/Broadwell Platforms" depends on SND_DMA_SGBUF depends on DMADEVICES && ACPI select SND_SOC_INTEL_SST @@ -60,7 +60,7 @@ config SND_SOC_INTEL_HASWELL typically used for Chromebooks. This is a recommended option. 
config SND_SOC_INTEL_BAYTRAIL - tristate "Intel ASoC SST driver for Baytrail (legacy)" + tristate "Baytrail (legacy) Platforms" depends on DMADEVICES && ACPI select SND_SOC_INTEL_SST select SND_SOC_INTEL_SST_ACPI @@ -73,7 +73,7 @@ config SND_SOC_INTEL_BAYTRAIL not recommended, use SND_SST_ATOM_HIFI2_PLATFORM instead. config SND_SST_ATOM_HIFI2_PLATFORM_PCI - tristate "Intel ASoC SST driver for PCI HiFi2 platforms (Medfield, Merrifield)" + tristate "PCI HiFi2 (Medfield, Merrifield) Platforms" depends on X86 && PCI select SND_SST_IPC_PCI select SND_SOC_COMPRESS @@ -87,7 +87,7 @@ config SND_SST_ATOM_HIFI2_PLATFORM_PCI is not in the standard firmware tree config SND_SST_ATOM_HIFI2_PLATFORM - tristate "Intel ASoC SST driver for ACPI HiFi2 platforms (Baytrail, Cherrytrail)" + tristate "ACPI HiFi2 (Baytrail, Cherrytrail) Platforms" depends on X86 && ACPI select SND_SST_IPC_ACPI select SND_SOC_COMPRESS @@ -99,7 +99,7 @@ config SND_SST_ATOM_HIFI2_PLATFORM recommended option config SND_SOC_INTEL_SKYLAKE - tristate "Intel ASoC SST driver for SKL/BXT/KBL/GLK/CNL" + tristate "SKL/BXT/KBL/GLK/CNL... 
Platforms" depends on PCI && ACPI select SND_HDA_EXT_CORE select SND_HDA_DSP_LOADER diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index e1b6addcd13d..12761d8fd8a5 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -15,7 +15,7 @@ if SND_SOC_INTEL_MACH if SND_SST_ATOM_HIFI2_PLATFORM_PCI config SND_MFLD_MACHINE - tristate "SOC Machine Audio driver for Intel Medfield MID platform" + tristate "Medfield (Intel MID)" depends on INTEL_SCU_IPC select SND_SOC_SN95031 help @@ -29,7 +29,7 @@ endif ## SND_SST_ATOM_HIFI2_PLATFORM_PCI if SND_SOC_INTEL_HASWELL config SND_SOC_INTEL_HASWELL_MACH - tristate "ASoC Audio DSP support for Intel Haswell Lynxpoint" + tristate "Haswell Lynxpoint" depends on X86_INTEL_LPSS && I2C && I2C_DESIGNWARE_PLATFORM select SND_SOC_RT5640 help @@ -39,7 +39,7 @@ config SND_SOC_INTEL_HASWELL_MACH If unsure select "N". config SND_SOC_INTEL_BDW_RT5677_MACH - tristate "ASoC Audio driver for Intel Broadwell with RT5677 codec" + tristate "Broadwell with RT5677 codec" depends on X86_INTEL_LPSS && I2C && I2C_DESIGNWARE_PLATFORM && GPIOLIB select SND_SOC_RT5677 help @@ -49,7 +49,7 @@ config SND_SOC_INTEL_BDW_RT5677_MACH If unsure select "N". config SND_SOC_INTEL_BROADWELL_MACH - tristate "ASoC Audio DSP support for Intel Broadwell Wildcatpoint" + tristate "Broadwell Wildcatpoint" depends on X86_INTEL_LPSS && I2C && I2C_DESIGNWARE_PLATFORM select SND_SOC_RT286 help @@ -62,7 +62,7 @@ endif ## SND_SOC_INTEL_HASWELL if SND_SOC_INTEL_BAYTRAIL config SND_SOC_INTEL_BYT_MAX98090_MACH - tristate "ASoC Audio driver for Intel Baytrail with MAX98090 codec" + tristate "Baytrail with MAX98090 codec" depends on X86_INTEL_LPSS && I2C select SND_SOC_MAX98090 help @@ -72,7 +72,7 @@ config SND_SOC_INTEL_BYT_MAX98090_MACH functionality. 
config SND_SOC_INTEL_BYT_RT5640_MACH - tristate "ASoC Audio driver for Intel Baytrail with RT5640 codec" + tristate "Baytrail with RT5640 codec" depends on X86_INTEL_LPSS && I2C select SND_SOC_RT5640 help @@ -85,7 +85,7 @@ endif ## SND_SOC_INTEL_BAYTRAIL if SND_SST_ATOM_HIFI2_PLATFORM config SND_SOC_INTEL_BYTCR_RT5640_MACH - tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5640 codec" + tristate "Baytrail and Baytrail-CR with RT5640 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_ACPI select SND_SOC_RT5640 @@ -96,7 +96,7 @@ config SND_SOC_INTEL_BYTCR_RT5640_MACH If unsure select "N". config SND_SOC_INTEL_BYTCR_RT5651_MACH - tristate "ASoC Audio driver for Intel Baytrail and Baytrail-CR with RT5651 codec" + tristate "Baytrail and Baytrail-CR with RT5651 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_ACPI select SND_SOC_RT5651 @@ -107,7 +107,7 @@ config SND_SOC_INTEL_BYTCR_RT5651_MACH If unsure select "N". config SND_SOC_INTEL_CHT_BSW_RT5672_MACH - tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5672 codec" + tristate "Cherrytrail & Braswell with RT5672 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_ACPI select SND_SOC_RT5670 @@ -118,7 +118,7 @@ config SND_SOC_INTEL_CHT_BSW_RT5672_MACH If unsure select "N". config SND_SOC_INTEL_CHT_BSW_RT5645_MACH - tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with RT5645/5650 codec" + tristate "Cherrytrail & Braswell with RT5645/5650 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_ACPI select SND_SOC_RT5645 @@ -129,7 +129,7 @@ config SND_SOC_INTEL_CHT_BSW_RT5645_MACH If unsure select "N". 
config SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH - tristate "ASoC Audio driver for Intel Cherrytrail & Braswell with MAX98090 & TI codec" + tristate "Cherrytrail & Braswell with MAX98090 & TI codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_MAX98090 select SND_SOC_TS3A227E @@ -140,7 +140,7 @@ config SND_SOC_INTEL_CHT_BSW_MAX98090_TI_MACH If unsure select "N". config SND_SOC_INTEL_BYT_CHT_DA7213_MACH - tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail with DA7212/7213 codec" + tristate "Baytrail & Cherrytrail with DA7212/7213 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_ACPI select SND_SOC_DA7213 @@ -151,7 +151,7 @@ config SND_SOC_INTEL_BYT_CHT_DA7213_MACH If unsure select "N". config SND_SOC_INTEL_BYT_CHT_ES8316_MACH - tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail with ES8316 codec" + tristate "Baytrail & Cherrytrail with ES8316 codec" depends on X86_INTEL_LPSS && I2C && ACPI select SND_SOC_ES8316 help @@ -161,7 +161,7 @@ config SND_SOC_INTEL_BYT_CHT_ES8316_MACH If unsure select "N". config SND_SOC_INTEL_BYT_CHT_NOCODEC_MACH - tristate "ASoC Audio driver for Intel Baytrail & Cherrytrail platform with no codec (MinnowBoard MAX, Up)" + tristate "Baytrail & Cherrytrail platform with no codec (MinnowBoard MAX, Up)" depends on X86_INTEL_LPSS && I2C && ACPI help This adds support for ASoC machine driver for the MinnowBoard Max or @@ -177,7 +177,7 @@ endif ## SND_SST_ATOM_HIFI2_PLATFORM if SND_SOC_INTEL_SKYLAKE config SND_SOC_INTEL_SKL_RT286_MACH - tristate "ASoC Audio driver for SKL with RT286 I2S mode" + tristate "SKL with RT286 I2S mode" depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_RT286 select SND_SOC_DMIC @@ -189,7 +189,7 @@ config SND_SOC_INTEL_SKL_RT286_MACH If unsure select "N". 
config SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH - tristate "ASoC Audio driver for SKL with NAU88L25 and SSM4567 in I2S Mode" + tristate "SKL with NAU88L25 and SSM4567 in I2S Mode" depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_NAU8825 select SND_SOC_SSM4567 @@ -202,7 +202,7 @@ config SND_SOC_INTEL_SKL_NAU88L25_SSM4567_MACH If unsure select "N". config SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH - tristate "ASoC Audio driver for SKL with NAU88L25 and MAX98357A in I2S Mode" + tristate "SKL with NAU88L25 and MAX98357A in I2S Mode" depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_NAU8825 select SND_SOC_MAX98357A @@ -215,7 +215,7 @@ config SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH If unsure select "N". config SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH - tristate "ASoC Audio driver for Broxton with DA7219 and MAX98357A in I2S Mode" + tristate "Broxton with DA7219 and MAX98357A in I2S Mode" depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_DA7219 select SND_SOC_MAX98357A @@ -229,7 +229,7 @@ config SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH If unsure select "N". config SND_SOC_INTEL_BXT_RT298_MACH - tristate "ASoC Audio driver for Broxton with RT298 I2S mode" + tristate "Broxton with RT298 I2S mode" depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_RT298 select SND_SOC_DMIC @@ -242,7 +242,7 @@ config SND_SOC_INTEL_BXT_RT298_MACH If unsure select "N". config SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH - tristate "ASoC Audio driver for KBL with RT5663 and MAX98927 in I2S Mode" + tristate "KBL with RT5663 and MAX98927 in I2S Mode" depends on MFD_INTEL_LPSS && I2C && ACPI select SND_SOC_RT5663 select SND_SOC_MAX98927 @@ -255,7 +255,7 @@ config SND_SOC_INTEL_KBL_RT5663_MAX98927_MACH If unsure select "N". 
config SND_SOC_INTEL_KBL_RT5663_RT5514_MAX98927_MACH - tristate "ASoC Audio driver for KBL with RT5663, RT5514 and MAX98927 in I2S Mode" + tristate "KBL with RT5663, RT5514 and MAX98927 in I2S Mode" depends on MFD_INTEL_LPSS && I2C && ACPI depends on SPI select SND_SOC_RT5663 -- cgit v1.2.3 From dba04eb76df982703fefc021a4d278347b6176a9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 8 Jan 2018 16:27:31 +0100 Subject: locking/Documentation: Remove stale crossrelease_fullstack parameter The cross-release lockdep functionality has been removed in: e966eaeeb623: ("locking/lockdep: Remove the cross-release locking checks") ... leaving the kernel parameter docs behind. The code handling the parameter does not exist so this is a plain documentation change. Signed-off-by: David Sterba Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: byungchul.park@lge.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20180108152731.27613-1-dsterba@suse.com Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index af7104aaffd9..a626465dd877 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -713,9 +713,6 @@ It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. - crossrelease_fullstack - [KNL] Allow to record full stack trace in cross-release - cryptomgr.notests [KNL] Disable crypto self-tests -- cgit v1.2.3 From 262b6b30087246abf09d6275eb0c0dc421bcbe38 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 6 Jan 2018 18:41:14 +0100 Subject: x86/tboot: Unbreak tboot with PTI enabled This is another case similar to what EFI does: create a new set of page tables, map some code at a low address, and jump to it. 
PTI mistakes this low address for userspace and mistakenly marks it non-executable in an effort to make it unusable for userspace. Undo the poison to allow execution. Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig") Signed-off-by: Dave Hansen Signed-off-by: Andrea Arcangeli Signed-off-by: Thomas Gleixner Cc: Alan Cox Cc: Tim Chen Cc: Jon Masters Cc: Dave Hansen Cc: Andi Kleen Cc: Jeff Law Cc: Paolo Bonzini Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David" Cc: Nick Clifton Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180108102805.GK25546@redhat.com --- arch/x86/kernel/tboot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index a4eb27918ceb..75869a4b6c41 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -127,6 +127,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, p4d = p4d_alloc(&tboot_mm, pgd, vaddr); if (!p4d) return -1; + pgd->pgd &= ~_PAGE_NX; pud = pud_alloc(&tboot_mm, p4d, vaddr); if (!pud) return -1; -- cgit v1.2.3 From 527187d28569e39c5d489d6306d3b79605cf85a6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jan 2018 17:27:19 +0100 Subject: locking/lockdep: Remove cross-release leftovers There's two cross-release leftover facilities: - the crossrelease_hist_*() irq-tracing callbacks (NOPs currently) - the complete_release_commit() callback (NOP as well) Remove them. 
Cc: David Sterba Cc: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/completion.h | 1 - include/linux/irqflags.h | 4 ---- include/linux/lockdep.h | 2 -- kernel/sched/completion.c | 5 ----- 4 files changed, 12 deletions(-) diff --git a/include/linux/completion.h b/include/linux/completion.h index 94a59ba7d422..519e94915d18 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -32,7 +32,6 @@ struct completion { #define init_completion(x) __init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} -static inline void complete_release_commit(struct completion *x) {} #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 46cb57d5eb13..1b3996ff3f16 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -27,22 +27,18 @@ # define trace_hardirq_enter() \ do { \ current->hardirq_context++; \ - crossrelease_hist_start(XHLOCK_HARD); \ } while (0) # define trace_hardirq_exit() \ do { \ current->hardirq_context--; \ - crossrelease_hist_end(XHLOCK_HARD); \ } while (0) # define lockdep_softirq_enter() \ do { \ current->softirq_context++; \ - crossrelease_hist_start(XHLOCK_SOFT); \ } while (0) # define lockdep_softirq_exit() \ do { \ current->softirq_context--; \ - crossrelease_hist_end(XHLOCK_SOFT); \ } while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 2e75dc34bff5..3251d9c0d313 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -475,8 +475,6 @@ enum xhlock_context_t { #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), } -static inline void crossrelease_hist_start(enum xhlock_context_t c) {} -static inline void 
crossrelease_hist_end(enum xhlock_context_t c) {} static inline void lockdep_invariant_state(bool force) {} static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 2ddaec40956f..0926aef10dad 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -34,11 +34,6 @@ void complete(struct completion *x) spin_lock_irqsave(&x->wait.lock, flags); - /* - * Perform commit of crossrelease here. - */ - complete_release_commit(x); - if (x->done != UINT_MAX) x->done++; __wake_up_locked(&x->wait, TASK_NORMAL, 1); -- cgit v1.2.3 From 8d56eff266f3e41a6c39926269c4c3f58f881a8e Mon Sep 17 00:00:00 2001 From: Jike Song Date: Tue, 9 Jan 2018 00:03:41 +0800 Subject: x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*() The following code contains dead logic: 162 if (pgd_none(*pgd)) { 163 unsigned long new_p4d_page = __get_free_page(gfp); 164 if (!new_p4d_page) 165 return NULL; 166 167 if (pgd_none(*pgd)) { 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 169 new_p4d_page = 0; 170 } 171 if (new_p4d_page) 172 free_page(new_p4d_page); 173 } There can't be any difference between two pgd_none(*pgd) at L162 and L167, so it's always false at L171. Dave Hansen explained: Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock. The lock was only taken when we needed to *make* an entry to avoid collisions. Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check. As all these functions are only called during init, mark them __init as well. 
Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs") Signed-off-by: Jike Song Signed-off-by: Thomas Gleixner Cc: Alan Cox Cc: Andi Kleen Cc: Tom Lendacky Cc: Peter Zijlstra Cc: Tim Chen Cc: Jiri Koshina Cc: Dave Hansen Cc: Borislav Petkov Cc: Kees Cook Cc: Andi Lutomirski Cc: Linus Torvalds Cc: Greg KH Cc: David Woodhouse Cc: Paul Turner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com --- arch/x86/mm/pti.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 43d4a4a29037..ce38f165489b 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) * * Returns a pointer to a P4D on success, or NULL on failure. */ -static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) +static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) { pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address)); gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); @@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) if (!new_p4d_page) return NULL; - if (pgd_none(*pgd)) { - set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); - new_p4d_page = 0; - } - if (new_p4d_page) - free_page(new_p4d_page); + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); } BUILD_BUG_ON(pgd_large(*pgd) != 0); @@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) * * Returns a pointer to a PMD on success, or NULL on failure. 
*/ -static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) +static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); p4d_t *p4d = pti_user_pagetable_walk_p4d(address); @@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) if (!new_pud_page) return NULL; - if (p4d_none(*p4d)) { - set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); - new_pud_page = 0; - } - if (new_pud_page) - free_page(new_pud_page); + set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); } pud = pud_offset(p4d, address); @@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) if (!new_pmd_page) return NULL; - if (pud_none(*pud)) { - set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); - new_pmd_page = 0; - } - if (new_pmd_page) - free_page(new_pmd_page); + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); } return pmd_offset(pud, address); @@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) if (!new_pte_page) return NULL; - if (pmd_none(*pmd)) { - set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); - new_pte_page = 0; - } - if (new_pte_page) - free_page(new_pte_page); + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); } pte = pte_offset_kernel(pmd, address); -- cgit v1.2.3 From f3074a2825d45868e77c5926913986141394d8dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Mon, 8 Jan 2018 14:28:40 +0100 Subject: dt-bindings/clocksource: Add Actions Semi S700 timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define a compatible string for the Actions Semi S700 SoC timer. 
Signed-off-by: Andreas Färber Signed-off-by: Daniel Lezcano Acked-by: Rob Herring Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Rob Herring Cc: Thomas Gleixner Cc: devicetree@vger.kernel.org Link: http://lkml.kernel.org/r/1515418139-23276-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- Documentation/devicetree/bindings/timer/actions,owl-timer.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/timer/actions,owl-timer.txt b/Documentation/devicetree/bindings/timer/actions,owl-timer.txt index e3c28da80cb2..977054f87563 100644 --- a/Documentation/devicetree/bindings/timer/actions,owl-timer.txt +++ b/Documentation/devicetree/bindings/timer/actions,owl-timer.txt @@ -2,6 +2,7 @@ Actions Semi Owl Timer Required properties: - compatible : "actions,s500-timer" for S500 + "actions,s700-timer" for S700 "actions,s900-timer" for S900 - reg : Offset and length of the register set for the device. - interrupts : Should contain the interrupts. -- cgit v1.2.3 From f58639ac29d9fea7c54afca3c8c03596eafbec20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Mon, 8 Jan 2018 14:28:41 +0100 Subject: clocksource/drivers/owl: Adopt TIMER_OF_DECLARE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following commit: 1727339590fd ("clocksource/drivers: Rename CLOCKSOURCE_OF_DECLARE to TIMER_OF_DECLARE") deprecated CLOCKSOURCE_OF_DECLARE(), so adopt the new TIMER_OF_DECLARE() macro instead. 
Reported-by: Daniel Lezcano Signed-off-by: Andreas Färber Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-2-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/owl-timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/owl-timer.c b/drivers/clocksource/owl-timer.c index c68630565079..9fb4333cfb8a 100644 --- a/drivers/clocksource/owl-timer.c +++ b/drivers/clocksource/owl-timer.c @@ -168,5 +168,5 @@ static int __init owl_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE(owl_s500, "actions,s500-timer", owl_timer_init); -CLOCKSOURCE_OF_DECLARE(owl_s900, "actions,s900-timer", owl_timer_init); +TIMER_OF_DECLARE(owl_s500, "actions,s500-timer", owl_timer_init); +TIMER_OF_DECLARE(owl_s900, "actions,s900-timer", owl_timer_init); -- cgit v1.2.3 From 0ec5477c3e0e86b3b2750d82355af422a468dc46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Mon, 8 Jan 2018 14:28:42 +0100 Subject: clocksource/drivers/owl: Add the S700 timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Actions S700 has two 2Hz timers like S500, and four TIMx timers like S900. 
Signed-off-by: Andreas Färber Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-3-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/owl-timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/owl-timer.c b/drivers/clocksource/owl-timer.c index 9fb4333cfb8a..ea00a5e8f95d 100644 --- a/drivers/clocksource/owl-timer.c +++ b/drivers/clocksource/owl-timer.c @@ -169,4 +169,5 @@ static int __init owl_timer_init(struct device_node *node) return 0; } TIMER_OF_DECLARE(owl_s500, "actions,s500-timer", owl_timer_init); +TIMER_OF_DECLARE(owl_s700, "actions,s700-timer", owl_timer_init); TIMER_OF_DECLARE(owl_s900, "actions,s900-timer", owl_timer_init); -- cgit v1.2.3 From 542f824607a6968ea443208ccfef3b7daf503559 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Mon, 8 Jan 2018 14:28:43 +0100 Subject: clocksource/drivers/tcb_clksrc: Fix clock speed message The clock speed displayed at boot in an information message was 500 kHz too high compared to its real value. As the value is not used anywhere, there is no functional impact. Fix the rounding formula to display the correct value. 
Signed-off-by: Romain Izard Signed-off-by: Daniel Lezcano Acked-by: Nicolas Ferre Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-4-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/tcb_clksrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 9de47d4d2d9e..43f4d5c4d6fa 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -384,7 +384,7 @@ static int __init tcb_clksrc_init(void) printk(bootinfo, clksrc.name, CONFIG_ATMEL_TCB_CLKSRC_BLOCK, divided_rate / 1000000, - ((divided_rate + 500000) % 1000000) / 1000); + ((divided_rate % 1000000) + 500) / 1000); if (tc->tcb_config && tc->tcb_config->counter_width == 32) { /* use apropriate function to read 32 bit counter */ -- cgit v1.2.3 From 5bbf4ad945a9bb353e77ef71c753ca9bb1e3d978 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:44 +0100 Subject: clocksource/drivers/timer-of: Fix function names Not all of the functions are prefixed with 'timer_of_'; fix the naming in order to make the code consistent.
Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-5-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-of.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index a31990408153..ad5565404e25 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -24,7 +24,7 @@ #include "timer-of.h" -static __init void timer_irq_exit(struct of_timer_irq *of_irq) +static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) { struct timer_of *to = container_of(of_irq, struct timer_of, of_irq); @@ -34,8 +34,8 @@ static __init void timer_irq_exit(struct of_timer_irq *of_irq) free_irq(of_irq->irq, clkevt); } -static __init int timer_irq_init(struct device_node *np, - struct of_timer_irq *of_irq) +static __init int timer_of_irq_init(struct device_node *np, + struct of_timer_irq *of_irq) { int ret; struct timer_of *to = container_of(of_irq, struct timer_of, of_irq); @@ -72,15 +72,15 @@ static __init int timer_irq_init(struct device_node *np, return 0; } -static __init void timer_clk_exit(struct of_timer_clk *of_clk) +static __init void timer_of_clk_exit(struct of_timer_clk *of_clk) { of_clk->rate = 0; clk_disable_unprepare(of_clk->clk); clk_put(of_clk->clk); } -static __init int timer_clk_init(struct device_node *np, - struct of_timer_clk *of_clk) +static __init int timer_of_clk_init(struct device_node *np, + struct of_timer_clk *of_clk) { int ret; @@ -116,13 +116,13 @@ out_clk_put: goto out; } -static __init void timer_base_exit(struct of_timer_base *of_base) +static __init void timer_of_base_exit(struct of_timer_base *of_base) { iounmap(of_base->base); } -static __init int timer_base_init(struct device_node *np, - struct of_timer_base *of_base) +static __init int timer_of_base_init(struct device_node *np, 
+ struct of_timer_base *of_base) { const char *name = of_base->name ? of_base->name : np->full_name; @@ -141,21 +141,21 @@ int __init timer_of_init(struct device_node *np, struct timer_of *to) int flags = 0; if (to->flags & TIMER_OF_BASE) { - ret = timer_base_init(np, &to->of_base); + ret = timer_of_base_init(np, &to->of_base); if (ret) goto out_fail; flags |= TIMER_OF_BASE; } if (to->flags & TIMER_OF_CLOCK) { - ret = timer_clk_init(np, &to->of_clk); + ret = timer_of_clk_init(np, &to->of_clk); if (ret) goto out_fail; flags |= TIMER_OF_CLOCK; } if (to->flags & TIMER_OF_IRQ) { - ret = timer_irq_init(np, &to->of_irq); + ret = timer_of_irq_init(np, &to->of_irq); if (ret) goto out_fail; flags |= TIMER_OF_IRQ; @@ -167,13 +167,13 @@ int __init timer_of_init(struct device_node *np, struct timer_of *to) out_fail: if (flags & TIMER_OF_IRQ) - timer_irq_exit(&to->of_irq); + timer_of_irq_exit(&to->of_irq); if (flags & TIMER_OF_CLOCK) - timer_clk_exit(&to->of_clk); + timer_of_clk_exit(&to->of_clk); if (flags & TIMER_OF_BASE) - timer_base_exit(&to->of_base); + timer_of_base_exit(&to->of_base); return ret; } @@ -187,11 +187,11 @@ out_fail: void __init timer_of_cleanup(struct timer_of *to) { if (to->flags & TIMER_OF_IRQ) - timer_irq_exit(&to->of_irq); + timer_of_irq_exit(&to->of_irq); if (to->flags & TIMER_OF_CLOCK) - timer_clk_exit(&to->of_clk); + timer_of_clk_exit(&to->of_clk); if (to->flags & TIMER_OF_BASE) - timer_base_exit(&to->of_base); + timer_of_base_exit(&to->of_base); } -- cgit v1.2.3 From cf7f46b9b12269d204b6acd0925704543adb6e05 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:45 +0100 Subject: clocksource/drivers/timer-of: Add kernel documentation The current code has no comments, nor any function descriptions. Fix this by adding function descriptions in kernel-doc format.
Signed-off-by: Daniel Lezcano Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-6-git-send-email-daniel.lezcano@linaro.org [ Spelling and style fixes. ] Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-of.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index ad5565404e25..c1045b9a787c 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -24,6 +24,12 @@ #include "timer-of.h" +/** + * timer_of_irq_exit - Release the interrupt + * @of_irq: an of_timer_irq structure pointer + * + * Free the irq resource + */ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) { struct timer_of *to = container_of(of_irq, struct timer_of, of_irq); @@ -34,6 +40,22 @@ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) free_irq(of_irq->irq, clkevt); } +/** + * timer_of_irq_init - Request the interrupt + * @np: a device tree node pointer + * @of_irq: an of_timer_irq structure pointer + * + * Get the interrupt number from the DT from its definition and + * request it. The interrupt is gotten by falling back the following way: + * + * - Get interrupt number by name + * - Get interrupt number by index + * + * When the interrupt is per CPU, 'request_percpu_irq()' is called, + * otherwise 'request_irq()' is used. 
+ * + * Returns 0 on success, < 0 otherwise + */ static __init int timer_of_irq_init(struct device_node *np, struct of_timer_irq *of_irq) { @@ -72,6 +94,12 @@ static __init int timer_of_irq_init(struct device_node *np, return 0; } +/** + * timer_of_clk_exit - Release the clock resources + * @of_clk: a of_timer_clk structure pointer + * + * Disables and releases the refcount on the clk + */ static __init void timer_of_clk_exit(struct of_timer_clk *of_clk) { of_clk->rate = 0; @@ -79,6 +107,15 @@ static __init void timer_of_clk_exit(struct of_timer_clk *of_clk) clk_put(of_clk->clk); } +/** + * timer_of_clk_init - Initialize the clock resources + * @np: a device tree node pointer + * @of_clk: a of_timer_clk structure pointer + * + * Get the clock by name or by index, enable it and get the rate + * + * Returns 0 on success, < 0 otherwise + */ static __init int timer_of_clk_init(struct device_node *np, struct of_timer_clk *of_clk) { -- cgit v1.2.3 From 286f30db8b713b17e048bb86df1e257fd8695498 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 8 Jan 2018 14:28:46 +0100 Subject: dt-bindings/clocksource: Add Spreadtrum SC9860 timer documentation This patch adds documentation of device tree bindings for the timers found on the Spreadtrum SC9860 platform. 
Signed-off-by: Baolin Wang Signed-off-by: Daniel Lezcano Acked-by: Rob Herring Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Rob Herring Cc: Thomas Gleixner Cc: devicetree@vger.kernel.org Link: http://lkml.kernel.org/r/1515418139-23276-7-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- .../bindings/timer/spreadtrum,sprd-timer.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt diff --git a/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt b/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt new file mode 100644 index 000000000000..6d97e7d0f6e8 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/spreadtrum,sprd-timer.txt @@ -0,0 +1,20 @@ +Spreadtrum timers + +The Spreadtrum SC9860 platform provides 3 general-purpose timers. +These timers can support 32bit or 64bit counter, as well as supporting +period mode or one-shot mode, and they are can be wakeup source +during deep sleep. + +Required properties: +- compatible: should be "sprd,sc9860-timer" for SC9860 platform. +- reg: The register address of the timer device. +- interrupts: Should contain the interrupt for the timer device. +- clocks: The phandle to the source clock (usually a 32.768 KHz fixed clock). + +Example: + timer@40050000 { + compatible = "sprd,sc9860-timer"; + reg = <0 0x40050000 0 0x20>; + interrupts = ; + clocks = <&ext_32k>; + }; -- cgit v1.2.3 From 067bc9144766495650e621b79bd2bc199cee0769 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 8 Jan 2018 14:28:47 +0100 Subject: clocksource/drivers/spreadtrum: Add timer driver for the Spreadtrum SC9860 platform The Spreadtrum SC9860 platform will use the architected timers as local clock events, but we also need a broadcast timer device to wake up the CPUs when the CPUs are in sleep mode. 
The Spreadtrum timer can support 32-bit or 64-bit counters, as well as supporting period mode or one-shot mode. Signed-off-by: Baolin Wang Signed-off-by: Daniel Lezcano Acked-by: Philippe Ombredanne Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-8-git-send-email-daniel.lezcano@linaro.org [ Minor readability edits. ] Signed-off-by: Ingo Molnar --- drivers/clocksource/Kconfig | 7 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/timer-sprd.c | 159 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 drivers/clocksource/timer-sprd.c diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index c729a88007d0..035981290d96 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -441,6 +441,13 @@ config MTK_TIMER help Support for Mediatek timer driver. +config SPRD_TIMER + bool "Spreadtrum timer driver" if COMPILE_TEST + depends on HAS_IOMEM + select TIMER_OF + help + Enables support for the Spreadtrum timer driver. + config SYS_SUPPORTS_SH_MTU2 bool diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 72711f1491e3..d6dec4489d66 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_CLKSRC_TI_32K) += timer-ti-32k.o obj-$(CONFIG_CLKSRC_NPS) += timer-nps.o obj-$(CONFIG_OXNAS_RPS_TIMER) += timer-oxnas-rps.o obj-$(CONFIG_OWL_TIMER) += owl-timer.o +obj-$(CONFIG_SPRD_TIMER) += timer-sprd.o obj-$(CONFIG_ARC_TIMERS) += arc_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o diff --git a/drivers/clocksource/timer-sprd.c b/drivers/clocksource/timer-sprd.c new file mode 100644 index 000000000000..ef9ebeafb3ed --- /dev/null +++ b/drivers/clocksource/timer-sprd.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Spreadtrum Communications Inc. 
+ */ + +#include +#include + +#include "timer-of.h" + +#define TIMER_NAME "sprd_timer" + +#define TIMER_LOAD_LO 0x0 +#define TIMER_LOAD_HI 0x4 +#define TIMER_VALUE_LO 0x8 +#define TIMER_VALUE_HI 0xc + +#define TIMER_CTL 0x10 +#define TIMER_CTL_PERIOD_MODE BIT(0) +#define TIMER_CTL_ENABLE BIT(1) +#define TIMER_CTL_64BIT_WIDTH BIT(16) + +#define TIMER_INT 0x14 +#define TIMER_INT_EN BIT(0) +#define TIMER_INT_RAW_STS BIT(1) +#define TIMER_INT_MASK_STS BIT(2) +#define TIMER_INT_CLR BIT(3) + +#define TIMER_VALUE_SHDW_LO 0x18 +#define TIMER_VALUE_SHDW_HI 0x1c + +#define TIMER_VALUE_LO_MASK GENMASK(31, 0) + +static void sprd_timer_enable(void __iomem *base, u32 flag) +{ + u32 val = readl_relaxed(base + TIMER_CTL); + + val |= TIMER_CTL_ENABLE; + if (flag & TIMER_CTL_64BIT_WIDTH) + val |= TIMER_CTL_64BIT_WIDTH; + else + val &= ~TIMER_CTL_64BIT_WIDTH; + + if (flag & TIMER_CTL_PERIOD_MODE) + val |= TIMER_CTL_PERIOD_MODE; + else + val &= ~TIMER_CTL_PERIOD_MODE; + + writel_relaxed(val, base + TIMER_CTL); +} + +static void sprd_timer_disable(void __iomem *base) +{ + u32 val = readl_relaxed(base + TIMER_CTL); + + val &= ~TIMER_CTL_ENABLE; + writel_relaxed(val, base + TIMER_CTL); +} + +static void sprd_timer_update_counter(void __iomem *base, unsigned long cycles) +{ + writel_relaxed(cycles & TIMER_VALUE_LO_MASK, base + TIMER_LOAD_LO); + writel_relaxed(0, base + TIMER_LOAD_HI); +} + +static void sprd_timer_enable_interrupt(void __iomem *base) +{ + writel_relaxed(TIMER_INT_EN, base + TIMER_INT); +} + +static void sprd_timer_clear_interrupt(void __iomem *base) +{ + u32 val = readl_relaxed(base + TIMER_INT); + + val |= TIMER_INT_CLR; + writel_relaxed(val, base + TIMER_INT); +} + +static int sprd_timer_set_next_event(unsigned long cycles, + struct clock_event_device *ce) +{ + struct timer_of *to = to_timer_of(ce); + + sprd_timer_disable(timer_of_base(to)); + sprd_timer_update_counter(timer_of_base(to), cycles); + sprd_timer_enable(timer_of_base(to), 0); + + return 0; +} + +static int 
sprd_timer_set_periodic(struct clock_event_device *ce) +{ + struct timer_of *to = to_timer_of(ce); + + sprd_timer_disable(timer_of_base(to)); + sprd_timer_update_counter(timer_of_base(to), timer_of_period(to)); + sprd_timer_enable(timer_of_base(to), TIMER_CTL_PERIOD_MODE); + + return 0; +} + +static int sprd_timer_shutdown(struct clock_event_device *ce) +{ + struct timer_of *to = to_timer_of(ce); + + sprd_timer_disable(timer_of_base(to)); + return 0; +} + +static irqreturn_t sprd_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *ce = (struct clock_event_device *)dev_id; + struct timer_of *to = to_timer_of(ce); + + sprd_timer_clear_interrupt(timer_of_base(to)); + + if (clockevent_state_oneshot(ce)) + sprd_timer_disable(timer_of_base(to)); + + ce->event_handler(ce); + return IRQ_HANDLED; +} + +static struct timer_of to = { + .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK, + + .clkevt = { + .name = TIMER_NAME, + .rating = 300, + .features = CLOCK_EVT_FEAT_DYNIRQ | CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_state_shutdown = sprd_timer_shutdown, + .set_state_periodic = sprd_timer_set_periodic, + .set_next_event = sprd_timer_set_next_event, + .cpumask = cpu_possible_mask, + }, + + .of_irq = { + .handler = sprd_timer_interrupt, + .flags = IRQF_TIMER | IRQF_IRQPOLL, + }, +}; + +static int __init sprd_timer_init(struct device_node *np) +{ + int ret; + + ret = timer_of_init(np, &to); + if (ret) + return ret; + + sprd_timer_enable_interrupt(timer_of_base(&to)); + clockevents_config_and_register(&to.clkevt, timer_of_rate(&to), + 1, UINT_MAX); + + return 0; +} + +TIMER_OF_DECLARE(sc9860_timer, "sprd,sc9860-timer", sprd_timer_init); -- cgit v1.2.3 From 1c63c1c089a48e1b1821a73dc36a3997ced2f82d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:48 +0100 Subject: clocksource/drivers/timer-of: Store the device node pointer in 'struct timer_of' Under certain circumstances, some specific operations must be done with 
the device node pointer, which forces the timer code to propagate the pointer to the functions which need it. In order to consolidate the function signatures in the different drivers by using the timer-of structure, let's store it in the timer-of structure as a handy pointer when it is needed. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-9-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-of.c | 3 +++ drivers/clocksource/timer-of.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index c1045b9a787c..25008d2cc346 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -200,6 +200,9 @@ int __init timer_of_init(struct device_node *np, struct timer_of *to) if (!to->clkevt.name) to->clkevt.name = np->name; + + to->np = np; + return ret; out_fail: diff --git a/drivers/clocksource/timer-of.h b/drivers/clocksource/timer-of.h index 3f708f1be43d..a5478f3e8589 100644 --- a/drivers/clocksource/timer-of.h +++ b/drivers/clocksource/timer-of.h @@ -33,6 +33,7 @@ struct of_timer_clk { struct timer_of { unsigned int flags; + struct device_node *np; struct clock_event_device clkevt; struct of_timer_base of_base; struct of_timer_irq of_irq; -- cgit v1.2.3 From 9aea417afa6bf52f15a5b194944b6a646d61af04 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:49 +0100 Subject: clocksource/drivers/timer-of: Don't request the resource by name When the driver does not specify a name for the resource, don't use of_io_request_and_map() but of_iomap(). That prevents resource name allocation conflicts on some platforms which have the same name as the node. 
Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-10-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-of.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index 25008d2cc346..06ed88a2a8a0 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -161,11 +161,11 @@ static __init void timer_of_base_exit(struct of_timer_base *of_base) static __init int timer_of_base_init(struct device_node *np, struct of_timer_base *of_base) { - const char *name = of_base->name ? of_base->name : np->full_name; - - of_base->base = of_io_request_and_map(np, of_base->index, name); + of_base->base = of_base->name ? + of_io_request_and_map(np, of_base->index, of_base->name) : + of_iomap(np, of_base->index); if (IS_ERR(of_base->base)) { - pr_err("Failed to iomap (%s)\n", name); + pr_err("Failed to iomap (%s)\n", of_base->name); return PTR_ERR(of_base->base); } -- cgit v1.2.3 From e0aeca3d8cbaea514eb98df1149faa918f9ec42d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:50 +0100 Subject: clocksource/drivers/stm32: Fix kernel panic with multiple timers The current code hides a couple of bugs: - The global variable 'clock_event_ddata' is overwritten each time the init function is invoked. This is fixed with a kmemdup() instead of assigning the global variable. That prevents a memory corruption when several timers are defined in the DT. - The clockevent's event_handler is NULL if the time framework does not select the clockevent when registering it, this is fine but the init code generates in any case an interrupt leading to dereference this NULL pointer. The stm32 timer works with shadow registers, a mechanism to cache the registers. 
When a change is done in one buffered register, we need to artificially generate an event to force the timer to copy the content of the register to the shadowed register. The auto-reload register (ARR) is one of the shadowed registers as well as the prescaler register (PSC), so in order to force the copy, we issue an event which in turn leads to an interrupt and the NULL dereference. This is fixed by inverting two lines where we clear the status register before enabling the update event interrupt. As this kernel crash is resulting from the combination of these two bugs, the fixes are grouped into a single patch. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1515418139-23276-11-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 8f2423789ba9..4bfeb9929ab2 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -106,6 +106,10 @@ static int __init stm32_clockevent_init(struct device_node *np) unsigned long rate, max_delta; int irq, ret, bits, prescaler = 1; + data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + clk = of_clk_get(np, 0); if (IS_ERR(clk)) { ret = PTR_ERR(clk); @@ -156,8 +160,8 @@ static int __init stm32_clockevent_init(struct device_node *np) writel_relaxed(prescaler - 1, data->base + TIM_PSC); writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); - writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); writel_relaxed(0, data->base + TIM_SR); + writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); @@ -184,6 +188,7 @@ err_iomap:
err_clk_enable: clk_put(clk); err_clk_get: + kfree(data); return ret; } -- cgit v1.2.3 From d04af4908a7283bc6ae0dd9475ccf807d094f8ba Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 8 Jan 2018 14:28:51 +0100 Subject: clocksource/drivers/stm32: Convert the driver to timer_of primitives Convert the driver to use the timer_of() helpers. This allows the removal of a custom private structure, factors out and simplifies the code. [Daniel Lezcano]: Respin against the critical fix patch and massaged the changelog. Tested-by: Benjamin Gaignard Signed-off-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-12-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/Kconfig | 1 + drivers/clocksource/timer-stm32.c | 187 +++++++++++++++----------------------- 2 files changed, 74 insertions(+), 114 deletions(-) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 035981290d96..b3b4ed9b6874 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -269,6 +269,7 @@ config CLKSRC_STM32 bool "Clocksource for STM32 SoCs" if !ARCH_STM32 depends on OF && ARM && (ARCH_STM32 || COMPILE_TEST) select CLKSRC_MMIO + select TIMER_OF config CLKSRC_MPS2 bool "Clocksource for MPS2 SoCs" if COMPILE_TEST diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 4bfeb9929ab2..3e4ab0770293 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -16,6 +16,9 @@ #include #include #include +#include + +#include "timer-of.h" #define TIM_CR1 0x00 #define TIM_DIER 0x0c @@ -34,162 +37,118 @@ #define TIM_EGR_UG BIT(0) -struct stm32_clock_event_ddata { - struct clock_event_device evtdev; - unsigned periodic_top; - void __iomem *base; -}; - -static int stm32_clock_event_shutdown(struct 
clock_event_device *evtdev) +static int stm32_clock_event_shutdown(struct clock_event_device *clkevt) { - struct stm32_clock_event_ddata *data = - container_of(evtdev, struct stm32_clock_event_ddata, evtdev); - void *base = data->base; + struct timer_of *to = to_timer_of(clkevt); + + writel_relaxed(0, timer_of_base(to) + TIM_CR1); - writel_relaxed(0, base + TIM_CR1); return 0; } -static int stm32_clock_event_set_periodic(struct clock_event_device *evtdev) +static int stm32_clock_event_set_periodic(struct clock_event_device *clkevt) { - struct stm32_clock_event_ddata *data = - container_of(evtdev, struct stm32_clock_event_ddata, evtdev); - void *base = data->base; + struct timer_of *to = to_timer_of(clkevt); + + writel_relaxed(timer_of_period(to), timer_of_base(to) + TIM_ARR); + writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, timer_of_base(to) + TIM_CR1); - writel_relaxed(data->periodic_top, base + TIM_ARR); - writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, base + TIM_CR1); return 0; } static int stm32_clock_event_set_next_event(unsigned long evt, - struct clock_event_device *evtdev) + struct clock_event_device *clkevt) { - struct stm32_clock_event_ddata *data = - container_of(evtdev, struct stm32_clock_event_ddata, evtdev); + struct timer_of *to = to_timer_of(clkevt); - writel_relaxed(evt, data->base + TIM_ARR); + writel_relaxed(evt, timer_of_base(to) + TIM_ARR); writel_relaxed(TIM_CR1_ARPE | TIM_CR1_OPM | TIM_CR1_CEN, - data->base + TIM_CR1); + timer_of_base(to) + TIM_CR1); return 0; } static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) { - struct stm32_clock_event_ddata *data = dev_id; + struct clock_event_device *clkevt = (struct clock_event_device *)dev_id; + struct timer_of *to = to_timer_of(clkevt); - writel_relaxed(0, data->base + TIM_SR); + writel_relaxed(0, timer_of_base(to) + TIM_SR); - data->evtdev.event_handler(&data->evtdev); + clkevt->event_handler(clkevt); return IRQ_HANDLED; } -static struct stm32_clock_event_ddata clock_event_ddata = { - 
.evtdev = { - .name = "stm32 clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, - .set_state_shutdown = stm32_clock_event_shutdown, - .set_state_periodic = stm32_clock_event_set_periodic, - .set_state_oneshot = stm32_clock_event_shutdown, - .tick_resume = stm32_clock_event_shutdown, - .set_next_event = stm32_clock_event_set_next_event, - .rating = 200, - }, -}; - -static int __init stm32_clockevent_init(struct device_node *np) +static void __init stm32_clockevent_init(struct timer_of *to) { - struct stm32_clock_event_ddata *data = &clock_event_ddata; - struct clk *clk; - struct reset_control *rstc; - unsigned long rate, max_delta; - int irq, ret, bits, prescaler = 1; - - data = kmemdup(&clock_event_ddata, sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - clk = of_clk_get(np, 0); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - pr_err("failed to get clock for clockevent (%d)\n", ret); - goto err_clk_get; - } - - ret = clk_prepare_enable(clk); - if (ret) { - pr_err("failed to enable timer clock for clockevent (%d)\n", - ret); - goto err_clk_enable; - } - - rate = clk_get_rate(clk); - - rstc = of_reset_control_get(np, NULL); - if (!IS_ERR(rstc)) { - reset_control_assert(rstc); - reset_control_deassert(rstc); - } - - data->base = of_iomap(np, 0); - if (!data->base) { - ret = -ENXIO; - pr_err("failed to map registers for clockevent\n"); - goto err_iomap; - } + unsigned long max_delta; + int prescaler; - irq = irq_of_parse_and_map(np, 0); - if (!irq) { - ret = -EINVAL; - pr_err("%pOF: failed to get irq.\n", np); - goto err_get_irq; - } + to->clkevt.name = "stm32_clockevent"; + to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; + to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; + to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; + to->clkevt.set_state_oneshot = stm32_clock_event_shutdown; + to->clkevt.tick_resume = stm32_clock_event_shutdown; + to->clkevt.set_next_event = stm32_clock_event_set_next_event; /* Detect 
whether the timer is 16 or 32 bits */ - writel_relaxed(~0U, data->base + TIM_ARR); - max_delta = readl_relaxed(data->base + TIM_ARR); + writel_relaxed(~0U, timer_of_base(to) + TIM_ARR); + max_delta = readl_relaxed(timer_of_base(to) + TIM_ARR); if (max_delta == ~0U) { prescaler = 1; - bits = 32; + to->clkevt.rating = 250; } else { prescaler = 1024; - bits = 16; + to->clkevt.rating = 100; } - writel_relaxed(0, data->base + TIM_ARR); + writel_relaxed(0, timer_of_base(to) + TIM_ARR); - writel_relaxed(prescaler - 1, data->base + TIM_PSC); - writel_relaxed(TIM_EGR_UG, data->base + TIM_EGR); - writel_relaxed(0, data->base + TIM_SR); - writel_relaxed(TIM_DIER_UIE, data->base + TIM_DIER); + writel_relaxed(prescaler - 1, timer_of_base(to) + TIM_PSC); + writel_relaxed(TIM_EGR_UG, timer_of_base(to) + TIM_EGR); + writel_relaxed(0, timer_of_base(to) + TIM_SR); + writel_relaxed(TIM_DIER_UIE, timer_of_base(to) + TIM_DIER); - data->periodic_top = DIV_ROUND_CLOSEST(rate, prescaler * HZ); + /* Adjust rate and period given the prescaler value */ + to->of_clk.rate = DIV_ROUND_CLOSEST(to->of_clk.rate, prescaler); + to->of_clk.period = DIV_ROUND_UP(to->of_clk.rate, HZ); - clockevents_config_and_register(&data->evtdev, - DIV_ROUND_CLOSEST(rate, prescaler), - 0x1, max_delta); - - ret = request_irq(irq, stm32_clock_event_handler, IRQF_TIMER, - "stm32 clockevent", data); - if (ret) { - pr_err("%pOF: failed to request irq.\n", np); - goto err_get_irq; - } + clockevents_config_and_register(&to->clkevt, + timer_of_rate(to), 0x1, max_delta); pr_info("%pOF: STM32 clockevent driver initialized (%d bits)\n", - np, bits); + to->np, max_delta == UINT_MAX ? 
32 : 16); +} - return ret; +static int __init stm32_timer_init(struct device_node *node) +{ + struct reset_control *rstc; + struct timer_of *to; + int ret; + + to = kzalloc(sizeof(*to), GFP_KERNEL); + if (!to) + return -ENOMEM; + + to->flags = TIMER_OF_IRQ | TIMER_OF_CLOCK | TIMER_OF_BASE; + to->of_irq.handler = stm32_clock_event_handler; + + ret = timer_of_init(node, to); + if (ret) + goto err; -err_get_irq: - iounmap(data->base); -err_iomap: - clk_disable_unprepare(clk); -err_clk_enable: - clk_put(clk); -err_clk_get: - kfree(data); + rstc = of_reset_control_get(node, NULL); + if (!IS_ERR(rstc)) { + reset_control_assert(rstc); + reset_control_deassert(rstc); + } + + stm32_clockevent_init(to); + return 0; +err: + kfree(to); return ret; } -TIMER_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init); +TIMER_OF_DECLARE(stm32, "st,stm32-timer", stm32_timer_init); -- cgit v1.2.3 From f2ed8ef1cea41c7e7e5d52199db9c822951ab101 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:52 +0100 Subject: clocksource/drivers/stm32: Use the node name as timer name As there are different timers on the stm32, use the node name for the timer name in order to give the indication of which timer the kernel is using. /proc/timer_list gives all the information with the right name, otherwise we end up digging in the kernel log and /proc/interrupt to do the connection between the used timer. 
Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-13-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 3e4ab0770293..14b7a2b99933 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -85,7 +85,7 @@ static void __init stm32_clockevent_init(struct timer_of *to) unsigned long max_delta; int prescaler; - to->clkevt.name = "stm32_clockevent"; + to->clkevt.name = to->np->full_name; to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; -- cgit v1.2.3 From 70c62cf910aeba7cb79f4ebc7e6c8edbb37a77f6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:53 +0100 Subject: clocksource/drivers/stm32: Factor out the timer width sorting code In order to clarify and encapsulate the code for upcoming changes, move the timer width check into a function and add some documentation. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-14-git-send-email-daniel.lezcano@linaro.org [ Spelling fixes. 
] Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 14b7a2b99933..33c7c90412ba 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -80,9 +80,27 @@ static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) return IRQ_HANDLED; } +/** + * stm32_timer_width - Sort out the timer width (32/16) + * @to: a pointer to a timer-of structure + * + * Write the 32-bit max value and read/return the result. If the timer + * is 32 bits wide, the result will be UINT_MAX, otherwise it will + * be truncated by the 16-bit register to USHRT_MAX. + * + * Returns UINT_MAX if the timer is 32 bits wide, USHRT_MAX if it is a + * 16 bits wide. + */ +static u32 __init stm32_timer_width(struct timer_of *to) +{ + writel_relaxed(UINT_MAX, timer_of_base(to) + TIM_ARR); + + return readl_relaxed(timer_of_base(to) + TIM_ARR); +} + static void __init stm32_clockevent_init(struct timer_of *to) { - unsigned long max_delta; + u32 width = 0; int prescaler; to->clkevt.name = to->np->full_name; @@ -93,10 +111,8 @@ static void __init stm32_clockevent_init(struct timer_of *to) to->clkevt.tick_resume = stm32_clock_event_shutdown; to->clkevt.set_next_event = stm32_clock_event_set_next_event; - /* Detect whether the timer is 16 or 32 bits */ - writel_relaxed(~0U, timer_of_base(to) + TIM_ARR); - max_delta = readl_relaxed(timer_of_base(to) + TIM_ARR); - if (max_delta == ~0U) { + width = stm32_timer_width(to); + if (width == UINT_MAX) { prescaler = 1; to->clkevt.rating = 250; } else { @@ -115,10 +131,10 @@ static void __init stm32_clockevent_init(struct timer_of *to) to->of_clk.period = DIV_ROUND_UP(to->of_clk.rate, HZ); clockevents_config_and_register(&to->clkevt, - timer_of_rate(to), 0x1, max_delta); + timer_of_rate(to), 0x1, width); pr_info("%pOF: STM32 clockevent driver 
initialized (%d bits)\n", - to->np, max_delta == UINT_MAX ? 32 : 16); + to->np, width == UINT_MAX ? 32 : 16); } static int __init stm32_timer_init(struct device_node *node) -- cgit v1.2.3 From 4744daa10dcd3a1470fbeba4945fbf44dcb1b0d1 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 8 Jan 2018 14:28:54 +0100 Subject: clocksource/drivers/stm32: Compute a prescaler value with a targeted rate The prescaler value is arbitrarily set to 1024 without any regard to the timer frequency. For 32-bit timers, there is no need to set a prescaler value as they wrap in an acceptable interval and give the opportunity to have precise timers on this platform. However, for 16-bit timers a prescaler value is needed if we don't want to wrap too often per second which is inefficient and adds more and more error margin. With a targeted clock of 10MHz, the 16 bits are precise enough whatever the timer frequency is as we will compute the prescaler. Tested-by: Benjamin Gaignard Signed-off-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-15-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 33c7c90412ba..928ac281f937 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -37,6 +37,9 @@ #define TIM_EGR_UG BIT(0) +#define TIM_PSC_MAX USHRT_MAX +#define TIM_PSC_CLKRATE 10000 + static int stm32_clock_event_shutdown(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); @@ -116,7 +119,14 @@ static void __init stm32_clockevent_init(struct timer_of *to) prescaler = 1; to->clkevt.rating = 250; } else { - prescaler = 1024; + prescaler = 
DIV_ROUND_CLOSEST(timer_of_rate(to), + TIM_PSC_CLKRATE); + /* + * The prescaler register is an u16, the variable + * can't be greater than TIM_PSC_MAX, let's cap it in + * this case. + */ + prescaler = prescaler < TIM_PSC_MAX ? prescaler : TIM_PSC_MAX; to->clkevt.rating = 100; } writel_relaxed(0, timer_of_base(to) + TIM_ARR); -- cgit v1.2.3 From 8e82df381b676ae5f6c93ab4a75f56d8f61babc4 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 8 Jan 2018 14:28:55 +0100 Subject: clocksource/drivers/stm32: Add oneshot mode The stm32 timer block is able to have a counter and a comparator. Instead of using the auto-reload register for periodic events, we switch to oneshot mode by using the comparator register. The timer is able to generate an interrupt when the counter overflows but we don't want that as this counter will be used as a clocksource in the next patches. So it is disabled by the UDIS bit of the control register. Tested-by: Benjamin Gaignard Signed-off-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-16-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 56 ++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 928ac281f937..882037f1d8d6 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -24,14 +24,18 @@ #define TIM_DIER 0x0c #define TIM_SR 0x10 #define TIM_EGR 0x14 +#define TIM_CNT 0x24 #define TIM_PSC 0x28 #define TIM_ARR 0x2c +#define TIM_CCR1 0x34 #define TIM_CR1_CEN BIT(0) +#define TIM_CR1_UDIS BIT(1) #define TIM_CR1_OPM BIT(3) #define TIM_CR1_ARPE BIT(7) #define TIM_DIER_UIE BIT(0) +#define TIM_DIER_CC1IE BIT(1) #define TIM_SR_UIF BIT(0) @@ -40,33 +44,57 @@
#define TIM_PSC_MAX USHRT_MAX #define TIM_PSC_CLKRATE 10000 +static void stm32_clock_event_disable(struct timer_of *to) +{ + writel_relaxed(0, timer_of_base(to) + TIM_DIER); +} + +static void stm32_clock_event_enable(struct timer_of *to) +{ + writel_relaxed(TIM_CR1_UDIS | TIM_CR1_CEN, timer_of_base(to) + TIM_CR1); +} + static int stm32_clock_event_shutdown(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); - writel_relaxed(0, timer_of_base(to) + TIM_CR1); + stm32_clock_event_disable(to); return 0; } -static int stm32_clock_event_set_periodic(struct clock_event_device *clkevt) +static int stm32_clock_event_set_next_event(unsigned long evt, + struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); + unsigned long now, next; + + next = readl_relaxed(timer_of_base(to) + TIM_CNT) + evt; + writel_relaxed(next, timer_of_base(to) + TIM_CCR1); + now = readl_relaxed(timer_of_base(to) + TIM_CNT); + + if ((next - now) > evt) + return -ETIME; - writel_relaxed(timer_of_period(to), timer_of_base(to) + TIM_ARR); - writel_relaxed(TIM_CR1_ARPE | TIM_CR1_CEN, timer_of_base(to) + TIM_CR1); + writel_relaxed(TIM_DIER_CC1IE, timer_of_base(to) + TIM_DIER); return 0; } -static int stm32_clock_event_set_next_event(unsigned long evt, - struct clock_event_device *clkevt) +static int stm32_clock_event_set_periodic(struct clock_event_device *clkevt) +{ + struct timer_of *to = to_timer_of(clkevt); + + stm32_clock_event_enable(to); + + return stm32_clock_event_set_next_event(timer_of_period(to), clkevt); +} + +static int stm32_clock_event_set_oneshot(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); - writel_relaxed(evt, timer_of_base(to) + TIM_ARR); - writel_relaxed(TIM_CR1_ARPE | TIM_CR1_OPM | TIM_CR1_CEN, - timer_of_base(to) + TIM_CR1); + stm32_clock_event_enable(to); return 0; } @@ -78,6 +106,11 @@ static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) writel_relaxed(0, timer_of_base(to) + TIM_SR); 
+ if (clockevent_state_periodic(clkevt)) + stm32_clock_event_set_periodic(clkevt); + else + stm32_clock_event_shutdown(clkevt); + clkevt->event_handler(clkevt); return IRQ_HANDLED; @@ -108,9 +141,10 @@ static void __init stm32_clockevent_init(struct timer_of *to) to->clkevt.name = to->np->full_name; to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; + to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; - to->clkevt.set_state_oneshot = stm32_clock_event_shutdown; + to->clkevt.set_state_oneshot = stm32_clock_event_set_oneshot; to->clkevt.tick_resume = stm32_clock_event_shutdown; to->clkevt.set_next_event = stm32_clock_event_set_next_event; @@ -129,12 +163,10 @@ static void __init stm32_clockevent_init(struct timer_of *to) prescaler = prescaler < TIM_PSC_MAX ? prescaler : TIM_PSC_MAX; to->clkevt.rating = 100; } - writel_relaxed(0, timer_of_base(to) + TIM_ARR); writel_relaxed(prescaler - 1, timer_of_base(to) + TIM_PSC); writel_relaxed(TIM_EGR_UG, timer_of_base(to) + TIM_EGR); writel_relaxed(0, timer_of_base(to) + TIM_SR); - writel_relaxed(TIM_DIER_UIE, timer_of_base(to) + TIM_DIER); /* Adjust rate and period given the prescaler value */ to->of_clk.rate = DIV_ROUND_CLOSEST(to->of_clk.rate, prescaler); -- cgit v1.2.3 From 3c84e75b1e5d8406e12b533d44f54ad84d6e3bd6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:56 +0100 Subject: clocksource/drivers/stm32: Factor out more of the clockevent code In order to prepare the clocksource code, let's factor out the clockevent code, split the prescaler and timer width code into separate functions. 
Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-17-git-send-email-daniel.lezcano@linaro.org [ Small edits. ] Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 107 +++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 25 deletions(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 882037f1d8d6..0d37f1a1994e 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -44,6 +44,42 @@ #define TIM_PSC_MAX USHRT_MAX #define TIM_PSC_CLKRATE 10000 +struct stm32_timer_private { + int bits; +}; + +/** + * stm32_timer_of_bits_set - set accessor helper + * @to: a timer_of structure pointer + * @bits: the number of bits (16 or 32) + * + * Accessor helper to set the number of bits in the timer-of private + * structure. + * + */ +static void stm32_timer_of_bits_set(struct timer_of *to, int bits) +{ + struct stm32_timer_private *pd = to->private_data; + + pd->bits = bits; +} + +/** + * stm32_timer_of_bits_get - get accessor helper + * @to: a timer_of structure pointer + * + * Accessor helper to get the number of bits in the timer-of private + * structure. + * + * Returns an integer corresponding to the number of bits. + */ +static int stm32_timer_of_bits_get(struct timer_of *to) +{ + struct stm32_timer_private *pd = to->private_data; + + return pd->bits; +} + static void stm32_clock_event_disable(struct timer_of *to) { writel_relaxed(0, timer_of_base(to) + TIM_DIER); @@ -124,35 +160,31 @@ static irqreturn_t stm32_clock_event_handler(int irq, void *dev_id) * is 32 bits wide, the result will be UINT_MAX, otherwise it will * be truncated by the 16-bit register to USHRT_MAX. * - * Returns UINT_MAX if the timer is 32 bits wide, USHRT_MAX if it is a - * 16 bits wide. 
*/ -static u32 __init stm32_timer_width(struct timer_of *to) +static void __init stm32_timer_set_width(struct timer_of *to) { + u32 width; + writel_relaxed(UINT_MAX, timer_of_base(to) + TIM_ARR); - return readl_relaxed(timer_of_base(to) + TIM_ARR); + width = readl_relaxed(timer_of_base(to) + TIM_ARR); + + stm32_timer_of_bits_set(to, width == UINT_MAX ? 32 : 16); } -static void __init stm32_clockevent_init(struct timer_of *to) +/** + * stm32_timer_set_prescaler - Compute and set the prescaler register + * @to: a pointer to a timer-of structure + * + * Depending on the timer width, compute the prescaler to always + * target a 10MHz timer rate for 16 bits. 32-bit timers are + * considered precise and long enough to not use the prescaler. + */ +static void __init stm32_timer_set_prescaler(struct timer_of *to) { - u32 width = 0; - int prescaler; + int prescaler = 1; - to->clkevt.name = to->np->full_name; - to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; - to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; - to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; - to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; - to->clkevt.set_state_oneshot = stm32_clock_event_set_oneshot; - to->clkevt.tick_resume = stm32_clock_event_shutdown; - to->clkevt.set_next_event = stm32_clock_event_set_next_event; - - width = stm32_timer_width(to); - if (width == UINT_MAX) { - prescaler = 1; - to->clkevt.rating = 250; - } else { + if (stm32_timer_of_bits_get(to) != 32) { prescaler = DIV_ROUND_CLOSEST(timer_of_rate(to), TIM_PSC_CLKRATE); /* @@ -161,7 +193,6 @@ static void __init stm32_clockevent_init(struct timer_of *to) * this case. */ prescaler = prescaler < TIM_PSC_MAX ? 
prescaler : TIM_PSC_MAX; - to->clkevt.rating = 100; } writel_relaxed(prescaler - 1, timer_of_base(to) + TIM_PSC); @@ -171,12 +202,26 @@ static void __init stm32_clockevent_init(struct timer_of *to) /* Adjust rate and period given the prescaler value */ to->of_clk.rate = DIV_ROUND_CLOSEST(to->of_clk.rate, prescaler); to->of_clk.period = DIV_ROUND_UP(to->of_clk.rate, HZ); +} + +static void __init stm32_clockevent_init(struct timer_of *to) +{ + u32 bits = stm32_timer_of_bits_get(to); - clockevents_config_and_register(&to->clkevt, - timer_of_rate(to), 0x1, width); + to->clkevt.name = to->np->full_name; + to->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + to->clkevt.set_state_shutdown = stm32_clock_event_shutdown; + to->clkevt.set_state_periodic = stm32_clock_event_set_periodic; + to->clkevt.set_state_oneshot = stm32_clock_event_set_oneshot; + to->clkevt.tick_resume = stm32_clock_event_shutdown; + to->clkevt.set_next_event = stm32_clock_event_set_next_event; + to->clkevt.rating = bits == 32 ? 250 : 100; + + clockevents_config_and_register(&to->clkevt, timer_of_rate(to), 0x1, + (1 << bits) - 1); pr_info("%pOF: STM32 clockevent driver initialized (%d bits)\n", - to->np, width == UINT_MAX ? 
32 : 16); + to->np, bits); } static int __init stm32_timer_init(struct device_node *node) @@ -196,14 +241,26 @@ static int __init stm32_timer_init(struct device_node *node) if (ret) goto err; + to->private_data = kzalloc(sizeof(struct stm32_timer_private), + GFP_KERNEL); + if (!to->private_data) + goto deinit; + rstc = of_reset_control_get(node, NULL); if (!IS_ERR(rstc)) { reset_control_assert(rstc); reset_control_deassert(rstc); } + stm32_timer_set_width(to); + + stm32_timer_set_prescaler(to); + stm32_clockevent_init(to); return 0; + +deinit: + timer_of_cleanup(to); err: kfree(to); return ret; -- cgit v1.2.3 From f5ef02bd0e8cf53472cc358a542121366add0c9a Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 8 Jan 2018 14:28:57 +0100 Subject: clocksource/drivers/stm32: Add clocksource functionality The scene is set for the clocksource functionality, let's add it for this driver. Tested-by: Benjamin Gaignard Signed-off-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-18-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 0d37f1a1994e..21b7492c963a 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "timer-of.h" @@ -80,6 +81,13 @@ static int stm32_timer_of_bits_get(struct timer_of *to) return pd->bits; } +static void __iomem *stm32_timer_cnt __read_mostly; + +static u64 notrace stm32_read_sched_clock(void) +{ + return readl_relaxed(stm32_timer_cnt); +} + static void stm32_clock_event_disable(struct timer_of *to) { writel_relaxed(0, timer_of_base(to) + TIM_DIER); @@ 
-204,6 +212,31 @@ static void __init stm32_timer_set_prescaler(struct timer_of *to) to->of_clk.period = DIV_ROUND_UP(to->of_clk.rate, HZ); } +static int __init stm32_clocksource_init(struct timer_of *to) +{ + u32 bits = stm32_timer_of_bits_get(to); + const char *name = to->np->full_name; + + /* + * This driver allows to register several timers and relies on + * the generic time framework to select the right one. + * However, nothing allows to do the same for the + * sched_clock. We are not interested in a sched_clock for the + * 16-bit timers but only for the 32-bit one, so if no 32-bit + * timer is registered yet, we select this 32-bit timer as a + * sched_clock. + */ + if (bits == 32 && !stm32_timer_cnt) { + stm32_timer_cnt = timer_of_base(to) + TIM_CNT; + sched_clock_register(stm32_read_sched_clock, bits, timer_of_rate(to)); + pr_info("%s: STM32 sched_clock registered\n", name); + } + + return clocksource_mmio_init(timer_of_base(to) + TIM_CNT, name, + timer_of_rate(to), bits == 32 ? 250 : 100, + bits, clocksource_mmio_readl_up); +} + static void __init stm32_clockevent_init(struct timer_of *to) { u32 bits = stm32_timer_of_bits_get(to); @@ -256,6 +289,10 @@ static int __init stm32_timer_init(struct device_node *node) stm32_timer_set_prescaler(to); + ret = stm32_clocksource_init(to); + if (ret) + goto deinit; + stm32_clockevent_init(to); return 0; -- cgit v1.2.3 From 81abdbbffd69fecdac37fe1d2b44a21227ee23d4 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:58 +0100 Subject: clocksource/drivers/stm32: Add the timer delay callback Add the timer delay callback, that saves us ~90ms of boot time. 
Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-19-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index 21b7492c963a..c7d1dae27067 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,13 @@ static u64 notrace stm32_read_sched_clock(void) return readl_relaxed(stm32_timer_cnt); } +static struct delay_timer stm32_timer_delay; + +static unsigned long stm32_read_delay(void) +{ + return readl_relaxed(stm32_timer_cnt); +} + static void stm32_clock_event_disable(struct timer_of *to) { writel_relaxed(0, timer_of_base(to) + TIM_DIER); @@ -230,6 +238,11 @@ static int __init stm32_clocksource_init(struct timer_of *to) stm32_timer_cnt = timer_of_base(to) + TIM_CNT; sched_clock_register(stm32_read_sched_clock, bits, timer_of_rate(to)); pr_info("%s: STM32 sched_clock registered\n", name); + + stm32_timer_delay.read_current_timer = stm32_read_delay; + stm32_timer_delay.freq = timer_of_rate(to); + register_current_timer_delay(&stm32_timer_delay); + pr_info("%s: STM32 delay timer registered\n", name); } return clocksource_mmio_init(timer_of_base(to) + TIM_CNT, name, -- cgit v1.2.3 From 103bb56a2831bfc7f2d442da9e47f89f37d34952 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 8 Jan 2018 14:28:59 +0100 Subject: clocksource/drivers/stm32: Start the timer's counter sooner As we have a lot of timers on this platform, we can have potentially all the timers enabled in the DT, so we don't want to start the timer for every probe otherwise they will be running for nothing as only one will 
be used. Start the timer only when setting the mode or when the clocksource is enabled. Tested-by: Benjamin Gaignard Signed-off-by: Daniel Lezcano Acked-by: Benjamin Gaignard Cc: Alexandre Torgue Cc: Linus Torvalds Cc: Maxime Coquelin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515418139-23276-20-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-stm32.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index c7d1dae27067..e5cdc3af684c 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -101,7 +101,15 @@ static void stm32_clock_event_disable(struct timer_of *to) writel_relaxed(0, timer_of_base(to) + TIM_DIER); } -static void stm32_clock_event_enable(struct timer_of *to) +/** + * stm32_timer_start - Start the counter without event + * @to: a timer_of structure pointer + * + * Start the timer in order to have the counter reset and start + * incrementing but disable interrupt event when there is a counter + * overflow. By default, the counter direction is used as upcounter. + */ +static void stm32_timer_start(struct timer_of *to) { writel_relaxed(TIM_CR1_UDIS | TIM_CR1_CEN, timer_of_base(to) + TIM_CR1); } @@ -137,7 +145,7 @@ static int stm32_clock_event_set_periodic(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); - stm32_clock_event_enable(to); + stm32_timer_start(to); return stm32_clock_event_set_next_event(timer_of_period(to), clkevt); } @@ -146,7 +154,7 @@ static int stm32_clock_event_set_oneshot(struct clock_event_device *clkevt) { struct timer_of *to = to_timer_of(clkevt); - stm32_clock_event_enable(to); + stm32_timer_start(to); return 0; } @@ -235,6 +243,13 @@ static int __init stm32_clocksource_init(struct timer_of *to) * sched_clock. 
*/ if (bits == 32 && !stm32_timer_cnt) { + + /* + * Start immediately the counter as we will be using + * it right after. + */ + stm32_timer_start(to); + stm32_timer_cnt = timer_of_base(to) + TIM_CNT; sched_clock_register(stm32_read_sched_clock, bits, timer_of_rate(to)); pr_info("%s: STM32 sched_clock registered\n", name); -- cgit v1.2.3 From 6baf9e67c9c5d738188b8490893c7e079d3deb7e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 5 Jan 2018 05:19:56 +0100 Subject: irq/work: Improve the flag definitions IRQ_WORK_FLAGS is defined simply to 3UL. This is confusing as it says nothing about its purpose. Define IRQ_WORK_FLAGS as a bitwise OR of IRQ_WORK_PENDING and IRQ_WORK_BUSY and change its name to IRQ_WORK_CLAIMED. While we're at it: use the BIT() macro for all flags. Signed-off-by: Bartosz Golaszewski Signed-off-by: Frederic Weisbecker Reviewed-by: Andy Shevchenko Cc: Linus Torvalds Cc: Marc Zyngier Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515125996-21564-1-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 11 +++++++---- kernel/irq_work.c | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 0e81035b678f..b11fcdfd0770 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -13,10 +13,13 @@ * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed */ -#define IRQ_WORK_PENDING 1UL -#define IRQ_WORK_BUSY 2UL -#define IRQ_WORK_FLAGS 3UL -#define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ +#define IRQ_WORK_PENDING BIT(0) +#define IRQ_WORK_BUSY BIT(1) + +/* Doesn't want IPI, wait for tick: */ +#define IRQ_WORK_LAZY BIT(2) + +#define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) struct irq_work { unsigned long flags; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 40e9d739c169..6b7cdf17ccf8 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -36,7 +36,7 
@@ static bool irq_work_claim(struct irq_work *work) */ flags = work->flags & ~IRQ_WORK_PENDING; for (;;) { - nflags = flags | IRQ_WORK_FLAGS; + nflags = flags | IRQ_WORK_CLAIMED; oflags = cmpxchg(&work->flags, flags, nflags); if (oflags == flags) break; -- cgit v1.2.3 From 98b8e4e5c17bf87c1b18ed929472051dab39878c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Jan 2018 12:49:29 +0100 Subject: platform/x86: wmi: Call acpi_wmi_init() later Calling acpi_wmi_init() at the subsys_initcall() level causes ordering issues to appear on some systems and they are difficult to reproduce, because there is no guaranteed ordering between subsys_initcall() calls, so they may occur in different orders on different systems. In particular, commit 86d9f48534e8 (mm/slab: fix kmemcg cache creation delayed issue) exposed one of these issues where genl_init() and acpi_wmi_init() are both called at the same initcall level, but the former must run before the latter so as to avoid a NULL pointer dereference. For this reason, move the acpi_wmi_init() invocation to the initcall_sync level which should still be early enough for things to work correctly in the WMI land. Link: https://marc.info/?t=151274596700002&r=1&w=2 Reported-by: Jonathan McDowell Reported-by: Joonsoo Kim Tested-by: Jonathan McDowell Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 791449a2370f..daa68acbc900 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1458,5 +1458,5 @@ static void __exit acpi_wmi_exit(void) class_unregister(&wmi_bus_class); } -subsys_initcall(acpi_wmi_init); +subsys_initcall_sync(acpi_wmi_init); module_exit(acpi_wmi_exit); -- cgit v1.2.3 From 9d0513d82f1a8fe17b41f113ac5922fa57dbaf5c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Dec 2017 14:25:23 +0200 Subject: x86/platform/intel-mid: Revert "Make 'bt_sfi_data' const" So one of the constification patches unearthed a type casting fragility of the underlying code: 276c87054751 ("x86/platform/intel-mid: Make 'bt_sfi_data' const") converted the struct to be const while it is also used as a temporary container for important data that is used to fill 'parent' and 'name' fields in struct platform_device_info. The compiler doesn't notice this due to an explicit type cast that loses the const - which fragility will be fixed separately. This type cast turned a seemingly trivial const propagation patch into a hard to debug data corruptor and crasher bug. 
Signed-off-by: Andy Shevchenko Cc: Bhumika Goyal Cc: Darren Hart Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: julia.lawall@lip6.fr Cc: platform-driver-x86@vger.kernel.org Link: http://lkml.kernel.org/r/20171228122523.21802-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/platform_bt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c index dc036e511f48..5a0483e7bf66 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c @@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata) return 0; } -static const struct bt_sfi_data tng_bt_sfi_data __initdata = { +static struct bt_sfi_data tng_bt_sfi_data __initdata = { .setup = tng_bt_sfi_setup, }; -- cgit v1.2.3 From 414a2dc138838642d28938506e31ad461648b898 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 2 Jan 2018 12:13:10 +0100 Subject: sched/isolation: Make CONFIG_CPU_ISOLATION=y depend on SMP or COMPILE_TEST On uniprocessor systems, critical and non-critical tasks cannot be isolated, as there is only a single CPU core. Hence enabling CPU isolation by default on such systems does not make much sense. Instead of changing the default for !SMP, fix this by making the feature depend on SMP, with an override for compile-testing. Note that its sole selector (NO_HZ_FULL) already depends on SMP. This decreases kernel size for a default uniprocessor kernel by ca. 1 KiB. 
Signed-off-by: Geert Uytterhoeven Acked-by: Nicolas Pitre Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 2c43838c99d9d23f ("sched/isolation: Enable CONFIG_CPU_ISOLATION=y by default") Link: http://lkml.kernel.org/r/1514891590-20782-1-git-send-email-geert@linux-m68k.org Signed-off-by: Ingo Molnar --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index 690a381adee0..c1221332e128 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -461,6 +461,7 @@ endmenu # "CPU/Task time and stats accounting" config CPU_ISOLATION bool "CPU isolation" + depends on SMP || COMPILE_TEST default y help Make sure that CPUs running critical tasks are not disturbed by -- cgit v1.2.3 From f328299e54a94998b31baf788d2b33d8122a4acb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Dec 2017 13:53:03 -0600 Subject: locking/refcounts: Remove stale comment from the ARCH_HAS_REFCOUNT Kconfig entry ARCH_HAS_REFCOUNT is no longer marked as broken ('if BROKEN'), so remove the stale comment regarding it being broken. Signed-off-by: Eric Biggers Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171229195303.17781-1-ebiggers3@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d4fc98c50378..ff4e9cd99854 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -55,7 +55,6 @@ config X86 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_PMEM_API if X86_64 - # Causing hangs/crashes, see the commit that added this change for details. 
select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_SET_MEMORY -- cgit v1.2.3 From 7deea450eb912f269d999de62c8ab922d1461748 Mon Sep 17 00:00:00 2001 From: Sunil Challa Date: Thu, 4 Jan 2018 18:46:54 -0500 Subject: bnxt_en: Fix population of flow_type in bnxt_hwrm_cfa_flow_alloc() flow_type in HWRM_FLOW_ALLOC is not being populated correctly due to incorrect passing of pointer and size of l3_mask argument of is_wildcard(). Fixed this. Fixes: db1d36a27324 ("bnxt_en: add TC flower offload flow_alloc/free FW cmds") Signed-off-by: Sunil Challa Reviewed-by: Sathya Perla Reviewed-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 3d201d7324bd..d8fee26cd45e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -421,7 +421,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, } /* If all IP and L4 fields are wildcarded then this is an L2 flow */ - if (is_wildcard(&l3_mask, sizeof(l3_mask)) && + if (is_wildcard(l3_mask, sizeof(*l3_mask)) && is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) { flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2; } else { -- cgit v1.2.3 From 78f300049335ae81a5cc6b4b232481dc5e1f9d41 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Thu, 4 Jan 2018 18:46:55 -0500 Subject: bnxt_en: Fix the 'Invalid VF' id check in bnxt_vf_ndo_prep routine. In bnxt_vf_ndo_prep (which is called by bnxt_get_vf_config ndo), there is a check for "Invalid VF id". Currently, the check is done against max_vfs. However, the user doesn't always create max_vfs. So, the check should be against the created number of VFs. 
The number of bnxt_vf_info structures that are allocated in bnxt_alloc_vf_resources routine is the "number of requested VFs". So, if an "invalid VF id" falls between the requested number of VFs and the max_vfs, the driver will be dereferencing an invalid pointer. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Venkat Devvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 5ee18660bc33..c9617675f934 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -70,7 +70,7 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) netdev_err(bp->dev, "vf ndo called though sriov is disabled\n"); return -EINVAL; } - if (vf_id >= bp->pf.max_vfs) { + if (vf_id >= bp->pf.active_vfs) { netdev_err(bp->dev, "Invalid VF id %d\n", vf_id); return -EINVAL; } -- cgit v1.2.3 From b707fda2df4070785d0fa8a278aa13944c5f51f8 Mon Sep 17 00:00:00 2001 From: Eduardo Otubo Date: Fri, 5 Jan 2018 09:42:16 +0100 Subject: xen-netfront: enable device after manual module load When loading the module after unloading it, the network interface would not be enabled and thus wouldn't have a backend counterpart and unable to be used by the guest. The guest would face errors like: [root@guest ~]# ethtool -i eth0 Cannot get driver information: No such device [root@guest ~]# ifconfig eth0 eth0: error fetching interface information: Device not found This patch initializes the state of the netfront device whenever it is loaded manually, this state would communicate the netback to create its device and establish the connection between them. Signed-off-by: Eduardo Otubo Reviewed-by: Boris Ostrovsky Signed-off-by: David S. 
Miller --- drivers/net/xen-netfront.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index c5a34671abda..9bd7ddeeb6a5 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1326,6 +1326,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netif_carrier_off(netdev); + xenbus_switch_state(dev, XenbusStateInitialising); return netdev; exit: -- cgit v1.2.3 From cc35c3d1edf7a8373a1a5daa80a912dec96a9cd5 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 5 Jan 2018 11:17:17 -0200 Subject: sctp: do not retransmit upon FragNeeded if PMTU discovery is disabled Currently, if PMTU discovery is disabled on a given transport, but the configured value is higher than the actual PMTU, it is likely that we will get some icmp Frag Needed. The issue is, if PMTU discovery is disabled, we won't update the information and will issue a retransmission immediately, which may very well trigger another ICMP, and another retransmission, leading to a loop. The fix is to simply not trigger immediate retransmissions if PMTU discovery is disabled on the given transport. Changes from v2: - updated stale comment, noticed by Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/input.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 621b5ca3fd1c..9320661cc41d 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -399,20 +399,20 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, return; } - if (t->param_flags & SPP_PMTUD_ENABLE) { - /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); + if (!(t->param_flags & SPP_PMTUD_ENABLE)) + /* We can't allow retransmitting in such case, as the + * retransmission would be sized just as before, and thus we + * would get another icmp, and retransmit again. 
+ */ + return; - /* Update association pmtu. */ - sctp_assoc_sync_pmtu(asoc); - } + /* Update transports view of the MTU */ + sctp_transport_update_pmtu(t, pmtu); - /* Retransmit with the new pmtu setting. - * Normally, if PMTU discovery is disabled, an ICMP Fragmentation - * Needed will never be sent, but if a message was sent before - * PMTU discovery was disabled that was larger than the PMTU, it - * would not be fragmented, so it must be re-transmitted fragmented. - */ + /* Update association pmtu. */ + sctp_assoc_sync_pmtu(asoc); + + /* Retransmit with the new pmtu setting. */ sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } -- cgit v1.2.3 From b6c5734db07079c9410147b32407f2366d584e6c Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 5 Jan 2018 11:17:18 -0200 Subject: sctp: fix the handling of ICMP Frag Needed for too small MTUs syzbot reported a hang involving SCTP, on which it kept flooding dmesg with the message: [ 246.742374] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too low, using default minimum of 512 That happened because whenever SCTP hits an ICMP Frag Needed, it tries to adjust to the new MTU and triggers an immediate retransmission. But it didn't consider the fact that MTUs smaller than the SCTP minimum MTU allowed (512) would not cause the PMTU to change, and issued the retransmission anyway (thus leading to another ICMP Frag Needed, and so on). As IPv4 (ip_rt_min_pmtu=556) and IPv6 (IPV6_MIN_MTU=1280) minimum MTU are higher than that, sctp_transport_update_pmtu() is changed to re-fetch the PMTU that got set after our request, and with that, detect if there was an actual change or not. The fix, thus, skips the immediate retransmission if the received ICMP resulted in no change, in the hope that SCTP will select another path. Note: The value being used for the minimum MTU (512, SCTP_DEFAULT_MINSEGMENT) is not right and instead it should be (576, SCTP_MIN_PMTU), but such change belongs to another patch. 
Changes from v1: - do not disable PMTU discovery, in the light of commit 06ad391919b2 ("[SCTP] Don't disable PMTU discovery when mtu is small") and as suggested by Xin Long. - changed the way to break the rtx loop by detecting if the icmp resulted in a change or not Changes from v2: none See-also: https://lkml.org/lkml/2017/12/22/811 Reported-by: syzbot Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/input.c | 8 ++++++-- net/sctp/transport.c | 29 +++++++++++++++++++---------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2f8f93da5dc2..9a5ccf03a59b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -966,7 +966,7 @@ void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *t); -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); diff --git a/net/sctp/input.c b/net/sctp/input.c index 9320661cc41d..141c9c466ec1 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -406,8 +406,12 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, */ return; - /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); + /* Update transports view of the MTU. Return if no update was needed. + * If an update wasn't needed/possible, it also doesn't make sense to + * try to retransmit now. + */ + if (!sctp_transport_update_pmtu(t, pmtu)) + return; /* Update association pmtu. 
*/ sctp_assoc_sync_pmtu(asoc); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1e5a22430cf5..47f82bd794d9 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst = sctp_transport_dst_check(t); + bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); - /* Use default minimum segment size and disable - * pmtu discovery on this transport. - */ - t->pathmtu = SCTP_DEFAULT_MINSEGMENT; - } else { - t->pathmtu = pmtu; + pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); + /* Use default minimum segment instead */ + pmtu = SCTP_DEFAULT_MINSEGMENT; } + pmtu = SCTP_TRUNC4(pmtu); if (dst) { dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); } - if (!dst) + if (!dst) { t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); + dst = t->dst; + } + + if (dst) { + /* Re-fetch, as under layers may have a higher minimum size */ + pmtu = SCTP_TRUNC4(dst_mtu(dst)); + change = t->pathmtu != pmtu; + } + t->pathmtu = pmtu; + + return change; } /* Caches the dst entry and source address for a transport's destination -- cgit v1.2.3 From 46cd75036415d94e9cf451e6606a099945d54cc6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 5 Jan 2018 11:23:45 -0600 Subject: phylink: mark expected switch fall-throughs in phylink_mii_ioctl In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. 
Addresses-Coverity-ID: 1463447 ("Missing break in switch") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 150cd95a6e1e..249ce5cbea22 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1296,6 +1296,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: mii->phy_id = pl->phydev->mdio.addr; + /* fall through */ case SIOCGMIIREG: ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num); @@ -1318,6 +1319,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd) switch (cmd) { case SIOCGMIIPHY: mii->phy_id = 0; + /* fall through */ case SIOCGMIIREG: ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num); -- cgit v1.2.3 From 56c0290202ab94a2f2780c449395d4ae8495fab4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Jan 2018 09:00:09 +0100 Subject: mdio-sun4i: Fix a memory leak If the probing of the regulator is deferred, the memory allocated by 'mdiobus_alloc_size()' will be leaking. It should be freed before the next call to 'sun4i_mdio_probe()' which will reallocate it. Fixes: 4bdcb1dd9feb ("net: Add MDIO bus driver for the Allwinner EMAC") Signed-off-by: Christophe JAILLET Signed-off-by: David S. 
Miller --- drivers/net/phy/mdio-sun4i.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index 135296508a7e..6425ce04d3f9 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -118,8 +118,10 @@ static int sun4i_mdio_probe(struct platform_device *pdev) data->regulator = devm_regulator_get(&pdev->dev, "phy"); if (IS_ERR(data->regulator)) { - if (PTR_ERR(data->regulator) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(data->regulator) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err_out_free_mdiobus; + } dev_info(&pdev->dev, "no regulator found\n"); data->regulator = NULL; -- cgit v1.2.3 From 50f3d740d376f664f6accc7e86c9afd8f1c7e1e4 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 7 Jan 2018 00:26:47 +0300 Subject: sh_eth: fix TXALCR1 offsets The TXALCR1 offsets are incorrect in the register offset tables, most probably due to copy&paste error. Luckily, the driver never uses this register. :-) Fixes: 4a55530f38e4 ("net: sh_eth: modify the definitions of register") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index f21c1db91c3f..b9e2846589f8 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -147,7 +147,7 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { [FWNLCR0] = 0x0090, [FWALCR0] = 0x0094, [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, + [TXALCR1] = 0x00a4, [RXNLCR1] = 0x00a8, [RXALCR1] = 0x00ac, [FWNLCR1] = 0x00b0, @@ -399,7 +399,7 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = { [FWNLCR0] = 0x0090, [FWALCR0] = 0x0094, [TXNLCR1] = 0x00a0, - [TXALCR1] = 0x00a0, + [TXALCR1] = 0x00a4, [RXNLCR1] = 0x00a8, [RXALCR1] = 0x00ac, [FWNLCR1] = 0x00b0, -- cgit v1.2.3 From b221fc130c49c50f4c2250d22e873420765a9fa2 Mon Sep 17 00:00:00 2001 From: Rui Hua Date: Mon, 8 Jan 2018 12:21:18 -0800 Subject: bcache: ret IOERR when read meets metadata error The read request might meet error when searching the btree, but the error was not handled in cache_lookup(), and this kind of metadata failure will not go into cached_dev_read_error(), finally, the upper layer will receive bi_status=0. In this patch we judge the metadata error by the return value of bch_btree_map_keys(), there are two potential paths that give rise to the error: 1. Because the btree is not totally cached in memory, we may get an error when reading a btree node from cache device (see bch_btree_node_get()), the likely errno is -EIO, -ENOMEM 2. When read miss happens, bch_btree_insert_check_key() will be called to insert a "replace_key" to btree(see cached_dev_cache_miss(), just for doing preparatory work before insert the missed data to cache device), a failure can also happen in this situation, the likely errno is -ENOMEM bch_btree_map_keys() will return MAP_DONE in normal scenario, but we will get either -EIO or -ENOMEM in above two cases.
If this happens, we should NOT recover data from backing device (when cache device is dirty) because we don't know whether bkeys the read request covered are all clean. And after that happens, s->iop.status is still its initial value (0) before we submit s->bio.bio; we set it to BLK_STS_IOERR, so it can go into cached_dev_read_error(), and finally it can be passed to upper layer, or recovered by reread from backing device. [edit by mlyle: patch formatting, word-wrap, comment spelling, commit log format] Signed-off-by: Hua Rui Reviewed-by: Michael Lyle Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index c493fb947dc9..52b4ce24f9e2 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -576,6 +576,7 @@ static void cache_lookup(struct closure *cl) { struct search *s = container_of(cl, struct search, iop.cl); struct bio *bio = &s->bio.bio; + struct cached_dev *dc; int ret; bch_btree_op_init(&s->op, -1); @@ -588,6 +589,27 @@ static void cache_lookup(struct closure *cl) return; } + /* + * We might meet err when searching the btree, If that happens, we will + * get negative ret, in this scenario we should not recover data from + * backing device (when cache device is dirty) because we don't know + * whether bkeys the read request covered are all clean.
+ * + * And after that happened, s->iop.status is still its initial value + * before we submit s->bio.bio + */ + if (ret < 0) { + BUG_ON(ret == -EINTR); + if (s->d && s->d->c && + !UUID_FLASH_ONLY(&s->d->c->uuids[s->d->id])) { + dc = container_of(s->d, struct cached_dev, disk); + if (dc && atomic_read(&dc->has_dirty)) + s->recoverable = false; + } + if (!s->iop.status) + s->iop.status = BLK_STS_IOERR; + } + closure_return(cl); } -- cgit v1.2.3 From 8d29c4426b9f8afaccf28de414fde8a722b35fdf Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Jan 2018 12:21:19 -0800 Subject: bcache: stop writeback thread after detaching Currently, when a cached device detaching from cache, writeback thread is not stopped, and writeback_rate_update work is not canceled. For example, after the following command: echo 1 >/sys/block/sdb/bcache/detach you can still see the writeback thread. Then you attach the device to the cache again, bcache will create another writeback thread, for example, after below command: echo ba0fb5cd-658a-4533-9806-6ce166d883b9 > /sys/block/sdb/bcache/attach then you will see 2 writeback threads. This patch stops writeback thread and cancels writeback_rate_update work when cached device detaching from cache. Compare with patch v1, this v2 patch moves code down into the register lock for safety in case of any future changes as Coly and Mike suggested. 
[edit by mlyle: commit log spelling/formatting] Signed-off-by: Tang Junhui Reviewed-by: Michael Lyle Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8399fe0651f2..553e841e897d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -906,6 +906,12 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_lock(&bch_register_lock); + cancel_delayed_work_sync(&dc->writeback_rate_update); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) { + kthread_stop(dc->writeback_thread); + dc->writeback_thread = NULL; + } + memset(&dc->sb.set_uuid, 0, 16); SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE); -- cgit v1.2.3 From 9d13411784e27227162857df25ab6817a1db2a73 Mon Sep 17 00:00:00 2001 From: Vasyl Gomonovych Date: Mon, 8 Jan 2018 12:21:20 -0800 Subject: bcache: Use PTR_ERR_OR_ZERO() Fix ptr_ret.cocci warnings: drivers/md/bcache/btree.c:1800:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Vasyl Gomonovych Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/btree.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index ebb1874218e7..9e30713dbdb8 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1804,10 +1804,7 @@ static int bch_gc_thread(void *arg) int bch_gc_thread_start(struct cache_set *c) { c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc"); - if (IS_ERR(c->gc_thread)) - return PTR_ERR(c->gc_thread); - - return 0; + return PTR_ERR_OR_ZERO(c->gc_thread); } /* Initial partial gc */ -- cgit v1.2.3 From 4eca1cb28d8b0574ca4f1f48e9331c5f852d43b9 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Jan 2018 12:21:21 -0800 Subject: bcache: segregate flash only volume write 
streams In a scenario where there are some flash only volumes, and some cached devices, when many tasks request these devices in writeback mode, the write IOs may fall to the same bucket as below: | cached data | flash data | cached data | cached data| flash data| then after writeback of these cached devices, the bucket would be like the bucket below: | free | flash data | free | free | flash data | So, there is a lot of free space in this bucket, but since data of flash only volumes still exists, this bucket cannot be reclaimed, which would cause waste of bucket space. In this patch, we segregate flash only volume write streams from cached devices, so data from flash only volumes and cached devices can be stored in different buckets. Compared to the v1 patch, this patch does not add an additional open bucket list, and it tries its best to segregate flash only volume write streams from cached devices, sectors of flash only volumes may still be mixed with dirty sectors of cached device, but the number is very small. [mlyle: fixed commit log formatting, permissions, line endings] Signed-off-by: Tang Junhui Reviewed-by: Michael Lyle Signed-off-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/alloc.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index a0cc1bc6d884..6cc6c0f9c3a9 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -525,15 +525,21 @@ struct open_bucket { /* * We keep multiple buckets open for writes, and try to segregate different - * write streams for better cache utilization: first we look for a bucket where - * the last write to it was sequential with the current write, and failing that - * we look for a bucket that was last used by the same task.
+ * write streams for better cache utilization: first we try to segregate flash + * only volume write streams from cached devices, secondly we look for a bucket + * where the last write to it was sequential with the current write, and + * failing that we look for a bucket that was last used by the same task. * * The ideas is if you've got multiple tasks pulling data into the cache at the * same time, you'll get better cache utilization if you try to segregate their * data and preserve locality. * - * For example, say you've starting Firefox at the same time you're copying a + * For example, dirty sectors of flash only volume is not reclaimable, if their + * dirty sectors mixed with dirty sectors of cached device, such buckets will + * be marked as dirty and won't be reclaimed, though the dirty data of cached + * device have been written back to backend device. + * + * And say you've starting Firefox at the same time you're copying a * bunch of files. Firefox will likely end up being fairly hot and stay in the * cache awhile, but the data you copied might not be; if you wrote all that * data to the same buckets it'd get invalidated at the same time. @@ -550,7 +556,10 @@ static struct open_bucket *pick_data_bucket(struct cache_set *c, struct open_bucket *ret, *ret_task = NULL; list_for_each_entry_reverse(ret, &c->data_buckets, list) - if (!bkey_cmp(&ret->key, search)) + if (UUID_FLASH_ONLY(&c->uuids[KEY_INODE(&ret->key)]) != + UUID_FLASH_ONLY(&c->uuids[KEY_INODE(search)])) + continue; + else if (!bkey_cmp(&ret->key, search)) goto found; else if (ret->last_write_point == write_point) ret_task = ret; -- cgit v1.2.3 From 539d39eb27083405b82b9e604e88af01a9a46c63 Mon Sep 17 00:00:00 2001 From: Tang Junhui Date: Mon, 8 Jan 2018 12:21:22 -0800 Subject: bcache: fix wrong return value in bch_debug_init() in bch_debug_init(), ret is always 0, and the return value is useless, change it to return 0 if be success after calling debugfs_create_dir(), else return a non-zero value. 
Signed-off-by: Tang Junhui Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 6 --- drivers/md/bcache/debug.c | 5 +- drivers/md/bcache/writeback.c | 120 +++++++++++++++++++++++++++++------------- drivers/md/bcache/writeback.h | 3 ++ 4 files changed, 87 insertions(+), 47 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 843877e017e1..1784e50eb857 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -323,12 +323,6 @@ struct cached_dev { struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; - /* - * Internal to the writeback code, so read_dirty() can keep track of - * where it's at. - */ - sector_t last_read; - /* Limit number of writeback bios in flight */ struct semaphore in_flight; struct task_struct *writeback_thread; diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 879ab21074c6..af89408befe8 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -251,8 +251,7 @@ void bch_debug_exit(void) int __init bch_debug_init(struct kobject *kobj) { - int ret = 0; - debug = debugfs_create_dir("bcache", NULL); - return ret; + + return IS_ERR_OR_NULL(debug); } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 1ac2af6128b1..479095987f22 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -237,7 +237,9 @@ static void read_dirty_submit(struct closure *cl) static void read_dirty(struct cached_dev *dc) { unsigned delay = 0; - struct keybuf_key *w; + struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w; + size_t size; + int nk, i; struct dirty_io *io; struct closure cl; @@ -248,45 +250,87 @@ static void read_dirty(struct cached_dev *dc) * mempools. 
*/ - while (!kthread_should_stop()) { - - w = bch_keybuf_next(&dc->writeback_keys); - if (!w) - break; - - BUG_ON(ptr_stale(dc->disk.c, &w->key, 0)); - - if (KEY_START(&w->key) != dc->last_read || - jiffies_to_msecs(delay) > 50) - while (!kthread_should_stop() && delay) - delay = schedule_timeout_interruptible(delay); - - dc->last_read = KEY_OFFSET(&w->key); - - io = kzalloc(sizeof(struct dirty_io) + sizeof(struct bio_vec) - * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), - GFP_KERNEL); - if (!io) - goto err; - - w->private = io; - io->dc = dc; - - dirty_init(w); - bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); - io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0); - bio_set_dev(&io->bio, PTR_CACHE(dc->disk.c, &w->key, 0)->bdev); - io->bio.bi_end_io = read_dirty_endio; - - if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) - goto err_free; - - trace_bcache_writeback(&w->key); + next = bch_keybuf_next(&dc->writeback_keys); + + while (!kthread_should_stop() && next) { + size = 0; + nk = 0; + + do { + BUG_ON(ptr_stale(dc->disk.c, &next->key, 0)); + + /* + * Don't combine too many operations, even if they + * are all small. + */ + if (nk >= MAX_WRITEBACKS_IN_PASS) + break; + + /* + * If the current operation is very large, don't + * further combine operations. + */ + if (size >= MAX_WRITESIZE_IN_PASS) + break; + + /* + * Operations are only eligible to be combined + * if they are contiguous. + * + * TODO: add a heuristic willing to fire a + * certain amount of non-contiguous IO per pass, + * so that we can benefit from backing device + * command queueing. + */ + if ((nk != 0) && bkey_cmp(&keys[nk-1]->key, + &START_KEY(&next->key))) + break; + + size += KEY_SIZE(&next->key); + keys[nk++] = next; + } while ((next = bch_keybuf_next(&dc->writeback_keys))); + + /* Now we have gathered a set of 1..5 keys to write back. 
*/ + for (i = 0; i < nk; i++) { + w = keys[i]; + + io = kzalloc(sizeof(struct dirty_io) + + sizeof(struct bio_vec) * + DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), + GFP_KERNEL); + if (!io) + goto err; + + w->private = io; + io->dc = dc; + + dirty_init(w); + bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); + io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0); + bio_set_dev(&io->bio, + PTR_CACHE(dc->disk.c, &w->key, 0)->bdev); + io->bio.bi_end_io = read_dirty_endio; + + if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL)) + goto err_free; + + trace_bcache_writeback(&w->key); + + down(&dc->in_flight); + + /* We've acquired a semaphore for the maximum + * simultaneous number of writebacks; from here + * everything happens asynchronously. + */ + closure_call(&io->cl, read_dirty_submit, NULL, &cl); + } - down(&dc->in_flight); - closure_call(&io->cl, read_dirty_submit, NULL, &cl); + delay = writeback_delay(dc, size); - delay = writeback_delay(dc, KEY_SIZE(&w->key)); + while (!kthread_should_stop() && delay) { + schedule_timeout_interruptible(delay); + delay = writeback_delay(dc, 0); + } } if (0) { diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index a9e3ffb4b03c..6d26927267f8 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -5,6 +5,9 @@ #define CUTOFF_WRITEBACK 40 #define CUTOFF_WRITEBACK_SYNC 70 +#define MAX_WRITEBACKS_IN_PASS 5 +#define MAX_WRITESIZE_IN_PASS 5000 /* *512b */ + static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) { uint64_t i, ret = 0; -- cgit v1.2.3 From 6e6ccc67b9c7a682d717feedb887cb630a984317 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Mon, 8 Jan 2018 12:21:23 -0800 Subject: bcache: writeback: properly order backing device IO Writeback keys are presently iterated and dispatched for writeback in order of the logical block address on the backing device. Multiple may be, in parallel, read from the cache device and then written back (especially when there are contiguous I/O). 
However-- there was no guarantee with the existing code that the writes would be issued in LBA order, as the reads from the cache device are often re-ordered. In turn, when writing back quickly, the backing disk often has to seek backwards-- this slows writeback and increases utilization. This patch introduces an ordering mechanism that guarantees that the original order of issue is maintained for the write portion of the I/O. Performance for writeback is significantly improved when there are multiple contiguous keys or high writeback rates. Signed-off-by: Michael Lyle Reviewed-by: Tang Junhui Tested-by: Tang Junhui Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 8 ++++++++ drivers/md/bcache/writeback.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 1784e50eb857..3be0fcc19b1f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -330,6 +330,14 @@ struct cached_dev { struct keybuf writeback_keys; + /* + * Order the write-half of writeback operations strongly in dispatch + * order. (Maintain LBA order; don't allow reads completing out of + * order to re-order the writes...) 
+ */ + struct closure_waitlist writeback_ordering_wait; + atomic_t writeback_sequence_next; + /* For tracking sequential IO */ #define RECENT_IO_BITS 7 #define RECENT_IO (1 << RECENT_IO_BITS) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 479095987f22..6e1d2fde43df 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -116,6 +116,7 @@ static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) struct dirty_io { struct closure cl; struct cached_dev *dc; + uint16_t sequence; struct bio bio; }; @@ -194,6 +195,27 @@ static void write_dirty(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); struct keybuf_key *w = io->bio.bi_private; + struct cached_dev *dc = io->dc; + + uint16_t next_sequence; + + if (atomic_read(&dc->writeback_sequence_next) != io->sequence) { + /* Not our turn to write; wait for a write to complete */ + closure_wait(&dc->writeback_ordering_wait, cl); + + if (atomic_read(&dc->writeback_sequence_next) == io->sequence) { + /* + * Edge case-- it happened in indeterminate order + * relative to when we were added to wait list.. + */ + closure_wake_up(&dc->writeback_ordering_wait); + } + + continue_at(cl, write_dirty, io->dc->writeback_write_wq); + return; + } + + next_sequence = io->sequence + 1; /* * IO errors are signalled using the dirty bit on the key. 
@@ -211,6 +233,9 @@ static void write_dirty(struct closure *cl) closure_bio_submit(&io->bio, cl); } + atomic_set(&dc->writeback_sequence_next, next_sequence); + closure_wake_up(&dc->writeback_ordering_wait); + continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq); } @@ -242,7 +267,10 @@ static void read_dirty(struct cached_dev *dc) int nk, i; struct dirty_io *io; struct closure cl; + uint16_t sequence = 0; + BUG_ON(!llist_empty(&dc->writeback_ordering_wait.list)); + atomic_set(&dc->writeback_sequence_next, sequence); closure_init_stack(&cl); /* @@ -303,6 +331,7 @@ static void read_dirty(struct cached_dev *dc) w->private = io; io->dc = dc; + io->sequence = sequence++; dirty_init(w); bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); -- cgit v1.2.3 From b1092c9af9ed88dd2fc8345d987dfb7efe7be8f0 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Mon, 8 Jan 2018 12:21:24 -0800 Subject: bcache: allow quick writeback when backing idle If the control system would wait for at least half a second, and there's been no reqs hitting the backing disk for awhile: use an alternate mode where we have at most one contiguous set of writebacks in flight at a time. (But don't otherwise delay). If front-end IO appears, it will still be quick, as it will only have to contend with one real operation in flight. But otherwise, we'll be sending data to the backing disk as quickly as it can accept it (with one op at a time). 
Signed-off-by: Michael Lyle Reviewed-by: Tang Junhui Acked-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 7 +++++++ drivers/md/bcache/request.c | 1 + drivers/md/bcache/writeback.c | 21 +++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 3be0fcc19b1f..5f7b0b2513cc 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -320,6 +320,13 @@ struct cached_dev { */ atomic_t has_dirty; + /* + * Set to zero by things that touch the backing volume-- except + * writeback. Incremented by writeback. Used to determine when to + * accelerate idle writeback. + */ + atomic_t backing_idle; + struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 52b4ce24f9e2..ddd941056f3c 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -996,6 +996,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, struct cached_dev *dc = container_of(d, struct cached_dev, disk); int rw = bio_data_dir(bio); + atomic_set(&dc->backing_idle, 0); generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); bio_set_dev(bio, dc->bdev); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 6e1d2fde43df..f82ffb2e9b9b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -356,6 +356,27 @@ static void read_dirty(struct cached_dev *dc) delay = writeback_delay(dc, size); + /* If the control system would wait for at least half a + * second, and there's been no reqs hitting the backing disk + * for awhile: use an alternate mode where we have at most + * one contiguous set of writebacks in flight at a time. 
If + * someone wants to do IO it will be quick, as it will only + * have to contend with one operation in flight, and we'll + * be round-tripping data to the backing disk as quickly as + * it can accept it. + */ + if (delay >= HZ / 2) { + /* 3 means at least 1.5 seconds, up to 7.5 if we + * have slowed way down. + */ + if (atomic_inc_return(&dc->backing_idle) >= 3) { + /* Wait for current I/Os to finish */ + closure_sync(&cl); + /* And immediately launch a new set. */ + delay = 0; + } + } + while (!kthread_should_stop() && delay) { schedule_timeout_interruptible(delay); delay = writeback_delay(dc, 0); -- cgit v1.2.3 From e4bf791937d82afca79e1df4063f72dbc6960ac7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 8 Jan 2018 12:21:25 -0800 Subject: bcache: Fix, improve efficiency of closure_sync() Eliminates cases where sync can race and fail to complete / get stuck. Removes many status flags and simplifies entering-and-exiting closure sleeping behaviors. [mlyle: fixed conflicts due to changed return behavior in mainline. extended commit comment, and squashed down two commits that were mostly contradictory to get to this state. 
Changed __set_current_state to set_current_state per Jens review comment] Signed-off-by: Kent Overstreet Signed-off-by: Michael Lyle Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/closure.c | 46 +++++++++++++++++----------------- drivers/md/bcache/closure.h | 60 +++++++++++++++++---------------------------- 2 files changed, 46 insertions(+), 60 deletions(-) diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 1841d0359bac..ca7ace6962a4 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -18,10 +18,6 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) BUG_ON(flags & CLOSURE_GUARD_MASK); BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR)); - /* Must deliver precisely one wakeup */ - if (r == 1 && (flags & CLOSURE_SLEEPING)) - wake_up_process(cl->task); - if (!r) { if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) { atomic_set(&cl->remaining, @@ -100,28 +96,34 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) } EXPORT_SYMBOL(closure_wait); -/** - * closure_sync - sleep until a closure has nothing left to wait on - * - * Sleeps until the refcount hits 1 - the thread that's running the closure owns - * the last refcount. 
- */ -void closure_sync(struct closure *cl) +struct closure_syncer { + struct task_struct *task; + int done; +}; + +static void closure_sync_fn(struct closure *cl) { - while (1) { - __closure_start_sleep(cl); - closure_set_ret_ip(cl); + cl->s->done = 1; + wake_up_process(cl->s->task); +} - if ((atomic_read(&cl->remaining) & - CLOSURE_REMAINING_MASK) == 1) - break; +void __closure_sync(struct closure *cl) +{ + struct closure_syncer s = { .task = current }; + cl->s = &s; + continue_at(cl, closure_sync_fn, NULL); + + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (s.done) + break; schedule(); } - __closure_end_sleep(cl); + __set_current_state(TASK_RUNNING); } -EXPORT_SYMBOL(closure_sync); +EXPORT_SYMBOL(__closure_sync); #ifdef CONFIG_BCACHE_CLOSURES_DEBUG @@ -168,12 +170,10 @@ static int debug_seq_show(struct seq_file *f, void *data) cl, (void *) cl->ip, cl->fn, cl->parent, r & CLOSURE_REMAINING_MASK); - seq_printf(f, "%s%s%s%s\n", + seq_printf(f, "%s%s\n", test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&cl->work)) ? "Q" : "", - r & CLOSURE_RUNNING ? "R" : "", - r & CLOSURE_STACK ? "S" : "", - r & CLOSURE_SLEEPING ? "Sl" : ""); + r & CLOSURE_RUNNING ? "R" : ""); if (r & CLOSURE_WAITING) seq_printf(f, " W %pF\n", diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index ccfbea6f9f6b..392a87cf1b92 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -103,6 +103,7 @@ */ struct closure; +struct closure_syncer; typedef void (closure_fn) (struct closure *); struct closure_waitlist { @@ -115,10 +116,6 @@ enum closure_state { * the thread that owns the closure, and cleared by the thread that's * waking up the closure. * - * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep - * - indicates that cl->task is valid and closure_put() may wake it up. - * Only set or cleared by the thread that owns the closure. 
- * * The rest are for debugging and don't affect behaviour: * * CLOSURE_RUNNING: Set when a closure is running (i.e. by @@ -128,22 +125,16 @@ enum closure_state { * continue_at() and closure_return() clear it for you, if you're doing * something unusual you can use closure_set_dead() which also helps * annotate where references are being transferred. - * - * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a - * closure with this flag set */ - CLOSURE_BITS_START = (1 << 23), - CLOSURE_DESTRUCTOR = (1 << 23), - CLOSURE_WAITING = (1 << 25), - CLOSURE_SLEEPING = (1 << 27), - CLOSURE_RUNNING = (1 << 29), - CLOSURE_STACK = (1 << 31), + CLOSURE_BITS_START = (1U << 27), + CLOSURE_DESTRUCTOR = (1U << 27), + CLOSURE_WAITING = (1U << 29), + CLOSURE_RUNNING = (1U << 31), }; #define CLOSURE_GUARD_MASK \ - ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_SLEEPING| \ - CLOSURE_RUNNING|CLOSURE_STACK) << 1) + ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_RUNNING) << 1) #define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) #define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) @@ -152,7 +143,7 @@ struct closure { union { struct { struct workqueue_struct *wq; - struct task_struct *task; + struct closure_syncer *s; struct llist_node list; closure_fn *fn; }; @@ -178,7 +169,19 @@ void closure_sub(struct closure *cl, int v); void closure_put(struct closure *cl); void __closure_wake_up(struct closure_waitlist *list); bool closure_wait(struct closure_waitlist *list, struct closure *cl); -void closure_sync(struct closure *cl); +void __closure_sync(struct closure *cl); + +/** + * closure_sync - sleep until a closure a closure has nothing left to wait on + * + * Sleeps until the refcount hits 1 - the thread that's running the closure owns + * the last refcount. 
+ */ +static inline void closure_sync(struct closure *cl) +{ + if ((atomic_read(&cl->remaining) & CLOSURE_REMAINING_MASK) != 1) + __closure_sync(cl); +} #ifdef CONFIG_BCACHE_CLOSURES_DEBUG @@ -215,24 +218,6 @@ static inline void closure_set_waiting(struct closure *cl, unsigned long f) #endif } -static inline void __closure_end_sleep(struct closure *cl) -{ - __set_current_state(TASK_RUNNING); - - if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING) - atomic_sub(CLOSURE_SLEEPING, &cl->remaining); -} - -static inline void __closure_start_sleep(struct closure *cl) -{ - closure_set_ip(cl); - cl->task = current; - set_current_state(TASK_UNINTERRUPTIBLE); - - if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING)) - atomic_add(CLOSURE_SLEEPING, &cl->remaining); -} - static inline void closure_set_stopped(struct closure *cl) { atomic_sub(CLOSURE_RUNNING, &cl->remaining); @@ -241,7 +226,6 @@ static inline void closure_set_stopped(struct closure *cl) static inline void set_closure_fn(struct closure *cl, closure_fn *fn, struct workqueue_struct *wq) { - BUG_ON(object_is_on_stack(cl)); closure_set_ip(cl); cl->fn = fn; cl->wq = wq; @@ -300,7 +284,7 @@ static inline void closure_init(struct closure *cl, struct closure *parent) static inline void closure_init_stack(struct closure *cl) { memset(cl, 0, sizeof(struct closure)); - atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK); + atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); } /** @@ -322,6 +306,8 @@ static inline void closure_wake_up(struct closure_waitlist *list) * This is because after calling continue_at() you no longer have a ref on @cl, * and whatever @cl owns may be freed out from under you - a running closure fn * has a ref on its own closure which continue_at() drops. + * + * Note you are expected to immediately return after using this macro. 
*/ #define continue_at(_cl, _fn, _wq) \ do { \ -- cgit v1.2.3 From ce439bf78b4850cadb1e67f0d4a48178d489c75d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 8 Jan 2018 12:21:26 -0800 Subject: bcache: mark closure_sync() __sched [edit by mlyle: include sched/debug.h to get __sched] Signed-off-by: Kent Overstreet Signed-off-by: Michael Lyle Reviewed-by: Michael Lyle Signed-off-by: Jens Axboe --- drivers/md/bcache/closure.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index ca7ace6962a4..7f12920c14f7 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "closure.h" @@ -107,7 +108,7 @@ static void closure_sync_fn(struct closure *cl) wake_up_process(cl->s->task); } -void __closure_sync(struct closure *cl) +void __sched __closure_sync(struct closure *cl) { struct closure_syncer s = { .task = current }; -- cgit v1.2.3 From b40503ea4f5c134b16ef405cab6c08f9343a0d06 Mon Sep 17 00:00:00 2001 From: Zhai Zhaoxuan Date: Mon, 8 Jan 2018 12:21:27 -0800 Subject: bcache: fix unmatched generic_end_io_acct() & generic_start_io_acct() The functions cached_dev_make_request() and flash_dev_make_request() call generic_start_io_acct() with (struct bcache_device)->disk when they start a closure. Then the function bio_complete() calls generic_end_io_acct() with (struct search)->orig_bio->bi_disk when the closure is done. Since the `bi_disk` is not the bcache device, generic_end_io_acct() is called with the wrong device queue. This causes the "inflight" counter (in struct hd_struct) to keep increasing without ever decreasing. This patch fixes the problem by calling generic_end_io_acct() with (struct bcache_device)->disk.
Signed-off-by: Zhai Zhaoxuan Reviewed-by: Michael Lyle Reviewed-by: Coly Li Reviewed-by: Tang Junhui Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ddd941056f3c..1a46b41dac70 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -633,8 +633,8 @@ static void request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - struct request_queue *q = s->orig_bio->bi_disk->queue; - generic_end_io_acct(q, bio_data_dir(s->orig_bio), + generic_end_io_acct(s->d->disk->queue, + bio_data_dir(s->orig_bio), &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); -- cgit v1.2.3 From 2831231d4c3f999d2d062b23dfbc8b0faa4bc6e0 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 8 Jan 2018 12:21:28 -0800 Subject: bcache: reduce cache_set devices iteration by devices_max_used Member devices of struct cache_set is used to reference all attached bcache devices to this cache set. If it is treated as array of pointers, size of devices[] is indicated by member nr_uuids of struct cache_set. nr_uuids is calculated in drivers/md/bcache/super.c:bch_cache_set_alloc(), bucket_bytes(c) / sizeof(struct uuid_entry) Bucket size is determined by user space tool "make-bcache", by default it is 1024 sectors (defined in bcache-tools/make-bcache.c:main()). So default nr_uuids value is 4096 from the above calculation. Every time when bcache code iterates bcache devices of a cache set, all the 4096 pointers are checked even if only 1 bcache device is attached to the cache set, which is a waste of time and unnecessary. This patch adds a member devices_max_used to struct cache_set. Its value is 1 + the maximum used index of devices[] in a cache set. When iterating all valid bcache devices of a cache set, using c->devices_max_used in the for-loop may reduce a lot of useless checking.
Personally, my motivation of this patch is not for performance, I use it in bcache debugging, which helps me to narrow down the scope to check valid bcache devices of a cache set. Signed-off-by: Coly Li Reviewed-by: Michael Lyle Reviewed-by: Tang Junhui Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/btree.c | 2 +- drivers/md/bcache/super.c | 9 ++++++--- drivers/md/bcache/writeback.h | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 5f7b0b2513cc..9117da5f494b 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -497,6 +497,7 @@ struct cache_set { int caches_loaded; struct bcache_device **devices; + unsigned devices_max_used; struct list_head cached_devs; uint64_t cached_dev_sectors; struct closure caching; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 9e30713dbdb8..bf3a48aa9a9a 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1679,7 +1679,7 @@ static void bch_btree_gc_finish(struct cache_set *c) /* don't reclaim buckets to which writeback keys point */ rcu_read_lock(); - for (i = 0; i < c->nr_uuids; i++) { + for (i = 0; i < c->devices_max_used; i++) { struct bcache_device *d = c->devices[i]; struct cached_dev *dc; struct keybuf_key *w, *n; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 553e841e897d..d13e4ccb30a0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -721,6 +721,9 @@ static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, d->c = c; c->devices[id] = d; + if (id >= c->devices_max_used) + c->devices_max_used = id + 1; + closure_get(&c->caching); } @@ -1267,7 +1270,7 @@ static int flash_devs_run(struct cache_set *c) struct uuid_entry *u; for (u = c->uuids; - u < c->uuids + c->nr_uuids && !ret; + u < c->uuids + c->devices_max_used && !ret; u++) if (UUID_FLASH_ONLY(u)) ret = flash_dev_run(c, u); @@
-1433,7 +1436,7 @@ static void __cache_set_unregister(struct closure *cl) mutex_lock(&bch_register_lock); - for (i = 0; i < c->nr_uuids; i++) + for (i = 0; i < c->devices_max_used; i++) if (c->devices[i]) { if (!UUID_FLASH_ONLY(&c->uuids[i]) && test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { @@ -1496,7 +1499,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->bucket_bits = ilog2(sb->bucket_size); c->block_bits = ilog2(sb->block_size); c->nr_uuids = bucket_bytes(c) / sizeof(struct uuid_entry); - + c->devices_max_used = 0; c->btree_pages = bucket_pages(c); if (c->btree_pages > BTREE_MAX_PAGES) c->btree_pages = max_t(int, c->btree_pages / 4, diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 6d26927267f8..f102b1f9bc51 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -24,7 +24,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) mutex_lock(&bch_register_lock); - for (i = 0; i < c->nr_uuids; i++) { + for (i = 0; i < c->devices_max_used; i++) { struct bcache_device *d = c->devices[i]; if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) -- cgit v1.2.3 From 5138ac6748e381501894976f995fb7d1a63f80f4 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 8 Jan 2018 12:21:29 -0800 Subject: bcache: fix misleading error message in bch_count_io_errors() Bcache only does recoverable I/O for read operations by calling cached_dev_read_error(). For write operations there is no I/O recovery for failed requests. But in bch_count_io_errors() no matter read or write I/Os, before errors counter reaches io error limit, pr_err() always prints "IO error on %s, recovering". For write requests this information is misleading, because there is no I/O recovery at all. This patch adds a parameter 'is_read' to bch_count_io_errors(), and only prints "recovering" by pr_err() when the bio direction is READ.
Signed-off-by: Coly Li Reviewed-by: Michael Lyle Reviewed-by: Tang Junhui Signed-off-by: Jens Axboe --- drivers/md/bcache/bcache.h | 2 +- drivers/md/bcache/io.c | 13 +++++++++---- drivers/md/bcache/super.c | 4 +++- drivers/md/bcache/writeback.c | 4 +++- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 9117da5f494b..5e2d4e80198e 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -862,7 +862,7 @@ static inline void wake_up_allocators(struct cache_set *c) /* Forward declarations */ -void bch_count_io_errors(struct cache *, blk_status_t, const char *); +void bch_count_io_errors(struct cache *, blk_status_t, int, const char *); void bch_bbio_count_io_errors(struct cache_set *, struct bio *, blk_status_t, const char *); void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t, diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index fac97ec2d0e2..a783c5a41ff1 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -51,7 +51,10 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c, /* IO errors */ -void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m) +void bch_count_io_errors(struct cache *ca, + blk_status_t error, + int is_read, + const char *m) { /* * The halflife of an error is: @@ -94,8 +97,9 @@ void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m) errors >>= IO_ERROR_SHIFT; if (errors < ca->set->error_limit) - pr_err("%s: IO error on %s, recovering", - bdevname(ca->bdev, buf), m); + pr_err("%s: IO error on %s%s", + bdevname(ca->bdev, buf), m, + is_read ? ", recovering." : "."); else bch_cache_set_error(ca->set, "%s: too many IO errors %s", @@ -108,6 +112,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, { struct bbio *b = container_of(bio, struct bbio, bio); struct cache *ca = PTR_CACHE(c, &b->key, 0); + int is_read = (bio_data_dir(bio) == READ ? 
1 : 0); unsigned threshold = op_is_write(bio_op(bio)) ? c->congested_write_threshold_us @@ -129,7 +134,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, atomic_inc(&c->congested); } - bch_count_io_errors(ca, error, m); + bch_count_io_errors(ca, error, is_read, m); } void bch_bbio_endio(struct cache_set *c, struct bio *bio, diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d13e4ccb30a0..133b81225ea9 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -274,7 +274,9 @@ static void write_super_endio(struct bio *bio) { struct cache *ca = bio->bi_private; - bch_count_io_errors(ca, bio->bi_status, "writing superblock"); + /* is_read = 0 */ + bch_count_io_errors(ca, bio->bi_status, 0, + "writing superblock"); closure_put(&ca->set->sb_write); } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f82ffb2e9b9b..31b0a292a619 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -244,8 +244,10 @@ static void read_dirty_endio(struct bio *bio) struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; + /* is_read = 1 */ bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), - bio->bi_status, "reading dirty data from cache"); + bio->bi_status, 1, + "reading dirty data from cache"); dirty_endio(bio); } -- cgit v1.2.3 From 616486ab52ab7f9739b066d958bdd20e65aefd74 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Mon, 8 Jan 2018 12:21:30 -0800 Subject: bcache: fix writeback target calc on large devices Bcache needs to scale the dirty data in the cache over the multiple backing disks in order to calculate writeback rates for each. The previous code did this by multiplying the target number of dirty sectors by the backing device size, and expected it to fit into a uint64_t; this blows up on relatively small backing devices. The new approach figures out the bdev's share in 16384ths of the overall cached data. 
This is chosen to cope well when bdevs drastically vary in size and to ensure that bcache can cross the petabyte boundary for each backing device. This has been improved based on Tang Junhui's feedback to ensure that every device gets a share of dirty data, no matter how small it is compared to the total backing pool. The existing mechanism is very limited; this is purely a bug fix to remove limits on volume size. However, there still needs to be change to make this "fair" over many volumes where some are idle. Reported-by: Jack Douglas Signed-off-by: Michael Lyle Reviewed-by: Tang Junhui Signed-off-by: Jens Axboe --- drivers/md/bcache/writeback.c | 31 +++++++++++++++++++++++++++---- drivers/md/bcache/writeback.h | 7 +++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 31b0a292a619..51306a19ab03 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -18,17 +18,39 @@ #include /* Rate limiting */ - -static void __update_writeback_rate(struct cached_dev *dc) +static uint64_t __calc_target_rate(struct cached_dev *dc) { struct cache_set *c = dc->disk.c; + + /* + * This is the size of the cache, minus the amount used for + * flash-only devices + */ uint64_t cache_sectors = c->nbuckets * c->sb.bucket_size - bcache_flash_devs_sectors_dirty(c); + + /* + * Unfortunately there is no control of global dirty data. If the + * user states that they want 10% dirty data in the cache, and has, + * e.g., 5 backing volumes of equal size, we try and ensure each + * backing volume uses about 2% of the cache for dirty data. 
+ */ + uint32_t bdev_share = + div64_u64(bdev_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT, + c->cached_dev_sectors); + uint64_t cache_dirty_target = div_u64(cache_sectors * dc->writeback_percent, 100); - int64_t target = div64_u64(cache_dirty_target * bdev_sectors(dc->bdev), - c->cached_dev_sectors); + /* Ensure each backing dev gets at least one dirty share */ + if (bdev_share < 1) + bdev_share = 1; + + return (cache_dirty_target * bdev_share) >> WRITEBACK_SHARE_SHIFT; +} + +static void __update_writeback_rate(struct cached_dev *dc) +{ /* * PI controller: * Figures out the amount that should be written per second. @@ -49,6 +71,7 @@ static void __update_writeback_rate(struct cached_dev *dc) * This acts as a slow, long-term average that is not subject to * variations in usage like the p term. */ + int64_t target = __calc_target_rate(dc); int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); int64_t error = dirty - target; int64_t proportional_scaled = diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index f102b1f9bc51..66f1c527fa24 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -8,6 +8,13 @@ #define MAX_WRITEBACKS_IN_PASS 5 #define MAX_WRITESIZE_IN_PASS 5000 /* *512b */ +/* + * 14 (16384ths) is chosen here as something that each backing device + * should be a reasonable fraction of the share, and not to blow up + * until individual backing devices are a petabyte. + */ +#define WRITEBACK_SHARE_SHIFT 14 + static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) { uint64_t i, ret = 0; -- cgit v1.2.3 From 4471879acf926221231fe68ebc30f9cad0d7f7c8 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Mon, 8 Jan 2018 15:10:09 -0600 Subject: hwmon: (pmbus) cffps: Add PMBUS_SKIP_STATUS_CHECK This power supply device regularly fails to read VOUT_MODE due to the CML bit going high. This results in an incorrect exponent used for the voltage data, and therefore the power supply reports incorrect voltage. 
Work around this by setting the pmbus flag to skip the CML check. Signed-off-by: Eddie James Fixes: f69316d62c70 ("hwmon: (pmbus) Add IBM Common Form Factor (CFF) ...") Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/ibm-cffps.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hwmon/pmbus/ibm-cffps.c b/drivers/hwmon/pmbus/ibm-cffps.c index de2547476253..2d6f4f41e736 100644 --- a/drivers/hwmon/pmbus/ibm-cffps.c +++ b/drivers/hwmon/pmbus/ibm-cffps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "pmbus.h" @@ -268,6 +269,10 @@ static struct pmbus_driver_info ibm_cffps_info = { .read_word_data = ibm_cffps_read_word_data, }; +static struct pmbus_platform_data ibm_cffps_pdata = { + .flags = PMBUS_SKIP_STATUS_CHECK, +}; + static int ibm_cffps_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -276,6 +281,7 @@ static int ibm_cffps_probe(struct i2c_client *client, struct dentry *ibm_cffps_dir; struct ibm_cffps *psu; + client->dev.platform_data = &ibm_cffps_pdata; rc = pmbus_do_probe(client, id, &ibm_cffps_info); if (rc) return rc; -- cgit v1.2.3 From b2157399cc9898260d6031c5bfe45fe137c1fbe7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 7 Jan 2018 17:33:02 -0800 Subject: bpf: prevent out-of-bounds speculation Under speculation, CPUs may mis-predict branches in bounds checks. Thus, memory accesses under a bounds check may be speculated even if the bounds check fails, providing a primitive for building a side channel. To avoid leaking kernel data round up array-based maps and mask the index after bounds check, so speculated load with out of bounds index will load either valid value from the array or zero from the padded area. Unconditionally mask index for all array types even when max_entries are not rounded to power of 2 for root user. 
When map is created by unpriv user generate a sequence of bpf insns that includes AND operation to make sure that JITed code includes the same 'index & index_mask' operation. If prog_array map is created by unpriv user replace bpf_tail_call(ctx, map, index); with if (index >= max_entries) { index &= map->index_mask; bpf_tail_call(ctx, map, index); } (along with roundup to power 2) to prevent out-of-bounds speculation. There is secondary redundant 'if (index >= max_entries)' in the interpreter and in all JITs, but they can be optimized later if necessary. Other array-like maps (cpumap, devmap, sockmap, perf_event_array, cgroup_array) cannot be used by unpriv, so no changes there. That fixes bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)" on all architectures with and without JIT. v2->v3: Daniel noticed that attack potentially can be crafted via syscall commands without loading the program, so add masking to those paths as well. Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ kernel/bpf/arraymap.c | 47 ++++++++++++++++++++++++++++++++++++----------- kernel/bpf/verifier.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e55e4255a210..1b985ca4ffbe 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -52,6 +52,7 @@ struct bpf_map { u32 pages; u32 id; int numa_node; + bool unpriv_array; struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; @@ -221,6 +222,7 @@ struct bpf_prog_aux { struct bpf_array { struct bpf_map map; u32 elem_size; + u32 index_mask; /* 'ownership' of prog_array is claimed by the first program that * is going to use this map or by the first program which FD is stored * in the map to make sure that all callers and callees have the same diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 7c25426d3cf5..aaa319848e7d 
100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); + u32 elem_size, index_mask, max_entries; + bool unpriv = !capable(CAP_SYS_ADMIN); struct bpf_array *array; u64 array_size; - u32 elem_size; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); + max_entries = attr->max_entries; + index_mask = roundup_pow_of_two(max_entries) - 1; + + if (unpriv) + /* round up array size to nearest power of 2, + * since cpu will speculate within index_mask limits + */ + max_entries = index_mask + 1; + array_size = sizeof(*array); if (percpu) - array_size += (u64) attr->max_entries * sizeof(void *); + array_size += (u64) max_entries * sizeof(void *); else - array_size += (u64) attr->max_entries * elem_size; + array_size += (u64) max_entries * elem_size; /* make sure there is no u32 overflow later in round_up() */ if (array_size >= U32_MAX - PAGE_SIZE) @@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array = bpf_map_area_alloc(array_size, numa_node); if (!array) return ERR_PTR(-ENOMEM); + array->index_mask = index_mask; + array->map.unpriv_array = unpriv; /* copy mandatory map attributes */ array->map.map_type = attr->map_type; @@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return array->value + array->elem_size * index; + return array->value + array->elem_size * (index & array->index_mask); } /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { + struct bpf_array *array = container_of(map, struct 
bpf_array, map); struct bpf_insn *insn = insn_buf; u32 elem_size = round_up(map->value_size, 8); const int ret = BPF_REG_0; @@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); - *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); + if (map->unpriv_array) { + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); + *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); + } else { + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); + } if (is_power_of_2(elem_size)) { *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); @@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return this_cpu_ptr(array->pptrs[index]); + return this_cpu_ptr(array->pptrs[index & array->index_mask]); } int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) @@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) */ size = round_up(map->value_size, 8); rcu_read_lock(); - pptr = array->pptrs[index]; + pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); off += size; @@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, return -EEXIST; if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) - memcpy(this_cpu_ptr(array->pptrs[index]), + memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), value, map->value_size); else - memcpy(array->value + array->elem_size * index, + memcpy(array->value + + array->elem_size * (index & array->index_mask), value, map->value_size); return 0; } @@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, */ size = round_up(map->value_size, 8); 
rcu_read_lock(); - pptr = array->pptrs[index]; + pptr = array->pptrs[index & array->index_mask]; for_each_possible_cpu(cpu) { bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); off += size; @@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) static u32 array_of_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) { + struct bpf_array *array = container_of(map, struct bpf_array, map); u32 elem_size = round_up(map->value_size, 8); struct bpf_insn *insn = insn_buf; const int ret = BPF_REG_0; @@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map, *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); - *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); + if (map->unpriv_array) { + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); + *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); + } else { + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); + } if (is_power_of_2(elem_size)) *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); else diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 04b24876cd23..b414d6b2d470 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1729,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; + if (func_id == BPF_FUNC_tail_call) { + if (meta.map_ptr == NULL) { + verbose(env, "verifier bug\n"); + return -EINVAL; + } + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; + } err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; @@ -4456,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) */ insn->imm = 0; insn->code = BPF_JMP | BPF_TAIL_CALL; + + /* instead of changing every JIT dealing with tail_call + * emit two extra insns: + * if (index >= max_entries) goto out; + * index 
&= array->index_mask; + * to avoid out-of-bounds cpu speculation + */ + map_ptr = env->insn_aux_data[i + delta].map_ptr; + if (map_ptr == BPF_MAP_PTR_POISON) { + verbose(env, "tail_call obusing map_ptr\n"); + return -EINVAL; + } + if (!map_ptr->unpriv_array) + continue; + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, + map_ptr->max_entries, 2); + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, + container_of(map_ptr, + struct bpf_array, + map)->index_mask); + insn_buf[2] = *insn; + cnt = 3; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; continue; } -- cgit v1.2.3 From e4d0e84e490790798691aaa0f2e598637f1867ec Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 8 Jan 2018 16:09:21 -0600 Subject: x86/cpu/AMD: Make LFENCE a serializing instruction To aid in speculation control, make LFENCE a serializing instruction since it has less overhead than MFENCE. This is done by setting bit 1 of MSR 0xc0011029 (DE_CFG). Some families that support LFENCE do not have this MSR. For these families, the LFENCE instruction is already serializing. 
Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Reviewed-by: Borislav Petkov Cc: Peter Zijlstra Cc: Tim Chen Cc: Dave Hansen Cc: Borislav Petkov Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David Woodhouse Cc: Paul Turner Link: https://lkml.kernel.org/r/20180108220921.12580.71694.stgit@tlendack-t1.amdoffice.net --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/amd.c | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ab022618a50a..1e7d710fef43 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -352,6 +352,8 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bcb75dc97d44..5b438d81beb2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -829,6 +829,16 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { + /* + * A serializing LFENCE has less overhead than MFENCE, so + * use it for execution serialization. On families which + * don't have that MSR, LFENCE is already serializing. + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ + msr_set_bit(MSR_F10H_DECFG, + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); } -- cgit v1.2.3 From 9c6a73c75864ad9fa49e5fa6513e4c4071c0e29f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 8 Jan 2018 16:09:32 -0600 Subject: x86/cpu/AMD: Use LFENCE_RDTSC in preference to MFENCE_RDTSC With LFENCE now a serializing instruction, use LFENCE_RDTSC in preference to MFENCE_RDTSC. 
However, since the kernel could be running under a hypervisor that does not support writing that MSR, read the MSR back and verify that the bit has been set successfully. If the MSR can be read and the bit is set, then set the LFENCE_RDTSC feature, otherwise set the MFENCE_RDTSC feature. Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Reviewed-by: Borislav Petkov Cc: Peter Zijlstra Cc: Tim Chen Cc: Dave Hansen Cc: Borislav Petkov Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: David Woodhouse Cc: Paul Turner Link: https://lkml.kernel.org/r/20180108220932.12580.52458.stgit@tlendack-t1.amdoffice.net --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1e7d710fef43..fa11fb1fa570 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -354,6 +354,7 @@ #define MSR_FAM10H_NODE_ID 0xc001100c #define MSR_F10H_DECFG 0xc0011029 #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5b438d81beb2..ea831c858195 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -829,6 +829,9 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { + unsigned long long val; + int ret; + /* * A serializing LFENCE has less overhead than MFENCE, so * use it for execution serialization. 
On families which @@ -839,8 +842,19 @@ static void init_amd(struct cpuinfo_x86 *c) msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + /* + * Verify that the MSR write was successful (could be running + * under a hypervisor) and only then assume that LFENCE is + * serializing. + */ + ret = rdmsrl_safe(MSR_F10H_DECFG, &val); + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + } else { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } /* -- cgit v1.2.3 From 5b189201993ab03001a398de731045bfea90c689 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:15:06 -0800 Subject: Input: twl4030-vibra - fix sibling-node lookup A helper purported to look up a child node based on its name was using the wrong of-helper and ended up prematurely freeing the parent of-node while searching the whole device tree depth-first starting at the parent node. 
Fixes: 64b9e4d803b1 ("input: twl4030-vibra: Support for DT booted kernel") Fixes: e661d0a04462 ("Input: twl4030-vibra - fix ERROR: Bad of_node_put() warning") Cc: stable # 3.7 Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov --- drivers/input/misc/twl4030-vibra.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c index 6c51d404874b..c37aea9ac272 100644 --- a/drivers/input/misc/twl4030-vibra.c +++ b/drivers/input/misc/twl4030-vibra.c @@ -178,12 +178,14 @@ static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops, twl4030_vibra_suspend, twl4030_vibra_resume); static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata, - struct device_node *node) + struct device_node *parent) { + struct device_node *node; + if (pdata && pdata->coexist) return true; - node = of_find_node_by_name(node, "codec"); + node = of_get_child_by_name(parent, "codec"); if (node) { of_node_put(node); return true; -- cgit v1.2.3 From dcaf12a8b0bbdbfcfa2be8dff2c4948d9844b4ad Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:17:48 -0800 Subject: Input: twl6040-vibra - fix child-node lookup Fix child-node lookup during probe, which ended up searching the whole device tree depth-first starting at parent rather than just matching on its children. Later sanity checks on node properties (which would likely be missing) should prevent this from causing much trouble however, especially as the original premature free of the parent node has already been fixed separately (but that "fix" was apparently never backported to stable). Fixes: e7ec014a47e4 ("Input: twl6040-vibra - update for device tree support") Fixes: c52c545ead97 ("Input: twl6040-vibra - fix DT node memory management") Cc: stable # 3.6 Signed-off-by: Johan Hovold Acked-by: Peter Ujfalusi Tested-by: H. 
Nikolaus Schaller (on Pyra OMAP5 hardware) Signed-off-by: Dmitry Torokhov --- drivers/input/misc/twl6040-vibra.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c index 5690eb7ff954..15e0d352c4cc 100644 --- a/drivers/input/misc/twl6040-vibra.c +++ b/drivers/input/misc/twl6040-vibra.c @@ -248,8 +248,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev) int vddvibr_uV = 0; int error; - of_node_get(twl6040_core_dev->of_node); - twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node, + twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node, "vibra"); if (!twl6040_core_node) { dev_err(&pdev->dev, "parent of node is missing?\n"); -- cgit v1.2.3 From 906bf7daa0618d0ef39f4872ca42218c29a3631f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 8 Jan 2018 17:20:18 -0800 Subject: Input: 88pm860x-ts - fix child-node lookup Fix child node-lookup during probe, which ended up searching the whole device tree depth-first starting at parent rather than just matching on its children. To make things worse, the parent node was prematurely freed, while the child node was leaked. 
Fixes: 2e57d56747e6 ("mfd: 88pm860x: Device tree support") Cc: stable # 3.7 Signed-off-by: Johan Hovold Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/88pm860x-ts.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c index 7ed828a51f4c..3486d9403805 100644 --- a/drivers/input/touchscreen/88pm860x-ts.c +++ b/drivers/input/touchscreen/88pm860x-ts.c @@ -126,7 +126,7 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, int data, n, ret; if (!np) return -ENODEV; - np = of_find_node_by_name(np, "touch"); + np = of_get_child_by_name(np, "touch"); if (!np) { dev_err(&pdev->dev, "Can't find touch node\n"); return -EINVAL; @@ -144,13 +144,13 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set tsi prebias time */ if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) { ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } /* set prebias & prechg time of pen detect */ data = 0; @@ -161,10 +161,18 @@ static int pm860x_touch_dt_init(struct platform_device *pdev, if (data) { ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data); if (ret < 0) - return -EINVAL; + goto err_put_node; } of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x); + + of_node_put(np); + return 0; + +err_put_node: + of_node_put(np); + + return -EINVAL; } #else #define pm860x_touch_dt_init(x, y, z) (-1) -- cgit v1.2.3 From 1b5c7ef3d0d0610bda9b63263f7c5b7178d11015 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 6 Jan 2018 10:59:41 -0500 Subject: drm/nouveau/disp/gf119: add missing drive vfunc ptr Fixes broken dp on GF119: Call Trace: ? 
nvkm_dp_train_drive+0x183/0x2c0 [nouveau] nvkm_dp_acquire+0x4f3/0xcd0 [nouveau] nv50_disp_super_2_2+0x5d/0x470 [nouveau] ? nvkm_devinit_pll_set+0xf/0x20 [nouveau] gf119_disp_super+0x19c/0x2f0 [nouveau] process_one_work+0x193/0x3c0 worker_thread+0x35/0x3b0 kthread+0x125/0x140 ? process_one_work+0x3c0/0x3c0 ? kthread_park+0x60/0x60 ret_from_fork+0x25/0x30 Code: Bad RIP value. RIP: (null) RSP: ffffb1e243e4bc38 CR2: 0000000000000000 Fixes: af85389c614a drm/nouveau/disp: shuffle functions around Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103421 Signed-off-by: Rob Clark Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c index a2978a37b4f3..700fc754f28a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c @@ -174,6 +174,7 @@ gf119_sor = { .links = gf119_sor_dp_links, .power = g94_sor_dp_power, .pattern = gf119_sor_dp_pattern, + .drive = gf119_sor_dp_drive, .vcpi = gf119_sor_dp_vcpi, .audio = gf119_sor_dp_audio, .audio_sym = gf119_sor_dp_audio_sym, -- cgit v1.2.3 From c64ed5dd9feba193c76eb460b451225ac2a0d87b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 9 Jan 2018 08:51:02 +0100 Subject: ALSA: pcm: Use ERESTARTSYS instead of EINTR in OSS emulation Fix the last standing EINTR in the whole subsystem. Use more correct ERESTARTSYS for pending signals. 
Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index c7d8489d39c8..e8b19876c420 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -842,7 +842,7 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, if (!(mutex_trylock(&runtime->oss.params_lock))) return -EAGAIN; } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) - return -EINTR; + return -ERESTARTSYS; sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL); params = kmalloc(sizeof(*params), GFP_KERNEL); sparams = kmalloc(sizeof(*sparams), GFP_KERNEL); -- cgit v1.2.3 From 371d39fabcebc3cb042a0a9c4bc2f494d6cde02c Mon Sep 17 00:00:00 2001 From: Michael Trimarchi Date: Thu, 4 Jan 2018 16:30:57 +0100 Subject: mmc: sdhci-esdhc-imx: Manage sdhci_runtime_suspend_host error code We need to return in case of error even if the actual implementation of sdhci_runtime_suspend_host always return 0. 
We don't want to power down the clock, and the assumption is that sdhci_runtime_suspend_host always leaves the system consistent in case of failure. Signed-off-by: Michael Trimarchi Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 85140c9af581..d08c21e511dd 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1389,6 +1389,8 @@ static int sdhci_esdhc_runtime_suspend(struct device *dev) int ret; ret = sdhci_runtime_suspend_host(host); + if (ret) + return ret; if (host->tuning_mode != SDHCI_TUNING_MODE_3) mmc_retune_needed(host->mmc); -- cgit v1.2.3 From a0ad3087586d0945a3cdfa755f951995dfc1b17e Mon Sep 17 00:00:00 2001 From: Michael Trimarchi Date: Thu, 4 Jan 2018 16:30:58 +0100 Subject: mmc: sdhci-esdhc-imx: Changes the order of how clocks are being re-enabled runtime_resume() should re-enable the clocks in reverse order compared with runtime_suspend() Signed-off-by: Michael Trimarchi Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index d08c21e511dd..6d4e3233a920 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1411,31 +1411,33 @@ static int sdhci_esdhc_runtime_resume(struct device *dev) struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); int err; + err = clk_prepare_enable(imx_data->clk_ahb); + if (err) + return err; + if (!sdhci_sdio_irq_enabled(host)) { err = clk_prepare_enable(imx_data->clk_per); if (err) - return err; + goto disable_ahb_clk; err = clk_prepare_enable(imx_data->clk_ipg); if (err) goto disable_per_clk; } - err = clk_prepare_enable(imx_data->clk_ahb); - if (err) - goto disable_ipg_clk; + err = sdhci_runtime_resume_host(host); if (err) - goto
disable_ahb_clk; + goto disable_ipg_clk; return 0; -disable_ahb_clk: - clk_disable_unprepare(imx_data->clk_ahb); disable_ipg_clk: if (!sdhci_sdio_irq_enabled(host)) clk_disable_unprepare(imx_data->clk_ipg); disable_per_clk: if (!sdhci_sdio_irq_enabled(host)) clk_disable_unprepare(imx_data->clk_per); +disable_ahb_clk: + clk_disable_unprepare(imx_data->clk_ahb); return err; } #endif -- cgit v1.2.3 From 3602785b341a9545eee97f6ce634091da1a2b56d Mon Sep 17 00:00:00 2001 From: Michael Trimarchi Date: Thu, 4 Jan 2018 16:30:59 +0100 Subject: mmc: sdhci-esdhc-imx: Enable/Disable mmc clock during runtime suspend mmc clock can be stopped during runtime suspend and restart during runtime resume if the sdio irq is not enabled. Stop sdio clock reduce EMI of the device when the bus is not in use. Signed-off-by: Michael Trimarchi Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 6d4e3233a920..53cc1b6caf8f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -193,6 +193,7 @@ struct pltfm_imx_data { struct clk *clk_ipg; struct clk *clk_ahb; struct clk *clk_per; + unsigned int actual_clock; enum { NO_CMD_PENDING, /* no multiblock command pending */ MULTIBLK_IN_PROCESS, /* exact multiblock cmd in process */ @@ -1396,6 +1397,8 @@ static int sdhci_esdhc_runtime_suspend(struct device *dev) mmc_retune_needed(host->mmc); if (!sdhci_sdio_irq_enabled(host)) { + imx_data->actual_clock = host->mmc->actual_clock; + esdhc_pltfm_set_clock(host, 0); clk_disable_unprepare(imx_data->clk_per); clk_disable_unprepare(imx_data->clk_ipg); } @@ -1422,6 +1425,7 @@ static int sdhci_esdhc_runtime_resume(struct device *dev) err = clk_prepare_enable(imx_data->clk_ipg); if (err) goto disable_per_clk; + esdhc_pltfm_set_clock(host, imx_data->actual_clock); } err = sdhci_runtime_resume_host(host); -- cgit v1.2.3 From 
f953f0f89663c39f08f4baaa8a4a881401b65654 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Mon, 8 Jan 2018 15:36:48 -0500 Subject: mtd: nand: brcmnand: Disable prefetch by default Brcm nand controller prefetch feature needs to be disabled by default. Enabling affects performance on random reads as well as dma reads. Signed-off-by: Kamal Dasu Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller") Cc: Acked-by: Florian Fainelli Signed-off-by: Boris Brezillon --- drivers/mtd/nand/brcmnand/brcmnand.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index e0797abb1ebd..b81ddbaae149 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -2200,16 +2200,9 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) if (ctrl->nand_version >= 0x0702) tmp |= ACC_CONTROL_RD_ERASED; tmp &= ~ACC_CONTROL_FAST_PGM_RDIN; - if (ctrl->features & BRCMNAND_HAS_PREFETCH) { - /* - * FIXME: Flash DMA + prefetch may see spurious erased-page ECC - * errors - */ - if (has_flash_dma(ctrl)) - tmp &= ~ACC_CONTROL_PREFETCH; - else - tmp |= ACC_CONTROL_PREFETCH; - } + if (ctrl->features & BRCMNAND_HAS_PREFETCH) + tmp &= ~ACC_CONTROL_PREFETCH; + nand_writereg(ctrl, offs, tmp); return 0; -- cgit v1.2.3 From 6cbefbdcec41bf725b308288dcb200a6efc3339f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 9 Jan 2018 10:47:02 +0100 Subject: mtd: tests: nandbiterrs: Fix read_page return value The number of corrected bitflips is not correctly reported by the test until the bitflip threshold is reached. read_page() shall return the number of corrected bitflips, but mtd_read() returns 0 or a negative error, so we can't forward its return value. In the absence of an error we always have to calculate the number of bitflips ourselves.
Signed-off-by: Sascha Hauer Signed-off-by: Boris Brezillon --- drivers/mtd/tests/nandbiterrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/tests/nandbiterrs.c b/drivers/mtd/tests/nandbiterrs.c index 5f03b8c885a9..cde19c99e77b 100644 --- a/drivers/mtd/tests/nandbiterrs.c +++ b/drivers/mtd/tests/nandbiterrs.c @@ -151,7 +151,7 @@ static int read_page(int log) memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats)); err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer); - if (err == -EUCLEAN) + if (!err || err == -EUCLEAN) err = mtd->ecc_stats.corrected - oldstats.corrected; if (err < 0 || read != mtd->writesize) { -- cgit v1.2.3 From 374d1b5a81f7f9cc5e7f095ac3d5aff3f6600376 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 5 Jan 2018 08:35:47 +0100 Subject: esp: Fix GRO when the headers not fully in the linear part of the skb. The GRO layer does not necessarily pull the complete headers into the linear part of the skb, a part may remain on the first page fragment. This can lead to a crash if we try to pull the headers, so make sure we have them on the linear part before pulling. 
Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Reported-by: syzbot+82bbd65569c49c6c0c4d@syzkaller.appspotmail.com Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 3 ++- net/ipv6/esp6_offload.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index f8b918c766b0..b1338e576d00 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -38,7 +38,8 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head, __be32 spi; int err; - skb_pull(skb, offset); + if (!pskb_pull(skb, offset)) + return NULL; if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 333a478aa161..dd9627490c7c 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -60,7 +60,8 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, int nhoff; int err; - skb_pull(skb, offset); + if (!pskb_pull(skb, offset)) + return NULL; if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; -- cgit v1.2.3 From 7bf4e594c28afc67bc120a380ca774e43ca496d8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Jan 2018 02:18:42 +0100 Subject: PM / wakeup: Do not fail dev_pm_attach_wake_irq() unnecessarily Returning an error code from dev_pm_attach_wake_irq() if device_wakeup_attach_irq() called by it returns an error is pointless, because the wakeup source used by it may be deleted by user space via sysfs at any time and in particular right after dev_pm_attach_wake_irq() has returned. Moreover, it requires the callers of dev_pm_attach_wake_irq() to create that wakeup source via device_wakeup_enable() upfront, but that obviously is racy with respect to the sysfs-based manipulations of it. 
To avoid the race, modify device_wakeup_attach_irq() to check that the wakeup source it is going to use is there (and return early otherwise), make it void (as it cannot fail after that change) and make dev_pm_attach_wake_irq() simply call it for the device unconditionally. Tested-by: Tony Lindgren Signed-off-by: Rafael J. Wysocki --- drivers/base/power/power.h | 11 +++-------- drivers/base/power/wakeirq.c | 8 +++----- drivers/base/power/wakeup.c | 11 ++++------- 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 7beee75399d4..21244c53e377 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -41,20 +41,15 @@ extern void dev_pm_disable_wake_irq_check(struct device *dev); #ifdef CONFIG_PM_SLEEP -extern int device_wakeup_attach_irq(struct device *dev, - struct wake_irq *wakeirq); +extern void device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq); extern void device_wakeup_detach_irq(struct device *dev); extern void device_wakeup_arm_wake_irqs(void); extern void device_wakeup_disarm_wake_irqs(void); #else -static inline int -device_wakeup_attach_irq(struct device *dev, - struct wake_irq *wakeirq) -{ - return 0; -} +static inline void device_wakeup_attach_irq(struct device *dev, + struct wake_irq *wakeirq) {} static inline void device_wakeup_detach_irq(struct device *dev) { diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index ae0429827f31..a8ac86e4d79e 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -33,7 +33,6 @@ static int dev_pm_attach_wake_irq(struct device *dev, int irq, struct wake_irq *wirq) { unsigned long flags; - int err; if (!dev || !wirq) return -EINVAL; @@ -45,12 +44,11 @@ static int dev_pm_attach_wake_irq(struct device *dev, int irq, return -EEXIST; } - err = device_wakeup_attach_irq(dev, wirq); - if (!err) - dev->power.wakeirq = wirq; + dev->power.wakeirq = wirq; + 
device_wakeup_attach_irq(dev, wirq); spin_unlock_irqrestore(&dev->power.lock, flags); - return err; + return 0; } /** diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index b7b8b2fe89c6..e73a081c6397 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -291,22 +291,19 @@ EXPORT_SYMBOL_GPL(device_wakeup_enable); * * Call under the device's power.lock lock. */ -int device_wakeup_attach_irq(struct device *dev, +void device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) { struct wakeup_source *ws; ws = dev->power.wakeup; - if (!ws) { - dev_err(dev, "forgot to call device_init_wakeup?\n"); - return -EINVAL; - } + if (!ws) + return; if (ws->wakeirq) - return -EEXIST; + dev_err(dev, "Leftover wakeup IRQ found, overriding\n"); ws->wakeirq = wakeirq; - return 0; } /** -- cgit v1.2.3 From 8512220c5782d3e469cf8127a612a6c8f521e2dc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 2 Jan 2018 17:08:50 +0100 Subject: PM / core: Assign the wakeup_path status flag in __device_prepare() The PM core in the device_prepare() phase, resets the wakeup_path status flag to the value of device_may_wakeup(). This means if a ->prepare() or a ->suspend() callback for the device would update the device's wakeup setting, this doesn't become reflected in the wakeup_path status flag. In general this isn't a problem, because wakeup settings are not supposed to be changed (via for example calling device_set_wakeup_enable()) during any system wide suspend/resume phase. Nevertheless there are some users, which can be considered as legacy, that don't conform to this behaviour. These legacy cases should be corrected, however until that is done, let's address the issue from the PM core, by moving the assignment of the wakeup_path status flag to the __device_suspend() phase and after the ->suspend() callback has been invoked. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 70398e7b3569..ebcec7e677ba 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1788,6 +1788,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) End: if (!error) { dev->power.is_suspended = true; + if (device_may_wakeup(dev)) + dev->power.wakeup_path = true; + dpm_propagate_to_parent(dev); dpm_clear_suppliers_direct_complete(dev); } @@ -1912,7 +1915,7 @@ static int device_prepare(struct device *dev, pm_message_t state) device_lock(dev); - dev->power.wakeup_path = device_may_wakeup(dev); + dev->power.wakeup_path = false; if (dev->power.no_pm_callbacks) { ret = 1; /* Let device go direct_complete */ -- cgit v1.2.3 From cf04ce7841fabc7af0d6ee273711ec29658bee7b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 2 Jan 2018 17:08:52 +0100 Subject: PM / wakeup: Add device_set_wakeup_path() helper to control wakeup path During system suspend, a driver may find that the wakeup setting is enabled for its device and therefore configures it to deliver system wakeup signals. Additionally, sometimes the driver and its device, relies on some further consumed resource, like an irqchip or a phy for example, to stay powered on, as to be able to deliver system wakeup signals. In general the driver deals with this, via raising an "enable count" of the consumed resource or via a subsystem specific API, like irq_set_irq_wake() or enable|disable_irq_wake() for an irqchip. However, this may not be sufficient in cases when the resource's device may be attached to a PM domain (genpd for example) or is handled by a non-trivial middle layer (PCI for example). To address cases like these, the existing ->dev.power.wakeup_path status flag is there to help. As a matter of fact, genpd already monitors the flag during system suspend and acts accordingly. 
However, so far it has not been clear, if anybody else but the PM core is allowed to set the ->dev.power.wakeup_path status flag, which is required to make this work. For this reason, introduce a new helper function, device_set_wakeup_path() for that. Typically, a driver that manages a resource needed in the wakeup path should call device_set_wakeup_path() from its ->suspend() or ->suspend_late() callback. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 4c2cba7ec1d4..4238dde0aaf0 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -88,6 +88,11 @@ static inline bool device_may_wakeup(struct device *dev) return dev->power.can_wakeup && !!dev->power.wakeup; } +static inline void device_set_wakeup_path(struct device *dev) +{ + dev->power.wakeup_path = true; +} + /* drivers/base/power/wakeup.c */ extern void wakeup_source_prepare(struct wakeup_source *ws, const char *name); extern struct wakeup_source *wakeup_source_create(const char *name); @@ -174,6 +179,8 @@ static inline bool device_may_wakeup(struct device *dev) return dev->power.can_wakeup && dev->power.should_wakeup; } +static inline void device_set_wakeup_path(struct device *dev) {} + static inline void __pm_stay_awake(struct wakeup_source *ws) {} static inline void pm_stay_awake(struct device *dev) {} -- cgit v1.2.3 From 877b3729ca03b00800b99ac0c076e9456ef3ae6b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Jan 2018 01:38:27 +0100 Subject: PCI / PM: Use SMART_SUSPEND and LEAVE_SUSPENDED flags for PCIe ports Make the PCIe port driver set DPM_FLAG_SMART_SUSPEND and DPM_FLAG_LEAVE_SUSPENDED for the devices handled by it to benefit from the opportunistic optimizations in the PCI layer enabled by these flags. Signed-off-by: Rafael J. 
Wysocki Acked-by: Bjorn Helgaas --- drivers/pci/pcie/portdrv_pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index ffbf4e723527..fb1c1bb87316 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -150,6 +150,9 @@ static int pcie_portdrv_probe(struct pci_dev *dev, pci_save_state(dev); + dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_SMART_SUSPEND | + DPM_FLAG_LEAVE_SUSPENDED); + if (pci_bridge_d3_possible(dev)) { /* * Keep the port resumed 100ms to make sure things like -- cgit v1.2.3 From aa1f10e85b0ab53dee85d8e293c8159d18d293a8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 29 Dec 2017 00:22:54 +0100 Subject: mux: core: fix double get_device() class_find_device already does a get_device on the returned device. So the device returned by of_find_mux_chip_by_node is already referenced and we should not reference it again (and unref it on error). Signed-off-by: Hans de Goede Signed-off-by: Peter Rosin Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/mux/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mux/core.c b/drivers/mux/core.c index 2260063b0ea8..6e5cf9d9cd99 100644 --- a/drivers/mux/core.c +++ b/drivers/mux/core.c @@ -413,6 +413,7 @@ static int of_dev_node_match(struct device *dev, const void *data) return dev->of_node == data; } +/* Note this function returns a reference to the mux_chip dev. 
*/ static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np) { struct device *dev; @@ -466,6 +467,7 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name) (!args.args_count && (mux_chip->controllers > 1))) { dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n", np, args.np); + put_device(&mux_chip->dev); return ERR_PTR(-EINVAL); } @@ -476,10 +478,10 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name) if (controller >= mux_chip->controllers) { dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n", np, controller, args.np); + put_device(&mux_chip->dev); return ERR_PTR(-EINVAL); } - get_device(&mux_chip->dev); return &mux_chip->mux[controller]; } EXPORT_SYMBOL_GPL(mux_control_get); -- cgit v1.2.3 From 443064cb0b1fb4569fe0a71209da7625129fb760 Mon Sep 17 00:00:00 2001 From: Viktor Slavkovic Date: Mon, 8 Jan 2018 10:43:03 -0800 Subject: staging: android: ashmem: fix a race condition in ASHMEM_SET_SIZE ioctl A lock-unlock is missing in ASHMEM_SET_SIZE ioctl which can result in a race condition when mmap is called. After the !asma->file check, before setting asma->size, asma->file can be set in mmap. That would result in having different asma->size than the mapped memory size. Combined with ASHMEM_UNPIN ioctl and shrinker invocation, this can result in memory corruption. 
Signed-off-by: Viktor Slavkovic Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 0f695df14c9d..372ce9913e6d 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -765,10 +765,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) break; case ASHMEM_SET_SIZE: ret = -EINVAL; + mutex_lock(&ashmem_mutex); if (!asma->file) { ret = 0; asma->size = (size_t)arg; } + mutex_unlock(&ashmem_mutex); break; case ASHMEM_GET_SIZE: ret = asma->size; -- cgit v1.2.3 From 98648ae6ef6bdcdcb88c46cad963906ab452e96d Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 9 Jan 2018 15:33:42 +0100 Subject: drm/vmwgfx: Don't cache framebuffer maps Buffer objects need to be either pinned or reserved while a map is active, that's not the case here, so avoid caching the framebuffer map. This will cause increasing mapping activity mainly when we don't do page flipping. This fixes occasional garbage filled screens when the framebuffer has been evicted after the map. Since in-kernel mapping of whole buffer objects is error-prone on 32-bit architectures and also quite inefficient, we will revisit this later. 
Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Cc: --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 6 ------ drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 41 +++++++++++------------------------- 3 files changed, 13 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 0545740b3724..641294aef165 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -697,7 +697,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane) vps->pinned = 0; /* Mapping is managed by prepare_fb/cleanup_fb */ - memset(&vps->guest_map, 0, sizeof(vps->guest_map)); memset(&vps->host_map, 0, sizeof(vps->host_map)); vps->cpp = 0; @@ -760,11 +759,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane, /* Should have been freed by cleanup_fb */ - if (vps->guest_map.virtual) { - DRM_ERROR("Guest mapping not freed\n"); - ttm_bo_kunmap(&vps->guest_map); - } - if (vps->host_map.virtual) { DRM_ERROR("Host mapping not freed\n"); ttm_bo_kunmap(&vps->host_map); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index ff9c8389ff21..cd9da2dd79af 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -175,7 +175,7 @@ struct vmw_plane_state { int pinned; /* For CPU Blit */ - struct ttm_bo_kmap_obj host_map, guest_map; + struct ttm_bo_kmap_obj host_map; unsigned int cpp; }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 90b5437fd787..b68d74888ab1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -114,7 +114,7 @@ struct vmw_screen_target_display_unit { bool defined; /* For CPU Blit */ - struct ttm_bo_kmap_obj host_map, guest_map; + struct ttm_bo_kmap_obj host_map; unsigned int cpp; }; @@ -695,7 +695,8 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) s32 src_pitch, dst_pitch; 
u8 *src, *dst; bool not_used; - + struct ttm_bo_kmap_obj guest_map; + int ret; if (!dirty->num_hits) return; @@ -706,6 +707,13 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) if (width == 0 || height == 0) return; + ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages, + &guest_map); + if (ret) { + DRM_ERROR("Failed mapping framebuffer for blit: %d\n", + ret); + goto out_cleanup; + } /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */ src_pitch = stdu->display_srf->base_size.width * stdu->cpp; @@ -713,7 +721,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) src += ddirty->top * src_pitch + ddirty->left * stdu->cpp; dst_pitch = ddirty->pitch; - dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used); + dst = ttm_kmap_obj_virtual(&guest_map, &not_used); dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp; @@ -772,6 +780,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty) vmw_fifo_commit(dev_priv, sizeof(*cmd)); } + ttm_bo_kunmap(&guest_map); out_cleanup: ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX; ddirty->right = ddirty->bottom = S32_MIN; @@ -1109,9 +1118,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->guest_map.virtual) - ttm_bo_kunmap(&vps->guest_map); - if (vps->host_map.virtual) ttm_bo_kunmap(&vps->host_map); @@ -1277,33 +1283,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, */ if (vps->content_fb_type == SEPARATE_DMA && !(dev_priv->capabilities & SVGA_CAP_3D)) { - - struct vmw_framebuffer_dmabuf *new_vfbd; - - new_vfbd = vmw_framebuffer_to_vfbd(new_fb); - - ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false, - NULL); - if (ret) - goto out_srf_unpin; - - ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0, - new_vfbd->buffer->base.num_pages, - &vps->guest_map); - - ttm_bo_unreserve(&new_vfbd->buffer->base); - - if (ret)
{ - DRM_ERROR("Failed to map content buffer to CPU\n"); - goto out_srf_unpin; - } - ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0, vps->surf->res.backup->base.num_pages, &vps->host_map); if (ret) { DRM_ERROR("Failed to map display buffer to CPU\n"); - ttm_bo_kunmap(&vps->guest_map); goto out_srf_unpin; } @@ -1350,7 +1334,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, stdu->display_srf = vps->surf; stdu->content_fb_type = vps->content_fb_type; stdu->cpp = vps->cpp; - memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map)); memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map)); if (!stdu->defined) -- cgit v1.2.3 From 191eccb1580939fb0d47deb405b82a85b0379070 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 9 Jan 2018 03:52:05 +1100 Subject: powerpc/pseries: Add H_GET_CPU_CHARACTERISTICS flags & wrapper A new hypervisor call has been defined to communicate various characteristics of the CPU to guests. Add definitions for the hcall number, flags and a wrapper function. 
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 17 +++++++++++++++++ arch/powerpc/include/asm/plpar_wrappers.h | 14 ++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a409177be8bd..f0461618bf7b 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -241,6 +241,7 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 @@ -330,6 +331,17 @@ #define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 /* >= 0 values are CPU number */ +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 + /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 #define PROC_TABLE_DEREG 0x10 @@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } +struct h_cpu_char_result { + u64 character; + u64 behaviour; +}; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 7f01b22fa6cb..55eddf50d149 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu) return 
plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); + if (rc == H_SUCCESS) { + p->character = retbuf[0]; + p->behaviour = retbuf[1]; + } + + return rc; +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ -- cgit v1.2.3 From 46eb14a6e1585d99c1b9f58d0e7389082a5f466b Mon Sep 17 00:00:00 2001 From: Pete Zaitcev Date: Mon, 8 Jan 2018 15:46:41 -0600 Subject: USB: fix usbmon BUG trigger Automated tests triggered this by opening usbmon and accessing the mmap while simultaneously resizing the buffers. This bug was with us since 2006, because typically applications only size the buffers once and thus avoid racing. Reported by Kirill A. Shutemov. Reported-by: Signed-off-by: Pete Zaitcev Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mon/mon_bin.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index f6ae753ab99b..f932f40302df 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1004,7 +1004,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg break; case MON_IOCQ_RING_SIZE: + mutex_lock(&rp->fetch_lock); ret = rp->b_size; + mutex_unlock(&rp->fetch_lock); break; case MON_IOCT_RING_SIZE: @@ -1231,12 +1233,16 @@ static int mon_bin_vma_fault(struct vm_fault *vmf) unsigned long offset, chunk_idx; struct page *pageptr; + mutex_lock(&rp->fetch_lock); offset = vmf->pgoff << PAGE_SHIFT; - if (offset >= rp->b_size) + if (offset >= rp->b_size) { + mutex_unlock(&rp->fetch_lock); return VM_FAULT_SIGBUS; + } chunk_idx = offset / CHUNK_SIZE; pageptr = rp->b_vec[chunk_idx].pg; get_page(pageptr); + mutex_unlock(&rp->fetch_lock); vmf->page = pageptr; return 0; } -- cgit v1.2.3 From 7ae2c3c280db183ca9ada2675c34ec2f7378abfa Mon Sep 17 00:00:00 2001 From: Alan Stern 
Date: Wed, 3 Jan 2018 12:51:51 -0500 Subject: USB: UDC core: fix double-free in usb_add_gadget_udc_release The error-handling pathways in usb_add_gadget_udc_release() are messed up. Aside from the uninformative statement labels, they can deallocate the udc structure after calling put_device(), which is a double-free. This was observed by KASAN in automatic testing. This patch cleans up the routine. It preserves the requirement that when any failure occurs, we call put_device(&gadget->dev). Signed-off-by: Alan Stern Reported-by: Fengguang Wu CC: Reviewed-by: Peter Chen Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 93eff7dec2f5..1b3efb14aec7 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1147,11 +1147,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, udc = kzalloc(sizeof(*udc), GFP_KERNEL); if (!udc) - goto err1; - - ret = device_add(&gadget->dev); - if (ret) - goto err2; + goto err_put_gadget; device_initialize(&udc->dev); udc->dev.release = usb_udc_release; @@ -1160,7 +1156,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, udc->dev.parent = parent; ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj)); if (ret) - goto err3; + goto err_put_udc; + + ret = device_add(&gadget->dev); + if (ret) + goto err_put_udc; udc->gadget = gadget; gadget->udc = udc; @@ -1170,7 +1170,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, ret = device_add(&udc->dev); if (ret) - goto err4; + goto err_unlist_udc; usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED); udc->vbus = true; @@ -1178,27 +1178,25 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, /* pick up one of pending gadget drivers */ 
ret = check_pending_gadget_drivers(udc); if (ret) - goto err5; + goto err_del_udc; mutex_unlock(&udc_lock); return 0; -err5: + err_del_udc: device_del(&udc->dev); -err4: + err_unlist_udc: list_del(&udc->list); mutex_unlock(&udc_lock); -err3: - put_device(&udc->dev); device_del(&gadget->dev); -err2: - kfree(udc); + err_put_udc: + put_device(&udc->dev); -err1: + err_put_gadget: put_device(&gadget->dev); return ret; } -- cgit v1.2.3 From 9ecccfaa7cb5249bd31bdceb93fcf5bedb8a24d8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 9 Jan 2018 15:02:51 +0000 Subject: sysfs/cpu: Fix typos in vulnerability documentation Fixes: 87590ce6e ("sysfs/cpu: Add vulnerability folder") Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner --- Documentation/ABI/testing/sysfs-devices-system-cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index bd3a88e16d8b..258902db14bf 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -378,7 +378,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/meltdown /sys/devices/system/cpu/vulnerabilities/spectre_v1 /sys/devices/system/cpu/vulnerabilities/spectre_v2 -Date: Januar 2018 +Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities @@ -388,4 +388,4 @@ Description: Information about CPU vulnerabilities "Not affected" CPU is not affected by the vulnerability "Vulnerable" CPU is affected and no mitigation in effect - "Mitigation: $M" CPU is affetcted and mitigation $M is in effect + "Mitigation: $M" CPU is affected and mitigation $M is in effect -- cgit v1.2.3 From 8ab0b7dc73e1b3e2987d42554b2bff503f692772 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 9 Jan 2018 21:28:29 +0800 Subject: blk-mq: fix kernel oops in blk_mq_tag_idle() HW queues may be 
unmapped in some cases, such as blk_mq_update_nr_hw_queues(), then we need to check it before calling blk_mq_tag_idle(), otherwise the following kernel oops can be triggered, so fix it by checking if the hw queue is unmapped since it doesn't make sense to idle the tags any more after hw queues are unmapped. [ 440.771298] Workqueue: nvme-wq nvme_rdma_del_ctrl_work [nvme_rdma] [ 440.779104] task: ffff894bae755ee0 ti: ffff893bf9bc8000 task.ti: ffff893bf9bc8000 [ 440.788359] RIP: 0010:[] [] __blk_mq_tag_idle+0x24/0x40 [ 440.798697] RSP: 0018:ffff893bf9bcbd10 EFLAGS: 00010286 [ 440.805538] RAX: 0000000000000000 RBX: ffff895bb131dc00 RCX: 000000000000011f [ 440.814426] RDX: 00000000ffffffff RSI: 0000000000000120 RDI: ffff895bb131dc00 [ 440.823301] RBP: ffff893bf9bcbd10 R08: 000000000001b860 R09: 4a51d361c00c0000 [ 440.832193] R10: b5907f32b4cc7003 R11: ffffd6cabfb57000 R12: ffff894bafd1e008 [ 440.841091] R13: 0000000000000001 R14: ffff895baf770000 R15: 0000000000000080 [ 440.849988] FS: 0000000000000000(0000) GS:ffff894bbdcc0000(0000) knlGS:0000000000000000 [ 440.859955] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 440.867274] CR2: 0000000000000008 CR3: 000000103d098000 CR4: 00000000001407e0 [ 440.876169] Call Trace: [ 440.879818] [] blk_mq_exit_hctx+0xd8/0xe0 [ 440.887051] [] blk_mq_free_queue+0xf0/0x160 [ 440.894465] [] blk_cleanup_queue+0xd9/0x150 [ 440.901881] [] nvme_ns_remove+0x5b/0xb0 [nvme_core] [ 440.910068] [] nvme_remove_namespaces+0x3b/0x60 [nvme_core] [ 440.919026] [] __nvme_rdma_remove_ctrl+0x2b/0xb0 [nvme_rdma] [ 440.928079] [] nvme_rdma_del_ctrl_work+0x17/0x20 [nvme_rdma] [ 440.937126] [] process_one_work+0x17a/0x440 [ 440.944517] [] worker_thread+0x278/0x3c0 [ 440.951607] [] ? manage_workers.isra.24+0x2a0/0x2a0 [ 440.959760] [] kthread+0xcf/0xe0 [ 440.966055] [] ? insert_kthread_work+0x40/0x40 [ 440.973715] [] ret_from_fork+0x58/0x90 [ 440.980586] [] ? 
insert_kthread_work+0x40/0x40 [ 440.988229] Code: 5b 41 5c 5d c3 66 90 0f 1f 44 00 00 48 8b 87 20 01 00 00 f0 0f ba 77 40 01 19 d2 85 d2 75 08 c3 0f 1f 80 00 00 00 00 55 48 89 e5 ff 48 08 48 8d 78 10 e8 7f 0f 05 00 5d c3 0f 1f 00 66 2e 0f [ 441.011620] RIP [] __blk_mq_tag_idle+0x24/0x40 [ 441.019301] RSP [ 441.024052] CR2: 0000000000000008 Reported-by: Zhang Yi Tested-by: Zhang Yi Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 111e1aa5562f..e258ad8dc171 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2015,7 +2015,8 @@ static void blk_mq_exit_hctx(struct request_queue *q, { blk_mq_debugfs_unregister_hctx(hctx); - blk_mq_tag_idle(hctx); + if (blk_mq_hw_queue_mapped(hctx)) + blk_mq_tag_idle(hctx); if (set->ops->exit_request) set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx); -- cgit v1.2.3 From 52257ffbfcaf58d247b13fb148e27ed17c33e526 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 9 Jan 2018 10:27:58 +0100 Subject: block, bfq: put async queues for root bfq groups too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For each pair [device for which bfq is selected as I/O scheduler, group in blkio/io], bfq maintains a corresponding bfq group. Each such bfq group contains a set of async queues, with each async queue created on demand, i.e., when some I/O request arrives for it. On creation, an async queue gets an extra reference, to make sure that the queue is not freed as long as its bfq group exists. Accordingly, to allow the queue to be freed after the group exited, this extra reference must released on group exit. The above holds also for a bfq root group, i.e., for the bfq group corresponding to the root blkio/io root for a given device. Yet, by mistake, the references to the existing async queues of a root group are not released when the latter exits. 
This causes a memory leak when the instance of bfq for a given device exits. In a similar vein, bfqg_stats_xfer_dead is not executed for a root group. This commit fixes bfq_pd_offline so that the latter executes the above missing operations for a root group too. Reported-by: Holger Hoffstätte Reported-by: Guoqing Jiang Tested-by: Holger Hoffstätte Signed-off-by: Davide Ferrari Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index da1525ec4c87..d819dc77fe65 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -775,10 +775,11 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) unsigned long flags; int i; + spin_lock_irqsave(&bfqd->lock, flags); + if (!entity) /* root group */ - return; + goto put_async_queues; - spin_lock_irqsave(&bfqd->lock, flags); /* * Empty all service_trees belonging to this group before * deactivating the group itself. @@ -809,6 +810,8 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) } __bfq_deactivate_entity(entity, false); + +put_async_queues: bfq_put_async_queues(bfqd, bfqg); spin_unlock_irqrestore(&bfqd->lock, flags); -- cgit v1.2.3 From 0d52af590552473666da5b6111e7182d6cd23f92 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 9 Jan 2018 10:27:59 +0100 Subject: block, bfq: release oom-queue ref to root group on exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On scheduler init, a reference to the root group, and a reference to its corresponding blkg are taken for the oom queue. Yet these references are not released on scheduler exit, which prevents these objects from being freed. This commit adds the missing reference releases. 
Reported-by: Davide Ferrari Tested-by: Holger Hoffstätte Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 7bd789da7a29..7c0b7f60811c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4893,6 +4893,9 @@ static void bfq_exit_queue(struct elevator_queue *e) hrtimer_cancel(&bfqd->idle_slice_timer); + /* release oom-queue reference to root group */ + bfqg_and_blkg_put(bfqd->root_group); + #ifdef CONFIG_BFQ_GROUP_IOSCHED blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); #else -- cgit v1.2.3 From 50e51c13b3822d14ff6df4279423e4b7b2269bc3 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64: Add macros for annotating the destination of rfid/hrfid The rfid/hrfid ((Hypervisor) Return From Interrupt) instruction is used for switching from the kernel to userspace, and from the hypervisor to the guest kernel. However it can and is also used for other transitions, eg. from real mode kernel code to virtual mode kernel code, and it's not always clear from the code what the destination context is. To make it clearer when reading the code, add macros which encode the expected destination context. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64e.h | 6 ++++++ arch/powerpc/include/asm/exception-64s.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a703452d67b6..555e22d5e07f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -209,5 +209,11 @@ exc_##label##_book3e: ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; +#define RFI_TO_KERNEL \ + rfi + +#define RFI_TO_USER \ + rfi + #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b27205297e1d..1af427a3c74f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,35 @@ */ #define EX_R3 EX_DAR +/* Macros for annotating the expected destination of (h)rfid */ + +#define RFI_TO_KERNEL \ + rfid + +#define RFI_TO_USER \ + rfid + +#define RFI_TO_USER_OR_KERNEL \ + rfid + +#define RFI_TO_GUEST \ + rfid + +#define HRFI_TO_KERNEL \ + hrfid + +#define HRFI_TO_USER \ + hrfid + +#define HRFI_TO_USER_OR_KERNEL \ + hrfid + +#define HRFI_TO_GUEST \ + hrfid + +#define HRFI_TO_UNKNOWN \ + hrfid + #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ -- cgit v1.2.3 From 222f20f140623ef6033491d0103ee0875fe87d35 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64s: Simple RFI macro conversions This commit does simple conversions of rfi/rfid to the new macros that include the expected destination context. By simple we mean cases where there is a single well known destination context, and it's simply a matter of substituting the instruction for the appropriate macro. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 4 ++-- arch/powerpc/kernel/entry_64.S | 14 +++++++++----- arch/powerpc/kernel/exceptions-64s.S | 24 ++++++++++++------------ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 9 ++++----- arch/powerpc/kvm/book3s_rmhandlers.S | 7 +++++-- arch/powerpc/kvm/book3s_segment.S | 4 ++-- 6 files changed, 34 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 1af427a3c74f..dfc56daed98b 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -247,7 +247,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ + h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ #define EXCEPTION_PROLOG_PSERIES_1(label, h) \ __EXCEPTION_PROLOG_PSERIES_1(label, h) @@ -261,7 +261,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ + h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3320bcac7192..e68faa4d1b13 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -37,6 +37,11 @@ #include #include #include +#ifdef CONFIG_PPC_BOOK3S +#include +#else +#include +#endif /* * System calls. @@ -397,8 +402,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtmsrd r10, 1 mtspr SPRN_SRR0, r11 mtspr SPRN_SRR1, r12 - - rfid + RFI_TO_USER b . /* prevent speculative execution */ #endif _ASM_NOKPROBE_SYMBOL(system_call_common); @@ -1073,7 +1077,7 @@ __enter_rtas: mtspr SPRN_SRR0,r5 mtspr SPRN_SRR1,r6 - rfid + RFI_TO_KERNEL b . 
/* prevent speculative execution */ rtas_return_loc: @@ -1098,7 +1102,7 @@ rtas_return_loc: mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ _ASM_NOKPROBE_SYMBOL(__enter_rtas) _ASM_NOKPROBE_SYMBOL(rtas_return_loc) @@ -1171,7 +1175,7 @@ _GLOBAL(enter_prom) LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 - rfid + RFI_TO_KERNEL #endif /* CONFIG_PPC_BOOK3E */ 1: /* Return from OF */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e441b469dc8f..5502b0147c4e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -256,7 +256,7 @@ BEGIN_FTR_SECTION LOAD_HANDLER(r12, machine_check_handle_early) 1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ 2: /* Stack overflow. Stay on emergency stack and panic. @@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) li r3,MSR_ME andc r10,r10,r3 /* Turn off MSR_ME */ mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 2: /* @@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ bl machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP - rfid + RFI_TO_USER_OR_KERNEL 9: /* Deliver the machine check to host kernel in V mode. */ MACHINE_CHECK_HANDLER_WINDUP @@ -651,7 +651,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 8: std r3,PACA_EXSLB+EX_DAR(r13) @@ -662,7 +662,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . EXC_COMMON_BEGIN(unrecov_slb) @@ -901,7 +901,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtspr SPRN_SRR0,r10 ; \ ld r10,PACAKMSR(r13) ; \ mtspr SPRN_SRR1,r10 ; \ - rfid ; \ + RFI_TO_KERNEL ; \ b . 
; /* prevent speculative execution */ #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH @@ -917,7 +917,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ xori r12,r12,MSR_LE ; \ mtspr SPRN_SRR1,r12 ; \ mr r13,r9 ; \ - rfid ; /* return to userspace */ \ + RFI_TO_USER ; /* return to userspace */ \ b . ; /* prevent speculative execution */ #else #define SYSCALL_FASTENDIAN_TEST @@ -1063,7 +1063,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mtcr r11 REST_GPR(11, r1) ld r1,GPR1(r1) - hrfid + HRFI_TO_USER_OR_KERNEL 1: mtcr r11 REST_GPR(11, r1) @@ -1314,7 +1314,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) ld r13,PACA_EXGEN+EX_R13(r13) - HRFID + HRFI_TO_UNKNOWN b . #endif @@ -1418,7 +1418,7 @@ masked_##_H##interrupt: \ ld r10,PACA_EXGEN+EX_R10(r13); \ ld r11,PACA_EXGEN+EX_R11(r13); \ /* returns to kernel where r13 must be set up, so don't restore it */ \ - ##_H##rfid; \ + ##_H##RFI_TO_KERNEL; \ b .; \ MASKED_DEC_HANDLER(_H) @@ -1441,7 +1441,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) addi r13, r13, 4 mtspr SPRN_SRR0, r13 GET_SCRATCH0(r13) - rfid + RFI_TO_KERNEL b . TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) @@ -1453,7 +1453,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) addi r13, r13, 4 mtspr SPRN_HSRR0, r13 GET_SCRATCH0(r13) - hrfid + HRFI_TO_KERNEL b . 
#endif diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2659844784b8..9c61f736c75b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline) mtmsrd r0,1 /* clear RI in MSR */ mtsrr0 r5 mtsrr1 r6 - RFI + RFI_TO_KERNEL kvmppc_call_hv_entry: BEGIN_FTR_SECTION @@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - RFI + RFI_TO_KERNEL /* Virtual-mode return */ .Lvirt_return: @@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r0, VCPU_GPR(R0)(r4) ld r4, VCPU_GPR(R4)(r4) - - hrfid + HRFI_TO_GUEST b . secondary_too_late: @@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r4, PACAKMSR(r13) mtspr SPRN_SRR0, r3 mtspr SPRN_SRR1, r4 - rfid + RFI_TO_KERNEL 9: addi r3, r1, STACK_FRAME_OVERHEAD bl kvmppc_bad_interrupt b 9b diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 42a4b237df5f..34a5adeff084 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -46,6 +46,9 @@ #define FUNC(name) name +#define RFI_TO_KERNEL RFI +#define RFI_TO_GUEST RFI + .macro INTERRUPT_TRAMPOLINE intno .global kvmppc_trampoline_\intno @@ -141,7 +144,7 @@ kvmppc_handler_skip_ins: GET_SCRATCH0(r13) /* And get back into the code */ - RFI + RFI_TO_KERNEL #endif /* @@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline) ori r5, r5, MSR_EE mtsrr0 r7 mtsrr1 r6 - RFI + RFI_TO_KERNEL #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 2a2b96d53999..93a180ceefad 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -156,7 +156,7 @@ no_dcbz32_on: PPC_LL r9, SVCPU_R9(r3) PPC_LL r3, (SVCPU_R3)(r3) - RFI + RFI_TO_GUEST kvmppc_handler_trampoline_enter_end: @@ -407,5 +407,5 @@ 
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) cmpwi r12, BOOK3S_INTERRUPT_DOORBELL beqa BOOK3S_INTERRUPT_DOORBELL - RFI + RFI_TO_KERNEL kvmppc_handler_trampoline_exit_end: -- cgit v1.2.3 From b8e90cb7bc04a509e821e82ab6ed7a8ef11ba333 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64: Convert the syscall exit path to use RFI_TO_USER/KERNEL In the syscall exit path we may be returning to user or kernel context. We already have a test for that, because we conditionally restore r13. So use that existing test and branch, and bifurcate the return based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index e68faa4d1b13..724733b74744 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -267,13 +267,23 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + ld r2,GPR2(r1) + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + RFI_TO_USER + b . /* prevent speculative execution */ + + /* exit to kernel */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ .Lsyscall_error: -- cgit v1.2.3 From a08f828cf47e6c605af21d2cdec68f84e799c318 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64: Convert fast_exception_return to use RFI_TO_USER/KERNEL Similar to the syscall return path, in fast_exception_return we may be returning to user or kernel context. We already have a test for that, because we conditionally restore r13. So use that existing test and branch, and bifurcate the return based on that. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 724733b74744..2748584b767d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -892,7 +892,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) -1: + mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -905,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r3,GPR3(r1) ld r4,GPR4(r1) ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ + +1: mtspr SPRN_SRR1,r3 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SPRN_SRR0,r2 - rfid + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + RFI_TO_KERNEL b . /* prevent speculative execution */ #endif /* CONFIG_PPC_BOOK3E */ -- cgit v1.2.3 From c7305645eb0c1621351cfc104038831ae87c0053 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64s: Convert slb_miss_common to use RFI_TO_USER/KERNEL In the SLB miss handler we may be returning to user or kernel. We need to add a check early on and save the result in the cr4 register, and then we bifurcate the return path based on that. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5502b0147c4e..ed356194f09c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + andi. 
r9,r11,MSR_PR // Check for exception from userspace + cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later + /* * Test MSR_RI before calling slb_allocate_realmode, because the * MSR in r11 gets clobbered. However we still want to allocate @@ -624,9 +627,32 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) /* All done -- return from exception. */ + bne cr4,1f /* returning to kernel */ + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ + mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ + mtcrf 0x02,r9 /* I/D indication is in cr6 */ + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_CTR(r9, PACA_EXSLB) + RESTORE_PPR_PACA(PACA_EXSLB, r9) + mr r3,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_USER + b . /* prevent speculative execution */ +1: .machine push .machine "power4" mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ mtcrf 0x02,r9 /* I/D indication is in cr6 */ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ @@ -640,9 +666,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ld r11,PACA_EXSLB+EX_R11(r13) ld r12,PACA_EXSLB+EX_R12(r13) ld r13,PACA_EXSLB+EX_R13(r13) - rfid + RFI_TO_KERNEL b . 
/* prevent speculative execution */ + 2: std r3,PACA_EXSLB+EX_DAR(r13) mr r3,r12 mfspr r11,SPRN_SRR0 -- cgit v1.2.3 From 928afc85270753657b5543e052cc270c279a3fe9 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 6 Jan 2018 00:56:44 +0800 Subject: uas: ignore UAS for Norelsys NS1068(X) chips The UAS mode of Norelsys NS1068(X) is reported to fail to work on several platforms with the following error message: xhci-hcd xhci-hcd.0.auto: ERROR Transfer event for unknown stream ring slot 1 ep 8 xhci-hcd xhci-hcd.0.auto: @00000000bf04a400 00000000 00000000 1b000000 01098001 And when trying to mount a partition on the disk the disk will disconnect from the USB controller, then after re-connecting the device will be offlined and not working at all. Falling back to USB mass storage can solve this problem, so ignore UAS function of this chip. Cc: stable@vger.kernel.org Signed-off-by: Icenowy Zheng Acked-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index e6127fb21c12..a7d08ae0adad 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -143,6 +143,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), +/* Reported-by: Icenowy Zheng */ +UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999, + "Norelsys", + "NS1068X", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* Reported-by: Takeo Nakayama */ UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999, "JMicron", -- cgit v1.2.3 From b8fd0823e0770c2d5fdbd865bccf0d5e058e5287 Mon Sep 17 00:00:00 2001 From: Andrii Vladyka Date: Thu, 4 Jan 2018 13:09:17 +0200 Subject: net: core: fix module type in sock_diag_bind Use AF_INET6 instead of AF_INET in IPv6-related code path Signed-off-by: Andrii Vladyka Signed-off-by: David S. 
Miller --- net/core/sock_diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 217f4e3b82f6..146b50e30659 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group) case SKNLGRP_INET6_UDP_DESTROY: if (!sock_diag_handlers[AF_INET6]) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_SOCK_DIAG, AF_INET); + NETLINK_SOCK_DIAG, AF_INET6); break; } return 0; -- cgit v1.2.3 From 04ced159cec863f9bc27015d6b970bb13cfa6176 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Jan 2018 08:29:46 -0800 Subject: blk-mq: move hctx lock/unlock into a helper Move the RCU vs SRCU logic into lock/unlock helpers, which makes the actual functional bits within the locked region much easier to read. tj: Reordered in front of timeout revamp patches and added the missing blk_mq_run_hw_queue() conversion. Signed-off-by: Jens Axboe Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-mq.c | 66 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e258ad8dc171..bd7c47eb2923 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -557,6 +557,22 @@ static void __blk_mq_complete_request(struct request *rq) put_cpu(); } +static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx) +{ + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) + rcu_read_unlock(); + else + srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); +} + +static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) +{ + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) + rcu_read_lock(); + else + *srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); +} + /** * blk_mq_complete_request - end I/O on a request * @rq: the request being processed @@ -1214,17 +1230,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) */ WARN_ON_ONCE(in_interrupt()); - if (!(hctx->flags & 
BLK_MQ_F_BLOCKING)) { - rcu_read_lock(); - blk_mq_sched_dispatch_requests(hctx); - rcu_read_unlock(); - } else { - might_sleep(); + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); - srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); - blk_mq_sched_dispatch_requests(hctx); - srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); - } + hctx_lock(hctx, &srcu_idx); + blk_mq_sched_dispatch_requests(hctx); + hctx_unlock(hctx, srcu_idx); } /* @@ -1296,17 +1306,10 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) * And queue will be rerun in blk_mq_unquiesce_queue() if it is * quiesced. */ - if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { - rcu_read_lock(); - need_run = !blk_queue_quiesced(hctx->queue) && - blk_mq_hctx_has_pending(hctx); - rcu_read_unlock(); - } else { - srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); - need_run = !blk_queue_quiesced(hctx->queue) && - blk_mq_hctx_has_pending(hctx); - srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); - } + hctx_lock(hctx, &srcu_idx); + need_run = !blk_queue_quiesced(hctx->queue) && + blk_mq_hctx_has_pending(hctx); + hctx_unlock(hctx, srcu_idx); if (need_run) { __blk_mq_delay_run_hw_queue(hctx, async, 0); @@ -1618,7 +1621,7 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, - blk_qc_t *cookie, bool may_sleep) + blk_qc_t *cookie) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1668,25 +1671,20 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, } insert: - blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep); + blk_mq_sched_insert_request(rq, false, run_queue, false, + hctx->flags & BLK_MQ_F_BLOCKING); } static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie) { - if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { - rcu_read_lock(); - __blk_mq_try_issue_directly(hctx, rq, cookie, false); - rcu_read_unlock(); - } 
else { - unsigned int srcu_idx; + int srcu_idx; - might_sleep(); + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); - srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); - __blk_mq_try_issue_directly(hctx, rq, cookie, true); - srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); - } + hctx_lock(hctx, &srcu_idx); + __blk_mq_try_issue_directly(hctx, rq, cookie); + hctx_unlock(hctx, srcu_idx); } static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) -- cgit v1.2.3 From 5197c05e16b49885cc9086f1676455371e821b0e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2018 08:29:47 -0800 Subject: blk-mq: protect completion path with RCU Currently, blk-mq protects only the issue path with RCU. This patch puts the completion path under the same RCU protection. This will be used to synchronize issue/completion against timeout by later patches, which will also add the comments. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index bd7c47eb2923..f5e57c80a82b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -584,11 +584,16 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) void blk_mq_complete_request(struct request *rq) { struct request_queue *q = rq->q; + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu); + int srcu_idx; if (unlikely(blk_should_fake_timeout(q))) return; + + hctx_lock(hctx, &srcu_idx); if (!blk_mark_rq_complete(rq)) __blk_mq_complete_request(rq); + hctx_unlock(hctx, srcu_idx); } EXPORT_SYMBOL(blk_mq_complete_request); -- cgit v1.2.3 From 1d9bd5161ba32db5665a617edc8b0723880f543e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2018 08:29:48 -0800 Subject: blk-mq: replace timeout synchronization with a RCU and generation based scheme Currently, blk-mq timeout path synchronizes against the usual issue/completion path using a complex scheme involving atomic bitflags, REQ_ATOM_*, memory barriers 
and subtle memory coherence rules. Unfortunately, it contains quite a few holes. There's a complex dance around REQ_ATOM_STARTED and REQ_ATOM_COMPLETE between issue/completion and timeout paths; however, they don't have a synchronization point across request recycle instances and it isn't clear what the barriers add. blk_mq_check_expired() can easily read STARTED from the N-2'th iteration and deadline from the N-1'th, and then run blk_mark_rq_complete() against the Nth instance. In fact, it's pretty easy to make blk_mq_check_expired() terminate a later instance of a request. If we induce a 5 sec delay before the time_after_eq() test in blk_mq_check_expired(), shorten the timeout to 2s, and issue back-to-back large IOs, blk-mq starts timing out requests spuriously pretty quickly. Nothing actually timed out. It just made the call on a recycled instance of a request and then terminated a later instance long after the original instance finished. The scenario isn't theoretical either. This patch replaces the broken synchronization mechanism with an RCU and generation number based one. 1. Each request has a u64 generation + state value, which can be updated only by the request owner. Whenever a request becomes in-flight, the generation number gets bumped up too. This provides the basis for the timeout path to distinguish different recycle instances of the request. Also, marking a request in-flight and setting its deadline are protected with a seqcount so that the timeout path can fetch both values coherently. 2. The timeout path fetches the generation, state and deadline. If the verdict is timeout, it records the generation into a dedicated request abortion field and does an RCU wait. 3. The completion path is also protected by RCU (from the previous patch) and checks whether the current generation number and state match the abortion field. If so, it skips completion. 4.
The timeout path, after the RCU wait, scans requests again and terminates the ones whose generation and state still match the ones requested for abortion. By now, the timeout path knows that either the generation number and state have changed (if it lost the race) or the completion will yield to it, so it can safely time out the request. While it's more lines of code, it's conceptually simpler, doesn't depend on direct use of subtle memory ordering or coherence, and hopefully doesn't terminate the wrong instance. While this change makes REQ_ATOM_COMPLETE synchronization unnecessary between issue/complete and timeout paths, REQ_ATOM_COMPLETE isn't removed yet as it's still used in other places. Future patches will move all state tracking to the new mechanism and remove all bitops in the hot paths. Note that this patch adds a comment explaining a race condition in the BLK_EH_RESET_TIMER path. The race has always been there and this patch doesn't change it. It's just documenting the existing race. v2: - Fixed BLK_EH_RESET_TIMER handling as pointed out by Jianchao. - s/request->gstate_seqc/request->gstate_seq/ as suggested by Peter. - READ_ONCE() added in blk_mq_rq_update_state() as suggested by Peter. v3: - Fixed possible extended seqcount / u64_stats_sync read looping spotted by Peter. - MQ_RQ_IDLE was incorrectly being set in complete_request instead of free_request. Fixed. v4: - Rebased on top of hctx_lock() refactoring patch. - Added comment explaining the use of hctx_lock() in completion path. v5: - Added comments requested by Bart. - Note the addition of BLK_EH_RESET_TIMER race condition in the commit message.
Signed-off-by: Tejun Heo Cc: "jianchao.wang" Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 2 + block/blk-mq.c | 229 +++++++++++++++++++++++++++++++++---------------- block/blk-mq.h | 46 ++++++++++ block/blk-timeout.c | 2 +- block/blk.h | 6 -- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 23 +++++ 7 files changed, 230 insertions(+), 79 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 2e0d041e2daf..f843ae4f858d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -126,6 +126,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->start_time = jiffies; set_start_time_ns(rq); rq->part = NULL; + seqcount_init(&rq->gstate_seq); + u64_stats_init(&rq->aborted_gstate_sync); } EXPORT_SYMBOL(blk_rq_init); diff --git a/block/blk-mq.c b/block/blk-mq.c index f5e57c80a82b..156203876c8c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -483,6 +483,7 @@ void blk_mq_free_request(struct request *rq) if (blk_rq_rl(rq)) blk_put_rl(blk_rq_rl(rq)); + blk_mq_rq_update_state(rq, MQ_RQ_IDLE); clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); if (rq->tag != -1) @@ -530,6 +531,8 @@ static void __blk_mq_complete_request(struct request *rq) bool shared = false; int cpu; + WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT); + if (rq->internal_tag != -1) blk_mq_sched_completed_request(rq); if (rq->rq_flags & RQF_STATS) { @@ -573,6 +576,36 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) *srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); } +static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate) +{ + unsigned long flags; + + /* + * blk_mq_rq_aborted_gstate() is used from the completion path and + * can thus be called from irq context. u64_stats_fetch in the + * middle of update on the same CPU leads to lockup. Disable irq + * while updating. 
+ */ + local_irq_save(flags); + u64_stats_update_begin(&rq->aborted_gstate_sync); + rq->aborted_gstate = gstate; + u64_stats_update_end(&rq->aborted_gstate_sync); + local_irq_restore(flags); +} + +static u64 blk_mq_rq_aborted_gstate(struct request *rq) +{ + unsigned int start; + u64 aborted_gstate; + + do { + start = u64_stats_fetch_begin(&rq->aborted_gstate_sync); + aborted_gstate = rq->aborted_gstate; + } while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start)); + + return aborted_gstate; +} + /** * blk_mq_complete_request - end I/O on a request * @rq: the request being processed @@ -590,8 +623,20 @@ void blk_mq_complete_request(struct request *rq) if (unlikely(blk_should_fake_timeout(q))) return; + /* + * If @rq->aborted_gstate equals the current instance, timeout is + * claiming @rq and we lost. This is synchronized through + * hctx_lock(). See blk_mq_timeout_work() for details. + * + * Completion path never blocks and we can directly use RCU here + * instead of hctx_lock() which can be either RCU or SRCU. + * However, that would complicate paths which want to synchronize + * against us. Let stay in sync with the issue path so that + * hctx_lock() covers both issue and completion paths. + */ hctx_lock(hctx, &srcu_idx); - if (!blk_mark_rq_complete(rq)) + if (blk_mq_rq_aborted_gstate(rq) != rq->gstate && + !blk_mark_rq_complete(rq)) __blk_mq_complete_request(rq); hctx_unlock(hctx, srcu_idx); } @@ -617,34 +662,32 @@ void blk_mq_start_request(struct request *rq) wbt_issue(q->rq_wb, &rq->issue_stat); } - blk_add_timer(rq); - + WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); WARN_ON_ONCE(test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)); /* - * Mark us as started and clear complete. Complete might have been - * set if requeue raced with timeout, which then marked it as - * complete. So be sure to clear complete again when we start - * the request, otherwise we'll ignore the completion event. 
+ * Mark @rq in-flight which also advances the generation number, + * and register for timeout. Protect with a seqcount to allow the + * timeout path to read both @rq->gstate and @rq->deadline + * coherently. * - * Ensure that ->deadline is visible before we set STARTED, such that - * blk_mq_check_expired() is guaranteed to observe our ->deadline when - * it observes STARTED. + * This is the only place where a request is marked in-flight. If + * the timeout path reads an in-flight @rq->gstate, the + * @rq->deadline it reads together under @rq->gstate_seq is + * guaranteed to be the matching one. */ - smp_wmb(); + preempt_disable(); + write_seqcount_begin(&rq->gstate_seq); + + blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT); + blk_add_timer(rq); + + write_seqcount_end(&rq->gstate_seq); + preempt_enable(); + set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); - if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) { - /* - * Coherence order guarantees these consecutive stores to a - * single variable propagate in the specified order. Thus the - * clear_bit() is ordered _after_ the set bit. See - * blk_mq_check_expired(). - * - * (the bits must be part of the same byte for this to be - * true). 
- */ + if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); - } if (q->dma_drain_size && blk_rq_bytes(rq)) { /* @@ -677,6 +720,7 @@ static void __blk_mq_requeue_request(struct request *rq) blk_mq_sched_requeue_request(rq); if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { + blk_mq_rq_update_state(rq, MQ_RQ_IDLE); if (q->dma_drain_size && blk_rq_bytes(rq)) rq->nr_phys_segments--; } @@ -774,6 +818,7 @@ EXPORT_SYMBOL(blk_mq_tag_to_rq); struct blk_mq_timeout_data { unsigned long next; unsigned int next_set; + unsigned int nr_expired; }; void blk_mq_rq_timed_out(struct request *req, bool reserved) @@ -801,6 +846,12 @@ void blk_mq_rq_timed_out(struct request *req, bool reserved) __blk_mq_complete_request(req); break; case BLK_EH_RESET_TIMER: + /* + * As nothing prevents from completion happening while + * ->aborted_gstate is set, this may lead to ignored + * completions and further spurious timeouts. + */ + blk_mq_rq_update_aborted_gstate(req, 0); blk_add_timer(req); blk_clear_rq_complete(req); break; @@ -816,50 +867,51 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, struct request *rq, void *priv, bool reserved) { struct blk_mq_timeout_data *data = priv; - unsigned long deadline; + unsigned long gstate, deadline; + int start; + + might_sleep(); if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) return; - /* - * Ensures that if we see STARTED we must also see our - * up-to-date deadline, see blk_mq_start_request(). 
- */ - smp_rmb(); - - deadline = READ_ONCE(rq->deadline); + /* read coherent snapshots of @rq->state_gen and @rq->deadline */ + while (true) { + start = read_seqcount_begin(&rq->gstate_seq); + gstate = READ_ONCE(rq->gstate); + deadline = rq->deadline; + if (!read_seqcount_retry(&rq->gstate_seq, start)) + break; + cond_resched(); + } - /* - * The rq being checked may have been freed and reallocated - * out already here, we avoid this race by checking rq->deadline - * and REQ_ATOM_COMPLETE flag together: - * - * - if rq->deadline is observed as new value because of - * reusing, the rq won't be timed out because of timing. - * - if rq->deadline is observed as previous value, - * REQ_ATOM_COMPLETE flag won't be cleared in reuse path - * because we put a barrier between setting rq->deadline - * and clearing the flag in blk_mq_start_request(), so - * this rq won't be timed out too. - */ - if (time_after_eq(jiffies, deadline)) { - if (!blk_mark_rq_complete(rq)) { - /* - * Again coherence order ensures that consecutive reads - * from the same variable must be in that order. This - * ensures that if we see COMPLETE clear, we must then - * see STARTED set and we'll ignore this timeout. - * - * (There's also the MB implied by the test_and_clear()) - */ - blk_mq_rq_timed_out(rq, reserved); - } + /* if in-flight && overdue, mark for abortion */ + if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT && + time_after_eq(jiffies, deadline)) { + blk_mq_rq_update_aborted_gstate(rq, gstate); + data->nr_expired++; + hctx->nr_expired++; } else if (!data->next_set || time_after(data->next, deadline)) { data->next = deadline; data->next_set = 1; } } +static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + /* + * We marked @rq->aborted_gstate and waited for RCU. 
If there were + * completions that we lost to, they would have finished and + * updated @rq->gstate by now; otherwise, the completion path is + * now guaranteed to see @rq->aborted_gstate and yield. If + * @rq->aborted_gstate still matches @rq->gstate, @rq is ours. + */ + if (READ_ONCE(rq->gstate) == rq->aborted_gstate && + !blk_mark_rq_complete(rq)) + blk_mq_rq_timed_out(rq, reserved); +} + static void blk_mq_timeout_work(struct work_struct *work) { struct request_queue *q = @@ -867,7 +919,9 @@ static void blk_mq_timeout_work(struct work_struct *work) struct blk_mq_timeout_data data = { .next = 0, .next_set = 0, + .nr_expired = 0, }; + struct blk_mq_hw_ctx *hctx; int i; /* A deadlock might occur if a request is stuck requiring a @@ -886,14 +940,40 @@ static void blk_mq_timeout_work(struct work_struct *work) if (!percpu_ref_tryget(&q->q_usage_counter)) return; + /* scan for the expired ones and set their ->aborted_gstate */ blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data); + if (data.nr_expired) { + bool has_rcu = false; + + /* + * Wait till everyone sees ->aborted_gstate. The + * sequential waits for SRCUs aren't ideal. If this ever + * becomes a problem, we can add per-hw_ctx rcu_head and + * wait in parallel. 
+ */ + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->nr_expired) + continue; + + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) + has_rcu = true; + else + synchronize_srcu(hctx->queue_rq_srcu); + + hctx->nr_expired = 0; + } + if (has_rcu) + synchronize_rcu(); + + /* terminate the ones we won */ + blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL); + } + if (data.next_set) { data.next = blk_rq_timeout(round_jiffies_up(data.next)); mod_timer(&q->timeout, data.next); } else { - struct blk_mq_hw_ctx *hctx; - queue_for_each_hw_ctx(q, hctx, i) { /* the hctx may be unmapped, so check it here */ if (blk_mq_hw_queue_mapped(hctx)) @@ -1893,6 +1973,22 @@ static size_t order_to_size(unsigned int order) return (size_t)PAGE_SIZE << order; } +static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, int node) +{ + int ret; + + if (set->ops->init_request) { + ret = set->ops->init_request(set, rq, hctx_idx, node); + if (ret) + return ret; + } + + seqcount_init(&rq->gstate_seq); + u64_stats_init(&rq->aborted_gstate_sync); + return 0; +} + int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, unsigned int hctx_idx, unsigned int depth) { @@ -1954,12 +2050,9 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, struct request *rq = p; tags->static_rqs[i] = rq; - if (set->ops->init_request) { - if (set->ops->init_request(set, rq, hctx_idx, - node)) { - tags->static_rqs[i] = NULL; - goto fail; - } + if (blk_mq_init_request(set, rq, hctx_idx, node)) { + tags->static_rqs[i] = NULL; + goto fail; } p += rq_size; @@ -2099,9 +2192,7 @@ static int blk_mq_init_hctx(struct request_queue *q, if (!hctx->fq) goto sched_exit_hctx; - if (set->ops->init_request && - set->ops->init_request(set, hctx->fq->flush_rq, hctx_idx, - node)) + if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node)) goto free_fq; if (hctx->flags & BLK_MQ_F_BLOCKING) @@ -3019,12 +3110,6 @@ static bool blk_mq_poll(struct 
request_queue *q, blk_qc_t cookie) static int __init blk_mq_init(void) { - /* - * See comment in block/blk.h rq_atomic_flags enum - */ - BUILD_BUG_ON((REQ_ATOM_STARTED / BITS_PER_BYTE) != - (REQ_ATOM_COMPLETE / BITS_PER_BYTE)); - cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, blk_mq_hctx_notify_dead); return 0; diff --git a/block/blk-mq.h b/block/blk-mq.h index 6c7c3ff5bf62..cf01f6f8c73d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -27,6 +27,19 @@ struct blk_mq_ctx { struct kobject kobj; } ____cacheline_aligned_in_smp; +/* + * Bits for request->gstate. The lower two bits carry MQ_RQ_* state value + * and the upper bits the generation number. + */ +enum mq_rq_state { + MQ_RQ_IDLE = 0, + MQ_RQ_IN_FLIGHT = 1, + + MQ_RQ_STATE_BITS = 2, + MQ_RQ_STATE_MASK = (1 << MQ_RQ_STATE_BITS) - 1, + MQ_RQ_GEN_INC = 1 << MQ_RQ_STATE_BITS, +}; + void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); @@ -85,6 +98,39 @@ extern void blk_mq_rq_timed_out(struct request *req, bool reserved); void blk_mq_release(struct request_queue *q); +/** + * blk_mq_rq_state() - read the current MQ_RQ_* state of a request + * @rq: target request. + */ +static inline int blk_mq_rq_state(struct request *rq) +{ + return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK; +} + +/** + * blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request + * @rq: target request. + * @state: new state to set. + * + * Set @rq's state to @state. The caller is responsible for ensuring that + * there are no other updaters. A request can transition into IN_FLIGHT + * only from IDLE and doing so increments the generation number. 
+ */ +static inline void blk_mq_rq_update_state(struct request *rq, + enum mq_rq_state state) +{ + u64 old_val = READ_ONCE(rq->gstate); + u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state; + + if (state == MQ_RQ_IN_FLIGHT) { + WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE); + new_val += MQ_RQ_GEN_INC; + } + + /* avoid exposing interim values */ + WRITE_ONCE(rq->gstate, new_val); +} + static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, unsigned int cpu) { diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 764ecf9aeb30..6427be7ac363 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -208,7 +208,7 @@ void blk_add_timer(struct request *req) if (!req->timeout) req->timeout = q->rq_timeout; - WRITE_ONCE(req->deadline, jiffies + req->timeout); + req->deadline = jiffies + req->timeout; /* * Only the non-mq case needs to add the request to a protected list. diff --git a/block/blk.h b/block/blk.h index 3f1446937aec..9cb2739edb6a 100644 --- a/block/blk.h +++ b/block/blk.h @@ -123,12 +123,6 @@ void blk_account_io_done(struct request *req); * Internal atomic flags for request handling */ enum rq_atomic_flags { - /* - * Keep these two bits first - not because we depend on the - * value of them, but we do depend on them being in the same - * byte of storage to ensure ordering on writes. Keeping them - * first will achieve that nicely. 
- */ REQ_ATOM_COMPLETE = 0, REQ_ATOM_STARTED, diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 95c9a5c862e2..460798dbac1f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -51,6 +51,7 @@ struct blk_mq_hw_ctx { unsigned int queue_num; atomic_t nr_active; + unsigned int nr_expired; struct hlist_node cpuhp_dead; struct kobject kobj; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 46e606f5b44b..ae563d01b29d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -27,6 +27,8 @@ #include #include #include +#include +#include struct module; struct scsi_ioctl_command; @@ -230,6 +232,27 @@ struct request { unsigned short write_hint; + /* + * On blk-mq, the lower bits of ->gstate (generation number and + * state) carry the MQ_RQ_* state value and the upper bits the + * generation number which is monotonically incremented and used to + * distinguish the reuse instances. + * + * ->gstate_seq allows updates to ->gstate and other fields + * (currently ->deadline) during request start to be read + * atomically from the timeout path, so that it can operate on a + * coherent set of information. + */ + seqcount_t gstate_seq; + u64 gstate; + + /* + * ->aborted_gstate is used by the timeout to claim a specific + * recycle instance of this request. See blk_mq_timeout_work(). + */ + struct u64_stats_sync aborted_gstate_sync; + u64 aborted_gstate; + unsigned long deadline; struct list_head timeout_list; -- cgit v1.2.3 From 67818d25738b1c9ffb8541ca875b2ae3304869d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2018 08:29:49 -0800 Subject: blk-mq: use blk_mq_rq_state() instead of testing REQ_ATOM_COMPLETE blk_mq_check_inflight() and blk_mq_poll_hybrid_sleep() test REQ_ATOM_COMPLETE to determine the request state. Both uses are speculative and we can test REQ_ATOM_STARTED and blk_mq_rq_state() for equivalent results. Replace the tests. This will allow removing REQ_ATOM_COMPLETE usages from blk-mq. 
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 156203876c8c..50dda2ff0d85 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -95,8 +95,7 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, { struct mq_inflight *mi = priv; - if (test_bit(REQ_ATOM_STARTED, &rq->atomic_flags) && - !test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) { + if (blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) { /* * index[0] counts the specific partition that was asked * for. index[1] counts the ones that are active on the @@ -3024,7 +3023,8 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, hrtimer_init_sleeper(&hs, current); do { - if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) + if (test_bit(REQ_ATOM_STARTED, &rq->atomic_flags) && + blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT) break; set_current_state(TASK_UNINTERRUPTIBLE); hrtimer_start_expires(&hs.timer, mode); -- cgit v1.2.3 From 358f70da49d77c43f2ca11b5da584213b2add29c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2018 08:29:50 -0800 Subject: blk-mq: make blk_abort_request() trigger timeout path With issue/complete and timeout paths now using the generation number and state based synchronization, blk_abort_request() is the only one which depends on REQ_ATOM_COMPLETE for arbitrating completion. There's no reason for blk_abort_request() to be a completely separate path. This patch makes blk_abort_request() piggyback on the timeout path instead of trying to terminate the request directly. This removes the last dependency on REQ_ATOM_COMPLETE in blk-mq. Note that this makes blk_abort_request() asynchronous - it initiates abortion but the actual termination will happen after a short while, even when the caller owns the request. AFAICS, SCSI and ATA should be fine with that and I think mtip32xx and dasd should be safe but not completely sure. 
It'd be great if people who know the drivers take a look. v2: - Add comment explaining the lack of synchronization around ->deadline update as requested by Bart. Signed-off-by: Tejun Heo Cc: Asai Thambi SP Cc: Stefan Haberland Cc: Jan Hoeppner Cc: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-mq.h | 2 -- block/blk-timeout.c | 13 +++++++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 50dda2ff0d85..90f6910a83f6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -820,7 +820,7 @@ struct blk_mq_timeout_data { unsigned int nr_expired; }; -void blk_mq_rq_timed_out(struct request *req, bool reserved) +static void blk_mq_rq_timed_out(struct request *req, bool reserved) { const struct blk_mq_ops *ops = req->q->mq_ops; enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; diff --git a/block/blk-mq.h b/block/blk-mq.h index cf01f6f8c73d..6b2d61629d48 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -94,8 +94,6 @@ extern int blk_mq_sysfs_register(struct request_queue *q); extern void blk_mq_sysfs_unregister(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); -extern void blk_mq_rq_timed_out(struct request *req, bool reserved); - void blk_mq_release(struct request_queue *q); /** diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 6427be7ac363..4f04cd1e0b74 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -156,12 +156,17 @@ void blk_timeout_work(struct work_struct *work) */ void blk_abort_request(struct request *req) { - if (blk_mark_rq_complete(req)) - return; - if (req->q->mq_ops) { - blk_mq_rq_timed_out(req, false); + /* + * All we need to ensure is that timeout scan takes place + * immediately and that scan sees the new timeout value. + * No need for fancy synchronizations. 
+ */ + req->deadline = jiffies; + mod_timer(&req->q->timeout, 0); } else { + if (blk_mark_rq_complete(req)) + return; blk_delete_timer(req); blk_rq_timed_out(req); } -- cgit v1.2.3 From 634f9e4631a88025d3b90c1884e9a1b6a13d01d2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2018 08:29:51 -0800 Subject: blk-mq: remove REQ_ATOM_COMPLETE usages from blk-mq After the recent updates to use generation number and state based synchronization, blk-mq no longer depends on REQ_ATOM_COMPLETE except to avoid firing the same timeout multiple times. Remove all REQ_ATOM_COMPLETE usages and use a new rq_flags flag RQF_MQ_TIMEOUT_EXPIRED to avoid firing the same timeout multiple times. This removes atomic bitops from hot paths too. v2: Removed blk_clear_rq_complete() from blk_mq_rq_timed_out(). v3: Added RQF_MQ_TIMEOUT_EXPIRED flag. Signed-off-by: Tejun Heo Cc: "jianchao.wang" Signed-off-by: Jens Axboe --- block/blk-mq.c | 15 +++++++-------- block/blk-timeout.c | 1 + include/linux/blkdev.h | 2 ++ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 90f6910a83f6..d1000c6cbec6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -634,8 +634,7 @@ void blk_mq_complete_request(struct request *rq) * hctx_lock() covers both issue and completion paths. 
*/ hctx_lock(hctx, &srcu_idx); - if (blk_mq_rq_aborted_gstate(rq) != rq->gstate && - !blk_mark_rq_complete(rq)) + if (blk_mq_rq_aborted_gstate(rq) != rq->gstate) __blk_mq_complete_request(rq); hctx_unlock(hctx, srcu_idx); } @@ -685,8 +684,6 @@ void blk_mq_start_request(struct request *rq) preempt_enable(); set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); - if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) - clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); if (q->dma_drain_size && blk_rq_bytes(rq)) { /* @@ -837,6 +834,8 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved) if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags)) return; + req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED; + if (ops->timeout) ret = ops->timeout(req, reserved); @@ -852,7 +851,6 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved) */ blk_mq_rq_update_aborted_gstate(req, 0); blk_add_timer(req); - blk_clear_rq_complete(req); break; case BLK_EH_NOT_HANDLED: break; @@ -871,7 +869,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, might_sleep(); - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + if ((rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) || + !test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) return; /* read coherent snapshots of @rq->state_gen and @rq->deadline */ @@ -906,8 +905,8 @@ static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx, * now guaranteed to see @rq->aborted_gstate and yield. If * @rq->aborted_gstate still matches @rq->gstate, @rq is ours. 
*/ - if (READ_ONCE(rq->gstate) == rq->aborted_gstate && - !blk_mark_rq_complete(rq)) + if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) && + READ_ONCE(rq->gstate) == rq->aborted_gstate) blk_mq_rq_timed_out(rq, reserved); } diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 4f04cd1e0b74..ebe99963386c 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -214,6 +214,7 @@ void blk_add_timer(struct request *req) req->timeout = q->rq_timeout; req->deadline = jiffies + req->timeout; + req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED; /* * Only the non-mq case needs to add the request to a protected list. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ae563d01b29d..007a7cf1f262 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -125,6 +125,8 @@ typedef __u32 __bitwise req_flags_t; #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) /* The per-zone write lock is held for this request */ #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) +/* timeout is expired */ +#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ -- cgit v1.2.3 From 5a61c36398d0626bad377a7f5b9391b21e16e91d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2018 08:29:52 -0800 Subject: blk-mq: remove REQ_ATOM_STARTED After the recent updates to use generation number and state based synchronization, we can easily replace REQ_ATOM_STARTED usages by adding an extra state to distinguish completed but not yet freed state. Add MQ_RQ_COMPLETE and replace REQ_ATOM_STARTED usages with blk_mq_rq_state() tests. REQ_ATOM_STARTED no longer has any users left and is removed. 
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 4 +--- block/blk-mq.c | 37 ++++++++----------------------------- block/blk-mq.h | 1 + block/blk.h | 1 - 4 files changed, 10 insertions(+), 33 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index b56a4f35720d..8adc83786256 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -271,7 +271,6 @@ static const char *const cmd_flag_name[] = { #define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name static const char *const rqf_name[] = { RQF_NAME(SORTED), - RQF_NAME(STARTED), RQF_NAME(QUEUED), RQF_NAME(SOFTBARRIER), RQF_NAME(FLUSH_SEQ), @@ -295,7 +294,6 @@ static const char *const rqf_name[] = { #define RQAF_NAME(name) [REQ_ATOM_##name] = #name static const char *const rqaf_name[] = { RQAF_NAME(COMPLETE), - RQAF_NAME(STARTED), RQAF_NAME(POLL_SLEPT), }; #undef RQAF_NAME @@ -409,7 +407,7 @@ static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved) const struct show_busy_params *params = data; if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx && - test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + blk_mq_rq_state(rq) != MQ_RQ_IDLE) __blk_mq_debugfs_rq_show(params->m, list_entry_rq(&rq->queuelist)); } diff --git a/block/blk-mq.c b/block/blk-mq.c index d1000c6cbec6..275812909d77 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -483,7 +483,6 @@ void blk_mq_free_request(struct request *rq) blk_put_rl(blk_rq_rl(rq)); blk_mq_rq_update_state(rq, MQ_RQ_IDLE); - clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); if (rq->tag != -1) blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); @@ -531,6 +530,7 @@ static void __blk_mq_complete_request(struct request *rq) int cpu; WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT); + blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE); if (rq->internal_tag != -1) blk_mq_sched_completed_request(rq); @@ -642,7 +642,7 @@ 
EXPORT_SYMBOL(blk_mq_complete_request); int blk_mq_request_started(struct request *rq) { - return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags); + return blk_mq_rq_state(rq) != MQ_RQ_IDLE; } EXPORT_SYMBOL_GPL(blk_mq_request_started); @@ -661,7 +661,6 @@ void blk_mq_start_request(struct request *rq) } WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE); - WARN_ON_ONCE(test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)); /* * Mark @rq in-flight which also advances the generation number, @@ -683,8 +682,6 @@ void blk_mq_start_request(struct request *rq) write_seqcount_end(&rq->gstate_seq); preempt_enable(); - set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); - if (q->dma_drain_size && blk_rq_bytes(rq)) { /* * Make sure space for the drain appears. We know we can do @@ -697,13 +694,9 @@ void blk_mq_start_request(struct request *rq) EXPORT_SYMBOL(blk_mq_start_request); /* - * When we reach here because queue is busy, REQ_ATOM_COMPLETE - * flag isn't set yet, so there may be race with timeout handler, - * but given rq->deadline is just set in .queue_rq() under - * this situation, the race won't be possible in reality because - * rq->timeout should be set as big enough to cover the window - * between blk_mq_start_request() called from .queue_rq() and - * clearing REQ_ATOM_STARTED here. + * When we reach here because queue is busy, it's safe to change the state + * to IDLE without checking @rq->aborted_gstate because we should still be + * holding the RCU read lock and thus protected against timeout. 
*/ static void __blk_mq_requeue_request(struct request *rq) { @@ -715,7 +708,7 @@ static void __blk_mq_requeue_request(struct request *rq) wbt_requeue(q->rq_wb, &rq->issue_stat); blk_mq_sched_requeue_request(rq); - if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { + if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) { blk_mq_rq_update_state(rq, MQ_RQ_IDLE); if (q->dma_drain_size && blk_rq_bytes(rq)) rq->nr_phys_segments--; @@ -822,18 +815,6 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved) const struct blk_mq_ops *ops = req->q->mq_ops; enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; - /* - * We know that complete is set at this point. If STARTED isn't set - * anymore, then the request isn't active and the "timeout" should - * just be ignored. This can happen due to the bitflag ordering. - * Timeout first checks if STARTED is set, and if it is, assumes - * the request is active. But if we race with completion, then - * both flags will get cleared. So check here again, and ignore - * a timeout event with a request that isn't active. 
- */ - if (!test_bit(REQ_ATOM_STARTED, &req->atomic_flags)) - return; - req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED; if (ops->timeout) @@ -869,8 +850,7 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, might_sleep(); - if ((rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) || - !test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) + if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) return; /* read coherent snapshots of @rq->state_gen and @rq->deadline */ @@ -3022,8 +3002,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, hrtimer_init_sleeper(&hs, current); do { - if (test_bit(REQ_ATOM_STARTED, &rq->atomic_flags) && - blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT) + if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE) break; set_current_state(TASK_UNINTERRUPTIBLE); hrtimer_start_expires(&hs.timer, mode); diff --git a/block/blk-mq.h b/block/blk-mq.h index 6b2d61629d48..8591a54d989b 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -34,6 +34,7 @@ struct blk_mq_ctx { enum mq_rq_state { MQ_RQ_IDLE = 0, MQ_RQ_IN_FLIGHT = 1, + MQ_RQ_COMPLETE = 2, MQ_RQ_STATE_BITS = 2, MQ_RQ_STATE_MASK = (1 << MQ_RQ_STATE_BITS) - 1, diff --git a/block/blk.h b/block/blk.h index 9cb2739edb6a..a68dbe312ea3 100644 --- a/block/blk.h +++ b/block/blk.h @@ -124,7 +124,6 @@ void blk_account_io_done(struct request *req); */ enum rq_atomic_flags { REQ_ATOM_COMPLETE = 0, - REQ_ATOM_STARTED, REQ_ATOM_POLL_SLEPT, }; -- cgit v1.2.3 From 05707b64aed8f5f1674b25334fb720d651459d5e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2018 08:29:53 -0800 Subject: blk-mq: rename blk_mq_hw_ctx->queue_rq_srcu to ->srcu The RCU protection has been expanded to cover both queueing and completion paths making ->queue_rq_srcu a misnomer. Rename it to ->srcu as suggested by Bart. 
Signed-off-by: Tejun Heo Cc: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 14 +++++++------- include/linux/blk-mq.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 275812909d77..0269d44d512e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -219,7 +219,7 @@ void blk_mq_quiesce_queue(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { if (hctx->flags & BLK_MQ_F_BLOCKING) - synchronize_srcu(hctx->queue_rq_srcu); + synchronize_srcu(hctx->srcu); else rcu = true; } @@ -564,7 +564,7 @@ static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx) if (!(hctx->flags & BLK_MQ_F_BLOCKING)) rcu_read_unlock(); else - srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx); + srcu_read_unlock(hctx->srcu, srcu_idx); } static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) @@ -572,7 +572,7 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) if (!(hctx->flags & BLK_MQ_F_BLOCKING)) rcu_read_lock(); else - *srcu_idx = srcu_read_lock(hctx->queue_rq_srcu); + *srcu_idx = srcu_read_lock(hctx->srcu); } static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate) @@ -937,7 +937,7 @@ static void blk_mq_timeout_work(struct work_struct *work) if (!(hctx->flags & BLK_MQ_F_BLOCKING)) has_rcu = true; else - synchronize_srcu(hctx->queue_rq_srcu); + synchronize_srcu(hctx->srcu); hctx->nr_expired = 0; } @@ -2101,7 +2101,7 @@ static void blk_mq_exit_hctx(struct request_queue *q, set->ops->exit_hctx(hctx, hctx_idx); if (hctx->flags & BLK_MQ_F_BLOCKING) - cleanup_srcu_struct(hctx->queue_rq_srcu); + cleanup_srcu_struct(hctx->srcu); blk_mq_remove_cpuhp(hctx); blk_free_flush_queue(hctx->fq); @@ -2174,7 +2174,7 @@ static int blk_mq_init_hctx(struct request_queue *q, goto free_fq; if (hctx->flags & BLK_MQ_F_BLOCKING) - init_srcu_struct(hctx->queue_rq_srcu); + init_srcu_struct(hctx->srcu); blk_mq_debugfs_register_hctx(q, hctx); @@ -2463,7 +2463,7 @@ static int 
blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set) { int hw_ctx_size = sizeof(struct blk_mq_hw_ctx); - BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu), + BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu), __alignof__(struct blk_mq_hw_ctx)) != sizeof(struct blk_mq_hw_ctx)); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 460798dbac1f..8efcf49796a3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -66,7 +66,7 @@ struct blk_mq_hw_ctx { #endif /* Must be the last member - see also blk_mq_hw_ctx_size(). */ - struct srcu_struct queue_rq_srcu[0]; + struct srcu_struct srcu[0]; }; struct blk_mq_tag_set { -- cgit v1.2.3 From 08b5a6e2a769f720977b245431b45134c0bdd377 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Jan 2018 09:32:25 -0700 Subject: blk-mq: silence false positive warnings in hctx_unlock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some stupider versions of gcc, it complains: block/blk-mq.c: In function ‘blk_mq_complete_request’: ./include/linux/srcu.h:175:2: warning: ‘srcu_idx’ may be used uninitialized in this function [-Wmaybe-uninitialized] __srcu_read_unlock(sp, idx); ^ block/blk-mq.c:620:6: note: ‘srcu_idx’ was declared here int srcu_idx; ^ which is completely bogus, since we only use srcu_idx when hctx->flags & BLK_MQ_F_BLOCKING is set, and that's the case where hctx_lock() has initialized it. Just set it to '0' in the normal path in hctx_lock() to silence this annoying warning. 
Fixes: 04ced159cec8 ("blk-mq: move hctx lock/unlock into a helper") Fixes: 5197c05e16b4 ("blk-mq: protect completion path with RCU") Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 0269d44d512e..8de354606690 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -569,9 +569,11 @@ static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx) static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) { - if (!(hctx->flags & BLK_MQ_F_BLOCKING)) + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { + /* shut up gcc false positive */ + *srcu_idx = 0; rcu_read_lock(); - else + } else *srcu_idx = srcu_read_lock(hctx->srcu); } -- cgit v1.2.3 From edd8ca8015800b354453b891d38960f3a474b7e4 Mon Sep 17 00:00:00 2001 From: Florian Margaine Date: Wed, 13 Dec 2017 16:43:59 +0100 Subject: rbd: reacquire lock should update lock owner client id Otherwise, future operations on this RBD using exclusive-lock are going to require the lock from a non-existent client id. 
Cc: stable@vger.kernel.org Fixes: 14bb211d324d ("rbd: support updating the lock cookie without releasing the lock") Link: http://tracker.ceph.com/issues/19929 Signed-off-by: Florian Margaine [idryomov@gmail.com: rbd_set_owner_cid() call, __rbd_lock() helper] Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 38fc5f397fde..aacae6f7163e 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3047,13 +3047,21 @@ static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf) mutex_unlock(&rbd_dev->watch_mutex); } +static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie) +{ + struct rbd_client_id cid = rbd_get_cid(rbd_dev); + + strcpy(rbd_dev->lock_cookie, cookie); + rbd_set_owner_cid(rbd_dev, &cid); + queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work); +} + /* * lock_rwsem must be held for write */ static int rbd_lock(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; - struct rbd_client_id cid = rbd_get_cid(rbd_dev); char cookie[32]; int ret; @@ -3068,9 +3076,7 @@ static int rbd_lock(struct rbd_device *rbd_dev) return ret; rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED; - strcpy(rbd_dev->lock_cookie, cookie); - rbd_set_owner_cid(rbd_dev, &cid); - queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work); + __rbd_lock(rbd_dev, cookie); return 0; } @@ -3856,7 +3862,7 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev) queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); } else { - strcpy(rbd_dev->lock_cookie, cookie); + __rbd_lock(rbd_dev, cookie); } } -- cgit v1.2.3 From 21acdf45f4958135940f0b4767185cf911d4b010 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 21 Dec 2017 15:35:11 +0100 Subject: rbd: set max_segments to USHRT_MAX Commit d3834fefcfe5 ("rbd: bump queue_max_segments") bumped max_segments (unsigned short) to max_hw_sectors 
(unsigned int). max_hw_sectors is set to the number of 512-byte sectors in an object and overflows unsigned short for 32M (largest possible) objects, making the block layer resort to handing us single segment (i.e. single page or even smaller) bios in that case. Cc: stable@vger.kernel.org Fixes: d3834fefcfe5 ("rbd: bump queue_max_segments") Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index aacae6f7163e..cc93522a6d41 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4387,7 +4387,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); q->limits.max_sectors = queue_max_hw_sectors(q); - blk_queue_max_segments(q, segment_size / SECTOR_SIZE); + blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); -- cgit v1.2.3 From 7fb59e940f6225beed0b24cd09e9fad9aebb7565 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 5 Jan 2018 12:39:57 -0800 Subject: ASoC: codecs: dmic: Make number of channels configurable The DMIC DAI driver specifies a number of 1 to 8 channels for each DAI. The actual number of mics can currently not be configured in the device tree or audio glue, but is derived from the min/max channels of the CPU and codec DAI. A typical CPU DAI has two or more channels, in consequence a single mic is treated as a stereo/multi channel device, even though only one channel carries audio data. This change adds the option to specify the number of used DMIC channels in the device tree. When specified this value overwrites the default channels_max value of 8 in the snd_soc_dai_driver struct of the codec. 
Signed-off-by: Matthias Kaehlcke Reviewed-by: Rob Herring Acked-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/dmic.txt | 2 ++ sound/soc/codecs/dmic.c | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/dmic.txt b/Documentation/devicetree/bindings/sound/dmic.txt index 54c8ef6498a8..f7bf65611453 100644 --- a/Documentation/devicetree/bindings/sound/dmic.txt +++ b/Documentation/devicetree/bindings/sound/dmic.txt @@ -7,10 +7,12 @@ Required properties: Optional properties: - dmicen-gpios: GPIO specifier for dmic to control start and stop + - num-channels: Number of microphones on this DAI Example node: dmic_codec: dmic@0 { compatible = "dmic-codec"; dmicen-gpios = <&gpio4 3 GPIO_ACTIVE_HIGH>; + num-channels = <1>; }; diff --git a/sound/soc/codecs/dmic.c b/sound/soc/codecs/dmic.c index b88a1ee66f80..c88f974ebe3e 100644 --- a/sound/soc/codecs/dmic.c +++ b/sound/soc/codecs/dmic.c @@ -107,8 +107,30 @@ static const struct snd_soc_codec_driver soc_dmic = { static int dmic_dev_probe(struct platform_device *pdev) { + int err; + u32 chans; + struct snd_soc_dai_driver *dai_drv = &dmic_dai; + + if (pdev->dev.of_node) { + err = of_property_read_u32(pdev->dev.of_node, "num-channels", &chans); + if (err && (err != -ENOENT)) + return err; + + if (!err) { + if (chans < 1 || chans > 8) + return -EINVAL; + + dai_drv = devm_kzalloc(&pdev->dev, sizeof(*dai_drv), GFP_KERNEL); + if (!dai_drv) + return -ENOMEM; + + memcpy(dai_drv, &dmic_dai, sizeof(*dai_drv)); + dai_drv->capture.channels_max = chans; + } + } + return snd_soc_register_codec(&pdev->dev, - &soc_dmic, &dmic_dai, 1); + &soc_dmic, dai_drv, 1); } static int dmic_dev_remove(struct platform_device *pdev) -- cgit v1.2.3 From 3dc2fa47549aca71773afdd12a78d31802bb22b4 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 8 Jan 2018 19:43:00 +0800 Subject: net: caif: use strlcpy() instead of strncpy() gcc-8 reports 
net/caif/caif_dev.c: In function 'caif_enroll_dev': ./include/linux/string.h:245:9: warning: '__builtin_strncpy' output may be truncated copying 15 bytes from a string of length 15 [-Wstringop-truncation] net/caif/cfctrl.c: In function 'cfctrl_linkup_request': ./include/linux/string.h:245:9: warning: '__builtin_strncpy' output may be truncated copying 15 bytes from a string of length 15 [-Wstringop-truncation] net/caif/cfcnfg.c: In function 'caif_connect_client': ./include/linux/string.h:245:9: warning: '__builtin_strncpy' output may be truncated copying 15 bytes from a string of length 15 [-Wstringop-truncation] The compiler requires that the input param 'len' of strncpy() should be greater than the length of the src string, so that '\0' is copied as well. We can just use strlcpy() to avoid this warning. Signed-off-by: Xiongfeng Wang Signed-off-by: David S. Miller --- net/caif/caif_dev.c | 5 ++--- net/caif/cfcnfg.c | 10 ++++------ net/caif/cfctrl.c | 4 ++-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 2d38b6e34203..e0adcd123f48 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, mutex_lock(&caifdevs->lock); list_add_rcu(&caifd->list, &caifdevs->list); - strncpy(caifd->layer.name, dev->name, - sizeof(caifd->layer.name) - 1); - caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0; + strlcpy(caifd->layer.name, dev->name, + sizeof(caifd->layer.name)); caifd->layer.transmit = transmit; cfcnfg_add_phy_layer(cfg, dev, diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index 273cb07f57d8..8f00bea093b9 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, case CAIFPROTO_RFM: l->linktype = CFCTRL_SRV_RFM; l->u.datagram.connid = s->sockaddr.u.rfm.connection_id; - strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume, -
sizeof(l->u.rfm.volume)-1); - l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0; + strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume, + sizeof(l->u.rfm.volume)); break; case CAIFPROTO_UTIL: l->linktype = CFCTRL_SRV_UTIL; l->endpoint = 0x00; l->chtype = 0x00; - strncpy(l->u.utility.name, s->sockaddr.u.util.service, - sizeof(l->u.utility.name)-1); - l->u.utility.name[sizeof(l->u.utility.name)-1] = 0; + strlcpy(l->u.utility.name, s->sockaddr.u.util.service, + sizeof(l->u.utility.name)); caif_assert(sizeof(l->u.utility.name) > 10); l->u.utility.paramlen = s->param.size; if (l->u.utility.paramlen > sizeof(l->u.utility.params)) diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index f5afda1abc76..655ed7032150 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer, tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs); cfpkt_add_body(pkt, &tmp16, 2); memset(utility_name, 0, sizeof(utility_name)); - strncpy(utility_name, param->u.utility.name, - UTILITY_NAME_LENGTH - 1); + strlcpy(utility_name, param->u.utility.name, + UTILITY_NAME_LENGTH); cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH); tmp8 = param->u.utility.paramlen; cfpkt_add_body(pkt, &tmp8, 1); -- cgit v1.2.3 From 20b50d79974ea3192e8c3ab7faf4e536e5f14d8f Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 8 Jan 2018 15:54:44 +0100 Subject: net: ipv4: emulate READ_ONCE() on ->hdrincl bit-field in raw_sendmsg() Commit 8f659a03a0ba ("net: ipv4: fix for a race condition in raw_sendmsg") fixed the issue of possibly inconsistent ->hdrincl handling due to concurrent updates by reading this bit-field member into a local variable and using the thus stabilized value in subsequent tests. However, aforementioned commit also adds the (correct) comment that /* hdrincl should be READ_ONCE(inet->hdrincl) * but READ_ONCE() doesn't work with bit fields */ because as it stands, the compiler is free to shortcut or even eliminate the local variable at its will. 
Note that I have not seen anything like this happening in reality and thus, the concern is a theoretical one. However, in order to be on the safe side, emulate a READ_ONCE() on the bit-field by doing it on the local 'hdrincl' variable itself: int hdrincl = inet->hdrincl; hdrincl = READ_ONCE(hdrincl); This breaks the chain in the sense that the compiler is not allowed to replace subsequent reads from hdrincl with reloads from inet->hdrincl. Fixes: 8f659a03a0ba ("net: ipv4: fix for a race condition in raw_sendmsg") Signed-off-by: Nicolai Stange Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/raw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 125c1eab3eaa..5e570aa9e43b 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -520,9 +520,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; /* hdrincl should be READ_ONCE(inet->hdrincl) - * but READ_ONCE() doesn't work with bit fields + * but READ_ONCE() doesn't work with bit fields. + * Doing this indirectly yields the same result. */ hdrincl = inet->hdrincl; + hdrincl = READ_ONCE(hdrincl); /* * Check the flags. */ -- cgit v1.2.3 From 599522ea10c55c9f6823e3041ce4ddc666965468 Mon Sep 17 00:00:00 2001 From: Steven Eckhoff Date: Mon, 8 Jan 2018 09:47:54 -0700 Subject: ASoC: TSCS42xx: Fix control names The tscs42xx CODEC driver can confuse userspace with non-standard control names. Remove "Switch" from enum control type names. Add "Switch" to on/off control type names. 
Signed-off-by: Steven Eckhoff Signed-off-by: Mark Brown --- sound/soc/codecs/tscs42xx.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/sound/soc/codecs/tscs42xx.c b/sound/soc/codecs/tscs42xx.c index eedd600875e5..4a5b32a717f2 100644 --- a/sound/soc/codecs/tscs42xx.c +++ b/sound/soc/codecs/tscs42xx.c @@ -631,7 +631,7 @@ static const struct snd_kcontrol_new tscs42xx_snd_controls[] = { 0, mic_boost_scale), /* Input Channel Map */ - SOC_ENUM("Input Channel Map Switch", ch_map_select_enum), + SOC_ENUM("Input Channel Map", ch_map_select_enum), /* Coefficient Ram */ COEFF_RAM_CTL("Cascade1L BiQuad1", BIQUAD_SIZE, 0x00), @@ -708,13 +708,13 @@ static const struct snd_kcontrol_new tscs42xx_snd_controls[] = { /* EQ */ SOC_SINGLE("EQ1 Switch", R_CONFIG1, FB_CONFIG1_EQ1_EN, 1, 0), SOC_SINGLE("EQ2 Switch", R_CONFIG1, FB_CONFIG1_EQ2_EN, 1, 0), - SOC_ENUM("EQ1 Band Enable Switch", eq1_band_enable_enum), - SOC_ENUM("EQ2 Band Enable Switch", eq2_band_enable_enum), + SOC_ENUM("EQ1 Band Enable", eq1_band_enable_enum), + SOC_ENUM("EQ2 Band Enable", eq2_band_enable_enum), /* CLE */ - SOC_ENUM("CLE Level Detect Switch", + SOC_ENUM("CLE Level Detect", cle_level_detection_enum), - SOC_ENUM("CLE Level Detect Win Switch", + SOC_ENUM("CLE Level Detect Win", cle_level_detection_window_enum), SOC_SINGLE("Expander Switch", R_CLECTL, FB_CLECTL_EXP_EN, 1, 0), @@ -726,7 +726,7 @@ static const struct snd_kcontrol_new tscs42xx_snd_controls[] = { R_MUGAIN, FB_MUGAIN_CLEMUG, 0x1f, 0, mugain_scale), SOC_SINGLE_TLV("Comp Thresh Playback Volume", R_COMPTH, FB_COMPTH, 0xff, 0, compth_scale), - SOC_ENUM("Comp Ratio Switch", compressor_ratio_enum), + SOC_ENUM("Comp Ratio", compressor_ratio_enum), SND_SOC_BYTES("Comp Atk Time", R_CATKTCL, 2), /* Effects */ @@ -740,50 +740,50 @@ static const struct snd_kcontrol_new tscs42xx_snd_controls[] = { SOC_SINGLE("MBC Band1 Switch", R_DACMBCEN, FB_DACMBCEN_MBCEN1, 1, 0), SOC_SINGLE("MBC Band2 Switch", 
R_DACMBCEN, FB_DACMBCEN_MBCEN2, 1, 0), SOC_SINGLE("MBC Band3 Switch", R_DACMBCEN, FB_DACMBCEN_MBCEN3, 1, 0), - SOC_ENUM("MBC Band1 Level Detect Switch", + SOC_ENUM("MBC Band1 Level Detect", mbc_level_detection_enums[0]), - SOC_ENUM("MBC Band2 Level Detect Switch", + SOC_ENUM("MBC Band2 Level Detect", mbc_level_detection_enums[1]), - SOC_ENUM("MBC Band3 Level Detect Switch", + SOC_ENUM("MBC Band3 Level Detect", mbc_level_detection_enums[2]), - SOC_ENUM("MBC Band1 Level Detect Win Switch", + SOC_ENUM("MBC Band1 Level Detect Win", mbc_level_detection_window_enums[0]), - SOC_ENUM("MBC Band2 Level Detect Win Switch", + SOC_ENUM("MBC Band2 Level Detect Win", mbc_level_detection_window_enums[1]), - SOC_ENUM("MBC Band3 Level Detect Win Switch", + SOC_ENUM("MBC Band3 Level Detect Win", mbc_level_detection_window_enums[2]), - SOC_SINGLE("MBC1 Phase Invert", R_DACMBCMUG1, FB_DACMBCMUG1_PHASE, - 1, 0), + SOC_SINGLE("MBC1 Phase Invert Switch", + R_DACMBCMUG1, FB_DACMBCMUG1_PHASE, 1, 0), SOC_SINGLE_TLV("DAC MBC1 Make-Up Gain Playback Volume", R_DACMBCMUG1, FB_DACMBCMUG1_MUGAIN, 0x1f, 0, mugain_scale), SOC_SINGLE_TLV("DAC MBC1 Comp Thresh Playback Volume", R_DACMBCTHR1, FB_DACMBCTHR1_THRESH, 0xff, 0, compth_scale), - SOC_ENUM("DAC MBC1 Comp Ratio Switch", + SOC_ENUM("DAC MBC1 Comp Ratio", dac_mbc1_compressor_ratio_enum), SND_SOC_BYTES("DAC MBC1 Comp Atk Time", R_DACMBCATK1L, 2), SND_SOC_BYTES("DAC MBC1 Comp Rel Time Const", R_DACMBCREL1L, 2), - SOC_SINGLE("MBC2 Phase Invert", R_DACMBCMUG2, FB_DACMBCMUG2_PHASE, - 1, 0), + SOC_SINGLE("MBC2 Phase Invert Switch", + R_DACMBCMUG2, FB_DACMBCMUG2_PHASE, 1, 0), SOC_SINGLE_TLV("DAC MBC2 Make-Up Gain Playback Volume", R_DACMBCMUG2, FB_DACMBCMUG2_MUGAIN, 0x1f, 0, mugain_scale), SOC_SINGLE_TLV("DAC MBC2 Comp Thresh Playback Volume", R_DACMBCTHR2, FB_DACMBCTHR2_THRESH, 0xff, 0, compth_scale), - SOC_ENUM("DAC MBC2 Comp Ratio Switch", + SOC_ENUM("DAC MBC2 Comp Ratio", dac_mbc2_compressor_ratio_enum), SND_SOC_BYTES("DAC MBC2 Comp Atk Time", 
R_DACMBCATK2L, 2), SND_SOC_BYTES("DAC MBC2 Comp Rel Time Const", R_DACMBCREL2L, 2), - SOC_SINGLE("MBC3 Phase Invert", R_DACMBCMUG3, FB_DACMBCMUG3_PHASE, - 1, 0), + SOC_SINGLE("MBC3 Phase Invert Switch", + R_DACMBCMUG3, FB_DACMBCMUG3_PHASE, 1, 0), SOC_SINGLE_TLV("DAC MBC3 Make-Up Gain Playback Volume", R_DACMBCMUG3, FB_DACMBCMUG3_MUGAIN, 0x1f, 0, mugain_scale), SOC_SINGLE_TLV("DAC MBC3 Comp Thresh Playback Volume", R_DACMBCTHR3, FB_DACMBCTHR3_THRESH, 0xff, 0, compth_scale), - SOC_ENUM("DAC MBC3 Comp Ratio Switch", + SOC_ENUM("DAC MBC3 Comp Ratio", dac_mbc3_compressor_ratio_enum), SND_SOC_BYTES("DAC MBC3 Comp Atk Time", R_DACMBCATK3L, 2), SND_SOC_BYTES("DAC MBC3 Comp Rel Time Const", -- cgit v1.2.3 From 3511108a790fc8942448556b82046d6de945b80f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 8 Jan 2018 23:14:44 +0000 Subject: ASoC: TSCS42xx: make functions pll_event and dac_event static The functions pll_event and dac_event are local to the source and do not need to be in global scope, so make them static. Cleans up sparse warnings: symbol 'pll_event' was not declared. Should it be static? symbol 'dac_event' was not declared. Should it be static? 
Signed-off-by: Colin Ian King Reviewed-by: Steven Eckhoff Signed-off-by: Mark Brown --- sound/soc/codecs/tscs42xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/tscs42xx.c b/sound/soc/codecs/tscs42xx.c index 4a5b32a717f2..e7661d0315e6 100644 --- a/sound/soc/codecs/tscs42xx.c +++ b/sound/soc/codecs/tscs42xx.c @@ -355,8 +355,8 @@ static int dapm_micb_event(struct snd_soc_dapm_widget *w, return 0; } -int pll_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *kcontrol, int event) +static int pll_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) { struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); int ret; @@ -369,8 +369,8 @@ int pll_event(struct snd_soc_dapm_widget *w, return ret; } -int dac_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *kcontrol, int event) +static int dac_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) { struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); struct tscs42xx *tscs42xx = snd_soc_codec_get_drvdata(codec); -- cgit v1.2.3 From 2fdd18118dad86bf5e7880d8d02ea27be23e3671 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 8 Jan 2018 08:50:17 +0200 Subject: docs-rst: networking: wire up msg_zerocopy Fix the following 'make htmldocs' complaint: Documentation/networking/msg_zerocopy.rst:: WARNING: document isn't included in any toctree. Signed-off-by: Mike Rapoport Signed-off-by: David S. Miller --- Documentation/networking/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 66e620866245..7d4b15977d61 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -9,6 +9,7 @@ Contents: batman-adv kapi z8530book + msg_zerocopy .. 
only:: subproject @@ -16,4 +17,3 @@ Contents: ======= * :ref:`genindex` - -- cgit v1.2.3 From 195e2addbce09e5afbc766efc1e6567c9ce840d3 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 6 Jan 2018 21:53:26 +0300 Subject: SolutionEngine771x: fix Ether platform data The 'sh_eth' driver's probe() method would fail on the SolutionEngine7710 board and crash on SolutionEngine7712 board as the platform code is hopelessly behind the driver's platform data -- it passes the PHY address instead of 'struct sh_eth_plat_data *'; pass the latter to the driver in order to fix the bug... Fixes: 71557a37adb5 ("[netdrvr] sh_eth: Add SH7619 support") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- arch/sh/boards/mach-se/770x/setup.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/mach-se/770x/setup.c b/arch/sh/boards/mach-se/770x/setup.c index 77c35350ee77..b7fa7a87e946 100644 --- a/arch/sh/boards/mach-se/770x/setup.c +++ b/arch/sh/boards/mach-se/770x/setup.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include @@ -115,6 +116,11 @@ static struct platform_device heartbeat_device = { #if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\ defined(CONFIG_CPU_SUBTYPE_SH7712) /* SH771X Ethernet driver */ +static struct sh_eth_plat_data sh_eth_plat = { + .phy = PHY_ID, + .phy_interface = PHY_INTERFACE_MODE_MII, +}; + static struct resource sh_eth0_resources[] = { [0] = { .start = SH_ETH0_BASE, @@ -132,7 +138,7 @@ static struct platform_device sh_eth0_device = { .name = "sh771x-ether", .id = 0, .dev = { - .platform_data = PHY_ID, + .platform_data = &sh_eth_plat, }, .num_resources = ARRAY_SIZE(sh_eth0_resources), .resource = sh_eth0_resources, @@ -155,7 +161,7 @@ static struct platform_device sh_eth1_device = { .name = "sh771x-ether", .id = 1, .dev = { - .platform_data = PHY_ID, + .platform_data = &sh_eth_plat, }, .num_resources = ARRAY_SIZE(sh_eth1_resources), .resource = sh_eth1_resources, -- cgit v1.2.3 From 
f9a531d6731d74f1e24298d9641c2dc1fef2631b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 6 Jan 2018 21:53:27 +0300 Subject: SolutionEngine771x: add Ether TSU resource After the Ether platform data is fixed, the driver probe() method would still fail since the 'struct sh_eth_cpu_data' corresponding to SH771x indicates the presence of TSU but the memory resource for it is absent. Add the missing TSU resource to both Ether devices and fix the harmless off-by-one error in the main memory resources, while at it... Fixes: 4986b996882d ("net: sh_eth: remove the SH_TSU_ADDR") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- arch/sh/boards/mach-se/770x/setup.c | 14 ++++++++++++-- arch/sh/include/mach-se/mach/se.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/sh/boards/mach-se/770x/setup.c b/arch/sh/boards/mach-se/770x/setup.c index b7fa7a87e946..412326d59e6f 100644 --- a/arch/sh/boards/mach-se/770x/setup.c +++ b/arch/sh/boards/mach-se/770x/setup.c @@ -124,10 +124,15 @@ static struct sh_eth_plat_data sh_eth_plat = { static struct resource sh_eth0_resources[] = { [0] = { .start = SH_ETH0_BASE, - .end = SH_ETH0_BASE + 0x1B8, + .end = SH_ETH0_BASE + 0x1B8 - 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = SH_TSU_BASE, + .end = SH_TSU_BASE + 0x200 - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = SH_ETH0_IRQ, .end = SH_ETH0_IRQ, .flags = IORESOURCE_IRQ, @@ -147,10 +152,15 @@ static struct platform_device sh_eth0_device = { static struct resource sh_eth1_resources[] = { [0] = { .start = SH_ETH1_BASE, - .end = SH_ETH1_BASE + 0x1B8, + .end = SH_ETH1_BASE + 0x1B8 - 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = SH_TSU_BASE, + .end = SH_TSU_BASE + 0x200 - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = SH_ETH1_IRQ, .end = SH_ETH1_IRQ, .flags = IORESOURCE_IRQ, diff --git a/arch/sh/include/mach-se/mach/se.h b/arch/sh/include/mach-se/mach/se.h index 4246ef9b07a3..aa83fe1ff0b1 100644 --- 
a/arch/sh/include/mach-se/mach/se.h +++ b/arch/sh/include/mach-se/mach/se.h @@ -100,6 +100,7 @@ /* Base address */ #define SH_ETH0_BASE 0xA7000000 #define SH_ETH1_BASE 0xA7000400 +#define SH_TSU_BASE 0xA7000800 /* PHY ID */ #if defined(CONFIG_CPU_SUBTYPE_SH7710) # define PHY_ID 0x00 -- cgit v1.2.3 From 4512c43eac7e007d982e7ea45152ea6f3f4d1921 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 8 Jan 2018 10:34:00 -0800 Subject: ipv6: remove null_entry before adding default route In the current code, when creating a new fib6 table, tb6_root.leaf gets initialized to net->ipv6.ip6_null_entry. If a default route is being added with rt->rt6i_metric = 0xffffffff, fib6_add() will add this route after net->ipv6.ip6_null_entry. As null_entry is shared, it could cause problem. In order to fix it, set fn->leaf to NULL before calling fib6_add_rt2node() when trying to add the first default route. And reset fn->leaf to null_entry when adding fails or when deleting the last default route. syzkaller reported the following issue which is fixed by this commit: WARNING: suspicious RCU usage 4.15.0-rc5+ #171 Not tainted ----------------------------- net/ipv6/ip6_fib.c:1702 suspicious rcu_dereference_protected() usage! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 4 locks held by swapper/0/0: #0: ((&net->ipv6.ip6_fib_timer)){+.-.}, at: [<00000000d43f631b>] lockdep_copy_map include/linux/lockdep.h:178 [inline] #0: ((&net->ipv6.ip6_fib_timer)){+.-.}, at: [<00000000d43f631b>] call_timer_fn+0x1c6/0x820 kernel/time/timer.c:1310 #1: (&(&net->ipv6.fib6_gc_lock)->rlock){+.-.}, at: [<000000002ff9d65c>] spin_lock_bh include/linux/spinlock.h:315 [inline] #1: (&(&net->ipv6.fib6_gc_lock)->rlock){+.-.}, at: [<000000002ff9d65c>] fib6_run_gc+0x9d/0x3c0 net/ipv6/ip6_fib.c:2007 #2: (rcu_read_lock){....}, at: [<0000000091db762d>] __fib6_clean_all+0x0/0x3a0 net/ipv6/ip6_fib.c:1560 #3: (&(&tb->tb6_lock)->rlock){+.-.}, at: [<000000009e503581>] spin_lock_bh include/linux/spinlock.h:315 [inline] #3: (&(&tb->tb6_lock)->rlock){+.-.}, at: [<000000009e503581>] __fib6_clean_all+0x1d0/0x3a0 net/ipv6/ip6_fib.c:1948 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc5+ #171 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 lockdep_rcu_suspicious+0x123/0x170 kernel/locking/lockdep.c:4585 fib6_del+0xcaa/0x11b0 net/ipv6/ip6_fib.c:1701 fib6_clean_node+0x3aa/0x4f0 net/ipv6/ip6_fib.c:1892 fib6_walk_continue+0x46c/0x8a0 net/ipv6/ip6_fib.c:1815 fib6_walk+0x91/0xf0 net/ipv6/ip6_fib.c:1863 fib6_clean_tree+0x1e6/0x340 net/ipv6/ip6_fib.c:1933 __fib6_clean_all+0x1f4/0x3a0 net/ipv6/ip6_fib.c:1949 fib6_clean_all net/ipv6/ip6_fib.c:1960 [inline] fib6_run_gc+0x16b/0x3c0 net/ipv6/ip6_fib.c:2016 fib6_gc_timer_cb+0x20/0x30 net/ipv6/ip6_fib.c:2033 call_timer_fn+0x228/0x820 kernel/time/timer.c:1320 expire_timers kernel/time/timer.c:1357 [inline] __run_timers+0x7ee/0xb70 kernel/time/timer.c:1660 run_timer_softirq+0x4c/0xb0 kernel/time/timer.c:1686 __do_softirq+0x2d7/0xb85 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] 
irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:540 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:904 Reported-by: syzbot Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Signed-off-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d11a5578e4f8..9dcc3924a975 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net, if (!(fn->fn_flags & RTN_RTINFO)) { RCU_INIT_POINTER(fn->leaf, NULL); rt6_release(leaf); + /* remove null_entry in the root node */ + } else if (fn->fn_flags & RTN_TL_ROOT && + rcu_access_pointer(fn->leaf) == + net->ipv6.ip6_null_entry) { + RCU_INIT_POINTER(fn->leaf, NULL); } return fn; @@ -1270,13 +1275,17 @@ out: return err; failure: - /* fn->leaf could be NULL if fn is an intermediate node and we - * failed to add the new route to it in both subtree creation - * failure and fib6_add_rt2node() failure case. - * In both cases, fib6_repair_tree() should be called to fix - * fn->leaf. + /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if: + * 1. fn is an intermediate node and we failed to add the new + * route to it in both subtree creation failure and fib6_add_rt2node() + * failure case. + * 2. fn is the root node in the table and we fail to add the first + * default route to it. 
*/ - if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) + if (fn && + (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) || + (fn->fn_flags & RTN_TL_ROOT && + !rcu_access_pointer(fn->leaf)))) fib6_repair_tree(info->nl_net, table, fn); /* Always release dst as dst->__refcnt is guaranteed * to be taken before entering this function @@ -1531,6 +1540,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_walker *w; int iter = 0; + /* Set fn->leaf to null_entry for root node. */ + if (fn->fn_flags & RTN_TL_ROOT) { + rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry); + return fn; + } + for (;;) { struct fib6_node *fn_r = rcu_dereference_protected(fn->right, lockdep_is_held(&table->tb6_lock)); @@ -1685,10 +1700,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, } read_unlock(&net->ipv6.fib6_walker_lock); - /* If it was last route, expunge its radix tree node */ + /* If it was last route, call fib6_repair_tree() to: + * 1. For root node, put back null_entry as how the table was created. + * 2. For other nodes, expunge its radix tree node. + */ if (!rcu_access_pointer(fn->leaf)) { - fn->fn_flags &= ~RTN_RTINFO; - net->ipv6.rt6_stats->fib_route_nodes--; + if (!(fn->fn_flags & RTN_TL_ROOT)) { + fn->fn_flags &= ~RTN_RTINFO; + net->ipv6.rt6_stats->fib_route_nodes--; + } fn = fib6_repair_tree(net, table, fn); } -- cgit v1.2.3 From be95a845cc4402272994ce290e3ad928aff06cb9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Jan 2018 13:17:44 +0100 Subject: bpf: avoid false sharing of map refcount with max_entries In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds speculation") also change the layout of struct bpf_map such that false sharing of fast-path members like max_entries is avoided when the maps reference counter is altered. Therefore enforce them to be placed into separate cachelines. 
pahole dump after change: struct bpf_map { const struct bpf_map_ops * ops; /* 0 8 */ struct bpf_map * inner_map_meta; /* 8 8 */ void * security; /* 16 8 */ enum bpf_map_type map_type; /* 24 4 */ u32 key_size; /* 28 4 */ u32 value_size; /* 32 4 */ u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ u32 pages; /* 44 4 */ u32 id; /* 48 4 */ int numa_node; /* 52 4 */ bool unpriv_array; /* 56 1 */ /* XXX 7 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ struct user_struct * user; /* 64 8 */ atomic_t refcnt; /* 72 4 */ atomic_t usercnt; /* 76 4 */ struct work_struct work; /* 80 32 */ char name[16]; /* 112 16 */ /* --- cacheline 2 boundary (128 bytes) --- */ /* size: 128, cachelines: 2, members: 17 */ /* sum members: 121, holes: 1, sum holes: 7 */ }; Now all entries in the first cacheline are read only throughout the life time of the map, set up once during map creation. Overall struct size and number of cachelines doesn't change from the reordering. struct bpf_map is usually first member and embedded in map structs in specific map implementations, so also avoid those members to sit at the end where it could potentially share the cacheline with first map values e.g. in the array since remote CPUs could trigger map updates just as well for those (easily dirtying members like max_entries intentionally as well) while having subsequent values in cache. Quoting from Google's Project Zero blog [1]: Additionally, at least on the Intel machine on which this was tested, bouncing modified cache lines between cores is slow, apparently because the MESI protocol is used for cache coherence [8]. Changing the reference counter of an eBPF array on one physical CPU core causes the cache line containing the reference counter to be bounced over to that CPU core, making reads of the reference counter on all other CPU cores slow until the changed reference counter has been written back to memory. 
Because the length and the reference counter of an eBPF array are stored in the same cache line, this also means that changing the reference counter on one physical CPU core causes reads of the eBPF array's length to be slow on other physical CPU cores (intentional false sharing). While this doesn't 'control' the out-of-bounds speculation through masking the index as in commit b2157399cc98, triggering a manipulation of the map's reference counter is really trivial, so let's not allow it to easily affect max_entries. Splitting to separate cachelines also generally makes sense from a performance perspective anyway in that fast-path won't have a cache miss if the map gets pinned, reused in other progs, etc out of control path, thus also avoids unintentional false sharing. [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1b985ca4ffbe..fe2cb7c398e3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -43,7 +43,14 @@ struct bpf_map_ops { }; struct bpf_map { - atomic_t refcnt; + /* 1st cacheline with read-mostly members of which some + * are also accessed in fast-path (e.g. ops, max_entries). + */ + const struct bpf_map_ops *ops ____cacheline_aligned; + struct bpf_map *inner_map_meta; +#ifdef CONFIG_SECURITY + void *security; +#endif enum bpf_map_type map_type; u32 key_size; u32 value_size; @@ -53,15 +60,16 @@ struct bpf_map { u32 id; int numa_node; bool unpriv_array; - struct user_struct *user; - const struct bpf_map_ops *ops; - struct work_struct work; + /* 7 bytes hole */ + + /* 2nd cacheline with misc members to avoid false sharing + * particularly with refcounting.
+ */ + struct user_struct *user ____cacheline_aligned; + atomic_t refcnt; atomic_t usercnt; - struct bpf_map *inner_map_meta; + struct work_struct work; char name[BPF_OBJ_NAME_LEN]; -#ifdef CONFIG_SECURITY - void *security; -#endif }; /* function argument constraints */ -- cgit v1.2.3 From ee3e4de525aad5d9b2ef1fdd28341587a97d740e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 9 Jan 2018 10:09:15 -0800 Subject: blk-mq: Fix spelling in a source code comment Change "nedeing" into "needing" and "caes" into "cases". Fixes: commit f906a6a0f426 ("blk-mq: improve tag waiting setup for non-shared tags") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 8de354606690..9aa24c9508f9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1089,8 +1089,8 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, /* * Mark us waiting for a tag. For shared tags, this involves hooking us into - * the tag wakeups. For non-shared tags, we can simply mark us nedeing a - * restart. For both caes, take care to check the condition again after + * the tag wakeups. For non-shared tags, we can simply mark us needing a + * restart. For both cases, take care to check the condition again after * marking us as waiting. */ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx, -- cgit v1.2.3 From aa98192dead2027a8f20a609472cdd7caf15dae4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 9 Jan 2018 10:11:00 -0800 Subject: block: Fix kernel-doc warnings reported when building with W=1 Commit 3a025e1d1c2e ("Add optional check for bad kernel-doc comments") causes W=1 the kernel-doc script to be run and thereby causes several new warnings to appear when building the kernel with W=1. 
Fix the block layer kernel-doc headers such that the block layer again builds cleanly with W=1. Signed-off-by: Bart Van Assche Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/bsg-lib.c | 3 ++- block/scsi_ioctl.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 15d25ccd51a5..1474153f73e3 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -30,7 +30,7 @@ /** * bsg_teardown_job - routine to teardown a bsg job - * @job: bsg_job that is to be torn down + * @kref: kref inside bsg_job that is to be torn down */ static void bsg_teardown_job(struct kref *kref) { @@ -251,6 +251,7 @@ static void bsg_exit_rq(struct request_queue *q, struct request *req) * @name: device to give bsg device * @job_fn: bsg job handler * @dd_job_size: size of LLD data needed for each job + * @release: @dev release function */ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, bsg_job_fn *job_fn, int dd_job_size, diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index edcfff974527..5cddff44a2f8 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -384,9 +384,10 @@ out_put_request: /** * sg_scsi_ioctl -- handle deprecated SCSI_IOCTL_SEND_COMMAND ioctl - * @file: file this ioctl operates on (optional) * @q: request queue to send scsi commands down * @disk: gendisk to operate on (option) + * @mode: mode used to open the file through which the ioctl has been + * submitted * @sic: userspace structure describing the command to perform * * Send down the scsi command described by @sic to the device below @@ -415,10 +416,10 @@ out_put_request: * Positive numbers returned are the compacted SCSI error codes (4 * bytes in one int) where the lowest byte is the SCSI status. 
*/ -#define OMAX_SB_LEN 16 /* For backward compatibility */ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, struct scsi_ioctl_command __user *sic) { + enum { OMAX_SB_LEN = 16 }; /* For backward compatibility */ struct request *rq; struct scsi_request *req; int err; -- cgit v1.2.3 From 68fa24f9121c04ef146b5158f538c8b32f285be5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2018 21:54:00 +0100 Subject: EDAC, mv64x60: Fix an error handling path We should not call edac_mc_del_mc() if a corresponding call to edac_mc_add_mc() has not been performed yet. So here, we should go to err instead of err2 to branch at the right place of the error handling path. Signed-off-by: Christophe JAILLET Cc: linux-edac Link: http://lkml.kernel.org/r/20180107205400.14068-1-christophe.jaillet@wanadoo.fr Signed-off-by: Borislav Petkov --- drivers/edac/mv64x60_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index ec5d695bbb72..3c68bb525d5d 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -758,7 +758,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev) /* Non-ECC RAM? */ printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__); res = -ENODEV; - goto err2; + goto err; } edac_dbg(3, "init mci\n"); -- cgit v1.2.3 From 3609c471a1b86bffc812d8a2f0299892aa11a5e6 Mon Sep 17 00:00:00 2001 From: Michael Lyle Date: Tue, 9 Jan 2018 11:13:23 -0800 Subject: bcache: closures: move control bits one bit right Otherwise, architectures that do negated adds of atomics (e.g. s390) to do atomic_sub fail in closure_set_stopped. 
Signed-off-by: Michael Lyle Cc: Kent Overstreet Reported-by: kbuild test robot Signed-off-by: Jens Axboe --- drivers/md/bcache/closure.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 392a87cf1b92..3b9dfc9962ad 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -127,10 +127,10 @@ enum closure_state { * annotate where references are being transferred. */ - CLOSURE_BITS_START = (1U << 27), - CLOSURE_DESTRUCTOR = (1U << 27), - CLOSURE_WAITING = (1U << 29), - CLOSURE_RUNNING = (1U << 31), + CLOSURE_BITS_START = (1U << 26), + CLOSURE_DESTRUCTOR = (1U << 26), + CLOSURE_WAITING = (1U << 28), + CLOSURE_RUNNING = (1U << 30), }; #define CLOSURE_GUARD_MASK \ -- cgit v1.2.3 From 8abef10b3de1144cfe968f454946f13eb1ac3d0a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Jan 2018 12:20:51 -0700 Subject: bfq-iosched: don't call bfqg_and_blkg_put for !CONFIG_BFQ_GROUP_IOSCHED It's not available if we don't have group io scheduling set, and there's no need to call it. 
Fixes: 0d52af590552 ("block, bfq: release oom-queue ref to root group on exit") Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 7c0b7f60811c..5e6f837f663e 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -4893,10 +4893,10 @@ static void bfq_exit_queue(struct elevator_queue *e) hrtimer_cancel(&bfqd->idle_slice_timer); +#ifdef CONFIG_BFQ_GROUP_IOSCHED /* release oom-queue reference to root group */ bfqg_and_blkg_put(bfqd->root_group); -#ifdef CONFIG_BFQ_GROUP_IOSCHED blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); #else spin_lock_irq(&bfqd->lock); -- cgit v1.2.3 From 290af86629b25ffd1ed6232c4e9107da031705cb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 9 Jan 2018 10:04:29 -0800 Subject: bpf: introduce BPF_JIT_ALWAYS_ON config The BPF interpreter has been used as part of the spectre 2 attack CVE-2017-5715. A quote from the Google Project Zero blog: "At this point, it would normally be necessary to locate gadgets in the host kernel code that can be used to actually leak data by reading from an attacker-controlled location, shifting and masking the result appropriately and then using the result of that as offset to an attacker-controlled address for a load. But piecing gadgets together and figuring out which ones work in a speculation context seems annoying. So instead, we decided to use the eBPF interpreter, which is built into the host kernel - while there is no legitimate way to invoke it from inside a VM, the presence of the code in the host kernel's text section is sufficient to make it usable for the attack, just like with ordinary ROP gadgets." To make the attacker's job harder, introduce BPF_JIT_ALWAYS_ON config option that removes interpreter from the kernel in favor of JIT-only mode.
So far eBPF JIT is supported by: x64, arm64, arm32, sparc64, s390, powerpc64, mips64 The start of JITed program is randomized and code page is marked as read-only. In addition "constant blinding" can be turned on with net.core.bpf_jit_harden v2->v3: - move __bpf_prog_ret0 under ifdef (Daniel) v1->v2: - fix init order, test_bpf and cBPF (Daniel's feedback) - fix offloaded bpf (Jakub's feedback) - add 'return 0' dummy in case something can invoke prog->bpf_func - retarget bpf tree. For bpf-next the patch would need one extra hunk. It will be sent when the trees are merged back to net-next Considered doing: int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT; but it seems better to land the patch as-is and in bpf-next remove bpf_jit_enable global variable from all JITs, consolidate in one place and remove this jit_init() function. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- init/Kconfig | 7 +++++++ kernel/bpf/core.c | 19 +++++++++++++++++++ lib/test_bpf.c | 11 +++++++---- net/core/filter.c | 6 ++---- net/core/sysctl_net_core.c | 6 ++++++ net/socket.c | 9 +++++++++ 6 files changed, 50 insertions(+), 8 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 2934249fba46..5e2a4a391ba9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1392,6 +1392,13 @@ config BPF_SYSCALL Enable the bpf() system call that allows to manipulate eBPF programs and maps via file descriptors. 
+config BPF_JIT_ALWAYS_ON + bool "Permanently enable BPF JIT and remove BPF interpreter" + depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT + help + Enables BPF JIT and removes BPF interpreter to avoid + speculative execution of BPF instructions by the interpreter + config USERFAULTFD bool "Enable userfaultfd() system call" select ANON_INODES diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 86b50aa26ee8..51ec2dda7f08 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) } EXPORT_SYMBOL_GPL(__bpf_call_base); +#ifndef CONFIG_BPF_JIT_ALWAYS_ON /** * __bpf_prog_run - run eBPF program on a given context * @ctx: is the data we are operating on @@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) }; +#else +static unsigned int __bpf_prog_ret0(const void *ctx, + const struct bpf_insn *insn) +{ + return 0; +} +#endif + bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { @@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { +#ifndef CONFIG_BPF_JIT_ALWAYS_ON u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; +#else + fp->bpf_func = __bpf_prog_ret0; +#endif /* eBPF JITs can rewrite the program in case constant * blinding is active. 
However, in case of error during @@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) */ if (!bpf_prog_is_dev_bound(fp->aux)) { fp = bpf_int_jit_compile(fp); +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + if (!fp->jited) { + *err = -ENOTSUPP; + return fp; + } +#endif } else { *err = bpf_prog_offload_compile(fp); if (*err) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 9e9748089270..f369889e521d 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6250,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err) return NULL; } } - /* We don't expect to fail. */ if (*err) { - pr_cont("FAIL to attach err=%d len=%d\n", + pr_cont("FAIL to prog_create err=%d len=%d\n", *err, fprog.len); return NULL; } @@ -6276,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err) * checks. */ fp = bpf_prog_select_runtime(fp, err); + if (*err) { + pr_cont("FAIL to select_runtime err=%d\n", *err); + return NULL; + } break; } @@ -6461,8 +6464,8 @@ static __init int test_bpf(void) pass_cnt++; continue; } - - return err; + err_cnt++; + continue; } pr_cont("jited:%u ", fp->jited); diff --git a/net/core/filter.c b/net/core/filter.c index 6a85e67fafce..d339ef170df6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) */ goto out_err_free; - /* We are guaranteed to never error here with cBPF to eBPF - * transitions, since there's no issue with type compatibility - * checks on program arrays. 
- */ fp = bpf_prog_select_runtime(fp, &err); + if (err) + goto out_err_free; kfree(old_prog); return fp; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cbc3dde4cfcc..a47ad6cd41c0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, +#ifndef CONFIG_BPF_JIT_ALWAYS_ON .proc_handler = proc_dointvec +#else + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, +#endif }, # ifdef CONFIG_HAVE_EBPF_JIT { diff --git a/net/socket.c b/net/socket.c index 05f361faec45..78acd6ce74c7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2619,6 +2619,15 @@ out_fs: core_initcall(sock_init); /* early initcall */ +static int __init jit_init(void) +{ +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + bpf_jit_enable = 1; +#endif + return 0; +} +pure_initcall(jit_init); + #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { -- cgit v1.2.3 From 5448aca41cd58e1a20574b6f29a8478bbb123dc3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Jan 2018 12:47:24 -0700 Subject: null_blk: wire up timeouts This is needed to ensure that we actually handle timeouts. Without it, the queue_mode=1 path will never call blk_add_timer(), and the queue_mode=2 path will continually just return EH_RESET_TIMER and we never actually complete the offending request. This was used to test the new timeout code, and the changes around killing off REQ_ATOM_COMPLETE. 
Signed-off-by: Jens Axboe --- drivers/block/null_blk.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 1e1981c6c557..78267e3e4fa5 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1341,6 +1341,12 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } +static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) +{ + pr_info("null: rq %p timed out\n", rq); + return BLK_EH_HANDLED; +} + static int null_rq_prep_fn(struct request_queue *q, struct request *req) { struct nullb *nullb = q->queuedata; @@ -1371,6 +1377,12 @@ static void null_request_fn(struct request_queue *q) } } +static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) +{ + pr_info("null: rq %p timed out\n", rq); + return BLK_EH_HANDLED; +} + static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1394,6 +1406,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, static const struct blk_mq_ops null_mq_ops = { .queue_rq = null_queue_rq, .complete = null_softirq_done_fn, + .timeout = null_timeout_rq, }; static void cleanup_queue(struct nullb_queue *nq) @@ -1654,6 +1667,7 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; + nullb->tag_set->timeout = 5 * HZ; nullb->q = blk_mq_init_queue(nullb->tag_set); if (IS_ERR(nullb->q)) { rv = -ENOMEM; @@ -1679,6 +1693,8 @@ static int null_add_dev(struct nullb_device *dev) } blk_queue_prep_rq(nullb->q, null_rq_prep_fn); blk_queue_softirq_done(nullb->q, null_softirq_done_fn); + blk_queue_rq_timed_out(nullb->q, null_rq_timed_out_fn); + nullb->q->rq_timeout = 5 * HZ; rv = init_driver_queues(nullb); if (rv) goto out_cleanup_blk_queue; -- cgit v1.2.3 From 0b04ea6822936757cf7b18713e3d6ad05e97c883 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Jan 2018 10:45:03 +0000 Subject: hexagon: Make 
THREAD_SIZE available to vmlinux.lds Make THREAD_SIZE available to vmlinux.lds on hexagon by including asm/thread_info.h in the linker script. This allows init_stack to be allocated in the linker script in a subsequent patch. Reported-by: Guenter Roeck Signed-off-by: David Howells Reviewed-by: Guenter Roeck Acked-by: Richard Kuo cc: linux-hexagon@vger.kernel.org --- arch/hexagon/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index ec87e67feb19..ad69d181c939 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -22,6 +22,8 @@ #include /* Most of the kernel defines are here */ #include /* except for page_offset */ #include /* and now we're pulling cache line size */ +#include /* and we need THREAD_SIZE too */ + OUTPUT_ARCH(hexagon) ENTRY(stext) -- cgit v1.2.3 From 138101932054268a235c6ac7cf474f6a88fcd885 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 8 Jan 2018 10:49:09 +0000 Subject: openrisc: Make THREAD_SIZE available to vmlinux.lds Make THREAD_SIZE available to vmlinux.lds on openrisc by including asm/thread_info.h in the linker script. This allows init_stack to be allocated in the linker script in a subsequent patch.
Reported-by: Guenter Roeck Signed-off-by: David Howells Tested-by: Guenter Roeck Acked-by: Stafford Horne cc: Jonas Bonn cc: Stefan Kristiansson cc: openrisc@lists.librecores.org --- arch/openrisc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S index 00ddb7804be4..953bdcd54efe 100644 --- a/arch/openrisc/kernel/vmlinux.lds.S +++ b/arch/openrisc/kernel/vmlinux.lds.S @@ -28,6 +28,7 @@ #include #include +#include #include #ifdef __OR1K__ -- cgit v1.2.3 From 0500871f21b237b2bea2d9db405eadf78e5aab05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 15:12:01 +0000 Subject: Construct init thread stack in the linker script rather than by union Construct the init thread stack in the linker script rather than doing it by means of a union so that ia64's init_task.c can be got rid of. The following symbols are then made available from INIT_TASK_DATA() linker script macro: init_thread_union init_stack INIT_TASK_DATA() also expands the region to THREAD_SIZE to accommodate the size of the init stack. init_thread_union is given its own section so that it can be placed into the stack space in the right order. I'm assuming that the ia64 ordering is correct and that the task_struct is first and the thread_info second. 
Signed-off-by: David Howells Tested-by: Tony Luck Tested-by: Will Deacon (arm64) Tested-by: Palmer Dabbelt Acked-by: Thomas Gleixner --- arch/Kconfig | 4 +-- arch/alpha/include/asm/thread_info.h | 3 --- arch/arc/include/asm/thread_info.h | 3 --- arch/arm/include/asm/thread_info.h | 3 --- arch/arm64/include/asm/thread_info.h | 2 -- arch/blackfin/include/asm/thread_info.h | 2 -- arch/c6x/include/asm/thread_info.h | 3 --- arch/cris/include/asm/processor.h | 2 -- arch/cris/include/asm/thread_info.h | 2 -- arch/frv/include/asm/thread_info.h | 3 --- arch/h8300/include/asm/thread_info.h | 3 --- arch/hexagon/include/asm/thread_info.h | 3 --- arch/ia64/Kconfig | 2 +- arch/ia64/Makefile | 2 +- arch/ia64/include/asm/thread_info.h | 4 +-- arch/ia64/kernel/Makefile | 2 +- arch/ia64/kernel/init_task.c | 44 ------------------------------- arch/ia64/kernel/vmlinux.lds.S | 1 + arch/m32r/include/asm/thread_info.h | 3 --- arch/m68k/include/asm/thread_info.h | 4 --- arch/metag/include/asm/thread_info.h | 3 --- arch/microblaze/include/asm/thread_info.h | 3 --- arch/mips/include/asm/thread_info.h | 3 --- arch/mn10300/include/asm/thread_info.h | 2 -- arch/nios2/include/asm/thread_info.h | 3 --- arch/openrisc/include/asm/processor.h | 2 -- arch/openrisc/include/asm/thread_info.h | 2 -- arch/parisc/include/asm/thread_info.h | 3 --- arch/powerpc/include/asm/thread_info.h | 3 --- arch/riscv/include/asm/thread_info.h | 2 -- arch/s390/include/asm/thread_info.h | 2 -- arch/score/include/asm/thread_info.h | 3 --- arch/sh/include/asm/thread_info.h | 3 --- arch/sparc/include/asm/thread_info_32.h | 3 --- arch/sparc/include/asm/thread_info_64.h | 3 --- arch/tile/include/asm/thread_info.h | 3 --- arch/um/include/asm/processor-generic.h | 5 +++- arch/um/include/asm/thread_info.h | 9 +++---- arch/um/include/asm/vmlinux.lds.h | 2 ++ arch/um/kernel/dyn.lds.S | 3 +-- arch/um/kernel/um_arch.c | 2 +- arch/um/kernel/uml.lds.S | 2 +- arch/unicore32/include/asm/thread_info.h | 3 --- 
arch/x86/include/asm/thread_info.h | 2 -- arch/xtensa/include/asm/thread_info.h | 3 --- include/asm-generic/vmlinux.lds.h | 4 +++ include/linux/init_task.h | 3 +++ include/linux/sched.h | 9 +++++++ init/Makefile | 2 -- init/init_task.c | 10 ++++--- 50 files changed, 42 insertions(+), 155 deletions(-) delete mode 100644 arch/ia64/kernel/init_task.c create mode 100644 arch/um/include/asm/vmlinux.lds.h diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..a26d6f8ab967 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -234,8 +234,8 @@ config ARCH_HAS_FORTIFY_SOURCE config ARCH_HAS_SET_MEMORY bool -# Select if arch init_task initializer is different to init/init_task.c -config ARCH_INIT_TASK +# Select if arch init_task must go in the __init_task_data section +config ARCH_TASK_STRUCT_ON_STACK bool # Select if arch has its private alloc_task_struct() function diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 8c20c5e35432..807d7b9a1860 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -39,9 +39,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* How to get the thread information struct from C. 
*/ register struct thread_info *__current_thread_info __asm__("$8"); #define current_thread_info() __current_thread_info diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 2d79e527fa50..c85947bac5e5 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -62,9 +62,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - static inline __attribute_const__ struct thread_info *current_thread_info(void) { register unsigned long sp asm("sp"); diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 776757d1604a..e71cc35de163 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -75,9 +75,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* * how to get the current stack pointer in C */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index eb431286bacd..740aa03c5f0d 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -51,8 +51,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_stack (init_thread_union.stack) - #define thread_saved_pc(tsk) \ ((unsigned long)(tsk->thread.cpu_context.pc)) #define thread_saved_sp(tsk) \ diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h index 2966b93850a1..a5aeab4e5f2d 100644 --- a/arch/blackfin/include/asm/thread_info.h +++ b/arch/blackfin/include/asm/thread_info.h @@ -56,8 +56,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) /* Given a task stack pointer, you can find its corresponding * thread_info structure 
just by masking it to the THREAD_SIZE diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h index acc70c135ab8..59a5697fe0f3 100644 --- a/arch/c6x/include/asm/thread_info.h +++ b/arch/c6x/include/asm/thread_info.h @@ -60,9 +60,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* get the thread information struct of current task */ static inline __attribute__((const)) struct thread_info *current_thread_info(void) diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index b50907799cb2..ee4d8b03d048 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -52,8 +52,6 @@ static inline void release_thread(struct task_struct *dead_task) /* Nothing needs to be done. */ } -#define init_stack (init_thread_union.stack) - #define cpu_relax() barrier() void default_idle(void); diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 108f77081a3c..996fef3be1d5 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -63,8 +63,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) - #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index ccba3b6ce918..0f950845fad9 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -64,9 +64,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ register struct thread_info *__current_thread_info asm("gr15"); diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 072b92c0d8b5..0cdaa302d3d2 100644 --- 
a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -46,9 +46,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h index b80fe1db7b64..f41f9c6f0e31 100644 --- a/arch/hexagon/include/asm/thread_info.h +++ b/arch/hexagon/include/asm/thread_info.h @@ -84,9 +84,6 @@ struct thread_info { .regs = NULL, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* Tacky preprocessor trickery */ #define qqstr(s) qstr(s) #define qstr(s) #s diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 49583c5a5d44..315c51f58811 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -43,7 +43,7 @@ config IA64 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP select GENERIC_SMP_IDLE_THREAD - select ARCH_INIT_TASK + select ARCH_TASK_STRUCT_ON_STACK select ARCH_TASK_STRUCT_ALLOCATOR select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index c100d780f1eb..2dd7f519ad0b 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -42,7 +42,7 @@ $(error Sorry, you need a newer version of the assember, one that is built from endif KBUILD_CFLAGS += $(cflags-y) -head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o +head-y := arch/ia64/kernel/head.o libs-y += arch/ia64/lib/ core-y += arch/ia64/kernel/ arch/ia64/mm/ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 1d172a4119a7..64a1011f6812 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -12,6 +12,8 @@ #include #include +#define THREAD_SIZE KERNEL_STACK_SIZE + #ifndef __ASSEMBLY__ 
/* @@ -41,8 +43,6 @@ struct thread_info { #endif }; -#define THREAD_SIZE KERNEL_STACK_SIZE - #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 14ad79f394e5..0b4c65a1af25 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -7,7 +7,7 @@ ifdef CONFIG_DYNAMIC_FTRACE CFLAGS_REMOVE_ftrace.o = -pg endif -extra-y := head.o init_task.o vmlinux.lds +extra-y := head.o vmlinux.lds obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c deleted file mode 100644 index 8df9245e29d9..000000000000 --- a/arch/ia64/kernel/init_task.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This is where we statically allocate and initialize the initial - * task. - * - * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co - * David Mosberger-Tang - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -/* - * Initial task structure. - * - * We need to make sure that this is properly aligned due to the way process stacks are - * handled. This is done by having a special ".data..init_task" section... 
- */ -#define init_thread_info init_task_mem.s.thread_info -#define init_stack init_task_mem.stack - -union { - struct { - struct task_struct task; - struct thread_info thread_info; - } s; - unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; -} init_task_mem asm ("init_task") __init_task_data = - {{ - .task = INIT_TASK(init_task_mem.s.task), - .thread_info = INIT_THREAD_INFO(init_task_mem.s.task) -}}; - -EXPORT_SYMBOL(init_task); diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 58db59da0bd8..b0b2070e0591 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -3,6 +3,7 @@ #include #include #include +#include #include diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index b3a215b0ce0a..ba00f1032587 100644 --- a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -56,9 +56,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h index 928035591f2e..015f1ca38305 100644 --- a/arch/m68k/include/asm/thread_info.h +++ b/arch/m68k/include/asm/thread_info.h @@ -41,8 +41,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_stack (init_thread_union.stack) - #ifndef __ASSEMBLY__ /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) @@ -58,8 +56,6 @@ static inline struct thread_info *current_thread_info(void) } #endif -#define init_thread_info (init_thread_union.thread_info) - /* entry.S relies on these definitions! 
* bits 0-7 are tested at every exception exit * bits 8-15 are also tested at syscall exit diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h index 554f73a77e6e..a1a9c7f5ca8c 100644 --- a/arch/metag/include/asm/thread_info.h +++ b/arch/metag/include/asm/thread_info.h @@ -74,9 +74,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the current stack pointer from C */ register unsigned long current_stack_pointer asm("A0StP") __used; diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index e7e8954e9815..9afe4b5bd6c8 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -86,9 +86,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 5e8927f99a76..4993db40482c 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -49,9 +49,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* How to get the thread information struct from C. 
*/ register struct thread_info *__current_thread_info __asm__("$28"); diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index f5f90bbf019d..1748a7b25bf8 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -79,8 +79,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) #define init_uregs \ ((struct pt_regs *) \ ((unsigned long) init_stack + THREAD_SIZE - sizeof(struct pt_regs))) diff --git a/arch/nios2/include/asm/thread_info.h b/arch/nios2/include/asm/thread_info.h index d69c338bd19c..7349a4fa635b 100644 --- a/arch/nios2/include/asm/thread_info.h +++ b/arch/nios2/include/asm/thread_info.h @@ -63,9 +63,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 396d8f306c21..af31a9fe736a 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -84,8 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); -#define init_stack (init_thread_union.stack) - #define cpu_relax() barrier() #endif /* __ASSEMBLY__ */ diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index c229aa6bb502..5c15dfa2fd4f 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -79,8 +79,6 @@ struct thread_info { .ksp = 0, \ } -#define init_thread_info (init_thread_union.thread_info) - /* how to get the thread information struct from C */ register 
struct thread_info *current_thread_info_reg asm("r10"); #define current_thread_info() (current_thread_info_reg) diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 598c8d60fa5e..285757544cca 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -25,9 +25,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *)mfctl(30)) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index a264c3ad366b..4a12c00f8de3 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -58,9 +58,6 @@ struct thread_info { .flags = 0, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) /* how to get the thread information struct from C */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 22c3536ed281..f8fa1cd2dad9 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -64,8 +64,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_stack (init_thread_union.stack) - #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 0880a37b6d3b..25d6ec3aaddd 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -42,8 +42,6 @@ struct thread_info { .flags = 0, \ } -#define init_stack (init_thread_union.stack) - void arch_release_task_struct(struct task_struct *tsk); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); diff --git a/arch/score/include/asm/thread_info.h 
b/arch/score/include/asm/thread_info.h index ad51b56e51bd..bc4c7c90550f 100644 --- a/arch/score/include/asm/thread_info.h +++ b/arch/score/include/asm/thread_info.h @@ -58,9 +58,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* How to get the thread information struct from C. */ register struct thread_info *__current_thread_info __asm__("r28"); #define current_thread_info() __current_thread_info diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index becb798f1b04..cf5c792bf70b 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -63,9 +63,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the current stack pointer from C */ register unsigned long current_stack_pointer asm("r15") __used; diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index febaaeb1a0fe..548b366165dd 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -63,9 +63,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ register struct thread_info *current_thread_info_reg asm("g6"); #define current_thread_info() (current_thread_info_reg) diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index caf915321ba9..f7e7b0baec9f 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -120,9 +120,6 @@ struct thread_info { .preempt_count = INIT_PREEMPT_COUNT, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack 
(init_thread_union.stack) - /* how to get the thread information struct from C */ register struct thread_info *current_thread_info_reg asm("g6"); #define current_thread_info() (current_thread_info_reg) diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index b7659b8f1117..2adcacd85749 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -59,9 +59,6 @@ struct thread_info { .align_ctl = 0, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - #endif /* !__ASSEMBLY__ */ #if PAGE_SIZE < 8192 diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 86942a492454..b58b746d3f2c 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -58,7 +58,10 @@ static inline void release_thread(struct task_struct *task) { } -#define init_stack (init_thread_union.stack) +static inline void mm_copy_segments(struct mm_struct *from_mm, + struct mm_struct *new_mm) +{ +} /* * User space process size: 3GB (default). 
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 9300f7630d2a..4eecd960ee8c 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -6,6 +6,9 @@ #ifndef __UM_THREAD_INFO_H #define __UM_THREAD_INFO_H +#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER +#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) + #ifndef __ASSEMBLY__ #include @@ -37,10 +40,6 @@ struct thread_info { .real_thread = NULL, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - -#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { @@ -53,8 +52,6 @@ static inline struct thread_info *current_thread_info(void) return ti; } -#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER - #endif #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ diff --git a/arch/um/include/asm/vmlinux.lds.h b/arch/um/include/asm/vmlinux.lds.h new file mode 100644 index 000000000000..149494ae78ea --- /dev/null +++ b/arch/um/include/asm/vmlinux.lds.h @@ -0,0 +1,2 @@ +#include +#include diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index d417e3899700..5568cf882371 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -1,5 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include +#include #include OUTPUT_FORMAT(ELF_FORMAT) diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index f433690b9b37..a818ccef30ca 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -54,7 +54,7 @@ struct cpuinfo_um boot_cpu_data = { union thread_union cpu0_irqstack __attribute__((__section__(".data..init_irqstack"))) = - { INIT_THREAD_INFO(init_task) }; + { .thread_info = INIT_THREAD_INFO(init_task) }; /* Changed in setup_arch, which is called in early boot */ static char host_info[(__NEW_UTS_LEN + 1) * 5]; diff 
--git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 3d6ed6ba5b78..36b07ec09742 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include +#include #include OUTPUT_FORMAT(ELF_FORMAT) diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h index e79ad6d5b5b2..5fb728f3b49a 100644 --- a/arch/unicore32/include/asm/thread_info.h +++ b/arch/unicore32/include/asm/thread_info.h @@ -87,9 +87,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* * how to get the thread information struct from C */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 00223333821a..d25a638a2720 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -62,8 +62,6 @@ struct thread_info { .flags = 0, \ } -#define init_stack (init_thread_union.stack) - #else /* !__ASSEMBLY__ */ #include diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 7be2400f745a..2ccd37510aaa 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -77,9 +77,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) -#define init_stack (init_thread_union.stack) - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ee8b707d9fa9..a564b83bf013 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -268,7 +268,11 @@ #define INIT_TASK_DATA(align) \ . 
= ALIGN(align); \ VMLINUX_SYMBOL(__start_init_task) = .; \ + VMLINUX_SYMBOL(init_thread_union) = .; \ + VMLINUX_SYMBOL(init_stack) = .; \ *(.data..init_task) \ + *(.data..init_thread_info) \ + . = VMLINUX_SYMBOL(__start_init_task) + THREAD_SIZE; \ VMLINUX_SYMBOL(__end_init_task) = .; /* diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6a532629c983..30a89b99a5af 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -304,5 +304,8 @@ extern struct cred init_cred; /* Attach to the init_task data structure for proper alignment */ #define __init_task_data __attribute__((__section__(".data..init_task"))) +/* Attach to the thread_info data structure for proper alignment */ +#define __init_thread_info __attribute__((__section__(".data..init_thread_info"))) + #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index d2588263a989..68a504f6e474 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1446,12 +1446,21 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { +#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK + struct task_struct task; +#endif #ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; #endif unsigned long stack[THREAD_SIZE/sizeof(long)]; }; +#ifndef CONFIG_THREAD_INFO_IN_TASK +extern struct thread_info init_thread_info; +#endif + +extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; + #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) { diff --git a/init/Makefile b/init/Makefile index 1dbb23787290..a3e5ce2bcf08 100644 --- a/init/Makefile +++ b/init/Makefile @@ -13,9 +13,7 @@ obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o endif obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o -ifneq ($(CONFIG_ARCH_INIT_TASK),y) obj-y += init_task.o -endif mounts-y := do_mounts.o mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o diff --git a/init/init_task.c b/init/init_task.c 
index 9325fee7dc82..2285aa42cbe1 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -17,15 +17,17 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); /* Initial task structure */ -struct task_struct init_task = INIT_TASK(init_task); +struct task_struct init_task +#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK + __init_task_data +#endif + = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); /* * Initial thread structure. Alignment of this is handled by a special * linker map entry. */ -union thread_union init_thread_union __init_task_data = { #ifndef CONFIG_THREAD_INFO_IN_TASK - INIT_THREAD_INFO(init_task) +struct thread_info init_thread_info __init_thread_info = INIT_THREAD_INFO(init_task); #endif -}; -- cgit v1.2.3 From 67dcf8a3e06582cb6b02952335b5612beb97889f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Jan 2018 18:09:33 +0200 Subject: ACPI: utils: Introduce acpi_dev_get_first_match_name() Sometimes the user wants to have device name of the match rather than just checking if device present or not. To make life easier for such users introduce acpi_dev_get_first_match_name() helper based on code for acpi_dev_present(). For example, GPIO driver for Intel Merrifield needs to know the device name of pin control to be able to apply GPIO mapping table to the proper device. To be more consistent with the purpose rename struct acpi_dev_present_info -> struct acpi_dev_match_info acpi_dev_present_cb() -> acpi_dev_match_cb() in the utils.c file. Tested-by: Pierre-Louis Bossart Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/utils.c | 41 ++++++++++++++++++++++++++++++++++------- include/acpi/acpi_bus.h | 3 +++ include/linux/acpi.h | 6 ++++++ 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 9d49a1acebe3..78db97687f26 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -737,16 +737,17 @@ bool acpi_dev_found(const char *hid) } EXPORT_SYMBOL(acpi_dev_found); -struct acpi_dev_present_info { +struct acpi_dev_match_info { + const char *dev_name; struct acpi_device_id hid[2]; const char *uid; s64 hrv; }; -static int acpi_dev_present_cb(struct device *dev, void *data) +static int acpi_dev_match_cb(struct device *dev, void *data) { struct acpi_device *adev = to_acpi_device(dev); - struct acpi_dev_present_info *match = data; + struct acpi_dev_match_info *match = data; unsigned long long hrv; acpi_status status; @@ -757,6 +758,8 @@ static int acpi_dev_present_cb(struct device *dev, void *data) strcmp(adev->pnp.unique_id, match->uid))) return 0; + match->dev_name = acpi_dev_name(adev); + if (match->hrv == -1) return 1; @@ -789,20 +792,44 @@ static int acpi_dev_present_cb(struct device *dev, void *data) */ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) { - struct acpi_dev_present_info match = {}; + struct acpi_dev_match_info match = {}; struct device *dev; strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); match.uid = uid; match.hrv = hrv; - dev = bus_find_device(&acpi_bus_type, NULL, &match, - acpi_dev_present_cb); - + dev = bus_find_device(&acpi_bus_type, NULL, &match, acpi_dev_match_cb); return !!dev; } EXPORT_SYMBOL(acpi_dev_present); +/** + * acpi_dev_get_first_match_name - Return name of first match of ACPI device + * @hid: Hardware ID of the device. 
+ * @uid: Unique ID of the device, pass NULL to not check _UID + * @hrv: Hardware Revision of the device, pass -1 to not check _HRV + * + * Return device name if a matching device was present + * at the moment of invocation, or NULL otherwise. + * + * See additional information in acpi_dev_present() as well. + */ +const char * +acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) +{ + struct acpi_dev_match_info match = {}; + struct device *dev; + + strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); + match.uid = uid; + match.hrv = hrv; + + dev = bus_find_device(&acpi_bus_type, NULL, &match, acpi_dev_match_cb); + return dev ? match.dev_name : NULL; +} +EXPORT_SYMBOL(acpi_dev_get_first_match_name); + /* * acpi_backlight= handling, this is done here rather then in video_detect.c * because __setup cannot be used in modules. diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 79287629c888..c9608b0b80c6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -91,6 +91,9 @@ acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, bool acpi_dev_found(const char *hid); bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); +const char * +acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv); + #ifdef CONFIG_ACPI #include diff --git a/include/linux/acpi.h b/include/linux/acpi.h index dc1ebfeeb5ec..d918f1ea84e6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -640,6 +640,12 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) return false; } +static inline const char * +acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) +{ + return NULL; +} + static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; -- cgit v1.2.3 From dd1dbf94d2826a045fbbe2649d84b27d48620d56 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 5 Jan 2018 18:09:34 +0200 Subject: gpio: merrifield: Add support of ACPI 
enabled platforms The driver needs the pin control device name for ACPI. We look through the ACPI namespace and return the first device found, based on the ACPI HID for Intel Merrifield FLIS (pin control device). Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- drivers/gpio/gpio-merrifield.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index dd67a31ac337..c38624ea0251 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -9,6 +9,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -380,9 +381,16 @@ static void mrfld_irq_init_hw(struct mrfld_gpio *priv) } } +static const char *mrfld_gpio_get_pinctrl_dev_name(void) +{ + const char *dev_name = acpi_dev_get_first_match_name("INTC1002", NULL, -1); + return dev_name ? dev_name : "pinctrl-merrifield"; +} + static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id) { const struct mrfld_gpio_pinrange *range; + const char *pinctrl_dev_name; struct mrfld_gpio *priv; u32 gpio_base, irq_base; void __iomem *base; @@ -439,10 +447,11 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id return retval; } + pinctrl_dev_name = mrfld_gpio_get_pinctrl_dev_name(); for (i = 0; i < ARRAY_SIZE(mrfld_gpio_ranges); i++) { range = &mrfld_gpio_ranges[i]; retval = gpiochip_add_pin_range(&priv->chip, - "pinctrl-merrifield", + pinctrl_dev_name, range->gpio_base, range->pin_base, range->npins); -- cgit v1.2.3 From 8425ec7faff005500aad89b9fc00e5ba91ac57b9 Mon Sep 17 00:00:00 2001 From: "Rafael J.
Wysocki" Date: Wed, 3 Jan 2018 01:34:53 +0100 Subject: PM / mfd: intel-lpss: Use DPM_FLAG_SMART_SUSPEND Make the intel-lpss driver set DPM_FLAG_SMART_SUSPEND for its devices which will allow them to stay in runtime suspend during system suspend unless they need to be reconfigured for some reason. Also make it avoid resuming its child devices if they have DPM_FLAG_SMART_SUSPEND set to allow them to remain in runtime suspend during system suspend. Signed-off-by: Rafael J. Wysocki Acked-for-MFD-by: Lee Jones Tested-by: Jarkko Nikula --- drivers/mfd/intel-lpss.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index 0e0ab9bb1530..9e545eb6e8b4 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -450,6 +450,8 @@ int intel_lpss_probe(struct device *dev, if (ret) goto err_remove_ltr; + dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); + return 0; err_remove_ltr: @@ -478,7 +480,9 @@ EXPORT_SYMBOL_GPL(intel_lpss_remove); static int resume_lpss_device(struct device *dev, void *data) { - pm_runtime_resume(dev); + if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) + pm_runtime_resume(dev); + return 0; } -- cgit v1.2.3 From 422cb781e0d0f81789a1cc0f2171611028450f09 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Jan 2018 01:35:54 +0100 Subject: PM: i2c-designware-platdrv: Use DPM_FLAG_SMART_PREPARE Modify i2c-designware-platdrv to set DPM_FLAG_SMART_PREPARE for its devices and return 0 from the system suspend ->prepare callback if the device has an ACPI companion object in order to tell the PM core and middle layers to avoid skipping system suspend/resume callbacks for the device in that case (which may be problematic, because the device may be accessed during suspend and resume of other devices via I2C operation regions then). 
Also the pm_runtime_suspended() check in dw_i2c_plat_prepare() is not necessary any more, because the core does it when setting power.direct_complete for the device, so drop it. Signed-off-by: Rafael J. Wysocki Acked-by: Jarkko Nikula Acked-by: Wolfram Sang Tested-by: Jarkko Nikula --- drivers/i2c/busses/i2c-designware-platdrv.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 58add69a441c..4f90a6dc186f 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -372,6 +372,8 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); adap->dev.of_node = pdev->dev.of_node; + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE); + /* The code below assumes runtime PM to be disabled. */ WARN_ON(pm_runtime_enabled(&pdev->dev)); @@ -435,7 +437,13 @@ MODULE_DEVICE_TABLE(of, dw_i2c_of_match); #ifdef CONFIG_PM_SLEEP static int dw_i2c_plat_prepare(struct device *dev) { - return pm_runtime_suspended(dev); + /* + * If the ACPI companion device object is present for this device, it + * may be accessed during suspend and resume of other devices via I2C + * operation regions, so tell the PM core and middle layers to avoid + * skipping system suspend/resume callbacks for it in that case. + */ + return !has_acpi_companion(dev); } static void dw_i2c_plat_complete(struct device *dev) -- cgit v1.2.3 From 02e45646d53bdb38bfb47b83765778d3ecb4d3b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Jan 2018 01:37:34 +0100 Subject: PM: i2c-designware-platdrv: Optimize power management Optimize the power management in i2c-designware-platdrv by making it set the DPM_FLAG_SMART_SUSPEND and DPM_FLAG_LEAVE_SUSPENDED which allows some code to be dropped from its PM callbacks. 
First, setting DPM_FLAG_SMART_SUSPEND causes the intel-lpss driver to avoid resuming i2c-designware-platdrv devices in its ->prepare callback, so they can stay in runtime suspend after that point even if the direct-complete feature is not used for them. It also causes the ACPI PM domain and the PM core to avoid invoking "late" and "noirq" suspend callbacks for these devices if they are in runtime suspend at the beginning of the "late" phase of device suspend during system suspend. That guarantees dw_i2c_plat_suspend() to be called for a device only if it is not in runtime suspend. Moreover, it causes the device's runtime PM status to be set to "active" after calling dw_i2c_plat_resume() for it, so the driver doesn't need internal flags to avoid invoking either dw_i2c_plat_suspend() or dw_i2c_plat_resume() twice in a row. Second, setting DPM_FLAG_LEAVE_SUSPENDED enables the optimization allowing the device to stay suspended after system resume under suitable conditions, so again the driver doesn't need to take care of that by itself. Accordingly, the internal "suspended" and "skip_resume" flags used by the driver are not necessary any more, so drop them and simplify the driver's PM callbacks. Additionally, notice that dw_i2c_plat_complete() only needs to schedule runtime PM resume for the device if platform firmware has been involved in resuming the system, so make it call pm_resume_via_firmware() to check that. Also make it check the runtime PM status of the device instead of its direct_complete flag which also works if the device remained suspended due to the DPM_FLAG_LEAVE_SUSPENDED driver flag. Signed-off-by: Rafael J. 
Wysocki Acked-by: Jarkko Nikula Acked-by: Wolfram Sang Tested-by: Jarkko Nikula --- drivers/i2c/busses/i2c-designware-core.h | 2 -- drivers/i2c/busses/i2c-designware-platdrv.c | 31 +++++++++++------------------ 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 21bf619a86c5..9fee4c054d3d 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -280,8 +280,6 @@ struct dw_i2c_dev { int (*acquire_lock)(struct dw_i2c_dev *dev); void (*release_lock)(struct dw_i2c_dev *dev); bool pm_disabled; - bool suspended; - bool skip_resume; void (*disable)(struct dw_i2c_dev *dev); void (*disable_int)(struct dw_i2c_dev *dev); int (*init)(struct dw_i2c_dev *dev); diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 4f90a6dc186f..153b947702c5 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "i2c-designware-core.h" @@ -372,7 +373,10 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) ACPI_COMPANION_SET(&adap->dev, ACPI_COMPANION(&pdev->dev)); adap->dev.of_node = pdev->dev.of_node; - dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE); + dev_pm_set_driver_flags(&pdev->dev, + DPM_FLAG_SMART_PREPARE | + DPM_FLAG_SMART_SUSPEND | + DPM_FLAG_LEAVE_SUSPENDED); /* The code below assumes runtime PM to be disabled. 
*/ WARN_ON(pm_runtime_enabled(&pdev->dev)); @@ -448,7 +452,13 @@ static int dw_i2c_plat_prepare(struct device *dev) static void dw_i2c_plat_complete(struct device *dev) { - if (dev->power.direct_complete) + /* + * The device can only be in runtime suspend at this point if it has not + * been resumed throughout the ending system suspend/resume cycle, so if + * the platform firmware might mess up with it, request the runtime PM + * framework to resume it. + */ + if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) pm_request_resume(dev); } #else @@ -461,16 +471,9 @@ static int dw_i2c_plat_suspend(struct device *dev) { struct dw_i2c_dev *i_dev = dev_get_drvdata(dev); - if (i_dev->suspended) { - i_dev->skip_resume = true; - return 0; - } - i_dev->disable(i_dev); i2c_dw_plat_prepare_clk(i_dev, false); - i_dev->suspended = true; - return 0; } @@ -478,19 +481,9 @@ static int dw_i2c_plat_resume(struct device *dev) { struct dw_i2c_dev *i_dev = dev_get_drvdata(dev); - if (!i_dev->suspended) - return 0; - - if (i_dev->skip_resume) { - i_dev->skip_resume = false; - return 0; - } - i2c_dw_plat_prepare_clk(i_dev, true); i_dev->init(i_dev); - i_dev->suspended = false; - return 0; } -- cgit v1.2.3 From 4bf236a3330e97d275e5848420f7e31948fef07a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 09:19:08 -0800 Subject: PM / sleep: Make lock/unlock_system_sleep() available to kernel modules Since pm_mutex is not exported using lock/unlock_system_sleep() from inside a kernel module causes a "pm_mutex undefined" linker error. Hence move lock/unlock_system_sleep() into kernel/power/main.c and export these. Signed-off-by: Bart Van Assche Signed-off-by: Rafael J. 
Wysocki --- include/linux/suspend.h | 28 ++-------------------------- kernel/power/main.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d60b0f5c38d5..cc22a24516d6 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -443,32 +443,8 @@ extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); extern void pm_print_active_wakeup_sources(void); -static inline void lock_system_sleep(void) -{ - current->flags |= PF_FREEZER_SKIP; - mutex_lock(&pm_mutex); -} - -static inline void unlock_system_sleep(void) -{ - /* - * Don't use freezer_count() because we don't want the call to - * try_to_freeze() here. - * - * Reason: - * Fundamentally, we just don't need it, because freezing condition - * doesn't come into effect until we release the pm_mutex lock, - * since the freezer always works with pm_mutex held. - * - * More importantly, in the case of hibernation, - * unlock_system_sleep() gets called in snapshot_read() and - * snapshot_write() when the freezing condition is still in effect. - * Which means, if we use try_to_freeze() here, it would make them - * enter the refrigerator, thus causing hibernation to lockup. - */ - current->flags &= ~PF_FREEZER_SKIP; - mutex_unlock(&pm_mutex); -} +extern void lock_system_sleep(void); +extern void unlock_system_sleep(void); #else /* !CONFIG_PM_SLEEP */ diff --git a/kernel/power/main.c b/kernel/power/main.c index 3a2ca9066583..705c2366dafe 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -22,6 +22,35 @@ DEFINE_MUTEX(pm_mutex); #ifdef CONFIG_PM_SLEEP +void lock_system_sleep(void) +{ + current->flags |= PF_FREEZER_SKIP; + mutex_lock(&pm_mutex); +} +EXPORT_SYMBOL_GPL(lock_system_sleep); + +void unlock_system_sleep(void) +{ + /* + * Don't use freezer_count() because we don't want the call to + * try_to_freeze() here. 
+ * + * Reason: + * Fundamentally, we just don't need it, because freezing condition + * doesn't come into effect until we release the pm_mutex lock, + * since the freezer always works with pm_mutex held. + * + * More importantly, in the case of hibernation, + * unlock_system_sleep() gets called in snapshot_read() and + * snapshot_write() when the freezing condition is still in effect. + * Which means, if we use try_to_freeze() here, it would make them + * enter the refrigerator, thus causing hibernation to lockup. + */ + current->flags &= ~PF_FREEZER_SKIP; + mutex_unlock(&pm_mutex); +} +EXPORT_SYMBOL_GPL(unlock_system_sleep); + /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); -- cgit v1.2.3 From 203f8c250e2195371d418b1e8466e4caf1a0ed51 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jan 2018 09:19:09 -0800 Subject: block, scsi: Fix race between SPI domain validation and system suspend Avoid that the following warning is reported when suspending a system that is using the mptspi driver: WARNING: CPU: 0 PID: 4187 at drivers/scsi/scsi_lib.c:2960 scsi_device_quiesce+0x20/0xb0 EIP: scsi_device_quiesce+0x20/0xb0 Call Trace: spi_dv_device+0x65/0x5f0 [scsi_transport_spi] mptspi_dv_device+0x4d/0x170 [mptspi] mptspi_dv_renegotiate_work+0x49/0xc0 [mptspi] process_one_work+0x190/0x2e0 worker_thread+0x37/0x3f0 kthread+0xcb/0x100 ret_from_fork+0x19/0x24 Fixes: 3a0a529971ec (block, scsi: Make SCSI quiesce and resume work reliably) Reported-by: Woody Suwalski Signed-off-by: Bart Van Assche Acked-by: Martin K. Petersen [ rjw : Subject ] Signed-off-by: Rafael J. 
Wysocki --- drivers/scsi/scsi_transport_spi.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index 10ebb213ddb3..871ea582029e 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "scsi_priv.h" #include @@ -1009,11 +1010,20 @@ spi_dv_device(struct scsi_device *sdev) u8 *buffer; const int len = SPI_MAX_ECHO_BUFFER_SIZE*2; + /* + * Because this function and the power management code both call + * scsi_device_quiesce(), it is not safe to perform domain validation + * while suspend or resume is in progress. Hence the + * lock/unlock_system_sleep() calls. + */ + lock_system_sleep(); + if (unlikely(spi_dv_in_progress(starget))) - return; + goto unlock; if (unlikely(scsi_device_get(sdev))) - return; + goto unlock; + spi_dv_in_progress(starget) = 1; buffer = kzalloc(len, GFP_KERNEL); @@ -1049,6 +1059,8 @@ spi_dv_device(struct scsi_device *sdev) out_put: spi_dv_in_progress(starget) = 0; scsi_device_put(sdev); +unlock: + unlock_system_sleep(); } EXPORT_SYMBOL(spi_dv_device); -- cgit v1.2.3 From fbe313884d7ddd73ce457473cbdf3763f5b1d3da Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Fri, 5 Jan 2018 14:31:16 -0800 Subject: tools/power/x86/intel_pstate_tracer: Free the trace buffer memory The trace buffer memory should be, mostly, freed after the buffer has been output. This patch is required before a future patch that will allow the user to override the default, and specify the trace buffer memory allocation as a command line option. Signed-off-by: Doug Smythies Acked-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- .../power/x86/intel_pstate_tracer/intel_pstate_tracer.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py index 0b24dd9d01ff..29f50d4cfea0 100755 --- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -411,6 +411,16 @@ def set_trace_buffer_size(): print('IO error setting trace buffer size ') quit() +def free_trace_buffer(): + """ Free the trace buffer memory """ + + try: + open('/sys/kernel/debug/tracing/buffer_size_kb' + , 'w').write("1") + except: + print('IO error setting trace buffer size ') + quit() + def read_trace_data(filename): """ Read and parse trace data """ @@ -583,4 +593,9 @@ for root, dirs, files in os.walk('.'): for f in files: fix_ownership(f) +clear_trace_file() +# Free the memory +if interval: + free_trace_buffer() + os.chdir('../../') -- cgit v1.2.3 From c9619bb293c9ab758ba298f039cf4b820dd8436f Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 8 Jan 2018 10:04:50 +0800 Subject: ARM: dts: imx6ul: add 696MHz operating point Add 696MHz operating point according to datasheet (Rev. 0, 12/2015). Signed-off-by: Anson Huang Reviewed-by: Fabio Estevam Acked-by: Dong Aisheng Signed-off-by: Rafael J. 
Wysocki --- arch/arm/boot/dts/imx6ul.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index d5181f85ca9c..963e1698fe1d 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -68,12 +68,14 @@ clock-latency = <61036>; /* two CLK32 periods */ operating-points = < /* kHz uV */ + 696000 1275000 528000 1175000 396000 1025000 198000 950000 >; fsl,soc-operating-points = < /* KHz uV */ + 696000 1275000 528000 1175000 396000 1175000 198000 1175000 -- cgit v1.2.3 From 5028f5d2b38ea68531d6b265b64e1741a141a828 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 8 Jan 2018 10:04:51 +0800 Subject: cpufreq: imx6q: add 696MHz operating point for i.mx6ul Add 696MHz operating point for i.MX6UL. Only those parts with speed grading fuse set to 2b'10 support the 696MHz operating point, so a speed grading check is also added for i.MX6UL in this patch. The clock tree for each operating point is as below: 696MHz: pll1 696000000 pll1_bypass 696000000 pll1_sys 696000000 pll1_sw 696000000 arm 696000000 528MHz: pll2 528000000 pll2_bypass 528000000 pll2_bus 528000000 ca7_secondary_sel 528000000 step 528000000 pll1_sw 528000000 arm 528000000 396MHz: pll2_pfd2_396m 396000000 ca7_secondary_sel 396000000 step 396000000 pll1_sw 396000000 arm 396000000 198MHz: pll2_pfd2_396m 396000000 ca7_secondary_sel 396000000 step 396000000 pll1_sw 396000000 arm 198000000 Signed-off-by: Anson Huang Reviewed-by: Fabio Estevam Acked-by: Viresh Kumar Acked-by: Dong Aisheng Signed-off-by: Rafael J.
Wysocki --- drivers/cpufreq/imx6q-cpufreq.c | 46 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 8bfb0775662b..741f22e5cee3 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -136,6 +136,10 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) clks[PLL2_PFD2_396M].clk); clk_set_parent(clks[STEP].clk, clks[SECONDARY_SEL].clk); clk_set_parent(clks[PLL1_SW].clk, clks[STEP].clk); + if (freq_hz > clk_get_rate(clks[PLL2_BUS].clk)) { + clk_set_rate(clks[PLL1_SYS].clk, new_freq * 1000); + clk_set_parent(clks[PLL1_SW].clk, clks[PLL1_SYS].clk); + } } else { clk_set_parent(clks[STEP].clk, clks[PLL2_PFD2_396M].clk); clk_set_parent(clks[PLL1_SW].clk, clks[STEP].clk); @@ -260,6 +264,43 @@ put_node: of_node_put(np); } +#define OCOTP_CFG3_6UL_SPEED_696MHZ 0x2 + +static void imx6ul_opp_check_speed_grading(struct device *dev) +{ + struct device_node *np; + void __iomem *base; + u32 val; + + np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp"); + if (!np) + return; + + base = of_iomap(np, 0); + if (!base) { + dev_err(dev, "failed to map ocotp\n"); + goto put_node; + } + + /* + * Speed GRADING[1:0] defines the max speed of ARM: + * 2b'00: Reserved; + * 2b'01: 528000000Hz; + * 2b'10: 696000000Hz; + * 2b'11: Reserved; + * We need to set the max speed of ARM according to fuse map. 
+ */ + val = readl_relaxed(base + OCOTP_CFG3); + val >>= OCOTP_CFG3_SPEED_SHIFT; + val &= 0x3; + if (val != OCOTP_CFG3_6UL_SPEED_696MHZ) + if (dev_pm_opp_disable(dev, 696000000)) + dev_warn(dev, "failed to disable 696MHz OPP\n"); + iounmap(base); +put_node: + of_node_put(np); +} + static int imx6q_cpufreq_probe(struct platform_device *pdev) { struct device_node *np; @@ -314,7 +355,10 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_reg; } - imx6q_opp_check_speed_grading(cpu_dev); + if (of_machine_is_compatible("fsl,imx6ul")) + imx6ul_opp_check_speed_grading(cpu_dev); + else + imx6q_opp_check_speed_grading(cpu_dev); /* Because we have added the OPPs here, we must free them */ free_opp = true; -- cgit v1.2.3 From 52b3672c135416d8ccc3c92f475b983a201fc6f5 Mon Sep 17 00:00:00 2001 From: Zhen Han Date: Wed, 10 Jan 2018 08:38:23 +0800 Subject: powercap: add suspend and resume mechanism for SOC power limit PL1 and PL2 could be throttled or de-throttled by Thermal management to control SOC temperature. However, currently, their values are reset to the default values after a system suspend and resume. Add pm_notifier to save PL1, PL2 before system suspend and restore PL1, PL2 after system resume. Signed-off-by: Zhen Han Signed-off-by: Rafael J.
Wysocki --- drivers/powercap/intel_rapl.c | 97 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index d1694f1def72..0188cff98cdd 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -155,6 +156,7 @@ struct rapl_power_limit { int prim_id; /* primitive ID used to enable */ struct rapl_domain *domain; const char *name; + u64 last_power_limit; }; static const char pl1_name[] = "long_term"; @@ -1533,6 +1535,92 @@ static int rapl_cpu_down_prep(unsigned int cpu) static enum cpuhp_state pcap_rapl_online; +static void power_limit_state_save(void) +{ + struct rapl_package *rp; + struct rapl_domain *rd; + int nr_pl, ret, i; + + get_online_cpus(); + list_for_each_entry(rp, &rapl_packages, plist) { + if (!rp->power_zone) + continue; + rd = power_zone_to_rapl_domain(rp->power_zone); + nr_pl = find_nr_power_limit(rd); + for (i = 0; i < nr_pl; i++) { + switch (rd->rpl[i].prim_id) { + case PL1_ENABLE: + ret = rapl_read_data_raw(rd, + POWER_LIMIT1, + true, + &rd->rpl[i].last_power_limit); + if (ret) + rd->rpl[i].last_power_limit = 0; + break; + case PL2_ENABLE: + ret = rapl_read_data_raw(rd, + POWER_LIMIT2, + true, + &rd->rpl[i].last_power_limit); + if (ret) + rd->rpl[i].last_power_limit = 0; + break; + } + } + } + put_online_cpus(); +} + +static void power_limit_state_restore(void) +{ + struct rapl_package *rp; + struct rapl_domain *rd; + int nr_pl, i; + + get_online_cpus(); + list_for_each_entry(rp, &rapl_packages, plist) { + if (!rp->power_zone) + continue; + rd = power_zone_to_rapl_domain(rp->power_zone); + nr_pl = find_nr_power_limit(rd); + for (i = 0; i < nr_pl; i++) { + switch (rd->rpl[i].prim_id) { + case PL1_ENABLE: + if (rd->rpl[i].last_power_limit) + rapl_write_data_raw(rd, + POWER_LIMIT1, + rd->rpl[i].last_power_limit); + break; + case PL2_ENABLE: + if 
(rd->rpl[i].last_power_limit) + rapl_write_data_raw(rd, + POWER_LIMIT2, + rd->rpl[i].last_power_limit); + break; + } + } + } + put_online_cpus(); +} + +static int rapl_pm_callback(struct notifier_block *nb, + unsigned long mode, void *_unused) +{ + switch (mode) { + case PM_SUSPEND_PREPARE: + power_limit_state_save(); + break; + case PM_POST_SUSPEND: + power_limit_state_restore(); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block rapl_pm_notifier = { + .notifier_call = rapl_pm_callback, +}; + static int __init rapl_init(void) { const struct x86_cpu_id *id; @@ -1560,8 +1648,16 @@ static int __init rapl_init(void) /* Don't bail out if PSys is not supported */ rapl_register_psys(); + + ret = register_pm_notifier(&rapl_pm_notifier); + if (ret) + goto err_unreg_all; + return 0; +err_unreg_all: + cpuhp_remove_state(pcap_rapl_online); + err_unreg: rapl_unregister_powercap(); return ret; @@ -1569,6 +1665,7 @@ err_unreg: static void __exit rapl_exit(void) { + unregister_pm_notifier(&rapl_pm_notifier); cpuhp_remove_state(pcap_rapl_online); rapl_unregister_powercap(); } -- cgit v1.2.3 From b4b6cb613519b7449da510bccf08986371b328cb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 10 Jan 2018 10:51:29 +0800 Subject: Revert "block: blk-merge: try to make front segments in full size" This reverts commit a2d37968d784363842f87820a21e106741d28004. If max segment size isn't 512-aligned, this patch won't work well. Also once multipage bvec is enabled, adjacent bvecs won't be physically contiguous if page is added via bio_add_page(), so we don't need this kind of complicated logic. 
Reported-by: Dmitry Osipenko Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-merge.c | 54 +++++------------------------------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 446f63e076aa..8452fc7164cc 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -109,7 +109,6 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, bool do_split = true; struct bio *new = NULL; const unsigned max_sectors = get_max_io_size(q, bio); - unsigned advance = 0; bio_for_each_segment(bv, bio, iter) { /* @@ -133,32 +132,12 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, } if (bvprvp && blk_queue_cluster(q)) { + if (seg_size + bv.bv_len > queue_max_segment_size(q)) + goto new_segment; if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) goto new_segment; - if (seg_size + bv.bv_len > queue_max_segment_size(q)) { - /* - * One assumption is that initial value of - * @seg_size(equals to bv.bv_len) won't be - * bigger than max segment size, but this - * becomes false after multipage bvecs. - */ - advance = queue_max_segment_size(q) - seg_size; - - if (advance > 0) { - seg_size += advance; - sectors += advance >> 9; - bv.bv_len -= advance; - bv.bv_offset += advance; - } - - /* - * Still need to put remainder of current - * bvec into a new segment. 
- */ - goto new_segment; - } seg_size += bv.bv_len; bvprv = bv; @@ -180,12 +159,6 @@ new_segment: seg_size = bv.bv_len; sectors += bv.bv_len >> 9; - /* restore the bvec for iterator */ - if (advance) { - bv.bv_len += advance; - bv.bv_offset -= advance; - advance = 0; - } } do_split = false; @@ -386,29 +359,16 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, { int nbytes = bvec->bv_len; - unsigned advance = 0; if (*sg && *cluster) { + if ((*sg)->length + nbytes > queue_max_segment_size(q)) + goto new_segment; + if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) goto new_segment; - /* - * try best to merge part of the bvec into previous - * segment and follow same policy with - * blk_bio_segment_split() - */ - if ((*sg)->length + nbytes > queue_max_segment_size(q)) { - advance = queue_max_segment_size(q) - (*sg)->length; - if (advance) { - (*sg)->length += advance; - bvec->bv_offset += advance; - bvec->bv_len -= advance; - } - goto new_segment; - } - (*sg)->length += nbytes; } else { new_segment: @@ -431,10 +391,6 @@ new_segment: sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); (*nsegs)++; - - /* for making iterator happy */ - bvec->bv_offset -= advance; - bvec->bv_len += advance; } *bvprv = *bvec; } -- cgit v1.2.3 From 541676078b52f365f53d46ee5517d305cd1b6350 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 15 Dec 2017 14:23:10 -0500 Subject: membarrier: Disable preemption when calling smp_call_function_many() smp_call_function_many() requires disabling preemption around the call. Signed-off-by: Mathieu Desnoyers Cc: # v4.14+ Cc: Andrea Parri Cc: Andrew Hunter Cc: Avi Kivity Cc: Benjamin Herrenschmidt Cc: Boqun Feng Cc: Dave Watson Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Maged Michael Cc: Michael Ellerman Cc: Paul E . McKenney Cc: Paul E. 
McKenney Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171215192310.25293-1-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- kernel/sched/membarrier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index dd7908743dab..9bcbacba82a8 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -89,7 +89,9 @@ static int membarrier_private_expedited(void) rcu_read_unlock(); } if (!fallback) { + preempt_disable(); smp_call_function_many(tmpmask, ipi_mb, NULL, 1); + preempt_enable(); free_cpumask_var(tmpmask); } cpus_read_unlock(); -- cgit v1.2.3 From 09ec417b0ea8bdab18e78d3d55e0a5fb7d54f18c Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Tue, 9 Jan 2018 14:19:11 +0100 Subject: mtd: nand: samsung: Disable subpage writes on E-die NAND Samsung E-die SLC NAND manufactured using 21nm process (K9F1G08U0E) does not support partial page programming, so disable subpage writes for it. Manufacturing process is stored in lowest two bits of 5th ID byte. 
Signed-off-by: Ladislav Michl Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_samsung.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/nand_samsung.c b/drivers/mtd/nand/nand_samsung.c index f6b0a63a068c..ef022f62f74c 100644 --- a/drivers/mtd/nand/nand_samsung.c +++ b/drivers/mtd/nand/nand_samsung.c @@ -92,10 +92,23 @@ static void samsung_nand_decode_id(struct nand_chip *chip) } else { nand_decode_ext_id(chip); - /* Datasheet values for SLC Samsung K9F4G08U0D-S[I|C]B0(T00) */ - if (nand_is_slc(chip) && chip->id.data[1] == 0xDC) { - chip->ecc_step_ds = 512; - chip->ecc_strength_ds = 1; + if (nand_is_slc(chip)) { + switch (chip->id.data[1]) { + /* K9F4G08U0D-S[I|C]B0(T00) */ + case 0xDC: + chip->ecc_step_ds = 512; + chip->ecc_strength_ds = 1; + break; + + /* K9F1G08U0E 21nm chips do not support subpage write */ + case 0xF1: + if (chip->id.len > 4 && + (chip->id.data[4] & GENMASK(1, 0)) == 0x1) + chip->options |= NAND_NO_SUBPAGE_WRITE; + break; + default: + break; + } } } } -- cgit v1.2.3 From 1e532d2b49645e7cb76d5af6cb5bc4ec93d861ae Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 10 Jan 2018 09:33:26 +0100 Subject: af_key: Fix memory leak in key_notify_policy. We leak the allocated out_skb in case pfkey_xfrm_policy2msg() fails. Fix this by freeing it on error. 
Reported-by: Dmitry Vyukov Signed-off-by: Steffen Klassert --- net/key/af_key.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index d40861a048fe..7e2e7188e7f4 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2202,8 +2202,10 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); - if (err < 0) + if (err < 0) { + kfree_skb(out_skb); return err; + } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; -- cgit v1.2.3 From 6c7d47c33ed323f14f2a3b8de925e831dbaa4e69 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Nov 2017 14:42:21 +1100 Subject: KVM: PPC: Book3S PR: Fix WIMG handling under pHyp Commit 96df226 ("KVM: PPC: Book3S PR: Preserve storage control bits") added code to preserve WIMG bits but it missed 2 special cases: - a magic page in kvmppc_mmu_book3s_64_xlate() and - guest real mode in kvmppc_handle_pagefault(). For these ptes, WIMG was 0 and pHyp failed on these causing a guest to stop in the very beginning at NIP=0x100 (due to bd9166ffe "KVM: PPC: Book3S PR: Exit KVM on failed mapping"). According to LoPAPR v1.1 14.5.4.1.2 H_ENTER: The hypervisor checks that the WIMG bits within the PTE are appropriate for the physical page number else H_Parameter return. (For System Memory pages WIMG=0010, or, 1110 if the SAO option is enabled, and for IO pages WIMG=01**.) This hence initializes WIMG to non-zero value HPTE_R_M (0x10), as expected by pHyp. 
[paulus@ozlabs.org - fix compile for 32-bit] Cc: stable@vger.kernel.org # v4.11+ Fixes: 96df226 "KVM: PPC: Book3S PR: Preserve storage control bits" Signed-off-by: Alexey Kardashevskiy Tested-by: Ruediger Oertel Reviewed-by: Greg Kurz Tested-by: Greg Kurz Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu.c | 1 + arch/powerpc/kvm/book3s_pr.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 29ebe2fd5867..a93d719edc90 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, gpte->may_read = true; gpte->may_write = true; gpte->page_size = MMU_PAGE_4K; + gpte->wimg = HPTE_R_M; return 0; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d0dc8624198f..7deaeeb14b93 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define MSR_USER32 MSR_USER #define MSR_USER64 MSR_USER #define HW_PAGE_SIZE PAGE_SIZE +#define HPTE_R_M _PAGE_COHERENT #endif static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) @@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.eaddr = eaddr; pte.vpage = eaddr >> 12; pte.page_size = MMU_PAGE_64K; + pte.wimg = HPTE_R_M; } switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) { -- cgit v1.2.3 From ecba8297aafd50db6ae867e90844eead1611ef1c Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 10 Jan 2018 17:04:39 +1100 Subject: KVM: PPC: Book3S HV: Always flush TLB in kvmppc_alloc_reset_hpt() The KVM_PPC_ALLOCATE_HTAB ioctl(), implemented by kvmppc_alloc_reset_hpt() is supposed to completely clear and reset a guest's Hashed Page Table (HPT) allocating or re-allocating it if necessary. 
In the case where an HPT of the right size already exists and it just zeroes it, it forces a TLB flush on all guest CPUs, to remove any stale TLB entries loaded from the old HPT. However, that situation can arise when the HPT is resizing as well - or even when switching from an RPT to HPT - so those cases need a TLB flush as well. So, move the TLB flush to trigger in all cases except for errors. Cc: stable@vger.kernel.org # v4.10+ Fixes: f98a8bf9ee20 ("KVM: PPC: Book3S HV: Allow KVM_PPC_ALLOCATE_HTAB ioctl() to change HPT size") Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8355398f0bb6..b73dbc9e797d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -165,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) * Reset all the reverse-mapping chains for all memslots */ kvmppc_rmap_reset(kvm); - /* Ensure that each vcpu will flush its TLB on next entry. */ - cpumask_setall(&kvm->arch.need_tlb_flush); err = 0; goto out; } @@ -182,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) kvmppc_set_hpt(kvm, &info); out: + if (err == 0) + /* Ensure that each vcpu will flush its TLB on next entry. */ + cpumask_setall(&kvm->arch.need_tlb_flush); + mutex_unlock(&kvm->lock); return err; } -- cgit v1.2.3 From e4c9fd10eb21376f44723c40ad12395089251c28 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 08:34:28 +0100 Subject: ALSA: hda - Apply headphone noise quirk for another Dell XPS 13 variant There is another Dell XPS 13 variant (SSID 1028:082a) that requires the existing fixup for reducing the headphone noise. This patch adds the quirk entry for that. BugLink: http://lkml.kernel.org/r/CAHXyb9ZCZJzVisuBARa+UORcjRERV8yokez=DP1_5O5isTz0ZA@mail.gmail.com Reported-and-tested-by: Francisco G. 
Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8fd2d9c62c96..9aafc6c86132 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6196,6 +6196,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), + SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -- cgit v1.2.3 From 031f335cda879450095873003abb03ae8ed3b74a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 10:53:18 +0100 Subject: ALSA: hda - Apply the existing quirk to iMac 14,1 iMac 14,1 requires the same quirk as iMac 12,2, using GPIO 2 and 3 for headphone and speaker output amps. Add the codec SSID quirk entry (106b:0600) accordingly. 
BugLink: http://lkml.kernel.org/r/CAEw6Zyteav09VGHRfD5QwsfuWv5a43r0tFBNbfcHXoNrxVz7ew@mail.gmail.com Reported-by: Freaky Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cirrus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 80bbadc83721..d6e079f4ec09 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -408,6 +408,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = { /*SND_PCI_QUIRK(0x8086, 0x7270, "IMac 27 Inch", CS420X_IMAC27),*/ /* codec SSID */ + SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101), -- cgit v1.2.3 From e1053262cf318a2bb4a0e1e9c402a3033c669381 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 9 Jan 2018 21:01:59 -0800 Subject: ASoC: max98373: Added missing blank lines Signed-off-by: Ryan Lee Signed-off-by: Mark Brown --- sound/soc/codecs/max98373.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c index 9af0d985d6e9..cb389571b253 100644 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -176,6 +176,7 @@ static int max98373_get_bclk_sel(int bclk) } return 0; } + static int max98373_set_clock(struct snd_soc_codec *codec, struct snd_pcm_hw_params *params) { @@ -270,6 +271,7 @@ static int max98373_dai_hw_params(struct snd_pcm_substream *substream, params_rate(params)); goto err; } + /* set DAI_SR to correct LRCLK frequency */ regmap_update_bits(max98373->regmap, MAX98373_R2027_PCM_SR_SETUP_1, -- cgit v1.2.3 From 3831a5b87ff87a31dba2f212bcecd4f2b8c7c6d4 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 9 Jan 2018 21:02:00 -0800 Subject: ASoC: max98373: Added TDM off if parameters are all zeroes Signed-off-by: Ryan Lee Signed-off-by: Mark Brown --- 
sound/soc/codecs/max98373.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c index cb389571b253..ad17bb1fee7b 100644 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -311,7 +311,10 @@ static int max98373_dai_tdm_slot(struct snd_soc_dai *dai, unsigned int mask; int x, slot_found; - max98373->tdm_mode = true; + if (!tx_mask && !rx_mask && !slots && !slot_width) + max98373->tdm_mode = false; + else + max98373->tdm_mode = true; /* BCLK configuration */ bsel = max98373_get_bclk_sel(slots * slot_width); -- cgit v1.2.3 From b6158323bbe706416f8f13912879a429be5cc2a9 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 9 Jan 2018 21:02:01 -0800 Subject: ASoC: max98373: Modified control names for TLV controls Signed-off-by: Ryan Lee Signed-off-by: Mark Brown --- sound/soc/codecs/max98373.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c index ad17bb1fee7b..31b0864583e8 100644 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -611,13 +611,13 @@ SOC_ENUM("Output Voltage", max98373_out_volt_enum), /* Dynamic Headroom Tracking */ SOC_SINGLE("DHT Switch", MAX98373_R20D4_DHT_EN, MAX98373_DHT_EN_SHIFT, 1, 0), -SOC_SINGLE_TLV("DHT Gain Min", MAX98373_R20D1_DHT_CFG, +SOC_SINGLE_TLV("DHT Min Volume", MAX98373_R20D1_DHT_CFG, MAX98373_DHT_SPK_GAIN_MIN_SHIFT, 9, 0, max98373_dht_spkgain_min_tlv), -SOC_SINGLE_TLV("DHT Rot Pnt", MAX98373_R20D1_DHT_CFG, +SOC_SINGLE_TLV("DHT Rot Pnt Volume", MAX98373_R20D1_DHT_CFG, MAX98373_DHT_ROT_PNT_SHIFT, 15, 0, max98373_dht_rotation_point_tlv), -SOC_SINGLE_TLV("DHT Attack Step", MAX98373_R20D2_DHT_ATTACK_CFG, +SOC_SINGLE_TLV("DHT Attack Step Volume", MAX98373_R20D2_DHT_ATTACK_CFG, MAX98373_DHT_ATTACK_STEP_SHIFT, 4, 0, max98373_dht_step_size_tlv), -SOC_SINGLE_TLV("DHT Release Step", MAX98373_R20D3_DHT_RELEASE_CFG, 
+SOC_SINGLE_TLV("DHT Release Step Volume", MAX98373_R20D3_DHT_RELEASE_CFG, MAX98373_DHT_RELEASE_STEP_SHIFT, 4, 0, max98373_dht_step_size_tlv), SOC_ENUM("DHT Attack Rate", max98373_dht_attack_rate_enum), SOC_ENUM("DHT Release Rate", max98373_dht_release_rate_enum), @@ -650,36 +650,36 @@ SOC_SINGLE("BDE Thresh Hysteresis", MAX98373_R209B_BDE_THRESH_HYST, 0, 0xFF, 0), SOC_SINGLE("BDE Hold Time", MAX98373_R2090_BDE_LVL_HOLD, 0, 0xFF, 0), SOC_SINGLE("BDE Attack Rate", MAX98373_R2091_BDE_GAIN_ATK_REL_RATE, 4, 0xF, 0), SOC_SINGLE("BDE Release Rate", MAX98373_R2091_BDE_GAIN_ATK_REL_RATE, 0, 0xF, 0), -SOC_SINGLE_TLV("BDE LVL1 Clip Thresh", MAX98373_R20A9_BDE_L1_CFG_2, +SOC_SINGLE_TLV("BDE LVL1 Clip Thresh Volume", MAX98373_R20A9_BDE_L1_CFG_2, 0, 0x3C, 0, max98373_bde_gain_tlv), -SOC_SINGLE_TLV("BDE LVL2 Clip Thresh", MAX98373_R20AC_BDE_L2_CFG_2, +SOC_SINGLE_TLV("BDE LVL2 Clip Thresh Volume", MAX98373_R20AC_BDE_L2_CFG_2, 0, 0x3C, 0, max98373_bde_gain_tlv), -SOC_SINGLE_TLV("BDE LVL3 Clip Thresh", MAX98373_R20AF_BDE_L3_CFG_2, +SOC_SINGLE_TLV("BDE LVL3 Clip Thresh Volume", MAX98373_R20AF_BDE_L3_CFG_2, 0, 0x3C, 0, max98373_bde_gain_tlv), -SOC_SINGLE_TLV("BDE LVL4 Clip Thresh", MAX98373_R20B2_BDE_L4_CFG_2, +SOC_SINGLE_TLV("BDE LVL4 Clip Thresh Volume", MAX98373_R20B2_BDE_L4_CFG_2, 0, 0x3C, 0, max98373_bde_gain_tlv), -SOC_SINGLE_TLV("BDE LVL1 Clip Gain Reduct", MAX98373_R20AA_BDE_L1_CFG_3, +SOC_SINGLE_TLV("BDE LVL1 Clip Reduction Volume", MAX98373_R20AA_BDE_L1_CFG_3, 0, 0x3C, 0, max98373_bde_gain_tlv), -SOC_SINGLE_TLV("BDE LVL2 Clip Gain Reduct", MAX98373_R20AD_BDE_L2_CFG_3, +SOC_SINGLE_TLV("BDE LVL2 Clip Reduction Volume", MAX98373_R20AD_BDE_L2_CFG_3, 0, 0x3C, 0, max98373_bde_gain_tlv), -SOC_SINGLE_TLV("BDE LVL3 Clip Gain Reduct", MAX98373_R20B0_BDE_L3_CFG_3, +SOC_SINGLE_TLV("BDE LVL3 Clip Reduction Volume", MAX98373_R20B0_BDE_L3_CFG_3, 0, 0x3C, 0, max98373_bde_gain_tlv), -SOC_SINGLE_TLV("BDE LVL4 Clip Gain Reduct", MAX98373_R20B3_BDE_L4_CFG_3, +SOC_SINGLE_TLV("BDE LVL4 Clip 
Reduction Volume", MAX98373_R20B3_BDE_L4_CFG_3, 0, 0x3C, 0, max98373_bde_gain_tlv), -SOC_SINGLE_TLV("BDE LVL1 Limiter Thresh", MAX98373_R20A8_BDE_L1_CFG_1, +SOC_SINGLE_TLV("BDE LVL1 Limiter Thresh Volume", MAX98373_R20A8_BDE_L1_CFG_1, 0, 0xF, 0, max98373_limiter_thresh_tlv), -SOC_SINGLE_TLV("BDE LVL2 Limiter Thresh", MAX98373_R20AB_BDE_L2_CFG_1, +SOC_SINGLE_TLV("BDE LVL2 Limiter Thresh Volume", MAX98373_R20AB_BDE_L2_CFG_1, 0, 0xF, 0, max98373_limiter_thresh_tlv), -SOC_SINGLE_TLV("BDE LVL3 Limiter Thresh", MAX98373_R20AE_BDE_L3_CFG_1, +SOC_SINGLE_TLV("BDE LVL3 Limiter Thresh Volume", MAX98373_R20AE_BDE_L3_CFG_1, 0, 0xF, 0, max98373_limiter_thresh_tlv), -SOC_SINGLE_TLV("BDE LVL4 Limiter Thresh", MAX98373_R20B1_BDE_L4_CFG_1, +SOC_SINGLE_TLV("BDE LVL4 Limiter Thresh Volume", MAX98373_R20B1_BDE_L4_CFG_1, 0, 0xF, 0, max98373_limiter_thresh_tlv), /* Limiter */ SOC_SINGLE("Limiter Switch", MAX98373_R20E2_LIMITER_EN, MAX98373_LIMITER_EN_SHIFT, 1, 0), SOC_SINGLE("Limiter Src Switch", MAX98373_R20E0_LIMITER_THRESH_CFG, MAX98373_LIMITER_THRESH_SRC_SHIFT, 1, 0), -SOC_SINGLE_TLV("Limiter Thresh", MAX98373_R20E0_LIMITER_THRESH_CFG, +SOC_SINGLE_TLV("Limiter Thresh Volume", MAX98373_R20E0_LIMITER_THRESH_CFG, MAX98373_LIMITER_THRESH_SHIFT, 15, 0, max98373_limiter_thresh_tlv), SOC_ENUM("Limiter Attack Rate", max98373_limiter_attack_rate_enum), SOC_ENUM("Limiter Release Rate", max98373_limiter_release_rate_enum), -- cgit v1.2.3 From aa8a5e0062ac940f7659394f4817c948dc8c0667 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64s: Add support for RFI flush of L1-D cache On some CPUs we can prevent the Meltdown vulnerability by flushing the L1-D cache on exit from kernel to user mode, and from hypervisor to guest. This is known to be the case on at least Power7, Power8 and Power9. At this time we do not know the status of the vulnerability on other CPUs such as the 970 (Apple G5), pasemi CPUs (AmigaOne X1000) or Freescale CPUs. 
As more information comes to light we can enable this, or other mechanisms on those CPUs. The vulnerability occurs when the load of an architecturally inaccessible memory region (e.g. userspace load of kernel memory) is speculatively executed to the point where its result can influence the address of a subsequent speculatively executed load. In order for that to happen, the first load must hit in the L1, because before the load is sent to the L2 the permission check is performed. Therefore if no kernel addresses hit in the L1 the vulnerability cannot occur. We can ensure that is the case by flushing the L1 whenever we return to userspace. Similarly for hypervisor vs guest. In order to flush the L1-D cache on exit, we add a section of nops at each (h)rfi location that returns to a lower privileged context, and patch that with some sequence. Newer firmwares are able to advertise to us that there is a special nop instruction that flushes the L1-D. If we do not see that advertised, we fall back to doing a displacement flush in software. For guest kernels we support migration between some CPU versions, and different CPUs may use different flush instructions. So that we are prepared to migrate to a machine with a different flush instruction activated, we may have to patch more than one flush instruction at boot if the hypervisor tells us to. In the end this patch is mostly the work of Nicholas Piggin and Michael Ellerman. However a cast of thousands contributed to analysis of the issue, earlier versions of the patch, backports, testing, etc. Many thanks to all of them. 
Tested-by: Jon Masters Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 40 ++++++++++++--- arch/powerpc/include/asm/feature-fixups.h | 13 +++++ arch/powerpc/include/asm/paca.h | 10 ++++ arch/powerpc/include/asm/setup.h | 13 +++++ arch/powerpc/kernel/asm-offsets.c | 5 ++ arch/powerpc/kernel/exceptions-64s.S | 84 +++++++++++++++++++++++++++++++ arch/powerpc/kernel/setup_64.c | 79 +++++++++++++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 9 ++++ arch/powerpc/lib/feature-fixups.c | 41 +++++++++++++++ 9 files changed, 286 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index dfc56daed98b..7197b179c1b1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,34 +74,58 @@ */ #define EX_R3 EX_DAR -/* Macros for annotating the expected destination of (h)rfid */ +/* + * Macros for annotating the expected destination of (h)rfid + * + * The nop instructions allow us to insert one or more instructions to flush the + * L1-D cache when returning to userspace or a guest. 
+ */ +#define RFI_FLUSH_SLOT \ + RFI_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop #define RFI_TO_KERNEL \ rfid #define RFI_TO_USER \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define RFI_TO_GUEST \ - rfid + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback #define HRFI_TO_KERNEL \ hrfid #define HRFI_TO_USER \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_GUEST \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ - hrfid + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 8f88f771cc55..1e82eb3caabd 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,7 +187,20 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define RFI_FLUSH_FIXUP_SECTION \ +951: \ + .pushsection __rfi_flush_fixup,"a"; \ + .align 2; \ +952: \ + FTR_ENTRY_OFFSET 951b-952b; \ + .popsection; + + #ifndef __ASSEMBLY__ +#include + +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; + void apply_feature_fixups(void); void setup_feature_keys(void); #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 3892db93b837..23ac7fc0af23 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -232,6 +232,16 @@ struct paca_struct { struct sibling_subcore_state *sibling_subcore_state; #endif #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * rfi fallback flush must be in its own cacheline to prevent + * other paca data leaking into the L1d + */ + u64 exrfi[EX_SIZE] __aligned(0x80); + void 
*rfi_flush_fallback_area; + u64 l1d_flush_congruence; + u64 l1d_flush_sets; +#endif }; extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index cf00ec26303a..469b7fdc9be4 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} #endif /* CONFIG_PPC_PSERIES */ +void rfi_flush_enable(bool enable); + +/* These are bit flags */ +enum l1d_flush_type { + L1D_FLUSH_NONE = 0x1, + L1D_FLUSH_FALLBACK = 0x2, + L1D_FLUSH_ORI = 0x4, + L1D_FLUSH_MTTRIG = 0x8, +}; + +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void do_rfi_flush_fixups(enum l1d_flush_type types); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6b958414b4e0..f390d57cf2e1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -237,6 +237,11 @@ int main(void) OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); OFFSET(PACA_IN_MCE, paca_struct, in_mce); OFFSET(PACA_IN_NMI, paca_struct, in_nmi); + OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); + OFFSET(PACA_EXRFI, paca_struct, exrfi); + OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); + OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); + #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ed356194f09c..2dc10bf646b8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1449,6 +1449,90 @@ masked_##_H##interrupt: \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(rfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std 
r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + rfid + +TRAMP_REAL_BEGIN(hrfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). 
+ */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + hrfid + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8956a9856604..96163f4c3673 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -801,3 +801,82 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); + +#ifdef CONFIG_PPC_BOOK3S_64 +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +bool rfi_flush; + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. + */ +} + +void rfi_flush_enable(bool enable) +{ + if (rfi_flush == enable) + return; + + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + l1d_size = ppc64_caches.l1d.size; + limit = min(safe_stack_limit(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. 
We don't have a recipe for load patterns to + * reliably avoid the prefetcher. + */ + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); + memset(l1d_flush_fallback_area, 0, l1d_size * 2); + + for_each_possible_cpu(cpu) { + /* + * The fallback flush is currently coded for 8-way + * associativity. Different associativity is possible, but it + * will be treated as 8-way and may not evict the lines as + * effectively. + * + * 128 byte lines are mandatory. + */ + u64 c = l1d_size / 8; + + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; + paca[cpu].l1d_flush_congruence = c; + paca[cpu].l1d_flush_sets = c / 128; + } +} + +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: Using fallback displacement flush\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: Using ori type flush\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: Using mttrig type flush\n"); + + enabled_flush_types = types; + + rfi_flush_enable(enable); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0494e1566ee2..307843d23682 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,15 @@ SECTIONS /* Read-only data */ RO_DATA(PAGE_SIZE) +#ifdef CONFIG_PPC64 + . 
= ALIGN(8); + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { + __start___rfi_flush_fixup = .; + *(__rfi_flush_fixup) + __stop___rfi_flush_fixup = .; + } +#endif + EXCEPTION_TABLE(0) NOTES :kernel :notes diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 41cf5ae273cf..a95ea007d654 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BOOK3S_64 +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___rfi_flush_fixup), + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = 0x48000010; + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; -- cgit v1.2.3 From bc9c9304a45480797e13a8e1df96ffcf44fb62fe Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/64s: Support disabling RFI flush with no_rfi_flush and nopti Because there may be some performance overhead of the RFI flush, add 
kernel command line options to disable it. We add a sensibly named 'no_rfi_flush' option, but we also hijack the x86 option 'nopti'. The RFI flush is not the same as KPTI, but if we see 'nopti' we can guess that the user is trying to avoid any overhead of Meltdown mitigations, and it means we don't have to educate everyone about a different command line option. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 96163f4c3673..491be4179ddd 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -805,8 +805,29 @@ early_initcall(disable_hardlockup_detector); #ifdef CONFIG_PPC_BOOK3S_64 static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; +static bool no_rfi_flush; bool rfi_flush; +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. 
+ */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + static void do_nothing(void *unused) { /* @@ -877,6 +898,7 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - rfi_flush_enable(enable); + if (!no_rfi_flush) + rfi_flush_enable(enable); } #endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit v1.2.3 From 8989d56878a7735dfdb234707a2fee6faf631085 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/pseries: Query hypervisor for RFI flush settings A new hypervisor call is available which tells the guest settings related to the RFI flush. Use it to query the appropriate flush instruction(s), and whether the flush is required. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/setup.c | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a8531e012658..ae4f596273b5 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } +static void pseries_setup_rfi_flush(void) +{ + struct h_cpu_char_result result; + enum l1d_flush_type types; + bool enable; + long rc; + + /* Enable by default */ + enable = true; + + rc = plpar_get_cpu_characteristics(&result); + if (rc == H_SUCCESS) { + types = L1D_FLUSH_NONE; + + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + types |= L1D_FLUSH_MTTRIG; + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) + types |= L1D_FLUSH_ORI; + + /* Use fallback if nothing set in hcall */ + if (types == L1D_FLUSH_NONE) + types = L1D_FLUSH_FALLBACK; + + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + enable = false; + } else 
{ + /* Default to fallback if case hcall is not available */ + types = L1D_FLUSH_FALLBACK; + } + + setup_rfi_flush(types, enable); +} + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); + pseries_setup_rfi_flush(); + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); -- cgit v1.2.3 From 6e032b350cd1fdb830f18f8320ef0e13b4e24094 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 10 Jan 2018 03:07:15 +1100 Subject: powerpc/powernv: Check device-tree for RFI flush settings New device-tree properties are available which tell the hypervisor settings related to the RFI flush. Use them to determine the appropriate flush instruction to use, and whether the flush is required. Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/setup.c | 49 ++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 1edfbc1e40f4..4fb21e17504a 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -37,13 +37,62 @@ #include #include #include +#include #include "powernv.h" +static void pnv_setup_rfi_flush(void) +{ + struct device_node *np, *fw_features; + enum l1d_flush_type type; + int enable; + + /* Default to fallback in case fw-features are not available */ + type = L1D_FLUSH_FALLBACK; + enable = 1; + + np = of_find_node_by_name(NULL, "ibm,opal"); + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + + if (fw_features) { + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_MTTRIG; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); + if (np && of_property_read_bool(np, 
"enabled")) + type = L1D_FLUSH_ORI; + + of_node_put(np); + + /* Enable unless firmware says NOT to */ + enable = 2; + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + of_node_put(fw_features); + } + + setup_rfi_flush(type, enable > 0); +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + pnv_setup_rfi_flush(); + /* Initialize SMP */ pnv_smp_init(); -- cgit v1.2.3 From 48b66f8f936f369bb1a43c12aedbfeb2975baf4c Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 10 Jan 2018 11:13:03 +0100 Subject: iio: Add hardware consumer buffer support Hardware consumer interface can be used when one IIO device has a direct connection to another device in hardware. Signed-off-by: Lars-Peter Clausen Signed-off-by: Arnaud Pouliquen Reviewed-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/buffer/Kconfig | 10 ++ drivers/iio/buffer/Makefile | 1 + drivers/iio/buffer/industrialio-hw-consumer.c | 181 ++++++++++++++++++++++++++ include/linux/iio/hw-consumer.h | 19 +++ 4 files changed, 211 insertions(+) create mode 100644 drivers/iio/buffer/industrialio-hw-consumer.c create mode 100644 include/linux/iio/hw-consumer.h diff --git a/drivers/iio/buffer/Kconfig b/drivers/iio/buffer/Kconfig index 4ffd3db7817f..338774cba19b 100644 --- a/drivers/iio/buffer/Kconfig +++ b/drivers/iio/buffer/Kconfig @@ -29,6 +29,16 @@ config IIO_BUFFER_DMAENGINE Should be selected by drivers that want to use this functionality. +config IIO_BUFFER_HW_CONSUMER + tristate "Industrial I/O HW buffering" + help + Provides a way to bonding when an IIO device has a direct connection + to another device in hardware. In this case buffers for data transfers + are handled by hardware. 
+ + Should be selected by drivers that want to use the generic Hw consumer + interface. + config IIO_KFIFO_BUF tristate "Industrial I/O buffering based on kfifo" help diff --git a/drivers/iio/buffer/Makefile b/drivers/iio/buffer/Makefile index 95f9f41c58b7..1403eb2f9409 100644 --- a/drivers/iio/buffer/Makefile +++ b/drivers/iio/buffer/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_IIO_BUFFER_CB) += industrialio-buffer-cb.o obj-$(CONFIG_IIO_BUFFER_DMA) += industrialio-buffer-dma.o obj-$(CONFIG_IIO_BUFFER_DMAENGINE) += industrialio-buffer-dmaengine.o +obj-$(CONFIG_IIO_BUFFER_HW_CONSUMER) += industrialio-hw-consumer.o obj-$(CONFIG_IIO_TRIGGERED_BUFFER) += industrialio-triggered-buffer.o obj-$(CONFIG_IIO_KFIFO_BUF) += kfifo_buf.o diff --git a/drivers/iio/buffer/industrialio-hw-consumer.c b/drivers/iio/buffer/industrialio-hw-consumer.c new file mode 100644 index 000000000000..993ecdcdab64 --- /dev/null +++ b/drivers/iio/buffer/industrialio-hw-consumer.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2017 Analog Devices Inc. + * Author: Lars-Peter Clausen + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +/** + * struct iio_hw_consumer - IIO hw consumer block + * @buffers: hardware buffers list head. + * @channels: IIO provider channels. 
+ */ +struct iio_hw_consumer { + struct list_head buffers; + struct iio_channel *channels; +}; + +struct hw_consumer_buffer { + struct list_head head; + struct iio_dev *indio_dev; + struct iio_buffer buffer; + long scan_mask[]; +}; + +static struct hw_consumer_buffer *iio_buffer_to_hw_consumer_buffer( + struct iio_buffer *buffer) +{ + return container_of(buffer, struct hw_consumer_buffer, buffer); +} + +static void iio_hw_buf_release(struct iio_buffer *buffer) +{ + struct hw_consumer_buffer *hw_buf = + iio_buffer_to_hw_consumer_buffer(buffer); + kfree(hw_buf); +} + +static const struct iio_buffer_access_funcs iio_hw_buf_access = { + .release = &iio_hw_buf_release, + .modes = INDIO_BUFFER_HARDWARE, +}; + +static struct hw_consumer_buffer *iio_hw_consumer_get_buffer( + struct iio_hw_consumer *hwc, struct iio_dev *indio_dev) +{ + size_t mask_size = BITS_TO_LONGS(indio_dev->masklength) * sizeof(long); + struct hw_consumer_buffer *buf; + + list_for_each_entry(buf, &hwc->buffers, head) { + if (buf->indio_dev == indio_dev) + return buf; + } + + buf = kzalloc(sizeof(*buf) + mask_size, GFP_KERNEL); + if (!buf) + return NULL; + + buf->buffer.access = &iio_hw_buf_access; + buf->indio_dev = indio_dev; + buf->buffer.scan_mask = buf->scan_mask; + + iio_buffer_init(&buf->buffer); + list_add_tail(&buf->head, &hwc->buffers); + + return buf; +} + +/** + * iio_hw_consumer_alloc() - Allocate IIO hardware consumer + * @dev: Pointer to consumer device. + * + * Returns a valid iio_hw_consumer on success or a ERR_PTR() on failure. 
+ */ +struct iio_hw_consumer *iio_hw_consumer_alloc(struct device *dev) +{ + struct hw_consumer_buffer *buf; + struct iio_hw_consumer *hwc; + struct iio_channel *chan; + int ret; + + hwc = kzalloc(sizeof(*hwc), GFP_KERNEL); + if (!hwc) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&hwc->buffers); + + hwc->channels = iio_channel_get_all(dev); + if (IS_ERR(hwc->channels)) { + ret = PTR_ERR(hwc->channels); + goto err_free_hwc; + } + + chan = &hwc->channels[0]; + while (chan->indio_dev) { + buf = iio_hw_consumer_get_buffer(hwc, chan->indio_dev); + if (!buf) { + ret = -ENOMEM; + goto err_put_buffers; + } + set_bit(chan->channel->scan_index, buf->buffer.scan_mask); + chan++; + } + + return hwc; + +err_put_buffers: + list_for_each_entry(buf, &hwc->buffers, head) + iio_buffer_put(&buf->buffer); + iio_channel_release_all(hwc->channels); +err_free_hwc: + kfree(hwc); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(iio_hw_consumer_alloc); + +/** + * iio_hw_consumer_free() - Free IIO hardware consumer + * @hwc: hw consumer to free. + */ +void iio_hw_consumer_free(struct iio_hw_consumer *hwc) +{ + struct hw_consumer_buffer *buf, *n; + + iio_channel_release_all(hwc->channels); + list_for_each_entry_safe(buf, n, &hwc->buffers, head) + iio_buffer_put(&buf->buffer); + kfree(hwc); +} +EXPORT_SYMBOL_GPL(iio_hw_consumer_free); + +/** + * iio_hw_consumer_enable() - Enable IIO hardware consumer + * @hwc: iio_hw_consumer to enable. + * + * Returns 0 on success. 
+ */ +int iio_hw_consumer_enable(struct iio_hw_consumer *hwc) +{ + struct hw_consumer_buffer *buf; + int ret; + + list_for_each_entry(buf, &hwc->buffers, head) { + ret = iio_update_buffers(buf->indio_dev, &buf->buffer, NULL); + if (ret) + goto err_disable_buffers; + } + + return 0; + +err_disable_buffers: + list_for_each_entry_continue_reverse(buf, &hwc->buffers, head) + iio_update_buffers(buf->indio_dev, NULL, &buf->buffer); + return ret; +} +EXPORT_SYMBOL_GPL(iio_hw_consumer_enable); + +/** + * iio_hw_consumer_disable() - Disable IIO hardware consumer + * @hwc: iio_hw_consumer to disable. + */ +void iio_hw_consumer_disable(struct iio_hw_consumer *hwc) +{ + struct hw_consumer_buffer *buf; + + list_for_each_entry(buf, &hwc->buffers, head) + iio_update_buffers(buf->indio_dev, NULL, &buf->buffer); +} +EXPORT_SYMBOL_GPL(iio_hw_consumer_disable); + +MODULE_AUTHOR("Lars-Peter Clausen "); +MODULE_DESCRIPTION("Hardware consumer buffer the IIO framework"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/iio/hw-consumer.h b/include/linux/iio/hw-consumer.h new file mode 100644 index 000000000000..db8c00b9c7a5 --- /dev/null +++ b/include/linux/iio/hw-consumer.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Industrial I/O in kernel hardware consumer interface + * + * Copyright 2017 Analog Devices Inc. 
+ * Author: Lars-Peter Clausen + */ + +#ifndef LINUX_IIO_HW_CONSUMER_H +#define LINUX_IIO_HW_CONSUMER_H + +struct iio_hw_consumer; + +struct iio_hw_consumer *iio_hw_consumer_alloc(struct device *dev); +void iio_hw_consumer_free(struct iio_hw_consumer *hwc); +int iio_hw_consumer_enable(struct iio_hw_consumer *hwc); +void iio_hw_consumer_disable(struct iio_hw_consumer *hwc); + +#endif -- cgit v1.2.3 From 5b178943d64b85d78350ea9c86344c376d7bfe74 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:04 +0100 Subject: docs: driver-api: add iio hw consumer section This adds a section about the Hardware consumer API of the IIO subsystem to the driver API documentation. Signed-off-by: Arnaud Pouliquen Reviewed-by: Jonathan Cameron Signed-off-by: Mark Brown --- Documentation/driver-api/iio/hw-consumer.rst | 51 ++++++++++++++++++++++++++++ Documentation/driver-api/iio/index.rst | 1 + 2 files changed, 52 insertions(+) create mode 100644 Documentation/driver-api/iio/hw-consumer.rst diff --git a/Documentation/driver-api/iio/hw-consumer.rst b/Documentation/driver-api/iio/hw-consumer.rst new file mode 100644 index 000000000000..8facce6a6733 --- /dev/null +++ b/Documentation/driver-api/iio/hw-consumer.rst @@ -0,0 +1,51 @@ +=========== +HW consumer +=========== +An IIO device can be directly connected to another device in hardware. In this +case the buffers between IIO provider and IIO consumer are handled by hardware. +The Industrial I/O HW consumer offers a way to bond these IIO devices without +software buffer for data.
The implementation can be found under +:file:`drivers/iio/buffer/industrialio-hw-consumer.c` + + +* struct :c:type:`iio_hw_consumer` — Hardware consumer structure +* :c:func:`iio_hw_consumer_alloc` — Allocate IIO hardware consumer +* :c:func:`iio_hw_consumer_free` — Free IIO hardware consumer +* :c:func:`iio_hw_consumer_enable` — Enable IIO hardware consumer +* :c:func:`iio_hw_consumer_disable` — Disable IIO hardware consumer + + +HW consumer setup +================= + +As standard IIO device the implementation is based on IIO provider/consumer. +A typical IIO HW consumer setup looks like this:: + + static struct iio_hw_consumer *hwc; + + static const struct iio_info adc_info = { + .read_raw = adc_read_raw, + }; + + static int adc_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) + { + ret = iio_hw_consumer_enable(hwc); + + /* Acquire data */ + + iio_hw_consumer_disable(hwc); + } + + static int adc_probe(struct platform_device *pdev) + { + hwc = devm_iio_hw_consumer_alloc(&iio->dev); + } + +More details +============ +.. kernel-doc:: include/linux/iio/hw-consumer.h +.. kernel-doc:: drivers/iio/buffer/industrialio-hw-consumer.c + :export: + diff --git a/Documentation/driver-api/iio/index.rst b/Documentation/driver-api/iio/index.rst index e5c3922d1b6f..7fba341bd8b2 100644 --- a/Documentation/driver-api/iio/index.rst +++ b/Documentation/driver-api/iio/index.rst @@ -15,3 +15,4 @@ Contents: buffers triggers triggered-buffers + hw-consumer -- cgit v1.2.3 From b688c18d30060e8a840d8af72790339c72acdac4 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:05 +0100 Subject: IIO: hw_consumer: add devm_iio_hw_consumer_alloc Add devm_iio_hw_consumer_alloc function that calls iio_hw_consumer_free when the device is unbound from the bus.
Signed-off-by: Arnaud Pouliquen Reviewed-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/buffer/industrialio-hw-consumer.c | 66 +++++++++++++++++++++++++++ include/linux/iio/hw-consumer.h | 2 + 2 files changed, 68 insertions(+) diff --git a/drivers/iio/buffer/industrialio-hw-consumer.c b/drivers/iio/buffer/industrialio-hw-consumer.c index 993ecdcdab64..95165697d8ae 100644 --- a/drivers/iio/buffer/industrialio-hw-consumer.c +++ b/drivers/iio/buffer/industrialio-hw-consumer.c @@ -137,6 +137,72 @@ void iio_hw_consumer_free(struct iio_hw_consumer *hwc) } EXPORT_SYMBOL_GPL(iio_hw_consumer_free); +static void devm_iio_hw_consumer_release(struct device *dev, void *res) +{ + iio_hw_consumer_free(*(struct iio_hw_consumer **)res); +} + +static int devm_iio_hw_consumer_match(struct device *dev, void *res, void *data) +{ + struct iio_hw_consumer **r = res; + + if (!r || !*r) { + WARN_ON(!r || !*r); + return 0; + } + return *r == data; +} + +/** + * devm_iio_hw_consumer_alloc - Resource-managed iio_hw_consumer_alloc() + * @dev: Pointer to consumer device. + * + * Managed iio_hw_consumer_alloc. iio_hw_consumer allocated with this function + * is automatically freed on driver detach. + * + * If an iio_hw_consumer allocated with this function needs to be freed + * separately, devm_iio_hw_consumer_free() must be used. + * + * returns pointer to allocated iio_hw_consumer on success, NULL on failure. + */ +struct iio_hw_consumer *devm_iio_hw_consumer_alloc(struct device *dev) +{ + struct iio_hw_consumer **ptr, *iio_hwc; + + ptr = devres_alloc(devm_iio_hw_consumer_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return NULL; + + iio_hwc = iio_hw_consumer_alloc(dev); + if (IS_ERR(iio_hwc)) { + devres_free(ptr); + } else { + *ptr = iio_hwc; + devres_add(dev, ptr); + } + + return iio_hwc; +} +EXPORT_SYMBOL_GPL(devm_iio_hw_consumer_alloc); + +/** + * devm_iio_hw_consumer_free - Resource-managed iio_hw_consumer_free() + * @dev: Pointer to consumer device. 
+ * @hwc: iio_hw_consumer to free. + * + * Free iio_hw_consumer allocated with devm_iio_hw_consumer_alloc(). + */ +void devm_iio_hw_consumer_free(struct device *dev, struct iio_hw_consumer *hwc) +{ + int rc; + + rc = devres_release(dev, devm_iio_hw_consumer_release, + devm_iio_hw_consumer_match, hwc); + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_iio_hw_consumer_free); + /** * iio_hw_consumer_enable() - Enable IIO hardware consumer * @hwc: iio_hw_consumer to enable. diff --git a/include/linux/iio/hw-consumer.h b/include/linux/iio/hw-consumer.h index db8c00b9c7a5..44d48bb1d39f 100644 --- a/include/linux/iio/hw-consumer.h +++ b/include/linux/iio/hw-consumer.h @@ -13,6 +13,8 @@ struct iio_hw_consumer; struct iio_hw_consumer *iio_hw_consumer_alloc(struct device *dev); void iio_hw_consumer_free(struct iio_hw_consumer *hwc); +struct iio_hw_consumer *devm_iio_hw_consumer_alloc(struct device *dev); +void devm_iio_hw_consumer_free(struct device *dev, struct iio_hw_consumer *hwc); int iio_hw_consumer_enable(struct iio_hw_consumer *hwc); void iio_hw_consumer_disable(struct iio_hw_consumer *hwc); -- cgit v1.2.3 From 34739a213dbb85c8d775de42d52358255059c257 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:06 +0100 Subject: IIO: inkern: API for manipulating channel attributes Extend the inkern API with functions for reading and writing attribute of iio channels. 
Signed-off-by: Arnaud Pouliquen Reviewed-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/inkern.c | 17 ++++++++++++----- include/linux/iio/consumer.h | 26 ++++++++++++++++++++++++++ include/linux/iio/iio.h | 28 ---------------------------- include/linux/iio/types.h | 28 ++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 33 deletions(-) diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 069defcc6d9b..ec98790e2a28 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -664,9 +664,8 @@ err_unlock: } EXPORT_SYMBOL_GPL(iio_convert_raw_to_processed); -static int iio_read_channel_attribute(struct iio_channel *chan, - int *val, int *val2, - enum iio_chan_info_enum attribute) +int iio_read_channel_attribute(struct iio_channel *chan, int *val, int *val2, + enum iio_chan_info_enum attribute) { int ret; @@ -682,6 +681,7 @@ err_unlock: return ret; } +EXPORT_SYMBOL_GPL(iio_read_channel_attribute); int iio_read_channel_offset(struct iio_channel *chan, int *val, int *val2) { @@ -850,7 +850,8 @@ static int iio_channel_write(struct iio_channel *chan, int val, int val2, chan->channel, val, val2, info); } -int iio_write_channel_raw(struct iio_channel *chan, int val) +int iio_write_channel_attribute(struct iio_channel *chan, int val, int val2, + enum iio_chan_info_enum attribute) { int ret; @@ -860,12 +861,18 @@ int iio_write_channel_raw(struct iio_channel *chan, int val) goto err_unlock; } - ret = iio_channel_write(chan, val, 0, IIO_CHAN_INFO_RAW); + ret = iio_channel_write(chan, val, val2, attribute); err_unlock: mutex_unlock(&chan->indio_dev->info_exist_lock); return ret; } +EXPORT_SYMBOL_GPL(iio_write_channel_attribute); + +int iio_write_channel_raw(struct iio_channel *chan, int val) +{ + return iio_write_channel_attribute(chan, val, 0, IIO_CHAN_INFO_RAW); +} EXPORT_SYMBOL_GPL(iio_write_channel_raw); unsigned int iio_get_channel_ext_info_count(struct iio_channel *chan) diff --git a/include/linux/iio/consumer.h 
b/include/linux/iio/consumer.h index 5e347a9805fd..2017f35db17c 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -215,6 +215,32 @@ int iio_read_channel_average_raw(struct iio_channel *chan, int *val); */ int iio_read_channel_processed(struct iio_channel *chan, int *val); +/** + * iio_write_channel_attribute() - Write values to the device attribute. + * @chan: The channel being queried. + * @val: Value being written. + * @val2: Value being written. val2 use depends on attribute type. + * @attribute: info attribute to be written. + * + * Returns an error code or 0. + */ +int iio_write_channel_attribute(struct iio_channel *chan, int val, + int val2, enum iio_chan_info_enum attribute); + +/** + * iio_read_channel_attribute() - Read values from the device attribute. + * @chan: The channel being queried. + * @val: Value being read. + * @val2: Value being read. val2 use depends on attribute type. + * @attribute: info attribute to be read. + * + * Returns an error code if failed. Else returns a description of what is in val + * and val2, such as IIO_VAL_INT_PLUS_MICRO telling us we have a value of val + * + val2/1e6 + */ +int iio_read_channel_attribute(struct iio_channel *chan, int *val, + int *val2, enum iio_chan_info_enum attribute); + /** * iio_write_channel_raw() - write to a given channel * @chan: The channel being queried. diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 20b61347ea58..f12a61be1ede 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -20,34 +20,6 @@ * Currently assumes nano seconds.
*/ -enum iio_chan_info_enum { - IIO_CHAN_INFO_RAW = 0, - IIO_CHAN_INFO_PROCESSED, - IIO_CHAN_INFO_SCALE, - IIO_CHAN_INFO_OFFSET, - IIO_CHAN_INFO_CALIBSCALE, - IIO_CHAN_INFO_CALIBBIAS, - IIO_CHAN_INFO_PEAK, - IIO_CHAN_INFO_PEAK_SCALE, - IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW, - IIO_CHAN_INFO_AVERAGE_RAW, - IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY, - IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY, - IIO_CHAN_INFO_SAMP_FREQ, - IIO_CHAN_INFO_FREQUENCY, - IIO_CHAN_INFO_PHASE, - IIO_CHAN_INFO_HARDWAREGAIN, - IIO_CHAN_INFO_HYSTERESIS, - IIO_CHAN_INFO_INT_TIME, - IIO_CHAN_INFO_ENABLE, - IIO_CHAN_INFO_CALIBHEIGHT, - IIO_CHAN_INFO_CALIBWEIGHT, - IIO_CHAN_INFO_DEBOUNCE_COUNT, - IIO_CHAN_INFO_DEBOUNCE_TIME, - IIO_CHAN_INFO_CALIBEMISSIVITY, - IIO_CHAN_INFO_OVERSAMPLING_RATIO, -}; - enum iio_shared_by { IIO_SEPARATE, IIO_SHARED_BY_TYPE, diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h index 2aa7b6384d64..6eb3d683ef62 100644 --- a/include/linux/iio/types.h +++ b/include/linux/iio/types.h @@ -34,4 +34,32 @@ enum iio_available_type { IIO_AVAIL_RANGE, }; +enum iio_chan_info_enum { + IIO_CHAN_INFO_RAW = 0, + IIO_CHAN_INFO_PROCESSED, + IIO_CHAN_INFO_SCALE, + IIO_CHAN_INFO_OFFSET, + IIO_CHAN_INFO_CALIBSCALE, + IIO_CHAN_INFO_CALIBBIAS, + IIO_CHAN_INFO_PEAK, + IIO_CHAN_INFO_PEAK_SCALE, + IIO_CHAN_INFO_QUADRATURE_CORRECTION_RAW, + IIO_CHAN_INFO_AVERAGE_RAW, + IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY, + IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY, + IIO_CHAN_INFO_SAMP_FREQ, + IIO_CHAN_INFO_FREQUENCY, + IIO_CHAN_INFO_PHASE, + IIO_CHAN_INFO_HARDWAREGAIN, + IIO_CHAN_INFO_HYSTERESIS, + IIO_CHAN_INFO_INT_TIME, + IIO_CHAN_INFO_ENABLE, + IIO_CHAN_INFO_CALIBHEIGHT, + IIO_CHAN_INFO_CALIBWEIGHT, + IIO_CHAN_INFO_DEBOUNCE_COUNT, + IIO_CHAN_INFO_DEBOUNCE_TIME, + IIO_CHAN_INFO_CALIBEMISSIVITY, + IIO_CHAN_INFO_OVERSAMPLING_RATIO, +}; + #endif /* _IIO_TYPES_H_ */ -- cgit v1.2.3 From af11143757b7995f185e9365d8450ea9d63ea267 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 
10 Jan 2018 11:13:07 +0100 Subject: IIO: Add DT bindings for sigma delta adc modulator Add documentation of device tree bindings to support sigma delta modulator in IIO framework. Signed-off-by: Arnaud Pouliquen Acked-by: Rob Herring Acked-by: Jonathan Cameron Signed-off-by: Mark Brown --- .../devicetree/bindings/iio/adc/sigma-delta-modulator.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt diff --git a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt new file mode 100644 index 000000000000..e9ebb8a20e0d --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.txt @@ -0,0 +1,13 @@ +Device-Tree bindings for sigma delta modulator + +Required properties: +- compatible: should be "ads1201", "sd-modulator". "sd-modulator" can be used + as a generic SD modulator if the modulator is not specified in the compatible list. +- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers". + +Example node: + + ads1202: adc@0 { + compatible = "sd-modulator"; + #io-channel-cells = <1>; + }; -- cgit v1.2.3 From 8a5f0b6f6c4f1a0ff98b5ca1f83cbe34cdebd96e Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:08 +0100 Subject: IIO: ADC: add sigma delta modulator support Add generic driver to support sigma delta modulators. Typically, this device is hardware connected to an IIO device in charge of the conversion. Devices are bonded through the hardware consumer API.
Signed-off-by: Arnaud Pouliquen Acked-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/adc/Kconfig | 12 +++++++ drivers/iio/adc/Makefile | 1 + drivers/iio/adc/sd_adc_modulator.c | 68 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 drivers/iio/adc/sd_adc_modulator.c diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index ef86296b8b0d..5f9d04a8ba53 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -629,6 +629,18 @@ config SPEAR_ADC To compile this driver as a module, choose M here: the module will be called spear_adc. +config SD_ADC_MODULATOR + tristate "Generic sigma delta modulator" + depends on OF + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER + help + Select this option to enables sigma delta modulator. This driver can + support generic sigma delta modulators. + + This driver can also be built as a module. If so, the module + will be called sd_adc_modulator. + config STM32_ADC_CORE tristate "STMicroelectronics STM32 adc core" depends on ARCH_STM32 || COMPILE_TEST diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 9572c1090f35..fa5e7949f642 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -82,3 +82,4 @@ obj-$(CONFIG_VF610_ADC) += vf610_adc.o obj-$(CONFIG_VIPERBOARD_ADC) += viperboard_adc.o xilinx-xadc-y := xilinx-xadc-core.o xilinx-xadc-events.o obj-$(CONFIG_XILINX_XADC) += xilinx-xadc.o +obj-$(CONFIG_SD_ADC_MODULATOR) += sd_adc_modulator.o diff --git a/drivers/iio/adc/sd_adc_modulator.c b/drivers/iio/adc/sd_adc_modulator.c new file mode 100644 index 000000000000..560d8c7d9d86 --- /dev/null +++ b/drivers/iio/adc/sd_adc_modulator.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic sigma delta modulator driver + * + * Copyright (C) 2017, STMicroelectronics - All Rights Reserved + * Author: Arnaud Pouliquen . 
+ */ + +#include +#include +#include +#include + +static const struct iio_info iio_sd_mod_iio_info; + +static const struct iio_chan_spec iio_sd_mod_ch = { + .type = IIO_VOLTAGE, + .indexed = 1, + .scan_type = { + .sign = 'u', + .realbits = 1, + .shift = 0, + }, +}; + +static int iio_sd_mod_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct iio_dev *iio; + + iio = devm_iio_device_alloc(dev, 0); + if (!iio) + return -ENOMEM; + + iio->dev.parent = dev; + iio->dev.of_node = dev->of_node; + iio->name = dev_name(dev); + iio->info = &iio_sd_mod_iio_info; + iio->modes = INDIO_BUFFER_HARDWARE; + + iio->num_channels = 1; + iio->channels = &iio_sd_mod_ch; + + platform_set_drvdata(pdev, iio); + + return devm_iio_device_register(&pdev->dev, iio); +} + +static const struct of_device_id sd_adc_of_match[] = { + { .compatible = "sd-modulator" }, + { .compatible = "ads1201" }, + { } +}; +MODULE_DEVICE_TABLE(of, sd_adc_of_match); + +static struct platform_driver iio_sd_mod_adc = { + .driver = { + .name = "iio_sd_adc_mod", + .of_match_table = of_match_ptr(sd_adc_of_match), + }, + .probe = iio_sd_mod_probe, +}; + +module_platform_driver(iio_sd_mod_adc); + +MODULE_DESCRIPTION("Basic sigma delta modulator"); +MODULE_AUTHOR("Arnaud Pouliquen "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 6c82f947fc9784b774cad0b90c20a5d703f9b763 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:09 +0100 Subject: IIO: add DT bindings for stm32 DFSDM filter Add bindings that describes STM32 Digital Filter for Sigma Delta Modulators. DFSDM allows to connect sigma delta modulators. 
Signed-off-by: Arnaud Pouliquen Acked-by: Rob Herring Acked-by: Jonathan Cameron Signed-off-by: Mark Brown --- .../bindings/iio/adc/st,stm32-dfsdm-adc.txt | 128 +++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt new file mode 100644 index 000000000000..911492da48f3 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.txt @@ -0,0 +1,128 @@ +STMicroelectronics STM32 DFSDM ADC device driver + + +STM32 DFSDM ADC is a sigma delta analog-to-digital converter dedicated to +interface external sigma delta modulators to STM32 micro controllers. +It is mainly targeted for: +- Sigma delta modulators (motor control, metering...) +- PDM microphones (audio digital microphone) + +It features up to 8 serial digital interfaces (SPI or Manchester) and +up to 4 filters on stm32h7. + +Each child node match with a filter instance. + +Contents of a STM32 DFSDM root node: +------------------------------------ +Required properties: +- compatible: Should be "st,stm32h7-dfsdm". +- reg: Offset and length of the DFSDM block register set. +- clocks: IP and serial interfaces clocking. Should be set according + to rcc clock ID and "clock-names". +- clock-names: Input clock name "dfsdm" must be defined, + "audio" is optional. If defined CLKOUT is based on the audio + clock, else "dfsdm" is used. +- #interrupt-cells = <1>; +- #address-cells = <1>; +- #size-cells = <0>; + +Optional properties: +- spi-max-frequency: Requested only for SPI master mode. + SPI clock OUT frequency (Hz). This clock must be set according + to "clock" property. Frequency must be a multiple of the rcc + clock frequency. If not, SPI CLKOUT frequency will not be + accurate. 
+ +Contents of a STM32 DFSDM child nodes: +-------------------------------------- + +Required properties: +- compatible: Must be: + "st,stm32-dfsdm-adc" for sigma delta ADCs + "st,stm32-dfsdm-dmic" for audio digital microphone. +- reg: Specifies the DFSDM filter instance used. +- interrupts: IRQ lines connected to each DFSDM filter instance. +- st,adc-channels: List of single-ended channels muxed for this ADC. + valid values: + "st,stm32h7-dfsdm" compatibility: 0 to 7. +- st,adc-channel-names: List of single-ended channel names. +- st,filter-order: SinC filter order from 0 to 5. + 0: FastSinC + [1-5]: order 1 to 5. + For audio purpose it is recommended to use order 3 to 5. +- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers". + +Required properties for "st,stm32-dfsdm-adc" compatibility: +- io-channels: From common IIO binding. Used to pipe external sigma delta + modulator or internal ADC output to DFSDM channel. + This is not required for "st,stm32-dfsdm-dmic" compatibility as + PDM microphone is bound in Audio DT node. + +Required properties for "st,stm32-dfsdm-dmic" compatibility: +- #sound-dai-cells: Must be set to 0. +- dmas: DMA controller phandle and DMA request line associated to the + filter instance (specified by the field "reg") +- dma-names: Must be "rx" + +Optional properties: +- st,adc-channel-types: Single-ended channel input type. + - "SPI_R": SPI with data on rising edge (default) + - "SPI_F": SPI with data on falling edge + - "MANCH_R": manchester codec, rising edge = logic 0 + - "MANCH_F": manchester codec, falling edge = logic 1 +- st,adc-channel-clk-src: Conversion clock source. + - "CLKIN": external SPI clock (CLKIN x) + - "CLKOUT": internal SPI clock (CLKOUT) (default) + - "CLKOUT_F": internal SPI clock divided by 2 (falling edge). + - "CLKOUT_R": internal SPI clock divided by 2 (rising edge). + +- st,adc-alt-channel: Must be defined if two sigma delta modulators are + connected on same SPI input.
+ If not set, channel n is connected to SPI input n. + If set, channel n is connected to SPI input n + 1. + +- st,filter0-sync: Set to 1 to synchronize with DFSDM filter instance 0. + Used for multi microphones synchronization. + +Example of a sigma delta adc connected on DFSDM SPI port 0 +and a pdm microphone connected on DFSDM SPI port 1: + + ads1202: simple_sd_adc@0 { + compatible = "ads1202"; + #io-channel-cells = <1>; + }; + + dfsdm: dfsdm@40017000 { + compatible = "st,stm32h7-dfsdm"; + reg = <0x40017000 0x400>; + clocks = <&rcc DFSDM1_CK>; + clock-names = "dfsdm"; + #interrupt-cells = <1>; + #address-cells = <1>; + #size-cells = <0>; + + dfsdm_adc0: filter@0 { + compatible = "st,stm32-dfsdm-adc"; + #io-channel-cells = <1>; + reg = <0>; + interrupts = <110>; + st,adc-channels = <0>; + st,adc-channel-names = "sd_adc0"; + st,adc-channel-types = "SPI_F"; + st,adc-channel-clk-src = "CLKOUT"; + io-channels = <&ads1202 0>; + st,filter-order = <3>; + }; + dfsdm_pdm1: filter@1 { + compatible = "st,stm32-dfsdm-dmic"; + reg = <1>; + interrupts = <111>; + dmas = <&dmamux1 102 0x400 0x00>; + dma-names = "rx"; + st,adc-channels = <1>; + st,adc-channel-names = "dmic1"; + st,adc-channel-types = "SPI_R"; + st,adc-channel-clk-src = "CLKOUT"; + st,filter-order = <5>; + }; + } -- cgit v1.2.3 From bed73904e76fc08d0ec3a7ba3eb2ddbb2e38862c Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:10 +0100 Subject: IIO: ADC: add stm32 DFSDM core support Add driver for stm32 DFSDM peripheral. It converts a sigma delta stream into n-bit samples through a low pass filter and an integrator. stm32-dfsdm-core driver is the core part supporting the filter instances dedicated to sigma-delta ADC or audio PDM microphone purpose.
Signed-off-by: Arnaud Pouliquen Reviewed-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/adc/Kconfig | 12 ++ drivers/iio/adc/Makefile | 1 + drivers/iio/adc/stm32-dfsdm-core.c | 309 ++++++++++++++++++++++++++++++++++++ drivers/iio/adc/stm32-dfsdm.h | 310 +++++++++++++++++++++++++++++++++++++ 4 files changed, 632 insertions(+) create mode 100644 drivers/iio/adc/stm32-dfsdm-core.c create mode 100644 drivers/iio/adc/stm32-dfsdm.h diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 5f9d04a8ba53..776192e68ad5 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -668,6 +668,18 @@ config STM32_ADC This driver can also be built as a module. If so, the module will be called stm32-adc. +config STM32_DFSDM_CORE + tristate "STMicroelectronics STM32 DFSDM core" + depends on (ARCH_STM32 && OF) || COMPILE_TEST + select REGMAP + select REGMAP_MMIO + help + Select this option to enable the driver for STMicroelectronics + STM32 digital filter for sigma delta converter. + + This driver can also be built as a module. If so, the module + will be called stm32-dfsdm-core. 
+ config STX104 tristate "Apex Embedded Systems STX104 driver" depends on PC104 && X86 && ISA_BUS_API diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index fa5e7949f642..8dda348f1920 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_STX104) += stx104.o obj-$(CONFIG_SUN4I_GPADC) += sun4i-gpadc-iio.o obj-$(CONFIG_STM32_ADC_CORE) += stm32-adc-core.o obj-$(CONFIG_STM32_ADC) += stm32-adc.o +obj-$(CONFIG_STM32_DFSDM_CORE) += stm32-dfsdm-core.o obj-$(CONFIG_TI_ADC081C) += ti-adc081c.o obj-$(CONFIG_TI_ADC0832) += ti-adc0832.o obj-$(CONFIG_TI_ADC084S021) += ti-adc084s021.o diff --git a/drivers/iio/adc/stm32-dfsdm-core.c b/drivers/iio/adc/stm32-dfsdm-core.c new file mode 100644 index 000000000000..72427414db7f --- /dev/null +++ b/drivers/iio/adc/stm32-dfsdm-core.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is part the core part STM32 DFSDM driver + * + * Copyright (C) 2017, STMicroelectronics - All Rights Reserved + * Author(s): Arnaud Pouliquen for STMicroelectronics. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stm32-dfsdm.h" + +struct stm32_dfsdm_dev_data { + unsigned int num_filters; + unsigned int num_channels; + const struct regmap_config *regmap_cfg; +}; + +#define STM32H7_DFSDM_NUM_FILTERS 4 +#define STM32H7_DFSDM_NUM_CHANNELS 8 + +static bool stm32_dfsdm_volatile_reg(struct device *dev, unsigned int reg) +{ + if (reg < DFSDM_FILTER_BASE_ADR) + return false; + + /* + * Mask is done on register to avoid to list registers of all + * filter instances. 
+ */ + switch (reg & DFSDM_FILTER_REG_MASK) { + case DFSDM_CR1(0) & DFSDM_FILTER_REG_MASK: + case DFSDM_ISR(0) & DFSDM_FILTER_REG_MASK: + case DFSDM_JDATAR(0) & DFSDM_FILTER_REG_MASK: + case DFSDM_RDATAR(0) & DFSDM_FILTER_REG_MASK: + return true; + } + + return false; +} + +static const struct regmap_config stm32h7_dfsdm_regmap_cfg = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = sizeof(u32), + .max_register = 0x2B8, + .volatile_reg = stm32_dfsdm_volatile_reg, + .fast_io = true, +}; + +static const struct stm32_dfsdm_dev_data stm32h7_dfsdm_data = { + .num_filters = STM32H7_DFSDM_NUM_FILTERS, + .num_channels = STM32H7_DFSDM_NUM_CHANNELS, + .regmap_cfg = &stm32h7_dfsdm_regmap_cfg, +}; + +struct dfsdm_priv { + struct platform_device *pdev; /* platform device */ + + struct stm32_dfsdm dfsdm; /* common data exported for all instances */ + + unsigned int spi_clk_out_div; /* SPI clkout divider value */ + atomic_t n_active_ch; /* number of current active channels */ + + struct clk *clk; /* DFSDM clock */ + struct clk *aclk; /* audio clock */ +}; + +/** + * stm32_dfsdm_start_dfsdm - start global dfsdm interface. + * + * Enable interface if n_active_ch is not null. + * @dfsdm: Handle used to retrieve dfsdm context. 
+ */
+int stm32_dfsdm_start_dfsdm(struct stm32_dfsdm *dfsdm)
+{
+	struct dfsdm_priv *priv = container_of(dfsdm, struct dfsdm_priv, dfsdm);
+	struct device *dev = &priv->pdev->dev;
+	unsigned int clk_div = priv->spi_clk_out_div;
+	int ret;
+
+	/* Only the caller that moves the counter from 0 to 1 sets up the IP */
+	if (atomic_inc_return(&priv->n_active_ch) == 1) {
+		ret = clk_prepare_enable(priv->clk);
+		if (ret < 0) {
+			dev_err(dev, "Failed to start clock\n");
+			goto error_ret;
+		}
+		/* The audio clock is optional: aclk is NULL when not provided */
+		if (priv->aclk) {
+			ret = clk_prepare_enable(priv->aclk);
+			if (ret < 0) {
+				dev_err(dev, "Failed to start audio clock\n");
+				goto disable_clk;
+			}
+		}
+
+		/* Output the SPI CLKOUT (if clk_div == 0 clock is OFF) */
+		ret = regmap_update_bits(dfsdm->regmap, DFSDM_CHCFGR1(0),
+					 DFSDM_CHCFGR1_CKOUTDIV_MASK,
+					 DFSDM_CHCFGR1_CKOUTDIV(clk_div));
+		if (ret < 0)
+			goto disable_aclk;
+
+		/* Global enable of DFSDM interface */
+		ret = regmap_update_bits(dfsdm->regmap, DFSDM_CHCFGR1(0),
+					 DFSDM_CHCFGR1_DFSDMEN_MASK,
+					 DFSDM_CHCFGR1_DFSDMEN(1));
+		if (ret < 0)
+			goto disable_aclk;
+	}
+
+	dev_dbg(dev, "%s: n_active_ch %d\n", __func__,
+		atomic_read(&priv->n_active_ch));
+
+	return 0;
+
+	/* Error unwinding: release the clocks in reverse order of enabling */
+disable_aclk:
+	clk_disable_unprepare(priv->aclk);
+disable_clk:
+	clk_disable_unprepare(priv->clk);
+
+error_ret:
+	/* Give back the reference taken by atomic_inc_return() above */
+	atomic_dec(&priv->n_active_ch);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(stm32_dfsdm_start_dfsdm);
+
+/**
+ * stm32_dfsdm_stop_dfsdm - stop global DFSDM interface.
+ *
+ * Disable the interface when n_active_ch drops to zero.
+ * @dfsdm: Handle used to retrieve dfsdm context.
+ */
+int stm32_dfsdm_stop_dfsdm(struct stm32_dfsdm *dfsdm)
+{
+	struct dfsdm_priv *priv = container_of(dfsdm, struct dfsdm_priv, dfsdm);
+	int ret;
+
+	/* Only the caller that drops the counter to zero shuts the IP down */
+	if (atomic_dec_and_test(&priv->n_active_ch)) {
+		/* Global disable of DFSDM interface */
+		ret = regmap_update_bits(dfsdm->regmap, DFSDM_CHCFGR1(0),
+					 DFSDM_CHCFGR1_DFSDMEN_MASK,
+					 DFSDM_CHCFGR1_DFSDMEN(0));
+		if (ret < 0)
+			return ret;
+
+		/* Stop SPI CLKOUT */
+		ret = regmap_update_bits(dfsdm->regmap, DFSDM_CHCFGR1(0),
+					 DFSDM_CHCFGR1_CKOUTDIV_MASK,
+					 DFSDM_CHCFGR1_CKOUTDIV(0));
+		if (ret < 0)
+			return ret;
+
+		clk_disable_unprepare(priv->clk);
+		if (priv->aclk)
+			clk_disable_unprepare(priv->aclk);
+	}
+	dev_dbg(&priv->pdev->dev, "%s: n_active_ch %d\n", __func__,
+		atomic_read(&priv->n_active_ch));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(stm32_dfsdm_stop_dfsdm);
+
+/*
+ * Retrieve the register area, the clocks and the optional SPI clock-out
+ * frequency of the DFSDM from the platform device and its device tree node.
+ * Returns 0 on success or a negative error code.
+ */
+static int stm32_dfsdm_parse_of(struct platform_device *pdev,
+				struct dfsdm_priv *priv)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct resource *res;
+	unsigned long clk_freq;
+	unsigned int spi_freq, rem;
+	int ret;
+
+	if (!node)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get memory resource\n");
+		return -ENODEV;
+	}
+	priv->dfsdm.phys_base = res->start;
+	priv->dfsdm.base = devm_ioremap_resource(&pdev->dev, res);
+	/* devm_ioremap_resource() reports failure through an ERR_PTR value */
+	if (IS_ERR(priv->dfsdm.base))
+		return PTR_ERR(priv->dfsdm.base);
+
+	/*
+	 * "dfsdm" clock is mandatory for DFSDM peripheral clocking.
+	 * "dfsdm" or "audio" clocks can be used as source clock for
+	 * the SPI clock out signal and internal processing, depending
+	 * on use case.
+	 */
+	priv->clk = devm_clk_get(&pdev->dev, "dfsdm");
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "No stm32_dfsdm_clk clock found\n");
+		/* Keep the real error code so that -EPROBE_DEFER is not lost */
+		return PTR_ERR(priv->clk);
+	}
+
+	/* The "audio" clock is optional: fall back to the "dfsdm" clock */
+	priv->aclk = devm_clk_get(&pdev->dev, "audio");
+	if (IS_ERR(priv->aclk))
+		priv->aclk = NULL;
+
+	if (priv->aclk)
+		clk_freq = clk_get_rate(priv->aclk);
+	else
+		clk_freq = clk_get_rate(priv->clk);
+
+	/* SPI clock out frequency */
+	ret = of_property_read_u32(pdev->dev.of_node, "spi-max-frequency",
+				   &spi_freq);
+	if (ret < 0) {
+		/* No SPI master mode */
+		return 0;
+	}
+
+	/* A null frequency would trigger a division by zero below */
+	if (!spi_freq) {
+		dev_err(&pdev->dev, "Invalid spi-max-frequency\n");
+		return -EINVAL;
+	}
+
+	priv->spi_clk_out_div = div_u64_rem(clk_freq, spi_freq, &rem);
+	/* A null quotient would wrap around once converted to a register value */
+	if (!priv->spi_clk_out_div) {
+		dev_err(&pdev->dev, "spi-max-frequency not achievable\n");
+		return -EINVAL;
+	}
+	/* The register field holds the divider minus one */
+	priv->spi_clk_out_div--;
+	priv->dfsdm.spi_master_freq = spi_freq;
+
+	if (rem) {
+		dev_warn(&pdev->dev, "SPI clock not accurate\n");
+		dev_warn(&pdev->dev, "%lu = %u * %u + %u\n",
+			 clk_freq, spi_freq, priv->spi_clk_out_div + 1, rem);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id stm32_dfsdm_of_match[] = {
+	{
+		.compatible = "st,stm32h7-dfsdm",
+		.data = &stm32h7_dfsdm_data,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, stm32_dfsdm_of_match);
+
+/*
+ * Allocate the driver private data and the filter/channel resource lists
+ * sized from the match data, parse the device tree, create the regmap and
+ * populate the child devices.
+ */
+static int stm32_dfsdm_probe(struct platform_device *pdev)
+{
+	struct dfsdm_priv *priv;
+	struct device_node *pnode = pdev->dev.of_node;
+	const struct of_device_id *of_id;
+	const struct stm32_dfsdm_dev_data *dev_data;
+	struct stm32_dfsdm *dfsdm;
+	int ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->pdev = pdev;
+
+	/* of_match_node() returns NULL when no table entry matches */
+	of_id = of_match_node(stm32_dfsdm_of_match, pnode);
+	if (!of_id || !of_id->data) {
+		dev_err(&pdev->dev, "Data associated to device is missing\n");
+		return -EINVAL;
+	}
+
+	dev_data = (const struct stm32_dfsdm_dev_data *)of_id->data;
+	dfsdm = &priv->dfsdm;
+	dfsdm->fl_list = devm_kcalloc(&pdev->dev, dev_data->num_filters,
+				      sizeof(*dfsdm->fl_list), GFP_KERNEL);
+	if (!dfsdm->fl_list)
+		return -ENOMEM;
+
+	dfsdm->num_fls = dev_data->num_filters;
+	dfsdm->ch_list = devm_kcalloc(&pdev->dev, dev_data->num_channels,
+				      sizeof(*dfsdm->ch_list),
+				      GFP_KERNEL);
+	if
(!dfsdm->ch_list) + return -ENOMEM; + dfsdm->num_chs = dev_data->num_channels; + + ret = stm32_dfsdm_parse_of(pdev, priv); + if (ret < 0) + return ret; + + dfsdm->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "dfsdm", + dfsdm->base, + &stm32h7_dfsdm_regmap_cfg); + if (IS_ERR(dfsdm->regmap)) { + ret = PTR_ERR(dfsdm->regmap); + dev_err(&pdev->dev, "%s: Failed to allocate regmap: %d\n", + __func__, ret); + return ret; + } + + platform_set_drvdata(pdev, dfsdm); + + return devm_of_platform_populate(&pdev->dev); +} + +static struct platform_driver stm32_dfsdm_driver = { + .probe = stm32_dfsdm_probe, + .driver = { + .name = "stm32-dfsdm", + .of_match_table = stm32_dfsdm_of_match, + }, +}; + +module_platform_driver(stm32_dfsdm_driver); + +MODULE_AUTHOR("Arnaud Pouliquen "); +MODULE_DESCRIPTION("STMicroelectronics STM32 dfsdm driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/adc/stm32-dfsdm.h b/drivers/iio/adc/stm32-dfsdm.h new file mode 100644 index 000000000000..8708394b0725 --- /dev/null +++ b/drivers/iio/adc/stm32-dfsdm.h @@ -0,0 +1,310 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is part of STM32 DFSDM driver + * + * Copyright (C) 2017, STMicroelectronics - All Rights Reserved + * Author(s): Arnaud Pouliquen . + */ + +#ifndef MDF_STM32_DFSDM__H +#define MDF_STM32_DFSDM__H + +#include + +/* + * STM32 DFSDM - global register map + * ________________________________________________________ + * | Offset | Registers block | + * -------------------------------------------------------- + * | 0x000 | CHANNEL 0 + COMMON CHANNEL FIELDS | + * -------------------------------------------------------- + * | 0x020 | CHANNEL 1 | + * -------------------------------------------------------- + * | ... | ..... 
| + * -------------------------------------------------------- + * | 0x0E0 | CHANNEL 7 | + * -------------------------------------------------------- + * | 0x100 | FILTER 0 + COMMON FILTER FIELDs | + * -------------------------------------------------------- + * | 0x200 | FILTER 1 | + * -------------------------------------------------------- + * | 0x300 | FILTER 2 | + * -------------------------------------------------------- + * | 0x400 | FILTER 3 | + * -------------------------------------------------------- + */ + +/* + * Channels register definitions + */ +#define DFSDM_CHCFGR1(y) ((y) * 0x20 + 0x00) +#define DFSDM_CHCFGR2(y) ((y) * 0x20 + 0x04) +#define DFSDM_AWSCDR(y) ((y) * 0x20 + 0x08) +#define DFSDM_CHWDATR(y) ((y) * 0x20 + 0x0C) +#define DFSDM_CHDATINR(y) ((y) * 0x20 + 0x10) + +/* CHCFGR1: Channel configuration register 1 */ +#define DFSDM_CHCFGR1_SITP_MASK GENMASK(1, 0) +#define DFSDM_CHCFGR1_SITP(v) FIELD_PREP(DFSDM_CHCFGR1_SITP_MASK, v) +#define DFSDM_CHCFGR1_SPICKSEL_MASK GENMASK(3, 2) +#define DFSDM_CHCFGR1_SPICKSEL(v) FIELD_PREP(DFSDM_CHCFGR1_SPICKSEL_MASK, v) +#define DFSDM_CHCFGR1_SCDEN_MASK BIT(5) +#define DFSDM_CHCFGR1_SCDEN(v) FIELD_PREP(DFSDM_CHCFGR1_SCDEN_MASK, v) +#define DFSDM_CHCFGR1_CKABEN_MASK BIT(6) +#define DFSDM_CHCFGR1_CKABEN(v) FIELD_PREP(DFSDM_CHCFGR1_CKABEN_MASK, v) +#define DFSDM_CHCFGR1_CHEN_MASK BIT(7) +#define DFSDM_CHCFGR1_CHEN(v) FIELD_PREP(DFSDM_CHCFGR1_CHEN_MASK, v) +#define DFSDM_CHCFGR1_CHINSEL_MASK BIT(8) +#define DFSDM_CHCFGR1_CHINSEL(v) FIELD_PREP(DFSDM_CHCFGR1_CHINSEL_MASK, v) +#define DFSDM_CHCFGR1_DATMPX_MASK GENMASK(13, 12) +#define DFSDM_CHCFGR1_DATMPX(v) FIELD_PREP(DFSDM_CHCFGR1_DATMPX_MASK, v) +#define DFSDM_CHCFGR1_DATPACK_MASK GENMASK(15, 14) +#define DFSDM_CHCFGR1_DATPACK(v) FIELD_PREP(DFSDM_CHCFGR1_DATPACK_MASK, v) +#define DFSDM_CHCFGR1_CKOUTDIV_MASK GENMASK(23, 16) +#define DFSDM_CHCFGR1_CKOUTDIV(v) FIELD_PREP(DFSDM_CHCFGR1_CKOUTDIV_MASK, v) +#define DFSDM_CHCFGR1_CKOUTSRC_MASK BIT(30) +#define 
DFSDM_CHCFGR1_CKOUTSRC(v) FIELD_PREP(DFSDM_CHCFGR1_CKOUTSRC_MASK, v) +#define DFSDM_CHCFGR1_DFSDMEN_MASK BIT(31) +#define DFSDM_CHCFGR1_DFSDMEN(v) FIELD_PREP(DFSDM_CHCFGR1_DFSDMEN_MASK, v) + +/* CHCFGR2: Channel configuration register 2 */ +#define DFSDM_CHCFGR2_DTRBS_MASK GENMASK(7, 3) +#define DFSDM_CHCFGR2_DTRBS(v) FIELD_PREP(DFSDM_CHCFGR2_DTRBS_MASK, v) +#define DFSDM_CHCFGR2_OFFSET_MASK GENMASK(31, 8) +#define DFSDM_CHCFGR2_OFFSET(v) FIELD_PREP(DFSDM_CHCFGR2_OFFSET_MASK, v) + +/* AWSCDR: Channel analog watchdog and short circuit detector */ +#define DFSDM_AWSCDR_SCDT_MASK GENMASK(7, 0) +#define DFSDM_AWSCDR_SCDT(v) FIELD_PREP(DFSDM_AWSCDR_SCDT_MASK, v) +#define DFSDM_AWSCDR_BKSCD_MASK GENMASK(15, 12) +#define DFSDM_AWSCDR_BKSCD(v) FIELD_PREP(DFSDM_AWSCDR_BKSCD_MASK, v) +#define DFSDM_AWSCDR_AWFOSR_MASK GENMASK(20, 16) +#define DFSDM_AWSCDR_AWFOSR(v) FIELD_PREP(DFSDM_AWSCDR_AWFOSR_MASK, v) +#define DFSDM_AWSCDR_AWFORD_MASK GENMASK(23, 22) +#define DFSDM_AWSCDR_AWFORD(v) FIELD_PREP(DFSDM_AWSCDR_AWFORD_MASK, v) + +/* + * Filters register definitions + */ +#define DFSDM_FILTER_BASE_ADR 0x100 +#define DFSDM_FILTER_REG_MASK 0x7F +#define DFSDM_FILTER_X_BASE_ADR(x) ((x) * 0x80 + DFSDM_FILTER_BASE_ADR) + +#define DFSDM_CR1(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x00) +#define DFSDM_CR2(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x04) +#define DFSDM_ISR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x08) +#define DFSDM_ICR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x0C) +#define DFSDM_JCHGR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x10) +#define DFSDM_FCR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x14) +#define DFSDM_JDATAR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x18) +#define DFSDM_RDATAR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x1C) +#define DFSDM_AWHTR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x20) +#define DFSDM_AWLTR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x24) +#define DFSDM_AWSR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x28) +#define DFSDM_AWCFR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x2C) +#define DFSDM_EXMAX(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x30) 
+#define DFSDM_EXMIN(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x34) +#define DFSDM_CNVTIMR(x) (DFSDM_FILTER_X_BASE_ADR(x) + 0x38) + +/* CR1 Control register 1 */ +#define DFSDM_CR1_DFEN_MASK BIT(0) +#define DFSDM_CR1_DFEN(v) FIELD_PREP(DFSDM_CR1_DFEN_MASK, v) +#define DFSDM_CR1_JSWSTART_MASK BIT(1) +#define DFSDM_CR1_JSWSTART(v) FIELD_PREP(DFSDM_CR1_JSWSTART_MASK, v) +#define DFSDM_CR1_JSYNC_MASK BIT(3) +#define DFSDM_CR1_JSYNC(v) FIELD_PREP(DFSDM_CR1_JSYNC_MASK, v) +#define DFSDM_CR1_JSCAN_MASK BIT(4) +#define DFSDM_CR1_JSCAN(v) FIELD_PREP(DFSDM_CR1_JSCAN_MASK, v) +#define DFSDM_CR1_JDMAEN_MASK BIT(5) +#define DFSDM_CR1_JDMAEN(v) FIELD_PREP(DFSDM_CR1_JDMAEN_MASK, v) +#define DFSDM_CR1_JEXTSEL_MASK GENMASK(12, 8) +#define DFSDM_CR1_JEXTSEL(v) FIELD_PREP(DFSDM_CR1_JEXTSEL_MASK, v) +#define DFSDM_CR1_JEXTEN_MASK GENMASK(14, 13) +#define DFSDM_CR1_JEXTEN(v) FIELD_PREP(DFSDM_CR1_JEXTEN_MASK, v) +#define DFSDM_CR1_RSWSTART_MASK BIT(17) +#define DFSDM_CR1_RSWSTART(v) FIELD_PREP(DFSDM_CR1_RSWSTART_MASK, v) +#define DFSDM_CR1_RCONT_MASK BIT(18) +#define DFSDM_CR1_RCONT(v) FIELD_PREP(DFSDM_CR1_RCONT_MASK, v) +#define DFSDM_CR1_RSYNC_MASK BIT(19) +#define DFSDM_CR1_RSYNC(v) FIELD_PREP(DFSDM_CR1_RSYNC_MASK, v) +#define DFSDM_CR1_RDMAEN_MASK BIT(21) +#define DFSDM_CR1_RDMAEN(v) FIELD_PREP(DFSDM_CR1_RDMAEN_MASK, v) +#define DFSDM_CR1_RCH_MASK GENMASK(26, 24) +#define DFSDM_CR1_RCH(v) FIELD_PREP(DFSDM_CR1_RCH_MASK, v) +#define DFSDM_CR1_FAST_MASK BIT(29) +#define DFSDM_CR1_FAST(v) FIELD_PREP(DFSDM_CR1_FAST_MASK, v) +#define DFSDM_CR1_AWFSEL_MASK BIT(30) +#define DFSDM_CR1_AWFSEL(v) FIELD_PREP(DFSDM_CR1_AWFSEL_MASK, v) + +/* CR2: Control register 2 */ +#define DFSDM_CR2_IE_MASK GENMASK(6, 0) +#define DFSDM_CR2_IE(v) FIELD_PREP(DFSDM_CR2_IE_MASK, v) +#define DFSDM_CR2_JEOCIE_MASK BIT(0) +#define DFSDM_CR2_JEOCIE(v) FIELD_PREP(DFSDM_CR2_JEOCIE_MASK, v) +#define DFSDM_CR2_REOCIE_MASK BIT(1) +#define DFSDM_CR2_REOCIE(v) FIELD_PREP(DFSDM_CR2_REOCIE_MASK, v) +#define DFSDM_CR2_JOVRIE_MASK 
BIT(2)
+#define DFSDM_CR2_JOVRIE(v)	FIELD_PREP(DFSDM_CR2_JOVRIE_MASK, v)
+#define DFSDM_CR2_ROVRIE_MASK	BIT(3)
+#define DFSDM_CR2_ROVRIE(v)	FIELD_PREP(DFSDM_CR2_ROVRIE_MASK, v)
+#define DFSDM_CR2_AWDIE_MASK	BIT(4)
+#define DFSDM_CR2_AWDIE(v)	FIELD_PREP(DFSDM_CR2_AWDIE_MASK, v)
+#define DFSDM_CR2_SCDIE_MASK	BIT(5)
+#define DFSDM_CR2_SCDIE(v)	FIELD_PREP(DFSDM_CR2_SCDIE_MASK, v)
+#define DFSDM_CR2_CKABIE_MASK	BIT(6)
+#define DFSDM_CR2_CKABIE(v)	FIELD_PREP(DFSDM_CR2_CKABIE_MASK, v)
+#define DFSDM_CR2_EXCH_MASK	GENMASK(15, 8)
+#define DFSDM_CR2_EXCH(v)	FIELD_PREP(DFSDM_CR2_EXCH_MASK, v)
+#define DFSDM_CR2_AWDCH_MASK	GENMASK(23, 16)
+#define DFSDM_CR2_AWDCH(v)	FIELD_PREP(DFSDM_CR2_AWDCH_MASK, v)
+
+/* ISR: Interrupt status register */
+#define DFSDM_ISR_JEOCF_MASK	BIT(0)
+#define DFSDM_ISR_JEOCF(v)	FIELD_PREP(DFSDM_ISR_JEOCF_MASK, v)
+#define DFSDM_ISR_REOCF_MASK	BIT(1)
+#define DFSDM_ISR_REOCF(v)	FIELD_PREP(DFSDM_ISR_REOCF_MASK, v)
+#define DFSDM_ISR_JOVRF_MASK	BIT(2)
+#define DFSDM_ISR_JOVRF(v)	FIELD_PREP(DFSDM_ISR_JOVRF_MASK, v)
+#define DFSDM_ISR_ROVRF_MASK	BIT(3)
+#define DFSDM_ISR_ROVRF(v)	FIELD_PREP(DFSDM_ISR_ROVRF_MASK, v)
+#define DFSDM_ISR_AWDF_MASK	BIT(4)
+#define DFSDM_ISR_AWDF(v)	FIELD_PREP(DFSDM_ISR_AWDF_MASK, v)
+#define DFSDM_ISR_JCIP_MASK	BIT(13)
+#define DFSDM_ISR_JCIP(v)	FIELD_PREP(DFSDM_ISR_JCIP_MASK, v)
+#define DFSDM_ISR_RCIP_MASK	BIT(14)
+/* The field helper must be built from the _MASK definition above */
+#define DFSDM_ISR_RCIP(v)	FIELD_PREP(DFSDM_ISR_RCIP_MASK, v)
+#define DFSDM_ISR_CKABF_MASK	GENMASK(23, 16)
+#define DFSDM_ISR_CKABF(v)	FIELD_PREP(DFSDM_ISR_CKABF_MASK, v)
+#define DFSDM_ISR_SCDF_MASK	GENMASK(31, 24)
+#define DFSDM_ISR_SCDF(v)	FIELD_PREP(DFSDM_ISR_SCDF_MASK, v)
+
+/* ICR: Interrupt flag clear register */
+#define DFSDM_ICR_CLRJOVRF_MASK	BIT(2)
+#define DFSDM_ICR_CLRJOVRF(v)	FIELD_PREP(DFSDM_ICR_CLRJOVRF_MASK, v)
+#define DFSDM_ICR_CLRROVRF_MASK	BIT(3)
+#define DFSDM_ICR_CLRROVRF(v)	FIELD_PREP(DFSDM_ICR_CLRROVRF_MASK, v)
+#define DFSDM_ICR_CLRCKABF_MASK	GENMASK(23, 16)
+#define DFSDM_ICR_CLRCKABF(v)
FIELD_PREP(DFSDM_ICR_CLRCKABF_MASK, v)
+#define DFSDM_ICR_CLRCKABF_CH_MASK(y)	BIT(16 + (y))
+#define DFSDM_ICR_CLRCKABF_CH(v, y)	\
+			(((v) << (16 + (y))) & DFSDM_ICR_CLRCKABF_CH_MASK(y))
+#define DFSDM_ICR_CLRSCDF_MASK	GENMASK(31, 24)
+#define DFSDM_ICR_CLRSCDF(v)	FIELD_PREP(DFSDM_ICR_CLRSCDF_MASK, v)
+#define DFSDM_ICR_CLRSCDF_CH_MASK(y)	BIT(24 + (y))
+/* Per-channel helper: must use the per-channel mask, like CLRCKABF_CH */
+#define DFSDM_ICR_CLRSCDF_CH(v, y)	\
+			(((v) << (24 + (y))) & DFSDM_ICR_CLRSCDF_CH_MASK(y))
+
+/* FCR: Filter control register */
+#define DFSDM_FCR_IOSR_MASK	GENMASK(7, 0)
+#define DFSDM_FCR_IOSR(v)	FIELD_PREP(DFSDM_FCR_IOSR_MASK, v)
+#define DFSDM_FCR_FOSR_MASK	GENMASK(25, 16)
+#define DFSDM_FCR_FOSR(v)	FIELD_PREP(DFSDM_FCR_FOSR_MASK, v)
+#define DFSDM_FCR_FORD_MASK	GENMASK(31, 29)
+#define DFSDM_FCR_FORD(v)	FIELD_PREP(DFSDM_FCR_FORD_MASK, v)
+
+/* RDATAR: Filter data register for regular channel */
+#define DFSDM_DATAR_CH_MASK	GENMASK(2, 0)
+#define DFSDM_DATAR_DATA_OFFSET	8
+#define DFSDM_DATAR_DATA_MASK	GENMASK(31, DFSDM_DATAR_DATA_OFFSET)
+
+/* AWLTR: Filter analog watchdog low threshold register */
+#define DFSDM_AWLTR_BKAWL_MASK	GENMASK(3, 0)
+#define DFSDM_AWLTR_BKAWL(v)	FIELD_PREP(DFSDM_AWLTR_BKAWL_MASK, v)
+#define DFSDM_AWLTR_AWLT_MASK	GENMASK(31, 8)
+#define DFSDM_AWLTR_AWLT(v)	FIELD_PREP(DFSDM_AWLTR_AWLT_MASK, v)
+
+/* AWHTR: Filter analog watchdog high threshold register */
+#define DFSDM_AWHTR_BKAWH_MASK	GENMASK(3, 0)
+#define DFSDM_AWHTR_BKAWH(v)	FIELD_PREP(DFSDM_AWHTR_BKAWH_MASK, v)
+#define DFSDM_AWHTR_AWHT_MASK	GENMASK(31, 8)
+#define DFSDM_AWHTR_AWHT(v)	FIELD_PREP(DFSDM_AWHTR_AWHT_MASK, v)
+
+/* AWSR: Filter watchdog status register */
+#define DFSDM_AWSR_AWLTF_MASK	GENMASK(7, 0)
+#define DFSDM_AWSR_AWLTF(v)	FIELD_PREP(DFSDM_AWSR_AWLTF_MASK, v)
+#define DFSDM_AWSR_AWHTF_MASK	GENMASK(15, 8)
+#define DFSDM_AWSR_AWHTF(v)	FIELD_PREP(DFSDM_AWSR_AWHTF_MASK, v)
+
+/* AWCFR: Filter watchdog clear flag register */
+#define DFSDM_AWCFR_AWLTF_MASK	GENMASK(7, 0)
+#define DFSDM_AWCFR_AWLTF(v)
FIELD_PREP(DFSDM_AWCFR_AWLTF_MASK, v) +#define DFSDM_AWCFR_AWHTF_MASK GENMASK(15, 8) +#define DFSDM_AWCFR_AWHTF(v) FIELD_PREP(DFSDM_AWCFR_AWHTF_MASK, v) + +/* DFSDM filter order */ +enum stm32_dfsdm_sinc_order { + DFSDM_FASTSINC_ORDER, /* FastSinc filter type */ + DFSDM_SINC1_ORDER, /* Sinc 1 filter type */ + DFSDM_SINC2_ORDER, /* Sinc 2 filter type */ + DFSDM_SINC3_ORDER, /* Sinc 3 filter type */ + DFSDM_SINC4_ORDER, /* Sinc 4 filter type (N.A. for watchdog) */ + DFSDM_SINC5_ORDER, /* Sinc 5 filter type (N.A. for watchdog) */ + DFSDM_NB_SINC_ORDER, +}; + +/** + * struct stm32_dfsdm_filter - structure relative to stm32 FDSDM filter + * @iosr: integrator oversampling + * @fosr: filter oversampling + * @ford: filter order + * @res: output sample resolution + * @sync_mode: filter synchronized with filter 0 + * @fast: filter fast mode + */ +struct stm32_dfsdm_filter { + unsigned int iosr; + unsigned int fosr; + enum stm32_dfsdm_sinc_order ford; + u64 res; + unsigned int sync_mode; + unsigned int fast; +}; + +/** + * struct stm32_dfsdm_channel - structure relative to stm32 FDSDM channel + * @id: id of the channel + * @type: interface type linked to stm32_dfsdm_chan_type + * @src: interface type linked to stm32_dfsdm_chan_src + * @alt_si: alternative serial input interface + */ +struct stm32_dfsdm_channel { + unsigned int id; + unsigned int type; + unsigned int src; + unsigned int alt_si; +}; + +/** + * struct stm32_dfsdm - stm32 FDSDM driver common data (for all instances) + * @base: control registers base cpu addr + * @phys_base: DFSDM IP register physical address + * @regmap: regmap for register read/write + * @fl_list: filter resources list + * @num_fls: number of filter resources available + * @ch_list: channel resources list + * @num_chs: number of channel resources available + * @spi_master_freq: SPI clock out frequency + */ +struct stm32_dfsdm { + void __iomem *base; + phys_addr_t phys_base; + struct regmap *regmap; + struct stm32_dfsdm_filter *fl_list; + 
unsigned int num_fls; + struct stm32_dfsdm_channel *ch_list; + unsigned int num_chs; + unsigned int spi_master_freq; +}; + +/* DFSDM channel serial spi clock source */ +enum stm32_dfsdm_spi_clk_src { + DFSDM_CHANNEL_SPI_CLOCK_EXTERNAL, + DFSDM_CHANNEL_SPI_CLOCK_INTERNAL, + DFSDM_CHANNEL_SPI_CLOCK_INTERNAL_DIV2_FALLING, + DFSDM_CHANNEL_SPI_CLOCK_INTERNAL_DIV2_RISING +}; + +int stm32_dfsdm_start_dfsdm(struct stm32_dfsdm *dfsdm); +int stm32_dfsdm_stop_dfsdm(struct stm32_dfsdm *dfsdm); + +#endif -- cgit v1.2.3 From e2e6771c646251657fafb2a78db566d5c2c70635 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:11 +0100 Subject: IIO: ADC: add STM32 DFSDM sigma delta ADC support Add DFSDM driver to handle sigma delta ADC. Signed-off-by: Arnaud Pouliquen Reviewed-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/adc/Kconfig | 13 + drivers/iio/adc/Makefile | 1 + drivers/iio/adc/stm32-dfsdm-adc.c | 728 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 742 insertions(+) create mode 100644 drivers/iio/adc/stm32-dfsdm-adc.c diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 776192e68ad5..39e3b345a6c8 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -680,6 +680,19 @@ config STM32_DFSDM_CORE This driver can also be built as a module. If so, the module will be called stm32-dfsdm-core. +config STM32_DFSDM_ADC + tristate "STMicroelectronics STM32 dfsdm adc" + depends on (ARCH_STM32 && OF) || COMPILE_TEST + select STM32_DFSDM_CORE + select REGMAP_MMIO + select IIO_BUFFER_HW_CONSUMER + help + Select this option to support ADCSigma delta modulator for + STMicroelectronics STM32 digital filter for sigma delta converter. + + This driver can also be built as a module. If so, the module + will be called stm32-dfsdm-adc. 
+ config STX104 tristate "Apex Embedded Systems STX104 driver" depends on PC104 && X86 && ISA_BUS_API diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile index 8dda348f1920..28a9423997f3 100644 --- a/drivers/iio/adc/Makefile +++ b/drivers/iio/adc/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_SUN4I_GPADC) += sun4i-gpadc-iio.o obj-$(CONFIG_STM32_ADC_CORE) += stm32-adc-core.o obj-$(CONFIG_STM32_ADC) += stm32-adc.o obj-$(CONFIG_STM32_DFSDM_CORE) += stm32-dfsdm-core.o +obj-$(CONFIG_STM32_DFSDM_ADC) += stm32-dfsdm-adc.o obj-$(CONFIG_TI_ADC081C) += ti-adc081c.o obj-$(CONFIG_TI_ADC0832) += ti-adc0832.o obj-$(CONFIG_TI_ADC084S021) += ti-adc084s021.o diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c new file mode 100644 index 000000000000..68b5920e92cb --- /dev/null +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -0,0 +1,728 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is the ADC part of the STM32 DFSDM driver + * + * Copyright (C) 2017, STMicroelectronics - All Rights Reserved + * Author: Arnaud Pouliquen . 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "stm32-dfsdm.h"
+
+/* Conversion timeout (100000 us, converted to jiffies through ms) */
+#define DFSDM_TIMEOUT_US 100000
+#define DFSDM_TIMEOUT (msecs_to_jiffies(DFSDM_TIMEOUT_US / 1000))
+
+/* Oversampling attribute default */
+#define DFSDM_DEFAULT_OVERSAMPLING  100
+
+/* Oversampling max values */
+#define DFSDM_MAX_INT_OVERSAMPLING 256
+#define DFSDM_MAX_FL_OVERSAMPLING 1024
+
+/* Max sample resolutions */
+#define DFSDM_MAX_RES BIT(31)
+#define DFSDM_DATA_RES BIT(23)
+
+/* Kind of consumer a DFSDM filter instance is used for */
+enum sd_converter_type {
+	DFSDM_AUDIO,
+	DFSDM_IIO,
+};
+
+/* Per-compatible data of the ADC part of the driver */
+struct stm32_dfsdm_dev_data {
+	int type;
+	int (*init)(struct iio_dev *indio_dev);
+	unsigned int num_channels;
+	const struct regmap_config *regmap_cfg;
+};
+
+/* Private data of one DFSDM ADC (filter) instance */
+struct stm32_dfsdm_adc {
+	struct stm32_dfsdm *dfsdm;	/* common DFSDM data */
+	const struct stm32_dfsdm_dev_data *dev_data;
+	unsigned int fl_id;		/* filter index */
+	unsigned int ch_id;		/* channel index */
+
+	/* ADC specific */
+	unsigned int oversamp;		/* requested oversampling ratio */
+	struct iio_hw_consumer *hwc;
+	struct completion completion;	/* signals end of a conversion */
+	u32 *buffer;			/* destination of the converted sample */
+
+};
+
+/* Association between a device tree string and a register field value */
+struct stm32_dfsdm_str2field {
+	const char	*name;
+	unsigned int	val;
+};
+
+/* DFSDM channel serial interface type */
+static const struct stm32_dfsdm_str2field stm32_dfsdm_chan_type[] = {
+	{ "SPI_R", 0 }, /* SPI with data on rising edge */
+	{ "SPI_F", 1 }, /* SPI with data on falling edge */
+	{ "MANCH_R", 2 }, /* Manchester codec, rising edge = logic 0 */
+	{ "MANCH_F", 3 }, /* Manchester codec, falling edge = logic 1 */
+	{},
+};
+
+/* DFSDM channel clock source */
+static const struct stm32_dfsdm_str2field stm32_dfsdm_chan_src[] = {
+	/* External SPI clock (CLKIN x) */
+	{ "CLKIN", DFSDM_CHANNEL_SPI_CLOCK_EXTERNAL },
+	/* Internal SPI clock (CLKOUT) */
+	{ "CLKOUT", DFSDM_CHANNEL_SPI_CLOCK_INTERNAL },
+	/* Internal SPI clock divided by 2 (falling edge) */
+	{ "CLKOUT_F", DFSDM_CHANNEL_SPI_CLOCK_INTERNAL_DIV2_FALLING },
+	/* Internal SPI clock divided by 2 (rising edge) */
+	{ "CLKOUT_R",
DFSDM_CHANNEL_SPI_CLOCK_INTERNAL_DIV2_RISING }, + {}, +}; + +static int stm32_dfsdm_str2val(const char *str, + const struct stm32_dfsdm_str2field *list) +{ + const struct stm32_dfsdm_str2field *p = list; + + for (p = list; p && p->name; p++) + if (!strcmp(p->name, str)) + return p->val; + + return -EINVAL; +} + +static int stm32_dfsdm_set_osrs(struct stm32_dfsdm_filter *fl, + unsigned int fast, unsigned int oversamp) +{ + unsigned int i, d, fosr, iosr; + u64 res; + s64 delta; + unsigned int m = 1; /* multiplication factor */ + unsigned int p = fl->ford; /* filter order (ford) */ + + pr_debug("%s: Requested oversampling: %d\n", __func__, oversamp); + /* + * This function tries to compute filter oversampling and integrator + * oversampling, base on oversampling ratio requested by user. + * + * Decimation d depends on the filter order and the oversampling ratios. + * ford: filter order + * fosr: filter over sampling ratio + * iosr: integrator over sampling ratio + */ + if (fl->ford == DFSDM_FASTSINC_ORDER) { + m = 2; + p = 2; + } + + /* + * Look for filter and integrator oversampling ratios which allows + * to reach 24 bits data output resolution. + * Leave as soon as if exact resolution if reached. + * Otherwise the higher resolution below 32 bits is kept. 
+	 */
+	/*
+	 * Forget the resolution found by a previous call: otherwise a new
+	 * setting reaching a lower resolution would be silently ignored.
+	 */
+	fl->res = 0;
+	for (fosr = 1; fosr <= DFSDM_MAX_FL_OVERSAMPLING; fosr++) {
+		for (iosr = 1; iosr <= DFSDM_MAX_INT_OVERSAMPLING; iosr++) {
+			if (fast)
+				d = fosr * iosr;
+			else if (fl->ford == DFSDM_FASTSINC_ORDER)
+				d = fosr * (iosr + 3) + 2;
+			else
+				d = fosr * (iosr - 1 + p) + p;
+
+			/* d grows with iosr: no need to look further */
+			if (d > oversamp)
+				break;
+			else if (d != oversamp)
+				continue;
+			/*
+			 * Check resolution (limited to signed 32 bits)
+			 *   res <= 2^31
+			 * Sincx filters:
+			 *   res = m * fosr^p x iosr (with m=1, p=ford)
+			 * FastSinc filter
+			 *   res = m * fosr^p x iosr (with m=2, p=2)
+			 */
+			res = fosr;
+			for (i = p - 1; i > 0; i--) {
+				res = res * (u64)fosr;
+				if (res > DFSDM_MAX_RES)
+					break;
+			}
+			if (res > DFSDM_MAX_RES)
+				continue;
+			res = res * (u64)m * (u64)iosr;
+			if (res > DFSDM_MAX_RES)
+				continue;
+
+			delta = res - DFSDM_DATA_RES;
+
+			/* Keep the candidate giving the highest resolution */
+			if (res >= fl->res) {
+				fl->res = res;
+				fl->fosr = fosr;
+				fl->iosr = iosr;
+				fl->fast = fast;
+				pr_debug("%s: fosr = %d, iosr = %d\n",
+					 __func__, fl->fosr, fl->iosr);
+			}
+
+			/* Exact 24-bit data resolution reached: stop here */
+			if (!delta)
+				return 0;
+		}
+	}
+
+	/* No (fosr, iosr) pair matches the requested oversampling */
+	if (!fl->res)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Set the CHEN bit of channel @ch_id */
+static int stm32_dfsdm_start_channel(struct stm32_dfsdm *dfsdm,
+				     unsigned int ch_id)
+{
+	return regmap_update_bits(dfsdm->regmap, DFSDM_CHCFGR1(ch_id),
+				  DFSDM_CHCFGR1_CHEN_MASK,
+				  DFSDM_CHCFGR1_CHEN(1));
+}
+
+/* Clear the CHEN bit of channel @ch_id */
+static void stm32_dfsdm_stop_channel(struct stm32_dfsdm *dfsdm,
+				     unsigned int ch_id)
+{
+	regmap_update_bits(dfsdm->regmap, DFSDM_CHCFGR1(ch_id),
+			   DFSDM_CHCFGR1_CHEN_MASK, DFSDM_CHCFGR1_CHEN(0));
+}
+
+/* Program interface type, clock source and input selection of a channel */
+static int stm32_dfsdm_chan_configure(struct stm32_dfsdm *dfsdm,
+				      struct stm32_dfsdm_channel *ch)
+{
+	unsigned int id = ch->id;
+	struct regmap *regmap = dfsdm->regmap;
+	int ret;
+
+	ret = regmap_update_bits(regmap, DFSDM_CHCFGR1(id),
+				 DFSDM_CHCFGR1_SITP_MASK,
+				 DFSDM_CHCFGR1_SITP(ch->type));
+	if (ret < 0)
+		return ret;
+	ret = regmap_update_bits(regmap, DFSDM_CHCFGR1(id),
+				 DFSDM_CHCFGR1_SPICKSEL_MASK,
+				 DFSDM_CHCFGR1_SPICKSEL(ch->src));
+	if (ret < 0)
+		return ret;
+	return
regmap_update_bits(regmap, DFSDM_CHCFGR1(id), + DFSDM_CHCFGR1_CHINSEL_MASK, + DFSDM_CHCFGR1_CHINSEL(ch->alt_si)); +} + +static int stm32_dfsdm_start_filter(struct stm32_dfsdm *dfsdm, + unsigned int fl_id) +{ + int ret; + + /* Enable filter */ + ret = regmap_update_bits(dfsdm->regmap, DFSDM_CR1(fl_id), + DFSDM_CR1_DFEN_MASK, DFSDM_CR1_DFEN(1)); + if (ret < 0) + return ret; + + /* Start conversion */ + return regmap_update_bits(dfsdm->regmap, DFSDM_CR1(fl_id), + DFSDM_CR1_RSWSTART_MASK, + DFSDM_CR1_RSWSTART(1)); +} + +void stm32_dfsdm_stop_filter(struct stm32_dfsdm *dfsdm, unsigned int fl_id) +{ + /* Disable conversion */ + regmap_update_bits(dfsdm->regmap, DFSDM_CR1(fl_id), + DFSDM_CR1_DFEN_MASK, DFSDM_CR1_DFEN(0)); +} + +static int stm32_dfsdm_filter_configure(struct stm32_dfsdm *dfsdm, + unsigned int fl_id, unsigned int ch_id) +{ + struct regmap *regmap = dfsdm->regmap; + struct stm32_dfsdm_filter *fl = &dfsdm->fl_list[fl_id]; + int ret; + + /* Average integrator oversampling */ + ret = regmap_update_bits(regmap, DFSDM_FCR(fl_id), DFSDM_FCR_IOSR_MASK, + DFSDM_FCR_IOSR(fl->iosr - 1)); + if (ret) + return ret; + + /* Filter order and Oversampling */ + ret = regmap_update_bits(regmap, DFSDM_FCR(fl_id), DFSDM_FCR_FOSR_MASK, + DFSDM_FCR_FOSR(fl->fosr - 1)); + if (ret) + return ret; + + ret = regmap_update_bits(regmap, DFSDM_FCR(fl_id), DFSDM_FCR_FORD_MASK, + DFSDM_FCR_FORD(fl->ford)); + if (ret) + return ret; + + /* No scan mode supported for the moment */ + ret = regmap_update_bits(regmap, DFSDM_CR1(fl_id), DFSDM_CR1_RCH_MASK, + DFSDM_CR1_RCH(ch_id)); + if (ret) + return ret; + + return regmap_update_bits(regmap, DFSDM_CR1(fl_id), + DFSDM_CR1_RSYNC_MASK, + DFSDM_CR1_RSYNC(fl->sync_mode)); +} + +int stm32_dfsdm_channel_parse_of(struct stm32_dfsdm *dfsdm, + struct iio_dev *indio_dev, + struct iio_chan_spec *ch) +{ + struct stm32_dfsdm_channel *df_ch; + const char *of_str; + int chan_idx = ch->scan_index; + int ret, val; + + ret = 
of_property_read_u32_index(indio_dev->dev.of_node, + "st,adc-channels", chan_idx, + &ch->channel); + if (ret < 0) { + dev_err(&indio_dev->dev, + " Error parsing 'st,adc-channels' for idx %d\n", + chan_idx); + return ret; + } + if (ch->channel >= dfsdm->num_chs) { + dev_err(&indio_dev->dev, + " Error bad channel number %d (max = %d)\n", + ch->channel, dfsdm->num_chs); + return -EINVAL; + } + + ret = of_property_read_string_index(indio_dev->dev.of_node, + "st,adc-channel-names", chan_idx, + &ch->datasheet_name); + if (ret < 0) { + dev_err(&indio_dev->dev, + " Error parsing 'st,adc-channel-names' for idx %d\n", + chan_idx); + return ret; + } + + df_ch = &dfsdm->ch_list[ch->channel]; + df_ch->id = ch->channel; + + ret = of_property_read_string_index(indio_dev->dev.of_node, + "st,adc-channel-types", chan_idx, + &of_str); + if (!ret) { + val = stm32_dfsdm_str2val(of_str, stm32_dfsdm_chan_type); + if (val < 0) + return val; + } else { + val = 0; + } + df_ch->type = val; + + ret = of_property_read_string_index(indio_dev->dev.of_node, + "st,adc-channel-clk-src", chan_idx, + &of_str); + if (!ret) { + val = stm32_dfsdm_str2val(of_str, stm32_dfsdm_chan_src); + if (val < 0) + return val; + } else { + val = 0; + } + df_ch->src = val; + + ret = of_property_read_u32_index(indio_dev->dev.of_node, + "st,adc-alt-channel", chan_idx, + &df_ch->alt_si); + if (ret < 0) + df_ch->alt_si = 0; + + return 0; +} + +static int stm32_dfsdm_start_conv(struct stm32_dfsdm_adc *adc, bool dma) +{ + struct regmap *regmap = adc->dfsdm->regmap; + int ret; + + ret = stm32_dfsdm_start_channel(adc->dfsdm, adc->ch_id); + if (ret < 0) + return ret; + + ret = stm32_dfsdm_filter_configure(adc->dfsdm, adc->fl_id, + adc->ch_id); + if (ret < 0) + goto stop_channels; + + ret = stm32_dfsdm_start_filter(adc->dfsdm, adc->fl_id); + if (ret < 0) + goto stop_channels; + + return 0; + +stop_channels: + regmap_update_bits(regmap, DFSDM_CR1(adc->fl_id), + DFSDM_CR1_RDMAEN_MASK, 0); + + regmap_update_bits(regmap, 
DFSDM_CR1(adc->fl_id),
+			   DFSDM_CR1_RCONT_MASK, 0);
+	/* The channel was started with ch_id: stop that one, not fl_id */
+	stm32_dfsdm_stop_channel(adc->dfsdm, adc->ch_id);
+
+	return ret;
+}
+
+/* Stop the filter, clear the conversion options and stop the channel */
+static void stm32_dfsdm_stop_conv(struct stm32_dfsdm_adc *adc)
+{
+	struct regmap *regmap = adc->dfsdm->regmap;
+
+	stm32_dfsdm_stop_filter(adc->dfsdm, adc->fl_id);
+
+	/* Clean conversion options */
+	regmap_update_bits(regmap, DFSDM_CR1(adc->fl_id),
+			   DFSDM_CR1_RDMAEN_MASK, 0);
+
+	regmap_update_bits(regmap, DFSDM_CR1(adc->fl_id),
+			   DFSDM_CR1_RCONT_MASK, 0);
+
+	stm32_dfsdm_stop_channel(adc->dfsdm, adc->ch_id);
+}
+
+/*
+ * Run one regular conversion and wait for its completion.
+ * The sample is stored in @res through adc->buffer.
+ * Returns IIO_VAL_INT on success, -ETIMEDOUT when no completion is
+ * signalled within DFSDM_TIMEOUT, or another negative error code.
+ */
+static int stm32_dfsdm_single_conv(struct iio_dev *indio_dev,
+				   const struct iio_chan_spec *chan, int *res)
+{
+	struct stm32_dfsdm_adc *adc = iio_priv(indio_dev);
+	long timeout;
+	int ret;
+
+	reinit_completion(&adc->completion);
+
+	adc->buffer = res;
+
+	ret = stm32_dfsdm_start_dfsdm(adc->dfsdm);
+	if (ret < 0)
+		return ret;
+
+	/* Enable the end of regular conversion interrupt */
+	ret = regmap_update_bits(adc->dfsdm->regmap, DFSDM_CR2(adc->fl_id),
+				 DFSDM_CR2_REOCIE_MASK, DFSDM_CR2_REOCIE(1));
+	if (ret < 0)
+		goto stop_dfsdm;
+
+	ret = stm32_dfsdm_start_conv(adc, false);
+	if (ret < 0) {
+		regmap_update_bits(adc->dfsdm->regmap, DFSDM_CR2(adc->fl_id),
+				   DFSDM_CR2_REOCIE_MASK, DFSDM_CR2_REOCIE(0));
+		goto stop_dfsdm;
+	}
+
+	timeout = wait_for_completion_interruptible_timeout(&adc->completion,
+							    DFSDM_TIMEOUT);
+
+	/* Mask IRQ for regular conversion achievement */
+	regmap_update_bits(adc->dfsdm->regmap, DFSDM_CR2(adc->fl_id),
+			   DFSDM_CR2_REOCIE_MASK, DFSDM_CR2_REOCIE(0));
+
+	if (timeout == 0)
+		ret = -ETIMEDOUT;
+	else if (timeout < 0)
+		ret = timeout;
+	else
+		ret = IIO_VAL_INT;
+
+	stm32_dfsdm_stop_conv(adc);
+
+stop_dfsdm:
+	stm32_dfsdm_stop_dfsdm(adc->dfsdm);
+
+	return ret;
+}
+
+static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev,
+				 struct iio_chan_spec const *chan,
+				 int val, int val2, long mask)
+{
+	struct stm32_dfsdm_adc *adc = iio_priv(indio_dev);
+	struct stm32_dfsdm_filter *fl = &adc->dfsdm->fl_list[adc->fl_id];
+	int ret = -EINVAL;
+
+	if (mask ==
IIO_CHAN_INFO_OVERSAMPLING_RATIO) { + ret = stm32_dfsdm_set_osrs(fl, 0, val); + if (!ret) + adc->oversamp = val; + } + + return ret; +} + +static int stm32_dfsdm_read_raw(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, int *val, + int *val2, long mask) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + int ret; + + switch (mask) { + case IIO_CHAN_INFO_RAW: + ret = iio_hw_consumer_enable(adc->hwc); + if (ret < 0) { + dev_err(&indio_dev->dev, + "%s: IIO enable failed (channel %d)\n", + __func__, chan->channel); + return ret; + } + ret = stm32_dfsdm_single_conv(indio_dev, chan, val); + iio_hw_consumer_disable(adc->hwc); + if (ret < 0) { + dev_err(&indio_dev->dev, + "%s: Conversion failed (channel %d)\n", + __func__, chan->channel); + return ret; + } + return IIO_VAL_INT; + + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: + *val = adc->oversamp; + + return IIO_VAL_INT; + } + + return -EINVAL; +} + +static const struct iio_info stm32_dfsdm_info_adc = { + .read_raw = stm32_dfsdm_read_raw, + .write_raw = stm32_dfsdm_write_raw, +}; + +static irqreturn_t stm32_dfsdm_irq(int irq, void *arg) +{ + struct stm32_dfsdm_adc *adc = arg; + struct iio_dev *indio_dev = iio_priv_to_dev(adc); + struct regmap *regmap = adc->dfsdm->regmap; + unsigned int status, int_en; + + regmap_read(regmap, DFSDM_ISR(adc->fl_id), &status); + regmap_read(regmap, DFSDM_CR2(adc->fl_id), &int_en); + + if (status & DFSDM_ISR_REOCF_MASK) { + /* Read the data register clean the IRQ status */ + regmap_read(regmap, DFSDM_RDATAR(adc->fl_id), adc->buffer); + complete(&adc->completion); + } + + if (status & DFSDM_ISR_ROVRF_MASK) { + if (int_en & DFSDM_CR2_ROVRIE_MASK) + dev_warn(&indio_dev->dev, "Overrun detected\n"); + regmap_update_bits(regmap, DFSDM_ICR(adc->fl_id), + DFSDM_ICR_CLRROVRF_MASK, + DFSDM_ICR_CLRROVRF_MASK); + } + + return IRQ_HANDLED; +} + +static int stm32_dfsdm_adc_chan_init_one(struct iio_dev *indio_dev, + struct iio_chan_spec *ch) +{ + struct stm32_dfsdm_adc *adc = 
iio_priv(indio_dev); + int ret; + + ret = stm32_dfsdm_channel_parse_of(adc->dfsdm, indio_dev, ch); + if (ret < 0) + return ret; + + ch->type = IIO_VOLTAGE; + ch->indexed = 1; + + /* + * IIO_CHAN_INFO_RAW: used to compute regular conversion + * IIO_CHAN_INFO_OVERSAMPLING_RATIO: used to set oversampling + */ + ch->info_mask_separate = BIT(IIO_CHAN_INFO_RAW); + ch->info_mask_shared_by_all = BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO); + + ch->scan_type.sign = 'u'; + ch->scan_type.realbits = 24; + ch->scan_type.storagebits = 32; + adc->ch_id = ch->channel; + + return stm32_dfsdm_chan_configure(adc->dfsdm, + &adc->dfsdm->ch_list[ch->channel]); +} + +static int stm32_dfsdm_adc_init(struct iio_dev *indio_dev) +{ + struct iio_chan_spec *ch; + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + int num_ch; + int ret, chan_idx; + + adc->oversamp = DFSDM_DEFAULT_OVERSAMPLING; + ret = stm32_dfsdm_set_osrs(&adc->dfsdm->fl_list[adc->fl_id], 0, + adc->oversamp); + if (ret < 0) + return ret; + + num_ch = of_property_count_u32_elems(indio_dev->dev.of_node, + "st,adc-channels"); + if (num_ch < 0 || num_ch > adc->dfsdm->num_chs) { + dev_err(&indio_dev->dev, "Bad st,adc-channels\n"); + return num_ch < 0 ? 
num_ch : -EINVAL; + } + + /* Bind to SD modulator IIO device */ + adc->hwc = devm_iio_hw_consumer_alloc(&indio_dev->dev); + if (IS_ERR(adc->hwc)) + return -EPROBE_DEFER; + + ch = devm_kcalloc(&indio_dev->dev, num_ch, sizeof(*ch), + GFP_KERNEL); + if (!ch) + return -ENOMEM; + + for (chan_idx = 0; chan_idx < num_ch; chan_idx++) { + ch->scan_index = chan_idx; + ret = stm32_dfsdm_adc_chan_init_one(indio_dev, ch); + if (ret < 0) { + dev_err(&indio_dev->dev, "Channels init failed\n"); + return ret; + } + } + + indio_dev->num_channels = num_ch; + indio_dev->channels = ch; + + init_completion(&adc->completion); + + return 0; +} + +static const struct stm32_dfsdm_dev_data stm32h7_dfsdm_adc_data = { + .type = DFSDM_IIO, + .init = stm32_dfsdm_adc_init, +}; + +static const struct of_device_id stm32_dfsdm_adc_match[] = { + { + .compatible = "st,stm32-dfsdm-adc", + .data = &stm32h7_dfsdm_adc_data, + }, + {} +}; + +static int stm32_dfsdm_adc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct stm32_dfsdm_adc *adc; + struct device_node *np = dev->of_node; + const struct stm32_dfsdm_dev_data *dev_data; + struct iio_dev *iio; + const struct of_device_id *of_id; + char *name; + int ret, irq, val; + + of_id = of_match_node(stm32_dfsdm_adc_match, np); + if (!of_id->data) { + dev_err(&pdev->dev, "Data associated to device is missing\n"); + return -EINVAL; + } + + dev_data = (const struct stm32_dfsdm_dev_data *)of_id->data; + + iio = devm_iio_device_alloc(dev, sizeof(*adc)); + if (IS_ERR(iio)) { + dev_err(dev, "%s: Failed to allocate IIO\n", __func__); + return PTR_ERR(iio); + } + + adc = iio_priv(iio); + if (IS_ERR(adc)) { + dev_err(dev, "%s: Failed to allocate ADC\n", __func__); + return PTR_ERR(adc); + } + adc->dfsdm = dev_get_drvdata(dev->parent); + + iio->dev.parent = dev; + iio->dev.of_node = np; + iio->modes = INDIO_DIRECT_MODE | INDIO_BUFFER_SOFTWARE; + + platform_set_drvdata(pdev, adc); + + ret = of_property_read_u32(dev->of_node, "reg", 
&adc->fl_id); + if (ret != 0) { + dev_err(dev, "Missing reg property\n"); + return -EINVAL; + } + + name = devm_kzalloc(dev, sizeof("dfsdm-adc0"), GFP_KERNEL); + if (!name) + return -ENOMEM; + iio->info = &stm32_dfsdm_info_adc; + snprintf(name, sizeof("dfsdm-adc0"), "dfsdm-adc%d", adc->fl_id); + iio->name = name; + + /* + * In a first step IRQs generated for channels are not treated. + * So IRQ associated to filter instance 0 is dedicated to the Filter 0. + */ + irq = platform_get_irq(pdev, 0); + ret = devm_request_irq(dev, irq, stm32_dfsdm_irq, + 0, pdev->name, adc); + if (ret < 0) { + dev_err(dev, "Failed to request IRQ\n"); + return ret; + } + + ret = of_property_read_u32(dev->of_node, "st,filter-order", &val); + if (ret < 0) { + dev_err(dev, "Failed to set filter order\n"); + return ret; + } + + adc->dfsdm->fl_list[adc->fl_id].ford = val; + + ret = of_property_read_u32(dev->of_node, "st,filter0-sync", &val); + if (!ret) + adc->dfsdm->fl_list[adc->fl_id].sync_mode = val; + + adc->dev_data = dev_data; + ret = dev_data->init(iio); + if (ret < 0) + return ret; + + return iio_device_register(iio); +} + +static int stm32_dfsdm_adc_remove(struct platform_device *pdev) +{ + struct stm32_dfsdm_adc *adc = platform_get_drvdata(pdev); + struct iio_dev *indio_dev = iio_priv_to_dev(adc); + + iio_device_unregister(indio_dev); + + return 0; +} + +static struct platform_driver stm32_dfsdm_adc_driver = { + .driver = { + .name = "stm32-dfsdm-adc", + .of_match_table = stm32_dfsdm_adc_match, + }, + .probe = stm32_dfsdm_adc_probe, + .remove = stm32_dfsdm_adc_remove, +}; +module_platform_driver(stm32_dfsdm_adc_driver); + +MODULE_DESCRIPTION("STM32 sigma delta ADC"); +MODULE_AUTHOR("Arnaud Pouliquen "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From eca949800d2dd761bc0c73b793e4e9ad4a997469 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:12 +0100 Subject: IIO: ADC: add stm32 DFSDM support for PDM microphone This code offers a way to handle PDM audio 
microphones in ASoC framework. Audio driver should use consumer API. A specific management is implemented for DMA, with a callback, to allow audio buffers to be handled efficiently. Signed-off-by: Arnaud Pouliquen Reviewed-by: Jonathan Cameron Signed-off-by: Mark Brown --- .../ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32 | 16 + drivers/iio/adc/stm32-dfsdm-adc.c | 502 ++++++++++++++++++++- include/linux/iio/adc/stm32-dfsdm-adc.h | 18 + 3 files changed, 529 insertions(+), 7 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32 create mode 100644 include/linux/iio/adc/stm32-dfsdm-adc.h diff --git a/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32 new file mode 100644 index 000000000000..da9822309f07 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-dfsdm-adc-stm32 @@ -0,0 +1,16 @@ +What: /sys/bus/iio/devices/iio:deviceX/in_voltage_spi_clk_freq +KernelVersion: 4.14 +Contact: arnaud.pouliquen@st.com +Description: + For audio purposes only. + Used by audio driver to set/get the spi input frequency. + This is mandatory if DFSDM is slave on SPI bus, to + provide information on the SPI clock frequency during runtime + Notice that the SPI frequency should be a multiple of sample + frequency to ensure the precision. + If DFSDM input is SPI master: + Reading SPI clkout frequency, + error on writing + If DFSDM input is SPI Slave: + Reading returns value previously set. + Writing value before starting conversions. \ No newline at end of file diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index 68b5920e92cb..b03ca3f94331 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -6,19 +6,23 @@ * Author: Arnaud Pouliquen . 
*/ +#include +#include #include #include #include #include #include #include -#include +#include #include #include #include #include "stm32-dfsdm.h" +#define DFSDM_DMA_BUFFER_SIZE (4 * PAGE_SIZE) + /* Conversion timeout */ #define DFSDM_TIMEOUT_US 100000 #define DFSDM_TIMEOUT (msecs_to_jiffies(DFSDM_TIMEOUT_US / 1000)) @@ -58,6 +62,18 @@ struct stm32_dfsdm_adc { struct completion completion; u32 *buffer; + /* Audio specific */ + unsigned int spi_freq; /* SPI bus clock frequency */ + unsigned int sample_freq; /* Sample frequency after filter decimation */ + int (*cb)(const void *data, size_t size, void *cb_priv); + void *cb_priv; + + /* DMA */ + u8 *rx_buf; + unsigned int bufi; /* Buffer current position */ + unsigned int buf_sz; /* Buffer size */ + struct dma_chan *dma_chan; + dma_addr_t dma_buf; }; struct stm32_dfsdm_str2field { @@ -351,10 +367,63 @@ int stm32_dfsdm_channel_parse_of(struct stm32_dfsdm *dfsdm, return 0; } +static ssize_t dfsdm_adc_audio_get_spiclk(struct iio_dev *indio_dev, + uintptr_t priv, + const struct iio_chan_spec *chan, + char *buf) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", adc->spi_freq); +} + +static ssize_t dfsdm_adc_audio_set_spiclk(struct iio_dev *indio_dev, + uintptr_t priv, + const struct iio_chan_spec *chan, + const char *buf, size_t len) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + struct stm32_dfsdm_filter *fl = &adc->dfsdm->fl_list[adc->fl_id]; + struct stm32_dfsdm_channel *ch = &adc->dfsdm->ch_list[adc->ch_id]; + unsigned int sample_freq = adc->sample_freq; + unsigned int spi_freq; + int ret; + + dev_err(&indio_dev->dev, "enter %s\n", __func__); + /* If DFSDM is master on SPI, SPI freq can not be updated */ + if (ch->src != DFSDM_CHANNEL_SPI_CLOCK_EXTERNAL) + return -EPERM; + + ret = kstrtoint(buf, 0, &spi_freq); + if (ret) + return ret; + + if (!spi_freq) + return -EINVAL; + + if (sample_freq) { + if (spi_freq % sample_freq) + dev_warn(&indio_dev->dev, + 
"Sampling rate not accurate (%d)\n", + spi_freq / (spi_freq / sample_freq)); + + ret = stm32_dfsdm_set_osrs(fl, 0, (spi_freq / sample_freq)); + if (ret < 0) { + dev_err(&indio_dev->dev, + "No filter parameters that match!\n"); + return ret; + } + } + adc->spi_freq = spi_freq; + + return len; +} + static int stm32_dfsdm_start_conv(struct stm32_dfsdm_adc *adc, bool dma) { struct regmap *regmap = adc->dfsdm->regmap; int ret; + unsigned int dma_en = 0, cont_en = 0; ret = stm32_dfsdm_start_channel(adc->dfsdm, adc->ch_id); if (ret < 0) @@ -365,6 +434,24 @@ static int stm32_dfsdm_start_conv(struct stm32_dfsdm_adc *adc, bool dma) if (ret < 0) goto stop_channels; + if (dma) { + /* Enable DMA transfer*/ + dma_en = DFSDM_CR1_RDMAEN(1); + /* Enable conversion triggered by SPI clock*/ + cont_en = DFSDM_CR1_RCONT(1); + } + /* Enable DMA transfer*/ + ret = regmap_update_bits(regmap, DFSDM_CR1(adc->fl_id), + DFSDM_CR1_RDMAEN_MASK, dma_en); + if (ret < 0) + goto stop_channels; + + /* Enable conversion triggered by SPI clock*/ + ret = regmap_update_bits(regmap, DFSDM_CR1(adc->fl_id), + DFSDM_CR1_RCONT_MASK, cont_en); + if (ret < 0) + goto stop_channels; + ret = stm32_dfsdm_start_filter(adc->dfsdm, adc->fl_id); if (ret < 0) goto stop_channels; @@ -398,6 +485,231 @@ static void stm32_dfsdm_stop_conv(struct stm32_dfsdm_adc *adc) stm32_dfsdm_stop_channel(adc->dfsdm, adc->ch_id); } +static int stm32_dfsdm_set_watermark(struct iio_dev *indio_dev, + unsigned int val) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + unsigned int watermark = DFSDM_DMA_BUFFER_SIZE / 2; + + /* + * DMA cyclic transfers are used, buffer is split into two periods. + * There should be : + * - always one buffer (period) DMA is working on + * - one buffer (period) driver pushed to ASoC side. 
+ */ + watermark = min(watermark, val * (unsigned int)(sizeof(u32))); + adc->buf_sz = watermark * 2; + + return 0; +} + +static unsigned int stm32_dfsdm_adc_dma_residue(struct stm32_dfsdm_adc *adc) +{ + struct dma_tx_state state; + enum dma_status status; + + status = dmaengine_tx_status(adc->dma_chan, + adc->dma_chan->cookie, + &state); + if (status == DMA_IN_PROGRESS) { + /* Residue is size in bytes from end of buffer */ + unsigned int i = adc->buf_sz - state.residue; + unsigned int size; + + /* Return available bytes */ + if (i >= adc->bufi) + size = i - adc->bufi; + else + size = adc->buf_sz + i - adc->bufi; + + return size; + } + + return 0; +} + +static void stm32_dfsdm_audio_dma_buffer_done(void *data) +{ + struct iio_dev *indio_dev = data; + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + int available = stm32_dfsdm_adc_dma_residue(adc); + size_t old_pos; + + /* + * FIXME: In Kernel interface does not support cyclic DMA buffer,and + * offers only an interface to push data samples per samples. + * For this reason IIO buffer interface is not used and interface is + * bypassed using a private callback registered by ASoC. + * This should be a temporary solution waiting a cyclic DMA engine + * support in IIO. 
+ */ + + dev_dbg(&indio_dev->dev, "%s: pos = %d, available = %d\n", __func__, + adc->bufi, available); + old_pos = adc->bufi; + + while (available >= indio_dev->scan_bytes) { + u32 *buffer = (u32 *)&adc->rx_buf[adc->bufi]; + + /* Mask 8 LSB that contains the channel ID */ + *buffer = (*buffer & 0xFFFFFF00) << 8; + available -= indio_dev->scan_bytes; + adc->bufi += indio_dev->scan_bytes; + if (adc->bufi >= adc->buf_sz) { + if (adc->cb) + adc->cb(&adc->rx_buf[old_pos], + adc->buf_sz - old_pos, adc->cb_priv); + adc->bufi = 0; + old_pos = 0; + } + } + if (adc->cb) + adc->cb(&adc->rx_buf[old_pos], adc->bufi - old_pos, + adc->cb_priv); +} + +static int stm32_dfsdm_adc_dma_start(struct iio_dev *indio_dev) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + struct dma_async_tx_descriptor *desc; + dma_cookie_t cookie; + int ret; + + if (!adc->dma_chan) + return -EINVAL; + + dev_dbg(&indio_dev->dev, "%s size=%d watermark=%d\n", __func__, + adc->buf_sz, adc->buf_sz / 2); + + /* Prepare a DMA cyclic transaction */ + desc = dmaengine_prep_dma_cyclic(adc->dma_chan, + adc->dma_buf, + adc->buf_sz, adc->buf_sz / 2, + DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT); + if (!desc) + return -EBUSY; + + desc->callback = stm32_dfsdm_audio_dma_buffer_done; + desc->callback_param = indio_dev; + + cookie = dmaengine_submit(desc); + ret = dma_submit_error(cookie); + if (ret) { + dmaengine_terminate_all(adc->dma_chan); + return ret; + } + + /* Issue pending DMA requests */ + dma_async_issue_pending(adc->dma_chan); + + return 0; +} + +static int stm32_dfsdm_postenable(struct iio_dev *indio_dev) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + int ret; + + /* Reset adc buffer index */ + adc->bufi = 0; + + ret = stm32_dfsdm_start_dfsdm(adc->dfsdm); + if (ret < 0) + return ret; + + ret = stm32_dfsdm_start_conv(adc, true); + if (ret) { + dev_err(&indio_dev->dev, "Can't start conversion\n"); + goto stop_dfsdm; + } + + if (adc->dma_chan) { + ret = stm32_dfsdm_adc_dma_start(indio_dev); + if 
(ret) { + dev_err(&indio_dev->dev, "Can't start DMA\n"); + goto err_stop_conv; + } + } + + return 0; + +err_stop_conv: + stm32_dfsdm_stop_conv(adc); +stop_dfsdm: + stm32_dfsdm_stop_dfsdm(adc->dfsdm); + + return ret; +} + +static int stm32_dfsdm_predisable(struct iio_dev *indio_dev) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + + if (adc->dma_chan) + dmaengine_terminate_all(adc->dma_chan); + + stm32_dfsdm_stop_conv(adc); + + stm32_dfsdm_stop_dfsdm(adc->dfsdm); + + return 0; +} + +static const struct iio_buffer_setup_ops stm32_dfsdm_buffer_setup_ops = { + .postenable = &stm32_dfsdm_postenable, + .predisable = &stm32_dfsdm_predisable, +}; + +/** + * stm32_dfsdm_get_buff_cb() - register a callback that will be called when + * DMA transfer period is achieved. + * + * @iio_dev: Handle to IIO device. + * @cb: Pointer to callback function: + * - data: pointer to data buffer + * - size: size in byte of the data buffer + * - private: pointer to consumer private structure. + * @private: Pointer to consumer private structure. + */ +int stm32_dfsdm_get_buff_cb(struct iio_dev *iio_dev, + int (*cb)(const void *data, size_t size, + void *private), + void *private) +{ + struct stm32_dfsdm_adc *adc; + + if (!iio_dev) + return -EINVAL; + adc = iio_priv(iio_dev); + + adc->cb = cb; + adc->cb_priv = private; + + return 0; +} +EXPORT_SYMBOL_GPL(stm32_dfsdm_get_buff_cb); + +/** + * stm32_dfsdm_release_buff_cb - unregister buffer callback + * + * @iio_dev: Handle to IIO device. 
+ */ +int stm32_dfsdm_release_buff_cb(struct iio_dev *iio_dev) +{ + struct stm32_dfsdm_adc *adc; + + if (!iio_dev) + return -EINVAL; + adc = iio_priv(iio_dev); + + adc->cb = NULL; + adc->cb_priv = NULL; + + return 0; +} +EXPORT_SYMBOL_GPL(stm32_dfsdm_release_buff_cb); + static int stm32_dfsdm_single_conv(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *res) { @@ -453,15 +765,41 @@ static int stm32_dfsdm_write_raw(struct iio_dev *indio_dev, { struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); struct stm32_dfsdm_filter *fl = &adc->dfsdm->fl_list[adc->fl_id]; + struct stm32_dfsdm_channel *ch = &adc->dfsdm->ch_list[adc->ch_id]; + unsigned int spi_freq = adc->spi_freq; int ret = -EINVAL; - if (mask == IIO_CHAN_INFO_OVERSAMPLING_RATIO) { + switch (mask) { + case IIO_CHAN_INFO_OVERSAMPLING_RATIO: ret = stm32_dfsdm_set_osrs(fl, 0, val); if (!ret) adc->oversamp = val; + + return ret; + + case IIO_CHAN_INFO_SAMP_FREQ: + if (!val) + return -EINVAL; + if (ch->src != DFSDM_CHANNEL_SPI_CLOCK_EXTERNAL) + spi_freq = adc->dfsdm->spi_master_freq; + + if (spi_freq % val) + dev_warn(&indio_dev->dev, + "Sampling rate not accurate (%d)\n", + spi_freq / (spi_freq / val)); + + ret = stm32_dfsdm_set_osrs(fl, 0, (spi_freq / val)); + if (ret < 0) { + dev_err(&indio_dev->dev, + "Not able to find parameter that match!\n"); + return ret; + } + adc->sample_freq = val; + + return 0; } - return ret; + return -EINVAL; } static int stm32_dfsdm_read_raw(struct iio_dev *indio_dev, @@ -493,12 +831,23 @@ static int stm32_dfsdm_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_OVERSAMPLING_RATIO: *val = adc->oversamp; + return IIO_VAL_INT; + + case IIO_CHAN_INFO_SAMP_FREQ: + *val = adc->sample_freq; + return IIO_VAL_INT; } return -EINVAL; } +static const struct iio_info stm32_dfsdm_info_audio = { + .hwfifo_set_watermark = stm32_dfsdm_set_watermark, + .read_raw = stm32_dfsdm_read_raw, + .write_raw = stm32_dfsdm_write_raw, +}; + static const struct iio_info stm32_dfsdm_info_adc = { 
.read_raw = stm32_dfsdm_read_raw, .write_raw = stm32_dfsdm_write_raw, @@ -531,6 +880,70 @@ static irqreturn_t stm32_dfsdm_irq(int irq, void *arg) return IRQ_HANDLED; } +/* + * Define external info for SPI Frequency and audio sampling rate that can be + * configured by ASoC driver through consumer.h API + */ +static const struct iio_chan_spec_ext_info dfsdm_adc_audio_ext_info[] = { + /* spi_clk_freq : clock freq on SPI/manchester bus used by channel */ + { + .name = "spi_clk_freq", + .shared = IIO_SHARED_BY_TYPE, + .read = dfsdm_adc_audio_get_spiclk, + .write = dfsdm_adc_audio_set_spiclk, + }, + {}, +}; + +static void stm32_dfsdm_dma_release(struct iio_dev *indio_dev) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + + if (adc->dma_chan) { + dma_free_coherent(adc->dma_chan->device->dev, + DFSDM_DMA_BUFFER_SIZE, + adc->rx_buf, adc->dma_buf); + dma_release_channel(adc->dma_chan); + } +} + +static int stm32_dfsdm_dma_request(struct iio_dev *indio_dev) +{ + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + struct dma_slave_config config = { + .src_addr = (dma_addr_t)adc->dfsdm->phys_base + + DFSDM_RDATAR(adc->fl_id), + .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + }; + int ret; + + adc->dma_chan = dma_request_slave_channel(&indio_dev->dev, "rx"); + if (!adc->dma_chan) + return -EINVAL; + + adc->rx_buf = dma_alloc_coherent(adc->dma_chan->device->dev, + DFSDM_DMA_BUFFER_SIZE, + &adc->dma_buf, GFP_KERNEL); + if (!adc->rx_buf) { + ret = -ENOMEM; + goto err_release; + } + + ret = dmaengine_slave_config(adc->dma_chan, &config); + if (ret) + goto err_free; + + return 0; + +err_free: + dma_free_coherent(adc->dma_chan->device->dev, DFSDM_DMA_BUFFER_SIZE, + adc->rx_buf, adc->dma_buf); +err_release: + dma_release_channel(adc->dma_chan); + + return ret; +} + static int stm32_dfsdm_adc_chan_init_one(struct iio_dev *indio_dev, struct iio_chan_spec *ch) { @@ -551,7 +964,12 @@ static int stm32_dfsdm_adc_chan_init_one(struct iio_dev *indio_dev, ch->info_mask_separate = 
BIT(IIO_CHAN_INFO_RAW); ch->info_mask_shared_by_all = BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO); - ch->scan_type.sign = 'u'; + if (adc->dev_data->type == DFSDM_AUDIO) { + ch->scan_type.sign = 's'; + ch->ext_info = dfsdm_adc_audio_ext_info; + } else { + ch->scan_type.sign = 'u'; + } ch->scan_type.realbits = 24; ch->scan_type.storagebits = 32; adc->ch_id = ch->channel; @@ -560,6 +978,39 @@ static int stm32_dfsdm_adc_chan_init_one(struct iio_dev *indio_dev, &adc->dfsdm->ch_list[ch->channel]); } +static int stm32_dfsdm_audio_init(struct iio_dev *indio_dev) +{ + struct iio_chan_spec *ch; + struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); + struct stm32_dfsdm_channel *d_ch; + int ret; + + indio_dev->modes |= INDIO_BUFFER_SOFTWARE; + indio_dev->setup_ops = &stm32_dfsdm_buffer_setup_ops; + + ch = devm_kzalloc(&indio_dev->dev, sizeof(*ch), GFP_KERNEL); + if (!ch) + return -ENOMEM; + + ch->scan_index = 0; + + ret = stm32_dfsdm_adc_chan_init_one(indio_dev, ch); + if (ret < 0) { + dev_err(&indio_dev->dev, "Channels init failed\n"); + return ret; + } + ch->info_mask_separate = BIT(IIO_CHAN_INFO_SAMP_FREQ); + + d_ch = &adc->dfsdm->ch_list[adc->ch_id]; + if (d_ch->src != DFSDM_CHANNEL_SPI_CLOCK_EXTERNAL) + adc->spi_freq = adc->dfsdm->spi_master_freq; + + indio_dev->num_channels = 1; + indio_dev->channels = ch; + + return stm32_dfsdm_dma_request(indio_dev); +} + static int stm32_dfsdm_adc_init(struct iio_dev *indio_dev) { struct iio_chan_spec *ch; @@ -612,11 +1063,20 @@ static const struct stm32_dfsdm_dev_data stm32h7_dfsdm_adc_data = { .init = stm32_dfsdm_adc_init, }; +static const struct stm32_dfsdm_dev_data stm32h7_dfsdm_audio_data = { + .type = DFSDM_AUDIO, + .init = stm32_dfsdm_audio_init, +}; + static const struct of_device_id stm32_dfsdm_adc_match[] = { { .compatible = "st,stm32-dfsdm-adc", .data = &stm32h7_dfsdm_adc_data, }, + { + .compatible = "st,stm32-dfsdm-dmic", + .data = &stm32h7_dfsdm_audio_data, + }, {} }; @@ -667,8 +1127,13 @@ static int stm32_dfsdm_adc_probe(struct 
platform_device *pdev) name = devm_kzalloc(dev, sizeof("dfsdm-adc0"), GFP_KERNEL); if (!name) return -ENOMEM; - iio->info = &stm32_dfsdm_info_adc; - snprintf(name, sizeof("dfsdm-adc0"), "dfsdm-adc%d", adc->fl_id); + if (dev_data->type == DFSDM_AUDIO) { + iio->info = &stm32_dfsdm_info_audio; + snprintf(name, sizeof("dfsdm-pdm0"), "dfsdm-pdm%d", adc->fl_id); + } else { + iio->info = &stm32_dfsdm_info_adc; + snprintf(name, sizeof("dfsdm-adc0"), "dfsdm-adc%d", adc->fl_id); + } iio->name = name; /* @@ -700,7 +1165,27 @@ static int stm32_dfsdm_adc_probe(struct platform_device *pdev) if (ret < 0) return ret; - return iio_device_register(iio); + ret = iio_device_register(iio); + if (ret < 0) + goto err_cleanup; + + dev_err(dev, "of_platform_populate\n"); + if (dev_data->type == DFSDM_AUDIO) { + ret = of_platform_populate(np, NULL, NULL, dev); + if (ret < 0) { + dev_err(dev, "Failed to find an audio DAI\n"); + goto err_unregister; + } + } + + return 0; + +err_unregister: + iio_device_unregister(iio); +err_cleanup: + stm32_dfsdm_dma_release(iio); + + return ret; } static int stm32_dfsdm_adc_remove(struct platform_device *pdev) @@ -708,7 +1193,10 @@ static int stm32_dfsdm_adc_remove(struct platform_device *pdev) struct stm32_dfsdm_adc *adc = platform_get_drvdata(pdev); struct iio_dev *indio_dev = iio_priv_to_dev(adc); + if (adc->dev_data->type == DFSDM_AUDIO) + of_platform_depopulate(&pdev->dev); iio_device_unregister(indio_dev); + stm32_dfsdm_dma_release(indio_dev); return 0; } diff --git a/include/linux/iio/adc/stm32-dfsdm-adc.h b/include/linux/iio/adc/stm32-dfsdm-adc.h new file mode 100644 index 000000000000..e7dc7a542a4e --- /dev/null +++ b/include/linux/iio/adc/stm32-dfsdm-adc.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file discribe the STM32 DFSDM IIO driver API for audio part + * + * Copyright (C) 2017, STMicroelectronics - All Rights Reserved + * Author(s): Arnaud Pouliquen . 
+ */ + +#ifndef STM32_DFSDM_ADC_H +#define STM32_DFSDM_ADC_H + +int stm32_dfsdm_get_buff_cb(struct iio_dev *iio_dev, + int (*cb)(const void *data, size_t size, + void *private), + void *private); +int stm32_dfsdm_release_buff_cb(struct iio_dev *iio_dev); + +#endif -- cgit v1.2.3 From 16cbca06fa8288a4f58426fc898b141e12ee8008 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:13 +0100 Subject: IIO: consumer: allow to set buffer sizes Add iio consumer API to set buffer size and watermark according to sysfs API. Signed-off-by: Arnaud Pouliquen Reviewed-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/buffer/industrialio-buffer-cb.c | 11 +++++++++++ include/linux/iio/consumer.h | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/drivers/iio/buffer/industrialio-buffer-cb.c b/drivers/iio/buffer/industrialio-buffer-cb.c index 4847534700e7..ea63c838eeae 100644 --- a/drivers/iio/buffer/industrialio-buffer-cb.c +++ b/drivers/iio/buffer/industrialio-buffer-cb.c @@ -104,6 +104,17 @@ error_free_cb_buff: } EXPORT_SYMBOL_GPL(iio_channel_get_all_cb); +int iio_channel_cb_set_buffer_watermark(struct iio_cb_buffer *cb_buff, + size_t watermark) +{ + if (!watermark) + return -EINVAL; + cb_buff->buffer.watermark = watermark; + + return 0; +} +EXPORT_SYMBOL_GPL(iio_channel_cb_set_buffer_watermark); + int iio_channel_start_all_cb(struct iio_cb_buffer *cb_buff) { return iio_update_buffers(cb_buff->indio_dev, &cb_buff->buffer, diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 2017f35db17c..9887f4f8e2a8 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -133,6 +133,17 @@ struct iio_cb_buffer *iio_channel_get_all_cb(struct device *dev, int (*cb)(const void *data, void *private), void *private); +/** + * iio_channel_cb_set_buffer_watermark() - set the buffer watermark. + * @cb_buffer: The callback buffer from whom we want the channel + * information. 
+ * @watermark: buffer watermark in bytes. + * + * This function allows to configure the buffer watermark. + */ +int iio_channel_cb_set_buffer_watermark(struct iio_cb_buffer *cb_buffer, + size_t watermark); + /** * iio_channel_release_all_cb() - release and unregister the callback. * @cb_buffer: The callback buffer that was allocated. -- cgit v1.2.3 From 31cb1bc0dc94882a588930f4d007b570c481fd17 Mon Sep 17 00:00:00 2001 From: rodrigosiqueira Date: Fri, 15 Dec 2017 12:06:03 -0200 Subject: sched/core: Rework and clarify prepare_lock_switch() The prepare_lock_switch() function has an unused parameter, and also the function name was not descriptive. To improve readability and remove the extra parameter, make the following changes: * Move prepare_lock_switch() from kernel/sched/sched.h to kernel/sched/core.c, rename it to prepare_task(), and remove the unused parameter. * Split the smp_store_release() out from finish_lock_switch() to a function named finish_task(). * Comment adjustments. Signed-off-by: Rodrigo Siqueira Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171215140603.gxe5i2y6fg5ojfpp@smtp.gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++---- kernel/sched/sched.h | 41 ---------------------------------------- 2 files changed, 49 insertions(+), 45 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 644fa2e3d993..a794f8155cd5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2045,7 +2045,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * If the owning (remote) CPU is still in the middle of schedule() with * this task as prev, wait until its done referencing the task. * - * Pairs with the smp_store_release() in finish_lock_switch(). + * Pairs with the smp_store_release() in finish_task(). 
* * This ensures that tasks getting woken will be fully ordered against * their previous state and preserve Program Order. @@ -2571,6 +2571,50 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, #endif /* CONFIG_PREEMPT_NOTIFIERS */ +static inline void prepare_task(struct task_struct *next) +{ +#ifdef CONFIG_SMP + /* + * Claim the task as running, we do this before switching to it + * such that any running task will have this set. + */ + next->on_cpu = 1; +#endif +} + +static inline void finish_task(struct task_struct *prev) +{ +#ifdef CONFIG_SMP + /* + * After ->on_cpu is cleared, the task can be moved to a different CPU. + * We must ensure this doesn't happen until the switch is completely + * finished. + * + * In particular, the load of prev->state in finish_task_switch() must + * happen before this. + * + * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). + */ + smp_store_release(&prev->on_cpu, 0); +#endif +} + +static inline void finish_lock_switch(struct rq *rq) +{ +#ifdef CONFIG_DEBUG_SPINLOCK + /* this is a valid case when another task releases the spinlock */ + rq->lock.owner = current; +#endif + /* + * If we are tracking spinlock dependencies then we have to + * fix up the runqueue lock - which gets 'carried over' from + * prev into current: + */ + spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); + + raw_spin_unlock_irq(&rq->lock); +} + /** * prepare_task_switch - prepare to switch tasks * @rq: the runqueue preparing to switch @@ -2591,7 +2635,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, sched_info_switch(rq, prev, next); perf_event_task_sched_out(prev, next); fire_sched_out_preempt_notifiers(prev, next); - prepare_lock_switch(rq, next); + prepare_task(next); prepare_arch_switch(next); } @@ -2646,7 +2690,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) * the scheduled task must drop that reference. 
* * We must observe prev->state before clearing prev->on_cpu (in - * finish_lock_switch), otherwise a concurrent wakeup can get prev + * finish_task), otherwise a concurrent wakeup can get prev * running on another CPU and we could rave with its RUNNING -> DEAD * transition, resulting in a double drop. */ @@ -2663,7 +2707,8 @@ static struct rq *finish_task_switch(struct task_struct *prev) * to use. */ smp_mb__after_unlock_lock(); - finish_lock_switch(rq, prev); + finish_task(prev); + finish_lock_switch(rq); finish_arch_post_lock_switch(); fire_sched_in_preempt_notifiers(current); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b19552a212de..43f5d6e936bb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1328,47 +1328,6 @@ static inline int task_on_rq_migrating(struct task_struct *p) # define finish_arch_post_lock_switch() do { } while (0) #endif -static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) -{ -#ifdef CONFIG_SMP - /* - * We can optimise this out completely for !SMP, because the - * SMP rebalancing from interrupt is the only thing that cares - * here. - */ - next->on_cpu = 1; -#endif -} - -static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) -{ -#ifdef CONFIG_SMP - /* - * After ->on_cpu is cleared, the task can be moved to a different CPU. - * We must ensure this doesn't happen until the switch is completely - * finished. - * - * In particular, the load of prev->state in finish_task_switch() must - * happen before this. - * - * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). 
- */ - smp_store_release(&prev->on_cpu, 0); -#endif -#ifdef CONFIG_DEBUG_SPINLOCK - /* this is a valid case when another task releases the spinlock */ - rq->lock.owner = current; -#endif - /* - * If we are tracking spinlock dependencies then we have to - * fix up the runqueue lock - which gets 'carried over' from - * prev into current: - */ - spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); - - raw_spin_unlock_irq(&rq->lock); -} - /* * wake flags */ -- cgit v1.2.3 From f01415fdbfe83380c2dfcf90b7b26042f88963aa Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Tue, 5 Dec 2017 17:10:15 +0000 Subject: sched/fair: Use 'unsigned long' for utilization, consistently Utilization and capacity are tracked as 'unsigned long', however some functions using them return an 'int' which is ultimately assigned back to 'unsigned long' variables. Since there is no reason to use a different and signed type, consolidate the signature of functions returning utilization to always use the native type. This change improves code consistency, and it also benefits code paths where utilizations should be clamped by avoiding further type conversions or ugly type casts. Signed-off-by: Patrick Bellasi Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Chris Redpath Reviewed-by: Brendan Jackman Reviewed-by: Dietmar Eggemann Cc: Joel Fernandes Cc: Juri Lelli Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Paul Turner Cc: Peter Zijlstra Cc: Rafael J .
Wysocki Cc: Thomas Gleixner Cc: Todd Kjos Cc: Vincent Guittot Cc: Viresh Kumar Link: http://lkml.kernel.org/r/20171205171018.9203-2-patrick.bellasi@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2915c0d95107..de43bd80a98f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5765,8 +5765,8 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, return affine; } -static inline int task_util(struct task_struct *p); -static int cpu_util_wake(int cpu, struct task_struct *p); +static inline unsigned long task_util(struct task_struct *p); +static unsigned long cpu_util_wake(int cpu, struct task_struct *p); static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) { @@ -6247,7 +6247,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) * capacity_orig) as it useful for predicting the capacity required after task * migrations (scheduler-driven DVFS). */ -static int cpu_util(int cpu) +static unsigned long cpu_util(int cpu) { unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; unsigned long capacity = capacity_orig_of(cpu); @@ -6255,7 +6255,7 @@ static int cpu_util(int cpu) return (util >= capacity) ? capacity : util; } -static inline int task_util(struct task_struct *p) +static inline unsigned long task_util(struct task_struct *p) { return p->se.avg.util_avg; } @@ -6264,7 +6264,7 @@ static inline int task_util(struct task_struct *p) * cpu_util_wake: Compute cpu utilization with any contributions from * the waking task p removed. 
*/ -static int cpu_util_wake(int cpu, struct task_struct *p) +static unsigned long cpu_util_wake(int cpu, struct task_struct *p) { unsigned long util, capacity; -- cgit v1.2.3 From f453ae2200b0d1b7abc0c3794ce088899ac7a2af Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 14 Dec 2017 13:21:58 -0800 Subject: sched/fair: Consider RT/IRQ pressure in capacity_spare_wake() capacity_spare_wake() in the slow path influences choice of idlest groups, as we search for groups with maximum spare capacity. In scenarios where RT pressure is high, a suboptimal group can be chosen and hurt performance of the task being woken up. Fix this by using capacity_of() instead of capacity_orig_of() in capacity_spare_wake(). Test results showing the improvements from this change are below. More tests were also done by myself and Matt Fleming to ensure no degradation in different benchmarks. 1) Rohit ran barrier.c test (details below) with following improvements: ------------------------------------------------------------------------ This was Rohit's original use case for a patch he posted at [1] however from his recent tests he showed my patch can replace his slow path changes [1] and there's no need to selectively scan/skip CPUs in find_idlest_group_cpu in the slow path to get the improvement he sees. barrier.c (open_mp code) is used as a micro-benchmark. It does a number of iterations and barrier sync at the end of each for loop.
Here barrier.c is running along with ping on CPU 0 and 1 as: 'ping -l 10000 -q -s 10 -f hostX' barrier.c can be found at: http://www.spinics.net/lists/kernel/msg2506955.html Following are the results for the iterations per second with this micro-benchmark (higher is better), on a 44 core, 2 socket 88 Threads Intel x86 machine: +--------+------------------+---------------------------+ |Threads | Without patch | With patch | | | | | +--------+--------+---------+-----------------+---------+ | | Mean | Std Dev | Mean | Std Dev | +--------+--------+---------+-----------------+---------+ |1 | 539.36 | 60.16 | 572.54 (+6.15%) | 40.95 | |2 | 481.01 | 19.32 | 530.64 (+10.32%)| 56.16 | |4 | 474.78 | 22.28 | 479.46 (+0.99%) | 18.89 | |8 | 450.06 | 24.91 | 447.82 (-0.50%) | 12.36 | |16 | 436.99 | 22.57 | 441.88 (+1.12%) | 7.39 | |32 | 388.28 | 55.59 | 429.4 (+10.59%)| 31.14 | |64 | 314.62 | 6.33 | 311.81 (-0.89%) | 11.99 | +--------+--------+---------+-----------------+---------+ 2) ping+hackbench test on bare-metal server (by Rohit) ----------------------------------------------------- Here hackbench is running in threaded mode, along with ping running on CPU 0 and 1 as: 'ping -l 10000 -q -s 10 -f hostX' This test is running on a 2 socket, 20 core and 40 threads Intel x86 machine: Number of loops is 10000 and runtime is in seconds (Lower is better).
+--------------+-----------------+--------------------------+ |Task Groups | Without patch | With patch | | +-------+---------+----------------+---------+ |(Groups of 40)| Mean | Std Dev | Mean | Std Dev | +--------------+-------+---------+----------------+---------+ |1 | 0.851 | 0.007 | 0.828 (+2.77%)| 0.032 | |2 | 1.083 | 0.203 | 1.087 (-0.37%)| 0.246 | |4 | 1.601 | 0.051 | 1.611 (-0.62%)| 0.055 | |8 | 2.837 | 0.060 | 2.827 (+0.35%)| 0.031 | |16 | 5.139 | 0.133 | 5.107 (+0.63%)| 0.085 | |25 | 7.569 | 0.142 | 7.503 (+0.88%)| 0.143 | +--------------+-------+---------+----------------+---------+ [1] https://patchwork.kernel.org/patch/9991635/ Matt Fleming also ran several different hackbench tests and cyclic test to sanity-check that the patch doesn't harm other use cases. Tested-by: Matt Fleming Tested-by: Rohit Jain Signed-off-by: Joel Fernandes Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Reviewed-by: Dietmar Eggemann Cc: Atish Patra Cc: Brendan Jackman Cc: Chris Redpath Cc: Frederic Weisbecker Cc: Juri Lelli Cc: Len Brown Cc: Linus Torvalds Cc: Morten Ramussen Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Rafael J.
Wysocki Cc: Saravana Kannan Cc: Srinivas Pandruvada Cc: Steve Muckle Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Vikram Mulukutla Cc: Viresh Kumar Link: http://lkml.kernel.org/r/20171214212158.188190-1-joelaf@google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index de43bd80a98f..6e775ac39eb4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5770,7 +5770,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p); static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) { - return capacity_orig_of(cpu) - cpu_util_wake(cpu, p); + return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0); } /* -- cgit v1.2.3 From 6257e7047890084fbeeb84c641200b43f0668abc Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 13 Dec 2017 15:23:20 +0530 Subject: sched/cpufreq: Initialize sg_cpu->flags to 0 Initializing sg_cpu->flags to SCHED_CPUFREQ_RT has no obvious benefit. The flags field wouldn't be used until the utilization update handler is called for the first time, and once that is called we will overwrite flags anyway. Initialize it to 0. 
Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juri Lelli Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael Wysocki Cc: Thomas Gleixner Cc: Vincent Guittot Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: morten.rasmussen@arm.com Cc: tkjos@android.com Link: http://lkml.kernel.org/r/763feda6424ced8486b25a0c52979634e6104478.1513158452.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index d6717a3331a1..22d4630142ab 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -655,7 +655,7 @@ static int sugov_start(struct cpufreq_policy *policy) memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; - sg_cpu->flags = SCHED_CPUFREQ_RT; + sg_cpu->flags = 0; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; } -- cgit v1.2.3 From 5083452f8c7a11577e83842596f97625abbc9c8e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 13 Dec 2017 15:23:22 +0530 Subject: sched/cpufreq: Don't pass flags to sugov_set_iowait_boost() We are already passing sg_cpu as argument to sugov_set_iowait_boost() helper and the same can be used to retrieve the flags value. Get rid of the redundant argument. 
Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael Wysocki Cc: Thomas Gleixner Cc: Vincent Guittot Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: morten.rasmussen@arm.com Cc: tkjos@android.com Link: http://lkml.kernel.org/r/4ec5562b1a87e146ebab11fb5dde1ca9c763a7fb.1513158452.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 22d4630142ab..6dd1ec9e2995 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -187,10 +187,9 @@ static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu) *max = cfs_max; } -static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, - unsigned int flags) +static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time) { - if (flags & SCHED_CPUFREQ_IOWAIT) { + if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) { if (sg_cpu->iowait_boost_pending) return; @@ -264,7 +263,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned int next_f; bool busy; - sugov_set_iowait_boost(sg_cpu, time, flags); + sugov_set_iowait_boost(sg_cpu, time); sg_cpu->last_update = time; if (!sugov_should_update_freq(sg_policy, time)) @@ -349,7 +348,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->max = max; sg_cpu->flags = flags; - sugov_set_iowait_boost(sg_cpu, time, flags); + sugov_set_iowait_boost(sg_cpu, time); sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { -- cgit v1.2.3 From 18cec7e0ddd5e28b7722f7049d715873373be3e9 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 15 Dec 2017 07:39:44 -0800 Subject: sched/fair: Remove impossible condition from find_idlest_group_cpu() find_idlest_group_cpu() goes through CPUs of a group 
previously selected by find_idlest_group(). find_idlest_group() returns NULL if the local group is the selected one and doesn't execute find_idlest_group_cpu if the group to which 'cpu' belongs is chosen. So we're always guaranteed to call find_idlest_group_cpu() with a group to which 'cpu' is non-local. This makes one of the conditions in find_idlest_group_cpu() an impossible one, which we can get rid of. Signed-off-by: Joel Fernandes Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Brendan Jackman Reviewed-by: Vincent Guittot Cc: Android Kernel Cc: Atish Patra Cc: Chris Redpath Cc: Dietmar Eggemann Cc: EAS Dev Cc: Frederic Weisbecker Cc: Josef Bacik Cc: Juri Lelli Cc: Len Brown Cc: Linus Torvalds Cc: Morten Ramussen Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rohit Jain Cc: Saravana Kannan Cc: Srinivas Pandruvada Cc: Steve Muckle Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Vikram Mulukutla Cc: Viresh Kumar Link: http://lkml.kernel.org/r/20171215153944.220146-3-joelaf@google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6e775ac39eb4..3e7606d3ad0f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5950,7 +5950,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this } } else if (shallowest_idle_cpu == -1) { load = weighted_cpuload(cpu_rq(i)); - if (load < min_load || (load == min_load && i == this_cpu)) { + if (load < min_load) { min_load = load; least_loaded_cpu = i; } -- cgit v1.2.3 From 9783be2c0e90bbaceec3c471c4fb017bff7293ba Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 15 Dec 2017 07:39:43 -0800 Subject: sched/fair: Correct obsolete comment about cpufreq_update_util() Since the remote cpufreq callback work, the cpufreq_update_util() call can happen from remote CPUs. The comment about local CPUs is thus obsolete. Update it accordingly.
Signed-off-by: Joel Fernandes Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Viresh Kumar Cc: Android Kernel Cc: Atish Patra Cc: Chris Redpath Cc: Dietmar Eggemann Cc: EAS Dev Cc: Frederic Weisbecker Cc: Josef Bacik Cc: Juri Lelli Cc: Len Brown Cc: Linus Torvalds Cc: Morten Ramussen Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rohit Jain Cc: Saravana Kannan Cc: Srinivas Pandruvada Cc: Steve Muckle Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Vikram Mulukutla Cc: Vincent Guittot Link: http://lkml.kernel.org/r/20171215153944.220146-2-joelaf@google.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3e7606d3ad0f..59e66a5848d0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3020,9 +3020,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) /* * There are a few boundary cases this might miss but it should * get called often enough that that should (hopefully) not be - * a real problem -- added to that it only calls on the local - * CPU, so if we enqueue remotely we'll miss an update, but - * the next tick/schedule should update. + * a real problem. * * It will not get called when we go idle, because the idle * thread is a different class (!fair), nor will the utilization -- cgit v1.2.3 From 7332dec055f2457c386032f7e9b2991eb05c2a0a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 19 Dec 2017 08:59:47 +0000 Subject: sched/fair: Only immediately migrate tasks due to interrupts if prev and target CPUs share cache If waking from an idle CPU due to an interrupt then it's possible that the waker task will be pulled to wake on the current CPU. Unfortunately, depending on the type of interrupt and IRQ configuration, there may not be a strong relationship between the CPU an interrupt was delivered on and the CPU a task was running on. 
For example, the interrupts could all be delivered to CPUs on one particular node due to the machine topology or IRQ affinity configuration. Another example is an interrupt for an IO completion which can be delivered to any CPU where there is no guarantee the data is either cache hot or even local. This patch was motivated by the observation that an IO workload was being pulled cross-node on a frequent basis when IO completed. From a wakeup latency perspective, it's still useful to know that an idle CPU is immediately available for use but lets only consider an automatic migration if the CPUs share cache to limit damage due to NUMA migrations. Migrations may still occur if wake_affine_weight determines it's appropriate. These are the throughput results for dbench running on ext4 comparing 4.15-rc3 and this patch on a 2-socket machine where interrupts due to IO completions can happen on any CPU. 4.15.0-rc3 4.15.0-rc3 vanilla lessmigrate Hmean 1 854.64 ( 0.00%) 865.01 ( 1.21%) Hmean 2 1229.60 ( 0.00%) 1274.44 ( 3.65%) Hmean 4 1591.81 ( 0.00%) 1628.08 ( 2.28%) Hmean 8 1845.04 ( 0.00%) 1831.80 ( -0.72%) Hmean 16 2038.61 ( 0.00%) 2091.44 ( 2.59%) Hmean 32 2327.19 ( 0.00%) 2430.29 ( 4.43%) Hmean 64 2570.61 ( 0.00%) 2568.54 ( -0.08%) Hmean 128 2481.89 ( 0.00%) 2499.28 ( 0.70%) Stddev 1 14.31 ( 0.00%) 5.35 ( 62.65%) Stddev 2 21.29 ( 0.00%) 11.09 ( 47.92%) Stddev 4 7.22 ( 0.00%) 6.80 ( 5.92%) Stddev 8 26.70 ( 0.00%) 9.41 ( 64.76%) Stddev 16 22.40 ( 0.00%) 20.01 ( 10.70%) Stddev 32 45.13 ( 0.00%) 44.74 ( 0.85%) Stddev 64 93.10 ( 0.00%) 93.18 ( -0.09%) Stddev 128 184.28 ( 0.00%) 177.85 ( 3.49%) Note the small increase in throughput for low thread counts but also note that the standard deviation for each sample during the test run is lower. The throughput figures for dbench can be misleading so the benchmark is actually modified to time the latency of the processing of one load file with many samples taken. 
The difference in latency is 4.15.0-rc3 4.15.0-rc3 vanilla lessmigrate Amean 1 21.71 ( 0.00%) 21.47 ( 1.08%) Amean 2 30.89 ( 0.00%) 29.58 ( 4.26%) Amean 4 47.54 ( 0.00%) 46.61 ( 1.97%) Amean 8 82.71 ( 0.00%) 82.81 ( -0.12%) Amean 16 149.45 ( 0.00%) 145.01 ( 2.97%) Amean 32 265.49 ( 0.00%) 248.43 ( 6.42%) Amean 64 463.23 ( 0.00%) 463.55 ( -0.07%) Amean 128 933.97 ( 0.00%) 935.50 ( -0.16%) Stddev 1 1.58 ( 0.00%) 1.54 ( 2.26%) Stddev 2 2.84 ( 0.00%) 2.95 ( -4.15%) Stddev 4 6.78 ( 0.00%) 6.85 ( -0.99%) Stddev 8 16.85 ( 0.00%) 16.37 ( 2.85%) Stddev 16 41.59 ( 0.00%) 41.04 ( 1.32%) Stddev 32 111.05 ( 0.00%) 105.11 ( 5.35%) Stddev 64 285.94 ( 0.00%) 288.01 ( -0.72%) Stddev 128 803.39 ( 0.00%) 809.73 ( -0.79%) It's a small improvement which is not surprising given that migrations that migrate to a different node as not that common. However, it is noticeable in the CPU migration statistics which are reduced by 24%. There was a query for v1 of this patch about NAS so here are the results for C-class using MPI for parallelisation on the same machine nas-mpi 4.15.0-rc3 4.15.0-rc3 vanilla noirq Time cg.C 24.25 ( 0.00%) 23.17 ( 4.45%) Time ep.C 8.22 ( 0.00%) 8.29 ( -0.85%) Time ft.C 22.67 ( 0.00%) 20.34 ( 10.28%) Time is.C 1.42 ( 0.00%) 1.47 ( -3.52%) Time lu.C 55.62 ( 0.00%) 54.81 ( 1.46%) Time mg.C 7.93 ( 0.00%) 7.91 ( 0.25%) 4.15.0-rc3 4.15.0-rc3 vanilla noirq-v1r1 User 3799.96 3748.34 System 672.10 626.15 Elapsed 91.91 79.49 lu.C sees a small gain, ft.C a large gain and ep.C and is.C see small regressions but in terms of absolute time, the difference is small and likely within run-to-run variance. System CPU usage is slightly reduced. schbench from Facebook was also requested. This is a bit of a mixed bag but it's important to note that this workload should not be heavily impacted by wakeups from interrupt context. 
4.15.0-rc3 4.15.0-rc3 vanilla noirq-v1r1 Lat 50.00th-qrtle-1 41.00 ( 0.00%) 41.00 ( 0.00%) Lat 75.00th-qrtle-1 42.00 ( 0.00%) 42.00 ( 0.00%) Lat 90.00th-qrtle-1 43.00 ( 0.00%) 44.00 ( -2.33%) Lat 95.00th-qrtle-1 44.00 ( 0.00%) 46.00 ( -4.55%) Lat 99.00th-qrtle-1 57.00 ( 0.00%) 58.00 ( -1.75%) Lat 99.50th-qrtle-1 59.00 ( 0.00%) 59.00 ( 0.00%) Lat 99.90th-qrtle-1 67.00 ( 0.00%) 78.00 ( -16.42%) Lat 50.00th-qrtle-2 40.00 ( 0.00%) 51.00 ( -27.50%) Lat 75.00th-qrtle-2 45.00 ( 0.00%) 56.00 ( -24.44%) Lat 90.00th-qrtle-2 53.00 ( 0.00%) 59.00 ( -11.32%) Lat 95.00th-qrtle-2 57.00 ( 0.00%) 61.00 ( -7.02%) Lat 99.00th-qrtle-2 67.00 ( 0.00%) 71.00 ( -5.97%) Lat 99.50th-qrtle-2 69.00 ( 0.00%) 74.00 ( -7.25%) Lat 99.90th-qrtle-2 83.00 ( 0.00%) 77.00 ( 7.23%) Lat 50.00th-qrtle-4 51.00 ( 0.00%) 51.00 ( 0.00%) Lat 75.00th-qrtle-4 57.00 ( 0.00%) 56.00 ( 1.75%) Lat 90.00th-qrtle-4 60.00 ( 0.00%) 59.00 ( 1.67%) Lat 95.00th-qrtle-4 62.00 ( 0.00%) 62.00 ( 0.00%) Lat 99.00th-qrtle-4 73.00 ( 0.00%) 72.00 ( 1.37%) Lat 99.50th-qrtle-4 76.00 ( 0.00%) 74.00 ( 2.63%) Lat 99.90th-qrtle-4 85.00 ( 0.00%) 78.00 ( 8.24%) Lat 50.00th-qrtle-8 54.00 ( 0.00%) 58.00 ( -7.41%) Lat 75.00th-qrtle-8 59.00 ( 0.00%) 62.00 ( -5.08%) Lat 90.00th-qrtle-8 65.00 ( 0.00%) 66.00 ( -1.54%) Lat 95.00th-qrtle-8 67.00 ( 0.00%) 70.00 ( -4.48%) Lat 99.00th-qrtle-8 78.00 ( 0.00%) 79.00 ( -1.28%) Lat 99.50th-qrtle-8 81.00 ( 0.00%) 80.00 ( 1.23%) Lat 99.90th-qrtle-8 116.00 ( 0.00%) 83.00 ( 28.45%) Lat 50.00th-qrtle-16 65.00 ( 0.00%) 64.00 ( 1.54%) Lat 75.00th-qrtle-16 77.00 ( 0.00%) 71.00 ( 7.79%) Lat 90.00th-qrtle-16 83.00 ( 0.00%) 82.00 ( 1.20%) Lat 95.00th-qrtle-16 87.00 ( 0.00%) 87.00 ( 0.00%) Lat 99.00th-qrtle-16 95.00 ( 0.00%) 96.00 ( -1.05%) Lat 99.50th-qrtle-16 99.00 ( 0.00%) 103.00 ( -4.04%) Lat 99.90th-qrtle-16 104.00 ( 0.00%) 122.00 ( -17.31%) Lat 50.00th-qrtle-32 71.00 ( 0.00%) 73.00 ( -2.82%) Lat 75.00th-qrtle-32 91.00 ( 0.00%) 92.00 ( -1.10%) Lat 90.00th-qrtle-32 108.00 ( 0.00%) 107.00 ( 0.93%) Lat 
95.00th-qrtle-32 118.00 ( 0.00%) 115.00 ( 2.54%) Lat 99.00th-qrtle-32 134.00 ( 0.00%) 129.00 ( 3.73%) Lat 99.50th-qrtle-32 138.00 ( 0.00%) 133.00 ( 3.62%) Lat 99.90th-qrtle-32 149.00 ( 0.00%) 146.00 ( 2.01%) Lat 50.00th-qrtle-39 83.00 ( 0.00%) 81.00 ( 2.41%) Lat 75.00th-qrtle-39 105.00 ( 0.00%) 102.00 ( 2.86%) Lat 90.00th-qrtle-39 120.00 ( 0.00%) 119.00 ( 0.83%) Lat 95.00th-qrtle-39 129.00 ( 0.00%) 128.00 ( 0.78%) Lat 99.00th-qrtle-39 153.00 ( 0.00%) 149.00 ( 2.61%) Lat 99.50th-qrtle-39 166.00 ( 0.00%) 156.00 ( 6.02%) Lat 99.90th-qrtle-39 12304.00 ( 0.00%) 12848.00 ( -4.42%) When heavily loaded (e.g. 99.50th-qrtle-39 indicates 39 threads), there are small gains in many cases. Otherwise it depends on the quartile used where it can be bad -- e.g. 75.00th-qrtle-2. However, even these results are probably a co-incidence. For this workload, much depends on what node the threads get placed on and their relative locality and not wakeups from interrupt context. A larger component on how it behaves would be automatic NUMA balancing where a fault incurred to measure locality would be a much larger contributer to latency than the wakeup path. This is the results from an almost identical machine that happened to run the same test. They only differ in terms of storage which is irrelevant for this test. 
4.15.0-rc3 4.15.0-rc3 vanilla noirq-v1r1 Lat 50.00th-qrtle-1 41.00 ( 0.00%) 41.00 ( 0.00%) Lat 75.00th-qrtle-1 42.00 ( 0.00%) 42.00 ( 0.00%) Lat 90.00th-qrtle-1 44.00 ( 0.00%) 43.00 ( 2.27%) Lat 95.00th-qrtle-1 53.00 ( 0.00%) 45.00 ( 15.09%) Lat 99.00th-qrtle-1 59.00 ( 0.00%) 58.00 ( 1.69%) Lat 99.50th-qrtle-1 60.00 ( 0.00%) 59.00 ( 1.67%) Lat 99.90th-qrtle-1 86.00 ( 0.00%) 61.00 ( 29.07%) Lat 50.00th-qrtle-2 52.00 ( 0.00%) 41.00 ( 21.15%) Lat 75.00th-qrtle-2 57.00 ( 0.00%) 46.00 ( 19.30%) Lat 90.00th-qrtle-2 60.00 ( 0.00%) 53.00 ( 11.67%) Lat 95.00th-qrtle-2 62.00 ( 0.00%) 57.00 ( 8.06%) Lat 99.00th-qrtle-2 73.00 ( 0.00%) 68.00 ( 6.85%) Lat 99.50th-qrtle-2 74.00 ( 0.00%) 71.00 ( 4.05%) Lat 99.90th-qrtle-2 90.00 ( 0.00%) 75.00 ( 16.67%) Lat 50.00th-qrtle-4 57.00 ( 0.00%) 52.00 ( 8.77%) Lat 75.00th-qrtle-4 60.00 ( 0.00%) 58.00 ( 3.33%) Lat 90.00th-qrtle-4 62.00 ( 0.00%) 62.00 ( 0.00%) Lat 95.00th-qrtle-4 65.00 ( 0.00%) 65.00 ( 0.00%) Lat 99.00th-qrtle-4 76.00 ( 0.00%) 75.00 ( 1.32%) Lat 99.50th-qrtle-4 77.00 ( 0.00%) 77.00 ( 0.00%) Lat 99.90th-qrtle-4 87.00 ( 0.00%) 81.00 ( 6.90%) Lat 50.00th-qrtle-8 59.00 ( 0.00%) 57.00 ( 3.39%) Lat 75.00th-qrtle-8 63.00 ( 0.00%) 62.00 ( 1.59%) Lat 90.00th-qrtle-8 66.00 ( 0.00%) 67.00 ( -1.52%) Lat 95.00th-qrtle-8 68.00 ( 0.00%) 70.00 ( -2.94%) Lat 99.00th-qrtle-8 79.00 ( 0.00%) 80.00 ( -1.27%) Lat 99.50th-qrtle-8 80.00 ( 0.00%) 84.00 ( -5.00%) Lat 99.90th-qrtle-8 84.00 ( 0.00%) 90.00 ( -7.14%) Lat 50.00th-qrtle-16 65.00 ( 0.00%) 65.00 ( 0.00%) Lat 75.00th-qrtle-16 77.00 ( 0.00%) 75.00 ( 2.60%) Lat 90.00th-qrtle-16 84.00 ( 0.00%) 83.00 ( 1.19%) Lat 95.00th-qrtle-16 88.00 ( 0.00%) 87.00 ( 1.14%) Lat 99.00th-qrtle-16 97.00 ( 0.00%) 96.00 ( 1.03%) Lat 99.50th-qrtle-16 100.00 ( 0.00%) 104.00 ( -4.00%) Lat 99.90th-qrtle-16 110.00 ( 0.00%) 126.00 ( -14.55%) Lat 50.00th-qrtle-32 70.00 ( 0.00%) 71.00 ( -1.43%) Lat 75.00th-qrtle-32 92.00 ( 0.00%) 94.00 ( -2.17%) Lat 90.00th-qrtle-32 110.00 ( 0.00%) 110.00 ( 0.00%) Lat 95.00th-qrtle-32 
121.00 ( 0.00%) 118.00 ( 2.48%) Lat 99.00th-qrtle-32 135.00 ( 0.00%) 137.00 ( -1.48%) Lat 99.50th-qrtle-32 140.00 ( 0.00%) 146.00 ( -4.29%) Lat 99.90th-qrtle-32 150.00 ( 0.00%) 160.00 ( -6.67%) Lat 50.00th-qrtle-39 80.00 ( 0.00%) 71.00 ( 11.25%) Lat 75.00th-qrtle-39 102.00 ( 0.00%) 91.00 ( 10.78%) Lat 90.00th-qrtle-39 118.00 ( 0.00%) 108.00 ( 8.47%) Lat 95.00th-qrtle-39 128.00 ( 0.00%) 117.00 ( 8.59%) Lat 99.00th-qrtle-39 149.00 ( 0.00%) 133.00 ( 10.74%) Lat 99.50th-qrtle-39 160.00 ( 0.00%) 139.00 ( 13.12%) Lat 99.90th-qrtle-39 13808.00 ( 0.00%) 4920.00 ( 64.37%) Despite being nearly identical, it showed a variety of major gains so I'm not convinced that heavy emphasis should be placed on this particular workload in terms of evaluating this particular patch. Further evidence of this is the fact that testing on a UMA machine showed small gains/losses even though the patch should be a no-op on UMA. Signed-off-by: Mel Gorman Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171219085947.13136-2-mgorman@techsingularity.net Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 59e66a5848d0..9fec992410f7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5687,8 +5687,8 @@ static int wake_wide(struct task_struct *p) * soonest. For the purpose of speed we only consider the waking and previous * CPU. * - * wake_affine_idle() - only considers 'now', it check if the waking CPU is (or - * will be) idle. + * wake_affine_idle() - only considers 'now', it check if the waking CPU is + * cache-affine and is (or will be) idle. * * wake_affine_weight() - considers the weight to reflect the average * scheduling latency of the CPUs. 
This seems to work @@ -5699,7 +5699,13 @@ static bool wake_affine_idle(struct sched_domain *sd, struct task_struct *p, int this_cpu, int prev_cpu, int sync) { - if (idle_cpu(this_cpu)) + /* + * If this_cpu is idle, it implies the wakeup is from interrupt + * context. Only allow the move if cache is shared. Otherwise an + * interrupt intensive workload could force all tasks onto one + * node depending on the IO topology or IRQ affinity settings. + */ + if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu)) return true; if (sync && cpu_rq(this_cpu)->nr_running == 1) -- cgit v1.2.3 From 34be39305a77b8b1ec9f279163c7cdb6cc719b91 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 12 Dec 2017 12:10:24 +0100 Subject: sched/deadline: Implement "runtime overrun signal" support This patch adds the possibility of getting the delivery of a SIGXCPU signal whenever there is a runtime overrun. The request is done through the sched_flags field within the sched_attr structure. Forward port of https://lkml.org/lkml/2009/10/16/170 Tested-by: Mathieu Poirier Signed-off-by: Juri Lelli Signed-off-by: Claudio Scordino Signed-off-by: Luca Abeni Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/1513077024-25461-1-git-send-email-claudio@evidence.eu.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ include/uapi/linux/sched.h | 5 +++++ kernel/sched/core.c | 3 +-- kernel/sched/deadline.c | 7 +++++++ kernel/time/posix-cpu-timers.c | 18 ++++++++++++++++++ 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d2588263a989..274a449c805a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -472,11 +472,15 @@ struct sched_dl_entity { * has not been executed yet. This flag is useful to avoid race * conditions between the inactive timer handler and the wakeup * code. 
+ * + * @dl_overrun tells if the task asked to be informed about runtime + * overruns. */ unsigned int dl_throttled : 1; unsigned int dl_boosted : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; + unsigned int dl_overrun : 1; /* * Bandwidth enforcement timer. Each -deadline task has its diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index 30a9e51bbb1e..22627f80063e 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -49,5 +49,10 @@ */ #define SCHED_FLAG_RESET_ON_FORK 0x01 #define SCHED_FLAG_RECLAIM 0x02 +#define SCHED_FLAG_DL_OVERRUN 0x04 + +#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \ + SCHED_FLAG_RECLAIM | \ + SCHED_FLAG_DL_OVERRUN) #endif /* _UAPI_LINUX_SCHED_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a794f8155cd5..e28391bf8b04 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4085,8 +4085,7 @@ recheck: return -EINVAL; } - if (attr->sched_flags & - ~(SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_RECLAIM)) + if (attr->sched_flags & ~SCHED_FLAG_ALL) return -EINVAL; /* diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 2473736c7616..4c666dbe5038 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1155,6 +1155,12 @@ static void update_curr_dl(struct rq *rq) throttle: if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) { dl_se->dl_throttled = 1; + + /* If requested, inform the user about runtime overruns. 
*/ + if (dl_runtime_exceeded(dl_se) && + (dl_se->flags & SCHED_FLAG_DL_OVERRUN)) + dl_se->dl_overrun = 1; + __dequeue_task_dl(rq, curr, 0); if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr))) enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); @@ -2566,6 +2572,7 @@ void __dl_clear_params(struct task_struct *p) dl_se->dl_throttled = 0; dl_se->dl_yielded = 0; dl_se->dl_non_contending = 0; + dl_se->dl_overrun = 0; } bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 1f27887aa194..cf50ea34dbd1 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "posix-timers.h" @@ -791,6 +792,14 @@ check_timers_list(struct list_head *timers, return 0; } +static inline void check_dl_overrun(struct task_struct *tsk) +{ + if (tsk->dl.dl_overrun) { + tsk->dl.dl_overrun = 0; + __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); + } +} + /* * Check for any per-thread CPU timers that have fired and move them off * the tsk->cpu_timers[N] list onto the firing list. Here we update the @@ -804,6 +813,9 @@ static void check_thread_timers(struct task_struct *tsk, u64 expires; unsigned long soft; + if (dl_task(tsk)) + check_dl_overrun(tsk); + /* * If cputime_expires is zero, then there are no active * per thread CPU timers. @@ -906,6 +918,9 @@ static void check_process_timers(struct task_struct *tsk, struct task_cputime cputime; unsigned long soft; + if (dl_task(tsk)) + check_dl_overrun(tsk); + /* * If cputimer is not running, then there are no active * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU). 
@@ -1111,6 +1126,9 @@ static inline int fastpath_timer_check(struct task_struct *tsk) return 1; } + if (dl_task(tsk) && tsk->dl.dl_overrun) + return 1; + return 0; } -- cgit v1.2.3 From d4edd662ac1657126df7ffd74a278958b133a77d Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 4 Dec 2017 11:23:18 +0100 Subject: sched/cpufreq: Use the DEADLINE utilization signal SCHED_DEADLINE tracks active utilization signal with a per dl_rq variable named running_bw. Make use of that to drive CPU frequency selection: add up FAIR and DEADLINE contribution to get the required CPU capacity to handle both requirements (while RT still selects max frequency). Co-authored-by: Claudio Scordino Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Linus Torvalds Cc: Luca Abeni Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: alessio.balsini@arm.com Cc: bristot@redhat.com Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: mathieu.poirier@linaro.org Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: rjw@rjwysocki.net Cc: rostedt@goodmis.org Cc: tkjos@android.com Cc: tommaso.cucinotta@santannapisa.it Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/20171204102325.5110-2-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched/cpufreq.h | 2 -- kernel/sched/cpufreq_schedutil.c | 25 +++++++++++++++---------- kernel/sched/sched.h | 10 ++++++++++ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index d1ad3d825561..0b55834efd46 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -12,8 +12,6 @@ #define SCHED_CPUFREQ_DL (1U << 1) #define SCHED_CPUFREQ_IOWAIT (1U << 2) -#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) - #ifdef CONFIG_CPU_FREQ struct update_util_data { void (*func)(struct update_util_data *data, u64 time, unsigned int flags); diff --git 
a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 6dd1ec9e2995..8d266bc5c67d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -179,12 +179,17 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu) { struct rq *rq = cpu_rq(cpu); - unsigned long cfs_max; + unsigned long util_cfs = cpu_util_cfs(rq); + unsigned long util_dl = cpu_util_dl(rq); - cfs_max = arch_scale_cpu_capacity(NULL, cpu); + *max = arch_scale_cpu_capacity(NULL, cpu); - *util = min(rq->cfs.avg.util_avg, cfs_max); - *max = cfs_max; + /* + * Ideally we would like to set util_dl as min/guaranteed freq and + * util_cfs + util_dl as requested freq. However, cpufreq is not yet + * ready for such an interface. So, we only do the latter for now. + */ + *util = min(util_cfs + util_dl, *max); } static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time) @@ -271,7 +276,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, busy = sugov_cpu_is_busy(sg_cpu); - if (flags & SCHED_CPUFREQ_RT_DL) { + if (flags & SCHED_CPUFREQ_RT) { next_f = policy->cpuinfo.max_freq; } else { sugov_get_util(&util, &max, sg_cpu->cpu); @@ -316,7 +321,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) j_sg_cpu->iowait_boost_pending = false; continue; } - if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL) + if (j_sg_cpu->flags & SCHED_CPUFREQ_RT) return policy->cpuinfo.max_freq; j_util = j_sg_cpu->util; @@ -352,7 +357,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - if (flags & SCHED_CPUFREQ_RT_DL) + if (flags & SCHED_CPUFREQ_RT) next_f = sg_policy->policy->cpuinfo.max_freq; else next_f = sugov_next_freq_shared(sg_cpu, time); @@ -382,9 +387,9 @@ static void sugov_irq_work(struct irq_work *irq_work) sg_policy = 
container_of(irq_work, struct sugov_policy, irq_work); /* - * For RT and deadline tasks, the schedutil governor shoots the - * frequency to maximum. Special care must be taken to ensure that this - * kthread doesn't result in the same behavior. + * For RT tasks, the schedutil governor shoots the frequency to maximum. + * Special care must be taken to ensure that this kthread doesn't result + * in the same behavior. * * This is (mostly) guaranteed by the work_in_progress flag. The flag is * updated only at the end of the sugov_work() function and before that diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 43f5d6e936bb..136ab500daeb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2084,3 +2084,13 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} #else /* arch_scale_freq_capacity */ #define arch_scale_freq_invariant() (false) #endif + +static inline unsigned long cpu_util_dl(struct rq *rq) +{ + return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; +} + +static inline unsigned long cpu_util_cfs(struct rq *rq) +{ + return rq->cfs.avg.util_avg; +} -- cgit v1.2.3 From e0367b12674bf4420870cd0237e3ebafb2ec9593 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 4 Dec 2017 11:23:19 +0100 Subject: sched/deadline: Move CPU frequency selection triggering points Since SCHED_DEADLINE doesn't track utilization signal (but reserves a fraction of CPU bandwidth to tasks admitted to the system), there is no point in evaluating frequency changes during each tick event. Move frequency selection triggering points to where running_bw changes. Co-authored-by: Claudio Scordino Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Viresh Kumar Cc: Linus Torvalds Cc: Luca Abeni Cc: Peter Zijlstra Cc: Rafael J . 
Wysocki Cc: Thomas Gleixner Cc: alessio.balsini@arm.com Cc: bristot@redhat.com Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: mathieu.poirier@linaro.org Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: rjw@rjwysocki.net Cc: rostedt@goodmis.org Cc: tkjos@android.com Cc: tommaso.cucinotta@santannapisa.it Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/20171204102325.5110-3-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 7 ++++--- kernel/sched/sched.h | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 4c666dbe5038..f584837b32e7 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -86,6 +86,8 @@ void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq) dl_rq->running_bw += dl_bw; SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */ SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw); + /* kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL); } static inline @@ -98,6 +100,8 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq) SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */ if (dl_rq->running_bw > old) dl_rq->running_bw = 0; + /* kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL); } static inline @@ -1134,9 +1138,6 @@ static void update_curr_dl(struct rq *rq) return; } - /* kick cpufreq (see the comment in kernel/sched/sched.h). 
*/ - cpufreq_update_util(rq, SCHED_CPUFREQ_DL); - schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 136ab500daeb..863964fbcfd2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2055,14 +2055,14 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data); * The way cpufreq is currently arranged requires it to evaluate the CPU * performance state (frequency/voltage) on a regular basis to prevent it from * being stuck in a completely inadequate performance level for too long. - * That is not guaranteed to happen if the updates are only triggered from CFS, - * though, because they may not be coming in if RT or deadline tasks are active - * all the time (or there are RT and DL tasks only). + * That is not guaranteed to happen if the updates are only triggered from CFS + * and DL, though, because they may not be coming in if only RT tasks are + * active all the time (or there are RT tasks only). * - * As a workaround for that issue, this function is called by the RT and DL - * sched classes to trigger extra cpufreq updates to prevent it from stalling, + * As a workaround for that issue, this function is called periodically by the + * RT sched class to trigger extra cpufreq updates to prevent it from stalling, * but that really is a band-aid. Going forward it should be replaced with - * solutions targeted more specifically at RT and DL tasks. + * solutions targeted more specifically at RT tasks. 
*/ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { -- cgit v1.2.3 From 0a5191efe06b5103909206e4fbcff81d30283f8e Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 29 Oct 2017 16:27:21 +0100 Subject: MIPS: AR7: ensure the port type's FCR value is used Since commit aef9a7bd9b67 ("serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO buffers"), the port's default FCR value isn't used in serial8250_do_set_termios anymore, but copied over once in serial8250_config_port and then modified as needed. Unfortunately, serial8250_config_port will never be called if the port is shared between kernel and userspace, and the port's flag doesn't have UPF_BOOT_AUTOCONF, which would trigger a serial8250_config_port as well. This causes garbled output from userspace: [ 5.220000] random: procd urandom read with 49 bits of entropy available ers [kee Fix this by forcing it to be configured on boot, resulting in the expected output: [ 5.250000] random: procd urandom read with 50 bits of entropy available Press the [f] key and hit [enter] to enter failsafe mode Press the [1], [2], [3] or [4] key and hit [enter] to select the debug level Fixes: aef9a7bd9b67 ("serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO buffers") Signed-off-by: Jonas Gorski Cc: Greg Kroah-Hartman Cc: Yoshihiro YUNOMAE Cc: Florian Fainelli Cc: Nicolas Schichan Cc: linux-mips@linux-mips.org Cc: linux-serial@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17544/ Signed-off-by: Ralf Baechle --- arch/mips/ar7/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 4674f1efbe7a..e1675c25d5d4 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -575,7 +575,7 @@ static int __init ar7_register_uarts(void) uart_port.type = PORT_AR7; uart_port.uartclk = clk_get_rate(bus_clk) / 2; uart_port.iotype = UPIO_MEM32; - uart_port.flags = UPF_FIXED_TYPE; + uart_port.flags 
= UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF; uart_port.regshift = 2; uart_port.line = 0; -- cgit v1.2.3 From a71615792d0b341500a9d1d7b374d19e3d443a12 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:14 +0100 Subject: ASoC: add bindings for stm32 DFSDM filter Add bindings that describe audio settings to support Digital Filter for pulse density modulation(PDM) microphone. Signed-off-by: Arnaud Pouliquen Acked-by: Rob Herring Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/st,stm32-adfsdm.txt | 63 ++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt diff --git a/Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt b/Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt new file mode 100644 index 000000000000..864f5b00b031 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/st,stm32-adfsdm.txt @@ -0,0 +1,63 @@ +STMicroelectronics Audio Digital Filter Sigma Delta modulators(DFSDM) + +The DFSDM allows PDM microphone capture through an SPI interface. The Audio +interface is seen as a sub block of the DFSDM device. +For details on DFSDM bindings refer to ../iio/adc/st,stm32-dfsdm-adc.txt + +Required properties: + - compatible: "st,stm32h7-dfsdm-dai". + + - #sound-dai-cells : Must be equal to 0 + + - io-channels : phandle to iio dfsdm instance node. + +Example of a sound card using audio DFSDM node.
+ + sound_card { + compatible = "audio-graph-card"; + + dais = <&cpu_port>; + }; + + dfsdm: dfsdm@40017000 { + compatible = "st,stm32h7-dfsdm"; + reg = <0x40017000 0x400>; + clocks = <&rcc DFSDM1_CK>; + clock-names = "dfsdm"; + #interrupt-cells = <1>; + #address-cells = <1>; + #size-cells = <0>; + + dfsdm_adc0: filter@0 { + compatible = "st,stm32-dfsdm-dmic"; + reg = <0>; + interrupts = <110>; + dmas = <&dmamux1 101 0x400 0x00>; + dma-names = "rx"; + st,adc-channels = <1>; + st,adc-channel-names = "dmic0"; + st,adc-channel-types = "SPI_R"; + st,adc-channel-clk-src = "CLKOUT"; + st,filter-order = <5>; + + dfsdm_dai0: dfsdm-dai { + compatible = "st,stm32h7-dfsdm-dai"; + #sound-dai-cells = <0>; + io-channels = <&dfsdm_adc0 0>; + cpu_port: port { + dfsdm_endpoint: endpoint { + remote-endpoint = <&dmic0_endpoint>; + }; + }; + }; + }; + + dmic0: dmic@0 { + compatible = "dmic-codec"; + #sound-dai-cells = <0>; + port { + dmic0_endpoint: endpoint { + remote-endpoint = <&dfsdm_endpoint>; + }; + }; + }; -- cgit v1.2.3 From 55da094824c4ef1d50bc591733d79448d00265bb Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Wed, 10 Jan 2018 11:13:15 +0100 Subject: ASoC: stm32: add DFSDM DAI support Add driver to handle DAI interface for PDM microphones connected to Digital Filter for Sigma Delta Modulators IP. 
Signed-off-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- sound/soc/stm/Kconfig | 11 ++ sound/soc/stm/Makefile | 3 + sound/soc/stm/stm32_adfsdm.c | 347 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 361 insertions(+) create mode 100644 sound/soc/stm/stm32_adfsdm.c diff --git a/sound/soc/stm/Kconfig b/sound/soc/stm/Kconfig index 3398e6c57f37..a78f7700d489 100644 --- a/sound/soc/stm/Kconfig +++ b/sound/soc/stm/Kconfig @@ -28,4 +28,15 @@ config SND_SOC_STM32_SPDIFRX help Say Y if you want to enable S/PDIF capture for STM32 +config SND_SOC_STM32_DFSDM + tristate "SoC Audio support for STM32 DFSDM" + depends on (ARCH_STM32 && OF && STM32_DFSDM_ADC) || COMPILE_TEST + depends on SND_SOC + select SND_SOC_GENERIC_DMAENGINE_PCM + select SND_SOC_DMIC + select IIO_BUFFER_CB + help + Select this option to enable the STM32 Digital Filter + for Sigma Delta Modulators (DFSDM) driver used + in various STM32 series for digital microphone capture. endmenu diff --git a/sound/soc/stm/Makefile b/sound/soc/stm/Makefile index 5b7f0fab0bd6..3143c0b47042 100644 --- a/sound/soc/stm/Makefile +++ b/sound/soc/stm/Makefile @@ -13,3 +13,6 @@ obj-$(CONFIG_SND_SOC_STM32_I2S) += snd-soc-stm32-i2s.o # SPDIFRX snd-soc-stm32-spdifrx-objs := stm32_spdifrx.o obj-$(CONFIG_SND_SOC_STM32_SPDIFRX) += snd-soc-stm32-spdifrx.o + +#DFSDM +obj-$(CONFIG_SND_SOC_STM32_DFSDM) += stm32_adfsdm.o diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c new file mode 100644 index 000000000000..af50891983c6 --- /dev/null +++ b/sound/soc/stm/stm32_adfsdm.c @@ -0,0 +1,347 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is part of STM32 DFSDM ASoC DAI driver + * + * Copyright (C) 2017, STMicroelectronics - All Rights Reserved + * Authors: Arnaud Pouliquen + * Olivier Moysan + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#define STM32_ADFSDM_DRV_NAME "stm32-adfsdm" + +#define DFSDM_MAX_PERIOD_SIZE (PAGE_SIZE / 2) +#define 
DFSDM_MAX_PERIODS 6 + +struct stm32_adfsdm_priv { + struct snd_soc_dai_driver dai_drv; + struct snd_pcm_substream *substream; + struct device *dev; + + /* IIO */ + struct iio_channel *iio_ch; + struct iio_cb_buffer *iio_cb; + bool iio_active; + + /* PCM buffer */ + unsigned char *pcm_buff; + unsigned int pos; +}; + +static const struct snd_pcm_hardware stm32_adfsdm_pcm_hw = { + .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_PAUSE, + .formats = SNDRV_PCM_FMTBIT_S32_LE, + + .rate_min = 8000, + .rate_max = 32000, + + .channels_min = 1, + .channels_max = 1, + + .periods_min = 2, + .periods_max = DFSDM_MAX_PERIODS, + + .period_bytes_max = DFSDM_MAX_PERIOD_SIZE, + .buffer_bytes_max = DFSDM_MAX_PERIODS * DFSDM_MAX_PERIOD_SIZE +}; + +static void stm32_adfsdm_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); + + if (priv->iio_active) { + iio_channel_stop_all_cb(priv->iio_cb); + priv->iio_active = false; + } +} + +static int stm32_adfsdm_dai_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); + int ret; + + ret = iio_write_channel_attribute(priv->iio_ch, + substream->runtime->rate, 0, + IIO_CHAN_INFO_SAMP_FREQ); + if (ret < 0) { + dev_err(dai->dev, "%s: Failed to set %d sampling rate\n", + __func__, substream->runtime->rate); + return ret; + } + + if (!priv->iio_active) { + ret = iio_channel_start_all_cb(priv->iio_cb); + if (!ret) + priv->iio_active = true; + else + dev_err(dai->dev, "%s: IIO channel start failed (%d)\n", + __func__, ret); + } + + return ret; +} + +static int stm32_adfsdm_set_sysclk(struct snd_soc_dai *dai, int clk_id, + unsigned int freq, int dir) +{ + struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(dai); + ssize_t size; + char str_freq[10]; + + dev_dbg(dai->dev, "%s: Enter for freq %d\n", __func__, freq); + + /* Set IIO frequency if 
CODEC is master as clock comes from SPI_IN */ + + snprintf(str_freq, sizeof(str_freq), "%d\n", freq); + size = iio_write_channel_ext_info(priv->iio_ch, "spi_clk_freq", + str_freq, sizeof(str_freq)); + if (size != sizeof(str_freq)) { + dev_err(dai->dev, "%s: Failed to set SPI clock\n", + __func__); + return -EINVAL; + } + return 0; +} + +static const struct snd_soc_dai_ops stm32_adfsdm_dai_ops = { + .shutdown = stm32_adfsdm_shutdown, + .prepare = stm32_adfsdm_dai_prepare, + .set_sysclk = stm32_adfsdm_set_sysclk, +}; + +static const struct snd_soc_dai_driver stm32_adfsdm_dai = { + .capture = { + .channels_min = 1, + .channels_max = 1, + .formats = SNDRV_PCM_FMTBIT_S32_LE, + .rates = (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 | + SNDRV_PCM_RATE_32000), + }, + .ops = &stm32_adfsdm_dai_ops, +}; + +static const struct snd_soc_component_driver stm32_adfsdm_dai_component = { + .name = "stm32_dfsdm_audio", +}; + +static int stm32_afsdm_pcm_cb(const void *data, size_t size, void *private) +{ + struct stm32_adfsdm_priv *priv = private; + struct snd_soc_pcm_runtime *rtd = priv->substream->private_data; + u8 *pcm_buff = priv->pcm_buff; + u8 *src_buff = (u8 *)data; + unsigned int buff_size = snd_pcm_lib_buffer_bytes(priv->substream); + unsigned int period_size = snd_pcm_lib_period_bytes(priv->substream); + unsigned int old_pos = priv->pos; + unsigned int cur_size = size; + + dev_dbg(rtd->dev, "%s: buff_add :%p, pos = %d, size = %zu\n", + __func__, &pcm_buff[priv->pos], priv->pos, size); + + if ((priv->pos + size) > buff_size) { + memcpy(&pcm_buff[priv->pos], src_buff, buff_size - priv->pos); + cur_size -= buff_size - priv->pos; + priv->pos = 0; + } + + memcpy(&pcm_buff[priv->pos], &src_buff[size - cur_size], cur_size); + priv->pos = (priv->pos + cur_size) % buff_size; + + if (cur_size != size || (old_pos && (old_pos % period_size < size))) + snd_pcm_period_elapsed(priv->substream); + + return 0; +} + +static int stm32_adfsdm_trigger(struct snd_pcm_substream *substream, int cmd) 
+{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct stm32_adfsdm_priv *priv = + snd_soc_dai_get_drvdata(rtd->cpu_dai); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + priv->pos = 0; + return stm32_dfsdm_get_buff_cb(priv->iio_ch->indio_dev, + stm32_afsdm_pcm_cb, priv); + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_STOP: + return stm32_dfsdm_release_buff_cb(priv->iio_ch->indio_dev); + } + + return -EINVAL; +} + +static int stm32_adfsdm_pcm_open(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct stm32_adfsdm_priv *priv = snd_soc_dai_get_drvdata(rtd->cpu_dai); + int ret; + + ret = snd_soc_set_runtime_hwparams(substream, &stm32_adfsdm_pcm_hw); + if (!ret) + priv->substream = substream; + + return ret; +} + +static int stm32_adfsdm_pcm_close(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct stm32_adfsdm_priv *priv = + snd_soc_dai_get_drvdata(rtd->cpu_dai); + + snd_pcm_lib_free_pages(substream); + priv->substream = NULL; + + return 0; +} + +static snd_pcm_uframes_t stm32_adfsdm_pcm_pointer( + struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct stm32_adfsdm_priv *priv = + snd_soc_dai_get_drvdata(rtd->cpu_dai); + + return bytes_to_frames(substream->runtime, priv->pos); +} + +static int stm32_adfsdm_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct stm32_adfsdm_priv *priv = + snd_soc_dai_get_drvdata(rtd->cpu_dai); + int ret; + + ret = snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params)); + if (ret < 0) + return ret; + priv->pcm_buff = substream->runtime->dma_area; + + return iio_channel_cb_set_buffer_watermark(priv->iio_cb, + params_period_size(params)); +} + +static int stm32_adfsdm_pcm_hw_free(struct 
snd_pcm_substream *substream) +{ + snd_pcm_lib_free_pages(substream); + + return 0; +} + +static struct snd_pcm_ops stm32_adfsdm_pcm_ops = { + .open = stm32_adfsdm_pcm_open, + .close = stm32_adfsdm_pcm_close, + .hw_params = stm32_adfsdm_pcm_hw_params, + .hw_free = stm32_adfsdm_pcm_hw_free, + .trigger = stm32_adfsdm_trigger, + .pointer = stm32_adfsdm_pcm_pointer, +}; + +static int stm32_adfsdm_pcm_new(struct snd_soc_pcm_runtime *rtd) +{ + struct snd_pcm *pcm = rtd->pcm; + struct stm32_adfsdm_priv *priv = + snd_soc_dai_get_drvdata(rtd->cpu_dai); + unsigned int size = DFSDM_MAX_PERIODS * DFSDM_MAX_PERIOD_SIZE; + + return snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, + priv->dev, size, size); +} + +static void stm32_adfsdm_pcm_free(struct snd_pcm *pcm) +{ + struct snd_pcm_substream *substream; + struct snd_soc_pcm_runtime *rtd; + struct stm32_adfsdm_priv *priv; + + substream = pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream; + if (substream) { + rtd = substream->private_data; + priv = snd_soc_dai_get_drvdata(rtd->cpu_dai); + + snd_pcm_lib_preallocate_free_for_all(pcm); + } +} + +static struct snd_soc_platform_driver stm32_adfsdm_soc_platform = { + .ops = &stm32_adfsdm_pcm_ops, + .pcm_new = stm32_adfsdm_pcm_new, + .pcm_free = stm32_adfsdm_pcm_free, +}; + +static const struct of_device_id stm32_adfsdm_of_match[] = { + {.compatible = "st,stm32h7-dfsdm-dai"}, + {} +}; +MODULE_DEVICE_TABLE(of, stm32_adfsdm_of_match); + +static int stm32_adfsdm_probe(struct platform_device *pdev) +{ + struct stm32_adfsdm_priv *priv; + int ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = &pdev->dev; + priv->dai_drv = stm32_adfsdm_dai; + + dev_set_drvdata(&pdev->dev, priv); + + ret = devm_snd_soc_register_component(&pdev->dev, + &stm32_adfsdm_dai_component, + &priv->dai_drv, 1); + if (ret < 0) + return ret; + + /* Associate iio channel */ + priv->iio_ch = devm_iio_channel_get_all(&pdev->dev); + if 
(IS_ERR(priv->iio_ch)) + return PTR_ERR(priv->iio_ch); + + priv->iio_cb = iio_channel_get_all_cb(&pdev->dev, NULL, NULL); + if (IS_ERR(priv->iio_cb)) + return PTR_ERR(priv->iio_ch); + + ret = devm_snd_soc_register_platform(&pdev->dev, + &stm32_adfsdm_soc_platform); + if (ret < 0) + dev_err(&pdev->dev, "%s: Failed to register PCM platform\n", + __func__); + + return ret; +} + +static struct platform_driver stm32_adfsdm_driver = { + .driver = { + .name = STM32_ADFSDM_DRV_NAME, + .of_match_table = stm32_adfsdm_of_match, + }, + .probe = stm32_adfsdm_probe, +}; + +module_platform_driver(stm32_adfsdm_driver); + +MODULE_DESCRIPTION("stm32 DFSDM DAI driver"); +MODULE_AUTHOR("Arnaud Pouliquen "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:" STM32_ADFSDM_DRV_NAME); -- cgit v1.2.3 From 05f4434bc13030ca67f229b8defd37f12a05d1fa Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 10 Jan 2018 15:59:34 +0530 Subject: ASoC: Intel: remove mfld_machine mfld_machine was not getting compiled due to missed Makefile changes. Since no one complained it is safe to assume that it is not being used, so remove it Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/Kconfig | 14 -- sound/soc/intel/boards/mfld_machine.c | 430 ---------------------------------- 2 files changed, 444 deletions(-) delete mode 100644 sound/soc/intel/boards/mfld_machine.c diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index 12761d8fd8a5..de598dcbef30 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -12,20 +12,6 @@ menuconfig SND_SOC_INTEL_MACH if SND_SOC_INTEL_MACH -if SND_SST_ATOM_HIFI2_PLATFORM_PCI - -config SND_MFLD_MACHINE - tristate "Medfield (Intel MID)" - depends on INTEL_SCU_IPC - select SND_SOC_SN95031 - help - This adds support for ASoC machine driver for Intel(R) MID Medfield platform - used as alsa device in audio substem in Intel(R) MID devices - Say Y if you have such a device. - If unsure select "N". 
- -endif ## SND_SST_ATOM_HIFI2_PLATFORM_PCI - if SND_SOC_INTEL_HASWELL config SND_SOC_INTEL_HASWELL_MACH diff --git a/sound/soc/intel/boards/mfld_machine.c b/sound/soc/intel/boards/mfld_machine.c deleted file mode 100644 index 7cb44fdde1ee..000000000000 --- a/sound/soc/intel/boards/mfld_machine.c +++ /dev/null @@ -1,430 +0,0 @@ -/* - * mfld_machine.c - ASoc Machine driver for Intel Medfield MID platform - * - * Copyright (C) 2010 Intel Corp - * Author: Vinod Koul - * Author: Harsha Priya - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../codecs/sn95031.h" - -#define MID_MONO 1 -#define MID_STEREO 2 -#define MID_MAX_CAP 5 -#define MFLD_JACK_INSERT 0x04 - -enum soc_mic_bias_zones { - MFLD_MV_START = 0, - /* mic bias volutage range for Headphones*/ - MFLD_MV_HP = 400, - /* mic bias volutage range for American Headset*/ - MFLD_MV_AM_HS = 650, - /* mic bias volutage range for Headset*/ - MFLD_MV_HS = 2000, - MFLD_MV_UNDEFINED, -}; - -static unsigned int hs_switch; -static unsigned int lo_dac; -static struct snd_soc_codec *mfld_codec; - -struct mfld_mc_private { - void __iomem *int_base; - u8 interrupt_status; -}; - -struct snd_soc_jack mfld_jack; - -/*Headset jack detection DAPM pins */ -static struct snd_soc_jack_pin mfld_jack_pins[] = { - { - .pin = "Headphones", - .mask = SND_JACK_HEADPHONE, - }, - { - .pin = "AMIC1", - .mask = SND_JACK_MICROPHONE, - }, -}; - -/* jack detection voltage zones */ -static struct snd_soc_jack_zone mfld_zones[] = { - {MFLD_MV_START, MFLD_MV_AM_HS, SND_JACK_HEADPHONE}, - {MFLD_MV_AM_HS, MFLD_MV_HS, SND_JACK_HEADSET}, -}; - -/* sound card controls */ -static const char * const headset_switch_text[] = {"Earpiece", "Headset"}; - -static const char * const lo_text[] = {"Vibra", "Headset", "IHF", "None"}; - -static const struct soc_enum headset_enum = - SOC_ENUM_SINGLE_EXT(2, headset_switch_text); - -static const struct soc_enum lo_enum = - SOC_ENUM_SINGLE_EXT(4, lo_text); - -static int headset_get_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - ucontrol->value.enumerated.item[0] = hs_switch; - return 0; -} - -static int headset_set_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_card *card = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = 
&card->dapm; - - if (ucontrol->value.enumerated.item[0] == hs_switch) - return 0; - - snd_soc_dapm_mutex_lock(dapm); - - if (ucontrol->value.enumerated.item[0]) { - pr_debug("hs_set HS path\n"); - snd_soc_dapm_enable_pin_unlocked(dapm, "Headphones"); - snd_soc_dapm_disable_pin_unlocked(dapm, "EPOUT"); - } else { - pr_debug("hs_set EP path\n"); - snd_soc_dapm_disable_pin_unlocked(dapm, "Headphones"); - snd_soc_dapm_enable_pin_unlocked(dapm, "EPOUT"); - } - - snd_soc_dapm_sync_unlocked(dapm); - - snd_soc_dapm_mutex_unlock(dapm); - - hs_switch = ucontrol->value.enumerated.item[0]; - - return 0; -} - -static void lo_enable_out_pins(struct snd_soc_dapm_context *dapm) -{ - snd_soc_dapm_enable_pin_unlocked(dapm, "IHFOUTL"); - snd_soc_dapm_enable_pin_unlocked(dapm, "IHFOUTR"); - snd_soc_dapm_enable_pin_unlocked(dapm, "LINEOUTL"); - snd_soc_dapm_enable_pin_unlocked(dapm, "LINEOUTR"); - snd_soc_dapm_enable_pin_unlocked(dapm, "VIB1OUT"); - snd_soc_dapm_enable_pin_unlocked(dapm, "VIB2OUT"); - if (hs_switch) { - snd_soc_dapm_enable_pin_unlocked(dapm, "Headphones"); - snd_soc_dapm_disable_pin_unlocked(dapm, "EPOUT"); - } else { - snd_soc_dapm_disable_pin_unlocked(dapm, "Headphones"); - snd_soc_dapm_enable_pin_unlocked(dapm, "EPOUT"); - } -} - -static int lo_get_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - ucontrol->value.enumerated.item[0] = lo_dac; - return 0; -} - -static int lo_set_switch(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_card *card = snd_kcontrol_chip(kcontrol); - struct snd_soc_dapm_context *dapm = &card->dapm; - - if (ucontrol->value.enumerated.item[0] == lo_dac) - return 0; - - snd_soc_dapm_mutex_lock(dapm); - - /* we dont want to work with last state of lineout so just enable all - * pins and then disable pins not required - */ - lo_enable_out_pins(dapm); - - switch (ucontrol->value.enumerated.item[0]) { - case 0: - pr_debug("set vibra path\n"); - 
snd_soc_dapm_disable_pin_unlocked(dapm, "VIB1OUT"); - snd_soc_dapm_disable_pin_unlocked(dapm, "VIB2OUT"); - snd_soc_update_bits(mfld_codec, SN95031_LOCTL, 0x66, 0); - break; - - case 1: - pr_debug("set hs path\n"); - snd_soc_dapm_disable_pin_unlocked(dapm, "Headphones"); - snd_soc_dapm_disable_pin_unlocked(dapm, "EPOUT"); - snd_soc_update_bits(mfld_codec, SN95031_LOCTL, 0x66, 0x22); - break; - - case 2: - pr_debug("set spkr path\n"); - snd_soc_dapm_disable_pin_unlocked(dapm, "IHFOUTL"); - snd_soc_dapm_disable_pin_unlocked(dapm, "IHFOUTR"); - snd_soc_update_bits(mfld_codec, SN95031_LOCTL, 0x66, 0x44); - break; - - case 3: - pr_debug("set null path\n"); - snd_soc_dapm_disable_pin_unlocked(dapm, "LINEOUTL"); - snd_soc_dapm_disable_pin_unlocked(dapm, "LINEOUTR"); - snd_soc_update_bits(mfld_codec, SN95031_LOCTL, 0x66, 0x66); - break; - } - - snd_soc_dapm_sync_unlocked(dapm); - - snd_soc_dapm_mutex_unlock(dapm); - - lo_dac = ucontrol->value.enumerated.item[0]; - return 0; -} - -static const struct snd_kcontrol_new mfld_snd_controls[] = { - SOC_ENUM_EXT("Playback Switch", headset_enum, - headset_get_switch, headset_set_switch), - SOC_ENUM_EXT("Lineout Mux", lo_enum, - lo_get_switch, lo_set_switch), -}; - -static const struct snd_soc_dapm_widget mfld_widgets[] = { - SND_SOC_DAPM_HP("Headphones", NULL), - SND_SOC_DAPM_MIC("Mic", NULL), -}; - -static const struct snd_soc_dapm_route mfld_map[] = { - {"Headphones", NULL, "HPOUTR"}, - {"Headphones", NULL, "HPOUTL"}, - {"Mic", NULL, "AMIC1"}, -}; - -static void mfld_jack_check(unsigned int intr_status) -{ - struct mfld_jack_data jack_data; - - if (!mfld_codec) - return; - - jack_data.mfld_jack = &mfld_jack; - jack_data.intr_id = intr_status; - - sn95031_jack_detection(mfld_codec, &jack_data); - /* TODO: add american headset detection post gpiolib support */ -} - -static int mfld_init(struct snd_soc_pcm_runtime *runtime) -{ - struct snd_soc_dapm_context *dapm = &runtime->card->dapm; - int ret_val; - - /* default is earpiece pin, 
userspace sets it explcitly */ - snd_soc_dapm_disable_pin(dapm, "Headphones"); - /* default is lineout NC, userspace sets it explcitly */ - snd_soc_dapm_disable_pin(dapm, "LINEOUTL"); - snd_soc_dapm_disable_pin(dapm, "LINEOUTR"); - lo_dac = 3; - hs_switch = 0; - /* we dont use linein in this so set to NC */ - snd_soc_dapm_disable_pin(dapm, "LINEINL"); - snd_soc_dapm_disable_pin(dapm, "LINEINR"); - - /* Headset and button jack detection */ - ret_val = snd_soc_card_jack_new(runtime->card, - "Intel(R) MID Audio Jack", SND_JACK_HEADSET | - SND_JACK_BTN_0 | SND_JACK_BTN_1, &mfld_jack, - mfld_jack_pins, ARRAY_SIZE(mfld_jack_pins)); - if (ret_val) { - pr_err("jack creation failed\n"); - return ret_val; - } - - ret_val = snd_soc_jack_add_zones(&mfld_jack, - ARRAY_SIZE(mfld_zones), mfld_zones); - if (ret_val) { - pr_err("adding jack zones failed\n"); - return ret_val; - } - - mfld_codec = runtime->codec; - - /* we want to check if anything is inserted at boot, - * so send a fake event to codec and it will read adc - * to find if anything is there or not */ - mfld_jack_check(MFLD_JACK_INSERT); - return ret_val; -} - -static struct snd_soc_dai_link mfld_msic_dailink[] = { - { - .name = "Medfield Headset", - .stream_name = "Headset", - .cpu_dai_name = "Headset-cpu-dai", - .codec_dai_name = "SN95031 Headset", - .codec_name = "sn95031", - .platform_name = "sst-platform", - .init = mfld_init, - }, - { - .name = "Medfield Speaker", - .stream_name = "Speaker", - .cpu_dai_name = "Speaker-cpu-dai", - .codec_dai_name = "SN95031 Speaker", - .codec_name = "sn95031", - .platform_name = "sst-platform", - .init = NULL, - }, - { - .name = "Medfield Vibra", - .stream_name = "Vibra1", - .cpu_dai_name = "Vibra1-cpu-dai", - .codec_dai_name = "SN95031 Vibra1", - .codec_name = "sn95031", - .platform_name = "sst-platform", - .init = NULL, - }, - { - .name = "Medfield Haptics", - .stream_name = "Vibra2", - .cpu_dai_name = "Vibra2-cpu-dai", - .codec_dai_name = "SN95031 Vibra2", - .codec_name = 
"sn95031", - .platform_name = "sst-platform", - .init = NULL, - }, - { - .name = "Medfield Compress", - .stream_name = "Speaker", - .cpu_dai_name = "Compress-cpu-dai", - .codec_dai_name = "SN95031 Speaker", - .codec_name = "sn95031", - .platform_name = "sst-platform", - .init = NULL, - }, -}; - -/* SoC card */ -static struct snd_soc_card snd_soc_card_mfld = { - .name = "medfield_audio", - .owner = THIS_MODULE, - .dai_link = mfld_msic_dailink, - .num_links = ARRAY_SIZE(mfld_msic_dailink), - - .controls = mfld_snd_controls, - .num_controls = ARRAY_SIZE(mfld_snd_controls), - .dapm_widgets = mfld_widgets, - .num_dapm_widgets = ARRAY_SIZE(mfld_widgets), - .dapm_routes = mfld_map, - .num_dapm_routes = ARRAY_SIZE(mfld_map), -}; - -static irqreturn_t snd_mfld_jack_intr_handler(int irq, void *dev) -{ - struct mfld_mc_private *mc_private = (struct mfld_mc_private *) dev; - - memcpy_fromio(&mc_private->interrupt_status, - ((void *)(mc_private->int_base)), - sizeof(u8)); - return IRQ_WAKE_THREAD; -} - -static irqreturn_t snd_mfld_jack_detection(int irq, void *data) -{ - struct mfld_mc_private *mc_drv_ctx = (struct mfld_mc_private *) data; - - mfld_jack_check(mc_drv_ctx->interrupt_status); - - return IRQ_HANDLED; -} - -static int snd_mfld_mc_probe(struct platform_device *pdev) -{ - int ret_val = 0, irq; - struct mfld_mc_private *mc_drv_ctx; - struct resource *irq_mem; - - pr_debug("snd_mfld_mc_probe called\n"); - - /* retrive the irq number */ - irq = platform_get_irq(pdev, 0); - if (irq <= 0) - return irq < 0 ? 
irq : -ENODEV; - - /* audio interrupt base of SRAM location where - * interrupts are stored by System FW */ - mc_drv_ctx = devm_kzalloc(&pdev->dev, sizeof(*mc_drv_ctx), GFP_ATOMIC); - if (!mc_drv_ctx) - return -ENOMEM; - - irq_mem = platform_get_resource_byname( - pdev, IORESOURCE_MEM, "IRQ_BASE"); - if (!irq_mem) { - pr_err("no mem resource given\n"); - return -ENODEV; - } - mc_drv_ctx->int_base = devm_ioremap_nocache(&pdev->dev, irq_mem->start, - resource_size(irq_mem)); - if (!mc_drv_ctx->int_base) { - pr_err("Mapping of cache failed\n"); - return -ENOMEM; - } - /* register for interrupt */ - ret_val = devm_request_threaded_irq(&pdev->dev, irq, - snd_mfld_jack_intr_handler, - snd_mfld_jack_detection, - IRQF_SHARED, pdev->dev.driver->name, mc_drv_ctx); - if (ret_val) { - pr_err("cannot register IRQ\n"); - return ret_val; - } - /* register the soc card */ - snd_soc_card_mfld.dev = &pdev->dev; - ret_val = devm_snd_soc_register_card(&pdev->dev, &snd_soc_card_mfld); - if (ret_val) { - pr_debug("snd_soc_register_card failed %d\n", ret_val); - return ret_val; - } - platform_set_drvdata(pdev, mc_drv_ctx); - pr_debug("successfully exited probe\n"); - return 0; -} - -static struct platform_driver snd_mfld_mc_driver = { - .driver = { - .name = "msic_audio", - }, - .probe = snd_mfld_mc_probe, -}; - -module_platform_driver(snd_mfld_mc_driver); - -MODULE_DESCRIPTION("ASoC Intel(R) MID Machine driver"); -MODULE_AUTHOR("Vinod Koul "); -MODULE_AUTHOR("Harsha Priya "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:msic-audio"); -- cgit v1.2.3 From 987da3fe175933c28aab2293505c3597052ff0e2 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Wed, 10 Jan 2018 15:59:35 +0530 Subject: ASoC: sn95031: remove this code This codec was used in MFLD systems in the PMIC chip, we no longer have users for this, so remove it Signed-off-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 4 - sound/soc/codecs/Makefile | 1 - sound/soc/codecs/sn95031.c | 936 
--------------------------------------------- sound/soc/codecs/sn95031.h | 133 ------- 4 files changed, 1074 deletions(-) delete mode 100644 sound/soc/codecs/sn95031.c delete mode 100644 sound/soc/codecs/sn95031.h diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index a42ddbc93f3d..3ed2b985b38b 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -133,7 +133,6 @@ config SND_SOC_ALL_CODECS select SND_SOC_SGTL5000 if I2C select SND_SOC_SI476X if MFD_SI476X_CORE select SND_SOC_SIRF_AUDIO_CODEC - select SND_SOC_SN95031 if INTEL_SCU_IPC select SND_SOC_SPDIF select SND_SOC_SSM2518 if I2C select SND_SOC_SSM2602_SPI if SPI_MASTER @@ -818,9 +817,6 @@ config SND_SOC_SIRF_AUDIO_CODEC tristate "SiRF SoC internal audio codec" select REGMAP_MMIO -config SND_SOC_SN95031 - tristate - config SND_SOC_SPDIF tristate "S/PDIF CODEC" diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 0001069ce2a7..ae25cbe85d1d 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -140,7 +140,6 @@ snd-soc-sigmadsp-i2c-objs := sigmadsp-i2c.o snd-soc-sigmadsp-regmap-objs := sigmadsp-regmap.o snd-soc-si476x-objs := si476x.o snd-soc-sirf-audio-codec-objs := sirf-audio-codec.o -snd-soc-sn95031-objs := sn95031.o snd-soc-spdif-tx-objs := spdif_transmitter.o snd-soc-spdif-rx-objs := spdif_receiver.o snd-soc-ssm2518-objs := ssm2518.o diff --git a/sound/soc/codecs/sn95031.c b/sound/soc/codecs/sn95031.c deleted file mode 100644 index 887923e68849..000000000000 --- a/sound/soc/codecs/sn95031.c +++ /dev/null @@ -1,936 +0,0 @@ -/* - * sn95031.c - TI sn95031 Codec driver - * - * Copyright (C) 2010 Intel Corp - * Author: Vinod Koul - * Author: Harsha Priya - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include "sn95031.h" - -#define SN95031_RATES (SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_44100) -#define SN95031_FORMATS (SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S16_LE) - -/* adc helper functions */ - -/* enables mic bias voltage */ -static void sn95031_enable_mic_bias(struct snd_soc_codec *codec) -{ - snd_soc_write(codec, SN95031_VAUD, BIT(2)|BIT(1)|BIT(0)); - snd_soc_update_bits(codec, SN95031_MICBIAS, BIT(2), BIT(2)); -} - -/* Enable/Disable the ADC depending on the argument */ -static void configure_adc(struct snd_soc_codec *sn95031_codec, int val) -{ - int value = snd_soc_read(sn95031_codec, SN95031_ADC1CNTL1); - - if (val) { - /* Enable and start the ADC */ - value |= (SN95031_ADC_ENBL | SN95031_ADC_START); - value &= (~SN95031_ADC_NO_LOOP); - } else { - /* Just stop the ADC */ - value &= (~SN95031_ADC_START); - } - snd_soc_write(sn95031_codec, SN95031_ADC1CNTL1, value); -} - -/* - * finds an empty channel for conversion - * If the ADC is not enabled then start using 0th channel - * itself. Otherwise find an empty channel by looking for a - * channel in which the stopbit is set to 1. returns the index - * of the first free channel if succeeds or an error code. 
- * - * Context: can sleep - * - */ -static int find_free_channel(struct snd_soc_codec *sn95031_codec) -{ - int i, value; - - /* check whether ADC is enabled */ - value = snd_soc_read(sn95031_codec, SN95031_ADC1CNTL1); - - if ((value & SN95031_ADC_ENBL) == 0) - return 0; - - /* ADC is already enabled; Looking for an empty channel */ - for (i = 0; i < SN95031_ADC_CHANLS_MAX; i++) { - value = snd_soc_read(sn95031_codec, - SN95031_ADC_CHNL_START_ADDR + i); - if (value & SN95031_STOPBIT_MASK) - break; - } - return (i == SN95031_ADC_CHANLS_MAX) ? (-EINVAL) : i; -} - -/* Initialize the ADC for reading micbias values. Can sleep. */ -static int sn95031_initialize_adc(struct snd_soc_codec *sn95031_codec) -{ - int base_addr, chnl_addr; - int value; - int channel_index; - - /* Index of the first channel in which the stop bit is set */ - channel_index = find_free_channel(sn95031_codec); - if (channel_index < 0) { - pr_err("No free ADC channels"); - return channel_index; - } - - base_addr = SN95031_ADC_CHNL_START_ADDR + channel_index; - - if (!(channel_index == 0 || channel_index == SN95031_ADC_LOOP_MAX)) { - /* Reset stop bit for channels other than 0 and 12 */ - value = snd_soc_read(sn95031_codec, base_addr); - /* Set the stop bit to zero */ - snd_soc_write(sn95031_codec, base_addr, value & 0xEF); - /* Index of the first free channel */ - base_addr++; - channel_index++; - } - - /* Since this is the last channel, set the stop bit - to 1 by ORing the DIE_SENSOR_CODE with 0x10 */ - snd_soc_write(sn95031_codec, base_addr, - SN95031_AUDIO_DETECT_CODE | 0x10); - - chnl_addr = SN95031_ADC_DATA_START_ADDR + 2 * channel_index; - pr_debug("mid_initialize : %x", chnl_addr); - configure_adc(sn95031_codec, 1); - return chnl_addr; -} - - -/* reads the ADC registers and gets the mic bias value in mV. 
*/ -static unsigned int sn95031_get_mic_bias(struct snd_soc_codec *codec) -{ - u16 adc_adr = sn95031_initialize_adc(codec); - u16 adc_val1, adc_val2; - unsigned int mic_bias; - - sn95031_enable_mic_bias(codec); - - /* Enable the sound card for conversion before reading */ - snd_soc_write(codec, SN95031_ADC1CNTL3, 0x05); - /* Re-toggle the RRDATARD bit */ - snd_soc_write(codec, SN95031_ADC1CNTL3, 0x04); - - /* Read the higher bits of data */ - msleep(1000); - adc_val1 = snd_soc_read(codec, adc_adr); - adc_adr++; - adc_val2 = snd_soc_read(codec, adc_adr); - - /* Adding lower two bits to the higher bits */ - mic_bias = (adc_val1 << 2) + (adc_val2 & 3); - mic_bias = (mic_bias * SN95031_ADC_ONE_LSB_MULTIPLIER) / 1000; - pr_debug("mic bias = %dmV\n", mic_bias); - return mic_bias; -} -/*end - adc helper functions */ - -static int sn95031_read(void *ctx, unsigned int reg, unsigned int *val) -{ - u8 value = 0; - int ret; - - ret = intel_scu_ipc_ioread8(reg, &value); - if (ret == 0) - *val = value; - - return ret; -} - -static int sn95031_write(void *ctx, unsigned int reg, unsigned int value) -{ - return intel_scu_ipc_iowrite8(reg, value); -} - -static const struct regmap_config sn95031_regmap = { - .reg_read = sn95031_read, - .reg_write = sn95031_write, -}; - -static int sn95031_set_vaud_bias(struct snd_soc_codec *codec, - enum snd_soc_bias_level level) -{ - switch (level) { - case SND_SOC_BIAS_ON: - break; - - case SND_SOC_BIAS_PREPARE: - if (snd_soc_codec_get_bias_level(codec) == SND_SOC_BIAS_STANDBY) { - pr_debug("vaud_bias powering up pll\n"); - /* power up the pll */ - snd_soc_write(codec, SN95031_AUDPLLCTRL, BIT(5)); - /* enable pcm 2 */ - snd_soc_update_bits(codec, SN95031_PCM2C2, - BIT(0), BIT(0)); - } - break; - - case SND_SOC_BIAS_STANDBY: - switch (snd_soc_codec_get_bias_level(codec)) { - case SND_SOC_BIAS_OFF: - pr_debug("vaud_bias power up rail\n"); - /* power up the rail */ - snd_soc_write(codec, SN95031_VAUD, - BIT(2)|BIT(1)|BIT(0)); - msleep(1); - break; - 
case SND_SOC_BIAS_PREPARE: - /* turn off pcm */ - pr_debug("vaud_bias power dn pcm\n"); - snd_soc_update_bits(codec, SN95031_PCM2C2, BIT(0), 0); - snd_soc_write(codec, SN95031_AUDPLLCTRL, 0); - break; - default: - break; - } - break; - - - case SND_SOC_BIAS_OFF: - pr_debug("vaud_bias _OFF doing rail shutdown\n"); - snd_soc_write(codec, SN95031_VAUD, BIT(3)); - break; - } - - return 0; -} - -static int sn95031_vhs_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *kcontrol, int event) -{ - struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); - - if (SND_SOC_DAPM_EVENT_ON(event)) { - pr_debug("VHS SND_SOC_DAPM_EVENT_ON doing rail startup now\n"); - /* power up the rail */ - snd_soc_write(codec, SN95031_VHSP, 0x3D); - snd_soc_write(codec, SN95031_VHSN, 0x3F); - msleep(1); - } else if (SND_SOC_DAPM_EVENT_OFF(event)) { - pr_debug("VHS SND_SOC_DAPM_EVENT_OFF doing rail shutdown\n"); - snd_soc_write(codec, SN95031_VHSP, 0xC4); - snd_soc_write(codec, SN95031_VHSN, 0x04); - } - return 0; -} - -static int sn95031_vihf_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *kcontrol, int event) -{ - struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); - - if (SND_SOC_DAPM_EVENT_ON(event)) { - pr_debug("VIHF SND_SOC_DAPM_EVENT_ON doing rail startup now\n"); - /* power up the rail */ - snd_soc_write(codec, SN95031_VIHF, 0x27); - msleep(1); - } else if (SND_SOC_DAPM_EVENT_OFF(event)) { - pr_debug("VIHF SND_SOC_DAPM_EVENT_OFF doing rail shutdown\n"); - snd_soc_write(codec, SN95031_VIHF, 0x24); - } - return 0; -} - -static int sn95031_dmic12_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *k, int event) -{ - struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); - unsigned int ldo = 0, clk_dir = 0, data_dir = 0; - - if (SND_SOC_DAPM_EVENT_ON(event)) { - ldo = BIT(5)|BIT(4); - clk_dir = BIT(0); - data_dir = BIT(7); - } - /* program DMIC LDO, clock and set clock */ - snd_soc_update_bits(codec, SN95031_MICBIAS, BIT(5)|BIT(4), ldo); - 
snd_soc_update_bits(codec, SN95031_DMICBUF0123, BIT(0), clk_dir); - snd_soc_update_bits(codec, SN95031_DMICBUF0123, BIT(7), data_dir); - return 0; -} - -static int sn95031_dmic34_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *k, int event) -{ - struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); - unsigned int ldo = 0, clk_dir = 0, data_dir = 0; - - if (SND_SOC_DAPM_EVENT_ON(event)) { - ldo = BIT(5)|BIT(4); - clk_dir = BIT(2); - data_dir = BIT(1); - } - /* program DMIC LDO, clock and set clock */ - snd_soc_update_bits(codec, SN95031_MICBIAS, BIT(5)|BIT(4), ldo); - snd_soc_update_bits(codec, SN95031_DMICBUF0123, BIT(2), clk_dir); - snd_soc_update_bits(codec, SN95031_DMICBUF45, BIT(1), data_dir); - return 0; -} - -static int sn95031_dmic56_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *k, int event) -{ - struct snd_soc_codec *codec = snd_soc_dapm_to_codec(w->dapm); - unsigned int ldo = 0; - - if (SND_SOC_DAPM_EVENT_ON(event)) - ldo = BIT(7)|BIT(6); - - /* program DMIC LDO */ - snd_soc_update_bits(codec, SN95031_MICBIAS, BIT(7)|BIT(6), ldo); - return 0; -} - -/* mux controls */ -static const char *sn95031_mic_texts[] = { "AMIC", "LineIn" }; - -static SOC_ENUM_SINGLE_DECL(sn95031_micl_enum, - SN95031_ADCCONFIG, 1, sn95031_mic_texts); - -static const struct snd_kcontrol_new sn95031_micl_mux_control = - SOC_DAPM_ENUM("Route", sn95031_micl_enum); - -static SOC_ENUM_SINGLE_DECL(sn95031_micr_enum, - SN95031_ADCCONFIG, 3, sn95031_mic_texts); - -static const struct snd_kcontrol_new sn95031_micr_mux_control = - SOC_DAPM_ENUM("Route", sn95031_micr_enum); - -static const char *sn95031_input_texts[] = { "DMIC1", "DMIC2", "DMIC3", - "DMIC4", "DMIC5", "DMIC6", - "ADC Left", "ADC Right" }; - -static SOC_ENUM_SINGLE_DECL(sn95031_input1_enum, - SN95031_AUDIOMUX12, 0, sn95031_input_texts); - -static const struct snd_kcontrol_new sn95031_input1_mux_control = - SOC_DAPM_ENUM("Route", sn95031_input1_enum); - -static 
SOC_ENUM_SINGLE_DECL(sn95031_input2_enum, - SN95031_AUDIOMUX12, 4, sn95031_input_texts); - -static const struct snd_kcontrol_new sn95031_input2_mux_control = - SOC_DAPM_ENUM("Route", sn95031_input2_enum); - -static SOC_ENUM_SINGLE_DECL(sn95031_input3_enum, - SN95031_AUDIOMUX34, 0, sn95031_input_texts); - -static const struct snd_kcontrol_new sn95031_input3_mux_control = - SOC_DAPM_ENUM("Route", sn95031_input3_enum); - -static SOC_ENUM_SINGLE_DECL(sn95031_input4_enum, - SN95031_AUDIOMUX34, 4, sn95031_input_texts); - -static const struct snd_kcontrol_new sn95031_input4_mux_control = - SOC_DAPM_ENUM("Route", sn95031_input4_enum); - -/* capture path controls */ - -static const char *sn95031_micmode_text[] = {"Single Ended", "Differential"}; - -/* 0dB to 30dB in 10dB steps */ -static const DECLARE_TLV_DB_SCALE(mic_tlv, 0, 10, 0); - -static SOC_ENUM_SINGLE_DECL(sn95031_micmode1_enum, - SN95031_MICAMP1, 1, sn95031_micmode_text); -static SOC_ENUM_SINGLE_DECL(sn95031_micmode2_enum, - SN95031_MICAMP2, 1, sn95031_micmode_text); - -static const char *sn95031_dmic_cfg_text[] = {"GPO", "DMIC"}; - -static SOC_ENUM_SINGLE_DECL(sn95031_dmic12_cfg_enum, - SN95031_DMICMUX, 0, sn95031_dmic_cfg_text); -static SOC_ENUM_SINGLE_DECL(sn95031_dmic34_cfg_enum, - SN95031_DMICMUX, 1, sn95031_dmic_cfg_text); -static SOC_ENUM_SINGLE_DECL(sn95031_dmic56_cfg_enum, - SN95031_DMICMUX, 2, sn95031_dmic_cfg_text); - -static const struct snd_kcontrol_new sn95031_snd_controls[] = { - SOC_ENUM("Mic1Mode Capture Route", sn95031_micmode1_enum), - SOC_ENUM("Mic2Mode Capture Route", sn95031_micmode2_enum), - SOC_ENUM("DMIC12 Capture Route", sn95031_dmic12_cfg_enum), - SOC_ENUM("DMIC34 Capture Route", sn95031_dmic34_cfg_enum), - SOC_ENUM("DMIC56 Capture Route", sn95031_dmic56_cfg_enum), - SOC_SINGLE_TLV("Mic1 Capture Volume", SN95031_MICAMP1, - 2, 4, 0, mic_tlv), - SOC_SINGLE_TLV("Mic2 Capture Volume", SN95031_MICAMP2, - 2, 4, 0, mic_tlv), -}; - -/* DAPM widgets */ -static const struct snd_soc_dapm_widget 
sn95031_dapm_widgets[] = { - - /* all end points mic, hs etc */ - SND_SOC_DAPM_OUTPUT("HPOUTL"), - SND_SOC_DAPM_OUTPUT("HPOUTR"), - SND_SOC_DAPM_OUTPUT("EPOUT"), - SND_SOC_DAPM_OUTPUT("IHFOUTL"), - SND_SOC_DAPM_OUTPUT("IHFOUTR"), - SND_SOC_DAPM_OUTPUT("LINEOUTL"), - SND_SOC_DAPM_OUTPUT("LINEOUTR"), - SND_SOC_DAPM_OUTPUT("VIB1OUT"), - SND_SOC_DAPM_OUTPUT("VIB2OUT"), - - SND_SOC_DAPM_INPUT("AMIC1"), /* headset mic */ - SND_SOC_DAPM_INPUT("AMIC2"), - SND_SOC_DAPM_INPUT("DMIC1"), - SND_SOC_DAPM_INPUT("DMIC2"), - SND_SOC_DAPM_INPUT("DMIC3"), - SND_SOC_DAPM_INPUT("DMIC4"), - SND_SOC_DAPM_INPUT("DMIC5"), - SND_SOC_DAPM_INPUT("DMIC6"), - SND_SOC_DAPM_INPUT("LINEINL"), - SND_SOC_DAPM_INPUT("LINEINR"), - - SND_SOC_DAPM_MICBIAS("AMIC1Bias", SN95031_MICBIAS, 2, 0), - SND_SOC_DAPM_MICBIAS("AMIC2Bias", SN95031_MICBIAS, 3, 0), - SND_SOC_DAPM_MICBIAS("DMIC12Bias", SN95031_DMICMUX, 3, 0), - SND_SOC_DAPM_MICBIAS("DMIC34Bias", SN95031_DMICMUX, 4, 0), - SND_SOC_DAPM_MICBIAS("DMIC56Bias", SN95031_DMICMUX, 5, 0), - - SND_SOC_DAPM_SUPPLY("DMIC12supply", SN95031_DMICLK, 0, 0, - sn95031_dmic12_event, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), - SND_SOC_DAPM_SUPPLY("DMIC34supply", SN95031_DMICLK, 1, 0, - sn95031_dmic34_event, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), - SND_SOC_DAPM_SUPPLY("DMIC56supply", SN95031_DMICLK, 2, 0, - sn95031_dmic56_event, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), - - SND_SOC_DAPM_AIF_OUT("PCM_Out", "Capture", 0, - SND_SOC_NOPM, 0, 0), - - SND_SOC_DAPM_SUPPLY("Headset Rail", SND_SOC_NOPM, 0, 0, - sn95031_vhs_event, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), - SND_SOC_DAPM_SUPPLY("Speaker Rail", SND_SOC_NOPM, 0, 0, - sn95031_vihf_event, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), - - /* playback path driver enables */ - SND_SOC_DAPM_PGA("Headset Left Playback", - SN95031_DRIVEREN, 0, 0, NULL, 0), - SND_SOC_DAPM_PGA("Headset Right Playback", - SN95031_DRIVEREN, 1, 0, NULL, 0), - SND_SOC_DAPM_PGA("Speaker Left Playback", - 
SN95031_DRIVEREN, 2, 0, NULL, 0), - SND_SOC_DAPM_PGA("Speaker Right Playback", - SN95031_DRIVEREN, 3, 0, NULL, 0), - SND_SOC_DAPM_PGA("Vibra1 Playback", - SN95031_DRIVEREN, 4, 0, NULL, 0), - SND_SOC_DAPM_PGA("Vibra2 Playback", - SN95031_DRIVEREN, 5, 0, NULL, 0), - SND_SOC_DAPM_PGA("Earpiece Playback", - SN95031_DRIVEREN, 6, 0, NULL, 0), - SND_SOC_DAPM_PGA("Lineout Left Playback", - SN95031_LOCTL, 0, 0, NULL, 0), - SND_SOC_DAPM_PGA("Lineout Right Playback", - SN95031_LOCTL, 4, 0, NULL, 0), - - /* playback path filter enable */ - SND_SOC_DAPM_PGA("Headset Left Filter", - SN95031_HSEPRXCTRL, 4, 0, NULL, 0), - SND_SOC_DAPM_PGA("Headset Right Filter", - SN95031_HSEPRXCTRL, 5, 0, NULL, 0), - SND_SOC_DAPM_PGA("Speaker Left Filter", - SN95031_IHFRXCTRL, 0, 0, NULL, 0), - SND_SOC_DAPM_PGA("Speaker Right Filter", - SN95031_IHFRXCTRL, 1, 0, NULL, 0), - - /* DACs */ - SND_SOC_DAPM_DAC("HSDAC Left", "Headset", - SN95031_DACCONFIG, 0, 0), - SND_SOC_DAPM_DAC("HSDAC Right", "Headset", - SN95031_DACCONFIG, 1, 0), - SND_SOC_DAPM_DAC("IHFDAC Left", "Speaker", - SN95031_DACCONFIG, 2, 0), - SND_SOC_DAPM_DAC("IHFDAC Right", "Speaker", - SN95031_DACCONFIG, 3, 0), - SND_SOC_DAPM_DAC("Vibra1 DAC", "Vibra1", - SN95031_VIB1C5, 1, 0), - SND_SOC_DAPM_DAC("Vibra2 DAC", "Vibra2", - SN95031_VIB2C5, 1, 0), - - /* capture widgets */ - SND_SOC_DAPM_PGA("LineIn Enable Left", SN95031_MICAMP1, - 7, 0, NULL, 0), - SND_SOC_DAPM_PGA("LineIn Enable Right", SN95031_MICAMP2, - 7, 0, NULL, 0), - - SND_SOC_DAPM_PGA("MIC1 Enable", SN95031_MICAMP1, 0, 0, NULL, 0), - SND_SOC_DAPM_PGA("MIC2 Enable", SN95031_MICAMP2, 0, 0, NULL, 0), - SND_SOC_DAPM_PGA("TX1 Enable", SN95031_AUDIOTXEN, 2, 0, NULL, 0), - SND_SOC_DAPM_PGA("TX2 Enable", SN95031_AUDIOTXEN, 3, 0, NULL, 0), - SND_SOC_DAPM_PGA("TX3 Enable", SN95031_AUDIOTXEN, 4, 0, NULL, 0), - SND_SOC_DAPM_PGA("TX4 Enable", SN95031_AUDIOTXEN, 5, 0, NULL, 0), - - /* ADC have null stream as they will be turned ON by TX path */ - SND_SOC_DAPM_ADC("ADC Left", NULL, - 
SN95031_ADCCONFIG, 0, 0), - SND_SOC_DAPM_ADC("ADC Right", NULL, - SN95031_ADCCONFIG, 2, 0), - - SND_SOC_DAPM_MUX("Mic_InputL Capture Route", - SND_SOC_NOPM, 0, 0, &sn95031_micl_mux_control), - SND_SOC_DAPM_MUX("Mic_InputR Capture Route", - SND_SOC_NOPM, 0, 0, &sn95031_micr_mux_control), - - SND_SOC_DAPM_MUX("Txpath1 Capture Route", - SND_SOC_NOPM, 0, 0, &sn95031_input1_mux_control), - SND_SOC_DAPM_MUX("Txpath2 Capture Route", - SND_SOC_NOPM, 0, 0, &sn95031_input2_mux_control), - SND_SOC_DAPM_MUX("Txpath3 Capture Route", - SND_SOC_NOPM, 0, 0, &sn95031_input3_mux_control), - SND_SOC_DAPM_MUX("Txpath4 Capture Route", - SND_SOC_NOPM, 0, 0, &sn95031_input4_mux_control), - -}; - -static const struct snd_soc_dapm_route sn95031_audio_map[] = { - /* headset and earpiece map */ - { "HPOUTL", NULL, "Headset Rail"}, - { "HPOUTR", NULL, "Headset Rail"}, - { "HPOUTL", NULL, "Headset Left Playback" }, - { "HPOUTR", NULL, "Headset Right Playback" }, - { "EPOUT", NULL, "Earpiece Playback" }, - { "Headset Left Playback", NULL, "Headset Left Filter"}, - { "Headset Right Playback", NULL, "Headset Right Filter"}, - { "Earpiece Playback", NULL, "Headset Left Filter"}, - { "Headset Left Filter", NULL, "HSDAC Left"}, - { "Headset Right Filter", NULL, "HSDAC Right"}, - - /* speaker map */ - { "IHFOUTL", NULL, "Speaker Rail"}, - { "IHFOUTR", NULL, "Speaker Rail"}, - { "IHFOUTL", NULL, "Speaker Left Playback"}, - { "IHFOUTR", NULL, "Speaker Right Playback"}, - { "Speaker Left Playback", NULL, "Speaker Left Filter"}, - { "Speaker Right Playback", NULL, "Speaker Right Filter"}, - { "Speaker Left Filter", NULL, "IHFDAC Left"}, - { "Speaker Right Filter", NULL, "IHFDAC Right"}, - - /* vibra map */ - { "VIB1OUT", NULL, "Vibra1 Playback"}, - { "Vibra1 Playback", NULL, "Vibra1 DAC"}, - - { "VIB2OUT", NULL, "Vibra2 Playback"}, - { "Vibra2 Playback", NULL, "Vibra2 DAC"}, - - /* lineout */ - { "LINEOUTL", NULL, "Lineout Left Playback"}, - { "LINEOUTR", NULL, "Lineout Right Playback"}, - { "Lineout 
Left Playback", NULL, "Headset Left Filter"}, - { "Lineout Left Playback", NULL, "Speaker Left Filter"}, - { "Lineout Left Playback", NULL, "Vibra1 DAC"}, - { "Lineout Right Playback", NULL, "Headset Right Filter"}, - { "Lineout Right Playback", NULL, "Speaker Right Filter"}, - { "Lineout Right Playback", NULL, "Vibra2 DAC"}, - - /* Headset (AMIC1) mic */ - { "AMIC1Bias", NULL, "AMIC1"}, - { "MIC1 Enable", NULL, "AMIC1Bias"}, - { "Mic_InputL Capture Route", "AMIC", "MIC1 Enable"}, - - /* AMIC2 */ - { "AMIC2Bias", NULL, "AMIC2"}, - { "MIC2 Enable", NULL, "AMIC2Bias"}, - { "Mic_InputR Capture Route", "AMIC", "MIC2 Enable"}, - - - /* Linein */ - { "LineIn Enable Left", NULL, "LINEINL"}, - { "LineIn Enable Right", NULL, "LINEINR"}, - { "Mic_InputL Capture Route", "LineIn", "LineIn Enable Left"}, - { "Mic_InputR Capture Route", "LineIn", "LineIn Enable Right"}, - - /* ADC connection */ - { "ADC Left", NULL, "Mic_InputL Capture Route"}, - { "ADC Right", NULL, "Mic_InputR Capture Route"}, - - /*DMIC connections */ - { "DMIC1", NULL, "DMIC12supply"}, - { "DMIC2", NULL, "DMIC12supply"}, - { "DMIC3", NULL, "DMIC34supply"}, - { "DMIC4", NULL, "DMIC34supply"}, - { "DMIC5", NULL, "DMIC56supply"}, - { "DMIC6", NULL, "DMIC56supply"}, - - { "DMIC12Bias", NULL, "DMIC1"}, - { "DMIC12Bias", NULL, "DMIC2"}, - { "DMIC34Bias", NULL, "DMIC3"}, - { "DMIC34Bias", NULL, "DMIC4"}, - { "DMIC56Bias", NULL, "DMIC5"}, - { "DMIC56Bias", NULL, "DMIC6"}, - - /*TX path inputs*/ - { "Txpath1 Capture Route", "ADC Left", "ADC Left"}, - { "Txpath2 Capture Route", "ADC Left", "ADC Left"}, - { "Txpath3 Capture Route", "ADC Left", "ADC Left"}, - { "Txpath4 Capture Route", "ADC Left", "ADC Left"}, - { "Txpath1 Capture Route", "ADC Right", "ADC Right"}, - { "Txpath2 Capture Route", "ADC Right", "ADC Right"}, - { "Txpath3 Capture Route", "ADC Right", "ADC Right"}, - { "Txpath4 Capture Route", "ADC Right", "ADC Right"}, - { "Txpath1 Capture Route", "DMIC1", "DMIC1"}, - { "Txpath2 Capture Route", "DMIC1", 
"DMIC1"}, - { "Txpath3 Capture Route", "DMIC1", "DMIC1"}, - { "Txpath4 Capture Route", "DMIC1", "DMIC1"}, - { "Txpath1 Capture Route", "DMIC2", "DMIC2"}, - { "Txpath2 Capture Route", "DMIC2", "DMIC2"}, - { "Txpath3 Capture Route", "DMIC2", "DMIC2"}, - { "Txpath4 Capture Route", "DMIC2", "DMIC2"}, - { "Txpath1 Capture Route", "DMIC3", "DMIC3"}, - { "Txpath2 Capture Route", "DMIC3", "DMIC3"}, - { "Txpath3 Capture Route", "DMIC3", "DMIC3"}, - { "Txpath4 Capture Route", "DMIC3", "DMIC3"}, - { "Txpath1 Capture Route", "DMIC4", "DMIC4"}, - { "Txpath2 Capture Route", "DMIC4", "DMIC4"}, - { "Txpath3 Capture Route", "DMIC4", "DMIC4"}, - { "Txpath4 Capture Route", "DMIC4", "DMIC4"}, - { "Txpath1 Capture Route", "DMIC5", "DMIC5"}, - { "Txpath2 Capture Route", "DMIC5", "DMIC5"}, - { "Txpath3 Capture Route", "DMIC5", "DMIC5"}, - { "Txpath4 Capture Route", "DMIC5", "DMIC5"}, - { "Txpath1 Capture Route", "DMIC6", "DMIC6"}, - { "Txpath2 Capture Route", "DMIC6", "DMIC6"}, - { "Txpath3 Capture Route", "DMIC6", "DMIC6"}, - { "Txpath4 Capture Route", "DMIC6", "DMIC6"}, - - /* tx path */ - { "TX1 Enable", NULL, "Txpath1 Capture Route"}, - { "TX2 Enable", NULL, "Txpath2 Capture Route"}, - { "TX3 Enable", NULL, "Txpath3 Capture Route"}, - { "TX4 Enable", NULL, "Txpath4 Capture Route"}, - { "PCM_Out", NULL, "TX1 Enable"}, - { "PCM_Out", NULL, "TX2 Enable"}, - { "PCM_Out", NULL, "TX3 Enable"}, - { "PCM_Out", NULL, "TX4 Enable"}, - -}; - -/* speaker and headset mutes, for audio pops and clicks */ -static int sn95031_pcm_hs_mute(struct snd_soc_dai *dai, int mute) -{ - snd_soc_update_bits(dai->codec, - SN95031_HSLVOLCTRL, BIT(7), (!mute << 7)); - snd_soc_update_bits(dai->codec, - SN95031_HSRVOLCTRL, BIT(7), (!mute << 7)); - return 0; -} - -static int sn95031_pcm_spkr_mute(struct snd_soc_dai *dai, int mute) -{ - snd_soc_update_bits(dai->codec, - SN95031_IHFLVOLCTRL, BIT(7), (!mute << 7)); - snd_soc_update_bits(dai->codec, - SN95031_IHFRVOLCTRL, BIT(7), (!mute << 7)); - return 0; -} - -static 
int sn95031_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) -{ - unsigned int format, rate; - - switch (params_width(params)) { - case 16: - format = BIT(4)|BIT(5); - break; - - case 24: - format = 0; - break; - default: - return -EINVAL; - } - snd_soc_update_bits(dai->codec, SN95031_PCM2C2, - BIT(4)|BIT(5), format); - - switch (params_rate(params)) { - case 48000: - pr_debug("RATE_48000\n"); - rate = 0; - break; - - case 44100: - pr_debug("RATE_44100\n"); - rate = BIT(7); - break; - - default: - pr_err("ERR rate %d\n", params_rate(params)); - return -EINVAL; - } - snd_soc_update_bits(dai->codec, SN95031_PCM1C1, BIT(7), rate); - - return 0; -} - -/* Codec DAI section */ -static const struct snd_soc_dai_ops sn95031_headset_dai_ops = { - .digital_mute = sn95031_pcm_hs_mute, - .hw_params = sn95031_pcm_hw_params, -}; - -static const struct snd_soc_dai_ops sn95031_speaker_dai_ops = { - .digital_mute = sn95031_pcm_spkr_mute, - .hw_params = sn95031_pcm_hw_params, -}; - -static const struct snd_soc_dai_ops sn95031_vib1_dai_ops = { - .hw_params = sn95031_pcm_hw_params, -}; - -static const struct snd_soc_dai_ops sn95031_vib2_dai_ops = { - .hw_params = sn95031_pcm_hw_params, -}; - -static struct snd_soc_dai_driver sn95031_dais[] = { -{ - .name = "SN95031 Headset", - .playback = { - .stream_name = "Headset", - .channels_min = 2, - .channels_max = 2, - .rates = SN95031_RATES, - .formats = SN95031_FORMATS, - }, - .capture = { - .stream_name = "Capture", - .channels_min = 1, - .channels_max = 5, - .rates = SN95031_RATES, - .formats = SN95031_FORMATS, - }, - .ops = &sn95031_headset_dai_ops, -}, -{ .name = "SN95031 Speaker", - .playback = { - .stream_name = "Speaker", - .channels_min = 2, - .channels_max = 2, - .rates = SN95031_RATES, - .formats = SN95031_FORMATS, - }, - .ops = &sn95031_speaker_dai_ops, -}, -{ .name = "SN95031 Vibra1", - .playback = { - .stream_name = "Vibra1", - .channels_min = 1, - .channels_max = 1, 
- .rates = SN95031_RATES, - .formats = SN95031_FORMATS, - }, - .ops = &sn95031_vib1_dai_ops, -}, -{ .name = "SN95031 Vibra2", - .playback = { - .stream_name = "Vibra2", - .channels_min = 1, - .channels_max = 1, - .rates = SN95031_RATES, - .formats = SN95031_FORMATS, - }, - .ops = &sn95031_vib2_dai_ops, -}, -}; - -static inline void sn95031_disable_jack_btn(struct snd_soc_codec *codec) -{ - snd_soc_write(codec, SN95031_BTNCTRL2, 0x00); -} - -static inline void sn95031_enable_jack_btn(struct snd_soc_codec *codec) -{ - snd_soc_write(codec, SN95031_BTNCTRL1, 0x77); - snd_soc_write(codec, SN95031_BTNCTRL2, 0x01); -} - -static int sn95031_get_headset_state(struct snd_soc_codec *codec, - struct snd_soc_jack *mfld_jack) -{ - int micbias = sn95031_get_mic_bias(codec); - - int jack_type = snd_soc_jack_get_type(mfld_jack, micbias); - - pr_debug("jack type detected = %d\n", jack_type); - if (jack_type == SND_JACK_HEADSET) - sn95031_enable_jack_btn(codec); - return jack_type; -} - -void sn95031_jack_detection(struct snd_soc_codec *codec, - struct mfld_jack_data *jack_data) -{ - unsigned int status; - unsigned int mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_HEADSET; - - pr_debug("interrupt id read in sram = 0x%x\n", jack_data->intr_id); - if (jack_data->intr_id & 0x1) { - pr_debug("short_push detected\n"); - status = SND_JACK_HEADSET | SND_JACK_BTN_0; - } else if (jack_data->intr_id & 0x2) { - pr_debug("long_push detected\n"); - status = SND_JACK_HEADSET | SND_JACK_BTN_1; - } else if (jack_data->intr_id & 0x4) { - pr_debug("headset or headphones inserted\n"); - status = sn95031_get_headset_state(codec, jack_data->mfld_jack); - } else if (jack_data->intr_id & 0x8) { - pr_debug("headset or headphones removed\n"); - status = 0; - sn95031_disable_jack_btn(codec); - } else { - pr_err("unidentified interrupt\n"); - return; - } - - snd_soc_jack_report(jack_data->mfld_jack, status, mask); - /*button pressed and released so we send explicit button release */ - if ((status & 
SND_JACK_BTN_0) | (status & SND_JACK_BTN_1)) - snd_soc_jack_report(jack_data->mfld_jack, - SND_JACK_HEADSET, mask); -} -EXPORT_SYMBOL_GPL(sn95031_jack_detection); - -/* codec registration */ -static int sn95031_codec_probe(struct snd_soc_codec *codec) -{ - pr_debug("codec_probe called\n"); - - /* PCM interface config - * This sets the pcm rx slot conguration to max 6 slots - * for max 4 dais (2 stereo and 2 mono) - */ - snd_soc_write(codec, SN95031_PCM2RXSLOT01, 0x10); - snd_soc_write(codec, SN95031_PCM2RXSLOT23, 0x32); - snd_soc_write(codec, SN95031_PCM2RXSLOT45, 0x54); - snd_soc_write(codec, SN95031_PCM2TXSLOT01, 0x10); - snd_soc_write(codec, SN95031_PCM2TXSLOT23, 0x32); - /* pcm port setting - * This sets the pcm port to slave and clock at 19.2Mhz which - * can support 6slots, sampling rate set per stream in hw-params - */ - snd_soc_write(codec, SN95031_PCM1C1, 0x00); - snd_soc_write(codec, SN95031_PCM2C1, 0x01); - snd_soc_write(codec, SN95031_PCM2C2, 0x0A); - snd_soc_write(codec, SN95031_HSMIXER, BIT(0)|BIT(4)); - /* vendor vibra workround, the vibras are muted by - * custom register so unmute them - */ - snd_soc_write(codec, SN95031_SSR5, 0x80); - snd_soc_write(codec, SN95031_SSR6, 0x80); - snd_soc_write(codec, SN95031_VIB1C5, 0x00); - snd_soc_write(codec, SN95031_VIB2C5, 0x00); - /* configure vibras for pcm port */ - snd_soc_write(codec, SN95031_VIB1C3, 0x00); - snd_soc_write(codec, SN95031_VIB2C3, 0x00); - - /* soft mute ramp time */ - snd_soc_write(codec, SN95031_SOFTMUTE, 0x3); - /* fix the initial volume at 1dB, - * default in +9dB, - * 1dB give optimal swing on DAC, amps - */ - snd_soc_write(codec, SN95031_HSLVOLCTRL, 0x08); - snd_soc_write(codec, SN95031_HSRVOLCTRL, 0x08); - snd_soc_write(codec, SN95031_IHFLVOLCTRL, 0x08); - snd_soc_write(codec, SN95031_IHFRVOLCTRL, 0x08); - /* dac mode and lineout workaround */ - snd_soc_write(codec, SN95031_SSR2, 0x10); - snd_soc_write(codec, SN95031_SSR3, 0x40); - - return 0; -} - -static const struct 
snd_soc_codec_driver sn95031_codec = { - .probe = sn95031_codec_probe, - .set_bias_level = sn95031_set_vaud_bias, - .idle_bias_off = true, - - .component_driver = { - .controls = sn95031_snd_controls, - .num_controls = ARRAY_SIZE(sn95031_snd_controls), - .dapm_widgets = sn95031_dapm_widgets, - .num_dapm_widgets = ARRAY_SIZE(sn95031_dapm_widgets), - .dapm_routes = sn95031_audio_map, - .num_dapm_routes = ARRAY_SIZE(sn95031_audio_map), - }, -}; - -static int sn95031_device_probe(struct platform_device *pdev) -{ - struct regmap *regmap; - - pr_debug("codec device probe called for %s\n", dev_name(&pdev->dev)); - - regmap = devm_regmap_init(&pdev->dev, NULL, NULL, &sn95031_regmap); - if (IS_ERR(regmap)) - return PTR_ERR(regmap); - - return snd_soc_register_codec(&pdev->dev, &sn95031_codec, - sn95031_dais, ARRAY_SIZE(sn95031_dais)); -} - -static int sn95031_device_remove(struct platform_device *pdev) -{ - pr_debug("codec device remove called\n"); - snd_soc_unregister_codec(&pdev->dev); - return 0; -} - -static struct platform_driver sn95031_codec_driver = { - .driver = { - .name = "sn95031", - }, - .probe = sn95031_device_probe, - .remove = sn95031_device_remove, -}; - -module_platform_driver(sn95031_codec_driver); - -MODULE_DESCRIPTION("ASoC TI SN95031 codec driver"); -MODULE_AUTHOR("Vinod Koul "); -MODULE_AUTHOR("Harsha Priya "); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:sn95031"); diff --git a/sound/soc/codecs/sn95031.h b/sound/soc/codecs/sn95031.h deleted file mode 100644 index 7651fe4e6a45..000000000000 --- a/sound/soc/codecs/sn95031.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * sn95031.h - TI sn95031 Codec driver - * - * Copyright (C) 2010 Intel Corp - * Author: Vinod Koul - * Author: Harsha Priya - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; 
version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * - */ -#ifndef _SN95031_H -#define _SN95031_H - -/*register map*/ -#define SN95031_VAUD 0xDB -#define SN95031_VHSP 0xDC -#define SN95031_VHSN 0xDD -#define SN95031_VIHF 0xC9 - -#define SN95031_AUDPLLCTRL 0x240 -#define SN95031_DMICBUF0123 0x241 -#define SN95031_DMICBUF45 0x242 -#define SN95031_DMICGPO 0x244 -#define SN95031_DMICMUX 0x245 -#define SN95031_DMICLK 0x246 -#define SN95031_MICBIAS 0x247 -#define SN95031_ADCCONFIG 0x248 -#define SN95031_MICAMP1 0x249 -#define SN95031_MICAMP2 0x24A -#define SN95031_NOISEMUX 0x24B -#define SN95031_AUDIOMUX12 0x24C -#define SN95031_AUDIOMUX34 0x24D -#define SN95031_AUDIOSINC 0x24E -#define SN95031_AUDIOTXEN 0x24F -#define SN95031_HSEPRXCTRL 0x250 -#define SN95031_IHFRXCTRL 0x251 -#define SN95031_HSMIXER 0x256 -#define SN95031_DACCONFIG 0x257 -#define SN95031_SOFTMUTE 0x258 -#define SN95031_HSLVOLCTRL 0x259 -#define SN95031_HSRVOLCTRL 0x25A -#define SN95031_IHFLVOLCTRL 0x25B -#define SN95031_IHFRVOLCTRL 0x25C -#define SN95031_DRIVEREN 0x25D -#define SN95031_LOCTL 0x25E -#define SN95031_VIB1C1 0x25F -#define SN95031_VIB1C2 0x260 -#define SN95031_VIB1C3 0x261 -#define SN95031_VIB1SPIPCM1 0x262 -#define SN95031_VIB1SPIPCM2 0x263 -#define SN95031_VIB1C5 0x264 -#define SN95031_VIB2C1 0x265 -#define SN95031_VIB2C2 0x266 -#define SN95031_VIB2C3 0x267 -#define SN95031_VIB2SPIPCM1 0x268 -#define SN95031_VIB2SPIPCM2 0x269 -#define SN95031_VIB2C5 0x26A -#define 
SN95031_BTNCTRL1 0x26B -#define SN95031_BTNCTRL2 0x26C -#define SN95031_PCM1TXSLOT01 0x26D -#define SN95031_PCM1TXSLOT23 0x26E -#define SN95031_PCM1TXSLOT45 0x26F -#define SN95031_PCM1RXSLOT0_3 0x270 -#define SN95031_PCM1RXSLOT45 0x271 -#define SN95031_PCM2TXSLOT01 0x272 -#define SN95031_PCM2TXSLOT23 0x273 -#define SN95031_PCM2TXSLOT45 0x274 -#define SN95031_PCM2RXSLOT01 0x275 -#define SN95031_PCM2RXSLOT23 0x276 -#define SN95031_PCM2RXSLOT45 0x277 -#define SN95031_PCM1C1 0x278 -#define SN95031_PCM1C2 0x279 -#define SN95031_PCM1C3 0x27A -#define SN95031_PCM2C1 0x27B -#define SN95031_PCM2C2 0x27C -/*end codec register defn*/ - -/*vendor defn these are not part of avp*/ -#define SN95031_SSR2 0x381 -#define SN95031_SSR3 0x382 -#define SN95031_SSR5 0x384 -#define SN95031_SSR6 0x385 - -/* ADC registers */ - -#define SN95031_ADC1CNTL1 0x1C0 -#define SN95031_ADC_ENBL 0x10 -#define SN95031_ADC_START 0x08 -#define SN95031_ADC1CNTL3 0x1C2 -#define SN95031_ADCTHERM_ENBL 0x04 -#define SN95031_ADCRRDATA_ENBL 0x05 -#define SN95031_STOPBIT_MASK 16 -#define SN95031_ADCTHERM_MASK 4 -#define SN95031_ADC_CHANLS_MAX 15 /* Number of ADC channels */ -#define SN95031_ADC_LOOP_MAX (SN95031_ADC_CHANLS_MAX - 1) -#define SN95031_ADC_NO_LOOP 0x07 -#define SN95031_AUDIO_GPIO_CTRL 0x070 - -/* ADC channel code values */ -#define SN95031_AUDIO_DETECT_CODE 0x06 - -/* ADC base addresses */ -#define SN95031_ADC_CHNL_START_ADDR 0x1C5 /* increments by 1 */ -#define SN95031_ADC_DATA_START_ADDR 0x1D4 /* increments by 2 */ -/* multipier to convert to mV */ -#define SN95031_ADC_ONE_LSB_MULTIPLIER 2346 - - -struct mfld_jack_data { - int intr_id; - int micbias_vol; - struct snd_soc_jack *mfld_jack; -}; - -extern void sn95031_jack_detection(struct snd_soc_codec *codec, - struct mfld_jack_data *jack_data); - -#endif -- cgit v1.2.3 From 11aa2d9613c9523f8f78863bdfc7d79b37afcbbe Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Fri, 5 Jan 2018 17:00:51 +0800 Subject: ASoC: mt8173: remove unnecessary micbias widget 
in route The micbias1/2 are connected to route as SUPPLY usage. This did not take effect since they were MICBIAS type. To keep the same register settings, we have to remove it once the micbias1/2 widget is converted to SUPPLY type. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c | 2 -- sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c | 2 -- sound/soc/mediatek/mt8173/mt8173-rt5650.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c index 99c15219dbc8..5a9a5482976e 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c @@ -37,8 +37,6 @@ static const struct snd_soc_dapm_route mt8173_rt5650_rt5514_routes[] = { {"Sub DMIC1R", NULL, "Int Mic"}, {"Headphone", NULL, "HPOL"}, {"Headphone", NULL, "HPOR"}, - {"Headset Mic", NULL, "micbias1"}, - {"Headset Mic", NULL, "micbias2"}, {"IN1P", NULL, "Headset Mic"}, {"IN1N", NULL, "Headset Mic"}, }; diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c index 42de84ca8c84..b7248085ca04 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c @@ -40,8 +40,6 @@ static const struct snd_soc_dapm_route mt8173_rt5650_rt5676_routes[] = { {"Headphone", NULL, "HPOL"}, {"Headphone", NULL, "HPOR"}, {"Headphone", NULL, "Sub AIF2TX"}, /* IF2 ADC to 5650 */ - {"Headset Mic", NULL, "micbias1"}, - {"Headset Mic", NULL, "micbias2"}, {"IN1P", NULL, "Headset Mic"}, {"IN1N", NULL, "Headset Mic"}, {"Sub AIF2RX", NULL, "Headset Mic"}, /* IF2 DAC from 5650 */ diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c index e69c141d8ed4..40ebefd625c1 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c @@ -51,8 +51,6 @@ static const
struct snd_soc_dapm_route mt8173_rt5650_routes[] = { {"DMIC R1", NULL, "Int Mic"}, {"Headphone", NULL, "HPOL"}, {"Headphone", NULL, "HPOR"}, - {"Headset Mic", NULL, "micbias1"}, - {"Headset Mic", NULL, "micbias2"}, {"IN1P", NULL, "Headset Mic"}, {"IN1N", NULL, "Headset Mic"}, }; -- cgit v1.2.3 From 76a4201191814a0061cb5c861fafb9ecaa764846 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 10 Jan 2018 12:14:28 +0100 Subject: xfrm: Fix a race in the xdst pcpu cache. We need to run xfrm_resolve_and_create_bundle() with bottom halves off. Otherwise we may reuse an already released dst_entry when the xfrm lookup functions are called from process context. Fixes: c30d78c14a813db39a647b6a348b428 ("xfrm: add xdst pcpu cache") Reported-by: Darius Ski Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index bc5eae12fb09..bd6b0e7a0ee4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2063,8 +2063,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, if (num_xfrms <= 0) goto make_dummy_bundle; + local_bh_disable(); xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, - xflo->dst_orig); + xflo->dst_orig); + local_bh_enable(); + if (IS_ERR(xdst)) { err = PTR_ERR(xdst); if (err != -EAGAIN) @@ -2151,9 +2154,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } + local_bh_disable(); xdst = xfrm_resolve_and_create_bundle( pols, num_pols, fl, family, dst_orig); + local_bh_enable(); + if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); err = PTR_ERR(xdst); -- cgit v1.2.3 From 794a56ebd9a57db12abaec63f038c6eb073461f7 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 4 Dec 2017 11:23:20 +0100 Subject: sched/cpufreq: Change the worker kthread to SCHED_DEADLINE Worker kthread needs to be able to change frequency for all other threads.
Make it special, just under STOP class. Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Cc: Claudio Scordino Cc: Linus Torvalds Cc: Luca Abeni Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: Viresh Kumar Cc: alessio.balsini@arm.com Cc: bristot@redhat.com Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: mathieu.poirier@linaro.org Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: rjw@rjwysocki.net Cc: rostedt@goodmis.org Cc: tkjos@android.com Cc: tommaso.cucinotta@santannapisa.it Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/20171204102325.5110-4-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched/core.c | 13 ++++- kernel/sched/cpufreq_schedutil.c | 19 ++++++-- kernel/sched/deadline.c | 103 +++++++++++++++++++++++++++------------ kernel/sched/sched.h | 30 +++++++++++- 5 files changed, 130 insertions(+), 36 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 274a449c805a..f7506712825c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1431,6 +1431,7 @@ extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern int sched_setattr(struct task_struct *, const struct sched_attr *); +extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e28391bf8b04..402ef4fa0e1c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4085,7 +4085,7 @@ recheck: return -EINVAL; } - if (attr->sched_flags & ~SCHED_FLAG_ALL) + if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) return -EINVAL; /* @@ -4152,6 +4152,9 @@ recheck: } if (user) { + if (attr->sched_flags & SCHED_FLAG_SUGOV) + return -EINVAL; + retval 
= security_task_setscheduler(p); if (retval) return retval; @@ -4207,7 +4210,8 @@ change: } #endif #ifdef CONFIG_SMP - if (dl_bandwidth_enabled() && dl_policy(policy)) { + if (dl_bandwidth_enabled() && dl_policy(policy) && + !(attr->sched_flags & SCHED_FLAG_SUGOV)) { cpumask_t *span = rq->rd->span; /* @@ -4337,6 +4341,11 @@ int sched_setattr(struct task_struct *p, const struct sched_attr *attr) } EXPORT_SYMBOL_GPL(sched_setattr); +int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) +{ + return __sched_setscheduler(p, attr, false, true); +} + /** * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. * @p: the task in question. diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 8d266bc5c67d..bd5f9976892d 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -474,7 +474,20 @@ static void sugov_policy_free(struct sugov_policy *sg_policy) static int sugov_kthread_create(struct sugov_policy *sg_policy) { struct task_struct *thread; - struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 }; + struct sched_attr attr = { + .size = sizeof(struct sched_attr), + .sched_policy = SCHED_DEADLINE, + .sched_flags = SCHED_FLAG_SUGOV, + .sched_nice = 0, + .sched_priority = 0, + /* + * Fake (unused) bandwidth; workaround to "fix" + * priority inheritance. 
+ */ + .sched_runtime = 1000000, + .sched_deadline = 10000000, + .sched_period = 10000000, + }; struct cpufreq_policy *policy = sg_policy->policy; int ret; @@ -492,10 +505,10 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) return PTR_ERR(thread); } - ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param); + ret = sched_setattr_nocheck(thread, &attr); if (ret) { kthread_stop(thread); - pr_warn("%s: failed to set SCHED_FIFO\n", __func__); + pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); return ret; } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index f584837b32e7..54a0dc1424a9 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -78,7 +78,7 @@ static inline int dl_bw_cpus(int i) #endif static inline -void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq) +void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq) { u64 old = dl_rq->running_bw; @@ -91,7 +91,7 @@ void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq) } static inline -void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq) +void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq) { u64 old = dl_rq->running_bw; @@ -105,7 +105,7 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq) } static inline -void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) +void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) { u64 old = dl_rq->this_bw; @@ -115,7 +115,7 @@ void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) } static inline -void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) +void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) { u64 old = dl_rq->this_bw; @@ -127,16 +127,46 @@ void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw); } +static inline +void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) +{ + if (!dl_entity_is_special(dl_se)) + __add_rq_bw(dl_se->dl_bw, dl_rq); +} + +static inline +void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) +{ + if (!dl_entity_is_special(dl_se)) + __sub_rq_bw(dl_se->dl_bw,
dl_rq); +} + +static inline +void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) +{ + if (!dl_entity_is_special(dl_se)) + __add_running_bw(dl_se->dl_bw, dl_rq); +} + +static inline +void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) +{ + if (!dl_entity_is_special(dl_se)) + __sub_running_bw(dl_se->dl_bw, dl_rq); +} + void dl_change_utilization(struct task_struct *p, u64 new_bw) { struct rq *rq; + BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV); + if (task_on_rq_queued(p)) return; rq = task_rq(p); if (p->dl.dl_non_contending) { - sub_running_bw(p->dl.dl_bw, &rq->dl); + sub_running_bw(&p->dl, &rq->dl); p->dl.dl_non_contending = 0; /* * If the timer handler is currently running and the @@ -148,8 +178,8 @@ void dl_change_utilization(struct task_struct *p, u64 new_bw) if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) put_task_struct(p); } - sub_rq_bw(p->dl.dl_bw, &rq->dl); - add_rq_bw(new_bw, &rq->dl); + __sub_rq_bw(p->dl.dl_bw, &rq->dl); + __add_rq_bw(new_bw, &rq->dl); } /* @@ -221,6 +251,9 @@ static void task_non_contending(struct task_struct *p) if (dl_se->dl_runtime == 0) return; + if (dl_entity_is_special(dl_se)) + return; + WARN_ON(hrtimer_active(&dl_se->inactive_timer)); WARN_ON(dl_se->dl_non_contending); @@ -240,12 +273,12 @@ static void task_non_contending(struct task_struct *p) */ if (zerolag_time < 0) { if (dl_task(p)) - sub_running_bw(dl_se->dl_bw, dl_rq); + sub_running_bw(dl_se, dl_rq); if (!dl_task(p) || p->state == TASK_DEAD) { struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); if (p->state == TASK_DEAD) - sub_rq_bw(p->dl.dl_bw, &rq->dl); + sub_rq_bw(&p->dl, &rq->dl); raw_spin_lock(&dl_b->lock); __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); __dl_clear_params(p); @@ -272,7 +305,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags) return; if (flags & ENQUEUE_MIGRATED) - add_rq_bw(dl_se->dl_bw, dl_rq); + add_rq_bw(dl_se, dl_rq); if (dl_se->dl_non_contending) { dl_se->dl_non_contending = 0; @@ -293,7 
+326,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags) * when the "inactive timer" fired). * So, add it back. */ - add_running_bw(dl_se->dl_bw, dl_rq); + add_running_bw(dl_se, dl_rq); } } @@ -1149,6 +1182,9 @@ static void update_curr_dl(struct rq *rq) sched_rt_avg_update(rq, delta_exec); + if (dl_entity_is_special(dl_se)) + return; + if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) delta_exec = grub_reclaim(delta_exec, rq, &curr->dl); dl_se->runtime -= delta_exec; @@ -1211,8 +1247,8 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); if (p->state == TASK_DEAD && dl_se->dl_non_contending) { - sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl)); - sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl)); + sub_running_bw(&p->dl, dl_rq_of_se(&p->dl)); + sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl)); dl_se->dl_non_contending = 0; } @@ -1229,7 +1265,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) sched_clock_tick(); update_rq_clock(rq); - sub_running_bw(dl_se->dl_bw, &rq->dl); + sub_running_bw(dl_se, &rq->dl); dl_se->dl_non_contending = 0; unlock: task_rq_unlock(rq, p, &rf); @@ -1423,8 +1459,8 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) dl_check_constrained_dl(&p->dl); if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) { - add_rq_bw(p->dl.dl_bw, &rq->dl); - add_running_bw(p->dl.dl_bw, &rq->dl); + add_rq_bw(&p->dl, &rq->dl); + add_running_bw(&p->dl, &rq->dl); } /* @@ -1464,8 +1500,8 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) __dequeue_task_dl(rq, p, flags); if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) { - sub_running_bw(p->dl.dl_bw, &rq->dl); - sub_rq_bw(p->dl.dl_bw, &rq->dl); + sub_running_bw(&p->dl, &rq->dl); + sub_rq_bw(&p->dl, &rq->dl); } /* @@ -1571,7 +1607,7 @@ static void migrate_task_rq_dl(struct task_struct *p) */ raw_spin_lock(&rq->lock); if 
(p->dl.dl_non_contending) { - sub_running_bw(p->dl.dl_bw, &rq->dl); + sub_running_bw(&p->dl, &rq->dl); p->dl.dl_non_contending = 0; /* * If the timer handler is currently running and the @@ -1583,7 +1619,7 @@ static void migrate_task_rq_dl(struct task_struct *p) if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) put_task_struct(p); } - sub_rq_bw(p->dl.dl_bw, &rq->dl); + sub_rq_bw(&p->dl, &rq->dl); raw_spin_unlock(&rq->lock); } @@ -2026,11 +2062,11 @@ retry: } deactivate_task(rq, next_task, 0); - sub_running_bw(next_task->dl.dl_bw, &rq->dl); - sub_rq_bw(next_task->dl.dl_bw, &rq->dl); + sub_running_bw(&next_task->dl, &rq->dl); + sub_rq_bw(&next_task->dl, &rq->dl); set_task_cpu(next_task, later_rq->cpu); - add_rq_bw(next_task->dl.dl_bw, &later_rq->dl); - add_running_bw(next_task->dl.dl_bw, &later_rq->dl); + add_rq_bw(&next_task->dl, &later_rq->dl); + add_running_bw(&next_task->dl, &later_rq->dl); activate_task(later_rq, next_task, 0); ret = 1; @@ -2118,11 +2154,11 @@ static void pull_dl_task(struct rq *this_rq) resched = true; deactivate_task(src_rq, p, 0); - sub_running_bw(p->dl.dl_bw, &src_rq->dl); - sub_rq_bw(p->dl.dl_bw, &src_rq->dl); + sub_running_bw(&p->dl, &src_rq->dl); + sub_rq_bw(&p->dl, &src_rq->dl); set_task_cpu(p, this_cpu); - add_rq_bw(p->dl.dl_bw, &this_rq->dl); - add_running_bw(p->dl.dl_bw, &this_rq->dl); + add_rq_bw(&p->dl, &this_rq->dl); + add_running_bw(&p->dl, &this_rq->dl); activate_task(this_rq, p, 0); dmin = p->dl.deadline; @@ -2231,7 +2267,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) task_non_contending(p); if (!task_on_rq_queued(p)) - sub_rq_bw(p->dl.dl_bw, &rq->dl); + sub_rq_bw(&p->dl, &rq->dl); /* * We cannot use inactive_task_timer() to invoke sub_running_bw() @@ -2263,7 +2299,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) /* If p is not queued we will update its parameters at next wakeup. 
*/ if (!task_on_rq_queued(p)) { - add_rq_bw(p->dl.dl_bw, &rq->dl); + add_rq_bw(&p->dl, &rq->dl); return; } @@ -2442,6 +2478,9 @@ int sched_dl_overflow(struct task_struct *p, int policy, u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; int cpus, err = -1; + if (attr->sched_flags & SCHED_FLAG_SUGOV) + return 0; + /* !deadline task may carry old deadline bandwidth */ if (new_bw == p->dl.dl_bw && task_has_dl_policy(p)) return 0; @@ -2528,6 +2567,10 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr) */ bool __checkparam_dl(const struct sched_attr *attr) { + /* special dl tasks don't actually use any parameter */ + if (attr->sched_flags & SCHED_FLAG_SUGOV) + return true; + /* deadline != 0 */ if (attr->sched_deadline == 0) return false; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 863964fbcfd2..c5197338ac47 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -156,13 +156,37 @@ static inline int task_has_dl_policy(struct task_struct *p) return dl_policy(p->policy); } +/* + * !! For sched_setattr_nocheck() (kernel) only !! + * + * This is actually gross. :( + * + * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE + * tasks, but still be able to sleep. We need this on platforms that cannot + * atomically change clock frequency. Remove once fast switching will be + * available on such platforms. + * + * SUGOV stands for SchedUtil GOVernor. + */ +#define SCHED_FLAG_SUGOV 0x10000000 + +static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se) +{ +#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL + return unlikely(dl_se->flags & SCHED_FLAG_SUGOV); +#else + return false; +#endif +} + /* * Tells if entity @a should preempt entity @b. 
*/ static inline bool dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b) { - return dl_time_before(a->deadline, b->deadline); + return dl_entity_is_special(a) || + dl_time_before(a->deadline, b->deadline); } /* @@ -2085,6 +2109,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} #define arch_scale_freq_invariant() (false) #endif +#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL + static inline unsigned long cpu_util_dl(struct rq *rq) { return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; @@ -2094,3 +2120,5 @@ static inline unsigned long cpu_util_cfs(struct rq *rq) { return rq->cfs.avg.util_avg; } + +#endif -- cgit v1.2.3 From d18be45dbfef2e0bb12b9696c21aeae92f83b1ea Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 4 Dec 2017 11:23:21 +0100 Subject: sched/cpufreq: Split utilization signals To be able to treat utilization signals of different scheduling classes in different ways (e.g., CFS signal might be stale while DEADLINE signal is never stale by design) we need to split sugov_cpu::util signal in two: util_cfs and util_dl. This patch does that by also changing sugov_get_util() parameter list. After this change, aggregation of the different signals has to be performed by sugov_get_util() users (so that they can decide what to do with the different signals). Suggested-by: Rafael J. 
Wysocki Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Claudio Scordino Cc: Linus Torvalds Cc: Luca Abeni Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: alessio.balsini@arm.com Cc: bristot@redhat.com Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: mathieu.poirier@linaro.org Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: rjw@rjwysocki.net Cc: rostedt@goodmis.org Cc: tkjos@android.com Cc: tommaso.cucinotta@santannapisa.it Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/20171204102325.5110-5-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index bd5f9976892d..e9e0713f85f3 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -60,7 +60,8 @@ struct sugov_cpu { u64 last_update; /* The fields below are only needed when sharing a policy. */ - unsigned long util; + unsigned long util_cfs; + unsigned long util_dl; unsigned long max; unsigned int flags; @@ -176,20 +177,23 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, return cpufreq_driver_resolve_freq(policy, freq); } -static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu) +static void sugov_get_util(struct sugov_cpu *sg_cpu) { - struct rq *rq = cpu_rq(cpu); - unsigned long util_cfs = cpu_util_cfs(rq); - unsigned long util_dl = cpu_util_dl(rq); + struct rq *rq = cpu_rq(sg_cpu->cpu); - *max = arch_scale_cpu_capacity(NULL, cpu); + sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu); + sg_cpu->util_cfs = cpu_util_cfs(rq); + sg_cpu->util_dl = cpu_util_dl(rq); +} +static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) +{ /* * Ideally we would like to set util_dl as min/guaranteed freq and * util_cfs + util_dl as requested freq. 
However, cpufreq is not yet * ready for such an interface. So, we only do the latter for now. */ - *util = min(util_cfs + util_dl, *max); + return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max); } static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time) @@ -279,7 +283,9 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (flags & SCHED_CPUFREQ_RT) { next_f = policy->cpuinfo.max_freq; } else { - sugov_get_util(&util, &max, sg_cpu->cpu); + sugov_get_util(sg_cpu); + max = sg_cpu->max; + util = sugov_aggregate_util(sg_cpu); sugov_iowait_boost(sg_cpu, &util, &max); next_f = get_next_freq(sg_policy, util, max); /* @@ -324,8 +330,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) if (j_sg_cpu->flags & SCHED_CPUFREQ_RT) return policy->cpuinfo.max_freq; - j_util = j_sg_cpu->util; j_max = j_sg_cpu->max; + j_util = sugov_aggregate_util(j_sg_cpu); if (j_util * max > j_max * util) { util = j_util; max = j_max; @@ -342,15 +348,11 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; - unsigned long util, max; unsigned int next_f; - sugov_get_util(&util, &max, sg_cpu->cpu); - raw_spin_lock(&sg_policy->update_lock); - sg_cpu->util = util; - sg_cpu->max = max; + sugov_get_util(sg_cpu); sg_cpu->flags = flags; sugov_set_iowait_boost(sg_cpu, time); -- cgit v1.2.3 From 0fa7d181f1a60149061632266bb432b4b61acdac Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 4 Dec 2017 11:23:22 +0100 Subject: sched/cpufreq: Always consider all CPUs when deciding next freq No assumption can be made upon the rate at which frequency updates get triggered, as there are scheduling policies (like SCHED_DEADLINE) which don't trigger them so frequently. Remove such assumption from the code, by always considering SCHED_DEADLINE utilization signal as not stale. 
Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Claudio Scordino Cc: Linus Torvalds Cc: Luca Abeni Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: alessio.balsini@arm.com Cc: bristot@redhat.com Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: mathieu.poirier@linaro.org Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: rjw@rjwysocki.net Cc: rostedt@goodmis.org Cc: tkjos@android.com Cc: tommaso.cucinotta@santannapisa.it Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/20171204102325.5110-6-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e9e0713f85f3..dd062a1c8cf0 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -315,17 +315,21 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) s64 delta_ns; /* - * If the CPU utilization was last updated before the previous - * frequency update and the time elapsed between the last update - * of the CPU utilization and the last frequency update is long - * enough, don't take the CPU into account as it probably is - * idle now (and clear iowait_boost for it). + * If the CFS CPU utilization was last updated before the + * previous frequency update and the time elapsed between the + * last update of the CPU utilization and the last frequency + * update is long enough, reset iowait_boost and util_cfs, as + * they are now probably stale. However, still consider the + * CPU contribution if it has some DEADLINE utilization + * (util_dl). 
*/ delta_ns = time - j_sg_cpu->last_update; if (delta_ns > TICK_NSEC) { j_sg_cpu->iowait_boost = 0; j_sg_cpu->iowait_boost_pending = false; - continue; + j_sg_cpu->util_cfs = 0; + if (j_sg_cpu->util_dl == 0) + continue; } if (j_sg_cpu->flags & SCHED_CPUFREQ_RT) return policy->cpuinfo.max_freq; -- cgit v1.2.3 From 7673c8a4c75d1cac2cd47156b9768f462683a09d Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 4 Dec 2017 11:23:23 +0100 Subject: sched/cpufreq: Remove arch_scale_freq_capacity()'s 'sd' parameter The 'sd' parameter is never used in arch_scale_freq_capacity() (and it's hard to see where information coming from scheduling domains might help doing frequency invariance scaling). Remove it; also in anticipation of moving arch_scale_freq_capacity() outside CONFIG_SMP. Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: alessio.balsini@arm.com Cc: bristot@redhat.com Cc: claudio@evidence.eu.com Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: luca.abeni@santannapisa.it Cc: mathieu.poirier@linaro.org Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: rjw@rjwysocki.net Cc: rostedt@goodmis.org Cc: tkjos@android.com Cc: tommaso.cucinotta@santannapisa.it Cc: vincent.guittot@linaro.org Cc: viresh.kumar@linaro.org Link: http://lkml.kernel.org/r/20171204102325.5110-7-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- include/linux/arch_topology.h | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/sched.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 304511267c82..2b709416de05 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -27,7 +27,7 @@ void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); DECLARE_PER_CPU(unsigned long, freq_scale); static inline -unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu) 
+unsigned long topology_get_freq_scale(int cpu) { return per_cpu(freq_scale, cpu); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9fec992410f7..14859757bff0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3120,7 +3120,7 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa, u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */ u64 periods; - scale_freq = arch_scale_freq_capacity(NULL, cpu); + scale_freq = arch_scale_freq_capacity(cpu); scale_cpu = arch_scale_cpu_capacity(NULL, cpu); delta += sa->period_contrib; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c5197338ac47..b7100192ecd3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1675,7 +1675,7 @@ extern void sched_avg_update(struct rq *rq); #ifndef arch_scale_freq_capacity static __always_inline -unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) +unsigned long arch_scale_freq_capacity(int cpu) { return SCHED_CAPACITY_SCALE; } @@ -1694,7 +1694,7 @@ unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { - rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); + rq->rt_avg += rt_delta * arch_scale_freq_capacity(cpu_of(rq)); sched_avg_update(rq); } #else -- cgit v1.2.3 From 7e1a9208f6c7e66bb4e5d2ed18dfd191230f431b Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 4 Dec 2017 11:23:24 +0100 Subject: sched/cpufreq: Move arch_scale_{freq,cpu}_capacity() outside of #ifdef CONFIG_SMP Currently, frequency and cpu capacity scaling is only performed on CONFIG_SMP systems (as CFS PELT signals are only present for such systems). However, other scheduling classes want to do freq/cpu scaling, and for !CONFIG_SMP configurations as well. arch_scale_freq_capacity() is useful to implement frequency scaling even on !CONFIG_SMP platforms, so we simply move it outside CONFIG_SMP ifdeffery. 
Even if arch_scale_cpu_capacity() is not useful on !CONFIG_SMP platforms, we make a default implementation available for such configurations anyway to simplify scheduler code doing CPU scale invariance. Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: alessio.balsini@arm.com Cc: bristot@redhat.com Cc: claudio@evidence.eu.com Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: luca.abeni@santannapisa.it Cc: mathieu.poirier@linaro.org Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: rjw@rjwysocki.net Cc: tkjos@android.com Cc: tommaso.cucinotta@santannapisa.it Cc: vincent.guittot@linaro.org Cc: viresh.kumar@linaro.org Link: http://lkml.kernel.org/r/20171204102325.5110-8-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 12 ++++++------ kernel/sched/sched.h | 13 ++++++++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index cf257c2e728d..26347741ba50 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -6,6 +6,12 @@ #include +/* + * Increase resolution of cpu_capacity calculations + */ +#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT +#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) + /* * sched-domains (multiprocessor balancing) declarations: */ @@ -27,12 +33,6 @@ #define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ #define SD_NUMA 0x4000 /* cross-node balancing */ -/* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b7100192ecd3..e122c89bdbdd 100644 --- a/kernel/sched/sched.h +++ 
b/kernel/sched/sched.h @@ -1670,9 +1670,6 @@ static inline int hrtick_enabled(struct rq *rq) #endif /* CONFIG_SCHED_HRTICK */ -#ifdef CONFIG_SMP -extern void sched_avg_update(struct rq *rq); - #ifndef arch_scale_freq_capacity static __always_inline unsigned long arch_scale_freq_capacity(int cpu) @@ -1681,6 +1678,9 @@ unsigned long arch_scale_freq_capacity(int cpu) } #endif +#ifdef CONFIG_SMP +extern void sched_avg_update(struct rq *rq); + #ifndef arch_scale_cpu_capacity static __always_inline unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) @@ -1698,6 +1698,13 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) sched_avg_update(rq); } #else +#ifndef arch_scale_cpu_capacity +static __always_inline +unsigned long arch_scale_cpu_capacity(void __always_unused *sd, int cpu) +{ + return SCHED_CAPACITY_SCALE; +} +#endif static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } static inline void sched_avg_update(struct rq *rq) { } #endif -- cgit v1.2.3 From 07881166a892fa4908ac4924660a7793f75d6544 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 4 Dec 2017 11:23:25 +0100 Subject: sched/deadline: Make bandwidth enforcement scale-invariant Apply frequency and CPU scale-invariance correction factor to bandwidth enforcement (similar to what we already do to fair utilization tracking). Each delta_exec gets scaled considering current frequency and maximum CPU capacity; which means that the reservation runtime parameter (that needs to be specified by profiling the task execution at max frequency on the biggest capacity core) thus gets scaled accordingly. Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Cc: Claudio Scordino Cc: Linus Torvalds Cc: Luca Abeni Cc: Peter Zijlstra Cc: Rafael J .
Wysocki Cc: Thomas Gleixner Cc: Viresh Kumar Cc: alessio.balsini@arm.com Cc: bristot@redhat.com Cc: dietmar.eggemann@arm.com Cc: joelaf@google.com Cc: juri.lelli@redhat.com Cc: mathieu.poirier@linaro.org Cc: morten.rasmussen@arm.com Cc: patrick.bellasi@arm.com Cc: rjw@rjwysocki.net Cc: rostedt@goodmis.org Cc: tkjos@android.com Cc: tommaso.cucinotta@santannapisa.it Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/20171204102325.5110-9-juri.lelli@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 26 ++++++++++++++++++++++---- kernel/sched/fair.c | 2 -- kernel/sched/sched.h | 2 ++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 54a0dc1424a9..9bb0e0c412ec 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1151,7 +1151,8 @@ static void update_curr_dl(struct rq *rq) { struct task_struct *curr = rq->curr; struct sched_dl_entity *dl_se = &curr->dl; - u64 delta_exec; + u64 delta_exec, scaled_delta_exec; + int cpu = cpu_of(rq); if (!dl_task(curr) || !on_dl_rq(dl_se)) return; @@ -1185,9 +1186,26 @@ static void update_curr_dl(struct rq *rq) if (dl_entity_is_special(dl_se)) return; - if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) - delta_exec = grub_reclaim(delta_exec, rq, &curr->dl); - dl_se->runtime -= delta_exec; + /* + * For tasks that participate in GRUB, we implement GRUB-PA: the + * spare reclaimed bandwidth is used to clock down frequency. + * + * For the others, we still need to scale reservation parameters + * according to current frequency and CPU maximum capacity. 
+ */ + if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) { + scaled_delta_exec = grub_reclaim(delta_exec, + rq, + &curr->dl); + } else { + unsigned long scale_freq = arch_scale_freq_capacity(cpu); + unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu); + + scaled_delta_exec = cap_scale(delta_exec, scale_freq); + scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu); + } + + dl_se->runtime -= scaled_delta_exec; throttle: if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 14859757bff0..1070803cb423 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3089,8 +3089,6 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3) return c1 + c2 + c3; } -#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) - /* * Accumulate the three separate parts of the sum; d1 the remainder * of the last (incomplete) period, d2 the span of full periods and d3 diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e122c89bdbdd..2e95505e23c6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -156,6 +156,8 @@ static inline int task_has_dl_policy(struct task_struct *p) return dl_policy(p->policy); } +#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) + /* * !! For sched_setattr_nocheck() (kernel) only !! * -- cgit v1.2.3 From d780537f9b49e9d714a454e5ed989d909beab8ec Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 10 Jan 2018 13:04:58 +0100 Subject: drm/tegra: sor: Fix hang on Tegra124 eDP The SOR0 found on Tegra124 and Tegra210 only supports eDP and LVDS and therefore has a slightly different clock tree than the SOR1 which does not support eDP, but HDMI and DP instead. 
Commit e1335e2f0cfc ("drm/tegra: sor: Reimplement pad clock") breaks setups with eDP because the sor->clk_out clock is uninitialized and therefore setting the parent clock (either the safe clock or either of the display PLLs) fails, which can cause hangs later on since there is no clock driving the module. Fix this by falling back to the module clock for sor->clk_out on those setups. This guarantees that the module will always be clocked by an enabled clock and hence prevents those hangs. Fixes: e1335e2f0cfc ("drm/tegra: sor: Reimplement pad clock") Reported-by: Guillaume Tucker Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/sor.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index b0a1dedac802..476079f1255f 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -2656,6 +2656,9 @@ static int tegra_sor_probe(struct platform_device *pdev) name, err); goto remove; } + } else { + /* fall back to the module clock on SOR0 (eDP/LVDS only) */ + sor->clk_out = sor->clk; } sor->clk_parent = devm_clk_get(&pdev->dev, "parent"); -- cgit v1.2.3 From c23bd3877bc21d830fa650570fc1a88bea82ecd2 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 9 Jan 2018 10:03:39 +0100 Subject: PM / core: Re-structure code for clearing the direct_complete flag To make the code more consistent, let's clear the parent's direct_complete flag along with clearing it for suppliers, instead of as currently, when propagating the wakeup_path flag to parents. While changing this, let's take the opportunity to rename the affected internal functions, to make them self-explanatory. Like this: dpm_clear_suppliers_direct_complete -> dpm_clear_superiors_direct_complete dpm_propagate_to_parent -> dpm_propagate_wakeup_to_parent Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/main.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index ebcec7e677ba..720e36ec84ac 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1660,7 +1660,7 @@ static int legacy_suspend(struct device *dev, pm_message_t state, return error; } -static void dpm_propagate_to_parent(struct device *dev) +static void dpm_propagate_wakeup_to_parent(struct device *dev) { struct device *parent = dev->parent; @@ -1669,18 +1669,23 @@ static void dpm_propagate_to_parent(struct device *dev) spin_lock_irq(&parent->power.lock); - parent->power.direct_complete = false; if (dev->power.wakeup_path && !parent->power.ignore_children) parent->power.wakeup_path = true; spin_unlock_irq(&parent->power.lock); } -static void dpm_clear_suppliers_direct_complete(struct device *dev) +static void dpm_clear_superiors_direct_complete(struct device *dev) { struct device_link *link; int idx; + if (dev->parent) { + spin_lock_irq(&dev->parent->power.lock); + dev->parent->power.direct_complete = false; + spin_unlock_irq(&dev->parent->power.lock); + } + idx = device_links_read_lock(); list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) { @@ -1791,8 +1796,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (device_may_wakeup(dev)) dev->power.wakeup_path = true; - dpm_propagate_to_parent(dev); - dpm_clear_suppliers_direct_complete(dev); + dpm_propagate_wakeup_to_parent(dev); + dpm_clear_superiors_direct_complete(dev); } device_unlock(dev); -- cgit v1.2.3 From 0a99d767a9b0aae6e0fd983c889c793e4c91684c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 9 Jan 2018 10:03:40 +0100 Subject: PM / core: Propagate wakeup_path status flag in __device_suspend_late() Currently the wakeup_path status flag becomes propagated from a child device to its parent device at __device_suspend(). 
This allows a driver dealing with a parent device to act on the flag from its ->suspend() callback. However, in situations when the wakeup_path status flag needs to be set from a ->suspend_late() callback, its value doesn't get propagated to the parent by the PM core. Let's address this limitation, by also propagating the flag at __device_suspend_late(). Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 720e36ec84ac..02a497e7c785 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1447,6 +1447,21 @@ int dpm_suspend_noirq(pm_message_t state) return ret; } +static void dpm_propagate_wakeup_to_parent(struct device *dev) +{ + struct device *parent = dev->parent; + + if (!parent) + return; + + spin_lock_irq(&parent->power.lock); + + if (dev->power.wakeup_path && !parent->power.ignore_children) + parent->power.wakeup_path = true; + + spin_unlock_irq(&parent->power.lock); +} + static pm_callback_t dpm_subsys_suspend_late_cb(struct device *dev, pm_message_t state, const char **info_p) @@ -1527,6 +1542,7 @@ Run: async_error = error; goto Complete; } + dpm_propagate_wakeup_to_parent(dev); Skip: dev->power.is_late_suspended = true; @@ -1660,21 +1676,6 @@ static int legacy_suspend(struct device *dev, pm_message_t state, return error; } -static void dpm_propagate_wakeup_to_parent(struct device *dev) -{ - struct device *parent = dev->parent; - - if (!parent) - return; - - spin_lock_irq(&parent->power.lock); - - if (dev->power.wakeup_path && !parent->power.ignore_children) - parent->power.wakeup_path = true; - - spin_unlock_irq(&parent->power.lock); -} - static void dpm_clear_superiors_direct_complete(struct device *dev) { struct device_link *link; -- cgit v1.2.3 From 274920a3ecd5f43af0cc380bc0a9ee73a52b9f8a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: 
Wed, 10 Jan 2018 23:49:12 +1100 Subject: powerpc/xmon: Add RFI flush related fields to paca dump Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index cab24f549e7c..b3bb5beec54a 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2375,6 +2375,10 @@ static void dump_one_paca(int cpu) DUMP(p, slb_cache_ptr, "x"); for (i = 0; i < SLB_CACHE_ENTRIES; i++) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); + + DUMP(p, rfi_flush_fallback_area, "px"); + DUMP(p, l1d_flush_congruence, "llx"); + DUMP(p, l1d_flush_sets, "llx"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E -- cgit v1.2.3 From 1e77fc82110ac36febf46c1e2782f504f7d23099 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 9 Jan 2018 19:08:21 +0100 Subject: gpio: Add missing open drain/source handling to gpiod_set_value_cansleep() Since commit f11a04464ae57e8d ("i2c: gpio: Enable working over slow can_sleep GPIOs"), probing the i2c RTC connected to an i2c-gpio bus on r8a7740/armadillo fails with: rtc-s35390a 0-0030: error resetting chip rtc-s35390a: probe of 0-0030 failed with error -5 More debug code reveals: i2c i2c-0: master_xfer[0] R, addr=0x30, len=1 i2c i2c-0: NAK from device addr 0x30 msg #0 s35390a_get_reg: ret = -6 Commit 02e479808b5d62f8 ("gpio: Alter semantics of *raw* operations to actually be raw") moved open drain/source handling from gpiod_set_raw_value_commit() to gpiod_set_value(), but forgot to take into account that gpiod_set_value_cansleep() also needs this handling. The i2c protocol mandates that i2c signals are open drain, hence i2c communication fails. Fix this by adding the missing handling to gpiod_set_value_cansleep(), using a new common helper gpiod_set_value_nocheck(). 
Fixes: 02e479808b5d62f8 ("gpio: Alter semantics of *raw* operations to actually be raw") Signed-off-by: Geert Uytterhoeven [removed underscore syntax, added kerneldoc] Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 44332b793718..14532d9576e4 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2892,6 +2892,27 @@ void gpiod_set_raw_value(struct gpio_desc *desc, int value) } EXPORT_SYMBOL_GPL(gpiod_set_raw_value); +/** + * gpiod_set_value_nocheck() - set a GPIO line value without checking + * @desc: the descriptor to set the value on + * @value: value to set + * + * This sets the value of a GPIO line backing a descriptor, applying + * different semantic quirks like active low and open drain/source + * handling. + */ +static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value) +{ + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) + value = !value; + if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) + gpio_set_open_drain_value_commit(desc, value); + else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) + gpio_set_open_source_value_commit(desc, value); + else + gpiod_set_raw_value_commit(desc, value); +} + /** * gpiod_set_value() - assign a gpio's value * @desc: gpio whose value will be assigned @@ -2906,16 +2927,8 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_value); void gpiod_set_value(struct gpio_desc *desc, int value) { VALIDATE_DESC_VOID(desc); - /* Should be using gpiod_set_value_cansleep() */ WARN_ON(desc->gdev->chip->can_sleep); - if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) - value = !value; - if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) - gpio_set_open_drain_value_commit(desc, value); - else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) - gpio_set_open_source_value_commit(desc, value); - else - gpiod_set_raw_value_commit(desc, value); + gpiod_set_value_nocheck(desc, value); } 
EXPORT_SYMBOL_GPL(gpiod_set_value); @@ -3243,9 +3256,7 @@ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { might_sleep_if(extra_checks); VALIDATE_DESC_VOID(desc); - if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) - value = !value; - gpiod_set_raw_value_commit(desc, value); + gpiod_set_value_nocheck(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); -- cgit v1.2.3 From 951a010233625b77cde3430b4b8785a9a22968d1 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 9 Jan 2018 12:10:21 +0000 Subject: xen/gntdev: Fix off-by-one error when unmapping with holes If the requested range has a hole, the calculation of the number of pages to unmap is off by one. Fix it. Signed-off-by: Ross Lagerwall Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 57efbd3b053b..d3391a1e3796 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -380,10 +380,8 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages) } range = 0; while (range < pages) { - if (map->unmap_ops[offset+range].handle == -1) { - range--; + if (map->unmap_ops[offset+range].handle == -1) break; - } range++; } err = __unmap_grant_pages(map, offset, range); -- cgit v1.2.3 From cf2acf66ad43abb39735568f55e1f85f9844e990 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 9 Jan 2018 12:10:22 +0000 Subject: xen/gntdev: Fix partial gntdev_mmap() cleanup When cleaning up after a partially successful gntdev_mmap(), unmap the successfully mapped grant pages otherwise Xen will kill the domain if in debug mode (Attempt to implicitly unmap a granted PTE) or Linux will kill the process and emit "BUG: Bad page map in process" if Xen is in release mode. This is only needed when use_ptemod is true because gntdev_put_map() will unmap grant pages itself when use_ptemod is false. 
Signed-off-by: Ross Lagerwall Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index d3391a1e3796..bd56653b9bbc 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1071,8 +1071,10 @@ unlock_out: out_unlock_put: mutex_unlock(&priv->lock); out_put_map: - if (use_ptemod) + if (use_ptemod) { map->vma = NULL; + unmap_grant_pages(map, 0, map->count); + } gntdev_put_map(priv, map); return err; } -- cgit v1.2.3 From 2248fade965a5f1ba2a8e6e63f84df696b2d2780 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 11 Jan 2018 01:17:24 +1100 Subject: powerpc/xmon: Don't print hashed pointers in paca dump Remember when the biggest problem we had to worry about was hashed pointers, those were the days. These were missed in my earlier patch because they don't match "%p", but the macro is hiding a "%p", so these all end up being hashed, which is not what we want in xmon. Convert them to "%px". 
Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index b3bb5beec54a..0ddc7ac6c5f1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu) DUMP(p, kernel_toc, "lx"); DUMP(p, kernelbase, "lx"); DUMP(p, kernel_msr, "lx"); - DUMP(p, emergency_sp, "p"); + DUMP(p, emergency_sp, "px"); #ifdef CONFIG_PPC_BOOK3S_64 - DUMP(p, nmi_emergency_sp, "p"); - DUMP(p, mc_emergency_sp, "p"); + DUMP(p, nmi_emergency_sp, "px"); + DUMP(p, mc_emergency_sp, "px"); DUMP(p, in_nmi, "x"); DUMP(p, in_mce, "x"); DUMP(p, hmi_event_available, "x"); @@ -2382,14 +2382,14 @@ static void dump_one_paca(int cpu) #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E - DUMP(p, pgd, "p"); - DUMP(p, kernel_pgd, "p"); - DUMP(p, tcd_ptr, "p"); - DUMP(p, mc_kstack, "p"); - DUMP(p, crit_kstack, "p"); - DUMP(p, dbg_kstack, "p"); + DUMP(p, pgd, "px"); + DUMP(p, kernel_pgd, "px"); + DUMP(p, tcd_ptr, "px"); + DUMP(p, mc_kstack, "px"); + DUMP(p, crit_kstack, "px"); + DUMP(p, dbg_kstack, "px"); #endif - DUMP(p, __current, "p"); + DUMP(p, __current, "px"); DUMP(p, kstack, "lx"); printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1)); DUMP(p, stab_rr, "lx"); @@ -2407,7 +2407,7 @@ static void dump_one_paca(int cpu) #endif #ifdef CONFIG_PPC_POWERNV - DUMP(p, core_idle_state_ptr, "p"); + DUMP(p, core_idle_state_ptr, "px"); DUMP(p, thread_idle_state, "x"); DUMP(p, thread_mask, "x"); DUMP(p, subcore_sibling_mask, "x"); -- cgit v1.2.3 From 0d9cac0ca0429830c40fe1a4e50e60f6221fd7b6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2018 12:40:04 +0300 Subject: drm/vmwgfx: Potential off by one in vmw_view_add() The vmw_view_cmd_to_type() function returns vmw_view_max (3) on error. It's one element beyond the end of the vmw_view_cotables[] table. 
My read on this is that it's possible to hit this failure. header->id comes from vmw_cmd_check() and it's a user controlled number between 1040 and 1225 so we can hit that error. But I don't have the hardware to test this code. Fixes: d80efd5cb3de ("drm/vmwgfx: Initial DX support") Signed-off-by: Dan Carpenter Reviewed-by: Thomas Hellstrom Cc: --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 21c62a34e558..87e8af5776a3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2731,6 +2731,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv, } view_type = vmw_view_cmd_to_type(header->id); + if (view_type == vmw_view_max) + return -EINVAL; cmd = container_of(header, typeof(*cmd), header); ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, user_surface_converter, -- cgit v1.2.3 From 349524bc0da698ec77f2057cf4a4948eb6349265 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Jan 2018 17:10:12 +1100 Subject: powerpc: Don't preempt_disable() in show_cpuinfo() This causes warnings from cpufreq mutex code. This is also rather unnecessary and ineffective. If we really want to prevent concurrent unplug, we could take the unplug read lock but I don't see this being critical. 
Fixes: cd77b5ce208c ("powerpc/powernv/cpufreq: Fix the frequency read by /proc/cpuinfo") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d213542a48b..8fd3a70047f1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. */ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - #ifdef CONFIG_SMP pvr = per_cpu(cpu_pvr, cpu_id); #else @@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "\n"); #endif - - preempt_enable(); - /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) show_cpuinfo_summary(m); -- cgit v1.2.3 From 8993d445df388e3541f48920a2353cfc904b220a Mon Sep 17 00:00:00 2001 From: Chiara Bruschi Date: Mon, 18 Dec 2017 16:21:59 +0000 Subject: block, bfq: fix occurrences of request finish method's old name Commit '7b9e93616399' ("blk-mq-sched: unify request finished methods") changed the old name of current bfq_finish_request method, but left it unchanged elsewhere in the code (related comments, part of function name bfq_put_rq_priv_body). This commit fixes all occurrences of the old name of this method by changing them into the current name. 
Fixes: 7b9e93616399 ("blk-mq-sched: unify request finished methods") Reviewed-by: Paolo Valente Signed-off-by: Federico Motta Signed-off-by: Chiara Bruschi Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 5e6f837f663e..f352b1677143 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3684,8 +3684,8 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) } /* - * We exploit the put_rq_private hook to decrement - * rq_in_driver, but put_rq_private will not be + * We exploit the bfq_finish_request hook to decrement + * rq_in_driver, but bfq_finish_request will not be * invoked on this request. So, to avoid unbalance, * just start this request, without incrementing * rq_in_driver. As a negative consequence, @@ -3694,14 +3694,14 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) * bfq_schedule_dispatch to be invoked uselessly. * * As for implementing an exact solution, the - * put_request hook, if defined, is probably invoked - * also on this request. So, by exploiting this hook, - * we could 1) increment rq_in_driver here, and 2) - * decrement it in put_request. Such a solution would - * let the value of the counter be always accurate, - * but it would entail using an extra interface - * function. This cost seems higher than the benefit, - * being the frequency of non-elevator-private + * bfq_finish_request hook, if defined, is probably + * invoked also on this request. So, by exploiting + * this hook, we could 1) increment rq_in_driver here, + * and 2) decrement it in bfq_finish_request. Such a + * solution would let the value of the counter be + * always accurate, but it would entail using an extra + * interface function. This cost seems higher than the + * benefit, being the frequency of non-elevator-private * requests very low. 
*/ goto start_rq; @@ -4558,7 +4558,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) bfq_schedule_dispatch(bfqd); } -static void bfq_put_rq_priv_body(struct bfq_queue *bfqq) +static void bfq_finish_request_body(struct bfq_queue *bfqq) { bfqq->allocated--; @@ -4588,7 +4588,7 @@ static void bfq_finish_request(struct request *rq) spin_lock_irqsave(&bfqd->lock, flags); bfq_completed_request(bfqq, bfqd); - bfq_put_rq_priv_body(bfqq); + bfq_finish_request_body(bfqq); spin_unlock_irqrestore(&bfqd->lock, flags); } else { @@ -4609,7 +4609,7 @@ static void bfq_finish_request(struct request *rq) bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); } - bfq_put_rq_priv_body(bfqq); + bfq_finish_request_body(bfqq); } rq->elv.priv[0] = NULL; -- cgit v1.2.3 From 3d7c27b6dbca4c90e7d921b45c2240e7c3cb92a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:53 +0100 Subject: perf script: Add support to display lost events Adding option to display lost events: $ perf script --show-lost-events ... mplayer 13810 [002] 468011.402396: 100 cycles:ppp: ff.. mplayer 13810 [002] 468011.402396: PERF_RECORD_LOST lost 3880 mplayer 13810 [002] 468011.402397: 100 cycles:ppp: ff.. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-10-jolsa@kernel.org [ Use PRIu64 when printing u64 values, fixing the build in some arches ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/event.c | 8 ++++++++ 3 files changed, 39 insertions(+) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 93ae8d60e3d3..806ec6391fd6 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -300,6 +300,9 @@ OPTIONS Display context switch events i.e. events of type PERF_RECORD_SWITCH or PERF_RECORD_SWITCH_CPU_WIDE. +--show-lost-events + Display lost events i.e. events of type PERF_RECORD_LOST. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb603495cf4a..c1cce474c0f1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1489,6 +1489,7 @@ struct perf_script { bool show_mmap_events; bool show_switch_events; bool show_namespace_events; + bool show_lost_events; bool allocated; bool per_event_dump; struct cpu_map *cpus; @@ -2080,6 +2081,29 @@ static int process_switch_event(struct perf_tool *tool, return 0; } +static int +process_lost_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + struct thread *thread; + + thread = machine__findnew_thread(machine, sample->pid, + sample->tid); + if (thread == NULL) + return -1; + + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_LOST, stdout); + perf_event__fprintf(event, stdout); + thread__put(thread); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -2174,6 +2198,8 @@ static int __cmd_script(struct perf_script *script) script->tool.context_switch = process_switch_event; if (script->show_namespace_events) script->tool.namespaces = process_namespaces_event; + if (script->show_lost_events) + script->tool.lost = process_lost_event; if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -3110,6 +3136,8 @@ int cmd_script(int argc, const char **argv) "Show context switch events (if recorded)"), OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, "Show namespace events (if recorded)"), + OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, + "Show lost events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named 
by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 97a8ef9980db..44e603c27944 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1435,6 +1435,11 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) event->context_switch.next_prev_tid); } +static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1467,6 +1472,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_SWITCH_CPU_WIDE: ret += perf_event__fprintf_switch(event, fp); break; + case PERF_RECORD_LOST: + ret += perf_event__fprintf_lost(event, fp); + break; default: ret += fprintf(fp, "\n"); } -- cgit v1.2.3 From 075ca1ebb25e798e4072a1e3a482b829bb51afb2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:54 +0100 Subject: perf tools: Make the tool's warning messages optional I want to display the pure events status coming in the next patch and the tool's warnings are superfluous in the output. Making it optional, enabled by default. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 6 ++++-- tools/perf/util/tool.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 54e30f1bcbd7..8d0fa2f8da16 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1773,7 +1773,8 @@ done: err = perf_session__flush_thread_stacks(session); out_err: free(buf); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); auxtrace__free_events(session); return err; @@ -1929,7 +1930,8 @@ out: err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); /* * We may switching perf.data output, make ordered_events * reusable. diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 2532b558099b..183c91453522 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -76,6 +76,7 @@ struct perf_tool { bool ordered_events; bool ordering_requires_timestamps; bool namespace_events; + bool no_warn; enum show_feature_header show_feat_hdr; }; -- cgit v1.2.3 From a4a4d0a7a2b20f7880262de4f51685baaf693476 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:55 +0100 Subject: perf report: Add --stats option to display quick data statistics Add --stats option to display quick data statistics of event numbers, without any further processing, like the one at the end of the perf report -D command. 
$ perf report --stat Aggregated stats: TOTAL events: 4566 MMAP events: 113 LOST events: 19 COMM events: 3 FORK events: 400 SAMPLE events: 3315 MMAP2 events: 32 FINISHED_ROUND events: 681 THREAD_MAP events: 1 CPU_MAP events: 1 TIME_CONV events: 1 I found this useful when hunting lost events for another change. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-12-jolsa@kernel.org [ Rename it to --stats, plural ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 26 +++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 1e02c4e1a81f..a7d11ef2fe25 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -457,6 +457,10 @@ include::itrace.txt[] will be printed. Each entry is function name or file/line. Enabled by default, disable with --no-inline. +--stats:: + Display overall events statistics without any further processing. 
+ (like the one at the end of the perf report -D command) + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 770bf8a614f2..8e67a8c25ab1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -62,6 +62,7 @@ struct report { bool show_threads; bool inverted_callchain; bool mem_mode; + bool stats_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -588,6 +589,20 @@ static void report__output_resort(struct report *rep) ui_progress__finish(); } +static void stats_setup(struct report *rep) +{ + memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.no_warn = true; +} + +static int stats_print(struct report *rep) +{ + struct perf_session *session = rep->session; + + perf_session__fprintf_nr_events(session, stdout); + return 0; +} + static int __cmd_report(struct report *rep) { int ret; @@ -619,12 +634,18 @@ static int __cmd_report(struct report *rep) return ret; } + if (rep->stats_mode) + stats_setup(rep); + ret = perf_session__process_events(session); if (ret) { ui__error("failed to process sample\n"); return ret; } + if (rep->stats_mode) + return stats_print(rep); + report__warn_kptr_restrict(rep); evlist__for_each_entry(session->evlist, pos) @@ -781,6 +802,7 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1042,6 +1064,8 @@ repeat: report.tool.show_feat_hdr = SHOW_FEAT_HEADER; if (report.show_full_info) report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; + if (report.stats_mode) + use_browser = 0; if (strcmp(input_name, "-") != 0) setup_browser(true); @@ -1064,7 +1088,7 @@ repeat: ret = 0; goto error; } - } 
else if (use_browser == 0 && !quiet) { + } else if (use_browser == 0 && !quiet && !report.stats_mode) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); } -- cgit v1.2.3 From 2d1073def3cb69aa44f99be7ef42da7cc561be1f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Jan 2018 12:03:47 -0300 Subject: perf trace: Beautify 'gettid' syscall result Before: # trace -a -e gettid sleep 0.01 4.863 ( 0.005 ms): Chrome_ChildIO/26241 gettid() = 26241 4.931 ( 0.004 ms): Chrome_IOThrea/26154 gettid() = 26154 4.942 ( 0.001 ms): Chrome_IOThrea/26154 gettid() = 26154 4.946 ( 0.001 ms): Chrome_IOThrea/26154 gettid() = 26154 4.970 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 # After: # trace -a -e gettid sleep 0.01 0.000 ( 0.009 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) 3.416 ( 0.002 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 3.424 ( 0.001 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 3.343 ( 0.002 ms): chrome/26116 gettid() = 26116 (chrome) 3.386 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) 4.003 ( 0.003 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 4.031 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-kyg4gz2yy0vkrrh2vtq29u71@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7c57898095ea..71e64bdca86f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -622,6 +622,7 @@ static struct syscall_fmt { .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, { .name = "getrlimit", .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, + { .name = "gettid", .errpid = true, }, { .name = "ioctl", .arg 
= { #if defined(__i386__) || defined(__x86_64__) -- cgit v1.2.3 From 930f8b3479444d264aa33e008c4b00b86e8c62cc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:56 +0100 Subject: perf report: Add --tasks option to display monitored tasks Add --tasks option to display monitored tasks stored in perf.data. Displaying pid/tid/ppid plus the command string aligned to distinguish parent and child tasks. $ perf record -a ... $ perf report --tasks # pid tid ppid comm 0 0 -1 |swapper 2 2 0 | kthreadd 14080 14080 2 | kworker/u17:1 4 4 2 | kworker/0:0H 6 6 2 | mm_percpu_wq ... 1 1 0 | systemd 23242 23242 1 | firefox 23242 23298 23242 | Cache2 I/O 23242 23304 23242 | GMPThread ... 1195 1195 1 | login 1611 1611 1195 | bash 1639 1639 1611 | startx 1663 1663 1639 | xinit 1673 1673 1663 | xmonad-x86_64-l 23939 23939 1673 | xterm 23941 23941 23939 | bash 23963 23963 23941 | mutt 24954 24954 23963 | offlineimap Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-13-jolsa@kernel.org [ Make it --tasks, plural, --task works as well, as its unambiguous ] [ Use machine__find_thread(), not findnew(), as pointed out by Namhyung ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 + tools/perf/builtin-report.c | 136 ++++++++++++++++++++++++++++++- 2 files changed, 138 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index a7d11ef2fe25..856c3c7e94fa 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -461,6 +461,10 @@ include::itrace.txt[] Display overall events statistics without any further processing. (like the one at the end of the perf report -D command) +--tasks:: + Display monitored tasks stored in perf data. 
Displaying pid/tid/ppid + plus the command string aligned to distinguish parent and child tasks. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8e67a8c25ab1..2c7bd85651dc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -15,6 +15,7 @@ #include "util/color.h" #include #include +#include #include "util/symbol.h" #include "util/callchain.h" #include "util/values.h" @@ -63,6 +64,7 @@ struct report { bool inverted_callchain; bool mem_mode; bool stats_mode; + bool tasks_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -603,6 +605,124 @@ static int stats_print(struct report *rep) return 0; } +static void tasks_setup(struct report *rep) +{ + memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.comm = perf_event__process_comm; + rep->tool.exit = perf_event__process_exit; + rep->tool.fork = perf_event__process_fork; + rep->tool.no_warn = true; +} + +struct task { + struct thread *thread; + struct list_head list; + struct list_head children; +}; + +static struct task *tasks_list(struct task *task, struct machine *machine) +{ + struct thread *parent_thread, *thread = task->thread; + struct task *parent_task; + + /* Already listed. */ + if (!list_empty(&task->list)) + return NULL; + + /* Last one in the chain. 
*/ + if (thread->ppid == -1) + return task; + + parent_thread = machine__find_thread(machine, -1, thread->ppid); + if (!parent_thread) + return ERR_PTR(-ENOENT); + + parent_task = thread__priv(parent_thread); + list_add_tail(&task->list, &parent_task->children); + return tasks_list(parent_task, machine); +} + +static void task__print_level(struct task *task, FILE *fp, int level) +{ + struct thread *thread = task->thread; + struct task *child; + + fprintf(fp, " %8d %8d %8d |%*s%s\n", + thread->pid_, thread->tid, thread->ppid, + level, "", thread__comm_str(thread)); + + if (!list_empty(&task->children)) { + list_for_each_entry(child, &task->children, list) + task__print_level(child, fp, level + 1); + } +} + +static int tasks_print(struct report *rep, FILE *fp) +{ + struct perf_session *session = rep->session; + struct machine *machine = &session->machines.host; + struct task *tasks, *task; + unsigned int nr = 0, itask = 0, i; + struct rb_node *nd; + LIST_HEAD(list); + + /* + * No locking needed while accessing machine->threads, + * because --tasks is single threaded command. + */ + + /* Count all the threads. */ + for (i = 0; i < THREADS__TABLE_SIZE; i++) + nr += machine->threads[i].nr; + + tasks = malloc(sizeof(*tasks) * nr); + if (!tasks) + return -ENOMEM; + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + task = tasks + itask++; + + task->thread = rb_entry(nd, struct thread, rb_node); + INIT_LIST_HEAD(&task->children); + INIT_LIST_HEAD(&task->list); + thread__set_priv(task->thread, task); + } + } + + /* + * Iterate every task down to the unprocessed parent + * and link all in task children list. Task with no + * parent is added into 'list'. 
+ */ + for (itask = 0; itask < nr; itask++) { + task = tasks + itask; + + if (!list_empty(&task->list)) + continue; + + task = tasks_list(task, machine); + if (IS_ERR(task)) { + pr_err("Error: failed to process tasks\n"); + free(tasks); + return PTR_ERR(task); + } + + if (task) + list_add_tail(&task->list, &list); + } + + fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm"); + + list_for_each_entry(task, &list, list) + task__print_level(task, fp, 0); + + free(tasks); + return 0; +} + static int __cmd_report(struct report *rep) { int ret; @@ -637,6 +757,9 @@ static int __cmd_report(struct report *rep) if (rep->stats_mode) stats_setup(rep); + if (rep->tasks_mode) + tasks_setup(rep); + ret = perf_session__process_events(session); if (ret) { ui__error("failed to process sample\n"); @@ -646,6 +769,9 @@ static int __cmd_report(struct report *rep) if (rep->stats_mode) return stats_print(rep); + if (rep->tasks_mode) + return tasks_print(rep, stdout); + report__warn_kptr_restrict(rep); evlist__for_each_entry(session->evlist, pos) @@ -803,6 +929,7 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), + OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1064,8 +1191,12 @@ repeat: report.tool.show_feat_hdr = SHOW_FEAT_HEADER; if (report.show_full_info) report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; - if (report.stats_mode) + if (report.stats_mode || report.tasks_mode) use_browser = 0; + if (report.stats_mode && report.tasks_mode) { + pr_err("Error: --tasks and --stats options cannot be used together\n"); + goto error; + } if (strcmp(input_name, "-") != 0) setup_browser(true); @@ -1088,7 +1219,8 @@ repeat: ret = 0; goto error; } - } else if (use_browser == 0 && 
!quiet && !report.stats_mode) { + } else if (use_browser == 0 && !quiet && + !report.stats_mode && !report.tasks_mode) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); } -- cgit v1.2.3 From 9ae148f80a96a91b636ab0b57d24d4440b919817 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Wed, 10 Jan 2018 23:23:05 +0800 Subject: IIO: ADC: stm32_dfsdm_stop_filter() can be static Fixes: e2e6771c6462 ("IIO: ADC: add STM32 DFSDM sigma delta ADC support") Signed-off-by: Fengguang Wu Acked-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- drivers/iio/adc/stm32-dfsdm-adc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index b03ca3f94331..e628d04d5c77 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -254,7 +254,7 @@ static int stm32_dfsdm_start_filter(struct stm32_dfsdm *dfsdm, DFSDM_CR1_RSWSTART(1)); } -void stm32_dfsdm_stop_filter(struct stm32_dfsdm *dfsdm, unsigned int fl_id) +static void stm32_dfsdm_stop_filter(struct stm32_dfsdm *dfsdm, unsigned int fl_id) { /* Disable conversion */ regmap_update_bits(dfsdm->regmap, DFSDM_CR1(fl_id), @@ -296,9 +296,9 @@ static int stm32_dfsdm_filter_configure(struct stm32_dfsdm *dfsdm, DFSDM_CR1_RSYNC(fl->sync_mode)); } -int stm32_dfsdm_channel_parse_of(struct stm32_dfsdm *dfsdm, - struct iio_dev *indio_dev, - struct iio_chan_spec *ch) +static int stm32_dfsdm_channel_parse_of(struct stm32_dfsdm *dfsdm, + struct iio_dev *indio_dev, + struct iio_chan_spec *ch) { struct stm32_dfsdm_channel *df_ch; const char *of_str; -- cgit v1.2.3 From 9dd79fed1bed6089f9729027e2d8cfa1049266e5 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 14 Nov 2017 13:29:17 +0000 Subject: MIPS: ath25: Avoid undefined early_serial_setup() without SERIAL_8250_CONSOLE Currently MIPS allnoconfig with CONFIG_ATH25=y fails to link due to missing support for 
early_serial_setup(): LD vmlinux arch/mips/ath25/devices.o: In function 'ath25_serial_setup': devices.c:(.init.text+0x68): undefined reference to 'early_serial_setup' Rather than adding dependencies to the platform to force inclusion of SERIAL_8250_CONSOLE together with its dependencies like TTY, HAS_IOMEM, etc, just make ath25_serial_setup() a no-op when the dependency is not selected in the kernel config. Signed-off-by: Matt Redfearn Cc: James Hogan Cc: Thomas Gleixner Cc: Philippe Ombredanne Cc: Kate Stewart Cc: Greg Kroah-Hartman Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17700/ Signed-off-by: Ralf Baechle --- arch/mips/ath25/devices.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/ath25/devices.c b/arch/mips/ath25/devices.c index e1156347da53..301a9028273c 100644 --- a/arch/mips/ath25/devices.c +++ b/arch/mips/ath25/devices.c @@ -73,6 +73,7 @@ const char *get_system_type(void) void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk) { +#ifdef CONFIG_SERIAL_8250_CONSOLE struct uart_port s; memset(&s, 0, sizeof(s)); @@ -85,6 +86,7 @@ void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk) s.uartclk = uartclk; early_serial_setup(&s); +#endif /* CONFIG_SERIAL_8250_CONSOLE */ } int __init ath25_add_wmac(int nr, u32 base, int irq) -- cgit v1.2.3 From ff9bed94d0f3b82d0fff3599cf0eb2cadf0fc770 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 14 Nov 2017 15:44:22 +0000 Subject: MIPS: RB532: Avoid undefined early_serial_setup() without SERIAL_8250_CONSOLE Currently MIPS allnoconfig with CONFIG_MIKROTIK_RB532=y fails to link due to missing support for early_serial_setup(): LD vmlinux arch/mips/rb532/serial.o: In function `setup_serial_port': serial.c:(.init.text+0x14): undefined reference to `early_serial_setup' Rather than adding dependencies to the platform to force inclusion of SERIAL_8250_CONSOLE together with its dependencies like TTY, 
HAS_IOMEM, etc, just exclude arch/mips/rb532/serial.c from the build when its dependency is not selected in the kernel config. Reported-by: Ralf Baechle Signed-off-by: Matt Redfearn Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17701/ Signed-off-by: Ralf Baechle --- arch/mips/rb532/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/rb532/Makefile b/arch/mips/rb532/Makefile index efdecdb6e3ea..8186afca2234 100644 --- a/arch/mips/rb532/Makefile +++ b/arch/mips/rb532/Makefile @@ -2,4 +2,6 @@ # Makefile for the RB532 board specific parts of the kernel # -obj-y += irq.o time.o setup.o serial.o prom.o gpio.o devices.o +obj-$(CONFIG_SERIAL_8250_CONSOLE) += serial.o + +obj-y += irq.o time.o setup.o prom.o gpio.o devices.o -- cgit v1.2.3 From ecff167cc80dd3c6afff55bdc66b4981d587ba3e Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 14 Nov 2017 15:44:23 +0000 Subject: MIPS: RB532: Avoid undefined mac_pton without GENERIC_NET_UTILS Currently MIPS allnoconfig with CONFIG_MIKROTIK_RB532=y fails to link due to missing support for mac_pton(): LD vmlinux arch/mips/rb532/devices.o: In function `setup_kmac': devices.c:(.init.text+0xc): undefined reference to `mac_pton' Rather than adding dependencies to the platform to force inclusion of GENERIC_NET_UTILS which is selected by CONFIG_NET, just exclude the setup of the MAC address if CONFIG_NET is not selected in the kernel config. 
Signed-off-by: Matt Redfearn Cc: James Hogan Cc: Boris Brezillon Cc: Neil Armstrong Cc: Krzysztof Kozlowski Cc: Tony Lindgren Cc: Vladimir Zapolskiy Cc: Shawn Guo Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17702/ Signed-off-by: Ralf Baechle --- arch/mips/rb532/devices.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 32ea3e6731d6..354d258396ff 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -310,6 +310,8 @@ static int __init plat_setup_devices(void) return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs)); } +#ifdef CONFIG_NET + static int __init setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); @@ -322,4 +324,6 @@ static int __init setup_kmac(char *s) __setup("kmac=", setup_kmac); +#endif /* CONFIG_NET */ + arch_initcall(plat_setup_devices); -- cgit v1.2.3 From dfe004951b1387f8e91b83c95a5dc964a8545d49 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 14 Nov 2017 17:16:27 +0000 Subject: MIPS: BCM47XX Avoid compile error with MIPS allnoconfig Currently MIPS allnoconfig with CONFIG_BCM47XX=y fails to compile due to neither BCM47XX_BCMA nor BCM47XX_SSB being selected. This leads the enumeration in arch/mips/include/asm/mach-bcm47xx/bcm47xx.h to be empty, and compilation fails: In file included from arch/mips/bcm47xx/irq.c:32:0: ./arch/mips/include/asm/mach-bcm47xx/bcm47xx.h:34:1: error: expected identifier before '}' token }; ^ make[2]: *** [scripts/Makefile.build:314: arch/mips/bcm47xx/irq.o] Error 1 Fix this by ensuring that BCM47XX_SSB is selected if BCM47XX_BCMA is not. This allows us to select either system or both, but not neither. 
Signed-off-by: Matt Redfearn Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17703/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 350a990fc719..659e0079487f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -259,6 +259,7 @@ config BCM47XX select LEDS_GPIO_REGISTER select BCM47XX_NVRAM select BCM47XX_SPROM + select BCM47XX_SSB if !BCM47XX_BCMA help Support for BCM47XX based boards -- cgit v1.2.3 From 7e5e371ee5390a8bb7e111c794a334d9bf25ca3d Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 21 Nov 2017 00:02:40 +0000 Subject: MIPS: Fix CPS SMP NS16550 UART defaults The MIPS_CPS_NS16550_BASE and MIPS_CPS_NS16550_SHIFT options have no defaults for non-Malta platforms which select SYS_SUPPORTS_MIPS_CPS (i.e. the pistachio and generic platforms). This is problematic for automated allyesconfig and allmodconfig builds based on these platforms, since make silentoldconfig tries to ask the user for values, and especially since v4.15 where the default platform was switched to generic. Default these options to 0 and arrange for MIPS_CPS_NS16550 to be no when using that default base address, so that the option only has an effect when the default is provided (i.e. Malta) or when a value is provided by the user. 
Fixes: 609cf6f2291a ("MIPS: CPS: Early debug using an ns16550-compatible UART") Signed-off-by: James Hogan Reviewed-by: Paul Burton Tested-by: Guenter Roeck Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17749/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig.debug | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 464af5e025d6..0749c3724543 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -124,30 +124,36 @@ config SCACHE_DEBUGFS If unsure, say N. -menuconfig MIPS_CPS_NS16550 +menuconfig MIPS_CPS_NS16550_BOOL bool "CPS SMP NS16550 UART output" depends on MIPS_CPS help Output debug information via an ns16550 compatible UART if exceptions occur early in the boot process of a secondary core. -if MIPS_CPS_NS16550 +if MIPS_CPS_NS16550_BOOL + +config MIPS_CPS_NS16550 + def_bool MIPS_CPS_NS16550_BASE != 0 config MIPS_CPS_NS16550_BASE hex "UART Base Address" default 0x1b0003f8 if MIPS_MALTA + default 0 help The base address of the ns16550 compatible UART on which to output debug information from the early stages of core startup. + This is only used if non-zero. + config MIPS_CPS_NS16550_SHIFT int "UART Register Shift" - default 0 if MIPS_MALTA + default 0 help The number of bits to shift ns16550 register indices by in order to form their addresses. That is, log base 2 of the span between adjacent ns16550 registers in the system. -endif # MIPS_CPS_NS16550 +endif # MIPS_CPS_NS16550_BOOL endmenu -- cgit v1.2.3 From b6ab1a138b758c4bdf30d5517e546e1c8aff9e3b Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 24 Nov 2017 07:38:20 +0530 Subject: MIPS: ralink: Fix platform_get_irq's error checking The platform_get_irq() function returns a negative value if an error occurs, and zero or a positive number on success. Checking the result of platform_get_irq() against zero to detect an error is therefore not correct. 
Signed-off-by: Arvind Yadav Cc: john@phrozen.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17783/ Signed-off-by: Ralf Baechle --- arch/mips/ralink/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c index d4469b20d176..4f46a4509f79 100644 --- a/arch/mips/ralink/timer.c +++ b/arch/mips/ralink/timer.c @@ -109,9 +109,9 @@ static int rt_timer_probe(struct platform_device *pdev) } rt->irq = platform_get_irq(pdev, 0); - if (!rt->irq) { + if (rt->irq < 0) { dev_err(&pdev->dev, "failed to load irq\n"); - return -ENOENT; + return rt->irq; } rt->membase = devm_ioremap_resource(&pdev->dev, res); -- cgit v1.2.3 From 6439d7d16c94324300eb392ed85e3632e489e197 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Jan 2018 15:25:03 -0300 Subject: perf report: Introduce --mmaps Similar to --tasks, producing the same output plus /proc/<pid>/maps similar lines for each mmap record present in a perf.data file. Please note that not all mmaps are stored, for instance, some of the non-executable mmaps are only stored when 'perf record --data' is used, when the user wants to resolve data accesses in addition to asking for executable mmaps to get the DSO with symtabs.
E.g.: # perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.018 MB perf.data (7 samples) ] [root@jouet ~]# perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4137 4137 -1 |sleep 5628a35a1000-5628a37aa000 r-xp 00000000 3147148 /usr/bin/sleep 7fb65ad51000-7fb65b134000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fb65b134000-7fb65b35e000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7ffd94b9f000-7ffd94ba1000 r-xp 00000000 0 [vdso] # # perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.019 MB perf.data (8 samples) ] # perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4161 4161 -1 |sleep 55afae69a000-55afae8a3000 r-xp 00000000 3147148 /usr/bin/sleep 7f569f00d000-7f569f3f0000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7f569f3f0000-7f569f61a000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7fff6fffe000-7fff70000000 r-xp 00000000 0 [vdso] # # perf record time sleep 1 0.00user 0.00system 0:01.00elapsed 0%CPU (0avgtext+0avgdata 2156maxresident)k 0inputs+0outputs (0major+73minor)pagefaults 0swaps [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.019 MB perf.data (14 samples) ] # perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4281 4281 -1 |time 560560dca000-560560fcf000 r-xp 00000000 3190458 /usr/bin/time 7fc175196000-7fc175579000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fc175579000-7fc1757a3000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7ffc924f6000-7ffc924f8000 r-xp 00000000 0 [vdso] 4282 4282 4281 | sleep 560560dca000-560560fcf000 r-xp 00000000 3190458 /usr/bin/time 564b4de3c000-564b4e045000 r-xp 00000000 3147148 /usr/bin/sleep 7f6a5a716000-7f6a5aaf9000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7f6a5aaf9000-7f6a5ad23000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7fc175196000-7fc175579000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fc175579000-7fc1757a3000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 
7ffc924f6000-7ffc924f8000 r-xp 00000000 0 [vdso] 7ffcec7e6000-7ffcec7e8000 r-xp 00000000 0 [vdso] # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-zulwdlg5rfowogr1qznorvvc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 9 +++++- tools/perf/builtin-report.c | 50 +++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 856c3c7e94fa..63d0db3184c9 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -457,6 +457,13 @@ include::itrace.txt[] will be printed. Each entry is function name or file/line. Enabled by default, disable with --no-inline. +--mmaps:: + Show --tasks output plus mmap information in a format similar to + /proc/<pid>/maps. + + Please note that not all mmaps are stored, options affecting which ones + are include 'perf record --data', for instance. + --stats:: Display overall events statistics without any further processing.
(like the one at the end of the perf report -D command) @@ -469,4 +476,4 @@ include::callchain-overhead-calculation.txt[] SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-annotate[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1] diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2c7bd85651dc..dd4df9a5cd06 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -52,6 +52,7 @@ #include #include #include +#include #define PTIME_RANGE_MAX 10 @@ -65,6 +66,7 @@ struct report { bool mem_mode; bool stats_mode; bool tasks_mode; + bool mmaps_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -608,6 +610,10 @@ static int stats_print(struct report *rep) static void tasks_setup(struct report *rep) { memset(&rep->tool, 0, sizeof(rep->tool)); + if (rep->mmaps_mode) { + rep->tool.mmap = perf_event__process_mmap; + rep->tool.mmap2 = perf_event__process_mmap2; + } rep->tool.comm = perf_event__process_comm; rep->tool.exit = perf_event__process_exit; rep->tool.fork = perf_event__process_fork; @@ -642,14 +648,46 @@ static struct task *tasks_list(struct task *task, struct machine *machine) return tasks_list(parent_task, machine); } +static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +{ + size_t printed = 0; + struct rb_node *nd; + + for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); + + printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", + indent, "", map->start, map->end, + map->prot & PROT_READ ? 'r' : '-', + map->prot & PROT_WRITE ? 'w' : '-', + map->prot & PROT_EXEC ? 'x' : '-', + map->flags & MAP_SHARED ? 
's' : 'p', + map->pgoff, + map->ino, map->dso->name); + } + + return printed; +} + +static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) +{ + int printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += maps__fprintf_task(&mg->maps[i], indent, fp); + return printed; +} + static void task__print_level(struct task *task, FILE *fp, int level) { struct thread *thread = task->thread; struct task *child; + int comm_indent = fprintf(fp, " %8d %8d %8d |%*s", + thread->pid_, thread->tid, thread->ppid, + level, ""); + + fprintf(fp, "%s\n", thread__comm_str(thread)); - fprintf(fp, " %8d %8d %8d |%*s%s\n", - thread->pid_, thread->tid, thread->ppid, - level, "", thread__comm_str(thread)); + map_groups__fprintf_task(thread->mg, comm_indent, fp); if (!list_empty(&task->children)) { list_for_each_entry(child, &task->children, list) @@ -930,6 +968,7 @@ int cmd_report(int argc, const char **argv) "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"), + OPT_BOOLEAN(0, "mmaps", &report.mmaps_mode, "Display recorded tasks memory maps"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1077,6 +1116,9 @@ int cmd_report(int argc, const char **argv) report.symbol_filter_str = argv[0]; } + if (report.mmaps_mode) + report.tasks_mode = true; + if (quiet) perf_quiet_option(); @@ -1194,7 +1236,7 @@ repeat: if (report.stats_mode || report.tasks_mode) use_browser = 0; if (report.stats_mode && report.tasks_mode) { - pr_err("Error: --tasks and --stats options cannot be used together\n"); + pr_err("Error: --tasks and --mmaps can't be used together with --stats\n"); goto error; } -- cgit v1.2.3 From 5d64db2966e38bfd99114ecf0b54f97d33023dcd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Jan 2018 11:36:07 -0300 Subject: tools headers: 
Synchronize kernel <-> tooling headers Two kernel headers got modified recently due to meltdown/spectre, in: a89f040fa34e ("x86/cpufeatures: Add X86_BUG_CPU_INSECURE") which are used by tooling as well: arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/disabled-features.h None of those changes have an effect on tooling, so do a plain copy. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-qqzcs8ri3vks8cypg0puk0ae@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 4 +++- tools/arch/x86/include/asm/disabled-features.h | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 800104c8a3ed..21ac898df2d8 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -197,11 +197,12 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ - +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ @@ -340,5 +341,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define 
X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 14d6d5007314..b027633e7300 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,12 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -60,7 +66,7 @@ #define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 -#define DISABLED_MASK7 0 +#define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) #define DISABLED_MASK10 0 -- cgit v1.2.3 From c04de7b1ad645b61c141df8ca903ba0cc03a57f7 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 5 Dec 2017 22:28:22 +0000 Subject: MIPS: CM: Drop WARN_ON(vp != 0) Since commit 68923cdc2eb3 ("MIPS: CM: Add cluster & block args to mips_cm_lock_other()"), mips_smp_send_ipi_mask() has used mips_cm_lock_other_cpu() with each CPU number, rather than mips_cm_lock_other() with the first VPE in each core. Prior to r6, multicore multithreaded systems such as dual-core dual-thread interAptivs with CPU Idle enabled (e.g. MIPS Creator Ci40) results in mips_cm_lock_other() repeatedly hitting WARN_ON(vp != 0). There doesn't appear to be anything fundamentally wrong about passing a non-zero VP/VPE number, even if it is a core's region that is locked into the other region before r6, so remove that particular WARN_ON(). 
Fixes: 68923cdc2eb3 ("MIPS: CM: Add cluster & block args to mips_cm_lock_other()") Signed-off-by: James Hogan Reviewed-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.14+ Patchwork: https://patchwork.linux-mips.org/patch/17883/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/mips-cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index dd5567b1e305..8f5bd04f320a 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -292,7 +292,6 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, *this_cpu_ptr(&cm_core_lock_flags)); } else { WARN_ON(cluster != 0); - WARN_ON(vp != 0); WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL); /* -- cgit v1.2.3 From ccf85c744275de0ba40beff0bf9206a094f12e62 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 7 Dec 2017 07:14:17 +0000 Subject: MIPS: mm: Fix duplicate "const" on insn_table_MM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following gcc 7.x build error on microMIPS builds: arch/mips/mm/uasm-micromips.c:43:26: error: duplicate ‘const’ declaration specifier [-Werror=duplicate-decl-specifier] static const struct insn const insn_table_MM[insn_invalid] = { ^~~~~ The same issue has already been fixed in uasm-mips by commit 00e06297b351 ("MIPS: mm: remove duplicate "const" qualifier on insn_table"). 
Signed-off-by: James Hogan Fixes: ce807d5f67ed ("MIPS: Optimize uasm insn lookup.") Cc: David Daney Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17889/ Signed-off-by: Ralf Baechle --- arch/mips/mm/uasm-micromips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c index cdb5a191b9d5..9bb6baa45da3 100644 --- a/arch/mips/mm/uasm-micromips.c +++ b/arch/mips/mm/uasm-micromips.c @@ -40,7 +40,7 @@ #include "uasm.c" -static const struct insn const insn_table_MM[insn_invalid] = { +static const struct insn insn_table_MM[insn_invalid] = { [insn_addu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD}, [insn_addiu] = {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, [insn_and] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD}, -- cgit v1.2.3 From 93b570464cce0079c15832cbb8ea17debd541585 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Jan 2018 09:06:23 -0700 Subject: null_blk: add option for managing IO timeouts Use the fault injection framework to provide a way for null_blk to configure timeouts. This only works for queue_mode 1 and 2, since the bio mode doesn't have code for tracking timeouts. Let's say you want to have a 10% chance of timing out every 100,000 requests, and for 5 total timeouts, you could do: modprobe null_blk timeout="100000,10,0,5" This is useful for adding blktests to test that IO timeouts are handled appropriately. 
Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 1 + drivers/block/null_blk.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 40579d0cb3d1..622d9a2c8dae 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -19,6 +19,7 @@ if BLK_DEV config BLK_DEV_NULL_BLK tristate "Null test block driver" select CONFIGFS_FS + select FAULT_INJECTION config BLK_DEV_FD tristate "Normal floppy disk support" diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 78267e3e4fa5..30ec0ac11a47 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -14,6 +14,7 @@ #include #include #include +#include #define SECTOR_SHIFT 9 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) @@ -26,6 +27,8 @@ #define TICKS_PER_SEC 50ULL #define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC) +static DECLARE_FAULT_ATTR(null_timeout_attr); + static inline u64 mb_per_tick(int mbps) { return (1 << 20) / TICKS_PER_SEC * ((u64) mbps); @@ -162,6 +165,9 @@ static int g_home_node = NUMA_NO_NODE; module_param_named(home_node, g_home_node, int, S_IRUGO); MODULE_PARM_DESC(home_node, "Home node for the device"); +static char g_timeout_str[80]; +module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO); + static int g_queue_mode = NULL_Q_MQ; static int null_param_store_val(const char *str, int *val, int min, int max) @@ -1364,6 +1370,14 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req) return BLKPREP_DEFER; } +static bool should_timeout_request(struct request *rq) +{ + if (g_timeout_str[0]) + return should_fail(&null_timeout_attr, 1); + + return false; +} + static void null_request_fn(struct request_queue *q) { struct request *rq; @@ -1371,9 +1385,11 @@ static void null_request_fn(struct request_queue *q) while ((rq = blk_fetch_request(q)) != NULL) { struct nullb_cmd *cmd = rq->special; - 
spin_unlock_irq(q->queue_lock); - null_handle_cmd(cmd); - spin_lock_irq(q->queue_lock); + if (!should_timeout_request(rq)) { + spin_unlock_irq(q->queue_lock); + null_handle_cmd(cmd); + spin_lock_irq(q->queue_lock); + } } } @@ -1400,7 +1416,10 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); - return null_handle_cmd(cmd); + if (!should_timeout_request(bd->rq)) + return null_handle_cmd(cmd); + + return BLK_STS_OK; } static const struct blk_mq_ops null_mq_ops = { @@ -1634,6 +1653,18 @@ static void null_validate_conf(struct nullb_device *dev) dev->mbps = 0; } +static bool null_setup_fault(void) +{ + if (!g_timeout_str[0]) + return true; + + if (!setup_fault_attr(&null_timeout_attr, g_timeout_str)) + return false; + + null_timeout_attr.verbose = 0; + return true; +} + static int null_add_dev(struct nullb_device *dev) { struct nullb *nullb; @@ -1667,6 +1698,9 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; + if (!null_setup_fault()) + goto out_cleanup_queues; + nullb->tag_set->timeout = 5 * HZ; nullb->q = blk_mq_init_queue(nullb->tag_set); if (IS_ERR(nullb->q)) { @@ -1691,6 +1725,10 @@ static int null_add_dev(struct nullb_device *dev) rv = -ENOMEM; goto out_cleanup_queues; } + + if (!null_setup_fault()) + goto out_cleanup_blk_queue; + blk_queue_prep_rq(nullb->q, null_rq_prep_fn); blk_queue_softirq_done(nullb->q, null_softirq_done_fn); blk_queue_rq_timed_out(nullb->q, null_rq_timed_out_fn); -- cgit v1.2.3 From 5f15684bd5e5ef39d4337988864fec8012471dda Mon Sep 17 00:00:00 2001 From: Richard Narron Date: Wed, 10 Jan 2018 09:12:16 -0700 Subject: partitions/msdos: Unable to mount UFS 44bsd partitions UFS partitions from newer versions of FreeBSD 10 and 11 use relative addressing for their subpartitions. But older versions of FreeBSD still use absolute addressing just like OpenBSD and NetBSD. 
Instead of simply testing for a FreeBSD partition, the code needs to also test if the starting offset of the C subpartition is zero. https://bugzilla.kernel.org/show_bug.cgi?id=197733 Signed-off-by: Richard Narron Signed-off-by: Jens Axboe --- block/partitions/msdos.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 0af3a3db6fb0..82c44f7df911 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -301,7 +301,9 @@ static void parse_bsd(struct parsed_partitions *state, continue; bsd_start = le32_to_cpu(p->p_offset); bsd_size = le32_to_cpu(p->p_size); - if (memcmp(flavour, "bsd\0", 4) == 0) + /* FreeBSD has relative offset if C partition offset is zero */ + if (memcmp(flavour, "bsd\0", 4) == 0 && + le32_to_cpu(l->d_partitions[2].p_offset) == 0) bsd_start += offset; if (offset == bsd_start && size == bsd_size) /* full parent partition, we have it already */ -- cgit v1.2.3 From 1783c9d7cb7bc3181b9271665959b87280d98d8e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:34:45 +0100 Subject: ASoC: ux500: add MODULE_LICENSE tag This adds MODULE_LICENSE/AUTHOR/DESCRIPTION tags to the ux500 platform drivers, to avoid these build warnings: WARNING: modpost: missing MODULE_LICENSE() in sound/soc/ux500/snd-soc-ux500-plat-dma.o WARNING: modpost: missing MODULE_LICENSE() in sound/soc/ux500/snd-soc-ux500-mach-mop500.o The company no longer exists, so the email addresses of the authors don't work any more, but I've added them anyway for consistency. 
Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- sound/soc/ux500/mop500.c | 4 ++++ sound/soc/ux500/ux500_pcm.c | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/sound/soc/ux500/mop500.c b/sound/soc/ux500/mop500.c index 070a6880980e..c60a57797640 100644 --- a/sound/soc/ux500/mop500.c +++ b/sound/soc/ux500/mop500.c @@ -163,3 +163,7 @@ static struct platform_driver snd_soc_mop500_driver = { }; module_platform_driver(snd_soc_mop500_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ASoC MOP500 board driver"); +MODULE_AUTHOR("Ola Lilja"); diff --git a/sound/soc/ux500/ux500_pcm.c b/sound/soc/ux500/ux500_pcm.c index f12c01dddc8d..d35ba7700f46 100644 --- a/sound/soc/ux500/ux500_pcm.c +++ b/sound/soc/ux500/ux500_pcm.c @@ -165,3 +165,8 @@ int ux500_pcm_unregister_platform(struct platform_device *pdev) return 0; } EXPORT_SYMBOL_GPL(ux500_pcm_unregister_platform); + +MODULE_AUTHOR("Ola Lilja"); +MODULE_AUTHOR("Roger Nilsson"); +MODULE_DESCRIPTION("ASoC UX500 driver"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From fcd36c36f381f534adad5a6c6485db4405d5ea42 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Jan 2018 08:33:33 -0800 Subject: blk-mq: Explain when 'active_queues' is decremented It is nontrivial to derive from the blk-mq source code when blk_mq_tags.active_queues is decremented. Hence add a comment that explains this. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 9aa24c9508f9..266fc4f6b046 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -954,6 +954,12 @@ static void blk_mq_timeout_work(struct work_struct *work) data.next = blk_rq_timeout(round_jiffies_up(data.next)); mod_timer(&q->timeout, data.next); } else { + /* + * Request timeouts are handled as a forward rolling timer. 
If + * we end up here it means that no requests are pending and + * also that no request has been pending for a while. Mark + * each hctx as idle. + */ queue_for_each_hw_ctx(q, hctx, i) { /* the hctx may be unmapped, so check it here */ if (blk_mq_hw_queue_mapped(hctx)) -- cgit v1.2.3 From 4f58424da3deead2605e39a9df65f5f06107a3cb Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 10 Jan 2018 04:35:12 -0800 Subject: cgroup: make cgroup.threads delegatable Make cgroup.threads file delegatable. The behavior of cgroup.threads should follow the behavior of cgroup.procs. Signed-off-by: Roman Gushchin Discovered-by: Michael Kerrisk Cc: Tejun Heo Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2cf06c274e4c..7e4c44538119 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4447,6 +4447,7 @@ static struct cftype cgroup_base_files[] = { }, { .name = "cgroup.threads", + .flags = CFTYPE_NS_DELEGATABLE, .release = cgroup_procs_release, .seq_start = cgroup_threads_start, .seq_next = cgroup_procs_next, -- cgit v1.2.3 From e96fef2c3fa396dda680e943dddaa4f2a06e7b1c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 9 Jan 2018 12:04:14 -0700 Subject: nvme: Add more command status translation This adds more NVMe status code translations to blk_status_t values, and captures all the current status codes NVMe multipath uses. 
Acked-by: Mike Snitzer Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2bcd49584f71..d8956d94cbd8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -157,13 +157,20 @@ static blk_status_t nvme_error_status(struct request *req) return BLK_STS_OK; case NVME_SC_CAP_EXCEEDED: return BLK_STS_NOSPC; + case NVME_SC_LBA_RANGE: + return BLK_STS_TARGET; + case NVME_SC_BAD_ATTRIBUTES: case NVME_SC_ONCS_NOT_SUPPORTED: + case NVME_SC_INVALID_OPCODE: + case NVME_SC_INVALID_FIELD: + case NVME_SC_INVALID_NS: return BLK_STS_NOTSUPP; case NVME_SC_WRITE_FAULT: case NVME_SC_READ_ERROR: case NVME_SC_UNWRITTEN_BLOCK: case NVME_SC_ACCESS_DENIED: case NVME_SC_READ_ONLY: + case NVME_SC_COMPARE_FAILED: return BLK_STS_MEDIUM; case NVME_SC_GUARD_CHECK: case NVME_SC_APPTAG_CHECK: -- cgit v1.2.3 From 908e45643d6450551bfbdbad3f088d4bd1f1c1fb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 9 Jan 2018 12:04:15 -0700 Subject: nvme/multipath: Consult blk_status_t for failover This removes nvme multipath's specific status decoding to see if failover is needed, using the generic blk_status_t that was decoded earlier. This abstraction from the raw NVMe status means all status decoding exists in one place. 
Acked-by: Mike Snitzer Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 9 +++++---- drivers/nvme/host/multipath.c | 44 ++++++++----------------------------------- drivers/nvme/host/nvme.h | 5 +++-- 3 files changed, 16 insertions(+), 42 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d8956d94cbd8..2fe15351ac4e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -197,8 +197,10 @@ static inline bool nvme_req_needs_retry(struct request *req) void nvme_complete_rq(struct request *req) { - if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { - if (nvme_req_needs_failover(req)) { + blk_status_t status = nvme_error_status(req); + + if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { + if (nvme_req_needs_failover(req, status)) { nvme_failover_req(req); return; } @@ -209,8 +211,7 @@ void nvme_complete_rq(struct request *req) return; } } - - blk_mq_end_request(req, nvme_error_status(req)); + blk_mq_end_request(req, status); } EXPORT_SYMBOL_GPL(nvme_complete_rq); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 1218a9fca846..ae9abb600c0f 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -33,46 +33,18 @@ void nvme_failover_req(struct request *req) kblockd_schedule_work(&ns->head->requeue_work); } -bool nvme_req_needs_failover(struct request *req) +bool nvme_req_needs_failover(struct request *req, blk_status_t error) { if (!(req->cmd_flags & REQ_NVME_MPATH)) return false; - switch (nvme_req(req)->status & 0x7ff) { - /* - * Generic command status: - */ - case NVME_SC_INVALID_OPCODE: - case NVME_SC_INVALID_FIELD: - case NVME_SC_INVALID_NS: - case NVME_SC_LBA_RANGE: - case NVME_SC_CAP_EXCEEDED: - case NVME_SC_RESERVATION_CONFLICT: - return false; - - /* - * I/O command set specific error. 
Unfortunately these values are - * reused for fabrics commands, but those should never get here. - */ - case NVME_SC_BAD_ATTRIBUTES: - case NVME_SC_INVALID_PI: - case NVME_SC_READ_ONLY: - case NVME_SC_ONCS_NOT_SUPPORTED: - WARN_ON_ONCE(nvme_req(req)->cmd->common.opcode == - nvme_fabrics_command); - return false; - - /* - * Media and Data Integrity Errors: - */ - case NVME_SC_WRITE_FAULT: - case NVME_SC_READ_ERROR: - case NVME_SC_GUARD_CHECK: - case NVME_SC_APPTAG_CHECK: - case NVME_SC_REFTAG_CHECK: - case NVME_SC_COMPARE_FAILED: - case NVME_SC_ACCESS_DENIED: - case NVME_SC_UNWRITTEN_BLOCK: + switch (error) { + case BLK_STS_NOTSUPP: + case BLK_STS_NOSPC: + case BLK_STS_TARGET: + case BLK_STS_NEXUS: + case BLK_STS_MEDIUM: + case BLK_STS_PROTECTION: return false; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index eecf71ce6e75..4112fb6ce80d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -402,7 +402,7 @@ extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH void nvme_failover_req(struct request *req); -bool nvme_req_needs_failover(struct request *req); +bool nvme_req_needs_failover(struct request *req, blk_status_t error); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns_head *head); @@ -422,7 +422,8 @@ struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); static inline void nvme_failover_req(struct request *req) { } -static inline bool nvme_req_needs_failover(struct request *req) +static inline bool nvme_req_needs_failover(struct request *req, + blk_status_t error) { return false; } -- cgit v1.2.3 From 9111e5686c8cf3905191d4feb819acd874437500 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 9 Jan 2018 12:04:16 -0700 Subject: block: Provide blk_status_t decoding for path errors This patch provides a common decoder for block status path related errors that 
may be retried so various entities wishing to consult this do not have to duplicate this decision. Acked-by: Mike Snitzer Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1e628e032da..2d973ac54b09 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -39,6 +39,34 @@ typedef u8 __bitwise blk_status_t; #define BLK_STS_AGAIN ((__force blk_status_t)12) +/** + * blk_path_error - returns true if error may be path related + * @error: status the request was completed with + * + * Description: + * This classifies block error status into non-retryable errors and ones + * that may be successful if retried on a failover path. + * + * Return: + * %false - retrying failover path will not help + * %true - may succeed if retried + */ +static inline bool blk_path_error(blk_status_t error) +{ + switch (error) { + case BLK_STS_NOTSUPP: + case BLK_STS_NOSPC: + case BLK_STS_TARGET: + case BLK_STS_NEXUS: + case BLK_STS_MEDIUM: + case BLK_STS_PROTECTION: + return false; + } + + /* Anything else could be a path failure, so should be retried */ + return true; +} + struct blk_issue_stat { u64 stat; }; -- cgit v1.2.3 From e1f425e770d21a34f51d7284e55f3fa984f8e275 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 9 Jan 2018 12:04:17 -0700 Subject: nvme/multipath: Use blk_path_error Uses common code for determining if an error should be retried on alternate path. 
Acked-by: Mike Snitzer Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/multipath.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ae9abb600c0f..3b211d9e58b8 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -37,19 +37,7 @@ bool nvme_req_needs_failover(struct request *req, blk_status_t error) { if (!(req->cmd_flags & REQ_NVME_MPATH)) return false; - - switch (error) { - case BLK_STS_NOTSUPP: - case BLK_STS_NOSPC: - case BLK_STS_TARGET: - case BLK_STS_NEXUS: - case BLK_STS_MEDIUM: - case BLK_STS_PROTECTION: - return false; - } - - /* Everything else could be a path failure, so should be retried */ - return true; + return blk_path_error(error); } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) -- cgit v1.2.3 From a1275677f8cd060bb8824fda66781d431560c714 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 9 Jan 2018 12:04:18 -0700 Subject: dm mpath: Use blk_path_error Uses common code for determining if an error should be retried on alternate path. 
Acked-by: Mike Snitzer Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/md/dm-mpath.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f7810cc869ac..ef57c6d1c887 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1475,21 +1475,6 @@ static void activate_path_work(struct work_struct *work) activate_or_offline_path(pgpath); } -static int noretry_error(blk_status_t error) -{ - switch (error) { - case BLK_STS_NOTSUPP: - case BLK_STS_NOSPC: - case BLK_STS_TARGET: - case BLK_STS_NEXUS: - case BLK_STS_MEDIUM: - return 1; - } - - /* Anything else could be a path failure, so should be retried */ - return 0; -} - static int multipath_end_io(struct dm_target *ti, struct request *clone, blk_status_t error, union map_info *map_context) { @@ -1508,7 +1493,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, * request into dm core, which will remake a clone request and * clone bios for it and resubmit it later. */ - if (error && !noretry_error(error)) { + if (error && blk_path_error(error)) { struct multipath *m = ti->private; r = DM_ENDIO_REQUEUE; @@ -1544,7 +1529,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, unsigned long flags; int r = DM_ENDIO_DONE; - if (!*error || noretry_error(*error)) + if (!*error || !blk_path_error(*error)) goto done; if (pgpath) -- cgit v1.2.3 From 612e8e9350fd19cae6900cf36ea0c6892d1a0dca Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 10 Jan 2018 12:28:16 +0100 Subject: x86/alternatives: Fix optimize_nops() checking The alternatives code checks only the first byte whether it is a NOP, but with NOPs in front of the payload and having actual instructions after it breaks the "optimized' test. Make sure to scan all bytes before deciding to optimize the NOPs in there. 
Reported-by: David Woodhouse Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tom Lendacky Cc: Andi Kleen Cc: Tim Chen Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Dave Hansen Cc: Andi Kleen Cc: Andrew Lutomirski Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/20180110112815.mgciyf5acwacphkq@pd.tnic --- arch/x86/kernel/alternative.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3344d3382e91..e0b97e4d1db5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -344,9 +344,12 @@ done: static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; + int i; - if (instr[0] != 0x90) - return; + for (i = 0; i < a->padlen; i++) { + if (instr[i] != 0x90) + return; + } local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); -- cgit v1.2.3 From 5d75d3f2e736d6c8be79b677e10edb6af1bf7ed6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Jan 2018 11:30:08 -0700 Subject: blk-mq: add a few missing debugfs RQF_ flags We are missing ZONE_WRITE_LOCKED and MQ_TIMEOUT_EXPIRED, add them so the debugfs bits can decode them. Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 8adc83786256..25d41151073d 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -288,6 +288,8 @@ static const char *const rqf_name[] = { RQF_NAME(HASHED), RQF_NAME(STATS), RQF_NAME(SPECIAL_PAYLOAD), + RQF_NAME(ZONE_WRITE_LOCKED), + RQF_NAME(MQ_TIMEOUT_EXPIRED), }; #undef RQF_NAME -- cgit v1.2.3 From 76a86f9d027b342b8759a4b2f9f7fe046e284220 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Jan 2018 11:30:56 -0700 Subject: block: remove REQ_ATOM_POLL_SLEPT We don't need this to be an atomic flag, it can be a regular flag. 
We either end up on the same CPU for the polling, in which case the state is sane, or we did the sleep which would imply the needed barrier to ensure we see the right state. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 +- block/blk-mq.c | 5 ++--- block/blk.h | 2 -- include/linux/blkdev.h | 2 ++ 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 25d41151073d..dd890d5e0fbd 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -290,13 +290,13 @@ static const char *const rqf_name[] = { RQF_NAME(SPECIAL_PAYLOAD), RQF_NAME(ZONE_WRITE_LOCKED), RQF_NAME(MQ_TIMEOUT_EXPIRED), + RQF_NAME(MQ_POLL_SLEPT), }; #undef RQF_NAME #define RQAF_NAME(name) [REQ_ATOM_##name] = #name static const char *const rqaf_name[] = { RQAF_NAME(COMPLETE), - RQAF_NAME(POLL_SLEPT), }; #undef RQAF_NAME diff --git a/block/blk-mq.c b/block/blk-mq.c index 266fc4f6b046..3239ca9e199f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -483,7 +483,6 @@ void blk_mq_free_request(struct request *rq) blk_put_rl(blk_rq_rl(rq)); blk_mq_rq_update_state(rq, MQ_RQ_IDLE); - clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); if (rq->tag != -1) blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) @@ -2976,7 +2975,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, unsigned int nsecs; ktime_t kt; - if (test_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags)) + if (rq->rq_flags & RQF_MQ_POLL_SLEPT) return false; /* @@ -2996,7 +2995,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, if (!nsecs) return false; - set_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); + rq->rq_flags |= RQF_MQ_POLL_SLEPT; /* * This will be replaced with the stats tracking code, using diff --git a/block/blk.h b/block/blk.h index a68dbe312ea3..eb306c52121e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -124,8 +124,6 @@ void blk_account_io_done(struct request *req); */ 
enum rq_atomic_flags { REQ_ATOM_COMPLETE = 0, - - REQ_ATOM_POLL_SLEPT, }; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 007a7cf1f262..ba31674d8581 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -127,6 +127,8 @@ typedef __u32 __bitwise req_flags_t; #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) /* timeout is expired */ #define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20)) +/* already slept for hybrid poll */ +#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ -- cgit v1.2.3 From 0a72e7f44964b9ada3e5c15820372e9cb119bf80 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 9 Jan 2018 14:23:42 -0700 Subject: block: add accessors for setting/querying request deadline We reduce the resolution of request expiry, but since we're already using jiffies for this where resolution depends on the kernel configuration and since the timeout resolution is coarse anyway, that should be fine. 
Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-timeout.c | 14 ++++++++------ block/blk.h | 15 +++++++++++++++ include/linux/blkdev.h | 4 +++- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3239ca9e199f..7035c305be45 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -858,7 +858,7 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, while (true) { start = read_seqcount_begin(&rq->gstate_seq); gstate = READ_ONCE(rq->gstate); - deadline = rq->deadline; + deadline = blk_rq_deadline(rq); if (!read_seqcount_retry(&rq->gstate_seq, start)) break; cond_resched(); diff --git a/block/blk-timeout.c b/block/blk-timeout.c index ebe99963386c..a05e3676d24a 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -112,7 +112,9 @@ static void blk_rq_timed_out(struct request *req) static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, unsigned int *next_set) { - if (time_after_eq(jiffies, rq->deadline)) { + const unsigned long deadline = blk_rq_deadline(rq); + + if (time_after_eq(jiffies, deadline)) { list_del_init(&rq->timeout_list); /* @@ -120,8 +122,8 @@ static void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout */ if (!blk_mark_rq_complete(rq)) blk_rq_timed_out(rq); - } else if (!*next_set || time_after(*next_timeout, rq->deadline)) { - *next_timeout = rq->deadline; + } else if (!*next_set || time_after(*next_timeout, deadline)) { + *next_timeout = deadline; *next_set = 1; } } @@ -162,7 +164,7 @@ void blk_abort_request(struct request *req) * immediately and that scan sees the new timeout value. * No need for fancy synchronizations. 
*/ - req->deadline = jiffies; + blk_rq_set_deadline(req, jiffies); mod_timer(&req->q->timeout, 0); } else { if (blk_mark_rq_complete(req)) @@ -213,7 +215,7 @@ void blk_add_timer(struct request *req) if (!req->timeout) req->timeout = q->rq_timeout; - req->deadline = jiffies + req->timeout; + blk_rq_set_deadline(req, jiffies + req->timeout); req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED; /* @@ -228,7 +230,7 @@ void blk_add_timer(struct request *req) * than an existing one, modify the timer. Round up to next nearest * second. */ - expiry = blk_rq_timeout(round_jiffies_up(req->deadline)); + expiry = blk_rq_timeout(round_jiffies_up(blk_rq_deadline(req))); if (!timer_pending(&q->timeout) || time_before(expiry, q->timeout.expires)) { diff --git a/block/blk.h b/block/blk.h index eb306c52121e..bcd9cf7db0d4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -236,6 +236,21 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req) q->last_merge = NULL; } +/* + * Steal a bit from this field for legacy IO path atomic IO marking. Note that + * setting the deadline clears the bottom bit, potentially clearing the + * completed bit. The user has to be OK with this (current ones are fine). 
+ */ +static inline void blk_rq_set_deadline(struct request *rq, unsigned long time) +{ + rq->__deadline = time & ~0x1UL; +} + +static inline unsigned long blk_rq_deadline(struct request *rq) +{ + return rq->__deadline & ~0x1UL; +} + /* * Internal io_context interface */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ba31674d8581..aa6698cf483c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -257,7 +257,9 @@ struct request { struct u64_stats_sync aborted_gstate_sync; u64 aborted_gstate; - unsigned long deadline; + /* access through blk_rq_set_deadline, blk_rq_deadline */ + unsigned long __deadline; + struct list_head timeout_list; /* -- cgit v1.2.3 From e14575b3d457f5806d79b85886ef94d9c29e3b2a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Jan 2018 11:34:25 -0700 Subject: block: convert REQ_ATOM_COMPLETE to stealing rq->__deadline bit We only have one atomic flag left. Instead of using an entire unsigned long for that, steal the bottom bit of the deadline field that we already reserved. Remove ->atomic_flags, since it's now unused. 
Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq-debugfs.c | 9 +-------- block/blk-mq.c | 2 +- block/blk.h | 19 +++++++++---------- include/linux/blkdev.h | 2 -- 5 files changed, 12 insertions(+), 22 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index f843ae4f858d..7ba607527487 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2853,7 +2853,7 @@ void blk_start_request(struct request *req) wbt_issue(req->q->rq_wb, &req->issue_stat); } - BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); + BUG_ON(blk_rq_is_complete(req)); blk_add_timer(req); } EXPORT_SYMBOL(blk_start_request); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index dd890d5e0fbd..19db3f583bf1 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -294,12 +294,6 @@ static const char *const rqf_name[] = { }; #undef RQF_NAME -#define RQAF_NAME(name) [REQ_ATOM_##name] = #name -static const char *const rqaf_name[] = { - RQAF_NAME(COMPLETE), -}; -#undef RQAF_NAME - int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) { const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; @@ -316,8 +310,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) seq_puts(m, ", .rq_flags="); blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, ARRAY_SIZE(rqf_name)); - seq_puts(m, ", .atomic_flags="); - blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name)); + seq_printf(m, ", complete=%d", blk_rq_is_complete(rq)); seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag, rq->internal_tag); if (mq_ops->show_rq) diff --git a/block/blk-mq.c b/block/blk-mq.c index 7035c305be45..87e6b10c8ecb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -294,7 +294,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->rq_flags |= RQF_PREEMPT; if (blk_queue_io_stat(data->q)) rq->rq_flags |= RQF_IO_STAT; - /* do not touch atomic flags, it 
needs atomic ops against the timer */ rq->cpu = -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); @@ -313,6 +312,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->special = NULL; /* tag was already set */ rq->extra_len = 0; + rq->__deadline = 0; INIT_LIST_HEAD(&rq->timeout_list); rq->timeout = 0; diff --git a/block/blk.h b/block/blk.h index bcd9cf7db0d4..c84ae0e21ebd 100644 --- a/block/blk.h +++ b/block/blk.h @@ -119,25 +119,24 @@ void blk_account_io_start(struct request *req, bool new_io); void blk_account_io_completion(struct request *req, unsigned int bytes); void blk_account_io_done(struct request *req); -/* - * Internal atomic flags for request handling - */ -enum rq_atomic_flags { - REQ_ATOM_COMPLETE = 0, -}; - /* * EH timer and IO completion will both attempt to 'grab' the request, make - * sure that only one of them succeeds + * sure that only one of them succeeds. Steal the bottom bit of the + * __deadline field for this. */ static inline int blk_mark_rq_complete(struct request *rq) { - return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); + return test_and_set_bit(0, &rq->__deadline); } static inline void blk_clear_rq_complete(struct request *rq) { - clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); + clear_bit(0, &rq->__deadline); +} + +static inline bool blk_rq_is_complete(struct request *rq) +{ + return test_bit(0, &rq->__deadline); } /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aa6698cf483c..d4b2f7bb18d6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -156,8 +156,6 @@ struct request { int internal_tag; - unsigned long atomic_flags; - /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ int tag; -- cgit v1.2.3 From 7c3fb70f0341f9d924818e648906774921f4bcb3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Jan 2018 11:46:39 -0700 Subject: block: rearrange a few request fields for better cache 
layout Move completion related items (like the call single data) near the end of the struct, instead of mixing them in with the initial queueing related fields. Move queuelist below the bio structures. Then we have all queueing related bits in the first cache line. This yields a 1.5-2% increase in IOPS for a null_blk test, both for sync and for high thread count access. Sync test goes from 975K to 992K, 32-thread case from 20.8M to 21.2M IOPS. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 18 +++++++++--------- include/linux/blkdev.h | 28 +++++++++++++++------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 87e6b10c8ecb..435a5a0d441f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -270,8 +270,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct request *rq = tags->static_rqs[tag]; - rq->rq_flags = 0; - if (data->flags & BLK_MQ_REQ_INTERNAL) { rq->tag = -1; rq->internal_tag = tag; @@ -285,26 +283,22 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, data->hctx->tags->rqs[rq->tag] = rq; } - INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ rq->q = data->q; rq->mq_ctx = data->ctx; + rq->rq_flags = 0; + rq->cpu = -1; rq->cmd_flags = op; if (data->flags & BLK_MQ_REQ_PREEMPT) rq->rq_flags |= RQF_PREEMPT; if (blk_queue_io_stat(data->q)) rq->rq_flags |= RQF_IO_STAT; - rq->cpu = -1; + INIT_LIST_HEAD(&rq->queuelist); INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->rq_disk = NULL; rq->part = NULL; rq->start_time = jiffies; -#ifdef CONFIG_BLK_CGROUP - rq->rl = NULL; - set_start_time_ns(rq); - rq->io_start_time_ns = 0; -#endif rq->nr_phys_segments = 0; #if defined(CONFIG_BLK_DEV_INTEGRITY) rq->nr_integrity_segments = 0; @@ -321,6 +315,12 @@ static struct request *blk_mq_rq_ctx_init(struct
blk_mq_alloc_data *data, rq->end_io_data = NULL; rq->next_rq = NULL; +#ifdef CONFIG_BLK_CGROUP + rq->rl = NULL; + set_start_time_ns(rq); + rq->io_start_time_ns = 0; +#endif + data->ctx->rq_dispatched[op_is_sync(op)]++; return rq; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4b2f7bb18d6..71a9371c8182 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -141,12 +141,6 @@ typedef __u32 __bitwise req_flags_t; * especially blk_mq_rq_ctx_init() to take care of the added fields. */ struct request { - struct list_head queuelist; - union { - call_single_data_t csd; - u64 fifo_time; - }; - struct request_queue *q; struct blk_mq_ctx *mq_ctx; @@ -164,6 +158,8 @@ struct request { struct bio *bio; struct bio *biotail; + struct list_head queuelist; + /* * The hash is used inside the scheduler, and killed once the * request reaches the dispatch list. The ipi_list is only used @@ -211,19 +207,16 @@ struct request { struct hd_struct *part; unsigned long start_time; struct blk_issue_stat issue_stat; -#ifdef CONFIG_BLK_CGROUP - struct request_list *rl; /* rl this rq is alloced from */ - unsigned long long start_time_ns; - unsigned long long io_start_time_ns; /* when passed to hardware */ -#endif /* Number of scatter-gather DMA addr+len pairs after * physical address coalescing is performed. */ unsigned short nr_phys_segments; + #if defined(CONFIG_BLK_DEV_INTEGRITY) unsigned short nr_integrity_segments; #endif + unsigned short write_hint; unsigned short ioprio; unsigned int timeout; @@ -232,8 +225,6 @@ struct request { unsigned int extra_len; /* length of alignment and padding */ - unsigned short write_hint; - /* * On blk-mq, the lower bits of ->gstate (generation number and * state) carry the MQ_RQ_* state value and the upper bits the @@ -260,6 +251,11 @@ struct request { struct list_head timeout_list; + union { + call_single_data_t csd; + u64 fifo_time; + }; + /* * completion callback. 
*/ @@ -268,6 +264,12 @@ struct request { /* for bidi */ struct request *next_rq; + +#ifdef CONFIG_BLK_CGROUP + struct request_list *rl; /* rl this rq is alloced from */ + unsigned long long start_time_ns; + unsigned long long io_start_time_ns; /* when passed to hardware */ +#endif }; static inline bool blk_rq_is_scsi(struct request *rq) -- cgit v1.2.3 From 0478fe68685a428c71decc19abecd265a6d658dd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Jan 2018 16:54:52 +0100 Subject: block: silently forbid sending any ioctl to a partition After the first few months, the message has not led to many bug reports. It's been almost five years now, and in practice the main source of it seems to be MTIOCGET that someone is using to detect tape devices. While we could whitelist it just like CDROM_GET_CAPABILITY, this patch just removes the message altogether. The patch also removes the "safe but not very useful" ioctl whitelist, as suggested by Christoph. I doubt anything is using most of those ioctls _in general_, let alone on a partition. Reviewed-by: Christoph Hellwig Signed-off-by: Paolo Bonzini Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 5cddff44a2f8..60b471f8621b 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -693,38 +693,9 @@ int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd) if (bd && bd == bd->bd_contains) return 0; - /* Actually none of these is particularly useful on a partition, - * but they are safe. - */ - switch (cmd) { - case SCSI_IOCTL_GET_IDLUN: - case SCSI_IOCTL_GET_BUS_NUMBER: - case SCSI_IOCTL_GET_PCI: - case SCSI_IOCTL_PROBE_HOST: - case SG_GET_VERSION_NUM: - case SG_SET_TIMEOUT: - case SG_GET_TIMEOUT: - case SG_GET_RESERVED_SIZE: - case SG_SET_RESERVED_SIZE: - case SG_EMULATED_HOST: - return 0; - case CDROM_GET_CAPABILITY: - /* Keep this until we remove the printk below. 
udev sends it - * and we do not want to spam dmesg about it. CD-ROMs do - * not have partitions, so we get here only for disks. - */ - return -ENOIOCTLCMD; - default: - break; - } - if (capable(CAP_SYS_RAWIO)) return 0; - /* In particular, rule out all resets and host-specific ioctls. */ - printk_ratelimited(KERN_WARNING - "%s: sending ioctl %x to a partition!\n", current->comm, cmd); - return -ENOIOCTLCMD; } EXPORT_SYMBOL(scsi_verify_blk_ioctl); -- cgit v1.2.3 From b7435db8b8d11df94453708295c2ea5b09caff5f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Jan 2018 11:34:27 -0800 Subject: blk-mq: Add locking annotations to hctx_lock() and hctx_unlock() This patch avoids that sparse reports the following: block/blk-mq.c:637:33: warning: context imbalance in 'hctx_unlock' - unexpected unlock block/blk-mq.c:642:9: warning: context imbalance in 'hctx_lock' - wrong count at exit Signed-off-by: Bart Van Assche Cc: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 435a5a0d441f..8000ba6db07d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -559,6 +559,7 @@ static void __blk_mq_complete_request(struct request *rq) } static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx) + __releases(hctx->srcu) { if (!(hctx->flags & BLK_MQ_F_BLOCKING)) rcu_read_unlock(); @@ -567,6 +568,7 @@ static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx) } static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx) + __acquires(hctx->srcu) { if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { /* shut up gcc false positive */ -- cgit v1.2.3 From 2e83acb970684008baee471427270c029a76ddbd Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 8 Jan 2018 19:02:27 -0200 Subject: sctp: GFP_ATOMIC is not needed in sctp_setsockopt_events So replace it with GFP_USER and also add __GFP_NOWARN. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b4fb6e4886d2..54c046783a89 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2277,7 +2277,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { event = sctp_ulpevent_make_sender_dry_event(asoc, - GFP_ATOMIC); + GFP_USER | __GFP_NOWARN); if (!event) return -ENOMEM; -- cgit v1.2.3 From 5960cefab9df76600a1a7d4ff592c59e14616e88 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 8 Jan 2018 19:02:28 -0200 Subject: sctp: add a ceiling to optlen in some sockopts Hangbin Liu reported that some sockopt calls could cause the kernel to log a warning on memory allocation failure if the user supplied a large optlen value. That is because some of them called memdup_user() without a ceiling on optlen, allowing it to try to allocate really large buffers. This patch adds a ceiling by limiting optlen to the maximum allowed that would still make sense for these sockopt. Reported-by: Hangbin Liu Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 54c046783a89..022b94f11fd8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3498,6 +3498,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, if (optlen < sizeof(struct sctp_hmacalgo)) return -EINVAL; + optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) + + SCTP_AUTH_NUM_HMACS * sizeof(u16)); hmacs = memdup_user(optval, optlen); if (IS_ERR(hmacs)) @@ -3536,6 +3538,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk, if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; + /* authkey->sca_keylength is u16, so optlen can't be bigger than + * this. 
+ */ + optlen = min_t(unsigned int, optlen, USHRT_MAX + + sizeof(struct sctp_authkey)); authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) @@ -3893,6 +3900,9 @@ static int sctp_setsockopt_reset_streams(struct sock *sk, if (optlen < sizeof(*params)) return -EINVAL; + /* srs_number_streams is u16, so optlen can't be bigger than this. */ + optlen = min_t(unsigned int, optlen, USHRT_MAX + + sizeof(__u16) * sizeof(*params)); params = memdup_user(optval, optlen); if (IS_ERR(params)) -- cgit v1.2.3 From c76f97c99ae6d26d14c7f0e50e074382bfbc9f98 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 8 Jan 2018 19:02:29 -0200 Subject: sctp: make use of pre-calculated len Some sockopt handling functions were calculating the length of the buffer to be written to userspace and then calculating it again when actually writing the buffer, which could lead to some write not using an up-to-date length. This patch updates such places to just make use of the len variable. Also, replace some sizeof(type) to sizeof(var). Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 022b94f11fd8..9b01e994f661 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5025,7 +5025,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv len = sizeof(int); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int))) + if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len)) return -EFAULT; return 0; } @@ -5655,6 +5655,9 @@ copy_getaddrs: err = -EFAULT; goto out; } + /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too, + * but we can't change it anymore. 
+ */ + if (put_user(bytes_copied, optlen)) err = -EFAULT; out: @@ -6091,7 +6094,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); - if (copy_from_user(&params, optval, sizeof(params))) + if (copy_from_user(&params, optval, len)) return -EFAULT; } else return -EINVAL; @@ -6261,7 +6264,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, if (len < sizeof(struct sctp_authkeyid)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid))) + + len = sizeof(struct sctp_authkeyid); + if (copy_from_user(&val, optval, len)) return -EFAULT; asoc = sctp_id2assoc(sk, val.scact_assoc_id); @@ -6273,7 +6278,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, else val.scact_keynumber = ep->active_key_id; - len = sizeof(struct sctp_authkeyid); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) @@ -6299,7 +6303,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, if (len < sizeof(struct sctp_authchunks)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) + if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; to = p->gauth_chunks; @@ -6344,7 +6348,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, if (len < sizeof(struct sctp_authchunks)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) + if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; to = p->gauth_chunks; -- cgit v1.2.3 From 11d827a993a969c3c6ec56758ff63a44ba19b466 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Tue, 9 Jan 2018 11:02:33 +0800 Subject: net: gianfar_ptp: move set_fipers() to spinlock protecting area set_fipers() calling should be protected by spinlock in case that any interrupt breaks related registers setting and the function we expect.
This patch is to move set_fipers() to spinlock protecting area in ptp_gianfar_adjtime(). Signed-off-by: Yangbo Lu Acked-by: Richard Cochran Reviewed-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar_ptp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index 544114281ea7..9f8d4f8e57e3 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -319,11 +319,10 @@ static int ptp_gianfar_adjtime(struct ptp_clock_info *ptp, s64 delta) now = tmr_cnt_read(etsects); now += delta; tmr_cnt_write(etsects, now); + set_fipers(etsects); spin_unlock_irqrestore(&etsects->lock, flags); - set_fipers(etsects); - return 0; } -- cgit v1.2.3 From af60d61fa846725566f4a876ae04f891bdff1c7a Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Tue, 9 Jan 2018 09:52:22 +0100 Subject: doc: clarification about setting SO_ZEROCOPY Signed-off-by: Kornilios Kourtis Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/msg_zerocopy.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst index 77f6d7e25cfd..291a01264967 100644 --- a/Documentation/networking/msg_zerocopy.rst +++ b/Documentation/networking/msg_zerocopy.rst @@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option: if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one))) error(1, errno, "setsockopt zerocopy"); +Setting the socket option only works when the socket is in its initial +(TCP_CLOSED) state. Trying to set the option for a socket returned by accept(), +for example, will lead to an EBUSY error. In this case, the option should be set +to the listening socket and it will be inherited by the accepted sockets. 
Transmission ------------ -- cgit v1.2.3 From b0d55b5bc77755501be9de2c935d106ff8dba9ac Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 9 Jan 2018 19:58:18 +0800 Subject: caif_usb: use strlcpy() instead of strncpy() gcc-8 reports net/caif/caif_usb.c: In function 'cfusbl_device_notify': ./include/linux/string.h:245:9: warning: '__builtin_strncpy' output may be truncated copying 15 bytes from a string of length 15 [-Wstringop-truncation] The compiler requires that the input param 'len' of strncpy() be greater than the length of the src string, so that '\0' is copied as well. We can just use strlcpy() to avoid this warning. Signed-off-by: Xiongfeng Wang Signed-off-by: David S. Miller --- net/caif/caif_usb.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 5cd44f001f64..1a082a946045 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, dev_add_pack(&caif_usb_type); pack_added = true; - strncpy(layer->name, dev->name, - sizeof(layer->name) - 1); - layer->name[sizeof(layer->name) - 1] = 0; + strlcpy(layer->name, dev->name, sizeof(layer->name)); return 0; } -- cgit v1.2.3 From 95f566de0269a0c59fd6a737a147731302136429 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Tue, 9 Jan 2018 14:43:34 +0200 Subject: of_mdio: avoid MDIO bus removal when a PHY is missing If one of the child devices is missing, the of_mdiobus_register_phy() call will return -ENODEV. When a missing device is encountered the registration of the remaining PHYs is stopped and the MDIO bus will fail to register. Propagate all errors except ENODEV to avoid it. Signed-off-by: Madalin Bucur Reviewed-by: Andrew Lunn Signed-off-by: David S.
Miller --- drivers/of/of_mdio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 3481e69738b5..a327be1d264b 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -231,7 +231,12 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) rc = of_mdiobus_register_phy(mdio, child, addr); else rc = of_mdiobus_register_device(mdio, child, addr); - if (rc) + + if (rc == -ENODEV) + dev_err(&mdio->dev, + "MDIO device at address %d is missing.\n", + addr); + else if (rc) goto unregister; } @@ -255,7 +260,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) if (of_mdiobus_child_is_phy(child)) { rc = of_mdiobus_register_phy(mdio, child, addr); - if (rc) + if (rc && rc != -ENODEV) goto unregister; } } -- cgit v1.2.3 From 78bbb15f2239bc8e663aa20bbe1987c91a0b75f6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 9 Jan 2018 13:40:41 -0800 Subject: 8021q: fix a memory leak for VLAN 0 device A vlan device with vid 0 is allowed to be created but is not able to be fully cleaned up by unregister_vlan_dev() which checks for vlan_id!=0. Also, VLAN 0 is probably not a valid number and it is kinda "reserved" for HW accelerating devices, but it is probably too late to reject it from creation even if it makes sense. Instead, just remove the check in unregister_vlan_dev(). Reported-by: Dmitry Vyukov Fixes: ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)") Cc: Vlad Yasevich Cc: Ben Hutchings Signed-off-by: Cong Wang Signed-off-by: David S.
Miller --- net/8021q/vlan.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8dfdd94e430f..bad01b14a4ad 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); -- cgit v1.2.3 From fc2336505fb49a8b932a0a67a9745c408b79992c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 9 Jan 2018 18:14:28 -0800 Subject: nfp: always unmask aux interrupts at init The link state and exception interrupts may be masked when we probe. The firmware should in theory prevent sending (and automasking) those interrupts if the device is disabled, but if my reading of the FW code is correct there are firmwares out there with race conditions in this area. The interrupt may also be masked if previous driver which used the device was malfunctioning and we didn't load the FW (there is no other good way to comprehensively reset the PF). Note that FW unmasks the data interrupts by itself when vNIC is enabled, such helpful operation is not performed for LSC/EXN interrupts. Always unmask the auxiliary interrupts after request_irq(). On the remove path add missing PCI write flush before free_irq(). Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 1a603fdd9e80..99b0487b6d82 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -568,6 +568,7 @@ nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset, return err; } nn_writeb(nn, ctrl_offset, entry->entry); + nfp_net_irq_unmask(nn, entry->entry); return 0; } @@ -582,6 +583,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, unsigned int vector_idx) { nn_writeb(nn, ctrl_offset, 0xff); + nn_pci_flush(nn); free_irq(nn->irq_entries[vector_idx].vector, nn); } -- cgit v1.2.3 From 8e033a93b37f37aa9fca71a370a895155320af60 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 10 Jan 2018 11:42:43 +0100 Subject: mlxsw: pci: Wait after reset before accessing HW After performing reset driver polls on HW indication until learning that the reset is done, but immediately after reset the device becomes unresponsive which might lead to completion timeout on the first read. Wait for 100ms before starting the polling. Fixes: 233fa44bd67a ("mlxsw: pci: Implement reset done check") Signed-off-by: Yuval Mintz Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 7 ++++++- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 23f7d828cf67..6ef20e5cc77d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1643,7 +1643,12 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, return 0; } - wmb(); /* reset needs to be written before we read control register */ + /* Reset needs to be written before we read control register, and + * we must wait for the HW to become responsive once again + */ + wmb(); + msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS); + end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); do { u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index a6441208e9d9..fb082ad21b00 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -59,6 +59,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 +#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF #define MLXSW_PCI_FW_READY_MAGIC 0x5E -- cgit v1.2.3 From db84924c4fc3be1ef0c965d5ece5f6d785c77c5f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 10 Jan 2018 11:42:44 +0100 Subject: mlxsw: spectrum_qdisc: Don't use variable array in mlxsw_sp_tclass_congestion_enable Resolve the sparse warning: "sparse: Variable length array is used." Use 2 arrays for 2 PRM register accesses. Fixes: 96f17e0776c2 ("mlxsw: spectrum: Support RED qdisc offload") Signed-off-by: Jiri Pirko Reviewed-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index c33beac5def0..b5397da94d7f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -46,7 +46,8 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, u32 probability, bool is_ecn) { - char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)]; + char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; + char cwtp_cmd[MLXSW_REG_CWTP_LEN]; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int err; @@ -60,10 +61,10 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; - mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num, + mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); } static int -- cgit v1.2.3 From 862c03ee1deb7e19e0f9931682e0294ecd1fcaf9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Jan 2018 03:45:49 -0800 Subject: ipv6: fix possible mem leaks in ipv6_make_skb() ip6_setup_cork() might return an error, while memory allocations have been done and must be rolled back. Fixes: 6422398c2ab0 ("ipv6: introduce ipv6_make_skb") Signed-off-by: Eric Dumazet Cc: Vlad Yasevich Reported-by: Mike Maloney Acked-by: Mike Maloney Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f7dd51c42314..688ba5f7516b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1735,9 +1735,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk, cork.base.opt = NULL; v6_cork.opt = NULL; err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); - if (err) + if (err) { + ip6_cork_release(&cork, &v6_cork); return ERR_PTR(err); - + } if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag; -- cgit v1.2.3 From ccc12b11c5332c84442ef120dcd631523be75089 Mon Sep 17 00:00:00 2001 From: Mathieu Xhonneux Date: Wed, 10 Jan 2018 13:35:49 +0000 Subject: ipv6: sr: fix TLVs not being copied using setsockopt Function ipv6_push_rthdr4 allows to add an IPv6 Segment Routing Header to a socket through setsockopt, but the current implementation doesn't copy possible TLVs at the end of the SRH received from userspace. Therefore, the execution of the following branch if (sr_has_hmac(sr_phdr)) { ... } will never complete since the len and type fields of a possible HMAC TLV are not copied, hence seg6_get_tlv_hmac will return an error, and the HMAC will not be computed. This commit adds a memcpy in case TLVs have been appended to the SRH. Fixes: a149e7c7ce81 ("ipv6: sr: add support for SRH injection through setsockopt") Acked-by: David Lebrun Signed-off-by: Mathieu Xhonneux Signed-off-by: David S. 
Miller --- net/ipv6/exthdrs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 83bd75713535..bc68eb661970 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, sr_phdr->segments[0] = **addr_p; *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left]; + if (sr_ihdr->hdrlen > hops * 2) { + int tlvs_offset, tlvs_length; + + tlvs_offset = (1 + hops * 2) << 3; + tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3; + memcpy((char *)sr_phdr + tlvs_offset, + (char *)sr_ihdr + tlvs_offset, tlvs_length); + } + #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(sr_phdr)) { struct net *net = NULL; -- cgit v1.2.3 From c9f926000fe3b84135a81602a9f7e63a6a7898e2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 10 Jan 2018 09:34:02 +0100 Subject: scsi: libsas: Disable asynchronous aborts for SATA devices Handling CD-ROM devices from libsas is decidedly odd, as libata relies on SCSI EH to be started to figure out that no medium is present. So we cannot do asynchronous aborts for SATA devices. Fixes: 909657615d9 ("scsi: libsas: allow async aborts") Cc: # 4.12+ Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Tested-by: Yves-Alexis Perez Signed-off-by: Martin K. 
Petersen --- drivers/scsi/libsas/sas_scsi_host.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 91795eb56206..eea94aa4091c 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -486,15 +486,28 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type, int sas_eh_abort_handler(struct scsi_cmnd *cmd) { - int res; + int res = TMF_RESP_FUNC_FAILED; struct sas_task *task = TO_SAS_TASK(cmd); struct Scsi_Host *host = cmd->device->host; + struct domain_device *dev = cmd_to_domain_dev(cmd); struct sas_internal *i = to_sas_internal(host->transportt); + unsigned long flags; if (!i->dft->lldd_abort_task) return FAILED; - res = i->dft->lldd_abort_task(task); + spin_lock_irqsave(host->host_lock, flags); + /* We cannot do async aborts for SATA devices */ + if (dev_is_sata(dev) && !host->host_eh_scheduled) { + spin_unlock_irqrestore(host->host_lock, flags); + return FAILED; + } + spin_unlock_irqrestore(host->host_lock, flags); + + if (task) + res = i->dft->lldd_abort_task(task); + else + SAS_DPRINTK("no task to abort\n"); if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE) return SUCCESS; -- cgit v1.2.3 From cd52cb26e7ead5093635e98e07e221e4df482d34 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 26 Nov 2017 15:31:04 +0200 Subject: iser-target: Fix possible use-after-free in connection establishment error In case we fail to establish the connection we must drain our pre-posted login receive work request before continuing safely with connection teardown.
Fixes: a060b5629ab0 ("IB/core: generic RDMA READ/WRITE API") Cc: # 4.7+ Reported-by: Amrani, Ram Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/isert/ib_isert.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 720dfb3a1ac2..1b02283ce20e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -741,6 +741,7 @@ isert_connect_error(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->qp->qp_context; + ib_drain_qp(isert_conn->qp); list_del_init(&isert_conn->node); isert_conn->cm_id = NULL; isert_put_conn(isert_conn); -- cgit v1.2.3 From 57194fa763bfa1a0908f30d4c77835beaa118fcb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 9 Jan 2018 23:03:46 +0300 Subject: IB/hfi1: Prevent a NULL dereference In the original code, we set "fd->uctxt" to NULL and then dereference it which will cause an Oops. Fixes: f2a3bc00a03c ("IB/hfi1: Protect context array set/clear with spinlock") Cc: # 4.14.x Signed-off-by: Dan Carpenter Reviewed-by: Michael J. 
Ruhl Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7750a9c38b06..1df7da47f431 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -763,11 +763,11 @@ static int complete_subctxt(struct hfi1_filedata *fd) } if (ret) { - hfi1_rcd_put(fd->uctxt); - fd->uctxt = NULL; spin_lock_irqsave(&fd->dd->uctxt_lock, flags); __clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags); + hfi1_rcd_put(fd->uctxt); + fd->uctxt = NULL; } return ret; -- cgit v1.2.3 From 40950343932879247861ae152dcb55e4555afdff Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 10 Jan 2018 09:20:54 +0000 Subject: bpf: fix spelling mistake: "obusing" -> "abusing" Trivial fix to spelling mistake in error message text. Signed-off-by: Colin Ian King Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b414d6b2d470..96ab165c873c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4472,7 +4472,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) */ map_ptr = env->insn_aux_data[i + delta].map_ptr; if (map_ptr == BPF_MAP_PTR_POISON) { - verbose(env, "tail_call obusing map_ptr\n"); + verbose(env, "tail_call abusing map_ptr\n"); return -EINVAL; } if (!map_ptr->unpriv_array) -- cgit v1.2.3 From 7891a87efc7116590eaba57acc3c422487802c6f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Jan 2018 20:04:37 +0100 Subject: bpf: arsh is not supported in 32 bit alu thus reject it The following snippet was throwing an 'unknown opcode cc' warning in BPF interpreter: 0: (18) r0 = 0x0 2: (7b) *(u64 *)(r10 -16) = r0 3: (cc) (u32) r0 s>>= (u32) r0 4: (95) exit Although a number of JITs do support BPF_ALU | 
BPF_ARSH | BPF_{K,X} generation, not all of them do and interpreter does neither. We can leave existing ones and implement it later in bpf-next for the remaining ones, but reject this properly in verifier for the time being. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 5 ++++ tools/testing/selftests/bpf/test_verifier.c | 40 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 96ab165c873c..20eb04fd155e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2493,6 +2493,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } + if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) { + verbose(env, "BPF_ARSH not supported for 32 bit ALU\n"); + return -EINVAL; + } + if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 
64 : 32; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index b51017404c62..6bafa5456568 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -272,6 +272,46 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_ld_imm64 insn", .result = REJECT, }, + { + "arsh32 on imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "BPF_ARSH not supported for 32 bit ALU", + }, + { + "arsh32 on reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "BPF_ARSH not supported for 32 bit ALU", + }, + { + "arsh64 on imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "arsh64 on reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, { "no bpf_exit", .insns = { -- cgit v1.2.3 From bbeb6e4323dad9b5e0ee9f60c223dd532e2403b1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Jan 2018 23:25:05 +0100 Subject: bpf, array: fix overflow in max_entries and undefined behavior in index_mask syzkaller tried to alloc a map with 0xfffffffd entries out of a userns, and thus unprivileged. With the recently added logic in b2157399cc98 ("bpf: prevent out-of-bounds speculation") we round this up to the next power of two value for max_entries for unprivileged such that we can apply proper masking into potentially zeroed out map slots. However, this will generate an index_mask of 0xffffffff, and therefore a + 1 will let this overflow into new max_entries of 0. 
This will pass allocation, etc, and later on map access we still enforce on the original attr->max_entries value which was 0xfffffffd, therefore triggering GPF all over the place. Thus bail out on overflow in such case. Moreover, on 32 bit archs roundup_pow_of_two() can also not be used, since fls_long(max_entries - 1) can result in 32 and 1UL << 32 in 32 bit space is undefined. Therefore, do this by hand in a 64 bit variable. This fixes all the issues triggered by syzkaller's reproducers. Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Reported-by: syzbot+b0efb8e572d01bce1ae0@syzkaller.appspotmail.com Reported-by: syzbot+6c15e9744f75f2364773@syzkaller.appspotmail.com Reported-by: syzbot+d2f5524fb46fd3b312ee@syzkaller.appspotmail.com Reported-by: syzbot+61d23c95395cc90dbc2b@syzkaller.appspotmail.com Reported-by: syzbot+0d363c942452cca68c01@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index aaa319848e7d..ab94d304a634 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -56,7 +56,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) u32 elem_size, index_mask, max_entries; bool unpriv = !capable(CAP_SYS_ADMIN); struct bpf_array *array; - u64 array_size; + u64 array_size, mask64; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -74,13 +74,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) elem_size = round_up(attr->value_size, 8); max_entries = attr->max_entries; - index_mask = roundup_pow_of_two(max_entries) - 1; - if (unpriv) + /* On 32 bit archs roundup_pow_of_two() with max_entries that has + * upper most bit set in u32 space is undefined behavior due to + * resulting 1U << 32, so do it manually here in u64 space. 
+ */ + mask64 = fls_long(max_entries - 1); + mask64 = 1ULL << mask64; + mask64 -= 1; + + index_mask = mask64; + if (unpriv) { /* round up array size to nearest power of 2, * since cpu will speculate within index_mask limits */ max_entries = index_mask + 1; + /* Check for overflows. */ + if (max_entries < attr->max_entries) + return ERR_PTR(-E2BIG); + } array_size = sizeof(*array); if (percpu) -- cgit v1.2.3 From 23b19b7b50fe1867da8d431eea9cd3e4b6328c2c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 10 Jan 2018 23:48:05 +0100 Subject: ALSA: pcm: Remove yet superfluous WARN_ON() muldiv32() contains a snd_BUG_ON() (which is morphed as WARN_ON() with debug option) for checking the case of 0 / 0. This would be helpful if this happens only as a logical error; however, since the hw refine is performed with any data set provided by user, the inconsistent values that can trigger such a condition might be passed easily. Actually, syzbot caught this by passing some zero'ed old hw_params ioctl. So, having snd_BUG_ON() there is simply superfluous and rather harmful to give unnecessary confusions. Let's get rid of it. Reported-by: syzbot+7e6ee55011deeebce15d@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/pcm_lib.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index db7894bb028c..faa67861cbc1 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -560,7 +560,6 @@ static inline unsigned int muldiv32(unsigned int a, unsigned int b, { u_int64_t n = (u_int64_t) a * b; if (c == 0) { - snd_BUG_ON(!n); *r = 0; return UINT_MAX; } -- cgit v1.2.3 From ce4bb04cae8924792ed92f4af2793b77fc986f0e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Jan 2018 18:47:05 -0500 Subject: Fix a leak in socket(2) when we fail to allocate a file descriptor. 
Got broken by "make sock_alloc_file() do sock_release() on failures" - cleanup after sock_map_fd() failure got pulled all the way into sock_alloc_file(), but it used to serve the case when sock_map_fd() failed *before* getting to sock_alloc_file() as well, and that got lost. Trivial to fix, fortunately. Fixes: 8e1611e23579 (make sock_alloc_file() do sock_release() on failures) Reported-by: Dmitry Vyukov Signed-off-by: Al Viro --- net/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 42d8e9c9ccd5..82433a2200ec 100644 --- a/net/socket.c +++ b/net/socket.c @@ -432,8 +432,10 @@ static int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; int fd = get_unused_fd_flags(flags); - if (unlikely(fd < 0)) + if (unlikely(fd < 0)) { + sock_release(sock); return fd; + } newfile = sock_alloc_file(sock, flags, NULL); if (likely(!IS_ERR(newfile))) { -- cgit v1.2.3 From 43173a0ecc0c0f1a652847d84c1ef46ada73a974 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 11:29:23 -0800 Subject: Input: of_touchscreen - add MODULE_LICENSE The lack of the MODULE_LICENSE tag can lead to a warning here: WARNING: modpost: missing MODULE_LICENSE() in drivers/input/touchscreen/of_touchscreen.o I'm adding a license and description tag, but no MODULE_AUTHOR() as this file is a collection of standalone helper functions that were all added by different developers. 
Signed-off-by: Arnd Bergmann Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/of_touchscreen.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c index 8d7f9c8f2771..9642f103b726 100644 --- a/drivers/input/touchscreen/of_touchscreen.c +++ b/drivers/input/touchscreen/of_touchscreen.c @@ -13,6 +13,7 @@ #include #include #include +#include static bool touchscreen_get_prop_u32(struct device *dev, const char *property, @@ -185,3 +186,6 @@ void touchscreen_report_pos(struct input_dev *input, input_report_abs(input, multitouch ? ABS_MT_POSITION_Y : ABS_Y, y); } EXPORT_SYMBOL(touchscreen_report_pos); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Device-tree helpers functions for touchscreen devices"); -- cgit v1.2.3 From 48282969826b3d3c76e908182f69724d86d995fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=8C=AF=E6=9D=B0?= Date: Wed, 10 Jan 2018 16:12:22 -0800 Subject: Input: synaptics - Lenovo Thinkpad T460p devices should use RMI The touchpad/trackpoint on Lenovo Thinkpad T460p work with smbus/RMI.
Signed-off-by: Zhenjie Wang Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index ee5466a374bf..cd9f61cb3fc6 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -173,6 +173,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0046", /* X250 */ "LEN004a", /* W541 */ "LEN200f", /* T450s */ + "LEN2018", /* T460p */ NULL }; -- cgit v1.2.3 From a935424bb658f9ca37eb5e94119b857998341356 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 10 Jan 2018 21:31:56 +0100 Subject: PM / domains: Don't skip driver's ->suspend|resume_noirq() callbacks Commit 10da65423fdb (PM / Domains: Call driver's noirq callbacks) started to respect driver's noirq callbacks, but while doing that it also introduced a few potential problems. More precisely, in genpd_finish_suspend() and genpd_resume_noirq() the noirq callbacks at the driver level should be invoked, no matter of whether dev->power.wakeup_path is set or not. Additionally, the commit in question also made genpd_resume_noirq() to ignore the return value from pm_runtime_force_resume(). Let's fix both these issues! Fixes: 10da65423fdb (PM / Domains: Call driver's noirq callbacks) Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index f9dcc981b6b9..48255ce7c0ad 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1032,15 +1032,12 @@ static int genpd_prepare(struct device *dev) static int genpd_finish_suspend(struct device *dev, bool poweroff) { struct generic_pm_domain *genpd; - int ret; + int ret = 0; genpd = dev_to_genpd(dev); if (IS_ERR(genpd)) return -EINVAL; - if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd)) - return 0; - if (poweroff) ret = pm_generic_poweroff_noirq(dev); else @@ -1048,10 +1045,18 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff) if (ret) return ret; + if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd)) + return 0; + if (genpd->dev_ops.stop && genpd->dev_ops.start) { ret = pm_runtime_force_suspend(dev); - if (ret) + if (ret) { + if (poweroff) + pm_generic_restore_noirq(dev); + else + pm_generic_resume_noirq(dev); return ret; + } } genpd_lock(genpd); @@ -1085,7 +1090,7 @@ static int genpd_suspend_noirq(struct device *dev) static int genpd_resume_noirq(struct device *dev) { struct generic_pm_domain *genpd; - int ret = 0; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -1094,21 +1099,20 @@ static int genpd_resume_noirq(struct device *dev) return -EINVAL; if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd)) - return 0; + return pm_generic_resume_noirq(dev); genpd_lock(genpd); genpd_sync_power_on(genpd, true, 0); genpd->suspended_count--; genpd_unlock(genpd); - if (genpd->dev_ops.stop && genpd->dev_ops.start) + if (genpd->dev_ops.stop && genpd->dev_ops.start) { ret = pm_runtime_force_resume(dev); + if (ret) + return ret; + } - ret = pm_generic_resume_noirq(dev); - if (ret) - return ret; - - return ret; + return pm_generic_resume_noirq(dev); } /** -- cgit v1.2.3 From 
0d154fddd658ce7b1f89897c99f7b675a6531423 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 9 Jan 2018 22:26:58 +0200 Subject: ACPI / PMIC: Convert to use builtin_platform_driver() macro All of PMIC OpRegion drivers can't be modules, thus, convert them to use builtin_platform_driver() macro and remove redundant MODULE_*() macros. No functional change intended. Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pmic/intel_pmic_bxtwc.c | 7 +------ drivers/acpi/pmic/intel_pmic_chtdc_ti.c | 5 +---- drivers/acpi/pmic/intel_pmic_chtwc.c | 7 +------ drivers/acpi/pmic/intel_pmic_crc.c | 7 +------ drivers/acpi/pmic/intel_pmic_xpower.c | 7 +------ 5 files changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/acpi/pmic/intel_pmic_bxtwc.c b/drivers/acpi/pmic/intel_pmic_bxtwc.c index 2012d1d87dc3..886ac8b93cd0 100644 --- a/drivers/acpi/pmic/intel_pmic_bxtwc.c +++ b/drivers/acpi/pmic/intel_pmic_bxtwc.c @@ -412,9 +412,4 @@ static struct platform_driver intel_bxtwc_pmic_opregion_driver = { }, .id_table = bxt_wc_opregion_id_table, }; - -static int __init intel_bxtwc_pmic_opregion_driver_init(void) -{ - return platform_driver_register(&intel_bxtwc_pmic_opregion_driver); -} -device_initcall(intel_bxtwc_pmic_opregion_driver_init); +builtin_platform_driver(intel_bxtwc_pmic_opregion_driver); diff --git a/drivers/acpi/pmic/intel_pmic_chtdc_ti.c b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c index 109c1e9c9c7a..f6d73a243d80 100644 --- a/drivers/acpi/pmic/intel_pmic_chtdc_ti.c +++ b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c @@ -131,7 +131,4 @@ static struct platform_driver chtdc_ti_pmic_opregion_driver = { }, .id_table = chtdc_ti_pmic_opregion_id_table, }; -module_platform_driver(chtdc_ti_pmic_opregion_driver); - -MODULE_DESCRIPTION("Dollar Cove TI PMIC opregion driver"); -MODULE_LICENSE("GPL v2"); +builtin_platform_driver(chtdc_ti_pmic_opregion_driver); diff --git 
a/drivers/acpi/pmic/intel_pmic_chtwc.c b/drivers/acpi/pmic/intel_pmic_chtwc.c index 813b829e1c24..9912422c8185 100644 --- a/drivers/acpi/pmic/intel_pmic_chtwc.c +++ b/drivers/acpi/pmic/intel_pmic_chtwc.c @@ -264,7 +264,6 @@ static const struct platform_device_id cht_wc_opregion_id_table[] = { { .name = "cht_wcove_region" }, {}, }; -MODULE_DEVICE_TABLE(platform, cht_wc_opregion_id_table); static struct platform_driver intel_cht_wc_pmic_opregion_driver = { .probe = intel_cht_wc_pmic_opregion_probe, @@ -273,8 +272,4 @@ static struct platform_driver intel_cht_wc_pmic_opregion_driver = { }, .id_table = cht_wc_opregion_id_table, }; -module_platform_driver(intel_cht_wc_pmic_opregion_driver); - -MODULE_DESCRIPTION("Intel CHT Whiskey Cove PMIC operation region driver"); -MODULE_AUTHOR("Hans de Goede "); -MODULE_LICENSE("GPL"); +builtin_platform_driver(intel_cht_wc_pmic_opregion_driver); diff --git a/drivers/acpi/pmic/intel_pmic_crc.c b/drivers/acpi/pmic/intel_pmic_crc.c index d7f1761ab1bc..7ffa74048107 100644 --- a/drivers/acpi/pmic/intel_pmic_crc.c +++ b/drivers/acpi/pmic/intel_pmic_crc.c @@ -201,9 +201,4 @@ static struct platform_driver intel_crc_pmic_opregion_driver = { .name = "crystal_cove_pmic", }, }; - -static int __init intel_crc_pmic_opregion_driver_init(void) -{ - return platform_driver_register(&intel_crc_pmic_opregion_driver); -} -device_initcall(intel_crc_pmic_opregion_driver_init); +builtin_platform_driver(intel_crc_pmic_opregion_driver); diff --git a/drivers/acpi/pmic/intel_pmic_xpower.c b/drivers/acpi/pmic/intel_pmic_xpower.c index 6c99d3f81095..316e55174aa9 100644 --- a/drivers/acpi/pmic/intel_pmic_xpower.c +++ b/drivers/acpi/pmic/intel_pmic_xpower.c @@ -278,9 +278,4 @@ static struct platform_driver intel_xpower_pmic_opregion_driver = { .name = "axp288_pmic_acpi", }, }; - -static int __init intel_xpower_pmic_opregion_driver_init(void) -{ - return platform_driver_register(&intel_xpower_pmic_opregion_driver); -} 
-device_initcall(intel_xpower_pmic_opregion_driver_init); +builtin_platform_driver(intel_xpower_pmic_opregion_driver); -- cgit v1.2.3 From 4636bda86aa1f34f45c629477476a0dcfa04e597 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 5 Jan 2018 00:59:05 -0800 Subject: drm/i915: Whitelist SLICE_COMMON_ECO_CHICKEN1 on Geminilake. Geminilake requires the 3D driver to select whether barriers are intended for compute shaders, or tessellation control shaders, by whacking a "Barrier Mode" bit in SLICE_COMMON_ECO_CHICKEN1 when switching pipelines. Failure to do this properly can result in GPU hangs. Unfortunately, this means it needs to switch mid-batch, so only userspace can properly set it. To facilitate this, the kernel needs to whitelist the register. The workarounds page currently tags this as applying to Broxton only, but that doesn't make sense. The documentation for the register it references says the bit userspace is supposed to toggle only exists on Geminilake. Empirically, the Mesa patch to toggle this bit appears to fix intermittent GPU hangs in tessellation control shader barrier tests on Geminilake; we haven't seen those hangs on Broxton. v2: Mention WA #0862 in the comment (it doesn't have a name). 
Signed-off-by: Kenneth Graunke Acked-by: Rodrigo Vivi Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180105085905.9298-1-kenneth@whitecape.org (cherry picked from commit ab062639edb0412daf6de540725276b9a5d217f9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 333f40bc03bb..7923dfd9963c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7027,6 +7027,8 @@ enum { #define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308) #define DISABLE_PIXEL_MASK_CAMMING (1<<14) +#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) + #define GEN7_L3SQCREG1 _MMIO(0xB010) #define VLV_B0_WA_L3SQCREG1_VALUE 0x00D30000 diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ab5bf4e2e28e..6074e04dc99f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1390,6 +1390,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ + ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); + if (ret) + return ret; + /* WaToEnableHwFixForPushConstHWBug:glk */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); -- cgit v1.2.3 From 5005c8514285ae4f28e862f8d91faaa2015e03a3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 6 Jan 2018 10:56:18 +0000 Subject: drm/i915: Don't adjust priority on an already signaled fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we retire a signaled fence, we free the dependency tree. 
However, we skip clearing the list so that if we then try to adjust the priority of the signaled fence, we may walk the list of freed dependencies. [ 3083.156757] ================================================================== [ 3083.156806] BUG: KASAN: use-after-free in execlists_schedule+0x199/0x660 [i915] [ 3083.156810] Read of size 8 at addr ffff8806bf20f400 by task Xorg/831 [ 3083.156815] CPU: 0 PID: 831 Comm: Xorg Not tainted 4.15.0-rc6-no-psn+ #1 [ 3083.156817] Hardware name: Notebook N24_25BU/N24_25BU, BIOS 5.12 02/17/2017 [ 3083.156818] Call Trace: [ 3083.156823] dump_stack+0x5c/0x7a [ 3083.156827] print_address_description+0x6b/0x290 [ 3083.156830] kasan_report+0x28f/0x380 [ 3083.156872] ? execlists_schedule+0x199/0x660 [i915] [ 3083.156914] execlists_schedule+0x199/0x660 [i915] [ 3083.156956] ? intel_crtc_atomic_check+0x146/0x4e0 [i915] [ 3083.156997] ? execlists_submit_request+0xe0/0xe0 [i915] [ 3083.157038] ? i915_vma_misplaced.part.4+0x25/0xb0 [i915] [ 3083.157079] ? __i915_vma_do_pin+0x7c8/0xc80 [i915] [ 3083.157121] ? intel_atomic_state_alloc+0x44/0x60 [i915] [ 3083.157130] ? drm_atomic_helper_page_flip+0x3e/0xb0 [drm_kms_helper] [ 3083.157145] ? drm_mode_page_flip_ioctl+0x7d2/0x850 [drm] [ 3083.157159] ? drm_ioctl_kernel+0xa7/0xf0 [drm] [ 3083.157172] ? drm_ioctl+0x45b/0x560 [drm] [ 3083.157211] i915_gem_object_wait_priority+0x14c/0x2c0 [i915] [ 3083.157251] ? i915_gem_get_aperture_ioctl+0x150/0x150 [i915] [ 3083.157290] ? i915_vma_pin_fence+0x1d8/0x320 [i915] [ 3083.157331] ? intel_pin_and_fence_fb_obj+0x175/0x250 [i915] [ 3083.157372] ? intel_rotation_info_size+0x60/0x60 [i915] [ 3083.157413] ? intel_link_compute_m_n+0x80/0x80 [i915] [ 3083.157428] ? drm_dev_printk+0x1b0/0x1b0 [drm] [ 3083.157443] ? drm_dev_printk+0x1b0/0x1b0 [drm] [ 3083.157485] intel_prepare_plane_fb+0x2f8/0x5a0 [i915] [ 3083.157527] ? 
intel_crtc_get_vblank_counter+0x80/0x80 [i915] [ 3083.157536] drm_atomic_helper_prepare_planes+0xa0/0x1c0 [drm_kms_helper] [ 3083.157587] intel_atomic_commit+0x12e/0x4e0 [i915] [ 3083.157605] drm_atomic_helper_page_flip+0xa2/0xb0 [drm_kms_helper] [ 3083.157621] drm_mode_page_flip_ioctl+0x7d2/0x850 [drm] [ 3083.157638] ? drm_mode_cursor2_ioctl+0x10/0x10 [drm] [ 3083.157652] ? drm_lease_owner+0x1a/0x30 [drm] [ 3083.157668] ? drm_mode_cursor2_ioctl+0x10/0x10 [drm] [ 3083.157681] drm_ioctl_kernel+0xa7/0xf0 [drm] [ 3083.157696] drm_ioctl+0x45b/0x560 [drm] [ 3083.157711] ? drm_mode_cursor2_ioctl+0x10/0x10 [drm] [ 3083.157725] ? drm_getstats+0x20/0x20 [drm] [ 3083.157729] ? timerqueue_del+0x49/0x80 [ 3083.157732] ? __remove_hrtimer+0x62/0xb0 [ 3083.157735] ? hrtimer_try_to_cancel+0x173/0x210 [ 3083.157738] do_vfs_ioctl+0x13b/0x880 [ 3083.157741] ? ioctl_preallocate+0x140/0x140 [ 3083.157744] ? _raw_spin_unlock_irq+0xe/0x30 [ 3083.157746] ? do_setitimer+0x234/0x370 [ 3083.157750] ? SyS_setitimer+0x19e/0x1b0 [ 3083.157752] ? SyS_alarm+0x140/0x140 [ 3083.157755] ? __rcu_read_unlock+0x66/0x80 [ 3083.157757] ? 
__fget+0xc4/0x100 [ 3083.157760] SyS_ioctl+0x74/0x80 [ 3083.157763] entry_SYSCALL_64_fastpath+0x1a/0x7d [ 3083.157765] RIP: 0033:0x7f6135d0c6a7 [ 3083.157767] RSP: 002b:00007fff01451888 EFLAGS: 00003246 ORIG_RAX: 0000000000000010 [ 3083.157769] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f6135d0c6a7 [ 3083.157771] RDX: 00007fff01451950 RSI: 00000000c01864b0 RDI: 000000000000000c [ 3083.157772] RBP: 00007f613076f600 R08: 0000000000000001 R09: 0000000000000000 [ 3083.157773] R10: 0000000000000060 R11: 0000000000003246 R12: 0000000000000000 [ 3083.157774] R13: 0000000000000060 R14: 000000000000001b R15: 0000000000000060 [ 3083.157779] Allocated by task 831: [ 3083.157783] kmem_cache_alloc+0xc0/0x200 [ 3083.157822] i915_gem_request_await_dma_fence+0x2c4/0x5d0 [i915] [ 3083.157861] i915_gem_request_await_object+0x321/0x370 [i915] [ 3083.157900] i915_gem_do_execbuffer+0x1165/0x19c0 [i915] [ 3083.157937] i915_gem_execbuffer2+0x1ad/0x550 [i915] [ 3083.157950] drm_ioctl_kernel+0xa7/0xf0 [drm] [ 3083.157962] drm_ioctl+0x45b/0x560 [drm] [ 3083.157964] do_vfs_ioctl+0x13b/0x880 [ 3083.157966] SyS_ioctl+0x74/0x80 [ 3083.157968] entry_SYSCALL_64_fastpath+0x1a/0x7d [ 3083.157971] Freed by task 831: [ 3083.157973] kmem_cache_free+0x77/0x220 [ 3083.158012] i915_gem_request_retire+0x72c/0xa70 [i915] [ 3083.158051] i915_gem_request_alloc+0x1e9/0x8b0 [i915] [ 3083.158089] i915_gem_do_execbuffer+0xa96/0x19c0 [i915] [ 3083.158127] i915_gem_execbuffer2+0x1ad/0x550 [i915] [ 3083.158140] drm_ioctl_kernel+0xa7/0xf0 [drm] [ 3083.158153] drm_ioctl+0x45b/0x560 [drm] [ 3083.158155] do_vfs_ioctl+0x13b/0x880 [ 3083.158156] SyS_ioctl+0x74/0x80 [ 3083.158158] entry_SYSCALL_64_fastpath+0x1a/0x7d [ 3083.158162] The buggy address belongs to the object at ffff8806bf20f400 which belongs to the cache i915_dependency of size 64 [ 3083.158166] The buggy address is located 0 bytes inside of 64-byte region [ffff8806bf20f400, ffff8806bf20f440) [ 3083.158168] The buggy address belongs to the page: [ 
3083.158171] page:00000000d43decc4 count:1 mapcount:0 mapping: (null) index:0x0 [ 3083.158174] flags: 0x17ffe0000000100(slab) [ 3083.158179] raw: 017ffe0000000100 0000000000000000 0000000000000000 0000000180200020 [ 3083.158182] raw: ffffea001afc16c0 0000000500000005 ffff880731b881c0 0000000000000000 [ 3083.158184] page dumped because: kasan: bad access detected [ 3083.158187] Memory state around the buggy address: [ 3083.158190] ffff8806bf20f300: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158192] ffff8806bf20f380: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158195] >ffff8806bf20f400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158196] ^ [ 3083.158199] ffff8806bf20f480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158201] ffff8806bf20f500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158203] ================================================================== Reported-by: Alexandru Chirvasitu Reported-by: Mike Keehan Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104436 Fixes: 1f181225f8ec ("drm/i915/execlists: Keep request->priority for its lifetime") Signed-off-by: Chris Wilson Cc: Alexandru Chirvasitu Cc: Michał Winiarski Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Tested-by: Alexandru Chirvasitu Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180106105618.13532-1-chris@chris-wilson.co.uk (cherry picked from commit c218ee03b9315073ce43992792554dafa0626eb8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 18de6569d04a..5cfba89ed586 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -467,7 +467,7 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) struct drm_i915_gem_request *rq; struct intel_engine_cs *engine; - if 
(!dma_fence_is_i915(fence)) + if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) return; rq = to_request(fence); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d36e25607435..e71a8cd50498 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -974,6 +974,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + if (i915_gem_request_completed(request)) + return; + if (prio <= READ_ONCE(request->priotree.priority)) return; -- cgit v1.2.3 From 9cee7972bd0c69bc05d2e2f09bccd81cc439328b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Jan 2018 11:33:26 +0100 Subject: ASoC: stm32: select IIO_BUFFER The new stm32_adfsdm driver has incomplete dependencies as shown by Kconfig: warning: (SND_SOC_STM32_DFSDM && LMP91000) selects IIO_BUFFER_CB which has unmet direct dependencies (IIO && IIO_BUFFER) sound/soc/stm/stm32_adfsdm.o: In function `stm32_adfsdm_trigger': stm32_adfsdm.c:(.text+0x8c): undefined reference to `stm32_dfsdm_get_buff_cb' stm32_adfsdm.c:(.text+0x9c): undefined reference to `stm32_dfsdm_release_buff_cb' This makes the dependency on SND_SOC_STM32_DFSDM unconditional, so we can always resolve the stm32_dfsdm_get_buff_cb/stm32_dfsdm_release_buff_cb symbols and get the implied IIO_BUFFER dependency. compile-testing on other platforms is still possible as long as that IIO driver is there. 
Fixes: 55da094824c4 ("ASoC: stm32: add DFSDM DAI support") Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- sound/soc/stm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/stm/Kconfig b/sound/soc/stm/Kconfig index a78f7700d489..3ad881fc40a1 100644 --- a/sound/soc/stm/Kconfig +++ b/sound/soc/stm/Kconfig @@ -30,8 +30,9 @@ config SND_SOC_STM32_SPDIFRX config SND_SOC_STM32_DFSDM tristate "SoC Audio support for STM32 DFSDM" - depends on (ARCH_STM32 && OF && STM32_DFSDM_ADC) || COMPILE_TEST + depends on ARCH_STM32 || COMPILE_TEST depends on SND_SOC + depends on STM32_DFSDM_ADC select SND_SOC_GENERIC_DMAENGINE_PCM select SND_SOC_DMIC select IIO_BUFFER_CB -- cgit v1.2.3 From 111f4c3309397370f1fd9579754f06b82055041d Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Tue, 9 Jan 2018 13:03:21 +1100 Subject: drm/sun4i: hdmi: Check for unset best_parent in sun4i_tmds_determine_rate It is possible that if there is no exact rate match and "rounded = clk_hw_round_rate(parent, ideal)" gives high enough values (e.g. if rounded is 2 * ideal) that the condition "abs(rate - rounded / i) < abs(rate - best_parent / best_div)" is never met and best_parent is never set. This results in req->rate and req->best_parent_rate being assigned 0. To avoid this, we set best_parent to the first calculated rate if it is unset. The sun4i_tmds_calc_divider function already has a similar check. 
Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support") Signed-off-by: Jonathan Liu Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180109020323.11852-2-net147@gmail.com --- drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c index dc332ea56f6c..4d235e5ea31c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c @@ -102,7 +102,7 @@ static int sun4i_tmds_determine_rate(struct clk_hw *hw, goto out; } - if (abs(rate - rounded / i) < + if (!best_parent || abs(rate - rounded / i) < abs(rate - best_parent / best_div)) { best_parent = rounded; best_div = i; -- cgit v1.2.3 From 58faae28f64e9cb6d7f89596656db640392a1efc Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Tue, 9 Jan 2018 13:03:22 +1100 Subject: drm/sun4i: hdmi: Fix incorrect assignment in sun4i_tmds_determine_rate best_div is set to i which corresponds to rate halving when it should be set to j which corresponds to the divider. 
Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support") Signed-off-by: Jonathan Liu Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180109020323.11852-3-net147@gmail.com --- drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c index 4d235e5ea31c..88eeeaf34638 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c @@ -105,7 +105,7 @@ static int sun4i_tmds_determine_rate(struct clk_hw *hw, if (!best_parent || abs(rate - rounded / i) < abs(rate - best_parent / best_div)) { best_parent = rounded; - best_div = i; + best_div = j; } } } -- cgit v1.2.3 From 3b9c57cef4de80f29885e1edf69828de8d3fae6b Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Tue, 9 Jan 2018 13:03:23 +1100 Subject: drm/sun4i: hdmi: Add missing rate halving check in sun4i_tmds_determine_rate It was only checking the divider when determining the closest match if it could not match the requested rate exactly.
For a projector connected to an Olimex A20-OLinuXino-LIME using HDMI with a native resolution of 1280x800 and pixel clock of 83.5 MHz, this resulted in 1280x800 mode not being available and the following in dmesg when the kernel is booted with drm.debug=0x3e: [drm:drm_mode_debug_printmodeline] Modeline 37:"1280x800" 60 83500 1280 1352 1480 1680 800 810 816 831 0x48 0x5 [drm:drm_mode_prune_invalid] Not using 1280x800 mode: NOCLOCK Fixes: 9c5681011a0c ("drm/sun4i: Add HDMI support") Signed-off-by: Jonathan Liu Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180109020323.11852-4-net147@gmail.com --- drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c index 88eeeaf34638..3ecffa52c814 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c @@ -102,9 +102,12 @@ static int sun4i_tmds_determine_rate(struct clk_hw *hw, goto out; } - if (!best_parent || abs(rate - rounded / i) < - abs(rate - best_parent / best_div)) { + if (!best_parent || + abs(rate - rounded / i / j) < + abs(rate - best_parent / best_half / + best_div)) { best_parent = rounded; + best_half = i; best_div = j; } } -- cgit v1.2.3 From 6dee6722c6065f4850eab98c2b7b9f4a08a35813 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 11 Jan 2018 11:11:51 +0000 Subject: ASoC: stm32: fix a typo in stm32_adfsdm_probe() Fix a typo, we should return PTR_ERR(priv->iio_cb) instead of PTR_ERR(priv->iio_ch). 
Fixes: 55da094824c4 ("ASoC: stm32: add DFSDM DAI support") Signed-off-by: Wei Yongjun Signed-off-by: Mark Brown --- sound/soc/stm/stm32_adfsdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index af50891983c6..7306e3eca9e1 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -320,7 +320,7 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) priv->iio_cb = iio_channel_get_all_cb(&pdev->dev, NULL, NULL); if (IS_ERR(priv->iio_cb)) - return PTR_ERR(priv->iio_ch); + return PTR_ERR(priv->iio_cb); ret = devm_snd_soc_register_platform(&pdev->dev, &stm32_adfsdm_soc_platform); -- cgit v1.2.3 From 2353758bc2d427809f5feb15f046ded91c60afef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Jan 2018 11:34:46 +0100 Subject: IIO: ADC: stm32-dfsdm: avoid unused-variable warning Building with CONFIG_OF disabled produces a compiler warning: drivers/iio/adc/stm32-dfsdm-core.c: In function 'stm32_dfsdm_probe': drivers/iio/adc/stm32-dfsdm-core.c:245:22: error: unused variable 'pnode' [-Werror=unused-variable] This removes the variable and open-codes it in the only place it gets used to avoid that warning. 
Fixes: bed73904e76f ("IIO: ADC: add stm32 DFSDM core support") Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- drivers/iio/adc/stm32-dfsdm-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-core.c b/drivers/iio/adc/stm32-dfsdm-core.c index 72427414db7f..6cd655f8239b 100644 --- a/drivers/iio/adc/stm32-dfsdm-core.c +++ b/drivers/iio/adc/stm32-dfsdm-core.c @@ -242,7 +242,6 @@ MODULE_DEVICE_TABLE(of, stm32_dfsdm_of_match); static int stm32_dfsdm_probe(struct platform_device *pdev) { struct dfsdm_priv *priv; - struct device_node *pnode = pdev->dev.of_node; const struct of_device_id *of_id; const struct stm32_dfsdm_dev_data *dev_data; struct stm32_dfsdm *dfsdm; @@ -254,7 +253,7 @@ static int stm32_dfsdm_probe(struct platform_device *pdev) priv->pdev = pdev; - of_id = of_match_node(stm32_dfsdm_of_match, pnode); + of_id = of_match_node(stm32_dfsdm_of_match, pdev->dev.of_node); if (!of_id->data) { dev_err(&pdev->dev, "Data associated to device is missing\n"); return -EINVAL; -- cgit v1.2.3 From 25140717414c319bcc44b5aac39357a52d0bc8e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Jan 2018 11:34:46 +0100 Subject: IIO: ADC: stm32-dfsdm: avoid unused-variable warning Building with CONFIG_OF disabled produces a compiler warning: drivers/iio/adc/stm32-dfsdm-core.c: In function 'stm32_dfsdm_probe': drivers/iio/adc/stm32-dfsdm-core.c:245:22: error: unused variable 'pnode' [-Werror=unused-variable] This removes the variable and open-codes it in the only place it gets used to avoid that warning. 
Fixes: bed73904e76f ("IIO: ADC: add stm32 DFSDM core support") Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- drivers/iio/adc/stm32-dfsdm-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-core.c b/drivers/iio/adc/stm32-dfsdm-core.c index 72427414db7f..6cd655f8239b 100644 --- a/drivers/iio/adc/stm32-dfsdm-core.c +++ b/drivers/iio/adc/stm32-dfsdm-core.c @@ -242,7 +242,6 @@ MODULE_DEVICE_TABLE(of, stm32_dfsdm_of_match); static int stm32_dfsdm_probe(struct platform_device *pdev) { struct dfsdm_priv *priv; - struct device_node *pnode = pdev->dev.of_node; const struct of_device_id *of_id; const struct stm32_dfsdm_dev_data *dev_data; struct stm32_dfsdm *dfsdm; @@ -254,7 +253,7 @@ static int stm32_dfsdm_probe(struct platform_device *pdev) priv->pdev = pdev; - of_id = of_match_node(stm32_dfsdm_of_match, pnode); + of_id = of_match_node(stm32_dfsdm_of_match, pdev->dev.of_node); if (!of_id->data) { dev_err(&pdev->dev, "Data associated to device is missing\n"); return -EINVAL; -- cgit v1.2.3 From d5ff18bcd4b5e66396eab7e8271172157ee1253e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 11 Jan 2018 11:12:41 +0000 Subject: IIO: ADC: fix return value check in stm32_dfsdm_adc_probe() In case of error, the function devm_iio_device_alloc() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. 
Fixes: e2e6771c6462 ("IIO: ADC: add STM32 DFSDM sigma delta ADC support") Signed-off-by: Wei Yongjun Signed-off-by: Mark Brown --- drivers/iio/adc/stm32-dfsdm-adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index e628d04d5c77..5e871404f565 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -1100,9 +1100,9 @@ static int stm32_dfsdm_adc_probe(struct platform_device *pdev) dev_data = (const struct stm32_dfsdm_dev_data *)of_id->data; iio = devm_iio_device_alloc(dev, sizeof(*adc)); - if (IS_ERR(iio)) { + if (!iio) { dev_err(dev, "%s: Failed to allocate IIO\n", __func__); - return PTR_ERR(iio); + return -ENOMEM; } adc = iio_priv(iio); -- cgit v1.2.3 From 2a266f23550be997d783f27e704b9b40c4010292 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Wed, 10 Jan 2018 21:44:42 +0800 Subject: KVM MMU: check pending exception before injecting APF For example, when two APF's for page ready happen after one exit and the first one becomes pending, the second one will result in #DF. Instead, just handle the second page fault synchronously. 
Reported-by: Ross Zwisler Message-ID: Reported-by: Alec Blayne Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c4deb1f34faa..e577bacd4bd0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) { if (unlikely(!lapic_in_kernel(vcpu) || - kvm_event_needs_reinjection(vcpu))) + kvm_event_needs_reinjection(vcpu) || + vcpu->arch.exception.pending)) return false; if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) -- cgit v1.2.3 From b3defb791b26ea0683a93a4f49c77ec45ec96f10 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 9 Jan 2018 23:11:03 +0100 Subject: ALSA: seq: Make ioctls race-free The ALSA sequencer ioctls have no protection against racy calls while the concurrent operations may interfere with each other. As reported recently, for example, the concurrent calls of setting client pool with a combination of write calls may lead to either the unkillable dead-lock or UAF. As a slightly big hammer solution, this patch introduces the mutex to make each ioctl exclusive. Although this may reduce performance via parallel ioctl calls, usually it's not demanded for sequencer usages, hence it should be negligible.
Reported-by: Luo Quan Reviewed-by: Kees Cook Reviewed-by: Greg Kroah-Hartman Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 3 +++ sound/core/seq/seq_clientmgr.h | 1 + 2 files changed, 4 insertions(+) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 6e22eea72654..d01913404581 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -221,6 +221,7 @@ static struct snd_seq_client *seq_create_client1(int client_index, int poolsize) rwlock_init(&client->ports_lock); mutex_init(&client->ports_mutex); INIT_LIST_HEAD(&client->ports_list_head); + mutex_init(&client->ioctl_mutex); /* find free slot in the client table */ spin_lock_irqsave(&clients_lock, flags); @@ -2130,7 +2131,9 @@ static long snd_seq_ioctl(struct file *file, unsigned int cmd, return -EFAULT; } + mutex_lock(&client->ioctl_mutex); err = handler->func(client, &buf); + mutex_unlock(&client->ioctl_mutex); if (err >= 0) { /* Some commands includes a bug in 'dir' field. 
*/ if (handler->cmd == SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT || diff --git a/sound/core/seq/seq_clientmgr.h b/sound/core/seq/seq_clientmgr.h index c6614254ef8a..0611e1e0ed5b 100644 --- a/sound/core/seq/seq_clientmgr.h +++ b/sound/core/seq/seq_clientmgr.h @@ -61,6 +61,7 @@ struct snd_seq_client { struct list_head ports_list_head; rwlock_t ports_lock; struct mutex ports_mutex; + struct mutex ioctl_mutex; int convert32; /* convert 32->64bit */ /* output pool */ -- cgit v1.2.3 From ebabcf17bcd7ce968b1631ebe08236275698f39b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 7 Dec 2017 07:20:46 +0000 Subject: MIPS: Implement __multi3 for GCC7 MIPS64r6 builds GCC7 is a bit too eager to generate suboptimal __multi3 calls (128bit multiply with 128bit result) for MIPS64r6 builds, even in code which doesn't explicitly use 128bit types, such as the following: unsigned long func(unsigned long a, unsigned long b) { return a > (~0UL) / b; } Which GCC rearranges to: return (unsigned __int128)a * (unsigned __int128)b > 0xffffffffffffffff; Therefore implement __multi3, but only for MIPS64r6 with GCC7 as under normal circumstances we wouldn't expect any calls to __multi3 to be generated from kernel code. Reported-by: Thomas Petazzoni Signed-off-by: James Hogan Tested-by: Waldemar Brodkorb Cc: Ralf Baechle Cc: Maciej W.
Rozycki Cc: Matthew Fortune Cc: Florian Fainelli Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17890/ --- arch/mips/lib/Makefile | 3 ++- arch/mips/lib/libgcc.h | 17 ++++++++++++++++ arch/mips/lib/multi3.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 arch/mips/lib/multi3.c diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 78c2affeabf8..e84e12655fa8 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -16,4 +16,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o # libgcc-style stuff needed in the kernel -obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o +obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \ + ucmpdi2.o diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h index 28002ed90c2c..199a7f96282f 100644 --- a/arch/mips/lib/libgcc.h +++ b/arch/mips/lib/libgcc.h @@ -10,10 +10,18 @@ typedef int word_type __attribute__ ((mode (__word__))); struct DWstruct { int high, low; }; + +struct TWstruct { + long long high, low; +}; #elif defined(__LITTLE_ENDIAN) struct DWstruct { int low, high; }; + +struct TWstruct { + long long low, high; +}; #else #error I feel sick. #endif @@ -23,4 +31,13 @@ typedef union { long long ll; } DWunion; +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) +typedef int ti_type __attribute__((mode(TI))); + +typedef union { + struct TWstruct s; + ti_type ti; +} TWunion; +#endif + #endif /* __ASM_LIBGCC_H */ diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c new file mode 100644 index 000000000000..111ad475aa0c --- /dev/null +++ b/arch/mips/lib/multi3.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include "libgcc.h" + +/* + * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that + * specific case only we'll implement it here. 
+ * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981 + */ +#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7) + +/* multiply 64-bit values, low 64-bits returned */ +static inline long long notrace dmulu(long long a, long long b) +{ + long long res; + + asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* multiply 64-bit unsigned values, high 64-bits of 128-bit result returned */ +static inline long long notrace dmuhu(long long a, long long b) +{ + long long res; + + asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b)); + return res; +} + +/* multiply 128-bit values, low 128-bits returned */ +ti_type notrace __multi3(ti_type a, ti_type b) +{ + TWunion res, aa, bb; + + aa.ti = a; + bb.ti = b; + + /* + * a * b = (a.lo * b.lo) + * + 2^64 * (a.hi * b.lo + a.lo * b.hi) + * [+ 2^128 * (a.hi * b.hi)] + */ + res.s.low = dmulu(aa.s.low, bb.s.low); + res.s.high = dmuhu(aa.s.low, bb.s.low); + res.s.high += dmulu(aa.s.high, bb.s.low); + res.s.high += dmulu(aa.s.low, bb.s.high); + + return res.ti; +} +EXPORT_SYMBOL(__multi3); + +#endif /* 64BIT && CPU_MIPSR6 && GCC7 */ -- cgit v1.2.3 From 2a3e83c6f96c513f43ce5a8c9034608ea584a255 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Sat, 6 Jan 2018 02:00:13 +0100 Subject: x86/gart: Exclude GART aperture from vmcore On machines where the GART aperture is mapped over physical RAM /proc/vmcore contains the remapped range and reading it may cause hangs or reboots. In the past, the GART region was added into the resource map, implemented by commit 56dd669a138c ("[PATCH] Insert GART region into resource map") However, inserting the iomem_resource from the early GART code caused resource conflicts with some AGP drivers (bko#72201), which got avoided by reverting the patch in commit 707d4eefbdb3 ("Revert [PATCH] Insert GART region into resource map"). This revert introduced the /proc/vmcore bug. 
The vmcore ELF header is either prepared by the kernel (when using the kexec_file_load syscall) or by the kexec userspace (when using the kexec_load syscall). Since we no longer have the GART iomem resource, the userspace kexec has no way of knowing which region to exclude from the ELF header. Changes from v1 of this patch: Instead of excluding the aperture from the ELF header, this patch makes /proc/vmcore return zeroes in the second kernel when attempting to read the aperture region. This is done by reusing the gart_oldmem_pfn_is_ram infrastructure originally intended to exclude XEN ballooned memory. This works for both the kexec_file_load and kexec_load syscalls. [Note that the GART region is the same in the first and second kernels: regardless of whether the first kernel fixed up the northbridge/bios setting and mapped the aperture over physical memory, the second kernel finds the northbridge properly configured by the first kernel and the aperture never overlaps with e820 memory because the second kernel has a fake e820 map created from the crashkernel memory regions. Thus, the second kernel keeps the aperture address/size as configured by the first kernel.] register_oldmem_pfn_is_ram can only register one callback and returns an error if the callback has been registered already. Since XEN used to be the only user of this function, it never checks the return value. Now that we have more than one user, I added a WARN_ON just in case agp, XEN, or any other future user of register_oldmem_pfn_is_ram were to step on each other's toes.
Fixes: 707d4eefbdb3 ("Revert [PATCH] Insert GART region into resource map") Signed-off-by: Jiri Bohac Signed-off-by: Thomas Gleixner Cc: Baoquan He Cc: Toshi Kani Cc: David Airlie Cc: yinghai@kernel.org Cc: joro@8bytes.org Cc: kexec@lists.infradead.org Cc: Borislav Petkov Cc: Bjorn Helgaas Cc: Dave Young Cc: Vivek Goyal Link: https://lkml.kernel.org/r/20180106010013.73suskgxm7lox7g6@dwarf.suse.cz --- arch/x86/kernel/aperture_64.c | 46 ++++++++++++++++++++++++++++++++++++++++++- arch/x86/xen/mmu_hvm.c | 2 +- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index f5d92bc3b884..2c4d5ece7456 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * Using 512M as goal, in case kexec will load kernel_big @@ -56,6 +57,33 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; +#ifdef CONFIG_PROC_VMCORE +/* + * If the first kernel maps the aperture over e820 RAM, the kdump kernel will + * use the same range because it will remain configured in the northbridge. + * Trying to dump this area via /proc/vmcore may crash the machine, so exclude + * it from vmcore. + */ +static unsigned long aperture_pfn_start, aperture_page_count; + +static int gart_oldmem_pfn_is_ram(unsigned long pfn) +{ + return likely((pfn < aperture_pfn_start) || + (pfn >= aperture_pfn_start + aperture_page_count)); +} + +static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +{ + aperture_pfn_start = aper_base >> PAGE_SHIFT; + aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; + WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram)); +} +#else +static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +{ +} +#endif + /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. 
*/ @@ -435,8 +463,16 @@ int __init gart_iommu_hole_init(void) out: if (!fix && !fallback_aper_force) { - if (last_aper_base) + if (last_aper_base) { + /* + * If this is the kdump kernel, the first kernel + * may have allocated the range over its e820 RAM + * and fixed up the northbridge + */ + exclude_from_vmcore(last_aper_base, last_aper_order); + return 1; + } return 0; } @@ -473,6 +509,14 @@ out: return 0; } + /* + * If this is the kdump kernel _and_ the first kernel did not + * configure the aperture in the northbridge, this range may + * overlap with the first kernel's memory. We can't access the + * range through vmcore even though it should be part of the dump. + */ + exclude_from_vmcore(aper_alloc, aper_order); + /* Fix up the north bridges */ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { int bus, dev_base, dev_limit; diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c index 2cfcfe4f6b2a..dd2ad82eee80 100644 --- a/arch/x86/xen/mmu_hvm.c +++ b/arch/x86/xen/mmu_hvm.c @@ -75,6 +75,6 @@ void __init xen_hvm_init_mmu_ops(void) if (is_pagetable_dying_supported()) pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; #ifdef CONFIG_PROC_VMCORE - register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram); + WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram)); #endif } -- cgit v1.2.3 From c507babf10ead4d5c8cca704539b170752a8ac84 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 4 Jan 2018 18:24:33 +0000 Subject: KVM: arm/arm64: Check pagesize when allocating a hugepage at Stage 2 KVM only supports PMD hugepages at stage 2 but doesn't actually check that the provided hugepage memory pagesize is PMD_SIZE before populating stage 2 entries. In cases where the backing hugepage size is smaller than PMD_SIZE (such as when using contiguous hugepages), KVM can end up creating stage 2 mappings that extend beyond the supplied memory. Fix this by checking for the pagesize of userspace vma before creating PMD hugepage at stage 2. 
Fixes: 66b3923a1a0f77a ("arm64: hugetlb: add support for PTE contiguous bit") Signed-off-by: Punit Agrawal Cc: Marc Zyngier Cc: # v4.5+ Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b4b69c2d1012..9dea96380339 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1310,7 +1310,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma) && !logging_active) { + if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { -- cgit v1.2.3 From 90e1d8ccdbdb2384948c5f5067b0f28848ca339f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 8 Jan 2018 15:44:19 +0000 Subject: mmc: sdhci_f_sdh30: add ACPI support The Fujitsu SDH30 SDHCI controller may be described as a SCX0002 ACPI device on ACPI platforms incorporating the Socionext SynQuacer SoC. Given that mmc_of_parse() has already been made ACPI/DT agnostic, making the SDH30 driver ACPI capable is actually rather simple: all we need to do is make the call to sdhci_get_of_property() [which does not set any properties we care about] and the clock handling dependent on whether we are dealing with a DT device, and exposing the ACPI id via the platform_driver struct and the module metadata. 
Signed-off-by: Ard Biesheuvel Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 2 +- drivers/mmc/host/sdhci_f_sdh30.c | 52 ++++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index cc4fd07735a7..945ba50e6e6e 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -321,7 +321,7 @@ config MMC_SDHCI_BCM_KONA config MMC_SDHCI_F_SDH30 tristate "SDHCI support for Fujitsu Semiconductor F_SDH30" depends on MMC_SDHCI_PLTFM - depends on OF + depends on OF || ACPI help This selects the Secure Digital Host Controller Interface (SDHCI) Needed by some Fujitsu SoC for MMC / SD / SDIO support. diff --git a/drivers/mmc/host/sdhci_f_sdh30.c b/drivers/mmc/host/sdhci_f_sdh30.c index 04ca0d33a521..485f7591fae4 100644 --- a/drivers/mmc/host/sdhci_f_sdh30.c +++ b/drivers/mmc/host/sdhci_f_sdh30.c @@ -10,9 +10,11 @@ * the Free Software Foundation, version 2 of the License. 
*/ +#include #include #include #include +#include #include #include @@ -146,7 +148,6 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev) platform_set_drvdata(pdev, host); - sdhci_get_of_property(pdev); host->hw_name = "f_sdh30"; host->ops = &sdhci_f_sdh30_ops; host->irq = irq; @@ -158,25 +159,29 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev) goto err; } - priv->clk_iface = devm_clk_get(&pdev->dev, "iface"); - if (IS_ERR(priv->clk_iface)) { - ret = PTR_ERR(priv->clk_iface); - goto err; - } + if (dev_of_node(dev)) { + sdhci_get_of_property(pdev); - ret = clk_prepare_enable(priv->clk_iface); - if (ret) - goto err; + priv->clk_iface = devm_clk_get(&pdev->dev, "iface"); + if (IS_ERR(priv->clk_iface)) { + ret = PTR_ERR(priv->clk_iface); + goto err; + } - priv->clk = devm_clk_get(&pdev->dev, "core"); - if (IS_ERR(priv->clk)) { - ret = PTR_ERR(priv->clk); - goto err_clk; - } + ret = clk_prepare_enable(priv->clk_iface); + if (ret) + goto err; - ret = clk_prepare_enable(priv->clk); - if (ret) - goto err_clk; + priv->clk = devm_clk_get(&pdev->dev, "core"); + if (IS_ERR(priv->clk)) { + ret = PTR_ERR(priv->clk); + goto err_clk; + } + + ret = clk_prepare_enable(priv->clk); + if (ret) + goto err_clk; + } /* init vendor specific regs */ ctrl = sdhci_readw(host, F_SDH30_AHB_CONFIG); @@ -226,16 +231,27 @@ static int sdhci_f_sdh30_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_OF static const struct of_device_id f_sdh30_dt_ids[] = { { .compatible = "fujitsu,mb86s70-sdhci-3.0" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, f_sdh30_dt_ids); +#endif + +#ifdef CONFIG_ACPI +static const struct acpi_device_id f_sdh30_acpi_ids[] = { + { "SCX0002" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(acpi, f_sdh30_acpi_ids); +#endif static struct platform_driver sdhci_f_sdh30_driver = { .driver = { .name = "f_sdh30", - .of_match_table = f_sdh30_dt_ids, + .of_match_table = of_match_ptr(f_sdh30_dt_ids), + .acpi_match_table = 
ACPI_PTR(f_sdh30_acpi_ids), .pm = &sdhci_pltfm_pmops, }, .probe = sdhci_f_sdh30_probe, -- cgit v1.2.3 From b8155d3ff3ebbdfa10c6ec6c5f04b263670727e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 9 Jan 2018 12:39:10 +0300 Subject: mmc: tmio, renesas_sdhi: Remove unneeded NULL check The inconsistent NULL checking in this function causes static checker warnings. drivers/mmc/host/renesas_sdhi_sys_dmac.c:360 renesas_sdhi_sys_dmac_issue_tasklet_fn() error: we previously assumed 'host' could be null (see line 351) On reviewing this code, "host" can't ever be NULL so we can just remove the check. Signed-off-by: Dan Carpenter Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_sys_dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c index c8a74b2dee00..82d757c480b2 100644 --- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c @@ -348,7 +348,7 @@ static void renesas_sdhi_sys_dmac_issue_tasklet_fn(unsigned long priv) spin_lock_irq(&host->lock); - if (host && host->data) { + if (host->data) { if (host->data->flags & MMC_DATA_READ) chan = host->chan_rx; else -- cgit v1.2.3 From 2a609abe71ca59e4bd7139e161eaca2144ae6f2e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 11 Jan 2018 15:51:58 +0200 Subject: sdhci: Advertise 2.0v supply on SDIO host controller On Intel Edison the Broadcom Wi-Fi card, which is connected to SDIO, requires 2.0v, while the host, according to Intel Merrifield TRM, supports 1.8v supply only. The card announces itself as mmc2: new ultra high speed DDR50 SDIO card at address 0001 Introduce a custom OCR mask for SDIO host controller on Intel Merrifield and add a special case to sdhci_set_power_noreg() to override 2.0v supply by enforcing 1.8v power choice. 
Signed-off-by: Andy Shevchenko Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 2 ++ drivers/mmc/host/sdhci.c | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index c5b229b46314..00fa7a36b336 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -930,6 +930,8 @@ static int intel_mrfld_mmc_probe_slot(struct sdhci_pci_slot *slot) slot->host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; break; case INTEL_MRFLD_SDIO: + /* Advertise 2.0v for compatibility with the SDIO card's OCR */ + slot->host->ocr_mask = MMC_VDD_20_21 | MMC_VDD_165_195; slot->host->mmc->caps |= MMC_CAP_NONREMOVABLE | MMC_CAP_POWER_OFF_CARD; break; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index e9290a3439d5..80b1a59bc3c5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1434,6 +1434,13 @@ void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode, if (mode != MMC_POWER_OFF) { switch (1 << vdd) { case MMC_VDD_165_195: + /* + * Without a regulator, SDHCI does not support 2.0v + * so we only get here if the driver deliberately + * added the 2.0v range to ocr_avail. Map it to 1.8v + * for the purpose of turning on the power. + */ + case MMC_VDD_20_21: pwr = SDHCI_POWER_180; break; case MMC_VDD_29_30: -- cgit v1.2.3 From 2178790baa8639a1f516f91685ae64cc8d09fee7 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 10 Jan 2018 13:46:51 -0700 Subject: perf evsel: Fix incorrect handling of type _TERM_DRV_CFG Commit ("d0565132605f perf evsel: Enable type checking for perf_evsel_config_term types") assumes PERF_EVSEL__CONFIG_TERM_DRV_CFG isn't used and as such adds a BUG_ON(). Since the enumeration type is used in macro ADD_CONFIG_TERM() the change break CoreSight trace acquisition. This patch restores the original code. 
Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: d0565132605f ("perf evsel: Enable type checking for perf_evsel_config_term types") Link: http://lkml.kernel.org/r/1515617211-32024-1-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d934f04e3110..4eea3b404507 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -781,7 +781,7 @@ static void apply_config_terms(struct perf_evsel *evsel, attr->write_backward = term->val.overwrite ? 1 : 0; break; case PERF_EVSEL__CONFIG_TERM_DRV_CFG: - BUG_ON(1); + break; default: break; } -- cgit v1.2.3 From 33f782c49a95aac0c88203b29e4e276c2b12dad6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Jan 2018 11:31:25 +0100 Subject: null_blk: remove explicit 'select FAULT_INJECTION' Selecting FAULT_INJECTION causes a Kconfig warning when CONFIG_DEBUG_KERNEL is not set: warning: (BLK_DEV_NULL_BLK && DRM_I915_SELFTEST) selects FAULT_INJECTION which has unmet direct dependencies (DEBUG_KERNEL) The other drivers that use FAULT_INJECTION tend to have a separate Kconfig symbol for turning on that feature, so let's do the same thing here. This may add a bit more complexity than we like, but it avoids the warning and is more consistent with the rest of the kernel. 
Fixes: 93b570464cce ("null_blk: add option for managing IO timeouts") Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 5 ++++- drivers/block/null_blk.c | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 622d9a2c8dae..ad9b687a236a 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -19,7 +19,10 @@ if BLK_DEV config BLK_DEV_NULL_BLK tristate "Null test block driver" select CONFIGFS_FS - select FAULT_INJECTION + +config BLK_DEV_NULL_BLK_FAULT_INJECTION + bool "Support fault injection for Null test block driver" + depends on BLK_DEV_NULL_BLK && FAULT_INJECTION config BLK_DEV_FD tristate "Normal floppy disk support" diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 30ec0ac11a47..5b94e530570c 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -27,7 +27,9 @@ #define TICKS_PER_SEC 50ULL #define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC) +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static DECLARE_FAULT_ATTR(null_timeout_attr); +#endif static inline u64 mb_per_tick(int mbps) { @@ -165,8 +167,10 @@ static int g_home_node = NUMA_NO_NODE; module_param_named(home_node, g_home_node, int, S_IRUGO); MODULE_PARM_DESC(home_node, "Home node for the device"); +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static char g_timeout_str[80]; module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO); +#endif static int g_queue_mode = NULL_Q_MQ; @@ -1372,8 +1376,10 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req) static bool should_timeout_request(struct request *rq) { +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION if (g_timeout_str[0]) return should_fail(&null_timeout_attr, 1); +#endif return false; } @@ -1655,6 +1661,7 @@ static void null_validate_conf(struct nullb_device *dev) static bool null_setup_fault(void) { +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION if 
(!g_timeout_str[0]) return true; @@ -1662,6 +1669,7 @@ static bool null_setup_fault(void) return false; null_timeout_attr.verbose = 0; +#endif return true; } -- cgit v1.2.3 From ab271bd4dfd568060ffcf5a21b667c7c5df7ab99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:26:59 +0100 Subject: x86: kvm: propagate register_shrinker return code Patch "mm,vmscan: mark register_shrinker() as __must_check" is queued for 4.16 in linux-mm and adds a warning about the unchecked call to register_shrinker: arch/x86/kvm/mmu.c:5485:2: warning: ignoring return value of 'register_shrinker', declared with attribute warn_unused_result [-Wunused-result] This changes the kvm_mmu_module_init() function to fail itself when the call to register_shrinker fails. Signed-off-by: Arnd Bergmann Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e577bacd4bd0..2b8eb4da4d08 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5466,30 +5466,34 @@ static void mmu_destroy_caches(void) int kvm_mmu_module_init(void) { + int ret = -ENOMEM; + kvm_mmu_clear_all_pte_masks(); pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), 0, SLAB_ACCOUNT, NULL); if (!pte_list_desc_cache) - goto nomem; + goto out; mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", sizeof(struct kvm_mmu_page), 0, SLAB_ACCOUNT, NULL); if (!mmu_page_header_cache) - goto nomem; + goto out; if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) - goto nomem; + goto out; - register_shrinker(&mmu_shrinker); + ret = register_shrinker(&mmu_shrinker); + if (ret) + goto out; return 0; -nomem: +out: mmu_destroy_caches(); - return -ENOMEM; + return ret; } /* -- cgit v1.2.3 From bd89525a823ce6edddcedbe9aed79faa1b9cf544 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jan 2018 16:55:24 +0100 Subject: KVM: x86: emulate #UD 
while in guest mode This reverts commits ae1f57670703656cc9f293722c3b8b6782f8ab3f and ac9b305caa0df6f5b75d294e4b86c1027648991e. If the hardware doesn't support MOVBE, but L0 sets CPUID.01H:ECX.MOVBE in L1's emulated CPUID information, then L1 is likely to pass that CPUID bit through to L2. L2 will expect MOVBE to work, but if L1 doesn't intercept #UD, then any MOVBE instruction executed in L2 will raise #UD, and the exception will be delivered in L2. Commit ac9b305caa0df6f5b75d294e4b86c1027648991e is a better and more complete version of ae1f57670703 ("KVM: nVMX: Do not emulate #UD while in guest mode"); however, neither considers the above case. Suggested-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 9 +-------- arch/x86/kvm/vmx.c | 5 +---- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index bb31c801f1fc..3158dac87f82 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -361,7 +361,6 @@ static void recalc_intercepts(struct vcpu_svm *svm) { struct vmcb_control_area *c, *h; struct nested_state *g; - u32 h_intercept_exceptions; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -372,14 +371,9 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; - /* No need to intercept #UD if L1 doesn't intercept it */ - h_intercept_exceptions = - h->intercept_exceptions & ~(1U << UD_VECTOR); - c->intercept_cr = h->intercept_cr | g->intercept_cr; c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = - h_intercept_exceptions | g->intercept_exceptions; + c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; c->intercept = h->intercept | g->intercept; } @@ -2202,7 +2196,6 @@ static int ud_interception(struct vcpu_svm *svm) { int er; - WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er == EMULATE_USER_EXIT) return 0; diff --git a/arch/x86/kvm/vmx.c 
b/arch/x86/kvm/vmx.c index 5c14d65f676a..427fd3200dd8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1887,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | + eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == @@ -1905,8 +1905,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; - else - eb |= 1u << UD_VECTOR; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -5917,7 +5915,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er == EMULATE_USER_EXIT) return 0; -- cgit v1.2.3 From 4d885f2d3b778681f22ab7df9b4ccd104ffcd7f4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jan 2018 17:05:23 +0100 Subject: sound: Remove unused register_sound_midi() and co These helpers are no longer used after the removal of the legacy OSS drivers. Let's clean up. 
Signed-off-by: Takashi Iwai --- include/linux/sound.h | 2 -- sound/sound_core.c | 35 ----------------------------------- 2 files changed, 37 deletions(-) diff --git a/include/linux/sound.h b/include/linux/sound.h index 3c6d393c7f29..ec85b7a1f8d1 100644 --- a/include/linux/sound.h +++ b/include/linux/sound.h @@ -12,11 +12,9 @@ struct device; extern int register_sound_special(const struct file_operations *fops, int unit); extern int register_sound_special_device(const struct file_operations *fops, int unit, struct device *dev); extern int register_sound_mixer(const struct file_operations *fops, int dev); -extern int register_sound_midi(const struct file_operations *fops, int dev); extern int register_sound_dsp(const struct file_operations *fops, int dev); extern void unregister_sound_special(int unit); extern void unregister_sound_mixer(int unit); -extern void unregister_sound_midi(int unit); extern void unregister_sound_dsp(int unit); #endif /* _LINUX_SOUND_H */ diff --git a/sound/sound_core.c b/sound/sound_core.c index 99b73c675743..8b026b66cf18 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -452,26 +452,6 @@ int register_sound_mixer(const struct file_operations *fops, int dev) EXPORT_SYMBOL(register_sound_mixer); -/** - * register_sound_midi - register a midi device - * @fops: File operations for the driver - * @dev: Unit number to allocate - * - * Allocate a midi device. Unit is the number of the midi device requested. - * Pass -1 to request the next free midi unit. - * - * Return: On success, the allocated number is returned. On failure, - * a negative error code is returned. - */ - -int register_sound_midi(const struct file_operations *fops, int dev) -{ - return sound_insert_unit(&chains[2], fops, dev, 2, 130, - "midi", S_IRUSR | S_IWUSR, NULL); -} - -EXPORT_SYMBOL(register_sound_midi); - /* * DSP's are registered as a triple. Register only one and cheat * in open - see below. 
@@ -532,21 +512,6 @@ void unregister_sound_mixer(int unit) EXPORT_SYMBOL(unregister_sound_mixer); -/** - * unregister_sound_midi - unregister a midi device - * @unit: unit number to allocate - * - * Release a sound device that was allocated with register_sound_midi(). - * The unit passed is the return value from the register function. - */ - -void unregister_sound_midi(int unit) -{ - sound_remove_unit(&chains[2], unit); -} - -EXPORT_SYMBOL(unregister_sound_midi); - /** * unregister_sound_dsp - unregister a DSP device * @unit: unit number to allocate -- cgit v1.2.3 From 8ac60e733f7c9c41e4c125619a2f8390aca9d4db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jan 2018 17:10:34 +0100 Subject: sound: Remove leftover msnd init declarations Some obsoleted functions are still declared in sound_core.c. Get rid of them. Signed-off-by: Takashi Iwai --- sound/sound_core.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sound/sound_core.c b/sound/sound_core.c index 8b026b66cf18..b4efb22db561 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -119,13 +119,6 @@ struct sound_unit char name[32]; }; -#ifdef CONFIG_SOUND_MSNDCLAS -extern int msnd_classic_init(void); -#endif -#ifdef CONFIG_SOUND_MSNDPIN -extern int msnd_pinnacle_init(void); -#endif - /* * By default, OSS sound_core claims full legacy minor range (0-255) * of SOUND_MAJOR to trap open attempts to any sound minor and -- cgit v1.2.3 From c27d53fb445f2d93a1918c3dd7344770b0cd865b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Jan 2018 13:41:21 -0800 Subject: blk-mq: Reduce the number of if-statements in blk_mq_mark_tag_wait() This patch does not change any functionality but makes the blk_mq_mark_tag_wait() code slightly easier to read. 
Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-mq.c | 69 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 8000ba6db07d..afccd0848d6f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1104,58 +1104,59 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx, struct request *rq) { struct blk_mq_hw_ctx *this_hctx = *hctx; - bool shared_tags = (this_hctx->flags & BLK_MQ_F_TAG_SHARED) != 0; struct sbq_wait_state *ws; wait_queue_entry_t *wait; bool ret; - if (!shared_tags) { + if (!(this_hctx->flags & BLK_MQ_F_TAG_SHARED)) { if (!test_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state)) set_bit(BLK_MQ_S_SCHED_RESTART, &this_hctx->state); - } else { - wait = &this_hctx->dispatch_wait; - if (!list_empty_careful(&wait->entry)) - return false; - spin_lock(&this_hctx->lock); - if (!list_empty(&wait->entry)) { - spin_unlock(&this_hctx->lock); - return false; - } + /* + * It's possible that a tag was freed in the window between the + * allocation failure and adding the hardware queue to the wait + * queue. + * + * Don't clear RESTART here, someone else could have set it. + * At most this will cost an extra queue run. + */ + return blk_mq_get_driver_tag(rq, hctx, false); + } + + wait = &this_hctx->dispatch_wait; + if (!list_empty_careful(&wait->entry)) + return false; - ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx); - add_wait_queue(&ws->wait, wait); + spin_lock(&this_hctx->lock); + if (!list_empty(&wait->entry)) { + spin_unlock(&this_hctx->lock); + return false; } + ws = bt_wait_ptr(&this_hctx->tags->bitmap_tags, this_hctx); + add_wait_queue(&ws->wait, wait); + /* * It's possible that a tag was freed in the window between the * allocation failure and adding the hardware queue to the wait * queue. 
*/ ret = blk_mq_get_driver_tag(rq, hctx, false); - - if (!shared_tags) { - /* - * Don't clear RESTART here, someone else could have set it. - * At most this will cost an extra queue run. - */ - return ret; - } else { - if (!ret) { - spin_unlock(&this_hctx->lock); - return false; - } - - /* - * We got a tag, remove ourselves from the wait queue to ensure - * someone else gets the wakeup. - */ - spin_lock_irq(&ws->wait.lock); - list_del_init(&wait->entry); - spin_unlock_irq(&ws->wait.lock); + if (!ret) { spin_unlock(&this_hctx->lock); - return true; + return false; } + + /* + * We got a tag, remove ourselves from the wait queue to ensure + * someone else gets the wakeup. + */ + spin_lock_irq(&ws->wait.lock); + list_del_init(&wait->entry); + spin_unlock_irq(&ws->wait.lock); + spin_unlock(&this_hctx->lock); + + return true; } bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, -- cgit v1.2.3 From f32ab7547161b9fa7ebfbc4f18ea1eb3fd49fe25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=3D=3FUTF-8=3Fq=3FChristian=3D20K=3DC3=3DB6nig=3F=3D?= Date: Thu, 11 Jan 2018 14:23:29 +0100 Subject: x86/PCI: Add "pci=big_root_window" option for AMD 64-bit windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only try to enable a 64-bit window on AMD CPUs when "pci=big_root_window" is specified. This taints the kernel because the new 64-bit window uses address space we don't know anything about, and it may contain unreported devices or memory that would conflict with the window. The pci_amd_enable_64bit_bar() quirk that enables the window is specific to AMD CPUs. The generic solution would be to have the firmware enable the window and describe it in the host bridge's _CRS method, or at least describe it in the _PRS method so the OS would have the option of enabling it. 
Signed-off-by: Christian König [bhelgaas: changelog, extend doc, mention taint in dmesg] Signed-off-by: Bjorn Helgaas --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ arch/x86/include/asm/pci_x86.h | 1 + arch/x86/pci/common.c | 5 +++++ arch/x86/pci/fixup.c | 7 ++++++- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6571fbfdb2a1..619638362416 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3094,6 +3094,12 @@ pcie_scan_all Scan all possible PCIe devices. Otherwise we only look for one device below a PCIe downstream port. + big_root_window Try to add a big 64bit memory window to the PCIe + root complex on AMD CPUs. Some GFX hardware + can resize a BAR to allow access to all VRAM. + Adding the window is slightly risky (it may + conflict with unreported devices), so this + taints the kernel. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. 
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 7a5d6695abd3..eb66fa9cd0fc 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -38,6 +38,7 @@ do { \ #define PCI_NOASSIGN_ROMS 0x80000 #define PCI_ROOT_NO_CRS 0x100000 #define PCI_NOASSIGN_BARS 0x200000 +#define PCI_BIG_ROOT_WINDOW 0x400000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 7a5350d08cef..563049c483a1 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str) } else if (!strcmp(str, "nocrs")) { pci_probe |= PCI_ROOT_NO_CRS; return NULL; +#ifdef CONFIG_PHYS_ADDR_T_64BIT + } else if (!strcmp(str, "big_root_window")) { + pci_probe |= PCI_BIG_ROOT_WINDOW; + return NULL; +#endif } else if (!strcmp(str, "earlydump")) { pci_early_dump_regs = 1; return NULL; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index e663d6bf1328..8bad19c7473d 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -667,6 +667,9 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) struct resource *res, *conflict; struct pci_dev *other; + if (!(pci_probe & PCI_BIG_ROOT_WINDOW)) + return; + /* Check that we are the only device of that type */ other = pci_get_device(dev->vendor, dev->device, NULL); if (other != dev || @@ -714,7 +717,9 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) res->start = conflict->end + 1; } - dev_info(&dev->dev, "adding root bus resource %pR\n", res); + dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", + res); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK; -- cgit v1.2.3 From b8626f1dc29d3eee444bfaa92146ec7b291ef41c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 11 Jan 2018 14:47:40 +0100 Subject: usb: 
misc: usb3503: make sure reset is low for at least 100us When using a GPIO which is high by default, and initialize the driver in USB Hub mode, initialization fails with: [ 111.757794] usb3503 0-0008: SP_ILOCK failed (-5) The reason seems to be that the chip is not properly reset. Probe does initialize reset low, however some lines later the code already set it back high, which is not long enough. Make sure reset is asserted for at least 100us by inserting a delay after initializing the reset pin during probe. Signed-off-by: Stefan Agner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb3503.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c index 465dbf68b463..f723f7b8c9ac 100644 --- a/drivers/usb/misc/usb3503.c +++ b/drivers/usb/misc/usb3503.c @@ -279,6 +279,8 @@ static int usb3503_probe(struct usb3503 *hub) if (gpio_is_valid(hub->gpio_reset)) { err = devm_gpio_request_one(dev, hub->gpio_reset, GPIOF_OUT_INIT_LOW, "usb3503 reset"); + /* Datasheet defines a hardware reset to be at least 100us */ + usleep_range(100, 10000); if (err) { dev_err(dev, "unable to request GPIO %d as reset pin (%d)\n", -- cgit v1.2.3 From 1a2e91e795def04e15fac87b8e16b635691d0b82 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 9 Jan 2018 13:27:17 -0600 Subject: Documentation: usb: fix typo in UVC gadgetfs config command This seems to be a copy&paste error. With the fix the uvc gadget now can be created by following the instructions. Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/gadget-testing.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt index 441a4b9b666f..5908a21fddb6 100644 --- a/Documentation/usb/gadget-testing.txt +++ b/Documentation/usb/gadget-testing.txt @@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value in each line. 
The rules stated above are best illustrated with an example: # mkdir functions/uvc.usb0/control/header/h -# cd functions/uvc.usb0/control/header/h +# cd functions/uvc.usb0/control/ # ln -s header/h class/fs # ln -s header/h class/ss # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p -- cgit v1.2.3 From 0026cef067d2962ed064b974e07f017233d5bd5a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 11 Jan 2018 09:18:59 +0100 Subject: PM / wakeup: Print warn if device gets enabled as wakeup source during sleep In general, wakeup settings are not supposed to be changed during any of the system wide PM phases. The reason is simply that it would break guarantees provided by the PM core, to properly act on active wakeup sources. However, there are exceptions to when, in particular, disabling a device as wakeup source makes sense. For example, in cases when a driver realizes that its device is dead during system suspend. For these scenarios, we don't need to care about acting on the wakeup source correctly, because a dead device shouldn't deliver wakeup signals. To this reasoning and to help users to properly manage wakeup settings, let's print a warning in cases someone calls device_wakeup_enable() during system sleep. Suggested-by: Rafael J. Wysocki Signed-off-by: Ulf Hansson [ rjw: Message to be printed ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index e73a081c6397..ea01621ed769 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -19,6 +19,11 @@ #include "power.h" +#ifndef CONFIG_SUSPEND +suspend_state_t pm_suspend_target_state; +#define pm_suspend_target_state (PM_SUSPEND_ON) +#endif + /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. 
@@ -268,6 +273,9 @@ int device_wakeup_enable(struct device *dev) if (!dev || !dev->power.can_wakeup) return -EINVAL; + if (pm_suspend_target_state != PM_SUSPEND_ON) + dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__); + ws = wakeup_source_register(dev_name(dev)); if (!ws) return -ENOMEM; -- cgit v1.2.3 From 29a5a6d7082427371519ae1e186d9e35612801fb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 11 Jan 2018 02:13:58 +0100 Subject: ACPI / PM: Use Low Power S0 Idle on more systems Some systems don't support the ACPI_LPS0_ENTRY and ACPI_LPS0_EXIT functions in their Low Power S0 Idle _DSM, but still expect EC events to be processed in the suspend-to-idle state for power button wakeup (among other things) to work. Surface Pro3 turns out to be one of them. Fortunately, it still provides Low Power S0 Idle _DSM with the screen on/off functions supported, so modify the ACPI suspend-to-idle to use the Low Power S0 Idle code path for all systems supporting the ACPI_LPS0_ENTRY and ACPI_LPS0_EXIT or the ACPI_LPS0_SCREEN_OFF and ACPI_LPS0_SCREEN_ON functions in their Low Power S0 Idle _DSM. Potentially, that will cause more systems to use suspend-to-idle by default, so some future corrections may be necessary if it leads to issues, but let it remain more straightforward for now. Link: https://bugzilla.kernel.org/show_bug.cgi?id=198389#add_comment Reported-by: Valentin Manea Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Andy Shevchenko Tested-by: Valentin Manea --- drivers/acpi/sleep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 15cd862a87c2..46cde0912762 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -707,7 +707,8 @@ static const struct acpi_device_id lps0_device_ids[] = { #define ACPI_LPS0_ENTRY 5 #define ACPI_LPS0_EXIT 6 -#define ACPI_S2IDLE_FUNC_MASK ((1 << ACPI_LPS0_ENTRY) | (1 << ACPI_LPS0_EXIT)) +#define ACPI_LPS0_SCREEN_MASK ((1 << ACPI_LPS0_SCREEN_OFF) | (1 << ACPI_LPS0_SCREEN_ON)) +#define ACPI_LPS0_PLATFORM_MASK ((1 << ACPI_LPS0_ENTRY) | (1 << ACPI_LPS0_EXIT)) static acpi_handle lps0_device_handle; static guid_t lps0_dsm_guid; @@ -910,7 +911,8 @@ static int lps0_device_attach(struct acpi_device *adev, if (out_obj && out_obj->type == ACPI_TYPE_BUFFER) { char bitmask = *(char *)out_obj->buffer.pointer; - if ((bitmask & ACPI_S2IDLE_FUNC_MASK) == ACPI_S2IDLE_FUNC_MASK) { + if ((bitmask & ACPI_LPS0_PLATFORM_MASK) == ACPI_LPS0_PLATFORM_MASK || + (bitmask & ACPI_LPS0_SCREEN_MASK) == ACPI_LPS0_SCREEN_MASK) { lps0_dsm_func_mask = bitmask; lps0_device_handle = adev->handle; /* -- cgit v1.2.3 From 19351f340765ebef48d07eade8ffb5f6f1118244 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Jan 2018 13:26:35 +0100 Subject: platform/x86: surfacepro3: Support for wakeup from suspend-to-idle Modify surface_button_notify() to make it wake up the system from suspend-to-idle (by reporting "hard" wakeup events while suspended) and add wakeup initialization to surface_button_add() for wakeup events reported by this driver to work at all. Link: https://bugzilla.kernel.org/show_bug.cgi?id=198389 Reported-by: Valentin Manea Signed-off-by: Rafael J. 
Wysocki Acked-by: Andy Shevchenko Tested-by: Valentin Manea --- drivers/platform/x86/surfacepro3_button.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/surfacepro3_button.c b/drivers/platform/x86/surfacepro3_button.c index 6505c97705e1..1b491690ce07 100644 --- a/drivers/platform/x86/surfacepro3_button.c +++ b/drivers/platform/x86/surfacepro3_button.c @@ -119,7 +119,7 @@ static void surface_button_notify(struct acpi_device *device, u32 event) if (key_code == KEY_RESERVED) return; if (pressed) - pm_wakeup_event(&device->dev, 0); + pm_wakeup_dev_event(&device->dev, 0, button->suspended); if (button->suspended) return; input_report_key(input, key_code, pressed?1:0); @@ -185,6 +185,8 @@ static int surface_button_add(struct acpi_device *device) error = input_register_device(input); if (error) goto err_free_input; + + device_init_wakeup(&device->dev, true); dev_info(&device->dev, "%s [%s]\n", name, acpi_device_bid(device)); return 0; -- cgit v1.2.3 From dbd49b85eec7eb6d7ae61bad8306d5cdd85c142d Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 10 Jan 2018 11:38:51 -0800 Subject: cpufreq: intel_pstate: Replace bxt_funcs with core_funcs Since core_funcs and bxt_funcs have same set of callbacks, replace bxt_funcs with core_funcs. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/intel_pstate.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 93a0e88bef76..3b6616b15c59 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1595,15 +1595,6 @@ static const struct pstate_funcs knl_funcs = { .get_val = core_get_val, }; -static const struct pstate_funcs bxt_funcs = { - .get_max = core_get_max_pstate, - .get_max_physical = core_get_max_pstate_physical, - .get_min = core_get_min_pstate, - .get_turbo = core_get_turbo_pstate, - .get_scaling = core_get_scaling, - .get_val = core_get_val, -}; - #define ICPU(model, policy) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\ (unsigned long)&policy } @@ -1627,8 +1618,8 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_funcs), ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs), ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), - ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_funcs), - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, bxt_funcs), + ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs), + ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); -- cgit v1.2.3 From d8de7a44e11f98f2c2a4c2e12e79ba9ffb839306 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 10 Jan 2018 11:38:52 -0800 Subject: cpufreq: intel_pstate: Add Skylake servers support Currently intel_pstate can function only in HWP mode on Skylake servers. When HWP feature is not enabled on the processor then the acpi-cpufreq driver is used. Based on the power and performance tests using intel_pstate scaling algorithm the results are comparable.
But intel_pstate brings in additional features: - Display of turbo frequency range, which many users like to see - Place limits in the turbo frequency range when platform allows Since these tests are done only using non PID algorithm introduced in kernel version 4.14, this patch is not a backport candidate. So each user has to carefully weigh the benefits before he backports. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3b6616b15c59..7edf7a0e5a96 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1620,6 +1620,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs), ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, core_funcs), + ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); -- cgit v1.2.3 From 03a551734cfc2b93f83950a595974e3c9cbd82fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=3D=3FUTF-8=3Fq=3FChristian=3D20K=3DC3=3DB6nig=3F=3D?= Date: Thu, 11 Jan 2018 14:23:30 +0100 Subject: x86/PCI: Move and shrink AMD 64-bit window to avoid conflict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid problems with BIOS implementations which don't report all used resources to the OS by only allocating a 256GB window directly below the hardware limit (from the BKDG, sec 2.4.6). Fixes a silent reboot loop reported by Aaro Koskinen on an AMD-based MSI MS-7699/760GA-P43(FX) system. This was apparently caused by RAM or other unreported hardware that conflicted with the new window. 
Link: https://support.amd.com/TechDocs/49125_15h_Models_30h-3Fh_BKDG.pdf Link: https://lkml.kernel.org/r/20180105220412.fzpwqe4zljdawr36@darkstar.musicnaut.iki.fi Fixes: fa564ad96366 ("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f, 60-7f)") Reported-by: Aaro Koskinen Signed-off-by: Christian König [bhelgaas: changelog, comment, Fixes:] Signed-off-by: Bjorn Helgaas --- arch/x86/pci/fixup.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 8bad19c7473d..f6a26e3cb476 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -662,10 +662,11 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); */ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) { - unsigned i; u32 base, limit, high; - struct resource *res, *conflict; struct pci_dev *other; + struct resource *res; + unsigned i; + int r; if (!(pci_probe & PCI_BIG_ROOT_WINDOW)) return; @@ -702,19 +703,20 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) if (!res) return; + /* + * Allocate a 256GB window directly below the 0xfd00000000 hardware + * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6). 
+ */ res->name = "PCI Bus 0000:00"; res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_WINDOW; - res->start = 0x100000000ull; + res->start = 0xbd00000000ull; res->end = 0xfd00000000ull - 1; - /* Just grab the free area behind system memory for this */ - while ((conflict = request_resource_conflict(&iomem_resource, res))) { - if (conflict->end >= res->end) { - kfree(res); - return; - } - res->start = conflict->end + 1; + r = request_resource(&iomem_resource, res); + if (r) { + kfree(res); + return; } dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", -- cgit v1.2.3 From ef9e1cdf419a37065364edb47bd6dd9aee1bcb7d Mon Sep 17 00:00:00 2001 From: "eajames@linux.vnet.ibm.com" Date: Thu, 11 Jan 2018 11:20:10 -0600 Subject: hwmon: (pmbus/cffps) Add led class device for power supply fault led This power supply device doesn't correctly manage its own fault led. Add an led class device and register it so that userspace can manage power supply fault led as necessary.
Signed-off-by: Eddie James Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/ibm-cffps.c | 96 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 88 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/pmbus/ibm-cffps.c b/drivers/hwmon/pmbus/ibm-cffps.c index 2d6f4f41e736..93d9a9ea112b 100644 --- a/drivers/hwmon/pmbus/ibm-cffps.c +++ b/drivers/hwmon/pmbus/ibm-cffps.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #define CFFPS_CCIN_CMD 0xBD #define CFFPS_FW_CMD_START 0xFA #define CFFPS_FW_NUM_BYTES 4 +#define CFFPS_SYS_CONFIG_CMD 0xDA #define CFFPS_INPUT_HISTORY_CMD 0xD6 #define CFFPS_INPUT_HISTORY_SIZE 100 @@ -39,6 +41,11 @@ #define CFFPS_MFR_VAUX_FAULT BIT(6) #define CFFPS_MFR_CURRENT_SHARE_WARNING BIT(7) +#define CFFPS_LED_BLINK BIT(0) +#define CFFPS_LED_ON BIT(1) +#define CFFPS_LED_OFF BIT(2) +#define CFFPS_BLINK_RATE_MS 250 + enum { CFFPS_DEBUGFS_INPUT_HISTORY = 0, CFFPS_DEBUGFS_FRU, @@ -63,6 +70,10 @@ struct ibm_cffps { struct ibm_cffps_input_history input_history; int debugfs_entries[CFFPS_DEBUGFS_NUM_ENTRIES]; + + char led_name[32]; + u8 led_state; + struct led_classdev led; }; #define to_psu(x, y) container_of((x), struct ibm_cffps, debugfs_entries[(y)]) @@ -258,6 +269,69 @@ static int ibm_cffps_read_word_data(struct i2c_client *client, int page, return rc; } +static void ibm_cffps_led_brightness_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + int rc; + struct ibm_cffps *psu = container_of(led_cdev, struct ibm_cffps, led); + + if (brightness == LED_OFF) { + psu->led_state = CFFPS_LED_OFF; + } else { + brightness = LED_FULL; + if (psu->led_state != CFFPS_LED_BLINK) + psu->led_state = CFFPS_LED_ON; + } + + rc = i2c_smbus_write_byte_data(psu->client, CFFPS_SYS_CONFIG_CMD, + psu->led_state); + if (rc < 0) + return; + + led_cdev->brightness = brightness; +} + +static int ibm_cffps_led_blink_set(struct led_classdev *led_cdev, + unsigned long *delay_on, + unsigned long 
*delay_off) +{ + int rc; + struct ibm_cffps *psu = container_of(led_cdev, struct ibm_cffps, led); + + psu->led_state = CFFPS_LED_BLINK; + + if (led_cdev->brightness == LED_OFF) + return 0; + + rc = i2c_smbus_write_byte_data(psu->client, CFFPS_SYS_CONFIG_CMD, + CFFPS_LED_BLINK); + if (rc < 0) + return rc; + + *delay_on = CFFPS_BLINK_RATE_MS; + *delay_off = CFFPS_BLINK_RATE_MS; + + return 0; +} + +static void ibm_cffps_create_led_class(struct ibm_cffps *psu) +{ + int rc; + struct i2c_client *client = psu->client; + struct device *dev = &client->dev; + + snprintf(psu->led_name, sizeof(psu->led_name), "%s-%02x", client->name, + client->addr); + psu->led.name = psu->led_name; + psu->led.max_brightness = LED_FULL; + psu->led.brightness_set = ibm_cffps_led_brightness_set; + psu->led.blink_set = ibm_cffps_led_blink_set; + + rc = devm_led_classdev_register(dev, &psu->led); + if (rc) + dev_warn(dev, "failed to register led class: %d\n", rc); +} + static struct pmbus_driver_info ibm_cffps_info = { .pages = 1, .func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_IOUT | @@ -286,6 +360,20 @@ static int ibm_cffps_probe(struct i2c_client *client, if (rc) return rc; + /* + * Don't fail the probe if there isn't enough memory for leds and + * debugfs. 
+ */ + psu = devm_kzalloc(&client->dev, sizeof(*psu), GFP_KERNEL); + if (!psu) + return 0; + + psu->client = client; + mutex_init(&psu->input_history.update_lock); + psu->input_history.last_update = jiffies - HZ; + + ibm_cffps_create_led_class(psu); + /* Don't fail the probe if we can't create debugfs */ debugfs = pmbus_get_debugfs_dir(client); if (!debugfs) @@ -295,14 +383,6 @@ static int ibm_cffps_probe(struct i2c_client *client, if (!ibm_cffps_dir) return 0; - psu = devm_kzalloc(&client->dev, sizeof(*psu), GFP_KERNEL); - if (!psu) - return 0; - - psu->client = client; - mutex_init(&psu->input_history.update_lock); - psu->input_history.last_update = jiffies - HZ; - for (i = 0; i < CFFPS_DEBUGFS_NUM_ENTRIES; ++i) psu->debugfs_entries[i] = i; -- cgit v1.2.3 From 445b69e3b75e42362a5bdc13c8b8f61599e2228a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 10 Jan 2018 14:49:39 -0800 Subject: x86/pti: Make unpoison of pgd for trusted boot work for real The initial fix for trusted boot and PTI potentially misses the pgd clearing if pud_alloc() sets a PGD. It probably works in *practice* because for two adjacent calls to map_tboot_page() that share a PGD entry, the first will clear NX, *then* allocate and set the PGD (without NX clear). The second call will *not* allocate but will clear the NX bit. Defer the NX clearing to a point after it is known that all top-level allocations have occurred. Add a comment to clarify why.
[ tglx: Massaged changelog ] Fixes: 262b6b30087 ("x86/tboot: Unbreak tboot with PTI enabled") Signed-off-by: Dave Hansen Signed-off-by: Thomas Gleixner Reviewed-by: Andrea Arcangeli Cc: Jon Masters Cc: "Tim Chen" Cc: gnomes@lxorguk.ukuu.org.uk Cc: peterz@infradead.org Cc: ning.sun@intel.com Cc: tboot-devel@lists.sourceforge.net Cc: andi@firstfloor.org Cc: luto@kernel.org Cc: law@redhat.com Cc: pbonzini@redhat.com Cc: torvalds@linux-foundation.org Cc: gregkh@linux-foundation.org Cc: dwmw@amazon.co.uk Cc: nickc@redhat.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180110224939.2695CD47@viggo.jf.intel.com --- arch/x86/kernel/tboot.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 75869a4b6c41..a2486f444073 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -127,7 +127,6 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, p4d = p4d_alloc(&tboot_mm, pgd, vaddr); if (!p4d) return -1; - pgd->pgd &= ~_PAGE_NX; pud = pud_alloc(&tboot_mm, p4d, vaddr); if (!pud) return -1; @@ -139,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); pte_unmap(pte); + + /* + * PTI poisons low addresses in the kernel page tables in the + * name of making them unusable for userspace. To execute + * code at such a low address, the poison must be cleared. + * + * Note: 'pgd' actually gets set in p4d_alloc() _or_ + * pud_alloc() depending on 4/5-level paging. + */ + pgd->pgd &= ~_PAGE_NX; + return 0; } -- cgit v1.2.3 From 8978cc921fc7fad3f4d6f91f1da01352aeeeff25 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 9 Jan 2018 11:41:10 +0200 Subject: {net,ib}/mlx5: Don't disable local loopback multicast traffic when needed There are systems platform information management interfaces (such as HOST2BMC) for which we cannot disable local loopback multicast traffic. 
Separate disable_local_lb_mc and disable_local_lb_uc capability bits so driver will not disable multicast loopback traffic if not supported. (It is expected that Firmware will not set disable_local_lb_mc if HOST2BMC is running for example.) Function mlx5_nic_vport_update_local_lb will do best effort to disable/enable UC/MC loopback traffic and return success only in case it succeeded to changed all allowed by Firmware. Adapt mlx5_ib and mlx5e to support the new cap bits. Fixes: 2c43c5a036be ("net/mlx5e: Enable local loopback in loopback selftest") Fixes: c85023e153e3 ("IB/mlx5: Add raw ethernet local loopback support") Fixes: bded747bb432 ("net/mlx5: Add raw ethernet local loopback firmware command") Signed-off-by: Eran Ben Elisha Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 9 +++++--- .../net/ethernet/mellanox/mlx5/core/en_selftest.c | 27 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 22 +++++++++++++----- include/linux/mlx5/mlx5_ifc.h | 5 ++-- 5 files changed, 44 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8ac50de2b242..00cb184fa027 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1324,7 +1324,8 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) return err; if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || - !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) + (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && + !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) return err; mutex_lock(&dev->lb_mutex); @@ -1342,7 +1343,8 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) mlx5_core_dealloc_transport_domain(dev->mdev, tdn); if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || - !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) + 
(!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && + !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) return; mutex_lock(&dev->lb_mutex); @@ -4187,7 +4189,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) } if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && - MLX5_CAP_GEN(mdev, disable_local_lb)) + (MLX5_CAP_GEN(mdev, disable_local_lb_uc) || + MLX5_CAP_GEN(mdev, disable_local_lb_mc))) mutex_init(&dev->lb_mutex); dev->ib_active = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 1f1f8af87d4d..5a4608281f38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -238,15 +238,19 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, int err = 0; /* Temporarily enable local_lb */ - if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { - mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb); - if (!lbtp->local_lb) - mlx5_nic_vport_update_local_lb(priv->mdev, true); + err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb); + if (err) + return err; + + if (!lbtp->local_lb) { + err = mlx5_nic_vport_update_local_lb(priv->mdev, true); + if (err) + return err; } err = mlx5e_refresh_tirs(priv, true); if (err) - return err; + goto out; lbtp->loopback_ok = false; init_completion(&lbtp->comp); @@ -256,16 +260,21 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, lbtp->pt.dev = priv->netdev; lbtp->pt.af_packet_priv = lbtp; dev_add_pack(&lbtp->pt); + + return 0; + +out: + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); + return err; } static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, struct mlx5e_lbt_priv *lbtp) { - if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { - if (!lbtp->local_lb) - mlx5_nic_vport_update_local_lb(priv->mdev, false); - } + if (!lbtp->local_lb) + mlx5_nic_vport_update_local_lb(priv->mdev, false); 
dev_remove_pack(&lbtp->pt); mlx5e_refresh_tirs(priv, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8a89c7e8cd63..95e188d0883e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -578,8 +578,7 @@ static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) int ret = 0; /* Disable local_lb by default */ - if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && - MLX5_CAP_GEN(dev, disable_local_lb)) + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) ret = mlx5_nic_vport_update_local_lb(dev, false); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index d653b0025b13..a1296a62497d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -908,23 +908,33 @@ int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable) void *in; int err; - mlx5_core_dbg(mdev, "%s local_lb\n", enable ? 
"enable" : "disable"); + if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) && + !MLX5_CAP_GEN(mdev, disable_local_lb_uc)) + return 0; + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; - MLX5_SET(modify_nic_vport_context_in, in, - field_select.disable_mc_local_lb, 1); MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.disable_mc_local_lb, !enable); - - MLX5_SET(modify_nic_vport_context_in, in, - field_select.disable_uc_local_lb, 1); MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.disable_uc_local_lb, !enable); + if (MLX5_CAP_GEN(mdev, disable_local_lb_mc)) + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_mc_local_lb, 1); + + if (MLX5_CAP_GEN(mdev, disable_local_lb_uc)) + MLX5_SET(modify_nic_vport_context_in, in, + field_select.disable_uc_local_lb, 1); + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + if (!err) + mlx5_core_dbg(mdev, "%s local_lb\n", + enable ? "enable" : "disable"); + kvfree(in); return err; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d44ec5f41d4a..1391a82da98e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1027,8 +1027,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 disable_local_lb[0x1]; - u8 reserved_at_3e2[0x9]; + u8 disable_local_lb_uc[0x1]; + u8 disable_local_lb_mc[0x1]; + u8 reserved_at_3e3[0x8]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; -- cgit v1.2.3 From 39b735332cb8b33a27c28592d969e4016c86c3ea Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 11 Jan 2018 21:46:23 +0000 Subject: objtool: Detect jumps to retpoline thunks A direct jump to a retpoline thunk is really an indirect jump in disguise. Change the objtool instruction type accordingly. Objtool needs to know where indirect branches are so it can detect switch statement jump tables. 
This fixes a bunch of warnings with CONFIG_RETPOLINE like: arch/x86/events/intel/uncore_nhmex.o: warning: objtool: nhmex_rbox_msr_enable_event()+0x44: sibling call from callable instruction with modified stack frame kernel/signal.o: warning: objtool: copy_siginfo_to_user()+0x91: sibling call from callable instruction with modified stack frame ... Signed-off-by: Josh Poimboeuf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-2-git-send-email-dwmw@amazon.co.uk --- tools/objtool/check.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9b341584eb1b..de053fb7049b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -456,6 +456,13 @@ static int add_jump_destinations(struct objtool_file *file) } else if (rela->sym->sec->idx) { dest_sec = rela->sym->sec; dest_off = rela->sym->sym.st_value + rela->addend + 4; + } else if (strstr(rela->sym->name, "_indirect_thunk_")) { + /* + * Retpoline jumps are really dynamic jumps in + * disguise, so convert them accordingly. + */ + insn->type = INSN_JUMP_DYNAMIC; + continue; } else { /* sibling call */ insn->jump_dest = 0; -- cgit v1.2.3 From 258c76059cece01bebae098e81bacb1af2edad17 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 11 Jan 2018 21:46:24 +0000 Subject: objtool: Allow alternatives to be ignored Getting objtool to understand retpolines is going to be a bit of a challenge. For now, take advantage of the fact that retpolines are patched in with alternatives. Just read the original (sane) non-alternative instruction, and ignore the patched-in retpoline. 
This allows objtool to understand the control flow *around* the retpoline, even if it can't yet follow what's inside. This means the ORC unwinder will fail to unwind from inside a retpoline, but will work fine otherwise. Signed-off-by: Josh Poimboeuf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-3-git-send-email-dwmw@amazon.co.uk --- tools/objtool/check.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++----- tools/objtool/check.h | 2 +- 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index de053fb7049b..f40d46e24bcc 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -427,6 +427,40 @@ static void add_ignores(struct objtool_file *file) } } +/* + * FIXME: For now, just ignore any alternatives which add retpolines. This is + * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. + * But it at least allows objtool to understand the control flow *around* the + * retpoline. + */ +static int add_nospec_ignores(struct objtool_file *file) +{ + struct section *sec; + struct rela *rela; + struct instruction *insn; + + sec = find_section_by_name(file->elf, ".rela.discard.nospec"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in %s", sec->name); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("bad .discard.nospec entry"); + return -1; + } + + insn->ignore_alts = true; + } + + return 0; +} + /* * Find the destination instructions for all jumps. 
*/ @@ -509,11 +543,18 @@ static int add_call_destinations(struct objtool_file *file) dest_off = insn->offset + insn->len + insn->immediate; insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); + /* + * FIXME: Thanks to retpolines, it's now considered + * normal for a function to call within itself. So + * disable this warning for now. + */ +#if 0 if (!insn->call_dest) { WARN_FUNC("can't find call dest symbol at offset 0x%lx", insn->sec, insn->offset, dest_off); return -1; } +#endif } else if (rela->sym->type == STT_SECTION) { insn->call_dest = find_symbol_by_offset(rela->sym->sec, rela->addend+4); @@ -678,12 +719,6 @@ static int add_special_section_alts(struct objtool_file *file) return ret; list_for_each_entry_safe(special_alt, tmp, &special_alts, list) { - alt = malloc(sizeof(*alt)); - if (!alt) { - WARN("malloc failed"); - ret = -1; - goto out; - } orig_insn = find_insn(file, special_alt->orig_sec, special_alt->orig_off); @@ -694,6 +729,10 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } + /* Ignore retpoline alternatives. 
*/ + if (orig_insn->ignore_alts) + continue; + new_insn = NULL; if (!special_alt->group || special_alt->new_len) { new_insn = find_insn(file, special_alt->new_sec, @@ -719,6 +758,13 @@ static int add_special_section_alts(struct objtool_file *file) goto out; } + alt = malloc(sizeof(*alt)); + if (!alt) { + WARN("malloc failed"); + ret = -1; + goto out; + } + alt->insn = new_insn; list_add_tail(&alt->list, &orig_insn->alts); @@ -1035,6 +1081,10 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); + ret = add_nospec_ignores(file); + if (ret) + return ret; + ret = add_jump_destinations(file); if (ret) return ret; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 47d9ea70a83d..dbadb304a410 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -44,7 +44,7 @@ struct instruction { unsigned int len; unsigned char type; unsigned long immediate; - bool alt_group, visited, dead_end, ignore, hint, save, restore; + bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts; struct symbol *call_dest; struct instruction *jump_dest; struct list_head alts; -- cgit v1.2.3 From 76b043848fd22dbf7f8bf3a1452f8c70d557b860 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:25 +0000 Subject: x86/retpoline: Add initial retpoline support Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide the corresponding thunks. Provide assembler macros for invoking the thunks in the same way that GCC does, from native and inline assembler. This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In some circumstances, IBRS microcode features may be used instead, and the retpoline can be disabled. On AMD CPUs if lfence is serialising, the retpoline can be dramatically simplified to a simple "lfence; jmp *\reg". 
A future patch, after it has been verified that lfence really is serialising in all circumstances, can enable this by setting the X86_FEATURE_RETPOLINE_AMD feature bit in addition to X86_FEATURE_RETPOLINE. Do not align the retpoline in the altinstr section, because there is no guarantee that it stays aligned when it's copied over the oldinstr during alternative patching. [ Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks] [ tglx: Put actual function CALL/JMP in front of the macros, convert to symbolic labels ] [ dwmw2: Convert back to numeric labels, merge objtool fixes ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-4-git-send-email-dwmw@amazon.co.uk --- arch/x86/Kconfig | 13 ++++ arch/x86/Makefile | 10 +++ arch/x86/include/asm/asm-prototypes.h | 25 +++++++ arch/x86/include/asm/cpufeatures.h | 2 + arch/x86/include/asm/nospec-branch.h | 128 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/common.c | 4 ++ arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 48 +++++++++++++ 8 files changed, 231 insertions(+) create mode 100644 arch/x86/include/asm/nospec-branch.h create mode 100644 arch/x86/lib/retpoline.S diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e23d21ac745a..d1819161cc6c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -429,6 +429,19 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. 
Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + + Without compiler support, at least indirect branches in assembler + code are eliminated. Since this includes the syscall entry path, + it is not entirely pointless. + config INTEL_RDT bool "Intel Resource Director Technology support" default n diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a20eacd9c7e9..974c61864978 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -235,6 +235,16 @@ KBUILD_CFLAGS += -Wno-sign-compare # KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +# Avoid indirect branches in kernel to deal with Spectre +ifdef CONFIG_RETPOLINE + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE + else + $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) + endif +endif + archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index ff700d81e91e..0927cdc4f946 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -11,7 +11,32 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); #endif + +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_32 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); +#else +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); +INDIRECT_THUNK(8) +INDIRECT_THUNK(9) +INDIRECT_THUNK(10) +INDIRECT_THUNK(11) +INDIRECT_THUNK(12) +INDIRECT_THUNK(13) +INDIRECT_THUNK(14) +INDIRECT_THUNK(15) +#endif +INDIRECT_THUNK(ax) +INDIRECT_THUNK(bx) +INDIRECT_THUNK(cx) +INDIRECT_THUNK(dx) +INDIRECT_THUNK(si) +INDIRECT_THUNK(di) +INDIRECT_THUNK(bp) +INDIRECT_THUNK(sp) +#endif /* CONFIG_RETPOLINE */ 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1641c2f96363..f275447862f4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,6 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h new file mode 100644 index 000000000000..e20e92ef2ca8 --- /dev/null +++ b/arch/x86/include/asm/nospec-branch.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NOSPEC_BRANCH_H__ +#define __NOSPEC_BRANCH_H__ + +#include +#include +#include + +#ifdef __ASSEMBLY__ + +/* + * This should be used immediately before a retpoline alternative. It tells + * objtool where the retpolines are so that it can make sense of the control + * flow by just reading the original instruction(s) and ignoring the + * alternatives. + */ +.macro ANNOTATE_NOSPEC_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.nospec + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * These are the bare retpoline primitives for indirect jmp and call. + * Do not use these directly; they only exist to make the ALTERNATIVE + * invocation below less ugly. 
+ */ +.macro RETPOLINE_JMP reg:req + call .Ldo_rop_\@ +.Lspec_trap_\@: + pause + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov \reg, (%_ASM_SP) + ret +.endm + +/* + * This is a wrapper around RETPOLINE_JMP so the called function in reg + * returns to the instruction after the macro. + */ +.macro RETPOLINE_CALL reg:req + jmp .Ldo_call_\@ +.Ldo_retpoline_jmp_\@: + RETPOLINE_JMP \reg +.Ldo_call_\@: + call .Ldo_retpoline_jmp_\@ +.endm + +/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. + */ +.macro JMP_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(jmp *\reg), \ + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#else + jmp *\reg +#endif +.endm + +.macro CALL_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(call *\reg), \ + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD +#else + call *\reg +#endif +.endm + +#else /* __ASSEMBLY__ */ + +#define ANNOTATE_NOSPEC_ALTERNATIVE \ + "999:\n\t" \ + ".pushsection .discard.nospec\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" + +#if defined(CONFIG_X86_64) && defined(RETPOLINE) + +/* + * Since the inline asm uses the %V modifier which is only in newer GCC, + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + */ +# define CALL_NOSPEC \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE( \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE) +# define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +/* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. 
+ */ +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: addl $4, %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ + X86_FEATURE_RETPOLINE) + +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#else /* No retpoline */ +# define CALL_NOSPEC "call *%[thunk_target]\n" +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 372ba3fb400f..7a671d1ae3cb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -905,6 +905,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); +#ifdef CONFIG_RETPOLINE + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); +#endif + fpu__init_system(c); #ifdef CONFIG_X86_32 diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 457f681ef379..d435c89875c1 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_RETPOLINE) += retpoline.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S new file mode 100644 index 000000000000..cb45c6cb465f --- /dev/null +++ b/arch/x86/lib/retpoline.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include + +.macro THUNK reg + .section .text.__x86.indirect_thunk.\reg + +ENTRY(__x86_indirect_thunk_\reg) + CFI_STARTPROC + JMP_NOSPEC %\reg + CFI_ENDPROC +ENDPROC(__x86_indirect_thunk_\reg) +.endm + +/* + * Despite being 
an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather + * than one per register with the correct names. So we do it + * the simple and nasty way... + */ +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) + +GENERATE_THUNK(_ASM_AX) +GENERATE_THUNK(_ASM_BX) +GENERATE_THUNK(_ASM_CX) +GENERATE_THUNK(_ASM_DX) +GENERATE_THUNK(_ASM_SI) +GENERATE_THUNK(_ASM_DI) +GENERATE_THUNK(_ASM_BP) +GENERATE_THUNK(_ASM_SP) +#ifdef CONFIG_64BIT +GENERATE_THUNK(r8) +GENERATE_THUNK(r9) +GENERATE_THUNK(r10) +GENERATE_THUNK(r11) +GENERATE_THUNK(r12) +GENERATE_THUNK(r13) +GENERATE_THUNK(r14) +GENERATE_THUNK(r15) +#endif -- cgit v1.2.3 From da285121560e769cc31797bba6422eea71d473e0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:26 +0000 Subject: x86/spectre: Add boot time option to select Spectre v2 mitigation Add a spectre_v2= option to select the mitigation used for the indirect branch speculation vulnerability. Currently, the only option available is retpoline, in its various forms. This will be expanded to cover the new IBRS/IBPB microcode features. The RETPOLINE_AMD feature relies on a serializing LFENCE for speculation control. For AMD hardware, only set RETPOLINE_AMD if LFENCE is a serializing instruction, which is indicated by the LFENCE_RDTSC feature. 
[ tglx: Folded back the LFENCE/AMD fixes and reworked it so IBRS integration becomes simple ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-5-git-send-email-dwmw@amazon.co.uk --- Documentation/admin-guide/kernel-parameters.txt | 28 +++++ arch/x86/include/asm/nospec-branch.h | 10 ++ arch/x86/kernel/cpu/bugs.c | 158 +++++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 4 - 4 files changed, 195 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 905991745d26..8122b5f98ea1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2599,6 +2599,11 @@ nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 + (indirect branch prediction) vulnerability. System may + allow data leaks with this option, which is equivalent + to spectre_v2=off. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -3908,6 +3913,29 @@ sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/laptops/sonypi.txt + spectre_v2= [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability. 
+ + on - unconditionally enable + off - unconditionally disable + auto - kernel detects whether your CPU model is + vulnerable + + Selecting 'on' will, and 'auto' may, choose a + mitigation method at run time according to the + CPU, the available microcode, the setting of the + CONFIG_RETPOLINE configuration option, and the + compiler with which the kernel was built. + + Specific mitigations can also be selected manually: + + retpoline - replace indirect branches + retpoline,generic - Google's original retpoline + retpoline,amd - AMD-specific minimal thunk + + Not specifying this option is equivalent to + spectre_v2=auto. + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e20e92ef2ca8..ea034fa6e261 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -124,5 +124,15 @@ # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif +/* The Spectre V2 mitigation variants */ +enum spectre_v2_mitigation { + SPECTRE_V2_NONE, + SPECTRE_V2_RETPOLINE_MINIMAL, + SPECTRE_V2_RETPOLINE_MINIMAL_AMD, + SPECTRE_V2_RETPOLINE_GENERIC, + SPECTRE_V2_RETPOLINE_AMD, + SPECTRE_V2_IBRS, +}; + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 76ad6cb44b40..e4dc26185aa7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,9 @@ #include #include #include + +#include +#include #include #include #include @@ -21,6 +24,8 @@ #include #include +static void __init spectre_v2_select_mitigation(void); + void __init check_bugs(void) { identify_boot_cpu(); @@ -30,6 +35,9 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP.
@@ -62,6 +70,153 @@ void __init check_bugs(void) #endif } +/* The kernel command line selection */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", +}; + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static inline bool retp_compiler(void) +{ + return __is_defined(RETPOLINE); +} + +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); +} + +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) +{ + char arg[20]; + int ret; + + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret > 0) { + if (match_option(arg, ret, "off")) { + goto disable; + } else if (match_option(arg, ret, "on")) { + spec2_print_if_secure("force enabled on command line."); + return SPECTRE_V2_CMD_FORCE; + } else if (match_option(arg, ret, "retpoline")) { + spec2_print_if_insecure("retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE; + } else if (match_option(arg, ret, "retpoline,amd")) { + if 
(boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + spec2_print_if_insecure("AMD retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_AMD; + } else if (match_option(arg, ret, "retpoline,generic")) { + spec2_print_if_insecure("generic retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_GENERIC; + } else if (match_option(arg, ret, "auto")) { + return SPECTRE_V2_CMD_AUTO; + } + } + + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_AUTO; +disable: + spec2_print_if_insecure("disabled on command line."); + return SPECTRE_V2_CMD_NONE; +} + +static void __init spectre_v2_select_mitigation(void) +{ + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE; + + /* + * If the CPU is not affected and the command line mode is NONE or AUTO + * then nothing to do. + */ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO)) + return; + + switch (cmd) { + case SPECTRE_V2_CMD_NONE: + return; + + case SPECTRE_V2_CMD_FORCE: + /* FALLTRHU */ + case SPECTRE_V2_CMD_AUTO: + goto retpoline_auto; + + case SPECTRE_V2_CMD_RETPOLINE_AMD: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_amd; + break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_generic; + break; + case SPECTRE_V2_CMD_RETPOLINE: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; + } + pr_err("kernel not compiled with retpoline; no mitigation available!"); + return; + +retpoline_auto: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + retpoline_amd: + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + goto retpoline_generic; + } + mode = retp_compiler() ? 
SPECTRE_V2_RETPOLINE_AMD : + SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } else { + retpoline_generic: + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : + SPECTRE_V2_RETPOLINE_MINIMAL; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } + + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); +} + +#undef pr_fmt + #ifdef CONFIG_SYSFS ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) @@ -86,6 +241,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7a671d1ae3cb..372ba3fb400f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -905,10 +905,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); -#ifdef CONFIG_RETPOLINE - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); -#endif - fpu__init_system(c); #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 9697fa39efd3fc3692f2949d4045f393ec58450b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:27 +0000 Subject: x86/retpoline/crypto: Convert crypto assembler indirect jumps Convert all indirect jumps in crypto assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. 
Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-6-git-send-email-dwmw@amazon.co.uk --- arch/x86/crypto/aesni-intel_asm.S | 5 +++-- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 ++- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 ++- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 16627fec80b2..3d09e3aca18d 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,7 @@ #include #include #include +#include /* * The following macros are used to move an (un)aligned 16 byte value to/from @@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x00(OUTP), INC pxor INC, STATE1 @@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8) _aesni_gf128mul_x_ble() movups IV, (IVP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x40(OUTP), INC pxor INC, STATE1 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index f7c495e2863c..a14af6eb09cb 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -17,6 +17,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way: vpxor 14 * 16(%rax), %xmm15, %xmm14; vpxor 15 * 16(%rax), %xmm15, %xmm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 16), %rsp; diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 
eee5b3982cfd..b66bbfa62f50 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -12,6 +12,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way: vpxor 14 * 32(%rax), %ymm15, %ymm14; vpxor 15 * 32(%rax), %ymm15, %ymm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 32), %rsp; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 7a7de27c6f41..d9b734d0c8cc 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -45,6 +45,7 @@ #include #include +#include ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction @@ -172,7 +173,7 @@ continue_block: movzxw (bufp, %rax, 2), len lea crc_array(%rip), bufp lea (bufp, len, 1), bufp - jmp *bufp + JMP_NOSPEC bufp ################################################################ ## 2a) PROCESS FULL BLOCKS: -- cgit v1.2.3 From 2641f08bb7fc63a636a2b18173221d7040a3512e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:28 +0000 Subject: x86/retpoline/entry: Convert entry assembler indirect jumps Convert indirect jumps in core 32/64bit entry assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Don't use CALL_NOSPEC in entry_SYSCALL_64_fastpath because the return address after the 'call' instruction must be *precisely* at the .Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work, and the use of alternatives will mess that up unless we play horrid games to prepend with NOPs and make the variants the same length. It's not worth it; in the case where we ALTERNATIVE out the retpoline, the first instruction at __x86.indirect_thunk.rax is going to be a bare jmp *%rax anyway. 
Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Acked-by: Arjan van de Ven Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-7-git-send-email-dwmw@amazon.co.uk --- arch/x86/entry/entry_32.S | 5 +++-- arch/x86/entry/entry_64.S | 12 +++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ace8f321a5a1..a1f28a54f23a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -44,6 +44,7 @@ #include #include #include +#include .section .entry.text, "ax" @@ -290,7 +291,7 @@ ENTRY(ret_from_fork) /* kernel thread */ 1: movl %edi, %eax - call *%ebx + CALL_NOSPEC %ebx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -919,7 +920,7 @@ common_exception: movl %ecx, %es TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - call *%edi + CALL_NOSPEC %edi jmp ret_from_exception END(common_exception) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ed31d00dc5ee..59874bc1aed2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "calling.h" @@ -187,7 +188,7 @@ ENTRY(entry_SYSCALL_64_trampoline) */ pushq %rdi movq $entry_SYSCALL_64_stage2, %rdi - jmp *%rdi + JMP_NOSPEC %rdi END(entry_SYSCALL_64_trampoline) .popsection @@ -266,7 +267,12 @@ entry_SYSCALL_64_fastpath: * It might end up jumping to the slow path. If it jumps, RAX * and all argument registers are clobbered. 
*/ +#ifdef CONFIG_RETPOLINE + movq sys_call_table(, %rax, 8), %rax + call __x86_indirect_thunk_rax +#else call *sys_call_table(, %rax, 8) +#endif .Lentry_SYSCALL_64_after_fastpath_call: movq %rax, RAX(%rsp) @@ -438,7 +444,7 @@ ENTRY(stub_ptregs_64) jmp entry_SYSCALL64_slow_path 1: - jmp *%rax /* Called from C */ + JMP_NOSPEC %rax /* Called from C */ END(stub_ptregs_64) .macro ptregs_stub func @@ -517,7 +523,7 @@ ENTRY(ret_from_fork) 1: /* kernel thread */ movq %r12, %rdi - call *%rbx + CALL_NOSPEC %rbx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() -- cgit v1.2.3 From 9351803bd803cdbeb9b5a7850b7b6f464806e3db Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:29 +0000 Subject: x86/retpoline/ftrace: Convert ftrace assembler indirect jumps Convert all indirect jumps in ftrace assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-8-git-send-email-dwmw@amazon.co.uk --- arch/x86/kernel/ftrace_32.S | 6 ++++-- arch/x86/kernel/ftrace_64.S | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index b6c6468e10bc..4c8440de3355 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CC_USING_FENTRY # define function_hook __fentry__ @@ -197,7 +198,8 @@ ftrace_stub: movl 0x4(%ebp), %edx subl $MCOUNT_INSN_SIZE, %eax - call *ftrace_trace_function + movl 
ftrace_trace_function, %ecx + CALL_NOSPEC %ecx popl %edx popl %ecx @@ -241,5 +243,5 @@ return_to_handler: movl %eax, %ecx popl %edx popl %eax - jmp *%ecx + JMP_NOSPEC %ecx #endif diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index c832291d948a..7cb8ba08beb9 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -7,7 +7,7 @@ #include #include #include - +#include .code64 .section .entry.text, "ax" @@ -286,8 +286,8 @@ trace: * ip and parent ip are used and the list function is called when * function tracing is enabled. */ - call *ftrace_trace_function - + movq ftrace_trace_function, %r8 + CALL_NOSPEC %r8 restore_mcount_regs jmp fgraph_trace @@ -329,5 +329,5 @@ GLOBAL(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp - jmp *%rdi + JMP_NOSPEC %rdi #endif -- cgit v1.2.3 From e70e5892b28c18f517f29ab6e83bd57705104b31 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:30 +0000 Subject: x86/retpoline/hyperv: Convert assembler indirect jumps Convert all indirect jumps in hyperv inline asm code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. 
Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-9-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/mshyperv.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 581bb54dd464..5119e4b555cc 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -7,6 +7,7 @@ #include #include #include +#include /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent @@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return U64_MAX; __asm__ __volatile__("mov %4, %%r8\n" - "call *%5" + CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) - : "r" (output_address), "m" (hv_hypercall_pg) + : "r" (output_address), + THUNK_TARGET(hv_hypercall_pg) : "cc", "memory", "r8", "r9", "r10", "r11"); #else u32 input_address_hi = upper_32_bits(input_address); @@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) if (!hv_hypercall_pg) return U64_MAX; - __asm__ __volatile__("call *%7" + __asm__ __volatile__(CALL_NOSPEC : "=A" (hv_status), "+c" (input_address_lo), ASM_CALL_CONSTRAINT : "A" (control), "b" (input_address_hi), "D"(output_address_hi), "S"(output_address_lo), - "m" (hv_hypercall_pg) + THUNK_TARGET(hv_hypercall_pg) : "cc", "memory"); #endif /* !x86_64 */ return hv_status; @@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) #ifdef CONFIG_X86_64 { - __asm__ __volatile__("call *%4" + __asm__ 
__volatile__(CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) - : "m" (hv_hypercall_pg) + : THUNK_TARGET(hv_hypercall_pg) : "cc", "r8", "r9", "r10", "r11"); } #else @@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) u32 input1_hi = upper_32_bits(input1); u32 input1_lo = lower_32_bits(input1); - __asm__ __volatile__ ("call *%5" + __asm__ __volatile__ (CALL_NOSPEC : "=A"(hv_status), "+c"(input1_lo), ASM_CALL_CONSTRAINT : "A" (control), "b" (input1_hi), - "m" (hv_hypercall_pg) + THUNK_TARGET(hv_hypercall_pg) : "cc", "edi", "esi"); } #endif -- cgit v1.2.3 From ea08816d5b185ab3d09e95e393f265af54560350 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:31 +0000 Subject: x86/retpoline/xen: Convert Xen hypercall indirect jumps Convert indirect call in Xen hypercall to use non-speculative sequence, when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Reviewed-by: Juergen Gross Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-10-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/xen/hypercall.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 7cb282e9e587..bfd882617613 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -217,9 +218,9 @@ privcmd_call(unsigned call, __HYPERCALL_5ARG(a1, a2, a3, a4, a5); stac(); - asm volatile("call *%[call]" + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM - : [call] "a" 
(&hypercall_page[call]) : __HYPERCALL_CLOBBER5); clac(); -- cgit v1.2.3 From 5096732f6f695001fa2d6f1335a2680b37912c69 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 11 Jan 2018 21:46:32 +0000 Subject: x86/retpoline/checksum32: Convert assembler indirect jumps Convert all indirect jumps in 32bit checksum assembler code to use non-speculative sequences when CONFIG_RETPOLINE is enabled. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-11-git-send-email-dwmw@amazon.co.uk --- arch/x86/lib/checksum_32.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4d34bb548b41..46e71a74e612 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -29,7 +29,8 @@ #include #include #include - +#include + /* * computes a partial checksum, e.g. for TCP/UDP fragments */ @@ -156,7 +157,7 @@ ENTRY(csum_partial) negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic) andl $-32,%edx lea 3f(%ebx,%ebx), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx 1: addl $64,%esi addl $64,%edi SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) -- cgit v1.2.3 From 7614e913db1f40fff819b36216484dc3808995d4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 11 Jan 2018 21:46:33 +0000 Subject: x86/retpoline/irq32: Convert assembler indirect jumps Convert all indirect jumps in 32bit irq inline asm code to use non-speculative sequences.
Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Acked-by: Ingo Molnar Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515707194-20531-12-git-send-email-dwmw@amazon.co.uk --- arch/x86/kernel/irq_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a83b3346a0e1..c1bdbd3d3232 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -20,6 +20,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack); static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=b" (stack) : "0" (stack), - "D"(func) + [thunk_target] "D"(func) : "memory", "cc", "edx", "ecx", "eax"); } @@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) call_on_stack(print_stack_overflow, isp); asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=a" (arg1), "=b" (isp) : "0" (desc), "1" (isp), - "D" (desc->handle_irq) + [thunk_target] "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; } -- cgit v1.2.3 From 05e0cc84e00c54fb152d1f4b86bc211823a83d0c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 4 Jan 2018 04:35:51 +0200 Subject: net/mlx5: Fix get vector affinity helper function mlx5_get_vector_affinity used to call pci_irq_get_affinity and after reverting the patch that sets the device affinity via PCI_IRQ_AFFINITY API, calling pci_irq_get_affinity becomes useless and it breaks RDMA mlx5 users. 
To fix this, this patch provides an alternative way to retrieve IRQ vector affinity using legacy IRQ API, following smp_affinity read procfs implementation. Fixes: 231243c82793 ("Revert mlx5: move affinity hints assignments to generic code") Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code") Cc: Sagi Grimberg Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1f509d072026..a0610427e168 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1231,7 +1232,23 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) { - return pci_irq_get_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + vector); + const struct cpumask *mask; + struct irq_desc *desc; + unsigned int irq; + int eqn; + int err; + + err = mlx5_vector2eqn(dev, vector, &eqn, &irq); + if (err) + return NULL; + + desc = irq_to_desc(irq); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + mask = irq_data_get_effective_affinity_mask(&desc->irq_data); +#else + mask = desc->irq_common_data.affinity; +#endif + return mask; } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From b6908c296021a99ba2a83a4b4703eb9e6365e5dc Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Thu, 14 Dec 2017 19:23:50 +0200 Subject: net/mlx5: Fix memory leak in bad flow of mlx5_alloc_irq_vectors Fix a memory leak where in case that pci_alloc_irq_vectors failed, priv->irq_info was not released. 
Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Alaa Hleihel Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 95e188d0883e..a4c82fa71aec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -319,6 +319,7 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = &priv->eq_table; int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq); int nvec; + int err; nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE; @@ -328,21 +329,23 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); if (!priv->irq_info) - goto err_free_msix; + return -ENOMEM; nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, nvec, PCI_IRQ_MSIX); - if (nvec < 0) - return nvec; + if (nvec < 0) { + err = nvec; + goto err_free_irq_info; + } table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; return 0; -err_free_msix: +err_free_irq_info: kfree(priv->irq_info); - return -ENOMEM; + return err; } static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) -- cgit v1.2.3 From 72f36be06138bdc11bdbe1f04e4a3e2637ea438d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 20 Nov 2017 09:58:01 +0200 Subject: net/mlx5: Fix mlx5_get_uars_page to return error code Change mlx5_get_uars_page to return ERR_PTR in case of allocation failure. Change all callers accordingly to check the IS_ERR(ptr) instead of NULL. 
Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows") Signed-off-by: Eran Ben Elisha Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 14 ++++++-------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 00cb184fa027..262c1aa2e028 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4160,7 +4160,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_cnt; dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); - if (!dev->mdev->priv.uar) + if (IS_ERR(dev->mdev->priv.uar)) goto err_cong; err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a4c82fa71aec..6dffa58fb178 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1135,8 +1135,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } dev->priv.uar = mlx5_get_uars_page(dev); - if (!dev->priv.uar) { + if (IS_ERR(dev->priv.uar)) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); + err = PTR_ERR(dev->priv.uar); goto err_disable_msix; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 222b25908d01..8b97066dd1f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -168,18 +168,16 @@ struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev) struct mlx5_uars_page *ret; mutex_lock(&mdev->priv.bfregs.reg_head.lock); - if (list_empty(&mdev->priv.bfregs.reg_head.list)) { - ret = alloc_uars_page(mdev, false); - if (IS_ERR(ret)) { - ret = NULL; 
- goto out; - } - list_add(&ret->list, &mdev->priv.bfregs.reg_head.list); - } else { + if (!list_empty(&mdev->priv.bfregs.reg_head.list)) { ret = list_first_entry(&mdev->priv.bfregs.reg_head.list, struct mlx5_uars_page, list); kref_get(&ret->ref_count); + goto out; } + ret = alloc_uars_page(mdev, false); + if (IS_ERR(ret)) + goto out; + list_add(&ret->list, &mdev->priv.bfregs.reg_head.list); out: mutex_unlock(&mdev->priv.bfregs.reg_head.lock); -- cgit v1.2.3 From 259bbc575c5322e0bc675c9a77e937250723c333 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 31 Dec 2017 11:31:34 +0200 Subject: net/mlx5: Fix error handling in load one We didn't store the result of mlx5_init_once, due to that mlx5_load_one returned success on error. Fix that. Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows") Signed-off-by: Maor Gottlieb Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6dffa58fb178..0f88fd30a09a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1123,9 +1123,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_stop_poll; } - if (boot && mlx5_init_once(dev, priv)) { - dev_err(&pdev->dev, "sw objs init failed\n"); - goto err_stop_poll; + if (boot) { + err = mlx5_init_once(dev, priv); + if (err) { + dev_err(&pdev->dev, "sw objs init failed\n"); + goto err_stop_poll; + } } err = mlx5_alloc_irq_vectors(dev); -- cgit v1.2.3 From e556f6dd47eda62cbb046fa92e03265245a1537f Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 26 Dec 2017 13:44:49 +0200 Subject: net/mlx5e: Keep updating ethtool statistics when the interface is down ethtool statistics should be updated even when the interface is 
down since it shows more than just netdev counters, which might change while the logical link is down. One useful use case, for example, is when running RoCE traffic over the interface (while the logical link is down, but physical link is up) and examining rx_prioX_bytes. Fixes: f62b8bb8f2d3 ("net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 8f05efa5c829..ea5fff2c3143 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -207,8 +207,7 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, return; mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_update_stats(priv, true); + mlx5e_update_stats(priv, true); mutex_unlock(&priv->state_lock); for (i = 0; i < mlx5e_num_stats_grps; i++) -- cgit v1.2.3 From 97c8c3aa48ca8eb85d1806e08f882f90d78b1856 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 10 Oct 2017 16:51:44 +0300 Subject: net/mlx5e: Add error print in ETS init ETS initialization might fail, add a print to indicate such failures. 
Fixes: 08fb1dacdd76 ("net/mlx5e: Support DCBNL IEEE ETS") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 9bcf38f4123b..a5c5134f5cb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -922,8 +922,9 @@ static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv, static void mlx5e_ets_init(struct mlx5e_priv *priv) { - int i; struct ieee_ets ets; + int err; + int i; if (!MLX5_CAP_GEN(priv->mdev, ets)) return; @@ -940,7 +941,10 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) ets.prio_tc[0] = 1; ets.prio_tc[1] = 0; - mlx5e_dcbnl_ieee_setets_core(priv, &ets); + err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); + if (err) + netdev_err(priv->netdev, + "%s, Failed to init ETS: %d\n", __func__, err); } enum { -- cgit v1.2.3 From 4b7d4363f14a0398eca48c7e96e46120c5eb6a96 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 10 Oct 2017 16:54:30 +0300 Subject: net/mlx5e: Check support before TC swap in ETS init Should not do the following swap between TCs 0 and 1 when max num of TCs is 1: tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) Fixes: 08fb1dacdd76 ("net/mlx5e: Support DCBNL IEEE ETS") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index a5c5134f5cb2..3d46ef48d5b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -937,9 +937,11 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) ets.prio_tc[i] = i; } - /* 
tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ - ets.prio_tc[0] = 1; - ets.prio_tc[1] = 0; + if (ets.ets_cap > 1) { + /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */ + ets.prio_tc[0] = 1; + ets.prio_tc[1] = 0; + } err = mlx5e_dcbnl_ieee_setets_core(priv, &ets); if (err) -- cgit v1.2.3 From 75b81ce719b79565eb0b39aa9954b6e11a5e73bf Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 10 Jan 2018 17:11:11 +0200 Subject: net/mlx5e: Don't override netdev features field unless in error flow Set features function sets dev->features in order to keep track of which features were successfully changed and which weren't (in case the user asks for more than one change in a single command). This breaks the logic in __netdev_update_features which assumes that dev->features is not changed on success and checks for diffs between features and dev->features (diffs that might not exist at this point because of the driver override). The solution is to keep track of successful/failed feature changes and assign them to dev->features in case of failure only. 
Fixes: 0e405443e803 ("net/mlx5e: Improve set features ndo resiliency") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 43 +++++++++++++---------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d9d8227f195f..311d5ec8407c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3219,12 +3219,12 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr) return 0; } -#define MLX5E_SET_FEATURE(netdev, feature, enable) \ +#define MLX5E_SET_FEATURE(features, feature, enable) \ do { \ if (enable) \ - netdev->features |= feature; \ + *features |= feature; \ else \ - netdev->features &= ~feature; \ + *features &= ~feature; \ } while (0) typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); @@ -3347,6 +3347,7 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) #endif static int mlx5e_handle_feature(struct net_device *netdev, + netdev_features_t *features, netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) @@ -3365,34 +3366,40 @@ static int mlx5e_handle_feature(struct net_device *netdev, return err; } - MLX5E_SET_FEATURE(netdev, feature, enable); + MLX5E_SET_FEATURE(features, feature, enable); return 0; } static int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { + netdev_features_t oper_features = netdev->features; int err; - err = mlx5e_handle_feature(netdev, features, NETIF_F_LRO, - set_feature_lro); - err |= mlx5e_handle_feature(netdev, features, + err = mlx5e_handle_feature(netdev, &oper_features, features, + NETIF_F_LRO, set_feature_lro); + err |= mlx5e_handle_feature(netdev, &oper_features, features, NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); - err |= mlx5e_handle_feature(netdev, 
features, NETIF_F_HW_TC, - set_feature_tc_num_filters); - err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, - set_feature_rx_all); - err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXFCS, - set_feature_rx_fcs); - err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_RX, - set_feature_rx_vlan); + err |= mlx5e_handle_feature(netdev, &oper_features, features, + NETIF_F_HW_TC, set_feature_tc_num_filters); + err |= mlx5e_handle_feature(netdev, &oper_features, features, + NETIF_F_RXALL, set_feature_rx_all); + err |= mlx5e_handle_feature(netdev, &oper_features, features, + NETIF_F_RXFCS, set_feature_rx_fcs); + err |= mlx5e_handle_feature(netdev, &oper_features, features, + NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); #ifdef CONFIG_RFS_ACCEL - err |= mlx5e_handle_feature(netdev, features, NETIF_F_NTUPLE, - set_feature_arfs); + err |= mlx5e_handle_feature(netdev, &oper_features, features, + NETIF_F_NTUPLE, set_feature_arfs); #endif - return err ? -EINVAL : 0; + if (err) { + netdev->features = oper_features; + return -EINVAL; + } + + return 0; } static netdev_features_t mlx5e_fix_features(struct net_device *netdev, -- cgit v1.2.3 From afc98a0b46d8576a55f18092400cc518d03a79a1 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Wed, 3 Jan 2018 17:23:55 +0200 Subject: net/mlx5: Update ptp_clock_event foreach PPS event PPS event did not update ptp_clock_event fields, therefore, timestamp value was not updated correctly. This fix updates the event source and the timestamp value for each PPS event. 
Fixes: 7c39afb394c7 ("net/mlx5: PTP code migration to driver core section") Signed-off-by: Feras Daoud Reported-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index fa8aed62b231..5701f125e99c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -423,9 +423,13 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: + ptp_event.index = pin; + ptp_event.timestamp = timecounter_cyc2time(&clock->tc, + be64_to_cpu(eqe->data.pps.time_stamp)); if (clock->pps_info.enabled) { ptp_event.type = PTP_CLOCK_PPSUSR; - ptp_event.pps_times.ts_real = ns_to_timespec64(eqe->data.pps.time_stamp); + ptp_event.pps_times.ts_real = + ns_to_timespec64(ptp_event.timestamp); } else { ptp_event.type = PTP_CLOCK_EXTTS; } -- cgit v1.2.3 From 237f258c42c905f71c694670fe4d9773d85c36ed Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Mon, 8 Jan 2018 10:01:04 +0200 Subject: net/mlx5e: Remove timestamp set from netdevice open flow To avoid configuration override, timestamp set call will be moved from the netdevice open flow to the init flow. By this, a close-open procedure will not override the timestamp configuration. In addition, the change will rename mlx5e_timestamp_set function to be mlx5e_timestamp_init. 
Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support") Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 543060c305a0..c2d89bfa1a70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -895,7 +895,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); -void mlx5e_timestamp_set(struct mlx5e_priv *priv); +void mlx5e_timestamp_init(struct mlx5e_priv *priv); struct mlx5e_redirect_rqt_param { bool is_rss; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 311d5ec8407c..d8aefeed124d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2669,7 +2669,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, netif_carrier_on(netdev); } -void mlx5e_timestamp_set(struct mlx5e_priv *priv) +void mlx5e_timestamp_init(struct mlx5e_priv *priv) { priv->tstamp.tx_type = HWTSTAMP_TX_OFF; priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; @@ -2690,7 +2690,6 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); - mlx5e_timestamp_set(priv); if (priv->profile->update_stats) queue_delayed_work(priv->wq, &priv->update_stats_work, 0); @@ -4146,6 +4145,8 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, 
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + mlx5e_timestamp_init(priv); } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2c43606c26b5..3409d86eb06b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -877,6 +877,8 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, mlx5e_build_rep_params(mdev, &priv->channels.params); mlx5e_build_rep_netdev(netdev); + + mlx5e_timestamp_init(priv); } static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 8812d7208e8f..ee2f378c5030 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -86,6 +86,8 @@ void mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); mlx5i_build_nic_params(mdev, &priv->channels.params); + mlx5e_timestamp_init(priv); + /* netdev init */ netdev->hw_features |= NETIF_F_SG; netdev->hw_features |= NETIF_F_IP_CSUM; @@ -450,7 +452,6 @@ static int mlx5i_open(struct net_device *netdev) mlx5e_refresh_tirs(epriv, false); mlx5e_activate_priv_channels(epriv); - mlx5e_timestamp_set(epriv); mutex_unlock(&epriv->state_lock); return 0; -- cgit v1.2.3 From 69c4d8ed49568598f200b340b17e391c35be3d4b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 22:04:16 +0100 Subject: arm64: dts: socfpga: add missing interrupt-parent The PMU node has no working interrupt, as shown by this dtc warning: arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dtb: Warning (interrupts_property): Missing interrupt-parent 
for /pmu This adds an interrupt-parent property so we can correctly parse that interrupt number. Signed-off-by: Arnd Bergmann Acked-by: Dinh Nguyen Signed-off-by: Olof Johansson --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c9bdc7ab50b..9db19314c60c 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -66,6 +66,7 @@ <&cpu1>, <&cpu2>, <&cpu3>; + interrupt-parent = <&intc>; }; psci { -- cgit v1.2.3 From 4d94e776bd29670f01befa27e12df784fa05fa2e Mon Sep 17 00:00:00 2001 From: Nir Perry Date: Thu, 11 Jan 2018 23:43:26 -0800 Subject: Input: ALPS - fix multi-touch decoding on SS4 plus touchpads The fix for handling two-finger scroll (4a646580f793 - "Input: ALPS - fix two-finger scroll breakage in right side on ALPS touchpad") introduced a minor "typo" that broke decoding of multi-touch events on some ALPS touchpads. For example, tapping with three-fingers can no longer be used to emulate middle-mouse-button (the kernel doesn't recognize this as the proper event, and doesn't report it correctly to userspace). This affects touchpads that use SS4 "plus" protocol variant, like those found on Dell E7270 & E7470 laptops (tested on E7270). First, probably the code in alps_decode_ss4_v2() for case SS4_PACKET_ID_MULTI used inconsistent indices to "f->mt[]". You can see 0 & 1 are used for the "if" part but 2 & 3 are used for the "else" part. Second, in the previous patch, new macros were introduced to decode X coordinates specific to the SS4 "plus" variant, but the macro to define the maximum X value wasn't changed accordingly. The macros to decode X values for "plus" variant are effectively shifted right by 1 bit, but the max wasn't shifted too.
This causes the driver to incorrectly handle "no data" cases, which also interfered with how multi-touch was handled. Fixes: 4a646580f793 ("Input: ALPS - fix two-finger scroll breakage...") Signed-off-by: Nir Perry Reviewed-by: Masaki Ota Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 23 +++++++++++++---------- drivers/input/mouse/alps.h | 10 ++++++---- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 579b899add26..dbe57da8c1a1 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1250,29 +1250,32 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_MULTI: if (priv->flags & ALPS_BUTTONPAD) { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX_BL; } else { f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX_BL; } + no_data_y = SS4_MFPACKET_NO_AY_BL; f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0); f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX_BL; - no_data_y = SS4_MFPACKET_NO_AY_BL; } else { if (IS_SS4PLUS_DEV(priv->dev_id)) { - f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_PLUS_STD_MF_X_V2(p, 1); + no_data_x = SS4_PLUS_MFPACKET_NO_AX; } else { - f->mt[0].x = SS4_STD_MF_X_V2(p, 0); - f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + f->mt[2].x = SS4_STD_MF_X_V2(p, 0); + f->mt[3].x = SS4_STD_MF_X_V2(p, 1); + no_data_x = SS4_MFPACKET_NO_AX; } + no_data_y = SS4_MFPACKET_NO_AY; + f->mt[2].y = SS4_STD_MF_Y_V2(p, 0); f->mt[3].y = SS4_STD_MF_Y_V2(p, 1); - no_data_x = SS4_MFPACKET_NO_AX; - no_data_y = SS4_MFPACKET_NO_AY; } f->first_mp = 0; diff --git 
a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index c80a7c76cb76..79b6d69d1486 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -141,10 +141,12 @@ enum SS4_PACKET_ID { #define SS4_TS_Z_V2(_b) (s8)(_b[4] & 0x7F) -#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ -#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ -#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coordinate value */ -#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX 8160 /* X-Coordinate value */ +#define SS4_MFPACKET_NO_AY 4080 /* Y-Coordinate value */ +#define SS4_MFPACKET_NO_AX_BL 8176 /* Buttonless X-Coord value */ +#define SS4_MFPACKET_NO_AY_BL 4088 /* Buttonless Y-Coord value */ +#define SS4_PLUS_MFPACKET_NO_AX 4080 /* SS4 PLUS, X */ +#define SS4_PLUS_MFPACKET_NO_AX_BL 4088 /* Buttonless SS4 PLUS, X */ /* * enum V7_PACKET_ID - defines the packet type for V7 -- cgit v1.2.3 From 7cce5d835467ea66c342951d0ed6adaffe39b1c8 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 11 Jan 2018 17:26:59 +0100 Subject: MAINTAINERS: mtd/nand: update Microchip nand entry Update Wenyou Yang email address. Take advantage of this update to move this entry to the MICROCHIP / ATMEL location and add the DT binding documentation link. 
Signed-off-by: Nicolas Ferre Acked-by: Wenyou Yang Signed-off-by: Boris Brezillon --- MAINTAINERS | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..37ee5ae4bae2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2382,13 +2382,6 @@ F: Documentation/devicetree/bindings/input/atmel,maxtouch.txt F: drivers/input/touchscreen/atmel_mxt_ts.c F: include/linux/platform_data/atmel_mxt_ts.h -ATMEL NAND DRIVER -M: Wenyou Yang -M: Josh Wu -L: linux-mtd@lists.infradead.org -S: Supported -F: drivers/mtd/nand/atmel/* - ATMEL SAMA5D2 ADC DRIVER M: Ludovic Desroches L: linux-iio@vger.kernel.org @@ -9045,6 +9038,14 @@ F: drivers/media/platform/atmel/atmel-isc.c F: drivers/media/platform/atmel/atmel-isc-regs.h F: devicetree/bindings/media/atmel-isc.txt +MICROCHIP / ATMEL NAND DRIVER +M: Wenyou Yang +M: Josh Wu +L: linux-mtd@lists.infradead.org +S: Supported +F: drivers/mtd/nand/atmel/* +F: Documentation/devicetree/bindings/mtd/atmel-nand.txt + MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER M: Woojung Huh M: Microchip Linux Driver Support -- cgit v1.2.3 From 12663b442e5ac5aa3d6097cd3f287c71ba46d26e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 11 Jan 2018 21:39:20 +0100 Subject: mtd: mtd_oobtest: Handle bitflips during reads Reads from NAND devices usually trigger bitflips, this is an expected behavior. While bitflips are under a given threshold, the MTD core returns 0. However, when the number of corrected bitflips is above this same threshold, -EUCLEAN is returned to inform the upper layer that this block is slightly dying and soon the ECC engine will be overtaken so actions should be taken to move the data out of it. This particular condition should not be treated like an error and the test should continue. 
Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/tests/oobtest.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c index 1cb3f7758fb6..766b2c385682 100644 --- a/drivers/mtd/tests/oobtest.c +++ b/drivers/mtd/tests/oobtest.c @@ -193,6 +193,9 @@ static int verify_eraseblock(int ebnum) ops.datbuf = NULL; ops.oobbuf = readbuf; err = mtd_read_oob(mtd, addr, &ops); + if (mtd_is_bitflip(err)) + err = 0; + if (err || ops.oobretlen != use_len) { pr_err("error: readoob failed at %#llx\n", (long long)addr); @@ -227,6 +230,9 @@ static int verify_eraseblock(int ebnum) ops.datbuf = NULL; ops.oobbuf = readbuf; err = mtd_read_oob(mtd, addr, &ops); + if (mtd_is_bitflip(err)) + err = 0; + if (err || ops.oobretlen != mtd->oobavail) { pr_err("error: readoob failed at %#llx\n", (long long)addr); @@ -286,6 +292,9 @@ static int verify_eraseblock_in_one_go(int ebnum) /* read entire block's OOB at one go */ err = mtd_read_oob(mtd, addr, &ops); + if (mtd_is_bitflip(err)) + err = 0; + if (err || ops.oobretlen != len) { pr_err("error: readoob failed at %#llx\n", (long long)addr); @@ -527,6 +536,9 @@ static int __init mtd_oobtest_init(void) pr_info("attempting to start read past end of OOB\n"); pr_info("an error is expected...\n"); err = mtd_read_oob(mtd, addr0, &ops); + if (mtd_is_bitflip(err)) + err = 0; + if (err) { pr_info("error occurred as expected\n"); err = 0; @@ -571,6 +583,9 @@ static int __init mtd_oobtest_init(void) pr_info("attempting to read past end of device\n"); pr_info("an error is expected...\n"); err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops); + if (mtd_is_bitflip(err)) + err = 0; + if (err) { pr_info("error occurred as expected\n"); err = 0; @@ -615,6 +630,9 @@ static int __init mtd_oobtest_init(void) pr_info("attempting to read past end of device\n"); pr_info("an error is expected...\n"); err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops); + if 
(mtd_is_bitflip(err)) + err = 0; + if (err) { pr_info("error occurred as expected\n"); err = 0; @@ -684,6 +702,9 @@ static int __init mtd_oobtest_init(void) ops.datbuf = NULL; ops.oobbuf = readbuf; err = mtd_read_oob(mtd, addr, &ops); + if (mtd_is_bitflip(err)) + err = 0; + if (err) goto out; if (memcmpshow(addr, readbuf, writebuf, -- cgit v1.2.3 From 87e89ce8d0d14f573c068c61bec2117751fb5103 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 12 Jan 2018 10:13:36 +0100 Subject: mtd: nand: Fix nand_do_read_oob() return value Starting from commit 041e4575f034 ("mtd: nand: handle ECC errors in OOB"), nand_do_read_oob() (from the NAND core) did return 0 or a negative error, and the MTD layer expected it. However, the trend for the NAND layer is now to return an error or a positive number of bitflips. Deciding which status to return to the user belongs to the MTD layer. Commit e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") brought this logic to the mtd_read_oob() function while the return value coming from nand_do_read_oob() (called by the ->_read_oob() hook) was left unchanged. 
Fixes: e47f68587b82 ("mtd: check for max_bitflips in mtd_read_oob()") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 96c97588e1ba..3ff77bef9739 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3797,6 +3797,7 @@ EXPORT_SYMBOL(nand_write_oob_syndrome); static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { + unsigned int max_bitflips = 0; int page, realpage, chipnr; struct nand_chip *chip = mtd_to_nand(mtd); struct mtd_ecc_stats stats; @@ -3854,6 +3855,8 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, nand_wait_ready(mtd); } + max_bitflips = max_t(unsigned int, max_bitflips, ret); + readlen -= len; if (!readlen) break; @@ -3879,7 +3882,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, if (mtd->ecc_stats.failed - stats.failed) return -EBADMSG; - return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0; + return max_bitflips; } /** -- cgit v1.2.3 From f8f85dc00b7427de6222ea3955c52512315d13cd Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 12 Jan 2018 11:40:21 +0100 Subject: KVM: arm64: Fix GICv4 init when called from vgic_its_create Commit 3d1ad640f8c94 ("KVM: arm/arm64: Fix GICv4 ITS initialization issues") moved the vgic_supports_direct_msis() check in vgic_v4_init(). However when vgic_v4_init is called from vgic_its_create(), the has_its field is not yet set. Hence vgic_supports_direct_msis returns false and vgic_v4_init does nothing. The gic/its init sequence is a bit messy, so let's be specific about the prerequisite checks in the various call paths instead of relying on a common wrapper. 
Fixes: 3d1ad640f8c94 ("KVM: arm/arm64: Fix GICv4 ITS initialization issues") Reported-by: Eric Auger Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-init.c | 8 +++++--- virt/kvm/arm/vgic/vgic-v4.c | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 62310122ee78..743ca5cb05ef 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -285,9 +285,11 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; - ret = vgic_v4_init(kvm); - if (ret) - goto out; + if (vgic_has_its(kvm)) { + ret = vgic_v4_init(kvm); + if (ret) + goto out; + } kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_enable(vcpu); diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c index 4a37292855bc..bc4265154bac 100644 --- a/virt/kvm/arm/vgic/vgic-v4.c +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -118,7 +118,7 @@ int vgic_v4_init(struct kvm *kvm) struct kvm_vcpu *vcpu; int i, nr_vcpus, ret; - if (!vgic_supports_direct_msis(kvm)) + if (!kvm_vgic_global_state.has_gicv4) return 0; /* Nothing to see here... move along. */ if (dist->its_vm.vpes) -- cgit v1.2.3 From 117cc7a908c83697b0b737d15ae1eb5943afe35b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jan 2018 11:11:27 +0000 Subject: x86/retpoline: Fill return stack buffer on vmexit In accordance with the Intel and AMD documentation, we need to overwrite all entries in the RSB on exiting a guest, to prevent malicious branch target predictions from affecting the host kernel. This is needed both for retpoline and for IBRS. 
[ak: numbers again for the RSB stuffing labels] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Tested-by: Peter Zijlstra (Intel) Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/nospec-branch.h | 78 +++++++++++++++++++++++++++++++++++- arch/x86/kvm/svm.c | 4 ++ arch/x86/kvm/vmx.c | 4 ++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index ea034fa6e261..402a11c803c3 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,6 +7,48 @@ #include #include +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include ") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. 
+ */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; + #ifdef __ASSEMBLY__ /* @@ -74,6 +116,20 @@ #else call *\reg #endif +.endm + + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: +#endif .endm #else /* __ASSEMBLY__ */ @@ -119,7 +175,7 @@ X86_FEATURE_RETPOLINE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) -#else /* No retpoline */ +#else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -134,5 +190,25 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* + * On VMEXIT we must ensure that no RSB predictions learned in the guest + * can be followed in the host, by overwriting the RSB completely. Both + * retpoline and IBRS mitigations for Spectre v2 need this; only on future + * CPUs with IBRS_ATT *might* it be avoided. 
+ */ +static inline void vmexit_fill_RSB(void) +{ +#ifdef CONFIG_RETPOLINE + unsigned long loops = RSB_CLEAR_LOOPS / 2; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=&r" (loops), ASM_CALL_CONSTRAINT + : "r" (loops) : "memory" ); +#endif +} #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0e68f0b3cbf7..2744b97345b8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "trace.h" @@ -4985,6 +4986,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 62ee4362e1c1..d1e25dba3112 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "trace.h" #include "pmu.h" @@ -9403,6 +9404,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (debugctlmsr) update_debugctlmsr(debugctlmsr); -- cgit v1.2.3 From 5954c4a1455c3bc42acb2c286744eae1aaa00918 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 11 Jan 2018 14:05:11 -0600 Subject: ASoC: rt5645: add fallback case for jack detection support Commit 78f5605c0329 ("ASoC: rt5645: cleanup DMI matching code") did a lot of useful cleanups. This patch adds a default case to enable jack detection if there is no pdata, device property or quirk. The chosen jd-mode3 is the most common and should limit the addition of new DMI-based quirks. Existing DMI quirks which only set this mode are left as is and not removed. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Hans de Goede Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index fbaf36aeb587..5292fca2f54f 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3739,6 +3739,17 @@ static const struct dmi_system_id dmi_platform_data[] = { { } }; +static bool rt5645_check_dp(struct device *dev) +{ + if (device_property_present(dev, "realtek,in2-differential") || + device_property_present(dev, "realtek,dmic1-data-pin") || + device_property_present(dev, "realtek,dmic2-data-pin") || + device_property_present(dev, "realtek,jd-mode")) + return true; + + return false; +} + static int rt5645_parse_dt(struct rt5645_priv *rt5645, struct device *dev) { rt5645->pdata.in2_diff = device_property_read_bool(dev, @@ -3779,8 +3790,10 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, if (pdata) rt5645->pdata = *pdata; - else + else if (rt5645_check_dp(&i2c->dev)) rt5645_parse_dt(rt5645, &i2c->dev); + else + rt5645->pdata = jd_mode3_platform_data; if (quirk != -1) { rt5645->pdata.in2_diff = QUIRK_IN2_DIFF(quirk); -- cgit v1.2.3 From 3fa4680b860bf48b437d6a2c039789c4abe202ae Mon Sep 17 00:00:00 2001 From: Shilpasri G Bhat Date: Fri, 12 Jan 2018 12:43:53 +0530 Subject: cpufreq: powernv: Dont assume distinct pstate values for nominal and pmin Some OpenPOWER boxes can have same pstate values for nominal and pmin pstates. In these boxes the current code will not initialize 'powernv_pstate_info.min' variable and result in erroneous CPU frequency reporting. This patch fixes this problem. Fixes: 09ca4c9b5958 (cpufreq: powernv: Replacing pstate_id with frequency table index) Reported-by: Alvin Wang Signed-off-by: Shilpasri G Bhat Acked-by: Viresh Kumar Cc: 4.8+ # 4.8+ Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/powernv-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 8a4e2ce0804c..29cdec198657 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -337,9 +337,9 @@ next: if (id == pstate_max) powernv_pstate_info.max = i; - else if (id == pstate_nominal) + if (id == pstate_nominal) powernv_pstate_info.nominal = i; - else if (id == pstate_min) + if (id == pstate_min) powernv_pstate_info.min = i; if (powernv_pstate_info.wof_enabled && id == pstate_turbo) { -- cgit v1.2.3 From 1289e0e29857e606a70a0200bf7849ae38d3493a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 11 Jan 2018 11:15:43 -0800 Subject: perf/x86/rapl: Fix Haswell and Broadwell server RAPL event Perf-fuzzer triggers non-existent MSR access in RAPL driver on Haswell-EX. Haswell/Broadwell server and client have different RAPL events. Since 'commit 7f2236d0bf9a ("perf/x86/rapl: Use Intel family macros for RAPL")', it accidentally assigns RAPL client events to server.
Signed-off-by: Kan Liang Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Linux-kernel@vger.kernel.org Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 005908ee9333..a2efb490f743 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -755,14 +755,14 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), -- cgit v1.2.3 From dd8bd53ab86133327412e74bf5ba31a8ec2826d4 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 11 Jan 2018 15:50:20 +0000 Subject: perf evlist: Remove trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. 
Signed-off-by: Luis de Bethencourt Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Joe Perches Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180111155020.9782-1-luisbg@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f0a5e09c4071..120efd85f2c8 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1760,7 +1760,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, switch (old_state) { case BKW_MMAP_NOTREADY: { if (state != BKW_MMAP_RUNNING) - goto state_err;; + goto state_err; break; } case BKW_MMAP_RUNNING: { -- cgit v1.2.3 From 41013f0c095980775e0746272873891ca7c28fb1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 4 Jan 2018 12:59:55 -0800 Subject: perf script python: Add script to profile and resolve physical mem type There could be different types of memory in the system. E.g normal System Memory, Persistent Memory. To understand how the workload maps to those memories, it's important to know the I/O statistics of them. Perf can collect physical addresses, but those are raw data. It still needs extra work to resolve the physical addresses. Provide a script to facilitate the physical addresses resolving and I/O statistics. Profile with MEM_INST_RETIRED.ALL_LOADS or MEM_UOPS_RETIRED.ALL_LOADS event if any of them is available. Look up the /proc/iomem and resolve the physical address. Provide memory type summary. Here is an example output: # perf script report mem-phys-addr Event: mem_inst_retired.all_loads:P Memory type count percentage ---------------------------------------- ----------- ----------- System RAM 74 53.2% Persistent Memory 55 39.6% N/A --- Changes since V2: - Apply the new license rules. - Add comments for globals Changes since V1: - Do not mix DLA and Load Latency. Do not compare the loads and stores. Only profile the loads. 
- Use event name to replace the RAW event Signed-off-by: Kan Liang Reviewed-by: Andi Kleen Cc: Dan Williams Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Stephane Eranian Link: https://lkml.kernel.org/r/1515099595-34770-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/bin/mem-phys-addr-record | 19 +++++ tools/perf/scripts/python/bin/mem-phys-addr-report | 3 + tools/perf/scripts/python/mem-phys-addr.py | 95 ++++++++++++++++++++++ .../util/scripting-engines/trace-event-python.c | 2 + 4 files changed, 119 insertions(+) create mode 100644 tools/perf/scripts/python/bin/mem-phys-addr-record create mode 100644 tools/perf/scripts/python/bin/mem-phys-addr-report create mode 100644 tools/perf/scripts/python/mem-phys-addr.py diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-record b/tools/perf/scripts/python/bin/mem-phys-addr-record new file mode 100644 index 000000000000..5a875122a904 --- /dev/null +++ b/tools/perf/scripts/python/bin/mem-phys-addr-record @@ -0,0 +1,19 @@ +#!/bin/bash + +# +# Profiling physical memory by all retired load instructions/uops event +# MEM_INST_RETIRED.ALL_LOADS or MEM_UOPS_RETIRED.ALL_LOADS +# + +load=`perf list | grep mem_inst_retired.all_loads` +if [ -z "$load" ]; then + load=`perf list | grep mem_uops_retired.all_loads` +fi +if [ -z "$load" ]; then + echo "There is no event to count all retired load instructions/uops." 
+ exit 1 +fi + +arg=$(echo $load | tr -d ' ') +arg="$arg:P" +perf record --phys-data -e $arg $@ diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-report b/tools/perf/scripts/python/bin/mem-phys-addr-report new file mode 100644 index 000000000000..3f2b847e2eab --- /dev/null +++ b/tools/perf/scripts/python/bin/mem-phys-addr-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: resolve physical address samples +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/mem-phys-addr.py diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py new file mode 100644 index 000000000000..ebee2c5ae496 --- /dev/null +++ b/tools/perf/scripts/python/mem-phys-addr.py @@ -0,0 +1,95 @@ +# mem-phys-addr.py: Resolve physical address samples +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2018, Intel Corporation. + +from __future__ import division +import os +import sys +import struct +import re +import bisect +import collections + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +#physical address ranges for System RAM +system_ram = [] +#physical address ranges for Persistent Memory +pmem = [] +#file object for proc iomem +f = None +#Count for each type of memory +load_mem_type_cnt = collections.Counter() +#perf event name +event_name = None + +def parse_iomem(): + global f + f = open('/proc/iomem', 'r') + for i, j in enumerate(f): + m = re.split('-|:',j,2) + if m[2].strip() == 'System RAM': + system_ram.append(long(m[0], 16)) + system_ram.append(long(m[1], 16)) + if m[2].strip() == 'Persistent Memory': + pmem.append(long(m[0], 16)) + pmem.append(long(m[1], 16)) + +def print_memory_type(): + print "Event: %s" % (event_name) + print "%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), + print "%-40s %10s %10s\n" % ("----------------------------------------", \ + "-----------", "-----------"), + total = sum(load_mem_type_cnt.values()) + for mem_type, count in 
sorted(load_mem_type_cnt.most_common(), \ + key = lambda(k, v): (v, k), reverse = True): + print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total), + +def trace_begin(): + parse_iomem() + +def trace_end(): + print_memory_type() + f.close() + +def is_system_ram(phys_addr): + #/proc/iomem is sorted + position = bisect.bisect(system_ram, phys_addr) + if position % 2 == 0: + return False + return True + +def is_persistent_mem(phys_addr): + position = bisect.bisect(pmem, phys_addr) + if position % 2 == 0: + return False + return True + +def find_memory_type(phys_addr): + if phys_addr == 0: + return "N/A" + if is_system_ram(phys_addr): + return "System RAM" + + if is_persistent_mem(phys_addr): + return "Persistent Memory" + + #slow path, search all + f.seek(0, 0) + for j in f: + m = re.split('-|:',j,2) + if long(m[0], 16) <= phys_addr <= long(m[1], 16): + return m[2] + return "N/A" + +def process_event(param_dict): + name = param_dict["ev_name"] + sample = param_dict["sample"] + phys_addr = sample["phys_addr"] + + global event_name + if event_name == None: + event_name = name + load_mem_type_cnt[find_memory_type(phys_addr)] += 1 diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c1848b543f27..ea070883c593 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -499,6 +499,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->time)); pydict_set_item_string_decref(dict_sample, "period", PyLong_FromUnsignedLongLong(sample->period)); + pydict_set_item_string_decref(dict_sample, "phys_addr", + PyLong_FromUnsignedLongLong(sample->phys_addr)); set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); -- cgit v1.2.3 From b76bb4e64e7514a9d9027e2b0d0b76ee84a4d27b Mon Sep 17 00:00:00 2001 From: Miquel Raynal 
Date: Tue, 9 Jan 2018 11:36:31 +0100 Subject: dt-bindings: mtd: document new nand-rb property There are already an atmel,rb and an allwinner,rb properties, let's not make other ones and instead use a generic term: nand-rb to define NAND chips Ready/Busy lines. Signed-off-by: Miquel Raynal Reviewed-by: Rob Herring Signed-off-by: Boris Brezillon --- Documentation/devicetree/bindings/mtd/nand.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt index 133f3813719c..8bb11d809429 100644 --- a/Documentation/devicetree/bindings/mtd/nand.txt +++ b/Documentation/devicetree/bindings/mtd/nand.txt @@ -43,6 +43,7 @@ Optional NAND chip properties: This is particularly useful when only the in-band area is used by the upper layers, and you want to make your NAND as reliable as possible. +- nand-rb: shall contain the native Ready/Busy ids. The ECC strength and ECC step size properties define the correction capability of a controller. Together, they say a controller can correct "{strength} bit -- cgit v1.2.3 From a82d20698a92cab7c885355cd0ba3c76c45c9e2f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 9 Jan 2018 11:36:32 +0100 Subject: dt-bindings: mtd: add Marvell NAND controller documentation Document the legacy and the new bindings for Marvell NAND controller. The pxa3xx_nand.c driver does only support legacy bindings, which are incomplete and inaccurate. A rework of this controller (called marvell_nand.c) does support both. 
Signed-off-by: Miquel Raynal Reviewed-by: Rob Herring Signed-off-by: Boris Brezillon --- .../devicetree/bindings/mtd/marvell-nand.txt | 123 +++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 Documentation/devicetree/bindings/mtd/marvell-nand.txt diff --git a/Documentation/devicetree/bindings/mtd/marvell-nand.txt b/Documentation/devicetree/bindings/mtd/marvell-nand.txt new file mode 100644 index 000000000000..c08fb477b3c6 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/marvell-nand.txt @@ -0,0 +1,123 @@ +Marvell NAND Flash Controller (NFC) + +Required properties: +- compatible: can be one of the following: + * "marvell,armada-8k-nand-controller" + * "marvell,armada370-nand-controller" + * "marvell,pxa3xx-nand-controller" + * "marvell,armada-8k-nand" (deprecated) + * "marvell,armada370-nand" (deprecated) + * "marvell,pxa3xx-nand" (deprecated) + Compatibles marked deprecated support only the old bindings described + at the bottom. +- reg: NAND flash controller memory area. +- #address-cells: shall be set to 1. Encode the NAND CS. +- #size-cells: shall be set to 0. +- interrupts: shall define the NAND controller interrupt. +- clocks: shall reference the NAND controller clock. +- marvell,system-controller: Set to retrieve the syscon node that handles + NAND controller related registers (only required with the + "marvell,armada-8k-nand[-controller]" compatibles). + +Optional properties: +- label: see partition.txt. New platforms shall omit this property. +- dmas: shall reference DMA channel associated to the NAND controller. + This property is only used with "marvell,pxa3xx-nand[-controller]" + compatible strings. +- dma-names: shall be "rxtx". + This property is only used with "marvell,pxa3xx-nand[-controller]" + compatible strings. + +Optional children nodes: +Children nodes represent the available NAND chips. + +Required properties: +- reg: shall contain the native Chip Select ids (0-3). +- nand-rb: see nand.txt (0-1). 
+ +Optional properties: +- marvell,nand-keep-config: orders the driver not to take the timings + from the core and to leave them completely untouched. Bootloader + timings will then be used. +- label: MTD name. +- nand-on-flash-bbt: see nand.txt. +- nand-ecc-mode: see nand.txt. Will use hardware ECC if not specified. +- nand-ecc-algo: see nand.txt. This property is essentially useful when + not using hardware ECC. However, it may be added when using hardware + ECC for clarification but will be ignored by the driver because ECC + mode is chosen depending on the page size and the strength required by + the NAND chip. This value may be overwritten with nand-ecc-strength + property. +- nand-ecc-strength: see nand.txt. +- nand-ecc-step-size: see nand.txt. Marvell's NAND flash controller does + use fixed strength (1-bit for Hamming, 16-bit for BCH), so the actual + step size will shrink or grow in order to fit the required strength. + Step sizes are not completely random for all and follow certain + patterns described in AN-379, "Marvell SoC NFC ECC". + +See Documentation/devicetree/bindings/mtd/nand.txt for more details on +generic bindings.
+ + +Example: +nand_controller: nand-controller@d0000 { + compatible = "marvell,armada370-nand-controller"; + reg = <0xd0000 0x54>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = ; + clocks = <&coredivclk 0>; + + nand@0 { + reg = <0>; + label = "main-storage"; + nand-rb = <0>; + nand-ecc-mode = "hw"; + marvell,nand-keep-config; + nand-on-flash-bbt; + nand-ecc-strength = <4>; + nand-ecc-step-size = <512>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "Rootfs"; + reg = <0x00000000 0x40000000>; + }; + }; + }; +}; + + +Note on legacy bindings: One can find, in not-updated device trees, +bindings slightly different than described above with other properties +described below as well as the partitions node at the root of a so +called "nand" node (without clear controller/chip separation). + +Legacy properties: +- marvell,nand-enable-arbiter: To enable the arbiter, all boards blindly + used it, this bit was set by the bootloader for many boards and even if + it is marked reserved in several datasheets, it might be needed to set + it (otherwise it is harmless) so whether or not this property is set, + the bit is selected by the driver. +- num-cs: Number of chip-select lines to use, all boards blindly set 1 + to this and for a reason, other values would have failed. The value of + this property is ignored. 
+ +Example: + + nand0: nand@43100000 { + compatible = "marvell,pxa3xx-nand"; + reg = <0x43100000 90>; + interrupts = <45>; + dmas = <&pdma 97 0>; + dma-names = "rxtx"; + #address-cells = <1>; + marvell,nand-keep-config; + marvell,nand-enable-arbiter; + num-cs = <1>; + /* Partitions (optional) */ + }; -- cgit v1.2.3 From de21dc1d9a2a9fc5023c1fe3a24ba21e68c34928 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 25 Nov 2017 01:24:44 +0900 Subject: mmc: tmio: use mmc_can_gpio_cd() instead of checking TMIO_MMC_USE_GPIO_CD To use a GPIO line for card detection, TMIO_MMC_USE_GPIO_CD is set by a legacy board (arch/sh/boards/mach-ecovec24). For DT platforms, the "cd-gpios" property is a legitimate way for that in case the IP-builtin card detection can not be used for some reason. mmc_of_parse() calls mmc_gpiod_request_cd() to set up ctx->cd_gpio if the "cd-gpios" property is specified. To cater to both cases, mmc_can_gpio_cd() is a correct way to check which card detection logic is used. Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 6b18c0509ff4..0929b987fb29 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1232,7 +1232,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, } mmc->max_seg_size = mmc->max_req_size; - _host->native_hotplug = !(pdata->flags & TMIO_MMC_USE_GPIO_CD || + _host->native_hotplug = !(mmc_can_gpio_cd(mmc) || mmc->caps & MMC_CAP_NEEDS_POLL || !mmc_card_is_removable(mmc)); -- cgit v1.2.3 From 02f26ecf8c772751d4b24744d487f6b1b20e75d4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 9 Jan 2018 11:36:33 +0100 Subject: mtd: nand: add reworked Marvell NAND controller driver Add marvell_nand driver which aims at replacing the existing pxa3xx_nand driver. 
The new driver intends to be easier to understand and follows the brand new NAND framework rules by implementing hooks for every pattern the controller might support and referencing them inside a parser object that will be given to the core at each ->exec_op() call. Raw accessors are implemented, useful to test/debug memory/filesystem corruptions. Userspace binaries contained in the mtd-utils package may now be used and their output trusted. Most of the DT nodes using the old driver kept non-optimal timings from the bootloader (even if there were some mechanisms to derive them if the chip was ONFI compliant). The new default is to implement ->setup_data_interface() and follow the core's decision regarding the chip. Thanks to the improved timings, implementation of ONFI mode 5 support (with EDO managed by adding a delay on data sampling), merging the commands together and optimizing writes in the command registers, the new driver may achieve faster throughputs in both directions. Measurements show an improvement of about +23% read throughput and +24% write throughput. These measurements have been done with an Armada-385-DB-AP (4kiB NAND pages forced in 4-bit strength BCH ECC correction) using the userspace tool 'flash_speed' from the MTD test suite. Besides these important topics, the new driver addresses several unsolved known issues in the old driver which: - did not work with ECC soft nor with ECC none ; - relied on naked read/write (which is unchanged) while the NFCv1 embedded in the pxa3xx platforms does not implement it, so several NAND commands did not actually ever work without any notice (like reading the ONFI PARAM_PAGE or SET/GET_FEATURES) ; - wrote the OOB data correctly, but was not able to read it correctly past the first OOB data chunk ; - did not retrieve ECC bytes ; - used device tree bindings that did not allow more than one NAND chip, and did not allow to choose the correct chip select if not incrementing from 0.
Plus, the Ready/Busy line used had to be 0. Old device tree bindings are still supported but deprecated. A more hierarchical view has to be used to keep the controller and the NAND chip structures clearly separated both inside the device tree and also in the driver code. Signed-off-by: Miquel Raynal Tested-by: Sean Nyekjaer Tested-by: Willy Tarreau Signed-off-by: Boris Brezillon --- drivers/mtd/nand/Kconfig | 13 + drivers/mtd/nand/Makefile | 1 + drivers/mtd/nand/marvell_nand.c | 2898 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 2912 insertions(+) create mode 100644 drivers/mtd/nand/marvell_nand.c diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 859eb7790c46..e6b8c59f2c0d 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -315,6 +315,7 @@ config MTD_NAND_ATMEL config MTD_NAND_PXA3xx tristate "NAND support on PXA3xx and Armada 370/XP" + depends on !MTD_NAND_MARVELL depends on PXA3xx || ARCH_MMP || PLAT_ORION || ARCH_MVEBU help @@ -323,6 +324,18 @@ config MTD_NAND_PXA3xx platforms (XP, 370, 375, 38x, 39x) and 64-bit Armada platforms (7K, 8K) (NFCv2). 
+config MTD_NAND_MARVELL + tristate "NAND controller support on Marvell boards" + depends on PXA3xx || ARCH_MMP || PLAT_ORION || ARCH_MVEBU || \ + COMPILE_TEST + depends on HAS_IOMEM + help + This enables the NAND flash controller driver for Marvell boards, + including: + - PXA3xx processors (NFCv1) + - 32-bit Armada platforms (XP, 37x, 38x, 39x) (NFCv2) + - 64-bit Armada platforms (7k, 8k) (NFCv2) + config MTD_NAND_SLC_LPC32XX tristate "NXP LPC32xx SLC Controller" depends on ARCH_LPC32XX diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 118a1349aad3..921634ba400c 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_MTD_NAND_OMAP2) += omap2_nand.o obj-$(CONFIG_MTD_NAND_OMAP_BCH_BUILD) += omap_elm.o obj-$(CONFIG_MTD_NAND_CM_X270) += cmx270_nand.o obj-$(CONFIG_MTD_NAND_PXA3xx) += pxa3xx_nand.o +obj-$(CONFIG_MTD_NAND_MARVELL) += marvell_nand.o obj-$(CONFIG_MTD_NAND_TMIO) += tmio_nand.o obj-$(CONFIG_MTD_NAND_PLATFORM) += plat_nand.o obj-$(CONFIG_MTD_NAND_PASEMI) += pasemi_nand.o diff --git a/drivers/mtd/nand/marvell_nand.c b/drivers/mtd/nand/marvell_nand.c new file mode 100644 index 000000000000..b8fec6093b75 --- /dev/null +++ b/drivers/mtd/nand/marvell_nand.c @@ -0,0 +1,2898 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Marvell NAND flash controller driver + * + * Copyright (C) 2017 Marvell + * Author: Miquel RAYNAL + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Data FIFO granularity, FIFO reads/writes must be a multiple of this length */ +#define FIFO_DEPTH 8 +#define FIFO_REP(x) (x / sizeof(u32)) +#define BCH_SEQ_READS (32 / FIFO_DEPTH) +/* NFC does not support transfers of larger chunks at a time */ +#define MAX_CHUNK_SIZE 2112 +/* NFCv1 cannot read more than 7 bytes of ID */ +#define NFCV1_READID_LEN 7 +/* Polling is done at a pace of POLL_PERIOD us until POLL_TIMEOUT is reached */
+#define POLL_PERIOD 0 +#define POLL_TIMEOUT 100000 +/* Interrupt maximum wait period in ms */ +#define IRQ_TIMEOUT 1000 +/* Latency in clock cycles between SoC pins and NFC logic */ +#define MIN_RD_DEL_CNT 3 +/* Maximum number of contiguous address cycles */ +#define MAX_ADDRESS_CYC_NFCV1 5 +#define MAX_ADDRESS_CYC_NFCV2 7 +/* System control registers/bits to enable the NAND controller on some SoCs */ +#define GENCONF_SOC_DEVICE_MUX 0x208 +#define GENCONF_SOC_DEVICE_MUX_NFC_EN BIT(0) +#define GENCONF_SOC_DEVICE_MUX_ECC_CLK_RST BIT(20) +#define GENCONF_SOC_DEVICE_MUX_ECC_CORE_RST BIT(21) +#define GENCONF_SOC_DEVICE_MUX_NFC_INT_EN BIT(25) +#define GENCONF_CLK_GATING_CTRL 0x220 +#define GENCONF_CLK_GATING_CTRL_ND_GATE BIT(2) +#define GENCONF_ND_CLK_CTRL 0x700 +#define GENCONF_ND_CLK_CTRL_EN BIT(0) + +/* NAND controller data flash control register */ +#define NDCR 0x00 +#define NDCR_ALL_INT GENMASK(11, 0) +#define NDCR_CS1_CMDDM BIT(7) +#define NDCR_CS0_CMDDM BIT(8) +#define NDCR_RDYM BIT(11) +#define NDCR_ND_ARB_EN BIT(12) +#define NDCR_RA_START BIT(15) +#define NDCR_RD_ID_CNT(x) (min_t(unsigned int, x, 0x7) << 16) +#define NDCR_PAGE_SZ(x) (x >= 2048 ? 
BIT(24) : 0) +#define NDCR_DWIDTH_M BIT(26) +#define NDCR_DWIDTH_C BIT(27) +#define NDCR_ND_RUN BIT(28) +#define NDCR_DMA_EN BIT(29) +#define NDCR_ECC_EN BIT(30) +#define NDCR_SPARE_EN BIT(31) +#define NDCR_GENERIC_FIELDS_MASK (~(NDCR_RA_START | NDCR_PAGE_SZ(2048) | \ + NDCR_DWIDTH_M | NDCR_DWIDTH_C)) + +/* NAND interface timing parameter 0 register */ +#define NDTR0 0x04 +#define NDTR0_TRP(x) ((min_t(unsigned int, x, 0xF) & 0x7) << 0) +#define NDTR0_TRH(x) (min_t(unsigned int, x, 0x7) << 3) +#define NDTR0_ETRP(x) ((min_t(unsigned int, x, 0xF) & 0x8) << 3) +#define NDTR0_SEL_NRE_EDGE BIT(7) +#define NDTR0_TWP(x) (min_t(unsigned int, x, 0x7) << 8) +#define NDTR0_TWH(x) (min_t(unsigned int, x, 0x7) << 11) +#define NDTR0_TCS(x) (min_t(unsigned int, x, 0x7) << 16) +#define NDTR0_TCH(x) (min_t(unsigned int, x, 0x7) << 19) +#define NDTR0_RD_CNT_DEL(x) (min_t(unsigned int, x, 0xF) << 22) +#define NDTR0_SELCNTR BIT(26) +#define NDTR0_TADL(x) (min_t(unsigned int, x, 0x1F) << 27) + +/* NAND interface timing parameter 1 register */ +#define NDTR1 0x0C +#define NDTR1_TAR(x) (min_t(unsigned int, x, 0xF) << 0) +#define NDTR1_TWHR(x) (min_t(unsigned int, x, 0xF) << 4) +#define NDTR1_TRHW(x) (min_t(unsigned int, x / 16, 0x3) << 8) +#define NDTR1_PRESCALE BIT(14) +#define NDTR1_WAIT_MODE BIT(15) +#define NDTR1_TR(x) (min_t(unsigned int, x, 0xFFFF) << 16) + +/* NAND controller status register */ +#define NDSR 0x14 +#define NDSR_WRCMDREQ BIT(0) +#define NDSR_RDDREQ BIT(1) +#define NDSR_WRDREQ BIT(2) +#define NDSR_CORERR BIT(3) +#define NDSR_UNCERR BIT(4) +#define NDSR_CMDD(cs) BIT(8 - cs) +#define NDSR_RDY(rb) BIT(11 + rb) +#define NDSR_ERRCNT(x) ((x >> 16) & 0x1F) + +/* NAND ECC control register */ +#define NDECCCTRL 0x28 +#define NDECCCTRL_BCH_EN BIT(0) + +/* NAND controller data buffer register */ +#define NDDB 0x40 + +/* NAND controller command buffer 0 register */ +#define NDCB0 0x48 +#define NDCB0_CMD1(x) ((x & 0xFF) << 0) +#define NDCB0_CMD2(x) ((x & 0xFF) << 8) +#define 
NDCB0_ADDR_CYC(x) ((x & 0x7) << 16) +#define NDCB0_ADDR_GET_NUM_CYC(x) (((x) >> 16) & 0x7) +#define NDCB0_DBC BIT(19) +#define NDCB0_CMD_TYPE(x) ((x & 0x7) << 21) +#define NDCB0_CSEL BIT(24) +#define NDCB0_RDY_BYP BIT(27) +#define NDCB0_LEN_OVRD BIT(28) +#define NDCB0_CMD_XTYPE(x) ((x & 0x7) << 29) + +/* NAND controller command buffer 1 register */ +#define NDCB1 0x4C +#define NDCB1_COLS(x) ((x & 0xFFFF) << 0) +#define NDCB1_ADDRS_PAGE(x) (x << 16) + +/* NAND controller command buffer 2 register */ +#define NDCB2 0x50 +#define NDCB2_ADDR5_PAGE(x) (((x >> 16) & 0xFF) << 0) +#define NDCB2_ADDR5_CYC(x) ((x & 0xFF) << 0) + +/* NAND controller command buffer 3 register */ +#define NDCB3 0x54 +#define NDCB3_ADDR6_CYC(x) ((x & 0xFF) << 16) +#define NDCB3_ADDR7_CYC(x) ((x & 0xFF) << 24) + +/* NAND controller command buffer 0 register 'type' and 'xtype' fields */ +#define TYPE_READ 0 +#define TYPE_WRITE 1 +#define TYPE_ERASE 2 +#define TYPE_READ_ID 3 +#define TYPE_STATUS 4 +#define TYPE_RESET 5 +#define TYPE_NAKED_CMD 6 +#define TYPE_NAKED_ADDR 7 +#define TYPE_MASK 7 +#define XTYPE_MONOLITHIC_RW 0 +#define XTYPE_LAST_NAKED_RW 1 +#define XTYPE_FINAL_COMMAND 3 +#define XTYPE_READ 4 +#define XTYPE_WRITE_DISPATCH 4 +#define XTYPE_NAKED_RW 5 +#define XTYPE_COMMAND_DISPATCH 6 +#define XTYPE_MASK 7 + +/** + * Marvell ECC engine works differently than the others, in order to limit the + * size of the IP, hardware engineers chose to set a fixed strength at 16 bits + * per subpage, and depending on a the desired strength needed by the NAND chip, + * a particular layout mixing data/spare/ecc is defined, with a possible last + * chunk smaller that the others. 
+ * + * @writesize: Full page size on which the layout applies + * @chunk: Desired ECC chunk size on which the layout applies + * @strength: Desired ECC strength (per chunk size bytes) on which the + * layout applies + * @nchunks: Total number of chunks + * @full_chunk_cnt: Number of full-sized chunks, which is the number of + * repetitions of the pattern: + * (data_bytes + spare_bytes + ecc_bytes). + * @data_bytes: Number of data bytes per chunk + * @spare_bytes: Number of spare bytes per chunk + * @ecc_bytes: Number of ecc bytes per chunk + * @last_data_bytes: Number of data bytes in the last chunk + * @last_spare_bytes: Number of spare bytes in the last chunk + * @last_ecc_bytes: Number of ecc bytes in the last chunk + */ +struct marvell_hw_ecc_layout { + /* Constraints */ + int writesize; + int chunk; + int strength; + /* Corresponding layout */ + int nchunks; + int full_chunk_cnt; + int data_bytes; + int spare_bytes; + int ecc_bytes; + int last_data_bytes; + int last_spare_bytes; + int last_ecc_bytes; +}; + +#define MARVELL_LAYOUT(ws, dc, ds, nc, fcc, db, sb, eb, ldb, lsb, leb) \ + { \ + .writesize = ws, \ + .chunk = dc, \ + .strength = ds, \ + .nchunks = nc, \ + .full_chunk_cnt = fcc, \ + .data_bytes = db, \ + .spare_bytes = sb, \ + .ecc_bytes = eb, \ + .last_data_bytes = ldb, \ + .last_spare_bytes = lsb, \ + .last_ecc_bytes = leb, \ + } + +/* Layouts explained in AN-379_Marvell_SoC_NFC_ECC */ +static const struct marvell_hw_ecc_layout marvell_nfc_layouts[] = { + MARVELL_LAYOUT( 512, 512, 1, 1, 1, 512, 8, 8, 0, 0, 0), + MARVELL_LAYOUT( 2048, 512, 1, 1, 1, 2048, 40, 24, 0, 0, 0), + MARVELL_LAYOUT( 2048, 512, 4, 1, 1, 2048, 32, 30, 0, 0, 0), + MARVELL_LAYOUT( 4096, 512, 4, 2, 2, 2048, 32, 30, 0, 0, 0), + MARVELL_LAYOUT( 4096, 512, 8, 5, 4, 1024, 0, 30, 0, 64, 30), +}; + +/** + * The Nand Flash Controller has up to 4 CE and 2 RB pins. The CE selection + * is made by a field in NDCB0 register, and in another field in NDCB2 register. 
+ * The datasheet describes the logic with an error: ADDR5 field is once + * declared at the beginning of NDCB2, and another time at its end. Because the + * ADDR5 field of NDCB2 may be used by other bytes, it would be more logical + * to use the last bit of this field instead of the first ones. + * + * @cs: Wanted CE lane. + * @ndcb0_csel: Value of the NDCB0 register with or without the flag + * selecting the wanted CE lane. This is set once when + * the Device Tree is probed. + * @rb: Ready/Busy pin for the flash chip + */ +struct marvell_nand_chip_sel { + unsigned int cs; + u32 ndcb0_csel; + unsigned int rb; +}; + +/** + * NAND chip structure: stores NAND chip device related information + * + * @chip: Base NAND chip structure + * @node: Used to store NAND chips into a list + * @layout NAND layout when using hardware ECC + * @ndcr: Controller register value for this NAND chip + * @ndtr0: Timing registers 0 value for this NAND chip + * @ndtr1: Timing registers 1 value for this NAND chip + * @selected_die: Current active CS + * @nsels: Number of CS lines required by the NAND chip + * @sels: Array of CS lines descriptions + */ +struct marvell_nand_chip { + struct nand_chip chip; + struct list_head node; + const struct marvell_hw_ecc_layout *layout; + u32 ndcr; + u32 ndtr0; + u32 ndtr1; + int addr_cyc; + int selected_die; + unsigned int nsels; + struct marvell_nand_chip_sel sels[0]; +}; + +static inline struct marvell_nand_chip *to_marvell_nand(struct nand_chip *chip) +{ + return container_of(chip, struct marvell_nand_chip, chip); +} + +static inline struct marvell_nand_chip_sel *to_nand_sel(struct marvell_nand_chip + *nand) +{ + return &nand->sels[nand->selected_die]; +} + +/** + * NAND controller capabilities for distinction between compatible strings + * + * @max_cs_nb: Number of Chip Select lines available + * @max_rb_nb: Number of Ready/Busy lines available + * @need_system_controller: Indicates if the SoC needs to have access to the + * system controller (ie. 
to enable the NAND controller) + * @legacy_of_bindings: Indicates if DT parsing must be done using the old + * fashion way + * @is_nfcv2: NFCv2 has numerous enhancements compared to NFCv1, ie. + * BCH error detection and correction algorithm, + * NDCB3 register has been added + * @use_dma: Use dma for data transfers + */ +struct marvell_nfc_caps { + unsigned int max_cs_nb; + unsigned int max_rb_nb; + bool need_system_controller; + bool legacy_of_bindings; + bool is_nfcv2; + bool use_dma; +}; + +/** + * NAND controller structure: stores Marvell NAND controller information + * + * @controller: Base controller structure + * @dev: Parent device (used to print error messages) + * @regs: NAND controller registers + * @ecc_clk: ECC block clock, two times the NAND controller clock + * @complete: Completion object to wait for NAND controller events + * @assigned_cs: Bitmask describing already assigned CS lines + * @chips: List containing all the NAND chips attached to + * this NAND controller + * @caps: NAND controller capabilities for each compatible string + * @dma_chan: DMA channel (NFCv1 only) + * @dma_buf: 32-bit aligned buffer for DMA transfers (NFCv1 only) + */ +struct marvell_nfc { + struct nand_hw_control controller; + struct device *dev; + void __iomem *regs; + struct clk *ecc_clk; + struct completion complete; + unsigned long assigned_cs; + struct list_head chips; + struct nand_chip *selected_chip; + const struct marvell_nfc_caps *caps; + + /* DMA (NFCv1 only) */ + bool use_dma; + struct dma_chan *dma_chan; + u8 *dma_buf; +}; + +static inline struct marvell_nfc *to_marvell_nfc(struct nand_hw_control *ctrl) +{ + return container_of(ctrl, struct marvell_nfc, controller); +} + +/** + * NAND controller timings expressed in NAND Controller clock cycles + * + * @tRP: ND_nRE pulse width + * @tRH: ND_nRE high duration + * @tWP: ND_nWE pulse time + * @tWH: ND_nWE high duration + * @tCS: Enable signal setup time + * @tCH: Enable signal hold time + * @tADL: Address to write 
data delay + * @tAR: ND_ALE low to ND_nRE low delay + * @tWHR: ND_nWE high to ND_nRE low for status read + * @tRHW: ND_nRE high duration, read to write delay + * @tR: ND_nWE high to ND_nRE low for read + */ +struct marvell_nfc_timings { + /* NDTR0 fields */ + unsigned int tRP; + unsigned int tRH; + unsigned int tWP; + unsigned int tWH; + unsigned int tCS; + unsigned int tCH; + unsigned int tADL; + /* NDTR1 fields */ + unsigned int tAR; + unsigned int tWHR; + unsigned int tRHW; + unsigned int tR; +}; + +/** + * Derives a duration in numbers of clock cycles. + * + * @ps: Duration in pico-seconds + * @period_ns: Clock period in nano-seconds + * + * Convert the duration in nano-seconds, then divide by the period and + * return the number of clock periods. + */ +#define TO_CYCLES(ps, period_ns) (DIV_ROUND_UP(ps / 1000, period_ns)) + +/** + * NAND driver structure filled during the parsing of the ->exec_op() subop + * subset of instructions. + * + * @ndcb: Array of values written to NDCBx registers + * @cle_ale_delay_ns: Optional delay after the last CMD or ADDR cycle + * @rdy_timeout_ms: Timeout for waits on Ready/Busy pin + * @rdy_delay_ns: Optional delay after waiting for the RB pin + * @data_delay_ns: Optional delay after the data xfer + * @data_instr_idx: Index of the data instruction in the subop + * @data_instr: Pointer to the data instruction in the subop + */ +struct marvell_nfc_op { + u32 ndcb[4]; + unsigned int cle_ale_delay_ns; + unsigned int rdy_timeout_ms; + unsigned int rdy_delay_ns; + unsigned int data_delay_ns; + unsigned int data_instr_idx; + const struct nand_op_instr *data_instr; +}; + +/* + * Internal helper to conditionnally apply a delay (from the above structure, + * most of the time). 
+ */ +static void cond_delay(unsigned int ns) +{ + if (!ns) + return; + + if (ns < 10000) + ndelay(ns); + else + udelay(DIV_ROUND_UP(ns, 1000)); +} + +/* + * The controller has many flags that could generate interrupts, most of them + * are disabled and polling is used. For the very slow signals, using interrupts + * may relax the CPU charge. + */ +static void marvell_nfc_disable_int(struct marvell_nfc *nfc, u32 int_mask) +{ + u32 reg; + + /* Writing 1 disables the interrupt */ + reg = readl_relaxed(nfc->regs + NDCR); + writel_relaxed(reg | int_mask, nfc->regs + NDCR); +} + +static void marvell_nfc_enable_int(struct marvell_nfc *nfc, u32 int_mask) +{ + u32 reg; + + /* Writing 0 enables the interrupt */ + reg = readl_relaxed(nfc->regs + NDCR); + writel_relaxed(reg & ~int_mask, nfc->regs + NDCR); +} + +static void marvell_nfc_clear_int(struct marvell_nfc *nfc, u32 int_mask) +{ + writel_relaxed(int_mask, nfc->regs + NDSR); +} + +static void marvell_nfc_force_byte_access(struct nand_chip *chip, + bool force_8bit) +{ + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + u32 ndcr; + + /* + * Callers of this function do not verify if the NAND is using a 16-bit + * an 8-bit bus for normal operations, so we need to take care of that + * here by leaving the configuration unchanged if the NAND does not have + * the NAND_BUSWIDTH_16 flag set. + */ + if (!(chip->options & NAND_BUSWIDTH_16)) + return; + + ndcr = readl_relaxed(nfc->regs + NDCR); + + if (force_8bit) + ndcr &= ~(NDCR_DWIDTH_M | NDCR_DWIDTH_C); + else + ndcr |= NDCR_DWIDTH_M | NDCR_DWIDTH_C; + + writel_relaxed(ndcr, nfc->regs + NDCR); +} + +static int marvell_nfc_wait_ndrun(struct nand_chip *chip) +{ + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + u32 val; + int ret; + + /* + * The command is being processed, wait for the ND_RUN bit to be + * cleared by the NFC. If not, we must clear it by hand. 
+ */ + ret = readl_relaxed_poll_timeout(nfc->regs + NDCR, val, + (val & NDCR_ND_RUN) == 0, + POLL_PERIOD, POLL_TIMEOUT); + if (ret) { + dev_err(nfc->dev, "Timeout on NAND controller run mode\n"); + writel_relaxed(readl(nfc->regs + NDCR) & ~NDCR_ND_RUN, + nfc->regs + NDCR); + return ret; + } + + return 0; +} + +/* + * Any time a command has to be sent to the controller, the following sequence + * has to be followed: + * - call marvell_nfc_prepare_cmd() + * -> activate the ND_RUN bit that will kind of 'start a job' + * -> wait for the signal indicating the NFC is waiting for a command + * - send the command (cmd and address cycles) + * - eventually send or receive the data + * - call marvell_nfc_end_cmd() with the corresponding flag + * -> wait for the flag to be triggered or cancel the job with a timeout + * + * The following helpers are here to factorize the code a bit so that + * specialized functions responsible for executing the actual NAND + * operations do not have to replicate the same code blocks.
+ */ +static int marvell_nfc_prepare_cmd(struct nand_chip *chip) +{ + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + u32 ndcr, val; + int ret; + + /* Poll ND_RUN and clear NDSR before issuing any command */ + ret = marvell_nfc_wait_ndrun(chip); + if (ret) { + dev_err(nfc->dev, "Last operation did not suceed\n"); + return ret; + } + + ndcr = readl_relaxed(nfc->regs + NDCR); + writel_relaxed(readl(nfc->regs + NDSR), nfc->regs + NDSR); + + /* Assert ND_RUN bit and wait the NFC to be ready */ + writel_relaxed(ndcr | NDCR_ND_RUN, nfc->regs + NDCR); + ret = readl_relaxed_poll_timeout(nfc->regs + NDSR, val, + val & NDSR_WRCMDREQ, + POLL_PERIOD, POLL_TIMEOUT); + if (ret) { + dev_err(nfc->dev, "Timeout on WRCMDRE\n"); + return -ETIMEDOUT; + } + + /* Command may be written, clear WRCMDREQ status bit */ + writel_relaxed(NDSR_WRCMDREQ, nfc->regs + NDSR); + + return 0; +} + +static void marvell_nfc_send_cmd(struct nand_chip *chip, + struct marvell_nfc_op *nfc_op) +{ + struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + + dev_dbg(nfc->dev, "\nNDCR: 0x%08x\n" + "NDCB0: 0x%08x\nNDCB1: 0x%08x\nNDCB2: 0x%08x\nNDCB3: 0x%08x\n", + (u32)readl_relaxed(nfc->regs + NDCR), nfc_op->ndcb[0], + nfc_op->ndcb[1], nfc_op->ndcb[2], nfc_op->ndcb[3]); + + writel_relaxed(to_nand_sel(marvell_nand)->ndcb0_csel | nfc_op->ndcb[0], + nfc->regs + NDCB0); + writel_relaxed(nfc_op->ndcb[1], nfc->regs + NDCB0); + writel(nfc_op->ndcb[2], nfc->regs + NDCB0); + + /* + * Write NDCB0 four times only if LEN_OVRD is set or if ADDR6 or ADDR7 + * fields are used (only available on NFCv2). 
+ */ + if (nfc_op->ndcb[0] & NDCB0_LEN_OVRD || + NDCB0_ADDR_GET_NUM_CYC(nfc_op->ndcb[0]) >= 6) { + if (!WARN_ON_ONCE(!nfc->caps->is_nfcv2)) + writel(nfc_op->ndcb[3], nfc->regs + NDCB0); + } +} + +static int marvell_nfc_end_cmd(struct nand_chip *chip, int flag, + const char *label) +{ + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + u32 val; + int ret; + + ret = readl_relaxed_poll_timeout(nfc->regs + NDSR, val, + val & flag, + POLL_PERIOD, POLL_TIMEOUT); + + if (ret) { + dev_err(nfc->dev, "Timeout on %s (NDSR: 0x%08x)\n", + label, val); + if (nfc->dma_chan) + dmaengine_terminate_all(nfc->dma_chan); + return ret; + } + + /* + * DMA function uses this helper to poll on CMDD bits without wanting + * them to be cleared. + */ + if (nfc->use_dma && (readl_relaxed(nfc->regs + NDCR) & NDCR_DMA_EN)) + return 0; + + writel_relaxed(flag, nfc->regs + NDSR); + + return 0; +} + +static int marvell_nfc_wait_cmdd(struct nand_chip *chip) +{ + struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); + int cs_flag = NDSR_CMDD(to_nand_sel(marvell_nand)->ndcb0_csel); + + return marvell_nfc_end_cmd(chip, cs_flag, "CMDD"); +} + +static int marvell_nfc_wait_op(struct nand_chip *chip, unsigned int timeout_ms) +{ + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + int ret; + + /* Timeout is expressed in ms */ + if (!timeout_ms) + timeout_ms = IRQ_TIMEOUT; + + init_completion(&nfc->complete); + + marvell_nfc_enable_int(nfc, NDCR_RDYM); + ret = wait_for_completion_timeout(&nfc->complete, + msecs_to_jiffies(timeout_ms)); + marvell_nfc_disable_int(nfc, NDCR_RDYM); + marvell_nfc_clear_int(nfc, NDSR_RDY(0) | NDSR_RDY(1)); + if (!ret) { + dev_err(nfc->dev, "Timeout waiting for RB signal\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static void marvell_nfc_select_chip(struct mtd_info *mtd, int die_nr) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); + struct marvell_nfc *nfc = 
to_marvell_nfc(chip->controller); + u32 ndcr_generic; + + if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die) + return; + + if (die_nr < 0 || die_nr >= marvell_nand->nsels) { + nfc->selected_chip = NULL; + marvell_nand->selected_die = -1; + return; + } + + /* + * Do not change the timing registers when using the DT property + * marvell,nand-keep-config; in that case ->ndtr0 and ->ndtr1 from the + * marvell_nand structure are supposedly empty. + */ + writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0); + writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1); + + /* + * Reset the NDCR register to a clean state for this particular chip, + * also clear ND_RUN bit. + */ + ndcr_generic = readl_relaxed(nfc->regs + NDCR) & + NDCR_GENERIC_FIELDS_MASK & ~NDCR_ND_RUN; + writel_relaxed(ndcr_generic | marvell_nand->ndcr, nfc->regs + NDCR); + + /* Also reset the interrupt status register */ + marvell_nfc_clear_int(nfc, NDCR_ALL_INT); + + nfc->selected_chip = chip; + marvell_nand->selected_die = die_nr; +} + +static irqreturn_t marvell_nfc_isr(int irq, void *dev_id) +{ + struct marvell_nfc *nfc = dev_id; + u32 st = readl_relaxed(nfc->regs + NDSR); + u32 ien = (~readl_relaxed(nfc->regs + NDCR)) & NDCR_ALL_INT; + + /* + * RDY interrupt mask is one bit in NDCR while there are two status + * bit in NDSR (RDY[cs0/cs2] and RDY[cs1/cs3]). 
+ */ + if (st & NDSR_RDY(1)) + st |= NDSR_RDY(0); + + if (!(st & ien)) + return IRQ_NONE; + + marvell_nfc_disable_int(nfc, st & NDCR_ALL_INT); + + if (!(st & (NDSR_RDDREQ | NDSR_WRDREQ | NDSR_WRCMDREQ))) + complete(&nfc->complete); + + return IRQ_HANDLED; +} + +/* HW ECC related functions */ +static void marvell_nfc_enable_hw_ecc(struct nand_chip *chip) +{ + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + u32 ndcr = readl_relaxed(nfc->regs + NDCR); + + if (!(ndcr & NDCR_ECC_EN)) { + writel_relaxed(ndcr | NDCR_ECC_EN, nfc->regs + NDCR); + + /* + * When enabling BCH, set threshold to 0 to always know the + * number of corrected bitflips. + */ + if (chip->ecc.algo == NAND_ECC_BCH) + writel_relaxed(NDECCCTRL_BCH_EN, nfc->regs + NDECCCTRL); + } +} + +static void marvell_nfc_disable_hw_ecc(struct nand_chip *chip) +{ + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + u32 ndcr = readl_relaxed(nfc->regs + NDCR); + + if (ndcr & NDCR_ECC_EN) { + writel_relaxed(ndcr & ~NDCR_ECC_EN, nfc->regs + NDCR); + if (chip->ecc.algo == NAND_ECC_BCH) + writel_relaxed(0, nfc->regs + NDECCCTRL); + } +} + +/* DMA related helpers */ +static void marvell_nfc_enable_dma(struct marvell_nfc *nfc) +{ + u32 reg; + + reg = readl_relaxed(nfc->regs + NDCR); + writel_relaxed(reg | NDCR_DMA_EN, nfc->regs + NDCR); +} + +static void marvell_nfc_disable_dma(struct marvell_nfc *nfc) +{ + u32 reg; + + reg = readl_relaxed(nfc->regs + NDCR); + writel_relaxed(reg & ~NDCR_DMA_EN, nfc->regs + NDCR); +} + +/* Read/write PIO/DMA accessors */ +static int marvell_nfc_xfer_data_dma(struct marvell_nfc *nfc, + enum dma_data_direction direction, + unsigned int len) +{ + unsigned int dma_len = min_t(int, ALIGN(len, 32), MAX_CHUNK_SIZE); + struct dma_async_tx_descriptor *tx; + struct scatterlist sg; + dma_cookie_t cookie; + int ret; + + marvell_nfc_enable_dma(nfc); + /* Prepare the DMA transfer */ + sg_init_one(&sg, nfc->dma_buf, dma_len); + dma_map_sg(nfc->dma_chan->device->dev, &sg, 1, 
direction); + tx = dmaengine_prep_slave_sg(nfc->dma_chan, &sg, 1, + direction == DMA_FROM_DEVICE ? + DMA_DEV_TO_MEM : DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(nfc->dev, "Could not prepare DMA S/G list\n"); + return -ENXIO; + } + + /* Do the task and wait for it to finish */ + cookie = dmaengine_submit(tx); + ret = dma_submit_error(cookie); + if (ret) + return -EIO; + + dma_async_issue_pending(nfc->dma_chan); + ret = marvell_nfc_wait_cmdd(nfc->selected_chip); + dma_unmap_sg(nfc->dma_chan->device->dev, &sg, 1, direction); + marvell_nfc_disable_dma(nfc); + if (ret) { + dev_err(nfc->dev, "Timeout waiting for DMA (status: %d)\n", + dmaengine_tx_status(nfc->dma_chan, cookie, NULL)); + dmaengine_terminate_all(nfc->dma_chan); + return -ETIMEDOUT; + } + + return 0; +} + +static int marvell_nfc_xfer_data_in_pio(struct marvell_nfc *nfc, u8 *in, + unsigned int len) +{ + unsigned int last_len = len % FIFO_DEPTH; + unsigned int last_full_offset = round_down(len, FIFO_DEPTH); + int i; + + for (i = 0; i < last_full_offset; i += FIFO_DEPTH) + ioread32_rep(nfc->regs + NDDB, in + i, FIFO_REP(FIFO_DEPTH)); + + if (last_len) { + u8 tmp_buf[FIFO_DEPTH]; + + ioread32_rep(nfc->regs + NDDB, tmp_buf, FIFO_REP(FIFO_DEPTH)); + memcpy(in + last_full_offset, tmp_buf, last_len); + } + + return 0; +} + +static int marvell_nfc_xfer_data_out_pio(struct marvell_nfc *nfc, const u8 *out, + unsigned int len) +{ + unsigned int last_len = len % FIFO_DEPTH; + unsigned int last_full_offset = round_down(len, FIFO_DEPTH); + int i; + + for (i = 0; i < last_full_offset; i += FIFO_DEPTH) + iowrite32_rep(nfc->regs + NDDB, out + i, FIFO_REP(FIFO_DEPTH)); + + if (last_len) { + u8 tmp_buf[FIFO_DEPTH]; + + memcpy(tmp_buf, out + last_full_offset, last_len); + iowrite32_rep(nfc->regs + NDDB, tmp_buf, FIFO_REP(FIFO_DEPTH)); + } + + return 0; +} + +static void marvell_nfc_check_empty_chunk(struct nand_chip *chip, + u8 *data, int data_len, + u8 *spare, int spare_len, + u8 *ecc, int ecc_len, + 
unsigned int *max_bitflips) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + int bf; + + /* + * Blank pages (all 0xFF) that have not been written may be recognized + * as bad if bitflips occur, so whenever an uncorrectable error occurs, + * check if the entire page (with ECC bytes) is actually blank or not. + */ + if (!data) + data_len = 0; + if (!spare) + spare_len = 0; + if (!ecc) + ecc_len = 0; + + bf = nand_check_erased_ecc_chunk(data, data_len, ecc, ecc_len, + spare, spare_len, chip->ecc.strength); + if (bf < 0) { + mtd->ecc_stats.failed++; + return; + } + + /* Update the stats and max_bitflips */ + mtd->ecc_stats.corrected += bf; + *max_bitflips = max_t(unsigned int, *max_bitflips, bf); +} + +/* + * Check a chunk is correct or not according to hardware ECC engine. + * mtd->ecc_stats.corrected is updated, as well as max_bitflips, however + * mtd->ecc_stats.failure is not, the function will instead return a non-zero + * value indicating that a check on the emptyness of the subpage must be + * performed before declaring the subpage corrupted. + */ +static int marvell_nfc_hw_ecc_correct(struct nand_chip *chip, + unsigned int *max_bitflips) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + int bf = 0; + u32 ndsr; + + ndsr = readl_relaxed(nfc->regs + NDSR); + + /* Check uncorrectable error flag */ + if (ndsr & NDSR_UNCERR) { + writel_relaxed(ndsr, nfc->regs + NDSR); + + /* + * Do not increment ->ecc_stats.failed now, instead, return a + * non-zero value to indicate that this chunk was apparently + * bad, and it should be check to see if it empty or not. If + * the chunk (with ECC bytes) is not declared empty, the calling + * function must increment the failure count. 
+ */ + return -EBADMSG; + } + + /* Check correctable error flag */ + if (ndsr & NDSR_CORERR) { + writel_relaxed(ndsr, nfc->regs + NDSR); + + if (chip->ecc.algo == NAND_ECC_BCH) + bf = NDSR_ERRCNT(ndsr); + else + bf = 1; + } + + /* Update the stats and max_bitflips */ + mtd->ecc_stats.corrected += bf; + *max_bitflips = max_t(unsigned int, *max_bitflips, bf); + + return 0; +} + +/* Hamming read helpers */ +static int marvell_nfc_hw_ecc_hmg_do_read_page(struct nand_chip *chip, + u8 *data_buf, u8 *oob_buf, + bool raw, int page) +{ + struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + struct marvell_nfc_op nfc_op = { + .ndcb[0] = NDCB0_CMD_TYPE(TYPE_READ) | + NDCB0_ADDR_CYC(marvell_nand->addr_cyc) | + NDCB0_DBC | + NDCB0_CMD1(NAND_CMD_READ0) | + NDCB0_CMD2(NAND_CMD_READSTART), + .ndcb[1] = NDCB1_ADDRS_PAGE(page), + .ndcb[2] = NDCB2_ADDR5_PAGE(page), + }; + unsigned int oob_bytes = lt->spare_bytes + (raw ? lt->ecc_bytes : 0); + int ret; + + /* NFCv2 needs more information about the operation being executed */ + if (nfc->caps->is_nfcv2) + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW); + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + ret = marvell_nfc_end_cmd(chip, NDSR_RDDREQ, + "RDDREQ while draining FIFO (data/oob)"); + if (ret) + return ret; + + /* + * Read the page then the OOB area. Unlike what is shown in current + * documentation, spare bytes are protected by the ECC engine, and must + * be at the beginning of the OOB area or running this driver on legacy + * systems will prevent the discovery of the BBM/BBT. 
+ */ + if (nfc->use_dma) { + marvell_nfc_xfer_data_dma(nfc, DMA_FROM_DEVICE, + lt->data_bytes + oob_bytes); + memcpy(data_buf, nfc->dma_buf, lt->data_bytes); + memcpy(oob_buf, nfc->dma_buf + lt->data_bytes, oob_bytes); + } else { + marvell_nfc_xfer_data_in_pio(nfc, data_buf, lt->data_bytes); + marvell_nfc_xfer_data_in_pio(nfc, oob_buf, oob_bytes); + } + + ret = marvell_nfc_wait_cmdd(chip); + + return ret; +} + +static int marvell_nfc_hw_ecc_hmg_read_page_raw(struct mtd_info *mtd, + struct nand_chip *chip, u8 *buf, + int oob_required, int page) +{ + return marvell_nfc_hw_ecc_hmg_do_read_page(chip, buf, chip->oob_poi, + true, page); +} + +static int marvell_nfc_hw_ecc_hmg_read_page(struct mtd_info *mtd, + struct nand_chip *chip, + u8 *buf, int oob_required, + int page) +{ + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + unsigned int full_sz = lt->data_bytes + lt->spare_bytes + lt->ecc_bytes; + int max_bitflips = 0, ret; + u8 *raw_buf; + + marvell_nfc_enable_hw_ecc(chip); + marvell_nfc_hw_ecc_hmg_do_read_page(chip, buf, chip->oob_poi, false, + page); + ret = marvell_nfc_hw_ecc_correct(chip, &max_bitflips); + marvell_nfc_disable_hw_ecc(chip); + + if (!ret) + return max_bitflips; + + /* + * When ECC failures are detected, check if the full page has been + * written or not. Ignore the failure if it is actually empty. + */ + raw_buf = kmalloc(full_sz, GFP_KERNEL); + if (!raw_buf) + return -ENOMEM; + + marvell_nfc_hw_ecc_hmg_do_read_page(chip, raw_buf, raw_buf + + lt->data_bytes, true, page); + marvell_nfc_check_empty_chunk(chip, raw_buf, full_sz, NULL, 0, NULL, 0, + &max_bitflips); + kfree(raw_buf); + + return max_bitflips; +} + +/* + * Spare area in Hamming layouts is not protected by the ECC engine (even if + * it appears before the ECC bytes when reading), the ->read_oob_raw() function + * also stands for ->read_oob(). 
+ */ +static int marvell_nfc_hw_ecc_hmg_read_oob_raw(struct mtd_info *mtd, + struct nand_chip *chip, int page) +{ + /* Invalidate page cache */ + chip->pagebuf = -1; + + return marvell_nfc_hw_ecc_hmg_do_read_page(chip, chip->data_buf, + chip->oob_poi, true, page); +} + +/* Hamming write helpers */ +static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, + const u8 *data_buf, + const u8 *oob_buf, bool raw, + int page) +{ + struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + struct marvell_nfc_op nfc_op = { + .ndcb[0] = NDCB0_CMD_TYPE(TYPE_WRITE) | + NDCB0_ADDR_CYC(marvell_nand->addr_cyc) | + NDCB0_CMD1(NAND_CMD_SEQIN) | + NDCB0_CMD2(NAND_CMD_PAGEPROG) | + NDCB0_DBC, + .ndcb[1] = NDCB1_ADDRS_PAGE(page), + .ndcb[2] = NDCB2_ADDR5_PAGE(page), + }; + unsigned int oob_bytes = lt->spare_bytes + (raw ? lt->ecc_bytes : 0); + int ret; + + /* NFCv2 needs more information about the operation being executed */ + if (nfc->caps->is_nfcv2) + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW); + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + ret = marvell_nfc_end_cmd(chip, NDSR_WRDREQ, + "WRDREQ while loading FIFO (data)"); + if (ret) + return ret; + + /* Write the page then the OOB area */ + if (nfc->use_dma) { + memcpy(nfc->dma_buf, data_buf, lt->data_bytes); + memcpy(nfc->dma_buf + lt->data_bytes, oob_buf, oob_bytes); + marvell_nfc_xfer_data_dma(nfc, DMA_TO_DEVICE, lt->data_bytes + + lt->ecc_bytes + lt->spare_bytes); + } else { + marvell_nfc_xfer_data_out_pio(nfc, data_buf, lt->data_bytes); + marvell_nfc_xfer_data_out_pio(nfc, oob_buf, oob_bytes); + } + + ret = marvell_nfc_wait_cmdd(chip); + if (ret) + return ret; + + ret = marvell_nfc_wait_op(chip, + chip->data_interface.timings.sdr.tPROG_max); + return ret; +} + +static int 
marvell_nfc_hw_ecc_hmg_write_page_raw(struct mtd_info *mtd, + struct nand_chip *chip, + const u8 *buf, + int oob_required, int page) +{ + return marvell_nfc_hw_ecc_hmg_do_write_page(chip, buf, chip->oob_poi, + true, page); +} + +static int marvell_nfc_hw_ecc_hmg_write_page(struct mtd_info *mtd, + struct nand_chip *chip, + const u8 *buf, + int oob_required, int page) +{ + int ret; + + marvell_nfc_enable_hw_ecc(chip); + ret = marvell_nfc_hw_ecc_hmg_do_write_page(chip, buf, chip->oob_poi, + false, page); + marvell_nfc_disable_hw_ecc(chip); + + return ret; +} + +/* + * Spare area in Hamming layouts is not protected by the ECC engine (even if + * it appears before the ECC bytes when reading), the ->write_oob_raw() function + * also stands for ->write_oob(). + */ +static int marvell_nfc_hw_ecc_hmg_write_oob_raw(struct mtd_info *mtd, + struct nand_chip *chip, + int page) +{ + /* Invalidate page cache */ + chip->pagebuf = -1; + + memset(chip->data_buf, 0xFF, mtd->writesize); + + return marvell_nfc_hw_ecc_hmg_do_write_page(chip, chip->data_buf, + chip->oob_poi, true, page); +} + +/* BCH read helpers */ +static int marvell_nfc_hw_ecc_bch_read_page_raw(struct mtd_info *mtd, + struct nand_chip *chip, u8 *buf, + int oob_required, int page) +{ + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + u8 *oob = chip->oob_poi; + int chunk_size = lt->data_bytes + lt->spare_bytes + lt->ecc_bytes; + int ecc_offset = (lt->full_chunk_cnt * lt->spare_bytes) + + lt->last_spare_bytes; + int data_len = lt->data_bytes; + int spare_len = lt->spare_bytes; + int ecc_len = lt->ecc_bytes; + int chunk; + + if (oob_required) + memset(chip->oob_poi, 0xFF, mtd->oobsize); + + nand_read_page_op(chip, page, 0, NULL, 0); + + for (chunk = 0; chunk < lt->nchunks; chunk++) { + /* Update last chunk length */ + if (chunk >= lt->full_chunk_cnt) { + data_len = lt->last_data_bytes; + spare_len = lt->last_spare_bytes; + ecc_len = lt->last_ecc_bytes; + } + + /* Read data bytes*/ + 
nand_change_read_column_op(chip, chunk * chunk_size, + buf + (lt->data_bytes * chunk), + data_len, false); + + /* Read spare bytes */ + nand_read_data_op(chip, oob + (lt->spare_bytes * chunk), + spare_len, false); + + /* Read ECC bytes */ + nand_read_data_op(chip, oob + ecc_offset + + (ALIGN(lt->ecc_bytes, 32) * chunk), + ecc_len, false); + } + + return 0; +} + +static void marvell_nfc_hw_ecc_bch_read_chunk(struct nand_chip *chip, int chunk, + u8 *data, unsigned int data_len, + u8 *spare, unsigned int spare_len, + int page) +{ + struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + int i, ret; + struct marvell_nfc_op nfc_op = { + .ndcb[0] = NDCB0_CMD_TYPE(TYPE_READ) | + NDCB0_ADDR_CYC(marvell_nand->addr_cyc) | + NDCB0_LEN_OVRD, + .ndcb[1] = NDCB1_ADDRS_PAGE(page), + .ndcb[2] = NDCB2_ADDR5_PAGE(page), + .ndcb[3] = data_len + spare_len, + }; + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return; + + if (chunk == 0) + nfc_op.ndcb[0] |= NDCB0_DBC | + NDCB0_CMD1(NAND_CMD_READ0) | + NDCB0_CMD2(NAND_CMD_READSTART); + + /* + * Trigger the naked read operation only on the last chunk. + * Otherwise, use monolithic read. + */ + if (lt->nchunks == 1 || (chunk < lt->nchunks - 1)) + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW); + else + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW); + + marvell_nfc_send_cmd(chip, &nfc_op); + + /* + * According to the datasheet, when reading from NDDB + * with BCH enabled, after each 32 bytes reads, we + * have to make sure that the NDSR.RDDREQ bit is set. + * + * Drain the FIFO, 8 32-bit reads at a time, and skip + * the polling on the last read. + * + * Length is a multiple of 32 bytes, hence it is a multiple of 8 too. 
+	 */
+	for (i = 0; i < data_len; i += FIFO_DEPTH * BCH_SEQ_READS) {
+		marvell_nfc_end_cmd(chip, NDSR_RDDREQ,
+				    "RDDREQ while draining FIFO (data)");
+		marvell_nfc_xfer_data_in_pio(nfc, data,
+					     FIFO_DEPTH * BCH_SEQ_READS);
+		data += FIFO_DEPTH * BCH_SEQ_READS;
+	}
+
+	for (i = 0; i < spare_len; i += FIFO_DEPTH * BCH_SEQ_READS) {
+		marvell_nfc_end_cmd(chip, NDSR_RDDREQ,
+				    "RDDREQ while draining FIFO (OOB)");
+		marvell_nfc_xfer_data_in_pio(nfc, spare,
+					     FIFO_DEPTH * BCH_SEQ_READS);
+		spare += FIFO_DEPTH * BCH_SEQ_READS;
+	}
+}
+
+/*
+ * Read a full page through the BCH engine.
+ *
+ * Every chunk is fetched with hardware ECC enabled and handed to
+ * marvell_nfc_hw_ecc_correct(); chunks for which it reports a failure are
+ * remembered in a bitmask. Once ECC is disabled again, the ECC bytes of each
+ * failing chunk are re-read in raw mode and the whole chunk (data + spare +
+ * ECC) is passed to marvell_nfc_check_empty_chunk().
+ *
+ * Returns max_bitflips as accumulated by the two helpers above.
+ */
+static int marvell_nfc_hw_ecc_bch_read_page(struct mtd_info *mtd,
+					    struct nand_chip *chip,
+					    u8 *buf, int oob_required,
+					    int page)
+{
+	const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout;
+	int data_len = lt->data_bytes, spare_len = lt->spare_bytes, ecc_len;
+	u8 *data = buf, *spare = chip->oob_poi, *ecc;
+	int max_bitflips = 0;
+	u32 failure_mask = 0;
+	int chunk, ecc_offset_in_page, ret;
+
+	/*
+	 * With BCH, OOB is not fully used (and thus not read entirely), not
+	 * expected bytes could show up at the end of the OOB buffer if not
+	 * explicitly erased.
+	 */
+	if (oob_required)
+		memset(chip->oob_poi, 0xFF, mtd->oobsize);
+
+	marvell_nfc_enable_hw_ecc(chip);
+
+	for (chunk = 0; chunk < lt->nchunks; chunk++) {
+		/* Update length for the last chunk */
+		if (chunk >= lt->full_chunk_cnt) {
+			data_len = lt->last_data_bytes;
+			spare_len = lt->last_spare_bytes;
+		}
+
+		/* Read the chunk and detect number of bitflips */
+		marvell_nfc_hw_ecc_bch_read_chunk(chip, chunk, data, data_len,
+						  spare, spare_len, page);
+		ret = marvell_nfc_hw_ecc_correct(chip, &max_bitflips);
+		if (ret)
+			failure_mask |= BIT(chunk);
+
+		data += data_len;
+		spare += spare_len;
+	}
+
+	marvell_nfc_disable_hw_ecc(chip);
+
+	if (!failure_mask)
+		return max_bitflips;
+
+	/*
+	 * Please note that dumping the ECC bytes during a normal read with OOB
+	 * area would add a significant overhead as ECC bytes are "consumed" by
+	 * the controller in normal mode and must be re-read in raw mode. To
+	 * avoid dropping the performances, we prefer not to include them. The
+	 * user should re-read the page in raw mode if ECC bytes are required.
+	 *
+	 * However, for any subpage read error reported by ->correct(), the ECC
+	 * bytes must be read in raw mode and the full subpage must be checked
+	 * to see if it is entirely empty or if there was an actual error.
+	 */
+	for (chunk = 0; chunk < lt->nchunks; chunk++) {
+		/* No failure reported for this chunk, move to the next one */
+		if (!(failure_mask & BIT(chunk)))
+			continue;
+
+		/* Derive ECC bytes positions (in page/buffer) and length */
+		ecc = chip->oob_poi +
+			(lt->full_chunk_cnt * lt->spare_bytes) +
+			lt->last_spare_bytes +
+			(chunk * ALIGN(lt->ecc_bytes, 32));
+		ecc_offset_in_page =
+			(chunk * (lt->data_bytes + lt->spare_bytes +
+				  lt->ecc_bytes)) +
+			(chunk < lt->full_chunk_cnt ?
+			 lt->data_bytes + lt->spare_bytes :
+			 lt->last_data_bytes + lt->last_spare_bytes);
+		ecc_len = chunk < lt->full_chunk_cnt ?
+			lt->ecc_bytes : lt->last_ecc_bytes;
+
+		/* Do the actual raw read of the ECC bytes */
+		nand_change_read_column_op(chip, ecc_offset_in_page,
+					   ecc, ecc_len, false);
+
+		/*
+		 * Derive data/spare bytes positions (in buffer) and length.
+		 * The first loop stored the spare bytes contiguously from
+		 * chip->oob_poi (spare += spare_len per chunk) and the ECC
+		 * bytes only start after all of them, so the spare area of
+		 * this chunk begins at chunk * lt->spare_bytes.
+		 */
+		data = buf + (chunk * lt->data_bytes);
+		data_len = chunk < lt->full_chunk_cnt ?
+			lt->data_bytes : lt->last_data_bytes;
+		spare = chip->oob_poi + (chunk * lt->spare_bytes);
+		spare_len = chunk < lt->full_chunk_cnt ?
+			lt->spare_bytes : lt->last_spare_bytes;
+
+		/* Check the entire chunk (data + spare + ecc) for emptiness */
+		marvell_nfc_check_empty_chunk(chip, data, data_len, spare,
+					      spare_len, ecc, ecc_len,
+					      &max_bitflips);
+	}
+
+	return max_bitflips;
+}
+
+static int marvell_nfc_hw_ecc_bch_read_oob_raw(struct mtd_info *mtd,
+					       struct nand_chip *chip, int page)
+{
+	/* Invalidate page cache */
+	chip->pagebuf = -1;
+
+	return chip->ecc.read_page_raw(mtd, chip, chip->data_buf, true, page);
+}
+
+static int marvell_nfc_hw_ecc_bch_read_oob(struct mtd_info *mtd,
+					   struct nand_chip *chip, int page)
+{
+	/* Invalidate page cache */
+	chip->pagebuf = -1;
+
+	return chip->ecc.read_page(mtd, chip, chip->data_buf, true, page);
+}
+
+/* BCH write helpers */
+static int marvell_nfc_hw_ecc_bch_write_page_raw(struct mtd_info *mtd,
+						 struct nand_chip *chip,
+						 const u8 *buf,
+						 int oob_required, int page)
+{
+	const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout;
+	int full_chunk_size = lt->data_bytes + lt->spare_bytes + lt->ecc_bytes;
+	int data_len = lt->data_bytes;
+	int spare_len = lt->spare_bytes;
+	int ecc_len = lt->ecc_bytes;
+	int oob_len = spare_len + ecc_len;
+	int spare_offset = 0;
+	int ecc_offset = (lt->full_chunk_cnt * lt->spare_bytes) +
+		lt->last_spare_bytes;
+	int chunk;
+
+	nand_prog_page_begin_op(chip, page, 0, NULL, 0);
+
+	for (chunk = 0; chunk < lt->nchunks; chunk++) {
+		if (chunk >= lt->full_chunk_cnt) {
+			data_len = lt->last_data_bytes;
+			spare_len = lt->last_spare_bytes;
+			ecc_len =
lt->last_ecc_bytes; + oob_len = spare_len + ecc_len; + } + + /* Point to the column of the next chunk */ + nand_change_write_column_op(chip, chunk * full_chunk_size, + NULL, 0, false); + + /* Write the data */ + nand_write_data_op(chip, buf + (chunk * lt->data_bytes), + data_len, false); + + if (!oob_required) + continue; + + /* Write the spare bytes */ + if (spare_len) + nand_write_data_op(chip, chip->oob_poi + spare_offset, + spare_len, false); + + /* Write the ECC bytes */ + if (ecc_len) + nand_write_data_op(chip, chip->oob_poi + ecc_offset, + ecc_len, false); + + spare_offset += spare_len; + ecc_offset += ALIGN(ecc_len, 32); + } + + return nand_prog_page_end_op(chip); +} + +static int +marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk, + const u8 *data, unsigned int data_len, + const u8 *spare, unsigned int spare_len, + int page) +{ + struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + int ret; + struct marvell_nfc_op nfc_op = { + .ndcb[0] = NDCB0_CMD_TYPE(TYPE_WRITE) | NDCB0_LEN_OVRD, + .ndcb[3] = data_len + spare_len, + }; + + /* + * First operation dispatches the CMD_SEQIN command, issue the address + * cycles and asks for the first chunk of data. + * All operations in the middle (if any) will issue a naked write and + * also ask for data. + * Last operation (if any) asks for the last chunk of data through a + * last naked write. 
+ */ + if (chunk == 0) { + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_WRITE_DISPATCH) | + NDCB0_ADDR_CYC(marvell_nand->addr_cyc) | + NDCB0_CMD1(NAND_CMD_SEQIN); + nfc_op.ndcb[1] |= NDCB1_ADDRS_PAGE(page); + nfc_op.ndcb[2] |= NDCB2_ADDR5_PAGE(page); + } else if (chunk < lt->nchunks - 1) { + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_NAKED_RW); + } else { + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW); + } + + /* Always dispatch the PAGEPROG command on the last chunk */ + if (chunk == lt->nchunks - 1) + nfc_op.ndcb[0] |= NDCB0_CMD2(NAND_CMD_PAGEPROG) | NDCB0_DBC; + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + ret = marvell_nfc_end_cmd(chip, NDSR_WRDREQ, + "WRDREQ while loading FIFO (data)"); + if (ret) + return ret; + + /* Transfer the contents */ + iowrite32_rep(nfc->regs + NDDB, data, FIFO_REP(data_len)); + iowrite32_rep(nfc->regs + NDDB, spare, FIFO_REP(spare_len)); + + return 0; +} + +static int marvell_nfc_hw_ecc_bch_write_page(struct mtd_info *mtd, + struct nand_chip *chip, + const u8 *buf, + int oob_required, int page) +{ + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + const u8 *data = buf; + const u8 *spare = chip->oob_poi; + int data_len = lt->data_bytes; + int spare_len = lt->spare_bytes; + int chunk, ret; + + /* Spare data will be written anyway, so clear it to avoid garbage */ + if (!oob_required) + memset(chip->oob_poi, 0xFF, mtd->oobsize); + + marvell_nfc_enable_hw_ecc(chip); + + for (chunk = 0; chunk < lt->nchunks; chunk++) { + if (chunk >= lt->full_chunk_cnt) { + data_len = lt->last_data_bytes; + spare_len = lt->last_spare_bytes; + } + + marvell_nfc_hw_ecc_bch_write_chunk(chip, chunk, data, data_len, + spare, spare_len, page); + data += data_len; + spare += spare_len; + + /* + * Waiting only for CMDD or PAGED is not enough, ECC are + * partially written. 
No flag is set once the operation is
+		 * really finished but the ND_RUN bit is cleared, so wait for it
+		 * before stepping into the next command.
+		 */
+		marvell_nfc_wait_ndrun(chip);
+	}
+
+	ret = marvell_nfc_wait_op(chip,
+				  chip->data_interface.timings.sdr.tPROG_max);
+
+	marvell_nfc_disable_hw_ecc(chip);
+
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int marvell_nfc_hw_ecc_bch_write_oob_raw(struct mtd_info *mtd,
+						struct nand_chip *chip,
+						int page)
+{
+	/* Invalidate page cache */
+	chip->pagebuf = -1;
+
+	memset(chip->data_buf, 0xFF, mtd->writesize);
+
+	return chip->ecc.write_page_raw(mtd, chip, chip->data_buf, true, page);
+}
+
+static int marvell_nfc_hw_ecc_bch_write_oob(struct mtd_info *mtd,
+					    struct nand_chip *chip, int page)
+{
+	/* Invalidate page cache */
+	chip->pagebuf = -1;
+
+	memset(chip->data_buf, 0xFF, mtd->writesize);
+
+	return chip->ecc.write_page(mtd, chip, chip->data_buf, true, page);
+}
+
+/* NAND framework ->exec_op() hooks and related helpers */
+
+/*
+ * Translate the instructions of a sub-operation into controller command
+ * buffer words and delays.
+ *
+ * @nfc_op is zeroed first, then each instruction of @subop ORs its
+ * contribution into nfc_op->ndcb[] and records its delay/timeout:
+ *  - CMD: first opcode goes to CMD1, any later one to CMD2 (with DBC set);
+ *  - ADDR: number of cycles in ndcb[0], address bytes in ndcb[1..3];
+ *  - DATA_IN/DATA_OUT: remembers the instruction and its index, sets the
+ *    READ/WRITE command type and, on NFCv2, the length override;
+ *  - WAITRDY: stores the ready timeout and delay.
+ */
+static void marvell_nfc_parse_instructions(struct nand_chip *chip,
+					   const struct nand_subop *subop,
+					   struct marvell_nfc_op *nfc_op)
+{
+	const struct nand_op_instr *instr = NULL;
+	struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
+	bool first_cmd = true;
+	unsigned int op_id;
+	unsigned int i;
+
+	/* Reset the input structure as most of its fields will be OR'ed */
+	memset(nfc_op, 0, sizeof(struct marvell_nfc_op));
+
+	for (op_id = 0; op_id < subop->ninstrs; op_id++) {
+		unsigned int offset, naddrs;
+		const u8 *addrs;
+		int len;
+
+		instr = &subop->instrs[op_id];
+
+		switch (instr->type) {
+		case NAND_OP_CMD_INSTR:
+			if (first_cmd)
+				nfc_op->ndcb[0] |=
+					NDCB0_CMD1(instr->ctx.cmd.opcode);
+			else
+				nfc_op->ndcb[0] |=
+					NDCB0_CMD2(instr->ctx.cmd.opcode) |
+					NDCB0_DBC;
+
+			nfc_op->cle_ale_delay_ns = instr->delay_ns;
+			first_cmd = false;
+			break;
+
+		case NAND_OP_ADDR_INSTR:
+			offset = nand_subop_get_addr_start_off(subop, op_id);
+			naddrs = nand_subop_get_num_addr_cyc(subop, op_id);
+			addrs = &instr->ctx.addr.addrs[offset];
+
+			nfc_op->ndcb[0] |= NDCB0_ADDR_CYC(naddrs);
+
+			/*
+			 * Widen before shifting: a u8 promotes to a signed
+			 * int, and moving bit 7 of the fourth address byte
+			 * into bit 31 would overflow it.
+			 */
+			for (i = 0; i < min_t(unsigned int, 4, naddrs); i++)
+				nfc_op->ndcb[1] |= (u32)addrs[i] << (8 * i);
+
+			if (naddrs >= 5)
+				nfc_op->ndcb[2] |= NDCB2_ADDR5_CYC(addrs[4]);
+			if (naddrs >= 6)
+				nfc_op->ndcb[3] |= NDCB3_ADDR6_CYC(addrs[5]);
+			if (naddrs == 7)
+				nfc_op->ndcb[3] |= NDCB3_ADDR7_CYC(addrs[6]);
+
+			nfc_op->cle_ale_delay_ns = instr->delay_ns;
+			break;
+
+		case NAND_OP_DATA_IN_INSTR:
+			nfc_op->data_instr = instr;
+			nfc_op->data_instr_idx = op_id;
+			nfc_op->ndcb[0] |= NDCB0_CMD_TYPE(TYPE_READ);
+			if (nfc->caps->is_nfcv2) {
+				nfc_op->ndcb[0] |=
+					NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW) |
+					NDCB0_LEN_OVRD;
+				/* Only query the length of data instructions */
+				len = nand_subop_get_data_len(subop, op_id);
+				nfc_op->ndcb[3] |= round_up(len, FIFO_DEPTH);
+			}
+			nfc_op->data_delay_ns = instr->delay_ns;
+			break;
+
+		case NAND_OP_DATA_OUT_INSTR:
+			nfc_op->data_instr = instr;
+			nfc_op->data_instr_idx = op_id;
+			nfc_op->ndcb[0] |= NDCB0_CMD_TYPE(TYPE_WRITE);
+			if (nfc->caps->is_nfcv2) {
+				nfc_op->ndcb[0] |=
+					NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW) |
+					NDCB0_LEN_OVRD;
+				/* Only query the length of data instructions */
+				len = nand_subop_get_data_len(subop, op_id);
+				nfc_op->ndcb[3] |= round_up(len, FIFO_DEPTH);
+			}
+			nfc_op->data_delay_ns = instr->delay_ns;
+			break;
+
+		case NAND_OP_WAITRDY_INSTR:
+			nfc_op->rdy_timeout_ms = instr->ctx.waitrdy.timeout_ms;
+			nfc_op->rdy_delay_ns = instr->delay_ns;
+			break;
+		}
+	}
+}
+
+static int marvell_nfc_xfer_data_pio(struct nand_chip *chip,
+				     const struct nand_subop *subop,
+				     struct marvell_nfc_op *nfc_op)
+{
+	struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
+	const struct nand_op_instr *instr = nfc_op->data_instr;
+	unsigned int op_id = nfc_op->data_instr_idx;
+	unsigned int len = nand_subop_get_data_len(subop, op_id);
+	unsigned int offset = nand_subop_get_data_start_off(subop, op_id);
+	bool reading = (instr->type == NAND_OP_DATA_IN_INSTR);
+	int ret;
+
+	if (instr->ctx.data.force_8bit)
+		marvell_nfc_force_byte_access(chip, true);
+
+	if (reading) {
+		u8 *in = instr->ctx.data.buf.in +
offset; + + ret = marvell_nfc_xfer_data_in_pio(nfc, in, len); + } else { + const u8 *out = instr->ctx.data.buf.out + offset; + + ret = marvell_nfc_xfer_data_out_pio(nfc, out, len); + } + + if (instr->ctx.data.force_8bit) + marvell_nfc_force_byte_access(chip, false); + + return ret; +} + +static int marvell_nfc_monolithic_access_exec(struct nand_chip *chip, + const struct nand_subop *subop) +{ + struct marvell_nfc_op nfc_op; + bool reading; + int ret; + + marvell_nfc_parse_instructions(chip, subop, &nfc_op); + reading = (nfc_op.data_instr->type == NAND_OP_DATA_IN_INSTR); + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + ret = marvell_nfc_end_cmd(chip, NDSR_RDDREQ | NDSR_WRDREQ, + "RDDREQ/WRDREQ while draining raw data"); + if (ret) + return ret; + + cond_delay(nfc_op.cle_ale_delay_ns); + + if (reading) { + if (nfc_op.rdy_timeout_ms) { + ret = marvell_nfc_wait_op(chip, nfc_op.rdy_timeout_ms); + if (ret) + return ret; + } + + cond_delay(nfc_op.rdy_delay_ns); + } + + marvell_nfc_xfer_data_pio(chip, subop, &nfc_op); + ret = marvell_nfc_wait_cmdd(chip); + if (ret) + return ret; + + cond_delay(nfc_op.data_delay_ns); + + if (!reading) { + if (nfc_op.rdy_timeout_ms) { + ret = marvell_nfc_wait_op(chip, nfc_op.rdy_timeout_ms); + if (ret) + return ret; + } + + cond_delay(nfc_op.rdy_delay_ns); + } + + /* + * NDCR ND_RUN bit should be cleared automatically at the end of each + * operation but experience shows that the behavior is buggy when it + * comes to writes (with LEN_OVRD). Clear it by hand in this case. 
+ */ + if (!reading) { + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + + writel_relaxed(readl(nfc->regs + NDCR) & ~NDCR_ND_RUN, + nfc->regs + NDCR); + } + + return 0; +} + +static int marvell_nfc_naked_access_exec(struct nand_chip *chip, + const struct nand_subop *subop) +{ + struct marvell_nfc_op nfc_op; + int ret; + + marvell_nfc_parse_instructions(chip, subop, &nfc_op); + + /* + * Naked access are different in that they need to be flagged as naked + * by the controller. Reset the controller registers fields that inform + * on the type and refill them according to the ongoing operation. + */ + nfc_op.ndcb[0] &= ~(NDCB0_CMD_TYPE(TYPE_MASK) | + NDCB0_CMD_XTYPE(XTYPE_MASK)); + switch (subop->instrs[0].type) { + case NAND_OP_CMD_INSTR: + nfc_op.ndcb[0] |= NDCB0_CMD_TYPE(TYPE_NAKED_CMD); + break; + case NAND_OP_ADDR_INSTR: + nfc_op.ndcb[0] |= NDCB0_CMD_TYPE(TYPE_NAKED_ADDR); + break; + case NAND_OP_DATA_IN_INSTR: + nfc_op.ndcb[0] |= NDCB0_CMD_TYPE(TYPE_READ) | + NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW); + break; + case NAND_OP_DATA_OUT_INSTR: + nfc_op.ndcb[0] |= NDCB0_CMD_TYPE(TYPE_WRITE) | + NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW); + break; + default: + /* This should never happen */ + break; + } + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + + if (!nfc_op.data_instr) { + ret = marvell_nfc_wait_cmdd(chip); + cond_delay(nfc_op.cle_ale_delay_ns); + return ret; + } + + ret = marvell_nfc_end_cmd(chip, NDSR_RDDREQ | NDSR_WRDREQ, + "RDDREQ/WRDREQ while draining raw data"); + if (ret) + return ret; + + marvell_nfc_xfer_data_pio(chip, subop, &nfc_op); + ret = marvell_nfc_wait_cmdd(chip); + if (ret) + return ret; + + /* + * NDCR ND_RUN bit should be cleared automatically at the end of each + * operation but experience shows that the behavior is buggy when it + * comes to writes (with LEN_OVRD). Clear it by hand in this case. 
+ */ + if (subop->instrs[0].type == NAND_OP_DATA_OUT_INSTR) { + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + + writel_relaxed(readl(nfc->regs + NDCR) & ~NDCR_ND_RUN, + nfc->regs + NDCR); + } + + return 0; +} + +static int marvell_nfc_naked_waitrdy_exec(struct nand_chip *chip, + const struct nand_subop *subop) +{ + struct marvell_nfc_op nfc_op; + int ret; + + marvell_nfc_parse_instructions(chip, subop, &nfc_op); + + ret = marvell_nfc_wait_op(chip, nfc_op.rdy_timeout_ms); + cond_delay(nfc_op.rdy_delay_ns); + + return ret; +} + +static int marvell_nfc_read_id_type_exec(struct nand_chip *chip, + const struct nand_subop *subop) +{ + struct marvell_nfc_op nfc_op; + int ret; + + marvell_nfc_parse_instructions(chip, subop, &nfc_op); + nfc_op.ndcb[0] &= ~NDCB0_CMD_TYPE(TYPE_READ); + nfc_op.ndcb[0] |= NDCB0_CMD_TYPE(TYPE_READ_ID); + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + ret = marvell_nfc_end_cmd(chip, NDSR_RDDREQ, + "RDDREQ while reading ID"); + if (ret) + return ret; + + cond_delay(nfc_op.cle_ale_delay_ns); + + if (nfc_op.rdy_timeout_ms) { + ret = marvell_nfc_wait_op(chip, nfc_op.rdy_timeout_ms); + if (ret) + return ret; + } + + cond_delay(nfc_op.rdy_delay_ns); + + marvell_nfc_xfer_data_pio(chip, subop, &nfc_op); + ret = marvell_nfc_wait_cmdd(chip); + if (ret) + return ret; + + cond_delay(nfc_op.data_delay_ns); + + return 0; +} + +static int marvell_nfc_read_status_exec(struct nand_chip *chip, + const struct nand_subop *subop) +{ + struct marvell_nfc_op nfc_op; + int ret; + + marvell_nfc_parse_instructions(chip, subop, &nfc_op); + nfc_op.ndcb[0] &= ~NDCB0_CMD_TYPE(TYPE_READ); + nfc_op.ndcb[0] |= NDCB0_CMD_TYPE(TYPE_STATUS); + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + ret = marvell_nfc_end_cmd(chip, NDSR_RDDREQ, + "RDDREQ while reading status"); + if (ret) + return ret; + + cond_delay(nfc_op.cle_ale_delay_ns); + + if 
(nfc_op.rdy_timeout_ms) { + ret = marvell_nfc_wait_op(chip, nfc_op.rdy_timeout_ms); + if (ret) + return ret; + } + + cond_delay(nfc_op.rdy_delay_ns); + + marvell_nfc_xfer_data_pio(chip, subop, &nfc_op); + ret = marvell_nfc_wait_cmdd(chip); + if (ret) + return ret; + + cond_delay(nfc_op.data_delay_ns); + + return 0; +} + +static int marvell_nfc_reset_cmd_type_exec(struct nand_chip *chip, + const struct nand_subop *subop) +{ + struct marvell_nfc_op nfc_op; + int ret; + + marvell_nfc_parse_instructions(chip, subop, &nfc_op); + nfc_op.ndcb[0] |= NDCB0_CMD_TYPE(TYPE_RESET); + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + ret = marvell_nfc_wait_cmdd(chip); + if (ret) + return ret; + + cond_delay(nfc_op.cle_ale_delay_ns); + + ret = marvell_nfc_wait_op(chip, nfc_op.rdy_timeout_ms); + if (ret) + return ret; + + cond_delay(nfc_op.rdy_delay_ns); + + return 0; +} + +static int marvell_nfc_erase_cmd_type_exec(struct nand_chip *chip, + const struct nand_subop *subop) +{ + struct marvell_nfc_op nfc_op; + int ret; + + marvell_nfc_parse_instructions(chip, subop, &nfc_op); + nfc_op.ndcb[0] |= NDCB0_CMD_TYPE(TYPE_ERASE); + + ret = marvell_nfc_prepare_cmd(chip); + if (ret) + return ret; + + marvell_nfc_send_cmd(chip, &nfc_op); + ret = marvell_nfc_wait_cmdd(chip); + if (ret) + return ret; + + cond_delay(nfc_op.cle_ale_delay_ns); + + ret = marvell_nfc_wait_op(chip, nfc_op.rdy_timeout_ms); + if (ret) + return ret; + + cond_delay(nfc_op.rdy_delay_ns); + + return 0; +} + +static const struct nand_op_parser marvell_nfcv2_op_parser = NAND_OP_PARSER( + /* Monolithic reads/writes */ + NAND_OP_PARSER_PATTERN( + marvell_nfc_monolithic_access_exec, + NAND_OP_PARSER_PAT_CMD_ELEM(false), + NAND_OP_PARSER_PAT_ADDR_ELEM(true, MAX_ADDRESS_CYC_NFCV2), + NAND_OP_PARSER_PAT_CMD_ELEM(true), + NAND_OP_PARSER_PAT_WAITRDY_ELEM(true), + NAND_OP_PARSER_PAT_DATA_IN_ELEM(false, MAX_CHUNK_SIZE)), + NAND_OP_PARSER_PATTERN( + 
marvell_nfc_monolithic_access_exec, + NAND_OP_PARSER_PAT_CMD_ELEM(false), + NAND_OP_PARSER_PAT_ADDR_ELEM(false, MAX_ADDRESS_CYC_NFCV2), + NAND_OP_PARSER_PAT_DATA_OUT_ELEM(false, MAX_CHUNK_SIZE), + NAND_OP_PARSER_PAT_CMD_ELEM(true), + NAND_OP_PARSER_PAT_WAITRDY_ELEM(true)), + /* Naked commands */ + NAND_OP_PARSER_PATTERN( + marvell_nfc_naked_access_exec, + NAND_OP_PARSER_PAT_CMD_ELEM(false)), + NAND_OP_PARSER_PATTERN( + marvell_nfc_naked_access_exec, + NAND_OP_PARSER_PAT_ADDR_ELEM(false, MAX_ADDRESS_CYC_NFCV2)), + NAND_OP_PARSER_PATTERN( + marvell_nfc_naked_access_exec, + NAND_OP_PARSER_PAT_DATA_IN_ELEM(false, MAX_CHUNK_SIZE)), + NAND_OP_PARSER_PATTERN( + marvell_nfc_naked_access_exec, + NAND_OP_PARSER_PAT_DATA_OUT_ELEM(false, MAX_CHUNK_SIZE)), + NAND_OP_PARSER_PATTERN( + marvell_nfc_naked_waitrdy_exec, + NAND_OP_PARSER_PAT_WAITRDY_ELEM(false)), + ); + +static const struct nand_op_parser marvell_nfcv1_op_parser = NAND_OP_PARSER( + /* Naked commands not supported, use a function for each pattern */ + NAND_OP_PARSER_PATTERN( + marvell_nfc_read_id_type_exec, + NAND_OP_PARSER_PAT_CMD_ELEM(false), + NAND_OP_PARSER_PAT_ADDR_ELEM(false, MAX_ADDRESS_CYC_NFCV1), + NAND_OP_PARSER_PAT_DATA_IN_ELEM(false, 8)), + NAND_OP_PARSER_PATTERN( + marvell_nfc_erase_cmd_type_exec, + NAND_OP_PARSER_PAT_CMD_ELEM(false), + NAND_OP_PARSER_PAT_ADDR_ELEM(false, MAX_ADDRESS_CYC_NFCV1), + NAND_OP_PARSER_PAT_CMD_ELEM(false), + NAND_OP_PARSER_PAT_WAITRDY_ELEM(false)), + NAND_OP_PARSER_PATTERN( + marvell_nfc_read_status_exec, + NAND_OP_PARSER_PAT_CMD_ELEM(false), + NAND_OP_PARSER_PAT_DATA_IN_ELEM(false, 1)), + NAND_OP_PARSER_PATTERN( + marvell_nfc_reset_cmd_type_exec, + NAND_OP_PARSER_PAT_CMD_ELEM(false), + NAND_OP_PARSER_PAT_WAITRDY_ELEM(false)), + NAND_OP_PARSER_PATTERN( + marvell_nfc_naked_waitrdy_exec, + NAND_OP_PARSER_PAT_WAITRDY_ELEM(false)), + ); + +static int marvell_nfc_exec_op(struct nand_chip *chip, + const struct nand_operation *op, + bool check_only) +{ + struct marvell_nfc *nfc = 
to_marvell_nfc(chip->controller); + + if (nfc->caps->is_nfcv2) + return nand_op_parser_exec_op(chip, &marvell_nfcv2_op_parser, + op, check_only); + else + return nand_op_parser_exec_op(chip, &marvell_nfcv1_op_parser, + op, check_only); +} + +/* + * Layouts were broken in old pxa3xx_nand driver, these are supposed to be + * usable. + */ +static int marvell_nand_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + + if (section) + return -ERANGE; + + oobregion->length = (lt->full_chunk_cnt * lt->ecc_bytes) + + lt->last_ecc_bytes; + oobregion->offset = mtd->oobsize - oobregion->length; + + return 0; +} + +static int marvell_nand_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + + if (section) + return -ERANGE; + + /* + * Bootrom looks in bytes 0 & 5 for bad blocks for the + * 4KB page / 4bit BCH combination. 
+ */ + if (mtd->writesize == SZ_4K && lt->data_bytes == SZ_2K) + oobregion->offset = 6; + else + oobregion->offset = 2; + + oobregion->length = (lt->full_chunk_cnt * lt->spare_bytes) + + lt->last_spare_bytes - oobregion->offset; + + return 0; +} + +static const struct mtd_ooblayout_ops marvell_nand_ooblayout_ops = { + .ecc = marvell_nand_ooblayout_ecc, + .free = marvell_nand_ooblayout_free, +}; + +static int marvell_nand_hw_ecc_ctrl_init(struct mtd_info *mtd, + struct nand_ecc_ctrl *ecc) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + const struct marvell_hw_ecc_layout *l; + int i; + + if (!nfc->caps->is_nfcv2 && + (mtd->writesize + mtd->oobsize > MAX_CHUNK_SIZE)) { + dev_err(nfc->dev, + "NFCv1: writesize (%d) cannot be bigger than a chunk (%d)\n", + mtd->writesize, MAX_CHUNK_SIZE - mtd->oobsize); + return -ENOTSUPP; + } + + to_marvell_nand(chip)->layout = NULL; + for (i = 0; i < ARRAY_SIZE(marvell_nfc_layouts); i++) { + l = &marvell_nfc_layouts[i]; + if (mtd->writesize == l->writesize && + ecc->size == l->chunk && ecc->strength == l->strength) { + to_marvell_nand(chip)->layout = l; + break; + } + } + + if (!to_marvell_nand(chip)->layout || + (!nfc->caps->is_nfcv2 && ecc->strength > 1)) { + dev_err(nfc->dev, + "ECC strength %d at page size %d is not supported\n", + ecc->strength, mtd->writesize); + return -ENOTSUPP; + } + + mtd_set_ooblayout(mtd, &marvell_nand_ooblayout_ops); + ecc->steps = l->nchunks; + ecc->size = l->data_bytes; + + if (ecc->strength == 1) { + chip->ecc.algo = NAND_ECC_HAMMING; + ecc->read_page_raw = marvell_nfc_hw_ecc_hmg_read_page_raw; + ecc->read_page = marvell_nfc_hw_ecc_hmg_read_page; + ecc->read_oob_raw = marvell_nfc_hw_ecc_hmg_read_oob_raw; + ecc->read_oob = ecc->read_oob_raw; + ecc->write_page_raw = marvell_nfc_hw_ecc_hmg_write_page_raw; + ecc->write_page = marvell_nfc_hw_ecc_hmg_write_page; + ecc->write_oob_raw = marvell_nfc_hw_ecc_hmg_write_oob_raw; + ecc->write_oob = 
ecc->write_oob_raw; + } else { + chip->ecc.algo = NAND_ECC_BCH; + ecc->strength = 16; + ecc->read_page_raw = marvell_nfc_hw_ecc_bch_read_page_raw; + ecc->read_page = marvell_nfc_hw_ecc_bch_read_page; + ecc->read_oob_raw = marvell_nfc_hw_ecc_bch_read_oob_raw; + ecc->read_oob = marvell_nfc_hw_ecc_bch_read_oob; + ecc->write_page_raw = marvell_nfc_hw_ecc_bch_write_page_raw; + ecc->write_page = marvell_nfc_hw_ecc_bch_write_page; + ecc->write_oob_raw = marvell_nfc_hw_ecc_bch_write_oob_raw; + ecc->write_oob = marvell_nfc_hw_ecc_bch_write_oob; + } + + return 0; +} + +static int marvell_nand_ecc_init(struct mtd_info *mtd, + struct nand_ecc_ctrl *ecc) +{ + struct nand_chip *chip = mtd_to_nand(mtd); + struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + int ret; + + if (ecc->mode != NAND_ECC_NONE && (!ecc->size || !ecc->strength)) { + if (chip->ecc_step_ds && chip->ecc_strength_ds) { + ecc->size = chip->ecc_step_ds; + ecc->strength = chip->ecc_strength_ds; + } else { + dev_info(nfc->dev, + "No minimum ECC strength, using 1b/512B\n"); + ecc->size = 512; + ecc->strength = 1; + } + } + + switch (ecc->mode) { + case NAND_ECC_HW: + ret = marvell_nand_hw_ecc_ctrl_init(mtd, ecc); + if (ret) + return ret; + break; + case NAND_ECC_NONE: + case NAND_ECC_SOFT: + if (!nfc->caps->is_nfcv2 && mtd->writesize != SZ_512 && + mtd->writesize != SZ_2K) { + dev_err(nfc->dev, "NFCv1 cannot write %d bytes pages\n", + mtd->writesize); + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static u8 bbt_pattern[] = {'M', 'V', 'B', 'b', 't', '0' }; +static u8 bbt_mirror_pattern[] = {'1', 't', 'b', 'B', 'V', 'M' }; + +static struct nand_bbt_descr bbt_main_descr = { + .options = NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE | + NAND_BBT_2BIT | NAND_BBT_VERSION, + .offs = 8, + .len = 6, + .veroffs = 14, + .maxblocks = 8, /* Last 8 blocks in each chip */ + .pattern = bbt_pattern +}; + +static struct nand_bbt_descr bbt_mirror_descr = { + .options = 
NAND_BBT_LASTBLOCK | NAND_BBT_CREATE | NAND_BBT_WRITE |
+		   NAND_BBT_2BIT | NAND_BBT_VERSION,
+	.offs =	8,
+	.len = 6,
+	.veroffs = 14,
+	.maxblocks = 8,	/* Last 8 blocks in each chip */
+	.pattern = bbt_mirror_pattern
+};
+
+/*
+ * Convert the SDR timings of @conf into controller cycles and, when @chipnr
+ * is not negative, store the resulting NDTR0/NDTR1 register values in the
+ * chip structure (marvell_nand->ndtr0 / ->ndtr1).
+ *
+ * Returns the error carried by nand_get_sdr_timings() if it fails, and 0
+ * otherwise. A negative @chipnr still runs the conversion but stores
+ * nothing.
+ */
+static int marvell_nfc_setup_data_interface(struct mtd_info *mtd, int chipnr,
+					    const struct nand_data_interface
+					    *conf)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip);
+	struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
+	unsigned int period_ns = 1000000000 / clk_get_rate(nfc->ecc_clk) * 2;
+	const struct nand_sdr_timings *sdr;
+	struct marvell_nfc_timings nfc_tmg;
+	int read_delay;
+
+	sdr = nand_get_sdr_timings(conf);
+	if (IS_ERR(sdr))
+		return PTR_ERR(sdr);
+
+	/*
+	 * SDR timings are given in pico-seconds while NFC timings must be
+	 * expressed in NAND controller clock cycles, which is half of the
+	 * frequency of the accessible ECC clock retrieved by clk_get_rate().
+	 * This is not written anywhere in the datasheet but was observed
+	 * with an oscilloscope.
+	 *
+	 * NFC datasheet gives equations from which those calculations
+	 * are derived, they tend to be slightly more restrictive than the
+	 * given core timings and may improve the overall speed.
+	 */
+	nfc_tmg.tRP = TO_CYCLES(DIV_ROUND_UP(sdr->tRC_min, 2), period_ns) - 1;
+	nfc_tmg.tRH = nfc_tmg.tRP;
+	nfc_tmg.tWP = TO_CYCLES(DIV_ROUND_UP(sdr->tWC_min, 2), period_ns) - 1;
+	nfc_tmg.tWH = nfc_tmg.tWP;
+	nfc_tmg.tCS = TO_CYCLES(sdr->tCS_min, period_ns);
+	nfc_tmg.tCH = TO_CYCLES(sdr->tCH_min, period_ns) - 1;
+	nfc_tmg.tADL = TO_CYCLES(sdr->tADL_min, period_ns);
+	/*
+	 * Read delay is the time of propagation from SoC pins to NFC internal
+	 * logic. With non-EDO timings, this is MIN_RD_DEL_CNT clock cycles. In
+	 * EDO mode, an additional delay of tRH must be taken into account so
+	 * the data is sampled on the falling edge instead of the rising edge.
+	 */
+	read_delay = sdr->tRC_min >= 30000 ?
+		MIN_RD_DEL_CNT : MIN_RD_DEL_CNT + nfc_tmg.tRH;
+
+	nfc_tmg.tAR = TO_CYCLES(sdr->tAR_min, period_ns);
+	/*
+	 * tWHR and tRHW are supposed to be read to write delays (and vice
+	 * versa) but in some cases, ie. when doing a change column, they must
+	 * be greater than that to be sure tCCS delay is respected.
+	 */
+	nfc_tmg.tWHR = TO_CYCLES(max_t(int, sdr->tWHR_min, sdr->tCCS_min),
+				 period_ns) - 2;
+	nfc_tmg.tRHW = TO_CYCLES(max_t(int, sdr->tRHW_min, sdr->tCCS_min),
+				 period_ns);
+
+	/* Use WAIT_MODE (wait for RB line) instead of only relying on delays */
+	nfc_tmg.tR = TO_CYCLES(sdr->tWB_max, period_ns);
+
+	if (chipnr < 0)
+		return 0;
+
+	marvell_nand->ndtr0 =
+		NDTR0_TRP(nfc_tmg.tRP) |
+		NDTR0_TRH(nfc_tmg.tRH) |
+		NDTR0_ETRP(nfc_tmg.tRP) |
+		NDTR0_TWP(nfc_tmg.tWP) |
+		NDTR0_TWH(nfc_tmg.tWH) |
+		NDTR0_TCS(nfc_tmg.tCS) |
+		NDTR0_TCH(nfc_tmg.tCH) |
+		NDTR0_RD_CNT_DEL(read_delay) |
+		NDTR0_SELCNTR |
+		NDTR0_TADL(nfc_tmg.tADL);
+
+	marvell_nand->ndtr1 =
+		NDTR1_TAR(nfc_tmg.tAR) |
+		NDTR1_TWHR(nfc_tmg.tWHR) |
+		NDTR1_TRHW(nfc_tmg.tRHW) |
+		NDTR1_WAIT_MODE |
+		NDTR1_TR(nfc_tmg.tR);
+
+	return 0;
+}
+
+static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc,
+				  struct device_node *np)
+{
+	struct pxa3xx_nand_platform_data *pdata = dev_get_platdata(dev);
+	struct marvell_nand_chip *marvell_nand;
+	struct mtd_info *mtd;
+	struct nand_chip *chip;
+	int nsels, ret, i;
+	u32 cs, rb;
+
+	/*
+	 * The legacy "num-cs" property indicates the number of CS on the only
+	 * chip connected to the controller (legacy bindings does not support
+	 * more than one chip). CS are only incremented one by one while the RB
+	 * pin is always the #0.
+	 *
+	 * When not using legacy bindings, a couple of "reg" and "nand-rb"
+	 * properties must be filled. For each chip, expressed as a subnode,
+	 * "reg" points to the CS lines and "nand-rb" to the RB line.
+ */ + if (pdata) { + nsels = 1; + } else if (nfc->caps->legacy_of_bindings && + !of_get_property(np, "num-cs", &nsels)) { + dev_err(dev, "missing num-cs property\n"); + return -EINVAL; + } else if (!of_get_property(np, "reg", &nsels)) { + dev_err(dev, "missing reg property\n"); + return -EINVAL; + } + + if (!pdata) + nsels /= sizeof(u32); + if (!nsels) { + dev_err(dev, "invalid reg property size\n"); + return -EINVAL; + } + + /* Alloc the nand chip structure */ + marvell_nand = devm_kzalloc(dev, sizeof(*marvell_nand) + + (nsels * + sizeof(struct marvell_nand_chip_sel)), + GFP_KERNEL); + if (!marvell_nand) { + dev_err(dev, "could not allocate chip structure\n"); + return -ENOMEM; + } + + marvell_nand->nsels = nsels; + marvell_nand->selected_die = -1; + + for (i = 0; i < nsels; i++) { + if (pdata || nfc->caps->legacy_of_bindings) { + /* + * Legacy bindings use the CS lines in natural + * order (0, 1, ...) + */ + cs = i; + } else { + /* Retrieve CS id */ + ret = of_property_read_u32_index(np, "reg", i, &cs); + if (ret) { + dev_err(dev, "could not retrieve reg property: %d\n", + ret); + return ret; + } + } + + if (cs >= nfc->caps->max_cs_nb) { + dev_err(dev, "invalid reg value: %u (max CS = %d)\n", + cs, nfc->caps->max_cs_nb); + return -EINVAL; + } + + if (test_and_set_bit(cs, &nfc->assigned_cs)) { + dev_err(dev, "CS %d already assigned\n", cs); + return -EINVAL; + } + + /* + * The cs variable represents the chip select id, which must be + * converted in bit fields for NDCB0 and NDCB2 to select the + * right chip. Unfortunately, due to a lack of information on + * the subject and incoherent documentation, the user should not + * use CS1 and CS3 at all as asserting them is not supported in + * a reliable way (due to multiplexing inside ADDR5 field). 
+ */ + marvell_nand->sels[i].cs = cs; + switch (cs) { + case 0: + case 2: + marvell_nand->sels[i].ndcb0_csel = 0; + break; + case 1: + case 3: + marvell_nand->sels[i].ndcb0_csel = NDCB0_CSEL; + break; + default: + return -EINVAL; + } + + /* Retrieve RB id */ + if (pdata || nfc->caps->legacy_of_bindings) { + /* Legacy bindings always use RB #0 */ + rb = 0; + } else { + ret = of_property_read_u32_index(np, "nand-rb", i, + &rb); + if (ret) { + dev_err(dev, + "could not retrieve RB property: %d\n", + ret); + return ret; + } + } + + if (rb >= nfc->caps->max_rb_nb) { + dev_err(dev, "invalid reg value: %u (max RB = %d)\n", + rb, nfc->caps->max_rb_nb); + return -EINVAL; + } + + marvell_nand->sels[i].rb = rb; + } + + chip = &marvell_nand->chip; + chip->controller = &nfc->controller; + nand_set_flash_node(chip, np); + + chip->exec_op = marvell_nfc_exec_op; + chip->select_chip = marvell_nfc_select_chip; + if (nfc->caps->is_nfcv2 && + !of_property_read_bool(np, "marvell,nand-keep-config")) + chip->setup_data_interface = marvell_nfc_setup_data_interface; + + mtd = nand_to_mtd(chip); + mtd->dev.parent = dev; + + /* + * Default to HW ECC engine mode. If the nand-ecc-mode property is given + * in the DT node, this entry will be overwritten in nand_scan_ident(). + */ + chip->ecc.mode = NAND_ECC_HW; + + /* + * Save a reference value for timing registers before + * ->setup_data_interface() is called. + */ + marvell_nand->ndtr0 = readl_relaxed(nfc->regs + NDTR0); + marvell_nand->ndtr1 = readl_relaxed(nfc->regs + NDTR1); + + chip->options |= NAND_BUSWIDTH_AUTO; + ret = nand_scan_ident(mtd, marvell_nand->nsels, NULL); + if (ret) { + dev_err(dev, "could not identify the nand chip\n"); + return ret; + } + + if (pdata && pdata->flash_bbt) + chip->bbt_options |= NAND_BBT_USE_FLASH; + + if (chip->bbt_options & NAND_BBT_USE_FLASH) { + /* + * We'll use a bad block table stored in-flash and don't + * allow writing the bad block marker to the flash. 
+ */ + chip->bbt_options |= NAND_BBT_NO_OOB_BBM; + chip->bbt_td = &bbt_main_descr; + chip->bbt_md = &bbt_mirror_descr; + } + + /* Save the chip-specific fields of NDCR */ + marvell_nand->ndcr = NDCR_PAGE_SZ(mtd->writesize); + if (chip->options & NAND_BUSWIDTH_16) + marvell_nand->ndcr |= NDCR_DWIDTH_M | NDCR_DWIDTH_C; + + /* + * On small page NANDs, only one cycle is needed to pass the + * column address. + */ + if (mtd->writesize <= 512) { + marvell_nand->addr_cyc = 1; + } else { + marvell_nand->addr_cyc = 2; + marvell_nand->ndcr |= NDCR_RA_START; + } + + /* + * Now add the number of cycles needed to pass the row + * address. + * + * Addressing a chip using CS 2 or 3 should also need the third row + * cycle but due to inconsistance in the documentation and lack of + * hardware to test this situation, this case is not supported. + */ + if (chip->options & NAND_ROW_ADDR_3) + marvell_nand->addr_cyc += 3; + else + marvell_nand->addr_cyc += 2; + + if (pdata) { + chip->ecc.size = pdata->ecc_step_size; + chip->ecc.strength = pdata->ecc_strength; + } + + ret = marvell_nand_ecc_init(mtd, &chip->ecc); + if (ret) { + dev_err(dev, "ECC init failed: %d\n", ret); + return ret; + } + + if (chip->ecc.mode == NAND_ECC_HW) { + /* + * Subpage write not available with hardware ECC, prohibit also + * subpage read as in userspace subpage access would still be + * allowed and subpage write, if used, would lead to numerous + * uncorrectable ECC errors. + */ + chip->options |= NAND_NO_SUBPAGE_WRITE; + } + + if (pdata || nfc->caps->legacy_of_bindings) { + /* + * We keep the MTD name unchanged to avoid breaking platforms + * where the MTD cmdline parser is used and the bootloader + * has not been updated to use the new naming scheme. 
+ */ + mtd->name = "pxa3xx_nand-0"; + } else if (!mtd->name) { + /* + * If the new bindings are used and the bootloader has not been + * updated to pass a new mtdparts parameter on the cmdline, you + * should define the following property in your NAND node, ie: + * + * label = "main-storage"; + * + * This way, mtd->name will be set by the core when + * nand_set_flash_node() is called. + */ + mtd->name = devm_kasprintf(nfc->dev, GFP_KERNEL, + "%s:nand.%d", dev_name(nfc->dev), + marvell_nand->sels[0].cs); + if (!mtd->name) { + dev_err(nfc->dev, "Failed to allocate mtd->name\n"); + return -ENOMEM; + } + } + + ret = nand_scan_tail(mtd); + if (ret) { + dev_err(dev, "nand_scan_tail failed: %d\n", ret); + return ret; + } + + if (pdata) + /* Legacy bindings support only one chip */ + ret = mtd_device_register(mtd, pdata->parts[0], + pdata->nr_parts[0]); + else + ret = mtd_device_register(mtd, NULL, 0); + if (ret) { + dev_err(dev, "failed to register mtd device: %d\n", ret); + nand_release(mtd); + return ret; + } + + list_add_tail(&marvell_nand->node, &nfc->chips); + + return 0; +} + +static int marvell_nand_chips_init(struct device *dev, struct marvell_nfc *nfc) +{ + struct device_node *np = dev->of_node; + struct device_node *nand_np; + int max_cs = nfc->caps->max_cs_nb; + int nchips; + int ret; + + if (!np) + nchips = 1; + else + nchips = of_get_child_count(np); + + if (nchips > max_cs) { + dev_err(dev, "too many NAND chips: %d (max = %d CS)\n", nchips, + max_cs); + return -EINVAL; + } + + /* + * Legacy bindings do not use child nodes to exhibit NAND chip + * properties and layout. Instead, NAND properties are mixed with the + * controller ones, and partitions are defined as direct subnodes of the + * NAND controller node. 
+ */ + if (nfc->caps->legacy_of_bindings) { + ret = marvell_nand_chip_init(dev, nfc, np); + return ret; + } + + for_each_child_of_node(np, nand_np) { + ret = marvell_nand_chip_init(dev, nfc, nand_np); + if (ret) { + of_node_put(nand_np); + return ret; + } + } + + return 0; +} + +static void marvell_nand_chips_cleanup(struct marvell_nfc *nfc) +{ + struct marvell_nand_chip *entry, *temp; + + list_for_each_entry_safe(entry, temp, &nfc->chips, node) { + nand_release(nand_to_mtd(&entry->chip)); + list_del(&entry->node); + } +} + +static int marvell_nfc_init_dma(struct marvell_nfc *nfc) +{ + struct platform_device *pdev = container_of(nfc->dev, + struct platform_device, + dev); + struct dma_slave_config config = {}; + struct resource *r; + dma_cap_mask_t mask; + struct pxad_param param; + int ret; + + if (!IS_ENABLED(CONFIG_PXA_DMA)) { + dev_warn(nfc->dev, + "DMA not enabled in configuration\n"); + return -ENOTSUPP; + } + + ret = dma_set_mask_and_coherent(nfc->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; + + r = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!r) { + dev_err(nfc->dev, "No resource defined for data DMA\n"); + return -ENXIO; + } + + param.drcmr = r->start; + param.prio = PXAD_PRIO_LOWEST; + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + nfc->dma_chan = + dma_request_slave_channel_compat(mask, pxad_filter_fn, + &param, nfc->dev, + "data"); + if (!nfc->dma_chan) { + dev_err(nfc->dev, + "Unable to request data DMA channel\n"); + return -ENODEV; + } + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) + return -ENXIO; + + config.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + config.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + config.src_addr = r->start + NDDB; + config.dst_addr = r->start + NDDB; + config.src_maxburst = 32; + config.dst_maxburst = 32; + ret = dmaengine_slave_config(nfc->dma_chan, &config); + if (ret < 0) { + dev_err(nfc->dev, "Failed to configure DMA channel\n"); + return ret; + } + + /* + * DMA must act on length
multiple of 32 and this length may be + * bigger than the destination buffer. Use this buffer instead + * for DMA transfers and then copy the desired amount of data to + * the provided buffer. + */ + nfc->dma_buf = kmalloc(MAX_CHUNK_SIZE, GFP_DMA); + if (!nfc->dma_buf) + return -ENOMEM; + + nfc->use_dma = true; + + return 0; +} + +static int marvell_nfc_init(struct marvell_nfc *nfc) +{ + struct device_node *np = nfc->dev->of_node; + + /* + * Some SoCs like A7k/A8k need to enable manually the NAND + * controller, gated clocks and reset bits to avoid being bootloader + * dependent. This is done through the use of the System Functions + * registers. + */ + if (nfc->caps->need_system_controller) { + struct regmap *sysctrl_base = + syscon_regmap_lookup_by_phandle(np, + "marvell,system-controller"); + u32 reg; + + if (IS_ERR(sysctrl_base)) + return PTR_ERR(sysctrl_base); + + reg = GENCONF_SOC_DEVICE_MUX_NFC_EN | + GENCONF_SOC_DEVICE_MUX_ECC_CLK_RST | + GENCONF_SOC_DEVICE_MUX_ECC_CORE_RST | + GENCONF_SOC_DEVICE_MUX_NFC_INT_EN; + regmap_write(sysctrl_base, GENCONF_SOC_DEVICE_MUX, reg); + + regmap_read(sysctrl_base, GENCONF_CLK_GATING_CTRL, &reg); + reg |= GENCONF_CLK_GATING_CTRL_ND_GATE; + regmap_write(sysctrl_base, GENCONF_CLK_GATING_CTRL, reg); + + regmap_read(sysctrl_base, GENCONF_ND_CLK_CTRL, &reg); + reg |= GENCONF_ND_CLK_CTRL_EN; + regmap_write(sysctrl_base, GENCONF_ND_CLK_CTRL, reg); + } + + /* Configure the DMA if appropriate */ + if (!nfc->caps->is_nfcv2) + marvell_nfc_init_dma(nfc); + + /* + * ECC operations and interruptions are only enabled when specifically + * needed. ECC shall not be activated in the early stages (fails probe). + * Arbiter flag, even if marked as "reserved", must be set (empirical). + * SPARE_EN bit must always be set or ECC bytes will not be at the same + * offset in the read page and this will fail the protection.
+ */ + writel_relaxed(NDCR_ALL_INT | NDCR_ND_ARB_EN | NDCR_SPARE_EN | + NDCR_RD_ID_CNT(NFCV1_READID_LEN), nfc->regs + NDCR); + writel_relaxed(0xFFFFFFFF, nfc->regs + NDSR); + writel_relaxed(0, nfc->regs + NDECCCTRL); + + return 0; +} + +static int marvell_nfc_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *r; + struct marvell_nfc *nfc; + int ret; + int irq; + + nfc = devm_kzalloc(&pdev->dev, sizeof(struct marvell_nfc), + GFP_KERNEL); + if (!nfc) + return -ENOMEM; + + nfc->dev = dev; + nand_hw_control_init(&nfc->controller); + INIT_LIST_HEAD(&nfc->chips); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + nfc->regs = devm_ioremap_resource(dev, r); + if (IS_ERR(nfc->regs)) + return PTR_ERR(nfc->regs); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to retrieve irq\n"); + return irq; + } + + nfc->ecc_clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(nfc->ecc_clk)) + return PTR_ERR(nfc->ecc_clk); + + ret = clk_prepare_enable(nfc->ecc_clk); + if (ret) + return ret; + + marvell_nfc_disable_int(nfc, NDCR_ALL_INT); + marvell_nfc_clear_int(nfc, NDCR_ALL_INT); + ret = devm_request_irq(dev, irq, marvell_nfc_isr, + 0, "marvell-nfc", nfc); + if (ret) + goto unprepare_clk; + + /* Get NAND controller capabilities */ + if (pdev->id_entry) + nfc->caps = (void *)pdev->id_entry->driver_data; + else + nfc->caps = of_device_get_match_data(&pdev->dev); + + if (!nfc->caps) { + dev_err(dev, "Could not retrieve NFC caps\n"); + ret = -EINVAL; + goto unprepare_clk; + } + + /* Init the controller and then probe the chips */ + ret = marvell_nfc_init(nfc); + if (ret) + goto unprepare_clk; + + platform_set_drvdata(pdev, nfc); + + ret = marvell_nand_chips_init(dev, nfc); + if (ret) + goto unprepare_clk; + + return 0; + +unprepare_clk: + clk_disable_unprepare(nfc->ecc_clk); + + return ret; +} + +static int marvell_nfc_remove(struct platform_device *pdev) +{ + struct marvell_nfc *nfc = platform_get_drvdata(pdev); + + 
marvell_nand_chips_cleanup(nfc); + + if (nfc->use_dma) { + dmaengine_terminate_all(nfc->dma_chan); + dma_release_channel(nfc->dma_chan); + } + + clk_disable_unprepare(nfc->ecc_clk); + + return 0; +} + +static const struct marvell_nfc_caps marvell_armada_8k_nfc_caps = { + .max_cs_nb = 4, + .max_rb_nb = 2, + .need_system_controller = true, + .is_nfcv2 = true, +}; + +static const struct marvell_nfc_caps marvell_armada370_nfc_caps = { + .max_cs_nb = 4, + .max_rb_nb = 2, + .is_nfcv2 = true, +}; + +static const struct marvell_nfc_caps marvell_pxa3xx_nfc_caps = { + .max_cs_nb = 2, + .max_rb_nb = 1, + .use_dma = true, +}; + +static const struct marvell_nfc_caps marvell_armada_8k_nfc_legacy_caps = { + .max_cs_nb = 4, + .max_rb_nb = 2, + .need_system_controller = true, + .legacy_of_bindings = true, + .is_nfcv2 = true, +}; + +static const struct marvell_nfc_caps marvell_armada370_nfc_legacy_caps = { + .max_cs_nb = 4, + .max_rb_nb = 2, + .legacy_of_bindings = true, + .is_nfcv2 = true, +}; + +static const struct marvell_nfc_caps marvell_pxa3xx_nfc_legacy_caps = { + .max_cs_nb = 2, + .max_rb_nb = 1, + .legacy_of_bindings = true, + .use_dma = true, +}; + +static const struct platform_device_id marvell_nfc_platform_ids[] = { + { + .name = "pxa3xx-nand", + .driver_data = (kernel_ulong_t)&marvell_pxa3xx_nfc_legacy_caps, + }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(platform, marvell_nfc_platform_ids); + +static const struct of_device_id marvell_nfc_of_ids[] = { + { + .compatible = "marvell,armada-8k-nand-controller", + .data = &marvell_armada_8k_nfc_caps, + }, + { + .compatible = "marvell,armada370-nand-controller", + .data = &marvell_armada370_nfc_caps, + }, + { + .compatible = "marvell,pxa3xx-nand-controller", + .data = &marvell_pxa3xx_nfc_caps, + }, + /* Support for old/deprecated bindings: */ + { + .compatible = "marvell,armada-8k-nand", + .data = &marvell_armada_8k_nfc_legacy_caps, + }, + { + .compatible = "marvell,armada370-nand", + .data = 
&marvell_armada370_nfc_legacy_caps, + }, + { + .compatible = "marvell,pxa3xx-nand", + .data = &marvell_pxa3xx_nfc_legacy_caps, + }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, marvell_nfc_of_ids); + +static struct platform_driver marvell_nfc_driver = { + .driver = { + .name = "marvell-nfc", + .of_match_table = marvell_nfc_of_ids, + }, + .id_table = marvell_nfc_platform_ids, + .probe = marvell_nfc_probe, + .remove = marvell_nfc_remove, +}; +module_platform_driver(marvell_nfc_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Marvell NAND controller driver"); -- cgit v1.2.3 From dd533734395f0e14db12d82fc64a879c805743dd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 9 Jan 2018 11:36:34 +0100 Subject: mtd: nand: use reworked NAND controller driver with Marvell EBU SoCs Choose to compile and embed marvell_nand.c as NAND controller driver instead of the legacy pxa3xx_nand.c for platforms with Marvell EBU SoCs. Signed-off-by: Miquel Raynal Acked-by: Gregory CLEMENT Acked-by: Arnd Bergmann Signed-off-by: Boris Brezillon --- arch/arm/configs/mvebu_v7_defconfig | 2 +- arch/arm64/configs/defconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig index 69553704f2dc..4b6e4fd47e5d 100644 --- a/arch/arm/configs/mvebu_v7_defconfig +++ b/arch/arm/configs/mvebu_v7_defconfig @@ -57,7 +57,7 @@ CONFIG_MTD_CFI_STAA=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_PXA3xx=y +CONFIG_MTD_NAND_MARVELL=y CONFIG_MTD_SPI_NOR=y CONFIG_SRAM=y CONFIG_MTD_UBI=y diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6356c6da34ea..b20fa9b31efe 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -161,7 +161,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_DENALI_DT=y -CONFIG_MTD_NAND_PXA3xx=y +CONFIG_MTD_NAND_MARVELL=y CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y 
CONFIG_BLK_DEV_NBD=m -- cgit v1.2.3 From 8edfe5fe193a3e43a410ab058d6f5d5cf28c4733 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:11:53 +0100 Subject: dt-bindings: mtd: gpmc-onenand: Update properties description Compatible property is required for OMAP2+ mtd driver. Also add INT pin gpio description and delete unused dma-channel property. Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Acked-by: Roger Quadros Signed-off-by: Boris Brezillon --- Documentation/devicetree/bindings/mtd/gpmc-onenand.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt index b6e8bfd024f4..e9f01a963a0a 100644 --- a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt +++ b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt @@ -9,13 +9,14 @@ Documentation/devicetree/bindings/memory-controllers/omap-gpmc.txt Required properties: + - compatible: "ti,omap2-onenand" - reg: The CS line the peripheral is connected to - - gpmc,device-width Width of the ONENAND device connected to the GPMC + - gpmc,device-width: Width of the ONENAND device connected to the GPMC in bytes. Must be 1 or 2. Optional properties: - - dma-channel: DMA Channel index + - int-gpios: GPIO specifier for the INT pin. For inline partition table parsing (optional): @@ -35,6 +36,7 @@ Example for an OMAP3430 board: #size-cells = <1>; onenand@0 { + compatible = "ti,omap2-onenand"; reg = <0 0 0>; /* CS0, offset 0 */ gpmc,device-width = <2>; -- cgit v1.2.3 From 396744b76a89b1cd7681c6b8a7716b545f6cf986 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:12:56 +0100 Subject: ARM: dts: OMAP2+: Add compatible property to onenand node OMAP onenand nodes are missing compatible property, add it. 
Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Acked-by: Roger Quadros Acked-by: Tony Lindgren Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- arch/arm/boot/dts/omap2420-n8x0-common.dtsi | 1 + arch/arm/boot/dts/omap3-n900.dts | 1 + arch/arm/boot/dts/omap3-n950-n9.dtsi | 1 + arch/arm/boot/dts/omap3430-sdp.dts | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/omap2420-n8x0-common.dtsi b/arch/arm/boot/dts/omap2420-n8x0-common.dtsi index 1df3ace3af92..63b0b4921e4e 100644 --- a/arch/arm/boot/dts/omap2420-n8x0-common.dtsi +++ b/arch/arm/boot/dts/omap2420-n8x0-common.dtsi @@ -52,6 +52,7 @@ onenand@0,0 { #address-cells = <1>; #size-cells = <1>; + compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */ gpmc,sync-read; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 669c51c00c00..e7c7b8e50703 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -838,6 +838,7 @@ onenand@0,0 { #address-cells = <1>; #size-cells = <1>; + compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */ gpmc,sync-read; diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 12fbb3da5fce..0d9b85317529 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -367,6 +367,7 @@ onenand@0,0 { #address-cells = <1>; #size-cells = <1>; + compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */ gpmc,sync-read; diff --git a/arch/arm/boot/dts/omap3430-sdp.dts b/arch/arm/boot/dts/omap3430-sdp.dts index 908951eb5943..d652708f6bef 100644 --- a/arch/arm/boot/dts/omap3430-sdp.dts +++ b/arch/arm/boot/dts/omap3430-sdp.dts @@ -154,6 +154,7 @@ linux,mtd-name= "samsung,kfm2g16q2m-deb8"; #address-cells = <1>; #size-cells = <1>; + compatible = "ti,omap2-onenand"; reg = <2 0 0x20000>; /* CS2, offset 0, IO size 4 */ 
gpmc,device-width = <2>; -- cgit v1.2.3 From d36005d4a289d31e23be387995df32b8f0554cbc Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:13:19 +0100 Subject: ARM: dts: omap3-igep: Update onenand node timings Update node timings to be compatible with actual chip used - gpmc_cs_show_timings utilized to dump values. Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Acked-by: Roger Quadros Acked-by: Tony Lindgren Signed-off-by: Boris Brezillon --- arch/arm/boot/dts/omap3-igep.dtsi | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm/boot/dts/omap3-igep.dtsi b/arch/arm/boot/dts/omap3-igep.dtsi index 4ad7d5565906..f33cc80c9dbc 100644 --- a/arch/arm/boot/dts/omap3-igep.dtsi +++ b/arch/arm/boot/dts/omap3-igep.dtsi @@ -147,32 +147,32 @@ gpmc,sync-read; gpmc,sync-write; gpmc,burst-length = <16>; - gpmc,burst-read; gpmc,burst-wrap; + gpmc,burst-read; gpmc,burst-write; gpmc,device-width = <2>; /* GPMC_DEVWIDTH_16BIT */ gpmc,mux-add-data = <2>; /* GPMC_MUX_AD */ gpmc,cs-on-ns = <0>; - gpmc,cs-rd-off-ns = <87>; - gpmc,cs-wr-off-ns = <87>; + gpmc,cs-rd-off-ns = <96>; + gpmc,cs-wr-off-ns = <96>; gpmc,adv-on-ns = <0>; - gpmc,adv-rd-off-ns = <10>; - gpmc,adv-wr-off-ns = <10>; - gpmc,oe-on-ns = <15>; - gpmc,oe-off-ns = <87>; + gpmc,adv-rd-off-ns = <12>; + gpmc,adv-wr-off-ns = <12>; + gpmc,oe-on-ns = <18>; + gpmc,oe-off-ns = <96>; gpmc,we-on-ns = <0>; - gpmc,we-off-ns = <87>; - gpmc,rd-cycle-ns = <112>; - gpmc,wr-cycle-ns = <112>; - gpmc,access-ns = <81>; - gpmc,page-burst-access-ns = <15>; + gpmc,we-off-ns = <96>; + gpmc,rd-cycle-ns = <114>; + gpmc,wr-cycle-ns = <114>; + gpmc,access-ns = <90>; + gpmc,page-burst-access-ns = <12>; gpmc,bus-turnaround-ns = <0>; gpmc,cycle2cycle-delay-ns = <0>; gpmc,wait-monitoring-ns = <0>; - gpmc,clk-activation-ns = <5>; + gpmc,clk-activation-ns = <6>; gpmc,wr-data-mux-bus-ns = <30>; - gpmc,wr-access-ns = <81>; - gpmc,sync-clk-ps = <15000>; + gpmc,wr-access-ns = <90>; + 
gpmc,sync-clk-ps = <12000>; #address-cells = <1>; #size-cells = <1>; -- cgit v1.2.3 From e6854e029b16bfd059194aabaa738d6d0d982cac Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:13:36 +0100 Subject: mtd: onenand: omap2: Remove regulator support As no platform data user sets regulator_can_sleep, regulator code is no-op and can be deleted. Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Reviewed-by: Sebastian Reichel Acked-by: Roger Quadros Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 42 +----------------------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 24a1388d3031..a03e1fe4aa48 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -59,7 +58,6 @@ struct omap2_onenand { int dma_channel; int freq; int (*setup)(void __iomem *base, int *freq_ptr); - struct regulator *regulator; u8 flags; }; @@ -583,30 +581,6 @@ static void omap2_onenand_shutdown(struct platform_device *pdev) memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE); } -static int omap2_onenand_enable(struct mtd_info *mtd) -{ - int ret; - struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); - - ret = regulator_enable(c->regulator); - if (ret != 0) - dev_err(&c->pdev->dev, "can't enable regulator\n"); - - return ret; -} - -static int omap2_onenand_disable(struct mtd_info *mtd) -{ - int ret; - struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); - - ret = regulator_disable(c->regulator); - if (ret != 0) - dev_err(&c->pdev->dev, "can't disable regulator\n"); - - return ret; -} - static int omap2_onenand_probe(struct platform_device *pdev) { struct omap_onenand_platform_data *pdata; @@ -726,22 +700,11 @@ static int omap2_onenand_probe(struct platform_device *pdev) } } 
- if (pdata->regulator_can_sleep) { - c->regulator = regulator_get(&pdev->dev, "vonenand"); - if (IS_ERR(c->regulator)) { - dev_err(&pdev->dev, "Failed to get regulator\n"); - r = PTR_ERR(c->regulator); - goto err_release_dma; - } - c->onenand.enable = omap2_onenand_enable; - c->onenand.disable = omap2_onenand_disable; - } - if (pdata->skip_initial_unlocking) this->options |= ONENAND_SKIP_INITIAL_UNLOCKING; if ((r = onenand_scan(&c->mtd, 1)) < 0) - goto err_release_regulator; + goto err_release_dma; r = mtd_device_register(&c->mtd, pdata ? pdata->parts : NULL, pdata ? pdata->nr_parts : 0); @@ -754,8 +717,6 @@ static int omap2_onenand_probe(struct platform_device *pdev) err_release_onenand: onenand_release(&c->mtd); -err_release_regulator: - regulator_put(c->regulator); err_release_dma: if (c->dma_channel != -1) omap_free_dma(c->dma_channel); @@ -779,7 +740,6 @@ static int omap2_onenand_remove(struct platform_device *pdev) struct omap2_onenand *c = dev_get_drvdata(&pdev->dev); onenand_release(&c->mtd); - regulator_put(c->regulator); if (c->dma_channel != -1) omap_free_dma(c->dma_channel); omap2_onenand_shutdown(pdev); -- cgit v1.2.3 From 11066d42952ad254aa59043a9a71737e41705c5f Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:13:56 +0100 Subject: mtd: onenand: omap2: Remove skip initial unlocking support No platform data user sets skip_initial_unlocking, so remove test for this field. 
Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Reviewed-by: Sebastian Reichel Acked-by: Roger Quadros Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index a03e1fe4aa48..93bd94337b35 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -700,9 +700,6 @@ static int omap2_onenand_probe(struct platform_device *pdev) } } - if (pdata->skip_initial_unlocking) - this->options |= ONENAND_SKIP_INITIAL_UNLOCKING; - if ((r = onenand_scan(&c->mtd, 1)) < 0) goto err_release_dma; -- cgit v1.2.3 From fafc0b3a9f7b586ae1261b2f78e389bd67df92d7 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:14:17 +0100 Subject: mtd: onenand: omap2: Remove partitioning support from platform data No platform data user setups partitioning information, so remove. Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Reviewed-by: Sebastian Reichel Acked-by: Roger Quadros Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 93bd94337b35..883993bbe40b 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -703,8 +703,7 @@ static int omap2_onenand_probe(struct platform_device *pdev) if ((r = onenand_scan(&c->mtd, 1)) < 0) goto err_release_dma; - r = mtd_device_register(&c->mtd, pdata ? pdata->parts : NULL, - pdata ? 
pdata->nr_parts : 0); + r = mtd_device_register(&c->mtd, NULL, 0); if (r) goto err_release_onenand; -- cgit v1.2.3 From d120568883a4676852caab2e3545c0d1a623376b Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:14:54 +0100 Subject: mtd: onenand: omap2: Account waiting time as waiting on IO Use wait_for_completion_io_timeout, which has an impact on how the task is accounted in scheduling stats. Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Reviewed-by: Sebastian Reichel Acked-by: Roger Quadros Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 883993bbe40b..0e7772e16d75 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -170,9 +170,8 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state) if (result == 0) { int retry_cnt = 0; retry: - result = wait_for_completion_timeout(&c->irq_done, - msecs_to_jiffies(20)); - if (result == 0) { + if (!wait_for_completion_io_timeout(&c->irq_done, + msecs_to_jiffies(20))) { /* Timeout after 20ms */ ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); if (ctrl & ONENAND_CTRL_ONGO && -- cgit v1.2.3 From 3621311695f5b1a9396ae95098ac904328eefde7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 12 Jan 2018 14:15:25 +0100 Subject: mtd: onenand: omap2: Simplify the DMA setup for various paths We have 4 functions containing almost identical DMA setup code. Create one function which can set up the DMA for both read and write and use this in place for the setup code in the driver. The new function will use wait_for_completion_io_timeout() and it will figure out the best data_type to be used for the transfer instead of hardwiring 32 or 16 bit data. 
Signed-off-by: Peter Ujfalusi Signed-off-by: Ladislav Michl Reviewed-by: Sebastian Reichel Acked-by: Roger Quadros Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 109 ++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 64 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 0e7772e16d75..d22163271dc9 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -288,6 +288,33 @@ static inline int omap2_onenand_bufferram_offset(struct mtd_info *mtd, int area) return 0; } +static inline int omap2_onenand_dma_transfer(struct omap2_onenand *c, + dma_addr_t src, dma_addr_t dst, + size_t count) +{ + int data_type = __ffs((src | dst | count)); + + if (data_type > OMAP_DMA_DATA_TYPE_S32) + data_type = OMAP_DMA_DATA_TYPE_S32; + + omap_set_dma_transfer_params(c->dma_channel, data_type, + count / BIT(data_type), 1, 0, 0, 0); + omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + src, 0, 0); + omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dst, 0, 0); + + reinit_completion(&c->dma_done); + omap_start_dma(c->dma_channel); + if (!wait_for_completion_io_timeout(&c->dma_done, + msecs_to_jiffies(20))) { + omap_stop_dma(c->dma_channel); + return -ETIMEDOUT; + } + + return 0; +} + #if defined(CONFIG_ARCH_OMAP3) || defined(MULTI_OMAP2) static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, @@ -298,10 +325,9 @@ static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, struct onenand_chip *this = mtd->priv; dma_addr_t dma_src, dma_dst; int bram_offset; - unsigned long timeout; void *buf = (void *)buffer; size_t xtra; - volatile unsigned *done; + int ret; bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; if (bram_offset & 3 || (size_t)buf & 3 || count < 384) @@ -338,25 +364,10 @@ static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, 
goto out_copy; } - omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32, - count >> 2, 1, 0, 0, 0); - omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dma_src, 0, 0); - omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dma_dst, 0, 0); - - reinit_completion(&c->dma_done); - omap_start_dma(c->dma_channel); - - timeout = jiffies + msecs_to_jiffies(20); - done = &c->dma_done.done; - while (time_before(jiffies, timeout)) - if (*done) - break; - + ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE); - if (!*done) { + if (ret) { dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); goto out_copy; } @@ -376,9 +387,8 @@ static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area, struct onenand_chip *this = mtd->priv; dma_addr_t dma_src, dma_dst; int bram_offset; - unsigned long timeout; void *buf = (void *)buffer; - volatile unsigned *done; + int ret; bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; if (bram_offset & 3 || (size_t)buf & 3 || count < 384) @@ -409,25 +419,10 @@ static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area, return -1; } - omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32, - count >> 2, 1, 0, 0, 0); - omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dma_src, 0, 0); - omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dma_dst, 0, 0); - - reinit_completion(&c->dma_done); - omap_start_dma(c->dma_channel); - - timeout = jiffies + msecs_to_jiffies(20); - done = &c->dma_done.done; - while (time_before(jiffies, timeout)) - if (*done) - break; - + ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE); - if (!*done) { + if (ret) { dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); goto out_copy; } @@ -466,7 +461,7 @@ static int 
omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); struct onenand_chip *this = mtd->priv; dma_addr_t dma_src, dma_dst; - int bram_offset; + int bram_offset, ret; bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; /* DMA is not used. Revisit PM requirements before enabling it. */ @@ -488,20 +483,13 @@ static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, return -1; } - omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32, - count / 4, 1, 0, 0, 0); - omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dma_src, 0, 0); - omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dma_dst, 0, 0); - - reinit_completion(&c->dma_done); - omap_start_dma(c->dma_channel); - wait_for_completion(&c->dma_done); - + ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE); - return 0; + if (ret) + dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); + + return ret; } static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, @@ -511,7 +499,7 @@ static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); struct onenand_chip *this = mtd->priv; dma_addr_t dma_src, dma_dst; - int bram_offset; + int bram_offset, ret; bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; /* DMA is not used. Revisit PM requirements before enabling it. 
*/ @@ -533,20 +521,13 @@ static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, return -1; } - omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S16, - count / 2, 1, 0, 0, 0); - omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dma_src, 0, 0); - omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dma_dst, 0, 0); - - reinit_completion(&c->dma_done); - omap_start_dma(c->dma_channel); - wait_for_completion(&c->dma_done); - + ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE); - return 0; + if (ret) + dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); + + return ret; } #else -- cgit v1.2.3 From fb25070afdf07cc62282c27357dc30ef3d7ef262 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:15:45 +0100 Subject: mtd: onenand: omap2: Unify OMAP2 and OMAP3 DMA implementation Since the very first commit (36cd4fb5d277: "[MTD] [OneNAND] Add OMAP2 / OMAP3 OneNAND driver") DMA is disabled for OMAP2. Later fixes thus went only into OMAP3 specific DMA functions which turned out not to be so OMAP3 specific, so merge those two implementations. 
Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Reviewed-by: Sebastian Reichel Acked-by: Roger Quadros Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 129 ++------------------------------------------ 1 file changed, 4 insertions(+), 125 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index d22163271dc9..36314124488d 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -315,9 +315,7 @@ static inline int omap2_onenand_dma_transfer(struct omap2_onenand *c, return 0; } -#if defined(CONFIG_ARCH_OMAP3) || defined(MULTI_OMAP2) - -static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, +static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, unsigned char *buffer, int offset, size_t count) { @@ -379,7 +377,7 @@ out_copy: return 0; } -static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area, +static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, const unsigned char *buffer, int offset, size_t count) { @@ -434,120 +432,6 @@ out_copy: return 0; } -#else - -static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, - unsigned char *buffer, int offset, - size_t count) -{ - return -ENOSYS; -} - -static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area, - const unsigned char *buffer, - int offset, size_t count) -{ - return -ENOSYS; -} - -#endif - -#if defined(CONFIG_ARCH_OMAP2) || defined(MULTI_OMAP2) - -static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, - unsigned char *buffer, int offset, - size_t count) -{ - struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); - struct onenand_chip *this = mtd->priv; - dma_addr_t dma_src, dma_dst; - int bram_offset, ret; - - bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; - /* DMA is not used. Revisit PM requirements before enabling it. 
*/ - if (1 || (c->dma_channel < 0) || - ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) || - (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) { - memcpy(buffer, (__force void *)(this->base + bram_offset), - count); - return 0; - } - - dma_src = c->phys_base + bram_offset; - dma_dst = dma_map_single(&c->pdev->dev, buffer, count, - DMA_FROM_DEVICE); - if (dma_mapping_error(&c->pdev->dev, dma_dst)) { - dev_err(&c->pdev->dev, - "Couldn't DMA map a %d byte buffer\n", - count); - return -1; - } - - ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); - dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE); - - if (ret) - dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); - - return ret; -} - -static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, - const unsigned char *buffer, - int offset, size_t count) -{ - struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); - struct onenand_chip *this = mtd->priv; - dma_addr_t dma_src, dma_dst; - int bram_offset, ret; - - bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; - /* DMA is not used. Revisit PM requirements before enabling it. 
*/ - if (1 || (c->dma_channel < 0) || - ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) || - (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) { - memcpy((__force void *)(this->base + bram_offset), buffer, - count); - return 0; - } - - dma_src = dma_map_single(&c->pdev->dev, (void *) buffer, count, - DMA_TO_DEVICE); - dma_dst = c->phys_base + bram_offset; - if (dma_mapping_error(&c->pdev->dev, dma_src)) { - dev_err(&c->pdev->dev, - "Couldn't DMA map a %d byte buffer\n", - count); - return -1; - } - - ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); - dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE); - - if (ret) - dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); - - return ret; -} - -#else - -static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, - unsigned char *buffer, int offset, - size_t count) -{ - return -ENOSYS; -} - -static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, - const unsigned char *buffer, - int offset, size_t count) -{ - return -ENOSYS; -} - -#endif - static struct platform_driver omap2_onenand_driver; static void omap2_onenand_shutdown(struct platform_device *pdev) @@ -671,13 +555,8 @@ static int omap2_onenand_probe(struct platform_device *pdev) this = &c->onenand; if (c->dma_channel >= 0) { this->wait = omap2_onenand_wait; - if (c->flags & ONENAND_IN_OMAP34XX) { - this->read_bufferram = omap3_onenand_read_bufferram; - this->write_bufferram = omap3_onenand_write_bufferram; - } else { - this->read_bufferram = omap2_onenand_read_bufferram; - this->write_bufferram = omap2_onenand_write_bufferram; - } + this->read_bufferram = omap2_onenand_read_bufferram; + this->write_bufferram = omap2_onenand_write_bufferram; } if ((r = onenand_scan(&c->mtd, 1)) < 0) -- cgit v1.2.3 From 3ed6a4d1de2c5855a9e0164872b6adfd6b7a4215 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 12 Jan 2018 14:16:08 +0100 Subject: mtd: onenand: omap2: Convert to use dmaengine for 
memcpy Do not use the legacy and deprecated omap-dma interface for setting up the memcpy. Signed-off-by: Peter Ujfalusi Signed-off-by: Ladislav Michl Reviewed-by: Sebastian Reichel Acked-by: Roger Quadros Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 80 +++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 36314124488d..c9ff67100ef4 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -39,8 +40,6 @@ #include #include -#include - #define DRIVER_NAME "omap2-onenand" #define ONENAND_BUFRAM_SIZE (1024 * 5) @@ -55,17 +54,15 @@ struct omap2_onenand { struct onenand_chip onenand; struct completion irq_done; struct completion dma_done; - int dma_channel; + struct dma_chan *dma_chan; int freq; int (*setup)(void __iomem *base, int *freq_ptr); u8 flags; }; -static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data) +static void omap2_onenand_dma_complete_func(void *completion) { - struct omap2_onenand *c = data; - - complete(&c->dma_done); + complete(completion); } static irqreturn_t omap2_onenand_interrupt(int irq, void *dev_id) @@ -292,23 +289,31 @@ static inline int omap2_onenand_dma_transfer(struct omap2_onenand *c, dma_addr_t src, dma_addr_t dst, size_t count) { - int data_type = __ffs((src | dst | count)); + struct dma_async_tx_descriptor *tx; + dma_cookie_t cookie; - if (data_type > OMAP_DMA_DATA_TYPE_S32) - data_type = OMAP_DMA_DATA_TYPE_S32; - - omap_set_dma_transfer_params(c->dma_channel, data_type, - count / BIT(data_type), 1, 0, 0, 0); - omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - src, 0, 0); - omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, - dst, 0, 0); + tx = dmaengine_prep_dma_memcpy(c->dma_chan, dst, src, count, 
0); + if (!tx) { + dev_err(&c->pdev->dev, "Failed to prepare DMA memcpy\n"); + return -EIO; + } reinit_completion(&c->dma_done); - omap_start_dma(c->dma_channel); + + tx->callback = omap2_onenand_dma_complete_func; + tx->callback_param = &c->dma_done; + + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + dev_err(&c->pdev->dev, "Failed to do DMA tx_submit\n"); + return -EIO; + } + + dma_async_issue_pending(c->dma_chan); + if (!wait_for_completion_io_timeout(&c->dma_done, msecs_to_jiffies(20))) { - omap_stop_dma(c->dma_channel); + dmaengine_terminate_sync(c->dma_chan); return -ETIMEDOUT; } @@ -468,8 +473,7 @@ static int omap2_onenand_probe(struct platform_device *pdev) c->flags = pdata->flags; c->gpmc_cs = pdata->cs; c->gpio_irq = pdata->gpio_irq; - c->dma_channel = pdata->dma_channel; - if (c->dma_channel < 0) { + if (pdata->dma_channel < 0) { /* if -1, don't use DMA */ c->gpio_irq = 0; } @@ -521,25 +525,17 @@ static int omap2_onenand_probe(struct platform_device *pdev) goto err_release_gpio; } - if (c->dma_channel >= 0) { - r = omap_request_dma(0, pdev->dev.driver->name, - omap2_onenand_dma_cb, (void *) c, - &c->dma_channel); - if (r == 0) { - omap_set_dma_write_mode(c->dma_channel, - OMAP_DMA_WRITE_NON_POSTED); - omap_set_dma_src_data_pack(c->dma_channel, 1); - omap_set_dma_src_burst_mode(c->dma_channel, - OMAP_DMA_DATA_BURST_8); - omap_set_dma_dest_data_pack(c->dma_channel, 1); - omap_set_dma_dest_burst_mode(c->dma_channel, - OMAP_DMA_DATA_BURST_8); - } else { + if (pdata->dma_channel >= 0) { + dma_cap_mask_t mask; + + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + + c->dma_chan = dma_request_channel(mask, NULL, NULL); + if (!c->dma_chan) dev_info(&pdev->dev, "failed to allocate DMA for OneNAND, " "using PIO instead\n"); - c->dma_channel = -1; - } } dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual " @@ -553,7 +549,7 @@ static int omap2_onenand_probe(struct platform_device *pdev) mtd_set_of_node(&c->mtd, pdata->of_node); 
this = &c->onenand; - if (c->dma_channel >= 0) { + if (c->dma_chan) { this->wait = omap2_onenand_wait; this->read_bufferram = omap2_onenand_read_bufferram; this->write_bufferram = omap2_onenand_write_bufferram; @@ -573,8 +569,8 @@ static int omap2_onenand_probe(struct platform_device *pdev) err_release_onenand: onenand_release(&c->mtd); err_release_dma: - if (c->dma_channel != -1) - omap_free_dma(c->dma_channel); + if (c->dma_chan) + dma_release_channel(c->dma_chan); if (c->gpio_irq) free_irq(gpio_to_irq(c->gpio_irq), c); err_release_gpio: @@ -595,8 +591,8 @@ static int omap2_onenand_remove(struct platform_device *pdev) struct omap2_onenand *c = dev_get_drvdata(&pdev->dev); onenand_release(&c->mtd); - if (c->dma_channel != -1) - omap_free_dma(c->dma_channel); + if (c->dma_chan) + dma_release_channel(c->dma_chan); omap2_onenand_shutdown(pdev); if (c->gpio_irq) { free_irq(gpio_to_irq(c->gpio_irq), c); -- cgit v1.2.3 From f5229331f13b84389cd71a58ccd5e15e5cb091c2 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:16:28 +0100 Subject: mtd: onenand: omap2: Do not make delay for GPIO OMAP3 specific Second commit in driver history (782b7a367d81: "[MTD] [OneNAND] OMAP3: add delay for GPIO") added quirk for waiting until the GPIO line settles. As DMA was disabled for OMAP2 boards, chances are this problem was not OMAP3 specific and as it is just one register read, previous test for SoC type is approximately as expensive as read itself. Make delay unconditional, which allows removing SoC specific code altogether.
Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Reviewed-by: Sebastian Reichel Acked-by: Roger Quadros Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index c9ff67100ef4..e4857a41760d 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -57,7 +57,6 @@ struct omap2_onenand { struct dma_chan *dma_chan; int freq; int (*setup)(void __iomem *base, int *freq_ptr); - u8 flags; }; static void omap2_onenand_dma_complete_func(void *completion) @@ -148,9 +147,8 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state) if (!(syscfg & ONENAND_SYS_CFG1_IOBE)) { syscfg |= ONENAND_SYS_CFG1_IOBE; write_reg(c, syscfg, ONENAND_REG_SYS_CFG1); - if (c->flags & ONENAND_IN_OMAP34XX) - /* Add a delay to let GPIO settle */ - syscfg = read_reg(c, ONENAND_REG_SYS_CFG1); + /* Add a delay to let GPIO settle */ + syscfg = read_reg(c, ONENAND_REG_SYS_CFG1); } reinit_completion(&c->irq_done); @@ -470,7 +468,6 @@ static int omap2_onenand_probe(struct platform_device *pdev) init_completion(&c->irq_done); init_completion(&c->dma_done); - c->flags = pdata->flags; c->gpmc_cs = pdata->cs; c->gpio_irq = pdata->gpio_irq; if (pdata->dma_channel < 0) { -- cgit v1.2.3 From bdaca9345d41fd9420995469d27603ea62054691 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:16:57 +0100 Subject: mtd: onenand: omap2: Decouple DMA enabling from INT pin availability INT pin (gpio_irq) is not really needed for DMA but only for notification when a command that needs wait has completed. DMA memcpy can be still used even without gpio_irq available, so enable it unconditionally. 
Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Acked-by: Roger Quadros Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 52 ++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index e4857a41760d..1cd78a076759 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -152,17 +152,13 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state) } reinit_completion(&c->irq_done); - if (c->gpio_irq) { - result = gpio_get_value(c->gpio_irq); - if (result == -1) { - ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); - intr = read_reg(c, ONENAND_REG_INTERRUPT); - wait_err("gpio error", state, ctrl, intr); - return -EIO; - } - } else - result = 0; - if (result == 0) { + result = gpio_get_value(c->gpio_irq); + if (result < 0) { + ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); + intr = read_reg(c, ONENAND_REG_INTERRUPT); + wait_err("gpio error", state, ctrl, intr); + return -EIO; + } else if (result == 0) { int retry_cnt = 0; retry: if (!wait_for_completion_io_timeout(&c->irq_done, @@ -450,6 +446,7 @@ static void omap2_onenand_shutdown(struct platform_device *pdev) static int omap2_onenand_probe(struct platform_device *pdev) { + dma_cap_mask_t mask; struct omap_onenand_platform_data *pdata; struct omap2_onenand *c; struct onenand_chip *this; @@ -513,31 +510,25 @@ static int omap2_onenand_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Failed to request GPIO%d for " "OneNAND\n", c->gpio_irq); goto err_iounmap; - } - gpio_direction_input(c->gpio_irq); + } + gpio_direction_input(c->gpio_irq); - if ((r = request_irq(gpio_to_irq(c->gpio_irq), - omap2_onenand_interrupt, IRQF_TRIGGER_RISING, - pdev->dev.driver->name, c)) < 0) - goto err_release_gpio; - } + if ((r = request_irq(gpio_to_irq(c->gpio_irq), + omap2_onenand_interrupt, IRQF_TRIGGER_RISING, + 
pdev->dev.driver->name, c)) < 0) + goto err_release_gpio; - if (pdata->dma_channel >= 0) { - dma_cap_mask_t mask; + this->wait = omap2_onenand_wait; + } - dma_cap_zero(mask); - dma_cap_set(DMA_MEMCPY, mask); + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); - c->dma_chan = dma_request_channel(mask, NULL, NULL); - if (!c->dma_chan) - dev_info(&pdev->dev, - "failed to allocate DMA for OneNAND, " - "using PIO instead\n"); - } + c->dma_chan = dma_request_channel(mask, NULL, NULL); dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual " - "base %p, freq %d MHz\n", c->gpmc_cs, c->phys_base, - c->onenand.base, c->freq); + "base %p, freq %d MHz, %s mode\n", c->gpmc_cs, c->phys_base, + c->onenand.base, c->freq, c->dma_chan ? "DMA" : "PIO"); c->pdev = pdev; c->mtd.priv = &c->onenand; @@ -547,7 +538,6 @@ static int omap2_onenand_probe(struct platform_device *pdev) this = &c->onenand; if (c->dma_chan) { - this->wait = omap2_onenand_wait; this->read_bufferram = omap2_onenand_read_bufferram; this->write_bufferram = omap2_onenand_write_bufferram; } -- cgit v1.2.3 From a758f50f10cfcf863f95372ff52e0d8d22fda9ba Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:17:25 +0100 Subject: mtd: onenand: omap2: Configure driver from DT Move away from platform data configuration and use pure DT approach. Use generic probe function to deal with OneNAND node and remove now useless gpmc_probe_onenand_child function. 
Import sync mode timing calculation function from mach-omap2/gpmc-onenand.c Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Acked-by: Roger Quadros Signed-off-by: Boris Brezillon --- drivers/memory/omap-gpmc.c | 158 ++++++++++++++++++++-------- drivers/mtd/onenand/Kconfig | 4 +- drivers/mtd/onenand/omap2.c | 250 +++++++++++++++++++++++++++----------------- include/linux/omap-gpmc.h | 28 +++++ 4 files changed, 301 insertions(+), 139 deletions(-) diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 0e30ee1c8677..90a66b3f7ae1 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -32,7 +32,6 @@ #include #include -#include #include @@ -1138,6 +1137,112 @@ struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *reg, int cs) } EXPORT_SYMBOL_GPL(gpmc_omap_get_nand_ops); +static void gpmc_omap_onenand_calc_sync_timings(struct gpmc_timings *t, + struct gpmc_settings *s, + int freq, int latency) +{ + struct gpmc_device_timings dev_t; + const int t_cer = 15; + const int t_avdp = 12; + const int t_cez = 20; /* max of t_cez, t_oez */ + const int t_wpl = 40; + const int t_wph = 30; + int min_gpmc_clk_period, t_ces, t_avds, t_avdh, t_ach, t_aavdh, t_rdyo; + + switch (freq) { + case 104: + min_gpmc_clk_period = 9600; /* 104 MHz */ + t_ces = 3; + t_avds = 4; + t_avdh = 2; + t_ach = 3; + t_aavdh = 6; + t_rdyo = 6; + break; + case 83: + min_gpmc_clk_period = 12000; /* 83 MHz */ + t_ces = 5; + t_avds = 4; + t_avdh = 2; + t_ach = 6; + t_aavdh = 6; + t_rdyo = 9; + break; + case 66: + min_gpmc_clk_period = 15000; /* 66 MHz */ + t_ces = 6; + t_avds = 5; + t_avdh = 2; + t_ach = 6; + t_aavdh = 6; + t_rdyo = 11; + break; + default: + min_gpmc_clk_period = 18500; /* 54 MHz */ + t_ces = 7; + t_avds = 7; + t_avdh = 7; + t_ach = 9; + t_aavdh = 7; + t_rdyo = 15; + break; + } + + /* Set synchronous read timings */ + memset(&dev_t, 0, sizeof(dev_t)); + + if (!s->sync_write) { + 
dev_t.t_avdp_w = max(t_avdp, t_cer) * 1000; + dev_t.t_wpl = t_wpl * 1000; + dev_t.t_wph = t_wph * 1000; + dev_t.t_aavdh = t_aavdh * 1000; + } + dev_t.ce_xdelay = true; + dev_t.avd_xdelay = true; + dev_t.oe_xdelay = true; + dev_t.we_xdelay = true; + dev_t.clk = min_gpmc_clk_period; + dev_t.t_bacc = dev_t.clk; + dev_t.t_ces = t_ces * 1000; + dev_t.t_avds = t_avds * 1000; + dev_t.t_avdh = t_avdh * 1000; + dev_t.t_ach = t_ach * 1000; + dev_t.cyc_iaa = (latency + 1); + dev_t.t_cez_r = t_cez * 1000; + dev_t.t_cez_w = dev_t.t_cez_r; + dev_t.cyc_aavdh_oe = 1; + dev_t.t_rdyo = t_rdyo * 1000 + min_gpmc_clk_period; + + gpmc_calc_timings(t, s, &dev_t); +} + +int gpmc_omap_onenand_set_timings(struct device *dev, int cs, int freq, + int latency, + struct gpmc_onenand_info *info) +{ + int ret; + struct gpmc_timings gpmc_t; + struct gpmc_settings gpmc_s; + + gpmc_read_settings_dt(dev->of_node, &gpmc_s); + + info->sync_read = gpmc_s.sync_read; + info->sync_write = gpmc_s.sync_write; + info->burst_len = gpmc_s.burst_len; + + if (!gpmc_s.sync_read && !gpmc_s.sync_write) + return 0; + + gpmc_omap_onenand_calc_sync_timings(&gpmc_t, &gpmc_s, freq, latency); + + ret = gpmc_cs_program_settings(cs, &gpmc_s); + if (ret < 0) + return ret; + + return gpmc_cs_set_timings(cs, &gpmc_t, &gpmc_s); +} +EXPORT_SYMBOL_GPL(gpmc_omap_onenand_set_timings); + int gpmc_get_client_irq(unsigned irq_config) { if (!gpmc_irq_domain) { @@ -1916,41 +2021,6 @@ static void __maybe_unused gpmc_read_timings_dt(struct device_node *np, of_property_read_bool(np, "gpmc,time-para-granularity"); } -#if IS_ENABLED(CONFIG_MTD_ONENAND) -static int gpmc_probe_onenand_child(struct platform_device *pdev, - struct device_node *child) -{ - u32 val; - struct omap_onenand_platform_data *gpmc_onenand_data; - - if (of_property_read_u32(child, "reg", &val) < 0) { - dev_err(&pdev->dev, "%pOF has no 'reg' property\n", - child); - return -ENODEV; - } - - gpmc_onenand_data = devm_kzalloc(&pdev->dev, sizeof(*gpmc_onenand_data), - 
GFP_KERNEL); - if (!gpmc_onenand_data) - return -ENOMEM; - - gpmc_onenand_data->cs = val; - gpmc_onenand_data->of_node = child; - gpmc_onenand_data->dma_channel = -1; - - if (!of_property_read_u32(child, "dma-channel", &val)) - gpmc_onenand_data->dma_channel = val; - - return gpmc_onenand_init(gpmc_onenand_data); -} -#else -static int gpmc_probe_onenand_child(struct platform_device *pdev, - struct device_node *child) -{ - return 0; -} -#endif - /** * gpmc_probe_generic_child - configures the gpmc for a child device * @pdev: pointer to gpmc platform device @@ -2053,6 +2123,16 @@ static int gpmc_probe_generic_child(struct platform_device *pdev, } } + if (of_node_cmp(child->name, "onenand") == 0) { + /* Warn about older DT blobs with no compatible property */ + if (!of_property_read_bool(child, "compatible")) { + dev_warn(&pdev->dev, + "Incompatible OneNAND node: missing compatible"); + ret = -EINVAL; + goto err; + } + } + if (of_device_is_compatible(child, "ti,omap2-nand")) { /* NAND specific setup */ val = 8; @@ -2189,11 +2269,7 @@ static void gpmc_probe_dt_children(struct platform_device *pdev) if (!child->name) continue; - if (of_node_cmp(child->name, "onenand") == 0) - ret = gpmc_probe_onenand_child(pdev, child); - else - ret = gpmc_probe_generic_child(pdev, child); - + ret = gpmc_probe_generic_child(pdev, child); if (ret) { dev_err(&pdev->dev, "failed to probe DT child '%s': %d\n", child->name, ret); diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig index aaeb30458139..9dc15748947b 100644 --- a/drivers/mtd/onenand/Kconfig +++ b/drivers/mtd/onenand/Kconfig @@ -25,9 +25,11 @@ config MTD_ONENAND_GENERIC config MTD_ONENAND_OMAP2 tristate "OneNAND on OMAP2/OMAP3 support" depends on ARCH_OMAP2 || ARCH_OMAP3 + depends on OF || COMPILE_TEST help - Support for a OneNAND flash device connected to an OMAP2/OMAP3 CPU + Support for a OneNAND flash device connected to an OMAP2/OMAP3 SoC via the GPMC memory controller. 
+ Enable dmaengine and gpiolib for better performance. config MTD_ONENAND_SAMSUNG tristate "OneNAND on Samsung SOC controller support" diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 1cd78a076759..2ce73fb6da1c 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -35,10 +37,9 @@ #include #include #include -#include +#include #include -#include #define DRIVER_NAME "omap2-onenand" @@ -48,15 +49,12 @@ struct omap2_onenand { struct platform_device *pdev; int gpmc_cs; unsigned long phys_base; - unsigned int mem_size; - int gpio_irq; + struct gpio_desc *int_gpiod; struct mtd_info mtd; struct onenand_chip onenand; struct completion irq_done; struct completion dma_done; struct dma_chan *dma_chan; - int freq; - int (*setup)(void __iomem *base, int *freq_ptr); }; static void omap2_onenand_dma_complete_func(void *completion) @@ -84,6 +82,65 @@ static inline void write_reg(struct omap2_onenand *c, unsigned short value, writew(value, c->onenand.base + reg); } +static int omap2_onenand_set_cfg(struct omap2_onenand *c, + bool sr, bool sw, + int latency, int burst_len) +{ + unsigned short reg = ONENAND_SYS_CFG1_RDY | ONENAND_SYS_CFG1_INT; + + reg |= latency << ONENAND_SYS_CFG1_BRL_SHIFT; + + switch (burst_len) { + case 0: /* continuous */ + break; + case 4: + reg |= ONENAND_SYS_CFG1_BL_4; + break; + case 8: + reg |= ONENAND_SYS_CFG1_BL_8; + break; + case 16: + reg |= ONENAND_SYS_CFG1_BL_16; + break; + case 32: + reg |= ONENAND_SYS_CFG1_BL_32; + break; + default: + return -EINVAL; + } + + if (latency > 5) + reg |= ONENAND_SYS_CFG1_HF; + if (latency > 7) + reg |= ONENAND_SYS_CFG1_VHF; + if (sr) + reg |= ONENAND_SYS_CFG1_SYNC_READ; + if (sw) + reg |= ONENAND_SYS_CFG1_SYNC_WRITE; + + write_reg(c, reg, ONENAND_REG_SYS_CFG1); + + return 0; +} + +static int omap2_onenand_get_freq(int ver) +{ + switch ((ver >> 4) & 0xf) { + case 0: + return 40; + 
case 1: + return 54; + case 2: + return 66; + case 3: + return 83; + case 4: + return 104; + } + + return -EINVAL; +} + static void wait_err(char *msg, int state, unsigned int ctrl, unsigned int intr) { printk(KERN_ERR "onenand_wait: %s! state %d ctrl 0x%04x intr 0x%04x\n", @@ -152,12 +209,12 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state) } reinit_completion(&c->irq_done); - result = gpio_get_value(c->gpio_irq); + result = gpiod_get_value(c->int_gpiod); if (result < 0) { ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); intr = read_reg(c, ONENAND_REG_INTERRUPT); wait_err("gpio error", state, ctrl, intr); - return -EIO; + return result; } else if (result == 0) { int retry_cnt = 0; retry: @@ -431,8 +488,6 @@ out_copy: return 0; } -static struct platform_driver omap2_onenand_driver; - static void omap2_onenand_shutdown(struct platform_device *pdev) { struct omap2_onenand *c = dev_get_drvdata(&pdev->dev); @@ -446,105 +501,117 @@ static void omap2_onenand_shutdown(struct platform_device *pdev) static int omap2_onenand_probe(struct platform_device *pdev) { + u32 val; dma_cap_mask_t mask; - struct omap_onenand_platform_data *pdata; - struct omap2_onenand *c; - struct onenand_chip *this; - int r; + int freq, latency, r; struct resource *res; + struct omap2_onenand *c; + struct gpmc_onenand_info info; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "error getting memory resource\n"); + return -EINVAL; + } - pdata = dev_get_platdata(&pdev->dev); - if (pdata == NULL) { - dev_err(&pdev->dev, "platform data missing\n"); - return -ENODEV; + r = of_property_read_u32(np, "reg", &val); + if (r) { + dev_err(dev, "reg not found in DT\n"); + return r; } - c = kzalloc(sizeof(struct omap2_onenand), GFP_KERNEL); + c = devm_kzalloc(dev, sizeof(struct omap2_onenand), GFP_KERNEL); if (!c) return -ENOMEM; init_completion(&c->irq_done); 
init_completion(&c->dma_done); - c->gpmc_cs = pdata->cs; - c->gpio_irq = pdata->gpio_irq; - if (pdata->dma_channel < 0) { - /* if -1, don't use DMA */ - c->gpio_irq = 0; - } - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - r = -EINVAL; - dev_err(&pdev->dev, "error getting memory resource\n"); - goto err_kfree; - } - + c->gpmc_cs = val; c->phys_base = res->start; - c->mem_size = resource_size(res); - - if (request_mem_region(c->phys_base, c->mem_size, - pdev->dev.driver->name) == NULL) { - dev_err(&pdev->dev, "Cannot reserve memory region at 0x%08lx, size: 0x%x\n", - c->phys_base, c->mem_size); - r = -EBUSY; - goto err_kfree; - } - c->onenand.base = ioremap(c->phys_base, c->mem_size); - if (c->onenand.base == NULL) { - r = -ENOMEM; - goto err_release_mem_region; - } - if (pdata->onenand_setup != NULL) { - r = pdata->onenand_setup(c->onenand.base, &c->freq); - if (r < 0) { - dev_err(&pdev->dev, "Onenand platform setup failed: " - "%d\n", r); - goto err_iounmap; - } - c->setup = pdata->onenand_setup; + c->onenand.base = devm_ioremap_resource(dev, res); + if (IS_ERR(c->onenand.base)) { + dev_err(dev, "Cannot reserve memory region at 0x%08x, size: 0x%x\n", + res->start, resource_size(res)); + return PTR_ERR(c->onenand.base); } - if (c->gpio_irq) { - if ((r = gpio_request(c->gpio_irq, "OneNAND irq")) < 0) { - dev_err(&pdev->dev, "Failed to request GPIO%d for " - "OneNAND\n", c->gpio_irq); - goto err_iounmap; - } - gpio_direction_input(c->gpio_irq); + c->int_gpiod = devm_gpiod_get_optional(dev, "int", GPIOD_IN); + if (IS_ERR(c->int_gpiod)) { + r = PTR_ERR(c->int_gpiod); + /* Just try again if this happens */ + if (r != -EPROBE_DEFER) + dev_err(dev, "error getting gpio: %d\n", r); + return r; + } - if ((r = request_irq(gpio_to_irq(c->gpio_irq), - omap2_onenand_interrupt, IRQF_TRIGGER_RISING, - pdev->dev.driver->name, c)) < 0) - goto err_release_gpio; + if (c->int_gpiod) { + r = devm_request_irq(dev, gpiod_to_irq(c->int_gpiod), + 
omap2_onenand_interrupt, + IRQF_TRIGGER_RISING, "onenand", c); + if (r) + return r; - this->wait = omap2_onenand_wait; + c->onenand.wait = omap2_onenand_wait; } dma_cap_zero(mask); dma_cap_set(DMA_MEMCPY, mask); c->dma_chan = dma_request_channel(mask, NULL, NULL); - - dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual " - "base %p, freq %d MHz, %s mode\n", c->gpmc_cs, c->phys_base, - c->onenand.base, c->freq, c->dma_chan ? "DMA" : "PIO"); + if (c->dma_chan) { + c->onenand.read_bufferram = omap2_onenand_read_bufferram; + c->onenand.write_bufferram = omap2_onenand_write_bufferram; + } c->pdev = pdev; c->mtd.priv = &c->onenand; + c->mtd.dev.parent = dev; + mtd_set_of_node(&c->mtd, dev->of_node); - c->mtd.dev.parent = &pdev->dev; - mtd_set_of_node(&c->mtd, pdata->of_node); - - this = &c->onenand; - if (c->dma_chan) { - this->read_bufferram = omap2_onenand_read_bufferram; - this->write_bufferram = omap2_onenand_write_bufferram; - } + dev_info(dev, "initializing on CS%d (0x%08lx), va %p, %s mode\n", + c->gpmc_cs, c->phys_base, c->onenand.base, + c->dma_chan ? 
"DMA" : "PIO"); if ((r = onenand_scan(&c->mtd, 1)) < 0) goto err_release_dma; + freq = omap2_onenand_get_freq(c->onenand.version_id); + if (freq > 0) { + switch (freq) { + case 104: + latency = 7; + break; + case 83: + latency = 6; + break; + case 66: + latency = 5; + break; + case 56: + latency = 4; + break; + default: /* 40 MHz or lower */ + latency = 3; + break; + } + + r = gpmc_omap_onenand_set_timings(dev, c->gpmc_cs, + freq, latency, &info); + if (r) + goto err_release_onenand; + + r = omap2_onenand_set_cfg(c, info.sync_read, info.sync_write, + latency, info.burst_len); + if (r) + goto err_release_onenand; + + if (info.sync_read || info.sync_write) + dev_info(dev, "optimized timings for %d MHz\n", freq); + } + r = mtd_device_register(&c->mtd, NULL, 0); if (r) goto err_release_onenand; @@ -558,17 +625,6 @@ err_release_onenand: err_release_dma: if (c->dma_chan) dma_release_channel(c->dma_chan); - if (c->gpio_irq) - free_irq(gpio_to_irq(c->gpio_irq), c); -err_release_gpio: - if (c->gpio_irq) - gpio_free(c->gpio_irq); -err_iounmap: - iounmap(c->onenand.base); -err_release_mem_region: - release_mem_region(c->phys_base, c->mem_size); -err_kfree: - kfree(c); return r; } @@ -581,23 +637,23 @@ static int omap2_onenand_remove(struct platform_device *pdev) if (c->dma_chan) dma_release_channel(c->dma_chan); omap2_onenand_shutdown(pdev); - if (c->gpio_irq) { - free_irq(gpio_to_irq(c->gpio_irq), c); - gpio_free(c->gpio_irq); - } - iounmap(c->onenand.base); - release_mem_region(c->phys_base, c->mem_size); - kfree(c); return 0; } +static const struct of_device_id omap2_onenand_id_table[] = { + { .compatible = "ti,omap2-onenand", }, + {}, +}; +MODULE_DEVICE_TABLE(of, omap2_onenand_id_table); + static struct platform_driver omap2_onenand_driver = { .probe = omap2_onenand_probe, .remove = omap2_onenand_remove, .shutdown = omap2_onenand_shutdown, .driver = { .name = DRIVER_NAME, + .of_match_table = omap2_onenand_id_table, }, }; diff --git a/include/linux/omap-gpmc.h 
b/include/linux/omap-gpmc.h index edfa280c3d56..053feb41510a 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -25,15 +25,43 @@ struct gpmc_nand_ops { struct gpmc_nand_regs; +struct gpmc_onenand_info { + bool sync_read; + bool sync_write; + int burst_len; +}; + #if IS_ENABLED(CONFIG_OMAP_GPMC) struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, int cs); +/** + * gpmc_omap_onenand_set_timings - set optimized sync timings. + * @cs: Chip Select Region + * @freq: Chip frequency + * @latency: Burst latency cycle count + * @info: Structure describing parameters used + * + * Sets optimized timings for the @cs region based on @freq and @latency. + * Updates the @info structure based on the GPMC settings. + */ +int gpmc_omap_onenand_set_timings(struct device *dev, int cs, int freq, + int latency, + struct gpmc_onenand_info *info); + #else static inline struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs *regs, int cs) { return NULL; } + +static inline +int gpmc_omap_onenand_set_timings(struct device *dev, int cs, int freq, + int latency, + struct gpmc_onenand_info *info) +{ + return -EINVAL; +} #endif /* CONFIG_OMAP_GPMC */ extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, -- cgit v1.2.3 From 2514830b8b8ca966fae35103070984c2e847b2b9 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 12 Jan 2018 14:18:00 +0100 Subject: ARM: OMAP2+: Remove gpmc-onenand As OneNAND driver is now using devicetree gpmc-onenand and its platform data is unused and can be removed. 
Signed-off-by: Ladislav Michl Reviewed-by: Peter Ujfalusi Acked-by: Roger Quadros Acked-by: Tony Lindgren Tested-by: Tony Lindgren Tested-by: Aaro Koskinen Signed-off-by: Boris Brezillon --- arch/arm/mach-omap2/Makefile | 3 - arch/arm/mach-omap2/gpmc-onenand.c | 409 ------------------------ include/linux/platform_data/mtd-onenand-omap2.h | 34 -- 3 files changed, 446 deletions(-) delete mode 100644 arch/arm/mach-omap2/gpmc-onenand.c delete mode 100644 include/linux/platform_data/mtd-onenand-omap2.h diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 2f722a805948..c15bbcad5f67 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -232,6 +232,3 @@ obj-y += $(omap-hsmmc-m) $(omap-hsmmc-y) obj-y += omap_phy_internal.o obj-$(CONFIG_MACH_OMAP2_TUSB6010) += usb-tusb6010.o - -onenand-$(CONFIG_MTD_ONENAND_OMAP2) := gpmc-onenand.o -obj-y += $(onenand-m) $(onenand-y) diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c deleted file mode 100644 index 2944af820558..000000000000 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ /dev/null @@ -1,409 +0,0 @@ -/* - * linux/arch/arm/mach-omap2/gpmc-onenand.c - * - * Copyright (C) 2006 - 2009 Nokia Corporation - * Contacts: Juha Yrjola - * Tony Lindgren - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "soc.h" - -#define ONENAND_IO_SIZE SZ_128K - -#define ONENAND_FLAG_SYNCREAD (1 << 0) -#define ONENAND_FLAG_SYNCWRITE (1 << 1) -#define ONENAND_FLAG_HF (1 << 2) -#define ONENAND_FLAG_VHF (1 << 3) - -static unsigned onenand_flags; -static unsigned latency; - -static struct omap_onenand_platform_data *gpmc_onenand_data; - -static struct resource gpmc_onenand_resource = { - .flags = IORESOURCE_MEM, -}; - -static struct platform_device gpmc_onenand_device = { - .name = "omap2-onenand", - .id = -1, - .num_resources = 1, - .resource = &gpmc_onenand_resource, -}; - -static struct gpmc_settings onenand_async = { - .device_width = GPMC_DEVWIDTH_16BIT, - .mux_add_data = GPMC_MUX_AD, -}; - -static struct gpmc_settings onenand_sync = { - .burst_read = true, - .burst_wrap = true, - .burst_len = GPMC_BURST_16, - .device_width = GPMC_DEVWIDTH_16BIT, - .mux_add_data = GPMC_MUX_AD, - .wait_pin = 0, -}; - -static void omap2_onenand_calc_async_timings(struct gpmc_timings *t) -{ - struct gpmc_device_timings dev_t; - const int t_cer = 15; - const int t_avdp = 12; - const int t_aavdh = 7; - const int t_ce = 76; - const int t_aa = 76; - const int t_oe = 20; - const int t_cez = 20; /* max of t_cez, t_oez */ - const int t_wpl = 40; - const int t_wph = 30; - - memset(&dev_t, 0, sizeof(dev_t)); - - dev_t.t_avdp_r = max_t(int, t_avdp, t_cer) * 1000; - dev_t.t_avdp_w = dev_t.t_avdp_r; - dev_t.t_aavdh = t_aavdh * 1000; - dev_t.t_aa = t_aa * 1000; - dev_t.t_ce = t_ce * 1000; - dev_t.t_oe = t_oe * 1000; - dev_t.t_cez_r = t_cez * 1000; - dev_t.t_cez_w = dev_t.t_cez_r; - dev_t.t_wpl = t_wpl * 1000; - dev_t.t_wph = t_wph * 1000; - - gpmc_calc_timings(t, &onenand_async, &dev_t); -} - -static void omap2_onenand_set_async_mode(void __iomem *onenand_base) -{ - u32 reg; - - /* Ensure sync read and sync write are disabled */ - reg = readw(onenand_base + ONENAND_REG_SYS_CFG1); - reg &= 
~ONENAND_SYS_CFG1_SYNC_READ & ~ONENAND_SYS_CFG1_SYNC_WRITE; - writew(reg, onenand_base + ONENAND_REG_SYS_CFG1); -} - -static void set_onenand_cfg(void __iomem *onenand_base) -{ - u32 reg = ONENAND_SYS_CFG1_RDY | ONENAND_SYS_CFG1_INT; - - reg |= (latency << ONENAND_SYS_CFG1_BRL_SHIFT) | - ONENAND_SYS_CFG1_BL_16; - if (onenand_flags & ONENAND_FLAG_SYNCREAD) - reg |= ONENAND_SYS_CFG1_SYNC_READ; - else - reg &= ~ONENAND_SYS_CFG1_SYNC_READ; - if (onenand_flags & ONENAND_FLAG_SYNCWRITE) - reg |= ONENAND_SYS_CFG1_SYNC_WRITE; - else - reg &= ~ONENAND_SYS_CFG1_SYNC_WRITE; - if (onenand_flags & ONENAND_FLAG_HF) - reg |= ONENAND_SYS_CFG1_HF; - else - reg &= ~ONENAND_SYS_CFG1_HF; - if (onenand_flags & ONENAND_FLAG_VHF) - reg |= ONENAND_SYS_CFG1_VHF; - else - reg &= ~ONENAND_SYS_CFG1_VHF; - - writew(reg, onenand_base + ONENAND_REG_SYS_CFG1); -} - -static int omap2_onenand_get_freq(struct omap_onenand_platform_data *cfg, - void __iomem *onenand_base) -{ - u16 ver = readw(onenand_base + ONENAND_REG_VERSION_ID); - int freq; - - switch ((ver >> 4) & 0xf) { - case 0: - freq = 40; - break; - case 1: - freq = 54; - break; - case 2: - freq = 66; - break; - case 3: - freq = 83; - break; - case 4: - freq = 104; - break; - default: - pr_err("onenand rate not detected, bad GPMC async timings?\n"); - freq = 0; - } - - return freq; -} - -static void omap2_onenand_calc_sync_timings(struct gpmc_timings *t, - unsigned int flags, - int freq) -{ - struct gpmc_device_timings dev_t; - const int t_cer = 15; - const int t_avdp = 12; - const int t_cez = 20; /* max of t_cez, t_oez */ - const int t_wpl = 40; - const int t_wph = 30; - int min_gpmc_clk_period, t_ces, t_avds, t_avdh, t_ach, t_aavdh, t_rdyo; - int div, gpmc_clk_ns; - - if (flags & ONENAND_SYNC_READ) - onenand_flags = ONENAND_FLAG_SYNCREAD; - else if (flags & ONENAND_SYNC_READWRITE) - onenand_flags = ONENAND_FLAG_SYNCREAD | ONENAND_FLAG_SYNCWRITE; - - switch (freq) { - case 104: - min_gpmc_clk_period = 9600; /* 104 MHz */ - t_ces = 3; - 
t_avds = 4; - t_avdh = 2; - t_ach = 3; - t_aavdh = 6; - t_rdyo = 6; - break; - case 83: - min_gpmc_clk_period = 12000; /* 83 MHz */ - t_ces = 5; - t_avds = 4; - t_avdh = 2; - t_ach = 6; - t_aavdh = 6; - t_rdyo = 9; - break; - case 66: - min_gpmc_clk_period = 15000; /* 66 MHz */ - t_ces = 6; - t_avds = 5; - t_avdh = 2; - t_ach = 6; - t_aavdh = 6; - t_rdyo = 11; - break; - default: - min_gpmc_clk_period = 18500; /* 54 MHz */ - t_ces = 7; - t_avds = 7; - t_avdh = 7; - t_ach = 9; - t_aavdh = 7; - t_rdyo = 15; - onenand_flags &= ~ONENAND_FLAG_SYNCWRITE; - break; - } - - div = gpmc_calc_divider(min_gpmc_clk_period); - gpmc_clk_ns = gpmc_ticks_to_ns(div); - if (gpmc_clk_ns < 15) /* >66MHz */ - onenand_flags |= ONENAND_FLAG_HF; - else - onenand_flags &= ~ONENAND_FLAG_HF; - if (gpmc_clk_ns < 12) /* >83MHz */ - onenand_flags |= ONENAND_FLAG_VHF; - else - onenand_flags &= ~ONENAND_FLAG_VHF; - if (onenand_flags & ONENAND_FLAG_VHF) - latency = 8; - else if (onenand_flags & ONENAND_FLAG_HF) - latency = 6; - else if (gpmc_clk_ns >= 25) /* 40 MHz*/ - latency = 3; - else - latency = 4; - - /* Set synchronous read timings */ - memset(&dev_t, 0, sizeof(dev_t)); - - if (onenand_flags & ONENAND_FLAG_SYNCREAD) - onenand_sync.sync_read = true; - if (onenand_flags & ONENAND_FLAG_SYNCWRITE) { - onenand_sync.sync_write = true; - onenand_sync.burst_write = true; - } else { - dev_t.t_avdp_w = max(t_avdp, t_cer) * 1000; - dev_t.t_wpl = t_wpl * 1000; - dev_t.t_wph = t_wph * 1000; - dev_t.t_aavdh = t_aavdh * 1000; - } - dev_t.ce_xdelay = true; - dev_t.avd_xdelay = true; - dev_t.oe_xdelay = true; - dev_t.we_xdelay = true; - dev_t.clk = min_gpmc_clk_period; - dev_t.t_bacc = dev_t.clk; - dev_t.t_ces = t_ces * 1000; - dev_t.t_avds = t_avds * 1000; - dev_t.t_avdh = t_avdh * 1000; - dev_t.t_ach = t_ach * 1000; - dev_t.cyc_iaa = (latency + 1); - dev_t.t_cez_r = t_cez * 1000; - dev_t.t_cez_w = dev_t.t_cez_r; - dev_t.cyc_aavdh_oe = 1; - dev_t.t_rdyo = t_rdyo * 1000 + min_gpmc_clk_period; - - 
gpmc_calc_timings(t, &onenand_sync, &dev_t); -} - -static int omap2_onenand_setup_async(void __iomem *onenand_base) -{ - struct gpmc_timings t; - int ret; - - /* - * Note that we need to keep sync_write set for the call to - * omap2_onenand_set_async_mode() to work to detect the onenand - * supported clock rate for the sync timings. - */ - if (gpmc_onenand_data->of_node) { - gpmc_read_settings_dt(gpmc_onenand_data->of_node, - &onenand_async); - if (onenand_async.sync_read || onenand_async.sync_write) { - if (onenand_async.sync_write) - gpmc_onenand_data->flags |= - ONENAND_SYNC_READWRITE; - else - gpmc_onenand_data->flags |= ONENAND_SYNC_READ; - onenand_async.sync_read = false; - } - } - - onenand_async.sync_write = true; - omap2_onenand_calc_async_timings(&t); - - ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async); - if (ret < 0) - return ret; - - ret = gpmc_cs_set_timings(gpmc_onenand_data->cs, &t, &onenand_async); - if (ret < 0) - return ret; - - omap2_onenand_set_async_mode(onenand_base); - - return 0; -} - -static int omap2_onenand_setup_sync(void __iomem *onenand_base, int *freq_ptr) -{ - int ret, freq = *freq_ptr; - struct gpmc_timings t; - - if (!freq) { - /* Very first call freq is not known */ - freq = omap2_onenand_get_freq(gpmc_onenand_data, onenand_base); - if (!freq) - return -ENODEV; - set_onenand_cfg(onenand_base); - } - - if (gpmc_onenand_data->of_node) { - gpmc_read_settings_dt(gpmc_onenand_data->of_node, - &onenand_sync); - } else { - /* - * FIXME: Appears to be legacy code from initial ONENAND commit. - * Unclear what boards this is for and if this can be removed. 
- */ - if (!cpu_is_omap34xx()) - onenand_sync.wait_on_read = true; - } - - omap2_onenand_calc_sync_timings(&t, gpmc_onenand_data->flags, freq); - - ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_sync); - if (ret < 0) - return ret; - - ret = gpmc_cs_set_timings(gpmc_onenand_data->cs, &t, &onenand_sync); - if (ret < 0) - return ret; - - set_onenand_cfg(onenand_base); - - *freq_ptr = freq; - - return 0; -} - -static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr) -{ - struct device *dev = &gpmc_onenand_device.dev; - unsigned l = ONENAND_SYNC_READ | ONENAND_SYNC_READWRITE; - int ret; - - ret = omap2_onenand_setup_async(onenand_base); - if (ret) { - dev_err(dev, "unable to set to async mode\n"); - return ret; - } - - if (!(gpmc_onenand_data->flags & l)) - return 0; - - ret = omap2_onenand_setup_sync(onenand_base, freq_ptr); - if (ret) - dev_err(dev, "unable to set to sync mode\n"); - return ret; -} - -int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) -{ - int err; - struct device *dev = &gpmc_onenand_device.dev; - - gpmc_onenand_data = _onenand_data; - gpmc_onenand_data->onenand_setup = gpmc_onenand_setup; - gpmc_onenand_device.dev.platform_data = gpmc_onenand_data; - - if (cpu_is_omap24xx() && - (gpmc_onenand_data->flags & ONENAND_SYNC_READWRITE)) { - dev_warn(dev, "OneNAND using only SYNC_READ on 24xx\n"); - gpmc_onenand_data->flags &= ~ONENAND_SYNC_READWRITE; - gpmc_onenand_data->flags |= ONENAND_SYNC_READ; - } - - if (cpu_is_omap34xx()) - gpmc_onenand_data->flags |= ONENAND_IN_OMAP34XX; - else - gpmc_onenand_data->flags &= ~ONENAND_IN_OMAP34XX; - - err = gpmc_cs_request(gpmc_onenand_data->cs, ONENAND_IO_SIZE, - (unsigned long *)&gpmc_onenand_resource.start); - if (err < 0) { - dev_err(dev, "Cannot request GPMC CS %d, error %d\n", - gpmc_onenand_data->cs, err); - return err; - } - - gpmc_onenand_resource.end = gpmc_onenand_resource.start + - ONENAND_IO_SIZE - 1; - - err = 
platform_device_register(&gpmc_onenand_device); - if (err) { - dev_err(dev, "Unable to register OneNAND device\n"); - gpmc_cs_free(gpmc_onenand_data->cs); - } - - return err; -} diff --git a/include/linux/platform_data/mtd-onenand-omap2.h b/include/linux/platform_data/mtd-onenand-omap2.h deleted file mode 100644 index 56ff0e6f5ad1..000000000000 --- a/include/linux/platform_data/mtd-onenand-omap2.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2006 Nokia Corporation - * Author: Juha Yrjola - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __MTD_ONENAND_OMAP2_H -#define __MTD_ONENAND_OMAP2_H - -#include -#include - -#define ONENAND_SYNC_READ (1 << 0) -#define ONENAND_SYNC_READWRITE (1 << 1) -#define ONENAND_IN_OMAP34XX (1 << 2) - -struct omap_onenand_platform_data { - int cs; - int gpio_irq; - struct mtd_partition *parts; - int nr_parts; - int (*onenand_setup)(void __iomem *, int *freq_ptr); - int dma_channel; - u8 flags; - u8 regulator_can_sleep; - u8 skip_initial_unlocking; - - /* for passing the partitions */ - struct device_node *of_node; -}; -#endif -- cgit v1.2.3 From 236d812c55c2f38665663f0af61606b1cecea825 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Jan 2018 13:14:52 -0300 Subject: perf trace: No need to set PERF_SAMPLE_IDENTIFIER explicitly Since 75562573bab3 ("perf tools: Add support for PERF_SAMPLE_IDENTIFIER") we don't need to explicitly set PERF_SAMPLE_IDENTIFIER, as perf_evlist__config() will do this for us, i.e. when there are more than one evsel in an evlist, it will check if some evsel has a sample_type different than the one on the first evsel in the list, setting PERF_SAMPLE_IDENTIFIER in that case. So, to simplify 'perf trace' codebase, ditch that check. 
Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-12xq6orhwttee2tdtu96ucrp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 71e64bdca86f..e84816d02117 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2348,40 +2348,17 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__config(evlist, &trace->opts, NULL); if (callchain_param.enabled) { - bool use_identifier = false; - if (trace->syscalls.events.sys_exit) { perf_evsel__config_callchain(trace->syscalls.events.sys_exit, &trace->opts, &callchain_param); - use_identifier = true; } if (pgfault_maj) { perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); - use_identifier = true; } if (pgfault_min) { perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); - use_identifier = true; - } - - if (use_identifier) { - /* - * Now we have evsels with different sample_ids, use - * PERF_SAMPLE_IDENTIFIER to map from sample to evsel - * from a fixed position in each ring buffer record. - * - * As of this the changeset introducing this comment, this - * isn't strictly needed, as the fields that can come before - * PERF_SAMPLE_ID are all used, but we'll probably disable - * some of those for things like copying the payload of - * pointer syscall arguments, and for vfs_getname we don't - * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this - * here as a warning we need to use PERF_SAMPLE_IDENTIFIER. 
- */ - perf_evlist__set_sample_bit(evlist, IDENTIFIER); - perf_evlist__reset_sample_bit(evlist, ID); } } -- cgit v1.2.3 From b0076406495cd5905284d1f235db77d8ab54951e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Jan 2018 16:49:00 +0100 Subject: hwmon: (pmbus/ibm-cffps) Add dependency on LEDS_CLASS Building without CONFIG_LEDS_CLASS causes a link failure: drivers/hwmon/pmbus/ibm-cffps.o: In function `ibm_cffps_probe': ibm-cffps.c:(.text+0x4f4): undefined reference to `devm_of_led_classdev_register' This adds the required dependency. Fixes: ef9e1cdf419a ("hwmon: (pmbus/cffps) Add led class device ...") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig index 08479006c7f9..6e4298e99222 100644 --- a/drivers/hwmon/pmbus/Kconfig +++ b/drivers/hwmon/pmbus/Kconfig @@ -39,6 +39,7 @@ config SENSORS_ADM1275 config SENSORS_IBM_CFFPS tristate "IBM Common Form Factor Power Supply" + depends on LEDS_CLASS help If you say yes here you get hardware monitoring support for the IBM Common Form Factor power supply. -- cgit v1.2.3 From 84676c1f21e8ff54befe985f4f14dc1edc10046b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Jan 2018 10:53:05 +0800 Subject: genirq/affinity: assign vectors to all possible CPUs Currently we assign managed interrupt vectors to all present CPUs. This works fine for systems where we only online/offline CPUs. But in case of systems that support physical CPU hotplug (or the virtualized version of it) this means the additional CPUs covered for in the ACPI tables or on the command line are not catered for. To fix this we'd either need to introduce new hotplug CPU states just for this case, or we can start assigning vectors to possible but not present CPUs. 
Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Tested-by: Stefan Haberland Fixes: 4b855ad37194 ("blk-mq: Create hctx for each present CPU") Cc: linux-kernel@vger.kernel.org Cc: Thomas Gleixner Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- kernel/irq/affinity.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index e12d35108225..a37a3b4b6342 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -39,7 +39,7 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, } } -static cpumask_var_t *alloc_node_to_present_cpumask(void) +static cpumask_var_t *alloc_node_to_possible_cpumask(void) { cpumask_var_t *masks; int node; @@ -62,7 +62,7 @@ out_unwind: return NULL; } -static void free_node_to_present_cpumask(cpumask_var_t *masks) +static void free_node_to_possible_cpumask(cpumask_var_t *masks) { int node; @@ -71,22 +71,22 @@ static void free_node_to_present_cpumask(cpumask_var_t *masks) kfree(masks); } -static void build_node_to_present_cpumask(cpumask_var_t *masks) +static void build_node_to_possible_cpumask(cpumask_var_t *masks) { int cpu; - for_each_present_cpu(cpu) + for_each_possible_cpu(cpu) cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); } -static int get_nodes_in_cpumask(cpumask_var_t *node_to_present_cpumask, +static int get_nodes_in_cpumask(cpumask_var_t *node_to_possible_cpumask, const struct cpumask *mask, nodemask_t *nodemsk) { int n, nodes = 0; /* Calculate the number of nodes in the supplied affinity mask */ for_each_node(n) { - if (cpumask_intersects(mask, node_to_present_cpumask[n])) { + if (cpumask_intersects(mask, node_to_possible_cpumask[n])) { node_set(n, *nodemsk); nodes++; } @@ -109,7 +109,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) int last_affv = affv + affd->pre_vectors; nodemask_t nodemsk = NODE_MASK_NONE; struct cpumask *masks; - cpumask_var_t nmsk, 
*node_to_present_cpumask; + cpumask_var_t nmsk, *node_to_possible_cpumask; /* * If there aren't any vectors left after applying the pre/post @@ -125,8 +125,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) if (!masks) goto out; - node_to_present_cpumask = alloc_node_to_present_cpumask(); - if (!node_to_present_cpumask) + node_to_possible_cpumask = alloc_node_to_possible_cpumask(); + if (!node_to_possible_cpumask) goto out; /* Fill out vectors at the beginning that don't need affinity */ @@ -135,8 +135,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) /* Stabilize the cpumasks */ get_online_cpus(); - build_node_to_present_cpumask(node_to_present_cpumask); - nodes = get_nodes_in_cpumask(node_to_present_cpumask, cpu_present_mask, + build_node_to_possible_cpumask(node_to_possible_cpumask); + nodes = get_nodes_in_cpumask(node_to_possible_cpumask, cpu_possible_mask, &nodemsk); /* @@ -146,7 +146,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) if (affv <= nodes) { for_each_node_mask(n, nodemsk) { cpumask_copy(masks + curvec, - node_to_present_cpumask[n]); + node_to_possible_cpumask[n]); if (++curvec == last_affv) break; } @@ -160,7 +160,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes; /* Get the cpus on this node which are in the mask */ - cpumask_and(nmsk, cpu_present_mask, node_to_present_cpumask[n]); + cpumask_and(nmsk, cpu_possible_mask, node_to_possible_cpumask[n]); /* Calculate the number of cpus per vector */ ncpus = cpumask_weight(nmsk); @@ -192,7 +192,7 @@ done: /* Fill out vectors at the end that don't need affinity */ for (; curvec < nvecs; curvec++) cpumask_copy(masks + curvec, irq_default_affinity); - free_node_to_present_cpumask(node_to_present_cpumask); + free_node_to_possible_cpumask(node_to_possible_cpumask); out: free_cpumask_var(nmsk); return masks; @@ -214,7 +214,7 @@ int 
irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity return 0; get_online_cpus(); - ret = min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv; + ret = min_t(int, cpumask_weight(cpu_possible_mask), vecs) + resv; put_online_cpus(); return ret; } -- cgit v1.2.3 From 20e4d813931961fe26d26a1e98b3aba6ec00b130 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Jan 2018 10:53:06 +0800 Subject: blk-mq: simplify queue mapping & schedule with each possible CPU The previous patch assigns interrupt vectors to all possible CPUs, so now hctx can be mapped to possible CPUs, this patch applies this fact to simplify queue mapping & schedule so that we don't need to handle CPU hotplug for dealing with physical CPU plug & unplug. With this simplification, we can work well on physical CPU plug & unplug, which is a normal use case for VM at least. Make sure we allocate blk_mq_ctx structures for all possible CPUs, and set hctx->numa_node for possible CPUs which are mapped to this hctx. And only choose the online CPUs for schedule. 
Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Tested-by: Stefan Haberland Cc: Thomas Gleixner Signed-off-by: Christoph Hellwig Fixes: 4b855ad37194 ("blk-mq: Create hctx for each present CPU") (merged the three into one because any single one may not work, and fix selecting online CPUs for scheduler) Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index afccd0848d6f..b3b2003b7429 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -440,7 +440,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, blk_queue_exit(q); return ERR_PTR(-EXDEV); } - cpu = cpumask_first(alloc_data.hctx->cpumask); + cpu = cpumask_first_and(alloc_data.hctx->cpumask, cpu_online_mask); alloc_data.ctx = __blk_mq_get_ctx(q, cpu); rq = blk_mq_get_request(q, NULL, op, &alloc_data); @@ -1324,9 +1324,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) if (--hctx->next_cpu_batch <= 0) { int next_cpu; - next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask); + next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask, + cpu_online_mask); if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(hctx->cpumask); + next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask); hctx->next_cpu = next_cpu; hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; @@ -2220,16 +2221,11 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, INIT_LIST_HEAD(&__ctx->rq_list); __ctx->queue = q; - /* If the cpu isn't present, the cpu is mapped to first hctx */ - if (!cpu_present(i)) - continue; - - hctx = blk_mq_map_queue(q, i); - /* * Set local node, IFF we have more than one hw queue. 
If * not, we remain on the home node of the device */ + hctx = blk_mq_map_queue(q, i); if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE) hctx->numa_node = local_memory_node(cpu_to_node(i)); } @@ -2286,7 +2282,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) * * If the cpu isn't present, the cpu is mapped to first hctx. */ - for_each_present_cpu(i) { + for_each_possible_cpu(i) { hctx_idx = q->mq_map[i]; /* unmapped hw queue can be remapped after CPU topo changed */ if (!set->tags[hctx_idx] && @@ -2340,7 +2336,8 @@ static void blk_mq_map_swqueue(struct request_queue *q) /* * Initialize batch roundrobin counts */ - hctx->next_cpu = cpumask_first(hctx->cpumask); + hctx->next_cpu = cpumask_first_and(hctx->cpumask, + cpu_online_mask); hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; } } -- cgit v1.2.3 From 3bafc09e779710abaa7b836fe3bbeeeab7754c2b Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Mon, 25 Dec 2017 14:37:10 +0800 Subject: mfd: syscon: Add hardware spinlock support Some system control registers need hardware spinlock to synchronize between the multiple subsystems, so we should add hardware spinlock support for syscon. Signed-off-by: Baolin Wang Acked-by: Rob Herring Acked-by: Lee Jones Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/mfd/syscon.txt | 8 ++++++++ drivers/mfd/syscon.c | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/Documentation/devicetree/bindings/mfd/syscon.txt b/Documentation/devicetree/bindings/mfd/syscon.txt index 8b92d4576c42..25d9e9c2fd53 100644 --- a/Documentation/devicetree/bindings/mfd/syscon.txt +++ b/Documentation/devicetree/bindings/mfd/syscon.txt @@ -16,9 +16,17 @@ Required properties: Optional property: - reg-io-width: the size (in bytes) of the IO accesses that should be performed on the device. +- hwlocks: reference to a phandle of a hardware spinlock provider node. 
Examples: gpr: iomuxc-gpr@20e0000 { compatible = "fsl,imx6q-iomuxc-gpr", "syscon"; reg = <0x020e0000 0x38>; + hwlocks = <&hwlock1 1>; +}; + +hwlock1: hwspinlock@40500000 { + ... + reg = <0x40500000 0x1000>; + #hwlock-cells = <1>; }; diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index b93fe4c4957a..7eaa40bc703f 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -87,6 +88,24 @@ static struct syscon *of_syscon_register(struct device_node *np) if (ret) reg_io_width = 4; + ret = of_hwspin_lock_get_id(np, 0); + if (ret > 0 || (IS_ENABLED(CONFIG_HWSPINLOCK) && ret == 0)) { + syscon_config.use_hwlock = true; + syscon_config.hwlock_id = ret; + syscon_config.hwlock_mode = HWLOCK_IRQSTATE; + } else if (ret < 0) { + switch (ret) { + case -ENOENT: + /* Ignore missing hwlock, it's optional. */ + break; + default: + pr_err("Failed to retrieve valid hwlock: %d\n", ret); + /* fall-through */ + case -EPROBE_DEFER: + goto err_regmap; + } + } + syscon_config.reg_stride = reg_io_width; syscon_config.val_bits = reg_io_width * 8; syscon_config.max_register = resource_size(&res) - reg_io_width; -- cgit v1.2.3 From 62635ea8c18f0f62df4cc58379e4f1d33afd5801 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 11 Jan 2018 09:53:35 +0900 Subject: workqueue: avoid hard lockups in show_workqueue_state() show_workqueue_state() can print out a lot of messages while being in atomic context, e.g. sysrq-t -> show_workqueue_state(). If the console device is slow it may end up triggering NMI hard lockup watchdog. 
Signed-off-by: Sergey Senozhatsky Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v4.5+ --- kernel/workqueue.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 43d18cb46308..f699122dab32 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -4463,6 +4464,12 @@ void show_workqueue_state(void) if (pwq->nr_active || !list_empty(&pwq->delayed_works)) show_pwq(pwq); spin_unlock_irqrestore(&pwq->pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } } @@ -4490,6 +4497,12 @@ void show_workqueue_state(void) pr_cont("\n"); next_pool: spin_unlock_irqrestore(&pool->lock, flags); + /* + * We could be printing a lot from atomic context, e.g. + * sysrq-t -> show_workqueue_state(). Avoid triggering + * hard lockup. + */ + touch_nmi_watchdog(); } rcu_read_unlock_sched(); -- cgit v1.2.3 From fa1195ccc0af2d121abe0fe266a1caee8c265eea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jan 2018 14:39:23 +0100 Subject: perf tools: Fix copyfile_offset update of output offset We need to increase output offset in each iteration, not decrease it as we currently do. I guess we were lucky to finish in most cases in first iteration, so the bug never showed. However it shows a lot when working with big (~4GB) size data. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 9c9f5a2f1944 ("perf tools: Introduce copyfile_offset() function") Link: http://lkml.kernel.org/r/20180109133923.25406-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index a789f952b3e9..443892dabedb 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -210,7 +210,7 @@ static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size -= ret; off_in += ret; - off_out -= ret; + off_out += ret; } munmap(ptr, off_in + size); -- cgit v1.2.3 From 1688c2fdf6e72633c7f463da3878b58eef01ba56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Jan 2018 16:21:04 -0300 Subject: perf evsel: Check if callchain is enabled before setting it up The construct: if (callchain_param) perf_evsel__config_callchain(evsel, opts, &callchain_param); happens in several places, so make perf_evsel__config_callchain() work just like free(NULL), do nothing if param->enabled is not set. 
Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ykk0qzxnxwx3o611ctjnmxav@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4eea3b404507..efa2e629a669 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -651,9 +651,9 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) return ret; } -void perf_evsel__config_callchain(struct perf_evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +static void __perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *param) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; @@ -699,6 +699,14 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, } } +void perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *param) +{ + if (param->enabled) + return __perf_evsel__config_callchain(evsel, opts, param); +} + static void perf_evsel__reset_callgraph(struct perf_evsel *evsel, struct callchain_param *param) -- cgit v1.2.3 From 08e26396c6f29642fecfb7ca083459264a033a89 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Jan 2018 13:29:05 -0300 Subject: perf trace: Fix setting of --call-graph/--max-stack for non-syscall events The raw_syscalls:sys_{enter,exit} were first supported in 'perf trace', together with minor and major page faults, then we supported --call-graph, then --max-stack, but when the other tracepoints got supported, and bpf, etc, I forgot to make those global call-graph settings apply to them. 
Fix it by realizing that the global --max-stack and --call-graph settings are done via: OPT_CALLBACK(0, "call-graph", &trace.opts, "record_mode[,record_size]", record_callchain_help, &record_parse_callchain_opt), And then, when we go to parse the events in -e via: OPT_CALLBACK('e', "event", &trace, "event", "event/syscall selector. use 'perf list' to list available events", trace__parse_events_option), And trace__parse_sevents_option() calls: struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", "event selector. use 'perf list' to list available events", parse_events_option); err = parse_events_option(&o, lists[0], 0); parse_events_option() will override the global --call-graph and --max-stack if the "call-graph" and/or "max-stack" terms are in the event definition, such as in the probe_libc:inet_pton event in one of the examples below (-e probe_libc:inet_pton/max-stack=2). Before: # perf trace --mmap 1024 --call-graph dwarf -e sendto,probe_libc:inet_pton ping -6 -c 1 ::1 1.525 ( ): probe_libc:inet_pton:(7f77f3ac9350)) PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.071 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.071/0.071/0.071/0.000 ms 1.677 ( 0.081 ms): ping/31296 sendto(fd: 3, buff: 0x55681b652720, len: 64, addr: 0x55681b650640, addr_len: 28) = 64 __libc_sendto (/usr/lib64/libc-2.26.so) [0xffffaa97e4bc9cef] (/usr/bin/ping) [0xffffaa97e4bc656d] (/usr/bin/ping) [0xffffaa97e4bc7d0a] (/usr/bin/ping) [0xffffaa97e4bca447] (/usr/bin/ping) [0xffffaa97e4bc2f91] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa97e4bc3379] (/usr/bin/ping) # After: # perf trace --mmap 1024 --call-graph dwarf -e sendto,probe_libc:inet_pton ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.089 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 
0.089/0.089/0.089/0.000 ms 1.955 ( ): probe_libc:inet_pton:(7f383a311350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa5d91444f3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa5d91445379] (/usr/bin/ping) 2.140 ( 0.101 ms): ping/32047 sendto(fd: 3, buff: 0x55a26edd0720, len: 64, addr: 0x55a26edce640, addr_len: 28) = 64 __libc_sendto (/usr/lib64/libc-2.26.so) [0xffffaa5d9144bcef] (/usr/bin/ping) [0xffffaa5d9144856d] (/usr/bin/ping) [0xffffaa5d91449d0a] (/usr/bin/ping) [0xffffaa5d9144c447] (/usr/bin/ping) [0xffffaa5d91444f91] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa5d91445379] (/usr/bin/ping) # Same thing for --max-stack, the global one: # perf trace --max-stack 3 -e sendto,probe_libc:inet_pton ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.097 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.097/0.097/0.097/0.000 ms 1.577 ( ): probe_libc:inet_pton:(7f32f3957350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) 1.738 ( 0.108 ms): ping/32103 sendto(fd: 3, buff: 0x55c3132d7720, len: 64, addr: 0x55c3132d5640, addr_len: 28) = 64 __libc_sendto (/usr/lib64/libc-2.26.so) [0xffffaa3cecf44cef] (/usr/bin/ping) [0xffffaa3cecf4156d] (/usr/bin/ping) # And then setting up a global setting (dwarf, max-stack=4), that will affect the raw_syscall:sys_enter for the 'sendto' syscall and that will be overriden in the probe_libc:inet_pton call to just one entry. 
# perf trace --max-stack=4 --call-graph dwarf -e sendto -e probe_libc:inet_pton/max-stack=1/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.090 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.090/0.090/0.090/0.000 ms 2.140 ( ): probe_libc:inet_pton:(7f9fe9337350)) __GI___inet_pton (/usr/lib64/libc-2.26.so) 2.283 ( 0.103 ms): ping/31804 sendto(fd: 3, buff: 0x55c7f3e19720, len: 64, addr: 0x55c7f3e17640, addr_len: 28) = 64 __libc_sendto (/usr/lib64/libc-2.26.so) [0xffffaa380c402cef] (/usr/bin/ping) [0xffffaa380c3ff56d] (/usr/bin/ping) [0xffffaa380c400d0a] (/usr/bin/ping) # Install iputils-debuginfo to get those /usr/bin/ping addresses resolved, those routines are not on its .dymsym nor .symtab :-) Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-qgl2gse8elhh9zztw4ajopg3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e84816d02117..0362974854e9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2222,6 +2222,9 @@ static int trace__add_syscall_newtp(struct trace *trace) if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) goto out_delete_sys_exit; + perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); + perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); + perf_evlist__add(evlist, sys_enter); perf_evlist__add(evlist, sys_exit); @@ -2318,6 +2321,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); if (pgfault_maj == NULL) goto out_error_mem; + perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); 
perf_evlist__add(evlist, pgfault_maj); } @@ -2325,6 +2329,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); if (pgfault_min == NULL) goto out_error_mem; + perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); perf_evlist__add(evlist, pgfault_min); } @@ -2347,21 +2352,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__config(evlist, &trace->opts, NULL); - if (callchain_param.enabled) { - if (trace->syscalls.events.sys_exit) { - perf_evsel__config_callchain(trace->syscalls.events.sys_exit, - &trace->opts, &callchain_param); - } - - if (pgfault_maj) { - perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); - } - - if (pgfault_min) { - perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); - } - } - signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); -- cgit v1.2.3 From 92ae112e477ac412decc3fdd5c1eeb6c90c266b4 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 12 Jan 2018 11:42:33 +0100 Subject: spi: orion: Fix clock resource by adding an optional bus clock On Armada 7K/8K we need to explicitly enable the bus clock. The bus clock is optional because not all the SoCs need it but at least for Armada 7K/8K it is actually mandatory. The binding documentation is updated accordingly and now also mentions the mandatory clock, which was previously missing.
Signed-off-by: Gregory CLEMENT Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-orion.txt | 9 +++++++++ drivers/spi/spi-orion.c | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/Documentation/devicetree/bindings/spi/spi-orion.txt b/Documentation/devicetree/bindings/spi/spi-orion.txt index df8ec31f2f07..8434a65fc12a 100644 --- a/Documentation/devicetree/bindings/spi/spi-orion.txt +++ b/Documentation/devicetree/bindings/spi/spi-orion.txt @@ -18,8 +18,17 @@ Required properties: The eight register sets following the control registers refer to chip-select lines 0 through 7 respectively. - cell-index : Which of multiple SPI controllers is this. +- clocks : pointers to the reference clocks for this device, the first + one is the one used for the clock on the spi bus, the + second one is optional and is the clock used for the + functional part of the controller + Optional properties: - interrupts : Is currently not used. +- clock-names : names of used clocks, mandatory if the second clock is + used, the name must be "core", and "axi" (the latter + is only for Armada 7K/8K). 
+ Example: spi@10600 { diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index 8974bb340b3a..482a0cf3b7aa 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -94,6 +94,7 @@ struct orion_spi { struct spi_master *master; void __iomem *base; struct clk *clk; + struct clk *axi_clk; const struct orion_spi_dev *devdata; struct orion_direct_acc direct_access[ORION_NUM_CHIPSELECTS]; @@ -634,6 +635,14 @@ static int orion_spi_probe(struct platform_device *pdev) if (status) goto out; + /* The following clock is only used by some SoCs */ + spi->axi_clk = devm_clk_get(&pdev->dev, "axi"); + if (IS_ERR(spi->axi_clk) && + PTR_ERR(spi->axi_clk) == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (!IS_ERR(spi->axi_clk)) + clk_prepare_enable(spi->axi_clk); + tclk_hz = clk_get_rate(spi->clk); /* @@ -725,6 +734,7 @@ static int orion_spi_probe(struct platform_device *pdev) out_rel_pm: pm_runtime_disable(&pdev->dev); out_rel_clk: + clk_disable_unprepare(spi->axi_clk); clk_disable_unprepare(spi->clk); out: spi_master_put(master); @@ -738,6 +748,7 @@ static int orion_spi_remove(struct platform_device *pdev) struct orion_spi *spi = spi_master_get_devdata(master); pm_runtime_get_sync(&pdev->dev); + clk_disable_unprepare(spi->axi_clk); clk_disable_unprepare(spi->clk); spi_unregister_master(master); @@ -754,6 +765,7 @@ static int orion_spi_runtime_suspend(struct device *dev) struct spi_master *master = dev_get_drvdata(dev); struct orion_spi *spi = spi_master_get_devdata(master); + clk_disable_unprepare(spi->axi_clk); clk_disable_unprepare(spi->clk); return 0; } @@ -763,6 +775,8 @@ static int orion_spi_runtime_resume(struct device *dev) struct spi_master *master = dev_get_drvdata(dev); struct orion_spi *spi = spi_master_get_devdata(master); + if (!IS_ERR(spi->axi_clk)) + clk_prepare_enable(spi->axi_clk); return clk_prepare_enable(spi->clk); } #endif -- cgit v1.2.3 From 3c22a73fb87366851dcf48d852357a6d808921cc Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: 
Thu, 11 Jan 2018 13:52:08 -0600 Subject: ASoC: Intel: bytcht_es8316: fix HID handling Same problem as with previous machine drivers, the codec dai uses a hard-coded name of "i2c-ESSX8316:00" but ACPI provides "i2c-ESSX8316:01" in some systems. Fix by overriding the hard-coded value with the codec name derived from the HID information Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=189261 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/intel/boards/Kconfig | 1 + sound/soc/intel/boards/bytcht_es8316.c | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index de598dcbef30..d4e103615f51 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -139,6 +139,7 @@ config SND_SOC_INTEL_BYT_CHT_DA7213_MACH config SND_SOC_INTEL_BYT_CHT_ES8316_MACH tristate "Baytrail & Cherrytrail with ES8316 codec" depends on X86_INTEL_LPSS && I2C && ACPI + select SND_SOC_ACPI select SND_SOC_ES8316 help This adds support for ASoC machine driver for Intel(R) Baytrail & diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 8088396717e3..ae24f6205f05 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -232,15 +232,39 @@ static struct snd_soc_card byt_cht_es8316_card = { .fully_routed = true, }; +static char codec_name[16]; /* i2c-:00 with HID being 8 chars */ + static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) { - int ret = 0; struct byt_cht_es8316_private *priv; + struct snd_soc_acpi_mach *mach; + const char *i2c_name = NULL; + int dai_index = 0; + int i; + int ret = 0; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_ATOMIC); if (!priv) return -ENOMEM; + mach = (&pdev->dev)->platform_data; + /* fix index of codec dai */ + for (i = 0; i < ARRAY_SIZE(byt_cht_es8316_dais); 
i++) { + if (!strcmp(byt_cht_es8316_dais[i].codec_name, + "i2c-ESSX8316:00")) { + dai_index = i; + break; + } + } + + /* fixup codec name based on HID */ + i2c_name = snd_soc_acpi_find_name_from_hid(mach->id); + if (i2c_name) { + snprintf(codec_name, sizeof(codec_name), + "%s%s", "i2c-", i2c_name); + byt_cht_es8316_dais[dai_index].codec_name = codec_name; + } + /* register the soc card */ byt_cht_es8316_card.dev = &pdev->dev; snd_soc_card_set_drvdata(&byt_cht_es8316_card, priv); -- cgit v1.2.3 From 2be2d57986431626e905ee344086affa44c5bb9b Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 11 Jan 2018 13:52:09 -0600 Subject: ASoC: acpi: remove hard-coded i2c-device name length Remove hard-coded [16] array size, replace with clearer description and dependency on ACPI_ID_LEN. No functionality change. Suggested-by: Andy Shevchenko Signed-off-by: Pierre-Louis Bossart Reviewed-by: Andy Shevchenko Acked-By: Vinod Koul Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 3 +++ sound/soc/intel/boards/bytcht_da7213.c | 2 +- sound/soc/intel/boards/bytcht_es8316.c | 2 +- sound/soc/intel/boards/bytcr_rt5640.c | 2 +- sound/soc/intel/boards/bytcr_rt5651.c | 2 +- sound/soc/intel/boards/cht_bsw_rt5645.c | 4 ++-- sound/soc/intel/boards/cht_bsw_rt5672.c | 2 +- 7 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index d1aaf876cd26..83320af8def2 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -27,6 +27,9 @@ struct snd_soc_acpi_package_context { bool data_valid; }; +/* codec name is used in DAIs is i2c-:00 with HID being 8 chars */ +#define SND_ACPI_I2C_ID_LEN (4 + ACPI_ID_LEN + 3 + 1) + #if IS_ENABLED(CONFIG_ACPI) /* translation fron HID to I2C name, needed for DAI codec_name */ const char *snd_soc_acpi_find_name_from_hid(const u8 hid[ACPI_ID_LEN]); diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c index c4d82ad41bd7..6219c04d4731 100644 ---
a/sound/soc/intel/boards/bytcht_da7213.c +++ b/sound/soc/intel/boards/bytcht_da7213.c @@ -219,7 +219,7 @@ static struct snd_soc_card bytcht_da7213_card = { .num_dapm_routes = ARRAY_SIZE(audio_map), }; -static char codec_name[16]; /* i2c-:00 with HID being 8 chars */ +static char codec_name[SND_ACPI_I2C_ID_LEN]; static int bytcht_da7213_probe(struct platform_device *pdev) { diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index ae24f6205f05..079f35cd4eaf 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -232,7 +232,7 @@ static struct snd_soc_card byt_cht_es8316_card = { .fully_routed = true, }; -static char codec_name[16]; /* i2c-:00 with HID being 8 chars */ +static char codec_name[SND_ACPI_I2C_ID_LEN]; static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) { diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index f2c0fc415e52..4548f75498d0 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -713,7 +713,7 @@ static struct snd_soc_card byt_rt5640_card = { .fully_routed = true, }; -static char byt_rt5640_codec_name[16]; /* i2c-:00 with HID being 8 chars */ +static char byt_rt5640_codec_name[SND_ACPI_I2C_ID_LEN]; static char byt_rt5640_codec_aif_name[12]; /* = "rt5640-aif[1|2]" */ static char byt_rt5640_cpu_dai_name[10]; /* = "ssp[0|2]-port" */ diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index 22c9cc5d135e..5a6b7dedb773 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -509,7 +509,7 @@ static struct snd_soc_card byt_rt5651_card = { .fully_routed = true, }; -static char byt_rt5651_codec_name[16]; /* i2c-:00 with HID being 8 chars */ +static char byt_rt5651_codec_name[SND_ACPI_I2C_ID_LEN]; static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) { diff --git 
a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index 18d129caa974..cef6a8c31c8d 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -49,7 +49,7 @@ struct cht_acpi_card { struct cht_mc_private { struct snd_soc_jack jack; struct cht_acpi_card *acpi_card; - char codec_name[16]; + char codec_name[SND_ACPI_I2C_ID_LEN]; struct clk *mclk; }; @@ -499,7 +499,7 @@ static struct cht_acpi_card snd_soc_cards[] = { {"10EC5650", CODEC_TYPE_RT5650, &snd_soc_card_chtrt5650}, }; -static char cht_rt5645_codec_name[16]; /* i2c-:00 with HID being 8 chars */ +static char cht_rt5645_codec_name[SND_ACPI_I2C_ID_LEN]; static char cht_rt5645_codec_aif_name[12]; /* = "rt5645-aif[1|2]" */ static char cht_rt5645_cpu_dai_name[10]; /* = "ssp[0|2]-port" */ diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c index f8f21eee9b2d..1f3d38dc4fcb 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5672.c +++ b/sound/soc/intel/boards/cht_bsw_rt5672.c @@ -35,7 +35,7 @@ struct cht_mc_private { struct snd_soc_jack headset; - char codec_name[16]; + char codec_name[SND_ACPI_I2C_ID_LEN]; struct clk *mclk; }; -- cgit v1.2.3 From ef3d687c795c82c44aef1bf25fcd4900ba60be9a Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 11 Jan 2018 16:27:04 +0000 Subject: ASoC: cs42l73: Remove trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. 
Signed-off-by: Luis de Bethencourt Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l73.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c index dde37e569ade..aebaa97490b6 100644 --- a/sound/soc/codecs/cs42l73.c +++ b/sound/soc/codecs/cs42l73.c @@ -1355,7 +1355,7 @@ static int cs42l73_i2c_probe(struct i2c_client *i2c_client, ret = regmap_read(cs42l73->regmap, CS42L73_REVID, &reg); if (ret < 0) { dev_err(&i2c_client->dev, "Get Revision ID failed\n"); - return ret;; + return ret; } dev_info(&i2c_client->dev, -- cgit v1.2.3 From 3a1479599a610cd49cbf91da68963ade90ee0fa3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 12 Jan 2018 23:16:17 +0200 Subject: ASoC: Intel - Convert to use acpi_dev_get_first_match_name() Instead of home grown snd_soc_acpi_find_name_from_hid() use acpi_dev_get_first_match_name(). Tested-by: Pierre-Louis Bossart Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown --- include/sound/soc-acpi.h | 7 ------- sound/soc/intel/boards/bytcht_da7213.c | 2 +- sound/soc/intel/boards/bytcht_es8316.c | 2 +- sound/soc/intel/boards/bytcr_rt5640.c | 2 +- sound/soc/intel/boards/bytcr_rt5651.c | 2 +- sound/soc/intel/boards/cht_bsw_rt5645.c | 2 +- sound/soc/intel/boards/cht_bsw_rt5672.c | 2 +- sound/soc/soc-acpi.c | 33 --------------------------------- 8 files changed, 6 insertions(+), 46 deletions(-) diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 83320af8def2..082224275f52 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -31,16 +31,9 @@ struct snd_soc_acpi_package_context { #define SND_ACPI_I2C_ID_LEN (4 + ACPI_ID_LEN + 3 + 1) #if IS_ENABLED(CONFIG_ACPI) -/* translation fron HID to I2C name, needed for DAI codec_name */ -const char *snd_soc_acpi_find_name_from_hid(const u8 hid[ACPI_ID_LEN]); bool snd_soc_acpi_find_package_from_hid(const u8 hid[ACPI_ID_LEN], struct snd_soc_acpi_package_context *ctx); #else -static inline const char *
-snd_soc_acpi_find_name_from_hid(const u8 hid[ACPI_ID_LEN]) -{ - return NULL; -} static inline bool snd_soc_acpi_find_package_from_hid(const u8 hid[ACPI_ID_LEN], struct snd_soc_acpi_package_context *ctx) diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c index 6219c04d4731..2179dedb28ad 100644 --- a/sound/soc/intel/boards/bytcht_da7213.c +++ b/sound/soc/intel/boards/bytcht_da7213.c @@ -243,7 +243,7 @@ static int bytcht_da7213_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = snd_soc_acpi_find_name_from_hid(mach->id); + i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); if (i2c_name) { snprintf(codec_name, sizeof(codec_name), "%s%s", "i2c-", i2c_name); diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 079f35cd4eaf..305e7f4fe55a 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -258,7 +258,7 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = snd_soc_acpi_find_name_from_hid(mach->id); + i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); if (i2c_name) { snprintf(codec_name, sizeof(codec_name), "%s%s", "i2c-", i2c_name); diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 4548f75498d0..b6a1cfeec830 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -762,7 +762,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = snd_soc_acpi_find_name_from_hid(mach->id); + i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); if (i2c_name) { snprintf(byt_rt5640_codec_name, sizeof(byt_rt5640_codec_name), "%s%s", "i2c-", i2c_name); diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index 5a6b7dedb773..456526a93dd5 
100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -539,7 +539,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = snd_soc_acpi_find_name_from_hid(mach->id); + i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); if (i2c_name) { snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name), "%s%s", "i2c-", i2c_name); diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index cef6a8c31c8d..976ea6bf9539 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -566,7 +566,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = snd_soc_acpi_find_name_from_hid(mach->id); + i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); if (i2c_name) { snprintf(cht_rt5645_codec_name, sizeof(cht_rt5645_codec_name), "%s%s", "i2c-", i2c_name); diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c index 1f3d38dc4fcb..c14a52d2f714 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5672.c +++ b/sound/soc/intel/boards/cht_bsw_rt5672.c @@ -396,7 +396,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) /* fixup codec name based on HID */ if (mach) { - i2c_name = snd_soc_acpi_find_name_from_hid(mach->id); + i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); if (i2c_name) { snprintf(drv->codec_name, sizeof(drv->codec_name), "i2c-%s", i2c_name); diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c index 7f43c9bf3d09..3d7e1ff79139 100644 --- a/sound/soc/soc-acpi.c +++ b/sound/soc/soc-acpi.c @@ -16,39 +16,6 @@ #include -static acpi_status snd_soc_acpi_find_name(acpi_handle handle, u32 level, - void *context, void **ret) -{ - struct acpi_device *adev; - const char *name = NULL; - - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - - if 
(adev->status.present && adev->status.functional) { - name = acpi_dev_name(adev); - *(const char **)ret = name; - return AE_CTRL_TERMINATE; - } - - return AE_OK; -} - -const char *snd_soc_acpi_find_name_from_hid(const u8 hid[ACPI_ID_LEN]) -{ - const char *name = NULL; - acpi_status status; - - status = acpi_get_devices(hid, snd_soc_acpi_find_name, NULL, - (void **)&name); - - if (ACPI_FAILURE(status) || name[0] == '\0') - return NULL; - - return name; -} -EXPORT_SYMBOL_GPL(snd_soc_acpi_find_name_from_hid); - struct snd_soc_acpi_mach * snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines) { -- cgit v1.2.3 From 949293d45d6b0951e2dfdfd670a9c0092b10fd27 Mon Sep 17 00:00:00 2001 From: Christian Fischer Date: Fri, 12 Jan 2018 14:22:59 +0100 Subject: ASoC: mxs-sgtl5000: add audio-routing support Add dapm_widgets to machine-driver (from imx-sgtl5000). If the "audio-routing"-property is present at probing the dapm-widgets getting linked to the card. Signed-off-by: Christian Fischer Signed-off-by: Mark Brown --- .../bindings/sound/mxs-audio-sgtl5000.txt | 33 +++++++++++++++++++--- sound/soc/mxs/mxs-sgtl5000.c | 20 +++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt b/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt index 601c518eddaa..4eb980bd0287 100644 --- a/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt +++ b/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt @@ -1,10 +1,31 @@ * Freescale MXS audio complex with SGTL5000 codec Required properties: -- compatible: "fsl,mxs-audio-sgtl5000" -- model: The user-visible name of this sound complex -- saif-controllers: The phandle list of the MXS SAIF controller -- audio-codec: The phandle of the SGTL5000 audio codec +- compatible : "fsl,mxs-audio-sgtl5000" +- model : The user-visible name of this sound complex +- saif-controllers : The phandle list of the MXS SAIF controller +- audio-codec : The 
phandle of the SGTL5000 audio codec +- audio-routing : A list of the connections between audio components. + Each entry is a pair of strings, the first being the + connection's sink, the second being the connection's + source. Valid names could be power supplies, SGTL5000 + pins, and the jacks on the board: + + Power supplies: + * Mic Bias + + SGTL5000 pins: + * MIC_IN + * LINE_IN + * HP_OUT + * LINE_OUT + + Board connectors: + * Mic Jack + * Line In Jack + * Headphone Jack + * Line Out Jack + * Ext Spk Example: @@ -14,4 +35,8 @@ sound { model = "imx28-evk-sgtl5000"; saif-controllers = <&saif0 &saif1>; audio-codec = <&sgtl5000>; + audio-routing = + "MIC_IN", "Mic Jack", + "Mic Jack", "Mic Bias", + "Headphone Jack", "HP_OUT"; }; diff --git a/sound/soc/mxs/mxs-sgtl5000.c b/sound/soc/mxs/mxs-sgtl5000.c index 2ed3240cc682..b593f76212e0 100644 --- a/sound/soc/mxs/mxs-sgtl5000.c +++ b/sound/soc/mxs/mxs-sgtl5000.c @@ -93,6 +93,14 @@ static struct snd_soc_dai_link mxs_sgtl5000_dai[] = { }, }; +static const struct snd_soc_dapm_widget mxs_sgtl5000_dapm_widgets[] = { + SND_SOC_DAPM_MIC("Mic Jack", NULL), + SND_SOC_DAPM_LINE("Line In Jack", NULL), + SND_SOC_DAPM_HP("Headphone Jack", NULL), + SND_SOC_DAPM_SPK("Line Out Jack", NULL), + SND_SOC_DAPM_SPK("Ext Spk", NULL), +}; + static struct snd_soc_card mxs_sgtl5000 = { .name = "mxs_sgtl5000", .owner = THIS_MODULE, @@ -141,6 +149,18 @@ static int mxs_sgtl5000_probe(struct platform_device *pdev) card->dev = &pdev->dev; + if (of_find_property(np, "audio-routing", NULL)) { + card->dapm_widgets = mxs_sgtl5000_dapm_widgets; + card->num_dapm_widgets = ARRAY_SIZE(mxs_sgtl5000_dapm_widgets); + + ret = snd_soc_of_parse_audio_routing(card, "audio-routing"); + if (ret) { + dev_err(&pdev->dev, "failed to parse audio-routing (%d)\n", + ret); + return ret; + } + } + ret = devm_snd_soc_register_card(&pdev->dev, card); if (ret) { dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", -- cgit v1.2.3 From 
85ba3effc5a0836b9195f2010684062ad230cc23 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 12 Jan 2018 14:47:57 -0700 Subject: blk-mq: add missing RQF_STARTED to debugfs Looking at debug output, we see: ./000000009ddfa913/requeue_list:000000009646711c {.op=READ, .state=idle, gen=0x1 18, abort_gen=0x0, .cmd_flags=, .rq_flags=SORTED|1|SOFTBARRIER|IO_STAT, complete =0, .tag=-1, .internal_tag=217} Note the '1' between SORTED and SOFTBARRIER - that's because no name was defined for RQF_STARTED. Fix that. Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 19db3f583bf1..fa31ceaa8de6 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -271,6 +271,7 @@ static const char *const cmd_flag_name[] = { #define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name static const char *const rqf_name[] = { RQF_NAME(SORTED), + RQF_NAME(STARTED), RQF_NAME(QUEUED), RQF_NAME(SOFTBARRIER), RQF_NAME(FLUSH_SEQ), -- cgit v1.2.3 From abe6c3b06fa80777e321add05345c799a74e624a Mon Sep 17 00:00:00 2001 From: Emiliano Ingrassia Date: Fri, 12 Jan 2018 21:42:28 +0100 Subject: hwmon: (sht3x) wait predefined limits loading complete before access An sht3x sensor includes a limits register which contains temperature and humidity limit values. After a reset, pre-defined values are loaded into that register. During the probe function, the driver reads the limits register. However, if the reads are made too early, and the bus is clocked at high frequencies (e.g. 100 kHz or more), the loading may not have completed and the sensor returns a NACK which causes the probe to fail. A delay of at least 500 us before the first read solves this issue.
Signed-off-by: Emiliano Ingrassia Signed-off-by: Guenter Roeck --- drivers/hwmon/sht3x.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hwmon/sht3x.c b/drivers/hwmon/sht3x.c index 6ea99cd6ae79..370b57dafab7 100644 --- a/drivers/hwmon/sht3x.c +++ b/drivers/hwmon/sht3x.c @@ -732,6 +732,13 @@ static int sht3x_probe(struct i2c_client *client, mutex_init(&data->i2c_lock); mutex_init(&data->data_lock); + /* + * An attempt to read limits register too early + * causes a NACK response from the chip. + * Waiting for an empirical delay of 500 us solves the issue. + */ + usleep_range(500, 600); + ret = limits_update(data); if (ret) return ret; -- cgit v1.2.3 From 0dda0b3fb255048a221f736c8a2a24c674da8bf3 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Fri, 8 Dec 2017 17:43:18 -0800 Subject: apparmor: fix ptrace label match when matching stacked labels Given a label with a profile stack of A//&B or A//&C ... A ptrace rule should be able to specify a generic trace pattern with a rule like ptrace trace A//&**, however this is failing because while the correct label match routine is called, it is being done post label decomposition so it is always being done against a profile instead of the stacked label. To fix this refactor the cross check to pass the full peer label in to the label_match. Fixes: 290f458a4f16 ("apparmor: allow ptrace checks to be finer grained than just capability") Cc: Stable Reported-by: Matthew Garrett Tested-by: Matthew Garrett Signed-off-by: John Johansen --- security/apparmor/include/perms.h | 3 +++ security/apparmor/ipc.c | 53 +++++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 2b27bb79aec4..d7b7e7115160 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -133,6 +133,9 @@ extern struct aa_perms allperms; #define xcheck_labels_profiles(L1, L2, FN, args...) 
\ xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args) +#define xcheck_labels(L1, L2, P, FN1, FN2) \ + xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2))) + void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask); void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask); diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 7ca0032e7ba9..b40678f3c1d5 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -64,40 +64,48 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va) FLAGS_NONE, GFP_ATOMIC); } +/* assumes check for PROFILE_MEDIATES is already done */ /* TODO: conditionals */ static int profile_ptrace_perm(struct aa_profile *profile, - struct aa_profile *peer, u32 request, - struct common_audit_data *sa) + struct aa_label *peer, u32 request, + struct common_audit_data *sa) { struct aa_perms perms = { }; - /* need because of peer in cross check */ - if (profile_unconfined(profile) || - !PROFILE_MEDIATES(profile, AA_CLASS_PTRACE)) - return 0; - - aad(sa)->peer = &peer->label; - aa_profile_match_label(profile, &peer->label, AA_CLASS_PTRACE, request, + aad(sa)->peer = peer; + aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request, &perms); aa_apply_modes_to_perms(profile, &perms); return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb); } -static int cross_ptrace_perm(struct aa_profile *tracer, - struct aa_profile *tracee, u32 request, - struct common_audit_data *sa) +static int profile_tracee_perm(struct aa_profile *tracee, + struct aa_label *tracer, u32 request, + struct common_audit_data *sa) { + if (profile_unconfined(tracee) || unconfined(tracer) || + !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE)) + return 0; + + return profile_ptrace_perm(tracee, tracer, request, sa); +} + +static int profile_tracer_perm(struct aa_profile *tracer, + struct aa_label *tracee, u32 request, + struct common_audit_data *sa) +{ + if (profile_unconfined(tracer)) + 
return 0; + if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE)) - return xcheck(profile_ptrace_perm(tracer, tracee, request, sa), - profile_ptrace_perm(tracee, tracer, - request << PTRACE_PERM_SHIFT, - sa)); - /* policy uses the old style capability check for ptrace */ - if (profile_unconfined(tracer) || tracer == tracee) + return profile_ptrace_perm(tracer, tracee, request, sa); + + /* profile uses the old style capability check for ptrace */ + if (&tracer->label == tracee) return 0; aad(sa)->label = &tracer->label; - aad(sa)->peer = &tracee->label; + aad(sa)->peer = tracee; aad(sa)->request = 0; aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1); @@ -115,10 +123,13 @@ static int cross_ptrace_perm(struct aa_profile *tracer, int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee, u32 request) { + struct aa_profile *profile; + u32 xrequest = request << PTRACE_PERM_SHIFT; DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE); - return xcheck_labels_profiles(tracer, tracee, cross_ptrace_perm, - request, &sa); + return xcheck_labels(tracer, tracee, profile, + profile_tracer_perm(profile, tracee, request, &sa), + profile_tracee_perm(profile, tracer, xrequest, &sa)); } -- cgit v1.2.3 From 1a3881d305592d947ed47887306919d50112394d Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 11 Jan 2018 13:07:54 -0800 Subject: apparmor: Fix regression in profile conflict logic The intended behaviour in apparmor profile matching is to flag a conflict if two profiles match equally well. However, right now a conflict is generated if another profile has the same match length even if that profile doesn't actually match. Fix the logic so we only generate a conflict if the profiles match. 
Fixes: 844b8292b631 ("apparmor: ensure that undecidable profile attachments fail") Cc: Stable Signed-off-by: Matthew Garrett Signed-off-by: John Johansen --- security/apparmor/domain.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 04ba9d0718ea..6a54d2ffa840 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -330,10 +330,7 @@ static struct aa_profile *__attach_match(const char *name, continue; if (profile->xmatch) { - if (profile->xmatch_len == len) { - conflict = true; - continue; - } else if (profile->xmatch_len > len) { + if (profile->xmatch_len >= len) { unsigned int state; u32 perm; @@ -342,6 +339,10 @@ static struct aa_profile *__attach_match(const char *name, perm = dfa_user_allow(profile->xmatch, state); /* any accepting state means a valid match. */ if (perm & MAY_EXEC) { + if (profile->xmatch_len == len) { + conflict = true; + continue; + } candidate = profile; len = profile->xmatch_len; conflict = false; -- cgit v1.2.3 From 352909b49ba0d74929b96af6dfbefc854ab6ebb5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 11 Jan 2018 17:16:51 -0800 Subject: selftests/x86: Add test_vsyscall This tests that the vsyscall entries do what they're expected to do. It also confirms that attempts to read the vsyscall page behave as expected. If changes are made to the vsyscall code or its memory map handling, running this test in all three of vsyscall=none, vsyscall=emulate, and vsyscall=native are helpful. (Because it's easy, this also compares the vsyscall results to their vDSO equivalents.) Note to KAISER backporters: please test this under all three vsyscall modes. Also, in the emulate and native modes, make sure that test_vsyscall_64 agrees with the command line or config option as to which mode you're in. It's quite easy to mess up the kernel such that native mode accidentally emulates or vice versa. 
Greg, etc: please backport this to all your Meltdown-patched kernels. It'll help make sure the patches didn't regress vsyscalls. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Hugh Dickins Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/2b9c5a174c1d60fd7774461d518aa75598b1d8fd.1515719552.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/test_vsyscall.c | 500 ++++++++++++++++++++++++++++ 2 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/test_vsyscall.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 7b1adeee4b0f..91fbfa8fdc15 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -7,7 +7,7 @@ include ../lib.mk TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ - protection_keys test_vdso + protection_keys test_vdso test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c new file mode 100644 index 000000000000..7a744fa7b786 --- /dev/null +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -0,0 +1,500 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __x86_64__ +# define VSYS(x) (x) +#else +# define VSYS(x) 0 +#endif + +#ifndef SYS_getcpu +# ifdef __x86_64__
+# define SYS_getcpu 309 +# else +# define SYS_getcpu 318 +# endif +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +/* vsyscalls and vDSO */ +bool should_read_vsyscall = false; + +typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); +gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000); +gtod_t vdso_gtod; + +typedef int (*vgettime_t)(clockid_t, struct timespec *); +vgettime_t vdso_gettime; + +typedef long (*time_func_t)(time_t *t); +time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400); +time_func_t vdso_time; + +typedef long (*getcpu_t)(unsigned *, unsigned *, void *); +getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); +getcpu_t vdso_getcpu; + +static void init_vdso(void) +{ + void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) { + printf("[WARN]\tfailed to find vDSO\n"); + return; + } + + vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gtod) + printf("[WARN]\tfailed to find gettimeofday in vDSO\n"); + + vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_gettime) + printf("[WARN]\tfailed to find clock_gettime in vDSO\n"); + + vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); + if (!vdso_time) + printf("[WARN]\tfailed to find time in vDSO\n"); + + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); + if (!vdso_getcpu) { + /* getcpu() was never wired up in the 32-bit vDSO. */ + printf("[%s]\tfailed to find getcpu in vDSO\n", + sizeof(long) == 8 ? 
"WARN" : "NOTE"); + } +} + +static int init_vsys(void) +{ +#ifdef __x86_64__ + int nerrs = 0; + FILE *maps; + char line[128]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); + should_read_vsyscall = true; + return 0; + } + + while (fgets(line, sizeof(line), maps)) { + char r, x; + void *start, *end; + char name[128]; + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + printf("\tvsyscall map: %s", line); + + if (start != (void *)0xffffffffff600000 || + end != (void *)0xffffffffff601000) { + printf("[FAIL]\taddress range is nonsense\n"); + nerrs++; + } + + printf("\tvsyscall permissions are %c-%c\n", r, x); + should_read_vsyscall = (r == 'r'); + if (x != 'x') { + vgtod = NULL; + vtime = NULL; + vgetcpu = NULL; + } + + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("\tno vsyscall map in /proc/self/maps\n"); + should_read_vsyscall = false; + vgtod = NULL; + vtime = NULL; + vgetcpu = NULL; + } + + return nerrs; +#else + return 0; +#endif +} + +/* syscalls */ +static inline long sys_gtod(struct timeval *tv, struct timezone *tz) +{ + return syscall(SYS_gettimeofday, tv, tz); +} + +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(SYS_clock_gettime, id, ts); +} + +static inline long sys_time(time_t *t) +{ + return syscall(SYS_time, t); +} + +static inline long sys_getcpu(unsigned * cpu, unsigned * node, + void* cache) +{ + return syscall(SYS_getcpu, cpu, node, cache); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static double tv_diff(const struct timeval *a, const struct timeval *b) +{ + return (double)(a->tv_sec - b->tv_sec) + + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; +} + +static int check_gtod(const 
struct timeval *tv_sys1, + const struct timeval *tv_sys2, + const struct timezone *tz_sys, + const char *which, + const struct timeval *tv_other, + const struct timezone *tz_other) +{ + int nerrs = 0; + double d1, d2; + + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) { + printf("[FAIL] %s tz mismatch\n", which); + nerrs++; + } + + d1 = tv_diff(tv_other, tv_sys1); + d2 = tv_diff(tv_sys2, tv_other); + printf("\t%s time offsets: %lf %lf\n", which, d1, d2); + + if (d1 < 0 || d2 < 0) { + printf("[FAIL]\t%s time was inconsistent with the syscall\n", which); + nerrs++; + } else { + printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which); + } + + return nerrs; +} + +static int test_gtod(void) +{ + struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; + struct timezone tz_sys, tz_vdso, tz_vsys; + long ret_vdso = -1; + long ret_vsys = -1; + int nerrs = 0; + + printf("[RUN]\ttest gettimeofday()\n"); + + if (sys_gtod(&tv_sys1, &tz_sys) != 0) + err(1, "syscall gettimeofday"); + if (vdso_gtod) + ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); + if (vgtod) + ret_vsys = vgtod(&tv_vsys, &tz_vsys); + if (sys_gtod(&tv_sys2, &tz_sys) != 0) + err(1, "syscall gettimeofday"); + + if (vdso_gtod) { + if (ret_vdso == 0) { + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); + } else { + printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso); + nerrs++; + } + } + + if (vgtod) { + if (ret_vsys == 0) { + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); + } else { + printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys); + nerrs++; + } + } + + return nerrs; +} + +static int test_time(void) { + int nerrs = 0; + + printf("[RUN]\ttest time()\n"); + long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; + long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; + t_sys1 = sys_time(&t2_sys1); + if (vdso_time) + t_vdso = vdso_time(&t2_vdso); + if (vtime) + t_vsys = 
vtime(&t2_vsys); + t_sys2 = sys_time(&t2_sys2); + if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { + printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2); + nerrs++; + return nerrs; + } + + if (vdso_time) { + if (t_vdso < 0 || t_vdso != t2_vdso) { + printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso); + nerrs++; + } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { + printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2); + nerrs++; + } else { + printf("[OK]\tvDSO time() is okay\n"); + } + } + + if (vtime) { + if (t_vsys < 0 || t_vsys != t2_vsys) { + printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); + nerrs++; + } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { + printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2); + nerrs++; + } else { + printf("[OK]\tvsyscall time() is okay\n"); + } + } + + return nerrs; +} + +static int test_getcpu(int cpu) +{ + int nerrs = 0; + long ret_sys, ret_vdso = -1, ret_vsys = -1; + + printf("[RUN]\tgetcpu() on CPU %d\n", cpu); + + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tfailed to force CPU %d\n", cpu); + return nerrs; + } + + unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; + unsigned node = 0; + bool have_node = false; + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); + if (vdso_getcpu) + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); + if (vgetcpu) + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); + + if (ret_sys == 0) { + if (cpu_sys != cpu) { + printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu); + nerrs++; + } + + have_node = true; + node = node_sys; + } + + if (vdso_getcpu) { + if (ret_vdso) { + printf("[FAIL]\tvDSO getcpu() failed\n"); + nerrs++; + } else { + if (!have_node) { + have_node = 
true; + node = node_vdso; + } + + if (cpu_vdso != cpu) { + printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu); + nerrs++; + } else { + printf("[OK]\tvDSO reported correct CPU\n"); + } + + if (node_vdso != node) { + printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node); + nerrs++; + } else { + printf("[OK]\tvDSO reported correct node\n"); + } + } + } + + if (vgetcpu) { + if (ret_vsys) { + printf("[FAIL]\tvsyscall getcpu() failed\n"); + nerrs++; + } else { + if (!have_node) { + have_node = true; + node = node_vsys; + } + + if (cpu_vsys != cpu) { + printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu); + nerrs++; + } else { + printf("[OK]\tvsyscall reported correct CPU\n"); + } + + if (node_vsys != node) { + printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node); + nerrs++; + } else { + printf("[OK]\tvsyscall reported correct node\n"); + } + } + } + + return nerrs; +} + +static int test_vsys_r(void) +{ +#ifdef __x86_64__ + printf("[RUN]\tChecking read access to the vsyscall page\n"); + bool can_read; + if (sigsetjmp(jmpbuf, 1) == 0) { + *(volatile int *)0xffffffffff600000; + can_read = true; + } else { + can_read = false; + } + + if (can_read && !should_read_vsyscall) { + printf("[FAIL]\tWe have read access, but we shouldn't\n"); + return 1; + } else if (!can_read && should_read_vsyscall) { + printf("[FAIL]\tWe don't have read access, but we should\n"); + return 1; + } else { + printf("[OK]\tgot expected result\n"); + } +#endif + + return 0; +} + + +#ifdef __x86_64__ +#define X86_EFLAGS_TF (1UL << 8) +static volatile sig_atomic_t num_vsyscall_traps; + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags"); +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) 
+{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP]; + + if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0) + num_vsyscall_traps++; +} + +static int test_native_vsyscall(void) +{ + time_t tmp; + bool is_native; + + if (!vtime) + return 0; + + printf("[RUN]\tchecking for native vsyscall\n"); + sethandler(SIGTRAP, sigtrap, 0); + set_eflags(get_eflags() | X86_EFLAGS_TF); + vtime(&tmp); + set_eflags(get_eflags() & ~X86_EFLAGS_TF); + + /* + * If vsyscalls are emulated, we expect a single trap in the + * vsyscall page -- the call instruction will trap with RIP + * pointing to the entry point before emulation takes over. + * In native mode, we expect two traps, since whatever code + * the vsyscall page contains will be more than just a ret + * instruction. + */ + is_native = (num_vsyscall_traps > 1); + + printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", + (is_native ? "native" : "emulated"), + (int)num_vsyscall_traps); + + return 0; +} +#endif + +int main(int argc, char **argv) +{ + int nerrs = 0; + + init_vdso(); + nerrs += init_vsys(); + + nerrs += test_gtod(); + nerrs += test_time(); + nerrs += test_getcpu(0); + nerrs += test_getcpu(1); + + sethandler(SIGSEGV, sigsegv, 0); + nerrs += test_vsys_r(); + +#ifdef __x86_64__ + nerrs += test_native_vsyscall(); +#endif + + return nerrs ? 1 : 0; +} -- cgit v1.2.3 From 9f15b9120f562ae0bf0a836c96f4dafb33ae426a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 12 Jan 2018 19:25:32 +0300 Subject: kdump: Write the correct address of mem_section into vmcoreinfo Depending on configuration mem_section can now be an array or a pointer to an array allocated dynamically. In most cases, we can continue to refer to it as 'mem_section' regardless of what it is. But there's one exception: '&mem_section' means "address of the array" if mem_section is an array, but if mem_section is a pointer, it would mean "address of the pointer". 
We've stepped onto this in the kdump code: VMCOREINFO_SYMBOL(mem_section) writes down the address of pointer into vmcoreinfo, not the array as we wanted, breaking kdump. Let's introduce VMCOREINFO_SYMBOL_ARRAY() that would handle the situation correctly for both cases. Mike Galbraith Signed-off-by: Kirill A. Shutemov Acked-by: Baoquan He Acked-by: Dave Young Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: kexec@lists.infradead.org Cc: linux-mm@kvack.org Cc: stable@vger.kernel.org Fixes: 83e3c48729d9 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y") Link: http://lkml.kernel.org/r/20180112162532.35896-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/crash_core.h | 2 ++ kernel/crash_core.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 06097ef30449..b511f6d24b42 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SYMBOL_ARRAY(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) #define VMCOREINFO_SIZE(name) \ vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ (unsigned long)sizeof(name)) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index b3663896278e..4f63597c824d 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(contig_page_data); #endif #ifdef CONFIG_SPARSEMEM - VMCOREINFO_SYMBOL(mem_section); + VMCOREINFO_SYMBOL_ARRAY(mem_section); VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); VMCOREINFO_STRUCT_SIZE(mem_section); VMCOREINFO_OFFSET(mem_section, section_mem_map); -- cgit 
v1.2.3 From 36c1681678b507346e7397a235a7303dad665fc3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 11 Jan 2018 18:28:08 +0900 Subject: genksyms: drop *.hash.c from .gitignore This is a left-over of commit bb3290d91695 ("Remove gperf usage from toolchain"). We do not generate a hash function any more. Signed-off-by: Masahiro Yamada --- scripts/genksyms/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/genksyms/.gitignore b/scripts/genksyms/.gitignore index 86dc07a01b43..e7836b47f060 100644 --- a/scripts/genksyms/.gitignore +++ b/scripts/genksyms/.gitignore @@ -1,4 +1,3 @@ -*.hash.c *.lex.c *.tab.c *.tab.h -- cgit v1.2.3 From bed6760cf2c40778a58f2e399c8947b3b3c55518 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 12 Jan 2018 16:53:07 -0800 Subject: MAINTAINERS, nilfs2: change project home URLs The domain of NILFS project home was changed to "nilfs.sourceforge.io" to enable https access (the previous domain "nilfs.sourceforge.net" is redirected to the new one). Modify URLs of the project home to reflect this change and to replace their protocol from http to https. Link: http://lkml.kernel.org/r/1515416141-5614-1-git-send-email-konishi.ryusuke@lab.ntt.co.jp Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/nilfs2.txt | 4 ++-- MAINTAINERS | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index c0727dc36271..f2f3f8592a6f 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -25,8 +25,8 @@ available from the following download page. At least "mkfs.nilfs2", cleaner or garbage collector) are required. Details on the tools are described in the man pages included in the package. 
-Project web page: http://nilfs.sourceforge.net/ -Download page: http://nilfs.sourceforge.net/en/download.html +Project web page: https://nilfs.sourceforge.io/ +Download page: https://nilfs.sourceforge.io/en/download.html List info: http://vger.kernel.org/vger-lists.html#linux-nilfs Caveats diff --git a/MAINTAINERS b/MAINTAINERS index d76af75a653a..18994806e441 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9638,8 +9638,8 @@ F: include/uapi/linux/sunrpc/ NILFS2 FILESYSTEM M: Ryusuke Konishi L: linux-nilfs@vger.kernel.org -W: http://nilfs.sourceforge.net/ -W: http://nilfs.osdn.jp/ +W: https://nilfs.sourceforge.io/ +W: https://nilfs.osdn.jp/ T: git git://github.com/konis/nilfs2.git S: Supported F: Documentation/filesystems/nilfs2.txt -- cgit v1.2.3 From d9570ee3bd1d4f20ce63485f5ef05663866fe6c0 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 12 Jan 2018 16:53:10 -0800 Subject: kmemleak: allow to coexist with fault injection kmemleak does one slab allocation per user allocation. So if slab fault injection is enabled to any degree, kmemleak instantly fails to allocate and turns itself off. However, it's useful to use kmemleak with fault injection to find leaks on error paths. On the other hand, checking kmemleak itself is not so useful because (1) it's a debugging tool and (2) it has a very regular allocation pattern (basically a single allocation site, so it either works or not). Turn off fault injection for kmemleak allocations. 
Link: http://lkml.kernel.org/r/20180109192243.19316-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index d73c14294f3a..f656ca27f6c2 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -127,7 +127,7 @@ /* GFP bitmask for kmemleak internal allocations */ #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \ __GFP_NORETRY | __GFP_NOMEMALLOC | \ - __GFP_NOWARN) + __GFP_NOWARN | __GFP_NOFAIL) /* scanning area inside a memory block */ struct kmemleak_scan_area { -- cgit v1.2.3 From a0b1280368d1e91ab72f849ef095b4f07a39bbf1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 12 Jan 2018 16:53:14 -0800 Subject: kdump: write correct address of mem_section into vmcoreinfo Depending on configuration mem_section can now be an array or a pointer to an array allocated dynamically. In most cases, we can continue to refer to it as 'mem_section' regardless of what it is. But there's one exception: '&mem_section' means "address of the array" if mem_section is an array, but if mem_section is a pointer, it would mean "address of the pointer". We've stepped onto this in kdump code. VMCOREINFO_SYMBOL(mem_section) writes down address of pointer into vmcoreinfo, not array as we wanted. Let's introduce VMCOREINFO_SYMBOL_ARRAY() that would handle the situation correctly for both cases. Link: http://lkml.kernel.org/r/20180112162532.35896-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Fixes: 83e3c48729d9 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y") Acked-by: Baoquan He Acked-by: Dave Young Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: Greg Kroah-Hartman Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 2 ++ kernel/crash_core.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 06097ef30449..b511f6d24b42 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SYMBOL_ARRAY(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) #define VMCOREINFO_SIZE(name) \ vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ (unsigned long)sizeof(name)) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index b3663896278e..4f63597c824d 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_SYMBOL(contig_page_data); #endif #ifdef CONFIG_SPARSEMEM - VMCOREINFO_SYMBOL(mem_section); + VMCOREINFO_SYMBOL_ARRAY(mem_section); VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); VMCOREINFO_STRUCT_SIZE(mem_section); VMCOREINFO_OFFSET(mem_section, section_mem_map); -- cgit v1.2.3 From 0f908ccbeca99ddf0ad60afa710e72aded4a5ea7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 12 Jan 2018 16:53:17 -0800 Subject: tools/objtool/Makefile: don't assume sync-check.sh is executable patch(1) loses the x bit. So if a user follows our patching instructions in Documentation/admin-guide/README.rst, their kernel will not compile. 
Fixes: 3bd51c5a371de ("objtool: Move kernel headers/code sync check to a script") Reported-by: Nicolas Bock Reported-by: Joakim Tjernlund Cc: Ingo Molnar Cc: Josh Poimboeuf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/objtool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index ae0272f9a091..e6acc281dd37 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -46,7 +46,7 @@ $(OBJTOOL_IN): fixdep FORCE @$(MAKE) $(build)=objtool $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN) - @./sync-check.sh + @$(CONFIG_SHELL) ./sync-check.sh $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ -- cgit v1.2.3 From f10ee3dcc9f0aba92a5c4c064628be5200765dc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 00:23:57 +0100 Subject: x86/pti: Fix !PCID and sanitize defines The switch to the user space page tables in the low level ASM code sets unconditionally bit 12 and bit 11 of CR3. Bit 12 is switching the base address of the page directory to the user part, bit 11 is switching the PCID to the PCID associated with the user page tables. This fails on a machine which lacks PCID support because bit 11 is set in CR3. Bit 11 is reserved when PCID is inactive. While the Intel SDM claims that the reserved bits are ignored when PCID is disabled, the AMD APM states that they should be cleared. This went unnoticed as the AMD APM was not checked when the code was developed and reviewed and test systems with Intel CPUs never failed to boot. The report is against a Centos 6 host where the guest fails to boot, so it's not yet clear whether this is a virt issue or can happen on real hardware too, but that's irrelevant as the AMD APM clearly asks for clearing the reserved bits. Make sure that on non PCID machines bit 11 is not set by the page table switching code.
Andy suggested to rename the related bits and masks so they are clearly describing what they should be used for, which is done as well for clarity. That split could have been done with alternatives but the macro hell is horrible and ugly. This can be done on top if someone cares to remove the extra orq. For now it's a straight forward fix. Fixes: 6fd166aae78c ("x86/mm: Use/Fix PCID to optimize user/kernel switches") Reported-by: Laura Abbott Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: stable Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Willy Tarreau Cc: David Woodhouse Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801140009150.2371@nanos --- arch/x86/entry/calling.h | 36 ++++++++++++++++++---------------- arch/x86/include/asm/processor-flags.h | 2 +- arch/x86/include/asm/tlbflush.h | 6 +++--- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 45a63e00a6af..3f48f695d5e6 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two * halves: */ -#define PTI_SWITCH_PGTABLES_MASK (1<= (1 << X86_CR3_PTI_SWITCH_BIT)); + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); /* * The ASID being passed in here should have respected the * MAX_ASID_AVAILABLE and thus never have the switch bit set. */ - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT)); + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); #endif /* * The dynamically-assigned ASIDs that get passed in are small @@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid) { u16 ret = kern_pcid(asid); #ifdef CONFIG_PAGE_TABLE_ISOLATION - ret |= 1 << X86_CR3_PTI_SWITCH_BIT; + ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; #endif return ret; } -- cgit v1.2.3 From a237f762681e2a394ca67f21df2feb2b76a3609b Mon Sep 17 00:00:00 2001 From: "W. 
Trevor King" Date: Fri, 12 Jan 2018 15:24:59 -0800 Subject: security/Kconfig: Correct the Documentation reference for PTI When the config option for PTI was added a reference to documentation was added as well. But the documentation did not exist at that point. The final documentation has a different file name. Fix it up to point to the proper file. Fixes: 385ce0ea ("x86/mm/pti: Add Kconfig") Signed-off-by: W. Trevor King Signed-off-by: Thomas Gleixner Cc: Dave Hansen Cc: linux-mm@kvack.org Cc: linux-security-module@vger.kernel.org Cc: James Morris Cc: "Serge E. Hallyn" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/3009cc8ccbddcd897ec1e0cb6dda524929de0d14.1515799398.git.wking@tremily.us --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index 3d4debd0257e..b0cb9a5f9448 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -63,7 +63,7 @@ config PAGE_TABLE_ISOLATION ensuring that the majority of kernel addresses are not mapped into userspace. - See Documentation/x86/pagetable-isolation.txt for more details. + See Documentation/x86/pti.txt for more details. config SECURITY_INFINIBAND bool "Infiniband Security Hooks" -- cgit v1.2.3 From 99a9dc98ba52267ce5e062b52de88ea1f1b2a7d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 14 Jan 2018 11:27:13 +0100 Subject: x86,perf: Disable intel_bts when PTI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intel_bts driver does not use the 'normal' BTS buffer which is exposed through the cpu_entry_area but instead uses the memory allocated for the perf AUX buffer. This obviously comes apart when using PTI because then the kernel mapping; which includes that AUX buffer memory; disappears. Fixing this requires to expose a mapping which is visible in all context and that's not trivial. As a quick fix disable this driver when PTI is enabled to prevent malfunction. 
Fixes: 385ce0ea4c07 ("x86/mm/pti: Add Kconfig") Reported-by: Vince Weaver Reported-by: Robert Święcki Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: greg@kroah.com Cc: hughd@google.com Cc: luto@amacapital.net Cc: Vince Weaver Cc: torvalds@linux-foundation.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180114102713.GB6166@worktop.programming.kicks-ass.net --- arch/x86/events/intel/bts.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 141e07b06216..24ffa1e88cf9 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -582,6 +582,24 @@ static __init int bts_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) return -ENODEV; + if (boot_cpu_has(X86_FEATURE_PTI)) { + /* + * BTS hardware writes through a virtual memory map we must + * either use the kernel physical map, or the user mapping of + * the AUX buffer. + * + * However, since this driver supports per-CPU and per-task inherit + * we cannot use the user mapping since it will not be availble + * if we're not running the owning process. + * + * With PTI we can't use the kernal map either, because its not + * there when we run userspace. + * + * For now, disable this driver when using PTI. + */ + return -ENODEV; + } + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; -- cgit v1.2.3 From da4ae6c4a0b8dee5a5377a385545d2250fa8cddb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 22 Dec 2017 00:27:54 -0500 Subject: x86/tsc: Future-proof native_calibrate_tsc() If the crystal frequency cannot be determined via CPUID(15).crystal_khz or the built-in table then native_calibrate_tsc() will still set the X86_FEATURE_TSC_KNOWN_FREQ flag which prevents the refined TSC calibration. 
As a consequence such systems use cpu_khz for the TSC frequency which is incorrect when cpu_khz != tsc_khz resulting in time drift. Return early when the crystal frequency cannot be retrieved without setting the X86_FEATURE_TSC_KNOWN_FREQ flag. This ensures that the refined TSC calibration is invoked. [ tglx: Steam-blastered changelog. Sigh ] Fixes: 4ca4df0b7eb0 ("x86/tsc: Mark TSC frequency determined by CPUID as known") Signed-off-by: Len Brown Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: Bin Gao Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/0fe2503aa7d7fc69137141fc705541a78101d2b9.1513920414.git.len.brown@intel.com --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 8ea117f8142e..ce4b71119c36 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -612,6 +612,8 @@ unsigned long native_calibrate_tsc(void) } } + if (crystal_khz == 0) + return 0; /* * TSC frequency determined by CPUID is a "hardware reported" * frequency and is the most accurate one so far we have. This -- cgit v1.2.3 From b511203093489eb1829cb4de86e8214752205ac6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 22 Dec 2017 00:27:55 -0500 Subject: x86/tsc: Fix erroneous TSC rate on Skylake Xeon The INTEL_FAM6_SKYLAKE_X hardcoded crystal_khz value of 25MHZ is problematic: - SKX workstations (with same model # as server variants) use a 24 MHz crystal. This results in a -4.0% time drift rate on SKX workstations. - SKX servers subject the crystal to an EMI reduction circuit that reduces its actual frequency by (approximately) -0.25%. This results in -1 second per 10 minute time drift as compared to network time. This issue can also trigger a timer and power problem, on configurations that use the LAPIC timer (versus the TSC deadline timer). Clock ticks scheduled with the LAPIC timer arrive a few usec before the time they are expected (according to the slow TSC). 
This causes Linux to poll-idle, when it should be in an idle power saving state. The idle and clock code do not gracefully recover from this error, sometimes resulting in significant polling and measurable power impact. Stop using native_calibrate_tsc() for INTEL_FAM6_SKYLAKE_X. native_calibrate_tsc() will return 0, boot will run with tsc_khz = cpu_khz, and the TSC refined calibration will update tsc_khz to correct for the difference. [ tglx: Sanitized change log ] Fixes: 6baf3d61821f ("x86/tsc: Add additional Intel CPU models to the crystal quirk list") Signed-off-by: Len Brown Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: Prarit Bhargava Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/ff6dcea166e8ff8f2f6a03c17beab2cb436aa779.1513920414.git.len.brown@intel.com --- arch/x86/kernel/tsc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ce4b71119c36..3bf4df7f52d7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void) case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000; /* 24.0 MHz */ break; - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_DENVERTON: crystal_khz = 25000; /* 25.0 MHz */ break; -- cgit v1.2.3 From 4b5b2127238e689ee18aa6752959751dd61c4c73 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 22 Dec 2017 00:27:56 -0500 Subject: x86/tsc: Print tsc_khz, when it differs from cpu_khz If CPU and TSC frequency are the same the printout of the CPU frequency is valid for the TSC as well: tsc: Detected 2900.000 MHz processor If the TSC frequency is different there is no information in dmesg.
Add a conditional printout: tsc: Detected 2904.000 MHz TSC Signed-off-by: Len Brown Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Link: https://lkml.kernel.org/r/537b342debcd8e8aebc8d631015dcdf9f9ba8a26.1513920414.git.len.brown@intel.com --- arch/x86/kernel/tsc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3bf4df7f52d7..e169e85db434 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1316,6 +1316,12 @@ void __init tsc_init(void) (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); + if (cpu_khz != tsc_khz) { + pr_info("Detected %lu.%03lu MHz TSC", + (unsigned long)tsc_khz / 1000, + (unsigned long)tsc_khz % 1000); + } + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ tsc_store_and_check_tsc_adjust(true); -- cgit v1.2.3 From beacd6f7ed5e2915959442245b3b2480c2e37490 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Jan 2018 14:31:35 -0600 Subject: x86/mm/pkeys: Fix fill_sig_info_pkey SEGV_PKUERR is a signal specific si_code which happens to have the same numeric value as several others: BUS_MCEERR_AR, ILL_ILLTRP, FPE_FLTOVF, TRAP_HWBKPT, CLD_TRAPPED, POLL_ERR, SEGV_THREAD_ID. As such it is not safe to just test the si_code; the signal number must also be tested to prevent a false positive in fill_sig_info_pkey. This error was found by inspection, and BUS_MCEERR_AR appears to be a real candidate for confusion. So pass in si_signo and check for SIGSEGV to verify that it is actually a SEGV_PKUERR. Fixes: 019132ff3daf ("x86/mm/pkeys: Fill in pkey field in siginfo") Signed-off-by: "Eric W.
Biederman" Signed-off-by: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: Dave Hansen Cc: Oleg Nesterov Cc: Al Viro cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180112203135.4669-2-ebiederm@xmission.com --- arch/x86/mm/fault.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 06fe3d51d385..b3e40773dce0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ -static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) +static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, + u32 *pkey) { /* This is effectively an #ifdef */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) return; /* Fault not from Protection Keys: nothing to do */ - if (si_code != SEGV_PKUERR) + if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) return; /* * force_sig_info_fault() is called from a number of @@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, pkey); + fill_sig_info_pkey(si_signo, si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } -- cgit v1.2.3 From fc90ccfd286eabb05ec54521367df8663cf0bbbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 28 Nov 2017 16:53:50 +0200 Subject: Revert "x86/apic: Remove init_bsp_APIC()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b371ae0d4a194b178817b0edfb6a7395c7aec37a. It causes boot hangs on old P3/P4 systems when the local APIC is enforced in UP mode. 
Reported-by: Meelis Roos Signed-off-by: Ville Syrjälä Signed-off-by: Thomas Gleixner Cc: Dou Liyang Cc: yinghai@kernel.org Cc: bhe@redhat.com Link: https://lkml.kernel.org/r/20171128145350.21560-1-ville.syrjala@linux.intel.com --- arch/x86/include/asm/apic.h | 1 + arch/x86/kernel/apic/apic.c | 49 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irqinit.c | 3 +++ 3 files changed, 53 insertions(+) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a9e57f08bfa6..98722773391d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); extern void apic_intr_mode_init(void); extern void setup_local_APIC(void); extern void init_apic_mappings(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 880441f24146..25ddf02598d2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void) return APIC_SYMMETRIC_IO; } +/* + * An initial setup of the virtual wire mode. + */ +void __init init_bsp_APIC(void) +{ + unsigned int value; + + /* + * Don't do the setup now if we have a SMP BIOS as the + * through-I/O-APIC virtual wire mode might be active. + */ + if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) + return; + + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + + /* + * Enable APIC. 
+ */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; + value |= SPURIOUS_APIC_VECTOR; + apic_write(APIC_SPIV, value); + + /* + * Set up the virtual wire mode. + */ + apic_write(APIC_LVT0, APIC_DM_EXTINT); + value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + if (apic_extnmi == APIC_EXTNMI_NONE) + value |= APIC_LVT_MASKED; + apic_write(APIC_LVT1, value); +} + /* Init the interrupt delivery mode for the BSP */ void __init apic_intr_mode_init(void) { diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 8da3e909e967..a539410c4ea9 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -61,6 +61,9 @@ void __init init_ISA_irqs(void) struct irq_chip *chip = legacy_pic->chip; int i; +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + init_bsp_APIC(); +#endif legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) -- cgit v1.2.3 From b4525db6f0c6dc02ad2bde08a3bcdcf0ad7891d4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Sat, 13 Jan 2018 11:54:10 +0100 Subject: MAINTAINERS: Add entry for Marvell NAND controller driver Add entry for Marvell NAND controller driver and its bindings which will soon replace the old driver pxa3xx_nand.c. 
Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 37ee5ae4bae2..81c8c5162144 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8402,6 +8402,13 @@ L: linux-wireless@vger.kernel.org S: Odd Fixes F: drivers/net/wireless/marvell/mwl8k.c +MARVELL NAND CONTROLLER DRIVER +M: Miquel Raynal +L: linux-mtd@lists.infradead.org +S: Maintained +F: drivers/mtd/nand/marvell_nand.c +F: Documentation/devicetree/bindings/mtd/marvell-nand.txt + MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER M: Nicolas Pitre S: Odd Fixes -- cgit v1.2.3 From 6837befde3a6b42a36c894a4f86bafdc6db82534 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 13 Jan 2018 17:56:16 +0100 Subject: mtd: onenand: samsung: remove incorrect __iomem annotation 'page_buf' and 'oob_buf' are allocated with 'devm_kzalloc()' and should not have __iomem decoration. Remove these decorations and some useless casting. Signed-off-by: Christophe JAILLET Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/samsung.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index dfdfb478ba35..2e9d076e445a 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -129,8 +129,8 @@ struct s3c_onenand { void __iomem *base; void __iomem *ahb_addr; int bootram_command; - void __iomem *page_buf; - void __iomem *oob_buf; + void *page_buf; + void *oob_buf; unsigned int (*mem_addr)(int fba, int fpa, int fsa); unsigned int (*cmd_map)(unsigned int type, unsigned int val); void __iomem *dma_addr; @@ -408,8 +408,8 @@ static int s3c_onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, /* * Emulate Two BufferRAMs and access with 4 bytes pointer */ - m = (unsigned int *) onenand->page_buf; - s = (unsigned int *) onenand->oob_buf; + m = onenand->page_buf; + s = onenand->oob_buf; if (index) { m += (this->writesize >> 2); @@ 
-481,11 +481,11 @@ static unsigned char *s3c_get_bufferram(struct mtd_info *mtd, int area) unsigned char *p; if (area == ONENAND_DATARAM) { - p = (unsigned char *) onenand->page_buf; + p = onenand->page_buf; if (index == 1) p += this->writesize; } else { - p = (unsigned char *) onenand->oob_buf; + p = onenand->oob_buf; if (index == 1) p += mtd->oobsize; } -- cgit v1.2.3 From 23bae78e8a57f0249ed9e3f9c3f40fc1499ce0d4 Mon Sep 17 00:00:00 2001 From: Guochun Mao Date: Mon, 18 Dec 2017 09:47:35 +0800 Subject: mtd: mtk-nor: modify functions' name more generally Since more and more Mediatek's SoC can use this driver to control spi-nor flash, functions' name with "mt8173_" is no longer properly. Replacing "mt8173_" with "mtk_" will be more accurate to describe these functions' usable scope. Signed-off-by: Guochun Mao Signed-off-by: Cyrille Pitchen --- drivers/mtd/spi-nor/mtk-quadspi.c | 240 +++++++++++++++++++------------------- 1 file changed, 120 insertions(+), 120 deletions(-) diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c index abe455ccd68b..5442993b71ff 100644 --- a/drivers/mtd/spi-nor/mtk-quadspi.c +++ b/drivers/mtd/spi-nor/mtk-quadspi.c @@ -110,7 +110,7 @@ #define MTK_NOR_PRG_REG(n) (MTK_NOR_PRGDATA0_REG + 4 * (n)) #define MTK_NOR_SHREG(n) (MTK_NOR_SHREG0_REG + 4 * (n)) -struct mt8173_nor { +struct mtk_nor { struct spi_nor nor; struct device *dev; void __iomem *base; /* nor flash base address */ @@ -118,48 +118,48 @@ struct mt8173_nor { struct clk *nor_clk; }; -static void mt8173_nor_set_read_mode(struct mt8173_nor *mt8173_nor) +static void mtk_nor_set_read_mode(struct mtk_nor *mtk_nor) { - struct spi_nor *nor = &mt8173_nor->nor; + struct spi_nor *nor = &mtk_nor->nor; switch (nor->read_proto) { case SNOR_PROTO_1_1_1: - writeb(nor->read_opcode, mt8173_nor->base + + writeb(nor->read_opcode, mtk_nor->base + MTK_NOR_PRGDATA3_REG); - writeb(MTK_NOR_FAST_READ, mt8173_nor->base + + writeb(MTK_NOR_FAST_READ, mtk_nor->base + 
MTK_NOR_CFG1_REG); break; case SNOR_PROTO_1_1_2: - writeb(nor->read_opcode, mt8173_nor->base + + writeb(nor->read_opcode, mtk_nor->base + MTK_NOR_PRGDATA3_REG); - writeb(MTK_NOR_DUAL_READ_EN, mt8173_nor->base + + writeb(MTK_NOR_DUAL_READ_EN, mtk_nor->base + MTK_NOR_DUAL_REG); break; case SNOR_PROTO_1_1_4: - writeb(nor->read_opcode, mt8173_nor->base + + writeb(nor->read_opcode, mtk_nor->base + MTK_NOR_PRGDATA4_REG); - writeb(MTK_NOR_QUAD_READ_EN, mt8173_nor->base + + writeb(MTK_NOR_QUAD_READ_EN, mtk_nor->base + MTK_NOR_DUAL_REG); break; default: - writeb(MTK_NOR_DUAL_DISABLE, mt8173_nor->base + + writeb(MTK_NOR_DUAL_DISABLE, mtk_nor->base + MTK_NOR_DUAL_REG); break; } } -static int mt8173_nor_execute_cmd(struct mt8173_nor *mt8173_nor, u8 cmdval) +static int mtk_nor_execute_cmd(struct mtk_nor *mtk_nor, u8 cmdval) { int reg; u8 val = cmdval & 0x1f; - writeb(cmdval, mt8173_nor->base + MTK_NOR_CMD_REG); - return readl_poll_timeout(mt8173_nor->base + MTK_NOR_CMD_REG, reg, + writeb(cmdval, mtk_nor->base + MTK_NOR_CMD_REG); + return readl_poll_timeout(mtk_nor->base + MTK_NOR_CMD_REG, reg, !(reg & val), 100, 10000); } -static int mt8173_nor_do_tx_rx(struct mt8173_nor *mt8173_nor, u8 op, - u8 *tx, int txlen, u8 *rx, int rxlen) +static int mtk_nor_do_tx_rx(struct mtk_nor *mtk_nor, u8 op, + u8 *tx, int txlen, u8 *rx, int rxlen) { int len = 1 + txlen + rxlen; int i, ret, idx; @@ -167,26 +167,26 @@ static int mt8173_nor_do_tx_rx(struct mt8173_nor *mt8173_nor, u8 op, if (len > MTK_NOR_MAX_SHIFT) return -EINVAL; - writeb(len * 8, mt8173_nor->base + MTK_NOR_CNT_REG); + writeb(len * 8, mtk_nor->base + MTK_NOR_CNT_REG); /* start at PRGDATA5, go down to PRGDATA0 */ idx = MTK_NOR_MAX_RX_TX_SHIFT - 1; /* opcode */ - writeb(op, mt8173_nor->base + MTK_NOR_PRG_REG(idx)); + writeb(op, mtk_nor->base + MTK_NOR_PRG_REG(idx)); idx--; /* program TX data */ for (i = 0; i < txlen; i++, idx--) - writeb(tx[i], mt8173_nor->base + MTK_NOR_PRG_REG(idx)); + writeb(tx[i], mtk_nor->base + 
MTK_NOR_PRG_REG(idx)); /* clear out rest of TX registers */ while (idx >= 0) { - writeb(0, mt8173_nor->base + MTK_NOR_PRG_REG(idx)); + writeb(0, mtk_nor->base + MTK_NOR_PRG_REG(idx)); idx--; } - ret = mt8173_nor_execute_cmd(mt8173_nor, MTK_NOR_PRG_CMD); + ret = mtk_nor_execute_cmd(mtk_nor, MTK_NOR_PRG_CMD); if (ret) return ret; @@ -195,20 +195,20 @@ static int mt8173_nor_do_tx_rx(struct mt8173_nor *mt8173_nor, u8 op, /* read out RX data */ for (i = 0; i < rxlen; i++, idx--) - rx[i] = readb(mt8173_nor->base + MTK_NOR_SHREG(idx)); + rx[i] = readb(mtk_nor->base + MTK_NOR_SHREG(idx)); return 0; } /* Do a WRSR (Write Status Register) command */ -static int mt8173_nor_wr_sr(struct mt8173_nor *mt8173_nor, u8 sr) +static int mtk_nor_wr_sr(struct mtk_nor *mtk_nor, u8 sr) { - writeb(sr, mt8173_nor->base + MTK_NOR_PRGDATA5_REG); - writeb(8, mt8173_nor->base + MTK_NOR_CNT_REG); - return mt8173_nor_execute_cmd(mt8173_nor, MTK_NOR_WRSR_CMD); + writeb(sr, mtk_nor->base + MTK_NOR_PRGDATA5_REG); + writeb(8, mtk_nor->base + MTK_NOR_CNT_REG); + return mtk_nor_execute_cmd(mtk_nor, MTK_NOR_WRSR_CMD); } -static int mt8173_nor_write_buffer_enable(struct mt8173_nor *mt8173_nor) +static int mtk_nor_write_buffer_enable(struct mtk_nor *mtk_nor) { u8 reg; @@ -216,27 +216,27 @@ static int mt8173_nor_write_buffer_enable(struct mt8173_nor *mt8173_nor) * 0: pre-fetch buffer use for read * 1: pre-fetch buffer use for page program */ - writel(MTK_NOR_WR_BUF_ENABLE, mt8173_nor->base + MTK_NOR_CFG2_REG); - return readb_poll_timeout(mt8173_nor->base + MTK_NOR_CFG2_REG, reg, + writel(MTK_NOR_WR_BUF_ENABLE, mtk_nor->base + MTK_NOR_CFG2_REG); + return readb_poll_timeout(mtk_nor->base + MTK_NOR_CFG2_REG, reg, 0x01 == (reg & 0x01), 100, 10000); } -static int mt8173_nor_write_buffer_disable(struct mt8173_nor *mt8173_nor) +static int mtk_nor_write_buffer_disable(struct mtk_nor *mtk_nor) { u8 reg; - writel(MTK_NOR_WR_BUF_DISABLE, mt8173_nor->base + MTK_NOR_CFG2_REG); - return 
readb_poll_timeout(mt8173_nor->base + MTK_NOR_CFG2_REG, reg, + writel(MTK_NOR_WR_BUF_DISABLE, mtk_nor->base + MTK_NOR_CFG2_REG); + return readb_poll_timeout(mtk_nor->base + MTK_NOR_CFG2_REG, reg, MTK_NOR_WR_BUF_DISABLE == (reg & 0x1), 100, 10000); } -static void mt8173_nor_set_addr_width(struct mt8173_nor *mt8173_nor) +static void mtk_nor_set_addr_width(struct mtk_nor *mtk_nor) { u8 val; - struct spi_nor *nor = &mt8173_nor->nor; + struct spi_nor *nor = &mtk_nor->nor; - val = readb(mt8173_nor->base + MTK_NOR_DUAL_REG); + val = readb(mtk_nor->base + MTK_NOR_DUAL_REG); switch (nor->addr_width) { case 3: @@ -246,115 +246,115 @@ static void mt8173_nor_set_addr_width(struct mt8173_nor *mt8173_nor) val |= MTK_NOR_4B_ADDR_EN; break; default: - dev_warn(mt8173_nor->dev, "Unexpected address width %u.\n", + dev_warn(mtk_nor->dev, "Unexpected address width %u.\n", nor->addr_width); break; } - writeb(val, mt8173_nor->base + MTK_NOR_DUAL_REG); + writeb(val, mtk_nor->base + MTK_NOR_DUAL_REG); } -static void mt8173_nor_set_addr(struct mt8173_nor *mt8173_nor, u32 addr) +static void mtk_nor_set_addr(struct mtk_nor *mtk_nor, u32 addr) { int i; - mt8173_nor_set_addr_width(mt8173_nor); + mtk_nor_set_addr_width(mtk_nor); for (i = 0; i < 3; i++) { - writeb(addr & 0xff, mt8173_nor->base + MTK_NOR_RADR0_REG + i * 4); + writeb(addr & 0xff, mtk_nor->base + MTK_NOR_RADR0_REG + i * 4); addr >>= 8; } /* Last register is non-contiguous */ - writeb(addr & 0xff, mt8173_nor->base + MTK_NOR_RADR3_REG); + writeb(addr & 0xff, mtk_nor->base + MTK_NOR_RADR3_REG); } -static ssize_t mt8173_nor_read(struct spi_nor *nor, loff_t from, size_t length, - u_char *buffer) +static ssize_t mtk_nor_read(struct spi_nor *nor, loff_t from, size_t length, + u_char *buffer) { int i, ret; int addr = (int)from; u8 *buf = (u8 *)buffer; - struct mt8173_nor *mt8173_nor = nor->priv; + struct mtk_nor *mtk_nor = nor->priv; /* set mode for fast read mode ,dual mode or quad mode */ - mt8173_nor_set_read_mode(mt8173_nor); - 
mt8173_nor_set_addr(mt8173_nor, addr); + mtk_nor_set_read_mode(mtk_nor); + mtk_nor_set_addr(mtk_nor, addr); for (i = 0; i < length; i++) { - ret = mt8173_nor_execute_cmd(mt8173_nor, MTK_NOR_PIO_READ_CMD); + ret = mtk_nor_execute_cmd(mtk_nor, MTK_NOR_PIO_READ_CMD); if (ret < 0) return ret; - buf[i] = readb(mt8173_nor->base + MTK_NOR_RDATA_REG); + buf[i] = readb(mtk_nor->base + MTK_NOR_RDATA_REG); } return length; } -static int mt8173_nor_write_single_byte(struct mt8173_nor *mt8173_nor, - int addr, int length, u8 *data) +static int mtk_nor_write_single_byte(struct mtk_nor *mtk_nor, + int addr, int length, u8 *data) { int i, ret; - mt8173_nor_set_addr(mt8173_nor, addr); + mtk_nor_set_addr(mtk_nor, addr); for (i = 0; i < length; i++) { - writeb(*data++, mt8173_nor->base + MTK_NOR_WDATA_REG); - ret = mt8173_nor_execute_cmd(mt8173_nor, MTK_NOR_PIO_WR_CMD); + writeb(*data++, mtk_nor->base + MTK_NOR_WDATA_REG); + ret = mtk_nor_execute_cmd(mtk_nor, MTK_NOR_PIO_WR_CMD); if (ret < 0) return ret; } return 0; } -static int mt8173_nor_write_buffer(struct mt8173_nor *mt8173_nor, int addr, - const u8 *buf) +static int mtk_nor_write_buffer(struct mtk_nor *mtk_nor, int addr, + const u8 *buf) { int i, bufidx, data; - mt8173_nor_set_addr(mt8173_nor, addr); + mtk_nor_set_addr(mtk_nor, addr); bufidx = 0; for (i = 0; i < SFLASH_WRBUF_SIZE; i += 4) { data = buf[bufidx + 3]<<24 | buf[bufidx + 2]<<16 | buf[bufidx + 1]<<8 | buf[bufidx]; bufidx += 4; - writel(data, mt8173_nor->base + MTK_NOR_PP_DATA_REG); + writel(data, mtk_nor->base + MTK_NOR_PP_DATA_REG); } - return mt8173_nor_execute_cmd(mt8173_nor, MTK_NOR_WR_CMD); + return mtk_nor_execute_cmd(mtk_nor, MTK_NOR_WR_CMD); } -static ssize_t mt8173_nor_write(struct spi_nor *nor, loff_t to, size_t len, - const u_char *buf) +static ssize_t mtk_nor_write(struct spi_nor *nor, loff_t to, size_t len, + const u_char *buf) { int ret; - struct mt8173_nor *mt8173_nor = nor->priv; + struct mtk_nor *mtk_nor = nor->priv; size_t i; - ret = 
mt8173_nor_write_buffer_enable(mt8173_nor); + ret = mtk_nor_write_buffer_enable(mtk_nor); if (ret < 0) { - dev_warn(mt8173_nor->dev, "write buffer enable failed!\n"); + dev_warn(mtk_nor->dev, "write buffer enable failed!\n"); return ret; } for (i = 0; i + SFLASH_WRBUF_SIZE <= len; i += SFLASH_WRBUF_SIZE) { - ret = mt8173_nor_write_buffer(mt8173_nor, to, buf); + ret = mtk_nor_write_buffer(mtk_nor, to, buf); if (ret < 0) { - dev_err(mt8173_nor->dev, "write buffer failed!\n"); + dev_err(mtk_nor->dev, "write buffer failed!\n"); return ret; } to += SFLASH_WRBUF_SIZE; buf += SFLASH_WRBUF_SIZE; } - ret = mt8173_nor_write_buffer_disable(mt8173_nor); + ret = mtk_nor_write_buffer_disable(mtk_nor); if (ret < 0) { - dev_warn(mt8173_nor->dev, "write buffer disable failed!\n"); + dev_warn(mtk_nor->dev, "write buffer disable failed!\n"); return ret; } if (i < len) { - ret = mt8173_nor_write_single_byte(mt8173_nor, to, - (int)(len - i), (u8 *)buf); + ret = mtk_nor_write_single_byte(mtk_nor, to, + (int)(len - i), (u8 *)buf); if (ret < 0) { - dev_err(mt8173_nor->dev, "write single byte failed!\n"); + dev_err(mtk_nor->dev, "write single byte failed!\n"); return ret; } } @@ -362,72 +362,72 @@ static ssize_t mt8173_nor_write(struct spi_nor *nor, loff_t to, size_t len, return len; } -static int mt8173_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) +static int mtk_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len) { int ret; - struct mt8173_nor *mt8173_nor = nor->priv; + struct mtk_nor *mtk_nor = nor->priv; switch (opcode) { case SPINOR_OP_RDSR: - ret = mt8173_nor_execute_cmd(mt8173_nor, MTK_NOR_RDSR_CMD); + ret = mtk_nor_execute_cmd(mtk_nor, MTK_NOR_RDSR_CMD); if (ret < 0) return ret; if (len == 1) - *buf = readb(mt8173_nor->base + MTK_NOR_RDSR_REG); + *buf = readb(mtk_nor->base + MTK_NOR_RDSR_REG); else - dev_err(mt8173_nor->dev, "len should be 1 for read status!\n"); + dev_err(mtk_nor->dev, "len should be 1 for read status!\n"); break; default: - ret = 
mt8173_nor_do_tx_rx(mt8173_nor, opcode, NULL, 0, buf, len); + ret = mtk_nor_do_tx_rx(mtk_nor, opcode, NULL, 0, buf, len); break; } return ret; } -static int mt8173_nor_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, - int len) +static int mtk_nor_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, + int len) { int ret; - struct mt8173_nor *mt8173_nor = nor->priv; + struct mtk_nor *mtk_nor = nor->priv; switch (opcode) { case SPINOR_OP_WRSR: /* We only handle 1 byte */ - ret = mt8173_nor_wr_sr(mt8173_nor, *buf); + ret = mtk_nor_wr_sr(mtk_nor, *buf); break; default: - ret = mt8173_nor_do_tx_rx(mt8173_nor, opcode, buf, len, NULL, 0); + ret = mtk_nor_do_tx_rx(mtk_nor, opcode, buf, len, NULL, 0); if (ret) - dev_warn(mt8173_nor->dev, "write reg failure!\n"); + dev_warn(mtk_nor->dev, "write reg failure!\n"); break; } return ret; } -static void mt8173_nor_disable_clk(struct mt8173_nor *mt8173_nor) +static void mtk_nor_disable_clk(struct mtk_nor *mtk_nor) { - clk_disable_unprepare(mt8173_nor->spi_clk); - clk_disable_unprepare(mt8173_nor->nor_clk); + clk_disable_unprepare(mtk_nor->spi_clk); + clk_disable_unprepare(mtk_nor->nor_clk); } -static int mt8173_nor_enable_clk(struct mt8173_nor *mt8173_nor) +static int mtk_nor_enable_clk(struct mtk_nor *mtk_nor) { int ret; - ret = clk_prepare_enable(mt8173_nor->spi_clk); + ret = clk_prepare_enable(mtk_nor->spi_clk); if (ret) return ret; - ret = clk_prepare_enable(mt8173_nor->nor_clk); + ret = clk_prepare_enable(mtk_nor->nor_clk); if (ret) { - clk_disable_unprepare(mt8173_nor->spi_clk); + clk_disable_unprepare(mtk_nor->spi_clk); return ret; } return 0; } -static int mtk_nor_init(struct mt8173_nor *mt8173_nor, +static int mtk_nor_init(struct mtk_nor *mtk_nor, struct device_node *flash_node) { const struct spi_nor_hwcaps hwcaps = { @@ -439,18 +439,18 @@ static int mtk_nor_init(struct mt8173_nor *mt8173_nor, struct spi_nor *nor; /* initialize controller to accept commands */ - writel(MTK_NOR_ENABLE_SF_CMD, mt8173_nor->base + 
MTK_NOR_WRPROT_REG); + writel(MTK_NOR_ENABLE_SF_CMD, mtk_nor->base + MTK_NOR_WRPROT_REG); - nor = &mt8173_nor->nor; - nor->dev = mt8173_nor->dev; - nor->priv = mt8173_nor; + nor = &mtk_nor->nor; + nor->dev = mtk_nor->dev; + nor->priv = mtk_nor; spi_nor_set_flash_node(nor, flash_node); /* fill the hooks to spi nor */ - nor->read = mt8173_nor_read; - nor->read_reg = mt8173_nor_read_reg; - nor->write = mt8173_nor_write; - nor->write_reg = mt8173_nor_write_reg; + nor->read = mtk_nor_read; + nor->read_reg = mtk_nor_read_reg; + nor->write = mtk_nor_write; + nor->write_reg = mtk_nor_write_reg; nor->mtd.name = "mtk_nor"; /* initialized with NULL */ ret = spi_nor_scan(nor, NULL, &hwcaps); @@ -465,34 +465,34 @@ static int mtk_nor_drv_probe(struct platform_device *pdev) struct device_node *flash_np; struct resource *res; int ret; - struct mt8173_nor *mt8173_nor; + struct mtk_nor *mtk_nor; if (!pdev->dev.of_node) { dev_err(&pdev->dev, "No DT found\n"); return -EINVAL; } - mt8173_nor = devm_kzalloc(&pdev->dev, sizeof(*mt8173_nor), GFP_KERNEL); - if (!mt8173_nor) + mtk_nor = devm_kzalloc(&pdev->dev, sizeof(*mtk_nor), GFP_KERNEL); + if (!mtk_nor) return -ENOMEM; - platform_set_drvdata(pdev, mt8173_nor); + platform_set_drvdata(pdev, mtk_nor); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mt8173_nor->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mt8173_nor->base)) - return PTR_ERR(mt8173_nor->base); + mtk_nor->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(mtk_nor->base)) + return PTR_ERR(mtk_nor->base); - mt8173_nor->spi_clk = devm_clk_get(&pdev->dev, "spi"); - if (IS_ERR(mt8173_nor->spi_clk)) - return PTR_ERR(mt8173_nor->spi_clk); + mtk_nor->spi_clk = devm_clk_get(&pdev->dev, "spi"); + if (IS_ERR(mtk_nor->spi_clk)) + return PTR_ERR(mtk_nor->spi_clk); - mt8173_nor->nor_clk = devm_clk_get(&pdev->dev, "sf"); - if (IS_ERR(mt8173_nor->nor_clk)) - return PTR_ERR(mt8173_nor->nor_clk); + mtk_nor->nor_clk = devm_clk_get(&pdev->dev, "sf"); + if 
(IS_ERR(mtk_nor->nor_clk)) + return PTR_ERR(mtk_nor->nor_clk); - mt8173_nor->dev = &pdev->dev; + mtk_nor->dev = &pdev->dev; - ret = mt8173_nor_enable_clk(mt8173_nor); + ret = mtk_nor_enable_clk(mtk_nor); if (ret) return ret; @@ -503,20 +503,20 @@ static int mtk_nor_drv_probe(struct platform_device *pdev) ret = -ENODEV; goto nor_free; } - ret = mtk_nor_init(mt8173_nor, flash_np); + ret = mtk_nor_init(mtk_nor, flash_np); nor_free: if (ret) - mt8173_nor_disable_clk(mt8173_nor); + mtk_nor_disable_clk(mtk_nor); return ret; } static int mtk_nor_drv_remove(struct platform_device *pdev) { - struct mt8173_nor *mt8173_nor = platform_get_drvdata(pdev); + struct mtk_nor *mtk_nor = platform_get_drvdata(pdev); - mt8173_nor_disable_clk(mt8173_nor); + mtk_nor_disable_clk(mtk_nor); return 0; } @@ -524,18 +524,18 @@ static int mtk_nor_drv_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int mtk_nor_suspend(struct device *dev) { - struct mt8173_nor *mt8173_nor = dev_get_drvdata(dev); + struct mtk_nor *mtk_nor = dev_get_drvdata(dev); - mt8173_nor_disable_clk(mt8173_nor); + mtk_nor_disable_clk(mtk_nor); return 0; } static int mtk_nor_resume(struct device *dev) { - struct mt8173_nor *mt8173_nor = dev_get_drvdata(dev); + struct mtk_nor *mtk_nor = dev_get_drvdata(dev); - return mt8173_nor_enable_clk(mt8173_nor); + return mtk_nor_enable_clk(mtk_nor); } static const struct dev_pm_ops mtk_nor_dev_pm_ops = { -- cgit v1.2.3 From c366287ebd698ef5e3de300d90cd62ee9ee7373e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Jan 2018 17:43:23 -0800 Subject: bpf: fix divides by zero Divides by zero are not nice, lets avoid them if possible. Also do_div() seems not needed when dealing with 32bit operands, but this seems a minor detail. 
Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 51ec2dda7f08..7949e8b8f94e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -956,7 +956,7 @@ select_insn: DST = tmp; CONT; ALU_MOD_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; DST = do_div(tmp, (u32) SRC); @@ -975,7 +975,7 @@ select_insn: DST = div64_u64(DST, SRC); CONT; ALU_DIV_X: - if (unlikely(SRC == 0)) + if (unlikely((u32)SRC == 0)) return 0; tmp = (u32) DST; do_div(tmp, (u32) SRC); -- cgit v1.2.3 From bf9ae8c5325c0070d0ec81a849bba8d156f65993 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Jan 2018 10:40:45 -0700 Subject: blk-mq: fix bad clear of RQF_MQ_INFLIGHT in blk_mq_ct_ctx_init() A previous commit moved the clearing of rq->rq_flags later, but we may have already set RQF_MQ_INFLIGHT when that happens. Ensure that we correctly initialize rq->rq_flags to the right value. This is based on an original fix by Ming, just rewritten to not require a conditional. 
Fixes: 7c3fb70f0341 ("block: rearrange a few request fields for better cache layout") Reviewed-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index b3b2003b7429..c8f62e6be6b6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -269,13 +269,14 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, { struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct request *rq = tags->static_rqs[tag]; + req_flags_t rq_flags = 0; if (data->flags & BLK_MQ_REQ_INTERNAL) { rq->tag = -1; rq->internal_tag = tag; } else { if (blk_mq_tag_busy(data->hctx)) { - rq->rq_flags = RQF_MQ_INFLIGHT; + rq_flags = RQF_MQ_INFLIGHT; atomic_inc(&data->hctx->nr_active); } rq->tag = tag; @@ -286,7 +287,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, /* csd/requeue_work/fifo_time is initialized before use */ rq->q = data->q; rq->mq_ctx = data->ctx; - rq->rq_flags = 0; + rq->rq_flags = rq_flags; rq->cpu = -1; rq->cmd_flags = op; if (data->flags & BLK_MQ_REQ_PREEMPT) -- cgit v1.2.3 From c1e2f0eaf015fb7076d51a339011f2383e6dd389 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Dec 2017 13:49:39 +0100 Subject: futex: Avoid violating the 10th rule of futex Julia reported futex state corruption in the following scenario: waiter waker stealer (prio > waiter) futex(WAIT_REQUEUE_PI, uaddr, uaddr2, timeout=[N ms]) futex_wait_requeue_pi() futex_wait_queue_me() freezable_schedule() futex(LOCK_PI, uaddr2) futex(CMP_REQUEUE_PI, uaddr, uaddr2, 1, 0) /* requeues waiter to uaddr2 */ futex(UNLOCK_PI, uaddr2) wake_futex_pi() cmp_futex_value_locked(uaddr2, waiter) wake_up_q() task> futex(LOCK_PI, uaddr2) __rt_mutex_start_proxy_lock() try_to_take_rt_mutex() /* steals lock */ rt_mutex_set_owner(lock, stealer) rt_mutex_wait_proxy_lock() __rt_mutex_slowlock() try_to_take_rt_mutex() /* fails, lock held by stealer */ if (timeout && 
!timeout->task) return -ETIMEDOUT; fixup_owner() /* lock wasn't acquired, so, fixup_pi_state_owner skipped */ return -ETIMEDOUT; /* At this point, we've returned -ETIMEDOUT to userspace, but the * futex word shows waiter to be the owner, and the pi_mutex has * stealer as the owner */ futex_lock(LOCK_PI, uaddr2) -> bails with EDEADLK, futex word says we're owner. And suggested that what commit: 73d786bd043e ("futex: Rework inconsistent rt_mutex/futex_q state") removes from fixup_owner() looks to be just what is needed. And indeed it is -- I completely missed that requeue_pi could also result in this case. So we need to restore that, except that subsequent patches, like commit: 16ffa12d7425 ("futex: Pull rt_mutex_futex_unlock() out from under hb->lock") changed all the locking rules. Even without that, the sequence: - if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) { - locked = 1; - goto out; - } - raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock); - owner = rt_mutex_owner(&q->pi_state->pi_mutex); - if (!owner) - owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); - raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock); - ret = fixup_pi_state_owner(uaddr, q, owner); already suggests there were races; otherwise we'd never have to look at next_owner. So instead of doing 3 consecutive wait_lock sections with who knows what races, we do it all in a single section. Additionally, the usage of pi_state->owner in fixup_owner() was only safe because only the rt_mutex owner would modify it, which this additional case wrecks. Luckily the values can only change away and not to the value we're testing, this means we can do a speculative test and double check once we have the wait_lock. 
Fixes: 73d786bd043e ("futex: Rework inconsistent rt_mutex/futex_q state") Reported-by: Julia Cartwright Reported-by: Gratian Crisan Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Tested-by: Julia Cartwright Tested-by: Gratian Crisan Cc: Darren Hart Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20171208124939.7livp7no2ov65rrc@hirez.programming.kicks-ass.net --- kernel/futex.c | 83 +++++++++++++++++++++++++++++++++-------- kernel/locking/rtmutex.c | 26 +++++++++---- kernel/locking/rtmutex_common.h | 1 + 3 files changed, 87 insertions(+), 23 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 57d0b3657e16..9e69589b9248 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2294,21 +2294,17 @@ static void unqueue_me_pi(struct futex_q *q) spin_unlock(q->lock_ptr); } -/* - * Fixup the pi_state owner with the new owner. - * - * Must be called with hash bucket lock held and mm->sem held for non - * private futexes. - */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner) + struct task_struct *argowner) { - u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; u32 uval, uninitialized_var(curval), newval; - struct task_struct *oldowner; + struct task_struct *oldowner, *newowner; + u32 newtid; int ret; + lockdep_assert_held(q->lock_ptr); + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); oldowner = pi_state->owner; @@ -2317,11 +2313,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, newtid |= FUTEX_OWNER_DIED; /* - * We are here either because we stole the rtmutex from the - * previous highest priority waiter or we are the highest priority - * waiter but have failed to get the rtmutex the first time. + * We are here because either: + * + * - we stole the lock and pi_state->owner needs updating to reflect + * that (@argowner == current), * - * We have to replace the newowner TID in the user space variable. 
+ * or: + * + * - someone stole our lock and we need to fix things to point to the + * new owner (@argowner == NULL). + * + * Either way, we have to replace the TID in the user space variable. * This must be atomic as we have to preserve the owner died bit here. * * Note: We write the user space value _before_ changing the pi_state @@ -2334,6 +2336,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, * in the PID check in lookup_pi_state. */ retry: + if (!argowner) { + if (oldowner != current) { + /* + * We raced against a concurrent self; things are + * already fixed up. Nothing to do. + */ + ret = 0; + goto out_unlock; + } + + if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { + /* We got the lock after all, nothing to fix. */ + ret = 0; + goto out_unlock; + } + + /* + * Since we just failed the trylock; there must be an owner. + */ + newowner = rt_mutex_owner(&pi_state->pi_mutex); + BUG_ON(!newowner); + } else { + WARN_ON_ONCE(argowner != current); + if (oldowner == current) { + /* + * We raced against a concurrent self; things are + * already fixed up. Nothing to do. + */ + ret = 0; + goto out_unlock; + } + newowner = argowner; + } + + newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + if (get_futex_value_locked(&uval, uaddr)) goto handle_fault; @@ -2434,15 +2472,28 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * Got the lock. We might not be the anticipated owner if we * did a lock-steal - fix up the PI-state in that case: * - * We can safely read pi_state->owner without holding wait_lock - * because we now own the rt_mutex, only the owner will attempt - * to change it. + * Speculative pi_state->owner read (we don't hold wait_lock); + * since we own the lock pi_state->owner == current is the + * stable state, anything else needs more attention. 
*/ if (q->pi_state->owner != current) ret = fixup_pi_state_owner(uaddr, q, current); goto out; } + /* + * If we didn't get the lock; check if anybody stole it from us. In + * that case, we need to fix up the uval to point to them instead of + * us, otherwise bad things happen. [10] + * + * Another speculative read; pi_state->owner == current is unstable + * but needs our attention. + */ + if (q->pi_state->owner == current) { + ret = fixup_pi_state_owner(uaddr, q, NULL); + goto out; + } + /* * Paranoia check. If we did not take the lock, then we should not be * the owner of the rt_mutex. diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 6f3dba6e4e9e..65cc0cb984e6 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, return ret; } +static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock) +{ + int ret = try_to_take_rt_mutex(lock, current, NULL); + + /* + * try_to_take_rt_mutex() sets the lock waiters bit + * unconditionally. Clean this up. + */ + fixup_rt_mutex_waiters(lock); + + return ret; +} + /* * Slow path try-lock function: */ @@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) */ raw_spin_lock_irqsave(&lock->wait_lock, flags); - ret = try_to_take_rt_mutex(lock, current, NULL); - - /* - * try_to_take_rt_mutex() sets the lock waiters bit - * unconditionally. Clean this up. 
- */ - fixup_rt_mutex_waiters(lock); + ret = __rt_mutex_slowtrylock(lock); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); @@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) return rt_mutex_slowtrylock(lock); } +int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock) +{ + return __rt_mutex_slowtrylock(lock); +} + /** * rt_mutex_timed_lock - lock a rt_mutex interruptible * the timeout structure is provided diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 124e98ca0b17..68686b3ec3c1 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter); extern int rt_mutex_futex_trylock(struct rt_mutex *l); +extern int __rt_mutex_futex_trylock(struct rt_mutex *l); extern void rt_mutex_futex_unlock(struct rt_mutex *lock); extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, -- cgit v1.2.3 From fbe0e839d1e22d88810f3ee3e2f1479be4c0aa4a Mon Sep 17 00:00:00 2001 From: Li Jinyue Date: Thu, 14 Dec 2017 17:04:54 +0800 Subject: futex: Prevent overflow by strengthen input validation UBSAN reports signed integer overflow in kernel/futex.c: UBSAN: Undefined behaviour in kernel/futex.c:2041:18 signed integer overflow: 0 - -2147483648 cannot be represented in type 'int' Add a sanity check to catch negative values of nr_wake and nr_requeue. 
Signed-off-by: Li Jinyue Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: dvhart@infradead.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1513242294-31786-1-git-send-email-lijinyue@huawei.com --- kernel/futex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 9e69589b9248..8c5424dd5924 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1878,6 +1878,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_q *this, *next; DEFINE_WAKE_Q(wake_q); + if (nr_wake < 0 || nr_requeue < 0) + return -EINVAL; + /* * When PI not supported: return -ENOSYS if requeue_pi is true, * consequently the compiler knows requeue_pi is always false past -- cgit v1.2.3 From 7cf1aaa2ad3855bd5e95bef382a66fe122fc9b01 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Dec 2017 16:18:16 -0800 Subject: x86/timer: Don't inline __const_udelay __const_udelay is marked inline, and LTO will happily inline it everywhere Dropping the inline saves ~44k text in a LTO build. 13999560 1740864 1499136 17239560 1070e08 vmlinux-with-udelay-inline 13954764 1736768 1499136 17190668 1064f0c vmlinux-wo-udelay-inline Inlining it has no advantage in general, so its the right thing to do. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20171222001821.2157-2-andi@firstfloor.org --- arch/x86/lib/delay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 553f8fd23cc4..09c83b2f80d2 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -162,7 +162,7 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -inline void __const_udelay(unsigned long xloops) +void __const_udelay(unsigned long xloops) { unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? 
: loops_per_jiffy; int d0; -- cgit v1.2.3 From 80a3e3949b8f3a3efa853d8752fd7ed5ec02de2d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Dec 2017 16:18:20 -0800 Subject: x86/extable: Mark exception handler functions visible Mark the C exception handler functions that are directly called through exception tables visible. LTO needs to know they are accessed from assembler. [ tglx: Mopped up the wrecked argument alignment. Sigh.... ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20171222001821.2157-6-andi@firstfloor.org --- arch/x86/mm/extable.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 9fe656c42aa5..45f5d6cf65ae 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -21,16 +21,16 @@ ex_fixup_handler(const struct exception_table_entry *x) return (ex_handler_t)((unsigned long)&x->handler + x->handler); } -bool ex_handler_default(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_default(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { regs->ip = ex_fixup_addr(fixup); return true; } EXPORT_SYMBOL(ex_handler_default); -bool ex_handler_fault(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { regs->ip = ex_fixup_addr(fixup); regs->ax = trapnr; @@ -42,8 +42,8 @@ EXPORT_SYMBOL_GPL(ex_handler_fault); * Handler for UD0 exception following a failed test against the * result of a refcount inc/dec/add/sub. */ -bool ex_handler_refcount(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_refcount(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { /* First unconditionally saturate the refcount. 
*/ *(int *)regs->cx = INT_MIN / 2; @@ -95,8 +95,8 @@ EXPORT_SYMBOL(ex_handler_refcount); * of vulnerability by restoring from the initial state (essentially, zeroing * out all the FPU registers) if we can't restore from the task's FPU state. */ -bool ex_handler_fprestore(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { regs->ip = ex_fixup_addr(fixup); @@ -108,8 +108,8 @@ bool ex_handler_fprestore(const struct exception_table_entry *fixup, } EXPORT_SYMBOL_GPL(ex_handler_fprestore); -bool ex_handler_ext(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_ext(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { /* Special hack for uaccess_err */ current->thread.uaccess_err = 1; @@ -118,8 +118,8 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_ext); -bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n", (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) @@ -133,8 +133,8 @@ bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); -bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n", (unsigned int)regs->cx, (unsigned int)regs->dx, @@ -147,8 +147,8 @@ bool ex_handler_wrmsr_unsafe(const struct 
exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_wrmsr_unsafe); -bool ex_handler_clear_fs(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr) +__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr) { if (static_cpu_has(X86_BUG_NULL_SEG)) asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS)); @@ -157,7 +157,7 @@ bool ex_handler_clear_fs(const struct exception_table_entry *fixup, } EXPORT_SYMBOL(ex_handler_clear_fs); -bool ex_has_fault_handler(unsigned long ip) +__visible bool ex_has_fault_handler(unsigned long ip) { const struct exception_table_entry *e; ex_handler_t handler; -- cgit v1.2.3 From 327867faa4d66628fcd92a843adb3345736a5313 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Dec 2017 16:18:21 -0800 Subject: x86/idt: Mark IDT tables __initconst const variables must use __initconst, not __initdata. Fix this up for the IDT tables, which got it consistently wrong. Fixes: 16bc18d895ce ("x86/idt: Move 32-bit idt_descr to C code") Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20171222001821.2157-7-andi@firstfloor.org --- arch/x86/kernel/idt.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index d985cef3984f..56d99be3706a 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -56,7 +56,7 @@ struct idt_data { * Early traps running on the DEFAULT_STACK because the other interrupt * stacks work only after cpu_init(). */ -static const __initdata struct idt_data early_idts[] = { +static const __initconst struct idt_data early_idts[] = { INTG(X86_TRAP_DB, debug), SYSG(X86_TRAP_BP, int3), #ifdef CONFIG_X86_32 @@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = { * the traps which use them are reinitialized with IST after cpu_init() has * set up TSS. 
*/ -static const __initdata struct idt_data def_idts[] = { +static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_DE, divide_error), INTG(X86_TRAP_NMI, nmi), INTG(X86_TRAP_BR, bounds), @@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = { /* * The APIC and SMP idt entries */ -static const __initdata struct idt_data apic_idts[] = { +static const __initconst struct idt_data apic_idts[] = { #ifdef CONFIG_SMP INTG(RESCHEDULE_VECTOR, reschedule_interrupt), INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), @@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = { * Early traps running on the DEFAULT_STACK because the other interrupt * stacks work only after cpu_init(). */ -static const __initdata struct idt_data early_pf_idts[] = { +static const __initconst struct idt_data early_pf_idts[] = { INTG(X86_TRAP_PF, page_fault), }; @@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = { * Override for the debug_idt. Same as the default, but with interrupt * stack set to DEFAULT_STACK (0). Required for NMI trap handling. */ -static const __initdata struct idt_data dbg_idts[] = { +static const __initconst struct idt_data dbg_idts[] = { INTG(X86_TRAP_DB, debug), INTG(X86_TRAP_BP, int3), }; @@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * The exceptions which use Interrupt stacks. They are setup after * cpu_init() when the TSS has been initialized. */ -static const __initdata struct idt_data ist_idts[] = { +static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), SISTG(X86_TRAP_BP, int3, DEBUG_STACK), -- cgit v1.2.3 From 30c7e5b123673d5e570e238dbada2fb68a87212c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Dec 2017 10:20:11 +0100 Subject: x86/tsc: Allow TSC calibration without PIT Zhang Rui reported that a Surface Pro 4 will fail to boot with lapic=notscdeadline. 
Part of the problem is that that machine doesn't have a PIT. If, for some reason, the TSC init has to fall back to TSC calibration, it relies on the PIT to be present. Allow TSC calibration to reliably fall back to HPET. The below results in an accurate TSC measurement when forced on a IVB: tsc: Unable to calibrate against PIT tsc: No reference (HPET/PMTIMER) available tsc: Unable to calibrate against PIT tsc: using HPET reference calibration tsc: Detected 2792.451 MHz processor Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: len.brown@intel.com Cc: rui.zhang@intel.com Link: https://lkml.kernel.org/r/20171222092243.333145937@infradead.org --- arch/x86/include/asm/i8259.h | 5 +++++ arch/x86/kernel/tsc.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index c8376b40e882..5cdcdbd4d892 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -69,6 +69,11 @@ struct legacy_pic { extern struct legacy_pic *legacy_pic; extern struct legacy_pic null_legacy_pic; +static inline bool has_legacy_pic(void) +{ + return legacy_pic != &null_legacy_pic; +} + static inline int nr_legacy_irqs(void) { return legacy_pic->nr_legacy_irqs; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e169e85db434..a2c9dd8bfc6f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -25,6 +25,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -363,6 +364,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) unsigned long tscmin, tscmax; int pitcnt; + if (!has_legacy_pic()) { + /* + * Relies on tsc_early_delay_calibrate() to have given us semi + * usable udelay(), wait for the same 50ms we would have with + * the PIT loop below. 
+ */ + udelay(10 * USEC_PER_MSEC); + udelay(10 * USEC_PER_MSEC); + udelay(10 * USEC_PER_MSEC); + udelay(10 * USEC_PER_MSEC); + udelay(10 * USEC_PER_MSEC); + return ULONG_MAX; + } + /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); @@ -487,6 +502,9 @@ static unsigned long quick_pit_calibrate(void) u64 tsc, delta; unsigned long d1, d2; + if (!has_legacy_pic()) + return 0; + /* Set the Gate high, disable speaker */ outb((inb(0x61) & ~0x02) | 0x01, 0x61); -- cgit v1.2.3 From 6d671e1b85c63e7a337ba76c1a154c091545cff8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Dec 2017 10:20:12 +0100 Subject: x86/time: Unconditionally register legacy timer interrupt Even without a PIC/PIT the legacy timer interrupt is required for HPET in legacy replacement mode. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: len.brown@intel.com Cc: rui.zhang@intel.com Link: https://lkml.kernel.org/r/20171222092243.382623763@infradead.org --- arch/x86/kernel/time.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 749d189f8cd4..774ebafa97c4 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -69,9 +69,12 @@ static struct irqaction irq0 = { static void __init setup_default_timer_irq(void) { - if (!nr_legacy_irqs()) - return; - setup_irq(0, &irq0); + /* + * Unconditionally register the legacy timer; even without legacy + * PIC/PIT we need this for the HPET0 in legacy replacement mode. + */ + if (setup_irq(0, &irq0)) + pr_info("Failed to register legacy timer interrupt\n"); } /* Default timer init function */ -- cgit v1.2.3 From aa83c45762a242acce9b35020363225a7b59d7c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Dec 2017 10:20:13 +0100 Subject: x86/tsc: Introduce early tsc clocksource Without TSC_KNOWN_FREQ the TSC clocksource is registered so late that the kernel first switches to the HPET. 
Using HPET on large CPU count machines is undesirable. Therefore register a tsc-early clocksource using the preliminary tsc_khz from quick calibration. Then when the final TSC calibration is done, it can switch to the tuned frequency. The only notably problem is that the real tsc clocksource must be marked with CLOCK_SOURCE_VALID_FOR_HRES, otherwise it will not be selected when unregistering tsc-early. tsc-early cannot be left registered, because then the clocksource code would fall back to it when we tsc clocksource is marked unstable later. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: len.brown@intel.com Cc: rui.zhang@intel.com Cc: Len Brown Link: https://lkml.kernel.org/r/20171222092243.431585460@infradead.org --- arch/x86/kernel/tsc.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a2c9dd8bfc6f..fb4302738410 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1006,8 +1006,6 @@ static void __init detect_art(void) /* clocksource code */ -static struct clocksource clocksource_tsc; - static void tsc_resume(struct clocksource *cs) { tsc_verify_tsc_adjust(true); @@ -1058,12 +1056,31 @@ static void tsc_cs_tick_stable(struct clocksource *cs) /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ +static struct clocksource clocksource_tsc_early = { + .name = "tsc-early", + .rating = 299, + .read = read_tsc, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS | + CLOCK_SOURCE_MUST_VERIFY, + .archdata = { .vclock_mode = VCLOCK_TSC }, + .resume = tsc_resume, + .mark_unstable = tsc_cs_mark_unstable, + .tick_stable = tsc_cs_tick_stable, +}; + +/* + * Must mark VALID_FOR_HRES early such that when we unregister tsc_early + * this one will immediately take over. We will only register if TSC has + * been found good. 
+ */ static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | + CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_MUST_VERIFY, .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, @@ -1187,8 +1204,8 @@ static void tsc_refine_calibration_work(struct work_struct *work) int cpu; /* Don't bother refining TSC on unstable systems */ - if (check_tsc_unstable()) - goto out; + if (tsc_unstable) + return; /* * Since the work is started early in boot, we may be @@ -1240,9 +1257,13 @@ static void tsc_refine_calibration_work(struct work_struct *work) set_cyc2ns_scale(tsc_khz, cpu, tsc_stop); out: + if (tsc_unstable) + return; + if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); + clocksource_unregister(&clocksource_tsc_early); } @@ -1251,13 +1272,11 @@ static int __init init_tsc_clocksource(void) if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) return 0; + if (check_tsc_unstable()) + return 0; + if (tsc_clocksource_reliable) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - /* lower the rating if we already know its unstable: */ - if (check_tsc_unstable()) { - clocksource_tsc.rating = 0; - clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; @@ -1270,6 +1289,7 @@ static int __init init_tsc_clocksource(void) if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); + clocksource_unregister(&clocksource_tsc_early); return 0; } @@ -1374,9 +1394,12 @@ void __init tsc_init(void) check_system_tsc_reliable(); - if (unsynchronized_tsc()) + if (unsynchronized_tsc()) { mark_tsc_unstable("TSCs unsynchronized"); + return; + } + clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); } 
-- cgit v1.2.3 From a9445e47d897054876b8f43e46dc5a3eca2b844d Mon Sep 17 00:00:00 2001 From: "Max R. P. Grossmann" Date: Mon, 8 Jan 2018 20:01:57 +0100 Subject: posix-cpu-timers: Make set_process_cpu_timer() more robust Because the return value of cpu_timer_sample_group() is not checked, compilers and static checkers can legitimately warn about a potential use of the uninitialized variable 'now'. This is not a runtime issue as all call sites hand in valid clock ids. Also cpu_timer_sample_group() is invoked unconditionally even when the result is not used because *oldval is NULL. Make the invocation conditional and check the return value. [ tglx: Massage changelog ] Signed-off-by: Max R. P. Grossmann Signed-off-by: Thomas Gleixner Cc: john.stultz@linaro.org Link: https://lkml.kernel.org/r/20180108190157.10048-1-m@max.pm --- kernel/time/posix-cpu-timers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index cef79ca5bbd5..ec9f5da6f163 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1189,9 +1189,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, u64 now; WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); - cpu_timer_sample_group(clock_idx, tsk, &now); - if (oldval) { + if (oldval && cpu_timer_sample_group(clock_idx, tsk, &now) != -EINVAL) { /* * We are setting itimer. The *oldval is absolute and we update * it to be relative, *newval argument is relative and we update -- cgit v1.2.3 From 32c9c801a853f181448ed4e8730168c556f9e05a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:43 +0100 Subject: x86/apic: Install an empty physflat_init_apic_ldr As the comment already stated, there is no need for setting up LDR (and DFR) in physflat mode as it remains unused (see SDM, 10.6.2.1). flat_init_apic_ldr only served as a placeholder for a nop operation so far, causing no harm. 
That will change when running over the Jailhouse hypervisor. Here we must not touch LDR in a way that destroys the mapping originally set up by the Linux root cell. Jailhouse enforces this setting in order to efficiently validate any IPI requests sent by a cell. Avoid a needless clash caused by flat_init_apic_ldr by installing a true nop handler. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/f9867d294cdae4d45ed89d3a2e6adb524f4f6794.1511770314.git.jan.kiszka@siemens.com --- arch/x86/kernel/apic/apic_flat_64.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 25a87028cb3f..4b5547789713 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -218,6 +218,15 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } +static void physflat_init_apic_ldr(void) +{ + /* + * LDR and DFR are not involved in physflat mode, rather: + * "In physical destination mode, the destination processor is + * specified by its local APIC ID [...]." (Intel SDM, 10.6.2.1) + */ +} + static void physflat_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -251,8 +260,7 @@ static struct apic apic_physflat __ro_after_init = { .dest_logical = 0, .check_apicid_used = NULL, - /* not needed, but shouldn't hurt: */ - .init_apic_ldr = flat_init_apic_ldr, + .init_apic_ldr = physflat_init_apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, -- cgit v1.2.3 From e348caef8b4a161cc27bec8f7500b7e100370ef1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:44 +0100 Subject: x86/platform: Control warm reset setup via legacy feature flag Allow to turn off the setup of BIOS-managed warm reset via a new flag in x86_legacy_features. 
Besides the UV1, the upcoming Jailhouse guest support needs this switched off. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/44376558129d70a2c1527959811371ef4b82e829.1511770314.git.jan.kiszka@siemens.com --- arch/x86/include/asm/x86_init.h | 1 + arch/x86/kernel/apic/x2apic_uv_x.c | 1 + arch/x86/kernel/platform-quirks.c | 1 + arch/x86/kernel/smpboot.c | 4 ++-- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index aa4747569e23..fc2f082ac635 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -212,6 +212,7 @@ enum x86_legacy_i8042_state { struct x86_legacy_features { enum x86_legacy_i8042_state i8042; int rtc; + int warm_reset; int no_vga; int reserve_bios_regions; struct x86_legacy_devices devices; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e1b8e8bf6b3c..6de35fc8fb3a 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -316,6 +316,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } else if (!strcmp(oem_table_id, "UVH")) { /* Only UV1 systems: */ uv_system_type = UV_NON_UNIQUE_APIC; + x86_platform.legacy.warm_reset = 0; __this_cpu_write(x2apic_extra_bits, pnodeid << uvh_apicid.s.pnode_shift); uv_set_apicid_hibit(); uv_apic = 1; diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index 39a59299bfa0..235fe6008ac8 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -9,6 +9,7 @@ void __init x86_early_init_platform_quirks(void) { x86_platform.legacy.i8042 = X86_LEGACY_I8042_EXPECTED_PRESENT; x86_platform.legacy.rtc = 1; + x86_platform.legacy.warm_reset = 1; x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 1; diff --git a/arch/x86/kernel/smpboot.c
b/arch/x86/kernel/smpboot.c index ed556d50d7ed..9adcae1b135c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -934,7 +934,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, * the targeted processor. */ - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { + if (x86_platform.legacy.warm_reset) { pr_debug("Setting warm reset code and vector.\n"); @@ -1006,7 +1006,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, /* mark "stuck" area as not stuck */ *trampoline_status = 0; - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { + if (x86_platform.legacy.warm_reset) { /* * Cleanup possible dangling ends... */ -- cgit v1.2.3 From a09c5ec00a120dae52eceef3eebff93ed729bb43 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:45 +0100 Subject: x86: Introduce and use MP IRQ trigger and polarity defines MP_IRQDIR_* constants pointed in the right direction but remained unused so far: It's cleaner to use symbolic values for the IRQ flags in the MP config table. That also saves some comments. 
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/60809926663a1d38e2a5db47d020d6e2e7a70019.1511770314.git.jan.kiszka@siemens.com --- arch/x86/include/asm/mpspec_def.h | 14 +++++++++++--- arch/x86/kernel/apic/io_apic.c | 20 ++++++++++---------- arch/x86/kernel/mpparse.c | 23 ++++++++++++++--------- arch/x86/platform/intel-mid/sfi.c | 5 ++--- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index a6bec8028480..6fb923a34309 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -128,9 +128,17 @@ enum mp_irq_source_types { mp_ExtINT = 3 }; -#define MP_IRQDIR_DEFAULT 0 -#define MP_IRQDIR_HIGH 1 -#define MP_IRQDIR_LOW 3 +#define MP_IRQPOL_DEFAULT 0x0 +#define MP_IRQPOL_ACTIVE_HIGH 0x1 +#define MP_IRQPOL_RESERVED 0x2 +#define MP_IRQPOL_ACTIVE_LOW 0x3 +#define MP_IRQPOL_MASK 0x3 + +#define MP_IRQTRIG_DEFAULT 0x0 +#define MP_IRQTRIG_EDGE 0x4 +#define MP_IRQTRIG_RESERVED 0x8 +#define MP_IRQTRIG_LEVEL 0xc +#define MP_IRQTRIG_MASK 0xc #define MP_APIC_ALL 0xFF diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8a7963421460..8ad2e410974f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -800,18 +800,18 @@ static int irq_polarity(int idx) /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].irqflag & 0x03) { - case 0: + switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) { + case MP_IRQPOL_DEFAULT: /* conforms to spec, ie. 
bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) return default_ISA_polarity(idx); else return default_PCI_polarity(idx); - case 1: + case MP_IRQPOL_ACTIVE_HIGH: return IOAPIC_POL_HIGH; - case 2: + case MP_IRQPOL_RESERVED: pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n"); - case 3: + case MP_IRQPOL_ACTIVE_LOW: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_POL_LOW; } @@ -845,8 +845,8 @@ static int irq_trigger(int idx) /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].irqflag >> 2) & 0x03) { - case 0: + switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) { + case MP_IRQTRIG_DEFAULT: /* conforms to spec, ie. bus-type dependent trigger mode */ if (test_bit(bus, mp_bus_not_pci)) trigger = default_ISA_trigger(idx); @@ -854,11 +854,11 @@ static int irq_trigger(int idx) trigger = default_PCI_trigger(idx); /* Take EISA into account */ return eisa_irq_trigger(idx, bus, trigger); - case 1: + case MP_IRQTRIG_EDGE: return IOAPIC_EDGE; - case 2: + case MP_IRQTRIG_RESERVED: pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n"); - case 3: + case MP_IRQTRIG_LEVEL: default: /* Pointless default required due to do gcc stupidity */ return IOAPIC_LEVEL; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3a4b12809ab5..27d0a1712663 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -281,7 +281,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) int ELCR_fallback = 0; intsrc.type = MP_INTSRC; - intsrc.irqflag = 0; /* conforming */ + intsrc.irqflag = MP_IRQTRIG_DEFAULT | MP_IRQPOL_DEFAULT; intsrc.srcbus = 0; intsrc.dstapic = mpc_ioapic_id(0); @@ -324,10 +324,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) * copy that information over to the MP table in the * irqflag field (level sensitive, active high polarity). 
*/ - if (ELCR_trigger(i)) - intsrc.irqflag = 13; - else - intsrc.irqflag = 0; + if (ELCR_trigger(i)) { + intsrc.irqflag = MP_IRQTRIG_LEVEL | + MP_IRQPOL_ACTIVE_HIGH; + } else { + intsrc.irqflag = MP_IRQTRIG_DEFAULT | + MP_IRQPOL_DEFAULT; + } } intsrc.srcbusirq = i; @@ -419,7 +422,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) construct_ioapic_table(mpc_default_type); lintsrc.type = MP_LINTSRC; - lintsrc.irqflag = 0; /* conforming */ + lintsrc.irqflag = MP_IRQTRIG_DEFAULT | MP_IRQPOL_DEFAULT; lintsrc.srcbusid = 0; lintsrc.srcbusirq = 0; lintsrc.destapic = MP_APIC_ALL; @@ -664,7 +667,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) if (m->irqtype != mp_INT) return 0; - if (m->irqflag != 0x0f) + if (m->irqflag != (MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW)) return 0; /* not legacy */ @@ -673,7 +676,8 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].irqflag != 0x0f) + if (mp_irqs[i].irqflag != (MP_IRQTRIG_LEVEL | + MP_IRQPOL_ACTIVE_LOW)) continue; if (mp_irqs[i].srcbus != m->srcbus) @@ -784,7 +788,8 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].irqflag != 0x0f) + if (mp_irqs[i].irqflag != (MP_IRQTRIG_LEVEL | + MP_IRQPOL_ACTIVE_LOW)) continue; if (nr_m_spare > 0) { diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 19b43e3a9f0f..7be1e1fe9ae3 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -96,8 +96,7 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table) pentry->freq_hz, pentry->irq); mp_irq.type = MP_INTSRC; mp_irq.irqtype = mp_INT; - /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ - mp_irq.irqflag = 5; + mp_irq.irqflag = MP_IRQTRIG_EDGE | MP_IRQPOL_ACTIVE_HIGH; mp_irq.srcbus = MP_BUS_ISA; mp_irq.srcbusirq = pentry->irq; /* IRQ */ mp_irq.dstapic = MP_APIC_ALL; @@ 
-168,7 +167,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) totallen, (u32)pentry->phys_addr, pentry->irq); mp_irq.type = MP_INTSRC; mp_irq.irqtype = mp_INT; - mp_irq.irqflag = 0xf; /* level trigger and active low */ + mp_irq.irqflag = MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW; mp_irq.srcbus = MP_BUS_ISA; mp_irq.srcbusirq = pentry->irq; /* IRQ */ mp_irq.dstapic = MP_APIC_ALL; -- cgit v1.2.3 From 4a362601baa6fff92b576d85199f1948cec2fb3b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:46 +0100 Subject: x86/jailhouse: Add infrastructure for running in non-root cell The Jailhouse hypervisor is able to statically partition a multicore system into multiple so-called cells. Linux is used as boot loader and continues to run in the root cell after Jailhouse is enabled. Linux can also run in non-root cells. Jailhouse does not emulate usual x86 devices. It also provides no complex ACPI but basic platform information that the boot loader forwards via setup data. This adds the infrastructure to detect when running in a non-root cell so that the platform can be configured as required in succeeding steps. Support is limited to x86-64 so far, primarily because no boot loader stub exists for i386 and, thus, we wouldn't be able to test the 32-bit path. 
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/7f823d077b38b1a70c526b40b403f85688c137d3.1511770314.git.jan.kiszka@siemens.com --- arch/x86/Kconfig | 8 ++++ arch/x86/include/asm/hypervisor.h | 1 + arch/x86/include/asm/jailhouse_para.h | 26 +++++++++++++ arch/x86/include/uapi/asm/bootparam.h | 22 +++++++++++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/cpu/hypervisor.c | 4 ++ arch/x86/kernel/jailhouse.c | 73 +++++++++++++++++++++++++++++++++++ 7 files changed, 136 insertions(+) create mode 100644 arch/x86/include/asm/jailhouse_para.h create mode 100644 arch/x86/kernel/jailhouse.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ff4e9cd99854..fbea8d15fcfb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -796,6 +796,14 @@ config PARAVIRT_TIME_ACCOUNTING config PARAVIRT_CLOCK bool +config JAILHOUSE_GUEST + bool "Jailhouse non-root cell support" + depends on X86_64 + ---help--- + This option allows to run Linux as guest in a Jailhouse non-root + cell. You can leave this option disabled if you only want to start + Jailhouse and run Linux afterwards in the root cell. 
+ endif #HYPERVISOR_GUEST config NO_BOOTMEM diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 96aa6b9884dc..8c5aaba6633f 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -28,6 +28,7 @@ enum x86_hypervisor_type { X86_HYPER_XEN_PV, X86_HYPER_XEN_HVM, X86_HYPER_KVM, + X86_HYPER_JAILHOUSE, }; #ifdef CONFIG_HYPERVISOR_GUEST diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h new file mode 100644 index 000000000000..875b54376689 --- /dev/null +++ b/arch/x86/include/asm/jailhouse_para.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL2.0 */ + +/* + * Jailhouse paravirt_ops implementation + * + * Copyright (c) Siemens AG, 2015-2017 + * + * Authors: + * Jan Kiszka + */ + +#ifndef _ASM_X86_JAILHOUSE_PARA_H +#define _ASM_X86_JAILHOUSE_PARA_H + +#include + +#ifdef CONFIG_JAILHOUSE_GUEST +bool jailhouse_paravirt(void); +#else +static inline bool jailhouse_paravirt(void) +{ + return false; +} +#endif + +#endif /* _ASM_X86_JAILHOUSE_PARA_H */ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index afdd5ae0fcc4..aebf60357758 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -9,6 +9,7 @@ #define SETUP_PCI 3 #define SETUP_EFI 4 #define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -126,6 +127,27 @@ struct boot_e820_entry { __u32 type; } __attribute__((packed)); +/* + * Smallest compatible version of jailhouse_setup_data required by this kernel. + */ +#define JAILHOUSE_SETUP_REQUIRED_VERSION 1 + +/* + * The boot loader is passing platform information via this Jailhouse-specific + * setup data structure. 
+ */ +struct jailhouse_setup_data { + u16 version; + u16 compatible_version; + u16 pm_timer_address; + u16 num_cpus; + u64 pci_mmconfig_base; + u32 tsc_khz; + u32 apic_khz; + u8 standard_ioapic; + u8 cpu_ids[255]; +} __attribute__((packed)); + /* The so-called "zeropage" */ struct boot_params { struct screen_info screen_info; /* 0x000 */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 81bb565f4497..aed9296dccd3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -112,6 +112,8 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o +obj-$(CONFIG_JAILHOUSE_GUEST) += jailhouse.o + obj-$(CONFIG_EISA) += eisa.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index bea8d3e24f50..479ca4728de0 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -31,6 +31,7 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv; extern const struct hypervisor_x86 x86_hyper_xen_pv; extern const struct hypervisor_x86 x86_hyper_xen_hvm; extern const struct hypervisor_x86 x86_hyper_kvm; +extern const struct hypervisor_x86 x86_hyper_jailhouse; static const __initconst struct hypervisor_x86 * const hypervisors[] = { @@ -45,6 +46,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = #ifdef CONFIG_KVM_GUEST &x86_hyper_kvm, #endif +#ifdef CONFIG_JAILHOUSE_GUEST + &x86_hyper_jailhouse, +#endif }; enum x86_hypervisor_type x86_hyper_type; diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c new file mode 100644 index 000000000000..1186b8909595 --- /dev/null +++ b/arch/x86/kernel/jailhouse.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL2.0 +/* + * Jailhouse paravirt_ops implementation + * + * Copyright (c) Siemens AG, 2015-2017 + * + * Authors: + * Jan Kiszka + */ + +#include +#include +#include +#include + 
+static __initdata struct jailhouse_setup_data setup_data; + +static uint32_t jailhouse_cpuid_base(void) +{ + if (boot_cpu_data.cpuid_level < 0 || + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return 0; + + return hypervisor_cpuid_base("Jailhouse\0\0\0", 0); +} + +static uint32_t __init jailhouse_detect(void) +{ + return jailhouse_cpuid_base(); +} + +static void __init jailhouse_init_platform(void) +{ + u64 pa_data = boot_params.hdr.setup_data; + struct setup_data header; + void *mapping; + + while (pa_data) { + mapping = early_memremap(pa_data, sizeof(header)); + memcpy(&header, mapping, sizeof(header)); + early_memunmap(mapping, sizeof(header)); + + if (header.type == SETUP_JAILHOUSE && + header.len >= sizeof(setup_data)) { + pa_data += offsetof(struct setup_data, data); + + mapping = early_memremap(pa_data, sizeof(setup_data)); + memcpy(&setup_data, mapping, sizeof(setup_data)); + early_memunmap(mapping, sizeof(setup_data)); + + break; + } + + pa_data = header.next; + } + + if (!pa_data) + panic("Jailhouse: No valid setup data found"); + + if (setup_data.compatible_version > JAILHOUSE_SETUP_REQUIRED_VERSION) + panic("Jailhouse: Unsupported setup data structure"); +} + +bool jailhouse_paravirt(void) +{ + return jailhouse_cpuid_base() != 0; +} + +const struct hypervisor_x86 x86_hyper_jailhouse __refconst = { + .name = "Jailhouse", + .detect = jailhouse_detect, + .init.init_platform = jailhouse_init_platform, +}; -- cgit v1.2.3 From 11c8dc419bbc7b5acef812043feefc53c45ef558 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:47 +0100 Subject: x86/jailhouse: Enable APIC and SMP support Register the APIC which Jailhouse always exposes at 0xfee00000 if in xAPIC mode or via MSRs as x2APIC. The latter is only available if it was already activated because there is no support for switching its mode during runtime. Jailhouse requires the APIC to be operated in phys-flat mode. Ensure that this mode is selected by Linux. 
The available CPUs are taken from the setup data structure that the loader filled and registered with the kernel. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/8b2255da0a9856c530293a67aa9d6addfe102a2b.1511770314.git.jan.kiszka@siemens.com --- arch/x86/kernel/apic/apic_flat_64.c | 4 +++- arch/x86/kernel/jailhouse.c | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 4b5547789713..fcce5a784c71 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -239,7 +240,8 @@ static void physflat_send_IPI_all(int vector) static int physflat_probe(void) { - if (apic == &apic_physflat || num_possible_cpus() > 8) + if (apic == &apic_physflat || num_possible_cpus() > 8 || + jailhouse_paravirt()) return 1; return 0; diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 1186b8909595..57f49963d8dc 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -29,12 +30,43 @@ static uint32_t __init jailhouse_detect(void) return jailhouse_cpuid_base(); } +static void __init jailhouse_get_smp_config(unsigned int early) +{ + unsigned int cpu; + + if (x2apic_enabled()) { + /* + * We do not have access to IR inside Jailhouse non-root cells. + * So we have to run in physical mode. + */ + x2apic_phys = 1; + + /* + * This will trigger the switch to apic_x2apic_phys. + * Empty OEM IDs ensure that only this APIC driver picks up + * the call. 
+ */ + default_acpi_madt_oem_check("", ""); + } + + register_lapic_address(0xfee00000); + + for (cpu = 0; cpu < setup_data.num_cpus; cpu++) { + generic_processor_info(setup_data.cpu_ids[cpu], + boot_cpu_apic_version); + } + + smp_found_config = 1; +} + static void __init jailhouse_init_platform(void) { u64 pa_data = boot_params.hdr.setup_data; struct setup_data header; void *mapping; + x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; + while (pa_data) { mapping = early_memremap(pa_data, sizeof(header)); memcpy(&header, mapping, sizeof(header)); @@ -66,8 +98,18 @@ bool jailhouse_paravirt(void) return jailhouse_cpuid_base() != 0; } +static bool jailhouse_x2apic_available(void) +{ + /* + * The x2APIC is only available if the root cell enabled it. Jailhouse + * does not support switching between xAPIC and x2APIC. + */ + return x2apic_enabled(); +} + const struct hypervisor_x86 x86_hyper_jailhouse __refconst = { .name = "Jailhouse", .detect = jailhouse_detect, .init.init_platform = jailhouse_init_platform, + .init.x2apic_available = jailhouse_x2apic_available, }; -- cgit v1.2.3 From 87e65d05bb0a18e00655a58159790bc8d38e219e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:48 +0100 Subject: x86/jailhouse: Enable PMTIMER Jailhouse exposes the PMTIMER as only reference clock to all cells. Pick up its address from the setup data. Allow to enable the Linux support of it by relaxing its strict dependency on ACPI. 
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/6d5c3fadd801eb3fba9510e2d3db14a9c404a1a0.1511770314.git.jan.kiszka@siemens.com --- arch/x86/Kconfig | 1 + arch/x86/kernel/jailhouse.c | 4 ++++ drivers/acpi/Kconfig | 32 ++++++++++++++++---------------- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fbea8d15fcfb..a936e29245d0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -799,6 +799,7 @@ config PARAVIRT_CLOCK config JAILHOUSE_GUEST bool "Jailhouse non-root cell support" depends on X86_64 + select X86_PM_TIMER ---help--- This option allows to run Linux as guest in a Jailhouse non-root cell. You can leave this option disabled if you only want to start diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 57f49963d8dc..21c107770d67 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -8,6 +8,7 @@ * Jan Kiszka */ +#include #include #include #include @@ -91,6 +92,9 @@ static void __init jailhouse_init_platform(void) if (setup_data.compatible_version > JAILHOUSE_SETUP_REQUIRED_VERSION) panic("Jailhouse: Unsupported setup data structure"); + + pmtmr_ioport = setup_data.pm_timer_address; + pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport); } bool jailhouse_paravirt(void) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 46505396869e..d650c5b6ec90 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -361,22 +361,6 @@ config ACPI_PCI_SLOT i.e., segment/bus/device/function tuples, with physical slots in the system. If you are unsure, say N. -config X86_PM_TIMER - bool "Power Management Timer Support" if EXPERT - depends on X86 - default y - help - The Power Management Timer is available on all ACPI-capable, - in most cases even if ACPI is unusable or blacklisted. 
- - This timing source is not affected by power management features - like aggressive processor idling, throttling, frequency and/or - voltage scaling, unlike the commonly used Time Stamp Counter - (TSC) timing source. - - You should nearly always say Y here because many modern - systems require this timer. - config ACPI_CONTAINER bool "Container and Module Devices" default (ACPI_HOTPLUG_MEMORY || ACPI_HOTPLUG_CPU) @@ -564,3 +548,19 @@ config TPS68470_PMIC_OPREGION using this, are probed. endif # ACPI + +config X86_PM_TIMER + bool "Power Management Timer Support" if EXPERT + depends on X86 && (ACPI || JAILHOUSE_GUEST) + default y + help + The Power Management Timer is available on all ACPI-capable, + in most cases even if ACPI is unusable or blacklisted. + + This timing source is not affected by power management features + like aggressive processor idling, throttling, frequency and/or + voltage scaling, unlike the commonly used Time Stamp Counter + (TSC) timing source. + + You should nearly always say Y here because many modern + systems require this timer. -- cgit v1.2.3 From e85eb632f651e70252bb18b292efaf6961164e32 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:49 +0100 Subject: x86/jailhouse: Set up timekeeping Get the precalibrated frequencies for the TSC and the APIC timer from the Jailhouse platform info and set the kernel values accordingly. 
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/b2557426332fc337a74d3141cb920f7dce9ad601.1511770314.git.jan.kiszka@siemens.com --- arch/x86/kernel/jailhouse.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 21c107770d67..34cf9d3e1751 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -16,6 +16,7 @@ #include static __initdata struct jailhouse_setup_data setup_data; +static unsigned int precalibrated_tsc_khz; static uint32_t jailhouse_cpuid_base(void) { @@ -31,6 +32,16 @@ static uint32_t __init jailhouse_detect(void) return jailhouse_cpuid_base(); } +static void __init jailhouse_timer_init(void) +{ + lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); +} + +static unsigned long jailhouse_get_tsc(void) +{ + return precalibrated_tsc_khz; +} + static void __init jailhouse_get_smp_config(unsigned int early) { unsigned int cpu; @@ -66,8 +77,12 @@ static void __init jailhouse_init_platform(void) struct setup_data header; void *mapping; + x86_init.timers.timer_init = jailhouse_timer_init; x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; + x86_platform.calibrate_cpu = jailhouse_get_tsc; + x86_platform.calibrate_tsc = jailhouse_get_tsc; + while (pa_data) { mapping = early_memremap(pa_data, sizeof(header)); memcpy(&header, mapping, sizeof(header)); @@ -95,6 +110,8 @@ static void __init jailhouse_init_platform(void) pmtmr_ioport = setup_data.pm_timer_address; pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport); + + precalibrated_tsc_khz = setup_data.tsc_khz; } bool jailhouse_paravirt(void) -- cgit v1.2.3 From 0d7c1e22183b9ddaa0b3bf30ece6577741bc13b3 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:50 +0100 Subject: x86/jailhouse: Avoid access of unsupported platform resources Non-root cells do not have CMOS access, thus the warm reset cannot be 
enabled. There is no RTC, thus also no wall clock. Furthermore, there are no ISA IRQs and no PIC. Also disable probing of i8042 devices that are typically blocked for non-root cells. In theory, access could also be granted to a non-root cell, provided the root cell is not using the devices. But there is no concrete scenario in sight, and disabling probing over Jailhouse allows to build generic kernels that keep CONFIG_SERIO enabled for use in normal systems. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/39b68cc2c496501c9d95e6f40e5d76e3053c3908.1511770314.git.jan.kiszka@siemens.com --- arch/x86/kernel/jailhouse.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 34cf9d3e1751..b9f116d62f81 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -13,6 +13,7 @@ #include #include #include +#include #include static __initdata struct jailhouse_setup_data setup_data; @@ -32,6 +33,11 @@ static uint32_t __init jailhouse_detect(void) return jailhouse_cpuid_base(); } +static void jailhouse_get_wallclock(struct timespec *now) +{ + memset(now, 0, sizeof(*now)); +} + static void __init jailhouse_timer_init(void) { lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); @@ -77,11 +83,18 @@ static void __init jailhouse_init_platform(void) struct setup_data header; void *mapping; + x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = jailhouse_timer_init; x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; x86_platform.calibrate_cpu = jailhouse_get_tsc; x86_platform.calibrate_tsc = jailhouse_get_tsc; + x86_platform.get_wallclock = jailhouse_get_wallclock; + x86_platform.legacy.rtc = 0; + x86_platform.legacy.warm_reset = 0; + x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT; + + legacy_pic = &null_legacy_pic; while (pa_data) { mapping = early_memremap(pa_data, 
sizeof(header)); -- cgit v1.2.3 From 5ae4443010b83cce3d55ce5259870e542a7c9551 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:51 +0100 Subject: x86/jailhouse: Silence ACPI warning Jailhouse support does not depend on ACPI, and does not even use it. But if it should be enabled, avoid warning about its absence in the platform. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/939687007cbd7643b02fd330e8616e7e5944063f.1511770314.git.jan.kiszka@siemens.com --- arch/x86/kernel/jailhouse.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index b9f116d62f81..54469ef4c3c7 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -125,6 +125,12 @@ static void __init jailhouse_init_platform(void) pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport); precalibrated_tsc_khz = setup_data.tsc_khz; + + /* + * Avoid that the kernel complains about missing ACPI tables - there + * are none in a non-root cell. + */ + disable_acpi(); } bool jailhouse_paravirt(void) -- cgit v1.2.3 From fd498076821739db38babe72602f7c227587cbb5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:52 +0100 Subject: x86/jailhouse: Halt instead of failing to restart Jailhouse provides no guest-initiated restart. So, do not even try to. 
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/ef8a0ef95c2b17c21066e5f28ea56b58bf7eaa82.1511770314.git.jan.kiszka@siemens.com --- arch/x86/kernel/jailhouse.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 54469ef4c3c7..2b55672ca05f 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -10,10 +10,12 @@ #include #include +#include #include #include #include #include +#include #include static __initdata struct jailhouse_setup_data setup_data; @@ -77,6 +79,12 @@ static void __init jailhouse_get_smp_config(unsigned int early) smp_found_config = 1; } +static void jailhouse_no_restart(void) +{ + pr_notice("Jailhouse: Restart not supported, halting\n"); + machine_halt(); +} + static void __init jailhouse_init_platform(void) { u64 pa_data = boot_params.hdr.setup_data; @@ -96,6 +104,8 @@ static void __init jailhouse_init_platform(void) legacy_pic = &null_legacy_pic; + machine_ops.emergency_restart = jailhouse_no_restart; + while (pa_data) { mapping = early_memremap(pa_data, sizeof(header)); memcpy(&header, mapping, sizeof(header)); -- cgit v1.2.3 From cf878e169d37b596de41322291523951540984c1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:53 +0100 Subject: x86/jailhouse: Wire up IOAPIC for legacy UART ports The typical I/O interrupts in non-root cells are MSI-based. However, the platform UARTs do not support MSI. In order to run a non-root cell that shall use one of them, the standard IOAPIC must be registered and 1:1 routing for IRQ 3 and 4 set up. If an IOAPIC is not available, the boot loader clears standard_ioapic in the setup data, so registration is skipped. If the guest is not allowed to use one of those pins, Jailhouse will simply ignore the access.
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/90d942dda9d48a8046e00bb3c1bb6757c83227be.1511770314.git.jan.kiszka@siemens.com --- arch/x86/kernel/jailhouse.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 2b55672ca05f..01d5b06a42bc 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,15 @@ static unsigned long jailhouse_get_tsc(void) static void __init jailhouse_get_smp_config(unsigned int early) { + struct ioapic_domain_cfg ioapic_cfg = { + .type = IOAPIC_DOMAIN_STRICT, + .ops = &mp_ioapic_irqdomain_ops, + }; + struct mpc_intsrc mp_irq = { + .type = MP_INTSRC, + .irqtype = mp_INT, + .irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE, + }; unsigned int cpu; if (x2apic_enabled()) { @@ -77,6 +87,17 @@ static void __init jailhouse_get_smp_config(unsigned int early) } smp_found_config = 1; + + if (setup_data.standard_ioapic) { + mp_register_ioapic(0, 0xfec00000, gsi_top, &ioapic_cfg); + + /* Register 1:1 mapping for legacy UART IRQs 3 and 4 */ + mp_irq.srcbusirq = mp_irq.dstirq = 3; + mp_save_irq(&mp_irq); + + mp_irq.srcbusirq = mp_irq.dstirq = 4; + mp_save_irq(&mp_irq); + } } static void jailhouse_no_restart(void) -- cgit v1.2.3 From a0c01e4bb92d085462c293091a521cb9e7000371 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Nov 2017 09:11:54 +0100 Subject: x86/jailhouse: Initialize PCI support With this change, PCI devices can be detected and used inside a non-root cell. 
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: jailhouse-dev@googlegroups.com Link: https://lkml.kernel.org/r/e8d19494b96b68a749bcac514795d864ad9c28c3.1511770314.git.jan.kiszka@siemens.com --- arch/x86/kernel/jailhouse.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 01d5b06a42bc..d6d5976a9b51 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,19 @@ static void jailhouse_no_restart(void) machine_halt(); } +static int __init jailhouse_pci_arch_init(void) +{ + pci_direct_init(1); + + /* + * There are no bridges on the virtual PCI root bus under Jailhouse, + * thus no other way to discover all devices than a full scan. + */ + pcibios_last_bus = 0xff; + + return 0; +} + static void __init jailhouse_init_platform(void) { u64 pa_data = boot_params.hdr.setup_data; @@ -115,6 +129,7 @@ static void __init jailhouse_init_platform(void) x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = jailhouse_timer_init; x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; + x86_init.pci.arch_init = jailhouse_pci_arch_init; x86_platform.calibrate_cpu = jailhouse_get_tsc; x86_platform.calibrate_tsc = jailhouse_get_tsc; @@ -157,6 +172,8 @@ static void __init jailhouse_init_platform(void) precalibrated_tsc_khz = setup_data.tsc_khz; + pci_probe = 0; + /* * Avoid that the kernel complains about missing ACPI tables - there * are none in a non-root cell. -- cgit v1.2.3 From b8b9ce4b5aec8de9e23cabb0a26b78641f9ab1d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 22:13:29 +0100 Subject: x86/retpoline: Remove compile time warning Remove the compile time warning when CONFIG_RETPOLINE=y and the compiler does not have retpoline support. 
Linus rationale for this is: It's wrong because it will just make people turn off RETPOLINE, and the asm updates - and return stack clearing - that are independent of the compiler are likely the most important parts because they are likely the ones easiest to target. And it's annoying because most people won't be able to do anything about it. The number of people building their own compiler? Very small. So if their distro hasn't got a compiler yet (and pretty much nobody does), the warning is just annoying crap. It is already properly reported as part of the sysfs interface. The compile-time warning only encourages bad things. Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") Requested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Peter Zijlstra (Intel) Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/CA+55aFzWgquv4i6Mab6bASqYXg3ErV3XDFEYf=GEcCDQg5uAtw@mail.gmail.com --- arch/x86/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 974c61864978..504b1a4535ac 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -240,8 +240,6 @@ ifdef CONFIG_RETPOLINE RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) ifneq ($(RETPOLINE_CFLAGS),) KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE - else - $(warning CONFIG_RETPOLINE=y, but not supported by the compiler. Toolchain update recommended.) 
endif endif -- cgit v1.2.3 From 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jan 2018 18:59:52 -0800 Subject: bpf: fix 32-bit divide by zero due to some JITs doing if (src_reg == 0) check in 64-bit mode for div/mod operations mask upper 32-bits of src register before doing the check Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT") Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.") Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 18 ++++++++++++++++++ net/core/filter.c | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20eb04fd155e..b7448347e6b6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4445,6 +4445,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) int i, cnt, delta = 0; for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || + insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { + /* due to JIT bugs clear upper 32-bits of src register + * before div/mod operation + */ + insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg); + insn_buf[1] = *insn; + cnt = 2; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (insn->code != (BPF_JMP | BPF_CALL)) continue; diff --git a/net/core/filter.c b/net/core/filter.c index d339ef170df6..1c0eb436671f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -458,6 +458,10 @@ do_pass: convert_bpf_extensions(fp, &insn)) break; + if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) || + fp->code == (BPF_ALU | BPF_MOD | BPF_X)) + *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X); + *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k); break; -- cgit v1.2.3 From 
ed4bbf7910b28ce3c691aef28d245585eaabda06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 23:19:49 +0100 Subject: timers: Unconditionally check deferrable base When the timer base is checked for expired timers then the deferrable base must be checked as well. This was missed when making the deferrable base independent of base::nohz_active. Fixes: ced6d5c11d3e ("timers: Use deferrable base independent of base::nohz_active") Signed-off-by: Thomas Gleixner Cc: Anna-Maria Gleixner Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Paul McKenney Cc: stable@vger.kernel.org Cc: rt@linutronix.de --- kernel/time/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 89a9e1b4264a..0bcf00e3ce48 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1696,7 +1696,7 @@ void run_local_timers(void) hrtimer_run_queues(); /* Raise the softirq only if required. */ if (time_before(jiffies, base->clk)) { - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) return; /* CPU is awake, so check the deferrable base. */ base++; -- cgit v1.2.3 From a8750ddca918032d6349adbf9a4b6555e7db20da Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Jan 2018 15:32:30 -0800 Subject: Linux 4.15-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c4aa6210a2a4..bf5b8cbb9469 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 0d39e2669d7b0fefd2d8f9e7868ae669b364d9ba Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 10 Jan 2018 18:36:02 +0300 Subject: x86/kasan: Panic if there is not enough memory to boot Currently KASAN doesn't panic in case it don't have enough memory to boot. 
Instead, it crashes in some random place: kernel BUG at arch/x86/mm/physaddr.c:27! RIP: 0010:__phys_addr+0x268/0x276 Call Trace: kasan_populate_shadow+0x3f2/0x497 kasan_init+0x12e/0x2b2 setup_arch+0x2825/0x2a2c start_kernel+0xc8/0x15f4 x86_64_start_reservations+0x2a/0x2c x86_64_start_kernel+0x72/0x75 secondary_startup_64+0xa5/0xb0 Use memblock_virt_alloc_try_nid() for allocations without failure fallback. It will panic with an out of memory message. Reported-by: kernel test robot Signed-off-by: Andrey Ryabinin Signed-off-by: Thomas Gleixner Acked-by: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Cc: Alexander Potapenko Cc: lkp@01.org Link: https://lkml.kernel.org/r/20180110153602.18919-1-aryabinin@virtuozzo.com --- arch/x86/mm/kasan_init_64.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 47388f0c0e59..af6f2f9c6a26 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -21,10 +21,14 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static __init void *early_alloc(size_t size, int nid) +static __init void *early_alloc(size_t size, int nid, bool panic) { - return memblock_virt_alloc_try_nid_nopanic(size, size, - __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); + if (panic) + return memblock_virt_alloc_try_nid(size, size, + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); + else + return memblock_virt_alloc_try_nid_nopanic(size, size, + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); } static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, @@ -38,14 +42,14 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, if (boot_cpu_has(X86_FEATURE_PSE) && ((end - addr) == PMD_SIZE) && IS_ALIGNED(addr, PMD_SIZE)) { - p = early_alloc(PMD_SIZE, nid); + p = early_alloc(PMD_SIZE, nid, false); if (p && pmd_set_huge(pmd, 
__pa(p), PAGE_KERNEL)) return; else if (p) memblock_free(__pa(p), PMD_SIZE); } - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pmd_populate_kernel(&init_mm, pmd, p); } @@ -57,7 +61,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, if (!pte_none(*pte)) continue; - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); } while (pte++, addr += PAGE_SIZE, addr != end); @@ -75,14 +79,14 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, if (boot_cpu_has(X86_FEATURE_GBPAGES) && ((end - addr) == PUD_SIZE) && IS_ALIGNED(addr, PUD_SIZE)) { - p = early_alloc(PUD_SIZE, nid); + p = early_alloc(PUD_SIZE, nid, false); if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) return; else if (p) memblock_free(__pa(p), PUD_SIZE); } - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pud_populate(&init_mm, pud, p); } @@ -101,7 +105,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, unsigned long next; if (p4d_none(*p4d)) { - void *p = early_alloc(PAGE_SIZE, nid); + void *p = early_alloc(PAGE_SIZE, nid, true); p4d_populate(&init_mm, p4d, p); } @@ -122,7 +126,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, unsigned long next; if (pgd_none(*pgd)) { - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pgd_populate(&init_mm, pgd, p); } -- cgit v1.2.3 From c995efd5a740d9cbafbf58bde4973e8b50b4d761 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jan 2018 17:49:25 +0000 Subject: x86/retpoline: Fill RSB on context switch for affected CPUs On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. 
This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Cc: gnomes@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lendacky@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-dwmw@amazon.co.uk --- arch/x86/entry/entry_32.S | 11 +++++++++++ arch/x86/entry/entry_64.S | 11 +++++++++++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a1f28a54f23a..60c4c342316c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -244,6 +244,17 @@ ENTRY(__switch_to_asm) movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef 
CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl %esi popl %edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 59874bc1aed2..d54a0ede61d1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -487,6 +487,17 @@ ENTRY(__switch_to_asm) movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. 
+ */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq %r15 popq %r14 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f275447862f4..aa09559b2c0b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -211,6 +211,7 @@ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e4dc26185aa7..390b3dc3d438 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -23,6 +23,7 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); @@ -155,6 +156,23 @@ disable: return SPECTRE_V2_CMD_NONE; } +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + return true; + } + } + return false; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -213,6 +231,24 @@ retpoline_auto: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); + + /* + * If neither SMEP or KPTI are available, there is a risk of + * hitting userspace addresses in the RSB after a context switch + * from a shallow call stack to a deeper one. To prevent this fill + * the entire RSB, even when using IBRS. 
+ * + * Skylake era CPUs have a separate issue with *underflow* of the + * RSB, when they will predict 'ret' targets from the generic BTB. + * The proper mitigation for this is IBRS. If IBRS is not supported + * or deactivated in favour of retpolines the RSB fill on context + * switch is required. + */ + if ((!boot_cpu_has(X86_FEATURE_PTI) && + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Filling RSB on context switch\n"); + } } #undef pr_fmt -- cgit v1.2.3 From 28d437d550e1e39f805d99f9f8ac399c778827b7 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Sat, 13 Jan 2018 17:27:30 -0600 Subject: x86/retpoline: Add LFENCE to the retpoline/RSB filling RSB macros The PAUSE instruction is currently used in the retpoline and RSB filling macros as a speculation trap. The use of PAUSE was originally suggested because it showed a very, very small difference in the amount of cycles/time used to execute the retpoline as compared to LFENCE. On AMD, the PAUSE instruction is not a serializing instruction, so the pause/jmp loop will use excess power as it is speculated over waiting for return to mispredict to the correct target. The RSB filling macro is applicable to AMD, and, if software is unable to verify that LFENCE is serializing on AMD (possible when running under a hypervisor), the generic retpoline support will be used and, so, is also applicable to AMD. Keep the current usage of PAUSE for Intel, but add an LFENCE instruction to the speculation trap for AMD. The same sequence has been adopted by GCC for the GCC generated retpolines. 
Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Acked-by: David Woodhouse Acked-by: Arjan van de Ven Cc: Rik van Riel Cc: Andi Kleen Cc: Paul Turner Cc: Peter Zijlstra Cc: Tim Chen Cc: Jiri Kosina Cc: Dave Hansen Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Dan Williams Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Kees Cook Link: https://lkml.kernel.org/r/20180113232730.31060.36287.stgit@tlendack-t1.amdoffice.net --- arch/x86/include/asm/nospec-branch.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 402a11c803c3..7b45d8424150 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,7 +11,7 @@ * Fill the CPU return stack buffer. * * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; jmp' loop to capture speculative execution. + * infinite 'pause; lfence; jmp' loop to capture speculative execution. * * This is required in various cases for retpoline and IBRS-based * mitigations for the Spectre variant 2 vulnerability. Sometimes to @@ -38,11 +38,13 @@ call 772f; \ 773: /* speculation trap */ \ pause; \ + lfence; \ jmp 773b; \ 772: \ call 774f; \ 775: /* speculation trap */ \ pause; \ + lfence; \ jmp 775b; \ 774: \ dec reg; \ @@ -73,6 +75,7 @@ call .Ldo_rop_\@ .Lspec_trap_\@: pause + lfence jmp .Lspec_trap_\@ .Ldo_rop_\@: mov \reg, (%_ASM_SP) @@ -165,6 +168,7 @@ " .align 16\n" \ "901: call 903f;\n" \ "902: pause;\n" \ + " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ "903: addl $4, %%esp;\n" \ -- cgit v1.2.3 From 17218e0092f8c7b7edce7ff08c8b23212eac7271 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 12 Jan 2018 14:10:38 +0100 Subject: PM / genpd: Stop/start devices without pm_runtime_force_suspend/resume() There are problems with calling pm_runtime_force_suspend/resume() to "stop" and "start" devices in genpd_finish_suspend() and genpd_resume_noirq() (and in analogous hibernation-specific genpd callbacks) after commit 122a22377a3d (PM / Domains: Stop/start devices during system PM suspend/resume in genpd) as those routines do much more than just "stopping" and "starting" devices (which was the stated purpose of that commit) unnecessarily and may not play well with system-wide PM driver callbacks. First, consider the pm_runtime_force_suspend() in genpd_finish_suspend(). If the current runtime PM status of the device is "suspended", that function most likely does the right thing by ignoring the device, because it should have been "stopped" already and whatever needed to be done to deactivate it should have been done. In turn, if the runtime PM status of the device is "active", genpd_runtime_suspend() is called for it (indirectly) and (1) runs the ->runtime_suspend callback provided by the device's driver (assuming no bus type with ->runtime_suspend of its own), (2) "stops" the device and (3) checks if the domain can be powered down, and then (4) the device's runtime PM status is changed to "suspended". Out of the four actions above (1) is not necessary and it may be outright harmful, (3) is pointless and (4) is questionable. The only operation that needs to be carried out here is (2). The reason why (1) is not necessary is because the system-wide PM callbacks provided by the device driver for the transition in question have been run and they should have taken care of the driver's part of device suspend already. Moreover, it may be harmful, because the ->runtime_suspend callback may want to access the device which is partially suspended at that point and may not be responsive.
Also, system-wide PM callbacks may have been run already (in the previous phases of the system transition under way) for the device's parent or for its supplier devices (if any) and the device may not be accessible because of that. There also is no reason to do (3), because genpd_finish_suspend() will repeat it anyway, and (4) potentially causes confusion to ensue during the subsequent system transition to the working state. Consider pm_runtime_force_resume() in genpd_resume_noirq() now. It runs genpd_runtime_resume() for all devices with runtime PM status set to "suspended", which includes all of the devices whose runtime PM status was changed by pm_runtime_force_suspend() before and may include some devices already suspended when the pm_runtime_force_suspend() was running, which may be confusing. The genpd_runtime_resume() first tries to power up the domain, which (again) is pointless, because genpd_resume_noirq() has done that already. Then, it "starts" the device and runs the ->runtime_resume callback (from the driver, say) for it. If all is well, the device is left with the runtime PM status set to "active". Unfortunately, running the driver's ->runtime_resume callback before its system-wide PM callbacks and possibly before some system-wide PM callbacks of the parent device's driver (let alone supplier drivers) is asking for trouble, especially if the device had been suspended before pm_runtime_force_suspend() ran previously or if the callbacks in question expect to be run back-to-back with their suspend-side counterparts. It also should not be necessary, because the system-wide PM driver callbacks that will be invoked for the device subsequently should take care of resuming it just fine. 
[Running the driver's ->runtime_resume callback in the "noirq" phase of the transition to the working state may be problematic even for devices whose drivers do use pm_runtime_force_resume() in (or as) their system-wide PM callbacks if they have suppliers other than their parents, because it may cause the supplier to be resumed after the consumer in some cases.] Because of the above, modify genpd as follows: 1. Change genpd_finish_suspend() to only "stop" devices with runtime PM status set to "active" (without invoking runtime PM callbacks for them, changing their runtime PM status and so on). That doesn't change the handling of devices whose drivers use pm_runtime_force_suspend/resume() in (or as) their system-wide PM callbacks and addresses the issues described above for the other devices. 2. Change genpd_resume_noirq() to only "start" devices with runtime PM status set to "active" (without invoking runtime PM callbacks for them, changing their runtime PM status and so on). Again, that doesn't change the handling of devices whose drivers use pm_runtime_force_suspend/resume() in (or as) their system-wide PM callbacks and addresses the described issues for the other devices. Devices with runtime PM status set to "suspended" are not started with the assumption that they will be resumed later, either by pm_runtime_force_resume() or via runtime PM. 3. Change genpd_restore_noirq() to follow genpd_resume_noirq(). That causes devices already suspended before hibernation to be left alone (which also is the case without the change) and avoids running the ->runtime_resume driver callback too early for the other devices. 4. Change genpd_freeze_noirq() and genpd_thaw_noirq() in accordance with the above modifications. Fixes: 122a22377a3d (PM / Domains: Stop/start devices during system PM suspend/resume in genpd) Signed-off-by: Rafael J. 
Wysocki Acked-by: Ulf Hansson --- drivers/base/power/domain.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 48255ce7c0ad..528b24149bc7 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1048,8 +1048,9 @@ static int genpd_finish_suspend(struct device *dev, bool poweroff) if (dev->power.wakeup_path && genpd_is_active_wakeup(genpd)) return 0; - if (genpd->dev_ops.stop && genpd->dev_ops.start) { - ret = pm_runtime_force_suspend(dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start && + !pm_runtime_status_suspended(dev)) { + ret = genpd_stop_dev(genpd, dev); if (ret) { if (poweroff) pm_generic_restore_noirq(dev); @@ -1106,8 +1107,9 @@ static int genpd_resume_noirq(struct device *dev) genpd->suspended_count--; genpd_unlock(genpd); - if (genpd->dev_ops.stop && genpd->dev_ops.start) { - ret = pm_runtime_force_resume(dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start && + !pm_runtime_status_suspended(dev)) { + ret = genpd_start_dev(genpd, dev); if (ret) return ret; } @@ -1139,8 +1141,9 @@ static int genpd_freeze_noirq(struct device *dev) if (ret) return ret; - if (genpd->dev_ops.stop && genpd->dev_ops.start) - ret = pm_runtime_force_suspend(dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start && + !pm_runtime_status_suspended(dev)) + ret = genpd_stop_dev(genpd, dev); return ret; } @@ -1163,8 +1166,9 @@ static int genpd_thaw_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->dev_ops.stop && genpd->dev_ops.start) { - ret = pm_runtime_force_resume(dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start && + !pm_runtime_status_suspended(dev)) { + ret = genpd_start_dev(genpd, dev); if (ret) return ret; } @@ -1221,8 +1225,9 @@ static int genpd_restore_noirq(struct device *dev) genpd_sync_power_on(genpd, true, 0); genpd_unlock(genpd); - if (genpd->dev_ops.stop && genpd->dev_ops.start) { - ret = 
pm_runtime_force_resume(dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start && + !pm_runtime_status_suspended(dev)) { + ret = genpd_start_dev(genpd, dev); if (ret) return ret; } -- cgit v1.2.3 From 4918e1f87c5fb7fc8f73a7d8fb118beeb94e05f7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 12 Jan 2018 14:12:05 +0100 Subject: PM / runtime: Rework pm_runtime_force_suspend/resume() One of the limitations of pm_runtime_force_suspend/resume() is that if a parent driver wants to use these functions, all of its child drivers generally have to do that too because of the parent usage counter manipulations necessary to get the correct state of the parent during system-wide transitions to the working state (system resume). However, that limitation turns out to be artificial, so remove it. Namely, pm_runtime_force_suspend() only needs to update the children counter of its parent (if there is a parent) when the device can stay in suspend after the subsequent system resume transition, as that counter is correct already otherwise. Now, if the parent's children counter is not updated, it is not necessary to increment the parent's usage counter in that case any more, as long as the children counters of devices are checked along with their usage counters in order to decide whether or not the devices may be left in suspend after the subsequent system resume transition. Accordingly, modify pm_runtime_force_suspend() to only call pm_runtime_set_suspended() for devices whose usage and children counters are at the "no references" level (the runtime PM status of the device needs to be updated to "suspended" anyway in case this function is called once again for the same device during the transition under way), drop the parent usage counter incrementation from it and update pm_runtime_force_resume() to compensate for these changes. Signed-off-by: Rafael J.
Wysocki Reviewed-by: Ulf Hansson --- drivers/base/power/runtime.c | 74 ++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 6e89b51ea3d9..84832f1a75bf 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1613,17 +1613,28 @@ void pm_runtime_drop_link(struct device *dev) spin_unlock_irq(&dev->power.lock); } +static bool pm_runtime_need_not_resume(struct device *dev) +{ + return atomic_read(&dev->power.usage_count) <= 1 && + atomic_read(&dev->power.child_count) == 0; +} + /** * pm_runtime_force_suspend - Force a device into suspend state if needed. * @dev: Device to suspend. * * Disable runtime PM so we safely can check the device's runtime PM status and - * if it is active, invoke it's .runtime_suspend callback to bring it into - * suspend state. Keep runtime PM disabled to preserve the state unless we - * encounter errors. + * if it is active, invoke its ->runtime_suspend callback to suspend it and + * change its runtime PM status field to RPM_SUSPENDED. Also, if the device's + * usage and children counters don't indicate that the device was in use before + * the system-wide transition under way, decrement its parent's children counter + * (if there is a parent). Keep runtime PM disabled to preserve the state + * unless we encounter errors. * * Typically this function may be invoked from a system suspend callback to make - * sure the device is put into low power state. + * sure the device is put into low power state and it should only be used during + * system-wide PM transitions to sleep states. It assumes that the analogous + * pm_runtime_force_resume() will be used to resume the device. 
*/ int pm_runtime_force_suspend(struct device *dev) { @@ -1646,17 +1657,18 @@ int pm_runtime_force_suspend(struct device *dev) goto err; /* - * Increase the runtime PM usage count for the device's parent, in case - * when we find the device being used when system suspend was invoked. - * This informs pm_runtime_force_resume() to resume the parent - * immediately, which is needed to be able to resume its children, - * when not deferring the resume to be managed via runtime PM. + * If the device can stay in suspend after the system-wide transition + * to the working state that will follow, drop the children counter of + * its parent, but set its status to RPM_SUSPENDED anyway in case this + * function will be called again for it in the meantime. */ - if (dev->parent && atomic_read(&dev->power.usage_count) > 1) - pm_runtime_get_noresume(dev->parent); + if (pm_runtime_need_not_resume(dev)) + pm_runtime_set_suspended(dev); + else + __update_runtime_status(dev, RPM_SUSPENDED); - pm_runtime_set_suspended(dev); return 0; + err: pm_runtime_enable(dev); return ret; @@ -1669,13 +1681,9 @@ EXPORT_SYMBOL_GPL(pm_runtime_force_suspend); * * Prior invoking this function we expect the user to have brought the device * into low power state by a call to pm_runtime_force_suspend(). Here we reverse - * those actions and brings the device into full power, if it is expected to be - * used on system resume. To distinguish that, we check whether the runtime PM - * usage count is greater than 1 (the PM core increases the usage count in the - * system PM prepare phase), as that indicates a real user (such as a subsystem, - * driver, userspace, etc.) is using it. If that is the case, the device is - * expected to be used on system resume as well, so then we resume it. In the - * other case, we defer the resume to be managed via runtime PM. + * those actions and bring the device into full power, if it is expected to be + * used on system resume. 
In the other case, we defer the resume to be managed + * via runtime PM. * * Typically this function may be invoked from a system resume callback. */ @@ -1684,32 +1692,18 @@ int pm_runtime_force_resume(struct device *dev) int (*callback)(struct device *); int ret = 0; - callback = RPM_GET_CALLBACK(dev, runtime_resume); - - if (!callback) { - ret = -ENOSYS; - goto out; - } - - if (!pm_runtime_status_suspended(dev)) + if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev)) goto out; /* - * Decrease the parent's runtime PM usage count, if we increased it - * during system suspend in pm_runtime_force_suspend(). - */ - if (atomic_read(&dev->power.usage_count) > 1) { - if (dev->parent) - pm_runtime_put_noidle(dev->parent); - } else { - goto out; - } + * The value of the parent's children counter is correct already, so + * just update the status of the device. + */ + __update_runtime_status(dev, RPM_ACTIVE); - ret = pm_runtime_set_active(dev); - if (ret) - goto out; + callback = RPM_GET_CALLBACK(dev, runtime_resume); - ret = callback(dev); + ret = callback ? callback(dev) : -ENOSYS; if (ret) { pm_runtime_set_suspended(dev); goto out; -- cgit v1.2.3 From a48a52b7bea81c046fe1c1288f84d0eba214cba0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jan 2018 09:12:05 +0100 Subject: cfg80211: fully initialize old channel for event Paul reported that he got a report about undefined behaviour that seems to me to originate in using uninitialized memory when the channel structure here is used in the event code in nl80211 later. He never reported whether this fixed it, and I wasn't able to trigger this so far, but we should do the right thing and fully initialize the on-stack structure anyway. 
Reported-by: Paul Menzel Signed-off-by: Johannes Berg --- net/wireless/reg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 78e71b0390be..7b42f0bacfd8 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1769,8 +1769,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS) return; - chan_before.center_freq = chan->center_freq; - chan_before.flags = chan->flags; + chan_before = *chan; if (chan->flags & IEEE80211_CHAN_NO_IR) { chan->flags &= ~IEEE80211_CHAN_NO_IR; -- cgit v1.2.3 From 7a94b8c2eee7083ddccd0515830f8c81a8e44b1a Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 15 Jan 2018 08:12:15 +0100 Subject: nl80211: take RCU read lock when calling ieee80211_bss_get_ie() As ieee80211_bss_get_ie() dereferences an RCU-protected pointer to return ssid_ie, both the call to this function and any operation on this variable need protection by the RCU read lock.
Fixes: 44905265bc15 ("nl80211: don't expose wdev->ssid for most interfaces") Signed-off-by: Dominik Brodowski Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2b3dbcd40e46..ed87a97fcb0b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2618,12 +2618,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag const u8 *ssid_ie; if (!wdev->current_bss) break; + rcu_read_lock(); ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub, WLAN_EID_SSID); - if (!ssid_ie) - break; - if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) - goto nla_put_failure_locked; + if (ssid_ie && + nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2)) + goto nla_put_failure_rcu_locked; + rcu_read_unlock(); break; } default: @@ -2635,6 +2636,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag genlmsg_end(msg, hdr); return 0; + nla_put_failure_rcu_locked: + rcu_read_unlock(); nla_put_failure_locked: wdev_unlock(wdev); nla_put_failure: -- cgit v1.2.3 From b71d856ab536f25eb97c011a351ecddf5518de41 Mon Sep 17 00:00:00 2001 From: Benjamin Beichler Date: Wed, 10 Jan 2018 17:42:51 +0100 Subject: mac80211_hwsim: add workqueue to wait for deferred radio deletion on mod unload When closing multiple wmediumd instances with many radios and trying to unload the mac80211_hwsim module, it may happen that the work items live longer than the module. To wait specifically for these deletion work items, add a work queue; otherwise flush_scheduled_work would be necessary.
Signed-off-by: Benjamin Beichler Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e8189c07b41f..ccd573e53c92 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -489,6 +489,7 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = { static spinlock_t hwsim_radio_lock; static LIST_HEAD(hwsim_radios); +static struct workqueue_struct *hwsim_wq; static int hwsim_radio_idx; static struct platform_driver mac80211_hwsim_driver = { @@ -3342,7 +3343,7 @@ static void remove_user_radios(u32 portid) if (entry->destroy_on_close && entry->portid == portid) { list_del(&entry->list); INIT_WORK(&entry->destroy_work, destroy_radio); - schedule_work(&entry->destroy_work); + queue_work(hwsim_wq, &entry->destroy_work); } } spin_unlock_bh(&hwsim_radio_lock); @@ -3417,7 +3418,7 @@ static void __net_exit hwsim_exit_net(struct net *net) list_del(&data->list); INIT_WORK(&data->destroy_work, destroy_radio); - schedule_work(&data->destroy_work); + queue_work(hwsim_wq, &data->destroy_work); } spin_unlock_bh(&hwsim_radio_lock); } @@ -3449,6 +3450,10 @@ static int __init init_mac80211_hwsim(void) spin_lock_init(&hwsim_radio_lock); + hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0); + if (!hwsim_wq) + return -ENOMEM; + err = register_pernet_device(&hwsim_net_ops); if (err) return err; @@ -3587,8 +3592,11 @@ static void __exit exit_mac80211_hwsim(void) hwsim_exit_netlink(); mac80211_hwsim_free(); + flush_workqueue(hwsim_wq); + unregister_netdev(hwsim_mon); platform_driver_unregister(&mac80211_hwsim_driver); unregister_pernet_device(&hwsim_net_ops); + destroy_workqueue(hwsim_wq); } module_exit(exit_mac80211_hwsim); -- cgit v1.2.3 From 51a1aaa631c90223888d8beac4d649dc11d2ca55 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jan 2018 
09:32:36 +0100 Subject: mac80211_hwsim: validate number of different channels When creating a new radio on the fly, hwsim allows this to be done with an arbitrary number of channels, but cfg80211 only supports a limited number of simultaneous channels, leading to a warning. Fix this by validating the number - this requires moving the define for the maximum out to a visible header file. Reported-by: syzbot+8dd9051ff19940290931@syzkaller.appspotmail.com Fixes: b59ec8dd4394 ("mac80211_hwsim: fix number of channels in interface combinations") Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 5 +++++ include/net/cfg80211.h | 2 ++ net/wireless/core.h | 2 -- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index ccd573e53c92..f6d4a50f1bdb 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3121,6 +3121,11 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_CHANNELS]) param.channels = nla_get_u32(info->attrs[HWSIM_ATTR_CHANNELS]); + if (param.channels > CFG80211_MAX_NUM_DIFFERENT_CHANNELS) { + GENL_SET_ERR_MSG(info, "too many channels specified"); + return -EINVAL; + } + if (info->attrs[HWSIM_ATTR_NO_VIF]) param.no_vif = true; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cb4d92b79cd9..fb94a8bd8ab5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -815,6 +815,8 @@ struct cfg80211_csa_settings { u8 count; }; +#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 + /** * struct iface_combination_params - input parameters for interface combinations * diff --git a/net/wireless/core.h b/net/wireless/core.h index d2f7e8b8a097..eaff636169c2 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -507,8 +507,6 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, void cfg80211_stop_nan(struct 
cfg80211_registered_device *rdev, struct wireless_dev *wdev); -#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 - #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else -- cgit v1.2.3 From be6d447e4f9c5cc6d48aabc3ec362b6a559c3fd7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 15 Jan 2018 10:24:34 +0100 Subject: x86/jailhouse: Hide x2apic code when CONFIG_X86_X2APIC=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit x2apic_phys is not available when CONFIG_X86_X2APIC=n and the code is not optimized out resulting in a build fail: jailhouse.c: In function ‘jailhouse_get_smp_config’: jailhouse.c:73:3: error: ‘x2apic_phys’ undeclared (first use in this function) Fixes: 11c8dc419bbc ("x86/jailhouse: Enable APIC and SMP support") Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: Jan Kiszka Cc: jailhouse-dev@googlegroups.com --- arch/x86/kernel/jailhouse.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index d6d5976a9b51..7ade152133c7 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -52,6 +52,24 @@ static unsigned long jailhouse_get_tsc(void) return precalibrated_tsc_khz; } +static void __init jailhouse_x2apic_init(void) +{ +#ifdef CONFIG_X86_X2APIC + if (!x2apic_enabled()) + return; + /* + * We do not have access to IR inside Jailhouse non-root cells. So + * we have to run in physical mode. + */ + x2apic_phys = 1; + /* + * This will trigger the switch to apic_x2apic_phys. Empty OEM IDs + * ensure that only this APIC driver picks up the call. 
+ */ + default_acpi_madt_oem_check("", ""); +#endif +} + static void __init jailhouse_get_smp_config(unsigned int early) { struct ioapic_domain_cfg ioapic_cfg = { @@ -65,20 +83,7 @@ static void __init jailhouse_get_smp_config(unsigned int early) }; unsigned int cpu; - if (x2apic_enabled()) { - /* - * We do not have access to IR inside Jailhouse non-root cells. - * So we have to run in physical mode. - */ - x2apic_phys = 1; - - /* - * This will trigger the switch to apic_x2apic_phys. - * Empty OEM IDs ensure that only this APIC driver picks up - * the call. - */ - default_acpi_madt_oem_check("", ""); - } + jailhouse_x2apic_init(); register_lapic_address(0xfee00000); -- cgit v1.2.3 From 59b179b48ce2a6076448a44531242ac2b3f6cef2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jan 2018 09:58:27 +0100 Subject: cfg80211: check dev_set_name() return value syzbot reported a warning from rfkill_alloc(), and after a while I think that the reason is that it was doing fault injection and the dev_set_name() failed, leaving the name NULL, and we didn't check the return value and got to rfkill_alloc() with a NULL name. Since we really don't want a NULL name, we ought to check the return value. Fixes: fb28ad35906a ("net: struct device - replace bus_id with dev_name(), dev_set_name()") Reported-by: syzbot+1ddfb3357e1d7bb5b5d3@syzkaller.appspotmail.com Signed-off-by: Johannes Berg --- net/wireless/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index fdde0d98fde1..a6f3cac8c640 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -439,6 +439,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, if (rv) goto use_default_name; } else { + int rv; + use_default_name: /* NOTE: This is *probably* safe w/out holding rtnl because of * the restrictions on phy names. Probably this call could @@ -446,7 +448,11 @@ use_default_name: * phyX. 
But, might should add some locking and check return * value, and use a different name if this one exists? */ - dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); + rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); + if (rv < 0) { + kfree(rdev); + return NULL; + } } INIT_LIST_HEAD(&rdev->wiphy.wdev_list); -- cgit v1.2.3 From 08f411bcb5fbd96c53d4535e0526f70b971ee5d5 Mon Sep 17 00:00:00 2001 From: Samuel Mendoza-Jonas Date: Mon, 15 Jan 2018 16:58:58 +1100 Subject: hwmon: (pmbus/ir35221) Remove unnecessary scaling The ir35221 datasheet describes specific scaling factors for a number of commands which the current driver applies when reading. However now that the ir35221 has been tested on machines with more easily verifiable readings these descriptions have turned out to be superfluous and reading each command according to the linear format is sufficient. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/ir35221.c | 189 ------------------------------------------ 1 file changed, 189 deletions(-) diff --git a/drivers/hwmon/pmbus/ir35221.c b/drivers/hwmon/pmbus/ir35221.c index 8b906b44484b..977315b0fd90 100644 --- a/drivers/hwmon/pmbus/ir35221.c +++ b/drivers/hwmon/pmbus/ir35221.c @@ -25,168 +25,19 @@ #define IR35221_MFR_IOUT_VALLEY 0xcb #define IR35221_MFR_TEMP_VALLEY 0xcc -static long ir35221_reg2data(int data, enum pmbus_sensor_classes class) -{ - s16 exponent; - s32 mantissa; - long val; - - /* We only modify LINEAR11 formats */ - exponent = ((s16)data) >> 11; - mantissa = ((s16)((data & 0x7ff) << 5)) >> 5; - - val = mantissa * 1000L; - - /* scale result to micro-units for power sensors */ - if (class == PSC_POWER) - val = val * 1000L; - - if (exponent >= 0) - val <<= exponent; - else - val >>= -exponent; - - return val; -} - -#define MAX_MANTISSA (1023 * 1000) -#define MIN_MANTISSA (511 * 1000) - -static u16 ir35221_data2reg(long val, enum pmbus_sensor_classes class) -{ - s16 exponent = 0, 
mantissa; - bool negative = false; - - if (val == 0) - return 0; - - if (val < 0) { - negative = true; - val = -val; - } - - /* Power is in uW. Convert to mW before converting. */ - if (class == PSC_POWER) - val = DIV_ROUND_CLOSEST(val, 1000L); - - /* Reduce large mantissa until it fits into 10 bit */ - while (val >= MAX_MANTISSA && exponent < 15) { - exponent++; - val >>= 1; - } - /* Increase small mantissa to improve precision */ - while (val < MIN_MANTISSA && exponent > -15) { - exponent--; - val <<= 1; - } - - /* Convert mantissa from milli-units to units */ - mantissa = DIV_ROUND_CLOSEST(val, 1000); - - /* Ensure that resulting number is within range */ - if (mantissa > 0x3ff) - mantissa = 0x3ff; - - /* restore sign */ - if (negative) - mantissa = -mantissa; - - /* Convert to 5 bit exponent, 11 bit mantissa */ - return (mantissa & 0x7ff) | ((exponent << 11) & 0xf800); -} - -static u16 ir35221_scale_result(s16 data, int shift, - enum pmbus_sensor_classes class) -{ - long val; - - val = ir35221_reg2data(data, class); - - if (shift < 0) - val >>= -shift; - else - val <<= shift; - - return ir35221_data2reg(val, class); -} - static int ir35221_read_word_data(struct i2c_client *client, int page, int reg) { int ret; switch (reg) { - case PMBUS_IOUT_OC_FAULT_LIMIT: - case PMBUS_IOUT_OC_WARN_LIMIT: - ret = pmbus_read_word_data(client, page, reg); - if (ret < 0) - break; - ret = ir35221_scale_result(ret, 1, PSC_CURRENT_OUT); - break; - case PMBUS_VIN_OV_FAULT_LIMIT: - case PMBUS_VIN_OV_WARN_LIMIT: - case PMBUS_VIN_UV_WARN_LIMIT: - ret = pmbus_read_word_data(client, page, reg); - ret = ir35221_scale_result(ret, -4, PSC_VOLTAGE_IN); - break; - case PMBUS_IIN_OC_WARN_LIMIT: - ret = pmbus_read_word_data(client, page, reg); - if (ret < 0) - break; - ret = ir35221_scale_result(ret, -1, PSC_CURRENT_IN); - break; - case PMBUS_READ_VIN: - ret = pmbus_read_word_data(client, page, PMBUS_READ_VIN); - if (ret < 0) - break; - ret = ir35221_scale_result(ret, -5, PSC_VOLTAGE_IN); - 
break; - case PMBUS_READ_IIN: - ret = pmbus_read_word_data(client, page, PMBUS_READ_IIN); - if (ret < 0) - break; - if (page == 0) - ret = ir35221_scale_result(ret, -4, PSC_CURRENT_IN); - else - ret = ir35221_scale_result(ret, -5, PSC_CURRENT_IN); - break; - case PMBUS_READ_POUT: - ret = pmbus_read_word_data(client, page, PMBUS_READ_POUT); - if (ret < 0) - break; - ret = ir35221_scale_result(ret, -1, PSC_POWER); - break; - case PMBUS_READ_PIN: - ret = pmbus_read_word_data(client, page, PMBUS_READ_PIN); - if (ret < 0) - break; - ret = ir35221_scale_result(ret, -1, PSC_POWER); - break; - case PMBUS_READ_IOUT: - ret = pmbus_read_word_data(client, page, PMBUS_READ_IOUT); - if (ret < 0) - break; - if (page == 0) - ret = ir35221_scale_result(ret, -1, PSC_CURRENT_OUT); - else - ret = ir35221_scale_result(ret, -2, PSC_CURRENT_OUT); - break; case PMBUS_VIRT_READ_VIN_MAX: ret = pmbus_read_word_data(client, page, IR35221_MFR_VIN_PEAK); - if (ret < 0) - break; - ret = ir35221_scale_result(ret, -5, PSC_VOLTAGE_IN); break; case PMBUS_VIRT_READ_VOUT_MAX: ret = pmbus_read_word_data(client, page, IR35221_MFR_VOUT_PEAK); break; case PMBUS_VIRT_READ_IOUT_MAX: ret = pmbus_read_word_data(client, page, IR35221_MFR_IOUT_PEAK); - if (ret < 0) - break; - if (page == 0) - ret = ir35221_scale_result(ret, -1, PSC_CURRENT_IN); - else - ret = ir35221_scale_result(ret, -2, PSC_CURRENT_IN); break; case PMBUS_VIRT_READ_TEMP_MAX: ret = pmbus_read_word_data(client, page, IR35221_MFR_TEMP_PEAK); @@ -194,9 +45,6 @@ static int ir35221_read_word_data(struct i2c_client *client, int page, int reg) case PMBUS_VIRT_READ_VIN_MIN: ret = pmbus_read_word_data(client, page, IR35221_MFR_VIN_VALLEY); - if (ret < 0) - break; - ret = ir35221_scale_result(ret, -5, PSC_VOLTAGE_IN); break; case PMBUS_VIRT_READ_VOUT_MIN: ret = pmbus_read_word_data(client, page, @@ -205,12 +53,6 @@ static int ir35221_read_word_data(struct i2c_client *client, int page, int reg) case PMBUS_VIRT_READ_IOUT_MIN: ret = 
pmbus_read_word_data(client, page, IR35221_MFR_IOUT_VALLEY); - if (ret < 0) - break; - if (page == 0) - ret = ir35221_scale_result(ret, -1, PSC_CURRENT_IN); - else - ret = ir35221_scale_result(ret, -2, PSC_CURRENT_IN); break; case PMBUS_VIRT_READ_TEMP_MIN: ret = pmbus_read_word_data(client, page, @@ -224,36 +66,6 @@ static int ir35221_read_word_data(struct i2c_client *client, int page, int reg) return ret; } -static int ir35221_write_word_data(struct i2c_client *client, int page, int reg, - u16 word) -{ - int ret; - u16 val; - - switch (reg) { - case PMBUS_IOUT_OC_FAULT_LIMIT: - case PMBUS_IOUT_OC_WARN_LIMIT: - val = ir35221_scale_result(word, -1, PSC_CURRENT_OUT); - ret = pmbus_write_word_data(client, page, reg, val); - break; - case PMBUS_VIN_OV_FAULT_LIMIT: - case PMBUS_VIN_OV_WARN_LIMIT: - case PMBUS_VIN_UV_WARN_LIMIT: - val = ir35221_scale_result(word, 4, PSC_VOLTAGE_IN); - ret = pmbus_write_word_data(client, page, reg, val); - break; - case PMBUS_IIN_OC_WARN_LIMIT: - val = ir35221_scale_result(word, 1, PSC_CURRENT_IN); - ret = pmbus_write_word_data(client, page, reg, val); - break; - default: - ret = -ENODATA; - break; - } - - return ret; -} - static int ir35221_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -292,7 +104,6 @@ static int ir35221_probe(struct i2c_client *client, if (!info) return -ENOMEM; - info->write_word_data = ir35221_write_word_data; info->read_word_data = ir35221_read_word_data; info->pages = 2; -- cgit v1.2.3 From 499ed50f603b4c9834197b2411ba3bd9aaa624d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Th=C3=A9baudeau?= Date: Sun, 14 Jan 2018 19:43:05 +0100 Subject: mmc: sdhci-esdhc-imx: Fix i.MX53 eSDHCv3 clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5143c953a786 ("mmc: sdhci-esdhc-imx: Allow all supported prescaler values") made it possible to set SYSCTL.SDCLKFS to 0 in SDR mode, thus bypassing the SD clock frequency prescaler, in order to be able to 
get higher SD clock frequencies in some contexts. However, that commit missed the fact that this value is illegal on the eSDHCv3 instance of the i.MX53. This seems to be the only exception on i.MX, this value being legal even for the eSDHCv2 instances of the i.MX53. Fix this issue by changing the minimum prescaler value if the i.MX53 eSDHCv3 is detected. According to the i.MX53 reference manual, if DLLCTRL[10] can be set, then the controller is eSDHCv3, else it is eSDHCv2. This commit fixes the following issue, which was preventing the i.MX53 Loco (IMX53QSB) board from booting Linux 4.15.0-rc5: [ 1.882668] mmcblk1: error -84 transferring data, sector 2048, nr 8, cmd response 0x900, card status 0xc00 [ 2.002255] mmcblk1: error -84 transferring data, sector 2050, nr 6, cmd response 0x900, card status 0xc00 [ 12.645056] mmc1: Timeout waiting for hardware interrupt. [ 12.650473] mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== [ 12.656921] mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00001201 [ 12.663366] mmc1: sdhci: Blk size: 0x00000004 | Blk cnt: 0x00000000 [ 12.669813] mmc1: sdhci: Argument: 0x00000000 | Trn mode: 0x00000013 [ 12.676258] mmc1: sdhci: Present: 0x01f8028f | Host ctl: 0x00000013 [ 12.682703] mmc1: sdhci: Power: 0x00000002 | Blk gap: 0x00000000 [ 12.689148] mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x0000003f [ 12.695594] mmc1: sdhci: Timeout: 0x0000008e | Int stat: 0x00000000 [ 12.702039] mmc1: sdhci: Int enab: 0x107f004b | Sig enab: 0x107f004b [ 12.708485] mmc1: sdhci: AC12 err: 0x00000000 | Slot int: 0x00001201 [ 12.714930] mmc1: sdhci: Caps: 0x07eb0000 | Caps_1: 0x08100810 [ 12.721375] mmc1: sdhci: Cmd: 0x0000163a | Max curr: 0x00000000 [ 12.727821] mmc1: sdhci: Resp[0]: 0x00000920 | Resp[1]: 0x00000000 [ 12.734265] mmc1: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000000 [ 12.740709] mmc1: sdhci: Host ctl2: 0x00000000 [ 12.745157] mmc1: sdhci: ADMA Err: 0x00000001 | ADMA Ptr: 0xc8049200 [ 12.751601] mmc1: sdhci: 
============================================ [ 12.758110] print_req_error: I/O error, dev mmcblk1, sector 2050 [ 12.764135] Buffer I/O error on dev mmcblk1p1, logical block 0, lost sync page write [ 12.775163] EXT4-fs (mmcblk1p1): mounted filesystem without journal. Opts: (null) [ 12.782746] VFS: Mounted root (ext4 filesystem) on device 179:9. [ 12.789151] mmcblk1: response CRC error sending SET_BLOCK_COUNT command, card status 0x900 Signed-off-by: Benoît Thébaudeau Reported-by: Wladimir J. van der Laan Tested-by: Wladimir J. van der Laan Fixes: 5143c953a786 ("mmc: sdhci-esdhc-imx: Allow all supported prescaler values") Cc: # v4.13+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 85140c9af581..8b941f814472 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -687,6 +687,20 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host, return; } + /* For i.MX53 eSDHCv3, SYSCTL.SDCLKFS may not be set to 0. */ + if (is_imx53_esdhc(imx_data)) { + /* + * According to the i.MX53 reference manual, if DLLCTRL[10] can + * be set, then the controller is eSDHCv3, else it is eSDHCv2. 
+ */ + val = readl(host->ioaddr + ESDHC_DLL_CTRL); + writel(val | BIT(10), host->ioaddr + ESDHC_DLL_CTRL); + temp = readl(host->ioaddr + ESDHC_DLL_CTRL); + writel(val, host->ioaddr + ESDHC_DLL_CTRL); + if (temp & BIT(10)) + pre_div = 2; + } + temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL); temp &= ~(ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN | ESDHC_CLOCK_MASK); -- cgit v1.2.3 From 33193dca671c8b75eb030234cefdcd2ceebd7516 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Sun, 14 Jan 2018 18:05:53 +0200 Subject: ALSA: usb-audio: Add a quirk for Nura's first gen headset The capture interface does not work, and the playback interface actually supports only 48kHz unlike what is advertised (44.1, 32, 22, 16, 8). The only unknown here is if there are other devices that use the same product ID, but given that this ID is currently unknown, I would assume it is specially allocated for the nura headset. Signed-off-by: Martin Peres Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 8a59d4782a0f..50252046b01d 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3277,4 +3277,52 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } }, +{ + /* + * Nura's first gen headphones use Cambridge Silicon Radio's vendor + * ID, but it looks like the product ID actually is only for Nura. + * The capture interface does not work at all (even on Windows), + * and only the 48 kHz sample rate works for the playback interface. 
+ */ + USB_DEVICE(0x0a12, 0x1243), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + /* Capture */ + { + .ifnum = 1, + .type = QUIRK_IGNORE_INTERFACE, + }, + /* Playback */ + { + .ifnum = 2, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .iface = 2, + .altsetting = 1, + .altset_idx = 1, + .attributes = UAC_EP_CS_ATTR_FILL_MAX | + UAC_EP_CS_ATTR_SAMPLE_RATE, + .endpoint = 0x03, + .ep_attr = USB_ENDPOINT_XFER_ISOC, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { + 48000 + } + } + }, + } + } +}, + #undef USB_DEVICE_VENDOR_SPEC -- cgit v1.2.3 From d87ce76402950b8e4d5117276d44465658e886a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Nov 2017 21:19:08 +0200 Subject: drm/i915: Add .get_hw_state() method for planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a .get_hw_state() method for planes, returning true or false depending on whether the plane is enabled. Use it to rewrite the plane enabled/disabled asserts in a platform-agnostic fashion. We do lose the pre-gen4 plane<->pipe mapping checks, but since we're supposed to sanitize that anyway it doesn't really matter.
v2: Reoder patches to not depend on enum old_plane_id Just call assert_plane_disabled() from assert_planes_disabled() v3: Deal with disabled power wells in .get_hw_state() v4: Rebase due skl primary plane code removal Cc: Thierry Reding Cc: Alex Villacís Lasso Reviewed-by: Daniel Vetter #v2 Tested-by: Thierry Reding #v2 Link: https://patchwork.freedesktop.org/patch/msgid/20171117191917.11506-2-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit 51f5a096398433a881e845d3685a2c1dac756019) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 188 +++++++++++++++++------------------ drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_sprite.c | 83 ++++++++++++++++ 3 files changed, 175 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 123585eeb87d..2f60679f99c3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1211,23 +1211,6 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe) pipe_name(pipe)); } -static void assert_cursor(struct drm_i915_private *dev_priv, - enum pipe pipe, bool state) -{ - bool cur_state; - - if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) - cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; - else - cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; - - I915_STATE_WARN(cur_state != state, - "cursor on pipe %c assertion failure (expected %s, current %s)\n", - pipe_name(pipe), onoff(state), onoff(cur_state)); -} -#define assert_cursor_enabled(d, p) assert_cursor(d, p, true) -#define assert_cursor_disabled(d, p) assert_cursor(d, p, false) - void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state) { @@ -1255,77 +1238,25 @@ void assert_pipe(struct drm_i915_private *dev_priv, pipe_name(pipe), onoff(state), onoff(cur_state)); } -static void assert_plane(struct drm_i915_private *dev_priv, - enum plane plane, bool state) 
+static void assert_plane(struct intel_plane *plane, bool state) { - u32 val; - bool cur_state; + bool cur_state = plane->get_hw_state(plane); - val = I915_READ(DSPCNTR(plane)); - cur_state = !!(val & DISPLAY_PLANE_ENABLE); I915_STATE_WARN(cur_state != state, - "plane %c assertion failure (expected %s, current %s)\n", - plane_name(plane), onoff(state), onoff(cur_state)); + "%s assertion failure (expected %s, current %s)\n", + plane->base.name, onoff(state), onoff(cur_state)); } -#define assert_plane_enabled(d, p) assert_plane(d, p, true) -#define assert_plane_disabled(d, p) assert_plane(d, p, false) +#define assert_plane_enabled(p) assert_plane(p, true) +#define assert_plane_disabled(p) assert_plane(p, false) -static void assert_planes_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) +static void assert_planes_disabled(struct intel_crtc *crtc) { - int i; - - /* Primary planes are fixed to pipes on gen4+ */ - if (INTEL_GEN(dev_priv) >= 4) { - u32 val = I915_READ(DSPCNTR(pipe)); - I915_STATE_WARN(val & DISPLAY_PLANE_ENABLE, - "plane %c assertion failure, should be disabled but not\n", - plane_name(pipe)); - return; - } - - /* Need to check both planes against the pipe */ - for_each_pipe(dev_priv, i) { - u32 val = I915_READ(DSPCNTR(i)); - enum pipe cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >> - DISPPLANE_SEL_PIPE_SHIFT; - I915_STATE_WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe, - "plane %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(i), pipe_name(pipe)); - } -} - -static void assert_sprites_disabled(struct drm_i915_private *dev_priv, - enum pipe pipe) -{ - int sprite; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_plane *plane; - if (INTEL_GEN(dev_priv) >= 9) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(PLANE_CTL(pipe, sprite)); - I915_STATE_WARN(val & PLANE_CTL_ENABLE, - "plane %d assertion failure, should be off on pipe %c but is still active\n", - 
sprite, pipe_name(pipe)); - } - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - for_each_sprite(dev_priv, pipe, sprite) { - u32 val = I915_READ(SPCNTR(pipe, PLANE_SPRITE0 + sprite)); - I915_STATE_WARN(val & SP_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - sprite_name(pipe, sprite), pipe_name(pipe)); - } - } else if (INTEL_GEN(dev_priv) >= 7) { - u32 val = I915_READ(SPRCTL(pipe)); - I915_STATE_WARN(val & SPRITE_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { - u32 val = I915_READ(DVSCNTR(pipe)); - I915_STATE_WARN(val & DVS_ENABLE, - "sprite %c assertion failure, should be off on pipe %c but is still active\n", - plane_name(pipe), pipe_name(pipe)); - } + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) + assert_plane_disabled(plane); } static void assert_vblank_disabled(struct drm_crtc *crtc) @@ -1918,9 +1849,7 @@ static void intel_enable_pipe(struct intel_crtc *crtc) DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe)); - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); /* * A pipe without a PLL won't actually be able to drive bits from @@ -1989,9 +1918,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc) * Make sure planes won't keep trying to pump pixels to us, * or we might hang the display. 
*/ - assert_planes_disabled(dev_priv, pipe); - assert_cursor_disabled(dev_priv, pipe); - assert_sprites_disabled(dev_priv, pipe); + assert_planes_disabled(crtc); reg = PIPECONF(cpu_transcoder); val = I915_READ(reg); @@ -3385,6 +3312,31 @@ static void i9xx_disable_primary_plane(struct intel_plane *primary, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool i9xx_plane_get_hw_state(struct intel_plane *primary) +{ + + struct drm_i915_private *dev_priv = to_i915(primary->base.dev); + enum intel_display_power_domain power_domain; + enum plane plane = primary->plane; + enum pipe pipe = primary->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-4 which don't have any + * display power wells. + */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane) { @@ -4866,7 +4818,8 @@ void hsw_enable_ips(struct intel_crtc *crtc) * a vblank wait. 
*/ - assert_plane_enabled(dev_priv, crtc->plane); + assert_plane_enabled(to_intel_plane(crtc->base.primary)); + if (IS_BROADWELL(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, @@ -4899,7 +4852,8 @@ void hsw_disable_ips(struct intel_crtc *crtc) if (!crtc->config->ips_enabled) return; - assert_plane_enabled(dev_priv, crtc->plane); + assert_plane_enabled(to_intel_plane(crtc->base.primary)); + if (IS_BROADWELL(dev_priv)) { mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); @@ -9477,6 +9431,23 @@ static void i845_disable_cursor(struct intel_plane *plane, i845_update_cursor(plane, NULL, NULL); } +static bool i845_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(PIPE_A); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -9670,6 +9641,28 @@ static void i9xx_disable_cursor(struct intel_plane *plane, i9xx_update_cursor(plane, NULL, NULL); } +static bool i9xx_cursor_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + /* + * Not 100% correct for planes that can move between pipes, + * but that's only the case for gen2-3 which don't have any + * display power wells. 
+ */ + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} /* VESA 640x480x72Hz mode to set on the pipe */ static const struct drm_display_mode load_detect_mode = { @@ -13205,6 +13198,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skl_update_plane; primary->disable_plane = skl_disable_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); @@ -13215,6 +13209,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = skl_update_plane; primary->disable_plane = skl_disable_plane; + primary->get_hw_state = skl_plane_get_hw_state; } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); @@ -13222,6 +13217,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = i9xx_update_primary_plane; primary->disable_plane = i9xx_disable_primary_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } else { intel_primary_formats = i8xx_primary_formats; num_formats = ARRAY_SIZE(i8xx_primary_formats); @@ -13229,6 +13225,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->update_plane = i9xx_update_primary_plane; primary->disable_plane = i9xx_disable_primary_plane; + primary->get_hw_state = i9xx_plane_get_hw_state; } if (INTEL_GEN(dev_priv) >= 9) @@ -13318,10 +13315,12 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv, if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { cursor->update_plane = i845_update_cursor; cursor->disable_plane = i845_disable_cursor; + cursor->get_hw_state = 
i845_cursor_get_hw_state; cursor->check_plane = i845_check_cursor; } else { cursor->update_plane = i9xx_update_cursor; cursor->disable_plane = i9xx_disable_cursor; + cursor->get_hw_state = i9xx_cursor_get_hw_state; cursor->check_plane = i9xx_check_cursor; } @@ -14671,8 +14670,8 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n", pipe_name(pipe)); - assert_plane_disabled(dev_priv, PLANE_A); - assert_plane_disabled(dev_priv, PLANE_B); + assert_planes_disabled(intel_get_crtc_for_pipe(dev_priv, PIPE_A)); + assert_planes_disabled(intel_get_crtc_for_pipe(dev_priv, PIPE_B)); I915_WRITE(PIPECONF(pipe), 0); POSTING_READ(PIPECONF(pipe)); @@ -14885,20 +14884,13 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv) intel_display_power_put(dev_priv, POWER_DOMAIN_VGA); } -static bool primary_get_hw_state(struct intel_plane *plane) -{ - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - - return I915_READ(DSPCNTR(plane->plane)) & DISPLAY_PLANE_ENABLE; -} - /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { struct intel_plane *primary = to_intel_plane(crtc->base.primary); bool visible; - visible = crtc->active && primary_get_hw_state(primary); + visible = crtc->active && primary->get_hw_state(primary); intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), to_intel_plane_state(primary->base.state), diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6c7f8bca574e..5d77f75a9f9c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -862,6 +862,7 @@ struct intel_plane { const struct intel_plane_state *plane_state); void (*disable_plane)(struct intel_plane *plane, struct intel_crtc *crtc); + bool (*get_hw_state)(struct intel_plane *plane); int (*check_plane)(struct intel_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state 
*state); @@ -1924,6 +1925,7 @@ void skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state); void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc); +bool skl_plane_get_hw_state(struct intel_plane *plane); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 4fcf80ca91dd..4a8a5d918a83 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -329,6 +329,26 @@ skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +bool +skl_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static void chv_update_csc(struct intel_plane *plane, uint32_t format) { @@ -506,6 +526,26 @@ vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +vlv_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} 
+ static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -646,6 +686,25 @@ ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +ivb_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { @@ -777,6 +836,25 @@ g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); } +static bool +g4x_plane_get_hw_state(struct intel_plane *plane) +{ + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + enum intel_display_power_domain power_domain; + enum pipe pipe = plane->pipe; + bool ret; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + return false; + + ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE; + + intel_display_power_put(dev_priv, power_domain); + + return ret; +} + static int intel_check_sprite_plane(struct intel_plane *plane, struct intel_crtc_state *crtc_state, @@ -1232,6 +1310,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1242,6 +1321,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, 
intel_plane->update_plane = skl_update_plane; intel_plane->disable_plane = skl_disable_plane; + intel_plane->get_hw_state = skl_plane_get_hw_state; plane_formats = skl_plane_formats; num_plane_formats = ARRAY_SIZE(skl_plane_formats); @@ -1252,6 +1332,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = vlv_update_plane; intel_plane->disable_plane = vlv_disable_plane; + intel_plane->get_hw_state = vlv_plane_get_hw_state; plane_formats = vlv_plane_formats; num_plane_formats = ARRAY_SIZE(vlv_plane_formats); @@ -1267,6 +1348,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = ivb_update_plane; intel_plane->disable_plane = ivb_disable_plane; + intel_plane->get_hw_state = ivb_plane_get_hw_state; plane_formats = snb_plane_formats; num_plane_formats = ARRAY_SIZE(snb_plane_formats); @@ -1277,6 +1359,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->update_plane = g4x_update_plane; intel_plane->disable_plane = g4x_disable_plane; + intel_plane->get_hw_state = g4x_plane_get_hw_state; modifiers = i9xx_plane_format_modifiers; if (IS_GEN6(dev_priv)) { -- cgit v1.2.3 From 23ac12732825901b3fc6ac720958d8bff9a0d6ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Nov 2017 21:19:09 +0200 Subject: drm/i915: Redo plane sanitation during readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify the plane disabling during state readout by pulling the code into a new helper intel_plane_disable_noatomic(). We'll also read out the state of all planes, so that we know which planes really need to be disabled. Additionally we change the plane<->pipe mapping sanitation to work by simply disabling the offending planes instead of entire pipes. And we do it before we otherwise sanitize the crtcs, which means we don't have to worry about misassigned planes during crtc sanitation anymore. 
v2: Reorder patches to not depend on enum old_plane_id v3: s/for_each_pipe/for_each_intel_crtc/ Cc: Thierry Reding Cc: Alex Villacís Lasso Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103223 Reviewed-by: Daniel Vetter Tested-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20171117191917.11506-3-ville.syrjala@linux.intel.com Signed-off-by: Ville Syrjälä (cherry picked from commit b1e01595a66dc206a2c75401ec4c285740537f3f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 114 ++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2f60679f99c3..44a9337e3f04 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2747,6 +2747,23 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state, crtc_state->active_planes); } +static void intel_plane_disable_noatomic(struct intel_crtc *crtc, + struct intel_plane *plane) +{ + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + + intel_set_plane_visible(crtc_state, plane_state, false); + + if (plane->id == PLANE_PRIMARY) + intel_pre_disable_primary_noatomic(&crtc->base); + + trace_intel_disable_plane(&plane->base, crtc); + plane->disable_plane(plane, crtc); +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -2804,12 +2821,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. 
*/ - intel_set_plane_visible(to_intel_crtc_state(crtc_state), - to_intel_plane_state(plane_state), - false); - intel_pre_disable_primary_noatomic(&intel_crtc->base); - trace_intel_disable_plane(primary, intel_crtc); - intel_plane->disable_plane(intel_plane, intel_crtc); + intel_plane_disable_noatomic(intel_crtc, intel_plane); return; @@ -5853,6 +5865,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; + struct intel_plane *plane; u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; @@ -5861,11 +5874,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, if (!intel_crtc->active) return; - if (crtc->primary->state->visible) { - intel_pre_disable_primary_noatomic(crtc); + for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) { + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - crtc->primary->state->visible = false; + if (plane_state->base.visible) + intel_plane_disable_noatomic(intel_crtc, plane); } state = drm_atomic_state_alloc(crtc->dev); @@ -14682,22 +14696,36 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) POSTING_READ(DPLL(pipe)); } -static bool -intel_check_plane_mapping(struct intel_crtc *crtc) +static bool intel_plane_mapping_ok(struct intel_crtc *crtc, + struct intel_plane *primary) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 val; + enum plane plane = primary->plane; + u32 val = I915_READ(DSPCNTR(plane)); - if (INTEL_INFO(dev_priv)->num_pipes == 1) - return true; + return (val & DISPLAY_PLANE_ENABLE) == 0 || + (val & DISPPLANE_SEL_PIPE_MASK) == DISPPLANE_SEL_PIPE(crtc->pipe); +} - val = I915_READ(DSPCNTR(!crtc->plane)); +static void +intel_sanitize_plane_mapping(struct 
drm_i915_private *dev_priv) +{ + struct intel_crtc *crtc; - if ((val & DISPLAY_PLANE_ENABLE) && - (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe)) - return false; + if (INTEL_GEN(dev_priv) >= 4) + return; - return true; + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_plane *plane = + to_intel_plane(crtc->base.primary); + + if (intel_plane_mapping_ok(crtc, plane)) + continue; + + DRM_DEBUG_KMS("%s attached to the wrong pipe, disabling plane\n", + plane->base.name); + intel_plane_disable_noatomic(crtc, plane); + } } static bool intel_crtc_has_encoders(struct intel_crtc *crtc) @@ -14753,33 +14781,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc, /* Disable everything but the primary plane */ for_each_intel_plane_on_crtc(dev, crtc, plane) { - if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) - continue; + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); - trace_intel_disable_plane(&plane->base, crtc); - plane->disable_plane(plane, crtc); + if (plane_state->base.visible && + plane->base.type != DRM_PLANE_TYPE_PRIMARY) + intel_plane_disable_noatomic(crtc, plane); } } - /* We need to sanitize the plane -> pipe mapping first because this will - * disable the crtc (and hence change the state) if it is wrong. Note - * that gen4+ has a fixed plane -> pipe mapping. */ - if (INTEL_GEN(dev_priv) < 4 && !intel_check_plane_mapping(crtc)) { - bool plane; - - DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n", - crtc->base.base.id, crtc->base.name); - - /* Pipe has the wrong plane attached and the plane is active. - * Temporarily change the plane mapping and disable everything - * ... */ - plane = crtc->plane; - crtc->base.primary->state->visible = true; - crtc->plane = !plane; - intel_crtc_disable_noatomic(&crtc->base, ctx); - crtc->plane = plane; - } - /* Adjust the state of the output pipe according to whether we * have active connectors/encoders. 
*/ if (crtc->active && !intel_crtc_has_encoders(crtc)) @@ -14887,14 +14897,18 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv) /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct intel_plane *primary = to_intel_plane(crtc->base.primary); - bool visible; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane *plane; - visible = crtc->active && primary->get_hw_state(primary); + for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + bool visible = plane->get_hw_state(plane); - intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), - to_intel_plane_state(primary->base.state), - visible); + intel_set_plane_visible(crtc_state, plane_state, visible); + } } static void intel_modeset_readout_hw_state(struct drm_device *dev) @@ -15092,6 +15106,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev, /* HW state is read out, now we need to sanitize this mess. */ get_encoder_power_domains(dev_priv); + intel_sanitize_plane_mapping(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } -- cgit v1.2.3 From 4488496d58200c7511842e049a4cc891d928da56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Nov 2017 14:54:11 +0200 Subject: drm/i915: Fix deadlock in i830_disable_pipe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i830_disable_pipe() gets called from the power well code, and thus we're already holding the power domain mutex. That means we can't call plane->get_hw_state() as it will also try to grab the same mutex and will thus deadlock. Replace the assert_plane() calls (which calls ->get_hw_state()) with just raw register reads in i830_disable_pipe(). 
As a bonus we can now get a warning if plane C is enabled even though we don't even expose it as a drm plane. v2: Do a separate WARN_ON() for each plane (Chris) Cc: Chris Wilson Reviewed-by: Chris Wilson Fixes: d87ce7640295 ("drm/i915: Add .get_hw_state() method for planes") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20171129125411.29055-1-ville.syrjala@linux.intel.com (cherry picked from commit 5816d9cbc0a0fbf232fe297cefcb85361a3cde90) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 44a9337e3f04..50f8443641b8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14684,8 +14684,11 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n", pipe_name(pipe)); - assert_planes_disabled(intel_get_crtc_for_pipe(dev_priv, PIPE_A)); - assert_planes_disabled(intel_get_crtc_for_pipe(dev_priv, PIPE_B)); + WARN_ON(I915_READ(DSPCNTR(PLANE_A)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_B)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(DSPCNTR(PLANE_C)) & DISPLAY_PLANE_ENABLE); + WARN_ON(I915_READ(CURCNTR(PIPE_A)) & CURSOR_MODE); + WARN_ON(I915_READ(CURCNTR(PIPE_B)) & CURSOR_MODE); I915_WRITE(PIPECONF(pipe), 0); POSTING_READ(PIPECONF(pipe)); -- cgit v1.2.3 From 79c48ccf2fec7c10105bd635d3bb1128167b1258 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 14 Jan 2018 12:39:00 +0200 Subject: nvme-pci: serialize pci resets Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2fe15351ac4e..4d8f63b3c5b6 100644 --- 
a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -95,7 +95,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_reset_ctrl); -static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) +int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) { int ret; @@ -104,6 +104,7 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) flush_work(&ctrl->reset_work); return ret; } +EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync); static void nvme_delete_ctrl_work(struct work_struct *work) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 4112fb6ce80d..77faf2049917 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -394,6 +394,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); +int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 62119078c2bf..dc9a4cf7c1d1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2537,7 +2537,7 @@ static void nvme_reset_prepare(struct pci_dev *pdev) static void nvme_reset_done(struct pci_dev *pdev) { struct nvme_dev *dev = pci_get_drvdata(pdev); - nvme_reset_ctrl(&dev->ctrl); + nvme_reset_ctrl_sync(&dev->ctrl); } static void nvme_shutdown(struct pci_dev *pdev) -- cgit v1.2.3 From 147b27e4bd08406a6abebedbb478b431ec197be1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 14 Jan 2018 12:39:01 +0200 Subject: nvme-pci: allocate device queues storage space at probe It may cause race by setting 'nvmeq' in nvme_init_request() because .init_request is called inside switching io scheduler, which may happen when the NVMe device is being reset and its nvme queues are being freed and created. We don't have any sync between the two paths. 
This patch changes the nvmeq allocation to occur at probe time so there is no way we can dereference it at init_request. [ 93.268391] kernel BUG at drivers/nvme/host/pci.c:408! [ 93.274146] invalid opcode: 0000 [#1] SMP [ 93.278618] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache sunrpc ipmi_ssif vfat fat intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel iTCO_wdt intel_cstate ipmi_si iTCO_vendor_support intel_uncore mxm_wmi mei_me ipmi_devintf intel_rapl_perf pcspkr sg ipmi_msghandler lpc_ich dcdbas mei shpchp acpi_power_meter wmi dm_multipath ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci nvme libata crc32c_intel nvme_core tg3 megaraid_sas ptp i2c_core pps_core dm_mirror dm_region_hash dm_log dm_mod [ 93.349071] CPU: 5 PID: 1842 Comm: sh Not tainted 4.15.0-rc2.ming+ #4 [ 93.356256] Hardware name: Dell Inc. 
PowerEdge R730xd/072T6D, BIOS 2.5.5 08/16/2017 [ 93.364801] task: 00000000fb8abf2a task.stack: 0000000028bd82d1 [ 93.371408] RIP: 0010:nvme_init_request+0x36/0x40 [nvme] [ 93.377333] RSP: 0018:ffffc90002537ca8 EFLAGS: 00010246 [ 93.383161] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000008 [ 93.391122] RDX: 0000000000000000 RSI: ffff880276ae0000 RDI: ffff88047bae9008 [ 93.399084] RBP: ffff88047bae9008 R08: ffff88047bae9008 R09: 0000000009dabc00 [ 93.407045] R10: 0000000000000004 R11: 000000000000299c R12: ffff880186bc1f00 [ 93.415007] R13: ffff880276ae0000 R14: 0000000000000000 R15: 0000000000000071 [ 93.422969] FS: 00007f33cf288740(0000) GS:ffff88047ba80000(0000) knlGS:0000000000000000 [ 93.431996] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 93.438407] CR2: 00007f33cf28e000 CR3: 000000047e5bb006 CR4: 00000000001606e0 [ 93.446368] Call Trace: [ 93.449103] blk_mq_alloc_rqs+0x231/0x2a0 [ 93.453579] blk_mq_sched_alloc_tags.isra.8+0x42/0x80 [ 93.459214] blk_mq_init_sched+0x7e/0x140 [ 93.463687] elevator_switch+0x5a/0x1f0 [ 93.467966] ? elevator_get.isra.17+0x52/0xc0 [ 93.472826] elv_iosched_store+0xde/0x150 [ 93.477299] queue_attr_store+0x4e/0x90 [ 93.481580] kernfs_fop_write+0xfa/0x180 [ 93.485958] __vfs_write+0x33/0x170 [ 93.489851] ? __inode_security_revalidate+0x4c/0x60 [ 93.495390] ? selinux_file_permission+0xda/0x130 [ 93.500641] ? 
_cond_resched+0x15/0x30 [ 93.504815] vfs_write+0xad/0x1a0 [ 93.508512] SyS_write+0x52/0xc0 [ 93.512113] do_syscall_64+0x61/0x1a0 [ 93.516199] entry_SYSCALL64_slow_path+0x25/0x25 [ 93.521351] RIP: 0033:0x7f33ce96aab0 [ 93.525337] RSP: 002b:00007ffe57570238 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 93.533785] RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00007f33ce96aab0 [ 93.541746] RDX: 0000000000000006 RSI: 00007f33cf28e000 RDI: 0000000000000001 [ 93.549707] RBP: 00007f33cf28e000 R08: 000000000000000a R09: 00007f33cf288740 [ 93.557669] R10: 00007f33cf288740 R11: 0000000000000246 R12: 00007f33cec42400 [ 93.565630] R13: 0000000000000006 R14: 0000000000000001 R15: 0000000000000000 [ 93.573592] Code: 4c 8d 40 08 4c 39 c7 74 16 48 8b 00 48 8b 04 08 48 85 c0 74 16 48 89 86 78 01 00 00 31 c0 c3 8d 4a 01 48 63 c9 48 c1 e1 03 eb de <0f> 0b 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 85 f6 53 48 89 [ 93.594676] RIP: nvme_init_request+0x36/0x40 [nvme] RSP: ffffc90002537ca8 [ 93.602273] ---[ end trace 810dde3993e5f14e ]--- Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 63 ++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index dc9a4cf7c1d1..b058b1e9b5bb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -75,7 +75,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); * Represents an NVM Express device. Each nvme_dev is a PCI function. 
*/ struct nvme_dev { - struct nvme_queue **queues; + struct nvme_queue *queues; struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; @@ -365,7 +365,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; - struct nvme_queue *nvmeq = dev->queues[0]; + struct nvme_queue *nvmeq = &dev->queues[0]; WARN_ON(hctx_idx != 0); WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); @@ -387,7 +387,7 @@ static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; - struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; + struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; if (!nvmeq->tags) nvmeq->tags = &dev->tagset.tags[hctx_idx]; @@ -403,7 +403,7 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, struct nvme_dev *dev = set->driver_data; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); int queue_idx = (set == &dev->tagset) ? 
hctx_idx + 1 : 0; - struct nvme_queue *nvmeq = dev->queues[queue_idx]; + struct nvme_queue *nvmeq = &dev->queues[queue_idx]; BUG_ON(!nvmeq); iod->nvmeq = nvmeq; @@ -1046,7 +1046,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); - struct nvme_queue *nvmeq = dev->queues[0]; + struct nvme_queue *nvmeq = &dev->queues[0]; struct nvme_command c; memset(&c, 0, sizeof(c)); @@ -1282,7 +1282,6 @@ static void nvme_free_queue(struct nvme_queue *nvmeq) if (nvmeq->sq_cmds) dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), nvmeq->sq_cmds, nvmeq->sq_dma_addr); - kfree(nvmeq); } static void nvme_free_queues(struct nvme_dev *dev, int lowest) @@ -1290,10 +1289,8 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest) int i; for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) { - struct nvme_queue *nvmeq = dev->queues[i]; dev->ctrl.queue_count--; - dev->queues[i] = NULL; - nvme_free_queue(nvmeq); + nvme_free_queue(&dev->queues[i]); } } @@ -1325,10 +1322,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { - struct nvme_queue *nvmeq = dev->queues[0]; + struct nvme_queue *nvmeq = &dev->queues[0]; - if (!nvmeq) - return; if (nvme_suspend_queue(nvmeq)) return; @@ -1384,13 +1379,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, return 0; } -static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth, int node) +static int nvme_alloc_queue(struct nvme_dev *dev, int qid, + int depth, int node) { - struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL, - node); - if (!nvmeq) - return NULL; + struct nvme_queue *nvmeq = &dev->queues[qid]; nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth), &nvmeq->cq_dma_addr, GFP_KERNEL); @@ -1409,17 +1401,15 @@ static struct nvme_queue 
*nvme_alloc_queue(struct nvme_dev *dev, int qid, nvmeq->q_depth = depth; nvmeq->qid = qid; nvmeq->cq_vector = -1; - dev->queues[qid] = nvmeq; dev->ctrl.queue_count++; - return nvmeq; + return 0; free_cqdma: dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); free_nvmeq: - kfree(nvmeq); - return NULL; + return -ENOMEM; } static int queue_request_irq(struct nvme_queue *nvmeq) @@ -1592,14 +1582,12 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) if (result < 0) return result; - nvmeq = dev->queues[0]; - if (!nvmeq) { - nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, - dev_to_node(dev->dev)); - if (!nvmeq) - return -ENOMEM; - } + result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, + dev_to_node(dev->dev)); + if (result) + return result; + nvmeq = &dev->queues[0]; aqa = nvmeq->q_depth - 1; aqa |= aqa << 16; @@ -1629,7 +1617,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { /* vector == qid - 1, match nvme_create_queue */ - if (!nvme_alloc_queue(dev, i, dev->q_depth, + if (nvme_alloc_queue(dev, i, dev->q_depth, pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { ret = -ENOMEM; break; @@ -1638,7 +1626,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev) max = min(dev->max_qid, dev->ctrl.queue_count - 1); for (i = dev->online_queues; i <= max; i++) { - ret = nvme_create_queue(dev->queues[i], i); + ret = nvme_create_queue(&dev->queues[i], i); if (ret) break; } @@ -1894,7 +1882,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) static int nvme_setup_io_queues(struct nvme_dev *dev) { - struct nvme_queue *adminq = dev->queues[0]; + struct nvme_queue *adminq = &dev->queues[0]; struct pci_dev *pdev = to_pci_dev(dev->dev); int result, nr_io_queues; unsigned long size; @@ -2020,7 +2008,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev, int queues) retry: timeout = ADMIN_TIMEOUT; for (; i > 0; i--, sent++) - if 
(nvme_delete_queue(dev->queues[i], opcode)) + if (nvme_delete_queue(&dev->queues[i], opcode)) break; while (sent--) { @@ -2212,7 +2200,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) queues = dev->online_queues - 1; for (i = dev->ctrl.queue_count - 1; i > 0; i--) - nvme_suspend_queue(dev->queues[i]); + nvme_suspend_queue(&dev->queues[i]); if (dead) { /* A device might become IO incapable very soon during @@ -2220,7 +2208,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * queue_count can be 0 here. */ if (dev->ctrl.queue_count) - nvme_suspend_queue(dev->queues[0]); + nvme_suspend_queue(&dev->queues[0]); } else { nvme_disable_io_queues(dev, queues); nvme_disable_admin_queue(dev, shutdown); @@ -2482,8 +2470,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) return -ENOMEM; - dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *), - GFP_KERNEL, node); + + dev->queues = kcalloc_node(num_possible_cpus() + 1, + sizeof(struct nvme_queue), GFP_KERNEL, node); if (!dev->queues) goto free; -- cgit v1.2.3 From bc8d062c36e3525e81ea8237ff0ab3264c2317b6 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 9 Jan 2018 20:46:49 -0500 Subject: block: only bdi_unregister() in del_gendisk() if !GENHD_FL_HIDDEN device_add_disk() will only call bdi_register_owner() if !GENHD_FL_HIDDEN, so it follows that del_gendisk() should only call bdi_unregister() if !GENHD_FL_HIDDEN. Found with code inspection. bdi_unregister() won't do any harm if bdi_register_owner() wasn't used but best to avoid the unnecessary call to bdi_unregister(). 
Fixes: 8ddcd65325 ("block: introduce GENHD_FL_HIDDEN") Signed-off-by: Mike Snitzer Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/genhd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/genhd.c b/block/genhd.c index 96a66f671720..00620e01e043 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -725,7 +725,8 @@ void del_gendisk(struct gendisk *disk) * Unregister bdi before releasing device numbers (as they can * get reused and we'd get clashes in sysfs). */ - bdi_unregister(disk->queue->backing_dev_info); + if (!(disk->flags & GENHD_FL_HIDDEN)) + bdi_unregister(disk->queue->backing_dev_info); blk_unregister_queue(disk); } else { WARN_ON(1); -- cgit v1.2.3 From 667257e8b2988c0183ba23e2bcd6900e87961606 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 11 Jan 2018 14:11:01 -0500 Subject: block: properly protect the 'queue' kobj in blk_unregister_queue The original commit e9a823fb34a8b (block: fix warning when I/O elevator is changed as request_queue is being removed) is pretty conflated. "conflated" because the resource being protected by q->sysfs_lock isn't the queue_flags (it is the 'queue' kobj). q->sysfs_lock serializes __elevator_change() (via elv_iosched_store) from racing with blk_unregister_queue(): 1) By holding q->sysfs_lock first, __elevator_change() can complete before a racing blk_unregister_queue(). 2) Conversely, __elevator_change() is testing for QUEUE_FLAG_REGISTERED in case elv_iosched_store() loses the race with blk_unregister_queue(), it needs a way to know the 'queue' kobj isn't there. Expand the scope of blk_unregister_queue()'s q->sysfs_lock use so it is held until after the 'queue' kobj is removed. To do so blk_mq_unregister_dev() must not also take q->sysfs_lock. So rename __blk_mq_unregister_dev() to blk_mq_unregister_dev(). 
Also, blk_unregister_queue() should use q->queue_lock to protect against any concurrent writes to q->queue_flags -- even though chances are the queue is being cleaned up so no concurrent writes are likely. Fixes: e9a823fb34a8b ("block: fix warning when I/O elevator is changed as request_queue is being removed") Signed-off-by: Mike Snitzer Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 9 +-------- block/blk-sysfs.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 79969c3c234f..a54b4b070f1c 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -248,7 +248,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) return ret; } -static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) +void blk_mq_unregister_dev(struct device *dev, struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i; @@ -265,13 +265,6 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) q->mq_sysfs_init_done = false; } -void blk_mq_unregister_dev(struct device *dev, struct request_queue *q) -{ - mutex_lock(&q->sysfs_lock); - __blk_mq_unregister_dev(dev, q); - mutex_unlock(&q->sysfs_lock); -} - void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) { kobject_init(&hctx->kobj, &blk_mq_hw_ktype); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 870484eaed1f..9272452ff456 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -929,12 +929,17 @@ void blk_unregister_queue(struct gendisk *disk) if (WARN_ON(!q)) return; + /* + * Protect against the 'queue' kobj being accessed + * while/after it is removed. 
+ */ mutex_lock(&q->sysfs_lock); - queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q); - mutex_unlock(&q->sysfs_lock); - wbt_exit(q); + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_REGISTERED, q); + spin_unlock_irq(q->queue_lock); + wbt_exit(q); if (q->mq_ops) blk_mq_unregister_dev(disk_to_dev(disk), q); @@ -946,4 +951,6 @@ void blk_unregister_queue(struct gendisk *disk) kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); kobject_put(&disk_to_dev(disk)->kobj); + + mutex_unlock(&q->sysfs_lock); } -- cgit v1.2.3 From fa70d2e2c4a0a54ced98260c6a176cc94c876d27 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 8 Jan 2018 22:01:13 -0500 Subject: block: allow gendisk's request_queue registration to be deferred Since I can remember DM has forced the block layer to allow the allocation and initialization of the request_queue to be distinct operations. Reason for this is block/genhd.c:add_disk() requires that the request_queue (and associated bdi) be tied to the gendisk before add_disk() is called -- because add_disk() also deals with exposing the request_queue via blk_register_queue(). DM's dynamic creation of arbitrary device types (and associated request_queue types) requires the DM device's gendisk be available so that DM table loads can establish a master/slave relationship with subordinate devices that are referenced by loaded DM tables -- using bd_link_disk_holder(). But until these DM tables, and their associated subordinate devices, are known DM cannot know what type of request_queue it needs -- nor what its queue_limits should be. This chicken and egg scenario has created all manner of problems for DM and, at times, the block layer. Summary of changes: - Add device_add_disk_no_queue_reg() and add_disk_no_queue_reg() variant that drivers may use to add a disk without also calling blk_register_queue(). Driver must call blk_register_queue() once its request_queue is fully initialized.
- Return early from blk_unregister_queue() if QUEUE_FLAG_REGISTERED is not set. It won't be set if driver used add_disk_no_queue_reg() but driver encounters an error and must del_gendisk() before calling blk_register_queue(). - Export blk_register_queue(). These changes allow DM to use add_disk_no_queue_reg() to anchor its gendisk as the "master" for master/slave relationships DM must establish with subordinate devices referenced in DM tables that get loaded. Once all "slave" devices for a DM device are known its request_queue can be properly initialized and then advertised via sysfs -- important improvement being that no request_queue resource initialization performed by blk_register_queue() is missed for DM devices anymore. Signed-off-by: Mike Snitzer Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 5 +++++ block/genhd.c | 20 +++++++++++++++++--- include/linux/genhd.h | 5 +++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9272452ff456..4a6a40ffd78e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -921,6 +921,7 @@ unlock: mutex_unlock(&q->sysfs_lock); return ret; } +EXPORT_SYMBOL_GPL(blk_register_queue); void blk_unregister_queue(struct gendisk *disk) { @@ -929,6 +930,10 @@ void blk_unregister_queue(struct gendisk *disk) if (WARN_ON(!q)) return; + /* Return early if disk->queue was never registered. */ + if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)) + return; + /* * Protect against the 'queue' kobj being accessed * while/after it is removed. 
diff --git a/block/genhd.c b/block/genhd.c index 00620e01e043..88a53c188cb7 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -629,16 +629,18 @@ exit: } /** - * device_add_disk - add partitioning information to kernel list + * __device_add_disk - add disk information to kernel list * @parent: parent device for the disk * @disk: per-device partitioning information + * @register_queue: register the queue if set to true * * This function registers the partitioning information in @disk * with the kernel. * * FIXME: error handling */ -void device_add_disk(struct device *parent, struct gendisk *disk) +static void __device_add_disk(struct device *parent, struct gendisk *disk, + bool register_queue) { dev_t devt; int retval; @@ -682,7 +684,8 @@ void device_add_disk(struct device *parent, struct gendisk *disk) exact_match, exact_lock, disk); } register_disk(parent, disk); - blk_register_queue(disk); + if (register_queue) + blk_register_queue(disk); /* * Take an extra ref on queue which will be put on disk_release() @@ -693,8 +696,19 @@ void device_add_disk(struct device *parent, struct gendisk *disk) disk_add_events(disk); blk_integrity_add(disk); } + +void device_add_disk(struct device *parent, struct gendisk *disk) +{ + __device_add_disk(parent, disk, true); +} EXPORT_SYMBOL(device_add_disk); +void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk) +{ + __device_add_disk(parent, disk, false); +} +EXPORT_SYMBOL(device_add_disk_no_queue_reg); + void del_gendisk(struct gendisk *disk) { struct disk_part_iter piter; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5144ebe046c9..5e3531027b51 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -395,6 +395,11 @@ static inline void add_disk(struct gendisk *disk) { device_add_disk(NULL, disk); } +extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk); +static inline void add_disk_no_queue_reg(struct gendisk *disk) +{ + 
device_add_disk_no_queue_reg(NULL, disk); +} extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); -- cgit v1.2.3 From 671ec859e5ee06ab0bf968e639a25576b18865ad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 15 Jan 2018 16:48:36 +0100 Subject: ALSA: seq: Process queue tempo/ppq change in a shot The SNDRV_SEQ_IOCTL_SET_QUEUE_TEMPO ioctl sets the tempo and the ppq in a single call, while the current implementation updates each value one by one. This is a bit racy, and also suboptimal from the performance POV, as each call does re-acquire the lock and invokes the update of ALSA timer resolution. This patch reorganizes the code slightly so that we change both the tempo and the ppq in a shot. The skew value can be put into the same lock, but this is rather a rarely used feature and completely independent from the tempo/ppq (it's evaluated only in the interrupt), so it's left as it was. Signed-off-by: Takashi Iwai --- sound/core/seq/seq_queue.c | 4 +--- sound/core/seq/seq_timer.c | 13 ++++++++----- sound/core/seq/seq_timer.h | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 79e0c5604ef8..0428e9061b47 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -497,9 +497,7 @@ int snd_seq_queue_timer_set_tempo(int queueid, int client, return -EPERM; } - result = snd_seq_timer_set_tempo(q->timer, info->tempo); - if (result >= 0) - result = snd_seq_timer_set_ppq(q->timer, info->ppq); + result = snd_seq_timer_set_tempo_ppq(q->timer, info->tempo, info->ppq); if (result >= 0 && info->skew_base > 0) result = snd_seq_timer_set_skew(q->timer, info->skew_value, info->skew_base); diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index b80985fbc334..23167578231f 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -191,14 +191,15 @@ int snd_seq_timer_set_tempo(struct snd_seq_timer * tmr,
int tempo) return 0; } -/* set current ppq */ -int snd_seq_timer_set_ppq(struct snd_seq_timer * tmr, int ppq) +/* set current tempo and ppq in a shot */ +int snd_seq_timer_set_tempo_ppq(struct snd_seq_timer *tmr, int tempo, int ppq) { + int changed; unsigned long flags; if (snd_BUG_ON(!tmr)) return -EINVAL; - if (ppq <= 0) + if (tempo <= 0 || ppq <= 0) return -EINVAL; spin_lock_irqsave(&tmr->lock, flags); if (tmr->running && (ppq != tmr->ppq)) { @@ -208,9 +209,11 @@ int snd_seq_timer_set_ppq(struct snd_seq_timer * tmr, int ppq) pr_debug("ALSA: seq: cannot change ppq of a running timer\n"); return -EBUSY; } - + changed = (tempo != tmr->tempo) || (ppq != tmr->ppq); + tmr->tempo = tempo; tmr->ppq = ppq; - snd_seq_timer_set_tick_resolution(tmr); + if (changed) + snd_seq_timer_set_tick_resolution(tmr); spin_unlock_irqrestore(&tmr->lock, flags); return 0; } diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h index 9506b661fe5b..62f390671096 100644 --- a/sound/core/seq/seq_timer.h +++ b/sound/core/seq/seq_timer.h @@ -131,7 +131,7 @@ int snd_seq_timer_stop(struct snd_seq_timer *tmr); int snd_seq_timer_start(struct snd_seq_timer *tmr); int snd_seq_timer_continue(struct snd_seq_timer *tmr); int snd_seq_timer_set_tempo(struct snd_seq_timer *tmr, int tempo); -int snd_seq_timer_set_ppq(struct snd_seq_timer *tmr, int ppq); +int snd_seq_timer_set_tempo_ppq(struct snd_seq_timer *tmr, int tempo, int ppq); int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position); int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position); int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base); -- cgit v1.2.3 From 69e0927b3774563c19b5fb32e91d75edc147fb62 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Sun, 14 Jan 2018 17:00:48 -0500 Subject: blk_rq_map_user_iov: fix error override During stress tests by syzkaller on the sg driver the block layer infrequently returns EINVAL. 
Closer inspection shows the block layer was trying to return ENOMEM (which is much more understandable) but for some reason overrode that useful error. Patch below does not show this (unchanged) line: ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy); That 'ret' was being overridden when that function failed. Signed-off-by: Douglas Gilbert Signed-off-by: Jens Axboe --- block/blk-map.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-map.c b/block/blk-map.c index b21f8e86f120..209eb3b45c54 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -114,7 +114,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); struct bio *bio = NULL; struct iov_iter i; - int ret; + int ret = -EINVAL; if (!iter_is_iovec(iter)) goto fail; @@ -143,7 +143,7 @@ unmap_rq: __blk_rq_unmap_user(bio); fail: rq->bio = NULL; - return -EINVAL; + return ret; } EXPORT_SYMBOL(blk_rq_map_user_iov); -- cgit v1.2.3 From c100ec49fdd2222836ff8a17c7bfcc7611d2ee2b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 8 Jan 2018 20:03:04 -0500 Subject: dm: fix incomplete request_queue initialization DM is no longer prone to having its request_queue be improperly initialized. Summary of changes: - defer DM's blk_register_queue() from add_disk()-time until dm_setup_md_queue() by using add_disk_no_queue_reg() in alloc_dev(). - dm_setup_md_queue() is updated to fully initialize DM's request_queue (_after_ all table loads have occurred and the request_queue's type, features and limits are known). A very welcome side-effect of these changes is DM no longer needs to: 1) backfill the "mq" sysfs entry (because historically DM didn't initialize the request_queue to use blk-mq until _after_ blk_register_queue() was called via add_disk()). 2) call elv_register_queue() to get .request_fn request-based DM device's "iosched" exposed in sysfs.
In addition, blk-mq debugfs support is now made available because request-based DM's blk-mq request_queue is now properly initialized before dm_setup_md_queue() calls blk_register_queue(). These changes also stave off the need to introduce new DM-specific workarounds in block core, e.g. this proposal: https://patchwork.kernel.org/patch/10067961/ In the end DM devices should be less unicorn in nature (relative to initialization and availability of block core infrastructure provided by the request_queue). Signed-off-by: Mike Snitzer Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/md/dm-rq.c | 9 --------- drivers/md/dm.c | 11 ++++++++++- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 9d32f25489c2..c28357f5cb0e 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -713,8 +713,6 @@ int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t) return error; } - elv_register_queue(md->queue); - return 0; } @@ -812,15 +810,8 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) } dm_init_md_queue(md); - /* backfill 'mq' sysfs registration normally done in blk_register_queue */ - err = blk_mq_register_dev(disk_to_dev(md->disk), q); - if (err) - goto out_cleanup_queue; - return 0; -out_cleanup_queue: - blk_cleanup_queue(q); out_tag_set: blk_mq_free_tag_set(md->tag_set); out_kfree_tag_set: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7475739fee49..8c26bfc35335 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1761,7 +1761,7 @@ static struct mapped_device *alloc_dev(int minor) goto bad; md->dax_dev = dax_dev; - add_disk(md->disk); + add_disk_no_queue_reg(md->disk); format_dev_t(md->name, MKDEV(_major, minor)); md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0); @@ -2021,6 +2021,7 @@ EXPORT_SYMBOL_GPL(dm_get_queue_limits); int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) { int r; + struct queue_limits limits; enum 
dm_queue_mode type = dm_get_md_type(md); switch (type) { @@ -2057,6 +2058,14 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) break; } + r = dm_calculate_queue_limits(t, &limits); + if (r) { + DMERR("Cannot calculate initial queue limits"); + return r; + } + dm_table_set_restrictions(t, md->queue, &limits); + blk_register_queue(md->disk); + return 0; } -- cgit v1.2.3 From 0c4c5860e9983eb3da7a3d73ca987643c3ed034b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 15 Jan 2018 14:58:21 +0100 Subject: hwmon: (ina2xx) Fix access to uninitialized mutex Initialize data->config_lock mutex before it is used by the driver code. This fixes following warning on Odroid XU3 boards: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc7-next-20180115-00001-gb75575dee3f2 #107 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x90/0xc8) [] (dump_stack) from [] (register_lock_class+0x1c0/0x59c) [] (register_lock_class) from [] (__lock_acquire+0x78/0x1850) [] (__lock_acquire) from [] (lock_acquire+0xc8/0x2b8) [] (lock_acquire) from [] (__mutex_lock+0x60/0xa0c) [] (__mutex_lock) from [] (mutex_lock_nested+0x1c/0x24) [] (mutex_lock_nested) from [] (ina2xx_set_shunt+0x70/0xb0) [] (ina2xx_set_shunt) from [] (ina2xx_probe+0x88/0x1b0) [] (ina2xx_probe) from [] (i2c_device_probe+0x1e0/0x2d0) [] (i2c_device_probe) from [] (driver_probe_device+0x2b8/0x4a0) [] (driver_probe_device) from [] (__driver_attach+0xfc/0x120) [] (__driver_attach) from [] (bus_for_each_dev+0x58/0x7c) [] (bus_for_each_dev) from [] (bus_add_driver+0x174/0x250) [] (bus_add_driver) from [] (driver_register+0x78/0xf4) [] (driver_register) from [] (i2c_register_driver+0x38/0xa8) [] (i2c_register_driver) from [] (do_one_initcall+0x48/0x18c) [] (do_one_initcall) from [] 
(kernel_init_freeable+0x110/0x1d4) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x114) [] (kernel_init) from [] (ret_from_fork+0x14/0x20) Fixes: 5d389b125186 ("hwmon: (ina2xx) Make calibration register value fixed") Signed-off-by: Marek Szyprowski Signed-off-by: Guenter Roeck --- drivers/hwmon/ina2xx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index e362a932fe8c..e9e6aeabbf84 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -454,6 +454,7 @@ static int ina2xx_probe(struct i2c_client *client, /* set the device type */ data->config = &ina2xx_config[chip]; + mutex_init(&data->config_lock); if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) { struct ina2xx_platform_data *pdata = dev_get_platdata(dev); @@ -480,8 +481,6 @@ static int ina2xx_probe(struct i2c_client *client, return -ENODEV; } - mutex_init(&data->config_lock); - data->groups[group++] = &ina2xx_group; if (id->driver_data == ina226) data->groups[group++] = &ina226_group; -- cgit v1.2.3 From b227c59b9b5b8ae52639c8980af853d2f654f90a Mon Sep 17 00:00:00 2001 From: Roy Shterman Date: Sun, 14 Jan 2018 12:39:02 +0200 Subject: nvme: host delete_work and reset_work on separate workqueues We need to ensure that delete_work will be hosted on a different workqueue than all the works we flush or cancel from it. Otherwise we may hit a circular dependency warning [1]. Also, given that delete_work flushes reset_work, host reset_work on nvme_reset_wq and delete_work on nvme_delete_wq. In addition, fix the flushing in the individual drivers to flush nvme_delete_wq when draining queued deletes. 
[1]: [ 178.491942] ============================================= [ 178.492718] [ INFO: possible recursive locking detected ] [ 178.493495] 4.9.0-rc4-c844263313a8-lb #3 Tainted: G OE [ 178.494382] --------------------------------------------- [ 178.495160] kworker/5:1/135 is trying to acquire lock: [ 178.495894] ( [ 178.496120] "nvme-wq" [ 178.496471] ){++++.+} [ 178.496599] , at: [ 178.496921] [] flush_work+0x1a6/0x2d0 [ 178.497670] but task is already holding lock: [ 178.498499] ( [ 178.498724] "nvme-wq" [ 178.499074] ){++++.+} [ 178.499202] , at: [ 178.499520] [] process_one_work+0x162/0x6a0 [ 178.500343] other info that might help us debug this: [ 178.501269] Possible unsafe locking scenario: [ 178.502113] CPU0 [ 178.502472] ---- [ 178.502829] lock( [ 178.503115] "nvme-wq" [ 178.503467] ); [ 178.503716] lock( [ 178.504001] "nvme-wq" [ 178.504353] ); [ 178.504601] *** DEADLOCK *** [ 178.505441] May be due to missing lock nesting notation [ 178.506453] 2 locks held by kworker/5:1/135: [ 178.507068] #0: [ 178.507330] ( [ 178.507598] "nvme-wq" [ 178.507726] ){++++.+} [ 178.508079] , at: [ 178.508173] [] process_one_work+0x162/0x6a0 [ 178.509004] #1: [ 178.509265] ( [ 178.509532] (&ctrl->delete_work) [ 178.509795] ){+.+.+.} [ 178.510145] , at: [ 178.510239] [] process_one_work+0x162/0x6a0 [ 178.511070] stack backtrace: : [ 178.511693] CPU: 5 PID: 135 Comm: kworker/5:1 Tainted: G OE 4.9.0-rc4-c844263313a8-lb #3 [ 178.512974] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014 [ 178.514247] Workqueue: nvme-wq nvme_del_ctrl_work [nvme_tcp] [ 178.515071] ffffc2668175bae0 ffffffffa7450823 ffffffffa88abd80 ffffffffa88abd80 [ 178.516195] ffffc2668175bb98 ffffffffa70eb012 ffffffffa8d8d90d ffff9c472e9ea700 [ 178.517318] ffff9c472e9ea700 ffff9c4700000000 ffff9c4700007200 ab83be61bec0d50e [ 178.518443] Call Trace: [ 178.518807] [] dump_stack+0x85/0xc2 [ 178.519542] [] __lock_acquire+0x17d2/0x18f0 [ 178.520377] [] ? 
serial8250_console_putchar+0x27/0x30 [ 178.521330] [] ? wait_for_xmitr+0xa0/0xa0 [ 178.522174] [] ? flush_work+0x18b/0x2d0 [ 178.522975] [] lock_acquire+0x11b/0x220 [ 178.523753] [] ? flush_work+0x1a6/0x2d0 [ 178.524535] [] flush_work+0x1c9/0x2d0 [ 178.525291] [] ? flush_work+0x1a6/0x2d0 [ 178.526077] [] ? flush_workqueue_prep_pwqs+0x220/0x220 [ 178.527040] [] __cancel_work_timer+0x10f/0x1d0 [ 178.527907] [] ? vprintk_default+0x29/0x40 [ 178.528726] [] ? printk+0x48/0x50 [ 178.529434] [] cancel_delayed_work_sync+0x13/0x20 [ 178.530381] [] nvme_stop_ctrl+0x5b/0x70 [nvme_core] [ 178.531314] [] nvme_del_ctrl_work+0x2c/0x50 [nvme_tcp] [ 178.532271] [] process_one_work+0x1e1/0x6a0 [ 178.533101] [] ? process_one_work+0x162/0x6a0 [ 178.533954] [] worker_thread+0x4e/0x490 [ 178.534735] [] ? process_one_work+0x6a0/0x6a0 [ 178.535588] [] ? process_one_work+0x6a0/0x6a0 [ 178.536441] [] kthread+0xff/0x120 [ 178.537149] [] ? kthread_park+0x60/0x60 [ 178.538094] [] ? kthread_park+0x60/0x60 [ 178.538900] [] ret_from_fork+0x2a/0x40 Signed-off-by: Roy Shterman Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 44 +++++++++++++++++++++++++++++++++++++++----- drivers/nvme/host/nvme.h | 2 ++ drivers/nvme/host/rdma.c | 2 +- drivers/nvme/target/loop.c | 2 +- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4d8f63b3c5b6..fde6fd2e7eef 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -65,9 +65,26 @@ static bool streams; module_param(streams, bool, 0644); MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); +/* + * nvme_wq - hosts nvme related works that are not reset or delete + * nvme_reset_wq - hosts nvme reset works + * nvme_delete_wq - hosts nvme delete works + * + * nvme_wq will host works such are scan, aen handling, fw activation, + * keep-alive error recovery, periodic reconnects etc. 
nvme_reset_wq + * runs reset works which also flush works hosted on nvme_wq for + * serialization purposes. nvme_delete_wq host controller deletion + * works which flush reset works for serialization. + */ struct workqueue_struct *nvme_wq; EXPORT_SYMBOL_GPL(nvme_wq); +struct workqueue_struct *nvme_reset_wq; +EXPORT_SYMBOL_GPL(nvme_reset_wq); + +struct workqueue_struct *nvme_delete_wq; +EXPORT_SYMBOL_GPL(nvme_delete_wq); + static DEFINE_IDA(nvme_subsystems_ida); static LIST_HEAD(nvme_subsystems); static DEFINE_MUTEX(nvme_subsystems_lock); @@ -89,7 +106,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) return -EBUSY; - if (!queue_work(nvme_wq, &ctrl->reset_work)) + if (!queue_work(nvme_reset_wq, &ctrl->reset_work)) return -EBUSY; return 0; } @@ -123,7 +140,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) return -EBUSY; - if (!queue_work(nvme_wq, &ctrl->delete_work)) + if (!queue_work(nvme_delete_wq, &ctrl->delete_work)) return -EBUSY; return 0; } @@ -3526,16 +3543,26 @@ EXPORT_SYMBOL_GPL(nvme_reinit_tagset); int __init nvme_core_init(void) { - int result; + int result = -ENOMEM; nvme_wq = alloc_workqueue("nvme-wq", WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); if (!nvme_wq) - return -ENOMEM; + goto out; + + nvme_reset_wq = alloc_workqueue("nvme-reset-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvme_reset_wq) + goto destroy_wq; + + nvme_delete_wq = alloc_workqueue("nvme-delete-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvme_delete_wq) + goto destroy_reset_wq; result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme"); if (result < 0) - goto destroy_wq; + goto destroy_delete_wq; nvme_class = class_create(THIS_MODULE, "nvme"); if (IS_ERR(nvme_class)) { @@ -3554,8 +3581,13 @@ destroy_class: class_destroy(nvme_class); unregister_chrdev: unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); +destroy_delete_wq: + 
destroy_workqueue(nvme_delete_wq); +destroy_reset_wq: + destroy_workqueue(nvme_reset_wq); destroy_wq: destroy_workqueue(nvme_wq); +out: return result; } @@ -3565,6 +3597,8 @@ void nvme_core_exit(void) class_destroy(nvme_subsys_class); class_destroy(nvme_class); unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); + destroy_workqueue(nvme_delete_wq); + destroy_workqueue(nvme_reset_wq); destroy_workqueue(nvme_wq); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 77faf2049917..8e7fc1b041b7 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -32,6 +32,8 @@ extern unsigned int admin_timeout; #define NVME_KATO_GRACE 10 extern struct workqueue_struct *nvme_wq; +extern struct workqueue_struct *nvme_reset_wq; +extern struct workqueue_struct *nvme_delete_wq; enum { NVME_NS_LBA = 0, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 75d6956eb380..38e183461d9d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2029,7 +2029,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) } mutex_unlock(&nvme_rdma_ctrl_mutex); - flush_workqueue(nvme_wq); + flush_workqueue(nvme_delete_wq); } static struct ib_client nvme_rdma_ib_client = { diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index fdfcc961029f..7991ec3a17db 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -717,7 +717,7 @@ static void __exit nvme_loop_cleanup_module(void) nvme_delete_ctrl(&ctrl->ctrl); mutex_unlock(&nvme_loop_ctrl_mutex); - flush_workqueue(nvme_wq); + flush_workqueue(nvme_delete_wq); } module_init(nvme_loop_init_module); -- cgit v1.2.3 From 8adb8c147b2f6383a1676325c27e3dbc29d2fba7 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 14 Jan 2018 16:14:27 +0900 Subject: nvme: fix comment typos in nvme_create_io_queues fix comment typos in nvme_create_io_queues() like below. 
_aount_ to _amount_ _an_ to _can_ Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b058b1e9b5bb..13057aee84e6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1633,8 +1633,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev) /* * Ignore failing Create SQ/CQ commands, we can continue with less - * than the desired aount of queues, and even a controller without - * I/O queues an still be used to issue admin commands. This might + * than the desired amount of queues, and even a controller without + * I/O queues can still be used to issue admin commands. This might * be useful to upgrade a buggy firmware for example. */ return ret >= 0 ? 0 : ret; -- cgit v1.2.3 From df351ef73789345b4b6c00434c5fd1fca7175643 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 11 Jan 2018 13:38:00 -0800 Subject: nvme-fabrics: fix memory leak when parsing host ID option We use match_strdup() to get a copy of the option string for host ID string, but we just pass it to uuid_parse() and don't store the string pointer, so we need to kfree() the string after parsing it. 
Signed-off-by: Roland Dreier Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 2f68befd31bf..eb46967bb0d5 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -738,7 +738,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } - if (uuid_parse(p, &hostid)) { + ret = uuid_parse(p, &hostid); + kfree(p); + if (ret) { pr_err("Invalid hostid %s\n", p); ret = -EINVAL; goto out; -- cgit v1.2.3 From 423b4487fb23cc9bcbf14f748915bff46151506a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 14 Jan 2018 18:34:22 +0200 Subject: nvmet: release a ns reference in nvmet_req_uninit if needed nvmet_req_init looked up a namespace and took a reference on it (unless it failed prior to that). If the request is uninitialized (in error cases) we need to remove that reference in case it was taken, otherwise we leak a namespace reference when calling nvmet_req_uninit. 
Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 7282ea8d3b96..0bd737117a80 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -512,6 +512,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->sg_cnt = 0; req->transfer_len = 0; req->rsp->status = 0; + req->ns = NULL; /* no support for fused commands yet */ if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) { @@ -557,6 +558,8 @@ EXPORT_SYMBOL_GPL(nvmet_req_init); void nvmet_req_uninit(struct nvmet_req *req) { percpu_ref_put(&req->sq->ref); + if (req->ns) + nvmet_put_namespace(req->ns); } EXPORT_SYMBOL_GPL(nvmet_req_uninit); -- cgit v1.2.3 From 1f5c6855260141ac3115e9a065491ee2ac07f9bc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Jan 2018 01:46:25 +0100 Subject: PM / runtime: Check ignore_children in pm_runtime_need_not_resume() Modify pm_runtime_need_not_resume() to make it avoid taking power.child_count for devices with power.ignore_children which is consistent with the runtime PM usage of these fields. Suggested-by: Ulf Hansson Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Ulf Hansson --- drivers/base/power/runtime.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 84832f1a75bf..cb5e48b86453 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1616,7 +1616,8 @@ void pm_runtime_drop_link(struct device *dev) static bool pm_runtime_need_not_resume(struct device *dev) { return atomic_read(&dev->power.usage_count) <= 1 && - atomic_read(&dev->power.child_count) == 0; + (atomic_read(&dev->power.child_count) == 0 || + dev->power.ignore_children); } /** -- cgit v1.2.3 From e1681599345b8466786b6e54a2db2a00a068a3f3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 14 Jan 2018 21:01:48 +0100 Subject: ACPI / LPSS: Do not instantiate platform_dev for devs without MMIO resources acpi_lpss_create_device() skips handling LPSS devices which do not have any mmio resources in their resource list (typically these devices are disabled by the firmware). But since the LPSS code does not bind to the device, acpi_bus_attach() ends up still creating a platform device for it and the regular platform_driver for the ACPI HID still tries to bind to it. This happens e.g. on some boards which do not use the pwm-controller and have an empty or invalid resource-table for it. Currently this causes these error messages to get logged: [ 3.281966] pwm-lpss 80862288:00: invalid resource [ 3.287098] pwm-lpss: probe of 80862288:00 failed with error -22 This commit stops the undesirable creation of a platform_device for disabled LPSS devices by setting pnp.type.platform_id to 0. Note that acpi_scan_attach_handler() also sets pnp.type.platform_id to 0 when there is a matching handler for the device and that handler has no attach callback, so we simply behave as a handler without an attach function in this case. Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpi_lpss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index d78c57a95b86..2bcffec8dbf0 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -601,6 +601,8 @@ static int acpi_lpss_create_device(struct acpi_device *adev, acpi_dev_free_resource_list(&resource_list); if (!pdata->mmio_base) { + /* Avoid acpi_bus_attach() instantiating a pdev for this dev. */ + adev->pnp.type.platform_id = 0; /* Skip the device, but continue the namespace scan. */ ret = 0; goto err_out; -- cgit v1.2.3 From abde587b61a3ddb2918385f95ef2b3ca37d5a017 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 15 Jan 2018 16:51:20 +0100 Subject: x86/jailhouse: Add PCI dependency Building jailhouse support without PCI results in a link error: arch/x86/kernel/jailhouse.o: In function `jailhouse_init_platform': jailhouse.c:(.init.text+0x235): undefined reference to `pci_probe' arch/x86/kernel/jailhouse.o: In function `jailhouse_pci_arch_init': jailhouse.c:(.init.text+0x265): undefined reference to `pci_direct_init' jailhouse.c:(.init.text+0x26c): undefined reference to `pcibios_last_bus' Add the missing Kconfig dependency. 
Fixes: a0c01e4bb92d ("x86/jailhouse: Initialize PCI support") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: Jan Kiszka Link: https://lkml.kernel.org/r/20180115155150.51407-1-arnd@arndb.de --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a936e29245d0..390be2eb153d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -798,7 +798,7 @@ config PARAVIRT_CLOCK config JAILHOUSE_GUEST bool "Jailhouse non-root cell support" - depends on X86_64 + depends on X86_64 && PCI select X86_PM_TIMER ---help--- This option allows to run Linux as guest in a Jailhouse non-root -- cgit v1.2.3 From a0e3a18f4baf8e3754ac1e56f0ade924d0c0c721 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 15 Jan 2018 10:47:09 -0500 Subject: ring-buffer: Bring back context level recursive checks Commit 1a149d7d3f45 ("ring-buffer: Rewrite trace_recursive_(un)lock() to be simpler") replaced the context level recursion checks with a simple counter. This would prevent the ring buffer code from recursively calling itself more than the max number of contexts that exist (Normal, softirq, irq, nmi). But this change caused a lockup in a specific case, which was during suspend and resume using a global clock. Adding a stack dump to see where this occurred, the issue was in the trace global clock itself: trace_buffer_lock_reserve+0x1c/0x50 __trace_graph_entry+0x2d/0x90 trace_graph_entry+0xe8/0x200 prepare_ftrace_return+0x69/0xc0 ftrace_graph_caller+0x78/0xa8 queued_spin_lock_slowpath+0x5/0x1d0 trace_clock_global+0xb0/0xc0 ring_buffer_lock_reserve+0xf9/0x390 The function graph tracer traced queued_spin_lock_slowpath that was called by trace_clock_global. This pointed out that the trace_clock_global() is not reentrant, as it takes a spin lock. It depended on the ring buffer recursive lock to prevent that from happening. 
By removing the context detection and adding just a max number of allowable recursions, it allowed the trace_clock_global() to be entered again and try to retake the spinlock it already held, causing a deadlock. Fixes: 1a149d7d3f45 ("ring-buffer: Rewrite trace_recursive_(un)lock() to be simpler") Reported-by: David Weinehall Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 62 +++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9ab18995ff1e..0cddf60186da 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2534,29 +2534,59 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified * by the current task between lock and unlock. But it can - * be modified more than once via an interrupt. There are four - * different contexts that we need to consider. + * be modified more than once via an interrupt. To pass this + * information from the lock to the unlock without having to + * access the 'in_interrupt()' functions again (which do show + * a bit of overhead in something as critical as function tracing, + * we use a bitmask trick. * - * Normal context. - * SoftIRQ context - * IRQ context - * NMI context + * bit 0 = NMI context + * bit 1 = IRQ context + * bit 2 = SoftIRQ context + * bit 3 = normal context. * - * If for some reason the ring buffer starts to recurse, we - * only allow that to happen at most 4 times (one for each - * context). If it happens 5 times, then we consider this a - * recusive loop and do not let it go further. + * This works because this is the order of contexts that can + * preempt other contexts. A SoftIRQ never preempts an IRQ + * context. 
+ * + * When the context is determined, the corresponding bit is + * checked and set (if it was set, then a recursion of that context + * happened). + * + * On unlock, we need to clear this bit. To do so, just subtract + * 1 from the current_context and AND it to itself. + * + * (binary) + * 101 - 1 = 100 + * 101 & 100 = 100 (clearing bit zero) + * + * 1010 - 1 = 1001 + * 1010 & 1001 = 1000 (clearing bit 1) + * + * The least significant bit can be cleared this way, and it + * just so happens that it is the same bit corresponding to + * the current context. */ static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { - if (cpu_buffer->current_context >= 4) + unsigned int val = cpu_buffer->current_context; + unsigned long pc = preempt_count(); + int bit; + + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) + bit = RB_CTX_NORMAL; + else + bit = pc & NMI_MASK ? RB_CTX_NMI : + pc & HARDIRQ_MASK ? RB_CTX_IRQ : + pc & SOFTIRQ_OFFSET ? 2 : RB_CTX_SOFTIRQ; + + if (unlikely(val & (1 << bit))) return 1; - cpu_buffer->current_context++; - /* Interrupts must see this update */ - barrier(); + val |= (1 << bit); + cpu_buffer->current_context = val; return 0; } @@ -2564,9 +2594,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) static __always_inline void trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { - /* Don't let the dec leak out */ - barrier(); - cpu_buffer->current_context--; + cpu_buffer->current_context &= cpu_buffer->current_context - 1; } /** -- cgit v1.2.3 From d542296a4d0d9f41d0186edcac2baba1b674d02f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 8 Jan 2018 08:23:18 -0800 Subject: 9p: add missing module license for xen transport The 9P of Xen module is missing required license and module information. 
See https://bugzilla.kernel.org/show_bug.cgi?id=198109 Reported-by: Alan Bartlett Fixes: 868eb122739a ("xen/9pfs: introduce Xen 9pfs transport driver") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/9p/trans_xen.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 325c56043007..086a4abdfa7c 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -543,3 +543,7 @@ static void p9_trans_xen_exit(void) return xenbus_unregister_driver(&xen_9pfs_front_driver); } module_exit(p9_trans_xen_exit); + +MODULE_AUTHOR("Stefano Stabellini "); +MODULE_DESCRIPTION("Xen Transport for 9P"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 66940f35d5a81d5969bb5543171c70a434fc5110 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 10 Jan 2018 16:03:05 +0200 Subject: ptr_ring: document usage around __ptr_ring_peek This explains why is the net usage of __ptr_ring_peek actually ok without locks. Signed-off-by: Michael S. Tsirkin Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6866df4f31b5..d72b2e7dd500 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -174,6 +174,15 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL. * If ring is never resized, and if the pointer is merely * tested, there's no need to take the lock - see e.g. __ptr_ring_empty. + * However, if called outside the lock, and if some other CPU + * consumes ring entries at the same time, the value returned + * is not guaranteed to be correct. 
+ * In this case - to avoid incorrectly detecting the ring + * as empty - the CPU consuming the ring entries is responsible + * for either consuming all ring entries until the ring is empty, + * or synchronizing with some other CPU and causing it to + * execute __ptr_ring_peek and/or consume the ring enteries + * after the synchronization point. */ static inline void *__ptr_ring_peek(struct ptr_ring *r) { @@ -182,10 +191,7 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r) return NULL; } -/* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). Callers must take consumer_lock - * if the ring is ever resized - see e.g. ptr_ring_empty. - */ +/* See __ptr_ring_peek above for locking rules. */ static inline bool __ptr_ring_empty(struct ptr_ring *r) { return !__ptr_ring_peek(r); -- cgit v1.2.3 From 0171c41835591e9aa2e384b703ef9a6ae367c610 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 10 Jan 2018 16:24:45 +0100 Subject: ppp: unlock all_ppp_mutex before registering device ppp_dev_uninit(), which is the .ndo_uninit() handler of PPP devices, needs to lock pn->all_ppp_mutex. Therefore we mustn't call register_netdevice() with pn->all_ppp_mutex already locked, or we'd deadlock in case register_netdevice() fails and calls .ndo_uninit(). Fortunately, we can unlock pn->all_ppp_mutex before calling register_netdevice(). This lock protects pn->units_idr, which isn't used in the device registration process. However, keeping pn->all_ppp_mutex locked during device registration did ensure that no device in transient state would be published in pn->units_idr. In practice, unlocking it before calling register_netdevice() doesn't change this property: ppp_unit_register() is called with 'ppp_mutex' locked and all searches done in pn->units_idr hold this lock too. 
Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") Reported-and-tested-by: syzbot+367889b9c9e279219175@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index d8e5747ff4e3..264d4af0bf69 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1006,17 +1006,18 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) if (!ifname_is_set) snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index); + mutex_unlock(&pn->all_ppp_mutex); + ret = register_netdevice(ppp->dev); if (ret < 0) goto err_unit; atomic_inc(&ppp_unit_count); - mutex_unlock(&pn->all_ppp_mutex); - return 0; err_unit: + mutex_lock(&pn->all_ppp_mutex); unit_put(&pn->units_idr, ppp->file.index); err: mutex_unlock(&pn->all_ppp_mutex); -- cgit v1.2.3 From 6200b430220f3b9207861b16f57916950f4ecd8e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 17:30:22 +0100 Subject: net: cs89x0: add MODULE_LICENSE This driver lacks a MODULE_LICENSE tag, leading to a Kbuild warning: WARNING: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/cirrus/cs89x0.o This adds license, author, and description according to the comment block at the start of the file. Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cirrus/cs89x0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index 410a0a95130b..b3e7fafee3df 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -1913,3 +1913,7 @@ static struct platform_driver cs89x0_driver = { module_platform_driver_probe(cs89x0_driver, cs89x0_platform_probe); #endif /* CONFIG_CS89x0_PLATFORM */ + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Crystal Semiconductor (Now Cirrus Logic) CS89[02]0 network driver"); +MODULE_AUTHOR("Russell Nelson "); -- cgit v1.2.3 From 749439bfac6e1a2932c582e2699f91d329658196 Mon Sep 17 00:00:00 2001 From: Mike Maloney Date: Wed, 10 Jan 2018 12:45:10 -0500 Subject: ipv6: fix udpv6 sendmsg crash caused by too small MTU The logic in __ip6_append_data() assumes that the MTU is at least large enough for the headers. A device's MTU may be adjusted after being added while sendmsg() is processing data, resulting in __ip6_append_data() seeing any MTU. For an mtu smaller than the size of the fragmentation header, the math results in a negative 'maxfraglen', which causes problems when refragmenting any previous skb in the skb_write_queue, leaving it possibly malformed. Instead sendmsg returns EINVAL when the mtu is calculated to be less than IPV6_MIN_MTU. Found by syzkaller: kernel BUG at ./include/linux/skbuff.h:2064! 
invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 14216 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d0b68580 task.stack: ffff8801ac6b8000 RIP: 0010:__skb_pull include/linux/skbuff.h:2064 [inline] RIP: 0010:__ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: 0018:ffff8801ac6bf570 EFLAGS: 00010216 RAX: 0000000000010000 RBX: 0000000000000028 RCX: ffffc90003cce000 RDX: 00000000000001b8 RSI: ffffffff839df06f RDI: ffff8801d9478ca0 RBP: ffff8801ac6bf780 R08: ffff8801cc3f1dbc R09: 0000000000000000 R10: ffff8801ac6bf7a0 R11: 43cb4b7b1948a9e7 R12: ffff8801cc3f1dc8 R13: ffff8801cc3f1d40 R14: 0000000000001036 R15: dffffc0000000000 FS: 00007f43d740c700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7834984000 CR3: 00000001d79b9000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip6_finish_skb include/net/ipv6.h:911 [inline] udp_v6_push_pending_frames+0x255/0x390 net/ipv6/udp.c:1093 udpv6_sendmsg+0x280d/0x31a0 net/ipv6/udp.c:1363 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x352/0x5a0 net/socket.c:1750 SyS_sendto+0x40/0x50 net/socket.c:1718 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512e9 RSP: 002b:00007f43d740bc08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000007180a8 RCX: 00000000004512e9 RDX: 000000000000002e RSI: 0000000020d08000 RDI: 0000000000000005 RBP: 0000000000000086 R08: 00000000209c1000 R09: 000000000000001c R10: 0000000000040800 R11: 0000000000000216 R12: 00000000004b9c69 R13: 00000000ffffffff R14: 0000000000000005 R15: 00000000202c2000 Code: 9e 01 fe e9 c5 e8 ff ff e8 
7f 9e 01 fe e9 4a ea ff ff 48 89 f7 e8 52 9e 01 fe e9 aa eb ff ff e8 a8 b6 cf fd 0f 0b e8 a1 b6 cf fd <0f> 0b 49 8d 45 78 4d 8d 45 7c 48 89 85 78 fe ff ff 49 8d 85 ba RIP: __skb_pull include/linux/skbuff.h:2064 [inline] RSP: ffff8801ac6bf570 RIP: __ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: ffff8801ac6bf570 Reported-by: syzbot Signed-off-by: Mike Maloney Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 688ba5f7516b..8fe58a2d305c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1206,14 +1206,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } + if (mtu < IPV6_MIN_MTU) + return -EINVAL; cork->base.fragsize = mtu; if (dst_allfrag(rt->dst.path)) cork->base.flags |= IPCORK_ALLFRAG; -- cgit v1.2.3 From 59b36613e85fb16ebf9feaf914570879cd5c2a21 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 10 Jan 2018 12:50:25 -0800 Subject: tipc: fix a memory leak in tipc_nl_node_get_link() When tipc_node_find_by_name() fails, the nlmsg is not freed. While on it, switch to a goto label to properly free it. Fixes: be9c086715c ("tipc: narrow down exposure of struct tipc_node") Reported-by: Dmitry Vyukov Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Cong Wang Acked-by: Ying Xue Signed-off-by: David S. 
Miller --- net/tipc/node.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 507017fe0f1b..9036d8756e73 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1880,36 +1880,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) if (strcmp(name, tipc_bclink_name) == 0) { err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } else { int bearer_id; struct tipc_node *node; struct tipc_link *link; node = tipc_node_find_by_name(net, name, &bearer_id); - if (!node) - return -EINVAL; + if (!node) { + err = -EINVAL; + goto err_free; + } tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { tipc_node_read_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; + err = -EINVAL; + goto err_free; } err = __tipc_nl_add_link(net, &msg, link, 0); tipc_node_read_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } return genlmsg_reply(msg.skb, info); + +err_free: + nlmsg_free(msg.skb); + return err; } int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) -- cgit v1.2.3 From 123af9043e93cb6f235207d260d50f832cdb5439 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 15 Jan 2018 11:08:38 +0300 Subject: ASoC: au1x: Fix timeout tests in au1xac97c_ac97_read() The loop timeout doesn't work because it's a post op and ends with "tmo" set to -1. I changed it from a post-op to a pre-op and I changed the starting value from 5 to 6 so we still iterate 5 times. I left the other starting value as it was because it's a large number. 
Fixes: b3c70c9ea62a ("ASoC: Alchemy AC97C/I2SC audio support") Signed-off-by: Dan Carpenter Signed-off-by: Mark Brown --- sound/soc/au1x/ac97c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/au1x/ac97c.c b/sound/soc/au1x/ac97c.c index 29a97d52e8ad..66d6c52e7761 100644 --- a/sound/soc/au1x/ac97c.c +++ b/sound/soc/au1x/ac97c.c @@ -91,8 +91,8 @@ static unsigned short au1xac97c_ac97_read(struct snd_ac97 *ac97, do { mutex_lock(&ctx->lock); - tmo = 5; - while ((RD(ctx, AC97_STATUS) & STAT_CP) && tmo--) + tmo = 6; + while ((RD(ctx, AC97_STATUS) & STAT_CP) && --tmo) udelay(21); /* wait an ac97 frame time */ if (!tmo) { pr_debug("ac97rd timeout #1\n"); @@ -105,7 +105,7 @@ static unsigned short au1xac97c_ac97_read(struct snd_ac97 *ac97, * poll, Forrest, poll... */ tmo = 0x10000; - while ((RD(ctx, AC97_STATUS) & STAT_CP) && tmo--) + while ((RD(ctx, AC97_STATUS) & STAT_CP) && --tmo) asm volatile ("nop"); data = RD(ctx, AC97_CMDRESP); -- cgit v1.2.3 From cbbdf8433a5f117b1a2119ea30fc651b61ef7570 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 10 Jan 2018 13:00:39 -0800 Subject: netlink: extack needs to be reset each time through loop syzbot triggered the WARN_ON in netlink_ack testing the bad_attr value. The problem is that netlink_rcv_skb loops over the skb repeatedly invoking the callback and without resetting the extack leaving potentially stale data. Initializing each time through avoids the WARN_ON. Fixes: 2d4bc93368f5a ("netlink: extended ACK reporting") Reported-by: syzbot+315fa6766d0f7c359327@syzkaller.appspotmail.com Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 79cc1bf36e4a..47ef2d8683d6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2384,7 +2384,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, struct netlink_ext_ack *)) { - struct netlink_ext_ack extack = {}; + struct netlink_ext_ack extack; struct nlmsghdr *nlh; int err; @@ -2405,6 +2405,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_type < NLMSG_MIN_TYPE) goto ack; + memset(&extack, 0, sizeof(extack)); err = cb(skb, nlh, &extack); if (err == -EINTR) goto skip; -- cgit v1.2.3 From abaca806fd13afd069e04e883de8ec75924b0598 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 15 Jan 2018 09:57:39 +0100 Subject: IIO: ADC: stm32-dfsdm: code optimization Use of_device_get_match_data to optimize the source code. No check is needed on dev_data as match table is defined in driver. 
Signed-off-by: Arnaud Pouliquen Acked-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/adc/stm32-dfsdm-adc.c | 9 +-------- drivers/iio/adc/stm32-dfsdm-core.c | 8 +------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index 5e871404f565..3fe9b34ac6af 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -1087,18 +1087,11 @@ static int stm32_dfsdm_adc_probe(struct platform_device *pdev) struct device_node *np = dev->of_node; const struct stm32_dfsdm_dev_data *dev_data; struct iio_dev *iio; - const struct of_device_id *of_id; char *name; int ret, irq, val; - of_id = of_match_node(stm32_dfsdm_adc_match, np); - if (!of_id->data) { - dev_err(&pdev->dev, "Data associated to device is missing\n"); - return -EINVAL; - } - - dev_data = (const struct stm32_dfsdm_dev_data *)of_id->data; + dev_data = of_device_get_match_data(dev); iio = devm_iio_device_alloc(dev, sizeof(*adc)); if (!iio) { dev_err(dev, "%s: Failed to allocate IIO\n", __func__); diff --git a/drivers/iio/adc/stm32-dfsdm-core.c b/drivers/iio/adc/stm32-dfsdm-core.c index 6cd655f8239b..6290332cfd3f 100644 --- a/drivers/iio/adc/stm32-dfsdm-core.c +++ b/drivers/iio/adc/stm32-dfsdm-core.c @@ -242,7 +242,6 @@ MODULE_DEVICE_TABLE(of, stm32_dfsdm_of_match); static int stm32_dfsdm_probe(struct platform_device *pdev) { struct dfsdm_priv *priv; - const struct of_device_id *of_id; const struct stm32_dfsdm_dev_data *dev_data; struct stm32_dfsdm *dfsdm; int ret; @@ -253,13 +252,8 @@ static int stm32_dfsdm_probe(struct platform_device *pdev) priv->pdev = pdev; - of_id = of_match_node(stm32_dfsdm_of_match, pdev->dev.of_node); - if (!of_id->data) { - dev_err(&pdev->dev, "Data associated to device is missing\n"); - return -EINVAL; - } + dev_data = of_device_get_match_data(&pdev->dev); - dev_data = (const struct stm32_dfsdm_dev_data *)of_id->data; dfsdm = &priv->dfsdm; dfsdm->fl_list = 
devm_kcalloc(&pdev->dev, dev_data->num_filters, sizeof(*dfsdm->fl_list), GFP_KERNEL); -- cgit v1.2.3 From 1175d0f9f4031ce02845f6f843f58a9caaee7817 Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 15 Jan 2018 10:00:26 +0100 Subject: IIO: ADC: stm32-dfsdm: fix static check warning iio_priv does not return an error pointer, so check is not valid. Patch suppresses it. Signed-off-by: Arnaud Pouliquen Acked-by: Jonathan Cameron Signed-off-by: Mark Brown --- drivers/iio/adc/stm32-dfsdm-adc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index 3fe9b34ac6af..daa026d6a94f 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -1099,10 +1099,6 @@ static int stm32_dfsdm_adc_probe(struct platform_device *pdev) } adc = iio_priv(iio); - if (IS_ERR(adc)) { - dev_err(dev, "%s: Failed to allocate ADC\n", __func__); - return PTR_ERR(adc); - } adc->dfsdm = dev_get_drvdata(dev->parent); iio->dev.parent = dev; -- cgit v1.2.3 From 6503a30440962f1e1ccb8868816b4e18201218d4 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 11 Jan 2018 18:36:26 +0900 Subject: net: ipv4: Make "ip route get" match iif lo rules again. Commit 3765d35ed8b9 ("net: ipv4: Convert inet_rtm_getroute to rcu versions of route lookup") broke "ip route get" in the presence of rules that specify iif lo. Host-originated traffic always has iif lo, because ip_route_output_key_hash and ip6_route_output_flags set the flow iif to LOOPBACK_IFINDEX. Thus, putting "iif lo" in an ip rule is a convenient way to select only originated traffic and not forwarded traffic. inet_rtm_getroute used to match these rules correctly because even though it sets the flow iif to 0, it called ip_route_output_key which overwrites iif with LOOPBACK_IFINDEX. But now that it calls ip_route_output_key_hash_rcu, the ifindex will remain 0 and not match the iif lo in the rule. As a result, "ip route get" will return ENETUNREACH. 
Fixes: 3765d35ed8b9 ("net: ipv4: Convert inet_rtm_getroute to rcu versions of route lookup") Tested: https://android.googlesource.com/kernel/tests/+/master/net/test/multinetwork_test.py passes again Signed-off-by: Lorenzo Colitti Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 43b69af242e1..4e153b23bcec 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2762,6 +2762,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { + fl4.flowi4_iif = LOOPBACK_IFINDEX; rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); err = 0; if (IS_ERR(rt)) -- cgit v1.2.3 From 37f47bc90c7481e7959703ad1defc4fc9f5d85e3 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 11 Jan 2018 14:22:06 -0200 Subject: sctp: avoid compiler warning on implicit fallthru These fall-through are expected. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Reviewed-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 1 + net/sctp/outqueue.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 3b18085e3b10..5d4c15bf66d2 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -826,6 +826,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp) case AF_INET: if (!__ipv6_only_sock(sctp_opt2sk(sp))) return 1; + /* fallthru */ default: return 0; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 7d67feeeffc1..c4ec99b20150 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -918,9 +918,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) break; case SCTP_CID_ABORT: - if (sctp_test_T_bit(chunk)) { + if (sctp_test_T_bit(chunk)) packet->vtag = asoc->c.my_vtag; - } + /* fallthru */ /* The following chunks are "response" chunks, i.e. 
* they are generated in response to something we * received. If we are sending these, then we can -- cgit v1.2.3 From 68e76e034b6b1c1ce2eece1ab8ae4008e14be470 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Jan 2018 11:07:27 -0800 Subject: tracing: Prevent PROFILE_ALL_BRANCHES when FORTIFY_SOURCE=y I regularly get 50 MB - 60 MB files during kernel randconfig builds. These large files mostly contain (many repeats of; e.g., 124,594): In file included from ../include/linux/string.h:6:0, from ../include/linux/uuid.h:20, from ../include/linux/mod_devicetable.h:13, from ../scripts/mod/devicetable-offsets.c:3: ../include/linux/compiler.h:64:4: warning: '______f' is static but declared in inline function 'strcpy' which is not static [enabled by default] ______f = { \ ^ ../include/linux/compiler.h:56:23: note: in expansion of macro '__trace_if' ^ ../include/linux/string.h:425:2: note: in expansion of macro 'if' if (p_size == (size_t)-1 && q_size == (size_t)-1) ^ This only happens when CONFIG_FORTIFY_SOURCE=y and CONFIG_PROFILE_ALL_BRANCHES=y, so prevent PROFILE_ALL_BRANCHES if FORTIFY_SOURCE=y. Link: http://lkml.kernel.org/r/9199446b-a141-c0c3-9678-a3f9107f2750@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 904c952ac383..f54dc62b599c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -355,7 +355,7 @@ config PROFILE_ANNOTATED_BRANCHES on if you need to profile the system's use of these macros. config PROFILE_ALL_BRANCHES - bool "Profile all if conditionals" + bool "Profile all if conditionals" if !FORTIFY_SOURCE select TRACE_BRANCH_PROFILING help This tracer profiles all branch conditions. 
Every if () -- cgit v1.2.3 From c469652bb5e8fb715db7d152f46d33b3740c9b87 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 15 Jan 2018 10:44:35 +0100 Subject: ALSA: hda - Use IS_REACHABLE() for dependency on input The commit ffcd28d88e4f ("ALSA: hda - Select INPUT for Realtek HD-audio codec") introduced the reverse-selection of CONFIG_INPUT for Realtek codec in order to avoid the mess with dependency between built-in and modules. Later on, we obtained IS_REACHABLE() macro exactly for this kind of problems, and now we can remove the INPUT selection in Kconfig and put IS_REACHABLE(INPUT) to the appropriate places in the code, so that the driver doesn't need to select other subsystem forcibly. Fixes: ffcd28d88e4f ("ALSA: hda - Select INPUT for Realtek HD-audio codec") Reported-by: Randy Dunlap Acked-by: Randy Dunlap # and build-tested Signed-off-by: Takashi Iwai --- sound/pci/hda/Kconfig | 1 - sound/pci/hda/patch_realtek.c | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 7f3b5ed81995..f7a492c382d9 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -88,7 +88,6 @@ config SND_HDA_PATCH_LOADER config SND_HDA_CODEC_REALTEK tristate "Build Realtek HD-audio codec support" select SND_HDA_GENERIC - select INPUT help Say Y or M here to include Realtek HD-audio codec support in snd-hda-intel driver, such as ALC880.
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 93d2ce891db6..552646c049fa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3810,6 +3810,7 @@ static void alc280_fixup_hp_gpio4(struct hda_codec *codec, } } +#if IS_REACHABLE(INPUT) static void gpio2_mic_hotkey_event(struct hda_codec *codec, struct hda_jack_callback *event) { @@ -3942,6 +3943,10 @@ static void alc233_fixup_lenovo_line2_mic_hotkey(struct hda_codec *codec, spec->kb_dev = NULL; } } +#else /* INPUT */ +#define alc280_fixup_hp_gpio2_mic_hotkey NULL +#define alc233_fixup_lenovo_line2_mic_hotkey NULL +#endif /* INPUT */ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) -- cgit v1.2.3 From 95ef498d977bf44ac094778fd448b98af158a3e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Jan 2018 22:31:18 -0800 Subject: ipv6: ip6_make_skb() needs to clear cork.base.dst In my last patch, I missed the fact that cork.base.dst was not initialized in ip6_make_skb() : If ip6_setup_cork() returns an error, we might attempt a dst_release() on some random pointer. Fixes: 862c03ee1deb ("ipv6: fix possible mem leaks in ipv6_make_skb()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S.
Miller --- net/ipv6/ip6_output.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8fe58a2d305c..4f7d8de56611 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1735,6 +1735,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, cork.base.flags = 0; cork.base.addr = 0; cork.base.opt = NULL; + cork.base.dst = NULL; v6_cork.opt = NULL; err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); if (err) { -- cgit v1.2.3 From 30be8f8dba1bd2aff73e8447d59228471233a3d4 Mon Sep 17 00:00:00 2001 From: "r.hering@avm.de" Date: Fri, 12 Jan 2018 15:42:06 +0100 Subject: net/tls: Fix inverted error codes to avoid endless loop sendfile() calls can hang endlessly when using Kernel TLS if a socket error occurs. Socket error codes must be inverted by Kernel TLS before returning because they are stored with positive sign. If returned non-inverted they are interpreted as number of bytes sent, causing endless looping of the splice mechanic behind sendfile(). Signed-off-by: Robert Hering Signed-off-by: David S.
Miller --- include/net/tls.h | 2 +- net/tls/tls_sw.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 936cfc5cab7d..9185e53a743c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -170,7 +170,7 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) static inline void tls_err_abort(struct sock *sk) { - sk->sk_err = -EBADMSG; + sk->sk_err = EBADMSG; sk->sk_error_report(sk); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 73d19210dd49..9773571b6a34 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -391,7 +391,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) while (msg_data_left(msg)) { if (sk->sk_err) { - ret = sk->sk_err; + ret = -sk->sk_err; goto send_end; } @@ -544,7 +544,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page, size_t copy, required_size; if (sk->sk_err) { - ret = sk->sk_err; + ret = -sk->sk_err; goto sendpage_end; } -- cgit v1.2.3 From 95a332088ecb113c2e8753fa3f1df9b0dda9beec Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 12 Jan 2018 12:29:22 -0800 Subject: Revert "openvswitch: Add erspan tunnel support." This reverts commit ceaa001a170e43608854d5290a48064f57b565ed. The OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS attr should be designed as a nested attribute to support all ERSPAN v1 and v2's fields. The current attr is a be32 supporting only one field. Thus, this patch reverts it and later patch will redo it using nested attr. Signed-off-by: William Tu Cc: Jiri Benc Cc: Pravin Shelar Acked-by: Jiri Benc Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- include/uapi/linux/openvswitch.h | 1 - net/openvswitch/flow_netlink.c | 51 +--------------------------------------- 2 files changed, 1 insertion(+), 51 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 4265d7f9e1f2..dcfab5e3b55c 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -363,7 +363,6 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */ OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ OVS_TUNNEL_KEY_ATTR_PAD, - OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* be32 ERSPAN index. */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 624ea74353dd..f143908b651d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,7 +49,6 @@ #include #include #include -#include #include "flow_netlink.h" @@ -334,8 +333,7 @@ size_t ovs_tun_key_attr_size(void) * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 
*/ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ - + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */ - + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ + + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } static size_t ovs_nsh_key_attr_size(void) @@ -402,7 +400,6 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] .next = ovs_vxlan_ext_key_lens }, [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, - [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) }, }; static const struct ovs_len_tbl @@ -634,33 +631,6 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, return 0; } -static int erspan_tun_opt_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask, - bool log) -{ - unsigned long opt_key_offset; - struct erspan_metadata opts; - - BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); - - memset(&opts, 0, sizeof(opts)); - opts.index = nla_get_be32(attr); - - /* Index has only 20-bit */ - if (ntohl(opts.index) & ~INDEX_MASK) { - OVS_NLERR(log, "ERSPAN index number %x too large.", - ntohl(opts.index)); - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask); - opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); - SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), - is_mask); - - return 0; -} - static int ip_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -768,19 +738,6 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_PAD: break; - case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: - if (opts_type) { - OVS_NLERR(log, "Multiple metadata blocks provided"); - return -EINVAL; - } - - err = erspan_tun_opt_from_nlattr(a, match, is_mask, log); - if (err) - return err; - - tun_flags |= TUNNEL_ERSPAN_OPT; - opts_type = type; - break; default: OVS_NLERR(log, 
"Unknown IP tunnel attribute %d", type); @@ -905,10 +862,6 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, else if (output->tun_flags & TUNNEL_VXLAN_OPT && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; - else if (output->tun_flags & TUNNEL_ERSPAN_OPT && - nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, - ((struct erspan_metadata *)tun_opts)->index)) - return -EMSGSIZE; } return 0; @@ -2533,8 +2486,6 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: break; - case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: - break; } }; -- cgit v1.2.3 From 17d0fb0caa68f2bfd8aaa8125ff15abebfbfa1d7 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sat, 13 Jan 2018 20:22:01 +0300 Subject: sh_eth: fix dumping ARSTR ARSTR is always located at the start of the TSU register region, thus using add_reg() instead of add_tsu_reg() in __sh_eth_get_regs() to dump it causes EDMR or EDSR (depending on the register layout) to be dumped instead of ARSTR. Use the correct condition/macro there... Fixes: 6b4b4fead342 ("sh_eth: Implement ethtool register dump operations") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/sh_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index b9e2846589f8..53924a4fc31c 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2089,8 +2089,8 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf) add_reg(CSMR); if (cd->select_mii) add_reg(RMII_MII); - add_reg(ARSTR); if (cd->tsu) { + add_tsu_reg(ARSTR); add_tsu_reg(TSU_CTRST); add_tsu_reg(TSU_FWEN0); add_tsu_reg(TSU_FWEN1); -- cgit v1.2.3 From 096b9854c04df86f03b38a97d40b6506e5730919 Mon Sep 17 00:00:00 2001 From: Jim Westfall Date: Sun, 14 Jan 2018 04:18:50 -0800 Subject: net: Allow neigh contructor functions ability to modify the primary_key Use n->primary_key instead of pkey to account for the possibility that a neigh constructor function may have modified the primary_key value. Signed-off-by: Jim Westfall Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d1f5fe986edd..7f831711b6e0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -532,7 +532,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) nht = neigh_hash_grow(tbl, nht->hash_shift + 1); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); + hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); @@ -544,7 +544,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, n1 != NULL; n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { - if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { + if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { if (want_ref) neigh_hold(n1); rc = n1; -- cgit v1.2.3 From cd9ff4de0107c65d69d02253bb25d6db93c3dbc1 Mon Sep 17 00:00:00 2001 From: Jim Westfall Date: Sun, 14 Jan 2018 04:18:51 -0800 Subject: ipv4: Make neigh lookup keys for loopback/point-to-point devices be INADDR_ANY Map all lookup neigh keys to INADDR_ANY for loopback/point-to-point devices to avoid making an entry for every remote ip the device needs to talk to. This used to be the old behavior but became broken in a263b3093641f (ipv4: Make neigh lookups directly in output packet path) and later removed in 0bb4087cbec0 (ipv4: Fix neigh lookup keying over loopback/point-to-point devices) because it was broken. Signed-off-by: Jim Westfall Signed-off-by: David S.
Miller --- include/net/arp.h | 3 +++ net/ipv4/arp.c | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/arp.h b/include/net/arp.h index dc8cd47f883b..977aabfcdc03 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -20,6 +20,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = INADDR_ANY; + return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a8d7c5a9fb05..6c231b43974d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey) static int arp_constructor(struct neighbour *neigh) { - __be32 addr = *(__be32 *)neigh->primary_key; + __be32 addr; struct net_device *dev = neigh->dev; struct in_device *in_dev; struct neigh_parms *parms; + u32 inaddr_any = INADDR_ANY; + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); + + addr = *(__be32 *)neigh->primary_key; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (!in_dev) { -- cgit v1.2.3 From 6311b7ce42e0c1d6d944bc099dc47e936c20cf11 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jan 2018 12:42:25 +0100 Subject: netlink: extack: avoid parenthesized string constant warning NL_SET_ERR_MSG() and NL_SET_ERR_MSG_ATTR() lead to the following warning in newer versions of gcc: warning: array initialized from parenthesized string constant Just remove the parentheses, they're not needed in this context anyway since there can be no operator precedence issues or similar. Signed-off-by: Johannes Berg Signed-off-by: David S.
Miller --- include/linux/netlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 49b4257ce1ea..f3075d6c7e82 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -85,7 +85,7 @@ struct netlink_ext_ack { * to the lack of an output buffer.) */ #define NL_SET_ERR_MSG(extack, msg) do { \ - static const char __msg[] = (msg); \ + static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ if (__extack) \ @@ -101,7 +101,7 @@ struct netlink_ext_ack { } while (0) #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do { \ - static const char __msg[] = (msg); \ + static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ if (__extack) { \ -- cgit v1.2.3 From ae59c3f0b6cfd472fed96e50548a799b8971d876 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 12 Jan 2018 07:58:39 +0200 Subject: RDMA/mlx5: Fix out-of-bound access while querying AH The rdma_ah_find_type() accesses the port array based on an index controlled by userspace. The existing bounds check is after the first use of the index, so userspace can generate an out of bounds access, as shown by the KASAN report below.
================================================================== BUG: KASAN: slab-out-of-bounds in to_rdma_ah_attr+0xa8/0x3b0 Read of size 4 at addr ffff880019ae2268 by task ibv_rc_pingpong/409 CPU: 0 PID: 409 Comm: ibv_rc_pingpong Not tainted 4.15.0-rc2-00031-gb60a3faf5b83-dirty #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 Call Trace: dump_stack+0xe9/0x18f print_address_description+0xa2/0x350 kasan_report+0x3a5/0x400 to_rdma_ah_attr+0xa8/0x3b0 mlx5_ib_query_qp+0xd35/0x1330 ib_query_qp+0x8a/0xb0 ib_uverbs_query_qp+0x237/0x7f0 ib_uverbs_write+0x617/0xd80 __vfs_write+0xf7/0x500 vfs_write+0x149/0x310 SyS_write+0xca/0x190 entry_SYSCALL_64_fastpath+0x18/0x85 RIP: 0033:0x7fe9c7a275a0 RSP: 002b:00007ffee5498738 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007fe9c7ce4b00 RCX: 00007fe9c7a275a0 RDX: 0000000000000018 RSI: 00007ffee5498800 RDI: 0000000000000003 RBP: 000055d0c8d3f010 R08: 00007ffee5498800 R09: 0000000000000018 R10: 00000000000000ba R11: 0000000000000246 R12: 0000000000008000 R13: 0000000000004fb0 R14: 000055d0c8d3f050 R15: 00007ffee5498560 Allocated by task 1: __kmalloc+0x3f9/0x430 alloc_mad_private+0x25/0x50 ib_mad_post_receive_mads+0x204/0xa60 ib_mad_init_device+0xa59/0x1020 ib_register_device+0x83a/0xbc0 mlx5_ib_add+0x50e/0x5c0 mlx5_add_device+0x142/0x410 mlx5_register_interface+0x18f/0x210 mlx5_ib_init+0x56/0x63 do_one_initcall+0x15b/0x270 kernel_init_freeable+0x2d8/0x3d0 kernel_init+0x14/0x190 ret_from_fork+0x24/0x30 Freed by task 0: (stack is not available) The buggy address belongs to the object at ffff880019ae2000 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 104 bytes to the right of 512-byte region [ffff880019ae2000, ffff880019ae2200) The buggy address belongs to the page: page:000000005d674e18 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x4000000000008100(slab|head) raw: 
4000000000008100 0000000000000000 0000000000000000 00000001000c000c raw: dead000000000100 dead000000000200 ffff88001a402000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff880019ae2100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff880019ae2180: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc >ffff880019ae2200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff880019ae2280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff880019ae2300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Disabling lock debugging due to kernel taint Cc: Fixes: 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 31ad28853efa..cffe5966aef9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4362,12 +4362,11 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev, memset(ah_attr, 0, sizeof(*ah_attr)); - ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port); - rdma_ah_set_port_num(ah_attr, path->port); - if (rdma_ah_get_port_num(ah_attr) == 0 || - rdma_ah_get_port_num(ah_attr) > MLX5_CAP_GEN(dev, num_ports)) + if (!path->port || path->port > MLX5_CAP_GEN(dev, num_ports)) return; + ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port); + rdma_ah_set_port_num(ah_attr, path->port); rdma_ah_set_sl(ah_attr, path->dci_cfi_prio_sl & 0xf); -- cgit v1.2.3 From 2a0098d70640dda192a79966c14d449e7a34d675 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 15 Jan 2018 08:17:07 -0600 Subject: objtool: Fix seg fault with gold linker Objtool segfaults when the gold linker is used with CONFIG_MODVERSIONS=y and CONFIG_UNWINDER_ORC=y. 
With CONFIG_MODVERSIONS=y, the .o file gets passed to the linker before being passed to objtool. The gold linker seems to strip unused ELF symbols by default, which confuses objtool and causes the seg fault when it's trying to generate ORC metadata. Objtool should really be running immediately after GCC anyway, without a linker call in between. Change the makefile ordering so that objtool is called before the linker. Reported-and-tested-by: Markus Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Link: http://lkml.kernel.org/r/355f04da33581f4a3bf82e5b512973624a1e23a2.1516025651.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- scripts/Makefile.build | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index e63af4e19382..6bed45dc2cb1 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -270,12 +270,18 @@ else objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) endif +ifdef CONFIG_MODVERSIONS +objtool_o = $(@D)/.tmp_$(@F) +else +objtool_o = $(@) +endif + # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file # 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file cmd_objtool = $(if $(patsubst y%,, \ $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ - $(__objtool_obj) $(objtool_args) "$(@)";) + $(__objtool_obj) $(objtool_args) "$(objtool_o)";) objtool_obj = $(if $(patsubst y%,, \ $(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \ $(__objtool_obj)) @@ -291,15 +297,15 @@ objtool_dep = $(objtool_obj) \ define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call cmd_and_fixdep,cc_o_c) \ - $(cmd_modversions_c) \ $(call echo-cmd,objtool) $(cmd_objtool) \ + $(cmd_modversions_c) \ $(call 
echo-cmd,record_mcount) $(cmd_record_mcount) endef define rule_as_o_S $(call cmd_and_fixdep,as_o_S) \ - $(cmd_modversions_S) \ - $(call echo-cmd,objtool) $(cmd_objtool) + $(call echo-cmd,objtool) $(cmd_objtool) \ + $(cmd_modversions_S) endef # List module undefined symbols (or empty line if not enabled) -- cgit v1.2.3 From 385d11b152c4eb638eeb769edcb3249533bb9a00 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 15 Jan 2018 08:17:08 -0600 Subject: objtool: Improve error message for bad file argument If a nonexistent file is supplied to objtool, it complains with a non-helpful error: open: No such file or directory Improve it to: objtool: Can't open 'foo': No such file or directory Reported-by: Markus Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/406a3d00a21225eee2819844048e17f68523ccf6.1516025651.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/elf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 24460155c82c..c1c338661699 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "elf.h" #include "warn.h" @@ -358,7 +359,8 @@ struct elf *elf_open(const char *name, int flags) elf->fd = open(name, flags); if (elf->fd == -1) { - perror("open"); + fprintf(stderr, "objtool: Can't open '%s': %s\n", + name, strerror(errno)); goto err; } -- cgit v1.2.3 From 1303880179e67c59e801429b7e5d0f6b21137d99 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 10 Jan 2018 13:25:56 -0600 Subject: x86/mm: Clean up register saving in the __enc_copy() assembly code Clean up the use of PUSH and POP and when registers are saved in the __enc_copy() assembly function in order to improve the readability of the code. 
Move parameter register saving into general purpose registers earlier in the code and move all the pushes to the beginning of the function with corresponding pops at the end. We do this to prepare fixes. Tested-by: Gabriel Craciunescu Signed-off-by: Tom Lendacky Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brijesh Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180110192556.6026.74187.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/mm/mem_encrypt_boot.S | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 730e6d541df1..de3688461145 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -103,20 +103,19 @@ ENTRY(__enc_copy) orq $X86_CR4_PGE, %rdx mov %rdx, %cr4 + push %r15 + + movq %rcx, %r9 /* Save kernel length */ + movq %rdi, %r10 /* Save encrypted kernel address */ + movq %rsi, %r11 /* Save decrypted kernel address */ + /* Set the PAT register PA5 entry to write-protect */ - push %rcx movl $MSR_IA32_CR_PAT, %ecx rdmsr - push %rdx /* Save original PAT value */ + mov %rdx, %r15 /* Save original PAT value */ andl $0xffff00ff, %edx /* Clear PA5 */ orl $0x00000500, %edx /* Set PA5 to WP */ wrmsr - pop %rdx /* RDX contains original PAT value */ - pop %rcx - - movq %rcx, %r9 /* Save kernel length */ - movq %rdi, %r10 /* Save encrypted kernel address */ - movq %rsi, %r11 /* Save decrypted kernel address */ wbinvd /* Invalidate any cache entries */ @@ -138,12 +137,13 @@ ENTRY(__enc_copy) jnz 1b /* Kernel length not zero? 
*/ /* Restore PAT register */ - push %rdx /* Save original PAT value */ movl $MSR_IA32_CR_PAT, %ecx rdmsr - pop %rdx /* Restore original PAT value */ + mov %r15, %rdx /* Restore original PAT value */ wrmsr + pop %r15 + ret .L__enc_copy_end: ENDPROC(__enc_copy) -- cgit v1.2.3 From bacf6b499e11760aef73a3bb5ce4e5eea74a3fd4 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 10 Jan 2018 13:26:05 -0600 Subject: x86/mm: Use a struct to reduce parameters for SME PGD mapping In preparation for follow-on patches, combine the PGD mapping parameters into a struct to reduce the number of function arguments and allow for direct updating of the next pagetable mapping area pointer. Tested-by: Gabriel Craciunescu Signed-off-by: Tom Lendacky Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brijesh Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180110192605.6026.96206.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/mm/mem_encrypt.c | 90 ++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 391b13402e40..5a20696c5440 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -464,6 +464,14 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); } +struct sme_populate_pgd_data { + void *pgtable_area; + pgd_t *pgd; + + pmdval_t pmd_val; + unsigned long vaddr; +}; + static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, unsigned long end) { @@ -486,15 +494,14 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, #define PUD_FLAGS _KERNPG_TABLE_NOENC #define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) -static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, - unsigned long vaddr, pmdval_t pmd_val) +static void __init 
sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) { pgd_t *pgd_p; p4d_t *p4d_p; pud_t *pud_p; pmd_t *pmd_p; - pgd_p = pgd_base + pgd_index(vaddr); + pgd_p = ppd->pgd + pgd_index(ppd->vaddr); if (native_pgd_val(*pgd_p)) { if (IS_ENABLED(CONFIG_X86_5LEVEL)) p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); @@ -504,15 +511,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, pgd_t pgd; if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_p = pgtable_area; + p4d_p = ppd->pgtable_area; memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); - pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; + ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); } else { - pud_p = pgtable_area; + pud_p = ppd->pgtable_area; memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; + ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); } @@ -520,44 +527,41 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, } if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_p += p4d_index(vaddr); + p4d_p += p4d_index(ppd->vaddr); if (native_p4d_val(*p4d_p)) { pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); } else { p4d_t p4d; - pud_p = pgtable_area; + pud_p = ppd->pgtable_area; memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; + ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); native_set_p4d(p4d_p, p4d); } } - pud_p += pud_index(vaddr); + pud_p += pud_index(ppd->vaddr); if (native_pud_val(*pud_p)) { if (native_pud_val(*pud_p) & _PAGE_PSE) - goto out; + return; pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); } else { pud_t pud; - pmd_p = pgtable_area; + pmd_p = ppd->pgtable_area; memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); - pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; + ppd->pgtable_area 
+= sizeof(*pmd_p) * PTRS_PER_PMD; pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); native_set_pud(pud_p, pud); } - pmd_p += pmd_index(vaddr); + pmd_p += pmd_index(ppd->vaddr); if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) - native_set_pmd(pmd_p, native_make_pmd(pmd_val)); - -out: - return pgtable_area; + native_set_pmd(pmd_p, native_make_pmd(ppd->pmd_val)); } static unsigned long __init sme_pgtable_calc(unsigned long len) @@ -615,11 +619,10 @@ void __init sme_encrypt_kernel(void) unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; unsigned long kernel_start, kernel_end, kernel_len; + struct sme_populate_pgd_data ppd; unsigned long pgtable_area_len; unsigned long paddr, pmd_flags; unsigned long decrypted_base; - void *pgtable_area; - pgd_t *pgd; if (!sme_active()) return; @@ -683,18 +686,18 @@ void __init sme_encrypt_kernel(void) * pagetables and when the new encrypted and decrypted kernel * mappings are populated. */ - pgtable_area = (void *)execute_end; + ppd.pgtable_area = (void *)execute_end; /* * Make sure the current pagetable structure has entries for * addressing the workarea. */ - pgd = (pgd_t *)native_read_cr3_pa(); + ppd.pgd = (pgd_t *)native_read_cr3_pa(); paddr = workarea_start; while (paddr < workarea_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + PMD_FLAGS); + ppd.pmd_val = paddr + PMD_FLAGS; + ppd.vaddr = paddr; + sme_populate_pgd_large(&ppd); paddr += PMD_PAGE_SIZE; } @@ -708,17 +711,17 @@ void __init sme_encrypt_kernel(void) * populated with new PUDs and PMDs as the encrypted and decrypted * kernel mappings are created. 
*/ - pgd = pgtable_area; - memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD); - pgtable_area += sizeof(*pgd) * PTRS_PER_PGD; + ppd.pgd = ppd.pgtable_area; + memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); + ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; /* Add encrypted kernel (identity) mappings */ pmd_flags = PMD_FLAGS | _PAGE_ENC; paddr = kernel_start; while (paddr < kernel_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + pmd_flags); + ppd.pmd_val = paddr + pmd_flags; + ppd.vaddr = paddr; + sme_populate_pgd_large(&ppd); paddr += PMD_PAGE_SIZE; } @@ -736,9 +739,9 @@ void __init sme_encrypt_kernel(void) pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT); paddr = kernel_start; while (paddr < kernel_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr + decrypted_base, - paddr + pmd_flags); + ppd.pmd_val = paddr + pmd_flags; + ppd.vaddr = paddr + decrypted_base; + sme_populate_pgd_large(&ppd); paddr += PMD_PAGE_SIZE; } @@ -746,30 +749,29 @@ void __init sme_encrypt_kernel(void) /* Add decrypted workarea mappings to both kernel mappings */ paddr = workarea_start; while (paddr < workarea_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + PMD_FLAGS); + ppd.pmd_val = paddr + PMD_FLAGS; + ppd.vaddr = paddr; + sme_populate_pgd_large(&ppd); - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr + decrypted_base, - paddr + PMD_FLAGS); + ppd.vaddr = paddr + decrypted_base; + sme_populate_pgd_large(&ppd); paddr += PMD_PAGE_SIZE; } /* Perform the encryption */ sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, - kernel_len, workarea_start, (unsigned long)pgd); + kernel_len, workarea_start, (unsigned long)ppd.pgd); /* * At this point we are running encrypted. Remove the mappings for * the decrypted areas - all that is needed for this is to remove * the PGD entry/entries. 
*/ - sme_clear_pgd(pgd, kernel_start + decrypted_base, + sme_clear_pgd(ppd.pgd, kernel_start + decrypted_base, kernel_end + decrypted_base); - sme_clear_pgd(pgd, workarea_start + decrypted_base, + sme_clear_pgd(ppd.pgd, workarea_start + decrypted_base, workarea_end + decrypted_base); /* Flush the TLB - no globals so cr3 is enough */ -- cgit v1.2.3 From 2b5d00b6c2cdd94f6d6a494a6f6c0c0fc7b8e711 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 10 Jan 2018 13:26:16 -0600 Subject: x86/mm: Centralize PMD flags in sme_encrypt_kernel() In preparation for encrypting more than just the kernel during early boot processing, centralize the use of the PMD flag settings based on the type of mapping desired. When 4KB aligned encryption is added, this will allow either PTE flags or large page PMD flags to be used without requiring the caller to adjust. Tested-by: Gabriel Craciunescu Signed-off-by: Tom Lendacky Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brijesh Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180110192615.6026.14767.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/mm/mem_encrypt.c | 133 +++++++++++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 56 deletions(-) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 5a20696c5440..35f38caa1fa3 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -468,31 +468,39 @@ struct sme_populate_pgd_data { void *pgtable_area; pgd_t *pgd; - pmdval_t pmd_val; + pmdval_t pmd_flags; + unsigned long paddr; + unsigned long vaddr; + unsigned long vaddr_end; }; -static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, - unsigned long end) +static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; - pgd_start = start & PGDIR_MASK; - pgd_end = end & PGDIR_MASK; + pgd_start = ppd->vaddr & PGDIR_MASK; + pgd_end = 
ppd->vaddr_end & PGDIR_MASK; - pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1); - pgd_size *= sizeof(pgd_t); + pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); - pgd_p = pgd_base + pgd_index(start); + pgd_p = ppd->pgd + pgd_index(ppd->vaddr); memset(pgd_p, 0, pgd_size); } -#define PGD_FLAGS _KERNPG_TABLE_NOENC -#define P4D_FLAGS _KERNPG_TABLE_NOENC -#define PUD_FLAGS _KERNPG_TABLE_NOENC -#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) +#define PGD_FLAGS _KERNPG_TABLE_NOENC +#define P4D_FLAGS _KERNPG_TABLE_NOENC +#define PUD_FLAGS _KERNPG_TABLE_NOENC + +#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) + +#define PMD_FLAGS_DEC PMD_FLAGS_LARGE +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) + +#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) { @@ -561,7 +569,35 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) pmd_p += pmd_index(ppd->vaddr); if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) - native_set_pmd(pmd_p, native_make_pmd(ppd->pmd_val)); + native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags)); +} + +static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, + pmdval_t pmd_flags) +{ + ppd->pmd_flags = pmd_flags; + + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd_large(ppd); + + ppd->vaddr += PMD_PAGE_SIZE; + ppd->paddr += PMD_PAGE_SIZE; + } +} + +static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_ENC); +} + +static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_DEC); +} + +static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_DEC_WP); } static unsigned long __init sme_pgtable_calc(unsigned long len) @@ -621,7 
+657,6 @@ void __init sme_encrypt_kernel(void) unsigned long kernel_start, kernel_end, kernel_len; struct sme_populate_pgd_data ppd; unsigned long pgtable_area_len; - unsigned long paddr, pmd_flags; unsigned long decrypted_base; if (!sme_active()) @@ -693,14 +728,10 @@ void __init sme_encrypt_kernel(void) * addressing the workarea. */ ppd.pgd = (pgd_t *)native_read_cr3_pa(); - paddr = workarea_start; - while (paddr < workarea_end) { - ppd.pmd_val = paddr + PMD_FLAGS; - ppd.vaddr = paddr; - sme_populate_pgd_large(&ppd); - - paddr += PMD_PAGE_SIZE; - } + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start; + ppd.vaddr_end = workarea_end; + sme_map_range_decrypted(&ppd); /* Flush the TLB - no globals so cr3 is enough */ native_write_cr3(__native_read_cr3()); @@ -715,17 +746,6 @@ void __init sme_encrypt_kernel(void) memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; - /* Add encrypted kernel (identity) mappings */ - pmd_flags = PMD_FLAGS | _PAGE_ENC; - paddr = kernel_start; - while (paddr < kernel_end) { - ppd.pmd_val = paddr + pmd_flags; - ppd.vaddr = paddr; - sme_populate_pgd_large(&ppd); - - paddr += PMD_PAGE_SIZE; - } - /* * A different PGD index/entry must be used to get different * pagetable entries for the decrypted mapping. 
Choose the next @@ -735,29 +755,28 @@ void __init sme_encrypt_kernel(void) decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); decrypted_base <<= PGDIR_SHIFT; + /* Add encrypted kernel (identity) mappings */ + ppd.paddr = kernel_start; + ppd.vaddr = kernel_start; + ppd.vaddr_end = kernel_end; + sme_map_range_encrypted(&ppd); + /* Add decrypted, write-protected kernel (non-identity) mappings */ - pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT); - paddr = kernel_start; - while (paddr < kernel_end) { - ppd.pmd_val = paddr + pmd_flags; - ppd.vaddr = paddr + decrypted_base; - sme_populate_pgd_large(&ppd); - - paddr += PMD_PAGE_SIZE; - } + ppd.paddr = kernel_start; + ppd.vaddr = kernel_start + decrypted_base; + ppd.vaddr_end = kernel_end + decrypted_base; + sme_map_range_decrypted_wp(&ppd); /* Add decrypted workarea mappings to both kernel mappings */ - paddr = workarea_start; - while (paddr < workarea_end) { - ppd.pmd_val = paddr + PMD_FLAGS; - ppd.vaddr = paddr; - sme_populate_pgd_large(&ppd); - - ppd.vaddr = paddr + decrypted_base; - sme_populate_pgd_large(&ppd); + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start; + ppd.vaddr_end = workarea_end; + sme_map_range_decrypted(&ppd); - paddr += PMD_PAGE_SIZE; - } + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start + decrypted_base; + ppd.vaddr_end = workarea_end + decrypted_base; + sme_map_range_decrypted(&ppd); /* Perform the encryption */ sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, @@ -768,11 +787,13 @@ void __init sme_encrypt_kernel(void) * the decrypted areas - all that is needed for this is to remove * the PGD entry/entries. 
*/ - sme_clear_pgd(ppd.pgd, kernel_start + decrypted_base, - kernel_end + decrypted_base); + ppd.vaddr = kernel_start + decrypted_base; + ppd.vaddr_end = kernel_end + decrypted_base; + sme_clear_pgd(&ppd); - sme_clear_pgd(ppd.pgd, workarea_start + decrypted_base, - workarea_end + decrypted_base); + ppd.vaddr = workarea_start + decrypted_base; + ppd.vaddr_end = workarea_end + decrypted_base; + sme_clear_pgd(&ppd); /* Flush the TLB - no globals so cr3 is enough */ native_write_cr3(__native_read_cr3()); -- cgit v1.2.3 From cc5f01e28d6c60f274fd1e33b245f679f79f543c Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 10 Jan 2018 13:26:26 -0600 Subject: x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption In preparation for encrypting more than just the kernel, the encryption support in sme_encrypt_kernel() needs to support 4KB page aligned encryption instead of just 2MB large page aligned encryption. Update the routines that populate the PGD to support non-2MB aligned addresses. This is done by creating PTE page tables for the start and end portion of the address range that fall outside of the 2MB alignment. This results in, at most, two extra pages to hold the PTE entries for each mapping of a range. 
Tested-by: Gabriel Craciunescu Signed-off-by: Tom Lendacky Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brijesh Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180110192626.6026.75387.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/mm/mem_encrypt.c | 123 +++++++++++++++++++++++++++++++++++------ arch/x86/mm/mem_encrypt_boot.S | 20 +++++-- 2 files changed, 121 insertions(+), 22 deletions(-) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 35f38caa1fa3..e74a1722d438 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -469,6 +469,7 @@ struct sme_populate_pgd_data { pgd_t *pgd; pmdval_t pmd_flags; + pteval_t pte_flags; unsigned long paddr; unsigned long vaddr; @@ -493,6 +494,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) #define PGD_FLAGS _KERNPG_TABLE_NOENC #define P4D_FLAGS _KERNPG_TABLE_NOENC #define PUD_FLAGS _KERNPG_TABLE_NOENC +#define PMD_FLAGS _KERNPG_TABLE_NOENC #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) @@ -502,7 +504,15 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) -static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) + +#define PTE_FLAGS_DEC PTE_FLAGS +#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) + +#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) + +static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd_p; p4d_t *p4d_p; @@ -553,7 +563,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) pud_p += pud_index(ppd->vaddr); if (native_pud_val(*pud_p)) { if (native_pud_val(*pud_p) & _PAGE_PSE) - return; + return NULL; pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); } else { @@ -567,16 +577,55 @@ static void __init 
sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) native_set_pud(pud_p, pud); } + return pmd_p; +} + +static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +{ + pmd_t *pmd_p; + + pmd_p = sme_prepare_pgd(ppd); + if (!pmd_p) + return; + pmd_p += pmd_index(ppd->vaddr); if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags)); } -static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, - pmdval_t pmd_flags) +static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) { - ppd->pmd_flags = pmd_flags; + pmd_t *pmd_p; + pte_t *pte_p; + + pmd_p = sme_prepare_pgd(ppd); + if (!pmd_p) + return; + + pmd_p += pmd_index(ppd->vaddr); + if (native_pmd_val(*pmd_p)) { + if (native_pmd_val(*pmd_p) & _PAGE_PSE) + return; + + pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK); + } else { + pmd_t pmd; + pte_p = ppd->pgtable_area; + memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE); + ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE; + + pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS); + native_set_pmd(pmd_p, pmd); + } + + pte_p += pte_index(ppd->vaddr); + if (!native_pte_val(*pte_p)) + native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags)); +} + +static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +{ while (ppd->vaddr < ppd->vaddr_end) { sme_populate_pgd_large(ppd); @@ -585,33 +634,71 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, } } +static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +{ + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd(ppd); + + ppd->vaddr += PAGE_SIZE; + ppd->paddr += PAGE_SIZE; + } +} + +static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, + pmdval_t pmd_flags, pteval_t pte_flags) +{ + unsigned long vaddr_end; + + ppd->pmd_flags = pmd_flags; + ppd->pte_flags = pte_flags; + + /* Save original end value since we 
modify the struct value */ + vaddr_end = ppd->vaddr_end; + + /* If start is not 2MB aligned, create PTE entries */ + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + __sme_map_range_pte(ppd); + + /* Create PMD entries */ + ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + __sme_map_range_pmd(ppd); + + /* If end is not 2MB aligned, create PTE entries */ + ppd->vaddr_end = vaddr_end; + __sme_map_range_pte(ppd); +} + static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) { - __sme_map_range(ppd, PMD_FLAGS_ENC); + __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); } static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) { - __sme_map_range(ppd, PMD_FLAGS_DEC); + __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); } static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) { - __sme_map_range(ppd, PMD_FLAGS_DEC_WP); + __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); } static unsigned long __init sme_pgtable_calc(unsigned long len) { - unsigned long p4d_size, pud_size, pmd_size; + unsigned long p4d_size, pud_size, pmd_size, pte_size; unsigned long total; /* * Perform a relatively simplistic calculation of the pagetable - * entries that are needed. That mappings will be covered by 2MB - * PMD entries so we can conservatively calculate the required + * entries that are needed. Those mappings will be covered mostly + * by 2MB PMD entries so we can conservatively calculate the required * number of P4D, PUD and PMD structures needed to perform the - * mappings. Incrementing the count for each covers the case where - * the addresses cross entries. + * mappings. For mappings that are not 2MB aligned, PTE mappings + * would be needed for the start and end portion of the address range + * that fall outside of the 2MB alignment. This results in, at most, + * two extra pages to hold PTE entries for each range that is mapped. 
+ * Incrementing the count for each covers the case where the addresses + * cross entries. */ if (IS_ENABLED(CONFIG_X86_5LEVEL)) { p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; @@ -625,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) } pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; + pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE; - total = p4d_size + pud_size + pmd_size; + total = p4d_size + pud_size + pmd_size + pte_size; /* * Now calculate the added pagetable structures needed to populate @@ -709,10 +797,13 @@ void __init sme_encrypt_kernel(void) /* * The total workarea includes the executable encryption area and - * the pagetable area. + * the pagetable area. The start of the workarea is already 2MB + * aligned, align the end of the workarea on a 2MB boundary so that + * we don't try to create/allocate PTE entries from the workarea + * before it is mapped. */ workarea_len = execute_len + pgtable_area_len; - workarea_end = workarea_start + workarea_len; + workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); /* * Set the address to the start of where newly created pagetable diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index de3688461145..23a8a9e411ea 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -104,6 +104,7 @@ ENTRY(__enc_copy) mov %rdx, %cr4 push %r15 + push %r12 movq %rcx, %r9 /* Save kernel length */ movq %rdi, %r10 /* Save encrypted kernel address */ @@ -119,21 +120,27 @@ ENTRY(__enc_copy) wbinvd /* Invalidate any cache entries */ - /* Copy/encrypt 2MB at a time */ + /* Copy/encrypt up to 2MB at a time */ + movq $PMD_PAGE_SIZE, %r12 1: + cmpq %r12, %r9 + jnb 2f + movq %r9, %r12 + +2: movq %r11, %rsi /* Source - decrypted kernel */ movq %r8, %rdi /* Dest - intermediate copy buffer */ - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ + movq %r12, %rcx rep movsb movq %r8, %rsi /* Source - intermediate copy 
buffer */ movq %r10, %rdi /* Dest - encrypted kernel */ - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ + movq %r12, %rcx rep movsb - addq $PMD_PAGE_SIZE, %r11 - addq $PMD_PAGE_SIZE, %r10 - subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */ + addq %r12, %r11 + addq %r12, %r10 + subq %r12, %r9 /* Kernel length decrement */ jnz 1b /* Kernel length not zero? */ /* Restore PAT register */ @@ -142,6 +149,7 @@ ENTRY(__enc_copy) mov %r15, %rdx /* Restore original PAT value */ wrmsr + pop %r12 pop %r15 ret -- cgit v1.2.3 From 107cd2532181b96c549e8f224cdcca8631c3076b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 10 Jan 2018 13:26:34 -0600 Subject: x86/mm: Encrypt the initrd earlier for BSP microcode update Currently the BSP microcode update code examines the initrd very early in the boot process. If SME is active, the initrd is treated as being encrypted but it has not been encrypted (in place) yet. Update the early boot code that encrypts the kernel to also encrypt the initrd so that early BSP microcode updates work. 
Tested-by: Gabriel Craciunescu Signed-off-by: Tom Lendacky Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brijesh Singh Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180110192634.6026.10452.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mem_encrypt.h | 4 +-- arch/x86/kernel/head64.c | 4 +-- arch/x86/kernel/setup.c | 10 ------ arch/x86/mm/mem_encrypt.c | 66 +++++++++++++++++++++++++++++++++----- arch/x86/mm/mem_encrypt_boot.S | 46 +++++++++++++------------- 5 files changed, 85 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c9459a4c3c68..22c5f3e6f820 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); -void __init sme_encrypt_kernel(void); +void __init sme_encrypt_kernel(struct boot_params *bp); void __init sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); @@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } -static inline void __init sme_encrypt_kernel(void) { } +static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } static inline void __init sme_enable(struct boot_params *bp) { } static inline bool sme_active(void) { return false; } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6a5d757b9cfd..7ba5d819ebe3 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr, p = fixup_pointer(&phys_base, physaddr); *p += load_delta - sme_get_me_mask(); - /* Encrypt the kernel (if SME is active) */ - sme_encrypt_kernel(); + /* Encrypt the kernel and related (if SME is active) */ + 
sme_encrypt_kernel(bp); /* * Return the SME encryption mask (if SME is active) to be used as a diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 145810b0edf6..68d7ab81c62f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -364,16 +364,6 @@ static void __init reserve_initrd(void) !ramdisk_image || !ramdisk_size) return; /* No initrd provided by bootloader */ - /* - * If SME is active, this memory will be marked encrypted by the - * kernel when it is accessed (including relocation). However, the - * ramdisk image was loaded decrypted by the bootloader, so make - * sure that it is encrypted before accessing it. For SEV the - * ramdisk will already be encrypted, so only do this for SME. - */ - if (sme_active()) - sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); - initrd_start = 0; mapped_size = memblock_mem_size(max_pfn_mapped); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index e74a1722d438..3ef362f598e3 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -738,11 +738,12 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return total; } -void __init sme_encrypt_kernel(void) +void __init sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; unsigned long kernel_start, kernel_end, kernel_len; + unsigned long initrd_start, initrd_end, initrd_len; struct sme_populate_pgd_data ppd; unsigned long pgtable_area_len; unsigned long decrypted_base; @@ -751,14 +752,15 @@ void __init sme_encrypt_kernel(void) return; /* - * Prepare for encrypting the kernel by building new pagetables with - * the necessary attributes needed to encrypt the kernel in place. + * Prepare for encrypting the kernel and initrd by building new + * pagetables with the necessary attributes needed to encrypt the + * kernel in place. 
* * One range of virtual addresses will map the memory occupied - * by the kernel as encrypted. + * by the kernel and initrd as encrypted. * * Another range of virtual addresses will map the memory occupied - * by the kernel as decrypted and write-protected. + * by the kernel and initrd as decrypted and write-protected. * * The use of write-protect attribute will prevent any of the * memory from being cached. @@ -769,6 +771,20 @@ void __init sme_encrypt_kernel(void) kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); kernel_len = kernel_end - kernel_start; + initrd_start = 0; + initrd_end = 0; + initrd_len = 0; +#ifdef CONFIG_BLK_DEV_INITRD + initrd_len = (unsigned long)bp->hdr.ramdisk_size | + ((unsigned long)bp->ext_ramdisk_size << 32); + if (initrd_len) { + initrd_start = (unsigned long)bp->hdr.ramdisk_image | + ((unsigned long)bp->ext_ramdisk_image << 32); + initrd_end = PAGE_ALIGN(initrd_start + initrd_len); + initrd_len = initrd_end - initrd_start; + } +#endif + /* Set the encryption workarea to be immediately after the kernel */ workarea_start = kernel_end; @@ -791,6 +807,8 @@ void __init sme_encrypt_kernel(void) */ pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; + if (initrd_len) + pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; /* PUDs and PMDs needed in the current pagetables for the workarea */ pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); @@ -829,9 +847,9 @@ void __init sme_encrypt_kernel(void) /* * A new pagetable structure is being built to allow for the kernel - * to be encrypted. It starts with an empty PGD that will then be - * populated with new PUDs and PMDs as the encrypted and decrypted - * kernel mappings are created. + * and initrd to be encrypted. It starts with an empty PGD that will + * then be populated with new PUDs and PMDs as the encrypted and + * decrypted kernel mappings are created. 
*/ ppd.pgd = ppd.pgtable_area; memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); @@ -844,6 +862,12 @@ void __init sme_encrypt_kernel(void) * the base of the mapping. */ decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); + if (initrd_len) { + unsigned long check_base; + + check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); + decrypted_base = max(decrypted_base, check_base); + } decrypted_base <<= PGDIR_SHIFT; /* Add encrypted kernel (identity) mappings */ @@ -858,6 +882,21 @@ void __init sme_encrypt_kernel(void) ppd.vaddr_end = kernel_end + decrypted_base; sme_map_range_decrypted_wp(&ppd); + if (initrd_len) { + /* Add encrypted initrd (identity) mappings */ + ppd.paddr = initrd_start; + ppd.vaddr = initrd_start; + ppd.vaddr_end = initrd_end; + sme_map_range_encrypted(&ppd); + /* + * Add decrypted, write-protected initrd (non-identity) mappings + */ + ppd.paddr = initrd_start; + ppd.vaddr = initrd_start + decrypted_base; + ppd.vaddr_end = initrd_end + decrypted_base; + sme_map_range_decrypted_wp(&ppd); + } + /* Add decrypted workarea mappings to both kernel mappings */ ppd.paddr = workarea_start; ppd.vaddr = workarea_start; @@ -873,6 +912,11 @@ void __init sme_encrypt_kernel(void) sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, kernel_len, workarea_start, (unsigned long)ppd.pgd); + if (initrd_len) + sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, + initrd_len, workarea_start, + (unsigned long)ppd.pgd); + /* * At this point we are running encrypted. 
Remove the mappings for * the decrypted areas - all that is needed for this is to remove @@ -882,6 +926,12 @@ void __init sme_encrypt_kernel(void) ppd.vaddr_end = kernel_end + decrypted_base; sme_clear_pgd(&ppd); + if (initrd_len) { + ppd.vaddr = initrd_start + decrypted_base; + ppd.vaddr_end = initrd_end + decrypted_base; + sme_clear_pgd(&ppd); + } + ppd.vaddr = workarea_start + decrypted_base; ppd.vaddr_end = workarea_end + decrypted_base; sme_clear_pgd(&ppd); diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 23a8a9e411ea..01f682cf77a8 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute) /* * Entry parameters: - * RDI - virtual address for the encrypted kernel mapping - * RSI - virtual address for the decrypted kernel mapping - * RDX - length of kernel + * RDI - virtual address for the encrypted mapping + * RSI - virtual address for the decrypted mapping + * RDX - length to encrypt * RCX - virtual address of the encryption workarea, including: * - stack page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE) @@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute) addq $PAGE_SIZE, %rax /* Workarea encryption routine */ push %r12 - movq %rdi, %r10 /* Encrypted kernel */ - movq %rsi, %r11 /* Decrypted kernel */ - movq %rdx, %r12 /* Kernel length */ + movq %rdi, %r10 /* Encrypted area */ + movq %rsi, %r11 /* Decrypted area */ + movq %rdx, %r12 /* Area length */ /* Copy encryption routine into the workarea */ movq %rax, %rdi /* Workarea encryption routine */ @@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute) rep movsb /* Setup registers for call */ - movq %r10, %rdi /* Encrypted kernel */ - movq %r11, %rsi /* Decrypted kernel */ + movq %r10, %rdi /* Encrypted area */ + movq %r11, %rsi /* Decrypted area */ movq %r8, %rdx /* Pagetables used for encryption */ - movq %r12, %rcx /* Kernel length */ + movq %r12, %rcx /* Area length */ movq %rax, %r8 /* Workarea encryption routine */ 
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ @@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute) ENTRY(__enc_copy) /* - * Routine used to encrypt kernel. + * Routine used to encrypt memory in place. * This routine must be run outside of the kernel proper since * the kernel will be encrypted during the process. So this * routine is defined here and then copied to an area outside @@ -79,19 +79,19 @@ ENTRY(__enc_copy) * during execution. * * On entry the registers must be: - * RDI - virtual address for the encrypted kernel mapping - * RSI - virtual address for the decrypted kernel mapping + * RDI - virtual address for the encrypted mapping + * RSI - virtual address for the decrypted mapping * RDX - address of the pagetables to use for encryption - * RCX - length of kernel + * RCX - length of area * R8 - intermediate copy buffer * * RAX - points to this routine * - * The kernel will be encrypted by copying from the non-encrypted - * kernel space to an intermediate buffer and then copying from the - * intermediate buffer back to the encrypted kernel space. The physical - * addresses of the two kernel space mappings are the same which - * results in the kernel being encrypted "in place". + * The area will be encrypted by copying from the non-encrypted + * memory space to an intermediate buffer and then copying from the + * intermediate buffer back to the encrypted memory space. The physical + * addresses of the two mappings are the same which results in the area + * being encrypted "in place". 
*/ /* Enable the new page tables */ mov %rdx, %cr3 @@ -106,9 +106,9 @@ ENTRY(__enc_copy) push %r15 push %r12 - movq %rcx, %r9 /* Save kernel length */ - movq %rdi, %r10 /* Save encrypted kernel address */ - movq %rsi, %r11 /* Save decrypted kernel address */ + movq %rcx, %r9 /* Save area length */ + movq %rdi, %r10 /* Save encrypted area address */ + movq %rsi, %r11 /* Save decrypted area address */ /* Set the PAT register PA5 entry to write-protect */ movl $MSR_IA32_CR_PAT, %ecx @@ -128,13 +128,13 @@ ENTRY(__enc_copy) movq %r9, %r12 2: - movq %r11, %rsi /* Source - decrypted kernel */ + movq %r11, %rsi /* Source - decrypted area */ movq %r8, %rdi /* Dest - intermediate copy buffer */ movq %r12, %rcx rep movsb movq %r8, %rsi /* Source - intermediate copy buffer */ - movq %r10, %rdi /* Dest - encrypted kernel */ + movq %r10, %rdi /* Dest - encrypted area */ movq %r12, %rcx rep movsb -- cgit v1.2.3 From ae67badaa1643253998cb21d5782e4ea7c231a29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 14 Jan 2018 23:30:51 +0100 Subject: hrtimer: Optimize the hrtimer code by using static keys for migration_enable/nohz_active The hrtimer_cpu_base::migration_enable and ::nohz_active fields were originally introduced to avoid accessing global variables for these decisions. Still that results in a (cache hot) load and conditional branch, which can be avoided by using static keys. Implement it with static keys and optimize for the most critical case of high performance networking which tends to disable the timer migration functionality. No change in functionality. 
Signed-off-by: Thomas Gleixner Cc: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: Frederic Weisbecker Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1801142327490.2371@nanos Link: https://lkml.kernel.org/r/20171221104205.7269-2-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 --- kernel/time/hrtimer.c | 17 +++------- kernel/time/tick-internal.h | 19 +++++++---- kernel/time/tick-sched.c | 2 +- kernel/time/timer.c | 83 +++++++++++++++++++++++---------------------- 5 files changed, 60 insertions(+), 65 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 012c37fdb688..79b2a8d29d8c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -153,8 +153,6 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @migration_enabled: The migration of hrtimers to other cpus is enabled - * @nohz_active: The nohz functionality is enabled * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @next_timer: Pointer to the first expiring timer @@ -178,8 +176,6 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - bool migration_enabled; - bool nohz_active; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hres_active : 1, diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d32520840fde..1d06d2bde733 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -178,23 +178,16 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) #endif } -#ifdef CONFIG_NO_HZ_COMMON -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) -{ - if (pinned || !base->migration_enabled) - return base; - return 
&per_cpu(hrtimer_bases, get_nohz_timer_target()); -} -#else static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) + if (static_branch_likely(&timers_migration_enabled) && !pinned) + return &per_cpu(hrtimer_bases, get_nohz_timer_target()); +#endif return base; } -#endif /* * We switch the timer base to a power-optimized selected CPU target, @@ -969,7 +962,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * Kick to reschedule the next tick to handle the new timer * on dynticks target. */ - if (new_base->cpu_base->nohz_active) + if (is_timers_nohz_active()) wake_up_nohz_cpu(new_base->cpu_base->cpu); } else { hrtimer_reprogram(timer, new_base); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f8e1845aa464..f690628e068c 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -150,14 +150,19 @@ static inline void tick_nohz_init(void) { } #ifdef CONFIG_NO_HZ_COMMON extern unsigned long tick_nohz_active; -#else +extern void timers_update_nohz(void); +extern struct static_key_false timers_nohz_active; +static inline bool is_timers_nohz_active(void) +{ + return static_branch_likely(&timers_nohz_active); +} +# ifdef CONFIG_SMP +extern struct static_key_false timers_migration_enabled; +# endif +#else /* CONFIG_NO_HZ_COMMON */ +static inline void timers_update_nohz(void) { } #define tick_nohz_active (0) -#endif - -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void timers_update_migration(bool update_nohz); -#else -static inline void timers_update_migration(bool update_nohz) { } +static inline bool is_timers_nohz_active(void) { return false; } #endif DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f7cc7abfcf25..29a5733eff83 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1107,7 +1107,7 @@ 
static inline void tick_nohz_activate(struct tick_sched *ts, int mode) ts->nohz_mode = mode; /* One update is enough */ if (!test_and_set_bit(0, &tick_nohz_active)) - timers_update_migration(true); + timers_update_nohz(); } /** diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 0bcf00e3ce48..d530f72b32f9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -200,8 +200,6 @@ struct timer_base { unsigned long clk; unsigned long next_expiry; unsigned int cpu; - bool migration_enabled; - bool nohz_active; bool is_idle; bool must_forward_clk; DECLARE_BITMAP(pending_map, WHEEL_SIZE); @@ -210,45 +208,57 @@ struct timer_base { static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +#ifdef CONFIG_NO_HZ_COMMON + +DEFINE_STATIC_KEY_FALSE(timers_nohz_active); +static DEFINE_MUTEX(timer_keys_mutex); + +static void timer_update_keys(struct work_struct *work); +static DECLARE_WORK(timer_update_work, timer_update_keys); + +#ifdef CONFIG_SMP unsigned int sysctl_timer_migration = 1; -void timers_update_migration(bool update_nohz) +DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); + +static void timers_update_migration(void) { - bool on = sysctl_timer_migration && tick_nohz_active; - unsigned int cpu; + if (sysctl_timer_migration && tick_nohz_active) + static_branch_enable(&timers_migration_enabled); + else + static_branch_disable(&timers_migration_enabled); +} +#else +static inline void timers_update_migration(void) { } +#endif /* !CONFIG_SMP */ - /* Avoid the loop, if nothing to update */ - if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on) - return; +static void timer_update_keys(struct work_struct *work) +{ + mutex_lock(&timer_keys_mutex); + timers_update_migration(); + static_branch_enable(&timers_nohz_active); + mutex_unlock(&timer_keys_mutex); +} - for_each_possible_cpu(cpu) { - per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on; - 
per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on; - per_cpu(hrtimer_bases.migration_enabled, cpu) = on; - if (!update_nohz) - continue; - per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true; - per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true; - per_cpu(hrtimer_bases.nohz_active, cpu) = true; - } +void timers_update_nohz(void) +{ + schedule_work(&timer_update_work); } int timer_migration_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - static DEFINE_MUTEX(mutex); int ret; - mutex_lock(&mutex); + mutex_lock(&timer_keys_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) - timers_update_migration(false); - mutex_unlock(&mutex); + timers_update_migration(); + mutex_unlock(&timer_keys_mutex); return ret; } -#endif +#endif /* NO_HZ_COMMON */ static unsigned long round_jiffies_common(unsigned long j, int cpu, bool force_up) @@ -534,7 +544,7 @@ __internal_add_timer(struct timer_base *base, struct timer_list *timer) static void trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) { - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) + if (!is_timers_nohz_active()) return; /* @@ -849,21 +859,20 @@ static inline struct timer_base *get_timer_base(u32 tflags) return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); } -#ifdef CONFIG_NO_HZ_COMMON static inline struct timer_base * get_target_base(struct timer_base *base, unsigned tflags) { -#ifdef CONFIG_SMP - if ((tflags & TIMER_PINNED) || !base->migration_enabled) - return get_timer_this_cpu_base(tflags); - return get_timer_cpu_base(tflags, get_nohz_timer_target()); -#else - return get_timer_this_cpu_base(tflags); +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) + if (static_branch_likely(&timers_migration_enabled) && + !(tflags & TIMER_PINNED)) + return get_timer_cpu_base(tflags, get_nohz_timer_target()); #endif + return get_timer_this_cpu_base(tflags); } static inline void 
forward_timer_base(struct timer_base *base) { +#ifdef CONFIG_NO_HZ_COMMON unsigned long jnow; /* @@ -887,16 +896,8 @@ static inline void forward_timer_base(struct timer_base *base) base->clk = jnow; else base->clk = base->next_expiry; -} -#else -static inline struct timer_base * -get_target_base(struct timer_base *base, unsigned tflags) -{ - return get_timer_this_cpu_base(tflags); -} - -static inline void forward_timer_base(struct timer_base *base) { } #endif +} /* -- cgit v1.2.3 From d05ca13b8d3f685667b3b1748fa89285466270c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Dec 2017 11:41:31 +0100 Subject: hrtimer: Correct blatantly incorrect comment The protection of a hrtimer which runs its callback against migration to a different CPU has nothing to do with hard interrupt context. The protection against migration of a hrtimer running the expiry callback is the pointer in the cpu_base which holds a pointer to the currently running timer. This pointer is evaluated in the code which potentially switches the timer base and makes sure it's kept on the CPU on which the callback is running. Reported-by: Anna-Maria Gleixner Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Reviewed-by: Frederic Weisbecker Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-3-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 1d06d2bde733..7687355c00ff 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1195,9 +1195,9 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, timer->is_rel = false; /* - * Because we run timers from hardirq context, there is no chance - * they get migrated to another cpu, therefore its safe to unlock - * the timer base. 
+ * The timer is marked as running in the CPU base, so it is + * protected against migration to a different CPU even if the lock + * is dropped. */ raw_spin_unlock(&cpu_base->lock); trace_hrtimer_expire_entry(timer, now); -- cgit v1.2.3 From 1fbc78b3c980364d4fc15db83eca4a8e7ad289da Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:32 +0100 Subject: hrtimer: Fix kerneldoc syntax for 'struct hrtimer_cpu_base' The '/**' sequence marks the start of a structure description. Add the missing second asterisk. While at it adapt the ordering of the struct members to the struct definition and document the purpose of expires_next more precisely. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-4-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 79b2a8d29d8c..b3a382be8db0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -144,7 +144,7 @@ enum hrtimer_base_type { HRTIMER_MAX_CLOCK_BASES, }; -/* +/** * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers @@ -153,12 +153,12 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @expires_next: absolute time of the next event which was scheduled - * via clock_set_next_event() - * @next_timer: Pointer to the first expiring timer * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode * @hang_detected: The last hrtimer interrupt detected a hang + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue + * 
@next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs -- cgit v1.2.3 From 907777136f80d0cc0f714e5a389c4dfa9b4670ee Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:33 +0100 Subject: hrtimer: Clean up the 'int clock' parameter of schedule_hrtimeout_range_clock() schedule_hrtimeout_range_clock() uses an 'int clock' parameter for the clock ID, instead of the customary predefined "clockid_t" type. In hrtimer coding style the canonical variable name for the clock ID is 'clock_id', therefore change the name of the parameter here as well to make it all consistent. While at it, clean up the description for the 'clock_id' and 'mode' function parameters. The clock modes and the clock IDs are not restricted as the comment suggests. Fix the mode description as well for the callers of schedule_hrtimeout_range_clock(). No functional changes intended. 
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-5-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 2 +- kernel/time/hrtimer.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index b3a382be8db0..931ce9c89c93 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -462,7 +462,7 @@ extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, extern int schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, const enum hrtimer_mode mode, - int clock); + clockid_t clock_id); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 7687355c00ff..f2de328bb8d5 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1662,12 +1662,12 @@ void __init hrtimers_init(void) * schedule_hrtimeout_range_clock - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL - * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME + * @mode: timer mode + * @clock_id: timer clock to be used */ int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, - const enum hrtimer_mode mode, int clock) + const enum hrtimer_mode mode, clockid_t clock_id) { struct hrtimer_sleeper t; @@ -1688,7 +1688,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return -EINTR; } - hrtimer_init_on_stack(&t.timer, clock, mode); + hrtimer_init_on_stack(&t.timer, clock_id, mode); hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_init_sleeper(&t, current); @@ -1710,7 +1710,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 
delta, * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) * @delta: slack in expires timeout (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless @@ -1749,7 +1749,7 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); /** * schedule_hrtimeout - sleep until timeout * @expires: timeout value (ktime_t) - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * @mode: timer mode * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless -- cgit v1.2.3 From 6de6250c759781daeadca784d0cc34ae73f3b502 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:34 +0100 Subject: hrtimer: Fix hrtimer_start[_range_ns]() function descriptions The hrtimer_start[_range_ns]() functions start a timer reliably on this CPU only when HRTIMER_MODE_PINNED is set. Furthermore the HRTIMER_MODE_PINNED mode is not considered when a hrtimer is initialized. 
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-6-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +++--- kernel/time/hrtimer.c | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 931ce9c89c93..4e6a8841dcbe 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -361,11 +361,11 @@ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 range_ns, const enum hrtimer_mode mode); /** - * hrtimer_start - (re)start an hrtimer on the current CPU + * hrtimer_start - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL) + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) */ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f2de328bb8d5..fd08729de5d2 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -924,12 +924,12 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, } /** - * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU + * hrtimer_start_range_ns - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL) + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) @@ -1107,7 +1107,8 @@ static void 
__hrtimer_init(struct hrtimer *timer, clockid_t clock_id, * hrtimer_init - initialize a timer to the given clock * @timer: the timer to be initialized * @clock_id: the clock to be used - * @mode: timer mode abs/rel + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL); pinned is not considered here! */ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) -- cgit v1.2.3 From 48d0c9becc7f3c66874c100c126459a9da0fdced Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:35 +0100 Subject: hrtimer: Ensure POSIX compliance (relative CLOCK_REALTIME hrtimers) The POSIX specification defines that relative CLOCK_REALTIME timers are not affected by clock modifications. Those timers have to use CLOCK_MONOTONIC to ensure POSIX compliance. The introduction of the additional HRTIMER_MODE_PINNED mode broke this requirement for pinned timers. There is no user space visible impact because user space timers are not using pinned mode, but for consistency reasons this needs to be fixed. Check whether the mode has the HRTIMER_MODE_REL bit set instead of comparing with HRTIMER_MODE_ABS. 
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Fixes: 597d0275736d ("timers: Framework for identifying pinned timers") Link: http://lkml.kernel.org/r/20171221104205.7269-7-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index fd08729de5d2..60faade2bb4e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1095,7 +1095,12 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, cpu_base = raw_cpu_ptr(&hrtimer_bases); - if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) + /* + * POSIX magic: Relative CLOCK_REALTIME timers are not affected by + * clock modifications, so they need to become CLOCK_MONOTONIC to + * ensure POSIX compliance. + */ + if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) clock_id = CLOCK_MONOTONIC; base = hrtimer_clockid_to_base(clock_id); -- cgit v1.2.3 From 19b51cb5ff6ab7957bcbbec4ff812b83208f7e99 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:36 +0100 Subject: hrtimer: Clean up 'enum hrtimer_mode' It's not obvious that the HRTIMER_MODE variants are bit combinations, because all modes are hard coded constants currently. Change it so the bit meanings are clear; and use the symbols for creating modes which combine bits. While at it get rid of the ugly tail comments as well.
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-8-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4e6a8841dcbe..28f267cf2851 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -28,13 +28,19 @@ struct hrtimer_cpu_base; /* * Mode arguments of xxx_hrtimer functions: + * + * HRTIMER_MODE_ABS - Time value is absolute + * HRTIMER_MODE_REL - Time value is relative to now + * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered + * when starting the timer) */ enum hrtimer_mode { - HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */ - HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */ - HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */ - HRTIMER_MODE_ABS_PINNED = 0x02, - HRTIMER_MODE_REL_PINNED = 0x03, + HRTIMER_MODE_ABS = 0x00, + HRTIMER_MODE_REL = 0x01, + HRTIMER_MODE_PINNED = 0x02, + + HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, + HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, }; /* -- cgit v1.2.3 From 91633eed73a3ac37aaece5c8c1f93a18bae616a9 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:37 +0100 Subject: tracing/hrtimer: Fix tracing bugs by taking all clock bases and modes into account So far only CLOCK_MONOTONIC and CLOCK_REALTIME were taken into account as well as HRTIMER_MODE_ABS/REL in the hrtimer_init tracepoint. The query for detecting the ABS or REL timer modes is not valid anymore, it got broken by the introduction of HRTIMER_MODE_PINNED. HRTIMER_MODE_PINNED is not evaluated in the hrtimer_init() call, but for the sake of completeness print all given modes. 
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-9-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/trace/events/timer.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 16e305e69f34..c6f728037c53 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -136,6 +136,20 @@ DEFINE_EVENT(timer_class, timer_cancel, TP_ARGS(timer) ); +#define decode_clockid(type) \ + __print_symbolic(type, \ + { CLOCK_REALTIME, "CLOCK_REALTIME" }, \ + { CLOCK_MONOTONIC, "CLOCK_MONOTONIC" }, \ + { CLOCK_BOOTTIME, "CLOCK_BOOTTIME" }, \ + { CLOCK_TAI, "CLOCK_TAI" }) + +#define decode_hrtimer_mode(mode) \ + __print_symbolic(mode, \ + { HRTIMER_MODE_ABS, "ABS" }, \ + { HRTIMER_MODE_REL, "REL" }, \ + { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ + { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }) + /** * hrtimer_init - called when the hrtimer is initialized * @hrtimer: pointer to struct hrtimer @@ -162,10 +176,8 @@ TRACE_EVENT(hrtimer_init, ), TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, - __entry->clockid == CLOCK_REALTIME ? - "CLOCK_REALTIME" : "CLOCK_MONOTONIC", - __entry->mode == HRTIMER_MODE_ABS ? - "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") + decode_clockid(__entry->clockid), + decode_hrtimer_mode(__entry->mode)) ); /** -- cgit v1.2.3 From 63e2ed3659752a4850e0ef3a07f809988fcd74a4 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:38 +0100 Subject: tracing/hrtimer: Print the hrtimer mode in the 'hrtimer_start' tracepoint The 'hrtimer_start' tracepoint lacks the mode information. The mode is important because consecutive starts can switch from ABS to REL or from PINNED to non PINNED. Append the mode field. 
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-10-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/trace/events/timer.h | 13 ++++++++----- kernel/time/hrtimer.c | 16 +++++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index c6f728037c53..744b4310b24b 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -186,15 +186,16 @@ TRACE_EVENT(hrtimer_init, */ TRACE_EVENT(hrtimer_start, - TP_PROTO(struct hrtimer *hrtimer), + TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode), - TP_ARGS(hrtimer), + TP_ARGS(hrtimer, mode), TP_STRUCT__entry( __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) + __field( enum hrtimer_mode, mode ) ), TP_fast_assign( @@ -202,12 +203,14 @@ TRACE_EVENT(hrtimer_start, __entry->function = hrtimer->function; __entry->expires = hrtimer_get_expires(hrtimer); __entry->softexpires = hrtimer_get_softexpires(hrtimer); + __entry->mode = mode; ), - TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", - __entry->hrtimer, __entry->function, + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu " + "mode=%s", __entry->hrtimer, __entry->function, (unsigned long long) __entry->expires, - (unsigned long long) __entry->softexpires) + (unsigned long long) __entry->softexpires, + decode_hrtimer_mode(__entry->mode)) ); /** diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 60faade2bb4e..f4f46589d7cc 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -435,10 +435,11 @@ debug_init(struct hrtimer *timer, clockid_t clockid, trace_hrtimer_init(timer, clockid, mode); } -static inline void debug_activate(struct hrtimer *timer) +static inline void debug_activate(struct hrtimer 
*timer, + enum hrtimer_mode mode) { debug_hrtimer_activate(timer); - trace_hrtimer_start(timer); + trace_hrtimer_start(timer, mode); } static inline void debug_deactivate(struct hrtimer *timer) @@ -828,9 +829,10 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); * Returns 1 when the new timer is the leftmost timer in the tree. */ static int enqueue_hrtimer(struct hrtimer *timer, - struct hrtimer_clock_base *base) + struct hrtimer_clock_base *base, + enum hrtimer_mode mode) { - debug_activate(timer); + debug_activate(timer, mode); base->cpu_base->active_bases |= 1 << base->index; @@ -953,7 +955,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - leftmost = enqueue_hrtimer(timer, new_base); + leftmost = enqueue_hrtimer(timer, new_base, mode); if (!leftmost) goto unlock; @@ -1222,7 +1224,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, */ if (restart != HRTIMER_NORESTART && !(timer->state & HRTIMER_STATE_ENQUEUED)) - enqueue_hrtimer(timer, base); + enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); /* * Separate the ->running assignment from the ->state assignment. @@ -1621,7 +1623,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * sort out already expired timers and reprogram the * event device. */ - enqueue_hrtimer(timer, new_base); + enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); } } -- cgit v1.2.3 From c272ca58c3ec5631f4ab507489d9477f74efe645 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:39 +0100 Subject: hrtimer: Switch 'for' loop to _ffs() evaluation Looping over all clock bases to find active bits is suboptimal if not all bases are active. Avoid this by converting it to a __ffs() evaluation. The functionality is outsourced into its own function and is called via a macro as suggested by Peter Zijlstra.
Suggested-by: Peter Zijlstra Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-11-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f4f46589d7cc..cfcf8decf102 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -448,6 +448,23 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } +static struct hrtimer_clock_base * +__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) +{ + unsigned int idx; + + if (!*active) + return NULL; + + idx = __ffs(*active); + *active &= ~(1U << idx); + + return &cpu_base->clock_base[idx]; +} + +#define for_each_active_base(base, cpu_base, active) \ + while ((base = __next_base((cpu_base), &(active)))) + #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, struct hrtimer *timer) @@ -459,18 +476,15 @@ static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) { - struct hrtimer_clock_base *base = cpu_base->clock_base; + struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; ktime_t expires, expires_next = KTIME_MAX; hrtimer_update_next_timer(cpu_base, NULL); - for (; active; base++, active >>= 1) { + for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; - if (!(active & 0x01)) - continue; - next = timerqueue_getnext(&base->active); timer = container_of(next, struct hrtimer, node); expires = ktime_sub(hrtimer_get_expires(timer), base->offset); @@ -1241,16 +1255,13 @@ static void __run_hrtimer(struct 
hrtimer_cpu_base *cpu_base, static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) { - struct hrtimer_clock_base *base = cpu_base->clock_base; + struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; - for (; active; base++, active >>= 1) { + for_each_active_base(base, cpu_base, active) { struct timerqueue_node *node; ktime_t basenow; - if (!(active & 0x01)) - continue; - basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { -- cgit v1.2.3 From 3f0b9e8eec7262648ab9c8321bf931624ee5c10a Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:40 +0100 Subject: hrtimer: Store running timer in hrtimer_clock_base The pointer to the currently running timer is stored in hrtimer_cpu_base before the base lock is dropped and the callback is invoked. This results in two levels of indirections and the upcoming support for softirq based hrtimer requires splitting the "running" storage into soft and hard IRQ context expiry. Storing both in the cpu base would require conditionals in all code paths accessing that information. It's possible to have a per clock base sequence count and running pointer without changing the semantics of the related mechanisms because the timer base pointer cannot be changed while a timer is running the callback. Unfortunately this makes cpu_clock base larger than 32 bytes on 32-bit kernels. Instead of having huge gaps due to alignment, remove the alignment and let the compiler pack CPU base for 32-bit kernels. The resulting cache access patterns are fortunately not really different from the current behaviour. On 64-bit kernels the 64-byte alignment stays and the behaviour is unchanged. This was determined by analyzing the resulting layout and looking at the number of cache lines involved for the frequently used clocks. 
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-12-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 20 +++++++++----------- kernel/time/hrtimer.c | 28 +++++++++++++--------------- 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 28f267cf2851..1bae7b9f071d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -118,9 +118,9 @@ struct hrtimer_sleeper { }; #ifdef CONFIG_64BIT -# define HRTIMER_CLOCK_BASE_ALIGN 64 +# define __hrtimer_clock_base_align ____cacheline_aligned #else -# define HRTIMER_CLOCK_BASE_ALIGN 32 +# define __hrtimer_clock_base_align #endif /** @@ -129,18 +129,22 @@ struct hrtimer_sleeper { * @index: clock type index for per_cpu support when moving a * timer to a base on another cpu. * @clockid: clock id for per_cpu support + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer * @active: red black tree root node for the active timers * @get_time: function to retrieve the current time of the clock * @offset: offset of this clock to the monotonic base */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; - int index; + unsigned int index; clockid_t clockid; + seqcount_t seq; + struct hrtimer *running; struct timerqueue_head active; ktime_t (*get_time)(void); ktime_t offset; -} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); +} __hrtimer_clock_base_align; enum hrtimer_base_type { HRTIMER_BASE_MONOTONIC, @@ -154,8 +158,6 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers - * @seq: seqcount around __run_hrtimer - * @running: pointer to the currently running hrtimer * @cpu: cpu number * @active_bases: Bitfield to mark 
bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events @@ -177,8 +179,6 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - seqcount_t seq; - struct hrtimer *running; unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; @@ -198,8 +198,6 @@ struct hrtimer_cpu_base { static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { - BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); - timer->node.expires = time; timer->_softexpires = time; } @@ -424,7 +422,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) */ static inline int hrtimer_callback_running(struct hrtimer *timer) { - return timer->base->cpu_base->running == timer; + return timer->base->running == timer; } /* Forward a hrtimer so it expires after now: */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index cfcf8decf102..e56805fe5d00 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -70,7 +70,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = { .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), - .seq = SEQCNT_ZERO(hrtimer_bases.seq), .clock_base = { { @@ -118,7 +117,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { * timer->base->cpu_base */ static struct hrtimer_cpu_base migration_cpu_base = { - .seq = SEQCNT_ZERO(migration_cpu_base), .clock_base = { { .cpu_base = &migration_cpu_base, }, }, }; @@ -1148,19 +1146,19 @@ EXPORT_SYMBOL_GPL(hrtimer_init); */ bool hrtimer_active(const struct hrtimer *timer) { - struct hrtimer_cpu_base *cpu_base; + struct hrtimer_clock_base *base; unsigned int seq; do { - cpu_base = READ_ONCE(timer->base->cpu_base); - seq = raw_read_seqcount_begin(&cpu_base->seq); + base = READ_ONCE(timer->base); + seq = raw_read_seqcount_begin(&base->seq); if (timer->state != HRTIMER_STATE_INACTIVE || - cpu_base->running == timer) + base->running == timer) return true; - } while 
(read_seqcount_retry(&cpu_base->seq, seq) || - cpu_base != READ_ONCE(timer->base->cpu_base)); + } while (read_seqcount_retry(&base->seq, seq) || + base != READ_ONCE(timer->base)); return false; } @@ -1194,16 +1192,16 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, lockdep_assert_held(&cpu_base->lock); debug_deactivate(timer); - cpu_base->running = timer; + base->running = timer; /* * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in - * hrtimer_active() cannot observe cpu_base->running == NULL && + * hrtimer_active() cannot observe base->running == NULL && * timer->state == INACTIVE. */ - raw_write_seqcount_barrier(&cpu_base->seq); + raw_write_seqcount_barrier(&base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); fn = timer->function; @@ -1244,13 +1242,13 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, * Separate the ->running assignment from the ->state assignment. * * As with a regular write barrier, this ensures the read side in - * hrtimer_active() cannot observe cpu_base->running == NULL && + * hrtimer_active() cannot observe base->running.timer == NULL && * timer->state == INACTIVE. */ - raw_write_seqcount_barrier(&cpu_base->seq); + raw_write_seqcount_barrier(&base->seq); - WARN_ON_ONCE(cpu_base->running != timer); - cpu_base->running = NULL; + WARN_ON_ONCE(base->running != timer); + base->running = NULL; } static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) -- cgit v1.2.3 From da21c5a58a7f30db69e04e06dfb6777ccbb1113c Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:41 +0100 Subject: hrtimer: Make room in 'struct hrtimer_cpu_base' The upcoming softirq based hrtimers support requires an additional field in the hrtimer_cpu_base struct, which would grow the struct size beyond a cache line. 
The hrtimer_cpu_base::nr_retries and ::nr_hangs members are solely used for diagnostic output and have no requirement to be 'unsigned int'. Make them 'unsigned short' to create room for the new struct member. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-13-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1bae7b9f071d..56e56bcb6f0f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -189,8 +189,8 @@ struct hrtimer_cpu_base { ktime_t expires_next; struct hrtimer *next_timer; unsigned int nr_events; - unsigned int nr_retries; - unsigned int nr_hangs; + unsigned short nr_retries; + unsigned short nr_hangs; unsigned int max_hang_time; #endif struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -- cgit v1.2.3 From 28bfd18bf3daa5db8bb3422ea7138c8b7d2444ac Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:42 +0100 Subject: hrtimer: Make the hrtimer_cpu_base::hres_active field unconditional, to simplify the code The hrtimer_cpu_base::hres_active field depends on CONFIG_HIGH_RES_TIMERS=y currently, and all functions related to this member are conditional as well. To simplify the code make it unconditional and set it to zero during initialization. (This will also help with the upcoming softirq based hrtimers code.) The conditional code sections can be avoided by adding IS_ENABLED(HIGHRES) conditionals into common functions, which ensures dead code elimination. There is no functional change.
Suggested-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-14-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 20 ++++++++------------ kernel/time/hrtimer.c | 31 +++++++++++++++---------------- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 56e56bcb6f0f..22627b3a33fe 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -161,8 +161,8 @@ enum hrtimer_base_type { * @cpu: cpu number * @active_bases: Bitfield to mark bases with active timers * @clock_was_set_seq: Sequence counter of clock was set events - * @in_hrtirq: hrtimer_interrupt() is currently executing * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue @@ -182,9 +182,9 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; + unsigned int hres_active : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, - hres_active : 1, hang_detected : 1; ktime_t expires_next; struct hrtimer *next_timer; @@ -266,16 +266,17 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) return timer->base->get_time(); } +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? + timer->base->cpu_base->hres_active : 0; +} + #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; extern void hrtimer_interrupt(struct clock_event_device *dev); -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return timer->base->cpu_base->hres_active; -} - /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -298,11 +299,6 @@ extern unsigned int hrtimer_resolution; #define hrtimer_resolution (unsigned int)LOW_RES_NSEC -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return 0; -} - static inline void clock_was_set_delayed(void) { } #endif diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e56805fe5d00..b688090093d6 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -512,6 +512,20 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) offs_real, offs_boot, offs_tai); } +/* + * Is the high resolution mode active ? + */ +static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) +{ + return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? + cpu_base->hres_active : 0; +} + +static inline int hrtimer_hres_active(void) +{ + return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -540,19 +554,6 @@ static inline int hrtimer_is_hres_enabled(void) return hrtimer_hres_enabled; } -/* - * Is the high resolution mode active ? 
- */ -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) -{ - return cpu_base->hres_active; -} - -static inline int hrtimer_hres_active(void) -{ - return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); -} - /* * Reprogram the event source with checking both queues for the * next event @@ -661,7 +662,6 @@ static void hrtimer_reprogram(struct hrtimer *timer, static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next = KTIME_MAX; - base->hres_active = 0; } /* @@ -720,8 +720,6 @@ void clock_was_set_delayed(void) #else -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; } -static inline int hrtimer_hres_active(void) { return 0; } static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } static inline void @@ -1600,6 +1598,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) } cpu_base->cpu = cpu; + cpu_base->hres_active = 0; hrtimer_init_hres(cpu_base); return 0; } -- cgit v1.2.3 From 851cff8caf4d638d001aac6e57a3511abd94f100 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:43 +0100 Subject: hrtimer: Use accessor functions instead of direct access __hrtimer_hres_active() is now available unconditionally, so replace open coded direct accesses to hrtimer_cpu_base.hres_active. No functional change.
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-15-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index b688090093d6..5a624f9c8408 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -564,7 +564,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - if (!cpu_base->hres_active) + if (!__hrtimer_hres_active(cpu_base)) return; expires_next = __hrtimer_get_next_event(cpu_base); @@ -673,7 +673,7 @@ static void retrigger_next_event(void *arg) { struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); - if (!base->hres_active) + if (!__hrtimer_hres_active(base)) return; raw_spin_lock(&base->lock); -- cgit v1.2.3 From 07a9a7eae86abb796468b225586086d7c4cb59fc Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:44 +0100 Subject: hrtimer: Make the remote enqueue check unconditional hrtimer_cpu_base.expires_next is used to cache the next event armed in the timer hardware. The value is used to check whether an hrtimer can be enqueued remotely. If the new hrtimer is expiring before expires_next, then remote enqueue is not possible as the remote hrtimer hardware cannot be accessed for reprogramming to an earlier expiry time. The remote enqueue check is currently conditional on CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active. There is no compelling reason to make this conditional. Move hrtimer_cpu_base.expires_next out of the CONFIG_HIGH_RES_TIMERS=y guarded area and remove the conditionals in hrtimer_check_target(). 
The check is currently a NOOP for the CONFIG_HIGH_RES_TIMERS=n and the !hrtimer_cpu_base.hres_active case because in these cases nothing updates hrtimer_cpu_base.expires_next yet. This will be changed with later patches which further reduce the #ifdef zoo in this code. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-16-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +++--- kernel/time/hrtimer.c | 26 ++++++-------------------- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 22627b3a33fe..bb7270e8bc37 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -164,13 +164,13 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang - * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue * @next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). 
@@ -186,13 +186,13 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hang_detected : 1; - ktime_t expires_next; struct hrtimer *next_timer; unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; #endif + ktime_t expires_next; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5a624f9c8408..a9ab67f3e5d5 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -154,26 +154,21 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * With HIGHRES=y we do not migrate the timer when it is expiring - * before the next event on the target cpu because we cannot reprogram - * the target cpu hardware and we would cause it to fire late. + * We do not migrate the timer when it is expiring before the next + * event on the target cpu. When high resolution is enabled, we cannot + * reprogram the target cpu hardware and we would cause it to fire + * late. To keep it simple, we handle the high resolution enabled and + * disabled case similar. * * Called with cpu_base->lock of target cpu held. 
*/ static int hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) { -#ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires; - if (!new_base->cpu_base->hres_active) - return 0; - expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); return expires <= new_base->cpu_base->expires_next; -#else - return 0; -#endif } static inline @@ -656,14 +651,6 @@ static void hrtimer_reprogram(struct hrtimer *timer, tick_program_event(expires, 1); } -/* - * Initialize the high resolution related parts of cpu_base - */ -static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) -{ - base->expires_next = KTIME_MAX; -} - /* * Retrigger next event is called after clock was set * @@ -729,7 +716,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, { return 0; } -static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -1599,7 +1585,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->cpu = cpu; cpu_base->hres_active = 0; - hrtimer_init_hres(cpu_base); + cpu_base->expires_next = KTIME_MAX; return 0; } -- cgit v1.2.3 From eb27926ba05233dc4f2052cc9d4f19359ec3cd2c Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:45 +0100 Subject: hrtimer: Make hrtimer_cpu_base.next_timer handling unconditional hrtimer_cpu_base.next_timer stores the pointer to the next expiring timer in a CPU base. This pointer cannot be dereferenced and is solely used to check whether a hrtimer which is removed is the hrtimer which is the first to expire in the CPU base. If this is the case, then the timer hardware needs to be reprogrammed to avoid an extra interrupt for nothing. Again, this is conditional functionality, but there is no compelling reason to make this conditional. As a preparation, hrtimer_cpu_base.next_timer needs to be available unconditionally.
Aside of that the upcoming support for softirq based hrtimers requires access to this pointer unconditionally as well, so our motivation is not entirely simplicity based. Make the update of hrtimer_cpu_base.next_timer unconditional and remove the #ifdef cruft. The impact on CONFIG_HIGH_RES_TIMERS=n && CONFIG_NOHZ=n is marginal as it's just a store on an already dirtied cacheline. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-17-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 ++-- kernel/time/hrtimer.c | 12 ++---------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index bb7270e8bc37..2d3e1d678a4d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -164,13 +164,13 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang - * @next_timer: Pointer to the first expiring timer * @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @expires_next: absolute time of the next event, is required for remote * hrtimer enqueue + * @next_timer: Pointer to the first expiring timer * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). 
@@ -186,13 +186,13 @@ struct hrtimer_cpu_base { #ifdef CONFIG_HIGH_RES_TIMERS unsigned int in_hrtirq : 1, hang_detected : 1; - struct hrtimer *next_timer; unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; unsigned int max_hang_time; #endif ktime_t expires_next; + struct hrtimer *next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index a9ab67f3e5d5..26abaa7b0419 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -459,21 +459,13 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) while ((base = __next_base((cpu_base), &(active)))) #if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) -static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, - struct hrtimer *timer) -{ -#ifdef CONFIG_HIGH_RES_TIMERS - cpu_base->next_timer = timer; -#endif -} - static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; ktime_t expires, expires_next = KTIME_MAX; - hrtimer_update_next_timer(cpu_base, NULL); + cpu_base->next_timer = NULL; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; @@ -483,7 +475,7 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires < expires_next) { expires_next = expires; - hrtimer_update_next_timer(cpu_base, timer); + cpu_base->next_timer = timer; } } /* -- cgit v1.2.3 From 11a9fe069e341ac53bddb8fe1a85ea986cff1a42 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:46 +0100 Subject: hrtimer: Make hrtimer_reprogram() unconditional hrtimer_reprogram() needs to be available unconditionally for softirq based hrtimers.
Move the function and all required struct members out of the CONFIG_HIGH_RES_TIMERS #ifdef. There is no functional change because hrtimer_reprogram() is only invoked when hrtimer_cpu_base.hres_active is true. Making it unconditional increases the text size for the CONFIG_HIGH_RES_TIMERS=n case, but avoids replication of that code for the upcoming softirq based hrtimers support. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-18-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 6 +-- kernel/time/hrtimer.c | 129 +++++++++++++++++++++++------------------------- 2 files changed, 65 insertions(+), 70 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2d3e1d678a4d..98ed35767ac5 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -182,10 +182,10 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - unsigned int hres_active : 1; -#ifdef CONFIG_HIGH_RES_TIMERS - unsigned int in_hrtirq : 1, + unsigned int hres_active : 1, + in_hrtirq : 1, hang_detected : 1; +#ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; unsigned short nr_hangs; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 26abaa7b0419..63d804aea1ea 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -581,68 +581,6 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) tick_program_event(cpu_base->expires_next, 1); } -/* - * When a timer is enqueued and expires earlier than the already enqueued - * timers, we have to check, whether it expires earlier than the timer for - * which the clock event device was armed. 
- * - * Called with interrupts disabled and base->cpu_base.lock held - */ -static void hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); - ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); - - WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); - - /* - * If the timer is not on the current cpu, we cannot reprogram - * the other cpus clock event device. - */ - if (base->cpu_base != cpu_base) - return; - - /* - * If the hrtimer interrupt is running, then it will - * reevaluate the clock bases and reprogram the clock event - * device. The callbacks are always executed in hard interrupt - * context so we don't need an extra check for a running - * callback. - */ - if (cpu_base->in_hrtirq) - return; - - /* - * CLOCK_REALTIME timer might be requested with an absolute - * expiry time which is less than base->offset. Set it to 0. - */ - if (expires < 0) - expires = 0; - - if (expires >= cpu_base->expires_next) - return; - - /* Update the pointer to the next expiring timer */ - cpu_base->next_timer = timer; - - /* - * If a hang was detected in the last timer interrupt then we - * do not schedule a timer which is earlier than the expiry - * which we enforced in the hang detection. We want the system - * to make progress. - */ - if (cpu_base->hang_detected) - return; - - /* - * Program the timer hardware. We enforce the expiry for - * events which are already in the past. 
- */ - cpu_base->expires_next = expires; - tick_program_event(expires, 1); -} - /* * Retrigger next event is called after clock was set * @@ -703,15 +641,72 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } -static inline int hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) -{ - return 0; -} static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ +/* + * When a timer is enqueued and expires earlier than the already enqueued + * timers, we have to check, whether it expires earlier than the timer for + * which the clock event device was armed. + * + * Called with interrupts disabled and base->cpu_base.lock held + */ +static void hrtimer_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); + + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); + + /* + * If the timer is not on the current cpu, we cannot reprogram + * the other cpus clock event device. + */ + if (base->cpu_base != cpu_base) + return; + + /* + * If the hrtimer interrupt is running, then it will + * reevaluate the clock bases and reprogram the clock event + * device. The callbacks are always executed in hard interrupt + * context so we don't need an extra check for a running + * callback. + */ + if (cpu_base->in_hrtirq) + return; + + /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Set it to 0. 
+ */ + if (expires < 0) + expires = 0; + + if (expires >= cpu_base->expires_next) + return; + + /* Update the pointer to the next expiring timer */ + cpu_base->next_timer = timer; + + /* + * If a hang was detected in the last timer interrupt then we + * do not schedule a timer which is earlier than the expiry + * which we enforced in the hang detection. We want the system + * to make progress. + */ + if (cpu_base->hang_detected) + return; + + /* + * Program the timer hardware. We enforce the expiry for + * events which are already in the past. + */ + cpu_base->expires_next = expires; + tick_program_event(expires, 1); +} + /* * Clock realtime was set * -- cgit v1.2.3 From ebba2c723f38a766546b2eaf828c522576c791d4 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:47 +0100 Subject: hrtimer: Make hrtimer_force_reprogram() unconditionally available hrtimer_force_reprogram() needs to be available unconditionally for softirq based hrtimers. Move the function and all required struct members out of the CONFIG_HIGH_RES_TIMERS #ifdef. There is no functional change because hrtimer_force_reprogram() is only invoked when hrtimer_cpu_base.hres_active is true and CONFIG_HIGH_RES_TIMERS=y. Making it unconditional increases the text size for the CONFIG_HIGH_RES_TIMERS=n case slightly, but avoids replication of that code for the upcoming softirq based hrtimers support. Most of the code gets eliminated in the CONFIG_HIGH_RES_TIMERS=n case by the compiler.
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-19-anna-maria@linutronix.de [ Made it build on !CONFIG_HIGH_RES_TIMERS ] Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 60 ++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 63d804aea1ea..2b3222ea2a6c 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -458,7 +458,6 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) #define for_each_active_base(base, cpu_base, active) \ while ((base = __next_base((cpu_base), &(active)))) -#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) { struct hrtimer_clock_base *base; @@ -487,7 +486,6 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) expires_next = 0; return expires_next; } -#endif static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { @@ -513,34 +511,6 @@ static inline int hrtimer_hres_active(void) return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases)); } -/* High resolution timer related functions */ -#ifdef CONFIG_HIGH_RES_TIMERS - -/* - * High resolution timer enabled ? 
- */ -static bool hrtimer_hres_enabled __read_mostly = true; -unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; -EXPORT_SYMBOL_GPL(hrtimer_resolution); - -/* - * Enable / Disable high resolution mode - */ -static int __init setup_hrtimer_hres(char *str) -{ - return (kstrtobool(str, &hrtimer_hres_enabled) == 0); -} - -__setup("highres=", setup_hrtimer_hres); - -/* - * hrtimer_high_res_enabled - query, if the highres mode is enabled - */ -static inline int hrtimer_is_hres_enabled(void) -{ - return hrtimer_hres_enabled; -} - /* * Reprogram the event source with checking both queues for the * next event @@ -581,6 +551,34 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) tick_program_event(cpu_base->expires_next, 1); } +/* High resolution timer related functions */ +#ifdef CONFIG_HIGH_RES_TIMERS + +/* + * High resolution timer enabled ? + */ +static bool hrtimer_hres_enabled __read_mostly = true; +unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; +EXPORT_SYMBOL_GPL(hrtimer_resolution); + +/* + * Enable / Disable high resolution mode + */ +static int __init setup_hrtimer_hres(char *str) +{ + return (kstrtobool(str, &hrtimer_hres_enabled) == 0); +} + +__setup("highres=", setup_hrtimer_hres); + +/* + * hrtimer_high_res_enabled - query, if the highres mode is enabled + */ +static inline int hrtimer_is_hres_enabled(void) +{ + return hrtimer_hres_enabled; +} + /* * Retrigger next event is called after clock was set * @@ -639,8 +637,6 @@ void clock_was_set_delayed(void) static inline int hrtimer_is_hres_enabled(void) { return 0; } static inline void hrtimer_switch_to_hres(void) { } -static inline void -hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -- cgit v1.2.3 From 61bb4bcb79c7afcd0bf0d20aef4704977172fd60 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:48 +0100 Subject: 
hrtimer: Unify hrtimer removal handling When the first hrtimer on the current CPU is removed, hrtimer_force_reprogram() is invoked but only when CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active is set. hrtimer_force_reprogram() updates hrtimer_cpu_base.expires_next and reprograms the clock event device. When CONFIG_HIGH_RES_TIMERS=y and hrtimer_cpu_base.hres_active is set, a pointless hrtimer interrupt can be prevented. hrtimer_check_target() makes the 'can remote enqueue' decision. As soon as hrtimer_check_target() is unconditionally available and hrtimer_cpu_base.expires_next is updated by hrtimer_reprogram(), hrtimer_force_reprogram() needs to be available unconditionally as well to prevent the following scenario with CONFIG_HIGH_RES_TIMERS=n: - the first hrtimer on this CPU is removed and hrtimer_force_reprogram() is not executed - CPU goes idle (next timer is calculated and hrtimers are taken into account) - a hrtimer is enqueued remote on the idle CPU: hrtimer_check_target() compares expiry value and hrtimer_cpu_base.expires_next. The expiry value is after expires_next, so the hrtimer is enqueued. This timer will fire late, if it expires before the effective first hrtimer on this CPU and the comparison was with an outdated expires_next value. To prevent this scenario, make hrtimer_force_reprogram() unconditional except the effective reprogramming part, which gets eliminated by the compiler in the CONFIG_HIGH_RES_TIMERS=n case. 
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-20-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 2b3222ea2a6c..e6a78ae103ca 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -521,9 +521,6 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - if (!__hrtimer_hres_active(cpu_base)) - return; - expires_next = __hrtimer_get_next_event(cpu_base); if (skip_equal && expires_next == cpu_base->expires_next) @@ -532,6 +529,9 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) cpu_base->expires_next = expires_next; /* + * If hres is not active, hardware does not have to be + * reprogrammed yet. + * * If a hang was detected in the last timer interrupt then we * leave the hang delay active in the hardware. We want the * system to make progress. That also prevents the following @@ -545,7 +545,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) * set. So we'd effectivly block all timers until the T2 event * fires. */ - if (cpu_base->hang_detected) + if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; tick_program_event(cpu_base->expires_next, 1); @@ -844,7 +844,6 @@ static void __remove_hrtimer(struct hrtimer *timer, if (!timerqueue_del(&base->active, &timer->node)) cpu_base->active_bases &= ~(1 << base->index); -#ifdef CONFIG_HIGH_RES_TIMERS /* * Note: If reprogram is false we do not update * cpu_base->next_timer. 
This happens when we remove the first @@ -855,7 +854,6 @@ static void __remove_hrtimer(struct hrtimer *timer, */ if (reprogram && timer == cpu_base->next_timer) hrtimer_force_reprogram(cpu_base, 1); -#endif } /* -- cgit v1.2.3 From 14c803419de6acba08e143d51813ac5e0f3443b8 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:49 +0100 Subject: hrtimer: Unify remote enqueue handling hrtimer_reprogram() is conditionally invoked from hrtimer_start_range_ns() when hrtimer_cpu_base.hres_active is true. In the !hres_active case there is a special condition for the nohz_active case: If the newly enqueued timer expires before the first expiring timer on a remote CPU then the remote CPU needs to be notified and woken up from a NOHZ idle sleep to take the new first expiring timer into account. Previous changes have already established the prerequisites to make the remote enqueue behaviour the same whether high resolution mode is active or not: If the to be enqueued timer expires before the first expiring timer on a remote CPU, then it cannot be enqueued there. This was done for the high resolution mode because there is no way to access the remote CPU timer hardware. The same is true for NOHZ, but was handled differently by unconditionally enqueuing the timer and waking up the remote CPU so it can reprogram its timer. Again there is no compelling reason for this difference. hrtimer_check_target(), which makes the 'can remote enqueue' decision is already unconditional, but not yet functional because nothing updates hrtimer_cpu_base.expires_next in the !hres_active case. To unify this the following changes are required: 1) Make the store of the new first expiry time unconditional in hrtimer_reprogram() and check __hrtimer_hres_active() before proceeding to the actual hardware access. This check also lets the compiler eliminate the rest of the function in case of CONFIG_HIGH_RES_TIMERS=n.
2) Invoke hrtimer_reprogram() unconditionally from hrtimer_start_range_ns() 3) Remove the remote wakeup special case for the !high_res && nohz_active case. Confine the timers_nohz_active static key to timer.c which is the only user now. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-21-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 18 ++++++------------ kernel/time/tick-internal.h | 6 ------ kernel/time/timer.c | 9 ++++++++- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e6a78ae103ca..1c68bf21f603 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -685,21 +685,24 @@ static void hrtimer_reprogram(struct hrtimer *timer, /* Update the pointer to the next expiring timer */ cpu_base->next_timer = timer; + cpu_base->expires_next = expires; /* + * If hres is not active, hardware does not have to be + * programmed yet. + * * If a hang was detected in the last timer interrupt then we * do not schedule a timer which is earlier than the expiry * which we enforced in the hang detection. We want the system * to make progress. */ - if (cpu_base->hang_detected) + if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) return; /* * Program the timer hardware. We enforce the expiry for * events which are already in the past. */ - cpu_base->expires_next = expires; tick_program_event(expires, 1); } @@ -936,16 +939,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, if (!leftmost) goto unlock; - if (!hrtimer_is_hres_active(timer)) { - /* - * Kick to reschedule the next tick to handle the new timer - * on dynticks target. 
- */ - if (is_timers_nohz_active()) - wake_up_nohz_cpu(new_base->cpu_base->cpu); - } else { - hrtimer_reprogram(timer, new_base); - } + hrtimer_reprogram(timer, new_base); unlock: unlock_hrtimer_base(timer, &flags); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f690628e068c..e277284c2831 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -151,18 +151,12 @@ static inline void tick_nohz_init(void) { } #ifdef CONFIG_NO_HZ_COMMON extern unsigned long tick_nohz_active; extern void timers_update_nohz(void); -extern struct static_key_false timers_nohz_active; -static inline bool is_timers_nohz_active(void) -{ - return static_branch_likely(&timers_nohz_active); -} # ifdef CONFIG_SMP extern struct static_key_false timers_migration_enabled; # endif #else /* CONFIG_NO_HZ_COMMON */ static inline void timers_update_nohz(void) { } #define tick_nohz_active (0) -static inline bool is_timers_nohz_active(void) { return false; } #endif DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d530f72b32f9..48150ab42de9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -210,7 +210,7 @@ static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); #ifdef CONFIG_NO_HZ_COMMON -DEFINE_STATIC_KEY_FALSE(timers_nohz_active); +static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); static DEFINE_MUTEX(timer_keys_mutex); static void timer_update_keys(struct work_struct *work); @@ -258,6 +258,13 @@ int timer_migration_handler(struct ctl_table *table, int write, mutex_unlock(&timer_keys_mutex); return ret; } + +static inline bool is_timers_nohz_active(void) +{ + return static_branch_unlikely(&timers_nohz_active); +} +#else +static inline bool is_timers_nohz_active(void) { return false; } #endif /* NO_HZ_COMMON */ static unsigned long round_jiffies_common(unsigned long j, int cpu, -- cgit v1.2.3 From 2ac2dccce9d16a7b1a8fddf69a955d249375bce4 Mon Sep 17 00:00:00 2001 
From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:50 +0100 Subject: hrtimer: Make remote enqueue decision less restrictive The current decision whether a timer can be queued on a remote CPU checks for timer->expiry <= remote_cpu_base.expires_next. This is too restrictive because a timer with the same expiry time as an existing timer will be enqueued on the right-hand side of the existing timer inside the rbtree, i.e. behind the first expiring timer. So it's safe to allow enqueuing timers with the same expiry time as the first expiring timer on a remote CPU base. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-22-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 1c68bf21f603..f4a56fbae662 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -168,7 +168,7 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) ktime_t expires; expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires <= new_base->cpu_base->expires_next; + return expires < new_base->cpu_base->expires_next; } static inline -- cgit v1.2.3 From 3ec7a3ee9f15f6dcac1591902d85b94c2a4b520d Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:51 +0100 Subject: hrtimer: Remove the 'base' parameter from hrtimer_reprogram() hrtimer_reprogram() must have access to the hrtimer_clock_base of the new first expiring timer to access hrtimer_clock_base.offset for adjusting the expiry time to CLOCK_MONOTONIC. This is required to evaluate whether the new left most timer in the hrtimer_clock_base is the first expiring timer of all clock bases in a hrtimer_cpu_base.
The only user of hrtimer_reprogram() is hrtimer_start_range_ns(), which has a pointer to hrtimer_clock_base() already and hands it in as a parameter. But hrtimer_start_range_ns() will be split for the upcoming support for softirq based hrtimers to avoid code duplication and will lose the direct access to the clock base pointer. Instead of handing in timer and timer->base as a parameter remove the base parameter from hrtimer_reprogram() instead and retrieve the clock base internally. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-23-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f4a56fbae662..33a6c990166d 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -648,10 +648,10 @@ static inline void retrigger_next_event(void *arg) { } * * Called with interrupts disabled and base->cpu_base.lock held */ -static void hrtimer_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base) +static void hrtimer_reprogram(struct hrtimer *timer) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + struct hrtimer_clock_base *base = timer->base; ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); @@ -939,7 +939,7 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, if (!leftmost) goto unlock; - hrtimer_reprogram(timer, new_base); + hrtimer_reprogram(timer); unlock: unlock_hrtimer_base(timer, &flags); } -- cgit v1.2.3 From 138a6b7ae4dedde5513678f57b275eee19c41b6a Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:52 +0100 Subject: hrtimer: Factor out __hrtimer_start_range_ns() Preparatory patch for softirq based hrtimers to 
avoid code duplication, factor out the __hrtimer_start_range_ns() function from hrtimer_start_range_ns(). No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-24-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 33a6c990166d..4142e6f536b4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -905,22 +905,11 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, return tim; } -/** - * hrtimer_start_range_ns - (re)start an hrtimer - * @timer: the timer to be added - * @tim: expiry time - * @delta_ns: "slack" range for the timer - * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) - */ -void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - u64 delta_ns, const enum hrtimer_mode mode) +static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + u64 delta_ns, const enum hrtimer_mode mode, + struct hrtimer_clock_base *base) { - struct hrtimer_clock_base *base, *new_base; - unsigned long flags; - int leftmost; - - base = lock_hrtimer_base(timer, &flags); + struct hrtimer_clock_base *new_base; /* Remove an active timer from the queue: */ remove_hrtimer(timer, base, true); @@ -935,12 +924,27 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - leftmost = enqueue_hrtimer(timer, new_base, mode); - if (!leftmost) - goto unlock; + return enqueue_hrtimer(timer, new_base, mode); +} +/** + * hrtimer_start_range_ns - (re)start an hrtimer + * @timer: the timer 
to be added + * @tim: expiry time + * @delta_ns: "slack" range for the timer + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + */ +void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + u64 delta_ns, const enum hrtimer_mode mode) +{ + struct hrtimer_clock_base *base; + unsigned long flags; + + base = lock_hrtimer_base(timer, &flags); + + if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) + hrtimer_reprogram(timer); - hrtimer_reprogram(timer); -unlock: unlock_hrtimer_base(timer, &flags); } EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); -- cgit v1.2.3 From ad38f596d8e4babc19be8b21a7a49debffb4a7f5 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:53 +0100 Subject: hrtimer: Factor out __hrtimer_next_event_base() Preparatory patch for softirq based hrtimers to avoid code duplication. No functional change. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-25-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 4142e6f536b4..5d9b81d224b3 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -458,13 +458,13 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) #define for_each_active_base(base, cpu_base, active) \ while ((base = __next_base((cpu_base), &(active)))) -static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) +static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, + unsigned int active, + ktime_t expires_next) { struct hrtimer_clock_base *base; - unsigned int active = cpu_base->active_bases; - ktime_t expires, expires_next = KTIME_MAX; + ktime_t expires; - 
cpu_base->next_timer = NULL; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *next; struct hrtimer *timer; @@ -487,6 +487,18 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) return expires_next; } +static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) +{ + unsigned int active = cpu_base->active_bases; + ktime_t expires_next = KTIME_MAX; + + cpu_base->next_timer = NULL; + + expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + + return expires_next; +} + static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; -- cgit v1.2.3 From dd934aa8ad1fbaab3d916125c7fe42fff75aa7ff Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:54 +0100 Subject: hrtimer: Use irqsave/irqrestore around __run_hrtimer() __run_hrtimer() is called with the hrtimer_cpu_base.lock held and interrupts disabled. Before invoking the timer callback the base lock is dropped, but interrupts stay disabled. The upcoming support for softirq based hrtimers requires that interrupts are enabled before the timer callback is invoked. To avoid code duplication, take hrtimer_cpu_base.lock with raw_spin_lock_irqsave(flags) at the call site and hand in the flags as a parameter. So raw_spin_unlock_irqrestore() before the callback invocation will either keep interrupts disabled in interrupt context or restore to interrupt enabled state when called from softirq context. 
Suggested-by: Peter Zijlstra Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-26-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5d9b81d224b3..31ccd86e63c0 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1159,7 +1159,8 @@ EXPORT_SYMBOL_GPL(hrtimer_active); static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_clock_base *base, - struct hrtimer *timer, ktime_t *now) + struct hrtimer *timer, ktime_t *now, + unsigned long flags) { enum hrtimer_restart (*fn)(struct hrtimer *); int restart; @@ -1194,11 +1195,11 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, * protected against migration to a different CPU even if the lock * is dropped. 
*/ - raw_spin_unlock(&cpu_base->lock); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); trace_hrtimer_expire_entry(timer, now); restart = fn(timer); trace_hrtimer_expire_exit(timer); - raw_spin_lock(&cpu_base->lock); + raw_spin_lock_irq(&cpu_base->lock); /* * Note: We clear the running state after enqueue_hrtimer and @@ -1226,7 +1227,8 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, base->running = NULL; } -static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) +static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, + unsigned long flags) { struct hrtimer_clock_base *base; unsigned int active = cpu_base->active_bases; @@ -1257,7 +1259,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) if (basenow < hrtimer_get_softexpires_tv64(timer)) break; - __run_hrtimer(cpu_base, base, timer, &basenow); + __run_hrtimer(cpu_base, base, timer, &basenow, flags); } } } @@ -1272,13 +1274,14 @@ void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ktime_t expires_next, now, entry_time, delta; + unsigned long flags; int retries = 0; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; dev->next_event = KTIME_MAX; - raw_spin_lock(&cpu_base->lock); + raw_spin_lock_irqsave(&cpu_base->lock, flags); entry_time = now = hrtimer_update_base(cpu_base); retry: cpu_base->in_hrtirq = 1; @@ -1291,7 +1294,7 @@ retry: */ cpu_base->expires_next = KTIME_MAX; - __hrtimer_run_queues(cpu_base, now); + __hrtimer_run_queues(cpu_base, now, flags); /* Reevaluate the clock bases for the next expiry */ expires_next = __hrtimer_get_next_event(cpu_base); @@ -1301,7 +1304,7 @@ retry: */ cpu_base->expires_next = expires_next; cpu_base->in_hrtirq = 0; - raw_spin_unlock(&cpu_base->lock); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); /* Reprogramming necessary ? 
*/ if (!tick_program_event(expires_next, 0)) { @@ -1322,7 +1325,7 @@ retry: * Acquire base lock for updating the offsets and retrieving * the current time. */ - raw_spin_lock(&cpu_base->lock); + raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); cpu_base->nr_retries++; if (++retries < 3) @@ -1335,7 +1338,8 @@ retry: */ cpu_base->nr_hangs++; cpu_base->hang_detected = 1; - raw_spin_unlock(&cpu_base->lock); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); + delta = ktime_sub(now, entry_time); if ((unsigned int)delta > cpu_base->max_hang_time) cpu_base->max_hang_time = (unsigned int) delta; @@ -1377,6 +1381,7 @@ static inline void __hrtimer_peek_ahead_timers(void) { } void hrtimer_run_queues(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + unsigned long flags; ktime_t now; if (__hrtimer_hres_active(cpu_base)) @@ -1394,10 +1399,10 @@ void hrtimer_run_queues(void) return; } - raw_spin_lock(&cpu_base->lock); + raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); - __hrtimer_run_queues(cpu_base, now); - raw_spin_unlock(&cpu_base->lock); + __hrtimer_run_queues(cpu_base, now, flags); + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } /* -- cgit v1.2.3 From 98ecadd4305d8677ba77162152485798d47dcc85 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:55 +0100 Subject: hrtimer: Add clock bases and hrtimer mode for softirq context Currently hrtimer callback functions are always executed in hard interrupt context. Users of hrtimers, which need their timer function to be executed in soft interrupt context, make use of tasklets to get the proper context. Add additional hrtimer clock bases for timers which must expire in softirq context, so the detour via the tasklet can be avoided. This is also required for RT, where the majority of hrtimer is moved into softirq hrtimer context. The selection of the expiry mode happens via a mode bit. 
Introduce HRTIMER_MODE_SOFT and the matching combinations with the ABS/REL/PINNED bits and update the decoding of hrtimer_mode in tracepoints. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-27-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 14 ++++++++++++++ include/trace/events/timer.h | 6 +++++- kernel/time/hrtimer.c | 20 ++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 98ed35767ac5..26ae8a868ea8 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -33,14 +33,24 @@ struct hrtimer_cpu_base; * HRTIMER_MODE_REL - Time value is relative to now * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered * when starting the timer) + * HRTIMER_MODE_SOFT - Timer callback function will be executed in + * soft irq context */ enum hrtimer_mode { HRTIMER_MODE_ABS = 0x00, HRTIMER_MODE_REL = 0x01, HRTIMER_MODE_PINNED = 0x02, + HRTIMER_MODE_SOFT = 0x04, HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, + + HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT, + HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT, + + HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, + HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, + }; /* @@ -151,6 +161,10 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_BOOTTIME, HRTIMER_BASE_TAI, + HRTIMER_BASE_MONOTONIC_SOFT, + HRTIMER_BASE_REALTIME_SOFT, + HRTIMER_BASE_BOOTTIME_SOFT, + HRTIMER_BASE_TAI_SOFT, HRTIMER_MAX_CLOCK_BASES, }; diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 744b4310b24b..a57e4ee989d6 100644 --- a/include/trace/events/timer.h +++ 
b/include/trace/events/timer.h @@ -148,7 +148,11 @@ DEFINE_EVENT(timer_class, timer_cancel, { HRTIMER_MODE_ABS, "ABS" }, \ { HRTIMER_MODE_REL, "REL" }, \ { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ - { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }) + { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }, \ + { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ + { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ + { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ + { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) /** * hrtimer_init - called when the hrtimer is initialized diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 31ccd86e63c0..e2353f5cdf51 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -92,6 +92,26 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, + { + .index = HRTIMER_BASE_MONOTONIC_SOFT, + .clockid = CLOCK_MONOTONIC, + .get_time = &ktime_get, + }, + { + .index = HRTIMER_BASE_REALTIME_SOFT, + .clockid = CLOCK_REALTIME, + .get_time = &ktime_get_real, + }, + { + .index = HRTIMER_BASE_BOOTTIME_SOFT, + .clockid = CLOCK_BOOTTIME, + .get_time = &ktime_get_boottime, + }, + { + .index = HRTIMER_BASE_TAI_SOFT, + .clockid = CLOCK_TAI, + .get_time = &ktime_get_clocktai, + }, } }; -- cgit v1.2.3 From c458b1d102036eaa2c70e03000c959bd491c2037 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:56 +0100 Subject: hrtimer: Prepare handling of hard and softirq based hrtimers The softirq based hrtimer can utilize most of the existing hrtimers functions, but need to operate on a different data set. Add an 'active_mask' parameter to various functions so the hard and soft bases can be selected. Fixup the existing callers and hand in the ACTIVE_HARD mask. 
Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-28-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e2353f5cdf51..ba4674e9adc2 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -59,6 +59,15 @@ #include "tick-internal.h" +/* + * Masks for selecting the soft and hard context timers from + * cpu_base->active + */ +#define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT) +#define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1) +#define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) +#define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) + /* * The timer bases: * @@ -507,13 +516,24 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, return expires_next; } -static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) +/* + * Recomputes cpu_base::*next_timer and returns the earliest expires_next but + * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. + * + * @active_mask must be one of: + * - HRTIMER_ACTIVE, + * - HRTIMER_ACTIVE_SOFT, or + * - HRTIMER_ACTIVE_HARD. 
+ */ +static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, + unsigned int active_mask) { - unsigned int active = cpu_base->active_bases; + unsigned int active; ktime_t expires_next = KTIME_MAX; cpu_base->next_timer = NULL; + active = cpu_base->active_bases & active_mask; expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); return expires_next; @@ -553,7 +573,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - expires_next = __hrtimer_get_next_event(cpu_base); + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); if (skip_equal && expires_next == cpu_base->expires_next) return; @@ -1074,7 +1094,7 @@ u64 hrtimer_get_next_event(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!__hrtimer_hres_active(cpu_base)) - expires = __hrtimer_get_next_event(cpu_base); + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1248,10 +1268,10 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, } static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, - unsigned long flags) + unsigned long flags, unsigned int active_mask) { struct hrtimer_clock_base *base; - unsigned int active = cpu_base->active_bases; + unsigned int active = cpu_base->active_bases & active_mask; for_each_active_base(base, cpu_base, active) { struct timerqueue_node *node; @@ -1314,10 +1334,10 @@ retry: */ cpu_base->expires_next = KTIME_MAX; - __hrtimer_run_queues(cpu_base, now, flags); + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); /* Reevaluate the clock bases for the next expiry */ - expires_next = __hrtimer_get_next_event(cpu_base); + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); /* * Store the new expiry value so the migration code can verify * against it. 
@@ -1421,7 +1441,7 @@ void hrtimer_run_queues(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); - __hrtimer_run_queues(cpu_base, now, flags); + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } -- cgit v1.2.3 From c96f5471ce7d2aefd0dda560cc23f08ab00bc65d Mon Sep 17 00:00:00 2001 From: Josh Snyder Date: Mon, 18 Dec 2017 16:15:10 +0000 Subject: delayacct: Account blkio completion on the correct task Before commit: e33a9bba85a8 ("sched/core: move IO scheduling accounting from io_schedule_timeout() into scheduler") delayacct_blkio_end() was called after context-switching into the task which completed I/O. This resulted in double counting: the task would account a delay both waiting for I/O and for time spent in the runqueue. With e33a9bba85a8, delayacct_blkio_end() is called by try_to_wake_up(). In ttwu, we have not yet context-switched. This is more correct, in that the delay accounting ends when the I/O is complete. But delayacct_blkio_end() relies on 'get_current()', and we have not yet context-switched into the task whose I/O completed. This results in the wrong task having its delay accounting statistics updated. Instead of doing that, pass the task_struct being woken to delayacct_blkio_end(), so that it can update the statistics of the correct task. 
Signed-off-by: Josh Snyder Acked-by: Tejun Heo Acked-by: Balbir Singh Cc: Cc: Brendan Gregg Cc: Jens Axboe Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-block@vger.kernel.org Fixes: e33a9bba85a8 ("sched/core: move IO scheduling accounting from io_schedule_timeout() into scheduler") Link: http://lkml.kernel.org/r/1513613712-571-1-git-send-email-joshs@netflix.com Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 8 ++++---- kernel/delayacct.c | 42 ++++++++++++++++++++++++++---------------- kernel/sched/core.c | 6 +++--- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 4178d2493547..5e335b6203f4 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -71,7 +71,7 @@ extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); -extern void __delayacct_blkio_end(void); +extern void __delayacct_blkio_end(struct task_struct *); extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); @@ -122,10 +122,10 @@ static inline void delayacct_blkio_start(void) __delayacct_blkio_start(); } -static inline void delayacct_blkio_end(void) +static inline void delayacct_blkio_end(struct task_struct *p) { if (current->delays) - __delayacct_blkio_end(); + __delayacct_blkio_end(p); delayacct_clear_flag(DELAYACCT_PF_BLKIO); } @@ -169,7 +169,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) {} static inline void delayacct_blkio_start(void) {} -static inline void delayacct_blkio_end(void) +static inline void delayacct_blkio_end(struct task_struct *p) {} static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 
4a1c33416b6a..e2764d767f18 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -51,16 +51,16 @@ void __delayacct_tsk_init(struct task_struct *tsk) * Finish delay accounting for a statistic using its timestamps (@start), * accumalator (@total) and @count */ -static void delayacct_end(u64 *start, u64 *total, u32 *count) +static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count) { s64 ns = ktime_get_ns() - *start; unsigned long flags; if (ns > 0) { - spin_lock_irqsave(&current->delays->lock, flags); + spin_lock_irqsave(lock, flags); *total += ns; (*count)++; - spin_unlock_irqrestore(&current->delays->lock, flags); + spin_unlock_irqrestore(lock, flags); } } @@ -69,17 +69,25 @@ void __delayacct_blkio_start(void) current->delays->blkio_start = ktime_get_ns(); } -void __delayacct_blkio_end(void) +/* + * We cannot rely on the `current` macro, as we haven't yet switched back to + * the process being woken. + */ +void __delayacct_blkio_end(struct task_struct *p) { - if (current->delays->flags & DELAYACCT_PF_SWAPIN) - /* Swapin block I/O */ - delayacct_end(&current->delays->blkio_start, - &current->delays->swapin_delay, - &current->delays->swapin_count); - else /* Other block I/O */ - delayacct_end(&current->delays->blkio_start, - &current->delays->blkio_delay, - &current->delays->blkio_count); + struct task_delay_info *delays = p->delays; + u64 *total; + u32 *count; + + if (p->delays->flags & DELAYACCT_PF_SWAPIN) { + total = &delays->swapin_delay; + count = &delays->swapin_count; + } else { + total = &delays->blkio_delay; + count = &delays->blkio_count; + } + + delayacct_end(&delays->lock, &delays->blkio_start, total, count); } int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) @@ -153,8 +161,10 @@ void __delayacct_freepages_start(void) void __delayacct_freepages_end(void) { - delayacct_end(&current->delays->freepages_start, - &current->delays->freepages_delay, - &current->delays->freepages_count); + delayacct_end( + &current->delays->lock, + &current->delays->freepages_start, +
&current->delays->freepages_delay, + &current->delays->freepages_count); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 644fa2e3d993..a7bf32aabfda 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2056,7 +2056,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) p->state = TASK_WAKING; if (p->in_iowait) { - delayacct_blkio_end(); + delayacct_blkio_end(p); atomic_dec(&task_rq(p)->nr_iowait); } @@ -2069,7 +2069,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) #else /* CONFIG_SMP */ if (p->in_iowait) { - delayacct_blkio_end(); + delayacct_blkio_end(p); atomic_dec(&task_rq(p)->nr_iowait); } @@ -2122,7 +2122,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) if (!task_on_rq_queued(p)) { if (p->in_iowait) { - delayacct_blkio_end(); + delayacct_blkio_end(p); atomic_dec(&rq->nr_iowait); } ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); -- cgit v1.2.3 From 673aa20c55a138621d1340d343cd6b07c1cb4e92 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 8 Jan 2018 13:39:59 -0600 Subject: x86/platform/UV: Update uv_mmrs.h to prepare for UV4A fixes Regenerate uv_mmrs.h file to accommodate fixes to UV4A MMRs.
Signed-off-by: Mike Travis Acked-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515440405-20880-2-git-send-email-mike.travis@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_mmrs.h | 615 +++++++++++++++++++++++++++++++++----- 1 file changed, 533 insertions(+), 82 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 548d684a7960..f113e278ffff 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -3031,6 +3031,41 @@ union uvh_node_present_table_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 
0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long v; @@ -3042,6 +3077,46 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* 
RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; }; /* ========================================================================= */ @@ -3064,6 +3139,41 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define 
UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_1_mmr_u { unsigned long v; @@ -3075,6 +3185,46 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s { + 
unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; }; /* ========================================================================= */ @@ -3097,6 +3247,41 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define 
UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long v; @@ -3108,6 +3293,46 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + 
unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s4; }; /* ========================================================================= */ @@ -3126,6 +3351,21 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union 
uvh_rh_gam_alias210_redirect_config_0_mmr_u { unsigned long v; @@ -3134,6 +3374,31 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; + struct uv1h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + struct uv4h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; }; /* ========================================================================= */ @@ -3152,6 +3417,21 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u { #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define 
UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long v; @@ -3160,6 +3440,31 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; + struct uv1h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + struct uv4h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; }; /* ========================================================================= */ @@ -3178,6 +3483,21 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u { #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define 
UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long v; @@ -3186,6 +3506,31 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; + struct uv1h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; + struct uv4h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s4; }; /* ========================================================================= */ @@ -3383,6 +3728,106 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { } s4; }; +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */ +/* ========================================================================= */ +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR") +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR") +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL +#define 
UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x483000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR) + + + +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_mmioh_overlay_config0_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_mmioh_overlay_config0_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */ +/* ========================================================================= */ +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 
uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1604000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR) + + + +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL + + +union uvh_rh_gam_mmioh_overlay_config1_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s3; + struct uv4h_rh_gam_mmioh_overlay_config1_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } s4; +}; + /* 
========================================================================= */ /* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ @@ -3437,6 +3882,94 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { } s2; }; +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */ +/* ========================================================================= */ +#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR") +#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR") +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x483800UL +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR) + +#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH") +#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH") +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ + is_uv3_hub() ? 
UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH) + + + +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL + +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL + + +union uvh_rh_gam_mmioh_redirect_config0_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; + struct uv4h_rh_gam_mmioh_redirect_config0_mmr_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */ +/* ========================================================================= */ +#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR") +#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR") +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x484800UL +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR ( \ + is_uv1_hub() ? UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR) + +#define UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH") +#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH") +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH ( \ + is_uv1_hub() ? 
UV1H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ + is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH) + + + +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 +#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL + +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 +#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL + + +union uvh_rh_gam_mmioh_redirect_config1_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s3; + struct uv4h_rh_gam_mmioh_redirect_config1_mmr_s { + unsigned long nasid:15; /* RW */ + unsigned long rsvd_15_63:49; + } s4; +}; + /* ========================================================================= */ /* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ @@ -4137,88 +4670,6 @@ union uv3h_gr0_gam_gr_config_u { } s3; }; -/* ========================================================================= */ -/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */ -/* ========================================================================= */ -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL - -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL - -union uv3h_rh_gam_mmioh_overlay_config0_mmr_u { - unsigned long v; - struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s { - unsigned long rsvd_0_25:26; - unsigned long base:20; /* RW */ 
- unsigned long m_io:6; /* RW */ - unsigned long n_io:4; - unsigned long rsvd_56_62:7; - unsigned long enable:1; /* RW */ - } s3; -}; - -/* ========================================================================= */ -/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */ -/* ========================================================================= */ -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1604000UL - -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x00003ffffc000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL - -union uv3h_rh_gam_mmioh_overlay_config1_mmr_u { - unsigned long v; - struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s { - unsigned long rsvd_0_25:26; - unsigned long base:20; /* RW */ - unsigned long m_io:6; /* RW */ - unsigned long n_io:4; - unsigned long rsvd_56_62:7; - unsigned long enable:1; /* RW */ - } s3; -}; - -/* ========================================================================= */ -/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */ -/* ========================================================================= */ -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128 - -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL - -union uv3h_rh_gam_mmioh_redirect_config0_mmr_u { - unsigned long v; - struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s { - unsigned long nasid:15; /* RW */ - unsigned long rsvd_15_63:49; - } s3; -}; - -/* ========================================================================= */ -/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */ -/* 
========================================================================= */ -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128 - -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 -#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL - -union uv3h_rh_gam_mmioh_redirect_config1_mmr_u { - unsigned long v; - struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s { - unsigned long nasid:15; /* RW */ - unsigned long rsvd_15_63:49; - } s3; -}; - /* ========================================================================= */ /* UV4H_LB_PROC_INTD_QUEUE_FIRST */ /* ========================================================================= */ -- cgit v1.2.3 From 62807106c3219d2d6ddbfc778a5ee7e6ba38e58f Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 8 Jan 2018 13:40:00 -0600 Subject: x86/platform/UV: Fix UV4A support on new Intel Processors Upcoming Intel CascadeLake and IceLake processors have some architecture changes that required fixes in the UV4 HUB bringing that chip to revision 2. The nomenclature for that new chip is "UV4A". This patch fixes the references for the expanded MMR definitions in the previous (automated) patch. 
Signed-off-by: Mike Travis Acked-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515440405-20880-3-git-send-email-mike.travis@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 6de35fc8fb3a..ebb7d264bcac 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -768,6 +768,7 @@ static __init void map_gru_high(int max_pnode) return; } + /* Only UV3 has distributed GRU mode */ if (is_uv3_hub() && gru.s3.mode) { map_gru_distributed(gru.v); return; @@ -817,17 +818,20 @@ static __initdata struct mmioh_config mmiohs[] = { /* UV3 & UV4 have identical MMIOH overlay configs */ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) { - union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay; + union uvh_rh_gam_mmioh_overlay_config0_mmr_u overlay; unsigned long mmr; unsigned long base; + unsigned long m_overlay; int i, n, shift, m_io, max_io; int nasid, lnasid, fi, li; char *id; id = mmiohs[index].id; overlay.v = uv_read_local_mmr(mmiohs[index].overlay); + m_overlay = mmiohs[index].overlay; - pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", id, overlay.v, overlay.s3.base, overlay.s3.m_io); + pr_info("UV: %s overlay 0x%lx(@0x%lx) base:0x%x m_io:%d\n", + id, overlay.v, m_overlay, overlay.s3.base, overlay.s3.m_io); if (!overlay.s3.enable) { pr_info("UV: %s disabled\n", id); return; @@ -844,10 +848,14 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) max_io = lnasid = fi = li = -1; for (i = 0; i < n; i++) { - union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect; + union uvh_rh_gam_mmioh_redirect_config0_mmr_u redirect; + unsigned long m_redirect = mmr + i * 8; redirect.v = 
uv_read_local_mmr(mmr + i * 8); nasid = redirect.s3.nasid; + printk_once(KERN_INFO + "UV: %s redirect 0x%lx(@0x%lx) 0x%04x\n", + id, redirect.v, m_redirect, nasid); /* Invalid NASID: */ if (nasid < min_pnode || max_pnode < nasid) nasid = -1; -- cgit v1.2.3 From 8078d1951da228e20dc36f83306845a565f51345 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 8 Jan 2018 13:40:01 -0600 Subject: x86/platform/UV: Add references to access fixed UV4A HUB MMRs Add references to enable access to fixed UV4A (rev2) HUB MMRs. Signed-off-by: Mike Travis Acked-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515440405-20880-4-git-send-email-mike.travis@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 14 ++++++++++++++ arch/x86/include/asm/uv/uv_mmrs.h | 1 + arch/x86/kernel/apic/x2apic_uv_x.c | 2 ++ 3 files changed, 17 insertions(+) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 036e26d63d9a..44cf6d6deb7a 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -241,6 +241,7 @@ static inline int uv_hub_info_check(int version) #define UV2_HUB_REVISION_BASE 3 #define UV3_HUB_REVISION_BASE 5 #define UV4_HUB_REVISION_BASE 7 +#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */ #ifdef UV1_HUB_IS_SUPPORTED static inline int is_uv1_hub(void) @@ -280,6 +281,19 @@ static inline int is_uv3_hub(void) } #endif +/* First test "is UV4A", then "is UV4" */ +#ifdef UV4A_HUB_IS_SUPPORTED +static inline int is_uv4a_hub(void) +{ + return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE); +} +#else +static inline int is_uv4a_hub(void) +{ + return 0; +} +#endif + #ifdef UV4_HUB_IS_SUPPORTED static inline int is_uv4_hub(void) { diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index f113e278ffff..b3afccc2b92e 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h 
+++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -99,6 +99,7 @@ #define UV2_HUB_IS_SUPPORTED 1 #define UV3_HUB_IS_SUPPORTED 1 #define UV4_HUB_IS_SUPPORTED 1 +#define UV4A_HUB_IS_SUPPORTED 1 /* Error function to catch undefined references */ extern unsigned long uv_undefined(char *str); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index ebb7d264bcac..2ddc140c23fe 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -137,6 +137,8 @@ static int __init early_get_pnodeid(void) case UV3_HUB_PART_NUMBER_X: uv_min_hub_revision_id += UV3_HUB_REVISION_BASE; break; + + /* Update: UV4A has only a modified revision to indicate HUB fixes */ case UV4_HUB_PART_NUMBER: uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1; uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */ -- cgit v1.2.3 From ecce47e0bde6faa3256740280754bfd06a1a4efa Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 8 Jan 2018 13:40:02 -0600 Subject: x86/platform/UV: Fix GAM MMR changes in UV4A Intel processor changes necessitated UV4 HUB Global Address Memory (GAM) fixes to accommodate support for those processors. This patch deals with the updated address range change from 46 to 52 bits in UV4A. 
Signed-off-by: Mike Travis Acked-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515440405-20880-5-git-send-email-mike.travis@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_mmrs.h | 86 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index b3afccc2b92e..30db549885e2 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -3743,7 +3743,6 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR) - #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 @@ -3758,6 +3757,30 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { #define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL #define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x03f0000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \ + is_uv4a_hub() ? 
UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK) union uvh_rh_gam_mmioh_overlay_config0_mmr_u { unsigned long v; @@ -3777,6 +3800,14 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u { unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } s4; + struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s4a; }; /* ========================================================================= */ @@ -3784,8 +3815,8 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u { /* ========================================================================= */ #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") -#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1604000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ is_uv2_hub() ? 
UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ @@ -3793,7 +3824,6 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u { /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR) - #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT 26 #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 46 #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63 @@ -3808,6 +3838,24 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u { #define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x000fc00000000000UL #define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 52 +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x000ffffffc000000UL +#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x03f0000000000000UL + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \ + is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK) + +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \ + is_uv4a_hub() ? 
UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK) union uvh_rh_gam_mmioh_overlay_config1_mmr_u { unsigned long v; @@ -3827,6 +3875,14 @@ union uvh_rh_gam_mmioh_overlay_config1_mmr_u { unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } s4; + struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:26; /* RW */ + unsigned long m_io:6; /* RW */ + unsigned long n_io:4; + unsigned long undef_62:1; /* Undefined */ + unsigned long enable:1; /* RW */ + } s4a; }; /* ========================================================================= */ @@ -3907,13 +3963,18 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH) - #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL #define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0 #define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000000fffUL + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \ + is_uv4a_hub() ? 
UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK) union uvh_rh_gam_mmioh_redirect_config0_mmr_u { unsigned long v; @@ -3925,6 +3986,10 @@ union uvh_rh_gam_mmioh_redirect_config0_mmr_u { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; } s4; + struct uv4ah_rh_gam_mmioh_redirect_config0_mmr_s { + unsigned long nasid:12; /* RW */ + unsigned long rsvd_12_63:52; + } s4a; }; /* ========================================================================= */ @@ -3951,13 +4016,18 @@ union uvh_rh_gam_mmioh_redirect_config0_mmr_u { /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH) - #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 #define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL #define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0 #define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL +#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000000fffUL + +#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK ( \ + is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \ + is_uv4a_hub() ? 
UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \ + /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK) union uvh_rh_gam_mmioh_redirect_config1_mmr_u { unsigned long v; @@ -3969,6 +4039,10 @@ union uvh_rh_gam_mmioh_redirect_config1_mmr_u { unsigned long nasid:15; /* RW */ unsigned long rsvd_15_63:49; } s4; + struct uv4ah_rh_gam_mmioh_redirect_config1_mmr_s { + unsigned long nasid:12; /* RW */ + unsigned long rsvd_12_63:52; + } s4a; }; /* ========================================================================= */ -- cgit v1.2.3 From 09c3ae12b2bf6dc2837d89c1017bf151af610a1f Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 8 Jan 2018 13:40:03 -0600 Subject: x86/platform/UV: Fix GAM MMR references in the UV x2apic code Along with the fixes in UV4A (rev2) MMRs, the code to access those MMRs also was modified by the fixes. UV3, UV4, and UV4A no longer have compatible setups for Global Address Memory (GAM). Correct the new mistakes. Signed-off-by: Mike Travis Acked-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515440405-20880-6-git-send-email-mike.travis@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 83 +++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 2ddc140c23fe..46b675aaf20b 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -794,70 +794,61 @@ static __init void map_mmr_high(int max_pnode) pr_info("UV: MMR disabled\n"); } -/* - * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY - * and REDIRECT MMR regs are exactly the same on UV3. 
- */ -struct mmioh_config { - unsigned long overlay; - unsigned long redirect; - char *id; -}; - -static __initdata struct mmioh_config mmiohs[] = { - { - UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR, - UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR, - "MMIOH0" - }, - { - UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR, - UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR, - "MMIOH1" - }, -}; - -/* UV3 & UV4 have identical MMIOH overlay configs */ -static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) +/* UV3/4 have identical MMIOH overlay configs, UV4A is slightly different */ +static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode) { - union uvh_rh_gam_mmioh_overlay_config0_mmr_u overlay; + unsigned long overlay; unsigned long mmr; unsigned long base; + unsigned long nasid_mask; unsigned long m_overlay; int i, n, shift, m_io, max_io; int nasid, lnasid, fi, li; char *id; - id = mmiohs[index].id; - overlay.v = uv_read_local_mmr(mmiohs[index].overlay); - m_overlay = mmiohs[index].overlay; - - pr_info("UV: %s overlay 0x%lx(@0x%lx) base:0x%x m_io:%d\n", - id, overlay.v, m_overlay, overlay.s3.base, overlay.s3.m_io); - if (!overlay.s3.enable) { + if (index == 0) { + id = "MMIOH0"; + m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR; + overlay = uv_read_local_mmr(m_overlay); + base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK; + mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR; + m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK) + >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT; + shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT; + n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK; + } else { + id = "MMIOH1"; + m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR; + overlay = uv_read_local_mmr(m_overlay); + base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK; + mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR; + m_io = (overlay & 
UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK) + >> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT; + shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT; + n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH; + nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK; + } + pr_info("UV: %s overlay 0x%lx base:0x%lx m_io:%d\n", id, overlay, base, m_io); + if (!(overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)) { pr_info("UV: %s disabled\n", id); return; } - shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT; - base = (unsigned long)overlay.s3.base; - m_io = overlay.s3.m_io; - mmr = mmiohs[index].redirect; - n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH; /* Convert to NASID: */ min_pnode *= 2; max_pnode *= 2; max_io = lnasid = fi = li = -1; for (i = 0; i < n; i++) { - union uvh_rh_gam_mmioh_redirect_config0_mmr_u redirect; unsigned long m_redirect = mmr + i * 8; + unsigned long redirect = uv_read_local_mmr(m_redirect); + + nasid = redirect & nasid_mask; + if (i == 0) + pr_info("UV: %s redirect base 0x%lx(@0x%lx) 0x%04x\n", + id, redirect, m_redirect, nasid); - redirect.v = uv_read_local_mmr(mmr + i * 8); - nasid = redirect.s3.nasid; - printk_once(KERN_INFO - "UV: %s redirect 0x%lx(@0x%lx) 0x%04x\n", - id, redirect.v, m_redirect, nasid); /* Invalid NASID: */ if (nasid < min_pnode || max_pnode < nasid) nasid = -1; @@ -905,8 +896,8 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode) if (is_uv3_hub() || is_uv4_hub()) { /* Map both MMIOH regions: */ - map_mmioh_high_uv3(0, min_pnode, max_pnode); - map_mmioh_high_uv3(1, min_pnode, max_pnode); + map_mmioh_high_uv34(0, min_pnode, max_pnode); + map_mmioh_high_uv34(1, min_pnode, max_pnode); return; } -- cgit v1.2.3 From a631a0a7a3caf6a9924856f3dcfe256e747f7467 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 8 Jan 2018 13:40:04 -0600 Subject: x86/platform/UV: Fix UV4A BAU MMRs Fixes to accommodate Intel Processor changes for UV4A broadcast assist unit (BAU) MMRs. 
Signed-off-by: Mike Travis Acked-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515440405-20880-7-git-send-email-mike.travis@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_mmrs.h | 59 +++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 30db549885e2..ecb9ddef128f 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -39,9 +39,11 @@ * #define UV2Hxxx b * #define UV3Hxxx c * #define UV4Hxxx d + * #define UV4AHxxx e * #define UVHxxx (is_uv1_hub() ? UV1Hxxx : * (is_uv2_hub() ? UV2Hxxx : * (is_uv3_hub() ? UV3Hxxx : + * (is_uv4a_hub() ? UV4AHxxx : * UV4Hxxx)) * * If the MMR exists on all hub types > 1 but have different addresses, the @@ -49,8 +51,10 @@ * #define UV2Hxxx b * #define UV3Hxxx c * #define UV4Hxxx d + * #define UV4AHxxx e * #define UVHxxx (is_uv2_hub() ? UV2Hxxx : * (is_uv3_hub() ? UV3Hxxx : + * (is_uv4a_hub() ? 
UV4AHxxx : * UV4Hxxx)) * * union uvh_xxx { @@ -63,6 +67,7 @@ * } s2; * struct uv3h_xxx_s { # Full UV3 definition (*) * } s3; + * (NOTE: No struct uv4ah_xxx_s members exist) * struct uv4h_xxx_s { # Full UV4 definition (*) * } s4; * }; @@ -2780,35 +2785,47 @@ union uvh_lb_bau_sb_activation_status_1_u { /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32) #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL - - -union uvh_lb_bau_sb_descriptor_base_u { - unsigned long v; - struct uvh_lb_bau_sb_descriptor_base_s { - unsigned long rsvd_0_11:12; - unsigned long rsvd_12_48:37; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s; - struct uv4h_lb_bau_sb_descriptor_base_s { - unsigned long rsvd_0_11:12; - unsigned long page_address:34; /* RW */ - unsigned long rsvd_46_48:3; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s4; -}; +#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL + +#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 53 +#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000ffffffffff000UL +#define 
UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0xffe0000000000000UL + +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ + is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT) + +#define UVH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ + is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK) + +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK ( \ + is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ + is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ + is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ + is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK : \ + /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK) /* ========================================================================= */ /* UVH_NODE_ID */ -- cgit v1.2.3 From 1da2fd61d956a01ead87173a8367e5c664617f7b Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Mon, 8 Jan 2018 13:43:12 -0600 Subject: x86/platform/uv/BAU: Replace hard-coded values with MMR definitions Replaces hard-coded node ID shift for the descriptor base MMR to fix initialization on UV4A while maintaining support for previous architectures. 
Signed-off-by: Andrew Banman Acked-by: Mike Travis Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1515440592-44060-1-git-send-email-abanman@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 1 - arch/x86/platform/uv/tlb_uv.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 7cac79802ad2..7803114aa140 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -48,7 +48,6 @@ #define UV2_NET_ENDPOINT_INTD 0x28 #define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) -#define UV_DESC_PSHIFT 49 #define UV_PAYLOADQ_GNODE_SHIFT 49 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" #define UV_BAU_BASENAME "sgi_uv/bau_tunables" diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 8538a6723171..c2e9285d1bf1 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1751,7 +1751,8 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode) uv1 = 1; /* the 14-bit pnode */ - write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); + write_mmr_descriptor_base(pnode, + (n << UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT | m)); /* * Initializing all 8 (ITEMS_PER_DESC) descriptors for each * cpu even though we only use the first one; one descriptor can -- cgit v1.2.3 From 84362d79f436f12d1d9b9640a633de1b684a2609 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Tue, 16 Jan 2018 11:43:51 +0800 Subject: mmc: sdhci-of-arasan: Add CQHCI support for arasan,sdhci-5.1 Add CQHCI initialization and implement CQHCI operations for Arasan SDHCI variant host, namely arasan,sdhci-5.1, which is used by Rockchip RK3399 platform. 
Signed-off-by: Shawn Lin Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 1 + drivers/mmc/host/sdhci-of-arasan.c | 137 +++++++++++++++++++++++++++++++++++-- 2 files changed, 132 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 945ba50e6e6e..579fc7adf15b 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -133,6 +133,7 @@ config MMC_SDHCI_OF_ARASAN depends on MMC_SDHCI_PLTFM depends on OF depends on COMMON_CLK + select MMC_CQHCI help This selects the Arasan Secure Digital Host Controller Interface (SDHCI). This hardware is found e.g. in Xilinx' Zynq SoC. diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index fb572066a88b..c33a5f7393bd 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -25,11 +25,13 @@ #include #include #include -#include "sdhci-pltfm.h" #include -#define SDHCI_ARASAN_VENDOR_REGISTER 0x78 +#include "cqhci.h" +#include "sdhci-pltfm.h" +#define SDHCI_ARASAN_VENDOR_REGISTER 0x78 +#define SDHCI_ARASAN_CQE_BASE_ADDR 0x200 #define VENDOR_ENHANCED_STROBE BIT(0) #define PHY_CLK_TOO_SLOW_HZ 400000 @@ -90,6 +92,7 @@ struct sdhci_arasan_data { struct phy *phy; bool is_phy_on; + bool has_cqe; struct clk_hw sdcardclk_hw; struct clk *sdcardclk; @@ -290,6 +293,62 @@ static const struct sdhci_pltfm_data sdhci_arasan_pdata = { SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN, }; +static u32 sdhci_arasan_cqhci_irq(struct sdhci_host *host, u32 intmask) +{ + int cmd_error = 0; + int data_error = 0; + + if (!sdhci_cqe_irq(host, intmask, &cmd_error, &data_error)) + return intmask; + + cqhci_irq(host->mmc, intmask, cmd_error, data_error); + + return 0; +} + +static void sdhci_arasan_dumpregs(struct mmc_host *mmc) +{ + sdhci_dumpregs(mmc_priv(mmc)); +} + +static void sdhci_arasan_cqe_enable(struct mmc_host *mmc) +{ + struct sdhci_host *host = mmc_priv(mmc); + u32 reg; + + reg = sdhci_readl(host, 
SDHCI_PRESENT_STATE); + while (reg & SDHCI_DATA_AVAILABLE) { + sdhci_readl(host, SDHCI_BUFFER); + reg = sdhci_readl(host, SDHCI_PRESENT_STATE); + } + + sdhci_cqe_enable(mmc); +} + +static const struct cqhci_host_ops sdhci_arasan_cqhci_ops = { + .enable = sdhci_arasan_cqe_enable, + .disable = sdhci_cqe_disable, + .dumpregs = sdhci_arasan_dumpregs, +}; + +static const struct sdhci_ops sdhci_arasan_cqe_ops = { + .set_clock = sdhci_arasan_set_clock, + .get_max_clock = sdhci_pltfm_clk_get_max_clock, + .get_timeout_clock = sdhci_pltfm_clk_get_max_clock, + .set_bus_width = sdhci_set_bus_width, + .reset = sdhci_arasan_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_arasan_set_power, + .irq = sdhci_arasan_cqhci_irq, +}; + +static const struct sdhci_pltfm_data sdhci_arasan_cqe_pdata = { + .ops = &sdhci_arasan_cqe_ops, + .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN, +}; + #ifdef CONFIG_PM_SLEEP /** * sdhci_arasan_suspend - Suspend method for the driver @@ -309,6 +368,12 @@ static int sdhci_arasan_suspend(struct device *dev) if (host->tuning_mode != SDHCI_TUNING_MODE_3) mmc_retune_needed(host->mmc); + if (sdhci_arasan->has_cqe) { + ret = cqhci_suspend(host->mmc); + if (ret) + return ret; + } + ret = sdhci_suspend_host(host); if (ret) return ret; @@ -365,7 +430,16 @@ static int sdhci_arasan_resume(struct device *dev) sdhci_arasan->is_phy_on = true; } - return sdhci_resume_host(host); + ret = sdhci_resume_host(host); + if (ret) { + dev_err(dev, "Cannot resume host.\n"); + return ret; + } + + if (sdhci_arasan->has_cqe) + return cqhci_resume(host->mmc); + + return 0; } #endif /* ! 
CONFIG_PM_SLEEP */ @@ -568,6 +642,49 @@ static void sdhci_arasan_unregister_sdclk(struct device *dev) of_clk_del_provider(dev->of_node); } +static int sdhci_arasan_add_host(struct sdhci_arasan_data *sdhci_arasan) +{ + struct sdhci_host *host = sdhci_arasan->host; + struct cqhci_host *cq_host; + bool dma64; + int ret; + + if (!sdhci_arasan->has_cqe) + return sdhci_add_host(host); + + ret = sdhci_setup_host(host); + if (ret) + return ret; + + cq_host = devm_kzalloc(host->mmc->parent, + sizeof(*cq_host), GFP_KERNEL); + if (!cq_host) { + ret = -ENOMEM; + goto cleanup; + } + + cq_host->mmio = host->ioaddr + SDHCI_ARASAN_CQE_BASE_ADDR; + cq_host->ops = &sdhci_arasan_cqhci_ops; + + dma64 = host->flags & SDHCI_USE_64_BIT_DMA; + if (dma64) + cq_host->caps |= CQHCI_TASK_DESC_SZ_128; + + ret = cqhci_init(cq_host, host->mmc, dma64); + if (ret) + goto cleanup; + + ret = __sdhci_add_host(host); + if (ret) + goto cleanup; + + return 0; + +cleanup: + sdhci_cleanup_host(host); + return ret; +} + static int sdhci_arasan_probe(struct platform_device *pdev) { int ret; @@ -578,9 +695,15 @@ static int sdhci_arasan_probe(struct platform_device *pdev) struct sdhci_pltfm_host *pltfm_host; struct sdhci_arasan_data *sdhci_arasan; struct device_node *np = pdev->dev.of_node; + const struct sdhci_pltfm_data *pdata; + + if (of_device_is_compatible(pdev->dev.of_node, "arasan,sdhci-5.1")) + pdata = &sdhci_arasan_cqe_pdata; + else + pdata = &sdhci_arasan_pdata; + + host = sdhci_pltfm_init(pdev, pdata, sizeof(*sdhci_arasan)); - host = sdhci_pltfm_init(pdev, &sdhci_arasan_pdata, - sizeof(*sdhci_arasan)); if (IS_ERR(host)) return PTR_ERR(host); @@ -675,9 +798,11 @@ static int sdhci_arasan_probe(struct platform_device *pdev) sdhci_arasan_hs400_enhanced_strobe; host->mmc_host_ops.start_signal_voltage_switch = sdhci_arasan_voltage_switch; + sdhci_arasan->has_cqe = true; + host->mmc->caps2 |= MMC_CAP2_CQE | MMC_CAP2_CQE_DCMD; } - ret = sdhci_add_host(host); + ret = sdhci_arasan_add_host(sdhci_arasan); if 
(ret) goto err_add_host; -- cgit v1.2.3 From 82cea533aed44c1b8553fe782e1bc5e1262bd71a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Jan 2018 08:43:40 +0100 Subject: mtd: onenand: omap2: print resource using %pR format string The omap2 onenand driver is now available for compile-testing, which uncovers a warning in configurations that have a 64-bit resource_size_t: drivers/mtd/onenand/omap2.c: In function 'omap2_onenand_probe': drivers/mtd/onenand/omap2.c:536:54: error: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'resource_size_t {aka long long unsigned int}' [-Werror=format=] dev_err(dev, "Cannot reserve memory region at 0x%08x, size: 0x%x\n", drivers/mtd/onenand/omap2.c:536:66: error: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t {aka long long unsigned int}' [-Werror=format=] Changing the format string to the special %pR simplifies the code and lets it do the right thing in that configuration, while avoiding the warning. 
Fixes: a758f50f10cf ("mtd: onenand: omap2: Configure driver from DT") Signed-off-by: Arnd Bergmann Reviewed-by: Sebastian Reichel Reviewed-by: Peter Ujfalusi Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 2ce73fb6da1c..a4a2159bcfb7 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -533,8 +533,7 @@ static int omap2_onenand_probe(struct platform_device *pdev) c->onenand.base = devm_ioremap_resource(dev, res); if (IS_ERR(c->onenand.base)) { - dev_err(dev, "Cannot reserve memory region at 0x%08x, size: 0x%x\n", - res->start, resource_size(res)); + dev_err(dev, "Cannot reserve memory region %pR\n", res); return PTR_ERR(c->onenand.base); } -- cgit v1.2.3 From 5da70160462e80b0ab8a6960cdd0cdd476907523 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:57 +0100 Subject: hrtimer: Implement support for softirq based hrtimers hrtimer callbacks are always invoked in hard interrupt context. Several users in tree require soft interrupt context for their callbacks and achieve this by combining a hrtimer with a tasklet. The hrtimer schedules the tasklet in hard interrupt context and the tasklet callback gets invoked in softirq context later. That's suboptimal and aside from that the real-time patch moves most of the hrtimers into softirq context. So adding native support for hrtimers expiring in softirq context is a valuable extension for both mainline and the RT patch set. Each valid hrtimer clock id has two associated hrtimer clock bases: one for timers expiring in hardirq context and one for timers expiring in softirq context. Implement the functionality to associate a hrtimer with the hard or softirq related clock bases and update the relevant functions to take them into account when the next expiry time needs to be evaluated.
Add a check into the hard interrupt context handler functions to check whether the first expiring softirq based timer has expired. If it's expired the softirq is raised and the accounting of softirq based timers to evaluate the next expiry time for programming the timer hardware is skipped until the softirq processing has finished. At the end of the softirq processing the regular processing is resumed. Suggested-by: Thomas Gleixner Suggested-by: Peter Zijlstra Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-29-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 21 ++++-- kernel/time/hrtimer.c | 196 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 188 insertions(+), 29 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 26ae8a868ea8..c7902ca7c9f4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -103,6 +103,7 @@ enum hrtimer_restart { * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative + * @is_soft: Set if hrtimer will be expired in soft interrupt context. * * The hrtimer structure must be initialized by hrtimer_init() */ @@ -113,6 +114,7 @@ struct hrtimer { struct hrtimer_clock_base *base; u8 state; u8 is_rel; + u8 is_soft; }; /** @@ -178,13 +180,18 @@ enum hrtimer_base_type { * @hres_active: State of high resolution mode * @in_hrtirq: hrtimer_interrupt() is currently executing * @hang_detected: The last hrtimer interrupt detected a hang + * @softirq_activated: displays, if the softirq is raised - update of softirq + * related settings is not required then. 
* @nr_events: Total number of hrtimer interrupt events * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) * @next_timer: Pointer to the first expiring timer + * @softirq_expires_next: Time to check, if soft queues needs also to be expired + * @softirq_next_timer: Pointer to the first expiring softirq based timer * @clock_base: array of clock bases for this cpu * * Note: next_timer is just an optimization for __remove_hrtimer(). @@ -196,9 +203,10 @@ struct hrtimer_cpu_base { unsigned int cpu; unsigned int active_bases; unsigned int clock_was_set_seq; - unsigned int hres_active : 1, - in_hrtirq : 1, - hang_detected : 1; + unsigned int hres_active : 1, + in_hrtirq : 1, + hang_detected : 1, + softirq_activated : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; @@ -207,6 +215,8 @@ struct hrtimer_cpu_base { #endif ktime_t expires_next; struct hrtimer *next_timer; + ktime_t softirq_expires_next; + struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; } ____cacheline_aligned; @@ -379,7 +389,8 @@ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * @timer: the timer to be added * @tim: expiry time * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); + * softirq based mode is considered for debug purpose only! 
*/ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index ba4674e9adc2..d93e3e745592 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -411,7 +411,8 @@ static inline void debug_hrtimer_init(struct hrtimer *timer) debug_object_init(timer, &hrtimer_debug_descr); } -static inline void debug_hrtimer_activate(struct hrtimer *timer) +static inline void debug_hrtimer_activate(struct hrtimer *timer, + enum hrtimer_mode mode) { debug_object_activate(timer, &hrtimer_debug_descr); } @@ -444,8 +445,10 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer) EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); #else + static inline void debug_hrtimer_init(struct hrtimer *timer) { } -static inline void debug_hrtimer_activate(struct hrtimer *timer) { } +static inline void debug_hrtimer_activate(struct hrtimer *timer, + enum hrtimer_mode mode) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif @@ -460,7 +463,7 @@ debug_init(struct hrtimer *timer, clockid_t clockid, static inline void debug_activate(struct hrtimer *timer, enum hrtimer_mode mode) { - debug_hrtimer_activate(timer); + debug_hrtimer_activate(timer, mode); trace_hrtimer_start(timer, mode); } @@ -503,7 +506,10 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, expires = ktime_sub(hrtimer_get_expires(timer), base->offset); if (expires < expires_next) { expires_next = expires; - cpu_base->next_timer = timer; + if (timer->is_soft) + cpu_base->softirq_next_timer = timer; + else + cpu_base->next_timer = timer; } } /* @@ -520,21 +526,39 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, * Recomputes cpu_base::*next_timer and returns the earliest expires_next but * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. 
* + * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, + * those timers will get run whenever the softirq gets handled, at the end of + * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. + * + * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. + * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual + * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. + * * @active_mask must be one of: - * - HRTIMER_ACTIVE, + * - HRTIMER_ACTIVE_ALL, * - HRTIMER_ACTIVE_SOFT, or * - HRTIMER_ACTIVE_HARD. */ -static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, - unsigned int active_mask) +static ktime_t +__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) { unsigned int active; + struct hrtimer *next_timer = NULL; ktime_t expires_next = KTIME_MAX; - cpu_base->next_timer = NULL; + if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { + active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; + cpu_base->softirq_next_timer = NULL; + expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX); + + next_timer = cpu_base->softirq_next_timer; + } - active = cpu_base->active_bases & active_mask; - expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + if (active_mask & HRTIMER_ACTIVE_HARD) { + active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; + cpu_base->next_timer = next_timer; + expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); + } return expires_next; } @@ -545,8 +569,14 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; - return ktime_get_update_offsets_now(&base->clock_was_set_seq, + ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, offs_real, 
offs_boot, offs_tai); + + base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; + base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; + base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; + + return now; } /* @@ -573,7 +603,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + /* + * Find the current next expiration time. + */ + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); + + if (cpu_base->next_timer && cpu_base->next_timer->is_soft) { + /* + * When the softirq is activated, hrtimer has to be + * programmed with the first hard hrtimer because soft + * timer interrupt could occur too late. + */ + if (cpu_base->softirq_activated) + expires_next = __hrtimer_get_next_event(cpu_base, + HRTIMER_ACTIVE_HARD); + else + cpu_base->softirq_expires_next = expires_next; + } if (skip_equal && expires_next == cpu_base->expires_next) return; @@ -700,7 +746,7 @@ static inline void retrigger_next_event(void *arg) { } * * Called with interrupts disabled and base->cpu_base.lock held */ -static void hrtimer_reprogram(struct hrtimer *timer) +static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base = timer->base; @@ -708,6 +754,37 @@ static void hrtimer_reprogram(struct hrtimer *timer) WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); + /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Set it to 0. + */ + if (expires < 0) + expires = 0; + + if (timer->is_soft) { + /* + * soft hrtimer could be started on a remote CPU. In this + * case softirq_expires_next needs to be updated on the + * remote CPU. The soft hrtimer will not expire before the + * first hard hrtimer on the remote CPU - + * hrtimer_check_target() prevents this case. 
+ */ + struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; + + if (timer_cpu_base->softirq_activated) + return; + + if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) + return; + + timer_cpu_base->softirq_next_timer = timer; + timer_cpu_base->softirq_expires_next = expires; + + if (!ktime_before(expires, timer_cpu_base->expires_next) || + !reprogram) + return; + } + /* * If the timer is not on the current cpu, we cannot reprogram * the other cpus clock event device. @@ -725,13 +802,6 @@ static void hrtimer_reprogram(struct hrtimer *timer) if (cpu_base->in_hrtirq) return; - /* - * CLOCK_REALTIME timer might be requested with an absolute - * expiry time which is less than base->offset. Set it to 0. - */ - if (expires < 0) - expires = 0; - if (expires >= cpu_base->expires_next) return; @@ -957,6 +1027,31 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, return tim; } +static void +hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) +{ + ktime_t expires; + + /* + * Find the next SOFT expiration. + */ + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); + + /* + * reprogramming needs to be triggered, even if the next soft + * hrtimer expires at the same time than the next hard + * hrtimer. cpu_base->softirq_expires_next needs to be updated! 
+ */ + if (expires == KTIME_MAX) + return; + + /* + * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() + * cpu_base->*expires_next is only set by hrtimer_reprogram() + */ + hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); +} + static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) @@ -978,13 +1073,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, return enqueue_hrtimer(timer, new_base, mode); } + /** * hrtimer_start_range_ns - (re)start an hrtimer * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED) + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); + * softirq based mode is considered for debug purpose only! */ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode) @@ -992,10 +1089,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, struct hrtimer_clock_base *base; unsigned long flags; + /* + * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft + * match. 
+ */ + WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); + base = lock_hrtimer_base(timer, &flags); if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) - hrtimer_reprogram(timer); + hrtimer_reprogram(timer, true); unlock_hrtimer_base(timer, &flags); } @@ -1094,7 +1197,7 @@ u64 hrtimer_get_next_event(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); if (!__hrtimer_hres_active(cpu_base)) - expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); @@ -1304,6 +1407,23 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, } } +static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + unsigned long flags; + ktime_t now; + + raw_spin_lock_irqsave(&cpu_base->lock, flags); + + now = hrtimer_update_base(cpu_base); + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); + + cpu_base->softirq_activated = 0; + hrtimer_update_softirq_timer(cpu_base, true); + + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); +} + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1334,10 +1454,16 @@ retry: */ cpu_base->expires_next = KTIME_MAX; + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); /* Reevaluate the clock bases for the next expiry */ - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); /* * Store the new expiry value so the migration code can verify * against it. 
@@ -1441,6 +1567,13 @@ void hrtimer_run_queues(void) raw_spin_lock_irqsave(&cpu_base->lock, flags); now = hrtimer_update_base(cpu_base); + + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); raw_spin_unlock_irqrestore(&cpu_base->lock, flags); } @@ -1622,6 +1755,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->cpu = cpu; cpu_base->hres_active = 0; cpu_base->expires_next = KTIME_MAX; + cpu_base->softirq_expires_next = KTIME_MAX; return 0; } @@ -1665,6 +1799,12 @@ int hrtimers_dead_cpu(unsigned int scpu) BUG_ON(cpu_online(scpu)); tick_cancel_sched_timer(scpu); + /* + * this BH disable ensures that raise_softirq_irqoff() does + * not wakeup ksoftirqd (and acquire the pi-lock) while + * holding the cpu_base lock + */ + local_bh_disable(); local_irq_disable(); old_base = &per_cpu(hrtimer_bases, scpu); new_base = this_cpu_ptr(&hrtimer_bases); @@ -1680,12 +1820,19 @@ int hrtimers_dead_cpu(unsigned int scpu) &new_base->clock_base[i]); } + /* + * The migration might have changed the first expiring softirq + * timer on this CPU. Update it. 
+ */ + hrtimer_update_softirq_timer(new_base, false); + raw_spin_unlock(&old_base->lock); raw_spin_unlock(&new_base->lock); /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); local_irq_enable(); + local_bh_enable(); return 0; } @@ -1694,6 +1841,7 @@ int hrtimers_dead_cpu(unsigned int scpu) void __init hrtimers_init(void) { hrtimers_prepare_cpu(smp_processor_id()); + open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); } /** -- cgit v1.2.3 From 42f42da41b54c191ae6a775e84a86c100d66c5e8 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Thu, 21 Dec 2017 11:41:58 +0100 Subject: hrtimer: Implement SOFT/HARD clock base selection All prerequisites to handle hrtimers for expiry in either hard or soft interrupt context are in place. Add the missing bit in hrtimer_init() which associates the timer to the hard or the softirq clock base. Signed-off-by: Anna-Maria Gleixner Cc: Christoph Hellwig Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-30-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d93e3e745592..3d201582630d 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1220,8 +1220,9 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id) static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { + bool softtimer = !!(mode & HRTIMER_MODE_SOFT); + int base = softtimer ? 
HRTIMER_MAX_CLOCK_BASES / 2 : 0; struct hrtimer_cpu_base *cpu_base; - int base; memset(timer, 0, sizeof(struct hrtimer)); @@ -1235,7 +1236,8 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) clock_id = CLOCK_MONOTONIC; - base = hrtimer_clockid_to_base(clock_id); + base += hrtimer_clockid_to_base(clock_id); + timer->is_soft = softtimer; timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); } @@ -1244,8 +1246,13 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, * hrtimer_init - initialize a timer to the given clock * @timer: the timer to be initialized * @clock_id: the clock to be used - * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or - * relative (HRTIMER_MODE_REL); pinned is not considered here! + * @mode: The modes which are relevant for intitialization: + * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, + * HRTIMER_MODE_REL_SOFT + * + * The PINNED variants of the above can be handed in, + * but the PINNED bit is ignored as pinning happens + * when the hrtimer is started */ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) -- cgit v1.2.3 From b03bbbe08ff04d80136b6aac152954ef308a4909 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Dec 2017 11:42:03 +0100 Subject: ALSA/dummy: Replace tasklet with softirq hrtimer The tasklet is used to defer the execution of snd_pcm_period_elapsed() to the softirq context. Using the HRTIMER_MODE_SOFT mode invokes the timer callback in softirq context as well which renders the tasklet useless. 
[o-takashi: avoid stall due to a call of hrtimer_cancel() on a callback of hrtimer] Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Reviewed-by: Takashi Iwai Cc: Christoph Hellwig Cc: Jaroslav Kysela Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Takashi Iwai Cc: Takashi Sakamoto Cc: alsa-devel@alsa-project.org Cc: keescook@chromium.org Link: http://lkml.kernel.org/r/20171221104205.7269-35-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- sound/drivers/dummy.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 7b2b1f766b00..6ad2ff57833d 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -375,17 +375,9 @@ struct dummy_hrtimer_pcm { ktime_t period_time; atomic_t running; struct hrtimer timer; - struct tasklet_struct tasklet; struct snd_pcm_substream *substream; }; -static void dummy_hrtimer_pcm_elapsed(unsigned long priv) -{ - struct dummy_hrtimer_pcm *dpcm = (struct dummy_hrtimer_pcm *)priv; - if (atomic_read(&dpcm->running)) - snd_pcm_period_elapsed(dpcm->substream); -} - static enum hrtimer_restart dummy_hrtimer_callback(struct hrtimer *timer) { struct dummy_hrtimer_pcm *dpcm; @@ -393,7 +385,14 @@ static enum hrtimer_restart dummy_hrtimer_callback(struct hrtimer *timer) dpcm = container_of(timer, struct dummy_hrtimer_pcm, timer); if (!atomic_read(&dpcm->running)) return HRTIMER_NORESTART; - tasklet_schedule(&dpcm->tasklet); + /* + * In cases of XRUN and draining, this calls .trigger to stop PCM + * substream. 
+ */ + snd_pcm_period_elapsed(dpcm->substream); + if (!atomic_read(&dpcm->running)) + return HRTIMER_NORESTART; + hrtimer_forward_now(timer, dpcm->period_time); return HRTIMER_RESTART; } @@ -403,7 +402,7 @@ static int dummy_hrtimer_start(struct snd_pcm_substream *substream) struct dummy_hrtimer_pcm *dpcm = substream->runtime->private_data; dpcm->base_time = hrtimer_cb_get_time(&dpcm->timer); - hrtimer_start(&dpcm->timer, dpcm->period_time, HRTIMER_MODE_REL); + hrtimer_start(&dpcm->timer, dpcm->period_time, HRTIMER_MODE_REL_SOFT); atomic_set(&dpcm->running, 1); return 0; } @@ -413,14 +412,14 @@ static int dummy_hrtimer_stop(struct snd_pcm_substream *substream) struct dummy_hrtimer_pcm *dpcm = substream->runtime->private_data; atomic_set(&dpcm->running, 0); - hrtimer_cancel(&dpcm->timer); + if (!hrtimer_callback_running(&dpcm->timer)) + hrtimer_cancel(&dpcm->timer); return 0; } static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm) { hrtimer_cancel(&dpcm->timer); - tasklet_kill(&dpcm->tasklet); } static snd_pcm_uframes_t @@ -465,12 +464,10 @@ static int dummy_hrtimer_create(struct snd_pcm_substream *substream) if (!dpcm) return -ENOMEM; substream->runtime->private_data = dpcm; - hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); dpcm->timer.function = dummy_hrtimer_callback; dpcm->substream = substream; atomic_set(&dpcm->running, 0); - tasklet_init(&dpcm->tasklet, dummy_hrtimer_pcm_elapsed, - (unsigned long)dpcm); return 0; } -- cgit v1.2.3 From b1a31a5f5f27ff8aba42b545a1c721941f735107 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Dec 2017 11:42:04 +0100 Subject: usb/gadget/NCM: Replace tasklet with softirq hrtimer The tx_tasklet tasklet is used to invoke the hrtimer (task_timer) in softirq context. This can also be achieved without the tasklet but with HRTIMER_MODE_SOFT as hrtimer mode.
Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Acked-by: Felipe Balbi Cc: Christoph Hellwig Cc: Felipe Balbi Cc: John Stultz Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Cc: linux-usb@vger.kernel.org Link: http://lkml.kernel.org/r/20171221104205.7269-36-anna-maria@linutronix.de Signed-off-by: Ingo Molnar --- drivers/usb/gadget/function/f_ncm.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index c5bce8e22983..5780fba620ab 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -73,9 +73,7 @@ struct f_ncm { struct sk_buff *skb_tx_ndp; u16 ndp_dgram_count; bool timer_force_tx; - struct tasklet_struct tx_tasklet; struct hrtimer task_timer; - bool timer_stopping; }; @@ -1104,7 +1102,7 @@ static struct sk_buff *ncm_wrap_ntb(struct gether *port, /* Delay the timer. */ hrtimer_start(&ncm->task_timer, TX_TIMEOUT_NSECS, - HRTIMER_MODE_REL); + HRTIMER_MODE_REL_SOFT); /* Add the datagram position entries */ ntb_ndp = skb_put_zero(ncm->skb_tx_ndp, dgram_idx_len); @@ -1148,17 +1146,15 @@ err: } /* - * This transmits the NTB if there are frames waiting. + * The transmit should only be run if no skb data has been sent + * for a certain duration. */ -static void ncm_tx_tasklet(unsigned long data) +static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) { - struct f_ncm *ncm = (void *)data; - - if (ncm->timer_stopping) - return; + struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); /* Only send if data is available. 
*/ - if (ncm->skb_tx_data) { + if (!ncm->timer_stopping && ncm->skb_tx_data) { ncm->timer_force_tx = true; /* XXX This allowance of a NULL skb argument to ndo_start_xmit @@ -1171,16 +1167,6 @@ static void ncm_tx_tasklet(unsigned long data) ncm->timer_force_tx = false; } -} - -/* - * The transmit should only be run if no skb data has been sent - * for a certain duration. - */ -static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) -{ - struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); - tasklet_schedule(&ncm->tx_tasklet); return HRTIMER_NORESTART; } @@ -1513,8 +1499,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) ncm->port.open = ncm_open; ncm->port.close = ncm_close; - tasklet_init(&ncm->tx_tasklet, ncm_tx_tasklet, (unsigned long) ncm); - hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); ncm->task_timer.function = ncm_tx_timeout; DBG(cdev, "CDC Network: %s speed IN/%s OUT/%s NOTIFY/%s\n", @@ -1623,7 +1608,6 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) DBG(c->cdev, "ncm unbind\n"); hrtimer_cancel(&ncm->task_timer); - tasklet_kill(&ncm->tx_tasklet); ncm_string_defs[0].id = 0; usb_free_all_descriptors(f); -- cgit v1.2.3 From 3d3dd0d3ac207e8d28f6289896b99c1c0dad2fbe Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 Jan 2018 01:59:01 +0000 Subject: ASoC: tlv320dac33: fix regression by adding back .read/.write commit c4305af43a8 ("ASoC: use internal reg_cache on tlv320dac33") removed .read/.write from driver, but it might breaks non-regmap driver, because ALSA SoC framework might call it. 
To fix this regression, this patch adds back .read/.write Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320dac33.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sound/soc/codecs/tlv320dac33.c b/sound/soc/codecs/tlv320dac33.c index 675f5b1b90a6..8c71d2f876ff 100644 --- a/sound/soc/codecs/tlv320dac33.c +++ b/sound/soc/codecs/tlv320dac33.c @@ -246,6 +246,19 @@ static int dac33_write(struct snd_soc_codec *codec, unsigned int reg, return ret; } +static int dac33_write_locked(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + struct tlv320dac33_priv *dac33 = snd_soc_codec_get_drvdata(codec); + int ret; + + mutex_lock(&dac33->mutex); + ret = dac33_write(codec, reg, value); + mutex_unlock(&dac33->mutex); + + return ret; +} + #define DAC33_I2C_ADDR_AUTOINC 0x80 static int dac33_write16(struct snd_soc_codec *codec, unsigned int reg, unsigned int value) @@ -1422,6 +1435,8 @@ static int dac33_soc_remove(struct snd_soc_codec *codec) } static const struct snd_soc_codec_driver soc_codec_dev_tlv320dac33 = { + .read = dac33_read_reg_cache, + .write = dac33_write_locked, .set_bias_level = dac33_set_bias_level, .idle_bias_off = true, -- cgit v1.2.3 From 7604d8068e6253c143b27ce7e164ee8e7a9da5b9 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 Jan 2018 01:59:53 +0000 Subject: ASoC: uda1380: fix regression by adding back .read/.write commit c001bf633a9 ("ASoC: use internal reg_cache on uda1380") removed .read/.write from the driver, but that might break non-regmap drivers, because the ALSA SoC framework might call them.
To fix this regression, this patch adds back .read/.write Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/uda1380.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index 46a495b4da8d..c73e6a192224 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -726,6 +726,8 @@ static int uda1380_probe(struct snd_soc_codec *codec) static const struct snd_soc_codec_driver soc_codec_dev_uda1380 = { .probe = uda1380_probe, + .read = uda1380_read_reg_cache, + .write = uda1380_write, .set_bias_level = uda1380_set_bias_level, .suspend_bias_off = true, -- cgit v1.2.3 From 3c89724e994f4aee6ae2637ccd4e12aa4f92666c Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 Jan 2018 02:00:18 +0000 Subject: ASoC: cx20442: fix regression by adding back .read/.write commit 39b5a0f80c07f ("ASoC: cx20442: don't use reg_cache") removed .read/.write from the driver, but that might break non-regmap drivers, because the ALSA SoC framework might call them. To fix this regression, this patch adds back .read/.write, and it also uses the cx20442-internal reg_cache, which is needed for .read/.write.
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/codecs/cx20442.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index 6b6f8e44369b..95bb10ba80dc 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -28,6 +28,7 @@ struct cx20442_priv { struct tty_struct *tty; struct regulator *por; + u8 reg_cache; }; #define CX20442_PM 0x0 @@ -88,6 +89,17 @@ static const struct snd_soc_dapm_route cx20442_audio_map[] = { {"ADC", NULL, "Input Mixer"}, }; +static unsigned int cx20442_read_reg_cache(struct snd_soc_codec *codec, + unsigned int reg) +{ + struct cx20442_priv *cx20442 = snd_soc_codec_get_drvdata(codec); + + if (reg >= 1) + return -EINVAL; + + return cx20442->reg_cache; +} + enum v253_vls { V253_VLS_NONE = 0, V253_VLS_T, @@ -112,8 +124,6 @@ enum v253_vls { V253_VLS_TEST, }; -#if 0 -/* FIXME : these function will be re-used */ static int cx20442_pm_to_v253_vls(u8 value) { switch (value & ~(1 << CX20442_AGC)) { @@ -147,11 +157,10 @@ static int cx20442_write(struct snd_soc_codec *codec, unsigned int reg, unsigned int value) { struct cx20442_priv *cx20442 = snd_soc_codec_get_drvdata(codec); - u8 *reg_cache = codec->reg_cache; int vls, vsp, old, len; char buf[18]; - if (reg >= codec->driver->reg_cache_size) + if (reg >= 1) return -EINVAL; /* tty and write pointers required for talking to the modem @@ -159,8 +168,8 @@ static int cx20442_write(struct snd_soc_codec *codec, unsigned int reg, if (!cx20442->tty || !cx20442->tty->ops->write) return -EIO; - old = reg_cache[reg]; - reg_cache[reg] = value; + old = cx20442->reg_cache; + cx20442->reg_cache = value; vls = cx20442_pm_to_v253_vls(value); if (vls < 0) @@ -190,7 +199,6 @@ static int cx20442_write(struct snd_soc_codec *codec, unsigned int reg, return 0; } -#endif /* * Line discpline related code @@ -384,12 +392,12 @@ static int cx20442_codec_remove(struct snd_soc_codec *codec) 
return 0; } -static const u8 cx20442_reg; - static const struct snd_soc_codec_driver cx20442_codec_dev = { .probe = cx20442_codec_probe, .remove = cx20442_codec_remove, .set_bias_level = cx20442_set_bias_level, + .read = cx20442_read_reg_cache, + .write = cx20442_write, .component_driver = { .dapm_widgets = cx20442_dapm_widgets, -- cgit v1.2.3 From d0ff8ba57d9654e6f7a2e18f192bac3b93268fef Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 Jan 2018 02:00:59 +0000 Subject: ASoC: add Component level .read/.write In current ALSA SoC, Codec only has .read/.write callback. Codec will be merged into Component in next generation ALSA SoC, thus current Codec specific feature need to be merged into it. This is glue patch for it. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/soc.h | 3 +++ sound/soc/soc-io.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/include/sound/soc.h b/include/sound/soc.h index 1a7323238c49..6e865c2bcffe 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -802,6 +802,9 @@ struct snd_soc_component_driver { int (*suspend)(struct snd_soc_component *); int (*resume)(struct snd_soc_component *); + unsigned int (*read)(struct snd_soc_component *, unsigned int); + int (*write)(struct snd_soc_component *, unsigned int, unsigned int); + /* pcm creation and destruction */ int (*pcm_new)(struct snd_soc_pcm_runtime *); void (*pcm_free)(struct snd_pcm *); diff --git a/sound/soc/soc-io.c b/sound/soc/soc-io.c index 20340ade20a7..2bc1c4c17896 100644 --- a/sound/soc/soc-io.c +++ b/sound/soc/soc-io.c @@ -34,6 +34,10 @@ int snd_soc_component_read(struct snd_soc_component *component, ret = regmap_read(component->regmap, reg, val); else if (component->read) ret = component->read(component, reg, val); + else if (component->driver->read) { + *val = component->driver->read(component, reg); + ret = 0; + } else ret = -EIO; @@ -70,6 +74,8 @@ int snd_soc_component_write(struct snd_soc_component *component, return 
regmap_write(component->regmap, reg, val); else if (component->write) return component->write(component, reg, val); + else if (component->driver->write) + return component->driver->write(component, reg, val); else return -EIO; } -- cgit v1.2.3 From 3c7f69195cdd2a14ab85dfb32805e866eb241a6e Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 16 Jan 2018 13:26:26 +0000 Subject: ALSA: pcm: Fix trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index f08772568c17..484a18d96371 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3446,7 +3446,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_lib_default_mmap); int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_struct *area) { - struct snd_pcm_runtime *runtime = substream->runtime;; + struct snd_pcm_runtime *runtime = substream->runtime; area->vm_page_prot = pgprot_noncached(area->vm_page_prot); return vm_iomap_memory(area, runtime->dma_addr, runtime->dma_bytes); -- cgit v1.2.3 From d020fc8e5089dd6c60b1638030e0046dffa0fdbc Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 9 Jan 2018 09:50:33 +0100 Subject: mtd: mtdpart: Make ECC stat handling consistent part_read() and part_read_oob() were counting ECC failures and bitflips differently. Adjust part_read_oob() to mimic what is done in part_read(). This is needed to use ->_read_oob() as a fallback when ->_read() is not implemented. Note that bitflips and ECC failure accounting on MTD partitions is broken by design, because nothing prevents concurrent accesses to the underlying master MTD device between the moment we save the stats in a local variable and the moment master->_read[_oob]() returns.
It's not something that can easily be fixed, so leave it like that for now. Suggested-by: Brian Norris Signed-off-by: Boris Brezillon Tested-by: Ladislav Michl --- drivers/mtd/mtdpart.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index be088bccd593..79bf1f61c7a0 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -105,6 +105,7 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { struct mtd_part *part = mtd_to_part(mtd); + struct mtd_ecc_stats stats; int res; if (from >= mtd->size) @@ -126,13 +127,14 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from, return -EINVAL; } + stats = part->parent->ecc_stats; res = part->parent->_read_oob(part->parent, from + part->offset, ops); - if (unlikely(res)) { - if (mtd_is_bitflip(res)) - mtd->ecc_stats.corrected++; - if (mtd_is_eccerr(res)) - mtd->ecc_stats.failed++; - } + if (unlikely(mtd_is_eccerr(res))) + mtd->ecc_stats.failed += + part->parent->ecc_stats.failed - stats.failed; + else + mtd->ecc_stats.corrected += + part->parent->ecc_stats.corrected - stats.corrected; return res; } -- cgit v1.2.3 From 24ff12922278573b1e4c54b4898ab7a3c64be960 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 9 Jan 2018 09:50:34 +0100 Subject: mtd: Fallback to ->_read/write_oob() when ->_read/write() is missing Some MTD sublayers/drivers are implementing ->_read/write_oob() and provide dummy wrappers for their ->_read/write() implementations. Let the core handle this case instead of duplicating the logic. 
Signed-off-by: Boris Brezillon Acked-by: Robert Jarzmik Acked-by: Brian Norris Reviewed-by: Miquel Raynal Tested-by: Ladislav Michl --- drivers/mtd/devices/docg3.c | 65 -------------------------------------- drivers/mtd/mtdcore.c | 31 ++++++++++++++++-- drivers/mtd/mtdpart.c | 6 ++-- drivers/mtd/nand/nand_base.c | 56 -------------------------------- drivers/mtd/onenand/onenand_base.c | 63 ------------------------------------ 5 files changed, 33 insertions(+), 188 deletions(-) diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index 0806f72102c0..5fb5e93d1547 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -990,36 +990,6 @@ err_in_read: goto out; } -/** - * doc_read - Read bytes from flash - * @mtd: the device - * @from: the offset from first block and first page, in bytes, aligned on page - * size - * @len: the number of bytes to read (must be a multiple of 4) - * @retlen: the number of bytes actually read - * @buf: the filled in buffer - * - * Reads flash memory pages. This function does not read the OOB chunk, but only - * the page data. - * - * Returns 0 if read successful, of -EIO, -EINVAL if an error occurred - */ -static int doc_read(struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, u_char *buf) -{ - struct mtd_oob_ops ops; - size_t ret; - - memset(&ops, 0, sizeof(ops)); - ops.datbuf = buf; - ops.len = len; - ops.mode = MTD_OPS_AUTO_OOB; - - ret = doc_read_oob(mtd, from, &ops); - *retlen = ops.retlen; - return ret; -} - static int doc_reload_bbt(struct docg3 *docg3) { int block = DOC_LAYOUT_BLOCK_BBT; @@ -1513,39 +1483,6 @@ static int doc_write_oob(struct mtd_info *mtd, loff_t ofs, return ret; } -/** - * doc_write - Write a buffer to the chip - * @mtd: the device - * @to: the offset from first block and first page, in bytes, aligned on page - * size - * @len: the number of bytes to write (must be a full page size, ie. 
512) - * @retlen: the number of bytes actually written (0 or 512) - * @buf: the buffer to get bytes from - * - * Writes data to the chip. - * - * Returns 0 if write successful, -EIO if write error - */ -static int doc_write(struct mtd_info *mtd, loff_t to, size_t len, - size_t *retlen, const u_char *buf) -{ - struct docg3 *docg3 = mtd->priv; - int ret; - struct mtd_oob_ops ops; - - doc_dbg("doc_write(to=%lld, len=%zu)\n", to, len); - ops.datbuf = (char *)buf; - ops.len = len; - ops.mode = MTD_OPS_PLACE_OOB; - ops.oobbuf = NULL; - ops.ooblen = 0; - ops.ooboffs = 0; - - ret = doc_write_oob(mtd, to, &ops); - *retlen = ops.retlen; - return ret; -} - static struct docg3 *sysfs_dev2docg3(struct device *dev, struct device_attribute *attr) { @@ -1866,8 +1803,6 @@ static int __init doc_set_driver_info(int chip_id, struct mtd_info *mtd) mtd->writebufsize = mtd->writesize = DOC_LAYOUT_PAGE_SIZE; mtd->oobsize = DOC_LAYOUT_OOB_SIZE; mtd->_erase = doc_erase; - mtd->_read = doc_read; - mtd->_write = doc_write; mtd->_read_oob = doc_read_oob; mtd->_write_oob = doc_write_oob; mtd->_block_isbad = doc_block_isbad; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 642c35dde686..d7ab091b36b2 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -1058,7 +1058,20 @@ int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, * representing the maximum number of bitflips that were corrected on * any one ecc region (if applicable; zero otherwise). 
*/ - ret_code = mtd->_read(mtd, from, len, retlen, buf); + if (mtd->_read) { + ret_code = mtd->_read(mtd, from, len, retlen, buf); + } else if (mtd->_read_oob) { + struct mtd_oob_ops ops = { + .len = len, + .datbuf = buf, + }; + + ret_code = mtd->_read_oob(mtd, from, &ops); + *retlen = ops.retlen; + } else { + return -ENOTSUPP; + } + if (unlikely(ret_code < 0)) return ret_code; if (mtd->ecc_strength == 0) @@ -1073,11 +1086,25 @@ int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, *retlen = 0; if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; - if (!mtd->_write || !(mtd->flags & MTD_WRITEABLE)) + if ((!mtd->_write && !mtd->_write_oob) || + !(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!len) return 0; ledtrig_mtd_activity(); + + if (!mtd->_write) { + struct mtd_oob_ops ops = { + .len = len, + .datbuf = (u8 *)buf, + }; + int ret; + + ret = mtd->_write_oob(mtd, to, &ops); + *retlen = ops.retlen; + return ret; + } + return mtd->_write(mtd, to, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_write); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 79bf1f61c7a0..dd28cb0de2c8 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -437,8 +437,10 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent, parent->dev.parent; slave->mtd.dev.of_node = part->of_node; - slave->mtd._read = part_read; - slave->mtd._write = part_write; + if (parent->_read) + slave->mtd._read = part_read; + if (parent->_write) + slave->mtd._write = part_write; if (parent->_panic_write) slave->mtd._panic_write = part_panic_write; diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 6135d007a068..889ceadbf607 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2026,33 +2026,6 @@ read_retry: return max_bitflips; } -/** - * nand_read - [MTD Interface] MTD compatibility function for nand_do_read_ecc - * @mtd: MTD device structure - * @from: offset to read from - * @len: 
number of bytes to read - * @retlen: pointer to variable to store the number of read bytes - * @buf: the databuffer to put data - * - * Get hold of the chip and call nand_do_read. - */ -static int nand_read(struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, uint8_t *buf) -{ - struct mtd_oob_ops ops; - int ret; - - nand_get_device(mtd, FL_READING); - memset(&ops, 0, sizeof(ops)); - ops.len = len; - ops.datbuf = buf; - ops.mode = MTD_OPS_PLACE_OOB; - ret = nand_do_read_ops(mtd, from, &ops); - *retlen = ops.retlen; - nand_release_device(mtd); - return ret; -} - /** * nand_read_oob_std - [REPLACEABLE] the most common OOB data read function * @mtd: mtd info structure @@ -2821,33 +2794,6 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len, return ret; } -/** - * nand_write - [MTD Interface] NAND write with ECC - * @mtd: MTD device structure - * @to: offset to write to - * @len: number of bytes to write - * @retlen: pointer to variable to store the number of written bytes - * @buf: the data to write - * - * NAND write with ECC. 
- */ -static int nand_write(struct mtd_info *mtd, loff_t to, size_t len, - size_t *retlen, const uint8_t *buf) -{ - struct mtd_oob_ops ops; - int ret; - - nand_get_device(mtd, FL_WRITING); - memset(&ops, 0, sizeof(ops)); - ops.len = len; - ops.datbuf = (uint8_t *)buf; - ops.mode = MTD_OPS_PLACE_OOB; - ret = nand_do_write_ops(mtd, to, &ops); - *retlen = ops.retlen; - nand_release_device(mtd); - return ret; -} - /** * nand_do_write_oob - [MTD Interface] NAND write out-of-band * @mtd: MTD device structure @@ -4917,8 +4863,6 @@ int nand_scan_tail(struct mtd_info *mtd) mtd->_erase = nand_erase; mtd->_point = NULL; mtd->_unpoint = NULL; - mtd->_read = nand_read; - mtd->_write = nand_write; mtd->_panic_write = panic_nand_write; mtd->_read_oob = nand_read_oob; mtd->_write_oob = nand_write_oob; diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 1a6d0e367b89..050ba8a87543 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -1447,38 +1447,6 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from, return 0; } -/** - * onenand_read - [MTD Interface] Read data from flash - * @param mtd MTD device structure - * @param from offset to read from - * @param len number of bytes to read - * @param retlen pointer to variable to store the number of read bytes - * @param buf the databuffer to put data - * - * Read with ecc -*/ -static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len, - size_t *retlen, u_char *buf) -{ - struct onenand_chip *this = mtd->priv; - struct mtd_oob_ops ops = { - .len = len, - .ooblen = 0, - .datbuf = buf, - .oobbuf = NULL, - }; - int ret; - - onenand_get_device(mtd, FL_READING); - ret = ONENAND_IS_4KB_PAGE(this) ? 
- onenand_mlc_read_ops_nolock(mtd, from, &ops) : - onenand_read_ops_nolock(mtd, from, &ops); - onenand_release_device(mtd); - - *retlen = ops.retlen; - return ret; -} - /** * onenand_read_oob - [MTD Interface] Read main and/or out-of-band * @param mtd: MTD device structure @@ -2128,35 +2096,6 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to, return ret; } -/** - * onenand_write - [MTD Interface] write buffer to FLASH - * @param mtd MTD device structure - * @param to offset to write to - * @param len number of bytes to write - * @param retlen pointer to variable to store the number of written bytes - * @param buf the data to write - * - * Write with ECC - */ -static int onenand_write(struct mtd_info *mtd, loff_t to, size_t len, - size_t *retlen, const u_char *buf) -{ - struct mtd_oob_ops ops = { - .len = len, - .ooblen = 0, - .datbuf = (u_char *) buf, - .oobbuf = NULL, - }; - int ret; - - onenand_get_device(mtd, FL_WRITING); - ret = onenand_write_ops_nolock(mtd, to, &ops); - onenand_release_device(mtd); - - *retlen = ops.retlen; - return ret; -} - /** * onenand_write_oob - [MTD Interface] NAND write data and/or out-of-band * @param mtd: MTD device structure @@ -4038,8 +3977,6 @@ int onenand_scan(struct mtd_info *mtd, int maxchips) mtd->_erase = onenand_erase; mtd->_point = NULL; mtd->_unpoint = NULL; - mtd->_read = onenand_read; - mtd->_write = onenand_write; mtd->_read_oob = onenand_read_oob; mtd->_write_oob = onenand_write_oob; mtd->_panic_write = onenand_panic_write; -- cgit v1.2.3 From 0aede42e98e0dfc64534617332b6a120cfcfe850 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 9 Jan 2018 09:50:35 +0100 Subject: mtd: Remove duplicate checks on mtd_oob_ops parameter Some of the check done in custom ->_read/write_oob() implementation are already done by the core (in mtd_check_oob_ops()). 
Suggested-by: Peter Pan [Remove redundant checks done in mtdpart.c] Signed-off-by: Boris Brezillon Tested-by: Ladislav Michl --- drivers/mtd/devices/docg3.c | 5 ----- drivers/mtd/mtdpart.c | 23 ------------------- drivers/mtd/nand/nand_base.c | 45 -------------------------------------- drivers/mtd/onenand/onenand_base.c | 18 --------------- 4 files changed, 91 deletions(-) diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index 5fb5e93d1547..a85af236b44d 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -904,9 +904,6 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t from, if (ooblen % DOC_LAYOUT_OOB_SIZE) return -EINVAL; - if (from + len > mtd->size) - return -EINVAL; - ops->oobretlen = 0; ops->retlen = 0; ret = 0; @@ -1441,8 +1438,6 @@ static int doc_write_oob(struct mtd_info *mtd, loff_t ofs, if (len && ooblen && (len / DOC_LAYOUT_PAGE_SIZE) != (ooblen / oobdelta)) return -EINVAL; - if (ofs + len > mtd->size) - return -EINVAL; ops->oobretlen = 0; ops->retlen = 0; diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index dd28cb0de2c8..76cd21d1171b 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -108,25 +108,6 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_ecc_stats stats; int res; - if (from >= mtd->size) - return -EINVAL; - if (ops->datbuf && from + ops->len > mtd->size) - return -EINVAL; - - /* - * If OOB is also requested, make sure that we do not read past the end - * of this partition. 
- */ - if (ops->oobbuf) { - size_t len, pages; - - len = mtd_oobavail(mtd, ops); - pages = mtd_div_by_ws(mtd->size, mtd); - pages -= mtd_div_by_ws(from, mtd); - if (ops->ooboffs + ops->ooblen > pages * len) - return -EINVAL; - } - stats = part->parent->ecc_stats; res = part->parent->_read_oob(part->parent, from + part->offset, ops); if (unlikely(mtd_is_eccerr(res))) @@ -191,10 +172,6 @@ static int part_write_oob(struct mtd_info *mtd, loff_t to, { struct mtd_part *part = mtd_to_part(mtd); - if (to >= mtd->size) - return -EINVAL; - if (ops->datbuf && to + ops->len > mtd->size) - return -EINVAL; return part->parent->_write_oob(part->parent, to + part->offset, ops); } diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 889ceadbf607..e7ec55b1d368 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2187,21 +2187,6 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from, len = mtd_oobavail(mtd, ops); - if (unlikely(ops->ooboffs >= len)) { - pr_debug("%s: attempt to start read outside oob\n", - __func__); - return -EINVAL; - } - - /* Do not allow reads past end of device */ - if (unlikely(from >= mtd->size || - ops->ooboffs + readlen > ((mtd->size >> chip->page_shift) - - (from >> chip->page_shift)) * len)) { - pr_debug("%s: attempt to read beyond end of device\n", - __func__); - return -EINVAL; - } - chipnr = (int)(from >> chip->chip_shift); chip->select_chip(mtd, chipnr); @@ -2272,13 +2257,6 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from, ops->retlen = 0; - /* Do not allow reads past end of device */ - if (ops->datbuf && (from + ops->len) > mtd->size) { - pr_debug("%s: attempt to read beyond end of device\n", - __func__); - return -EINVAL; - } - if (ops->mode != MTD_OPS_PLACE_OOB && ops->mode != MTD_OPS_AUTO_OOB && ops->mode != MTD_OPS_RAW) @@ -2820,22 +2798,6 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, return -EINVAL; } - if (unlikely(ops->ooboffs >= len)) { - 
pr_debug("%s: attempt to start write outside oob\n", - __func__); - return -EINVAL; - } - - /* Do not allow write past end of device */ - if (unlikely(to >= mtd->size || - ops->ooboffs + ops->ooblen > - ((mtd->size >> chip->page_shift) - - (to >> chip->page_shift)) * len)) { - pr_debug("%s: attempt to write beyond end of device\n", - __func__); - return -EINVAL; - } - chipnr = (int)(to >> chip->chip_shift); /* @@ -2891,13 +2853,6 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to, ops->retlen = 0; - /* Do not allow writes past end of device */ - if (ops->datbuf && (to + ops->len) > mtd->size) { - pr_debug("%s: attempt to write beyond end of device\n", - __func__); - return -EINVAL; - } - nand_get_device(mtd, FL_WRITING); switch (ops->mode) { diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 050ba8a87543..979f4031f23c 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -1383,15 +1383,6 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from, return -EINVAL; } - /* Do not allow reads past end of device */ - if (unlikely(from >= mtd->size || - column + len > ((mtd->size >> this->page_shift) - - (from >> this->page_shift)) * oobsize)) { - printk(KERN_ERR "%s: Attempted to read beyond end of device\n", - __func__); - return -EINVAL; - } - stats = mtd->ecc_stats; readcmd = ONENAND_IS_4KB_PAGE(this) ? ONENAND_CMD_READ : ONENAND_CMD_READOOB; @@ -2024,15 +2015,6 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to, return -EINVAL; } - /* Do not allow reads past end of device */ - if (unlikely(to >= mtd->size || - column + len > ((mtd->size >> this->page_shift) - - (to >> this->page_shift)) * oobsize)) { - printk(KERN_ERR "%s: Attempted to write past end of device\n", - __func__); - return -EINVAL; - } - oobbuf = this->oob_buf; oobcmd = ONENAND_IS_4KB_PAGE(this) ? 
ONENAND_CMD_PROG : ONENAND_CMD_PROGOOB; -- cgit v1.2.3 From d8a243af1a68395e07ac85384a2740d4134c67f4 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Mon, 15 Jan 2018 16:31:19 +0100 Subject: can: peak: fix potential bug in packet fragmentation In some rare conditions when running one PEAK USB-FD interface over a non high-speed USB controller, one useless USB fragment might be sent. This patch fixes the way a USB command is fragmented when its length is greater than 64 bytes and when the underlying USB controller is not a high-speed one. Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 7ccdc3e30c98..53d6bb045e9e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) void *cmd_head = pcan_usb_fd_cmd_buffer(dev); int err = 0; u8 *packet_ptr; - int i, n = 1, packet_len; + int packet_len; ptrdiff_t cmd_len; /* usb device unregistered? 
*/ @@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr = cmd_head; + packet_len = cmd_len; /* firmware is not able to re-assemble 512 bytes buffer in full-speed */ - if ((dev->udev->speed != USB_SPEED_HIGH) && - (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) { - packet_len = PCAN_UFD_LOSPD_PKT_SIZE; - n += cmd_len / packet_len; - } else { - packet_len = cmd_len; - } + if (unlikely(dev->udev->speed != USB_SPEED_HIGH)) + packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE); - for (i = 0; i < n; i++) { + do { err = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, PCAN_USBPRO_EP_CMDOUT), @@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail) } packet_ptr += packet_len; - } + cmd_len -= packet_len; + + if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE) + packet_len = cmd_len; + + } while (packet_len > 0); return err; } -- cgit v1.2.3 From 955999c9023290da18230b57df1f04187a43a4c0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 27 Nov 2017 09:02:22 +0100 Subject: m68k/defconfig: Update defconfigs for v4.15-rc1 Signed-off-by: Geert Uytterhoeven --- arch/m68k/configs/amiga_defconfig | 4 +++- arch/m68k/configs/apollo_defconfig | 4 +++- arch/m68k/configs/atari_defconfig | 4 +++- arch/m68k/configs/bvme6000_defconfig | 4 +++- arch/m68k/configs/hp300_defconfig | 4 +++- arch/m68k/configs/mac_defconfig | 4 +++- arch/m68k/configs/multi_defconfig | 4 +++- arch/m68k/configs/mvme147_defconfig | 4 +++- arch/m68k/configs/mvme16x_defconfig | 4 +++- arch/m68k/configs/q40_defconfig | 4 +++- arch/m68k/configs/sun3_defconfig | 4 +++- arch/m68k/configs/sun3x_defconfig | 4 +++- 12 files changed, 36 insertions(+), 12 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 5b5fa9831b4d..e0b285e1e75f 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -454,7 +454,6 @@ CONFIG_PPS_CLIENT_LDISC=m 
CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FB_CIRRUS=y CONFIG_FB_AMIGA=y @@ -595,6 +594,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -624,6 +624,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -653,3 +654,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 72a7764b74ed..3281026a3e15 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -422,7 +422,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -554,6 +553,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -583,6 +583,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -612,3 +613,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 884b43a2f0d9..e943fad480cf 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -437,7 +437,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is 
not set CONFIG_FB=y CONFIG_FB_ATARI=y CONFIG_FRAMEBUFFER_CONSOLE=y @@ -576,6 +575,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -605,6 +605,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -634,3 +635,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index fcfa60d31499..700c2310c336 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -420,7 +420,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -546,6 +545,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -575,6 +575,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -604,3 +605,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 9d597bbbbbfe..271d57fa4301 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -425,7 +425,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -556,6 +555,7 @@ 
CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -585,6 +585,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -614,3 +615,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 45da20d1286c..88761b867975 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -447,7 +447,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y @@ -578,6 +577,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -607,6 +607,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -636,3 +637,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index fda880c10861..7cb35dadf03b 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -504,7 +504,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FB_CIRRUS=y CONFIG_FB_AMIGA=y @@ -658,6 +657,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m 
CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -687,6 +687,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -716,3 +717,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 7d5e4863efec..b139d7b68393 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -420,7 +420,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -546,6 +545,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -575,6 +575,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -604,3 +605,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 7763b71a9c49..398346138769 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -420,7 +420,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -546,6 +545,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -575,6 +575,7 @@ CONFIG_CRYPTO_RMD256=m 
CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -604,3 +605,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 17eaebfa3e19..14c608326f6d 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -437,7 +437,6 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PPS_CLIENT_PARPORT=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -569,6 +568,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -598,6 +598,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -627,3 +628,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index d1cb7a04ae1d..97dec0bf52f1 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -419,7 +419,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -548,6 +547,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -576,6 +576,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m 
CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -605,3 +606,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index ea3a331c62d5..56df28d6d91d 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -419,7 +419,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -# CONFIG_RC_CORE is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -548,6 +547,7 @@ CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_HASH=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m +CONFIG_TEST_FIND_BIT=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_SYSCTL=m CONFIG_TEST_UDELAY=m @@ -577,6 +577,7 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m +CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -606,3 +607,4 @@ CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m +CONFIG_STRING_SELFTEST=m -- cgit v1.2.3 From 2f828fb21df42058084b16d5e07cecdc30dbc3a5 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Avoid array underflow and overflow Check array indices. Avoid sprintf. Use buffers of sufficient size. Use appropriate types for array length parameters. 
Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/nubus.c | 29 +++++++++++++++++------------ drivers/nubus/proc.c | 12 ++++++------ include/linux/nubus.h | 10 ++++------ 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index b793727cd4f7..b6c97e07f15e 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -161,7 +161,7 @@ static unsigned char *nubus_dirptr(const struct nubus_dirent *nd) pointed to with offsets) out of the card ROM. */ void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent *dirent, - int len) + unsigned int len) { unsigned char *t = (unsigned char *)dest; unsigned char *p = nubus_dirptr(dirent); @@ -173,18 +173,22 @@ void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent *dirent, } EXPORT_SYMBOL(nubus_get_rsrc_mem); -void nubus_get_rsrc_str(void *dest, const struct nubus_dirent *dirent, - int len) +void nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, + unsigned int len) { - unsigned char *t = (unsigned char *)dest; + char *t = dest; unsigned char *p = nubus_dirptr(dirent); - while (len) { - *t = nubus_get_rom(&p, 1, dirent->mask); - if (!*t++) + while (len > 1) { + unsigned char c = nubus_get_rom(&p, 1, dirent->mask); + + if (!c) break; + *t++ = c; len--; } + if (len > 0) + *t = '\0'; } EXPORT_SYMBOL(nubus_get_rsrc_str); @@ -468,7 +472,7 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, } case NUBUS_RESID_NAME: { - nubus_get_rsrc_str(dev->name, &ent, 64); + nubus_get_rsrc_str(dev->name, &ent, sizeof(dev->name)); pr_info(" name: %s\n", dev->name); break; } @@ -528,7 +532,7 @@ static int __init nubus_get_vidnames(struct nubus_board *board, /* Don't know what this is yet */ u16 id; /* Longest one I've seen so far is 26 characters */ - char name[32]; + char name[36]; }; pr_info(" video modes supported:\n"); @@ -598,8 +602,8 @@ static int __init nubus_get_vendorinfo(struct 
nubus_board *board, char name[64]; /* These are all strings, we think */ - nubus_get_rsrc_str(name, &ent, 64); - if (ent.type > 5) + nubus_get_rsrc_str(name, &ent, sizeof(name)); + if (ent.type < 1 || ent.type > 5) ent.type = 5; pr_info(" %s: %s\n", vendor_fields[ent.type - 1], name); } @@ -633,7 +637,8 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, break; } case NUBUS_RESID_NAME: - nubus_get_rsrc_str(board->name, &ent, 64); + nubus_get_rsrc_str(board->name, &ent, + sizeof(board->name)); pr_info(" name: %s\n", board->name); break; case NUBUS_RESID_ICON: diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index 004a122ac0ff..fc20dbcd3b9a 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -73,10 +73,10 @@ static void nubus_proc_subdir(struct nubus_dev* dev, /* Some of these are directories, others aren't */ while (nubus_readdir(dir, &ent) != -1) { - char name[8]; + char name[9]; struct proc_dir_entry* e; - sprintf(name, "%x", ent.type); + snprintf(name, sizeof(name), "%x", ent.type); e = proc_create(name, S_IFREG | S_IRUGO | S_IWUSR, parent, &nubus_proc_subdir_fops); if (!e) @@ -95,11 +95,11 @@ static void nubus_proc_populate(struct nubus_dev* dev, /* We know these are all directories (board resource + one or more functional resources) */ while (nubus_readdir(root, &ent) != -1) { - char name[8]; + char name[9]; struct proc_dir_entry* e; struct nubus_dir dir; - sprintf(name, "%x", ent.type); + snprintf(name, sizeof(name), "%x", ent.type); e = proc_mkdir(name, parent); if (!e) return; @@ -119,7 +119,7 @@ int nubus_proc_attach_device(struct nubus_dev *dev) { struct proc_dir_entry *e; struct nubus_dir root; - char name[8]; + char name[9]; if (dev == NULL) { printk(KERN_ERR @@ -135,7 +135,7 @@ int nubus_proc_attach_device(struct nubus_dev *dev) } /* Create a directory */ - sprintf(name, "%x", dev->board->slot); + snprintf(name, sizeof(name), "%x", dev->board->slot); e = dev->procdir = proc_mkdir(name, 
proc_bus_nubus_dir); if (!e) return -ENOMEM; diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 11ce6b1117a8..d8d63370a28c 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -126,10 +126,8 @@ int nubus_rewinddir(struct nubus_dir* dir); /* Things to do with directory entries */ int nubus_get_subdir(const struct nubus_dirent* ent, struct nubus_dir* dir); -void nubus_get_rsrc_mem(void* dest, - const struct nubus_dirent *dirent, - int len); -void nubus_get_rsrc_str(void* dest, - const struct nubus_dirent *dirent, - int maxlen); +void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent *dirent, + unsigned int len); +void nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, + unsigned int maxlen); #endif /* LINUX_NUBUS_H */ -- cgit v1.2.3 From 1ff2775a32ef105d9bdbb5f00f20293244a2accc Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Fix up header split Due to the '#ifdef __KERNEL__' being located in the wrong place, some definitions from the kernel API were placed in the UAPI header during the scripted header split. Fix this. Also, remove the duplicate comment which is only relevant to the UAPI header. Fixes: 607ca46e97a1 ("UAPI: (Scripted) Disintegrate include/linux") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- include/linux/nubus.h | 27 +++++++++++++++++++++++---- include/uapi/linux/nubus.h | 23 ----------------------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/include/linux/nubus.h b/include/linux/nubus.h index d8d63370a28c..55b9a4569a69 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -5,16 +5,28 @@ Originally written by Alan Cox. Hacked to death by C. Scott Ananian and David Huggins-Daines. - - Some of the constants in here are from the corresponding - NetBSD/OpenBSD header file, by Allen Briggs. We figured out the - rest of them on our own. 
*/ +*/ + #ifndef LINUX_NUBUS_H #define LINUX_NUBUS_H #include #include +struct nubus_dir { + unsigned char *base; + unsigned char *ptr; + int done; + int mask; +}; + +struct nubus_dirent { + unsigned char *base; + unsigned char type; + __u32 data; /* Actually 24 bits used */ + int mask; +}; + struct nubus_board { struct nubus_board* next; struct nubus_dev* first_dev; @@ -130,4 +142,11 @@ void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent *dirent, unsigned int len); void nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, unsigned int maxlen); + +/* Returns a pointer to the "standard" slot space. */ +static inline void *nubus_slot_addr(int slot) +{ + return (void *)(0xF0000000 | (slot << 24)); +} + #endif /* LINUX_NUBUS_H */ diff --git a/include/uapi/linux/nubus.h b/include/uapi/linux/nubus.h index f3776cc80f4d..48031e7858f1 100644 --- a/include/uapi/linux/nubus.h +++ b/include/uapi/linux/nubus.h @@ -221,27 +221,4 @@ enum nubus_display_res_id { NUBUS_RESID_SIXTHMODE = 0x0085 }; -struct nubus_dir -{ - unsigned char *base; - unsigned char *ptr; - int done; - int mask; -}; - -struct nubus_dirent -{ - unsigned char *base; - unsigned char type; - __u32 data; /* Actually 24bits used */ - int mask; -}; - - -/* We'd like to get rid of this eventually. Only daynaport.c uses it now. 
*/ -static inline void *nubus_slot_addr(int slot) -{ - return (void *)(0xF0000000|(slot<<24)); -} - #endif /* _UAPILINUX_NUBUS_H */ -- cgit v1.2.3 From 460cf95e8b6cda2823a6432253ae91d3e1e7a021 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Use static functions where possible This fixes a couple of warnings from 'make W=1': drivers/nubus/nubus.c:790: warning: no previous prototype for 'nubus_probe_slot' drivers/nubus/nubus.c:824: warning: no previous prototype for 'nubus_scan_bus' Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/nubus.c | 4 ++-- include/linux/nubus.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index b6c97e07f15e..35056cee94b1 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -793,7 +793,7 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) return board; } -void __init nubus_probe_slot(int slot) +static void __init nubus_probe_slot(int slot) { unsigned char dp; unsigned char *rp; @@ -827,7 +827,7 @@ void __init nubus_probe_slot(int slot) } } -void __init nubus_scan_bus(void) +static void __init nubus_scan_bus(void) { int slot; diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 55b9a4569a69..e525669f1991 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -92,7 +92,6 @@ extern struct nubus_dev* nubus_devices; extern struct nubus_board* nubus_boards; /* Generic NuBus interface functions, modelled after the PCI interface */ -void nubus_scan_bus(void); #ifdef CONFIG_PROC_FS extern void nubus_proc_init(void); #else -- cgit v1.2.3 From f53bad0881a35c45d89bd8c552dc0498b96403c5 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Fix log spam Testing shows that a single Radius PrecisionColor 24X display board, which has 95 functional resources, produces over a thousand lines of 
log messages. Suppress these messages with pr_debug(). Remove some redundant messages relating to nubus_get_subdir() calls. Fix the format block debug messages as the sequence of entries is backwards (my bad). Move the "scanning slots" message to its proper location. Fixes: 71ae40e4cf33 ("nubus: Clean up printk calls") Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/nubus.c | 129 ++++++++++++++++++++++---------------------------- 1 file changed, 56 insertions(+), 73 deletions(-) diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index 35056cee94b1..ef3a115920ca 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -353,15 +353,15 @@ static int __init nubus_show_display_resource(struct nubus_dev *dev, { switch (ent->type) { case NUBUS_RESID_GAMMADIR: - pr_info(" gamma directory offset: 0x%06x\n", ent->data); + pr_debug(" gamma directory offset: 0x%06x\n", ent->data); break; case 0x0080 ... 0x0085: - pr_info(" mode %02X info offset: 0x%06x\n", - ent->type, ent->data); + pr_debug(" mode 0x%02x info offset: 0x%06x\n", + ent->type, ent->data); break; default: - pr_info(" unknown resource %02X, data 0x%06x\n", - ent->type, ent->data); + pr_debug(" unknown resource 0x%02x, data 0x%06x\n", + ent->type, ent->data); } return 0; } @@ -375,12 +375,12 @@ static int __init nubus_show_network_resource(struct nubus_dev *dev, char addr[6]; nubus_get_rsrc_mem(addr, ent, 6); - pr_info(" MAC address: %pM\n", addr); + pr_debug(" MAC address: %pM\n", addr); break; } default: - pr_info(" unknown resource %02X, data 0x%06x\n", - ent->type, ent->data); + pr_debug(" unknown resource 0x%02x, data 0x%06x\n", + ent->type, ent->data); } return 0; } @@ -394,8 +394,8 @@ static int __init nubus_show_cpu_resource(struct nubus_dev *dev, unsigned long meminfo[2]; nubus_get_rsrc_mem(&meminfo, ent, 8); - pr_info(" memory: [ 0x%08lx 0x%08lx ]\n", - meminfo[0], meminfo[1]); + pr_debug(" memory: [ 0x%08lx 0x%08lx ]\n", + meminfo[0], 
meminfo[1]); break; } case NUBUS_RESID_ROMINFO: @@ -403,13 +403,13 @@ static int __init nubus_show_cpu_resource(struct nubus_dev *dev, unsigned long rominfo[2]; nubus_get_rsrc_mem(&rominfo, ent, 8); - pr_info(" ROM: [ 0x%08lx 0x%08lx ]\n", - rominfo[0], rominfo[1]); + pr_debug(" ROM: [ 0x%08lx 0x%08lx ]\n", + rominfo[0], rominfo[1]); break; } default: - pr_info(" unknown resource %02X, data 0x%06x\n", - ent->type, ent->data); + pr_debug(" unknown resource 0x%02x, data 0x%06x\n", + ent->type, ent->data); } return 0; } @@ -428,8 +428,8 @@ static int __init nubus_show_private_resource(struct nubus_dev *dev, nubus_show_cpu_resource(dev, ent); break; default: - pr_info(" unknown resource %02X, data 0x%06x\n", - ent->type, ent->data); + pr_debug(" unknown resource 0x%02x, data 0x%06x\n", + ent->type, ent->data); } return 0; } @@ -442,12 +442,9 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, struct nubus_dirent ent; struct nubus_dev *dev; - pr_info(" Function 0x%02x:\n", parent->type); + pr_debug(" Functional resource 0x%02x:\n", parent->type); nubus_get_subdir(parent, &dir); - pr_debug("%s: parent is 0x%p, dir is 0x%p\n", - __func__, parent->base, dir.base); - /* Actually we should probably panic if this fails */ if ((dev = kzalloc(sizeof(*dev), GFP_ATOMIC)) == NULL) return NULL; @@ -466,14 +463,14 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, dev->type = nbtdata[1]; dev->dr_sw = nbtdata[2]; dev->dr_hw = nbtdata[3]; - pr_info(" type: [cat 0x%x type 0x%x sw 0x%x hw 0x%x]\n", - nbtdata[0], nbtdata[1], nbtdata[2], nbtdata[3]); + pr_debug(" type: [cat 0x%x type 0x%x sw 0x%x hw 0x%x]\n", + nbtdata[0], nbtdata[1], nbtdata[2], nbtdata[3]); break; } case NUBUS_RESID_NAME: { nubus_get_rsrc_str(dev->name, &ent, sizeof(dev->name)); - pr_info(" name: %s\n", dev->name); + pr_debug(" name: %s\n", dev->name); break; } case NUBUS_RESID_DRVRDIR: @@ -486,7 +483,7 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, 
nubus_get_subdir(&ent, &drvr_dir); nubus_readdir(&drvr_dir, &drvr_ent); dev->driver = nubus_dirptr(&drvr_ent); - pr_info(" driver at: 0x%p\n", dev->driver); + pr_debug(" driver at: 0x%p\n", dev->driver); break; } case NUBUS_RESID_MINOR_BASEOS: @@ -494,20 +491,20 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, multiple framebuffers. It might be handy for Ethernet as well */ nubus_get_rsrc_mem(&dev->iobase, &ent, 4); - pr_info(" memory offset: 0x%08lx\n", dev->iobase); + pr_debug(" memory offset: 0x%08lx\n", dev->iobase); break; case NUBUS_RESID_MINOR_LENGTH: /* Ditto */ nubus_get_rsrc_mem(&dev->iosize, &ent, 4); - pr_info(" memory length: 0x%08lx\n", dev->iosize); + pr_debug(" memory length: 0x%08lx\n", dev->iosize); break; case NUBUS_RESID_FLAGS: dev->flags = ent.data; - pr_info(" flags: 0x%06x\n", dev->flags); + pr_debug(" flags: 0x%06x\n", dev->flags); break; case NUBUS_RESID_HWDEVID: dev->hwdevid = ent.data; - pr_info(" hwdevid: 0x%06x\n", dev->hwdevid); + pr_debug(" hwdevid: 0x%06x\n", dev->hwdevid); break; default: /* Local/Private resources have their own @@ -535,10 +532,8 @@ static int __init nubus_get_vidnames(struct nubus_board *board, char name[36]; }; - pr_info(" video modes supported:\n"); + pr_debug(" video modes supported:\n"); nubus_get_subdir(parent, &dir); - pr_debug("%s: parent is 0x%p, dir is 0x%p\n", - __func__, parent->base, dir.base); while (nubus_readdir(&dir, &ent) != -1) { struct vidmode mode; @@ -552,7 +547,7 @@ static int __init nubus_get_vidnames(struct nubus_board *board, size = sizeof(mode) - 1; memset(&mode, 0, sizeof(mode)); nubus_get_rsrc_mem(&mode, &ent, size); - pr_info(" %02X: (%02X) %s\n", ent.type, + pr_debug(" 0x%02x: 0x%04x %s\n", ent.type, mode.id, mode.name); } return 0; @@ -563,25 +558,16 @@ static int __init nubus_get_icon(struct nubus_board *board, const struct nubus_dirent *ent) { /* Should be 32x32 if my memory serves me correctly */ - unsigned char icon[128]; - int x, y; + u32 icon[32]; + int i; 
nubus_get_rsrc_mem(&icon, ent, 128); - pr_info(" icon:\n"); - - /* We should actually plot these somewhere in the framebuffer - init. This is just to demonstrate that they do, in fact, - exist */ - for (y = 0; y < 32; y++) { - pr_info(" "); - for (x = 0; x < 32; x++) { - if (icon[y * 4 + x / 8] & (0x80 >> (x % 8))) - pr_cont("*"); - else - pr_cont(" "); - } - pr_cont("\n"); - } + pr_debug(" icon:\n"); + for (i = 0; i < 8; i++) + pr_debug(" %08x %08x %08x %08x\n", + icon[i * 4 + 0], icon[i * 4 + 1], + icon[i * 4 + 2], icon[i * 4 + 3]); + return 0; } @@ -593,10 +579,8 @@ static int __init nubus_get_vendorinfo(struct nubus_board *board, static char *vendor_fields[6] = { "ID", "serial", "revision", "part", "date", "unknown field" }; - pr_info(" vendor info:\n"); + pr_debug(" vendor info:\n"); nubus_get_subdir(parent, &dir); - pr_debug("%s: parent is 0x%p, dir is 0x%p\n", - __func__, parent->base, dir.base); while (nubus_readdir(&dir, &ent) != -1) { char name[64]; @@ -605,7 +589,7 @@ static int __init nubus_get_vendorinfo(struct nubus_board *board, nubus_get_rsrc_str(name, &ent, sizeof(name)); if (ent.type < 1 || ent.type > 5) ent.type = 5; - pr_info(" %s: %s\n", vendor_fields[ent.type - 1], name); + pr_debug(" %s: %s\n", vendor_fields[ent.type - 1], name); } return 0; } @@ -616,9 +600,8 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, struct nubus_dir dir; struct nubus_dirent ent; + pr_debug(" Board resource 0x%02x:\n", parent->type); nubus_get_subdir(parent, &dir); - pr_debug("%s: parent is 0x%p, dir is 0x%p\n", - __func__, parent->base, dir.base); while (nubus_readdir(&dir, &ent) != -1) { switch (ent.type) { @@ -629,8 +612,8 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, useful except insofar as it tells us that we really are looking at a board resource. 
*/ nubus_get_rsrc_mem(nbtdata, &ent, 8); - pr_info(" type: [cat 0x%x type 0x%x sw 0x%x hw 0x%x]\n", - nbtdata[0], nbtdata[1], nbtdata[2], nbtdata[3]); + pr_debug(" type: [cat 0x%x type 0x%x sw 0x%x hw 0x%x]\n", + nbtdata[0], nbtdata[1], nbtdata[2], nbtdata[3]); if (nbtdata[0] != 1 || nbtdata[1] != 0 || nbtdata[2] != 0 || nbtdata[3] != 0) pr_err("this sResource is not a board resource!\n"); @@ -639,28 +622,29 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, case NUBUS_RESID_NAME: nubus_get_rsrc_str(board->name, &ent, sizeof(board->name)); - pr_info(" name: %s\n", board->name); + pr_debug(" name: %s\n", board->name); break; case NUBUS_RESID_ICON: nubus_get_icon(board, &ent); break; case NUBUS_RESID_BOARDID: - pr_info(" board id: 0x%x\n", ent.data); + pr_debug(" board id: 0x%x\n", ent.data); break; case NUBUS_RESID_PRIMARYINIT: - pr_info(" primary init offset: 0x%06x\n", ent.data); + pr_debug(" primary init offset: 0x%06x\n", ent.data); break; case NUBUS_RESID_VENDORINFO: nubus_get_vendorinfo(board, &ent); break; case NUBUS_RESID_FLAGS: - pr_info(" flags: 0x%06x\n", ent.data); + pr_debug(" flags: 0x%06x\n", ent.data); break; case NUBUS_RESID_HWDEVID: - pr_info(" hwdevid: 0x%06x\n", ent.data); + pr_debug(" hwdevid: 0x%06x\n", ent.data); break; case NUBUS_RESID_SECONDINIT: - pr_info(" secondary init offset: 0x%06x\n", ent.data); + pr_debug(" secondary init offset: 0x%06x\n", + ent.data); break; /* WTF isn't this in the functional resources? 
*/ case NUBUS_RESID_VIDNAMES: @@ -668,12 +652,12 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, break; /* Same goes for this */ case NUBUS_RESID_VIDMODES: - pr_info(" video mode parameter directory offset: 0x%06x\n", - ent.data); + pr_debug(" video mode parameter directory offset: 0x%06x\n", + ent.data); break; default: - pr_info(" unknown resource %02X, data 0x%06x\n", - ent.type, ent.data); + pr_debug(" unknown resource 0x%02x, data 0x%06x\n", + ent.type, ent.data); } } return 0; @@ -700,14 +684,14 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) /* Dump the format block for debugging purposes */ pr_debug("Slot %X, format block at 0x%p:\n", slot, rp); + pr_debug("%08lx\n", nubus_get_rom(&rp, 4, bytelanes)); + pr_debug("%08lx\n", nubus_get_rom(&rp, 4, bytelanes)); + pr_debug("%08lx\n", nubus_get_rom(&rp, 4, bytelanes)); pr_debug("%02lx\n", nubus_get_rom(&rp, 1, bytelanes)); pr_debug("%02lx\n", nubus_get_rom(&rp, 1, bytelanes)); pr_debug("%08lx\n", nubus_get_rom(&rp, 4, bytelanes)); pr_debug("%02lx\n", nubus_get_rom(&rp, 1, bytelanes)); pr_debug("%02lx\n", nubus_get_rom(&rp, 1, bytelanes)); - pr_debug("%08lx\n", nubus_get_rom(&rp, 4, bytelanes)); - pr_debug("%08lx\n", nubus_get_rom(&rp, 4, bytelanes)); - pr_debug("%08lx\n", nubus_get_rom(&rp, 4, bytelanes)); rp = board->fblock; board->slot = slot; @@ -747,7 +731,7 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) nubus_get_root_dir(board, &dir); /* We're ready to rock */ - pr_info("Slot %X:\n", slot); + pr_debug("Slot %X resources:\n", slot); /* Each slot should have one board resource and any number of functional resources. 
So we'll fill in some fields in the @@ -759,7 +743,6 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) pr_err("Board resource not found!\n"); return NULL; } else { - pr_info(" Board resource:\n"); nubus_get_board_resource(board, slot, &ent); } @@ -831,6 +814,7 @@ static void __init nubus_scan_bus(void) { int slot; + pr_info("NuBus: Scanning NuBus slots.\n"); for (slot = 9; slot < 15; slot++) { nubus_probe_slot(slot); } @@ -841,7 +825,6 @@ static int __init nubus_init(void) if (!MACH_IS_MAC) return 0; - pr_info("NuBus: Scanning NuBus slots.\n"); nubus_devices = NULL; nubus_boards = NULL; nubus_scan_bus(); -- cgit v1.2.3 From d7811a3678eac540a43d9fd3dda7867280844836 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Validate slot resource IDs While we are here, include the slot number in the related error messages. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/nubus.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index ef3a115920ca..e7c7e49a074a 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -616,7 +616,8 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, nbtdata[0], nbtdata[1], nbtdata[2], nbtdata[3]); if (nbtdata[0] != 1 || nbtdata[1] != 0 || nbtdata[2] != 0 || nbtdata[3] != 0) - pr_err("this sResource is not a board resource!\n"); + pr_err("Slot %X: sResource is not a board resource!\n", + slot); break; } case NUBUS_RESID_NAME: @@ -672,6 +673,7 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) unsigned long dpat; struct nubus_dir dir; struct nubus_dirent ent; + int prev_resid = -1; /* Move to the start of the format block */ rp = nubus_rom_addr(slot); @@ -711,10 +713,10 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) /* Directory 
offset should be small and negative... */ if (!(board->doffset & 0x00FF0000)) - pr_warn("Dodgy doffset!\n"); + pr_warn("Slot %X: Dodgy doffset!\n", slot); dpat = nubus_get_rom(&rp, 4, bytelanes); if (dpat != NUBUS_TEST_PATTERN) - pr_warn("Wrong test pattern %08lx!\n", dpat); + pr_warn("Slot %X: Wrong test pattern %08lx!\n", slot, dpat); /* * I wonder how the CRC is meant to work - @@ -740,12 +742,15 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) for each of them. */ if (nubus_readdir(&dir, &ent) == -1) { /* We can't have this! */ - pr_err("Board resource not found!\n"); + pr_err("Slot %X: Board resource not found!\n", slot); return NULL; - } else { - nubus_get_board_resource(board, slot, &ent); } + if (ent.type < 1 || ent.type > 127) + pr_warn("Slot %X: Board resource ID is invalid!\n", slot); + + nubus_get_board_resource(board, slot, &ent); + while (nubus_readdir(&dir, &ent) != -1) { struct nubus_dev *dev; struct nubus_dev **devp; @@ -754,6 +759,15 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) if (dev == NULL) continue; + /* Resources should appear in ascending ID order. This sanity + * check prevents duplicate resource IDs. + */ + if (dev->resid <= prev_resid) { + kfree(dev); + continue; + } + prev_resid = dev->resid; + /* We zeroed this out above */ if (board->first_dev == NULL) board->first_dev = dev; -- cgit v1.2.3 From 6c8b89ea55c9d53979e2be7977e945edba100359 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Call proc_mkdir() not more than once per slot directory This patch fixes the following WARNING. 
proc_dir_entry 'nubus/a' already registered Modules linked in: CPU: 0 PID: 1 Comm: swapper Tainted: G W 4.13.0-00036-gd57552077387 #1 Stack from 01c1bd9c: 01c1bd9c 003c2c8b 01c1bdc0 0001b0fe 00000000 00322f4a 01c43a20 01c43b0c 01c8c420 01c1bde8 0001b1b8 003a4ac3 00000148 000faa26 00000009 00000000 01c1bde0 003a4b6c 01c1bdfc 01c1be20 000faa26 003a4ac3 00000148 003a4b6c 01c43a71 01c8c471 01c10000 00326430 0043d00c 00000005 01c71a00 0020bce0 00322964 01c1be38 000fac04 01c43a20 01c8c420 01c1bee0 01c8c420 01c1be50 000fac4c 01c1bee0 00000000 01c43a20 00000000 01c1bee8 0020bd26 01c1bee0 Call Trace: [<0001b0fe>] __warn+0xae/0xde [<00322f4a>] memcmp+0x0/0x5c [<0001b1b8>] warn_slowpath_fmt+0x2e/0x36 [<000faa26>] proc_register+0xbe/0xd8 [<000faa26>] proc_register+0xbe/0xd8 [<00326430>] sprintf+0x0/0x20 [<0020bce0>] nubus_proc_attach_device+0x0/0x1b8 [<00322964>] strcpy+0x0/0x22 [<000fac04>] proc_mkdir_data+0x64/0x96 [<000fac4c>] proc_mkdir+0x16/0x1c [<0020bd26>] nubus_proc_attach_device+0x46/0x1b8 [<0020bce0>] nubus_proc_attach_device+0x0/0x1b8 [<00322964>] strcpy+0x0/0x22 [<00001ba6>] kernel_pg_dir+0xba6/0x1000 [<004339a2>] proc_bus_nubus_add_devices+0x1a/0x2e [<000faa40>] proc_create_data+0x0/0xf2 [<0003297c>] parse_args+0x0/0x2d4 [<00433a08>] nubus_proc_init+0x52/0x5a [<00433944>] nubus_init+0x0/0x44 [<00433982>] nubus_init+0x3e/0x44 [<000020dc>] do_one_initcall+0x38/0x196 [<000020a4>] do_one_initcall+0x0/0x196 [<0003297c>] parse_args+0x0/0x2d4 [<00322964>] strcpy+0x0/0x22 [<00040004>] __up_read+0xe/0x40 [<004231d4>] repair_env_string+0x0/0x7a [<0042312e>] kernel_init_freeable+0xee/0x194 [<00423146>] kernel_init_freeable+0x106/0x194 [<00433944>] nubus_init+0x0/0x44 [<000a6000>] kfree+0x0/0x156 [<0032768c>] kernel_init+0x0/0xda [<00327698>] kernel_init+0xc/0xda [<0032768c>] kernel_init+0x0/0xda [<00002a90>] ret_from_kernel_thread+0xc/0x14 ---[ end trace 14a6d619908ea253 ]--- ------------[ cut here ]------------ This gets repeated with each additional functional resource.
The problem here is the call to proc_mkdir() when the directory already exists. Each nubus_board gets a directory, such as /proc/bus/nubus/s/ where s is the hex slot number. Therefore, store the 'procdir' pointer in struct nubus_board instead of struct nubus_dev. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/proc.c | 6 +++++- include/linux/nubus.h | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index fc20dbcd3b9a..91211192f36f 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -134,9 +134,13 @@ int nubus_proc_attach_device(struct nubus_dev *dev) return -1; } + if (dev->board->procdir) + return 0; + /* Create a directory */ snprintf(name, sizeof(name), "%x", dev->board->slot); - e = dev->procdir = proc_mkdir(name, proc_bus_nubus_dir); + e = proc_mkdir(name, proc_bus_nubus_dir); + dev->board->procdir = e; if (!e) return -ENOMEM; diff --git a/include/linux/nubus.h b/include/linux/nubus.h index e525669f1991..2245430e1357 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -53,13 +53,14 @@ struct nubus_board { unsigned char rev; unsigned char format; unsigned char lanes; + + /* Directory entry in /proc/bus/nubus */ + struct proc_dir_entry *procdir; }; struct nubus_dev { /* Next link in device list */ struct nubus_dev* next; - /* Directory entry in /proc/bus/nubus */ - struct proc_dir_entry* procdir; /* The functional resource ID of this device */ unsigned char resid; -- cgit v1.2.3 From 9f97977deb22e602f91047a105b961ffb36adc2b Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Remove redundant code Eliminate unused values from struct nubus_dev to save wasted memory (a Radius PrecisionColor 24X card has about 95 functional resources and up to six such cards may be fitted). 
Also remove redundant static variable initialization, an unreachable !MACH_IS_MAC conditional, the unused nubus_find_device() function, the bogus get_nubus_list() prototype and the pointless card_present temporary variable. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/nubus.c | 57 ++++++++++++++++++++------------------------------- drivers/nubus/proc.c | 2 -- include/linux/nubus.h | 17 +-------------- 3 files changed, 23 insertions(+), 53 deletions(-) diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index e7c7e49a074a..4ae5c420f13f 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -281,23 +281,6 @@ EXPORT_SYMBOL(nubus_rewinddir); /* Driver interface functions, more or less like in pci.c */ -struct nubus_dev* -nubus_find_device(unsigned short category, unsigned short type, - unsigned short dr_hw, unsigned short dr_sw, - const struct nubus_dev *from) -{ - struct nubus_dev *itor = from ? from->next : nubus_devices; - - while (itor) { - if (itor->category == category && itor->type == type && - itor->dr_hw == dr_hw && itor->dr_sw == dr_sw) - return itor; - itor = itor->next; - } - return NULL; -} -EXPORT_SYMBOL(nubus_find_device); - struct nubus_dev* nubus_find_type(unsigned short category, unsigned short type, const struct nubus_dev *from) @@ -469,8 +452,10 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, } case NUBUS_RESID_NAME: { - nubus_get_rsrc_str(dev->name, &ent, sizeof(dev->name)); - pr_debug(" name: %s\n", dev->name); + char name[64]; + + nubus_get_rsrc_str(name, &ent, sizeof(name)); + pr_debug(" name: %s\n", name); break; } case NUBUS_RESID_DRVRDIR: @@ -479,32 +464,39 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, use this :-) */ struct nubus_dir drvr_dir; struct nubus_dirent drvr_ent; + unsigned char *driver; nubus_get_subdir(&ent, &drvr_dir); nubus_readdir(&drvr_dir, &drvr_ent); - dev->driver = nubus_dirptr(&drvr_ent); - pr_debug(" 
driver at: 0x%p\n", dev->driver); + driver = nubus_dirptr(&drvr_ent); + pr_debug(" driver at: 0x%p\n", driver); break; } case NUBUS_RESID_MINOR_BASEOS: + { /* We will need this in order to support multiple framebuffers. It might be handy for Ethernet as well */ - nubus_get_rsrc_mem(&dev->iobase, &ent, 4); - pr_debug(" memory offset: 0x%08lx\n", dev->iobase); + u32 base_offset; + + nubus_get_rsrc_mem(&base_offset, &ent, 4); + pr_debug(" memory offset: 0x%08x\n", base_offset); break; + } case NUBUS_RESID_MINOR_LENGTH: + { /* Ditto */ - nubus_get_rsrc_mem(&dev->iosize, &ent, 4); - pr_debug(" memory length: 0x%08lx\n", dev->iosize); + u32 length; + + nubus_get_rsrc_mem(&length, &ent, 4); + pr_debug(" memory length: 0x%08x\n", length); break; + } case NUBUS_RESID_FLAGS: - dev->flags = ent.data; - pr_debug(" flags: 0x%06x\n", dev->flags); + pr_debug(" flags: 0x%06x\n", ent.data); break; case NUBUS_RESID_HWDEVID: - dev->hwdevid = ent.data; - pr_debug(" hwdevid: 0x%06x\n", dev->hwdevid); + pr_debug(" hwdevid: 0x%06x\n", ent.data); break; default: /* Local/Private resources have their own @@ -798,11 +790,8 @@ static void __init nubus_probe_slot(int slot) rp = nubus_rom_addr(slot); for (i = 4; i; i--) { - int card_present; - rp--; - card_present = hwreg_present(rp); - if (!card_present) + if (!hwreg_present(rp)) continue; dp = *rp; @@ -839,8 +828,6 @@ static int __init nubus_init(void) if (!MACH_IS_MAC) return 0; - nubus_devices = NULL; - nubus_boards = NULL; nubus_scan_bus(); nubus_proc_init(); return 0; diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index 91211192f36f..41ec859bdd8b 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -230,8 +230,6 @@ void __init proc_bus_nubus_add_devices(void) void __init nubus_proc_init(void) { proc_create("nubus", 0, NULL, &nubus_proc_fops); - if (!MACH_IS_MAC) - return; proc_bus_nubus_dir = proc_mkdir("bus/nubus", NULL); proc_create("devices", 0, proc_bus_nubus_dir, &nubus_devices_proc_fops); 
proc_bus_nubus_add_devices(); diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 2245430e1357..3c7b236074b3 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -70,16 +70,6 @@ struct nubus_dev { unsigned short type; unsigned short dr_sw; unsigned short dr_hw; - /* This is the device's name rather than the board's. - Sometimes they are different. Usually the board name is - more correct. */ - char name[64]; - /* MacOS driver (I kid you not) */ - unsigned char* driver; - /* Actually this is an offset */ - unsigned long iobase; - unsigned long iosize; - unsigned char flags, hwdevid; /* Functional directory */ unsigned char* directory; @@ -98,14 +88,9 @@ extern void nubus_proc_init(void); #else static inline void nubus_proc_init(void) {} #endif -int get_nubus_list(char *buf); + int nubus_proc_attach_device(struct nubus_dev *dev); /* If we need more precision we can add some more of these */ -struct nubus_dev* nubus_find_device(unsigned short category, - unsigned short type, - unsigned short dr_hw, - unsigned short dr_sw, - const struct nubus_dev* from); struct nubus_dev* nubus_find_type(unsigned short category, unsigned short type, const struct nubus_dev* from); -- cgit v1.2.3 From 4bccc4b629de3af24308a24c41aff6270a6404aa Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Clean up whitespace Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- include/linux/nubus.h | 58 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 3c7b236074b3..2d6f04055ebe 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -28,9 +28,9 @@ struct nubus_dirent { }; struct nubus_board { - struct nubus_board* next; - struct nubus_dev* first_dev; - + struct nubus_board *next; + struct nubus_dev *first_dev; + /* Only 9-E actually exist, though 0-8 are also 
theoretically possible, and 0 is a special case which represents the motherboard and onboard peripherals (Ethernet, video) */ @@ -39,10 +39,10 @@ struct nubus_board { char name[64]; /* Format block */ - unsigned char* fblock; + unsigned char *fblock; /* Root directory (does *not* always equal fblock + doffset!) */ - unsigned char* directory; - + unsigned char *directory; + unsigned long slot_addr; /* Offset to root directory (sometimes) */ unsigned long doffset; @@ -60,7 +60,7 @@ struct nubus_board { struct nubus_dev { /* Next link in device list */ - struct nubus_dev* next; + struct nubus_dev *next; /* The functional resource ID of this device */ unsigned char resid; @@ -70,17 +70,17 @@ struct nubus_dev { unsigned short type; unsigned short dr_sw; unsigned short dr_hw; - + /* Functional directory */ - unsigned char* directory; + unsigned char *directory; /* Much of our info comes from here */ - struct nubus_board* board; + struct nubus_board *board; }; /* This is all NuBus devices (used to find devices later on) */ -extern struct nubus_dev* nubus_devices; +extern struct nubus_dev *nubus_devices; /* This is all NuBus cards */ -extern struct nubus_board* nubus_boards; +extern struct nubus_board *nubus_boards; /* Generic NuBus interface functions, modelled after the PCI interface */ #ifdef CONFIG_PROC_FS @@ -91,38 +91,38 @@ static inline void nubus_proc_init(void) {} int nubus_proc_attach_device(struct nubus_dev *dev); /* If we need more precision we can add some more of these */ -struct nubus_dev* nubus_find_type(unsigned short category, +struct nubus_dev *nubus_find_type(unsigned short category, unsigned short type, - const struct nubus_dev* from); + const struct nubus_dev *from); /* Might have more than one device in a slot, you know... */ -struct nubus_dev* nubus_find_slot(unsigned int slot, - const struct nubus_dev* from); +struct nubus_dev *nubus_find_slot(unsigned int slot, + const struct nubus_dev *from); /* These are somewhat more NuBus-specific. 
They all return 0 for success and -1 for failure, as you'd expect. */ /* The root directory which contains the board and functional directories */ -int nubus_get_root_dir(const struct nubus_board* board, - struct nubus_dir* dir); +int nubus_get_root_dir(const struct nubus_board *board, + struct nubus_dir *dir); /* The board directory */ -int nubus_get_board_dir(const struct nubus_board* board, - struct nubus_dir* dir); +int nubus_get_board_dir(const struct nubus_board *board, + struct nubus_dir *dir); /* The functional directory */ -int nubus_get_func_dir(const struct nubus_dev* dev, - struct nubus_dir* dir); +int nubus_get_func_dir(const struct nubus_dev *dev, + struct nubus_dir *dir); /* These work on any directory gotten via the above */ -int nubus_readdir(struct nubus_dir* dir, - struct nubus_dirent* ent); -int nubus_find_rsrc(struct nubus_dir* dir, +int nubus_readdir(struct nubus_dir *dir, + struct nubus_dirent *ent); +int nubus_find_rsrc(struct nubus_dir *dir, unsigned char rsrc_type, - struct nubus_dirent* ent); -int nubus_rewinddir(struct nubus_dir* dir); + struct nubus_dirent *ent); +int nubus_rewinddir(struct nubus_dir *dir); /* Things to do with directory entries */ -int nubus_get_subdir(const struct nubus_dirent* ent, - struct nubus_dir* dir); +int nubus_get_subdir(const struct nubus_dirent *ent, + struct nubus_dir *dir); void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent *dirent, unsigned int len); void nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, -- cgit v1.2.3 From 883b8cb31a8546b9921c98b255d5f7779d1bc9f6 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Generalize block resource handling Scrap the specialized code to unpack video mode name resources and driver resources. It isn't useful. Instead, add a re-usable function to handle lists of block resources of any kind, and descend into the video mode table resource directory. 
Rename callers as nubus_get_foo(), consistent with their purpose and with related functions in the same file. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/nubus.c | 123 ++++++++++++++++++++++++++------------------------ 1 file changed, 65 insertions(+), 58 deletions(-) diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index 4ae5c420f13f..c56ac36d91f2 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -331,16 +331,63 @@ EXPORT_SYMBOL(nubus_find_rsrc); among other things. The rest of it should go in the /proc code. For now, we just use it to give verbose boot logs. */ -static int __init nubus_show_display_resource(struct nubus_dev *dev, - const struct nubus_dirent *ent) +static int __init nubus_get_block_rsrc_dir(struct nubus_board *board, + const struct nubus_dirent *parent) +{ + struct nubus_dir dir; + struct nubus_dirent ent; + + nubus_get_subdir(parent, &dir); + + while (nubus_readdir(&dir, &ent) != -1) { + u32 size; + + nubus_get_rsrc_mem(&size, &ent, 4); + pr_debug(" block (0x%x), size %d\n", ent.type, size); + } + return 0; +} + +static int __init nubus_get_display_vidmode(struct nubus_board *board, + const struct nubus_dirent *parent) +{ + struct nubus_dir dir; + struct nubus_dirent ent; + + nubus_get_subdir(parent, &dir); + + while (nubus_readdir(&dir, &ent) != -1) { + switch (ent.type) { + case 1: /* mVidParams */ + case 2: /* mTable */ + { + u32 size; + + nubus_get_rsrc_mem(&size, &ent, 4); + pr_debug(" block (0x%x), size %d\n", ent.type, + size); + break; + } + default: + pr_debug(" unknown resource 0x%02x, data 0x%06x\n", + ent.type, ent.data); + } + } + return 0; +} + +static int __init nubus_get_display_resource(struct nubus_dev *dev, + const struct nubus_dirent *ent) { switch (ent->type) { case NUBUS_RESID_GAMMADIR: pr_debug(" gamma directory offset: 0x%06x\n", ent->data); + nubus_get_block_rsrc_dir(dev->board, ent); break; case 0x0080 ... 
0x0085: pr_debug(" mode 0x%02x info offset: 0x%06x\n", ent->type, ent->data); + nubus_get_display_vidmode(dev->board, ent); break; default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", @@ -349,8 +396,8 @@ static int __init nubus_show_display_resource(struct nubus_dev *dev, return 0; } -static int __init nubus_show_network_resource(struct nubus_dev *dev, - const struct nubus_dirent *ent) +static int __init nubus_get_network_resource(struct nubus_dev *dev, + const struct nubus_dirent *ent) { switch (ent->type) { case NUBUS_RESID_MAC_ADDRESS: @@ -368,8 +415,8 @@ static int __init nubus_show_network_resource(struct nubus_dev *dev, return 0; } -static int __init nubus_show_cpu_resource(struct nubus_dev *dev, - const struct nubus_dirent *ent) +static int __init nubus_get_cpu_resource(struct nubus_dev *dev, + const struct nubus_dirent *ent) { switch (ent->type) { case NUBUS_RESID_MEMINFO: @@ -397,18 +444,18 @@ static int __init nubus_show_cpu_resource(struct nubus_dev *dev, return 0; } -static int __init nubus_show_private_resource(struct nubus_dev *dev, - const struct nubus_dirent *ent) +static int __init nubus_get_private_resource(struct nubus_dev *dev, + const struct nubus_dirent *ent) { switch (dev->category) { case NUBUS_CAT_DISPLAY: - nubus_show_display_resource(dev, ent); + nubus_get_display_resource(dev, ent); break; case NUBUS_CAT_NETWORK: - nubus_show_network_resource(dev, ent); + nubus_get_network_resource(dev, ent); break; case NUBUS_CAT_CPU: - nubus_show_cpu_resource(dev, ent); + nubus_get_cpu_resource(dev, ent); break; default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", @@ -462,14 +509,9 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, { /* MacOS driver. 
If we were NetBSD we might use this :-) */ - struct nubus_dir drvr_dir; - struct nubus_dirent drvr_ent; - unsigned char *driver; - - nubus_get_subdir(&ent, &drvr_dir); - nubus_readdir(&drvr_dir, &drvr_ent); - driver = nubus_dirptr(&drvr_ent); - pr_debug(" driver at: 0x%p\n", driver); + pr_debug(" driver directory offset: 0x%06x\n", + ent.data); + nubus_get_block_rsrc_dir(board, &ent); break; } case NUBUS_RESID_MINOR_BASEOS: @@ -501,50 +543,13 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, default: /* Local/Private resources have their own function */ - nubus_show_private_resource(dev, &ent); + nubus_get_private_resource(dev, &ent); } } return dev; } -/* This is cool. */ -static int __init nubus_get_vidnames(struct nubus_board *board, - const struct nubus_dirent *parent) -{ - struct nubus_dir dir; - struct nubus_dirent ent; - - /* FIXME: obviously we want to put this in a header file soon */ - struct vidmode { - u32 size; - /* Don't know what this is yet */ - u16 id; - /* Longest one I've seen so far is 26 characters */ - char name[36]; - }; - - pr_debug(" video modes supported:\n"); - nubus_get_subdir(parent, &dir); - - while (nubus_readdir(&dir, &ent) != -1) { - struct vidmode mode; - u32 size; - - /* First get the length */ - nubus_get_rsrc_mem(&size, &ent, 4); - - /* Now clobber the whole thing */ - if (size > sizeof(mode) - 1) - size = sizeof(mode) - 1; - memset(&mode, 0, sizeof(mode)); - nubus_get_rsrc_mem(&mode, &ent, size); - pr_debug(" 0x%02x: 0x%04x %s\n", ent.type, - mode.id, mode.name); - } - return 0; -} - /* This is *really* cool. */ static int __init nubus_get_icon(struct nubus_board *board, const struct nubus_dirent *ent) @@ -641,7 +646,9 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, break; /* WTF isn't this in the functional resources? 
*/ case NUBUS_RESID_VIDNAMES: - nubus_get_vidnames(board, &ent); + pr_debug(" vidnames directory offset: 0x%06x\n", + ent.data); + nubus_get_block_rsrc_dir(board, &ent); break; /* Same goes for this */ case NUBUS_RESID_VIDMODES: -- cgit v1.2.3 From 2f7dd07ecadac6bdc3d55c217d65efa2834ba1cb Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Rework /proc/bus/nubus/s/ implementation The /proc/bus/nubus/s/ directory tree for any slot s is missing a lot of information. The struct file_operations methods have long been left unimplemented (hence the familiar compile-time warning, "Need to set some I/O handlers here"). Slot resources have a complex structure which varies depending on board function. The logic for interpreting these ROM data structures is found in nubus.c. Let's not duplicate that logic in proc.c. Create the /proc/bus/nubus/s/ inodes while scanning slot s. During descent through slot resource subdirectories, call the new nubus_proc_add_foo() functions to create the procfs inodes. Also add a new function, nubus_seq_write_rsrc_mem(), to write the contents of a particular slot resource to a given seq_file. This is used by the procfs file_operations methods, to finally give userspace access to slot ROM information, such as the available video modes. 
Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/nubus.c | 114 ++++++++++++++++++++------ drivers/nubus/proc.c | 222 ++++++++++++++++++++++++++++++-------------------- include/linux/nubus.h | 37 ++++++++- 3 files changed, 256 insertions(+), 117 deletions(-) diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index c56ac36d91f2..f05541914c21 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -146,7 +147,7 @@ static inline void *nubus_rom_addr(int slot) return (void *)(0xF1000000 + (slot << 24)); } -static unsigned char *nubus_dirptr(const struct nubus_dirent *nd) +unsigned char *nubus_dirptr(const struct nubus_dirent *nd) { unsigned char *p = nd->base; @@ -173,8 +174,8 @@ void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent *dirent, } EXPORT_SYMBOL(nubus_get_rsrc_mem); -void nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, - unsigned int len) +unsigned int nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, + unsigned int len) { char *t = dest; unsigned char *p = nubus_dirptr(dirent); @@ -189,9 +190,33 @@ void nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, } if (len > 0) *t = '\0'; + return t - dest; } EXPORT_SYMBOL(nubus_get_rsrc_str); +void nubus_seq_write_rsrc_mem(struct seq_file *m, + const struct nubus_dirent *dirent, + unsigned int len) +{ + unsigned long buf[32]; + unsigned int buf_size = sizeof(buf); + unsigned char *p = nubus_dirptr(dirent); + + /* If possible, write out full buffers */ + while (len >= buf_size) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(buf); i++) + buf[i] = nubus_get_rom(&p, sizeof(buf[0]), + dirent->mask); + seq_write(m, buf, buf_size); + len -= buf_size; + } + /* If not, write out individual bytes */ + while (len--) + seq_putc(m, nubus_get_rom(&p, 1, dirent->mask)); +} + int nubus_get_root_dir(const struct 
nubus_board *board, struct nubus_dir *dir) { @@ -326,35 +351,35 @@ EXPORT_SYMBOL(nubus_find_rsrc); looking at, and print out lots and lots of information from the resource blocks. */ -/* FIXME: A lot of this stuff will eventually be useful after - initialization, for intelligently probing Ethernet and video chips, - among other things. The rest of it should go in the /proc code. - For now, we just use it to give verbose boot logs. */ - static int __init nubus_get_block_rsrc_dir(struct nubus_board *board, + struct proc_dir_entry *procdir, const struct nubus_dirent *parent) { struct nubus_dir dir; struct nubus_dirent ent; nubus_get_subdir(parent, &dir); + dir.procdir = nubus_proc_add_rsrc_dir(procdir, parent, board); while (nubus_readdir(&dir, &ent) != -1) { u32 size; nubus_get_rsrc_mem(&size, &ent, 4); pr_debug(" block (0x%x), size %d\n", ent.type, size); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, size); } return 0; } static int __init nubus_get_display_vidmode(struct nubus_board *board, + struct proc_dir_entry *procdir, const struct nubus_dirent *parent) { struct nubus_dir dir; struct nubus_dirent ent; nubus_get_subdir(parent, &dir); + dir.procdir = nubus_proc_add_rsrc_dir(procdir, parent, board); while (nubus_readdir(&dir, &ent) != -1) { switch (ent.type) { @@ -366,37 +391,42 @@ static int __init nubus_get_display_vidmode(struct nubus_board *board, nubus_get_rsrc_mem(&size, &ent, 4); pr_debug(" block (0x%x), size %d\n", ent.type, size); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, size); break; } default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", ent.type, ent.data); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, 0); } } return 0; } static int __init nubus_get_display_resource(struct nubus_dev *dev, + struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { switch (ent->type) { case NUBUS_RESID_GAMMADIR: pr_debug(" gamma directory offset: 0x%06x\n", ent->data); - nubus_get_block_rsrc_dir(dev->board, ent); + nubus_get_block_rsrc_dir(dev->board, 
procdir, ent); break; case 0x0080 ... 0x0085: pr_debug(" mode 0x%02x info offset: 0x%06x\n", ent->type, ent->data); - nubus_get_display_vidmode(dev->board, ent); + nubus_get_display_vidmode(dev->board, procdir, ent); break; default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", ent->type, ent->data); + nubus_proc_add_rsrc_mem(procdir, ent, 0); } return 0; } static int __init nubus_get_network_resource(struct nubus_dev *dev, + struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { switch (ent->type) { @@ -406,16 +436,19 @@ static int __init nubus_get_network_resource(struct nubus_dev *dev, nubus_get_rsrc_mem(addr, ent, 6); pr_debug(" MAC address: %pM\n", addr); + nubus_proc_add_rsrc_mem(procdir, ent, 6); break; } default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", ent->type, ent->data); + nubus_proc_add_rsrc_mem(procdir, ent, 0); } return 0; } static int __init nubus_get_cpu_resource(struct nubus_dev *dev, + struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { switch (ent->type) { @@ -426,6 +459,7 @@ static int __init nubus_get_cpu_resource(struct nubus_dev *dev, nubus_get_rsrc_mem(&meminfo, ent, 8); pr_debug(" memory: [ 0x%08lx 0x%08lx ]\n", meminfo[0], meminfo[1]); + nubus_proc_add_rsrc_mem(procdir, ent, 8); break; } case NUBUS_RESID_ROMINFO: @@ -435,31 +469,35 @@ static int __init nubus_get_cpu_resource(struct nubus_dev *dev, nubus_get_rsrc_mem(&rominfo, ent, 8); pr_debug(" ROM: [ 0x%08lx 0x%08lx ]\n", rominfo[0], rominfo[1]); + nubus_proc_add_rsrc_mem(procdir, ent, 8); break; } default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", ent->type, ent->data); + nubus_proc_add_rsrc_mem(procdir, ent, 0); } return 0; } static int __init nubus_get_private_resource(struct nubus_dev *dev, + struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { switch (dev->category) { case NUBUS_CAT_DISPLAY: - nubus_get_display_resource(dev, ent); + nubus_get_display_resource(dev, procdir, ent); break; case NUBUS_CAT_NETWORK: - 
nubus_get_network_resource(dev, ent); + nubus_get_network_resource(dev, procdir, ent); break; case NUBUS_CAT_CPU: - nubus_get_cpu_resource(dev, ent); + nubus_get_cpu_resource(dev, procdir, ent); break; default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", ent->type, ent->data); + nubus_proc_add_rsrc_mem(procdir, ent, 0); } return 0; } @@ -474,6 +512,7 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, pr_debug(" Functional resource 0x%02x:\n", parent->type); nubus_get_subdir(parent, &dir); + dir.procdir = nubus_proc_add_rsrc_dir(board->procdir, parent, board); /* Actually we should probably panic if this fails */ if ((dev = kzalloc(sizeof(*dev), GFP_ATOMIC)) == NULL) @@ -495,14 +534,17 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, dev->dr_hw = nbtdata[3]; pr_debug(" type: [cat 0x%x type 0x%x sw 0x%x hw 0x%x]\n", nbtdata[0], nbtdata[1], nbtdata[2], nbtdata[3]); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, 8); break; } case NUBUS_RESID_NAME: { char name[64]; + unsigned int len; - nubus_get_rsrc_str(name, &ent, sizeof(name)); + len = nubus_get_rsrc_str(name, &ent, sizeof(name)); pr_debug(" name: %s\n", name); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, len + 1); break; } case NUBUS_RESID_DRVRDIR: @@ -511,7 +553,7 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, use this :-) */ pr_debug(" driver directory offset: 0x%06x\n", ent.data); - nubus_get_block_rsrc_dir(board, &ent); + nubus_get_block_rsrc_dir(board, dir.procdir, &ent); break; } case NUBUS_RESID_MINOR_BASEOS: @@ -523,6 +565,7 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, nubus_get_rsrc_mem(&base_offset, &ent, 4); pr_debug(" memory offset: 0x%08x\n", base_offset); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, 4); break; } case NUBUS_RESID_MINOR_LENGTH: @@ -532,18 +575,21 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, nubus_get_rsrc_mem(&length, &ent, 4); pr_debug(" memory length: 0x%08x\n", 
length); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, 4); break; } case NUBUS_RESID_FLAGS: pr_debug(" flags: 0x%06x\n", ent.data); + nubus_proc_add_rsrc(dir.procdir, &ent); break; case NUBUS_RESID_HWDEVID: pr_debug(" hwdevid: 0x%06x\n", ent.data); + nubus_proc_add_rsrc(dir.procdir, &ent); break; default: /* Local/Private resources have their own function */ - nubus_get_private_resource(dev, &ent); + nubus_get_private_resource(dev, dir.procdir, &ent); } } @@ -552,6 +598,7 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, /* This is *really* cool. */ static int __init nubus_get_icon(struct nubus_board *board, + struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { /* Should be 32x32 if my memory serves me correctly */ @@ -564,11 +611,13 @@ static int __init nubus_get_icon(struct nubus_board *board, pr_debug(" %08x %08x %08x %08x\n", icon[i * 4 + 0], icon[i * 4 + 1], icon[i * 4 + 2], icon[i * 4 + 3]); + nubus_proc_add_rsrc_mem(procdir, ent, 128); return 0; } static int __init nubus_get_vendorinfo(struct nubus_board *board, + struct proc_dir_entry *procdir, const struct nubus_dirent *parent) { struct nubus_dir dir; @@ -578,15 +627,18 @@ static int __init nubus_get_vendorinfo(struct nubus_board *board, pr_debug(" vendor info:\n"); nubus_get_subdir(parent, &dir); + dir.procdir = nubus_proc_add_rsrc_dir(procdir, parent, board); while (nubus_readdir(&dir, &ent) != -1) { char name[64]; + unsigned int len; /* These are all strings, we think */ - nubus_get_rsrc_str(name, &ent, sizeof(name)); + len = nubus_get_rsrc_str(name, &ent, sizeof(name)); if (ent.type < 1 || ent.type > 5) ent.type = 5; pr_debug(" %s: %s\n", vendor_fields[ent.type - 1], name); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, len + 1); } return 0; } @@ -599,6 +651,7 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, pr_debug(" Board resource 0x%02x:\n", parent->type); nubus_get_subdir(parent, &dir); + dir.procdir = 
nubus_proc_add_rsrc_dir(board->procdir, parent, board); while (nubus_readdir(&dir, &ent) != -1) { switch (ent.type) { @@ -615,49 +668,62 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, nbtdata[2] != 0 || nbtdata[3] != 0) pr_err("Slot %X: sResource is not a board resource!\n", slot); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, 8); break; } case NUBUS_RESID_NAME: - nubus_get_rsrc_str(board->name, &ent, - sizeof(board->name)); + { + unsigned int len; + + len = nubus_get_rsrc_str(board->name, &ent, + sizeof(board->name)); pr_debug(" name: %s\n", board->name); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, len + 1); break; + } case NUBUS_RESID_ICON: - nubus_get_icon(board, &ent); + nubus_get_icon(board, dir.procdir, &ent); break; case NUBUS_RESID_BOARDID: pr_debug(" board id: 0x%x\n", ent.data); + nubus_proc_add_rsrc(dir.procdir, &ent); break; case NUBUS_RESID_PRIMARYINIT: pr_debug(" primary init offset: 0x%06x\n", ent.data); + nubus_proc_add_rsrc(dir.procdir, &ent); break; case NUBUS_RESID_VENDORINFO: - nubus_get_vendorinfo(board, &ent); + nubus_get_vendorinfo(board, dir.procdir, &ent); break; case NUBUS_RESID_FLAGS: pr_debug(" flags: 0x%06x\n", ent.data); + nubus_proc_add_rsrc(dir.procdir, &ent); break; case NUBUS_RESID_HWDEVID: pr_debug(" hwdevid: 0x%06x\n", ent.data); + nubus_proc_add_rsrc(dir.procdir, &ent); break; case NUBUS_RESID_SECONDINIT: pr_debug(" secondary init offset: 0x%06x\n", ent.data); + nubus_proc_add_rsrc(dir.procdir, &ent); break; /* WTF isn't this in the functional resources? 
*/ case NUBUS_RESID_VIDNAMES: pr_debug(" vidnames directory offset: 0x%06x\n", ent.data); - nubus_get_block_rsrc_dir(board, &ent); + nubus_get_block_rsrc_dir(board, dir.procdir, &ent); break; /* Same goes for this */ case NUBUS_RESID_VIDMODES: pr_debug(" video mode parameter directory offset: 0x%06x\n", ent.data); + nubus_proc_add_rsrc(dir.procdir, &ent); break; default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", ent.type, ent.data); + nubus_proc_add_rsrc_mem(dir.procdir, &ent, 0); } } return 0; @@ -748,6 +814,8 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) if (ent.type < 1 || ent.type > 127) pr_warn("Slot %X: Board resource ID is invalid!\n", slot); + board->procdir = nubus_proc_add_board(board); + nubus_get_board_resource(board, slot, &ent); while (nubus_readdir(&dir, &ent) != -1) { @@ -835,8 +903,8 @@ static int __init nubus_init(void) if (!MACH_IS_MAC) return 0; - nubus_scan_bus(); nubus_proc_init(); + nubus_scan_bus(); return 0; } diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index 41ec859bdd8b..f47d90924ab4 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -11,24 +11,28 @@ structure in /proc analogous to the structure of the NuBus ROM resources. - Therefore each NuBus device is in fact a directory, which may in - turn contain subdirectories. The "files" correspond to NuBus - resource records. For those types of records which we know how to - convert to formats that are meaningful to userspace (mostly just - icons) these files will provide "cooked" data. Otherwise they will - simply provide raw access (read-only of course) to the ROM. */ + Therefore each board function gets a directory, which may in turn + contain subdirectories. Each slot resource is a file. Unrecognized + resources are empty files, since every resource ID requires a special + case (e.g. if the resource ID implies a directory or block, then its + value has to be interpreted as a slot ROM pointer etc.). 
+ */ #include #include #include #include #include +#include #include #include - #include #include +/* + * /proc/bus/nubus/devices stuff + */ + static int nubus_devices_proc_show(struct seq_file *m, void *v) { @@ -61,96 +65,141 @@ static const struct file_operations nubus_devices_proc_fops = { static struct proc_dir_entry *proc_bus_nubus_dir; -static const struct file_operations nubus_proc_subdir_fops = { -#warning Need to set some I/O handlers here +/* + * /proc/bus/nubus/x/ stuff + */ + +struct proc_dir_entry *nubus_proc_add_board(struct nubus_board *board) +{ + char name[2]; + + if (!proc_bus_nubus_dir) + return NULL; + snprintf(name, sizeof(name), "%x", board->slot); + return proc_mkdir(name, proc_bus_nubus_dir); +} + +/* The PDE private data for any directory under /proc/bus/nubus/x/ + * is the bytelanes value for the board in slot x. + */ + +struct proc_dir_entry *nubus_proc_add_rsrc_dir(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent, + struct nubus_board *board) +{ + char name[9]; + int lanes = board->lanes; + + if (!procdir) + return NULL; + snprintf(name, sizeof(name), "%x", ent->type); + return proc_mkdir_data(name, 0555, procdir, (void *)lanes); +} + +/* The PDE private data for a file under /proc/bus/nubus/x/ is a pointer to + * an instance of the following structure, which gives the location and size + * of the resource data in the slot ROM. For slot resources which hold only a + * small integer, this integer value is stored directly and size is set to 0. + * A NULL private data pointer indicates an unrecognized resource. 
+ */ + +struct nubus_proc_pde_data { + unsigned char *res_ptr; + unsigned int res_size; }; -static void nubus_proc_subdir(struct nubus_dev* dev, - struct proc_dir_entry* parent, - struct nubus_dir* dir) +static struct nubus_proc_pde_data * +nubus_proc_alloc_pde_data(unsigned char *ptr, unsigned int size) { - struct nubus_dirent ent; - - /* Some of these are directories, others aren't */ - while (nubus_readdir(dir, &ent) != -1) { - char name[9]; - struct proc_dir_entry* e; - - snprintf(name, sizeof(name), "%x", ent.type); - e = proc_create(name, S_IFREG | S_IRUGO | S_IWUSR, parent, - &nubus_proc_subdir_fops); - if (!e) - return; - } + struct nubus_proc_pde_data *pde_data; + + pde_data = kmalloc(sizeof(*pde_data), GFP_KERNEL); + if (!pde_data) + return NULL; + + pde_data->res_ptr = ptr; + pde_data->res_size = size; + return pde_data; } -/* Can't do this recursively since the root directory is structured - somewhat differently from the subdirectories */ -static void nubus_proc_populate(struct nubus_dev* dev, - struct proc_dir_entry* parent, - struct nubus_dir* root) +static int nubus_proc_rsrc_show(struct seq_file *m, void *v) { - struct nubus_dirent ent; - - /* We know these are all directories (board resource + one or - more functional resources) */ - while (nubus_readdir(root, &ent) != -1) { - char name[9]; - struct proc_dir_entry* e; - struct nubus_dir dir; - - snprintf(name, sizeof(name), "%x", ent.type); - e = proc_mkdir(name, parent); - if (!e) return; - - /* And descend */ - if (nubus_get_subdir(&ent, &dir) == -1) { - /* This shouldn't happen */ - printk(KERN_ERR "NuBus root directory node %x:%x has no subdir!\n", - dev->board->slot, ent.type); - continue; - } else { - nubus_proc_subdir(dev, e, &dir); - } + struct inode *inode = m->private; + struct nubus_proc_pde_data *pde_data; + + pde_data = PDE_DATA(inode); + if (!pde_data) + return 0; + + if (pde_data->res_size > m->size) + return -EFBIG; + + if (pde_data->res_size) { + int lanes = 
(int)proc_get_parent_data(inode); + struct nubus_dirent ent; + + if (!lanes) + return 0; + + ent.mask = lanes; + ent.base = pde_data->res_ptr; + ent.data = 0; + nubus_seq_write_rsrc_mem(m, &ent, pde_data->res_size); + } else { + unsigned int data = (unsigned int)pde_data->res_ptr; + + seq_putc(m, data >> 16); + seq_putc(m, data >> 8); + seq_putc(m, data >> 0); } + return 0; } -int nubus_proc_attach_device(struct nubus_dev *dev) +static int nubus_proc_rsrc_open(struct inode *inode, struct file *file) +{ + return single_open(file, nubus_proc_rsrc_show, inode); +} + +static const struct file_operations nubus_proc_rsrc_fops = { + .open = nubus_proc_rsrc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent, + unsigned int size) { - struct proc_dir_entry *e; - struct nubus_dir root; char name[9]; + struct nubus_proc_pde_data *pde_data; - if (dev == NULL) { - printk(KERN_ERR - "NULL pointer in nubus_proc_attach_device, shoot the programmer!\n"); - return -1; - } - - if (dev->board == NULL) { - printk(KERN_ERR - "NULL pointer in nubus_proc_attach_device, shoot the programmer!\n"); - printk("dev = %p, dev->board = %p\n", dev, dev->board); - return -1; - } - - if (dev->board->procdir) - return 0; + if (!procdir) + return; - /* Create a directory */ - snprintf(name, sizeof(name), "%x", dev->board->slot); - e = proc_mkdir(name, proc_bus_nubus_dir); - dev->board->procdir = e; - if (!e) - return -ENOMEM; + snprintf(name, sizeof(name), "%x", ent->type); + if (size) + pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size); + else + pde_data = NULL; + proc_create_data(name, S_IFREG | 0444, procdir, + &nubus_proc_rsrc_fops, pde_data); +} - /* Now recursively populate it with files */ - nubus_get_root_dir(dev->board, &root); - nubus_proc_populate(dev, e, &root); +void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent) 
+{ + char name[9]; + unsigned char *data = (unsigned char *)ent->data; - return 0; + if (!procdir) + return; + + snprintf(name, sizeof(name), "%x", ent->type); + proc_create_data(name, S_IFREG | 0444, procdir, + &nubus_proc_rsrc_fops, + nubus_proc_alloc_pde_data(data, 0)); } -EXPORT_SYMBOL(nubus_proc_attach_device); /* * /proc/nubus stuff @@ -219,18 +268,11 @@ static const struct file_operations nubus_proc_fops = { .release = seq_release, }; -void __init proc_bus_nubus_add_devices(void) -{ - struct nubus_dev *dev; - - for(dev = nubus_devices; dev; dev = dev->next) - nubus_proc_attach_device(dev); -} - void __init nubus_proc_init(void) { proc_create("nubus", 0, NULL, &nubus_proc_fops); proc_bus_nubus_dir = proc_mkdir("bus/nubus", NULL); + if (!proc_bus_nubus_dir) + return; proc_create("devices", 0, proc_bus_nubus_dir, &nubus_devices_proc_fops); - proc_bus_nubus_add_devices(); } diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 2d6f04055ebe..0a9e08e76606 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -13,11 +13,15 @@ #include #include +struct proc_dir_entry; +struct seq_file; + struct nubus_dir { unsigned char *base; unsigned char *ptr; int done; int mask; + struct proc_dir_entry *procdir; }; struct nubus_dirent { @@ -84,12 +88,33 @@ extern struct nubus_board *nubus_boards; /* Generic NuBus interface functions, modelled after the PCI interface */ #ifdef CONFIG_PROC_FS -extern void nubus_proc_init(void); +void nubus_proc_init(void); +struct proc_dir_entry *nubus_proc_add_board(struct nubus_board *board); +struct proc_dir_entry *nubus_proc_add_rsrc_dir(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent, + struct nubus_board *board); +void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent, + unsigned int size); +void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent); #else static inline void nubus_proc_init(void) {} +static inline +struct 
proc_dir_entry *nubus_proc_add_board(struct nubus_board *board) +{ return NULL; } +static inline +struct proc_dir_entry *nubus_proc_add_rsrc_dir(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent, + struct nubus_board *board) +{ return NULL; } +static inline void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent, + unsigned int size) {} +static inline void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, + const struct nubus_dirent *ent) {} #endif -int nubus_proc_attach_device(struct nubus_dev *dev); /* If we need more precision we can add some more of these */ struct nubus_dev *nubus_find_type(unsigned short category, unsigned short type, @@ -125,8 +150,12 @@ int nubus_get_subdir(const struct nubus_dirent *ent, struct nubus_dir *dir); void nubus_get_rsrc_mem(void *dest, const struct nubus_dirent *dirent, unsigned int len); -void nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, - unsigned int maxlen); +unsigned int nubus_get_rsrc_str(char *dest, const struct nubus_dirent *dirent, + unsigned int len); +void nubus_seq_write_rsrc_mem(struct seq_file *m, + const struct nubus_dirent *dirent, + unsigned int len); +unsigned char *nubus_dirptr(const struct nubus_dirent *nd); /* Returns a pointer to the "standard" slot space. */ static inline void *nubus_slot_addr(int slot) -- cgit v1.2.3 From 189e19e8cbb49f5bf483e55bdbd1e56d3d6bcf75 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Rename struct nubus_dev It is misleading to call a functional resource a "device". In adopting the Linux Driver Model, the struct device will be embedded in struct nubus_board. That will compound the terminology problem because drivers will bind with boards, not with functional resources. Avoid this by renaming struct nubus_dev as struct nubus_rsrc. "Functional resource" is the vendor's terminology so this helps avoid confusion. Cc: "David S.
Miller" Cc: Bartlomiej Zolnierkiewicz Acked-by: Bartlomiej Zolnierkiewicz Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/net/ethernet/8390/mac8390.c | 26 ++++---- drivers/net/ethernet/natsemi/macsonic.c | 22 +++---- drivers/nubus/nubus.c | 105 ++++++++++++++++---------------- drivers/nubus/proc.c | 15 ++--- drivers/video/fbdev/macfb.c | 2 +- include/linux/nubus.h | 30 +++++---- 6 files changed, 98 insertions(+), 102 deletions(-) diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index 9497f18eaba0..929ff6419621 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -123,7 +123,8 @@ enum mac8390_access { }; extern int mac8390_memtest(struct net_device *dev); -static int mac8390_initdev(struct net_device *dev, struct nubus_dev *ndev, +static int mac8390_initdev(struct net_device *dev, + struct nubus_rsrc *ndev, enum mac8390_type type); static int mac8390_open(struct net_device *dev); @@ -169,11 +170,11 @@ static void word_memcpy_tocard(unsigned long tp, const void *fp, int count); static void word_memcpy_fromcard(void *tp, unsigned long fp, int count); static u32 mac8390_msg_enable; -static enum mac8390_type __init mac8390_ident(struct nubus_dev *dev) +static enum mac8390_type __init mac8390_ident(struct nubus_rsrc *fres) { - switch (dev->dr_sw) { + switch (fres->dr_sw) { case NUBUS_DRSW_3COM: - switch (dev->dr_hw) { + switch (fres->dr_hw) { case NUBUS_DRHW_APPLE_SONIC_NB: case NUBUS_DRHW_APPLE_SONIC_LC: case NUBUS_DRHW_SONNET: @@ -184,7 +185,7 @@ static enum mac8390_type __init mac8390_ident(struct nubus_dev *dev) break; case NUBUS_DRSW_APPLE: - switch (dev->dr_hw) { + switch (fres->dr_hw) { case NUBUS_DRHW_ASANTE_LC: return MAC8390_NONE; case NUBUS_DRHW_CABLETRON: @@ -201,7 +202,7 @@ static enum mac8390_type __init mac8390_ident(struct nubus_dev *dev) case NUBUS_DRSW_TECHWORKS: case NUBUS_DRSW_DAYNA2: case NUBUS_DRSW_DAYNA_LC: - if (dev->dr_hw 
== NUBUS_DRHW_CABLETRON) + if (fres->dr_hw == NUBUS_DRHW_CABLETRON) return MAC8390_CABLETRON; else return MAC8390_APPLE; @@ -212,7 +213,7 @@ static enum mac8390_type __init mac8390_ident(struct nubus_dev *dev) break; case NUBUS_DRSW_KINETICS: - switch (dev->dr_hw) { + switch (fres->dr_hw) { case NUBUS_DRHW_INTERLAN: return MAC8390_INTERLAN; default: @@ -225,8 +226,8 @@ static enum mac8390_type __init mac8390_ident(struct nubus_dev *dev) * These correspond to Dayna Sonic cards * which use the macsonic driver */ - if (dev->dr_hw == NUBUS_DRHW_SMC9194 || - dev->dr_hw == NUBUS_DRHW_INTERLAN) + if (fres->dr_hw == NUBUS_DRHW_SMC9194 || + fres->dr_hw == NUBUS_DRHW_INTERLAN) return MAC8390_NONE; else return MAC8390_DAYNA; @@ -289,7 +290,8 @@ static int __init mac8390_memsize(unsigned long membase) return i * 0x1000; } -static bool __init mac8390_init(struct net_device *dev, struct nubus_dev *ndev, +static bool __init mac8390_init(struct net_device *dev, + struct nubus_rsrc *ndev, enum mac8390_type cardtype) { struct nubus_dir dir; @@ -394,7 +396,7 @@ static bool __init mac8390_init(struct net_device *dev, struct nubus_dev *ndev, struct net_device * __init mac8390_probe(int unit) { struct net_device *dev; - struct nubus_dev *ndev = NULL; + struct nubus_rsrc *ndev = NULL; int err = -ENODEV; struct ei_device *ei_local; @@ -489,7 +491,7 @@ static const struct net_device_ops mac8390_netdev_ops = { }; static int __init mac8390_initdev(struct net_device *dev, - struct nubus_dev *ndev, + struct nubus_rsrc *ndev, enum mac8390_type type) { static u32 fwrd4_offsets[16] = { diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index a42433fb6949..14f3fb50dc21 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -428,26 +428,26 @@ static int mac_nubus_sonic_ethernet_addr(struct net_device *dev, return 0; } -static int macsonic_ident(struct nubus_dev *ndev) +static int macsonic_ident(struct 
nubus_rsrc *fres) { - if (ndev->dr_hw == NUBUS_DRHW_ASANTE_LC && - ndev->dr_sw == NUBUS_DRSW_SONIC_LC) + if (fres->dr_hw == NUBUS_DRHW_ASANTE_LC && + fres->dr_sw == NUBUS_DRSW_SONIC_LC) return MACSONIC_DAYNALINK; - if (ndev->dr_hw == NUBUS_DRHW_SONIC && - ndev->dr_sw == NUBUS_DRSW_APPLE) { + if (fres->dr_hw == NUBUS_DRHW_SONIC && + fres->dr_sw == NUBUS_DRSW_APPLE) { /* There has to be a better way to do this... */ - if (strstr(ndev->board->name, "DuoDock")) + if (strstr(fres->board->name, "DuoDock")) return MACSONIC_DUODOCK; else return MACSONIC_APPLE; } - if (ndev->dr_hw == NUBUS_DRHW_SMC9194 && - ndev->dr_sw == NUBUS_DRSW_DAYNA) + if (fres->dr_hw == NUBUS_DRHW_SMC9194 && + fres->dr_sw == NUBUS_DRSW_DAYNA) return MACSONIC_DAYNA; - if (ndev->dr_hw == NUBUS_DRHW_APPLE_SONIC_LC && - ndev->dr_sw == 0) { /* huh? */ + if (fres->dr_hw == NUBUS_DRHW_APPLE_SONIC_LC && + fres->dr_sw == 0) { /* huh? */ return MACSONIC_APPLE16; } return -1; @@ -456,7 +456,7 @@ static int macsonic_ident(struct nubus_dev *ndev) static int mac_nubus_sonic_probe(struct net_device *dev) { static int slots; - struct nubus_dev* ndev = NULL; + struct nubus_rsrc *ndev = NULL; struct sonic_local* lp = netdev_priv(dev); unsigned long base_addr, prom_addr; u16 sonic_dcr; diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index f05541914c21..3657b13c0022 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -32,7 +32,7 @@ /* Globals */ -struct nubus_dev *nubus_devices; +struct nubus_rsrc *nubus_func_rsrcs; struct nubus_board *nubus_boards; /* Meaning of "bytelanes": @@ -228,12 +228,11 @@ int nubus_get_root_dir(const struct nubus_board *board, EXPORT_SYMBOL(nubus_get_root_dir); /* This is a slyly renamed version of the above */ -int nubus_get_func_dir(const struct nubus_dev *dev, - struct nubus_dir *dir) +int nubus_get_func_dir(const struct nubus_rsrc *fres, struct nubus_dir *dir) { - dir->ptr = dir->base = dev->directory; + dir->ptr = dir->base = fres->directory; dir->done = 0; - 
dir->mask = dev->board->lanes; + dir->mask = fres->board->lanes; return 0; } EXPORT_SYMBOL(nubus_get_func_dir); @@ -306,11 +305,10 @@ EXPORT_SYMBOL(nubus_rewinddir); /* Driver interface functions, more or less like in pci.c */ -struct nubus_dev* -nubus_find_type(unsigned short category, unsigned short type, - const struct nubus_dev *from) +struct nubus_rsrc *nubus_find_type(unsigned short category, unsigned short type, + const struct nubus_rsrc *from) { - struct nubus_dev *itor = from ? from->next : nubus_devices; + struct nubus_rsrc *itor = from ? from->next : nubus_func_rsrcs; while (itor) { if (itor->category == category && itor->type == type) @@ -321,10 +319,10 @@ nubus_find_type(unsigned short category, unsigned short type, } EXPORT_SYMBOL(nubus_find_type); -struct nubus_dev* -nubus_find_slot(unsigned int slot, const struct nubus_dev *from) +struct nubus_rsrc *nubus_find_slot(unsigned int slot, + const struct nubus_rsrc *from) { - struct nubus_dev *itor = from ? from->next : nubus_devices; + struct nubus_rsrc *itor = from ? from->next : nubus_func_rsrcs; while (itor) { if (itor->board->slot == slot) @@ -403,19 +401,19 @@ static int __init nubus_get_display_vidmode(struct nubus_board *board, return 0; } -static int __init nubus_get_display_resource(struct nubus_dev *dev, +static int __init nubus_get_display_resource(struct nubus_rsrc *fres, struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { switch (ent->type) { case NUBUS_RESID_GAMMADIR: pr_debug(" gamma directory offset: 0x%06x\n", ent->data); - nubus_get_block_rsrc_dir(dev->board, procdir, ent); + nubus_get_block_rsrc_dir(fres->board, procdir, ent); break; case 0x0080 ... 
0x0085: pr_debug(" mode 0x%02x info offset: 0x%06x\n", ent->type, ent->data); - nubus_get_display_vidmode(dev->board, procdir, ent); + nubus_get_display_vidmode(fres->board, procdir, ent); break; default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", @@ -425,7 +423,7 @@ static int __init nubus_get_display_resource(struct nubus_dev *dev, return 0; } -static int __init nubus_get_network_resource(struct nubus_dev *dev, +static int __init nubus_get_network_resource(struct nubus_rsrc *fres, struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { @@ -447,7 +445,7 @@ static int __init nubus_get_network_resource(struct nubus_dev *dev, return 0; } -static int __init nubus_get_cpu_resource(struct nubus_dev *dev, +static int __init nubus_get_cpu_resource(struct nubus_rsrc *fres, struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { @@ -480,19 +478,19 @@ static int __init nubus_get_cpu_resource(struct nubus_dev *dev, return 0; } -static int __init nubus_get_private_resource(struct nubus_dev *dev, +static int __init nubus_get_private_resource(struct nubus_rsrc *fres, struct proc_dir_entry *procdir, const struct nubus_dirent *ent) { - switch (dev->category) { + switch (fres->category) { case NUBUS_CAT_DISPLAY: - nubus_get_display_resource(dev, procdir, ent); + nubus_get_display_resource(fres, procdir, ent); break; case NUBUS_CAT_NETWORK: - nubus_get_network_resource(dev, procdir, ent); + nubus_get_network_resource(fres, procdir, ent); break; case NUBUS_CAT_CPU: - nubus_get_cpu_resource(dev, procdir, ent); + nubus_get_cpu_resource(fres, procdir, ent); break; default: pr_debug(" unknown resource 0x%02x, data 0x%06x\n", @@ -502,24 +500,25 @@ static int __init nubus_get_private_resource(struct nubus_dev *dev, return 0; } -static struct nubus_dev * __init +static struct nubus_rsrc * __init nubus_get_functional_resource(struct nubus_board *board, int slot, const struct nubus_dirent *parent) { struct nubus_dir dir; struct nubus_dirent ent; - struct nubus_dev 
*dev; + struct nubus_rsrc *fres; pr_debug(" Functional resource 0x%02x:\n", parent->type); nubus_get_subdir(parent, &dir); dir.procdir = nubus_proc_add_rsrc_dir(board->procdir, parent, board); /* Actually we should probably panic if this fails */ - if ((dev = kzalloc(sizeof(*dev), GFP_ATOMIC)) == NULL) + fres = kzalloc(sizeof(*fres), GFP_ATOMIC); + if (!fres) return NULL; - dev->resid = parent->type; - dev->directory = dir.base; - dev->board = board; + fres->resid = parent->type; + fres->directory = dir.base; + fres->board = board; while (nubus_readdir(&dir, &ent) != -1) { switch (ent.type) { @@ -528,10 +527,10 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, unsigned short nbtdata[4]; nubus_get_rsrc_mem(nbtdata, &ent, 8); - dev->category = nbtdata[0]; - dev->type = nbtdata[1]; - dev->dr_sw = nbtdata[2]; - dev->dr_hw = nbtdata[3]; + fres->category = nbtdata[0]; + fres->type = nbtdata[1]; + fres->dr_sw = nbtdata[2]; + fres->dr_hw = nbtdata[3]; pr_debug(" type: [cat 0x%x type 0x%x sw 0x%x hw 0x%x]\n", nbtdata[0], nbtdata[1], nbtdata[2], nbtdata[3]); nubus_proc_add_rsrc_mem(dir.procdir, &ent, 8); @@ -589,11 +588,11 @@ nubus_get_functional_resource(struct nubus_board *board, int slot, default: /* Local/Private resources have their own function */ - nubus_get_private_resource(dev, dir.procdir, &ent); + nubus_get_private_resource(fres, dir.procdir, &ent); } } - return dev; + return fres; } /* This is *really* cool. */ @@ -729,7 +728,6 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, return 0; } -/* Add a board (might be many devices) to the list */ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) { struct nubus_board *board; @@ -801,10 +799,11 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) pr_debug("Slot %X resources:\n", slot); /* Each slot should have one board resource and any number of - functional resources. 
So we'll fill in some fields in the - struct nubus_board from the board resource, then walk down - the list of functional resources, spinning out a nubus_dev - for each of them. */ + * functional resources. So we'll fill in some fields in the + * struct nubus_board from the board resource, then walk down + * the list of functional resources, spinning out a nubus_rsrc + * for each of them. + */ if (nubus_readdir(&dir, &ent) == -1) { /* We can't have this! */ pr_err("Slot %X: Board resource not found!\n", slot); @@ -819,32 +818,32 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) nubus_get_board_resource(board, slot, &ent); while (nubus_readdir(&dir, &ent) != -1) { - struct nubus_dev *dev; - struct nubus_dev **devp; + struct nubus_rsrc *fres; + struct nubus_rsrc **fresp; - dev = nubus_get_functional_resource(board, slot, &ent); - if (dev == NULL) + fres = nubus_get_functional_resource(board, slot, &ent); + if (fres == NULL) continue; /* Resources should appear in ascending ID order. This sanity * check prevents duplicate resource IDs. */ - if (dev->resid <= prev_resid) { - kfree(dev); + if (fres->resid <= prev_resid) { + kfree(fres); continue; } - prev_resid = dev->resid; + prev_resid = fres->resid; /* We zeroed this out above */ - if (board->first_dev == NULL) - board->first_dev = dev; + if (board->first_func_rsrc == NULL) + board->first_func_rsrc = fres; - /* Put it on the global NuBus device chain. Keep entries in order. */ - for (devp = &nubus_devices; *devp != NULL; - devp = &((*devp)->next)) + /* Put it on the func. resource list. Keep entries in order. */ + for (fresp = &nubus_func_rsrcs; *fresp != NULL; + fresp = &((*fresp)->next)) /* spin */; - *devp = dev; - dev->next = NULL; + *fresp = fres; + fres->next = NULL; } /* Put it on the global NuBus board chain. Keep entries in order. 
*/ diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index f47d90924ab4..f2b118330be0 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -36,17 +36,14 @@ static int nubus_devices_proc_show(struct seq_file *m, void *v) { - struct nubus_dev *dev = nubus_devices; + struct nubus_rsrc *fres = nubus_func_rsrcs; - while (dev) { + while (fres) { seq_printf(m, "%x\t%04x %04x %04x %04x", - dev->board->slot, - dev->category, - dev->type, - dev->dr_sw, - dev->dr_hw); - seq_printf(m, "\t%08lx\n", dev->board->slot_addr); - dev = dev->next; + fres->board->slot, fres->category, fres->type, + fres->dr_sw, fres->dr_hw); + seq_printf(m, "\t%08lx\n", fres->board->slot_addr); + fres = fres->next; } return 0; } diff --git a/drivers/video/fbdev/macfb.c b/drivers/video/fbdev/macfb.c index cda7587cbc86..e86a2796e3d9 100644 --- a/drivers/video/fbdev/macfb.c +++ b/drivers/video/fbdev/macfb.c @@ -556,7 +556,7 @@ static void __init iounmap_macfb(void) static int __init macfb_init(void) { int video_cmap_len, video_is_nubus = 0; - struct nubus_dev* ndev = NULL; + struct nubus_rsrc *ndev = NULL; char *option = NULL; int err; diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 0a9e08e76606..4a481610ad38 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -33,7 +33,7 @@ struct nubus_dirent { struct nubus_board { struct nubus_board *next; - struct nubus_dev *first_dev; + struct nubus_rsrc *first_func_rsrc; /* Only 9-E actually exist, though 0-8 are also theoretically possible, and 0 is a special case which represents the @@ -62,11 +62,11 @@ struct nubus_board { struct proc_dir_entry *procdir; }; -struct nubus_dev { - /* Next link in device list */ - struct nubus_dev *next; +struct nubus_rsrc { + /* Next link in list */ + struct nubus_rsrc *next; - /* The functional resource ID of this device */ + /* The functional resource ID */ unsigned char resid; /* These are mostly here for convenience; we could always read them from the ROMs if we wanted to */ @@ 
-81,8 +81,8 @@ struct nubus_dev { struct nubus_board *board; }; -/* This is all NuBus devices (used to find devices later on) */ -extern struct nubus_dev *nubus_devices; +/* This is all NuBus functional resources (used to find devices later on) */ +extern struct nubus_rsrc *nubus_func_rsrcs; /* This is all NuBus cards */ extern struct nubus_board *nubus_boards; @@ -115,13 +115,12 @@ static inline void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, const struct nubus_dirent *ent) {} #endif -/* If we need more precision we can add some more of these */ -struct nubus_dev *nubus_find_type(unsigned short category, - unsigned short type, - const struct nubus_dev *from); -/* Might have more than one device in a slot, you know... */ -struct nubus_dev *nubus_find_slot(unsigned int slot, - const struct nubus_dev *from); +struct nubus_rsrc *nubus_find_type(unsigned short category, + unsigned short type, + const struct nubus_rsrc *from); + +struct nubus_rsrc *nubus_find_slot(unsigned int slot, + const struct nubus_rsrc *from); /* These are somewhat more NuBus-specific. They all return 0 for success and -1 for failure, as you'd expect. */ @@ -134,8 +133,7 @@ int nubus_get_root_dir(const struct nubus_board *board, int nubus_get_board_dir(const struct nubus_board *board, struct nubus_dir *dir); /* The functional directory */ -int nubus_get_func_dir(const struct nubus_dev *dev, - struct nubus_dir *dir); +int nubus_get_func_dir(const struct nubus_rsrc *fres, struct nubus_dir *dir); /* These work on any directory gotten via the above */ int nubus_readdir(struct nubus_dir *dir, -- cgit v1.2.3 From 41b848160eabb22957652936b66ccafd95ab5ad8 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Adopt standard linked list implementation This increases code re-use and improves readability. Cc: "David S. 
Miller" Cc: Bartlomiej Zolnierkiewicz Acked-by: Bartlomiej Zolnierkiewicz Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/net/ethernet/8390/mac8390.c | 7 +++-- drivers/net/ethernet/cirrus/mac89x0.c | 6 +++-- drivers/net/ethernet/natsemi/macsonic.c | 8 +++--- drivers/nubus/nubus.c | 45 ++++++++------------------------- drivers/nubus/proc.c | 11 +++----- drivers/video/fbdev/macfb.c | 8 +++--- include/linux/nubus.h | 15 +++++------ 7 files changed, 40 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index 929ff6419621..2f91ce8dc614 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -416,8 +416,11 @@ struct net_device * __init mac8390_probe(int unit) if (unit >= 0) sprintf(dev->name, "eth%d", unit); - while ((ndev = nubus_find_type(NUBUS_CAT_NETWORK, NUBUS_TYPE_ETHERNET, - ndev))) { + for_each_func_rsrc(ndev) { + if (ndev->category != NUBUS_CAT_NETWORK || + ndev->type != NUBUS_TYPE_ETHERNET) + continue; + /* Have we seen it already? */ if (slots & (1 << ndev->board->slot)) continue; diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c index f910f0f386d6..977d4c2c759d 100644 --- a/drivers/net/ethernet/cirrus/mac89x0.c +++ b/drivers/net/ethernet/cirrus/mac89x0.c @@ -187,6 +187,7 @@ struct net_device * __init mac89x0_probe(int unit) unsigned long ioaddr; unsigned short sig; int err = -ENODEV; + struct nubus_rsrc *fres; if (!MACH_IS_MAC) return ERR_PTR(-ENODEV); @@ -207,8 +208,9 @@ struct net_device * __init mac89x0_probe(int unit) /* We might have to parameterize this later */ slot = 0xE; /* Get out now if there's a real NuBus card in slot E */ - if (nubus_find_slot(slot, NULL) != NULL) - goto out; + for_each_func_rsrc(fres) + if (fres->board->slot == slot) + goto out; /* The pseudo-ISA bits always live at offset 0x300 (gee, wonder why...) 
*/ diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 14f3fb50dc21..313fe5e0184b 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -464,9 +464,11 @@ static int mac_nubus_sonic_probe(struct net_device *dev) int reg_offset, dma_bitmode; /* Find the first SONIC that hasn't been initialized already */ - while ((ndev = nubus_find_type(NUBUS_CAT_NETWORK, - NUBUS_TYPE_ETHERNET, ndev)) != NULL) - { + for_each_func_rsrc(ndev) { + if (ndev->category != NUBUS_CAT_NETWORK || + ndev->type != NUBUS_TYPE_ETHERNET) + continue; + /* Have we seen it already? */ if (slots & (1<board->slot)) continue; diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index 3657b13c0022..0bb54ccd7a1a 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -32,7 +32,7 @@ /* Globals */ -struct nubus_rsrc *nubus_func_rsrcs; +LIST_HEAD(nubus_func_rsrcs); struct nubus_board *nubus_boards; /* Meaning of "bytelanes": @@ -305,33 +305,20 @@ EXPORT_SYMBOL(nubus_rewinddir); /* Driver interface functions, more or less like in pci.c */ -struct nubus_rsrc *nubus_find_type(unsigned short category, unsigned short type, - const struct nubus_rsrc *from) +struct nubus_rsrc *nubus_first_rsrc_or_null(void) { - struct nubus_rsrc *itor = from ? from->next : nubus_func_rsrcs; - - while (itor) { - if (itor->category == category && itor->type == type) - return itor; - itor = itor->next; - } - return NULL; + return list_first_entry_or_null(&nubus_func_rsrcs, struct nubus_rsrc, + list); } -EXPORT_SYMBOL(nubus_find_type); +EXPORT_SYMBOL(nubus_first_rsrc_or_null); -struct nubus_rsrc *nubus_find_slot(unsigned int slot, - const struct nubus_rsrc *from) +struct nubus_rsrc *nubus_next_rsrc_or_null(struct nubus_rsrc *from) { - struct nubus_rsrc *itor = from ? 
from->next : nubus_func_rsrcs; - - while (itor) { - if (itor->board->slot == slot) - return itor; - itor = itor->next; - } - return NULL; + if (list_is_last(&from->list, &nubus_func_rsrcs)) + return NULL; + return list_next_entry(from, list); } -EXPORT_SYMBOL(nubus_find_slot); +EXPORT_SYMBOL(nubus_next_rsrc_or_null); int nubus_find_rsrc(struct nubus_dir *dir, unsigned char rsrc_type, @@ -819,7 +806,6 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) while (nubus_readdir(&dir, &ent) != -1) { struct nubus_rsrc *fres; - struct nubus_rsrc **fresp; fres = nubus_get_functional_resource(board, slot, &ent); if (fres == NULL) @@ -834,16 +820,7 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) } prev_resid = fres->resid; - /* We zeroed this out above */ - if (board->first_func_rsrc == NULL) - board->first_func_rsrc = fres; - - /* Put it on the func. resource list. Keep entries in order. */ - for (fresp = &nubus_func_rsrcs; *fresp != NULL; - fresp = &((*fresp)->next)) - /* spin */; - *fresp = fres; - fres->next = NULL; + list_add_tail(&fres->list, &nubus_func_rsrcs); } /* Put it on the global NuBus board chain. Keep entries in order. 
*/ diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index f2b118330be0..60c0f40b4d5e 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -36,15 +36,12 @@ static int nubus_devices_proc_show(struct seq_file *m, void *v) { - struct nubus_rsrc *fres = nubus_func_rsrcs; + struct nubus_rsrc *fres; - while (fres) { - seq_printf(m, "%x\t%04x %04x %04x %04x", + for_each_func_rsrc(fres) + seq_printf(m, "%x\t%04x %04x %04x %04x\t%08lx\n", fres->board->slot, fres->category, fres->type, - fres->dr_sw, fres->dr_hw); - seq_printf(m, "\t%08lx\n", fres->board->slot_addr); - fres = fres->next; - } + fres->dr_sw, fres->dr_hw, fres->board->slot_addr); return 0; } diff --git a/drivers/video/fbdev/macfb.c b/drivers/video/fbdev/macfb.c index e86a2796e3d9..e707e617bf1c 100644 --- a/drivers/video/fbdev/macfb.c +++ b/drivers/video/fbdev/macfb.c @@ -670,15 +670,17 @@ static int __init macfb_init(void) * code is really broken :-) */ - while ((ndev = nubus_find_type(NUBUS_CAT_DISPLAY, - NUBUS_TYPE_VIDEO, ndev))) - { + for_each_func_rsrc(ndev) { unsigned long base = ndev->board->slot_addr; if (mac_bi_data.videoaddr < base || mac_bi_data.videoaddr - base > 0xFFFFFF) continue; + if (ndev->category != NUBUS_CAT_DISPLAY || + ndev->type != NUBUS_TYPE_VIDEO) + continue; + video_is_nubus = 1; slot_addr = (unsigned char *)base; diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 4a481610ad38..2cbc7a199bca 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -33,7 +33,6 @@ struct nubus_dirent { struct nubus_board { struct nubus_board *next; - struct nubus_rsrc *first_func_rsrc; /* Only 9-E actually exist, though 0-8 are also theoretically possible, and 0 is a special case which represents the @@ -63,8 +62,7 @@ struct nubus_board { }; struct nubus_rsrc { - /* Next link in list */ - struct nubus_rsrc *next; + struct list_head list; /* The functional resource ID */ unsigned char resid; @@ -82,7 +80,7 @@ struct nubus_rsrc { }; /* This is all NuBus functional 
resources (used to find devices later on) */ -extern struct nubus_rsrc *nubus_func_rsrcs; +extern struct list_head nubus_func_rsrcs; /* This is all NuBus cards */ extern struct nubus_board *nubus_boards; @@ -115,12 +113,11 @@ static inline void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, const struct nubus_dirent *ent) {} #endif -struct nubus_rsrc *nubus_find_type(unsigned short category, - unsigned short type, - const struct nubus_rsrc *from); +struct nubus_rsrc *nubus_first_rsrc_or_null(void); +struct nubus_rsrc *nubus_next_rsrc_or_null(struct nubus_rsrc *from); -struct nubus_rsrc *nubus_find_slot(unsigned int slot, - const struct nubus_rsrc *from); +#define for_each_func_rsrc(f) \ + for (f = nubus_first_rsrc_or_null(); f; f = nubus_next_rsrc_or_null(f)) /* These are somewhat more NuBus-specific. They all return 0 for success and -1 for failure, as you'd expect. */ -- cgit v1.2.3 From b87eaec27eca3def6c8ed617e3b1bac08d7bc715 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:13 -0500 Subject: nubus: Add expansion_type values for various Mac models Add an expansion slot attribute to allow drivers to properly handle cards like Comm Slot cards and PDS cards without declaration ROMs. This clarifies the logic for the Centris 610 model which has no Comm Slot but has an optional on-board SONIC device. Cc: "David S. 
Miller" Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/macintosh.h | 9 ++- arch/m68k/mac/config.c | 110 +++++++++++++------------------- drivers/net/ethernet/natsemi/macsonic.c | 8 +-- 3 files changed, 54 insertions(+), 73 deletions(-) diff --git a/arch/m68k/include/asm/macintosh.h b/arch/m68k/include/asm/macintosh.h index f42c27400dbc..9b840c03ebb7 100644 --- a/arch/m68k/include/asm/macintosh.h +++ b/arch/m68k/include/asm/macintosh.h @@ -33,7 +33,7 @@ struct mac_model char ide_type; char scc_type; char ether_type; - char nubus_type; + char expansion_type; char floppy_type; }; @@ -73,8 +73,11 @@ struct mac_model #define MAC_ETHER_SONIC 1 #define MAC_ETHER_MACE 2 -#define MAC_NO_NUBUS 0 -#define MAC_NUBUS 1 +#define MAC_EXP_NONE 0 +#define MAC_EXP_PDS 1 /* Accepts only a PDS card */ +#define MAC_EXP_NUBUS 2 /* Accepts only NuBus card(s) */ +#define MAC_EXP_PDS_NUBUS 3 /* Accepts PDS card and/or NuBus card(s) */ +#define MAC_EXP_PDS_COMM 4 /* Accepts PDS card or Comm Slot card */ #define MAC_FLOPPY_IWM 0 #define MAC_FLOPPY_SWIM_ADDR1 1 diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 16cd5cea5207..d3d435248a24 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -212,7 +212,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_IWM, }, @@ -227,7 +227,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_IWM, }, { .ident = MAC_MODEL_IIX, @@ -236,7 +236,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = 
MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_IICX, @@ -245,7 +245,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_SE30, @@ -254,7 +254,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_II, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, @@ -272,7 +272,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_IIFX, @@ -281,7 +281,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_IIFX, .scc_type = MAC_SCC_IOP, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_IOP, }, { .ident = MAC_MODEL_IISI, @@ -290,7 +290,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_IIVI, @@ -299,7 +299,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_IIVX, @@ -308,7 +308,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, @@ -323,7 +323,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, 
.scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_CCL, @@ -332,7 +331,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_CCLII, @@ -341,7 +340,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, @@ -356,7 +355,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_LCII, @@ -365,7 +364,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_LCIII, @@ -374,7 +373,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, @@ -395,7 +394,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_Q605_ACC, @@ -404,7 +403,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_Q610, 
@@ -414,7 +413,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_Q630, @@ -424,8 +423,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .ide_type = MAC_IDE_QUADRA, .scc_type = MAC_SCC_QUADRA, - .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_COMM, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_Q650, @@ -435,7 +433,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, /* The Q700 does have a NS Sonic */ @@ -447,7 +445,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA2, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_Q800, @@ -457,7 +455,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_Q840, @@ -467,7 +465,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA3, .scc_type = MAC_SCC_PSC, .ether_type = MAC_ETHER_MACE, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_AV, }, { .ident = MAC_MODEL_Q900, @@ -477,7 +475,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA2, .scc_type = MAC_SCC_IOP, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = 
MAC_FLOPPY_SWIM_IOP, }, { .ident = MAC_MODEL_Q950, @@ -487,7 +485,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA2, .scc_type = MAC_SCC_IOP, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_IOP, }, @@ -502,7 +500,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_P475, @@ -511,7 +509,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_P475F, @@ -520,7 +518,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_P520, @@ -529,7 +527,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_P550, @@ -538,7 +536,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, /* These have the comm slot, and therefore possibly SONIC ethernet */ @@ -549,8 +547,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_II, - .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_COMM, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { 
.ident = MAC_MODEL_P588, @@ -560,8 +557,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .ide_type = MAC_IDE_QUADRA, .scc_type = MAC_SCC_II, - .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_COMM, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_TV, @@ -570,7 +566,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_P600, @@ -579,7 +574,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_LC, .scc_type = MAC_SCC_II, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, @@ -596,7 +591,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_C650, @@ -606,7 +601,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR1, }, { .ident = MAC_MODEL_C660, @@ -616,7 +611,7 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_QUADRA3, .scc_type = MAC_SCC_PSC, .ether_type = MAC_ETHER_MACE, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_PDS_NUBUS, .floppy_type = MAC_FLOPPY_AV, }, @@ -633,7 +628,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB145, @@ -642,7 +636,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = 
MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB150, @@ -652,7 +645,6 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_OLD, .ide_type = MAC_IDE_PB, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB160, @@ -661,7 +653,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB165, @@ -670,7 +661,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB165C, @@ -679,7 +669,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB170, @@ -688,7 +677,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB180, @@ -697,7 +685,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB180C, @@ -706,7 +693,6 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_QUADRA, .scsi_type = MAC_SCSI_OLD, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB190, @@ -716,7 +702,6 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_LATE, .ide_type = MAC_IDE_BABOON, .scc_type = MAC_SCC_QUADRA, - 
.nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB520, @@ -726,7 +711,6 @@ static struct mac_model mac_data_table[] = { .scsi_type = MAC_SCSI_LATE, .scc_type = MAC_SCC_QUADRA, .ether_type = MAC_ETHER_SONIC, - .nubus_type = MAC_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, @@ -743,7 +727,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_DUO, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB230, @@ -752,7 +736,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_DUO, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB250, @@ -761,7 +745,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_DUO, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB270C, @@ -770,7 +754,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_DUO, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB280, @@ -779,7 +763,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_DUO, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, { .ident = MAC_MODEL_PB280C, @@ -788,7 +772,7 @@ static struct mac_model mac_data_table[] = { .via_type = MAC_VIA_IICI, .scsi_type = MAC_SCSI_DUO, .scc_type = MAC_SCC_QUADRA, - .nubus_type = MAC_NUBUS, + .expansion_type = MAC_EXP_NUBUS, .floppy_type = MAC_FLOPPY_SWIM_ADDR2, }, @@ -1100,14 +1084,12 @@ int __init 
mac_platform_init(void) * Ethernet device */ - switch (macintosh_config->ether_type) { - case MAC_ETHER_SONIC: + if (macintosh_config->ether_type == MAC_ETHER_SONIC || + macintosh_config->expansion_type == MAC_EXP_PDS_COMM) platform_device_register_simple("macsonic", -1, NULL, 0); - break; - case MAC_ETHER_MACE: + + if (macintosh_config->ether_type == MAC_ETHER_MACE) platform_device_register_simple("macmace", -1, NULL, 0); - break; - } return 0; } diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 313fe5e0184b..b922ab5cedea 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -311,7 +311,7 @@ static int mac_onboard_sonic_probe(struct net_device *dev) { struct sonic_local* lp = netdev_priv(dev); int sr; - int commslot = 0; + bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM; if (!MACH_IS_MAC) return -ENODEV; @@ -322,10 +322,7 @@ static int mac_onboard_sonic_probe(struct net_device *dev) Ethernet (BTW, the Ethernet *is* always at the same address, and nothing else lives there, at least if Apple's documentation is to be believed) */ - if (macintosh_config->ident == MAC_MODEL_Q630 || - macintosh_config->ident == MAC_MODEL_P588 || - macintosh_config->ident == MAC_MODEL_P575 || - macintosh_config->ident == MAC_MODEL_C610) { + if (commslot || macintosh_config->ident == MAC_MODEL_C610) { int card_present; card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS); @@ -333,7 +330,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev) printk("none.\n"); return -ENODEV; } - commslot = 1; } printk("yes\n"); -- cgit v1.2.3 From 7f86c765a6a2bb837c45f11526176125ff50e21f Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:37:14 -0500 Subject: nubus: Add support for the driver model This patch brings basic support for the Linux Driver Model to the NuBus subsystem. 
For flexibility, the matching of boards with drivers is left up to the drivers. This is also the approach taken by NetBSD. A board may have many functions, and drivers may have to consider many functional resources and board resources in order to match a device. This implementation does not bind drivers to resources (nor does it bind many drivers to the same board). Apple's NuBus declaration ROM design is flexible enough to allow that, but I don't see a need to support it as we don't use the "slot zero" resources (in the main logic board ROM). Eliminate the global nubus_boards linked list by rewriting the procfs board iterator around bus_for_each_dev(). Hence the nubus device refcount can be used to determine the lifespan of board objects. Cc: Greg Kroah-Hartman Reviewed-by: Greg Kroah-Hartman Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- drivers/nubus/Makefile | 2 +- drivers/nubus/bus.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/nubus/nubus.c | 24 +++++----- drivers/nubus/proc.c | 55 +---------------------- include/linux/nubus.h | 33 ++++++++++++-- 5 files changed, 161 insertions(+), 70 deletions(-) create mode 100644 drivers/nubus/bus.c diff --git a/drivers/nubus/Makefile b/drivers/nubus/Makefile index 21bda2031e7e..6d063cde39d1 100644 --- a/drivers/nubus/Makefile +++ b/drivers/nubus/Makefile @@ -2,6 +2,6 @@ # Makefile for the nubus specific drivers. # -obj-y := nubus.o +obj-y := nubus.o bus.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/drivers/nubus/bus.c b/drivers/nubus/bus.c new file mode 100644 index 000000000000..d306c348c857 --- /dev/null +++ b/drivers/nubus/bus.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Bus implementation for the NuBus subsystem. 
+// +// Copyright (C) 2017 Finn Thain + +#include +#include +#include +#include +#include + +#define to_nubus_board(d) container_of(d, struct nubus_board, dev) +#define to_nubus_driver(d) container_of(d, struct nubus_driver, driver) + +static int nubus_bus_match(struct device *dev, struct device_driver *driver) +{ + return 1; +} + +static int nubus_device_probe(struct device *dev) +{ + struct nubus_driver *ndrv = to_nubus_driver(dev->driver); + int err = -ENODEV; + + if (ndrv->probe) + err = ndrv->probe(to_nubus_board(dev)); + return err; +} + +static int nubus_device_remove(struct device *dev) +{ + struct nubus_driver *ndrv = to_nubus_driver(dev->driver); + int err = -ENODEV; + + if (dev->driver && ndrv->remove) + err = ndrv->remove(to_nubus_board(dev)); + return err; +} + +struct bus_type nubus_bus_type = { + .name = "nubus", + .match = nubus_bus_match, + .probe = nubus_device_probe, + .remove = nubus_device_remove, +}; +EXPORT_SYMBOL(nubus_bus_type); + +int nubus_driver_register(struct nubus_driver *ndrv) +{ + ndrv->driver.bus = &nubus_bus_type; + return driver_register(&ndrv->driver); +} +EXPORT_SYMBOL(nubus_driver_register); + +void nubus_driver_unregister(struct nubus_driver *ndrv) +{ + driver_unregister(&ndrv->driver); +} +EXPORT_SYMBOL(nubus_driver_unregister); + +static struct device nubus_parent = { + .init_name = "nubus", +}; + +int __init nubus_bus_register(void) +{ + int err; + + err = device_register(&nubus_parent); + if (err) + return err; + + err = bus_register(&nubus_bus_type); + if (!err) + return 0; + + device_unregister(&nubus_parent); + return err; +} + +static void nubus_device_release(struct device *dev) +{ + struct nubus_board *board = to_nubus_board(dev); + struct nubus_rsrc *fres, *tmp; + + list_for_each_entry_safe(fres, tmp, &nubus_func_rsrcs, list) + if (fres->board == board) { + list_del(&fres->list); + kfree(fres); + } + kfree(board); +} + +int nubus_device_register(struct nubus_board *board) +{ + board->dev.parent = &nubus_parent; + 
board->dev.release = nubus_device_release; + board->dev.bus = &nubus_bus_type; + dev_set_name(&board->dev, "slot.%X", board->slot); + return device_register(&board->dev); +} + +static int nubus_print_device_name_fn(struct device *dev, void *data) +{ + struct nubus_board *board = to_nubus_board(dev); + struct seq_file *m = data; + + seq_printf(m, "Slot %X: %s\n", board->slot, board->name); + return 0; +} + +int nubus_proc_show(struct seq_file *m, void *data) +{ + return bus_for_each_dev(&nubus_bus_type, NULL, m, + nubus_print_device_name_fn); +} diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index 0bb54ccd7a1a..4621ff98138c 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -33,7 +33,6 @@ /* Globals */ LIST_HEAD(nubus_func_rsrcs); -struct nubus_board *nubus_boards; /* Meaning of "bytelanes": @@ -715,10 +714,9 @@ static int __init nubus_get_board_resource(struct nubus_board *board, int slot, return 0; } -static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) +static void __init nubus_add_board(int slot, int bytelanes) { struct nubus_board *board; - struct nubus_board **boardp; unsigned char *rp; unsigned long dpat; struct nubus_dir dir; @@ -731,7 +729,7 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) /* Actually we should probably panic if this fails */ if ((board = kzalloc(sizeof(*board), GFP_ATOMIC)) == NULL) - return NULL; + return; board->fblock = rp; /* Dump the format block for debugging purposes */ @@ -794,7 +792,8 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) if (nubus_readdir(&dir, &ent) == -1) { /* We can't have this! */ pr_err("Slot %X: Board resource not found!\n", slot); - return NULL; + kfree(board); + return; } if (ent.type < 1 || ent.type > 127) @@ -823,14 +822,8 @@ static struct nubus_board * __init nubus_add_board(int slot, int bytelanes) list_add_tail(&fres->list, &nubus_func_rsrcs); } - /* Put it on the global NuBus board chain. 
Keep entries in order. */ - for (boardp = &nubus_boards; *boardp != NULL; - boardp = &((*boardp)->next)) - /* spin */; - *boardp = board; - board->next = NULL; - - return board; + if (nubus_device_register(board)) + put_device(&board->dev); } static void __init nubus_probe_slot(int slot) @@ -876,10 +869,15 @@ static void __init nubus_scan_bus(void) static int __init nubus_init(void) { + int err; + if (!MACH_IS_MAC) return 0; nubus_proc_init(); + err = nubus_bus_register(); + if (err) + return err; nubus_scan_bus(); return 0; } diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index 60c0f40b4d5e..c2e5a7e6bd3e 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -198,68 +198,17 @@ void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, /* * /proc/nubus stuff */ -static int nubus_proc_show(struct seq_file *m, void *v) -{ - const struct nubus_board *board = v; - - /* Display header on line 1 */ - if (v == SEQ_START_TOKEN) - seq_puts(m, "Nubus devices found:\n"); - else - seq_printf(m, "Slot %X: %s\n", board->slot, board->name); - return 0; -} - -static void *nubus_proc_start(struct seq_file *m, loff_t *_pos) -{ - struct nubus_board *board; - unsigned pos; - - if (*_pos > LONG_MAX) - return NULL; - pos = *_pos; - if (pos == 0) - return SEQ_START_TOKEN; - for (board = nubus_boards; board; board = board->next) - if (--pos == 0) - break; - return board; -} - -static void *nubus_proc_next(struct seq_file *p, void *v, loff_t *_pos) -{ - /* Walk the list of NuBus boards */ - struct nubus_board *board = v; - - ++*_pos; - if (v == SEQ_START_TOKEN) - board = nubus_boards; - else if (board) - board = board->next; - return board; -} - -static void nubus_proc_stop(struct seq_file *p, void *v) -{ -} - -static const struct seq_operations nubus_proc_seqops = { - .start = nubus_proc_start, - .next = nubus_proc_next, - .stop = nubus_proc_stop, - .show = nubus_proc_show, -}; static int nubus_proc_open(struct inode *inode, struct file *file) { - return seq_open(file, 
&nubus_proc_seqops); + return single_open(file, nubus_proc_show, NULL); } static const struct file_operations nubus_proc_fops = { .open = nubus_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; void __init nubus_proc_init(void) diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 2cbc7a199bca..6e8200215321 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -10,6 +10,7 @@ #ifndef LINUX_NUBUS_H #define LINUX_NUBUS_H +#include #include #include @@ -32,7 +33,7 @@ struct nubus_dirent { }; struct nubus_board { - struct nubus_board *next; + struct device dev; /* Only 9-E actually exist, though 0-8 are also theoretically possible, and 0 is a special case which represents the @@ -81,8 +82,14 @@ struct nubus_rsrc { /* This is all NuBus functional resources (used to find devices later on) */ extern struct list_head nubus_func_rsrcs; -/* This is all NuBus cards */ -extern struct nubus_board *nubus_boards; + +struct nubus_driver { + struct device_driver driver; + int (*probe)(struct nubus_board *board); + int (*remove)(struct nubus_board *board); +}; + +extern struct bus_type nubus_bus_type; /* Generic NuBus interface functions, modelled after the PCI interface */ #ifdef CONFIG_PROC_FS @@ -119,6 +126,9 @@ struct nubus_rsrc *nubus_next_rsrc_or_null(struct nubus_rsrc *from); #define for_each_func_rsrc(f) \ for (f = nubus_first_rsrc_or_null(); f; f = nubus_next_rsrc_or_null(f)) +#define for_each_board_func_rsrc(b, f) \ + for_each_func_rsrc(f) if (f->board != b) {} else + /* These are somewhat more NuBus-specific. They all return 0 for success and -1 for failure, as you'd expect. 
*/ @@ -152,6 +162,23 @@ void nubus_seq_write_rsrc_mem(struct seq_file *m, unsigned int len); unsigned char *nubus_dirptr(const struct nubus_dirent *nd); +/* Declarations relating to driver model objects */ +int nubus_bus_register(void); +int nubus_device_register(struct nubus_board *board); +int nubus_driver_register(struct nubus_driver *ndrv); +void nubus_driver_unregister(struct nubus_driver *ndrv); +int nubus_proc_show(struct seq_file *m, void *data); + +static inline void nubus_set_drvdata(struct nubus_board *board, void *data) +{ + dev_set_drvdata(&board->dev, data); +} + +static inline void *nubus_get_drvdata(struct nubus_board *board) +{ + return dev_get_drvdata(&board->dev); +} + /* Returns a pointer to the "standard" slot space. */ static inline void *nubus_slot_addr(int slot) { -- cgit v1.2.3 From 317b749e37789ecb3366f304dab905a97e650b41 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 13 Jan 2018 17:44:31 -0500 Subject: m68k/mac: Fix race conditions in OSS interrupt dispatch The interrupt dispatch algorithm used in the OSS driver seems to be subject to race conditions: an IRQ flag could be lost if asserted between the MOV instructions from and to the interrupt flag register. But testing shows that the write to the flag register has no effect, so rewrite the algorithm without the theoretical race condition. There is a second theoretical race condition here. When oss_irq() is called with say, IPL == 2 it will invoke the SCSI interrupt handler. The SCSI IRQ is then cleared by the mac_scsi driver. If SCSI and NuBus IRQs are now asserted together, oss_irq() will be invoked with IPL == 3 and the mac_scsi interrupt handler can be re-entered. This re-entrance issue is not limited to SCSI and could affect NuBus and ADB drivers too. Fix it by splitting up oss_irq() into separate handlers for each IPL. No-one seems to know how OSS irq flags can be cleared, if at all, so add a comment to this effect (actually reinstate one I previously removed). 
Testing showed that a slot IRQ with no handler can remain asserted (in this case a Radius video card) without causing problems for other IRQs. Tested-by: Stan Johnson Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- arch/m68k/mac/oss.c | 67 +++++++++++++++++++++-------------------------------- 1 file changed, 26 insertions(+), 41 deletions(-) diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 3f81892527ad..921e6c092f2c 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -53,56 +53,41 @@ void __init oss_init(void) } /* - * Handle miscellaneous OSS interrupts. + * Handle OSS interrupts. + * XXX how do you clear a pending IRQ? is it even necessary? */ -static void oss_irq(struct irq_desc *desc) +static void oss_iopism_irq(struct irq_desc *desc) { - int events = oss->irq_pending & - (OSS_IP_IOPSCC | OSS_IP_SCSI | OSS_IP_IOPISM); - - if (events & OSS_IP_IOPSCC) { - oss->irq_pending &= ~OSS_IP_IOPSCC; - generic_handle_irq(IRQ_MAC_SCC); - } - - if (events & OSS_IP_SCSI) { - oss->irq_pending &= ~OSS_IP_SCSI; - generic_handle_irq(IRQ_MAC_SCSI); - } - - if (events & OSS_IP_IOPISM) { - oss->irq_pending &= ~OSS_IP_IOPISM; - generic_handle_irq(IRQ_MAC_ADB); - } + generic_handle_irq(IRQ_MAC_ADB); } -/* - * Nubus IRQ handler, OSS style - * - * Unlike the VIA/RBV this is on its own autovector interrupt level. 
- */ +static void oss_scsi_irq(struct irq_desc *desc) +{ + generic_handle_irq(IRQ_MAC_SCSI); +} static void oss_nubus_irq(struct irq_desc *desc) { - int events, irq_bit, i; + u16 events, irq_bit; + int irq_num; events = oss->irq_pending & OSS_IP_NUBUS; - if (!events) - return; - - /* There are only six slots on the OSS, not seven */ - - i = 6; - irq_bit = 0x40; + irq_num = NUBUS_SOURCE_BASE + 5; + irq_bit = OSS_IP_NUBUS5; do { - --i; - irq_bit >>= 1; if (events & irq_bit) { - oss->irq_pending &= ~irq_bit; - generic_handle_irq(NUBUS_SOURCE_BASE + i); + events &= ~irq_bit; + generic_handle_irq(irq_num); } - } while(events & (irq_bit - 1)); + --irq_num; + irq_bit >>= 1; + } while (events); +} + +static void oss_iopscc_irq(struct irq_desc *desc) +{ + generic_handle_irq(IRQ_MAC_SCC); } /* @@ -122,14 +107,14 @@ static void oss_nubus_irq(struct irq_desc *desc) void __init oss_register_interrupts(void) { - irq_set_chained_handler(OSS_IRQLEV_IOPISM, oss_irq); - irq_set_chained_handler(OSS_IRQLEV_SCSI, oss_irq); + irq_set_chained_handler(OSS_IRQLEV_IOPISM, oss_iopism_irq); + irq_set_chained_handler(OSS_IRQLEV_SCSI, oss_scsi_irq); irq_set_chained_handler(OSS_IRQLEV_NUBUS, oss_nubus_irq); - irq_set_chained_handler(OSS_IRQLEV_IOPSCC, oss_irq); + irq_set_chained_handler(OSS_IRQLEV_IOPSCC, oss_iopscc_irq); irq_set_chained_handler(OSS_IRQLEV_VIA1, via1_irq); /* OSS_VIA1 gets enabled here because it has no machspec interrupt. */ - oss->irq_level[OSS_VIA1] = IRQ_AUTO_6; + oss->irq_level[OSS_VIA1] = OSS_IRQLEV_VIA1; } /* -- cgit v1.2.3 From 2334b1ac1235934fc196f2d25bae7f348d3bf42e Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sun, 14 Jan 2018 22:41:13 -0500 Subject: MAINTAINERS: Add NuBus subsystem entry This is legacy code but it might as well have an official maintainer. 
Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index aa71ab52fd76..e572d6cbddbc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9745,6 +9745,15 @@ S: Supported F: Documentation/filesystems/ntfs.txt F: fs/ntfs/ +NUBUS SUBSYSTEM +M: Finn Thain +L: linux-m68k@lists.linux-m68k.org +S: Maintained +F: arch/*/include/asm/nubus.h +F: drivers/nubus/ +F: include/linux/nubus.h +F: include/uapi/linux/nubus.h + NVIDIA (rivafb and nvidiafb) FRAMEBUFFER DRIVER M: Antonino Daplas L: linux-fbdev@vger.kernel.org -- cgit v1.2.3 From ddc212313f16cd65fcf5e8d9ae223f8374822e4d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 Jan 2018 16:01:36 +0100 Subject: blkcg: simplify statistic accumulation code Some older compilers (gcc-4.4 through 4.6 in particular) struggle with the way that blkg_rwstat_read() returns a structure, leading to excessive stack usage and rather inefficient code: block/blk-cgroup.c: In function 'blkg_destroy': block/blk-cgroup.c:354:1: error: the frame size of 1296 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] block/cfq-iosched.c: In function 'cfqg_stats_add_aux': block/cfq-iosched.c:753:1: error: the frame size of 1928 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] block/bfq-cgroup.c: In function 'bfqg_stats_add_aux': block/bfq-cgroup.c:299:1: error: the frame size of 1928 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] I also notice that there is no point in using atomic accesses for the local variables, so storing the temporaries in simple 'u64' variables not only avoids the stack usage on older compilers but also improves the object code on modern versions. 
Fixes: e6269c445467 ("blkcg: add blkg_[rw]stat->aux_cnt and replace cfq_group->dead_stats with it") Acked-by: Tejun Heo Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index e9825ff57b15..69bea82ebeb1 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -660,12 +660,14 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to, struct blkg_rwstat *from) { - struct blkg_rwstat v = blkg_rwstat_read(from); + u64 sum[BLKG_RWSTAT_NR]; int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_add(atomic64_read(&v.aux_cnt[i]) + - atomic64_read(&from->aux_cnt[i]), + sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]); + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]), &to->aux_cnt[i]); } -- cgit v1.2.3 From 3d1661304f0b2b51a8a43785b764822611dbdd53 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 10 Jan 2018 19:39:52 -0600 Subject: ibmvnic: Fix pending MAC address changes Due to architecture limitations, the IBM VNIC client driver is unable to perform MAC address changes unless the device has "logged in" to its backing device. Currently, pending MAC changes are handled before login, resulting in an error and failure to change the MAC address. Moving that chunk to the end of the ibmvnic_login function, when we are sure that it was successful, fixes that. The MAC address can be changed when the device is up or down, so only check if the device is in a "PROBED" state before setting the MAC address. Fixes: c26eba03e407 ("ibmvnic: Update reset infrastructure to support tunable parameters") Signed-off-by: Thomas Falcon Reviewed-by: John Allen Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 1dc4aef37d3a..4b3df17c7a45 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -756,6 +756,12 @@ static int ibmvnic_login(struct net_device *netdev) } } while (adapter->renegotiate); + /* handle pending MAC address changes after successful login */ + if (adapter->mac_change_pending) { + __ibmvnic_set_mac(netdev, &adapter->desired.mac); + adapter->mac_change_pending = false; + } + return 0; } @@ -993,11 +999,6 @@ static int ibmvnic_open(struct net_device *netdev) mutex_lock(&adapter->reset_lock); - if (adapter->mac_change_pending) { - __ibmvnic_set_mac(netdev, &adapter->desired.mac); - adapter->mac_change_pending = false; - } - if (adapter->state != VNIC_CLOSED) { rc = ibmvnic_login(netdev); if (rc) { @@ -1527,7 +1528,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) struct ibmvnic_adapter *adapter = netdev_priv(netdev); struct sockaddr *addr = p; - if (adapter->state != VNIC_OPEN) { + if (adapter->state == VNIC_PROBED) { memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr)); adapter->mac_change_pending = true; return 0; -- cgit v1.2.3 From acfb3b883f6d6a4b5d27ad7fdded11f6a09ae6dd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 16 Jan 2018 10:23:47 +0000 Subject: arm64: KVM: Fix SMCCC handling of unimplemented SMC/HVC calls KVM doesn't follow the SMCCC when it comes to unimplemented calls, and inject an UNDEF instead of returning an error. Since firmware calls are now used for security mitigation, they are becoming more common, and the undef is counter productive. Instead, let's follow the SMCCC which states that -1 must be returned to the caller when getting an unknown function number. 
Cc: Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/kvm/handle_exit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 304203fa9e33..e60494f1eef9 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -45,7 +45,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_psci_call(vcpu); if (ret < 0) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } @@ -54,7 +54,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - kvm_inject_undefined(vcpu); + vcpu_set_reg(vcpu, 0, ~0UL); return 1; } -- cgit v1.2.3 From 838cda3697073982acd276ac43387b2a0aed04b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=3D=3FUTF-8=3Fq=3FChristian=3D20K=3DC3=3DB6nig=3F=3D?= Date: Tue, 16 Jan 2018 10:43:17 +0100 Subject: x86/PCI: Enable AMD 64-bit window on resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reenable the 64-bit window during resume. 
Fixes: fa564ad96366 ("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f, 60-7f)") Reported-by: Tom St Denis Signed-off-by: Christian König Signed-off-by: Bjorn Helgaas --- arch/x86/pci/fixup.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index f6a26e3cb476..54ef19e90705 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -662,11 +662,11 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); */ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) { + static const char *name = "PCI Bus 0000:00"; + struct resource *res, *conflict; u32 base, limit, high; struct pci_dev *other; - struct resource *res; unsigned i; - int r; if (!(pci_probe & PCI_BIG_ROOT_WINDOW)) return; @@ -707,21 +707,26 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) * Allocate a 256GB window directly below the 0xfd00000000 hardware * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6). 
*/ - res->name = "PCI Bus 0000:00"; + res->name = name; res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_WINDOW; res->start = 0xbd00000000ull; res->end = 0xfd00000000ull - 1; - r = request_resource(&iomem_resource, res); - if (r) { + conflict = request_resource_conflict(&iomem_resource, res); + if (conflict) { kfree(res); - return; - } + if (conflict->name != name) + return; - dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", - res); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + /* We are resuming from suspend; just reenable the window */ + res = conflict; + } else { + dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", + res); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + pci_bus_add_resource(dev->bus, res, 0); + } base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK; @@ -733,13 +738,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) pci_write_config_dword(dev, AMD_141b_MMIO_HIGH(i), high); pci_write_config_dword(dev, AMD_141b_MMIO_LIMIT(i), limit); pci_write_config_dword(dev, AMD_141b_MMIO_BASE(i), base); - - pci_bus_add_resource(dev->bus, res, 0); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); 
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); #endif -- cgit v1.2.3 From 664eadd6f44b3d71dcc62d0a825319000de0d5c9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 14 Jan 2018 13:34:02 -0800 Subject: bcma: Fix 'allmodconfig' and BCMA builds on MIPS targets Mips builds with BCMA host mode enabled fail in mainline and -next with: In file included from include/linux/bcma/bcma.h:10:0, from drivers/bcma/bcma_private.h:9, from drivers/bcma/main.c:8: include/linux/bcma/bcma_driver_pci.h:218:24: error: field 'pci_controller' has incomplete type Bisect points to commit d41e6858ba58c ("MIPS: Kconfig: Set default MIPS system type as generic") as the culprit. Analysis shows that the commit changes PCI configuration and enables PCI_DRIVERS_GENERIC. This in turn disables PCI_DRIVERS_LEGACY. 'struct pci_controller' is, however, only defined if PCI_DRIVERS_LEGACY is enabled. Ultimately that means that BCMA_DRIVER_PCI_HOSTMODE depends on PCI_DRIVERS_LEGACY. Add the missing dependency. Fixes: d41e6858ba58c ("MIPS: Kconfig: Set default MIPS system type as ...") Cc: Matt Redfearn Cc: James Hogan Signed-off-by: Guenter Roeck Reviewed-by: James Hogan Signed-off-by: Kalle Valo --- drivers/bcma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig index 02d78f6cecbb..ba8acca036df 100644 --- a/drivers/bcma/Kconfig +++ b/drivers/bcma/Kconfig @@ -55,7 +55,7 @@ config BCMA_DRIVER_PCI config BCMA_DRIVER_PCI_HOSTMODE bool "Driver for PCI core working in hostmode" - depends on MIPS && BCMA_DRIVER_PCI + depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY help PCI core hostmode operation (external PCI bus).
-- cgit v1.2.3 From 58eae1416b804d900014d84feadda7195007cc30 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 15 Jan 2018 21:17:14 +0000 Subject: ssb: Disable PCI host for PCI_DRIVERS_GENERIC Since commit d41e6858ba58 ("MIPS: Kconfig: Set default MIPS system type as generic") changed the default MIPS platform to the "generic" platform, which uses PCI_DRIVERS_GENERIC instead of PCI_DRIVERS_LEGACY, various files in drivers/ssb/ have failed to build. This is particularly due to the existence of struct pci_controller being dependent on PCI_DRIVERS_LEGACY since commit c5611df96804 ("MIPS: PCI: Introduce CONFIG_PCI_DRIVERS_LEGACY"), so add that dependency to Kconfig to prevent these files being built for the "generic" platform including all{yes,mod}config builds. Fixes: c5611df96804 ("MIPS: PCI: Introduce CONFIG_PCI_DRIVERS_LEGACY") Signed-off-by: James Hogan Cc: Michael Buesch Cc: Ralf Baechle Cc: Paul Burton Cc: Matt Redfearn Cc: Guenter Roeck Cc: linux-wireless@vger.kernel.org Cc: linux-mips@linux-mips.org Tested-by: Guenter Roeck Signed-off-by: Kalle Valo --- drivers/ssb/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig index d8e4219c2324..71c73766ee22 100644 --- a/drivers/ssb/Kconfig +++ b/drivers/ssb/Kconfig @@ -32,7 +32,7 @@ config SSB_BLOCKIO config SSB_PCIHOST_POSSIBLE bool - depends on SSB && (PCI = y || PCI = SSB) + depends on SSB && (PCI = y || PCI = SSB) && PCI_DRIVERS_LEGACY default y config SSB_PCIHOST -- cgit v1.2.3 From cc124d5cc8d81985c3511892d7a6d546552ff754 Mon Sep 17 00:00:00 2001 From: Wright Feng Date: Tue, 16 Jan 2018 17:26:50 +0800 Subject: brcmfmac: fix CLM load error for legacy chips when user helper is enabled For legacy chips without CLM blob files, kernel with user helper function returns -EAGAIN when we request_firmware(), and then driver got failed when bringing up legacy chips. 
We expect that the CLM blob file for a legacy chip does not exist in the firmware path, but the -ENOENT error is transferred to -EAGAIN in firmware_class.c with user helper. Because of that, we continue with CLM data currently present in firmware if getting error from doing request_firmware(). Cc: stable@vger.kernel.org # v4.15.y Reviewed-by: Arend van Spriel Signed-off-by: Wright Feng Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index 6a59d0609d30..9be0b051066a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -182,12 +182,9 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp) err = request_firmware(&clm, clm_name, dev); if (err) { - if (err == -ENOENT) { - brcmf_dbg(INFO, "continue with CLM data currently present in firmware\n"); - return 0; - } - brcmf_err("request CLM blob file failed (%d)\n", err); - return err; + brcmf_info("no clm_blob available(err=%d), device may have limited channels available\n", + err); + return 0; } chunk_buf = kzalloc(sizeof(*chunk_buf) + MAX_CHUNK_LEN - 1, GFP_KERNEL); -- cgit v1.2.3 From 625637bf4afa45204bd87e4218645182a919485a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 15 Jan 2018 17:01:19 +0800 Subject: sctp: reinit stream if stream outcnt has been change by sinit in sendmsg After introducing sctp_stream structure, sctp uses stream->outcnt as the out stream nums instead of c.sinit_num_ostreams. However when users use sinit in cmsg, it only updates c.sinit_num_ostreams in sctp_sendmsg. At that moment, stream->outcnt is still using previous value. If its value is not updated, the sinit_num_ostreams of sinit could not really work.
This patch is to fix it by updating stream->outcnt and reiniting stream if stream outcnt has been changed by sinit in sendmsg. Fixes: a83863174a61 ("sctp: prepare asoc stream for stream reconf") Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9b01e994f661..15ae018b386f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1883,8 +1883,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) */ if (sinit) { if (sinit->sinit_num_ostreams) { - asoc->c.sinit_num_ostreams = - sinit->sinit_num_ostreams; + __u16 outcnt = sinit->sinit_num_ostreams; + + asoc->c.sinit_num_ostreams = outcnt; + /* outcnt has been changed, so re-init stream */ + err = sctp_stream_init(&asoc->stream, outcnt, 0, + GFP_KERNEL); + if (err) + goto out_free; } if (sinit->sinit_max_instreams) { asoc->c.sinit_max_instreams = -- cgit v1.2.3 From a0ff660058b88d12625a783ce9e5c1371c87951f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 15 Jan 2018 17:01:36 +0800 Subject: sctp: return error if the asoc has been peeled off in sctp_wait_for_sndbuf After commit cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep"), it may change to lock another sk if the asoc has been peeled off in sctp_wait_for_sndbuf. However, the asoc's new sk could be already closed elsewhere, as it's in the sendmsg context of the old sk that can't avoid the new sk's closing. If the sk's last one refcnt is held by this asoc, later on after putting this asoc, the new sk will be freed, while under its own lock. This patch is to revert that commit, but fix the old issue by returning error under the old sk's lock.
Fixes: cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep") Reported-by: syzbot+ac6ea7baa4432811eb50@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 15ae018b386f..feb2ca69827a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -85,7 +85,7 @@ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len, struct sock **orig_sk); + size_t msg_len); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); @@ -1977,7 +1977,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { /* sk can be changed by peel off when waiting for buf. */ - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) { if (err == -ESRCH) { /* asoc is already dead. */ @@ -8022,12 +8022,12 @@ void sctp_sock_rfree(struct sk_buff *skb) /* Helper function to wait for space in the sndbuf. 
*/ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len, struct sock **orig_sk) + size_t msg_len) { struct sock *sk = asoc->base.sk; - int err = 0; long current_timeo = *timeo_p; DEFINE_WAIT(wait); + int err = 0; pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, *timeo_p, msg_len); @@ -8056,17 +8056,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); - if (sk != asoc->base.sk) { - release_sock(sk); - sk = asoc->base.sk; - lock_sock(sk); - } + if (sk != asoc->base.sk) + goto do_error; *timeo_p = current_timeo; } out: - *orig_sk = sk; finish_wait(&asoc->wait, &wait); /* Release the association's refcnt. */ -- cgit v1.2.3 From c5006b8aa74599ce19104b31d322d2ea9ff887cc Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 15 Jan 2018 17:02:00 +0800 Subject: sctp: do not allow the v4 socket to bind a v4mapped v6 address The check in sctp_sockaddr_af is not robust enough to forbid binding a v4mapped v6 addr on a v4 socket. The worse thing is that v4 socket's bind_verify would not convert this v4mapped v6 addr to a v4 addr. syzbot even reported a crash as the v4 socket bound a v6 addr. This patch is to fix it by doing the common sa.sa_family check first, then AF_INET check for v4mapped v6 addrs. Fixes: 7dab83de50c7 ("sctp: Support ipv6only AF_INET6 sockets.") Reported-by: syzbot+7b7b518b1228d2743963@syzkaller.appspotmail.com Acked-by: Neil Horman Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index feb2ca69827a..039fcb618c34 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -335,16 +335,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, if (len < sizeof (struct sockaddr)) return NULL; + if (!opt->pf->af_supported(addr->sa.sa_family, opt)) + return NULL; + /* V4 mapped address are really of AF_INET family */ if (addr->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { - if (!opt->pf->af_supported(AF_INET, opt)) - return NULL; - } else { - /* Does this PF support this AF? */ - if (!opt->pf->af_supported(addr->sa.sa_family, opt)) - return NULL; - } + ipv6_addr_v4mapped(&addr->v6.sin6_addr) && + !opt->pf->af_supported(AF_INET, opt)) + return NULL; /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); -- cgit v1.2.3 From a2284d912bfc865cdca4c00488e08a3550f9a405 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Jan 2018 03:46:08 +0100 Subject: bpf, arm64: fix stack_depth tracking in combination with tail calls Using dynamic stack_depth tracking in arm64 JIT is currently broken in combination with tail calls. In prologue, we cache ctx->stack_size and adjust SP reg for setting up function call stack, and tearing it down again in epilogue. Problem is that when doing a tail call, the cached ctx->stack_size might not be the same. One way to fix the problem with minimal overhead is to re-adjust SP in emit_bpf_tail_call() and properly adjust it to the current program's ctx->stack_size. Tested on Cavium ThunderX ARMv8. 
Fixes: f1c9eed7f437 ("bpf, arm64: take advantage of stack_depth tracking") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ba38d403abb2..bb32f7f6dd0f 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -148,7 +148,8 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) -#define PROLOGUE_OFFSET 8 +/* Tail call offset to jump into */ +#define PROLOGUE_OFFSET 7 static int build_prologue(struct jit_ctx *ctx) { @@ -200,19 +201,19 @@ static int build_prologue(struct jit_ctx *ctx) /* Initialize tail_call_cnt */ emit(A64_MOVZ(1, tcc, 0, 0), ctx); - /* 4 byte extra for skb_copy_bits buffer */ - ctx->stack_size = prog->aux->stack_depth + 4; - ctx->stack_size = STACK_ALIGN(ctx->stack_size); - - /* Set up function call stack */ - emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); - cur_offset = ctx->idx - idx0; if (cur_offset != PROLOGUE_OFFSET) { pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", cur_offset, PROLOGUE_OFFSET); return -1; } + + /* 4 byte extra for skb_copy_bits buffer */ + ctx->stack_size = prog->aux->stack_depth + 4; + ctx->stack_size = STACK_ALIGN(ctx->stack_size); + + /* Set up function call stack */ + emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); return 0; } @@ -260,11 +261,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(A64_LDR64(prg, tmp, prg), ctx); emit(A64_CBZ(1, prg, jmp_offset), ctx); - /* goto *(prog->bpf_func + prologue_size); */ + /* goto *(prog->bpf_func + prologue_offset); */ off = offsetof(struct bpf_prog, bpf_func); emit_a64_mov_i64(tmp, off, ctx); emit(A64_LDR64(tmp, prg, tmp), ctx); emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(A64_ADD_I(1, A64_SP, A64_SP, 
ctx->stack_size), ctx); emit(A64_BR(tmp), ctx); /* out: */ -- cgit v1.2.3 From a5b1379afbfabf91e3a689e82ac619a7157336b3 Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Mon, 15 Jan 2018 13:24:28 -0500 Subject: lan78xx: Fix failure in USB Full Speed Fix initialize the uninitialized tx_qlen to an appropriate value when USB Full Speed is used. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 94c7804903c4..ec56ff29aac4 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2396,6 +2396,7 @@ static int lan78xx_reset(struct lan78xx_net *dev) buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE; dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE; dev->rx_qlen = 4; + dev->tx_qlen = 4; } ret = lan78xx_write_reg(dev, BURST_CAP, buf); -- cgit v1.2.3 From 0d9c9f0f40ca262b67fc06a702b85f3976f5e1a1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 15 Jan 2018 11:47:53 -0800 Subject: nfp: use the correct index for link speed table sts variable is holding link speed as well as state. We should be using ls to index into ls_to_ethtool. Fixes: 265aeb511bd5 ("nfp: add support for .get_link_ksettings()") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 2801ecd09eab..6c02b2d6ba06 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -333,7 +333,7 @@ nfp_net_get_link_ksettings(struct net_device *netdev, ls >= ARRAY_SIZE(ls_to_ethtool)) return 0; - cmd->base.speed = ls_to_ethtool[sts]; + cmd->base.speed = ls_to_ethtool[ls]; cmd->base.duplex = DUPLEX_FULL; return 0; -- cgit v1.2.3 From 70eeff66c4696cee4076d6388b6bede5bd7ff71c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 15 Jan 2018 12:24:49 -0800 Subject: qed: Fix potential use-after-free in qed_spq_post() We need to check if p_ent->comp_mode is QED_SPQ_MODE_EBLOCK before calling qed_spq_add_entry(). The test is fine if the mode is EBLOCK, but if it isn't then qed_spq_add_entry() might kfree(p_ent). Signed-off-by: Roland Dreier Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index be48d9abd001..3588081b2e27 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -776,6 +776,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, int rc = 0; struct qed_spq *p_spq = p_hwfn ? p_hwfn->p_spq : NULL; bool b_ret_ent = true; + bool eblock; if (!p_hwfn) return -EINVAL; @@ -794,6 +795,11 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, if (rc) goto spq_post_fail; + /* Check if entry is in block mode before qed_spq_add_entry, + * which might kfree p_ent.
+ */ + eblock = (p_ent->comp_mode == QED_SPQ_MODE_EBLOCK); + /* Add the request to the pending queue */ rc = qed_spq_add_entry(p_hwfn, p_ent, p_ent->priority); if (rc) @@ -811,7 +817,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, spin_unlock_bh(&p_spq->lock); - if (p_ent->comp_mode == QED_SPQ_MODE_EBLOCK) { + if (eblock) { /* For entries in QED BLOCK mode, the completion code cannot * perform the necessary cleanup - if it did, we couldn't * access p_ent here to see whether it's successful or not. -- cgit v1.2.3 From 81d947e2b8dd2394586c3eaffdd2357797d3bf59 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 15 Jan 2018 23:12:09 +0100 Subject: net, sched: fix panic when updating miniq {b,q}stats While working on fixing another bug, I ran into the following panic on arm64 by simply attaching clsact qdisc, adding a filter and running traffic on ingress to it: [...] [ 178.188591] Unable to handle kernel read from unreadable memory at virtual address 810fb501f000 [ 178.197314] Mem abort info: [ 178.200121] ESR = 0x96000004 [ 178.203168] Exception class = DABT (current EL), IL = 32 bits [ 178.209095] SET = 0, FnV = 0 [ 178.212157] EA = 0, S1PTW = 0 [ 178.215288] Data abort info: [ 178.218175] ISV = 0, ISS = 0x00000004 [ 178.222019] CM = 0, WnR = 0 [ 178.224997] user pgtable: 4k pages, 48-bit VAs, pgd = 0000000023cb3f33 [ 178.231531] [0000810fb501f000] *pgd=0000000000000000 [ 178.236508] Internal error: Oops: 96000004 [#1] SMP [...] [ 178.311855] CPU: 73 PID: 2497 Comm: ping Tainted: G W 4.15.0-rc7+ #5 [ 178.319413] Hardware name: FOXCONN R2-1221R-A4/C2U4N_MB, BIOS G31FB18A 03/31/2017 [ 178.326887] pstate: 60400005 (nZCv daif +PAN -UAO) [ 178.331685] pc : __netif_receive_skb_core+0x49c/0xac8 [ 178.336728] lr : __netif_receive_skb+0x28/0x78 [ 178.341161] sp : ffff00002344b750 [ 178.344465] x29: ffff00002344b750 x28: ffff810fbdfd0580 [ 178.349769] x27: 0000000000000000 x26: ffff000009378000 [...] 
[ 178.418715] x1 : 0000000000000054 x0 : 0000000000000000 [ 178.424020] Process ping (pid: 2497, stack limit = 0x000000009f0a3ff4) [ 178.430537] Call trace: [ 178.432976] __netif_receive_skb_core+0x49c/0xac8 [ 178.437670] __netif_receive_skb+0x28/0x78 [ 178.441757] process_backlog+0x9c/0x160 [ 178.445584] net_rx_action+0x2f8/0x3f0 [...] Reason is that sch_ingress and sch_clsact are doing mini_qdisc_pair_init() which sets up miniq pointers to cpu_{b,q}stats from the underlying qdisc. Problem is that this cannot work since they are actually set up right after the qdisc ->init() callback in qdisc_create(), so first packet going into sch_handle_ingress() tries to call mini_qdisc_bstats_cpu_update() and we therefore panic. In order to fix this, allocation of {b,q}stats needs to happen before we call into ->init(). In net-next, there's already such option through commit d59f5ffa59d8 ("net: sched: a dflt qdisc may be used with per cpu stats"). However, the bug needs to be fixed in net still for 4.15. Thus, include these bits to reduce any merge churn and reuse the static_flags field to set TCQ_F_CPUSTATS, and remove the allocation from qdisc_create() since there is no other user left. Prashant Bhole ran into the same issue but for net-next, thus adding him below as well as co-author. Same issue was also reported by Sandipan Das when using bcc. Fixes: 46209401f8f6 ("net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath") Reference: https://lists.iovisor.org/pipermail/iovisor-dev/2018-January/001190.html Reported-by: Sandipan Das Co-authored-by: Prashant Bhole Co-authored-by: John Fastabend Signed-off-by: Daniel Borkmann Cc: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 2 ++ net/sched/sch_api.c | 15 +-------------- net/sched/sch_generic.c | 18 +++++++++++++++++- net/sched/sch_ingress.c | 19 ++++--------------- 4 files changed, 24 insertions(+), 30 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 83a3e47d5845..becf86aa4ac6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -179,6 +179,7 @@ struct Qdisc_ops { const struct Qdisc_class_ops *cl_ops; char id[IFNAMSIZ]; int priv_size; + unsigned int static_flags; int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, @@ -444,6 +445,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops); +void qdisc_free(struct Qdisc *qdisc); struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, const struct Qdisc_ops *ops, u32 parentid); void __qdisc_calculate_pkt_len(struct sk_buff *skb, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 0f1eab99ff4e..52529b7f8d96 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1063,17 +1063,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev, } if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { - if (qdisc_is_percpu_stats(sch)) { - sch->cpu_bstats = - netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); - if (!sch->cpu_bstats) - goto err_out4; - - sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue); - if (!sch->cpu_qstats) - goto err_out4; - } - if (tca[TCA_STAB]) { stab = qdisc_get_stab(tca[TCA_STAB]); if (IS_ERR(stab)) { @@ -1115,7 +1104,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, ops->destroy(sch); err_out3: dev_put(dev); - kfree((char *) sch - sch->padded); + qdisc_free(sch); err_out2: module_put(ops->owner); err_out: @@ -1123,8 +1112,6 @@ err_out: return NULL; err_out4: - free_percpu(sch->cpu_bstats); - free_percpu(sch->cpu_qstats); /* * Any broken qdiscs that 
would require a ops->reset() here? * The qdisc was never in action so it shouldn't be necessary. diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 661c7144b53a..cac003fddf3e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -633,6 +633,19 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, qdisc_skb_head_init(&sch->q); spin_lock_init(&sch->q.lock); + if (ops->static_flags & TCQ_F_CPUSTATS) { + sch->cpu_bstats = + netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + if (!sch->cpu_bstats) + goto errout1; + + sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue); + if (!sch->cpu_qstats) { + free_percpu(sch->cpu_bstats); + goto errout1; + } + } + spin_lock_init(&sch->busylock); lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); @@ -642,6 +655,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, dev->qdisc_running_key ?: &qdisc_running_key); sch->ops = ops; + sch->flags = ops->static_flags; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; sch->dev_queue = dev_queue; @@ -649,6 +663,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, refcount_set(&sch->refcnt, 1); return sch; +errout1: + kfree(p); errout: return ERR_PTR(err); } @@ -698,7 +714,7 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); -static void qdisc_free(struct Qdisc *qdisc) +void qdisc_free(struct Qdisc *qdisc) { if (qdisc_is_percpu_stats(qdisc)) { free_percpu(qdisc->cpu_bstats); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index fc1286f499c1..003e1b063447 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -66,7 +66,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); - int err; net_inc_ingress_queue(); @@ -76,13 +75,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt) q->block_info.chain_head_change = 
clsact_chain_head_change; q->block_info.chain_head_change_priv = &q->miniqp; - err = tcf_block_get_ext(&q->block, sch, &q->block_info); - if (err) - return err; - - sch->flags |= TCQ_F_CPUSTATS; - - return 0; + return tcf_block_get_ext(&q->block, sch, &q->block_info); } static void ingress_destroy(struct Qdisc *sch) @@ -121,6 +114,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", .priv_size = sizeof(struct ingress_sched_data), + .static_flags = TCQ_F_CPUSTATS, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, @@ -192,13 +186,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) q->egress_block_info.chain_head_change = clsact_chain_head_change; q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; - err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); - if (err) - return err; - - sch->flags |= TCQ_F_CPUSTATS; - - return 0; + return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); } static void clsact_destroy(struct Qdisc *sch) @@ -225,6 +213,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { .cl_ops = &clsact_class_ops, .id = "clsact", .priv_size = sizeof(struct clsact_sched_data), + .static_flags = TCQ_F_CPUSTATS, .init = clsact_init, .destroy = clsact_destroy, .dump = ingress_dump, -- cgit v1.2.3 From 07c7b6a52503ac13ae357a8b3ef3456590a64b65 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 Jan 2018 09:51:51 +0100 Subject: gpio: mmio: Also read bits that are zero The code for .get_multiple() has bugs: 1. The simple .get_multiple() just reads a register, masks it and sets the return value. This is not correct: we only want to assign values (whether 0 or 1) to the bits that are set in the mask. Fix this by using &= ~mask to clear all bits in the mask and then |= val & mask to set the corresponding bits from the read. 2. 
The bgpio_get_multiple_be() call has a similar problem: it uses the |= operator to set the bits, so only the bits in the mask are affected, but it fails to clear all returned bits from the mask initially, so some bits will be returned erroneously set to 1. 3. The bgpio_get_set_multiple() again fails to clear the bits from the mask. 4. find_next_bit() wasn't handled correctly, use a totally different approach for one function and change the other function to follow the design pattern of assigning the first bit to -1, then use bit + 1 in the for loop and < num_iterations as break condition. Fixes: 80057cb417b2 ("gpio-mmio: Use the new .get_multiple() callback") Cc: Bartosz Golaszewski Reported-by: Clemens Gruber Tested-by: Clemens Gruber Reported-by: Lukas Wunner Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mmio.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c index f9042bcc27a4..7b14d6280e44 100644 --- a/drivers/gpio/gpio-mmio.c +++ b/drivers/gpio/gpio-mmio.c @@ -152,14 +152,13 @@ static int bgpio_get_set_multiple(struct gpio_chip *gc, unsigned long *mask, { unsigned long get_mask = 0; unsigned long set_mask = 0; - int bit = 0; - while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio) { - if (gc->bgpio_dir & BIT(bit)) - set_mask |= BIT(bit); - else - get_mask |= BIT(bit); - } + /* Make sure we first clear any bits that are zero when we read the register */ + *bits &= ~*mask; + + /* Exploit the fact that we know which directions are set */ + set_mask = *mask & gc->bgpio_dir; + get_mask = *mask & ~gc->bgpio_dir; if (set_mask) *bits |= gc->read_reg(gc->reg_set) & set_mask; @@ -176,13 +175,13 @@ static int bgpio_get(struct gpio_chip *gc, unsigned int gpio) /* * This only works if the bits in the GPIO register are in native endianness. - * It is dirt simple and fast in this case. (Also the most common case.)
*/ static int bgpio_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { - - *bits = gc->read_reg(gc->reg_dat) & *mask; + /* Make sure we first clear any bits that are zero when we read the register */ + *bits &= ~*mask; + *bits |= gc->read_reg(gc->reg_dat) & *mask; return 0; } @@ -196,9 +195,12 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask, unsigned long val; int bit; + /* Make sure we first clear any bits that are zero when we read the register */ + *bits &= ~*mask; + /* Create a mirrored mask */ - bit = 0; - while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio) + bit = -1; + while ((bit = find_next_bit(mask, gc->ngpio, bit + 1)) < gc->ngpio) readmask |= bgpio_line2mask(gc, bit); /* Read the register */ @@ -208,8 +210,8 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask, * Mirror the result into the "bits" result, this will give line 0 * in bit 0 ... line 31 in bit 31 for a 32bit register. */ - bit = 0; - while ((bit = find_next_bit(&val, gc->ngpio, bit)) != gc->ngpio) + bit = -1; + while ((bit = find_next_bit(&val, gc->ngpio, bit + 1)) < gc->ngpio) *bits |= bgpio_line2mask(gc, bit); return 0; -- cgit v1.2.3 From f37a8cb84cce18762e8f86a70bd6a49a66ab964c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 16 Jan 2018 23:30:10 +0100 Subject: bpf: reject stores into ctx via st and xadd Alexei found that verifier does not reject stores into context via BPF_ST instead of BPF_STX. And while looking at it, we also should not allow XADD variant of BPF_STX. The context rewriter is only assuming either BPF_LDX_MEM- or BPF_STX_MEM-type operations, thus reject anything other than that so that assumptions in the rewriter properly hold. Add test cases as well for BPF selftests. 
Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields") Reported-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 19 +++++++++++++++++++ tools/testing/selftests/bpf/test_verifier.c | 29 +++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b7448347e6b6..eb062b0fbf27 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -978,6 +978,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); } +static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = cur_regs(env) + regno; + + return reg->type == PTR_TO_CTX; +} + static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size, bool strict) @@ -1258,6 +1265,12 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins return -EACCES; } + if (is_ctx_reg(env, insn->dst_reg)) { + verbose(env, "BPF_XADD stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check whether atomic_add can read the memory */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1); @@ -3993,6 +4006,12 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + if (is_ctx_reg(env, insn->dst_reg)) { + verbose(env, "BPF_ST stores into R%d context is not allowed\n", + insn->dst_reg); + return -EACCES; + } + /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 6bafa5456568..67e7c41674d2 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ 
b/tools/testing/selftests/bpf/test_verifier.c @@ -2592,6 +2592,29 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "context stores via ST", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_ST stores into R1 context is not allowed", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "context stores via XADD", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1, + BPF_REG_0, offsetof(struct __sk_buff, mark), 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_XADD stores into R1 context is not allowed", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "direct packet access: test1", .insns = { @@ -4312,7 +4335,8 @@ static struct bpf_test tests[] = { .fixup_map1 = { 2 }, .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, - .result = ACCEPT, + .result = REJECT, + .errstr = "BPF_XADD stores into R1 context is not allowed", }, { "leak pointer into ctx 2", @@ -4326,7 +4350,8 @@ static struct bpf_test tests[] = { }, .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, - .result = ACCEPT, + .result = REJECT, + .errstr = "BPF_XADD stores into R1 context is not allowed", }, { "leak pointer into ctx 3", -- cgit v1.2.3 From 0d83620fd18e5b8d79d390e482583b379a6a986d Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Wed, 3 Jan 2018 21:58:00 +1300 Subject: alpha: extend memset16 to EV6 optimised routines Commit 92ce4c3ea7c4, "alpha: add support for memset16", renamed the function memsetw() to be memset16() but neglected to do this for the EV6 optimised version, thus when building a kernel optimised for EV6 (or later) link errors result. This extends the memset16 support to EV6. 
Signed-off-by: Michael Cree Signed-off-by: Matt Turner --- arch/alpha/lib/ev6-memset.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S index 316a99aa9efe..1cfcfbbea6f0 100644 --- a/arch/alpha/lib/ev6-memset.S +++ b/arch/alpha/lib/ev6-memset.S @@ -18,7 +18,7 @@ * The algorithm for the leading and trailing quadwords remains the same, * however the loop has been unrolled to enable better memory throughput, * and the code has been replicated for each of the entry points: __memset - * and __memsetw to permit better scheduling to eliminate the stalling + * and __memset16 to permit better scheduling to eliminate the stalling * encountered during the mask replication. * A future enhancement might be to put in a byte store loop for really * small (say < 32 bytes) memset()s. Whether or not that change would be @@ -34,7 +34,7 @@ .globl memset .globl __memset .globl ___memset - .globl __memsetw + .globl __memset16 .globl __constant_c_memset .ent ___memset @@ -415,9 +415,9 @@ end: * to mask stalls. Note that entry point names also had to change */ .align 5 - .ent __memsetw + .ent __memset16 -__memsetw: +__memset16: .frame $30,0,$26,0 .prologue 0 @@ -596,8 +596,8 @@ end_w: nop ret $31,($26),1 # L0 : - .end __memsetw - EXPORT_SYMBOL(__memsetw) + .end __memset16 + EXPORT_SYMBOL(__memset16) memset = ___memset __memset = ___memset -- cgit v1.2.3 From 4fdec2034b7540dda461c6ba33325dfcff345c64 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 16 Jan 2018 16:42:25 +0100 Subject: x86/cpufeature: Move processor tracing out of scattered features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Processor tracing is already enumerated in word 9 (CPUID[7,0].EBX), so do not duplicate it in the scattered features word. Besides being more tidy, this will be useful for KVM when it presents processor tracing to the guests. 
KVM selects host features that are supported by both the host kernel (depending on command line options, CPU errata, or whatever) and KVM. Whenever a full feature word exists, KVM's code is written in the expectation that the CPUID bit number matches the X86_FEATURE_* bit number, but this is not the case for X86_FEATURE_INTEL_PT. Signed-off-by: Paolo Bonzini Cc: Borislav Petkov Cc: Linus Torvalds Cc: Luwei Kang Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1516117345-34561-1-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/scattered.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index aa09559b2c0b..25b9375c1484 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -206,7 +206,6 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ @@ -246,6 +245,7 @@ #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* 
AVX-512 Conflict Detection */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 05459ad3db46..d0e69769abfd 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -21,7 +21,6 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, -- cgit v1.2.3 From 2b0bc68cccc70f1a61b90b49012e917eea4cb251 Mon Sep 17 00:00:00 2001 From: Woody Suwalski Date: Wed, 17 Jan 2018 09:07:47 +0100 Subject: drm/vmwgfx: Fix a boot time warning The 4.15 vmwgfx driver shows a warning during boot. It is caused by a mismatch between the result of vmw_enable_vblank() and what the drm_atomic_helper expects. Signed-off by: Woody Suwalski Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 641294aef165..fcd58145d0da 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1863,7 +1863,7 @@ u32 vmw_get_vblank_counter(struct drm_device *dev, unsigned int pipe) */ int vmw_enable_vblank(struct drm_device *dev, unsigned int pipe) { - return -ENOSYS; + return -EINVAL; } /** -- cgit v1.2.3 From 6cfb521ac0d5b97470883ff9b7facae264b7ab12 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 16 Jan 2018 12:52:28 -0800 Subject: module: Add retpoline tag to VERMAGIC Add a marker for retpoline to the module VERMAGIC. This catches the case when a non RETPOLINE compiled module gets loaded into a retpoline kernel, making it insecure. It doesn't handle the case when retpoline has been runtime disabled. 
Even in this case the match of the retcompile status will be enforced. This implies that even with retpoline run time disabled all modules loaded need to be recompiled. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Acked-by: David Woodhouse Cc: rusty@rustcorp.com.au Cc: arjan.van.de.ven@intel.com Cc: jeyu@kernel.org Cc: torvalds@linux-foundation.org Link: https://lkml.kernel.org/r/20180116205228.4890-1-andi@firstfloor.org --- include/linux/vermagic.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index bae807eb2933..853291714ae0 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -31,11 +31,17 @@ #else #define MODULE_RANDSTRUCT_PLUGIN #endif +#ifdef RETPOLINE +#define MODULE_VERMAGIC_RETPOLINE "retpoline " +#else +#define MODULE_VERMAGIC_RETPOLINE "" +#endif #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ MODULE_ARCH_VERMAGIC \ - MODULE_RANDSTRUCT_PLUGIN + MODULE_RANDSTRUCT_PLUGIN \ + MODULE_VERMAGIC_RETPOLINE -- cgit v1.2.3 From d47924417319e3b6a728c0b690f183e75bc2a702 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 16 Jan 2018 19:59:59 +0100 Subject: x86/intel_rdt/cqm: Prevent use after free intel_rdt_offline_cpu() -> domain_remove_cpu() frees memory first and then proceeds to access it. BUG: KASAN: use-after-free in find_first_bit+0x1f/0x80 Read of size 8 at addr ffff883ff7c1e780 by task cpuhp/31/195 find_first_bit+0x1f/0x80 has_busy_rmid+0x47/0x70 intel_rdt_offline_cpu+0x4b4/0x510 Freed by task 195: kfree+0x94/0x1a0 intel_rdt_offline_cpu+0x17d/0x510 Do the teardown first and then free memory.
Fixes: 24247aeeabe9 ("x86/intel_rdt/cqm: Improve limbo list processing") Reported-by: Joseph Salisbury Signed-off-by: Thomas Gleixner Cc: Ravi Shankar Cc: Peter Zilstra Cc: Stephane Eranian Cc: Vikas Shivappa Cc: Andi Kleen Cc: "Roderick W. Smith" Cc: 1733662@bugs.launchpad.net Cc: Fenghua Yu Cc: Tony Luck Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801161957510.2366@nanos --- arch/x86/kernel/cpu/intel_rdt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 88dcf8479013..99442370de40 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) */ if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); - kfree(d->ctrl_val); - kfree(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); list_del(&d->list); if (is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); @@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) cancel_delayed_work(&d->cqm_limbo); } + kfree(d->ctrl_val); + kfree(d->rmid_busy_llc); + kfree(d->mbm_total); + kfree(d->mbm_local); kfree(d); return; } -- cgit v1.2.3 From 45d55e7bac4028af93f5fa324e69958a0b868e96 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 16 Jan 2018 12:20:18 +0100 Subject: x86/apic/vector: Fix off by one in error path Keith reported the following warning: WARNING: CPU: 28 PID: 1420 at kernel/irq/matrix.c:222 irq_matrix_remove_managed+0x10f/0x120 x86_vector_free_irqs+0xa1/0x180 x86_vector_alloc_irqs+0x1e4/0x3a0 msi_domain_alloc+0x62/0x130 The reason for this is that if the vector allocation fails the error handling code tries to free the failed vector as well, which causes the above imbalance warning to trigger. Adjust the error path to handle this correctly. 
Fixes: b5dc8e6c21e7 ("x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors") Reported-by: Keith Busch Signed-off-by: Thomas Gleixner Tested-by: Keith Busch Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801161217300.1823@nanos --- arch/x86/kernel/apic/vector.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f8b03bb8e725..3cc471beb50b 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, err = assign_irq_vector_policy(irqd, info); trace_vector_setup(virq + i, false, err); - if (err) + if (err) { + irqd->chip_data = NULL; + free_apic_chip_data(apicd); goto error; + } } return 0; error: - x86_vector_free_irqs(domain, virq, i + 1); + x86_vector_free_irqs(domain, virq, i); return err; } -- cgit v1.2.3 From c2b691ee35004ba3d5428cf48672cbbf1a50fbfb Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 17 Jan 2018 15:22:55 +0800 Subject: ALSA: hda/realtek - Support headset mode for ALC215/ALC285/ALC289 This patch will enable headset mode for ALC215/ALC285/ALC289 platform. 
Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 552646c049fa..2efba4bd9f2b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4086,8 +4086,11 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) case 0x10ec0668: alc_process_coef_fw(codec, coef0668); break; + case 0x10ec0215: case 0x10ec0225: + case 0x10ec0285: case 0x10ec0295: + case 0x10ec0289: case 0x10ec0299: alc_process_coef_fw(codec, coef0225); break; @@ -4209,8 +4212,11 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, alc_process_coef_fw(codec, coef0688); snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50); break; + case 0x10ec0215: case 0x10ec0225: + case 0x10ec0285: case 0x10ec0295: + case 0x10ec0289: case 0x10ec0299: alc_process_coef_fw(codec, alc225_pre_hsmode); alc_update_coef_idx(codec, 0x45, 0x3f<<10, 0x31<<10); @@ -4281,8 +4287,11 @@ static void alc_headset_mode_default(struct hda_codec *codec) }; switch (codec->core.vendor_id) { + case 0x10ec0215: case 0x10ec0225: + case 0x10ec0285: case 0x10ec0295: + case 0x10ec0289: case 0x10ec0299: alc_process_coef_fw(codec, alc225_pre_hsmode); alc_process_coef_fw(codec, coef0225); @@ -4424,8 +4433,11 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) case 0x10ec0668: alc_process_coef_fw(codec, coef0688); break; + case 0x10ec0215: case 0x10ec0225: + case 0x10ec0285: case 0x10ec0295: + case 0x10ec0289: case 0x10ec0299: val = alc_read_coef_idx(codec, 0x45); if (val & (1 << 9)) @@ -4528,8 +4540,11 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) case 0x10ec0668: alc_process_coef_fw(codec, coef0688); break; + case 0x10ec0215: case 0x10ec0225: + case 0x10ec0285: case 0x10ec0295: + case 0x10ec0289: case 0x10ec0299: alc_process_coef_fw(codec, coef0225); break; @@ -4658,8 +4673,11 @@ static 
void alc_determine_headset_type(struct hda_codec *codec) val = alc_read_coef_idx(codec, 0xbe); is_ctia = (val & 0x1c02) == 0x1c02; break; + case 0x10ec0215: case 0x10ec0225: + case 0x10ec0285: case 0x10ec0295: + case 0x10ec0289: case 0x10ec0299: snd_hda_codec_write(codec, 0x21, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); -- cgit v1.2.3 From 1b6832be1b61f63ef367e48050617e870a46b417 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 17 Jan 2018 15:32:03 +0800 Subject: ALSA: hda/realtek - update ALC215 depop optimize Give ALC215 its own depop handling for alc_init and alc_shutup by assigning it the functions used for ALC225 (alc225_init and alc225_shutup). Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2efba4bd9f2b..0004e282a837 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7042,6 +7042,8 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0285: case 0x10ec0289: spec->codec_variant = ALC269_TYPE_ALC215; + spec->shutup = alc225_shutup; + spec->init_hook = alc225_init; spec->gen.mixer_nid = 0; break; case 0x10ec0225: -- cgit v1.2.3 From fb2fcaeaad504ae9dad26f7b26a8ea840d00535f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Jan 2018 10:17:08 +0000 Subject: ASoC: Intel: remove second duplicated assignment to pointer 'res' The second assignment to res is identical to the previous assignment so it is redundant and can be removed.
Cleans up clang warning: sound/soc/intel/skylake/skl-topology.c:191:25: warning: Value stored to 'res' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-topology.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 28bc16a8e09a..73af6e19ebbd 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -190,7 +190,6 @@ skl_tplg_free_pipe_mcps(struct skl *skl, struct skl_module_cfg *mconfig) u8 res_idx = mconfig->res_idx; struct skl_module_res *res = &mconfig->module->resources[res_idx]; - res = &mconfig->module->resources[res_idx]; skl->resource.mcps -= res->cps; } -- cgit v1.2.3 From d11ed3ab3166a2bfad60681aebf3e13e1c3408a9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 15:12:01 +0000 Subject: Expand INIT_TASK() in init/init_task.c and remove It's no longer necessary to have an INIT_TASK() macro, and this can be expanded into the one place it is now used and removed. Signed-off-by: David Howells Tested-by: Tony Luck Tested-by: Will Deacon (arm64) Tested-by: Palmer Dabbelt Acked-by: Thomas Gleixner --- include/linux/init_task.h | 87 +++-------------------------------------------- init/init_task.c | 85 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 85 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 30a89b99a5af..9711611b831d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -218,91 +218,12 @@ extern struct cred init_cred; #define INIT_TASK_SECURITY #endif -/* - * INIT_TASK is used to set up the first task table, touch at - * your own risk!. 
Base=0, limit=0x1fffff (=2MB) - */ -#define INIT_TASK(tsk) \ -{ \ - INIT_TASK_TI(tsk) \ - .state = 0, \ - .stack = init_stack, \ - .usage = ATOMIC_INIT(2), \ - .flags = PF_KTHREAD, \ - .prio = MAX_PRIO-20, \ - .static_prio = MAX_PRIO-20, \ - .normal_prio = MAX_PRIO-20, \ - .policy = SCHED_NORMAL, \ - .cpus_allowed = CPU_MASK_ALL, \ - .nr_cpus_allowed= NR_CPUS, \ - .mm = NULL, \ - .active_mm = &init_mm, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ - .se = { \ - .group_node = LIST_HEAD_INIT(tsk.se.group_node), \ - }, \ - .rt = { \ - .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ - .time_slice = RR_TIMESLICE, \ - }, \ - .tasks = LIST_HEAD_INIT(tsk.tasks), \ - INIT_PUSHABLE_TASKS(tsk) \ - INIT_CGROUP_SCHED(tsk) \ - .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ - .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ - .real_parent = &tsk, \ - .parent = &tsk, \ - .children = LIST_HEAD_INIT(tsk.children), \ - .sibling = LIST_HEAD_INIT(tsk.sibling), \ - .group_leader = &tsk, \ - RCU_POINTER_INITIALIZER(real_cred, &init_cred), \ - RCU_POINTER_INITIALIZER(cred, &init_cred), \ - .comm = INIT_TASK_COMM, \ - .thread = INIT_THREAD, \ - .fs = &init_fs, \ - .files = &init_files, \ - .signal = &init_signals, \ - .sighand = &init_sighand, \ - .nsproxy = &init_nsproxy, \ - .pending = { \ - .list = LIST_HEAD_INIT(tsk.pending.list), \ - .signal = {{0}}}, \ - .blocked = {{0}}, \ - .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ - .journal_info = NULL, \ - INIT_CPU_TIMERS(tsk) \ - .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ - .timer_slack_ns = 50000, /* 50 usec default slack */ \ - .pids = { \ - [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ - [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ - [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ - }, \ - .thread_group = LIST_HEAD_INIT(tsk.thread_group), \ - .thread_node = LIST_HEAD_INIT(init_signals.thread_head), \ - INIT_IDS \ - INIT_PERF_EVENTS(tsk) \ - INIT_TRACE_IRQFLAGS \ - INIT_LOCKDEP \ - INIT_FTRACE_GRAPH \ - 
INIT_TRACE_RECURSION \ - INIT_TASK_RCU_PREEMPT(tsk) \ - INIT_TASK_RCU_TASKS(tsk) \ - INIT_CPUSET_SEQ(tsk) \ - INIT_RT_MUTEXES(tsk) \ - INIT_PREV_CPUTIME(tsk) \ - INIT_VTIME(tsk) \ - INIT_NUMA_BALANCING(tsk) \ - INIT_KASAN(tsk) \ - INIT_LIVEPATCH(tsk) \ - INIT_TASK_SECURITY \ -} - - /* Attach to the init_task data structure for proper alignment */ +#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK #define __init_task_data __attribute__((__section__(".data..init_task"))) +#else +#define __init_task_data /**/ +#endif /* Attach to the thread_info data structure for proper alignment */ #define __init_thread_info __attribute__((__section__(".data..init_thread_info"))) diff --git a/init/init_task.c b/init/init_task.c index 2285aa42cbe1..7b2436f02dad 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -16,12 +16,93 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -/* Initial task structure */ + +/* + * Set up the first task table, touch at your own risk!. 
Base=0, + * limit=0x1fffff (=2MB) + */ struct task_struct init_task #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK __init_task_data #endif - = INIT_TASK(init_task); += { + INIT_TASK_TI(init_task) + .state = 0, + .stack = init_stack, + .usage = ATOMIC_INIT(2), + .flags = PF_KTHREAD, + .prio = MAX_PRIO-20, + .static_prio = MAX_PRIO-20, + .normal_prio = MAX_PRIO-20, + .policy = SCHED_NORMAL, + .cpus_allowed = CPU_MASK_ALL, + .nr_cpus_allowed= NR_CPUS, + .mm = NULL, + .active_mm = &init_mm, + .restart_block = { + .fn = do_no_restart_syscall, + }, + .se = { + .group_node = LIST_HEAD_INIT(init_task.se.group_node), + }, + .rt = { + .run_list = LIST_HEAD_INIT(init_task.rt.run_list), + .time_slice = RR_TIMESLICE, + }, + .tasks = LIST_HEAD_INIT(init_task.tasks), + INIT_PUSHABLE_TASKS(init_task) + INIT_CGROUP_SCHED(init_task) + .ptraced = LIST_HEAD_INIT(init_task.ptraced), + .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), + .real_parent = &init_task, + .parent = &init_task, + .children = LIST_HEAD_INIT(init_task.children), + .sibling = LIST_HEAD_INIT(init_task.sibling), + .group_leader = &init_task, + RCU_POINTER_INITIALIZER(real_cred, &init_cred), + RCU_POINTER_INITIALIZER(cred, &init_cred), + .comm = INIT_TASK_COMM, + .thread = INIT_THREAD, + .fs = &init_fs, + .files = &init_files, + .signal = &init_signals, + .sighand = &init_sighand, + .nsproxy = &init_nsproxy, + .pending = { + .list = LIST_HEAD_INIT(init_task.pending.list), + .signal = {{0}} + }, + .blocked = {{0}}, + .alloc_lock = __SPIN_LOCK_UNLOCKED(init_task.alloc_lock), + .journal_info = NULL, + INIT_CPU_TIMERS(init_task) + .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), + .timer_slack_ns = 50000, /* 50 usec default slack */ + .pids = { + [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), + [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), + [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), + }, + .thread_group = LIST_HEAD_INIT(init_task.thread_group), + .thread_node = LIST_HEAD_INIT(init_signals.thread_head), + INIT_IDS + 
INIT_PERF_EVENTS(init_task) + INIT_TRACE_IRQFLAGS + INIT_LOCKDEP + INIT_FTRACE_GRAPH + INIT_TRACE_RECURSION + INIT_TASK_RCU_PREEMPT(init_task) + INIT_TASK_RCU_TASKS(init_task) + INIT_CPUSET_SEQ(init_task) + INIT_RT_MUTEXES(init_task) + INIT_PREV_CPUTIME(init_task) + INIT_VTIME(init_task) + INIT_NUMA_BALANCING(init_task) + INIT_KASAN(init_task) + INIT_LIVEPATCH(init_task) + INIT_TASK_SECURITY +}; + EXPORT_SYMBOL(init_task); /* -- cgit v1.2.3 From 4e7e3adbba5224604b34b0d42003ff6dbdc8ddd9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 15:12:01 +0000 Subject: Expand various INIT_* macros and remove Expand various INIT_* macros into the single places they're used in init/init_task.c and remove them. Signed-off-by: David Howells Tested-by: Tony Luck Tested-by: Will Deacon (arm64) Tested-by: Palmer Dabbelt Acked-by: Thomas Gleixner --- include/linux/ftrace.h | 12 ----- include/linux/init_task.h | 112 ---------------------------------------------- include/linux/irqflags.h | 2 - include/linux/lockdep.h | 3 -- init/init_task.c | 95 +++++++++++++++++++++++++++++---------- 5 files changed, 71 insertions(+), 153 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2bab81951ced..6311f35acbc4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -764,9 +764,6 @@ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -/* for init task */ -#define INIT_FTRACE_GRAPH .ret_stack = NULL, - /* * Stack of return addresses for functions * of a thread. 
@@ -844,7 +841,6 @@ static inline void unpause_graph_tracing(void) #else /* !CONFIG_FUNCTION_GRAPH_TRACER */ #define __notrace_funcgraph -#define INIT_FTRACE_GRAPH static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } @@ -923,10 +919,6 @@ extern int tracepoint_printk; extern void disable_trace_on_warning(void); extern int __disable_trace_on_warning; -#ifdef CONFIG_PREEMPT -#define INIT_TRACE_RECURSION .trace_recursion = 0, -#endif - int tracepoint_printk_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -935,10 +927,6 @@ int tracepoint_printk_sysctl(struct ctl_table *table, int write, static inline void disable_trace_on_warning(void) { } #endif /* CONFIG_TRACING */ -#ifndef INIT_TRACE_RECURSION -#define INIT_TRACE_RECURSION -#endif - #ifdef CONFIG_FTRACE_SYSCALLS unsigned long arch_syscall_addr(int nr); diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9711611b831d..b1385e1dca63 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -21,23 +21,9 @@ #include -#ifdef CONFIG_SMP -# define INIT_PUSHABLE_TASKS(tsk) \ - .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), -#else -# define INIT_PUSHABLE_TASKS(tsk) -#endif - extern struct files_struct init_files; extern struct fs_struct init_fs; -#ifdef CONFIG_CPUSETS -#define INIT_CPUSET_SEQ(tsk) \ - .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), -#else -#define INIT_CPUSET_SEQ(tsk) -#endif - #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define INIT_PREV_CPUTIME(x) .prev_cputime = { \ .lock = __RAW_SPIN_LOCK_UNLOCKED(x.prev_cputime.lock), \ @@ -117,107 +103,10 @@ extern struct group_info init_groups; .pid = &init_struct_pid, \ } -#ifdef CONFIG_AUDITSYSCALL -#define INIT_IDS \ - .loginuid = INVALID_UID, \ - .sessionid = (unsigned int)-1, -#else -#define INIT_IDS -#endif - -#ifdef CONFIG_PREEMPT_RCU -#define INIT_TASK_RCU_PREEMPT(tsk) \ - 
.rcu_read_lock_nesting = 0, \ - .rcu_read_unlock_special.s = 0, \ - .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ - .rcu_blocked_node = NULL, -#else -#define INIT_TASK_RCU_PREEMPT(tsk) -#endif -#ifdef CONFIG_TASKS_RCU -#define INIT_TASK_RCU_TASKS(tsk) \ - .rcu_tasks_holdout = false, \ - .rcu_tasks_holdout_list = \ - LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \ - .rcu_tasks_idle_cpu = -1, -#else -#define INIT_TASK_RCU_TASKS(tsk) -#endif - extern struct cred init_cred; -#ifdef CONFIG_CGROUP_SCHED -# define INIT_CGROUP_SCHED(tsk) \ - .sched_task_group = &root_task_group, -#else -# define INIT_CGROUP_SCHED(tsk) -#endif - -#ifdef CONFIG_PERF_EVENTS -# define INIT_PERF_EVENTS(tsk) \ - .perf_event_mutex = \ - __MUTEX_INITIALIZER(tsk.perf_event_mutex), \ - .perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list), -#else -# define INIT_PERF_EVENTS(tsk) -#endif - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -# define INIT_VTIME(tsk) \ - .vtime.seqcount = SEQCNT_ZERO(tsk.vtime.seqcount), \ - .vtime.starttime = 0, \ - .vtime.state = VTIME_SYS, -#else -# define INIT_VTIME(tsk) -#endif - #define INIT_TASK_COMM "swapper" -#ifdef CONFIG_RT_MUTEXES -# define INIT_RT_MUTEXES(tsk) \ - .pi_waiters = RB_ROOT_CACHED, \ - .pi_top_task = NULL, -#else -# define INIT_RT_MUTEXES(tsk) -#endif - -#ifdef CONFIG_NUMA_BALANCING -# define INIT_NUMA_BALANCING(tsk) \ - .numa_preferred_nid = -1, \ - .numa_group = NULL, \ - .numa_faults = NULL, -#else -# define INIT_NUMA_BALANCING(tsk) -#endif - -#ifdef CONFIG_KASAN -# define INIT_KASAN(tsk) \ - .kasan_depth = 1, -#else -# define INIT_KASAN(tsk) -#endif - -#ifdef CONFIG_LIVEPATCH -# define INIT_LIVEPATCH(tsk) \ - .patch_state = KLP_UNDEFINED, -#else -# define INIT_LIVEPATCH(tsk) -#endif - -#ifdef CONFIG_THREAD_INFO_IN_TASK -# define INIT_TASK_TI(tsk) \ - .thread_info = INIT_THREAD_INFO(tsk), \ - .stack_refcount = ATOMIC_INIT(1), -#else -# define INIT_TASK_TI(tsk) -#endif - -#ifdef CONFIG_SECURITY -#define INIT_TASK_SECURITY .security = NULL, -#else 
-#define INIT_TASK_SECURITY -#endif - /* Attach to the init_task data structure for proper alignment */ #ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK #define __init_task_data __attribute__((__section__(".data..init_task"))) @@ -228,5 +117,4 @@ extern struct cred init_cred; /* Attach to the thread_info data structure for proper alignment */ #define __init_thread_info __attribute__((__section__(".data..init_thread_info"))) - #endif diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 46cb57d5eb13..2ec81dc1487e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -44,7 +44,6 @@ do { \ current->softirq_context--; \ crossrelease_hist_end(XHLOCK_SOFT); \ } while (0) -# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) @@ -58,7 +57,6 @@ do { \ # define trace_hardirq_exit() do { } while (0) # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) -# define INIT_TRACE_IRQFLAGS #endif #if defined(CONFIG_IRQSOFF_TRACER) || \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 2e75dc34bff5..26f2ccc60669 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -367,8 +367,6 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); -# define INIT_LOCKDEP .lockdep_recursion = 0, - #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) #define lockdep_assert_held(l) do { \ @@ -426,7 +424,6 @@ static inline void lockdep_on(void) * #ifdef the call himself. 
*/ -# define INIT_LOCKDEP # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) # define lockdep_sys_exit() do { } while (0) diff --git a/init/init_task.c b/init/init_task.c index 7b2436f02dad..aa4030a939e5 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -16,7 +16,6 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); - /* * Set up the first task table, touch at your own risk!. Base=0, * limit=0x1fffff (=2MB) @@ -26,20 +25,23 @@ struct task_struct init_task __init_task_data #endif = { - INIT_TASK_TI(init_task) +#ifdef CONFIG_THREAD_INFO_IN_TASK + .thread_info = INIT_THREAD_INFO(init_task), + .stack_refcount = ATOMIC_INIT(1), +#endif .state = 0, .stack = init_stack, .usage = ATOMIC_INIT(2), .flags = PF_KTHREAD, - .prio = MAX_PRIO-20, - .static_prio = MAX_PRIO-20, - .normal_prio = MAX_PRIO-20, + .prio = MAX_PRIO - 20, + .static_prio = MAX_PRIO - 20, + .normal_prio = MAX_PRIO - 20, .policy = SCHED_NORMAL, .cpus_allowed = CPU_MASK_ALL, .nr_cpus_allowed= NR_CPUS, .mm = NULL, .active_mm = &init_mm, - .restart_block = { + .restart_block = { .fn = do_no_restart_syscall, }, .se = { @@ -50,8 +52,12 @@ struct task_struct init_task .time_slice = RR_TIMESLICE, }, .tasks = LIST_HEAD_INIT(init_task.tasks), - INIT_PUSHABLE_TASKS(init_task) - INIT_CGROUP_SCHED(init_task) +#ifdef CONFIG_SMP + .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), +#endif +#ifdef CONFIG_CGROUP_SCHED + .sched_task_group = &root_task_group, +#endif .ptraced = LIST_HEAD_INIT(init_task.ptraced), .ptrace_entry = LIST_HEAD_INIT(init_task.ptrace_entry), .real_parent = &init_task, @@ -85,24 +91,65 @@ struct task_struct init_task }, .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), - INIT_IDS - INIT_PERF_EVENTS(init_task) - INIT_TRACE_IRQFLAGS - INIT_LOCKDEP - 
INIT_FTRACE_GRAPH - INIT_TRACE_RECURSION - INIT_TASK_RCU_PREEMPT(init_task) - INIT_TASK_RCU_TASKS(init_task) - INIT_CPUSET_SEQ(init_task) - INIT_RT_MUTEXES(init_task) +#ifdef CONFIG_AUDITSYSCALL + .loginuid = INVALID_UID, + .sessionid = (unsigned int)-1, +#endif +#ifdef CONFIG_PERF_EVENTS + .perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex), + .perf_event_list = LIST_HEAD_INIT(init_task.perf_event_list), +#endif +#ifdef CONFIG_PREEMPT_RCU + .rcu_read_lock_nesting = 0, + .rcu_read_unlock_special.s = 0, + .rcu_node_entry = LIST_HEAD_INIT(init_task.rcu_node_entry), + .rcu_blocked_node = NULL, +#endif +#ifdef CONFIG_TASKS_RCU + .rcu_tasks_holdout = false, + .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), + .rcu_tasks_idle_cpu = -1, +#endif +#ifdef CONFIG_CPUSETS + .mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq), +#endif +#ifdef CONFIG_RT_MUTEXES + .pi_waiters = RB_ROOT_CACHED, + .pi_top_task = NULL, +#endif INIT_PREV_CPUTIME(init_task) - INIT_VTIME(init_task) - INIT_NUMA_BALANCING(init_task) - INIT_KASAN(init_task) - INIT_LIVEPATCH(init_task) - INIT_TASK_SECURITY +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + .vtime.seqcount = SEQCNT_ZERO(init_task.vtime_seqcount), + .vtime.starttime = 0, + .vtime.state = VTIME_SYS, +#endif +#ifdef CONFIG_NUMA_BALANCING + .numa_preferred_nid = -1, + .numa_group = NULL, + .numa_faults = NULL, +#endif +#ifdef CONFIG_KASAN + .kasan_depth = 1, +#endif +#ifdef CONFIG_TRACE_IRQFLAGS + .softirqs_enabled = 1, +#endif +#ifdef CONFIG_LOCKDEP + .lockdep_recursion = 0, +#endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .ret_stack = NULL, +#endif +#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT) + .trace_recursion = 0, +#endif +#ifdef CONFIG_LIVEPATCH + .patch_state = KLP_UNDEFINED, +#endif +#ifdef CONFIG_SECURITY + .security = NULL, +#endif }; - EXPORT_SYMBOL(init_task); /* -- cgit v1.2.3 From 3678e2fcc293cf2ff1fe961838734a70c185de8a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 
2018 15:12:01 +0000 Subject: Expand the INIT_SIGNALS and INIT_SIGHAND macros and remove There doesn't seem to be any need to have the INIT_SIGNALS and INIT_SIGHAND macros, so expand them in their single places of use and remove them. Signed-off-by: David Howells Tested-by: Tony Luck Tested-by: Will Deacon (arm64) Tested-by: Palmer Dabbelt Acked-by: Thomas Gleixner --- include/linux/init_task.h | 43 ++++--------------------------------------- init/init_task.c | 30 ++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b1385e1dca63..5b5f41328115 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -23,6 +23,9 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; +extern struct nsproxy init_nsproxy; +extern struct group_info init_groups; +extern struct cred init_cred; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define INIT_PREV_CPUTIME(x) .prev_cputime = { \ @@ -33,52 +36,16 @@ extern struct fs_struct init_fs; #endif #ifdef CONFIG_POSIX_TIMERS -#define INIT_POSIX_TIMERS(s) \ - .posix_timers = LIST_HEAD_INIT(s.posix_timers), #define INIT_CPU_TIMERS(s) \ .cpu_timers = { \ LIST_HEAD_INIT(s.cpu_timers[0]), \ LIST_HEAD_INIT(s.cpu_timers[1]), \ - LIST_HEAD_INIT(s.cpu_timers[2]), \ - }, -#define INIT_CPUTIMER(s) \ - .cputimer = { \ - .cputime_atomic = INIT_CPUTIME_ATOMIC, \ - .running = false, \ - .checking_timer = false, \ + LIST_HEAD_INIT(s.cpu_timers[2]), \ }, #else -#define INIT_POSIX_TIMERS(s) #define INIT_CPU_TIMERS(s) -#define INIT_CPUTIMER(s) #endif -#define INIT_SIGNALS(sig) { \ - .nr_threads = 1, \ - .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ - .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ - .shared_pending = { \ - .list = LIST_HEAD_INIT(sig.shared_pending.list), \ - .signal = {{0}}}, \ - INIT_POSIX_TIMERS(sig) \ - INIT_CPU_TIMERS(sig) \ - .rlim = INIT_RLIMITS, \ - INIT_CPUTIMER(sig) \ - 
INIT_PREV_CPUTIME(sig) \ - .cred_guard_mutex = \ - __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ -} - -extern struct nsproxy init_nsproxy; - -#define INIT_SIGHAND(sighand) { \ - .count = ATOMIC_INIT(1), \ - .action = { { { .sa_handler = SIG_DFL, } }, }, \ - .siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \ - .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh), \ -} - -extern struct group_info init_groups; #define INIT_STRUCT_PID { \ .count = ATOMIC_INIT(1), \ @@ -103,8 +70,6 @@ extern struct group_info init_groups; .pid = &init_struct_pid, \ } -extern struct cred init_cred; - #define INIT_TASK_COMM "swapper" /* Attach to the init_task data structure for proper alignment */ diff --git a/init/init_task.c b/init/init_task.c index aa4030a939e5..3ac6e754cf64 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -13,8 +13,34 @@ #include #include -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +static struct signal_struct init_signals = { + .nr_threads = 1, + .thread_head = LIST_HEAD_INIT(init_task.thread_node), + .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(init_signals.wait_chldexit), + .shared_pending = { + .list = LIST_HEAD_INIT(init_signals.shared_pending.list), + .signal = {{0}} + }, + .rlim = INIT_RLIMITS, + .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), +#ifdef CONFIG_POSIX_TIMERS + .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), + .cputimer = { + .cputime_atomic = INIT_CPUTIME_ATOMIC, + .running = false, + .checking_timer = false, + }, +#endif + INIT_CPU_TIMERS(init_signals) + INIT_PREV_CPUTIME(init_signals) +}; + +static struct sighand_struct init_sighand = { + .count = ATOMIC_INIT(1), + .action = { { { .sa_handler = SIG_DFL, } }, }, + .siglock = __SPIN_LOCK_UNLOCKED(init_sighand.siglock), + .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh), +}; /* * Set up the first task table, touch at 
your own risk!. Base=0, -- cgit v1.2.3 From e1e871aff3ded26348c631b1370e257d401cd22d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 15:12:01 +0000 Subject: Expand INIT_STRUCT_PID and remove Expand INIT_STRUCT_PID in the single place that uses it and then remove it. There doesn't seem any point in the macro. Signed-off-by: David Howells Tested-by: Tony Luck Tested-by: Will Deacon (arm64) Tested-by: Palmer Dabbelt Acked-by: Thomas Gleixner --- include/linux/init_task.h | 15 --------------- kernel/pid.c | 14 +++++++++++++- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5b5f41328115..a454b8aeb938 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -46,21 +46,6 @@ extern struct cred init_cred; #define INIT_CPU_TIMERS(s) #endif - -#define INIT_STRUCT_PID { \ - .count = ATOMIC_INIT(1), \ - .tasks = { \ - { .first = NULL }, \ - { .first = NULL }, \ - { .first = NULL }, \ - }, \ - .level = 0, \ - .numbers = { { \ - .nr = 0, \ - .ns = &init_pid_ns, \ - }, } \ -} - #define INIT_PID_LINK(type) \ { \ .node = { \ diff --git a/kernel/pid.c b/kernel/pid.c index b13b624e2c49..161af2eda943 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -41,7 +41,19 @@ #include #include -struct pid init_struct_pid = INIT_STRUCT_PID; +struct pid init_struct_pid = { + .count = ATOMIC_INIT(1), + .tasks = { + { .first = NULL }, + { .first = NULL }, + { .first = NULL }, + }, + .level = 0, + .numbers = { { + .nr = 0, + .ns = &init_pid_ns, + }, } +}; int pid_max = PID_MAX_DEFAULT; -- cgit v1.2.3 From 5c3c6126b62d29f539a8712c65c58afc9c9d2c91 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Jan 2018 09:52:18 +0200 Subject: mmc: sdhci-pci: Stop calling sdhci_enable_irq_wakeups() sdhci_enable_irq_wakeups() is already called by sdhci_suspend_host() so sdhci-pci should not need to call it. 
However sdhci_suspend_host() only calls it if wakeups are enabled, and sdhci-pci does not enable them until after calling sdhci_suspend_host(). So move the calls to sdhci_pci_init_wakeup() before calling sdhci_suspend_host(), and stop calling sdhci_enable_irq_wakeups(). That results in some simplification because sdhci_pci_suspend_host() and __sdhci_pci_suspend_host() no longer need to be separate functions. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 58 ++++++++++++++------------------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 00fa7a36b336..b99a970645e7 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -38,10 +38,29 @@ static void sdhci_pci_hw_reset(struct sdhci_host *host); #ifdef CONFIG_PM_SLEEP -static int __sdhci_pci_suspend_host(struct sdhci_pci_chip *chip) +static int sdhci_pci_init_wakeup(struct sdhci_pci_chip *chip) +{ + mmc_pm_flag_t pm_flags = 0; + int i; + + for (i = 0; i < chip->num_slots; i++) { + struct sdhci_pci_slot *slot = chip->slots[i]; + + if (slot) + pm_flags |= slot->host->mmc->pm_flags; + } + + return device_init_wakeup(&chip->pdev->dev, + (pm_flags & MMC_PM_KEEP_POWER) && + (pm_flags & MMC_PM_WAKE_SDIO_IRQ)); +} + +static int sdhci_pci_suspend_host(struct sdhci_pci_chip *chip) { int i, ret; + sdhci_pci_init_wakeup(chip); + for (i = 0; i < chip->num_slots; i++) { struct sdhci_pci_slot *slot = chip->slots[i]; struct sdhci_host *host; @@ -57,9 +76,6 @@ static int __sdhci_pci_suspend_host(struct sdhci_pci_chip *chip) ret = sdhci_suspend_host(host); if (ret) goto err_pci_suspend; - - if (host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ) - sdhci_enable_irq_wakeups(host); } return 0; @@ -70,36 +86,6 @@ err_pci_suspend: return ret; } -static int sdhci_pci_init_wakeup(struct sdhci_pci_chip *chip) -{ - mmc_pm_flag_t pm_flags = 0; - int i; - - for (i = 0; i < 
chip->num_slots; i++) { - struct sdhci_pci_slot *slot = chip->slots[i]; - - if (slot) - pm_flags |= slot->host->mmc->pm_flags; - } - - return device_init_wakeup(&chip->pdev->dev, - (pm_flags & MMC_PM_KEEP_POWER) && - (pm_flags & MMC_PM_WAKE_SDIO_IRQ)); -} - -static int sdhci_pci_suspend_host(struct sdhci_pci_chip *chip) -{ - int ret; - - ret = __sdhci_pci_suspend_host(chip); - if (ret) - return ret; - - sdhci_pci_init_wakeup(chip); - - return 0; -} - int sdhci_pci_resume_host(struct sdhci_pci_chip *chip) { struct sdhci_pci_slot *slot; @@ -1109,7 +1095,7 @@ static int jmicron_suspend(struct sdhci_pci_chip *chip) { int i, ret; - ret = __sdhci_pci_suspend_host(chip); + ret = sdhci_pci_suspend_host(chip); if (ret) return ret; @@ -1119,8 +1105,6 @@ static int jmicron_suspend(struct sdhci_pci_chip *chip) jmicron_enable_mmc(chip->slots[i]->host, 0); } - sdhci_pci_init_wakeup(chip); - return 0; } -- cgit v1.2.3 From e92cc35d627f13c85a3662949ddec79345498e33 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Jan 2018 09:52:19 +0200 Subject: mmc: sdhci-pci: Use device wakeup capability to determine MMC_PM_WAKE_SDIO_IRQ capability PCI and ACPI determine if a device is wakeup capable, so use that to determine the MMC_PM_WAKE_SDIO_IRQ capability correctly. 
Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index b99a970645e7..6d1a983e6227 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -50,9 +50,9 @@ static int sdhci_pci_init_wakeup(struct sdhci_pci_chip *chip) pm_flags |= slot->host->mmc->pm_flags; } - return device_init_wakeup(&chip->pdev->dev, - (pm_flags & MMC_PM_KEEP_POWER) && - (pm_flags & MMC_PM_WAKE_SDIO_IRQ)); + return device_set_wakeup_enable(&chip->pdev->dev, + (pm_flags & MMC_PM_KEEP_POWER) && + (pm_flags & MMC_PM_WAKE_SDIO_IRQ)); } static int sdhci_pci_suspend_host(struct sdhci_pci_chip *chip) @@ -1682,10 +1682,13 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot( } } - host->mmc->pm_caps = MMC_PM_KEEP_POWER | MMC_PM_WAKE_SDIO_IRQ; + host->mmc->pm_caps = MMC_PM_KEEP_POWER; host->mmc->slotno = slotno; host->mmc->caps2 |= MMC_CAP2_NO_PRESCAN_POWERUP; + if (device_can_wakeup(&pdev->dev)) + host->mmc->pm_caps |= MMC_PM_WAKE_SDIO_IRQ; + if (slot->cd_idx >= 0) { ret = mmc_gpiod_request_cd(host->mmc, NULL, slot->cd_idx, slot->cd_override_level, 0, NULL); -- cgit v1.2.3 From 551d6bde462932e8a024d89e7325f7c6e073500b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Jan 2018 09:52:20 +0200 Subject: mmc: sdhci: Stop exporting sdhci_enable_irq_wakeups() Now that it is not being used by any drivers, stop exporting it. 
Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 3 +-- drivers/mmc/host/sdhci.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 80b1a59bc3c5..6ac4bdd7715d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2828,7 +2828,7 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id) * sdhci_disable_irq_wakeups() since it will be set by * sdhci_enable_card_detection() or sdhci_init(). */ -void sdhci_enable_irq_wakeups(struct sdhci_host *host) +static void sdhci_enable_irq_wakeups(struct sdhci_host *host) { u8 val; u8 mask = SDHCI_WAKE_ON_INSERT | SDHCI_WAKE_ON_REMOVE @@ -2846,7 +2846,6 @@ void sdhci_enable_irq_wakeups(struct sdhci_host *host) sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL); sdhci_writel(host, irq_val, SDHCI_INT_ENABLE); } -EXPORT_SYMBOL_GPL(sdhci_enable_irq_wakeups); static void sdhci_disable_irq_wakeups(struct sdhci_host *host) { diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 54bc444c317f..7393b3a54772 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -718,7 +718,6 @@ void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable); #ifdef CONFIG_PM int sdhci_suspend_host(struct sdhci_host *host); int sdhci_resume_host(struct sdhci_host *host); -void sdhci_enable_irq_wakeups(struct sdhci_host *host); int sdhci_runtime_suspend_host(struct sdhci_host *host); int sdhci_runtime_resume_host(struct sdhci_host *host); #endif -- cgit v1.2.3 From 58e79b60751d1ea6d13e09c6095f5e4cd5e040ee Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Jan 2018 09:52:21 +0200 Subject: mmc: sdhci: Handle failure of enable_irq_wake() Now that sdhci_enable_irq_wakeups() is a local function, change it to return whether the IRQ wakeup was successfully enabled. This is in preparation for adding more conditions for whether IRQ wakeup is enabled. 
Note it is assumed, for SDHCI devices, that suspend is more important than wakeup, so we continue to suspend regardless. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 24 +++++++++++++++--------- drivers/mmc/host/sdhci.h | 1 + 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 6ac4bdd7715d..cc9776d9e8f4 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2828,7 +2828,7 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id) * sdhci_disable_irq_wakeups() since it will be set by * sdhci_enable_card_detection() or sdhci_init(). */ -static void sdhci_enable_irq_wakeups(struct sdhci_host *host) +static bool sdhci_enable_irq_wakeups(struct sdhci_host *host) { u8 val; u8 mask = SDHCI_WAKE_ON_INSERT | SDHCI_WAKE_ON_REMOVE @@ -2845,6 +2845,10 @@ static void sdhci_enable_irq_wakeups(struct sdhci_host *host) } sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL); sdhci_writel(host, irq_val, SDHCI_INT_ENABLE); + + host->irq_wake_enabled = !enable_irq_wake(host->irq); + + return host->irq_wake_enabled; } static void sdhci_disable_irq_wakeups(struct sdhci_host *host) @@ -2856,6 +2860,10 @@ static void sdhci_disable_irq_wakeups(struct sdhci_host *host) val = sdhci_readb(host, SDHCI_WAKE_UP_CONTROL); val &= ~mask; sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL); + + disable_irq_wake(host->irq); + + host->irq_wake_enabled = false; } int sdhci_suspend_host(struct sdhci_host *host) @@ -2864,15 +2872,14 @@ int sdhci_suspend_host(struct sdhci_host *host) mmc_retune_timer_stop(host->mmc); - if (!device_may_wakeup(mmc_dev(host->mmc))) { + if (!device_may_wakeup(mmc_dev(host->mmc)) || + !sdhci_enable_irq_wakeups(host)) { host->ier = 0; sdhci_writel(host, 0, SDHCI_INT_ENABLE); sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE); free_irq(host->irq, host); - } else { - sdhci_enable_irq_wakeups(host); - enable_irq_wake(host->irq); } + return 0; } @@ -2900,15 +2907,14 @@ 
int sdhci_resume_host(struct sdhci_host *host) mmiowb(); } - if (!device_may_wakeup(mmc_dev(host->mmc))) { + if (host->irq_wake_enabled) { + sdhci_disable_irq_wakeups(host); + } else { ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq, IRQF_SHARED, mmc_hostname(host->mmc), host); if (ret) return ret; - } else { - sdhci_disable_irq_wakeups(host); - disable_irq_wake(host->irq); } sdhci_enable_card_detection(host); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 7393b3a54772..afab26fd70e6 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -484,6 +484,7 @@ struct sdhci_host { bool bus_on; /* Bus power prevents runtime suspend */ bool preset_enabled; /* Preset is enabled */ bool pending_reset; /* Cmd/data reset is pending */ + bool irq_wake_enabled; /* IRQ wakeup is enabled */ struct mmc_request *mrqs_done[SDHCI_MAX_MRQS]; /* Requests done */ struct mmc_command *cmd; /* Current command */ -- cgit v1.2.3 From 81b14543ac81d529d6c07ce3be0cdfc9fe417389 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Jan 2018 09:52:22 +0200 Subject: mmc: sdhci: Rework sdhci_enable_irq_wakeups() In preparation for adding more conditions for whether IRQ wakeup is enabled, rework sdhci_enable_irq_wakeups() so that needed bits are added instead of adding them all and then removing the unneeded bits. 
Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index cc9776d9e8f4..070aff9c108f 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2830,20 +2830,25 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id) */ static bool sdhci_enable_irq_wakeups(struct sdhci_host *host) { + u8 mask = SDHCI_WAKE_ON_INSERT | SDHCI_WAKE_ON_REMOVE | + SDHCI_WAKE_ON_INT; + u32 irq_val = 0; + u8 wake_val = 0; u8 val; - u8 mask = SDHCI_WAKE_ON_INSERT | SDHCI_WAKE_ON_REMOVE - | SDHCI_WAKE_ON_INT; - u32 irq_val = SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE | - SDHCI_INT_CARD_INT; - val = sdhci_readb(host, SDHCI_WAKE_UP_CONTROL); - val |= mask ; - /* Avoid fake wake up */ - if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) { - val &= ~(SDHCI_WAKE_ON_INSERT | SDHCI_WAKE_ON_REMOVE); - irq_val &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE); + if (!(host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)) { + wake_val |= SDHCI_WAKE_ON_INSERT | SDHCI_WAKE_ON_REMOVE; + irq_val |= SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE; } + + wake_val |= SDHCI_WAKE_ON_INT; + irq_val |= SDHCI_INT_CARD_INT; + + val = sdhci_readb(host, SDHCI_WAKE_UP_CONTROL); + val &= ~mask; + val |= wake_val; sdhci_writeb(host, val, SDHCI_WAKE_UP_CONTROL); + sdhci_writel(host, irq_val, SDHCI_INT_ENABLE); host->irq_wake_enabled = !enable_irq_wake(host->irq); -- cgit v1.2.3 From 8ffdfe35b8a67e509dee5719807b7f88ed2cda7e Mon Sep 17 00:00:00 2001 From: Kyungsik Lee Date: Tue, 16 Jan 2018 10:19:43 +0900 Subject: PM / hibernate: Drop unused parameter of enough_swap Parameter flags is no longer used, remove it. Signed-off-by: Kyungsik Lee Signed-off-by: Rafael J. 
Wysocki --- kernel/power/swap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 293ead59eccc..a46be1261c09 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -879,7 +879,7 @@ out_clean: * space avaiable from the resume partition. */ -static int enough_swap(unsigned int nr_pages, unsigned int flags) +static int enough_swap(unsigned int nr_pages) { unsigned int free_swap = count_swap_pages(root_swap, 1); unsigned int required; @@ -915,7 +915,7 @@ int swsusp_write(unsigned int flags) return error; } if (flags & SF_NOCOMPRESS_MODE) { - if (!enough_swap(pages, flags)) { + if (!enough_swap(pages)) { pr_err("Not enough free swap\n"); error = -ENOSPC; goto out_finish; -- cgit v1.2.3 From 617fcb673090e495f58565ff0171d07abdad53a7 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 16 Jan 2018 09:01:27 +0100 Subject: PM / runtime: Allow no callbacks in pm_runtime_force_suspend|resume() The pm_runtime_force_suspend|resume() helpers currently require the device to at some level (PM domain, bus, etc), have the ->runtime_suspend|resume() callbacks assigned for it, else -ENOSYS is returned as an error. However, there is no reason for this requirement, so let's simply remove it by allowing these callbacks to be NULL. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/runtime.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index cb5e48b86453..8bef3cb2424d 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1640,7 +1640,7 @@ static bool pm_runtime_need_not_resume(struct device *dev) int pm_runtime_force_suspend(struct device *dev) { int (*callback)(struct device *); - int ret = 0; + int ret; pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev)) @@ -1648,12 +1648,7 @@ int pm_runtime_force_suspend(struct device *dev) callback = RPM_GET_CALLBACK(dev, runtime_suspend); - if (!callback) { - ret = -ENOSYS; - goto err; - } - - ret = callback(dev); + ret = callback ? callback(dev) : 0; if (ret) goto err; @@ -1704,7 +1699,7 @@ int pm_runtime_force_resume(struct device *dev) callback = RPM_GET_CALLBACK(dev, runtime_resume); - ret = callback ? callback(dev) : -ENOSYS; + ret = callback ? callback(dev) : 0; if (ret) { pm_runtime_set_suspended(dev); goto out; -- cgit v1.2.3 From 1131b0a4af911de50b22239cabdf6dcd3f15df15 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 17 Jan 2018 10:38:28 +0100 Subject: dmaengine: rcar-dmac: Make DMAC reinit during system resume explicit The current (empty) system sleep callbacks rely on the PM core to force a runtime resume to reinitialize the DMAC registers during system resume. Without a reinitialization, e.g. SCIF DMA will hang silently after a system resume on R-Car Gen3. Make this explicit by using pm_runtime_force_{suspend,resume}() as the system sleep callbacks instead. Use SET_LATE_SYSTEM_SLEEP_PM_OPS() as DMA engines must be initialized before all DMA slave devices. Fixes: 17218e0092f8 "PM / genpd: Stop/start devices without pm_runtime_force_suspend/resume()" Suggested-by: Ulf Hansson Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulf Hansson Acked-by: Vinod Koul Signed-off-by: Rafael J. 
Wysocki --- drivers/dma/sh/rcar-dmac.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 2b2c7db3e480..35c3936edc45 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1615,22 +1615,6 @@ static struct dma_chan *rcar_dmac_of_xlate(struct of_phandle_args *dma_spec, * Power management */ -#ifdef CONFIG_PM_SLEEP -static int rcar_dmac_sleep_suspend(struct device *dev) -{ - /* - * TODO: Wait for the current transfer to complete and stop the device. - */ - return 0; -} - -static int rcar_dmac_sleep_resume(struct device *dev) -{ - /* TODO: Resume transfers, if any. */ - return 0; -} -#endif - #ifdef CONFIG_PM static int rcar_dmac_runtime_suspend(struct device *dev) { @@ -1646,7 +1630,13 @@ static int rcar_dmac_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops rcar_dmac_pm = { - SET_SYSTEM_SLEEP_PM_OPS(rcar_dmac_sleep_suspend, rcar_dmac_sleep_resume) + /* + * TODO for system sleep/resume: + * - Wait for the current transfer to complete and stop the device, + * - Resume transfers, if any. + */ + SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume, NULL) }; -- cgit v1.2.3 From 90fd94e4aba4ddfb6764c051a6bf9a3f4f26fb50 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Tue, 16 Jan 2018 13:51:04 -0500 Subject: ACPI/PCI: pci_link: reduce verbosity when IRQ is enabled When ACPI Link object is enabled, the message is printed with a warning prefix. Some test tools are capturing warning and test error types as errors. Let's reduce the verbosity of success case. Signed-off-by: Sinan Kaya Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/pci_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index bc3d914dfc3e..85ad679390e3 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -612,7 +612,7 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) acpi_isa_irq_penalty[link->irq.active] += PIRQ_PENALTY_PCI_USING; - printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n", + pr_info("%s [%s] enabled at IRQ %d\n", acpi_device_name(link->device), acpi_device_bid(link->device), link->irq.active); } -- cgit v1.2.3 From 01857cf7748aef41c20987526d4c12f12b2f04ff Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 17 Jan 2018 10:30:34 +0000 Subject: powercap: intel_rapl: Fix trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Signed-off-by: Rafael J. Wysocki --- drivers/powercap/intel_rapl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 0188cff98cdd..35636e1d8a3d 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -1211,7 +1211,7 @@ static int rapl_package_register_powercap(struct rapl_package *rp) struct rapl_domain *rd; char dev_name[17]; /* max domain name = 7 + 1 + 8 for int + 1 for null*/ struct powercap_zone *power_zone = NULL; - int nr_pl, ret;; + int nr_pl, ret; /* Update the domain data of the new package */ rapl_update_domain_data(rp); -- cgit v1.2.3 From dff4113d5e3753c23e8b5bb6818f5829ccd0d06a Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 10 Jan 2018 16:44:14 +0000 Subject: drivers: psci: remove cluster terminology and dependency on physical_package_id Since the definition of the term "cluster" is not well defined in the architecture, we should avoid using it. 
Also the physical package id is currently mapped to so called "clusters" in ARM/ARM64 platforms which is already argumentative. Currently PSCI checker uses the physical package id assuming that CPU power domains map to "clusters" and the physical package id in the code as it stands also maps to cluster boundaries. It does that trying to test "cluster" idle states to its best. However the CPU power domain often but not always maps directly to the processor topology. This patch removes the dependency on physical_package_id from the topology in this PSCI checker. Also it replaces all the occurrences of clusters with cpu_groups which is derived from core_sibling_mask and may not directly map to physical "cluster". Acked-by: Lorenzo Pieralisi Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. Wysocki --- drivers/firmware/psci_checker.c | 46 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/firmware/psci_checker.c b/drivers/firmware/psci_checker.c index f3f4f810e5df..bb1c068bff19 100644 --- a/drivers/firmware/psci_checker.c +++ b/drivers/firmware/psci_checker.c @@ -77,8 +77,8 @@ static int psci_ops_check(void) return 0; } -static int find_clusters(const struct cpumask *cpus, - const struct cpumask **clusters) +static int find_cpu_groups(const struct cpumask *cpus, + const struct cpumask **cpu_groups) { unsigned int nb = 0; cpumask_var_t tmp; @@ -88,11 +88,11 @@ static int find_clusters(const struct cpumask *cpus, cpumask_copy(tmp, cpus); while (!cpumask_empty(tmp)) { - const struct cpumask *cluster = + const struct cpumask *cpu_group = topology_core_cpumask(cpumask_any(tmp)); - clusters[nb++] = cluster; - cpumask_andnot(tmp, tmp, cluster); + cpu_groups[nb++] = cpu_group; + cpumask_andnot(tmp, tmp, cpu_group); } free_cpumask_var(tmp); @@ -170,24 +170,24 @@ static int hotplug_tests(void) { int err; cpumask_var_t offlined_cpus; - int i, nb_cluster; - const struct cpumask **clusters; + int i, nb_cpu_group; + const
struct cpumask **cpu_groups; char *page_buf; err = -ENOMEM; if (!alloc_cpumask_var(&offlined_cpus, GFP_KERNEL)) return err; - /* We may have up to nb_available_cpus clusters. */ - clusters = kmalloc_array(nb_available_cpus, sizeof(*clusters), - GFP_KERNEL); - if (!clusters) + /* We may have up to nb_available_cpus cpu_groups. */ + cpu_groups = kmalloc_array(nb_available_cpus, sizeof(*cpu_groups), + GFP_KERNEL); + if (!cpu_groups) goto out_free_cpus; page_buf = (char *)__get_free_page(GFP_KERNEL); if (!page_buf) - goto out_free_clusters; + goto out_free_cpu_groups; err = 0; - nb_cluster = find_clusters(cpu_online_mask, clusters); + nb_cpu_group = find_cpu_groups(cpu_online_mask, cpu_groups); /* * Of course the last CPU cannot be powered down and cpu_down() should @@ -197,24 +197,22 @@ static int hotplug_tests(void) err += down_and_up_cpus(cpu_online_mask, offlined_cpus); /* - * Take down CPUs by cluster this time. When the last CPU is turned - * off, the cluster itself should shut down. + * Take down CPUs by cpu group this time. When the last CPU is turned + * off, the cpu group itself should shut down. */ - for (i = 0; i < nb_cluster; ++i) { - int cluster_id = - topology_physical_package_id(cpumask_any(clusters[i])); + for (i = 0; i < nb_cpu_group; ++i) { ssize_t len = cpumap_print_to_pagebuf(true, page_buf, - clusters[i]); + cpu_groups[i]); /* Remove trailing newline. 
*/ page_buf[len - 1] = '\0'; - pr_info("Trying to turn off and on again cluster %d " - "(CPUs %s)\n", cluster_id, page_buf); - err += down_and_up_cpus(clusters[i], offlined_cpus); + pr_info("Trying to turn off and on again group %d (CPUs %s)\n", + i, page_buf); + err += down_and_up_cpus(cpu_groups[i], offlined_cpus); } free_page((unsigned long)page_buf); -out_free_clusters: - kfree(clusters); +out_free_cpu_groups: + kfree(cpu_groups); out_free_cpus: free_cpumask_var(offlined_cpus); return err; -- cgit v1.2.3 From 343a8d17fa8d6dd97f408e8fedbcef12073f3774 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 10 Jan 2018 16:44:15 +0000 Subject: cpufreq: scpi: remove arm_big_little dependency The dependency on physical_package_id from the topology to get the cluster identifier is wrong. The concept of cluster used in ARM topology is unfortunately not well defined in the architecture, we should avoid using it. Further the frequency domain need not be mapped to so called "clusters" one to one. SCPI already provides means to obtain the frequency domain id from the device tree. In order to support some new topologies(e.g. DSU which contains 2 frequency domains within the physical cluster), pseudo clusters are created to make this driver work which is wrong again. In order to solve those issues and also remove dependency of topological physical id for frequency domain, this patch removes the arm_big_little dependency from scpi driver. Acked-by: Viresh Kumar Signed-off-by: Sudeep Holla Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/scpi-cpufreq.c | 193 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 178 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 05d299052c5c..247fcbfa4cb5 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -18,27 +18,89 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include +#include +#include +#include #include -#include +#include #include #include +#include #include -#include "arm_big_little.h" +struct scpi_data { + struct clk *clk; + struct device *cpu_dev; + struct thermal_cooling_device *cdev; +}; static struct scpi_ops *scpi_ops; -static int scpi_get_transition_latency(struct device *cpu_dev) +static unsigned int scpi_cpufreq_get_rate(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); + struct scpi_data *priv = policy->driver_data; + unsigned long rate = clk_get_rate(priv->clk); + + return rate / 1000; +} + +static int +scpi_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int index) +{ + struct scpi_data *priv = policy->driver_data; + u64 rate = policy->freq_table[index].frequency * 1000; + int ret; + + ret = clk_set_rate(priv->clk, rate); + if (!ret && (clk_get_rate(priv->clk) != rate)) + ret = -EIO; + + return ret; +} + +static int +scpi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) { - return scpi_ops->get_transition_latency(cpu_dev); + int cpu, domain, tdomain; + struct device *tcpu_dev; + + domain = scpi_ops->device_domain_id(cpu_dev); + if (domain < 0) + return domain; + + for_each_possible_cpu(cpu) { + if (cpu == cpu_dev->id) + continue; + + tcpu_dev = get_cpu_device(cpu); + if (!tcpu_dev) + continue; + + tdomain = scpi_ops->device_domain_id(tcpu_dev); + if (tdomain == domain) + cpumask_set_cpu(cpu, cpumask); + } + + return 0; } -static int scpi_init_opp_table(const struct cpumask *cpumask) +static int scpi_cpufreq_init(struct cpufreq_policy 
*policy) { int ret; - struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask)); + unsigned int latency; + struct device *cpu_dev; + struct scpi_data *priv; + struct cpufreq_frequency_table *freq_table; + + cpu_dev = get_cpu_device(policy->cpu); + if (!cpu_dev) { + pr_err("failed to get cpu%d device\n", policy->cpu); + return -ENODEV; + } ret = scpi_ops->add_opps_to_device(cpu_dev); if (ret) { @@ -46,32 +108,133 @@ static int scpi_init_opp_table(const struct cpumask *cpumask) return ret; } - ret = dev_pm_opp_set_sharing_cpus(cpu_dev, cpumask); - if (ret) + ret = scpi_get_sharing_cpus(cpu_dev, policy->cpus); + if (ret) { + dev_warn(cpu_dev, "failed to get sharing cpumask\n"); + return ret; + } + + ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus); + if (ret) { dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n", __func__, ret); + return ret; + } + + ret = dev_pm_opp_get_opp_count(cpu_dev); + if (ret <= 0) { + dev_dbg(cpu_dev, "OPP table is not ready, deferring probe\n"); + ret = -EPROBE_DEFER; + goto out_free_opp; + } + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + ret = -ENOMEM; + goto out_free_opp; + } + + ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); + if (ret) { + dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret); + goto out_free_priv; + } + + priv->cpu_dev = cpu_dev; + priv->clk = clk_get(cpu_dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(cpu_dev, "%s: Failed to get clk for cpu: %d\n", + __func__, cpu_dev->id); + goto out_free_cpufreq_table; + } + + policy->driver_data = priv; + + ret = cpufreq_table_validate_and_show(policy, freq_table); + if (ret) { + dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__, + ret); + goto out_put_clk; + } + + /* scpi allows DVFS request for any domain from any CPU */ + policy->dvfs_possible_from_any_cpu = true; + + latency = scpi_ops->get_transition_latency(cpu_dev); + if (!latency) + latency = CPUFREQ_ETERNAL; + + policy->cpuinfo.transition_latency = 
latency; + + policy->fast_switch_possible = false; + return 0; + +out_put_clk: + clk_put(priv->clk); +out_free_cpufreq_table: + dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table); +out_free_priv: + kfree(priv); +out_free_opp: + dev_pm_opp_cpumask_remove_table(policy->cpus); + return ret; } -static const struct cpufreq_arm_bL_ops scpi_cpufreq_ops = { - .name = "scpi", - .get_transition_latency = scpi_get_transition_latency, - .init_opp_table = scpi_init_opp_table, - .free_opp_table = dev_pm_opp_cpumask_remove_table, +static int scpi_cpufreq_exit(struct cpufreq_policy *policy) +{ + struct scpi_data *priv = policy->driver_data; + + cpufreq_cooling_unregister(priv->cdev); + clk_put(priv->clk); + dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); + kfree(priv); + dev_pm_opp_cpumask_remove_table(policy->related_cpus); + + return 0; +} + +static void scpi_cpufreq_ready(struct cpufreq_policy *policy) +{ + struct scpi_data *priv = policy->driver_data; + struct thermal_cooling_device *cdev; + + cdev = of_cpufreq_cooling_register(policy); + if (!IS_ERR(cdev)) + priv->cdev = cdev; +} + +static struct cpufreq_driver scpi_cpufreq_driver = { + .name = "scpi-cpufreq", + .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .verify = cpufreq_generic_frequency_table_verify, + .attr = cpufreq_generic_attr, + .get = scpi_cpufreq_get_rate, + .init = scpi_cpufreq_init, + .exit = scpi_cpufreq_exit, + .ready = scpi_cpufreq_ready, + .target_index = scpi_cpufreq_set_target, }; static int scpi_cpufreq_probe(struct platform_device *pdev) { + int ret; + scpi_ops = get_scpi_ops(); if (!scpi_ops) return -EIO; - return bL_cpufreq_register(&scpi_cpufreq_ops); + ret = cpufreq_register_driver(&scpi_cpufreq_driver); + if (ret) + dev_err(&pdev->dev, "%s: registering cpufreq failed, err: %d\n", + __func__, ret); + return ret; } static int scpi_cpufreq_remove(struct platform_device *pdev) { - bL_cpufreq_unregister(&scpi_cpufreq_ops); + 
cpufreq_unregister_driver(&scpi_cpufreq_driver); scpi_ops = NULL; return 0; } -- cgit v1.2.3 From fd6e440f20b1a4304553775fc55938848ff617c9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 21:20:05 +1100 Subject: powerpc/64s: Wire up cpu_show_meltdown() The recent commit 87590ce6e373 ("sysfs/cpu: Add vulnerability folder") added a generic folder and set of files for reporting information on CPU vulnerabilities. One of those was for meltdown: /sys/devices/system/cpu/vulnerabilities/meltdown This commit wires up that file for 64-bit Book3S powerpc. For now we default to "Vulnerable" unless the RFI flush is enabled. That may not actually be true on all hardware, further patches will refine the reporting based on the CPU/platform etc. But for now we default to being pessimists. Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/setup_64.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..2ed525a44734 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -166,6 +166,7 @@ config PPC select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_SMP_IDLE_THREAD diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 491be4179ddd..624d2a62d05d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -901,4 +901,12 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) if (!no_rfi_flush) rfi_flush_enable(enable); } + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} #endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit v1.2.3 From 236003e6b5443c45c18e613d2b0d776a9f87540e Mon 
Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jan 2018 22:17:18 +1100 Subject: powerpc/64s: Allow control of RFI flush via debugfs Expose the state of the RFI flush (enabled/disabled) via debugfs, and allow it to be enabled/disabled at runtime. eg: $ cat /sys/kernel/debug/powerpc/rfi_flush 1 $ echo 0 > /sys/kernel/debug/powerpc/rfi_flush $ cat /sys/kernel/debug/powerpc/rfi_flush 0 Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/kernel/setup_64.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 624d2a62d05d..e67413f4a8f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -902,6 +903,35 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) rfi_flush_enable(enable); } +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 
1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { if (rfi_flush) -- cgit v1.2.3 From 1b689a95ce7427075f9ac9fb4aea1af530742b7f Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 15 Jan 2018 14:30:03 +0100 Subject: powerpc/pseries: include linux/types.h in asm/hvcall.h Commit 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") uses u64 in asm/hvcall.h without including linux/types.h This breaks hvcall.h users that do not include the header themselves. Fixes: 6e032b350cd1 ("powerpc/powernv: Check device-tree for RFI flush settings") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index f0461618bf7b..eca3f9c68907 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -353,6 +353,7 @@ #define PROC_TABLE_GTSE 0x01 #ifndef __ASSEMBLY__ +#include /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments -- cgit v1.2.3 From 952a99ccfa9db2f9a32810fc9c0084f532dd871a Mon Sep 17 00:00:00 2001 From: Michael Sartain Date: Thu, 11 Jan 2018 19:47:42 -0500 Subject: tools lib traceevent: Fix bad force_token escape sequence Older kernels have a bug that creates invalid symbols. event-parse.c handles them by replacing them with a "%s" token. But the fix included an extra backslash, and "\%s" was added incorrectly. 
Signed-off-by: Michael Sartain Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004821.827168881@goodmis.org Link: http://lkml.kernel.org/r/d320000d37c10ce0912851e1fb78d1e0c946bcd9.1497486273.git.mikesart@fastmail.com Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 7ce724fc0544..0bc1a6df8a27 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1094,7 +1094,7 @@ static enum event_type __read_token(char **tok) if (strcmp(*tok, "LOCAL_PR_FMT") == 0) { free(*tok); *tok = NULL; - return force_token("\"\%s\" ", tok); + return force_token("\"%s\" ", tok); } else if (strcmp(*tok, "STA_PR_FMT") == 0) { free(*tok); *tok = NULL; -- cgit v1.2.3 From 3df76c9a8167ffff1588516fc74b980cde664efe Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:43 -0500 Subject: tools lib traceevent: Show value of flags that have not been parsed If the value contains bits that are not defined by print_flags() helper, then show the remaining bits. This aligns with the functionality of the kernel. 
Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/e60c889f-55e7-4ee8-0e50-151e435ffd8c@siemens.com Link: http://lkml.kernel.org/r/20180112004821.976225232@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 0bc1a6df8a27..96c9c0b33423 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3970,6 +3970,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, val &= ~fval; } } + if (val) { + if (print && arg->flags.delim) + trace_seq_puts(s, arg->flags.delim); + trace_seq_printf(s, "0x%llx", val); + } break; case PRINT_SYMBOL: val = eval_num_arg(data, size, event, arg->symbol.field); -- cgit v1.2.3 From d63444739bee6acfa9a834515da17f9cec544505 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 11 Jan 2018 19:47:44 -0500 Subject: tools lib traceevent: Print value of unknown symbolic fields Aligns trace-cmd with the behavior of the kernel. 
Signed-off-by: Jan Kiszka Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/e60c889f-55e7-4ee8-0e50-151e435ffd8c@siemens.com Link: http://lkml.kernel.org/r/20180112004822.118332436@goodmis.org Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 96c9c0b33423..87757eabbb08 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3985,6 +3985,8 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, break; } } + if (!flag) + trace_seq_printf(s, "0x%llx", val); break; case PRINT_HEX: case PRINT_HEX_STR: -- cgit v1.2.3 From 38d70b7ca1769f26c0b79f3c08ff2cc949712b59 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:45 -0500 Subject: tools lib traceevent: Simplify pointer print logic and fix %pF When processing %pX in pretty_print(), simplify the logic slightly by incrementing the ptr to the format string if isalnum(ptr[1]) is true. This follows the logic a bit more closely to what is in the kernel. Also, this fixes a small bug where %pF was not giving the offset of the function. 
Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004822.260262257@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 87757eabbb08..8757dd64e42c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4956,21 +4956,22 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event else ls = 2; - if (*(ptr+1) == 'F' || *(ptr+1) == 'f' || - *(ptr+1) == 'S' || *(ptr+1) == 's') { + if (isalnum(ptr[1])) ptr++; + + if (*ptr == 'F' || *ptr == 'f' || + *ptr == 'S' || *ptr == 's') { show_func = *ptr; - } else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') { - print_mac_arg(s, *(ptr+1), data, size, event, arg); - ptr++; + } else if (*ptr == 'M' || *ptr == 'm') { + print_mac_arg(s, *ptr, data, size, event, arg); arg = arg->next; break; - } else if (*(ptr+1) == 'I' || *(ptr+1) == 'i') { + } else if (*ptr == 'I' || *ptr == 'i') { int n; - n = print_ip_arg(s, ptr+1, data, size, event, arg); + n = print_ip_arg(s, ptr, data, size, event, arg); if (n > 0) { - ptr += n; + ptr += n - 1; arg = arg->next; break; } -- cgit v1.2.3 From 37db96bb49629681cb839d7304a70524fe10f969 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:46 -0500 Subject: tools lib traceevent: Handle new pointer processing of bprint strings The Linux kernel printf() has some extended use cases that dereference the pointer. This is dangerous for tracing because the pointer that is dereferenced can change or even be unmapped. It also causes issues when the trace data is extracted, because user space does not have access to the contents of the pointer even if it still exists.
To handle this, the kernel was updated to process these dereferenced pointers at the time they are recorded, and not post processed. Now they exist in the tracing buffer, and no dereference is needed at the time of reading the trace. The event parsing library needs to handle this new case. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004822.403349289@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 8757dd64e42c..344a034a8fbc 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4300,6 +4300,26 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc goto process_again; case 'p': ls = 1; + if (isalnum(ptr[1])) { + ptr++; + /* Check for special pointers */ + switch (*ptr) { + case 's': + case 'S': + case 'f': + case 'F': + break; + default: + /* + * Older kernels do not process + * dereferenced pointers. + * Only process if the pointer + * value is a printable. 
+ */ + if (isprint(*(char *)bptr)) + goto process_string; + } + } /* fall through */ case 'd': case 'u': @@ -4352,6 +4372,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc break; case 's': + process_string: arg = alloc_arg(); if (!arg) { do_warning_event(event, "%s(%d): not enough memory!", @@ -4959,6 +4980,11 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event if (isalnum(ptr[1])) ptr++; + if (arg->type == PRINT_BSTRING) { + trace_seq_puts(s, arg->string.string); + break; + } + if (*ptr == 'F' || *ptr == 'f' || *ptr == 'S' || *ptr == 's') { show_func = *ptr; -- cgit v1.2.3 From e877372880f72399323e433187cce2bfbea40263 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:47 -0500 Subject: tools lib traceevent: Show contents (in hex) of data of unrecognized type records When a record has an unrecognized type, an error message is reported, but it would also be helpful to see the contents of that record. At least show what it is in hex, instead of just showing a blank line. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004822.542204577@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 344a034a8fbc..e5f2acbb70cc 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5566,8 +5566,14 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, event = pevent_find_event_by_record(pevent, record); if (!event) { - do_warning("ug! no event found for type %d", - trace_parse_common_type(pevent, record->data)); + int i; + int type = trace_parse_common_type(pevent, record->data); + + do_warning("ug! 
no event found for type %d", type); + trace_seq_printf(s, "[UNKNOWN TYPE %d]", type); + for (i = 0; i < record->size; i++) + trace_seq_printf(s, " %02x", + ((unsigned char *)record->data)[i]); return; } -- cgit v1.2.3 From 67dfc376f3dfdc39b9125f32d5b24053a4da264f Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Thu, 11 Jan 2018 19:47:48 -0500 Subject: tools lib traceevent: Use asprintf when possible It makes the code clearer and less error prone. clearer: - less code - the code is now using the same format to create strings dynamically less error prone: - no magic number +2 +9 +5 to compute the size - no copy&paste of the strings to compute the size and to concatenate The function `asprintf` is not POSIX standard but the program was already using it. Later it can be decided to use only POSIX functions, then we can easly replace all the `asprintf(3)` with a local implementation of that function. Signed-off-by: Federico Vaga Acked-by: Namhyung Kim Cc: Andrew Morton Cc: Federico Vaga Link: http://lkml.kernel.org/r/20170802221558.9684-2-federico.vaga@vaga.pv.it Link: http://lkml.kernel.org/r/20180112004822.686281649@goodmis.org Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-plugin.c | 24 +++++++++--------------- tools/lib/traceevent/parse-filter.c | 11 ++++------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index a16756ae3526..d542cb60ca1a 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -120,12 +120,12 @@ char **traceevent_plugin_list_options(void) for (op = reg->options; op->name; op++) { char *alias = op->plugin_alias ? 
op->plugin_alias : op->file; char **temp = list; + int ret; - name = malloc(strlen(op->name) + strlen(alias) + 2); - if (!name) + ret = asprintf(&name, "%s:%s", alias, op->name); + if (ret < 0) goto err; - sprintf(name, "%s:%s", alias, op->name); list = realloc(list, count + 2); if (!list) { list = temp; @@ -290,17 +290,14 @@ load_plugin(struct pevent *pevent, const char *path, const char *alias; char *plugin; void *handle; + int ret; - plugin = malloc(strlen(path) + strlen(file) + 2); - if (!plugin) { + ret = asprintf(&plugin, "%s/%s", path, file); + if (ret < 0) { warning("could not allocate plugin memory\n"); return; } - strcpy(plugin, path); - strcat(plugin, "/"); - strcat(plugin, file); - handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL); if (!handle) { warning("could not load plugin '%s'\n%s\n", @@ -391,6 +388,7 @@ load_plugins(struct pevent *pevent, const char *suffix, char *home; char *path; char *envdir; + int ret; if (pevent->flags & PEVENT_DISABLE_PLUGINS) return; @@ -421,16 +419,12 @@ load_plugins(struct pevent *pevent, const char *suffix, if (!home) return; - path = malloc(strlen(home) + strlen(LOCAL_PLUGIN_DIR) + 2); - if (!path) { + ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR); + if (ret < 0) { warning("could not allocate plugin memory\n"); return; } - strcpy(path, home); - strcat(path, "/"); - strcat(path, LOCAL_PLUGIN_DIR); - load_plugins_dir(pevent, suffix, path, load_plugin, data); free(path); diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 315df0a70265..2410afdcbcfe 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -287,12 +287,10 @@ find_event(struct pevent *pevent, struct event_list **events, sys_name = NULL; } - reg = malloc(strlen(event_name) + 3); - if (reg == NULL) + ret = asprintf(®, "^%s$", event_name); + if (ret < 0) return PEVENT_ERRNO__MEM_ALLOC_FAILED; - sprintf(reg, "^%s$", event_name); - ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB); 
free(reg); @@ -300,13 +298,12 @@ find_event(struct pevent *pevent, struct event_list **events, return PEVENT_ERRNO__INVALID_EVENT_NAME; if (sys_name) { - reg = malloc(strlen(sys_name) + 3); - if (reg == NULL) { + ret = asprintf(®, "^%s$", sys_name); + if (ret < 0) { regfree(&ereg); return PEVENT_ERRNO__MEM_ALLOC_FAILED; } - sprintf(reg, "^%s$", sys_name); ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB); free(reg); if (ret) { -- cgit v1.2.3 From 6d36ce261614fbac3557cc58ba6a33424944c8a2 Mon Sep 17 00:00:00 2001 From: Michael Sartain Date: Thu, 11 Jan 2018 19:47:49 -0500 Subject: tools lib traceevent: Add UL suffix to MISSING_EVENTS Add UL suffix to MISSING_EVENTS since ints shouldn't be left shifted by 31. Signed-off-by: Michael Sartain Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20171016165542.13038-4-mikesart@fastmail.com Link: http://lkml.kernel.org/r/20180112004822.829533885@goodmis.org Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/kbuffer-parse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index c94e3641b046..ca424b157e46 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -24,8 +24,8 @@ #include "kbuffer.h" -#define MISSING_EVENTS (1 << 31) -#define MISSING_STORED (1 << 30) +#define MISSING_EVENTS (1UL << 31) +#define MISSING_STORED (1UL << 30) #define COMMIT_MASK ((1 << 27) - 1) -- cgit v1.2.3 From 806efaed3cacab1521895d20bb3b5ed610909299 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 11 Jan 2018 19:47:50 -0500 Subject: tools lib traceevent: Fix missing break in FALSE case of pevent_filter_clear_trivial() Currently the FILTER_TRIVIAL_FALSE case has a missing break statement, if the trivial type is FALSE, it will also run into the TRUE case, and always be skipped as the TRUE statement will continue the loop on the inverse condition of 
the FALSE statement. Reported-by: Namhyung Kim Acked-by: Namhyung Kim Signed-off-by: Taeung Song Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004823.012918807@goodmis.org Link: http://lkml.kernel.org/r/1493218540-12296-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/parse-filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 2410afdcbcfe..2b9048f90bae 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1631,6 +1631,7 @@ int pevent_filter_clear_trivial(struct event_filter *filter, case FILTER_TRIVIAL_FALSE: if (filter_type->filter->boolean.value) continue; + break; case FILTER_TRIVIAL_TRUE: if (!filter_type->filter->boolean.value) continue; -- cgit v1.2.3 From d777f8de99b05d399c0e4e51cdce016f26bd971b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:51 -0500 Subject: tools lib traceevent: Fix get_field_str() for dynamic strings If a field is a dynamic string, get_field_str() returned just the offset/size value and not the string. Have it parse the offset/size correctly to return the actual string. Otherwise filtering fails when trying to filter fields that are dynamic strings. 
Reported-by: Gopanapalli Pradeep Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004823.146333275@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/parse-filter.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 2b9048f90bae..431e8b309f6e 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1877,17 +1877,25 @@ static const char *get_field_str(struct filter_arg *arg, struct pevent_record *r struct pevent *pevent; unsigned long long addr; const char *val = NULL; + unsigned int size; char hex[64]; /* If the field is not a string convert it */ if (arg->str.field->flags & FIELD_IS_STRING) { val = record->data + arg->str.field->offset; + size = arg->str.field->size; + + if (arg->str.field->flags & FIELD_IS_DYNAMIC) { + addr = *(unsigned int *)val; + val = record->data + (addr & 0xffff); + size = addr >> 16; + } /* * We need to copy the data since we can't be sure the field * is null terminated. */ - if (*(val + arg->str.field->size - 1)) { + if (*(val + size - 1)) { /* copy it */ memcpy(arg->str.buffer, val, arg->str.field->size); /* the buffer is already NULL terminated */ -- cgit v1.2.3 From ffd3d18c20b8df281a18940ee80a99b28114d4b7 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Sun, 14 Jan 2018 13:28:50 -0600 Subject: perf tools: Add ARM Statistical Profiling Extensions (SPE) support 'perf record' and 'perf report --dump-raw-trace' supported in this release. Example usage: # perf record -e arm_spe/ts_enable=1,pa_enable=1/ dd if=/dev/zero of=/dev/null count=10000 # perf report --dump-raw-trace Note that the perf.data file is portable, so the report can be run on another architecture host if necessary. 
Output will contain raw SPE data and its textual representation, such as: 0x5c8 [0x30]: PERF_RECORD_AUXTRACE size: 0x200000 offset: 0 ref: 0x1891ad0e idx: 1 tid: 2227 cpu: 1 . . ... ARM SPE data: size 2097152 bytes . 00000000: 49 00 LD . 00000002: b2 c0 3b 29 0f 00 00 ff ff VA 0xffff00000f293bc0 . 0000000b: b3 c0 eb 24 fb 00 00 00 80 PA 0xfb24ebc0 ns=1 . 00000014: 9a 00 00 LAT 0 XLAT . 00000017: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000019: b0 00 c4 15 08 00 00 ff ff PC 0xff00000815c400 el3 ns=1 . 00000022: 98 00 00 LAT 0 TOT . 00000025: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000002e: 49 00 LD . 00000030: b2 80 3c 29 0f 00 00 ff ff VA 0xffff00000f293c80 . 00000039: b3 80 ec 24 fb 00 00 00 80 PA 0xfb24ec80 ns=1 . 00000042: 9a 00 00 LAT 0 XLAT . 00000045: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000047: b0 f4 11 16 08 00 00 ff ff PC 0xff0000081611f4 el3 ns=1 . 00000050: 98 00 00 LAT 0 TOT . 00000053: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000005c: 48 00 INSN-OTHER . 0000005e: 42 02 EV RETIRED . 00000060: b0 2c ef 7f 08 00 00 ff ff PC 0xff0000087fef2c el3 ns=1 . 00000069: 98 00 00 LAT 0 TOT . 0000006c: 71 d1 6f 21 2c 09 00 00 00 TS 39395094481 ... Other release notes: - applies to acme's perf/{core,urgent} branches, likely elsewhere - Report is self-contained within the tool. Record requires enabling the kernel SPE driver by setting CONFIG_ARM_SPE_PMU. 
- The intel-bts implementation was used as a starting point; its min/default/max buffer sizes and power of 2 pages granularity need to be revisited for ARM SPE - Recording across multiple SPE clusters/domains not supported - Snapshot support (record -S), and conversion to native perf events (e.g., via 'perf inject --itrace'), are also not supported - Technically both cs-etm and spe can be used simultaneously, however disabled for simplicity in this release Signed-off-by: Kim Phillips Reviewed-by: Dongjiu Geng Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: linux-arm-kernel@lists.infradead.org Cc: Marc Zyngier Cc: Mark Rutland Cc: Mathieu Poirier Cc: Pawel Moll Cc: Peter Zijlstra Cc: Rob Herring Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Wang Nan Cc: Will Deacon Link: http://lkml.kernel.org/r/20180114132850.0b127434b704a26bad13268f@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/auxtrace.c | 77 +++++- tools/perf/arch/arm/util/pmu.c | 6 + tools/perf/arch/arm64/util/Build | 3 +- tools/perf/arch/arm64/util/arm-spe.c | 225 +++++++++++++++++ tools/perf/util/Build | 2 + tools/perf/util/arm-spe-pkt-decoder.c | 462 ++++++++++++++++++++++++++++++++++ tools/perf/util/arm-spe-pkt-decoder.h | 43 ++++ tools/perf/util/arm-spe.c | 231 +++++++++++++++++ tools/perf/util/arm-spe.h | 31 +++ tools/perf/util/auxtrace.c | 3 + tools/perf/util/auxtrace.h | 1 + 11 files changed, 1077 insertions(+), 7 deletions(-) create mode 100644 tools/perf/arch/arm64/util/arm-spe.c create mode 100644 tools/perf/util/arm-spe-pkt-decoder.c create mode 100644 tools/perf/util/arm-spe-pkt-decoder.h create mode 100644 tools/perf/util/arm-spe.c create mode 100644 tools/perf/util/arm-spe.h diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index 8edf2cb71564..2323581b157d 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -22,6 +22,42 @@ #include "../../util/evlist.h" 
#include "../../util/pmu.h" #include "cs-etm.h" +#include "arm-spe.h" + +static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err) +{ + struct perf_pmu **arm_spe_pmus = NULL; + int ret, i, nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + /* arm_spe_xxxxxxxxx\0 */ + char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10]; + + arm_spe_pmus = zalloc(sizeof(struct perf_pmu *) * nr_cpus); + if (!arm_spe_pmus) { + pr_err("spes alloc failed\n"); + *err = -ENOMEM; + return NULL; + } + + for (i = 0; i < nr_cpus; i++) { + ret = sprintf(arm_spe_pmu_name, "%s%d", ARM_SPE_PMU_NAME, i); + if (ret < 0) { + pr_err("sprintf failed\n"); + *err = -ENOMEM; + return NULL; + } + + arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name); + if (arm_spe_pmus[*nr_spes]) { + pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n", + __func__, __LINE__, *nr_spes, + arm_spe_pmus[*nr_spes]->type, + arm_spe_pmus[*nr_spes]->name); + (*nr_spes)++; + } + } + + return arm_spe_pmus; +} struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, int *err) @@ -29,22 +65,51 @@ struct auxtrace_record struct perf_pmu *cs_etm_pmu; struct perf_evsel *evsel; bool found_etm = false; + bool found_spe = false; + static struct perf_pmu **arm_spe_pmus = NULL; + static int nr_spes = 0; + int i; + + if (!evlist) + return NULL; cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); - if (evlist) { - evlist__for_each_entry(evlist, evsel) { - if (cs_etm_pmu && - evsel->attr.type == cs_etm_pmu->type) - found_etm = true; + if (!arm_spe_pmus) + arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err); + + evlist__for_each_entry(evlist, evsel) { + if (cs_etm_pmu && + evsel->attr.type == cs_etm_pmu->type) + found_etm = true; + + if (!nr_spes) + continue; + + for (i = 0; i < nr_spes; i++) { + if (evsel->attr.type == arm_spe_pmus[i]->type) { + found_spe = true; + break; + } } } + if (found_etm && found_spe) { + pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n"); + *err = -EOPNOTSUPP; 
+ return NULL; + } + if (found_etm) return cs_etm_record_init(err); +#if defined(__aarch64__) + if (found_spe) + return arm_spe_recording_init(err, arm_spe_pmus[i]); +#endif + /* - * Clear 'err' even if we haven't found a cs_etm event - that way perf + * Clear 'err' even if we haven't found an event - that way perf * record can still be used even if tracers aren't present. The NULL * return value will take care of telling the infrastructure HW tracing * isn't available. diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index 98d67399a0d6..ac4dffc807b8 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -20,6 +20,7 @@ #include #include "cs-etm.h" +#include "arm-spe.h" #include "../../util/pmu.h" struct perf_event_attr @@ -30,7 +31,12 @@ struct perf_event_attr /* add ETM default config here */ pmu->selectable = true; pmu->set_drv_config = cs_etm_set_drv_config; +#if defined(__aarch64__) + } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) { + return arm_spe_pmu_default_config(pmu); +#endif } + #endif return NULL; } diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index e04f6cdd6f32..c0b8dfef98ba 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -5,4 +5,5 @@ libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ - ../../arm/util/cs-etm.o + ../../arm/util/cs-etm.o \ + arm-spe.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c new file mode 100644 index 000000000000..1120e39c1b00 --- /dev/null +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. 
+ */ + +#include +#include +#include +#include +#include + +#include "../../util/cpumap.h" +#include "../../util/evsel.h" +#include "../../util/evlist.h" +#include "../../util/session.h" +#include "../../util/util.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/auxtrace.h" +#include "../../util/arm-spe.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) + +struct arm_spe_recording { + struct auxtrace_record itr; + struct perf_pmu *arm_spe_pmu; + struct perf_evlist *evlist; +}; + +static size_t +arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + return ARM_SPE_AUXTRACE_PRIV_SIZE; +} + +static int arm_spe_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + + if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; + auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; + + return 0; +} + +static int arm_spe_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + struct perf_evsel *evsel, *arm_spe_evsel = NULL; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + struct perf_evsel *tracking_evsel; + int err; + + sper->evlist = evlist; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == arm_spe_pmu->type) { + if (arm_spe_evsel) { + pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + arm_spe_evsel = 
evsel; + opts->full_auxtrace = true; + } + } + + if (!opts->full_auxtrace) + return 0; + + /* We are in full trace mode but '-m,xyz' wasn't specified */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. + */ + perf_evlist__to_front(evlist, arm_spe_evsel); + + perf_evsel__set_sample_bit(arm_spe_evsel, CPU); + perf_evsel__set_sample_bit(arm_spe_evsel, TIME); + perf_evsel__set_sample_bit(arm_spe_evsel, TID); + + /* Add dummy event to keep tracking */ + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + perf_evsel__set_sample_bit(tracking_evsel, TIME); + perf_evsel__set_sample_bit(tracking_evsel, CPU); + perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + + return 0; +} + +static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + + return ts.tv_sec ^ ts.tv_nsec; +} + +static void arm_spe_recording_free(struct auxtrace_record *itr) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + + free(sper); +} + +static int arm_spe_read_finish(struct auxtrace_record *itr, int idx) +{ + struct 
arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each_entry(sper->evlist, evsel) { + if (evsel->attr.type == sper->arm_spe_pmu->type) + return perf_evlist__enable_event_idx(sper->evlist, + evsel, idx); + } + return -EINVAL; +} + +struct auxtrace_record *arm_spe_recording_init(int *err, + struct perf_pmu *arm_spe_pmu) +{ + struct arm_spe_recording *sper; + + if (!arm_spe_pmu) { + *err = -ENODEV; + return NULL; + } + + sper = zalloc(sizeof(struct arm_spe_recording)); + if (!sper) { + *err = -ENOMEM; + return NULL; + } + + sper->arm_spe_pmu = arm_spe_pmu; + sper->itr.recording_options = arm_spe_recording_options; + sper->itr.info_priv_size = arm_spe_info_priv_size; + sper->itr.info_fill = arm_spe_info_fill; + sper->itr.free = arm_spe_recording_free; + sper->itr.reference = arm_spe_reference; + sper->itr.read_finish = arm_spe_read_finish; + sper->itr.alignment = 0; + + return &sper->itr; +} + +struct perf_event_attr +*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu) +{ + struct perf_event_attr *attr; + + attr = zalloc(sizeof(struct perf_event_attr)); + if (!attr) { + pr_err("arm_spe default config cannot allocate a perf_event_attr\n"); + return NULL; + } + + /* + * If kernel driver doesn't advertise a minimum, + * use max allowable by PMSIDR_EL1.INTERVAL + */ + if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu", + &attr->sample_period) != 1) { + pr_debug("arm_spe driver doesn't advertise a min. interval. 
Using 4096\n"); + attr->sample_period = 4096; + } + + arm_spe_pmu->selectable = true; + arm_spe_pmu->is_uncore = false; + + return attr; +} diff --git a/tools/perf/util/Build b/tools/perf/util/Build index a3de7916fe63..7c6a8b461e24 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -86,6 +86,8 @@ libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o +libperf-$(CONFIG_AUXTRACE) += arm-spe.o +libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o libperf-y += parse-branch-options.o libperf-y += dump-insn.o libperf-y += parse-regs-options.o diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-pkt-decoder.c new file mode 100644 index 000000000000..b94001b756c7 --- /dev/null +++ b/tools/perf/util/arm-spe-pkt-decoder.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. 
+ */ + +#include +#include +#include +#include + +#include "arm-spe-pkt-decoder.h" + +#define BIT(n) (1ULL << (n)) + +#define NS_FLAG BIT(63) +#define EL_FLAG (BIT(62) | BIT(61)) + +#define SPE_HEADER0_PAD 0x0 +#define SPE_HEADER0_END 0x1 +#define SPE_HEADER0_ADDRESS 0x30 /* address packet (short) */ +#define SPE_HEADER0_ADDRESS_MASK 0x38 +#define SPE_HEADER0_COUNTER 0x18 /* counter packet (short) */ +#define SPE_HEADER0_COUNTER_MASK 0x38 +#define SPE_HEADER0_TIMESTAMP 0x71 +#define SPE_HEADER0_TIMESTAMP 0x71 +#define SPE_HEADER0_EVENTS 0x2 +#define SPE_HEADER0_EVENTS_MASK 0xf +#define SPE_HEADER0_SOURCE 0x3 +#define SPE_HEADER0_SOURCE_MASK 0xf +#define SPE_HEADER0_CONTEXT 0x24 +#define SPE_HEADER0_CONTEXT_MASK 0x3c +#define SPE_HEADER0_OP_TYPE 0x8 +#define SPE_HEADER0_OP_TYPE_MASK 0x3c +#define SPE_HEADER1_ALIGNMENT 0x0 +#define SPE_HEADER1_ADDRESS 0xb0 /* address packet (extended) */ +#define SPE_HEADER1_ADDRESS_MASK 0xf8 +#define SPE_HEADER1_COUNTER 0x98 /* counter packet (extended) */ +#define SPE_HEADER1_COUNTER_MASK 0xf8 + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le16_to_cpu bswap_16 +#define le32_to_cpu bswap_32 +#define le64_to_cpu bswap_64 +#define memcpy_le64(d, s, n) do { \ + memcpy((d), (s), (n)); \ + *(d) = le64_to_cpu(*(d)); \ +} while (0) +#else +#define le16_to_cpu +#define le32_to_cpu +#define le64_to_cpu +#define memcpy_le64 memcpy +#endif + +static const char * const arm_spe_packet_name[] = { + [ARM_SPE_PAD] = "PAD", + [ARM_SPE_END] = "END", + [ARM_SPE_TIMESTAMP] = "TS", + [ARM_SPE_ADDRESS] = "ADDR", + [ARM_SPE_COUNTER] = "LAT", + [ARM_SPE_CONTEXT] = "CONTEXT", + [ARM_SPE_OP_TYPE] = "OP-TYPE", + [ARM_SPE_EVENTS] = "EVENTS", + [ARM_SPE_DATA_SOURCE] = "DATA-SOURCE", +}; + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type type) +{ + return arm_spe_packet_name[type]; +} + +/* return ARM SPE payload size from its encoding, + * which is in bits 5:4 of the byte. 
+ * 00 : byte + * 01 : halfword (2) + * 10 : word (4) + * 11 : doubleword (8) + */ +static int payloadlen(unsigned char byte) +{ + return 1 << ((byte & 0x30) >> 4); +} + +static int arm_spe_get_payload(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + size_t payload_len = payloadlen(buf[0]); + + if (len < 1 + payload_len) + return ARM_SPE_NEED_MORE_BYTES; + + buf++; + + switch (payload_len) { + case 1: packet->payload = *(uint8_t *)buf; break; + case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break; + case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break; + case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break; + default: return ARM_SPE_BAD_PACKET; + } + + return 1 + payload_len; +} + +static int arm_spe_get_pad(struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_PAD; + return 1; +} + +static int arm_spe_get_alignment(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + unsigned int alignment = 1 << ((buf[0] & 0xf) + 1); + + if (len < alignment) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_PAD; + return alignment - (((uintptr_t)buf) & (alignment - 1)); +} + +static int arm_spe_get_end(struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_END; + return 1; +} + +static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_TIMESTAMP; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_events(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + int ret = arm_spe_get_payload(buf, len, packet); + + packet->type = ARM_SPE_EVENTS; + + /* we use index to identify Events with a less number of + * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS, + * LLC-REFILL, and REMOTE-ACCESS events are identified iff + * index > 1. 
+ */ + packet->index = ret - 1; + + return ret; +} + +static int arm_spe_get_data_source(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_DATA_SOURCE; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_context(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_CONTEXT; + packet->index = buf[0] & 0x3; + + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_op_type(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_OP_TYPE; + packet->index = buf[0] & 0x3; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_counter(const unsigned char *buf, size_t len, + const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ + if (len < 2) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_COUNTER; + if (ext_hdr) + packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + else + packet->index = buf[0] & 0x7; + + packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); + + return 1 + ext_hdr + 2; +} + +static int arm_spe_get_addr(const unsigned char *buf, size_t len, + const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ + if (len < 8) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_ADDRESS; + if (ext_hdr) + packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + else + packet->index = buf[0] & 0x7; + + memcpy_le64(&packet->payload, buf + 1, 8); + + return 1 + ext_hdr + 8; +} + +static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + unsigned int byte; + + memset(packet, 0, sizeof(struct arm_spe_pkt)); + + if (!len) + return ARM_SPE_NEED_MORE_BYTES; + + byte = buf[0]; + if (byte == SPE_HEADER0_PAD) + return arm_spe_get_pad(packet); + else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */ + return arm_spe_get_end(packet); + else if (byte & 0xc0 /* 0y11xxxxxx */) { + if (byte & 
0x80) { + if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS) + return arm_spe_get_addr(buf, len, 0, packet); + if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER) + return arm_spe_get_counter(buf, len, 0, packet); + } else + if (byte == SPE_HEADER0_TIMESTAMP) + return arm_spe_get_timestamp(buf, len, packet); + else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS) + return arm_spe_get_events(buf, len, packet); + else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE) + return arm_spe_get_data_source(buf, len, packet); + else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT) + return arm_spe_get_context(buf, len, packet); + else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE) + return arm_spe_get_op_type(buf, len, packet); + } else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) { + /* 16-bit header */ + byte = buf[1]; + if (byte == SPE_HEADER1_ALIGNMENT) + return arm_spe_get_alignment(buf, len, packet); + else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS) + return arm_spe_get_addr(buf, len, 1, packet); + else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER) + return arm_spe_get_counter(buf, len, 1, packet); + } + + return ARM_SPE_BAD_PACKET; +} + +int arm_spe_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + int ret; + + ret = arm_spe_do_get_packet(buf, len, packet); + /* put multiple consecutive PADs on the same line, up to + * the fixed-width output format of 16 bytes per line. 
+ */ + if (ret > 0 && packet->type == ARM_SPE_PAD) { + while (ret < 16 && len > (size_t)ret && !buf[ret]) + ret += 1; + } + return ret; +} + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, + size_t buf_len) +{ + int ret, ns, el, idx = packet->index; + unsigned long long payload = packet->payload; + const char *name = arm_spe_pkt_name(packet->type); + + switch (packet->type) { + case ARM_SPE_BAD: + case ARM_SPE_PAD: + case ARM_SPE_END: + return snprintf(buf, buf_len, "%s", name); + case ARM_SPE_EVENTS: { + size_t blen = buf_len; + + ret = 0; + ret = snprintf(buf, buf_len, "EV"); + buf += ret; + blen -= ret; + if (payload & 0x1) { + ret = snprintf(buf, buf_len, " EXCEPTION-GEN"); + buf += ret; + blen -= ret; + } + if (payload & 0x2) { + ret = snprintf(buf, buf_len, " RETIRED"); + buf += ret; + blen -= ret; + } + if (payload & 0x4) { + ret = snprintf(buf, buf_len, " L1D-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x8) { + ret = snprintf(buf, buf_len, " L1D-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x10) { + ret = snprintf(buf, buf_len, " TLB-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x20) { + ret = snprintf(buf, buf_len, " TLB-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x40) { + ret = snprintf(buf, buf_len, " NOT-TAKEN"); + buf += ret; + blen -= ret; + } + if (payload & 0x80) { + ret = snprintf(buf, buf_len, " MISPRED"); + buf += ret; + blen -= ret; + } + if (idx > 1) { + if (payload & 0x100) { + ret = snprintf(buf, buf_len, " LLC-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x200) { + ret = snprintf(buf, buf_len, " LLC-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x400) { + ret = snprintf(buf, buf_len, " REMOTE-ACCESS"); + buf += ret; + blen -= ret; + } + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + case ARM_SPE_OP_TYPE: + switch (idx) { + case 0: return snprintf(buf, buf_len, "%s", payload & 0x1 ? 
+ "COND-SELECT" : "INSN-OTHER"); + case 1: { + size_t blen = buf_len; + + if (payload & 0x1) + ret = snprintf(buf, buf_len, "ST"); + else + ret = snprintf(buf, buf_len, "LD"); + buf += ret; + blen -= ret; + if (payload & 0x2) { + if (payload & 0x4) { + ret = snprintf(buf, buf_len, " AT"); + buf += ret; + blen -= ret; + } + if (payload & 0x8) { + ret = snprintf(buf, buf_len, " EXCL"); + buf += ret; + blen -= ret; + } + if (payload & 0x10) { + ret = snprintf(buf, buf_len, " AR"); + buf += ret; + blen -= ret; + } + } else if (payload & 0x4) { + ret = snprintf(buf, buf_len, " SIMD-FP"); + buf += ret; + blen -= ret; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + case 2: { + size_t blen = buf_len; + + ret = snprintf(buf, buf_len, "B"); + buf += ret; + blen -= ret; + if (payload & 0x1) { + ret = snprintf(buf, buf_len, " COND"); + buf += ret; + blen -= ret; + } + if (payload & 0x2) { + ret = snprintf(buf, buf_len, " IND"); + buf += ret; + blen -= ret; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + default: return 0; + } + case ARM_SPE_DATA_SOURCE: + case ARM_SPE_TIMESTAMP: + return snprintf(buf, buf_len, "%s %lld", name, payload); + case ARM_SPE_ADDRESS: + switch (idx) { + case 0: + case 1: ns = !!(packet->payload & NS_FLAG); + el = (packet->payload & EL_FLAG) >> 61; + payload &= ~(0xffULL << 56); + return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d", + (idx == 1) ? 
"TGT" : "PC", payload, el, ns); + case 2: return snprintf(buf, buf_len, "VA 0x%llx", payload); + case 3: ns = !!(packet->payload & NS_FLAG); + payload &= ~(0xffULL << 56); + return snprintf(buf, buf_len, "PA 0x%llx ns=%d", + payload, ns); + default: return 0; + } + case ARM_SPE_CONTEXT: + return snprintf(buf, buf_len, "%s 0x%lx el%d", name, + (unsigned long)payload, idx + 1); + case ARM_SPE_COUNTER: { + size_t blen = buf_len; + + ret = snprintf(buf, buf_len, "%s %d ", name, + (unsigned short)payload); + buf += ret; + blen -= ret; + switch (idx) { + case 0: ret = snprintf(buf, buf_len, "TOT"); break; + case 1: ret = snprintf(buf, buf_len, "ISSUE"); break; + case 2: ret = snprintf(buf, buf_len, "XLAT"); break; + default: ret = 0; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + default: + break; + } + + return snprintf(buf, buf_len, "%s 0x%llx (%d)", + name, payload, packet->index); +} diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-pkt-decoder.h new file mode 100644 index 000000000000..d786ef65113f --- /dev/null +++ b/tools/perf/util/arm-spe-pkt-decoder.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. 
+ */ + +#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__ +#define INCLUDE__ARM_SPE_PKT_DECODER_H__ + +#include +#include + +#define ARM_SPE_PKT_DESC_MAX 256 + +#define ARM_SPE_NEED_MORE_BYTES -1 +#define ARM_SPE_BAD_PACKET -2 + +enum arm_spe_pkt_type { + ARM_SPE_BAD, + ARM_SPE_PAD, + ARM_SPE_END, + ARM_SPE_TIMESTAMP, + ARM_SPE_ADDRESS, + ARM_SPE_COUNTER, + ARM_SPE_CONTEXT, + ARM_SPE_OP_TYPE, + ARM_SPE_EVENTS, + ARM_SPE_DATA_SOURCE, +}; + +struct arm_spe_pkt { + enum arm_spe_pkt_type type; + unsigned char index; + uint64_t payload; +}; + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type); + +int arm_spe_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet); + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len); +#endif diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c new file mode 100644 index 000000000000..6067267cc76c --- /dev/null +++ b/tools/perf/util/arm-spe.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpumap.h" +#include "color.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "session.h" +#include "util.h" +#include "thread.h" +#include "debug.h" +#include "auxtrace.h" +#include "arm-spe.h" +#include "arm-spe-pkt-decoder.h" + +struct arm_spe { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + u32 pmu_type; +}; + +struct arm_spe_queue { + struct arm_spe *spe; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + bool on_heap; + bool done; + pid_t pid; + pid_t tid; + int cpu; +}; + +static void arm_spe_dump(struct arm_spe *spe __maybe_unused, + unsigned char *buf, size_t len) +{ + struct arm_spe_pkt packet; + size_t pos = 0; + int ret, pkt_len, i; + char desc[ARM_SPE_PKT_DESC_MAX]; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... 
ARM SPE data: size %zu bytes\n", + len); + + while (len) { + ret = arm_spe_get_packet(buf, len, &packet); + if (ret > 0) + pkt_len = ret; + else + pkt_len = 1; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < pkt_len; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < 16; i++) + color_fprintf(stdout, color, " "); + if (ret > 0) { + ret = arm_spe_pkt_desc(&packet, desc, + ARM_SPE_PKT_DESC_MAX); + if (ret > 0) + color_fprintf(stdout, color, " %s\n", desc); + } else { + color_fprintf(stdout, color, " Bad packet!\n"); + } + pos += pkt_len; + buf += pkt_len; + len -= pkt_len; + } +} + +static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, + size_t len) +{ + printf(".\n"); + arm_spe_dump(spe, buf, len); +} + +static int arm_spe_process_event(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static int arm_spe_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data__fd(session->data); + int err; + + if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&spe->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + arm_spe_dump_event(spe, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + + return 0; +} + +static int arm_spe_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 
0; +} + +static void arm_spe_free_queue(void *priv) +{ + struct arm_spe_queue *speq = priv; + + if (!speq) + return; + free(speq); +} + +static void arm_spe_free_events(struct perf_session *session) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + struct auxtrace_queues *queues = &spe->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + arm_spe_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + auxtrace_queues__free(queues); +} + +static void arm_spe_free(struct perf_session *session) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + + auxtrace_heap__free(&spe->heap); + arm_spe_free_events(session); + session->auxtrace = NULL; + free(spe); +} + +static const char * const arm_spe_info_fmts[] = { + [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", +}; + +static void arm_spe_print_info(u64 *arr) +{ + if (!dump_trace) + return; + + fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); +} + +int arm_spe_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE; + struct arm_spe *spe; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + spe = zalloc(sizeof(struct arm_spe)); + if (!spe) + return -ENOMEM; + + err = auxtrace_queues__init(&spe->queues); + if (err) + goto err_free; + + spe->session = session; + spe->machine = &session->machines.host; /* No kvm support */ + spe->auxtrace_type = auxtrace_info->type; + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + + spe->auxtrace.process_event = arm_spe_process_event; + spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; + spe->auxtrace.flush_events = arm_spe_flush; + spe->auxtrace.free_events = arm_spe_free_events; + spe->auxtrace.free = 
arm_spe_free; + session->auxtrace = &spe->auxtrace; + + arm_spe_print_info(&auxtrace_info->priv[0]); + + return 0; + +err_free: + free(spe); + return err; +} diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h new file mode 100644 index 000000000000..98d3235781c3 --- /dev/null +++ b/tools/perf/util/arm-spe.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#ifndef INCLUDE__PERF_ARM_SPE_H__ +#define INCLUDE__PERF_ARM_SPE_H__ + +#define ARM_SPE_PMU_NAME "arm_spe_" + +enum { + ARM_SPE_PMU_TYPE, + ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_AUXTRACE_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) + +union perf_event; +struct perf_session; +struct perf_pmu; + +struct auxtrace_record *arm_spe_recording_init(int *err, + struct perf_pmu *arm_spe_pmu); + +int arm_spe_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu); +#endif diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c76687e42344..3bba9947ab7f 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -54,6 +54,7 @@ #include "intel-pt.h" #include "intel-bts.h" +#include "arm-spe.h" #include "sane_ctype.h" #include "symbol/kallsyms.h" @@ -910,6 +911,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, return intel_pt_process_auxtrace_info(event, session); case PERF_AUXTRACE_INTEL_BTS: return intel_bts_process_auxtrace_info(event, session); + case PERF_AUXTRACE_ARM_SPE: + return arm_spe_process_auxtrace_info(event, session); case PERF_AUXTRACE_CS_ETM: case PERF_AUXTRACE_UNKNOWN: default: diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index d19e11b68de7..453c148d2158 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -43,6 +43,7 @@ 
enum auxtrace_type { PERF_AUXTRACE_INTEL_PT, PERF_AUXTRACE_INTEL_BTS, PERF_AUXTRACE_CS_ETM, + PERF_AUXTRACE_ARM_SPE, }; enum itrace_period_type { -- cgit v1.2.3 From 249d98e567e25dd03e015e2d31e1b7b9648f34df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 11:07:58 -0300 Subject: perf callchain: Fix attr.sample_max_stack setting When setting the "dwarf" unwinder for a specific event and not specifying the max-stack, the attr.sample_max_stack ended up using an uninitialized callchain_param.max_stack, fix it by using designated initializers for that callchain_param variable, zeroing all non explicitly initialized struct members. Here is what happened: # perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 callchain: type DWARF callchain: stack dump size 8192 perf_event_attr: type 2 size 112 config 0x730 { sample_period, sample_freq } 1 sample_type IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC exclude_callchain_user 1 { wakeup_events, wakeup_watermark } 1 sample_regs_user 0xff0fff sample_stack_user 8192 sample_max_stack 50656 sys_perf_event_open failed, error -75 Value too large for defined data type # perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 callchain: type DWARF callchain: stack dump size 8192 perf_event_attr: type 2 size 112 config 0x730 sample_type IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC exclude_callchain_user 1 sample_regs_user 0xff0fff sample_stack_user 8192 sample_max_stack 30448 sys_perf_event_open failed, error -75 Value too large for defined data type # Now the attr.sample_max_stack is set to zero and the above works as expected: # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms --- ::1 ping statistics --- 1 packets transmitted, 1
received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms 0.000 probe_libc:inet_pton:(7feb7a998350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa39b6108f3f] (/usr/bin/ping) # Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-is9tramondqa9jlxxsgcm9iz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index efa2e629a669..8f971a2301d1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -731,14 +731,14 @@ static void apply_config_terms(struct perf_evsel *evsel, struct perf_evsel_config_term *term; struct list_head *config_terms = &evsel->config_terms; struct perf_event_attr *attr = &evsel->attr; - struct callchain_param param; + /* callgraph default */ + struct callchain_param param = { + .record_mode = callchain_param.record_mode, + }; u32 dump_size = 0; int max_stack = 0; const char *callgraph_buf = NULL; - /* callgraph default */ - param.record_mode = callchain_param.record_mode; - list_for_each_entry(term, config_terms, list) { switch (term->type) { case PERF_EVSEL__CONFIG_TERM_PERIOD: -- cgit v1.2.3 From eabad8c6856f185f876b54c426c2cc69fe0f0a7d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 16:48:46 -0300 Subject: perf unwind: Do not look just at the global callchain_param.record_mode When setting up DWARF callchains on specific events, without using 'record' or 'trace' --call-graph, but instead doing it like: perf trace -e cycles/call-graph=dwarf/ The unwind__prepare_access() call in thread__insert_map() when we process PERF_RECORD_MMAP(2) metadata events were not being performed, precluding us from using per-event DWARF callchains, handling them just when we asked for 
all events to be DWARF, using "--call-graph dwarf". We do it in the PERF_RECORD_MMAP because we have to look at one of the executable maps to figure out the executable type (64-bit, 32-bit) of the DSO laid out in that mmap. Also to look at the architecture where the perf.data file was recorded. All this probably should be deferred to when we process a sample for some thread that has callchains, so that we do this processing only for the threads with samples, not for all of them. For now, fix using DWARF on specific events. Before: # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.048 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.048/0.048/0.048/0.000 ms 0.000 probe_libc:inet_pton:(7fe9597bb350)) Problem processing probe_libc:inet_pton callchain, skipping... # After: # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.060 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.060/0.060/0.060/0.000 ms 0.000 probe_libc:inet_pton:(7fd4aa930350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa804e51af3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa804e51b379] (/usr/bin/ping) # # perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.057 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.057/0.057/0.057/0.000 ms 0.000 probe_libc:inet_pton:(7f9363b9e350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) 
[0xffffa9e8a14e0f3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffa9e8a14e1379] (/usr/bin/ping) # # perf trace --call-graph=fp --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.077 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.077/0.077/0.077/0.000 ms 0.000 probe_libc:inet_pton:(7f4947e1c350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa716d88ef3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa716d88f379] (/usr/bin/ping) # # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=fp/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.078 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.078/0.078/0.078/0.000 ms 0.000 probe_libc:inet_pton:(7fa157696350)) __GI___inet_pton (/usr/lib64/libc-2.26.so) getaddrinfo (/usr/lib64/libc-2.26.so) [0xffffa9ba39c74f40] (/usr/bin/ping) # Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/r/20180116182650.GE16107@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 5 +++-- tools/perf/builtin-report.c | 5 +++-- tools/perf/builtin-script.c | 5 +++-- tools/perf/tests/dwarf-unwind.c | 1 + tools/perf/util/callchain.c | 10 ++++++++++ tools/perf/util/callchain.h | 2 ++ tools/perf/util/unwind-libunwind-local.c | 9 +++------ 7 files changed, 25 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c0debc3f79b6..c0815a37fdb5 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist) enum 
perf_call_graph_mode mode = CALLCHAIN_NONE; if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) + (sample_type & PERF_SAMPLE_STACK_USER)) { mode = CALLCHAIN_DWARF; - else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) mode = CALLCHAIN_LBR; else if (sample_type & PERF_SAMPLE_CALLCHAIN) mode = CALLCHAIN_FP; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index dd4df9a5cd06..6593779224d5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -338,9 +338,10 @@ static int report__setup_sample_type(struct report *rep) if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) + (sample_type & PERF_SAMPLE_STACK_USER)) { callchain_param.record_mode = CALLCHAIN_DWARF; - else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c1cce474c0f1..08bc818f371b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script) if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) + (sample_type & PERF_SAMPLE_STACK_USER)) { callchain_param.record_mode = CALLCHAIN_DWARF; - else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 
ac40e05bcab4..260418969120 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu } callchain_param.record_mode = CALLCHAIN_DWARF; + dwarf_callchain_users = true; if (init_live_machine(machine)) { pr_err("Could not init machine\n"); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 082505d08d72..32ef7bdca1cf 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -37,6 +37,15 @@ struct callchain_param callchain_param = { CALLCHAIN_PARAM_DEFAULT }; +/* + * Are there any events using DWARF callchains? + * + * I.e. + * + * -e cycles/call-graph=dwarf/ + */ +bool dwarf_callchain_users; + struct callchain_param callchain_param_default = { CALLCHAIN_PARAM_DEFAULT }; @@ -265,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) ret = 0; param->record_mode = CALLCHAIN_DWARF; param->dump_size = default_stack_dump_size; + dwarf_callchain_users = true; tok = strtok_r(NULL, ",", &saveptr); if (tok) { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index b79ef2478a57..154560b1eb65 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -89,6 +89,8 @@ enum chain_value { CCVAL_COUNT, }; +extern bool dwarf_callchain_users; + struct callchain_param { bool enabled; enum perf_call_graph_mode record_mode; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 7a42f703e858..af873044d33a 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -631,9 +631,8 @@ static unw_accessors_t accessors = { static int _unwind__prepare_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return 0; - thread->addr_space = unw_create_addr_space(&accessors, 0); if (!thread->addr_space) {
pr_err("unwind: Can't create unwind address space.\n"); @@ -646,17 +645,15 @@ static int _unwind__prepare_access(struct thread *thread) static void _unwind__flush_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return; - unw_flush_cache(thread->addr_space, 0, 0); } static void _unwind__finish_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return; - unw_destroy_addr_space(thread->addr_space); } -- cgit v1.2.3 From 75d501171462d8624fd14a2baa474476d6745e95 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 10:39:55 -0300 Subject: perf trace: Setup DWARF callchains for non-syscall events when --max-stack is used If we use: perf trace --max-stack=4 then the syscall events will use DWARF callchains, when available (libunwind enabled in the build) and the printing will stop at 4 levels. When we introduced support for tracepoint events this ended up not applying for them, fix it. 
Before: # perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.058 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.058/0.058/0.058/0.000 ms 0.000 probe_libc:inet_pton:(7fc6c2a16350)) # After: # perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.087 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.087/0.087/0.087/0.000 ms 0.000 probe_libc:inet_pton:(7fbf9a041350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa947cb67f3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa947cb68379] (/usr/bin/ping) # Reported-by: Thomas Richter Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-afsu9eegd43ppihiuafhh9qv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0362974854e9..ee85c29dbf70 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2350,7 +2350,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } - perf_evlist__config(evlist, &trace->opts, NULL); + perf_evlist__config(evlist, &trace->opts, &callchain_param); signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); @@ -3065,8 +3065,9 @@ int cmd_trace(int argc, const char **argv) } #ifdef HAVE_DWARF_UNWIND_SUPPORT - if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls) + if ((trace.min_stack || max_stack_user_set) && 
!callchain_param.enabled) { record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); + } #endif if (callchain_param.enabled) { -- cgit v1.2.3 From bd3dda9ab0fbdb8a91a2e869d93a0c9692b8444f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 11:33:53 -0300 Subject: perf trace: Allow overriding global --max-stack per event The per-event max-stack setting wasn't overriding the global --max-stack setting: # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf,max-stack=2/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms 0.000 probe_libc:inet_pton:(7feb7a998350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa39b6108f3f] (/usr/bin/ping) # Fix it: # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf,max-stack=2/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.073 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.073/0.073/0.073/0.000 ms 0.000 probe_libc:inet_pton:(7f1083221350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) # Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ic3g837xg8ob3kcpkspxwz0g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ee85c29dbf70..531d43bf57e1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1644,7 +1644,7 @@ static int trace__resolve_callchain(struct trace *trace, 
struct perf_evsel *evse struct addr_location al; if (machine__resolve(trace->host, &al, sample) < 0 || - thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack)) + thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack)) return -1; return 0; @@ -2423,6 +2423,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || evlist->threads->nr > 1 || perf_evlist__first(evlist)->attr.inherit; + + /* + * Now that we already used evsel->attr to ask the kernel to setup the + * events, lets reuse evsel->attr.sample_max_stack as the limit in + * trace__resolve_callchain(), allowing per-event max-stack settings + * to override an explicitely set --max-stack global setting. + */ + evlist__for_each_entry(evlist, evsel) { + if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) && + evsel->attr.sample_max_stack == 0) + evsel->attr.sample_max_stack = trace->max_stack; + } again: before = trace->nr_events; -- cgit v1.2.3 From 0d3dcc0ef13d9d78132d7d6ae068c17ed858e65e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Jan 2018 11:16:25 -0300 Subject: perf callchains: Ask for PERF_RECORD_MMAP for data mmaps for DWARF unwinding When we use a global DWARF setting as in: perf record --call-graph dwarf According to 5c0cf22477ea ("perf record: Store data mmaps for dwarf unwind") we need to set up some extra perf_event_attr bits. But when we instead do a per event dwarf setting: perf record -e cycles/call-graph=dwarf/ This was not being done, make them equivalent. This didn't produce any output changes in my tests while fixing up loose ends in the per-event settings, I found it just by comparing the perf_event_attr fields trying to find an explanation for those problems. 
Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Noel Grandin Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-6476r53h2o38skbs9qa4ust4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8f971a2301d1..85eb84dfdf91 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -726,7 +726,7 @@ perf_evsel__reset_callgraph(struct perf_evsel *evsel, } static void apply_config_terms(struct perf_evsel *evsel, - struct record_opts *opts) + struct record_opts *opts, bool track) { struct perf_evsel_config_term *term; struct list_head *config_terms = &evsel->config_terms; @@ -797,6 +797,8 @@ static void apply_config_terms(struct perf_evsel *evsel, /* User explicitly set per-event callgraph, clear the old setting and reset. */ if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { + bool sample_address = false; + if (max_stack) { param.max_stack = max_stack; if (callgraph_buf == NULL) @@ -816,6 +818,8 @@ static void apply_config_terms(struct perf_evsel *evsel, evsel->name); return; } + if (param.record_mode == CALLCHAIN_DWARF) + sample_address = true; } } if (dump_size > 0) { @@ -828,8 +832,14 @@ static void apply_config_terms(struct perf_evsel *evsel, perf_evsel__reset_callgraph(evsel, &callchain_param); /* set perf-event callgraph */ - if (param.enabled) + if (param.enabled) { + if (sample_address) { + perf_evsel__set_sample_bit(evsel, ADDR); + perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel->attr.mmap_data = track; + } perf_evsel__config_callchain(evsel, opts, &param); + } } } @@ -1060,7 +1070,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * Apply event specific term settings, * it overloads any global configuration. 
*/ - apply_config_terms(evsel, opts); + apply_config_terms(evsel, opts, track); evsel->ignore_missing_thread = opts->ignore_missing_thread; } -- cgit v1.2.3 From eb0b419eff8cf51af8e16cc8c5d2a92d19824266 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:26 +0800 Subject: perf report: Improve error msg when no first/last sample time found The following message will be returned to user when executing 'perf report --time' if perf data file doesn't contain the first/last sample time. "HINT: no first/last sample time found in perf data. Please use latest perf binary to execute 'perf record' (if '--buildid-all' is enabled, needs to set '--timestamp-boundary')." Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 6593779224d5..7d4f0a5de326 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1300,7 +1300,9 @@ repeat: if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { - pr_err("No first/last sample time in perf data\n"); + pr_err("HINT: no first/last sample time found in perf data.\n" + "Please use latest perf binary to execute 'perf record'\n" + "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); return -EINVAL; } -- cgit v1.2.3 From 1e2778e91616086177a255f3fc8c72ecaa564ae6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:27 +0800 Subject: perf script: Improve error msg when no first/last sample time found The following message will be returned to user when executing 'perf script --time' if perf data file doesn't contain the 
first/last sample time. "HINT: no first/last sample time found in perf data. Please use latest perf binary to execute 'perf record' (if '--buildid-all' is enabled, needs to set '--timestamp-boundary')." Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 08bc818f371b..ac781916e51e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3449,7 +3449,9 @@ int cmd_script(int argc, const char **argv) if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { - pr_err("No first/last sample time in perf data\n"); + pr_err("HINT: no first/last sample time found in perf data.\n" + "Please use latest perf binary to execute 'perf record'\n" + "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); err = -EINVAL; goto out_delete; } -- cgit v1.2.3 From 6e761cbc9127fb8fc609aea2265ee8279b8d6c55 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:28 +0800 Subject: perf util: Improve error checking for time percent input The command line like 'perf report --stdio --time 1abc%/1' could be accepted by perf. It looks not very good. This patch uses strtod() to replace original atof() and check the entire string. Now for the same command line, it would return error message "Invalid time string". 
root@skl:/tmp# perf report --stdio --time 1abc%/1 Invalid time string Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 3f7f18f06982..88510ab6450e 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -116,7 +116,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) static int parse_percent(double *pcnt, char *str) { - char *c; + char *c, *endptr; + double d; c = strchr(str, '%'); if (c) @@ -124,8 +125,11 @@ static int parse_percent(double *pcnt, char *str) else return -1; - *pcnt = atof(str) / 100.0; + d = strtod(str, &endptr); + if (endptr != str + strlen(str)) + return -1; + *pcnt = d / 100.0; return 0; } -- cgit v1.2.3 From 3002812e602d3f991a5b8cdc0499e63e13ff65c4 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:29 +0800 Subject: perf util: Support no index time percent slice Previously, the time percent slice needs an index to specify which one the user wants. It may be easier to use if the index can be omitted. 
So with this patch, for example, perf report --stdio --time 10%/1 should be equivalent to perf report --stdio --time 10% Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 88510ab6450e..5769f972c23e 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -261,6 +261,37 @@ static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, return i; } +static int one_percent_convert(struct perf_time_interval *ptime_buf, + const char *ostr, u64 start, u64 end, char *c) +{ + char *str; + int len = strlen(ostr), ret; + + /* + * c points to '%'. 
+ * '%' should be the last character + */ + if (ostr + len - 1 != c) + return -1; + + /* + * Construct a string like "xx%/1" + */ + str = malloc(len + 3); + if (str == NULL) + return -ENOMEM; + + memcpy(str, ostr, len); + strcpy(str + len, "/1"); + + ret = percent_slash_split(str, ptime_buf, start, end); + if (ret == 0) + ret = 1; + + free(str); + return ret; +} + int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, const char *ostr, u64 start, u64 end) { @@ -270,6 +301,7 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, * ostr example: * 10%/2,10%/3: select the second 10% slice and the third 10% slice * 0%-10%,30%-40%: multiple time range + * 50%: just one percent */ memset(ptime_buf, 0, sizeof(*ptime_buf) * num); @@ -286,6 +318,10 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, end, percent_dash_split); } + c = strchr(ostr, '%'); + if (c) + return one_percent_convert(ptime_buf, ostr, start, end, c); + return -1; } -- cgit v1.2.3 From 7425664bbd3174814500c7ab8740cbb9bb25396c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:30 +0800 Subject: perf report: Add an indication of what time slices are used Add a time slices indication to the perf report header. 
For example, # perf report --stdio --time 10% # Total Lost Samples: 0 # # Samples: 9K of event 'cycles:ppp' (time slices: 10%) # Event count (approx.): 8951288803 Signed-off-by: Jin Yao Suggested--by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-6-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7d4f0a5de326..4aaaa37262a8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -404,6 +404,9 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (evname != NULL) ret += fprintf(fp, " of event '%s'", evname); + if (rep->time_str) + ret += fprintf(fp, " (time slices: %s)", rep->time_str); + if (symbol_conf.show_ref_callgraph && strstr(evname, "call-graph=no")) { ret += fprintf(fp, ", show reference callgraph"); -- cgit v1.2.3 From 5a031f887cb8d60fe87d21159c3cf82c38f55679 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:31 +0800 Subject: perf util: Allocate time slices buffer according to number of comma Previously we use a magic number 10 to limit the number of time slices. It's not very good. This patch creates a new function perf_time__range_alloc() to allocate time slices buffer. The number of buffer entries is determined by the number of comma in string but at least it will allocate one entry even if no comma is found. 
Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-7-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/time-utils.h | 2 ++ 2 files changed, 30 insertions(+) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 5769f972c23e..6193b46050a5 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -325,6 +325,34 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, return -1; } +struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size) +{ + const char *p1, *p2; + int i = 1; + struct perf_time_interval *ptime; + + /* + * At least allocate one time range. + */ + if (!ostr) + goto alloc; + + p1 = ostr; + while (p1 < ostr + strlen(ostr)) { + p2 = strchr(p1, ','); + if (!p2) + break; + + p1 = p2 + 1; + i++; + } + +alloc: + *size = i; + ptime = calloc(i, sizeof(*ptime)); + return ptime; +} + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) { /* if time is not set don't drop sample */ diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 34d5eba26bf5..70b177d2b98c 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -16,6 +16,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, const char *ostr, u64 start, u64 end); +struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size); + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, -- cgit v1.2.3 From 0a3cc3ae05c363dabd891ed5f918c62197de8c7f Mon Sep 17 00:00:00 2001 
From: Jin Yao Date: Wed, 10 Jan 2018 23:00:32 +0800 Subject: perf report: Remove the time slices number limitation Previously it was only allowed to use at most 10 time slices in 'perf report --time'. This patch removes this limitation. For example, following command line is OK (12 time slices) perf report --stdio --time 1%/1,1%/2,1%/3,1%/4,1%/5,1%/6,1%/7,1%/8,1%/9,1%/10,1%/11,1%/12 Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-8-git-send-email-yao.jin@linux.intel.com [ No need to check for NULL to call free, use zfree ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/builtin-report.c | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 63d0db3184c9..907e505b6309 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -403,7 +403,7 @@ OPTIONS to end of file. Also support time percent with multiple time range. Time string is - 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. 
For example: Select the second 10% time slice: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4aaaa37262a8..42a52dcc41cd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -54,8 +54,6 @@ #include #include -#define PTIME_RANGE_MAX 10 - struct report { struct perf_tool tool; struct perf_session *session; @@ -76,7 +74,8 @@ struct report { const char *cpu_list; const char *symbol_filter_str; const char *time_str; - struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + struct perf_time_interval *ptime_range; + int range_size; int range_num; float min_percent; u64 nr_entries; @@ -1300,24 +1299,33 @@ repeat: if (symbol__init(&session->header.env) < 0) goto error; + report.ptime_range = perf_time__range_alloc(report.time_str, + &report.range_size); + if (!report.ptime_range) { + ret = -ENOMEM; + goto error; + } + if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { pr_err("HINT: no first/last sample time found in perf data.\n" "Please use latest perf binary to execute 'perf record'\n" "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); - return -EINVAL; + ret = -EINVAL; + goto error; } report.range_num = perf_time__percent_parse_str( - report.ptime_range, PTIME_RANGE_MAX, + report.ptime_range, report.range_size, report.time_str, session->evlist->first_sample_time, session->evlist->last_sample_time); if (report.range_num < 0) { pr_err("Invalid time string\n"); - return -EINVAL; + ret = -EINVAL; + goto error; } } else { report.range_num = 1; @@ -1333,6 +1341,8 @@ repeat: ret = 0; error: + zfree(&report.ptime_range); + perf_session__delete(session); return ret; } -- cgit v1.2.3 From cc2ef584a863b7c8033b78723cd253ca47e9a589 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:33 +0800 Subject: perf script: Remove the time slices number limitation Previously it was only allowed 
to use at most 10 time slices in 'perf script --time'. This patch removes this limitation. For example, following command line is OK (12 time slices) perf script --time 1%/1,1%/2,1%/3,1%/4,1%/5,1%/6,1%/7,1%/8,1%/9,1%/10,1%/11,1%/12 Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-9-git-send-email-yao.jin@linux.intel.com [ No need to check for NULL to call free, use zfree ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 10 +++++----- tools/perf/builtin-script.c | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 806ec6391fd6..7730c1d2b5d3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -351,19 +351,19 @@ include::itrace.txt[] to end of file. Also support time percent with multipe time range. Time string is - 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. 
For example: - Select the second 10% time slice + Select the second 10% time slice: perf script --time 10%/2 - Select from 0% to 10% time slice + Select from 0% to 10% time slice: perf script --time 0%-10% - Select the first and second 10% time slices + Select the first and second 10% time slices: perf script --time 10%/1,10%/2 - Select from 0% to 10% and 30% to 40% slices + Select from 0% to 10% and 30% to 40% slices: perf script --time 0%-10%,30%-40% --max-blocks:: diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ac781916e51e..3499d68e1d70 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1480,8 +1480,6 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } -#define PTIME_RANGE_MAX 10 - struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -1496,7 +1494,8 @@ struct perf_script { struct thread_map *threads; int name_width; const char *time_str; - struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + struct perf_time_interval *ptime_range; + int range_size; int range_num; }; @@ -3445,6 +3444,13 @@ int cmd_script(int argc, const char **argv) if (err < 0) goto out_delete; + script.ptime_range = perf_time__range_alloc(script.time_str, + &script.range_size); + if (!script.ptime_range) { + err = -ENOMEM; + goto out_delete; + } + /* needs to be parsed after looking up reference time */ if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { if (session->evlist->first_sample_time == 0 && @@ -3457,7 +3463,7 @@ int cmd_script(int argc, const char **argv) } script.range_num = perf_time__percent_parse_str( - script.ptime_range, PTIME_RANGE_MAX, + script.ptime_range, script.range_size, script.time_str, session->evlist->first_sample_time, session->evlist->last_sample_time); @@ -3476,6 +3482,8 @@ int cmd_script(int argc, const char **argv) flush_scripting(); out_delete: + zfree(&script.ptime_range); + perf_evlist__free_stats(session->evlist); 
perf_session__delete(session); -- cgit v1.2.3 From 81fccd6ca507d3b2012eaf1edeb9b1dbf4bd22db Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 17 Jan 2018 14:16:11 +0100 Subject: perf record: Fix failed memory allocation for get_cpuid_str In the x86 architecture-dependent part, function get_cpuid_str() mallocs a 128 byte buffer, but does not check if the memory allocation succeeded or not. When the memory allocation fails, function __get_cpuid() is called with first parameter being a NULL pointer. However this function references its first parameter and operates on a NULL pointer which might cause core dumps. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180117131611.34319-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index b626d2bad9f1..fb0d71afee8b 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -70,7 +70,7 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { char *buf = malloc(128); - if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) { + if (buf && __get_cpuid(buf, 128, "%s-%u-%X$") < 0) { free(buf); return NULL; } -- cgit v1.2.3 From 37b95951c58fdf08dc10afa9d02066ed9f176fb5 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Tue, 16 Jan 2018 17:34:07 +0800 Subject: KVM/x86: Fix wrong macro references of X86_CR0_PG_BIT and X86_CR4_PAE_BIT in kvm_valid_sregs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_valid_sregs() should use X86_CR0_PG and X86_CR4_PAE to check bit status rather than X86_CR0_PG_BIT and X86_CR4_PAE_BIT. This patch is to fix it. 
Fixes: f29810335965a(KVM/x86: Check input paging mode when cs.l is set) Reported-by: Jeremi Piotrowski Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Tianyu Lan Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1cec2c62a0b0..c53298dfbf50 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7496,13 +7496,13 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) { + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* * When EFER.LME and CR0.PG are set, the processor is in * 64-bit mode (though maybe in a 32-bit code segment). * CR4.PAE and EFER.LMA must be set. */ - if (!(sregs->cr4 & X86_CR4_PAE_BIT) + if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA)) return -EINVAL; } else { -- cgit v1.2.3 From 490ae017f54e55bde382d45ea24bddfb6d1a0aaf Mon Sep 17 00:00:00 2001 From: Dennis Yang Date: Tue, 12 Dec 2017 18:21:40 +0800 Subject: dm thin metadata: THIN_MAX_CONCURRENT_LOCKS should be 6 For btree removal, there is a corner case that a single thread could take 6 locks, which is more than THIN_MAX_CONCURRENT_LOCKS(5) and leads to deadlock. A btree removal might eventually call rebalance_children()->rebalance3() to rebalance entries of three neighbor child nodes when shadow_spine has already acquired two write locks. In rebalance3(), it tries to shadow and acquire the write locks of all three child nodes. However, shadowing a child node requires acquiring a read lock of the original child node and a write lock of the new block. Although the read lock will be released after block shadowing, shadowing the third child node in rebalance3() could still take the sixth lock. 
(2 write locks for shadow_spine + 2 write locks for the first two child nodes' shadow + 1 write lock for the last child node's shadow + 1 read lock for the last child node) Cc: stable@vger.kernel.org Signed-off-by: Dennis Yang Acked-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-thin-metadata.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index d31d18d9727c..36ef284ad086 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -80,10 +80,14 @@ #define SECTOR_TO_BLOCK_SHIFT 3 /* + * For btree insert: * 3 for btree insert + * 2 for btree lookup used within space map + * For btree remove: + * 2 for shadow spine + + * 4 for rebalance 3 child node */ -#define THIN_MAX_CONCURRENT_LOCKS 5 +#define THIN_MAX_CONCURRENT_LOCKS 6 /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 -- cgit v1.2.3 From bc68d0a43560e950850fc69b58f0f8254b28f6d6 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 20 Dec 2017 09:56:06 +0000 Subject: dm btree: fix serious bug in btree_split_beneath() When inserting a new key/value pair into a btree we walk down the spine of btree nodes performing the following 2 operations: i) space for a new entry ii) adjusting the first key entry if the new key is lower than any in the node. If the _root_ node is full, the function btree_split_beneath() allocates 2 new nodes, and redistributes the root node's entries between them. The root node is left with 2 entries corresponding to the 2 new nodes. btree_split_beneath() then adjusts the spine to point to one of the two new children. This means the first key is never adjusted if the new key was lower, ie. operation (ii) gets missed out. This can result in the new key being 'lost' for a period; until another low valued key is inserted that will uncover it. This is a serious bug, and quite hard to make trigger in normal use. 
A reproducing test case ("thin create devices-in-reverse-order") is available as part of the thin-provision-tools project: https://github.com/jthornber/thin-provisioning-tools/blob/master/functional-tests/device-mapper/dm-tests.scm#L593 Fix the issue by changing btree_split_beneath() so it no longer adjusts the spine. Instead it unlocks both the new nodes, and lets the main loop in btree_insert_raw() relock the appropriate one and make any necessary adjustments. Cc: stable@vger.kernel.org Reported-by: Monty Pavel Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index f21ce6a3d4cf..58b319757b1e 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -683,23 +683,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) pn->keys[1] = rn->keys[0]; memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); - /* - * rejig the spine. This is ugly, since it knows too - * much about the spine - */ - if (s->nodes[0] != new_parent) { - unlock_block(s->info, s->nodes[0]); - s->nodes[0] = new_parent; - } - if (key < le64_to_cpu(rn->keys[0])) { - unlock_block(s->info, right); - s->nodes[1] = left; - } else { - unlock_block(s->info, left); - s->nodes[1] = right; - } - s->count = 2; - + unlock_block(s->info, left); + unlock_block(s->info, right); return 0; } -- cgit v1.2.3 From 27c7003697fc2c78f965984aa224ef26cd6b2949 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 3 Jan 2018 22:48:59 +0100 Subject: dm crypt: fix crash by adding missing check for auth key size If dm-crypt uses authenticated mode with separate MAC, there are two concatenated parts of the key structure - key(s) for encryption and authentication key. Add a missing check for authenticated key length. 
If this key length is smaller than actually provided key, dm-crypt now properly fails instead of crashing. Fixes: ef43aa3806 ("dm crypt: add cryptographic data integrity protection (authenticated encryption)") Cc: stable@vger.kernel.org # 4.12+ Reported-by: Salah Coronya Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9fc12f556534..4cc3809b2a3a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1954,10 +1954,15 @@ static int crypt_setkey(struct crypt_config *cc) /* Ignore extra keys (which are used for IV etc) */ subkey_size = crypt_subkey_size(cc); - if (crypt_integrity_hmac(cc)) + if (crypt_integrity_hmac(cc)) { + if (subkey_size < cc->key_mac_size) + return -EINVAL; + crypt_copy_authenckey(cc->authenc_key, cc->key, subkey_size - cc->key_mac_size, cc->key_mac_size); + } + for (i = 0; i < cc->tfms_count; i++) { if (crypt_integrity_hmac(cc)) r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i], -- cgit v1.2.3 From 717f4b1c52135f279112df82583e0c77e80f90de Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 10 Jan 2018 09:32:47 -0500 Subject: dm integrity: don't store cipher request on the stack Some asynchronous cipher implementations may use DMA. The stack may be mapped in the vmalloc area that doesn't support DMA. Therefore, the cipher request and initialization vector shouldn't be on the stack. Fix this by allocating the request and iv with kmalloc. 
Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 49 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 05c7bfd0c9d9..46d7c8749222 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2559,7 +2559,8 @@ static int create_journal(struct dm_integrity_c *ic, char **error) int r = 0; unsigned i; __u64 journal_pages, journal_desc_size, journal_tree_size; - unsigned char *crypt_data = NULL; + unsigned char *crypt_data = NULL, *crypt_iv = NULL; + struct skcipher_request *req = NULL; ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL); ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL); @@ -2617,9 +2618,20 @@ static int create_journal(struct dm_integrity_c *ic, char **error) if (blocksize == 1) { struct scatterlist *sg; - SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); - unsigned char iv[ivsize]; - skcipher_request_set_tfm(req, ic->journal_crypt); + + req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); + if (!req) { + *error = "Could not allocate crypt request"; + r = -ENOMEM; + goto bad; + } + + crypt_iv = kmalloc(ivsize, GFP_KERNEL); + if (!crypt_iv) { + *error = "Could not allocate iv"; + r = -ENOMEM; + goto bad; + } ic->journal_xor = dm_integrity_alloc_page_list(ic); if (!ic->journal_xor) { @@ -2641,9 +2653,9 @@ static int create_journal(struct dm_integrity_c *ic, char **error) sg_set_buf(&sg[i], va, PAGE_SIZE); } sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids); - memset(iv, 0x00, ivsize); + memset(crypt_iv, 0x00, ivsize); - skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv); + skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv); init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, 
&comp)) @@ -2659,10 +2671,22 @@ static int create_journal(struct dm_integrity_c *ic, char **error) crypto_free_skcipher(ic->journal_crypt); ic->journal_crypt = NULL; } else { - SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt); - unsigned char iv[ivsize]; unsigned crypt_len = roundup(ivsize, blocksize); + req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL); + if (!req) { + *error = "Could not allocate crypt request"; + r = -ENOMEM; + goto bad; + } + + crypt_iv = kmalloc(ivsize, GFP_KERNEL); + if (!crypt_iv) { + *error = "Could not allocate iv"; + r = -ENOMEM; + goto bad; + } + crypt_data = kmalloc(crypt_len, GFP_KERNEL); if (!crypt_data) { *error = "Unable to allocate crypt data"; @@ -2670,8 +2694,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error) goto bad; } - skcipher_request_set_tfm(req, ic->journal_crypt); - ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal); if (!ic->journal_scatterlist) { *error = "Unable to allocate sg list"; @@ -2695,12 +2717,12 @@ static int create_journal(struct dm_integrity_c *ic, char **error) struct skcipher_request *section_req; __u32 section_le = cpu_to_le32(i); - memset(iv, 0x00, ivsize); + memset(crypt_iv, 0x00, ivsize); memset(crypt_data, 0x00, crypt_len); memcpy(crypt_data, &section_le, min((size_t)crypt_len, sizeof(section_le))); sg_init_one(&sg, crypt_data, crypt_len); - skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv); + skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv); init_completion(&comp.comp); comp.in_flight = (atomic_t)ATOMIC_INIT(1); if (do_crypt(true, req, &comp)) @@ -2758,6 +2780,9 @@ retest_commit_id: } bad: kfree(crypt_data); + kfree(crypt_iv); + skcipher_request_free(req); + return r; } -- cgit v1.2.3 From dc94902bde1e158cd19c4deab208e5d6eb382a44 Mon Sep 17 00:00:00 2001 From: Ondrej Kozina Date: Fri, 12 Jan 2018 16:30:32 +0100 Subject: dm crypt: wipe kernel key copy after IV initialization Loading key via kernel keyring service erases
the internal key copy immediately after we pass it in crypto layer. This is wrong because IV is initialized later and we use wrong key for the initialization (instead of real key there's just zeroed block). The bug may cause data corruption if key is loaded via kernel keyring service first and later same crypt device is reactivated using exactly same key in hexbyte representation, or vice versa. The bug (and fix) affects only ciphers using following IVs: essiv, lmk and tcw. Fixes: c538f6ec9f56 ("dm crypt: add ability to use keys from the kernel key retention service") Cc: stable@vger.kernel.org # 4.10+ Signed-off-by: Ondrej Kozina Reviewed-by: Milan Broz Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 4cc3809b2a3a..971241409c30 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2058,9 +2058,6 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string ret = crypt_setkey(cc); - /* wipe the kernel key payload copy in each case */ - memset(cc->key, 0, cc->key_size * sizeof(u8)); - if (!ret) { set_bit(DM_CRYPT_KEY_VALID, &cc->flags); kzfree(cc->key_string); @@ -2528,6 +2525,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, char *cipher_in, char *key) } } + /* wipe the kernel key payload copy */ + if (cc->key_string) + memset(cc->key, 0, cc->key_size * sizeof(u8)); + return ret; } @@ -2966,6 +2967,9 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) return ret; if (cc->iv_gen_ops && cc->iv_gen_ops->init) ret = cc->iv_gen_ops->init(cc); + /* wipe the kernel key payload copy */ + if (cc->key_string) + memset(cc->key, 0, cc->key_size * sizeof(u8)); return ret; } if (argc == 2 && !strcasecmp(argv[1], "wipe")) { @@ -3012,7 +3016,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", 
- .version = {1, 18, 0}, + .version = {1, 18, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, -- cgit v1.2.3 From 3cc2e57c4beabcbbaa46e1ac6d77ca8276a4a42d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Jan 2018 11:24:26 +0000 Subject: dm crypt: fix error return code in crypt_ctr() Fix to return error code -ENOMEM from the mempool_create_kmalloc_pool() error handling case instead of 0, as done elsewhere in this function. Fixes: ef43aa38063a6 ("dm crypt: add cryptographic data integrity protection (authenticated encryption)") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Wei Yongjun Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 971241409c30..554d60394c06 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2746,6 +2746,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->tag_pool_max_sectors * cc->on_disk_tag_size); if (!cc->tag_pool) { ti->error = "Cannot allocate integrity tags mempool"; + ret = -ENOMEM; goto bad; } -- cgit v1.2.3 From e0638fa400eaccf9fa8060f67140264c4e276552 Mon Sep 17 00:00:00 2001 From: Lixin Wang Date: Mon, 27 Nov 2017 15:06:55 +0800 Subject: i2c: core: decrease reference count of device node in i2c_unregister_device Reference count of device node was increased in of_i2c_register_device, but without decreasing it in i2c_unregister_device. Then the added device node will never be released. Fix this by adding the of_node_put. 
Signed-off-by: Lixin Wang Tested-by: Wolfram Sang Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-core-base.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 706164b4c5be..f7829a74140c 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -821,8 +821,12 @@ void i2c_unregister_device(struct i2c_client *client) { if (!client) return; - if (client->dev.of_node) + + if (client->dev.of_node) { of_node_clear_flag(client->dev.of_node, OF_POPULATED); + of_node_put(client->dev.of_node); + } + if (ACPI_COMPANION(&client->dev)) acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev)); device_unregister(&client->dev); -- cgit v1.2.3 From 89c6efa61f5709327ecfa24bff18e57a4e80c7fa Mon Sep 17 00:00:00 2001 From: Jeremy Compostella Date: Wed, 15 Nov 2017 12:31:44 -0700 Subject: i2c: core-smbus: prevent stack corruption on read I2C_BLOCK_DATA On a I2C_SMBUS_I2C_BLOCK_DATA read request, if data->block[0] is greater than I2C_SMBUS_BLOCK_MAX + 1, the underlying I2C driver writes data out of the msgbuf1 array boundary. It is possible from a user application to run into that issue by calling the I2C_SMBUS ioctl with data.block[0] greater than I2C_SMBUS_BLOCK_MAX + 1. This patch makes the code compliant with Documentation/i2c/dev-interface by raising an error when the requested size is larger than 32 bytes. Call Trace: [] dump_stack+0x67/0x92 [] panic+0xc5/0x1eb [] ? vprintk_default+0x1f/0x30 [] ? i2cdev_ioctl_smbus+0x303/0x320 [] __stack_chk_fail+0x1b/0x20 [] i2cdev_ioctl_smbus+0x303/0x320 [] i2cdev_ioctl+0x4d/0x1e0 [] do_vfs_ioctl+0x2ba/0x490 [] ? 
security_file_ioctl+0x43/0x60 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x6a Signed-off-by: Jeremy Compostella Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-core-smbus.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index 4bb9927afd01..a1082c04ac5c 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -397,16 +397,17 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, the underlying bus driver */ break; case I2C_SMBUS_I2C_BLOCK_DATA: + if (data->block[0] > I2C_SMBUS_BLOCK_MAX) { + dev_err(&adapter->dev, "Invalid block %s size %d\n", + read_write == I2C_SMBUS_READ ? "read" : "write", + data->block[0]); + return -EINVAL; + } + if (read_write == I2C_SMBUS_READ) { msg[1].len = data->block[0]; } else { msg[0].len = data->block[0] + 1; - if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) { - dev_err(&adapter->dev, - "Invalid block write size %d\n", - data->block[0]); - return -EINVAL; - } for (i = 1; i <= data->block[0]; i++) msgbuf0[i] = data->block[i]; } -- cgit v1.2.3 From 8a510a5c75261ba0ec39155326982aa786541e29 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 17 Jan 2018 10:16:20 -0500 Subject: drm/vmwgfx: fix memory corruption with legacy/sou connectors It looks like in all cases 'struct vmw_connector_state' is used. But only in stdu connectors, was atomic_{duplicate,destroy}_state() properly subclassed. Leading to writes beyond the end of the allocated connector state block and all sorts of fun memory corruption related crashes. 
Fixes: d7721ca71126 "drm/vmwgfx: Connector atomic state" Cc: Signed-off-by: Rob Clark Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index b8a09807c5de..3824595fece1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -266,8 +266,8 @@ static const struct drm_connector_funcs vmw_legacy_connector_funcs = { .set_property = vmw_du_connector_set_property, .destroy = vmw_ldu_connector_destroy, .reset = vmw_du_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, + .atomic_destroy_state = vmw_du_connector_destroy_state, .atomic_set_property = vmw_du_connector_atomic_set_property, .atomic_get_property = vmw_du_connector_atomic_get_property, }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index bc5f6026573d..63a4cd794b73 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -420,8 +420,8 @@ static const struct drm_connector_funcs vmw_sou_connector_funcs = { .set_property = vmw_du_connector_set_property, .destroy = vmw_sou_connector_destroy, .reset = vmw_du_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .atomic_duplicate_state = vmw_du_connector_duplicate_state, + .atomic_destroy_state = vmw_du_connector_destroy_state, .atomic_set_property = vmw_du_connector_atomic_set_property, .atomic_get_property = vmw_du_connector_atomic_get_property, }; -- cgit v1.2.3 From 85cf955df8317bbc513663b5610979fdd6a0da2a Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Wed, 17 
Jan 2018 16:30:39 +0100 Subject: aoe: use ktime_t instead of timeval 'struct frame' uses two variables to store the sent timestamp - 'struct timeval' and jiffies. jiffies is used to avoid discrepancies caused by updates to system time. 'struct timeval' is deprecated because it uses 32-bit representation for seconds which will overflow in year 2038. This patch does the following: - Replace the use of 'struct timeval' and jiffies with ktime_t, which is the recommended type for timestamping - ktime_t provides both long range (like jiffies) and high resolution (like timeval). Using ktime_get (monotonic time) instead of wall-clock time prevents any discrepancies caused by updates to system time. [updates by Arnd below] The original patch from Tina never went anywhere as we discussed how to keep the impact on performance minimal. I've started over now but arrived at basically the same patch that she had originally, except for a slightly improved tsince_hr() function. I'm making it more robust against overflows, and also optimize explicitly for the common case in which a frame is less than 4.2 seconds old, using only a 32-bit division in that case. This should make the new version more efficient than the old code, since we replace the existing two 32-bit divisions in do_gettimeofday() plus one multiplication with a single 32-bit division in tsince_hr() and drop the double bookkeeping. It's also more efficient than the ktime_get_us() API we discussed before, since that would also rely on multiple divisions.
Link: https://lists.linaro.org/pipermail/y2038/2015-May/000276.html Signed-off-by: Tina Ruchandani Cc: Ed Cashin Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/block/aoe/aoe.h | 3 +-- drivers/block/aoe/aoecmd.c | 48 +++++++++++++--------------------------------- 2 files changed, 14 insertions(+), 37 deletions(-) diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 9220f8e833d0..c0ebda1283cc 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -112,8 +112,7 @@ enum frame_flags { struct frame { struct list_head head; u32 tag; - struct timeval sent; /* high-res time packet was sent */ - u32 sent_jiffs; /* low-res jiffies-based sent time */ + ktime_t sent; /* high-res time packet was sent */ ulong waited; ulong waited_total; struct aoetgt *t; /* parent target I belong to */ diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 812fed069708..540bb60cd071 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -398,8 +398,7 @@ aoecmd_ata_rw(struct aoedev *d) skb = skb_clone(f->skb, GFP_ATOMIC); if (skb) { - do_gettimeofday(&f->sent); - f->sent_jiffs = (u32) jiffies; + f->sent = ktime_get(); __skb_queue_head_init(&queue); __skb_queue_tail(&queue, skb); aoenet_xmit(&queue); @@ -489,8 +488,7 @@ resend(struct aoedev *d, struct frame *f) skb = skb_clone(skb, GFP_ATOMIC); if (skb == NULL) return; - do_gettimeofday(&f->sent); - f->sent_jiffs = (u32) jiffies; + f->sent = ktime_get(); __skb_queue_head_init(&queue); __skb_queue_tail(&queue, skb); aoenet_xmit(&queue); @@ -499,33 +497,17 @@ resend(struct aoedev *d, struct frame *f) static int tsince_hr(struct frame *f) { - struct timeval now; - int n; + u64 delta = ktime_to_ns(ktime_sub(ktime_get(), f->sent)); - do_gettimeofday(&now); - n = now.tv_usec - f->sent.tv_usec; - n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC; + /* delta is normally under 4.2 seconds, avoid 64-bit division */ + if (likely(delta <= UINT_MAX)) + return (u32)delta 
/ NSEC_PER_USEC; - if (n < 0) - n = -n; + /* avoid overflow after 71 minutes */ + if (delta > ((u64)INT_MAX * NSEC_PER_USEC)) + return INT_MAX; - /* For relatively long periods, use jiffies to avoid - * discrepancies caused by updates to the system time. - * - * On system with HZ of 1000, 32-bits is over 49 days - * worth of jiffies, or over 71 minutes worth of usecs. - * - * Jiffies overflow is handled by subtraction of unsigned ints: - * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe - * $3 = 4 - * (gdb) - */ - if (n > USEC_PER_SEC / 4) { - n = ((u32) jiffies) - f->sent_jiffs; - n *= USEC_PER_SEC / HZ; - } - - return n; + return div_u64(delta, NSEC_PER_USEC); } static int @@ -589,7 +571,6 @@ reassign_frame(struct frame *f) nf->waited = 0; nf->waited_total = f->waited_total; nf->sent = f->sent; - nf->sent_jiffs = f->sent_jiffs; f->skb = skb; return nf; @@ -633,8 +614,7 @@ probe(struct aoetgt *t) skb = skb_clone(f->skb, GFP_ATOMIC); if (skb) { - do_gettimeofday(&f->sent); - f->sent_jiffs = (u32) jiffies; + f->sent = ktime_get(); __skb_queue_head_init(&queue); __skb_queue_tail(&queue, skb); aoenet_xmit(&queue); @@ -1432,10 +1412,8 @@ aoecmd_ata_id(struct aoedev *d) d->timer.function = rexmit_timer; skb = skb_clone(skb, GFP_ATOMIC); - if (skb) { - do_gettimeofday(&f->sent); - f->sent_jiffs = (u32) jiffies; - } + if (skb) + f->sent = ktime_get(); return skb; } -- cgit v1.2.3 From 7bed45954b95601230ebf387d3e4e20e4a3cc025 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 18 Jan 2018 00:41:51 +0800 Subject: blk-mq: make sure hctx->next_cpu is set correctly When hctx->next_cpu is set from possible online CPUs, there is one race in which hctx->next_cpu may be set as >= nr_cpu_ids, and finally break workqueue. 
The race can be triggered in the following two situations: 1) when one CPU is becoming DEAD, blk_mq_hctx_notify_dead() is called to dispatch requests from the DEAD cpu context, but at that time, this DEAD CPU has been cleared from 'cpu_online_mask', so all CPUs in hctx->cpumask may become offline, and cause hctx->next_cpu to be set to a bad value. 2) blk_mq_delay_run_hw_queue() is called from CPU B, and found the queue should be run on the other CPU A, then CPU A may become offline at the same time and all CPUs in hctx->cpumask become offline. This patch deals with this issue by re-selecting next CPU, and making sure it is set correctly. Cc: Christian Borntraeger Cc: Stefan Haberland Cc: Christoph Hellwig Cc: Thomas Gleixner Reported-by: "jianchao.wang" Tested-by: "jianchao.wang" Fixes: 20e4d81393 ("blk-mq: simplify queue mapping & schedule with each possisble CPU") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index c8f62e6be6b6..3bd41f1066ee 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1319,21 +1319,47 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) */ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) { + bool tried = false; + if (hctx->queue->nr_hw_queues == 1) return WORK_CPU_UNBOUND; if (--hctx->next_cpu_batch <= 0) { int next_cpu; - +select_cpu: next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask, cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask); - hctx->next_cpu = next_cpu; + /* + * No online CPU is found, so have to make sure hctx->next_cpu + * is set correctly for not breaking workqueue.
+ */ + if (next_cpu >= nr_cpu_ids) + hctx->next_cpu = cpumask_first(hctx->cpumask); + else + hctx->next_cpu = next_cpu; hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; } + /* + * Do unbound schedule if we can't find a online CPU for this hctx, + * and it should only happen in the path of handling CPU DEAD. + */ + if (!cpu_online(hctx->next_cpu)) { + if (!tried) { + tried = true; + goto select_cpu; + } + + /* + * Make sure to re-select CPU next time once after CPUs + * in hctx->cpumask become online again. + */ + hctx->next_cpu_batch = 1; + return WORK_CPU_UNBOUND; + } return hctx->next_cpu; } -- cgit v1.2.3 From 7df938fbc4ee641e70e05002ac67c24b19e86e74 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 18 Jan 2018 00:41:52 +0800 Subject: blk-mq: turn WARN_ON in __blk_mq_run_hw_queue into printk We know this WARN_ON is harmless and in reality it may be triggered, so convert it to printk() and dump_stack() to avoid confusing people. Also add a comment about two related races here. Cc: Christian Borntraeger Cc: Stefan Haberland Cc: Christoph Hellwig Cc: Thomas Gleixner Cc: "jianchao.wang" Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3bd41f1066ee..ec429be05729 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1294,9 +1294,27 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) /* * We should be running this queue from one of the CPUs that * are mapped to it.
+ * + * There are at least two related races now between setting + * hctx->next_cpu from blk_mq_hctx_next_cpu() and running + * __blk_mq_run_hw_queue(): + * + * - hctx->next_cpu is found offline in blk_mq_hctx_next_cpu(), + * but later it becomes online, then this warning is harmless + * at all + * + * - hctx->next_cpu is found online in blk_mq_hctx_next_cpu(), + * but later it becomes offline, then the warning can't be + * triggered, and we depend on blk-mq timeout handler to + * handle dispatched requests to this hctx */ - WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) && - cpu_online(hctx->next_cpu)); + if (!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) && + cpu_online(hctx->next_cpu)) { + printk(KERN_WARNING "run queue from wrong CPU %d, hctx %s\n", + raw_smp_processor_id(), + cpumask_empty(hctx->cpumask) ? "inactive": "active"); + dump_stack(); + } /* * We can't run the queue inline with ints disabled. Ensure that -- cgit v1.2.3 From 0f95549c0ea1e8075ae049202088b2c6a0cb40ad Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 17 Jan 2018 11:25:56 -0500 Subject: blk-mq: factor out a few helpers from __blk_mq_try_issue_directly No functional change. Just makes code flow more logically. In following commit, __blk_mq_try_issue_directly() will be used to return the dispatch result (blk_status_t) to DM. DM needs this information to improve IO merging. 
Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-mq.c | 79 ++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index ec429be05729..ddc46f215bfa 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1738,9 +1738,9 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } -static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, - struct request *rq, - blk_qc_t *cookie) +static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, + struct request *rq, + blk_qc_t *cookie) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1749,6 +1749,43 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, }; blk_qc_t new_cookie; blk_status_t ret; + + new_cookie = request_to_qc_t(hctx, rq); + + /* + * For OK queue, we are done. For error, caller may kill it. + * Any other error (busy), just add it to our list as we + * previously would have done. 
+ */ + ret = q->mq_ops->queue_rq(hctx, &bd); + switch (ret) { + case BLK_STS_OK: + *cookie = new_cookie; + break; + case BLK_STS_RESOURCE: + __blk_mq_requeue_request(rq); + break; + default: + *cookie = BLK_QC_T_NONE; + break; + } + + return ret; +} + +static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx, + struct request *rq, + bool run_queue) +{ + blk_mq_sched_insert_request(rq, false, run_queue, false, + hctx->flags & BLK_MQ_F_BLOCKING); +} + +static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + struct request *rq, + blk_qc_t *cookie) +{ + struct request_queue *q = rq->q; bool run_queue = true; /* RCU or SRCU read lock is needed before checking quiesced flag */ @@ -1768,41 +1805,29 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, goto insert; } - new_cookie = request_to_qc_t(hctx, rq); - - /* - * For OK queue, we are done. For error, kill it. Any other - * error (busy), just add it to our list as we previously - * would have done - */ - ret = q->mq_ops->queue_rq(hctx, &bd); - switch (ret) { - case BLK_STS_OK: - *cookie = new_cookie; - return; - case BLK_STS_RESOURCE: - __blk_mq_requeue_request(rq); - goto insert; - default: - *cookie = BLK_QC_T_NONE; - blk_mq_end_request(rq, ret); - return; - } - + return __blk_mq_issue_directly(hctx, rq, cookie); insert: - blk_mq_sched_insert_request(rq, false, run_queue, false, - hctx->flags & BLK_MQ_F_BLOCKING); + __blk_mq_fallback_to_insert(hctx, rq, run_queue); + + return BLK_STS_OK; } static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie) { + blk_status_t ret; int srcu_idx; might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); hctx_lock(hctx, &srcu_idx); - __blk_mq_try_issue_directly(hctx, rq, cookie); + + ret = __blk_mq_try_issue_directly(hctx, rq, cookie); + if (ret == BLK_STS_RESOURCE) + __blk_mq_fallback_to_insert(hctx, rq, true); + else if (ret != BLK_STS_OK) + blk_mq_end_request(rq, ret); + hctx_unlock(hctx, 
srcu_idx); } -- cgit v1.2.3 From 396eaf21ee17c476e8f66249fb1f4a39003d0ab4 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 17 Jan 2018 11:25:57 -0500 Subject: blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback blk_insert_cloned_request() is called in the fast path of a dm-rq driver (e.g. blk-mq request-based DM mpath). blk_insert_cloned_request() uses blk_mq_request_bypass_insert() to directly append the request to the blk-mq hctx->dispatch_list of the underlying queue. 1) This way isn't efficient enough because the hctx spinlock is always used. 2) With blk_insert_cloned_request(), we completely bypass underlying queue's elevator and depend on the upper-level dm-rq driver's elevator to schedule IO. But dm-rq currently can't get the underlying queue's dispatch feedback at all. Without knowing whether a request was issued or not (e.g. due to underlying queue being busy) the dm-rq elevator will not be able to provide effective IO merging (as a side-effect of dm-rq currently blindly destaging a request from its elevator only to requeue it after a delay, which kills any opportunity for merging). This obviously causes very bad sequential IO performance. Fix this by updating blk_insert_cloned_request() to use blk_mq_request_direct_issue(). blk_mq_request_direct_issue() allows a request to be issued directly to the underlying queue and returns the dispatch feedback (blk_status_t). If blk_mq_request_direct_issue() returns BLK_STS_RESOURCE the dm-rq driver will now use DM_MAPIO_REQUEUE to _not_ destage the request, thereby preserving the opportunity to merge IO. With this, request-based DM's blk-mq sequential IO performance is vastly improved (as much as 3X in mpath/virtio-scsi testing).
Signed-off-by: Ming Lei [blk-mq.c changes heavily influenced by Ming Lei's initial solution, but they were refactored to make them less fragile and easier to read/review] Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- block/blk-mq.c | 37 +++++++++++++++++++++++++++++-------- block/blk-mq.h | 3 +++ drivers/md/dm-rq.c | 19 ++++++++++++++++--- 4 files changed, 49 insertions(+), 13 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 7ba607527487..55f338020254 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2500,8 +2500,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * * bypass a potential scheduler on the bottom device for * insert. */ - blk_mq_request_bypass_insert(rq, true); - return BLK_STS_OK; + return blk_mq_request_direct_issue(rq); } spin_lock_irqsave(q->queue_lock, flags); diff --git a/block/blk-mq.c b/block/blk-mq.c index ddc46f215bfa..e383a20809f4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1775,15 +1775,19 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx, struct request *rq, - bool run_queue) + bool run_queue, bool bypass_insert) { - blk_mq_sched_insert_request(rq, false, run_queue, false, - hctx->flags & BLK_MQ_F_BLOCKING); + if (!bypass_insert) + blk_mq_sched_insert_request(rq, false, run_queue, false, + hctx->flags & BLK_MQ_F_BLOCKING); + else + blk_mq_request_bypass_insert(rq, run_queue); } static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, - blk_qc_t *cookie) + blk_qc_t *cookie, + bool bypass_insert) { struct request_queue *q = rq->q; bool run_queue = true; @@ -1794,7 +1798,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, goto insert; } - if (q->elevator) + if (q->elevator && !bypass_insert) goto insert; if (!blk_mq_get_driver_tag(rq, NULL, false)) @@ -1807,7 +1811,9 @@ static 
blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, return __blk_mq_issue_directly(hctx, rq, cookie); insert: - __blk_mq_fallback_to_insert(hctx, rq, run_queue); + __blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert); + if (bypass_insert) + return BLK_STS_RESOURCE; return BLK_STS_OK; } @@ -1822,15 +1828,30 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, hctx_lock(hctx, &srcu_idx); - ret = __blk_mq_try_issue_directly(hctx, rq, cookie); + ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false); if (ret == BLK_STS_RESOURCE) - __blk_mq_fallback_to_insert(hctx, rq, true); + __blk_mq_fallback_to_insert(hctx, rq, true, false); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); hctx_unlock(hctx, srcu_idx); } +blk_status_t blk_mq_request_direct_issue(struct request *rq) +{ + blk_status_t ret; + int srcu_idx; + blk_qc_t unused_cookie; + struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu); + + hctx_lock(hctx, &srcu_idx); + ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true); + hctx_unlock(hctx, srcu_idx); + + return ret; +} + static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = op_is_sync(bio->bi_opf); diff --git a/block/blk-mq.h b/block/blk-mq.h index 8591a54d989b..e3ebc93646ca 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -74,6 +74,9 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); +/* Used by blk_insert_cloned_request() to issue request directly */ +blk_status_t blk_mq_request_direct_issue(struct request *rq); + /* * CPU -> queue mappings */ diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index c28357f5cb0e..b7d175e94a02 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -395,7 +395,7 @@ static void end_clone_request(struct request *clone, blk_status_t error) 
dm_complete_request(tio->orig, error); } -static void dm_dispatch_clone_request(struct request *clone, struct request *rq) +static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq) { blk_status_t r; @@ -404,9 +404,10 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq) clone->start_time = jiffies; r = blk_insert_cloned_request(clone->q, clone); - if (r) + if (r != BLK_STS_OK && r != BLK_STS_RESOURCE) /* must complete clone in terms of original request */ dm_complete_request(rq, r); + return r; } static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, @@ -476,8 +477,10 @@ static int map_request(struct dm_rq_target_io *tio) struct mapped_device *md = tio->md; struct request *rq = tio->orig; struct request *clone = NULL; + blk_status_t ret; r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); +check_again: switch (r) { case DM_MAPIO_SUBMITTED: /* The target has taken the I/O to submit by itself later */ @@ -492,7 +495,17 @@ static int map_request(struct dm_rq_target_io *tio) /* The target has remapped the I/O so dispatch it */ trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), blk_rq_pos(rq)); - dm_dispatch_clone_request(clone, rq); + ret = dm_dispatch_clone_request(clone, rq); + if (ret == BLK_STS_RESOURCE) { + blk_rq_unprep_clone(clone); + tio->ti->type->release_clone_rq(clone); + tio->clone = NULL; + if (!rq->q->mq_ops) + r = DM_MAPIO_DELAY_REQUEUE; + else + r = DM_MAPIO_REQUEUE; + goto check_again; + } break; case DM_MAPIO_REQUEUE: /* The target wants to requeue the I/O */ -- cgit v1.2.3 From 9e97d2951a7e6ee6e204f87f6bda4ff754a8cede Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 17 Jan 2018 11:25:58 -0500 Subject: blk-mq-sched: remove unused 'can_block' arg from blk_mq_sched_insert_request After commit: 923218f6166a ("blk-mq: don't allocate driver tag upfront for flush rq") we no longer use the 'can_block' argument in blk_mq_sched_insert_request(). Kill it. 
Signed-off-by: Mike Snitzer Added actual commit message as to why it's being removed. Signed-off-by: Jens Axboe --- block/blk-exec.c | 2 +- block/blk-mq-sched.c | 2 +- block/blk-mq-sched.h | 2 +- block/blk-mq.c | 16 +++++++--------- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/block/blk-exec.c b/block/blk-exec.c index 5c0f3dc446dc..f7b292f12449 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -61,7 +61,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be reused after dying flag is set */ if (q->mq_ops) { - blk_mq_sched_insert_request(rq, at_head, true, false, false); + blk_mq_sched_insert_request(rq, at_head, true, false); return; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 2ff7cf0cbf73..55c0a745b427 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -427,7 +427,7 @@ done: } void blk_mq_sched_insert_request(struct request *rq, bool at_head, - bool run_queue, bool async, bool can_block) + bool run_queue, bool async) { struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index ba1d1418a96d..1e9c9018ace1 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -18,7 +18,7 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_insert_request(struct request *rq, bool at_head, - bool run_queue, bool async, bool can_block); + bool run_queue, bool async); void blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_ctx *ctx, struct list_head *list, bool run_queue_async); diff --git a/block/blk-mq.c b/block/blk-mq.c index e383a20809f4..c418858a60ef 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -745,13 +745,13 @@ static void blk_mq_requeue_work(struct work_struct *work) rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); - blk_mq_sched_insert_request(rq, true, false, 
false, true); + blk_mq_sched_insert_request(rq, true, false, false); } while (!list_empty(&rq_list)) { rq = list_entry(rq_list.next, struct request, queuelist); list_del_init(&rq->queuelist); - blk_mq_sched_insert_request(rq, false, false, false, true); + blk_mq_sched_insert_request(rq, false, false, false); } blk_mq_run_hw_queues(q, false); @@ -1773,13 +1773,11 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, return ret; } -static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx, - struct request *rq, +static void __blk_mq_fallback_to_insert(struct request *rq, bool run_queue, bool bypass_insert) { if (!bypass_insert) - blk_mq_sched_insert_request(rq, false, run_queue, false, - hctx->flags & BLK_MQ_F_BLOCKING); + blk_mq_sched_insert_request(rq, false, run_queue, false); else blk_mq_request_bypass_insert(rq, run_queue); } @@ -1811,7 +1809,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, return __blk_mq_issue_directly(hctx, rq, cookie); insert: - __blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert); + __blk_mq_fallback_to_insert(rq, run_queue, bypass_insert); if (bypass_insert) return BLK_STS_RESOURCE; @@ -1830,7 +1828,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false); if (ret == BLK_STS_RESOURCE) - __blk_mq_fallback_to_insert(hctx, rq, true, false); + __blk_mq_fallback_to_insert(rq, true, false); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); @@ -1960,7 +1958,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } else if (q->elevator) { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); - blk_mq_sched_insert_request(rq, false, true, true, true); + blk_mq_sched_insert_request(rq, false, true, true); } else { blk_mq_put_ctx(data.ctx); blk_mq_bio_to_request(rq, bio); -- cgit v1.2.3 From d625d05ef0f0914a706d824fab85472a42be6659 Mon Sep 17 00:00:00 2001 From: James 
Smart Date: Thu, 11 Jan 2018 14:29:22 -0800 Subject: nvme-fc: fix rogue admin cmds stalling teardown When connectivity is lost to a device, the association is terminated and the blk-mq queues are quiesced/stopped. When connectivity is re-established, they are resumed. If an admin command is received while connectivity is lost, the ioctl queues the command on the admin_q and the command stalls (the thread issuing the ioctl hangs/waits). If the connectivity is lost long enough such that the controller is then deleted, the delete code makes its calls to initiate the delete, which then expects the core layer to call the transport when all references are removed and the controller can be freed. Unfortunately, nothing in this path dequeued the admin command, so a reference sits outstanding and things stop, hanging the delete indefinitely. Correct by unquiescing the admin queue in the delete association. This means any admin command (which should only be from an ioctl) issued after connectivity is lost will detect the controller is in a reconnecting state and will (fast) fail the command. Thus, a pending reference can no longer be created. Once connectivity is re-established, a new ioctl/admin command would see proper device state and function again.
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 2a7a9a75105d..a10c77139f76 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2921,6 +2921,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); nvme_fc_free_queue(&ctrl->queues[0]); + /* re-enable the admin_q so anything new can fast fail */ + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_fc_ctlr_inactive_on_rport(ctrl); } -- cgit v1.2.3 From 0fd997d3f77296522e836f7002e8a0636c9886aa Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 11 Jan 2018 15:21:38 -0800 Subject: nvme-fc: correct hang in nvme_ns_remove() When connectivity is lost to a device, the association is terminated and the blk-mq queues are quiesced/stopped. When connectivity is re-established, they are resumed. If connectivity is lost for a sufficient amount of time that the controller is then deleted, the delete path starts tearing down queues, and eventually calling nvme_ns_remove(). It appears that pending commands may cause blk_cleanup_queue() to never complete and the teardown stalls. Correct by starting the ns queues after transitioning to a DELETING state, allowing pending commands to be flushed with io failures. Thus the delete path is clear when reached. 
Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a10c77139f76..b76ba4629e02 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2938,6 +2938,9 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) * waiting for io to terminate */ nvme_fc_delete_association(ctrl); + + /* resume the io queues so that things will fast fail */ + nvme_start_queues(nctrl); } static void -- cgit v1.2.3 From f65efd6dfe4e687637704f7023157fdee99913ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Dec 2017 14:25:11 +0100 Subject: nvme-pci: clean up CMB initialization Refactor the call to nvme_map_cmb, and change the conditions for probing for the CMB. First remove the version check as NVMe TPs always apply to earlier versions of the spec as well. Second check for the whole CMBSZ register for support of the CMB feature instead of just the size field inside of it to simplify the code a bit. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Logan Gunthorpe --- drivers/nvme/host/pci.c | 41 ++++++++++++++--------------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 13057aee84e6..edb57e984865 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1651,21 +1651,20 @@ static ssize_t nvme_cmb_show(struct device *dev, } static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL); -static void __iomem *nvme_map_cmb(struct nvme_dev *dev) +static void nvme_map_cmb(struct nvme_dev *dev) { u64 szu, size, offset; resource_size_t bar_size; struct pci_dev *pdev = to_pci_dev(dev->dev); - void __iomem *cmb; int bar; dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); - if (!(NVME_CMB_SZ(dev->cmbsz))) - return NULL; + if (!dev->cmbsz) + return; dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC); if (!use_cmb_sqes) - return NULL; + return; szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); size = szu * NVME_CMB_SZ(dev->cmbsz); @@ -1674,7 +1673,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) bar_size = pci_resource_len(pdev, bar); if (offset > bar_size) - return NULL; + return; /* * Controllers may support a CMB size larger than their BAR, @@ -1684,13 +1683,16 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) if (size > bar_size - offset) size = bar_size - offset; - cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size); - if (!cmb) - return NULL; - + dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size); + if (!dev->cmb) + return; dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset; dev->cmb_size = size; - return cmb; + + if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, + &dev_attr_cmb.attr, NULL)) + dev_warn(dev->ctrl.device, + "failed to add sysfs attribute for CMB\n"); } static inline void nvme_release_cmb(struct nvme_dev *dev) @@ -2115,22 +2117,7 @@ static int 
nvme_pci_enable(struct nvme_dev *dev) "set queue depth=%u\n", dev->q_depth); } - /* - * CMBs can currently only exist on >=1.2 PCIe devices. We only - * populate sysfs if a CMB is implemented. Since nvme_dev_attrs_group - * has no name we can pass NULL as final argument to - * sysfs_add_file_to_group. - */ - - if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) { - dev->cmb = nvme_map_cmb(dev); - if (dev->cmb) { - if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, - &dev_attr_cmb.attr, NULL)) - dev_warn(dev->ctrl.device, - "failed to add sysfs attribute for CMB\n"); - } - } + nvme_map_cmb(dev); pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); -- cgit v1.2.3 From 88de4598bca84e27b261685c06fff816b8d932a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Dec 2017 14:50:00 +0100 Subject: nvme-pci: clean up CMBSZ bit definitions Define the bit positions instead of macros using the magic values, and move the expanded helpers to calculate the size and size unit into the implementation C file.
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Logan Gunthorpe --- drivers/nvme/host/pci.c | 23 +++++++++++++++++------ include/linux/nvme.h | 22 ++++++++++++++-------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index edb57e984865..a2ffb557b616 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1364,7 +1364,7 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, int qid, int depth) { - if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { + if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth), dev->ctrl.page_size); nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset; @@ -1651,9 +1651,21 @@ static ssize_t nvme_cmb_show(struct device *dev, } static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL); +static u64 nvme_cmb_size_unit(struct nvme_dev *dev) +{ + u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK; + + return 1ULL << (12 + 4 * szu); +} + +static u32 nvme_cmb_size(struct nvme_dev *dev) +{ + return (dev->cmbsz >> NVME_CMBSZ_SZ_SHIFT) & NVME_CMBSZ_SZ_MASK; +} + static void nvme_map_cmb(struct nvme_dev *dev) { - u64 szu, size, offset; + u64 size, offset; resource_size_t bar_size; struct pci_dev *pdev = to_pci_dev(dev->dev); int bar; @@ -1666,9 +1678,8 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (!use_cmb_sqes) return; - szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); - size = szu * NVME_CMB_SZ(dev->cmbsz); - offset = szu * NVME_CMB_OFST(dev->cmbloc); + size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev); + offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc); bar = NVME_CMB_BIR(dev->cmbloc); bar_size = pci_resource_len(pdev, bar); @@ -1897,7 +1908,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if 
(nr_io_queues == 0) return 0; - if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) { + if (dev->cmb && (dev->cmbsz & NVME_CMBSZ_SQS)) { result = nvme_cmb_qdepth(dev, nr_io_queues, sizeof(struct nvme_command)); if (result > 0) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index aea87f0d917b..4112e2bd747f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -124,14 +124,20 @@ enum { #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) -#define NVME_CMB_SZ(cmbsz) (((cmbsz) >> 12) & 0xfffff) -#define NVME_CMB_SZU(cmbsz) (((cmbsz) >> 8) & 0xf) - -#define NVME_CMB_WDS(cmbsz) ((cmbsz) & 0x10) -#define NVME_CMB_RDS(cmbsz) ((cmbsz) & 0x8) -#define NVME_CMB_LISTS(cmbsz) ((cmbsz) & 0x4) -#define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2) -#define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1) + +enum { + NVME_CMBSZ_SQS = 1 << 0, + NVME_CMBSZ_CQS = 1 << 1, + NVME_CMBSZ_LISTS = 1 << 2, + NVME_CMBSZ_RDS = 1 << 3, + NVME_CMBSZ_WDS = 1 << 4, + + NVME_CMBSZ_SZ_SHIFT = 12, + NVME_CMBSZ_SZ_MASK = 0xfffff, + + NVME_CMBSZ_SZU_SHIFT = 8, + NVME_CMBSZ_SZU_MASK = 0xf, +}; /* * Submission and Completion Queue Entry Sizes for the NVM command set. -- cgit v1.2.3 From de99a346884f019387230bc549de74456daca248 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 16 Jan 2018 10:31:39 -0800 Subject: block: Fix __bio_integrity_endio() documentation Fixes: 4246a0b63bd8 ("block: add a bi_error field to struct bio") Reviewed-by: Martin K. 
Petersen Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/bio-integrity.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 23b42e8aa03e..9cfdd6c83b5b 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -374,7 +374,6 @@ static void bio_integrity_verify_fn(struct work_struct *work) /** * __bio_integrity_endio - Integrity I/O completion function * @bio: Protected bio - * @error: Pointer to errno * * Description: Completion for integrity I/O * -- cgit v1.2.3 From c877154d307f4a91e0b5b85b75535713dab945ae Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 17 Sep 2017 10:32:20 +0200 Subject: ubifs: Fix uninitialized variable in search_dh_cookie() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/ubifs/tnc.c: In function ‘search_dh_cookie’: fs/ubifs/tnc.c:1893: warning: ‘err’ is used uninitialized in this function Indeed, err is always used uninitialized. According to an original review comment from Hyunchul, acknowledged by Richard, err should be initialized to -ENOENT to avoid the first call to tnc_next(). But we can achieve the same by reordering the code. Fixes: 781f675e2d7e ("ubifs: Fix unlink code wrt. 
double hash lookups") Reported-by: Hyunchul Lee Signed-off-by: Geert Uytterhoeven Signed-off-by: Richard Weinberger --- fs/ubifs/tnc.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 0a213dcba2a1..ba3d0e0f8615 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -1890,35 +1890,28 @@ static int search_dh_cookie(struct ubifs_info *c, const union ubifs_key *key, union ubifs_key *dkey; for (;;) { - if (!err) { - err = tnc_next(c, &znode, n); - if (err) - goto out; - } - zbr = &znode->zbranch[*n]; dkey = &zbr->key; if (key_inum(c, dkey) != key_inum(c, key) || key_type(c, dkey) != key_type(c, key)) { - err = -ENOENT; - goto out; + return -ENOENT; } err = tnc_read_hashed_node(c, zbr, dent); if (err) - goto out; + return err; if (key_hash(c, key) == key_hash(c, dkey) && le32_to_cpu(dent->cookie) == cookie) { *zn = znode; - goto out; + return 0; } - } - -out: - return err; + err = tnc_next(c, &znode, n); + if (err) + return err; + } } static int do_lookup_dh(struct ubifs_info *c, const union ubifs_key *key, -- cgit v1.2.3 From c0e860ba034ead2a0f47052c87266e90f23cdb7b Mon Sep 17 00:00:00 2001 From: Jeff Westfahl Date: Tue, 10 Jan 2017 13:30:18 -0600 Subject: mtd: ubi: Use 'max_bad_blocks' to compute bad_peb_limit if available If the user has not set max_beb_per1024 using either the cmdline or Kconfig options for doing so, use the MTD function 'max_bad_blocks' to compute the UBI bad_peb_limit. 
Signed-off-by: Jeff Westfahl Signed-off-by: Zach Brown Acked-by: Boris Brezillon Acked-by: Richard Weinberger Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/build.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 136ce05d2328..e941395de3ae 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -535,8 +535,17 @@ static int get_bad_peb_limit(const struct ubi_device *ubi, int max_beb_per1024) int limit, device_pebs; uint64_t device_size; - if (!max_beb_per1024) - return 0; + if (!max_beb_per1024) { + /* + * Since max_beb_per1024 has not been set by the user in either + * the cmdline or Kconfig, use mtd_max_bad_blocks to set the + * limit if it is supported by the device. + */ + limit = mtd_max_bad_blocks(ubi->mtd, 0, ubi->mtd->size); + if (limit < 0) + return 0; + return limit; + } /* * Here we are using size of the entire flash chip and -- cgit v1.2.3 From e9062481824384f00299971f923fecf6b3668001 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 13 Jan 2018 11:35:15 +0000 Subject: ARM: net: bpf: avoid 'bx' instruction on non-Thumb capable CPUs Avoid the 'bx' instruction on CPUs that have no support for Thumb and thus do not implement this instruction by moving the generation of this opcode to a separate function that selects between: bx reg and mov pc, reg according to the capabilities of the CPU. 
Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index c199990e12b6..4efb3743a89e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -285,16 +285,20 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) emit_mov_i_no8m(rd, val, ctx); } -static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) +static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) { - ctx->seen |= SEEN_CALL; -#if __LINUX_ARM_ARCH__ < 5 - emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); - if (elf_hwcap & HWCAP_THUMB) emit(ARM_BX(tgt_reg), ctx); else emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); +} + +static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) +{ + ctx->seen |= SEEN_CALL; +#if __LINUX_ARM_ARCH__ < 5 + emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); + emit_bx_r(tgt_reg, ctx); #else emit(ARM_BLX_R(tgt_reg), ctx); #endif @@ -997,7 +1001,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit_a32_mov_i(tmp2[1], off, false, ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); - emit(ARM_BX(tmp[1]), ctx); + emit_bx_r(tmp[1], ctx); /* out: */ if (out_offset == -1) @@ -1166,7 +1170,7 @@ static void build_epilogue(struct jit_ctx *ctx) emit(ARM_POP(reg_set), ctx); /* Return back to the callee function */ if (!(ctx->seen & SEEN_CALL)) - emit(ARM_BX(ARM_LR), ctx); + emit_bx_r(ARM_LR, ctx); #endif } -- cgit v1.2.3 From f4483f2cc1fdc03488c8a1452e545545ae5bda93 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 13 Jan 2018 11:39:54 +0000 Subject: ARM: net: bpf: fix tail call jumps When a tail call fails, it is documented that the tail call should continue execution at the following instruction. 
An example tail call sequence is: 12: (85) call bpf_tail_call#12 13: (b7) r0 = 0 14: (95) exit The ARM assembler for the tail call in this case ends up branching to instruction 14 instead of instruction 13, resulting in the BPF filter returning a non-zero value: 178: ldr r8, [sp, #588] ; insn 12 17c: ldr r6, [r8, r6] 180: ldr r8, [sp, #580] 184: cmp r8, r6 188: bcs 0x1e8 18c: ldr r6, [sp, #524] 190: ldr r7, [sp, #528] 194: cmp r7, #0 198: cmpeq r6, #32 19c: bhi 0x1e8 1a0: adds r6, r6, #1 1a4: adc r7, r7, #0 1a8: str r6, [sp, #524] 1ac: str r7, [sp, #528] 1b0: mov r6, #104 1b4: ldr r8, [sp, #588] 1b8: add r6, r8, r6 1bc: ldr r8, [sp, #580] 1c0: lsl r7, r8, #2 1c4: ldr r6, [r6, r7] 1c8: cmp r6, #0 1cc: beq 0x1e8 1d0: mov r8, #32 1d4: ldr r6, [r6, r8] 1d8: add r6, r6, #44 1dc: bx r6 1e0: mov r0, #0 ; insn 13 1e4: mov r1, #0 1e8: add sp, sp, #596 ; insn 14 1ec: pop {r4, r5, r6, r7, r8, sl, pc} For other sequences, the tail call could end up branching midway through the following BPF instructions, or maybe off the end of the function, leading to unknown behaviours. 
Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 4efb3743a89e..ce36d2cab50c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -949,7 +949,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const u8 *tcc = bpf2a32[TCALL_CNT]; const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) -#define jmp_offset (out_offset - (cur_offset)) +#define jmp_offset (out_offset - (cur_offset) - 2) u32 off, lo, hi; /* if (index >= array->map.max_entries) -- cgit v1.2.3 From d1220efd23484c72c82d5471f05daeb35b5d1916 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 13 Jan 2018 16:10:07 +0000 Subject: ARM: net: bpf: fix stack alignment As per 2dede2d8e925 ("ARM EABI: stack pointer must be 64-bit aligned after a CPU exception") the stack should be aligned to a 64-bit boundary on EABI systems. Ensure that the eBPF JIT appropriately aligns the stack.
Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index ce36d2cab50c..d00a0eb0386e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -179,8 +179,13 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); } -/* Stack must be multiples of 16 Bytes */ -#define STACK_ALIGN(sz) (((sz) + 3) & ~3) +#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) +/* EABI requires the stack to be aligned to 64-bit boundaries */ +#define STACK_ALIGNMENT 8 +#else +/* Stack must be aligned to 32-bit boundaries */ +#define STACK_ALIGNMENT 4 +#endif /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, @@ -194,7 +199,7 @@ static void jit_fill_hole(void *area, unsigned int size) + SCRATCH_SIZE + \ + 4 /* extra for skb_copy_bits buffer */) -#define STACK_SIZE STACK_ALIGN(_STACK_SIZE) +#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) /* Get the offset of eBPF REGISTERs stored on scratch space. */ #define STACK_VAR(off) (STACK_SIZE-off-4) -- cgit v1.2.3 From 70ec3a6c2c11e4b0e107a65de943a082f9aff351 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 13 Jan 2018 21:26:14 +0000 Subject: ARM: net: bpf: move stack documentation Move the stack documentation towards the top of the file, where it's relevant for things like the register layout. 
Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index d00a0eb0386e..e90229d58c77 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -27,6 +27,27 @@ int bpf_jit_enable __read_mostly; +/* + * eBPF prog stack layout + * + * high + * original ARM_SP => +-----+ eBPF prologue + * |FP/LR| + * current ARM_FP => +-----+ + * | ... | callee saved registers + * eBPF fp register => +-----+ <= (BPF_FP) + * | ... | eBPF JIT scratch space + * | | eBPF prog stack + * +-----+ + * |RSVD | JIT scratchpad + * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE) + * | | + * | ... | Function call stack + * | | + * +-----+ + * low + */ + #define STACK_OFFSET(k) (k) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ @@ -1091,27 +1112,6 @@ static void build_prologue(struct jit_ctx *ctx) u16 reg_set = 0; - /* - * eBPF prog stack layout - * - * high - * original ARM_SP => +-----+ eBPF prologue - * |FP/LR| - * current ARM_FP => +-----+ - * | ... | callee saved registers - * eBPF fp register => +-----+ <= (BPF_FP) - * | ... | eBPF JIT scratch space - * | | eBPF prog stack - * +-----+ - * |RSVD | JIT scratchpad - * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE) - * | | - * | ... | Function call stack - * | | - * +-----+ - * low - */ - /* Save callee saved registers. */ reg_set |= (1< Date: Sat, 13 Jan 2018 22:51:27 +0000 Subject: ARM: net: bpf: correct stack layout documentation The stack layout documentation incorrectly suggests that the BPF JIT scratch space starts immediately below BPF_FP. This is not correct, so let's fix the documentation to reflect reality. 
Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e90229d58c77..dcb3181e85f3 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -28,24 +28,43 @@ int bpf_jit_enable __read_mostly; /* - * eBPF prog stack layout + * eBPF prog stack layout: * * high - * original ARM_SP => +-----+ eBPF prologue - * |FP/LR| - * current ARM_FP => +-----+ - * | ... | callee saved registers - * eBPF fp register => +-----+ <= (BPF_FP) + * original ARM_SP => +-----+ + * | | callee saved registers + * +-----+ <= (BPF_FP + SCRATCH_SIZE) * | ... | eBPF JIT scratch space - * | | eBPF prog stack + * eBPF fp register => +-----+ + * (BPF_FP) | ... | eBPF prog stack * +-----+ * |RSVD | JIT scratchpad - * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE) + * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) * | | * | ... | Function call stack * | | * +-----+ * low + * + * The callee saved registers depends on whether frame pointers are enabled. + * With frame pointers (to be compliant with the ABI): + * + * high + * original ARM_SP => +------------------+ \ + * | pc | | + * current ARM_FP => +------------------+ } callee saved registers + * |r4-r8,r10,fp,ip,lr| | + * +------------------+ / + * low + * + * Without frame pointers: + * + * high + * original ARM_SP => +------------------+ + * | lr | (optional) + * | r4-r8,r10 | callee saved registers + * +------------------+ + * low */ #define STACK_OFFSET(k) (k) -- cgit v1.2.3 From 02088d9b392f605c892894b46aa8c83e3abd0115 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 13 Jan 2018 22:38:18 +0000 Subject: ARM: net: bpf: fix register saving When an eBPF program tail-calls another eBPF program, it enters it after the prologue to avoid having complex stack manipulations. This can lead to kernel oopses, and similar. 
Resolve this by always using a fixed stack layout, a CPU register frame pointer, and using this when reloading registers before returning. Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 80 +++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 58 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index dcb3181e85f3..95bb3f896c8f 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -61,20 +61,24 @@ int bpf_jit_enable __read_mostly; * * high * original ARM_SP => +------------------+ - * | lr | (optional) - * | r4-r8,r10 | callee saved registers - * +------------------+ + * | r4-r8,r10,fp,lr | callee saved registers + * current ARM_FP => +------------------+ * low + * + * When popping registers off the stack at the end of a BPF function, we + * reference them via the current ARM_FP register. */ +#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ + 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ + 1 << ARM_FP) +#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) +#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) #define STACK_OFFSET(k) (k) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ -/* Flags used for JIT optimization */ -#define SEEN_CALL (1 << 0) - #define FLAG_IMM_OVERFLOW (1 << 0) /* @@ -135,7 +139,6 @@ static const u8 bpf2a32[][2] = { * idx : index of current last JITed instruction. * prologue_bytes : bytes used in prologue. * epilogue_offset : offset of epilogue starting. - * seen : bit mask used for JIT optimization. * offsets : array of eBPF instruction offsets in * JITed code. * target : final JITed code. 
@@ -150,7 +153,6 @@ struct jit_ctx { unsigned int idx; unsigned int prologue_bytes; unsigned int epilogue_offset; - u32 seen; u32 flags; u32 *offsets; u32 *target; @@ -340,7 +342,6 @@ static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) { - ctx->seen |= SEEN_CALL; #if __LINUX_ARM_ARCH__ < 5 emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); emit_bx_r(tgt_reg, ctx); @@ -403,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) } /* Call appropriate function */ - ctx->seen |= SEEN_CALL; emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); @@ -669,8 +669,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, /* Do LSH operation */ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); @@ -705,8 +703,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, /* Do the ARSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); _emit(ARM_COND_MI, ARM_B(0), ctx); @@ -741,8 +737,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk, /* Do LSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); @@ -877,8 +871,6 @@ static inline 
void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, /* Do Multiplication */ emit(ARM_MUL(ARM_IP, rd, rn), ctx); emit(ARM_MUL(ARM_LR, rm, rt), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); @@ -955,7 +947,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, const u8 rn, struct jit_ctx *ctx, u8 op) { switch (op) { case BPF_JSET: - ctx->seen |= SEEN_CALL; emit(ARM_AND_R(ARM_IP, rt, rn), ctx); emit(ARM_AND_R(ARM_LR, rd, rm), ctx); emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); @@ -1119,33 +1110,22 @@ static void build_prologue(struct jit_ctx *ctx) const u8 r2 = bpf2a32[BPF_REG_1][1]; const u8 r3 = bpf2a32[BPF_REG_1][0]; const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 r5 = bpf2a32[BPF_REG_6][0]; - const u8 r6 = bpf2a32[TMP_REG_1][1]; - const u8 r7 = bpf2a32[TMP_REG_1][0]; - const u8 r8 = bpf2a32[TMP_REG_2][1]; - const u8 r10 = bpf2a32[TMP_REG_2][0]; const u8 fplo = bpf2a32[BPF_REG_FP][1]; const u8 fphi = bpf2a32[BPF_REG_FP][0]; - const u8 sp = ARM_SP; const u8 *tcc = bpf2a32[TCALL_CNT]; - u16 reg_set = 0; - /* Save callee saved registers. */ - reg_set |= (1<seen & SEEN_CALL) - reg_set |= (1<stack_size = imm8m(STACK_SIZE); @@ -1168,33 +1148,19 @@ static void build_prologue(struct jit_ctx *ctx) /* end of prologue */ } +/* restore callee saved registers. */ static void build_epilogue(struct jit_ctx *ctx) { - const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 r5 = bpf2a32[BPF_REG_6][0]; - const u8 r6 = bpf2a32[TMP_REG_1][1]; - const u8 r7 = bpf2a32[TMP_REG_1][0]; - const u8 r8 = bpf2a32[TMP_REG_2][1]; - const u8 r10 = bpf2a32[TMP_REG_2][0]; - u16 reg_set = 0; - - /* unwind function call stack */ - emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); - - /* restore callee saved registers. 
*/ - reg_set |= (1<seen & SEEN_CALL) - reg_set |= (1<seen & SEEN_CALL)) - emit_bx_r(ARM_LR, ctx); + emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); + emit(ARM_POP(CALLEE_POP_MASK), ctx); #endif } @@ -1422,8 +1388,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_rev32(rt, rt, ctx); goto emit_bswap_uxt; case 64: - /* Because of the usage of ARM_LR */ - ctx->seen |= SEEN_CALL; emit_rev32(ARM_LR, rt, ctx); emit_rev32(rt, rd, ctx); emit(ARM_MOV_R(rd, ARM_LR), ctx); -- cgit v1.2.3 From ec19e02b343db991d2d1610c409efefebf4e2ca9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 13 Jan 2018 21:06:16 +0000 Subject: ARM: net: bpf: fix LDX instructions When the source and destination register are identical, our JIT does not generate correct code, which leads to kernel oopses. Fix this by (a) generating more efficient code, and (b) making use of the temporary earlier if we will overwrite the address register. Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 61 +++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 95bb3f896c8f..715e7250de86 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -913,33 +913,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, } /* dst = *(size*)(src + off) */ -static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk, - const s32 off, struct jit_ctx *ctx, const u8 sz){ +static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, + s32 off, struct jit_ctx *ctx, const u8 sz){ const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst; + const u8 *rd = dstk ? 
tmp : dst; u8 rm = src; + s32 off_max; - if (off) { + if (sz == BPF_H) + off_max = 0xff; + else + off_max = 0xfff; + + if (off < 0 || off > off_max) { emit_a32_mov_i(tmp[0], off, false, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; + off = 0; + } else if (rd[1] == rm) { + emit(ARM_MOV_R(tmp[0], rm), ctx); + rm = tmp[0]; } switch (sz) { - case BPF_W: - /* Load a Word */ - emit(ARM_LDR_I(rd, rm, 0), ctx); + case BPF_B: + /* Load a Byte */ + emit(ARM_LDRB_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); break; case BPF_H: /* Load a HalfWord */ - emit(ARM_LDRH_I(rd, rm, 0), ctx); + emit(ARM_LDRH_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); break; - case BPF_B: - /* Load a Byte */ - emit(ARM_LDRB_I(rd, rm, 0), ctx); + case BPF_W: + /* Load a Word */ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); + break; + case BPF_DW: + /* Load a Double Word */ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } if (dstk) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); + if (dstk && sz == BPF_DW) + emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); } /* Arithmatic Operation */ @@ -1440,22 +1460,7 @@ exit: rn = sstk ? 
tmp2[1] : src_lo; if (sstk) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - switch (BPF_SIZE(code)) { - case BPF_W: - /* Load a Word */ - case BPF_H: - /* Load a Half-Word */ - case BPF_B: - /* Load a Byte */ - emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); - break; - case BPF_DW: - /* Load a double word */ - emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W); - emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W); - break; - } + emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); break; /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ case BPF_LD | BPF_ABS | BPF_W: -- cgit v1.2.3 From 091f02483df7b56615b524491f404e574c5e0668 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 13 Jan 2018 12:11:26 +0000 Subject: ARM: net: bpf: clarify tail_call index As per 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT"), the index used for array lookup is defined to be 32-bit wide. Update a misleading comment that suggests it is 64-bit wide. 
Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Russell King --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 715e7250de86..323a4df59a6c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1016,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit_a32_mov_i(tmp[1], off, false, ctx); emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); - /* index (64 bit) */ + /* index is 32-bit for arrays */ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); /* index >= array->map.max_entries */ emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); -- cgit v1.2.3 From 4df0bfc79904b7169dc77dcce44598b1545721f9 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 15 Jan 2018 11:37:29 -0800 Subject: tun: fix a memory leak for tfile->tx_array tfile->tun could be detached before we close the tun fd, via tun_detach_all(), so it should not be used to check for tfile->tx_array. As Jason suggested, we probably have to clean it up unconditionally both in __tun_detach() and tun_detach_all(), but this requires checking whether it is initialized or not. Currently skb_array_cleanup() doesn't have such a check, so I check it in the caller and introduce a helper function; it is a bit ugly but we can always improve it in net-next. Reported-by: Dmitry Vyukov Fixes: 1576d9860599 ("tun: switch to use skb array for tx") Cc: Jason Wang Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller --- drivers/net/tun.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4f4a842a1c9c..a8ec589d1359 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -611,6 +611,14 @@ static void tun_queue_purge(struct tun_file *tfile) skb_queue_purge(&tfile->sk.sk_error_queue); } +static void tun_cleanup_tx_array(struct tun_file *tfile) +{ + if (tfile->tx_array.ring.queue) { + skb_array_cleanup(&tfile->tx_array); + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + } +} + static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -657,8 +665,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - if (tun) - skb_array_cleanup(&tfile->tx_array); + tun_cleanup_tx_array(tfile); sock_put(&tfile->sk); } } @@ -700,11 +707,13 @@ static void tun_detach_all(struct net_device *dev) /* Drop read queue */ tun_queue_purge(tfile); sock_put(&tfile->sk); + tun_cleanup_tx_array(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); sock_put(&tfile->sk); + tun_cleanup_tx_array(tfile); } BUG_ON(tun->numdisabled != 0); @@ -2851,6 +2860,8 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); + return 0; } -- cgit v1.2.3 From 0b1655143df00ac5349f27b765b2ed13a3ac40ca Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 16 Jan 2018 16:46:27 +0800 Subject: r8152: disable RX aggregation on Dell TB16 dock r8153 on Dell TB15/16 dock corrupts rx packets. This change is suggested by Realtek. They guess that the XHCI controller doesn't have enough buffer, and their guesswork is correct: once the RX aggregation gets disabled, the issue is gone. ASMedia is currently working on a real solution for this issue. 
Dell and ODM confirm the bcdDevice and iSerialNumber is unique for TB16. Note that TB15 has different bcdDevice and iSerialNumber, which are not unique values. If you still have TB15, please contact Dell to replace it with TB16. BugLink: https://bugs.launchpad.net/bugs/1729674 Cc: Mario Limonciello Signed-off-by: Kai-Heng Feng Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d51d9abf7986..0657203ffb91 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -606,6 +606,7 @@ enum rtl8152_flags { PHY_RESET, SCHEDULE_NAPI, GREEN_ETHERNET, + DELL_TB_RX_AGG_BUG, }; /* Define these values to match your device */ @@ -1798,6 +1799,9 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg) dev_kfree_skb_any(skb); remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head); + + if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags)) + break; } if (!skb_queue_empty(&skb_head)) { @@ -4133,6 +4137,9 @@ static void r8153_init(struct r8152 *tp) /* rx aggregation */ ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL); ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); + if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags)) + ocp_data |= RX_AGG_DISABLE; + ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); rtl_tally_reset(tp); @@ -5207,6 +5214,12 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->hw_features &= ~NETIF_F_RXCSUM; } + if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && + udev->serial && !strcmp(udev->serial, "000001000000")) { + dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation"); + set_bit(DELL_TB_RX_AGG_BUG, &tp->flags); + } + netdev->ethtool_ops = &ops; netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE); -- cgit v1.2.3 From f8b39039cbf2a15f2b8c9f081e1cbd5dee00aaf5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 16 Jan 2018 10:33:05 +0100 Subject: net: fs_enet: do not call 
phy_stop() in interrupts In case of TX timeout, fs_timeout() calls phy_stop(), which triggers the following BUG_ON() as we are in interrupt. [92708.199889] kernel BUG at drivers/net/phy/mdio_bus.c:482! [92708.204985] Oops: Exception in kernel mode, sig: 5 [#1] [92708.210119] PREEMPT [92708.212107] CMPC885 [92708.214216] CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: G W 4.9.61 #39 [92708.223227] task: c60f0a40 task.stack: c6104000 [92708.227697] NIP: c02a84bc LR: c02a947c CTR: c02a93d8 [92708.232614] REGS: c6105c70 TRAP: 0700 Tainted: G W (4.9.61) [92708.241193] MSR: 00021032 [92708.244818] CR: 24000822 XER: 20000000 [92708.248767] GPR00: c02a947c c6105d20 c60f0a40 c62b4c00 00000005 0000001f c069aad8 0001a688 GPR08: 00000007 00000100 c02a93d8 00000000 000005fc 00000000 c6213240 c06338e4 GPR16: 00000001 c06330d4 c0633094 00000000 c0680000 c6104000 c6104000 00000000 GPR24: 00000200 00000000 ffffffff 00000004 00000078 00009032 00000000 c62b4c00 NIP [c02a84bc] mdiobus_read+0x20/0x74 [92708.281517] LR [c02a947c] kszphy_config_intr+0xa4/0xc4 [92708.286547] Call Trace: [92708.288980] [c6105d20] [c6104000] 0xc6104000 (unreliable) [92708.294339] [c6105d40] [c02a947c] kszphy_config_intr+0xa4/0xc4 [92708.300098] [c6105d50] [c02a5330] phy_stop+0x60/0x9c [92708.305007] [c6105d60] [c02c84d0] fs_timeout+0xdc/0x110 [92708.310197] [c6105d80] [c035cd48] dev_watchdog+0x268/0x2a0 [92708.315593] [c6105db0] [c0060288] call_timer_fn+0x34/0x17c [92708.321014] [c6105dd0] [c00605f0] run_timer_softirq+0x21c/0x2e4 [92708.326887] [c6105e50] [c001e19c] __do_softirq+0xf4/0x2f4 [92708.332207] [c6105eb0] [c001e3c8] run_ksoftirqd+0x2c/0x40 [92708.337560] [c6105ec0] [c003b420] smpboot_thread_fn+0x1f0/0x258 [92708.343405] [c6105ef0] [c003745c] kthread+0xbc/0xd0 [92708.348217] [c6105f40] [c000c400] ret_from_kernel_thread+0x5c/0x64 [92708.354275] Instruction dump: [92708.357207] 7c0803a6 bbc10018 38210020 4e800020 7c0802a6 9421ffe0 54290024 bfc10018 [92708.364865] 90010024 7c7f1b78 81290008 552902ee 
<0f090000> 3bc3002c 7fc3f378 90810008 [92708.372711] ---[ end trace 42b05441616fafd7 ]--- This patch moves fs_timeout() actions into an async worker. Fixes: commit 48257c4f168e5 ("Add fs_enet ethernet network driver, for several embedded platforms") Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c | 16 +++++++++++++--- drivers/net/ethernet/freescale/fs_enet/fs_enet.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 7892f2f0c6b5..2c2976a2dda6 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -613,9 +613,11 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void fs_timeout(struct net_device *dev) +static void fs_timeout_work(struct work_struct *work) { - struct fs_enet_private *fep = netdev_priv(dev); + struct fs_enet_private *fep = container_of(work, struct fs_enet_private, + timeout_work); + struct net_device *dev = fep->ndev; unsigned long flags; int wake = 0; @@ -627,7 +629,6 @@ static void fs_timeout(struct net_device *dev) phy_stop(dev->phydev); (*fep->ops->stop)(dev); (*fep->ops->restart)(dev); - phy_start(dev->phydev); } phy_start(dev->phydev); @@ -639,6 +640,13 @@ static void fs_timeout(struct net_device *dev) netif_wake_queue(dev); } +static void fs_timeout(struct net_device *dev) +{ + struct fs_enet_private *fep = netdev_priv(dev); + + schedule_work(&fep->timeout_work); +} + /*----------------------------------------------------------------------------- * generic link-change handler - should be sufficient for most cases *-----------------------------------------------------------------------------*/ @@ -759,6 +767,7 @@ static int fs_enet_close(struct net_device *dev) netif_stop_queue(dev); netif_carrier_off(dev); 
napi_disable(&fep->napi); + cancel_work_sync(&fep->timeout_work); phy_stop(dev->phydev); spin_lock_irqsave(&fep->lock, flags); @@ -1019,6 +1028,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; + INIT_WORK(&fep->timeout_work, fs_timeout_work); netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index 92e06b37a199..195fae6aec4a 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -125,6 +125,7 @@ struct fs_enet_private { spinlock_t lock; /* during all ops except TX pckt processing */ spinlock_t tx_lock; /* during fs_start_xmit and fs_tx */ struct fs_platform_info *fpi; + struct work_struct timeout_work; const struct fs_ops *ops; int rx_ring, tx_ring; dma_addr_t ring_mem_addr; -- cgit v1.2.3 From a51a0c8d213594bc094cb8e54aad0cb6d7f7b9a6 Mon Sep 17 00:00:00 2001 From: Clay McClure Date: Thu, 21 Sep 2017 19:01:34 -0700 Subject: ubi: Fix race condition between ubi volume creation and udev Similar to commit 714fb87e8bc0 ("ubi: Fix race condition between ubi device creation and udev"), we should make the volume active before registering it. 
Signed-off-by: Clay McClure Cc: Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/vmt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 85237cf661f9..3fd8d7ff7a02 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -270,6 +270,12 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) vol->last_eb_bytes = vol->usable_leb_size; } + /* Make volume "available" before it becomes accessible via sysfs */ + spin_lock(&ubi->volumes_lock); + ubi->volumes[vol_id] = vol; + ubi->vol_count += 1; + spin_unlock(&ubi->volumes_lock); + /* Register character device for the volume */ cdev_init(&vol->cdev, &ubi_vol_cdev_operations); vol->cdev.owner = THIS_MODULE; @@ -298,11 +304,6 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) if (err) goto out_sysfs; - spin_lock(&ubi->volumes_lock); - ubi->volumes[vol_id] = vol; - ubi->vol_count += 1; - spin_unlock(&ubi->volumes_lock); - ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED); self_check_volumes(ubi); return err; @@ -315,6 +316,10 @@ out_sysfs: */ cdev_device_del(&vol->cdev, &vol->dev); out_mapping: + spin_lock(&ubi->volumes_lock); + ubi->volumes[vol_id] = NULL; + ubi->vol_count -= 1; + spin_unlock(&ubi->volumes_lock); ubi_eba_destroy_table(eba_tbl); out_acc: spin_lock(&ubi->volumes_lock); -- cgit v1.2.3 From af7bcee27652bbf2502207500ad200763707a160 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 29 Oct 2017 20:40:02 +0800 Subject: ubi: fastmap: Use kmem_cache_free to deallocate memory Memory allocated by kmem_cache_alloc() should not be deallocated with kfree(). Use kmem_cache_free() instead. 
Signed-off-by: Pan Bian Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 5a832bc79b1b..717db749808a 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -1063,7 +1063,7 @@ int ubi_scan_fastmap(struct ubi_device *ubi, struct ubi_attach_info *ai, e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) { while (i--) - kfree(fm->e[i]); + kmem_cache_free(ubi_wl_entry_slab, fm->e[i]); ret = -ENOMEM; goto free_hdr; -- cgit v1.2.3 From f50629df49f59b044c89f99a4bcd02cafdb38258 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 29 Oct 2017 13:14:26 +0000 Subject: ubi: fastmap: Clean up the initialization of pointer p The pointer p is being initialized with one value and a few lines later being set to a newer replacement value. Clean up the code by using the latter assignment to p as the initial value. 
Cleans up clang warning: drivers/mtd/ubi/fastmap.c:217:19: warning: Value stored to 'p' during its initialization is never read Signed-off-by: Colin Ian King Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 717db749808a..91705962ba73 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -214,9 +214,8 @@ static void assign_aeb_to_av(struct ubi_attach_info *ai, struct ubi_ainf_volume *av) { struct ubi_ainf_peb *tmp_aeb; - struct rb_node **p = &ai->volumes.rb_node, *parent = NULL; + struct rb_node **p = &av->root.rb_node, *parent = NULL; - p = &av->root.rb_node; while (*p) { parent = *p; -- cgit v1.2.3 From 7e35c4dac3e7b02bbf1588af52edf155537e5b61 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:43:13 -0800 Subject: ubifs: switch to fscrypt_file_open() Signed-off-by: Eric Biggers Signed-off-by: Richard Weinberger --- fs/ubifs/file.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index dfe85069586e..3a3cdafaab45 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1629,35 +1629,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static int ubifs_file_open(struct inode *inode, struct file *filp) -{ - int ret; - struct dentry *dir; - struct ubifs_info *c = inode->i_sb->s_fs_info; - - if (ubifs_crypt_is_encrypted(inode)) { - ret = fscrypt_get_encryption_info(inode); - if (ret) - return -EACCES; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - - dir = dget_parent(file_dentry(filp)); - if (ubifs_crypt_is_encrypted(d_inode(dir)) && - !fscrypt_has_permitted_context(d_inode(dir), inode)) { - ubifs_err(c, "Inconsistent encryption contexts: %lu/%lu", - (unsigned long) d_inode(dir)->i_ino, - (unsigned long) inode->i_ino); - 
dput(dir); - ubifs_ro_mode(c, -EPERM); - return -EPERM; - } - dput(dir); - - return 0; -} - static const char *ubifs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) @@ -1746,7 +1717,7 @@ const struct file_operations ubifs_file_operations = { .unlocked_ioctl = ubifs_ioctl, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .open = ubifs_file_open, + .open = fscrypt_file_open, #ifdef CONFIG_COMPAT .compat_ioctl = ubifs_compat_ioctl, #endif -- cgit v1.2.3 From 5653878c8ca417b2f7b283df0db0141bb3c185f7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:43:14 -0800 Subject: ubifs: switch to fscrypt_prepare_link() Signed-off-by: Eric Biggers Signed-off-by: Richard Weinberger --- fs/ubifs/dir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 417fe0b29f23..09e6c56b11bc 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -743,9 +743,9 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, ubifs_assert(inode_is_locked(dir)); ubifs_assert(inode_is_locked(inode)); - if (ubifs_crypt_is_encrypted(dir) && - !fscrypt_has_permitted_context(dir, inode)) - return -EPERM; + err = fscrypt_prepare_link(old_dentry, dir, dentry); + if (err) + return err; err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); if (err) -- cgit v1.2.3 From 0c1ad5242d4f7c155661449a766e98f0018799ee Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:43:15 -0800 Subject: ubifs: switch to fscrypt_prepare_rename() Signed-off-by: Eric Biggers Signed-off-by: Richard Weinberger --- fs/ubifs/dir.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 09e6c56b11bc..7bf847d79b4a 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1353,12 +1353,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlink) 
ubifs_assert(inode_is_locked(new_inode)); - if (old_dir != new_dir) { - if (ubifs_crypt_is_encrypted(new_dir) && - !fscrypt_has_permitted_context(new_dir, old_inode)) - return -EPERM; - } - if (unlink && is_dir) { err = ubifs_check_dir_empty(new_inode); if (err) @@ -1573,13 +1567,6 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, ubifs_assert(fst_inode && snd_inode); - if ((ubifs_crypt_is_encrypted(old_dir) || - ubifs_crypt_is_encrypted(new_dir)) && - (old_dir != new_dir) && - (!fscrypt_has_permitted_context(new_dir, fst_inode) || - !fscrypt_has_permitted_context(old_dir, snd_inode))) - return -EPERM; - err = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &fst_nm); if (err) return err; @@ -1624,12 +1611,19 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { + int err; + if (flags & ~(RENAME_NOREPLACE | RENAME_WHITEOUT | RENAME_EXCHANGE)) return -EINVAL; ubifs_assert(inode_is_locked(old_dir)); ubifs_assert(inode_is_locked(new_dir)); + err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, + flags); + if (err) + return err; + if (flags & RENAME_EXCHANGE) return ubifs_xrename(old_dir, old_dentry, new_dir, new_dentry); -- cgit v1.2.3 From a0b3ccd9636014664e5dec80a86ef624399c105c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:43:16 -0800 Subject: ubifs: switch to fscrypt_prepare_lookup() Signed-off-by: Eric Biggers Signed-off-by: Richard Weinberger --- fs/ubifs/dir.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 7bf847d79b4a..a2ea4856e67b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -220,20 +220,9 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); - if (ubifs_crypt_is_encrypted(dir)) { - err = fscrypt_get_encryption_info(dir); - - /* - 
* DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is - * created while the directory was encrypted and we - * have access to the key. - */ - if (fscrypt_has_encryption_key(dir)) - fscrypt_set_encrypted_dentry(dentry); - fscrypt_set_d_op(dentry); - if (err && err != -ENOKEY) - return ERR_PTR(err); - } + err = fscrypt_prepare_lookup(dir, dentry, flags); + if (err) + return ERR_PTR(err); err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &nm); if (err) -- cgit v1.2.3 From 252153ba518ac0bcde6b7152c63380d4415bfe5d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:43:17 -0800 Subject: ubifs: switch to fscrypt_prepare_setattr() Signed-off-by: Eric Biggers Signed-off-by: Richard Weinberger --- fs/ubifs/file.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 3a3cdafaab45..9fe194a4fa9b 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1284,13 +1284,9 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; - if (ubifs_crypt_is_encrypted(inode) && (attr->ia_valid & ATTR_SIZE)) { - err = fscrypt_get_encryption_info(inode); - if (err) - return err; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } + err = fscrypt_prepare_setattr(dentry, attr); + if (err) + return err; if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size) /* Truncation to a smaller size */ -- cgit v1.2.3 From f78e5623f45bab2b726eec29dc5cefbbab2d0b1c Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 5 Dec 2017 16:01:20 +0100 Subject: ubi: fastmap: Erase outdated anchor PEBs during attach The fastmap update code might erase the current fastmap anchor PEB in case it doesn't find any new free PEB. When a power cut happens in this situation we must not have any outdated fastmap anchor PEB on the device, because that would be used to attach during next boot. The easiest way to make that sure is to erase all outdated fastmap anchor PEBs synchronously during attach. 
Signed-off-by: Sascha Hauer Reviewed-by: Richard Weinberger Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Cc: Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/wl.c | 77 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index b5b8cd6f481c..668b46202507 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1528,6 +1528,46 @@ static void shutdown_work(struct ubi_device *ubi) } } +/** + * erase_aeb - erase a PEB given in UBI attach info PEB + * @ubi: UBI device description object + * @aeb: UBI attach info PEB + * @sync: If true, erase synchronously. Otherwise schedule for erasure + */ +static int erase_aeb(struct ubi_device *ubi, struct ubi_ainf_peb *aeb, bool sync) +{ + struct ubi_wl_entry *e; + int err; + + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->pnum = aeb->pnum; + e->ec = aeb->ec; + ubi->lookuptbl[e->pnum] = e; + + if (sync) { + err = sync_erase(ubi, e, false); + if (err) + goto out_free; + + wl_tree_add(e, &ubi->free); + ubi->free_count++; + } else { + err = schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false); + if (err) + goto out_free; + } + + return 0; + +out_free: + wl_entry_destroy(ubi, e); + + return err; +} + /** * ubi_wl_init - initialize the WL sub-system using attaching information. 
* @ubi: UBI device description object @@ -1566,18 +1606,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) list_for_each_entry_safe(aeb, tmp, &ai->erase, u.list) { cond_resched(); - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + err = erase_aeb(ubi, aeb, false); + if (err) goto out_free; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); - goto out_free; - } - found_pebs++; } @@ -1635,6 +1667,8 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) ubi_assert(!ubi->lookuptbl[e->pnum]); ubi->lookuptbl[e->pnum] = e; } else { + bool sync = false; + /* * Usually old Fastmap PEBs are scheduled for erasure * and we don't have to care about them but if we face @@ -1644,18 +1678,21 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->lookuptbl[aeb->pnum]) continue; - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) - goto out_free; + /* + * The fastmap update code might not find a free PEB for + * writing the fastmap anchor to and then reuses the + * current fastmap anchor PEB. When this PEB gets erased + * and a power cut happens before it is written again we + * must make sure that the fastmap attach code doesn't + * find any outdated fastmap anchors, hence we erase the + * outdated fastmap anchor PEBs synchronously here. 
+ */ + if (aeb->vol_id == UBI_FM_SB_VOLUME_ID) + sync = true; - e->pnum = aeb->pnum; - e->ec = aeb->ec; - ubi_assert(!ubi->lookuptbl[e->pnum]); - ubi->lookuptbl[e->pnum] = e; - if (schedule_erase(ubi, e, aeb->vol_id, aeb->lnum, 0, false)) { - wl_entry_destroy(ubi, e); + err = erase_aeb(ubi, aeb, sync); + if (err) goto out_free; - } } found_pebs++; -- cgit v1.2.3 From d91c3e17f75f218022140dee18cf515292184a8f Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Tue, 16 Jan 2018 15:31:52 +0200 Subject: net/tls: Only attach to sockets in ESTABLISHED state Calling accept on a TCP socket with a TLS ulp attached results in two sockets that share the same ulp context. The ulp context is freed when a socket is destroyed, so after one of the sockets is released, the second socket will trigger a use-after-free when it tries to access the ulp context attached to it. We restrict the TLS ulp to sockets in ESTABLISHED state to prevent the scenario above. Fixes: 3c4d7559159b ("tls: kernel TLS support") Reported-by: syzbot+904e7cd6c5c741609228@syzkaller.appspotmail.com Signed-off-by: Ilya Lesokhin Signed-off-by: David S. Miller --- net/tls/tls_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index e07ee3ae0023..7b7a70e22d90 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -454,6 +454,15 @@ static int tls_init(struct sock *sk) struct tls_context *ctx; int rc = 0; + /* The TLS ulp is currently supported only for TCP sockets + * in ESTABLISHED state. + * Supporting sockets in LISTEN state will require us + * to modify the accept implementation to clone rather then + * share the ulp context. 
+ */ + if (sk->sk_state != TCP_ESTABLISHED) + return -ENOTSUPP; + /* allocate tls context */ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { -- cgit v1.2.3 From 20469a37aed12a886d0deda5a07c04037923144a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 17 Jan 2018 22:04:37 +0100 Subject: nvme-pci: check segement valid for SGL use The driver needs to verify there is a payload with a command before seeing if it should use SGLs to map it. Fixes: 955b1b5a00ba ("nvme-pci: move use_sgl initialization to nvme_init_iod()") Reported-by: Paul Menzel Reviewed-by: Paul Menzel Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d53550e612bc..a7e94cc3c70e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -451,10 +451,13 @@ static void **nvme_pci_iod_list(struct request *req) static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + int nseg = blk_rq_nr_phys_segments(req); unsigned int avg_seg_size; - avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), - blk_rq_nr_phys_segments(req)); + if (nseg == 0) + return false; + + avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg); if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1)))) return false; -- cgit v1.2.3 From b0f2853b56a2acaff19cca2c6a608f8ec268d21a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jan 2018 22:04:38 +0100 Subject: nvme-pci: take sglist coalescing in dma_map_sg into account Some iommu implementations can merge physically and/or virtually contiguous segments inside sg_map_dma. The NVMe SGL support does not take this into account and will warn because of falling off a loop. Pass the number of mapped segments to nvme_pci_setup_sgls so that the SGL setup can take the number of mapped segments into account. 
Reported-by: Fangjian (Turing) Fixes: a7a7cbe3 ("nvme-pci: add SGL support") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a7e94cc3c70e..4276ebfff22b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -725,20 +725,19 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, } static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, - struct request *req, struct nvme_rw_command *cmd) + struct request *req, struct nvme_rw_command *cmd, int entries) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - int length = blk_rq_payload_bytes(req); struct dma_pool *pool; struct nvme_sgl_desc *sg_list; struct scatterlist *sg = iod->sg; - int entries = iod->nents, i = 0; dma_addr_t sgl_dma; + int i = 0; /* setting the transfer type as SGL */ cmd->flags = NVME_CMD_SGL_METABUF; - if (length == sg_dma_len(sg)) { + if (entries == 1) { nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg); return BLK_STS_OK; } @@ -778,13 +777,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, } nvme_pci_sgl_set_data(&sg_list[i++], sg); - - length -= sg_dma_len(sg); sg = sg_next(sg); - entries--; - } while (length > 0); + } while (--entries > 0); - WARN_ON(entries > 0); return BLK_STS_OK; } @@ -796,6 +791,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, enum dma_data_direction dma_dir = rq_data_dir(req) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; blk_status_t ret = BLK_STS_IOERR; + int nr_mapped; sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(q, req, iod->sg); @@ -803,12 +799,13 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, goto out; ret = BLK_STS_RESOURCE; - if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, - DMA_ATTR_NO_WARN)) + nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, + DMA_ATTR_NO_WARN); + if (!nr_mapped) goto out; if (iod->use_sgl) - ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw); + ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); -- cgit v1.2.3 From cf6d43ef66f416282121f436ce1bee9a25199d52 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 16 Jan 2018 16:04:26 +0100 Subject: tls: fix sw_ctx leak During setsockopt(SOL_TCP, TLS_TX), if initialization of the software context fails in tls_set_sw_offload(), we leak sw_ctx. We also don't reassign ctx->priv_ctx to NULL, so we can't even do another attempt to set it up on the same socket, as it will fail with -EEXIST. Fixes: 3c4d7559159b ('tls: kernel TLS support') Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/tls/tls_sw.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9773571b6a34..61f394d369bf 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -681,18 +681,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) } default: rc = -EINVAL; - goto out; + goto free_priv; } ctx->prepend_size = TLS_HEADER_SIZE + nonce_size; ctx->tag_size = tag_size; ctx->overhead_size = ctx->prepend_size + ctx->tag_size; ctx->iv_size = iv_size; - ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - GFP_KERNEL); + ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); if (!ctx->iv) { rc = -ENOMEM; - goto out; + goto free_priv; } memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); @@ -740,7 +739,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size); if (!rc) - goto out; + return 0; free_aead: crypto_free_aead(sw_ctx->aead_send); @@ -751,6 +750,9 @@ free_rec_seq: free_iv: kfree(ctx->iv); ctx->iv = NULL; +free_priv: + kfree(ctx->priv_ctx); + ctx->priv_ctx = NULL; out: return rc; } -- cgit v1.2.3 From 877d17c79b66466942a836403773276e34fe3614 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 16 Jan 2018 16:04:27 +0100 Subject: tls: return -EBUSY if crypto_info is already set do_tls_setsockopt_tx returns 0 without doing anything when crypto_info is already set. Silent failure is confusing for users. Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- net/tls/tls_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 7b7a70e22d90..8e9cbfd21423 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -367,8 +367,10 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, crypto_info = &ctx->crypto_send; /* Currently we don't support set crypto info more than one time */ - if (TLS_CRYPTO_INFO_READY(crypto_info)) + if (TLS_CRYPTO_INFO_READY(crypto_info)) { + rc = -EBUSY; goto out; + } rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); if (rc) { -- cgit v1.2.3 From 6db959c82eb039a151d95a0f8b7dea643657327a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 16 Jan 2018 16:04:28 +0100 Subject: tls: reset crypto_info when do_tls_setsockopt_tx fails The current code copies directly from userspace to ctx->crypto_send, but doesn't always reinitialize it to 0 on failure. This causes any subsequent attempt to use this setsockopt to fail because of the TLS_CRYPTO_INFO_READY check, even though crypto_info is not actually ready. This should result in a correctly set up socket after the 3rd call, but currently it does not: size_t s = sizeof(struct tls12_crypto_info_aes_gcm_128); struct tls12_crypto_info_aes_gcm_128 crypto_good = { .info.version = TLS_1_2_VERSION, .info.cipher_type = TLS_CIPHER_AES_GCM_128, }; struct tls12_crypto_info_aes_gcm_128 crypto_bad_type = crypto_good; crypto_bad_type.info.cipher_type = 42; setsockopt(sock, SOL_TLS, TLS_TX, &crypto_bad_type, s); setsockopt(sock, SOL_TLS, TLS_TX, &crypto_good, s - 1); setsockopt(sock, SOL_TLS, TLS_TX, &crypto_good, s); Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Sabrina Dubroca Signed-off-by: David S.
Miller --- net/tls/tls_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 8e9cbfd21423..736719c8314e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -388,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, case TLS_CIPHER_AES_GCM_128: { if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { rc = -EINVAL; - goto out; + goto err_crypto_info; } rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), optlen - sizeof(*crypto_info)); @@ -400,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, } default: rc = -EINVAL; - goto out; + goto err_crypto_info; } /* currently SW is default, we will have ethtool in future */ -- cgit v1.2.3 From 5a717843177c96ca3fe4565187de395afdb28092 Mon Sep 17 00:00:00 2001 From: Rex Chang Date: Tue, 16 Jan 2018 15:16:01 -0500 Subject: Net: ethernet: ti: netcp: Fix inbound ping crash if MTU size is greater than 1500 In the receive queue for 4096 bytes fragments, the page address set in the SW data0 field of the descriptor is not the one we got when doing the reassembly in receive. The page structure was retrieved from the wrong descriptor into SW data0 which is then causing a page fault when UDP checksum is accessing data above 1500. Signed-off-by: Rex Chang Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index ed58c746e4af..f5a7eb22d0f5 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -715,7 +715,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp) /* warning!!!! We are retrieving the virtual ptr in the sw_data * field as a 32bit value. 
Will not work on 64bit machines */ - page = (struct page *)GET_SW_DATA0(desc); + page = (struct page *)GET_SW_DATA0(ndesc); if (likely(dma_buff && buf_len && page)) { dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE, -- cgit v1.2.3 From ad9294dbc227cbc8e173b3b963e7dd9af5314f77 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 17 Jan 2018 22:36:49 +0100 Subject: bpf: fix cls_bpf on filter replace Running the following sequence is currently broken: # tc qdisc add dev foo clsact # tc filter replace dev foo ingress prio 1 handle 1 bpf da obj bar.o # tc filter replace dev foo ingress prio 1 handle 1 bpf da obj bar.o RTNETLINK answers: Invalid argument The normal expectation on kernel side is that the second command succeeds replacing the existing program. However, what happens is in cls_bpf_change(), we bail out with err in the second run in cls_bpf_offload(). The EINVAL comes directly in cls_bpf_offload() when comparing prog vs oldprog's gen_flags. In case of above replace the new prog's gen_flags are 0, but the old ones are 8, which means TCA_CLS_FLAGS_NOT_IN_HW is set (e.g. drivers not having cls_bpf offload). Fix 102740bd9436 ("cls_bpf: fix offload assumptions after callback conversion") in the following way: gen_flags from user space passed down via netlink cannot include status flags like TCA_CLS_FLAGS_IN_HW or TCA_CLS_FLAGS_NOT_IN_HW as opposed to oldprog that we previously loaded. Therefore, it doesn't make any sense to include them in the gen_flags comparison with the new prog before we even attempt to offload. Thus, lets fix this before 4.15 goes out. Fixes: 102740bd9436 ("cls_bpf: fix offload assumptions after callback conversion") Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- net/sched/cls_bpf.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 8d78e7f4ecc3..a62586e2dbdb 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -183,10 +183,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, return 0; } +static u32 cls_bpf_flags(u32 flags) +{ + return flags & CLS_BPF_SUPPORTED_GEN_FLAGS; +} + static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct cls_bpf_prog *oldprog) { - if (prog && oldprog && prog->gen_flags != oldprog->gen_flags) + if (prog && oldprog && + cls_bpf_flags(prog->gen_flags) != + cls_bpf_flags(oldprog->gen_flags)) return -EINVAL; if (prog && tc_skip_hw(prog->gen_flags)) -- cgit v1.2.3 From ccffe776700343fbd127c8624d9962ebea1bfbb7 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 17 Jan 2018 18:24:48 +0000 Subject: hwmon: (lm75) Fix trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Signed-off-by: Guenter Roeck --- drivers/hwmon/lm75.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index 005ffb5ffa92..49f4b33a5685 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -100,7 +100,7 @@ static int lm75_read(struct device *dev, enum hwmon_sensor_types type, switch (attr) { case hwmon_chip_update_interval: *val = data->sample_time; - break;; + break; default: return -EINVAL; } -- cgit v1.2.3 From b3e73839379327ee21240ac48ea9a2eaf7613d79 Mon Sep 17 00:00:00 2001 From: Rock Lee Date: Wed, 10 Jan 2018 21:08:24 -0500 Subject: ubifs: remove error message in ubifs_xattr_get There is a situation that other modules, like overlayfs, try to get xattr value with a small buffer, if they get -ERANGE, they will try again with the proper buffer size. No need to report an error. 
Signed-off-by: Rock Lee Signed-off-by: Richard Weinberger --- fs/ubifs/xattr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 5ddc89d564fd..759f1a209dbb 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -381,8 +381,6 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, if (buf) { /* If @buf is %NULL we are supposed to return the length */ if (ui->data_len > size) { - ubifs_err(c, "buffer size %zd, xattr len %d", - size, ui->data_len); err = -ERANGE; goto out_iput; } -- cgit v1.2.3 From 889027bca233b422aedc2881d330e1a7c97e2315 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 24 Nov 2017 12:14:06 +0100 Subject: ubi: Fastmap: Fix typo Fix misspelling of 'available' in function name. Signed-off-by: Sascha Hauer Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap-wl.c | 2 +- drivers/mtd/ubi/wl.c | 2 +- drivers/mtd/ubi/wl.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 4f0bd6b4422a..590d967011bb 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -66,7 +66,7 @@ static void return_unused_pool_pebs(struct ubi_device *ubi, } } -static int anchor_pebs_avalible(struct rb_root *root) +static int anchor_pebs_available(struct rb_root *root) { struct rb_node *p; struct ubi_wl_entry *e; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 668b46202507..77ab49f2743b 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -692,7 +692,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, #ifdef CONFIG_MTD_UBI_FASTMAP /* Check whether we need to produce an anchor PEB */ if (!anchor) - anchor = !anchor_pebs_avalible(&ubi->free); + anchor = !anchor_pebs_available(&ubi->free); if (anchor) { e1 = find_anchor_wl_entry(&ubi->used); diff --git a/drivers/mtd/ubi/wl.h b/drivers/mtd/ubi/wl.h index 2aaa3f7f2ba9..a9e2d669acd8 100644 --- 
a/drivers/mtd/ubi/wl.h +++ b/drivers/mtd/ubi/wl.h @@ -2,7 +2,7 @@ #ifndef UBI_WL_H #define UBI_WL_H #ifdef CONFIG_MTD_UBI_FASTMAP -static int anchor_pebs_avalible(struct rb_root *root); +static int anchor_pebs_available(struct rb_root *root); static void update_fastmap_work_fn(struct work_struct *wrk); static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root); static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi); -- cgit v1.2.3 From 01f196945a21b3eec37317e3bc5cf35f95f95063 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 24 Nov 2017 12:17:14 +0100 Subject: ubi: Fix copy/paste error in function documentation The function documentation of leb_write_trylock is copied from leb_write_lock. Replace the function name with the correct one. Signed-off-by: Sascha Hauer Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/eba.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 388e46be6ad9..250e30fac61b 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -384,7 +384,7 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) } /** - * leb_write_lock - lock logical eraseblock for writing. + * leb_write_trylock - try to lock logical eraseblock for writing. 
* @ubi: UBI device description object * @vol_id: volume ID * @lnum: logical eraseblock number -- cgit v1.2.3 From 6f16101e6a8b4324c36e58a29d9e0dbb287cdedb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 18 Jan 2018 01:15:21 +0100 Subject: bpf: mark dst unknown on inconsistent {s, u}bounds adjustments syzkaller generated a BPF proglet and triggered a warning with the following: 0: (b7) r0 = 0 1: (d5) if r0 s<= 0x0 goto pc+0 R0=inv0 R1=ctx(id=0,off=0,imm=0) R10=fp0 2: (1f) r0 -= r1 R0=inv0 R1=ctx(id=0,off=0,imm=0) R10=fp0 verifier internal error: known but bad sbounds What happens is that in the first insn, r0's min/max value are both 0 due to the immediate assignment, later in the jsle test the bounds are updated for the min value in the false path, meaning, they yield smin_val = 1, smax_val = 0, and when ctx pointer is subtracted from r0, verifier bails out with the internal error and throwing a WARN since smin_val != smax_val for the known constant. For min_val > max_val scenario it means that reg_set_min_max() and reg_set_min_max_inv() (which both refine existing bounds) demonstrated that such branch cannot be taken at runtime. In above scenario for the case where it will be taken, the existing [0, 0] bounds are kept intact. Meaning, the rejection is not due to a verifier internal error, and therefore the WARN() is not necessary either. We could just reject such cases in adjust_{ptr,scalar}_min_max_vals() when either known scalars have smin_val != smax_val or umin_val != umax_val or any scalar reg with bounds smin_val > smax_val or umin_val > umax_val. However, there may be a small risk of breakage of buggy programs, so handle this more gracefully and in adjust_{ptr,scalar}_min_max_vals() just taint the dst reg as unknown scalar when we see ops with such kind of src reg. 
Reported-by: syzbot+6d362cadd45dc0a12ba4@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 27 +++--- tools/testing/selftests/bpf/test_verifier.c | 123 +++++++++++++++++++++++++++- 2 files changed, 138 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eb062b0fbf27..13551e623501 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1895,17 +1895,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst_reg = &regs[dst]; - if (WARN_ON_ONCE(known && (smin_val != smax_val))) { - print_verifier_state(env, env->cur_state); - verbose(env, - "verifier internal error: known but bad sbounds\n"); - return -EINVAL; - } - if (WARN_ON_ONCE(known && (umin_val != umax_val))) { - print_verifier_state(env, env->cur_state); - verbose(env, - "verifier internal error: known but bad ubounds\n"); - return -EINVAL; + if ((known && (smin_val != smax_val || umin_val != umax_val)) || + smin_val > smax_val || umin_val > umax_val) { + /* Taint dst register if offset had invalid bounds derived from + * e.g. dead branches. + */ + __mark_reg_unknown(dst_reg); + return 0; } if (BPF_CLASS(insn->code) != BPF_ALU64) { @@ -2097,6 +2093,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, src_known = tnum_is_const(src_reg.var_off); dst_known = tnum_is_const(dst_reg->var_off); + if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || + smin_val > smax_val || umin_val > umax_val) { + /* Taint dst register if offset had invalid bounds derived from + * e.g. dead branches.
+ */ + __mark_reg_unknown(dst_reg); + return 0; + } + if (!src_known && opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { __mark_reg_unknown(dst_reg); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 67e7c41674d2..5ed4175c4ff8 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6732,7 +6732,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map1 = { 4 }, - .errstr = "unbounded min value", + .errstr = "R0 invalid mem access 'inv'", .result = REJECT, }, { @@ -8633,6 +8633,127 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, + { + "check deducing bounds from const, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check deducing bounds from const, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "check deducing bounds from const, 5", + .insns = 
{ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, ~0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "check deducing bounds from const, 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, ~0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "check deducing bounds from const, 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", + }, + { + "check deducing bounds from const, 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), + /* Marks reg as unknown. */ + BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "math between ctx pointer and register with unbounded min value is not allowed", + }, { "bpf_exit with invalid return code. 
test1", .insns = { -- cgit v1.2.3 From bee344cb70e9bf5ad929e0a493c0f7aa3a587bfb Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Wed, 17 Jan 2018 10:33:21 +0000 Subject: PCI / PM: Remove spurious semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 14fd865a5120..765890e77cd5 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -953,7 +953,7 @@ static int pci_pm_freeze_late(struct device *dev) if (dev_pm_smart_suspend_and_suspended(dev)) return 0; - return pm_generic_freeze_late(dev);; + return pm_generic_freeze_late(dev); } static int pci_pm_freeze_noirq(struct device *dev) -- cgit v1.2.3 From 23d4ee19e789ae3dce3e04bd24e3d1537965475f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 18 Jan 2018 12:06:59 +0800 Subject: blk-mq: don't dispatch request in blk_mq_request_direct_issue if queue is busy If we run into blk_mq_request_direct_issue(), when queue is busy, we don't want to dispatch this request into hctx->dispatch_list, and what we need to do is to return the queue busy info to caller, so that caller can deal with it well. 
Fixes: 396eaf21ee ("blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback") Reported-by: Laurence Oberman Reviewed-by: Mike Snitzer Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index c418858a60ef..74a4f237ba91 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1773,15 +1773,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, return ret; } -static void __blk_mq_fallback_to_insert(struct request *rq, - bool run_queue, bool bypass_insert) -{ - if (!bypass_insert) - blk_mq_sched_insert_request(rq, false, run_queue, false); - else - blk_mq_request_bypass_insert(rq, run_queue); -} - static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie, @@ -1790,9 +1781,16 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, struct request_queue *q = rq->q; bool run_queue = true; - /* RCU or SRCU read lock is needed before checking quiesced flag */ + /* + * RCU or SRCU read lock is needed before checking quiesced flag. + * + * When queue is stopped or quiesced, ignore 'bypass_insert' from + * blk_mq_request_direct_issue(), and return BLK_STS_OK to caller, + * and avoid driver to try to dispatch again. 
+ */ if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) { run_queue = false; + bypass_insert = false; goto insert; } @@ -1809,10 +1807,10 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, return __blk_mq_issue_directly(hctx, rq, cookie); insert: - __blk_mq_fallback_to_insert(rq, run_queue, bypass_insert); if (bypass_insert) return BLK_STS_RESOURCE; + blk_mq_sched_insert_request(rq, false, run_queue, false); return BLK_STS_OK; } @@ -1828,7 +1826,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false); if (ret == BLK_STS_RESOURCE) - __blk_mq_fallback_to_insert(rq, true, false); + blk_mq_sched_insert_request(rq, false, true, false); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); -- cgit v1.2.3 From 659032dcb9f11c3bd2a3a23db76e6a70b3ddec79 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Jan 2018 13:41:57 +0000 Subject: mmc: sh_mmcif: remove redundant initialization of 'opc' Variable opc is initialized with a value that is never read, opc is later re-assigned a newer value, hence the initialization can be removed. 
Cleans up clang warning: drivers/mmc/host/sh_mmcif.c:919:6: warning: Value stored to 'opc' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mmcif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 53fb18bb7bee..7bb00c68a756 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -916,7 +916,7 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host, struct mmc_request *mrq) { struct mmc_command *cmd = mrq->cmd; - u32 opc = cmd->opcode; + u32 opc; u32 mask = 0; unsigned long flags; -- cgit v1.2.3 From 8d09a13386ccdee8fb6d66aa2cfedbbc9255f892 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Jan 2018 01:28:01 +0900 Subject: mmc: tmio: ioremap memory resource in tmio_mmc_host_alloc() The register region is ioremap'ed in the tmio_mmc_host_probe(), i.e. drivers cannot get access to the hardware before mmc_add_host(). Actually, renesas_sdhi_core.c reads out the CTL_VERSION register to complete the platform-specific settings. However, at this point, the MMC host is already running. Move the register ioremap to tmio_mmc_host_alloc() so that drivers can perform platform-specific settings between tmio_mmc_host_alloc() and tmio_mmc_host_probe(). I changed tmio_mmc_host_alloc() to return an error pointer to propagate the return code from devm_ioremap_resource(). 
Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 4 ++-- drivers/mmc/host/tmio_mmc.c | 4 +++- drivers/mmc/host/tmio_mmc_core.c | 16 +++++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 6a2988bd51a2..ccdde2735f68 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -512,8 +512,8 @@ int renesas_sdhi_probe(struct platform_device *pdev, } host = tmio_mmc_host_alloc(pdev); - if (!host) - return -ENOMEM; + if (IS_ERR(host)) + return PTR_ERR(host); if (of_data) { mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index ccfbc154ee5b..d660816bdf89 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -93,8 +93,10 @@ static int tmio_mmc_probe(struct platform_device *pdev) pdata->flags |= TMIO_MMC_HAVE_HIGH_REG; host = tmio_mmc_host_alloc(pdev); - if (!host) + if (IS_ERR(host)) { + ret = PTR_ERR(host); goto cell_disable; + } /* SD control register space size is 0x200, 0x400 for bus_shift=1 */ host->bus_shift = resource_size(res) >> 10; diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 0929b987fb29..4f62ce6664e0 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1150,12 +1150,20 @@ tmio_mmc_host_alloc(struct platform_device *pdev) { struct tmio_mmc_host *host; struct mmc_host *mmc; + struct resource *res; + void __iomem *ctl; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ctl = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ctl)) + return ERR_CAST(ctl); mmc = mmc_alloc_host(sizeof(struct tmio_mmc_host), &pdev->dev); if (!mmc) - return NULL; + return ERR_PTR(-ENOMEM); host = mmc_priv(mmc); + host->ctl = ctl; host->mmc = mmc; host->pdev = pdev; host->ops = tmio_mmc_ops; @@ 
-1177,7 +1185,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, { struct platform_device *pdev = _host->pdev; struct mmc_host *mmc = _host->mmc; - struct resource *res_ctl; int ret; u32 irq_mask = TMIO_MASK_CMD; @@ -1186,11 +1193,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, if (!(pdata->flags & TMIO_MMC_HAS_IDLE_WAIT)) _host->write16_hook = NULL; - res_ctl = platform_get_resource(pdev, IORESOURCE_MEM, 0); - _host->ctl = devm_ioremap_resource(&pdev->dev, res_ctl); - if (IS_ERR(_host->ctl)) - return PTR_ERR(_host->ctl); - ret = mmc_of_parse(mmc); if (ret < 0) return ret; -- cgit v1.2.3 From b21fc294387e4cf7916c132f7d6aaeebd4483a16 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Jan 2018 01:28:02 +0900 Subject: mmc: tmio: move clk_enable/disable out of tmio_mmc_host_probe() The clock is enabled in the tmio_mmc_host_probe(). It also prevents drivers from performing platform-specific settings before mmc_add_host() because the register access generally requires a clock. Enable/disable the clock in drivers' probe/remove. Also, I passed tmio_mmc_data to tmio_mmc_host_alloc() because renesas_sdhi_clk_enable() needs it to get the private data from tmio_mmc_host. 
Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 13 ++++++++++--- drivers/mmc/host/tmio_mmc.c | 7 +++++-- drivers/mmc/host/tmio_mmc.h | 4 ++-- drivers/mmc/host/tmio_mmc_core.c | 33 +++++++++++++-------------------- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index ccdde2735f68..e18a1c553df6 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -511,7 +511,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, "state_uhs"); } - host = tmio_mmc_host_alloc(pdev); + host = tmio_mmc_host_alloc(pdev, mmc_data); if (IS_ERR(host)) return PTR_ERR(host); @@ -571,10 +571,14 @@ int renesas_sdhi_probe(struct platform_device *pdev, /* All SDHI have SDIO status bits which must be 1 */ mmc_data->flags |= TMIO_MMC_SDIO_STATUS_SETBITS; - ret = tmio_mmc_host_probe(host, mmc_data, dma_ops); - if (ret < 0) + ret = renesas_sdhi_clk_enable(host); + if (ret) goto efree; + ret = tmio_mmc_host_probe(host, dma_ops); + if (ret < 0) + goto edisclk; + /* One Gen2 SDHI incarnation does NOT have a CBSY bit */ if (sd_ctrl_read16(host, CTL_VERSION) == SDHI_VER_GEN2_SDR50) mmc_data->flags &= ~TMIO_MMC_HAVE_CBSY; @@ -635,6 +639,8 @@ int renesas_sdhi_probe(struct platform_device *pdev, eirq: tmio_mmc_host_remove(host); +edisclk: + renesas_sdhi_clk_disable(host); efree: tmio_mmc_host_free(host); @@ -647,6 +653,7 @@ int renesas_sdhi_remove(struct platform_device *pdev) struct tmio_mmc_host *host = platform_get_drvdata(pdev); tmio_mmc_host_remove(host); + renesas_sdhi_clk_disable(host); return 0; } diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index d660816bdf89..11b87ce54764 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -92,7 +92,7 @@ static int tmio_mmc_probe(struct platform_device *pdev) pdata->flags |= TMIO_MMC_HAVE_HIGH_REG; - 
host = tmio_mmc_host_alloc(pdev); + host = tmio_mmc_host_alloc(pdev, pdata); if (IS_ERR(host)) { ret = PTR_ERR(host); goto cell_disable; @@ -101,7 +101,10 @@ static int tmio_mmc_probe(struct platform_device *pdev) /* SD control register space size is 0x200, 0x400 for bus_shift=1 */ host->bus_shift = resource_size(res) >> 10; - ret = tmio_mmc_host_probe(host, pdata, NULL); + host->mmc->f_max = pdata->hclk; + host->mmc->f_min = pdata->hclk / 512; + + ret = tmio_mmc_host_probe(host, NULL); if (ret) goto host_free; diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 52198f2929a5..b52d7368818d 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -195,10 +195,10 @@ struct tmio_mmc_host { const struct tmio_mmc_dma_ops *dma_ops; }; -struct tmio_mmc_host *tmio_mmc_host_alloc(struct platform_device *pdev); +struct tmio_mmc_host *tmio_mmc_host_alloc(struct platform_device *pdev, + struct tmio_mmc_data *pdata); void tmio_mmc_host_free(struct tmio_mmc_host *host); int tmio_mmc_host_probe(struct tmio_mmc_host *host, - struct tmio_mmc_data *pdata, const struct tmio_mmc_dma_ops *dma_ops); void tmio_mmc_host_remove(struct tmio_mmc_host *host); void tmio_mmc_do_data_irq(struct tmio_mmc_host *host); diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 4f62ce6664e0..d2790ff18294 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1145,8 +1145,8 @@ static void tmio_mmc_of_parse(struct platform_device *pdev, pdata->flags |= TMIO_MMC_WRPROTECT_DISABLE; } -struct tmio_mmc_host* -tmio_mmc_host_alloc(struct platform_device *pdev) +struct tmio_mmc_host *tmio_mmc_host_alloc(struct platform_device *pdev, + struct tmio_mmc_data *pdata) { struct tmio_mmc_host *host; struct mmc_host *mmc; @@ -1166,9 +1166,12 @@ tmio_mmc_host_alloc(struct platform_device *pdev) host->ctl = ctl; host->mmc = mmc; host->pdev = pdev; + host->pdata = pdata; host->ops = tmio_mmc_ops; mmc->ops = 
&host->ops; + platform_set_drvdata(pdev, host); + return host; } EXPORT_SYMBOL_GPL(tmio_mmc_host_alloc); @@ -1180,14 +1183,21 @@ void tmio_mmc_host_free(struct tmio_mmc_host *host) EXPORT_SYMBOL_GPL(tmio_mmc_host_free); int tmio_mmc_host_probe(struct tmio_mmc_host *_host, - struct tmio_mmc_data *pdata, const struct tmio_mmc_dma_ops *dma_ops) { struct platform_device *pdev = _host->pdev; + struct tmio_mmc_data *pdata = _host->pdata; struct mmc_host *mmc = _host->mmc; int ret; u32 irq_mask = TMIO_MASK_CMD; + /* + * Check the sanity of mmc->f_min to prevent tmio_mmc_set_clock() from + * looping forever... + */ + if (mmc->f_min == 0) + return -EINVAL; + tmio_mmc_of_parse(pdev, pdata); if (!(pdata->flags & TMIO_MMC_HAS_IDLE_WAIT)) @@ -1197,9 +1207,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, if (ret < 0) return ret; - _host->pdata = pdata; - platform_set_drvdata(pdev, _host); - _host->set_pwr = pdata->set_pwr; _host->set_clk_div = pdata->set_clk_div; @@ -1247,18 +1254,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, if (pdata->flags & TMIO_MMC_MIN_RCAR2) _host->native_hotplug = true; - if (tmio_mmc_clk_enable(_host) < 0) { - mmc->f_max = pdata->hclk; - mmc->f_min = mmc->f_max / 512; - } - - /* - * Check the sanity of mmc->f_min to prevent tmio_mmc_set_clock() from - * looping forever... - */ - if (mmc->f_min == 0) - return -EINVAL; - /* * While using internal tmio hardware logic for card detection, we need * to ensure it stays powered for it to work. 
@@ -1336,8 +1331,6 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - - tmio_mmc_clk_disable(host); } EXPORT_SYMBOL_GPL(tmio_mmc_host_remove); -- cgit v1.2.3 From 6fb294f791af8f491812d4eef6b13a57c9c1de34 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Jan 2018 01:28:03 +0900 Subject: mmc: tmio: move {tmio_}mmc_of_parse() to tmio_mmc_host_alloc() mmc_of_parse() parses various DT properties and sets capability flags accordingly. However, drivers have no chance to run platform init code depending on such flags because mmc_of_parse() is called from tmio_mmc_host_probe(). Move mmc_of_parse() to tmio_mmc_host_alloc() so that drivers can handle capabilities before mmc_add_host(). Move tmio_mmc_of_parse() likewise. Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index d2790ff18294..b096b990ab10 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1152,6 +1152,7 @@ struct tmio_mmc_host *tmio_mmc_host_alloc(struct platform_device *pdev, struct mmc_host *mmc; struct resource *res; void __iomem *ctl; + int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ctl = devm_ioremap_resource(&pdev->dev, res); @@ -1170,8 +1171,20 @@ struct tmio_mmc_host *tmio_mmc_host_alloc(struct platform_device *pdev, host->ops = tmio_mmc_ops; mmc->ops = &host->ops; + ret = mmc_of_parse(host->mmc); + if (ret) { + host = ERR_PTR(ret); + goto free; + } + + tmio_mmc_of_parse(pdev, pdata); + platform_set_drvdata(pdev, host); + return host; +free: + mmc_free_host(mmc); + return host; } EXPORT_SYMBOL_GPL(tmio_mmc_host_alloc); @@ -1198,15 +1211,9 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, if (mmc->f_min == 0) return -EINVAL; - 
tmio_mmc_of_parse(pdev, pdata); - if (!(pdata->flags & TMIO_MMC_HAS_IDLE_WAIT)) _host->write16_hook = NULL; - ret = mmc_of_parse(mmc); - if (ret < 0) - return ret; - _host->set_pwr = pdata->set_pwr; _host->set_clk_div = pdata->set_clk_div; -- cgit v1.2.3 From bc45719c1b1a56047246d44c7e4ed88a8ae702c1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Jan 2018 01:28:04 +0900 Subject: mmc: tmio: remove dma_ops from tmio_mmc_host_probe() argument Drivers need to set up various struct members for tmio_mmc_host before calling tmio_mmc_host_probe(). Do likewise for host->dma_ops instead of passing it as a function argument. Signed-off-by: Masahiro Yamada Reviewed-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 3 ++- drivers/mmc/host/tmio_mmc.c | 2 +- drivers/mmc/host/tmio_mmc.h | 3 +-- drivers/mmc/host/tmio_mmc_core.c | 4 +--- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index e18a1c553df6..80943fa07db6 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -532,6 +532,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, host->clk_update = renesas_sdhi_clk_update; host->clk_disable = renesas_sdhi_clk_disable; host->multi_io_quirk = renesas_sdhi_multi_io_quirk; + host->dma_ops = dma_ops; /* SDR speeds are only available on Gen2+ */ if (mmc_data->flags & TMIO_MMC_MIN_RCAR2) { @@ -575,7 +576,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (ret) goto efree; - ret = tmio_mmc_host_probe(host, dma_ops); + ret = tmio_mmc_host_probe(host); if (ret < 0) goto edisclk; diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 11b87ce54764..43a2ea5cff24 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -104,7 +104,7 @@ static int tmio_mmc_probe(struct platform_device *pdev) host->mmc->f_max = pdata->hclk; host->mmc->f_min = pdata->hclk / 
512; - ret = tmio_mmc_host_probe(host, NULL); + ret = tmio_mmc_host_probe(host); if (ret) goto host_free; diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index b52d7368818d..e7d651352dc9 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -198,8 +198,7 @@ struct tmio_mmc_host { struct tmio_mmc_host *tmio_mmc_host_alloc(struct platform_device *pdev, struct tmio_mmc_data *pdata); void tmio_mmc_host_free(struct tmio_mmc_host *host); -int tmio_mmc_host_probe(struct tmio_mmc_host *host, - const struct tmio_mmc_dma_ops *dma_ops); +int tmio_mmc_host_probe(struct tmio_mmc_host *host); void tmio_mmc_host_remove(struct tmio_mmc_host *host); void tmio_mmc_do_data_irq(struct tmio_mmc_host *host); diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index b096b990ab10..7d8eec24f0ed 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1195,8 +1195,7 @@ void tmio_mmc_host_free(struct tmio_mmc_host *host) } EXPORT_SYMBOL_GPL(tmio_mmc_host_free); -int tmio_mmc_host_probe(struct tmio_mmc_host *_host, - const struct tmio_mmc_dma_ops *dma_ops) +int tmio_mmc_host_probe(struct tmio_mmc_host *_host) { struct platform_device *pdev = _host->pdev; struct tmio_mmc_data *pdata = _host->pdata; @@ -1296,7 +1295,6 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, INIT_WORK(&_host->done, tmio_mmc_done_work); /* See if we also get DMA */ - _host->dma_ops = dma_ops; tmio_mmc_request_dma(_host, pdata); pm_runtime_set_active(&pdev->dev); -- cgit v1.2.3 From 85f9ef8cdfb463e6e8ff9fe8cdcc0aed438b526e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Jan 2018 01:28:05 +0900 Subject: mmc: slot-gpio: add a helper to check capability of GPIO WP detection Like mmc_can_gpio_cd(), mmc_can_gpio_ro() will also be useful for host drivers to know whether GPIO write-protect detection is supported. 
Signed-off-by: Masahiro Yamada Signed-off-by: Ulf Hansson --- drivers/mmc/core/slot-gpio.c | 8 ++++++++ include/linux/mmc/slot-gpio.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index f7c6e0542de7..3698b0576009 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -305,3 +305,11 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, return 0; } EXPORT_SYMBOL(mmc_gpiod_request_ro); + +bool mmc_can_gpio_ro(struct mmc_host *host) +{ + struct mmc_gpio *ctx = host->slot.handler_priv; + + return ctx->ro_gpio ? true : false; +} +EXPORT_SYMBOL(mmc_can_gpio_ro); diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 82f0d289f110..91f1ba0663c8 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -33,5 +33,6 @@ void mmc_gpio_set_cd_isr(struct mmc_host *host, irqreturn_t (*isr)(int irq, void *dev_id)); void mmc_gpiod_request_cd_irq(struct mmc_host *host); bool mmc_can_gpio_cd(struct mmc_host *host); +bool mmc_can_gpio_ro(struct mmc_host *host); #endif -- cgit v1.2.3 From 1910b87f7a9e6d9f9085d36e45dce1e5547c692d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 18 Jan 2018 01:28:06 +0900 Subject: mmc: tmio: refactor .get_ro hook This IP provides the write protect signal level in the status register, but it is also possible to use GPIO for WP. They are exclusive, so it is not efficient to call mmc_gpio_get_ro() every time from tmio_mmc_get_ro() if we know gpio_ro is not used. Check the capability of gpio_ro just once in the probe function, then set mmc_gpio_get_ro to .get_ro if it is the case. 
Signed-off-by: Masahiro Yamada Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 7d8eec24f0ed..6d8719be75a8 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1076,15 +1076,9 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc) { struct tmio_mmc_host *host = mmc_priv(mmc); struct tmio_mmc_data *pdata = host->pdata; - int ret = mmc_gpio_get_ro(mmc); - if (ret >= 0) - return ret; - - ret = !((pdata->flags & TMIO_MMC_WRPROTECT_DISABLE) || - (sd_ctrl_read16_and_16_as_32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT)); - - return ret; + return !((pdata->flags & TMIO_MMC_WRPROTECT_DISABLE) || + (sd_ctrl_read16_and_16_as_32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT)); } static int tmio_multi_io_quirk(struct mmc_card *card, @@ -1247,6 +1241,9 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host) } mmc->max_seg_size = mmc->max_req_size; + if (mmc_can_gpio_ro(mmc)) + _host->ops.get_ro = mmc_gpio_get_ro; + _host->native_hotplug = !(mmc_can_gpio_cd(mmc) || mmc->caps & MMC_CAP_NEEDS_POLL || !mmc_card_is_removable(mmc)); -- cgit v1.2.3 From 8cb68751c115d176ec851ca56ecfbb411568c9e8 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 16 Jan 2018 19:30:14 +0100 Subject: can: af_can: can_rcv(): replace WARN_ONCE by pr_warn_once If an invalid CAN frame is received, from a driver or from a tun interface, a Kernel warning is generated. This patch replaces the WARN_ONCE by a simple pr_warn_once, so that a kernel, booted with panic_on_warn, does not panic. A printk seems to be more appropriate here.
Reported-by: syzbot+4386709c0c1284dca827@syzkaller.appspotmail.com Suggested-by: Dmitry Vyukov Acked-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 003b2d6d655f..ae835382e678 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -721,20 +721,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN, - "PF_CAN: dropped non conform CAN skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) - goto drop; + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); + kfree_skb(skb); + return NET_RX_DROP; + } can_receive(skb, dev); return NET_RX_SUCCESS; - -drop: - kfree_skb(skb); - return NET_RX_DROP; } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, -- cgit v1.2.3 From d4689846881d160a4d12a514e991a740bcb5d65a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 16 Jan 2018 19:30:14 +0100 Subject: can: af_can: canfd_rcv(): replace WARN_ONCE by pr_warn_once If an invalid CANFD frame is received, from a driver or from a tun interface, a Kernel warning is generated. This patch replaces the WARN_ONCE by a simple pr_warn_once, so that a kernel, booted with panic_on_warn, does not panic. A printk seems to be more appropriate here.
Reported-by: syzbot+e3b775f40babeff6e68b@syzkaller.appspotmail.com Suggested-by: Dmitry Vyukov Acked-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index ae835382e678..4d7f988a3130 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -738,20 +738,16 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (WARN_ONCE(dev->type != ARPHRD_CAN || - skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN, - "PF_CAN: dropped non conform CAN FD skbuf: " - "dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len)) - goto drop; + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + dev->type, skb->len, cfd->len); + kfree_skb(skb); + return NET_RX_DROP; + } can_receive(skb, dev); return NET_RX_SUCCESS; - -drop: - kfree_skb(skb); - return NET_RX_DROP; } /* -- cgit v1.2.3 From 0ff8e080b18d1d2dbe5c866d5f31c27ab806a785 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 20 Dec 2017 14:57:19 -0800 Subject: x86/intel_rdt: Update documentation With more flag bits in /proc/cpuinfo for RDT, it's better to classify the bits for readability. Some previously missing bits are added as well. 
Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-2-git-send-email-fenghua.yu@intel.com --- Documentation/x86/intel_rdt_ui.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt index 6851854cf69d..1ad77b1e3e79 100644 --- a/Documentation/x86/intel_rdt_ui.txt +++ b/Documentation/x86/intel_rdt_ui.txt @@ -7,7 +7,13 @@ Tony Luck Vikas Shivappa This feature is enabled by the CONFIG_INTEL_RDT Kconfig and the -X86 /proc/cpuinfo flag bits "rdt", "cqm", "cat_l3" and "cdp_l3". +X86 /proc/cpuinfo flag bits: +RDT (Resource Director Technology) Allocation - "rdt_a" +CAT (Cache Allocation Technology) - "cat_l3", "cat_l2" +CDP (Code and Data Prioritization ) - "cdp_l3" +CQM (Cache QoS Monitoring) - "cqm_llc", "cqm_occup_llc" +MBM (Memory Bandwidth Monitoring) - "cqm_mbm_total", "cqm_mbm_local" +MBA (Memory Bandwidth Allocation) - "mba" To use the feature mount the file system: -- cgit v1.2.3 From aa55d5a4bd919f26fce519c470d11a58541c6aa7 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 20 Dec 2017 14:57:20 -0800 Subject: x86/intel_rdt: Add L2CDP support in documentation L2 and L3 Code and Data Prioritization (CDP) can be enabled separately. The existing mount parameter "cdp" is only for enabling L3 CDP and will be kept for backwards compatibility. Add a new mount parameter 'cdpl2' for L2 CDP.
[ tglx: Made changelog readable ] Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-3-git-send-email-fenghua.yu@intel.com --- Documentation/x86/intel_rdt_ui.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt index 1ad77b1e3e79..756fd76b78a6 100644 --- a/Documentation/x86/intel_rdt_ui.txt +++ b/Documentation/x86/intel_rdt_ui.txt @@ -10,18 +10,21 @@ This feature is enabled by the CONFIG_INTEL_RDT Kconfig and the X86 /proc/cpuinfo flag bits: RDT (Resource Director Technology) Allocation - "rdt_a" CAT (Cache Allocation Technology) - "cat_l3", "cat_l2" -CDP (Code and Data Prioritization ) - "cdp_l3" +CDP (Code and Data Prioritization ) - "cdp_l3", "cdp_l2" CQM (Cache QoS Monitoring) - "cqm_llc", "cqm_occup_llc" MBM (Memory Bandwidth Monitoring) - "cqm_mbm_total", "cqm_mbm_local" MBA (Memory Bandwidth Allocation) - "mba" To use the feature mount the file system: - # mount -t resctrl resctrl [-o cdp] /sys/fs/resctrl + # mount -t resctrl resctrl [-o cdp[,cdpl2]] /sys/fs/resctrl mount options are: "cdp": Enable code/data prioritization in L3 cache allocations. +"cdpl2": Enable code/data prioritization in L2 cache allocations. + +L2 and L3 CDP are controlled seperately. RDT features are orthogonal. A particular system may support only monitoring, only control, or both monitoring and control. 
-- cgit v1.2.3 From a511e7935378ef1f321456a90beae2a2632d3d83 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 20 Dec 2017 14:57:21 -0800 Subject: x86/intel_rdt: Enumerate L2 Code and Data Prioritization (CDP) feature L2 Code and Data Prioritization (CDP) is enumerated in CPUID(EAX=0x10, ECX=0x2):ECX.bit2 Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-4-git-send-email-fenghua.yu@intel.com --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 25b9375c1484..67bbfaa1448b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -206,6 +206,7 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index d0e69769abfd..df4d8f7595a5 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, + { X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 }, { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 }, { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { 
X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, -- cgit v1.2.3 From def10853930a82456ab862a3a8292a3a16c386e7 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 20 Dec 2017 14:57:22 -0800 Subject: x86/intel_rdt: Add two new resources for L2 Code and Data Prioritization (CDP) L2 data and L2 code are added as new resources in rdt_resources_all[] and data in the resources are configured. When L2 CDP is enabled, the schemata will have the two resources in this format: L2DATA:l2id0=xxxx;l2id1=xxxx;.... L2CODE:l2id0=xxxx;l2id1=xxxx;.... xxxx represent CBM (Cache Bit Mask) values in the schemata, similar to all others (L2 CAT/L3 CAT/L3 CDP). Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-5-git-send-email-fenghua.yu@intel.com --- arch/x86/kernel/cpu/intel_rdt.c | 66 ++++++++++++++++++++++++++++++++++------- arch/x86/kernel/cpu/intel_rdt.h | 2 ++ 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 99442370de40..5202da08fd6f 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -135,6 +135,40 @@ struct rdt_resource rdt_resources_all[] = { .format_str = "%d=%0*x", .fflags = RFTYPE_RES_CACHE, }, + [RDT_RESOURCE_L2DATA] = + { + .rid = RDT_RESOURCE_L2DATA, + .name = "L2DATA", + .domains = domain_init(RDT_RESOURCE_L2DATA), + .msr_base = IA32_L2_CBM_BASE, + .msr_update = cat_wrmsr, + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + .cbm_idx_mult = 2, + .cbm_idx_offset = 0, + }, + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, + }, + [RDT_RESOURCE_L2CODE] = + { + .rid = RDT_RESOURCE_L2CODE, + .name = "L2CODE", + .domains = domain_init(RDT_RESOURCE_L2CODE), + .msr_base = IA32_L2_CBM_BASE, + .msr_update = cat_wrmsr, + .cache_level = 2, + .cache = { + .min_cbm_bits = 1, + .cbm_idx_mult = 2, 
+ .cbm_idx_offset = 1, + }, + .parse_ctrlval = parse_cbm, + .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, + }, [RDT_RESOURCE_MBA] = { .rid = RDT_RESOURCE_MBA, @@ -259,15 +293,15 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) r->alloc_enabled = true; } -static void rdt_get_cdp_l3_config(int type) +static void rdt_get_cdp_config(int level, int type) { - struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r_l = &rdt_resources_all[level]; struct rdt_resource *r = &rdt_resources_all[type]; - r->num_closid = r_l3->num_closid / 2; - r->cache.cbm_len = r_l3->cache.cbm_len; - r->default_ctrl = r_l3->default_ctrl; - r->cache.shareable_bits = r_l3->cache.shareable_bits; + r->num_closid = r_l->num_closid / 2; + r->cache.cbm_len = r_l->cache.cbm_len; + r->default_ctrl = r_l->default_ctrl; + r->cache.shareable_bits = r_l->cache.shareable_bits; r->data_width = (r->cache.cbm_len + 3) / 4; r->alloc_capable = true; /* @@ -277,6 +311,18 @@ static void rdt_get_cdp_l3_config(int type) r->alloc_enabled = false; } +static void rdt_get_cdp_l3_config(void) +{ + rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA); + rdt_get_cdp_config(RDT_RESOURCE_L3, RDT_RESOURCE_L3CODE); +} + +static void rdt_get_cdp_l2_config(void) +{ + rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA); + rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE); +} + static int get_cache_id(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); @@ -729,15 +775,15 @@ static __init bool get_rdt_alloc_resources(void) if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]); - if (rdt_cpu_has(X86_FEATURE_CDP_L3)) { - rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA); - rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE); - } + if (rdt_cpu_has(X86_FEATURE_CDP_L3)) + rdt_get_cdp_l3_config(); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { /* CPUID 0x10.2 fields are same format at 0x10.1 */ 
rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]); + if (rdt_cpu_has(X86_FEATURE_CDP_L2)) + rdt_get_cdp_l2_config(); ret = true; } diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 3397244984f5..19ffc5a7c116 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -357,6 +357,8 @@ enum { RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE, RDT_RESOURCE_L2, + RDT_RESOURCE_L2DATA, + RDT_RESOURCE_L2CODE, RDT_RESOURCE_MBA, /* Must be the last */ -- cgit v1.2.3 From 99adde9b370de8e07ef76630c6f60dbf586cdf0e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 20 Dec 2017 14:57:23 -0800 Subject: x86/intel_rdt: Enable L2 CDP in MSR IA32_L2_QOS_CFG Bit 0 in MSR IA32_L2_QOS_CFG (0xc82) is L2 CDP enable bit. By default, the bit is zero, i.e. L2 CAT is enabled, and L2 CDP is disabled. When the resctrl mount parameter "cdpl2" is given, the bit is set to 1 and L2 CDP is enabled. In L2 CDP mode, the L2 CAT mask MSRs are re-mapped into interleaved pairs of mask MSRs for code (referenced by an odd CLOSID) and data (referenced by an even CLOSID). 
Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-6-git-send-email-fenghua.yu@intel.com --- arch/x86/kernel/cpu/intel_rdt.h | 3 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 117 ++++++++++++++++++++++++------- 2 files changed, 94 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 19ffc5a7c116..3fd7a70ee04a 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -7,12 +7,15 @@ #include #define IA32_L3_QOS_CFG 0xc81 +#define IA32_L2_QOS_CFG 0xc82 #define IA32_L3_CBM_BASE 0xc90 #define IA32_L2_CBM_BASE 0xd10 #define IA32_MBA_THRTL_BASE 0xd50 #define L3_QOS_CDP_ENABLE 0x01ULL +#define L2_QOS_CDP_ENABLE 0x01ULL + /* * Event IDs are used to program IA32_QM_EVTSEL before reading event * counter from IA32_QM_CTR diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 64c5ff97ee0d..bdab7d2f51af 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -990,6 +990,7 @@ out_destroy: kernfs_remove(kn); return ret; } + static void l3_qos_cfg_update(void *arg) { bool *enable = arg; @@ -997,8 +998,17 @@ static void l3_qos_cfg_update(void *arg) wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); } -static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) +static void l2_qos_cfg_update(void *arg) { + bool *enable = arg; + + wrmsrl(IA32_L2_QOS_CFG, *enable ? 
L2_QOS_CDP_ENABLE : 0ULL); +} + +static int set_cache_qos_cfg(int level, bool enable) +{ + void (*update)(void *arg); + struct rdt_resource *r_l; cpumask_var_t cpu_mask; struct rdt_domain *d; int cpu; @@ -1006,16 +1016,24 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) return -ENOMEM; - list_for_each_entry(d, &r->domains, list) { + if (level == RDT_RESOURCE_L3) + update = l3_qos_cfg_update; + else if (level == RDT_RESOURCE_L2) + update = l2_qos_cfg_update; + else + return -EINVAL; + + r_l = &rdt_resources_all[level]; + list_for_each_entry(d, &r_l->domains, list) { /* Pick one CPU from each domain instance to update MSR */ cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ if (cpumask_test_cpu(cpu, cpu_mask)) - l3_qos_cfg_update(&enable); + update(&enable); /* Update QOS_CFG MSR on all other cpus in cpu_mask. */ - smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1); + smp_call_function_many(cpu_mask, update, &enable, 1); put_cpu(); free_cpumask_var(cpu_mask); @@ -1023,52 +1041,99 @@ static int set_l3_qos_cfg(struct rdt_resource *r, bool enable) return 0; } -static int cdp_enable(void) +static int cdp_enable(int level, int data_type, int code_type) { - struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA]; - struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE]; - struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3]; + struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; + struct rdt_resource *r_lcode = &rdt_resources_all[code_type]; + struct rdt_resource *r_l = &rdt_resources_all[level]; int ret; - if (!r_l3->alloc_capable || !r_l3data->alloc_capable || - !r_l3code->alloc_capable) + if (!r_l->alloc_capable || !r_ldata->alloc_capable || + !r_lcode->alloc_capable) return -EINVAL; - ret = set_l3_qos_cfg(r_l3, true); + ret = set_cache_qos_cfg(level, true); if 
(!ret) { - r_l3->alloc_enabled = false; - r_l3data->alloc_enabled = true; - r_l3code->alloc_enabled = true; + r_l->alloc_enabled = false; + r_ldata->alloc_enabled = true; + r_lcode->alloc_enabled = true; } return ret; } -static void cdp_disable(void) +static int cdpl3_enable(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, + RDT_RESOURCE_L3CODE); +} + +static int cdpl2_enable(void) +{ + return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, + RDT_RESOURCE_L2CODE); +} + +static void cdp_disable(int level, int data_type, int code_type) +{ + struct rdt_resource *r = &rdt_resources_all[level]; r->alloc_enabled = r->alloc_capable; - if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) { - rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled = false; - rdt_resources_all[RDT_RESOURCE_L3CODE].alloc_enabled = false; - set_l3_qos_cfg(r, false); + if (rdt_resources_all[data_type].alloc_enabled) { + rdt_resources_all[data_type].alloc_enabled = false; + rdt_resources_all[code_type].alloc_enabled = false; + set_cache_qos_cfg(level, false); } } +static void cdpl3_disable(void) +{ + cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE); +} + +static void cdpl2_disable(void) +{ + cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE); +} + +static void cdp_disable_all(void) +{ + if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled) + cdpl3_disable(); + if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) + cdpl2_disable(); +} + static int parse_rdtgroupfs_options(char *data) { char *token, *o = data; int ret = 0; while ((token = strsep(&o, ",")) != NULL) { - if (!*token) - return -EINVAL; + if (!*token) { + ret = -EINVAL; + goto out; + } - if (!strcmp(token, "cdp")) - ret = cdp_enable(); + if (!strcmp(token, "cdp")) { + ret = cdpl3_enable(); + if (ret) + goto out; + } else if (!strcmp(token, "cdpl2")) { + ret = cdpl2_enable(); + if (ret) + goto out; + } 
else { + ret = -EINVAL; + goto out; + } } + return 0; + +out: + pr_err("Invalid mount option \"%s\"\n", token); + return ret; } @@ -1223,7 +1288,7 @@ out_mongrp: out_info: kernfs_remove(kn_info); out_cdp: - cdp_disable(); + cdp_disable_all(); out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); @@ -1383,7 +1448,7 @@ static void rdt_kill_sb(struct super_block *sb) /*Put everything back to default values. */ for_each_alloc_enabled_rdt_resource(r) reset_all_ctrls(r); - cdp_disable(); + cdp_disable_all(); rmdir_all_sub(); static_branch_disable_cpuslocked(&rdt_alloc_enable_key); static_branch_disable_cpuslocked(&rdt_mon_enable_key); -- cgit v1.2.3 From 31516de306c0c9235156cdc7acb976ea21f1f646 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 20 Dec 2017 14:57:24 -0800 Subject: x86/intel_rdt: Add command line parameter to control L2_CDP L2 CDP can be controlled by kernel parameter "rdt=". If "rdt=l2cdp", L2 CDP is turned on. If "rdt=!l2cdp", L2 CDP is turned off. Signed-off-by: Fenghua Yu Signed-off-by: Thomas Gleixner Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: Vikas" Cc: Sai Praneeth" Cc: Reinette" Link: https://lkml.kernel.org/r/1513810644-78015-7-git-send-email-fenghua.yu@intel.com --- Documentation/admin-guide/kernel-parameters.txt | 3 ++- arch/x86/kernel/cpu/intel_rdt.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 46b26bfee27b..fde058ca8419 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3682,7 +3682,8 @@ rdt= [HW,X86,RDT] Turn on/off individual RDT features. List is: - cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, mba. + cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, + mba. E.g. 
to turn on cmt and turn off mba use: rdt=cmt,!mba diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 5202da08fd6f..410629f10ad3 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -691,6 +691,7 @@ enum { RDT_FLAG_L3_CAT, RDT_FLAG_L3_CDP, RDT_FLAG_L2_CAT, + RDT_FLAG_L2_CDP, RDT_FLAG_MBA, }; @@ -713,6 +714,7 @@ static struct rdt_options rdt_options[] __initdata = { RDT_OPT(RDT_FLAG_L3_CAT, "l3cat", X86_FEATURE_CAT_L3), RDT_OPT(RDT_FLAG_L3_CDP, "l3cdp", X86_FEATURE_CDP_L3), RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2), + RDT_OPT(RDT_FLAG_L2_CDP, "l2cdp", X86_FEATURE_CDP_L2), RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) -- cgit v1.2.3 From 388fdb8f882af67ff8394d9420c1e0e42ba35619 Mon Sep 17 00:00:00 2001 From: Ian Douglas Scott Date: Tue, 16 Jan 2018 15:34:50 -0800 Subject: ALSA: usb-audio: Support changing input on Sound Blaster E1 The E1 has two headphone jacks, one of which can be set as a microphone input. In the default mode, it uses the built-in microphone as an input. By sending a special command, the second headphone jack is instead used as an input. This might work with the E3 as well, but I don't have one of those to test it. 
Signed-off-by: Ian Douglas Scott Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index e1e7ce9ab217..e6359d341878 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -27,6 +27,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -1721,6 +1722,83 @@ static int snd_microii_controls_create(struct usb_mixer_interface *mixer) return 0; } +/* Creative Sound Blaster E1 */ + +static int snd_soundblaster_e1_switch_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + ucontrol->value.integer.value[0] = kcontrol->private_value; + return 0; +} + +static int snd_soundblaster_e1_switch_update(struct usb_mixer_interface *mixer, + unsigned char state) +{ + struct snd_usb_audio *chip = mixer->chip; + int err; + unsigned char buff[2]; + + buff[0] = 0x02; + buff[1] = state ? 0x02 : 0x00; + + err = snd_usb_lock_shutdown(chip); + if (err < 0) + return err; + err = snd_usb_ctl_msg(chip->dev, + usb_sndctrlpipe(chip->dev, 0), HID_REQ_SET_REPORT, + USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, + 0x0202, 3, buff, 2); + snd_usb_unlock_shutdown(chip); + return err; +} + +static int snd_soundblaster_e1_switch_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct usb_mixer_elem_list *list = snd_kcontrol_chip(kcontrol); + unsigned char value = !!ucontrol->value.integer.value[0]; + int err; + + if (kcontrol->private_value == value) + return 0; + kcontrol->private_value = value; + err = snd_soundblaster_e1_switch_update(list->mixer, value); + return err < 0 ? 
err : 1; +} + +static int snd_soundblaster_e1_switch_resume(struct usb_mixer_elem_list *list) +{ + return snd_soundblaster_e1_switch_update(list->mixer, + list->kctl->private_value); +} + +static int snd_soundblaster_e1_switch_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + static const char *const texts[2] = { + "Mic", "Aux" + }; + + return snd_ctl_enum_info(uinfo, 1, ARRAY_SIZE(texts), texts); +} + +static struct snd_kcontrol_new snd_soundblaster_e1_input_switch = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Input Source", + .info = snd_soundblaster_e1_switch_info, + .get = snd_soundblaster_e1_switch_get, + .put = snd_soundblaster_e1_switch_put, + .private_value = 0, +}; + +static int snd_soundblaster_e1_switch_create(struct usb_mixer_interface *mixer) +{ + return add_single_ctl_with_resume(mixer, 0, + snd_soundblaster_e1_switch_resume, + &snd_soundblaster_e1_input_switch, + NULL); +} + int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) { int err = 0; @@ -1802,6 +1880,10 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) case USB_ID(0x1235, 0x800c): /* Focusrite Scarlett 18i20 */ err = snd_scarlett_controls_create(mixer); break; + + case USB_ID(0x041e, 0x323b): /* Creative Sound Blaster E1 */ + err = snd_soundblaster_e1_switch_create(mixer); + break; } return err; -- cgit v1.2.3 From cc01572e2fb080e279ca625f239aca61f435ebf3 Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Wed, 17 Jan 2018 15:52:41 +0200 Subject: xfrm: Add SA to hardware at the end of xfrm_state_construct() Current code configures the hardware with a new SA before the state has been fully initialized. During this time interval, an incoming ESP packet can cause a crash due to a NULL dereference. More specifically, xfrm_input() considers the packet as valid, and yet, anti-replay mechanism is not initialized. 
Move hardware configuration to the end of xfrm_state_construct(), and mark the state as valid once the SA is fully initialized. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Aviad Yehezkel Signed-off-by: Aviv Heller Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 10 +++++++--- net/xfrm/xfrm_user.c | 18 +++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 429957412633..2d486492acdb 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2272,8 +2272,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) goto error; } - x->km.state = XFRM_STATE_VALID; - error: return err; } @@ -2282,7 +2280,13 @@ EXPORT_SYMBOL(__xfrm_init_state); int xfrm_init_state(struct xfrm_state *x) { - return __xfrm_init_state(x, true, false); + int err; + + err = __xfrm_init_state(x, true, false); + if (!err) + x->km.state = XFRM_STATE_VALID; + + return err; } EXPORT_SYMBOL(xfrm_init_state); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index bdb48e5dba04..7f52b8eb177d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } - if (attrs[XFRMA_OFFLOAD_DEV]) { - err = xfrm_dev_state_add(net, x, - nla_data(attrs[XFRMA_OFFLOAD_DEV])); - if (err) - goto error; - } - if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) goto error; @@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, /* override default values from above */ xfrm_update_ae_params(x, attrs, 0); + /* configure the hardware if offload is requested */ + if (attrs[XFRMA_OFFLOAD_DEV]) { + err = xfrm_dev_state_add(net, x, + nla_data(attrs[XFRMA_OFFLOAD_DEV])); + if (err) + goto error; + } + return x; error: @@ -662,6 +663,9 @@ static int xfrm_add_sa(struct 
sk_buff *skb, struct nlmsghdr *nlh, goto out; } + if (x->km.state == XFRM_STATE_VOID) + x->km.state = XFRM_STATE_VALID; + c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; -- cgit v1.2.3 From a0c9259dc4e1923a98356967ce8b732da1979df8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 Jan 2018 16:01:47 +0100 Subject: irq/matrix: Spread interrupts on allocation Keith reported an issue with vector space exhaustion on a server machine which is caused by the i40e driver allocating 168 MSI interrupts when the driver is initialized, even when most of these interrupts are not used at all. The x86 vector allocation code tries to avoid the immediate allocation with the reservation mode, but the card uses MSI and does not support MSI entry masking, which prevents reservation mode and requires immediate vector allocation. The matrix allocator is a bit naive and prefers the first CPU in the cpumask which describes the possible target CPUs for an allocation. That results in allocating all 168 vectors on CPU0 which later causes vector space exhaustion when the NVMe driver tries to allocate managed interrupts on each CPU for the per CPU queues. Avoid this by finding the CPU which has the lowest vector allocation count to spread out the non-managed interrupts across the possible target CPUs.
Fixes: 2f75d9e1c905 ("genirq: Implement bitmap matrix allocator") Reported-by: Keith Busch Signed-off-by: Thomas Gleixner Tested-by: Keith Busch Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801171557330.1777@nanos --- kernel/irq/matrix.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c index 0ba0dd8863a7..5187dfe809ac 100644 --- a/kernel/irq/matrix.c +++ b/kernel/irq/matrix.c @@ -321,15 +321,23 @@ void irq_matrix_remove_reserved(struct irq_matrix *m) int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, bool reserved, unsigned int *mapped_cpu) { - unsigned int cpu; + unsigned int cpu, best_cpu, maxavl = 0; + struct cpumap *cm; + unsigned int bit; + best_cpu = UINT_MAX; for_each_cpu(cpu, msk) { - struct cpumap *cm = per_cpu_ptr(m->maps, cpu); - unsigned int bit; + cm = per_cpu_ptr(m->maps, cpu); - if (!cm->online) + if (!cm->online || cm->available <= maxavl) continue; + best_cpu = cpu; + maxavl = cm->available; + } + + if (maxavl) { + cm = per_cpu_ptr(m->maps, best_cpu); bit = matrix_alloc_area(m, cm, 1, false); if (bit < m->alloc_end) { cm->allocated++; @@ -338,8 +346,8 @@ int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, m->global_available--; if (reserved) m->global_reserved--; - *mapped_cpu = cpu; - trace_irq_matrix_alloc(bit, cpu, m, cm); + *mapped_cpu = best_cpu; + trace_irq_matrix_alloc(bit, best_cpu, m, cm); return bit; } } -- cgit v1.2.3 From e29a22a86a20ea7651ff8c731ab034c31bd9764e Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Wed, 17 Jan 2018 19:43:24 +0100 Subject: ASoC: Intel: remove select on non-existing SND_SOC_INTEL_COMMON SND_SST_ATOM_HIFI2_PLATFORM_PCI selects SND_SOC_INTEL_COMMON, which does not exist anymore. So remove this select.
Fixes: c6059879be29 ("ASoC: Intel: Fix Kconfig with top-level selector") Signed-off-by: Corentin Labbe Acked-by: Pierre-Louis Bossart Signed-off-by: Mark Brown --- sound/soc/intel/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index b0bd1938b71e..f2c9e8c5970a 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -77,7 +77,6 @@ config SND_SST_ATOM_HIFI2_PLATFORM_PCI depends on X86 && PCI select SND_SST_IPC_PCI select SND_SOC_COMPRESS - select SND_SOC_INTEL_COMMON help If you have a Intel Medfield or Merrifield/Edison platform, then enable this option by saying Y or m. Distros will typically not -- cgit v1.2.3 From f23d74f6c66c3697e032550eeef3f640391a3a7d Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 17 Jan 2018 17:41:41 -0600 Subject: x86/mm: Rework wbinvd, hlt operation in stop_this_cpu() Some issues have been reported with the for loop in stop_this_cpu() that issues the 'wbinvd; hlt' sequence. Reverting this sequence to halt() has been shown to resolve the issue. However, the wbinvd is needed when running with SME. The reason for the wbinvd is to prevent cache flush races between encrypted and non-encrypted entries that have the same physical address. This can occur when kexec'ing from memory encryption active to inactive or vice-versa. The important thing is to not have outside of kernel text memory references (such as stack usage), so the usage of the native_*() functions is needed since these expand as inline asm sequences. So instead of reverting the change, rework the sequence. Move the wbinvd instruction outside of the for loop as native_wbinvd() and make its execution conditional on X86_FEATURE_SME. In the for loop, change the asm 'wbinvd; hlt' sequence back to a halt sequence but use the native_halt() call. 
Fixes: bba4ed011a52 ("x86/mm, kexec: Allow kexec to be used with SME") Reported-by: Dave Young Signed-off-by: Tom Lendacky Signed-off-by: Thomas Gleixner Tested-by: Dave Young Cc: Juergen Gross Cc: Tony Luck Cc: Yu Chen Cc: Baoquan He Cc: Linus Torvalds Cc: kexec@lists.infradead.org Cc: ebiederm@redhat.com Cc: Borislav Petkov Cc: Rui Zhang Cc: Arjan van de Ven Cc: Boris Ostrovsky Cc: Dan Williams Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180117234141.21184.44067.stgit@tlendack-t1.amdoffice.net --- arch/x86/kernel/process.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 832a6acd730f..cb368c2a22ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -380,19 +380,24 @@ void stop_this_cpu(void *dummy) disable_local_APIC(); mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); + /* + * Use wbinvd on processors that support SME. This provides support + * for performing a successful kexec when going from SME inactive + * to SME active (or vice-versa). The cache must be cleared so that + * if there are entries with the same physical address, both with and + * without the encryption bit, they don't race each other when flushed + * and potentially end up with the wrong entry being committed to + * memory. + */ + if (boot_cpu_has(X86_FEATURE_SME)) + native_wbinvd(); for (;;) { /* - * Use wbinvd followed by hlt to stop the processor. This - * provides support for kexec on a processor that supports - * SME. With kexec, going from SME inactive to SME active - * requires clearing cache entries so that addresses without - * the encryption bit set don't corrupt the same physical - * address that has the encryption bit set when caches are - * flushed. To achieve this a wbinvd is performed followed by - * a hlt. Even if the processor is not in the kexec/SME - * scenario this only adds a wbinvd to a halting processor. 
+ * Use native_halt() so that memory contents don't change + * (stack usage and variables) after possibly issuing the + * native_wbinvd() above. */ - asm volatile("wbinvd; hlt" : : : "memory"); + native_halt(); } } -- cgit v1.2.3 From 64f29d1bc9fb8196df3d0f1df694245230e208c0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 17 Jan 2018 07:14:12 -0800 Subject: lockdep: Assign lock keys on registration Lockdep is assigning lock keys when a lock was looked up. This is unnecessary; if the lock has never been registered then it is known that it is not locked. It also complicates the calling convention. Switch to assigning the lock key in register_lock_class(). Signed-off-by: Matthew Wilcox Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: "David S. Miller" Link: https://lkml.kernel.org/r/20180117151414.23686-2-willy@infradead.org --- kernel/locking/lockdep.c | 76 +++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 5fa1324a4f29..472547dd45c3 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -647,18 +647,12 @@ static int count_matching_names(struct lock_class *new_class) return count + 1; } -/* - * Register a lock's class in the hash-table, if the class is not present - * yet. Otherwise we look it up. We cache the result in the lock object - * itself, so actual lookup of the hash should be once per lock object. - */ static inline struct lock_class * look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) { struct lockdep_subclass_key *key; struct hlist_head *hash_head; struct lock_class *class; - bool is_static = false; if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { debug_locks_off(); @@ -671,24 +665,11 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) } /* - * Static locks do not have their class-keys yet - for them the key - * is the lock object itself. 
If the lock is in the per cpu area, - * the canonical address of the lock (per cpu offset removed) is - * used. + * If it is not initialised then it has never been locked, + * so it won't be present in the hash table. */ - if (unlikely(!lock->key)) { - unsigned long can_addr, addr = (unsigned long)lock; - - if (__is_kernel_percpu_address(addr, &can_addr)) - lock->key = (void *)can_addr; - else if (__is_module_percpu_address(addr, &can_addr)) - lock->key = (void *)can_addr; - else if (static_obj(lock)) - lock->key = (void *)lock; - else - return ERR_PTR(-EINVAL); - is_static = true; - } + if (unlikely(!lock->key)) + return NULL; /* * NOTE: the class-key must be unique. For dynamic locks, a static @@ -720,7 +701,35 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) } } - return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); + return NULL; +} + +/* + * Static locks do not have their class-keys yet - for them the key is + * the lock object itself. If the lock is in the per cpu area, the + * canonical address of the lock (per cpu offset removed) is used. + */ +static bool assign_lock_key(struct lockdep_map *lock) +{ + unsigned long can_addr, addr = (unsigned long)lock; + + if (__is_kernel_percpu_address(addr, &can_addr)) + lock->key = (void *)can_addr; + else if (__is_module_percpu_address(addr, &can_addr)) + lock->key = (void *)can_addr; + else if (static_obj(lock)) + lock->key = (void *)lock; + else { + /* Debug-check: all keys must be persistent! 
*/ + debug_locks_off(); + pr_err("INFO: trying to register non-static key.\n"); + pr_err("the code is fine but needs lockdep annotation.\n"); + pr_err("turning off the locking correctness validator.\n"); + dump_stack(); + return false; + } + + return true; } /* @@ -738,18 +747,13 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) DEBUG_LOCKS_WARN_ON(!irqs_disabled()); class = look_up_lock_class(lock, subclass); - if (likely(!IS_ERR_OR_NULL(class))) + if (likely(class)) goto out_set_class_cache; - /* - * Debug-check: all keys must be persistent! - */ - if (IS_ERR(class)) { - debug_locks_off(); - printk("INFO: trying to register non-static key.\n"); - printk("the code is fine but needs lockdep annotation.\n"); - printk("turning off the locking correctness validator.\n"); - dump_stack(); + if (!lock->key) { + if (!assign_lock_key(lock)) + return NULL; + } else if (!static_obj(lock->key)) { return NULL; } @@ -3498,7 +3502,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) * Clearly if the lock hasn't been acquired _ever_, we're not * holding it either, so report failure. */ - if (IS_ERR_OR_NULL(class)) + if (!class) return 0; /* @@ -4294,7 +4298,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) * If the class exists we look it up and zap it: */ class = look_up_lock_class(lock, j); - if (!IS_ERR_OR_NULL(class)) + if (class) zap_class(class); } /* -- cgit v1.2.3 From 08f36ff642342fb058212099757cb5d40f158c2a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 17 Jan 2018 07:14:13 -0800 Subject: lockdep: Make lockdep checking constant There are several places in the kernel which would like to pass a const pointer to lockdep_is_held(). Constify the entire path so nobody has to trick the compiler. Signed-off-by: Matthew Wilcox Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: "David S. 
Miller" Link: https://lkml.kernel.org/r/20180117151414.23686-3-willy@infradead.org --- include/linux/lockdep.h | 4 ++-- kernel/locking/lockdep.c | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 3251d9c0d313..864d6fc60fa6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -337,9 +337,9 @@ extern void lock_release(struct lockdep_map *lock, int nested, /* * Same "read" as for lock_acquire(), except -1 means any. */ -extern int lock_is_held_type(struct lockdep_map *lock, int read); +extern int lock_is_held_type(const struct lockdep_map *lock, int read); -static inline int lock_is_held(struct lockdep_map *lock) +static inline int lock_is_held(const struct lockdep_map *lock) { return lock_is_held_type(lock, -1); } diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 472547dd45c3..b7a307b53704 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -648,7 +648,7 @@ static int count_matching_names(struct lock_class *new_class) } static inline struct lock_class * -look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) +look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) { struct lockdep_subclass_key *key; struct hlist_head *hash_head; @@ -3276,7 +3276,7 @@ print_lock_nested_lock_not_held(struct task_struct *curr, return 0; } -static int __lock_is_held(struct lockdep_map *lock, int read); +static int __lock_is_held(const struct lockdep_map *lock, int read); /* * This gets called for every mutex_lock*()/spin_lock*() operation. 
@@ -3485,13 +3485,14 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, return 0; } -static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) +static int match_held_lock(const struct held_lock *hlock, + const struct lockdep_map *lock) { if (hlock->instance == lock) return 1; if (hlock->references) { - struct lock_class *class = lock->class_cache[0]; + const struct lock_class *class = lock->class_cache[0]; if (!class) class = look_up_lock_class(lock, 0); @@ -3727,7 +3728,7 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) return 1; } -static int __lock_is_held(struct lockdep_map *lock, int read) +static int __lock_is_held(const struct lockdep_map *lock, int read) { struct task_struct *curr = current; int i; @@ -3941,7 +3942,7 @@ void lock_release(struct lockdep_map *lock, int nested, } EXPORT_SYMBOL_GPL(lock_release); -int lock_is_held_type(struct lockdep_map *lock, int read) +int lock_is_held_type(const struct lockdep_map *lock, int read) { unsigned long flags; int ret = 0; -- cgit v1.2.3 From 05b93801a23c21a6f355f4c492c51715d6ccc96d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 17 Jan 2018 07:14:14 -0800 Subject: lockdep: Convert some users to const These users of lockdep_is_held() either wanted lockdep_is_held to take a const pointer, or would benefit from providing a const pointer. Signed-off-by: Matthew Wilcox Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: "David S. 
Miller" Link: https://lkml.kernel.org/r/20180117151414.23686-4-willy@infradead.org --- include/linux/backing-dev.h | 2 +- include/linux/srcu.h | 4 ++-- include/net/sock.h | 4 +--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e54e7e0033eb..3e4ce54d84ab 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -332,7 +332,7 @@ static inline bool inode_to_wb_is_valid(struct inode *inode) * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the * associated wb's list_lock. */ -static inline struct bdi_writeback *inode_to_wb(struct inode *inode) +static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 62be8966e837..33c1c698df09 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -92,7 +92,7 @@ void synchronize_srcu(struct srcu_struct *sp); * relies on normal RCU, it can be called from the CPU which * is in the idle loop from an RCU point of view or offline. 
*/ -static inline int srcu_read_lock_held(struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *sp) { if (!debug_lockdep_rcu_enabled()) return 1; @@ -101,7 +101,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -static inline int srcu_read_lock_held(struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *sp) { return 1; } diff --git a/include/net/sock.h b/include/net/sock.h index 7a7b14e9628a..c4a424fe6fdd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1445,10 +1445,8 @@ do { \ } while (0) #ifdef CONFIG_LOCKDEP -static inline bool lockdep_sock_is_held(const struct sock *csk) +static inline bool lockdep_sock_is_held(const struct sock *sk) { - struct sock *sk = (struct sock *)csk; - return lockdep_is_held(&sk->sk_lock) || lockdep_is_held(&sk->sk_lock.slock); } -- cgit v1.2.3 From 8af5748fa48698a433ba9a1766204bda283dffa8 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 17 Jan 2018 13:48:54 -0200 Subject: ASoC: sgtl5000: Do not print error on probe deferral When the MCLK is not yet available when the codec is probed, probe deferral will happen and in this case we should not print an error message. 
Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- sound/soc/codecs/sgtl5000.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index f2bb4feba3b6..633cdcfc933d 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1332,10 +1332,13 @@ static int sgtl5000_i2c_probe(struct i2c_client *client, sgtl5000->mclk = devm_clk_get(&client->dev, NULL); if (IS_ERR(sgtl5000->mclk)) { ret = PTR_ERR(sgtl5000->mclk); - dev_err(&client->dev, "Failed to get mclock: %d\n", ret); /* Defer the probe to see if the clk will be provided later */ if (ret == -ENOENT) ret = -EPROBE_DEFER; + + if (ret != -EPROBE_DEFER) + dev_err(&client->dev, "Failed to get mclock: %d\n", + ret); goto disable_regs; } -- cgit v1.2.3 From d04c413f2ab3aa5998bf86f7a2f6235ed82b2ee2 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 17 Jan 2018 13:48:55 -0200 Subject: ASoC: mxs-sgtl5000: Do not print error on probe deferral Probe deferral may happen, so do not print an error message in this case. 
Signed-off-by: Fabio Estevam Signed-off-by: Mark Brown --- sound/soc/mxs/mxs-sgtl5000.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/mxs/mxs-sgtl5000.c b/sound/soc/mxs/mxs-sgtl5000.c index 2ed3240cc682..5a871f25f438 100644 --- a/sound/soc/mxs/mxs-sgtl5000.c +++ b/sound/soc/mxs/mxs-sgtl5000.c @@ -143,8 +143,9 @@ static int mxs_sgtl5000_probe(struct platform_device *pdev) ret = devm_snd_soc_register_card(&pdev->dev, card); if (ret) { - dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", - ret); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", + ret); return ret; } -- cgit v1.2.3 From 44a5f423e70374e5b42cecd85e78f2d79334e0f2 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 17 Jan 2018 17:15:25 +0100 Subject: spi: a3700: Clear DATA_OUT when performing a read When performing a read using FIFO mode, the spi controller shifts out the last 2 bytes that were written in a previous transfer on MOSI. This undocumented behaviour can cause devices to misinterpret the transfer, so we explicitly clear the WFIFO before each read. This behaviour was noticed on EspressoBin. 
Signed-off-by: Maxime Chevallier Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 77fe55ce790c..4857b0119556 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -607,6 +607,11 @@ static int a3700_spi_transfer_one(struct spi_master *master, a3700_spi_header_set(a3700_spi); if (xfer->rx_buf) { + /* Clear WFIFO, since it's last 2 bytes are shifted out during + * a read operation + */ + spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, 0); + /* Set read data length */ spireg_write(a3700_spi, A3700_SPI_IF_DIN_CNT_REG, a3700_spi->buf_len); -- cgit v1.2.3 From abf3a49e50967bd67cff67f289690f76436f461f Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 17 Jan 2018 17:15:26 +0100 Subject: spi: a3700: Set frequency limits at startup Armada 3700 SPI controller has an internal clock divider which can divide the parent clock frequency by up to 30. This patch sets the limits in the spi_controller fields so that we can detect when a non-supported frequency is requested by a device for a transfer. 
Signed-off-by: Maxime Chevallier Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 4857b0119556..07f227e3c834 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -27,6 +27,8 @@ #define DRIVER_NAME "armada_3700_spi" +#define A3700_SPI_MAX_SPEED_HZ 100000000 +#define A3700_SPI_MAX_PRESCALE 30 #define A3700_SPI_TIMEOUT 10 /* SPI Register Offest */ @@ -815,6 +817,11 @@ static int a3700_spi_probe(struct platform_device *pdev) goto error; } + master->max_speed_hz = min_t(unsigned long, A3700_SPI_MAX_SPEED_HZ, + clk_get_rate(spi->clk)); + master->min_speed_hz = DIV_ROUND_UP(clk_get_rate(spi->clk), + A3700_SPI_MAX_PRESCALE); + ret = a3700_spi_init(spi); if (ret) goto error_clk; -- cgit v1.2.3 From a456c9320d997fdb26bfab496dd9e37f155cc788 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 17 Jan 2018 17:15:27 +0100 Subject: spi: a3700: Allow to enable or disable FIFO mode The armada 3700 SPI controller allows making transfers without using the 32-byte RFIFO and WFIFO. This commit enables switching between FIFO and non-FIFO mode, which is necessary to implement full-duplex transfers.
Signed-off-by: Maxime Chevallier Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 07f227e3c834..97938c6d6267 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -185,12 +185,15 @@ static int a3700_spi_pin_mode_set(struct a3700_spi *a3700_spi, return 0; } -static void a3700_spi_fifo_mode_set(struct a3700_spi *a3700_spi) +static void a3700_spi_fifo_mode_set(struct a3700_spi *a3700_spi, bool enable) { u32 val; val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG); - val |= A3700_SPI_FIFO_MODE; + if (enable) + val |= A3700_SPI_FIFO_MODE; + else + val &= ~A3700_SPI_FIFO_MODE; spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val); } @@ -291,7 +294,7 @@ static int a3700_spi_init(struct a3700_spi *a3700_spi) a3700_spi_deactivate_cs(a3700_spi, i); /* Enable FIFO mode */ - a3700_spi_fifo_mode_set(a3700_spi); + a3700_spi_fifo_mode_set(a3700_spi, true); /* Set SPI mode */ a3700_spi_mode_set(a3700_spi, master->mode_bits); -- cgit v1.2.3 From f68a7dcb91b7957c5bb1c3e347775332af719519 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 17 Jan 2018 17:15:28 +0100 Subject: spi: a3700: Add full-duplex support The armada 3700 SPI controller has support for full-duplex transfers, but they can only be done without using the hardware FIFOs. A full-duplex transfer is done by shifting 4 bytes at a time, or even one byte at a time for transfers less than 4 bytes long. While this method is perfectly suitable for small transfers, it is still slower than using the FIFOs. This commit implements full-duplex support, making sure that half-duplex transfers are still done using the FIFOs with the existing method. Some setup functions were moved around to make sure the controller is properly configured before beginning each transfer.
This was tested on EspressoBin with a logical analyser, and a simple setup where MISO is connected on MOSI. Transfers were made from userspace using spidev and spi-pipe from the spi-tools project Signed-off-by: Maxime Chevallier Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 85 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 97938c6d6267..fdc35dabcda2 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -413,15 +413,20 @@ static void a3700_spi_transfer_setup(struct spi_device *spi, struct spi_transfer *xfer) { struct a3700_spi *a3700_spi; - unsigned int byte_len; a3700_spi = spi_master_get_devdata(spi->master); a3700_spi_clock_set(a3700_spi, xfer->speed_hz); - byte_len = xfer->bits_per_word >> 3; + /* Use 4 bytes long transfers. Each transfer method has its way to deal + * with the remaining bytes for non 4-bytes aligned transfers. 
+ */ + a3700_spi_bytelen_set(a3700_spi, 4); - a3700_spi_fifo_thres_set(a3700_spi, byte_len); + /* Initialize the working buffers */ + a3700_spi->tx_buf = xfer->tx_buf; + a3700_spi->rx_buf = xfer->rx_buf; + a3700_spi->buf_len = xfer->len; } static void a3700_spi_set_cs(struct spi_device *spi, bool enable) @@ -576,27 +581,26 @@ static int a3700_spi_prepare_message(struct spi_master *master, if (ret) return ret; - a3700_spi_bytelen_set(a3700_spi, 4); - a3700_spi_mode_set(a3700_spi, spi->mode); return 0; } -static int a3700_spi_transfer_one(struct spi_master *master, +static int a3700_spi_transfer_one_fifo(struct spi_master *master, struct spi_device *spi, struct spi_transfer *xfer) { struct a3700_spi *a3700_spi = spi_master_get_devdata(master); int ret = 0, timeout = A3700_SPI_TIMEOUT; - unsigned int nbits = 0; + unsigned int nbits = 0, byte_len; u32 val; - a3700_spi_transfer_setup(spi, xfer); + /* Make sure we use FIFO mode */ + a3700_spi_fifo_mode_set(a3700_spi, true); - a3700_spi->tx_buf = xfer->tx_buf; - a3700_spi->rx_buf = xfer->rx_buf; - a3700_spi->buf_len = xfer->len; + /* Configure FIFO thresholds */ + byte_len = xfer->bits_per_word >> 3; + a3700_spi_fifo_thres_set(a3700_spi, byte_len); if (xfer->tx_buf) nbits = xfer->tx_nbits; @@ -731,6 +735,64 @@ out: return ret; } +static int a3700_spi_transfer_one_full_duplex(struct spi_master *master, + struct spi_device *spi, + struct spi_transfer *xfer) +{ + struct a3700_spi *a3700_spi = spi_master_get_devdata(master); + u32 val_in, val_out; + + /* Disable FIFO mode */ + a3700_spi_fifo_mode_set(a3700_spi, false); + + while (a3700_spi->buf_len) { + + /* When we have less than 4 bytes to transfer, switch to 1 byte + * mode. 
This is reset after each transfer + */ + if (a3700_spi->buf_len < 4) + a3700_spi_bytelen_set(a3700_spi, 1); + + if (a3700_spi->byte_len == 1) + val_out = *a3700_spi->tx_buf; + else + val_out = cpu_to_le32(*(u32 *)a3700_spi->tx_buf); + + spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val_out); + + /* Wait for all the data to be shifted in / out */ + while (!(spireg_read(a3700_spi, A3700_SPI_IF_CTRL_REG) & + A3700_SPI_XFER_DONE)) + cpu_relax(); + + val_in = le32_to_cpu(spireg_read(a3700_spi, + A3700_SPI_DATA_IN_REG)); + + memcpy(a3700_spi->rx_buf, &val_in, a3700_spi->byte_len); + + a3700_spi->buf_len -= a3700_spi->byte_len; + a3700_spi->tx_buf += a3700_spi->byte_len; + a3700_spi->rx_buf += a3700_spi->byte_len; + + } + + spi_finalize_current_transfer(master); + + return 0; +} + +static int a3700_spi_transfer_one(struct spi_master *master, + struct spi_device *spi, + struct spi_transfer *xfer) +{ + a3700_spi_transfer_setup(spi, xfer); + + if (xfer->tx_buf && xfer->rx_buf) + return a3700_spi_transfer_one_full_duplex(master, spi, xfer); + + return a3700_spi_transfer_one_fifo(master, spi, xfer); +} + static int a3700_spi_unprepare_message(struct spi_master *master, struct spi_message *message) { @@ -780,7 +842,6 @@ static int a3700_spi_probe(struct platform_device *pdev) master->transfer_one = a3700_spi_transfer_one; master->unprepare_message = a3700_spi_unprepare_message; master->set_cs = a3700_spi_set_cs; - master->flags = SPI_MASTER_HALF_DUPLEX; master->mode_bits |= (SPI_RX_DUAL | SPI_TX_DUAL | SPI_RX_QUAD | SPI_TX_QUAD); -- cgit v1.2.3 From 8d5737a5f53902a916ee1e1cb248c9b8b883b2ea Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Wed, 17 Jan 2018 13:50:50 +0100 Subject: ASoC: bcm2835: fix hw_params error when device is in prepared state If bcm2835 is configured as bitclock master calling hw_params() after prepare() fails with EBUSY. This also makes it impossible to use bcm2835 in full duplex mode. 
The error is caused by the split clock setup: clk_set_rate is called in hw_params, clk_prepare_enable in prepare. As hw_params doesn't check if the clock was already enabled clk_set_rate fails with EBUSY. Fix this by moving clock startup from prepare to hw_params and let hw_params properly deal with an already set up or enabled clock. Signed-off-by: Matthias Reichl Signed-off-by: Mark Brown --- sound/soc/bcm/bcm2835-i2s.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sound/soc/bcm/bcm2835-i2s.c b/sound/soc/bcm/bcm2835-i2s.c index 2e449d7173fc..d5f73a8ab893 100644 --- a/sound/soc/bcm/bcm2835-i2s.c +++ b/sound/soc/bcm/bcm2835-i2s.c @@ -130,6 +130,7 @@ struct bcm2835_i2s_dev { struct regmap *i2s_regmap; struct clk *clk; bool clk_prepared; + int clk_rate; }; static void bcm2835_i2s_start_clock(struct bcm2835_i2s_dev *dev) @@ -419,10 +420,19 @@ static int bcm2835_i2s_hw_params(struct snd_pcm_substream *substream, } /* Clock should only be set up here if CPU is clock master */ - if (bit_clock_master) { - ret = clk_set_rate(dev->clk, bclk_rate); - if (ret) - return ret; + if (bit_clock_master && + (!dev->clk_prepared || dev->clk_rate != bclk_rate)) { + if (dev->clk_prepared) + bcm2835_i2s_stop_clock(dev); + + if (dev->clk_rate != bclk_rate) { + ret = clk_set_rate(dev->clk, bclk_rate); + if (ret) + return ret; + dev->clk_rate = bclk_rate; + } + + bcm2835_i2s_start_clock(dev); } /* Setup the frame format */ @@ -618,8 +628,6 @@ static int bcm2835_i2s_prepare(struct snd_pcm_substream *substream, struct bcm2835_i2s_dev *dev = snd_soc_dai_get_drvdata(dai); uint32_t cs_reg; - bcm2835_i2s_start_clock(dev); - /* * Clear both FIFOs if the one that should be started * is not empty at the moment. 
This should only happen -- cgit v1.2.3 From 166a5a33d5a7bfa62c039eb10e69589b09fd0557 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 Jan 2018 12:17:47 +0100 Subject: IIO: ADC: stm32-dfsdm: remove unused variable again The merge between commit abaca806fd13 ("IIO: ADC: stm32-dfsdm: code optimization") and commit 2353758bc2d4 ("IIO: ADC: stm32-dfsdm: avoid unused-variable warning") left one variable behind that is no longer needed and can be removed, as shown by the gcc warning: drivers/iio/adc/stm32-dfsdm-core.c: In function 'stm32_dfsdm_probe': drivers/iio/adc/stm32-dfsdm-core.c:245:29: error: unused variable 'of_id' [-Werror=unused-variable] Fixes: d84b4c7c706f ("Merge branch 'topic/iio' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into asoc-st-dfsdm") Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- drivers/iio/adc/stm32-dfsdm-core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/adc/stm32-dfsdm-core.c b/drivers/iio/adc/stm32-dfsdm-core.c index 84277bcc465f..6290332cfd3f 100644 --- a/drivers/iio/adc/stm32-dfsdm-core.c +++ b/drivers/iio/adc/stm32-dfsdm-core.c @@ -242,7 +242,6 @@ MODULE_DEVICE_TABLE(of, stm32_dfsdm_of_match); static int stm32_dfsdm_probe(struct platform_device *pdev) { struct dfsdm_priv *priv; - const struct of_device_id *of_id; const struct stm32_dfsdm_dev_data *dev_data; struct stm32_dfsdm *dfsdm; int ret; -- cgit v1.2.3 From 031734b7d6532633d0cde73475c30646bf37cd6d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 18 Jan 2018 01:13:54 +0000 Subject: ASoC: soc-core: add missing EXPORT_SYMBOL_GPL() for snd_soc_rtdcom_lookup Reported-by: Atsushi Nemoto Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index b9ca939fd05c..9b79c2199781 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -598,6 +598,7 @@ struct snd_soc_component 
*snd_soc_rtdcom_lookup(struct snd_soc_pcm_runtime *rtd, return NULL; } +EXPORT_SYMBOL_GPL(snd_soc_rtdcom_lookup); struct snd_pcm_substream *snd_soc_get_dai_substream(struct snd_soc_card *card, const char *dai_link, int stream) -- cgit v1.2.3 From 700c17ca8968f473631594e8a7c2cc880ba2c891 Mon Sep 17 00:00:00 2001 From: Donglin Peng Date: Thu, 18 Jan 2018 13:31:26 +0800 Subject: ASoC: use seq_file to dump the contents of dai_list,platform_list and codec_list Now the debugfs files dais/platforms/codecs have a size limit PAGE_SIZE and the user can not see the whole contents of dai_list/platform_list/codec_list when they are larger than this limit. This patch uses seq_file instead to make sure dais/platforms/codecs show the full contents of dai_list/platform_list/codec_list. Signed-off-by: Donglin Peng Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 111 +++++++++++++++++---------------------------------- 1 file changed, 37 insertions(+), 74 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c0edac80df34..7b582112e3fc 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -349,120 +349,84 @@ static void soc_init_codec_debugfs(struct snd_soc_component *component) "ASoC: Failed to create codec register debugfs file\n"); } -static ssize_t codec_list_read_file(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +static int codec_list_seq_show(struct seq_file *m, void *v) { - char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - ssize_t len, ret = 0; struct snd_soc_codec *codec; - if (!buf) - return -ENOMEM; - mutex_lock(&client_mutex); - list_for_each_entry(codec, &codec_list, list) { - len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n", - codec->component.name); - if (len >= 0) - ret += len; - if (ret > PAGE_SIZE) { - ret = PAGE_SIZE; - break; - } - } + list_for_each_entry(codec, &codec_list, list) + seq_printf(m, "%s\n", codec->component.name); mutex_unlock(&client_mutex); - if (ret >= 0) - ret = 
simple_read_from_buffer(user_buf, count, ppos, buf, ret); - - kfree(buf); + return 0; +} - return ret; +static int codec_list_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, codec_list_seq_show, NULL); } static const struct file_operations codec_list_fops = { - .read = codec_list_read_file, - .llseek = default_llseek,/* read accesses f_pos */ + .open = codec_list_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; -static ssize_t dai_list_read_file(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +static int dai_list_seq_show(struct seq_file *m, void *v) { - char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - ssize_t len, ret = 0; struct snd_soc_component *component; struct snd_soc_dai *dai; - if (!buf) - return -ENOMEM; - mutex_lock(&client_mutex); - list_for_each_entry(component, &component_list, list) { - list_for_each_entry(dai, &component->dai_list, list) { - len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n", - dai->name); - if (len >= 0) - ret += len; - if (ret > PAGE_SIZE) { - ret = PAGE_SIZE; - break; - } - } - } + list_for_each_entry(component, &component_list, list) + list_for_each_entry(dai, &component->dai_list, list) + seq_printf(m, "%s\n", dai->name); mutex_unlock(&client_mutex); - ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); - - kfree(buf); + return 0; +} - return ret; +static int dai_list_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, dai_list_seq_show, NULL); } static const struct file_operations dai_list_fops = { - .read = dai_list_read_file, - .llseek = default_llseek,/* read accesses f_pos */ + .open = dai_list_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; -static ssize_t platform_list_read_file(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) +static int platform_list_seq_show(struct seq_file *m, void *v) { - char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 
- ssize_t len, ret = 0; struct snd_soc_platform *platform; - if (!buf) - return -ENOMEM; - mutex_lock(&client_mutex); - list_for_each_entry(platform, &platform_list, list) { - len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n", - platform->component.name); - if (len >= 0) - ret += len; - if (ret > PAGE_SIZE) { - ret = PAGE_SIZE; - break; - } - } + list_for_each_entry(platform, &platform_list, list) + seq_printf(m, "%s\n", platform->component.name); mutex_unlock(&client_mutex); - ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); - - kfree(buf); + return 0; +} - return ret; +static int platform_list_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, platform_list_seq_show, NULL); } static const struct file_operations platform_list_fops = { - .read = platform_list_read_file, - .llseek = default_llseek,/* read accesses f_pos */ + .open = platform_list_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; static void soc_init_card_debugfs(struct snd_soc_card *card) @@ -491,7 +455,6 @@ static void soc_cleanup_card_debugfs(struct snd_soc_card *card) debugfs_remove_recursive(card->debugfs_card_root); } - static void snd_soc_debugfs_init(void) { snd_soc_debugfs_root = debugfs_create_dir("asoc", NULL); -- cgit v1.2.3 From a10eb530ae497e2411525fc1f5ec73f39eb11c11 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 16 Jan 2018 15:14:50 +0200 Subject: perf intel-pt/bts: Do not swap when synthesizing samples Both 'perf inject' and internal tools consume cpu endian samples, so there is never a need to do any swapping when synthesizing samples. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1516108492-21401-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-bts.c | 6 +----- tools/perf/util/intel-pt.c | 11 +++-------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 5325e65f9711..7077bebc2fb0 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -67,7 +67,6 @@ struct intel_bts { u64 branches_sample_type; u64 branches_id; size_t branches_event_size; - bool synth_needs_swap; unsigned long num_events; }; @@ -303,8 +302,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, event.sample.header.size = bts->branches_event_size; ret = perf_event__synthesize_sample(&event, bts->branches_sample_type, - 0, &sample, - bts->synth_needs_swap); + 0, &sample, false); if (ret) return ret; } @@ -841,8 +839,6 @@ static int intel_bts_synth_events(struct intel_bts *bts, __perf_evsel__sample_size(attr.sample_type); } - bts->synth_needs_swap = evsel->needs_swap; - return 0; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 23f9ba676df0..2daf641beb85 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -104,8 +104,6 @@ struct intel_pt { u64 pwrx_id; u64 cbr_id; - bool synth_needs_swap; - u64 tsc_bit; u64 mtc_bit; u64 mtc_freq_bits; @@ -1101,11 +1099,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, } static int intel_pt_inject_event(union perf_event *event, - struct perf_sample *sample, u64 type, - bool swapped) + struct perf_sample *sample, u64 type) { event->header.size = perf_event__sample_event_size(sample, type, 0); - return perf_event__synthesize_sample(event, type, 0, sample, swapped); + return perf_event__synthesize_sample(event, type, 0, sample, false); } static inline int intel_pt_opt_inject(struct intel_pt *pt, @@ -1115,7 +1112,7 @@ static inline 
int intel_pt_opt_inject(struct intel_pt *pt, if (!pt->synth_opts.inject) return 0; - return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap); + return intel_pt_inject_event(event, sample, type); } static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, @@ -2329,8 +2326,6 @@ static int intel_pt_synth_events(struct intel_pt *pt, id += 1; } - pt->synth_needs_swap = evsel->needs_swap; - return 0; } -- cgit v1.2.3 From 59a87fdad1467d228acc5cb1303b0b568a9e86a8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 16 Jan 2018 15:14:51 +0200 Subject: perf evsel: Ensure reserved member of PERF_SAMPLE_CPU is zero in perf_event__synthesize_sample() PERF_SAMPLE_CPU contains the cpu number in the first 4 bytes and the second 4 bytes are reserved. Ensure the reserved bytes are zero in perf_event__synthesize_sample(). Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1516108492-21401-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 85eb84dfdf91..44032679180f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2533,6 +2533,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_CPU) { u.val32[0] = sample->cpu; + u.val32[1] = 0; if (swapped) { /* * Inverse of what is done in perf_evsel__parse_sample -- cgit v1.2.3 From 936f1f30bb7892f010670f1edebc419d47b139b1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 16 Jan 2018 15:14:52 +0200 Subject: perf tools: Get rid of unused 'swapped' parameter from perf_event__synthesize_sample() There is never a need to synthesize a 'swapped' sample, so all callers to perf_event__synthesize_sample() pass 'false' as the value to 'swapped'. So get rid of the unused 'swapped' parameter. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1516108492-21401-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 3 +-- tools/perf/tests/sample-parsing.c | 2 +- tools/perf/util/event.h | 3 +-- tools/perf/util/evsel.c | 28 +--------------------------- tools/perf/util/intel-bts.c | 2 +- tools/perf/util/intel-pt.c | 2 +- 6 files changed, 6 insertions(+), 34 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 16a28547ca86..40fe919bbcf3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -536,8 +536,7 @@ found: sample_sw.period = sample->period; sample_sw.time = sample->time; perf_event__synthesize_sample(event_sw, evsel->attr.sample_type, - evsel->attr.read_format, &sample_sw, - false); + evsel->attr.read_format, &sample_sw); build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); return perf_event__repipe(tool, event_sw, &sample_sw, machine); } diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 3ec6302b6498..0e2d00d69e6e 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -248,7 +248,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) event->header.size = sz; err = perf_event__synthesize_sample(event, sample_type, read_format, - &sample, false); + &sample); if (err) { pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", "perf_event__synthesize_sample", sample_type, err); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index e5fbd6dd1b01..0f794744919c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -775,8 +775,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format); int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, - const struct perf_sample *sample, - bool swapped); + const 
struct perf_sample *sample); pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 44032679180f..66fa45198a11 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2472,8 +2472,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, - const struct perf_sample *sample, - bool swapped) + const struct perf_sample *sample) { u64 *array; size_t sz; @@ -2498,15 +2497,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_TID) { u.val32[0] = sample->pid; u.val32[1] = sample->tid; - if (swapped) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val32[1] = bswap_32(u.val32[1]); - u.val64 = bswap_64(u.val64); - } - *array = u.val64; array++; } @@ -2534,13 +2524,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_CPU) { u.val32[0] = sample->cpu; u.val32[1] = 0; - if (swapped) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val64 = bswap_64(u.val64); - } *array = u.val64; array++; } @@ -2587,15 +2570,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_RAW) { u.val32[0] = sample->raw_size; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val32[1] = bswap_32(u.val32[1]); - u.val64 = bswap_64(u.val64); - } *array = u.val64; array = (void *)array + sizeof(u32); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 7077bebc2fb0..72db2744876d 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -302,7 +302,7 @@ static int 
intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, event.sample.header.size = bts->branches_event_size; ret = perf_event__synthesize_sample(&event, bts->branches_sample_type, - 0, &sample, false); + 0, &sample); if (ret) return ret; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 2daf641beb85..3773d9c54f45 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1102,7 +1102,7 @@ static int intel_pt_inject_event(union perf_event *event, struct perf_sample *sample, u64 type) { event->header.size = perf_event__sample_event_size(sample, type, 0); - return perf_event__synthesize_sample(event, type, 0, sample, false); + return perf_event__synthesize_sample(event, type, 0, sample); } static inline int intel_pt_opt_inject(struct intel_pt *pt, -- cgit v1.2.3 From 56271170438df39c1b9a39c7aaf69010e6a4b59a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Jan 2018 10:28:14 -0300 Subject: perf tools: Use ui__error() for reporting --fields errors So that we can get it working for TUI, where using just pr_err() would end up making the message emitted to stderr to be erased by the TUI exit routine restoring the terminal to its previous state. 
Now we can see that trying to use a tracepoint field as one of the --fields entries isn't working: # perf top --stdio --no-children -e syscalls:sys_enter_write --fields pid,sym,count Error: Unknown --fields key: `count' Usage: perf top [<options>] --fields <key[,keys...]> output field(s): overhead, period, sample plus all of sort keys # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-usy9hhy7umdd4bbblkn63t8w@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 211e7f326b9f..2da4d0456a03 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2887,10 +2887,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) tok; tok = strtok_r(NULL, ", ", &tmp)) { ret = output_field_add(list, tok); if (ret == -EINVAL) { - pr_err("Invalid --fields key: `%s'", tok); + ui__error("Invalid --fields key: `%s'", tok); break; } else if (ret == -ESRCH) { - pr_err("Unknown --fields key: `%s'", tok); + ui__error("Unknown --fields key: `%s'", tok); break; } } -- cgit v1.2.3 From a52a69ea89dc12e6f4572f554940789c1ab23c7a Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sat, 13 Jan 2018 12:05:17 +0100 Subject: block, bfq: limit tags for writes and async I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Asynchronous I/O can easily starve synchronous I/O (both sync reads and sync writes), by consuming all request tags. Similarly, storms of synchronous writes, such as those that sync(2) may trigger, can starve synchronous reads. In their turn, these two problems may also cause BFQ to lose control on latency for interactive and soft real-time applications. For example, on a PLEXTOR PX-256M5S SSD, LibreOffice Writer takes 0.6 seconds to start if the device is idle, but it takes more than 45 seconds (!)
if there are sequential writes in the background. This commit addresses this issue by limiting the maximum percentage of tags that asynchronous I/O requests and synchronous write requests can consume. In particular, this commit grants a higher threshold to synchronous writes, to prevent the latter from being starved by asynchronous I/O. According to the above test, LibreOffice Writer now starts in about 1.2 seconds on average, regardless of the background workload, and apart from some rare outlier. To check this improvement, run, e.g., sudo ./comm_startup_lat.sh bfq 5 5 seq 10 "lowriter --terminate_after_init" for the comm_startup_lat benchmark in the S suite [1]. [1] https://github.com/Algodev-github/S Tested-by: Oleksandr Natalenko Tested-by: Holger Hoffstätte Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++ block/bfq-iosched.h | 12 +++++++++ 2 files changed, 89 insertions(+) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index f352b1677143..a7ab0cb50733 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -417,6 +417,82 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd, } } +/* + * See the comments on bfq_limit_depth for the purpose of + * the depths set in the function. + */ +static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt) +{ + bfqd->sb_shift = bt->sb.shift; + + /* + * In-word depths if no bfq_queue is being weight-raised: + * leaving 25% of tags only for sync reads. + * + * In next formulas, right-shift the value + * (1U<<bfqd->sb_shift), instead of computing directly + * (1U<<(bfqd->sb_shift - something)), to be robust against + * any possible value of bfqd->sb_shift, without having to + * limit 'something'. + */ + /* no more than 50% of tags for async I/O */ + bfqd->word_depths[0][0] = max((1U<<bfqd->sb_shift)>>1, 1U); + /* + * no more than 75% of tags for sync writes (25% extra tags + * w.r.t.
async I/O, to prevent async I/O from starving sync + * writes) + */ + bfqd->word_depths[0][1] = max(((1U<<bfqd->sb_shift) * 3)>>2, 1U); + + /* + * In-word depths in case some bfq_queue is being weight- + * raised: leaving ~63% of tags for sync reads. This is the + * highest percentage for which, in our tests, application + * start-up times didn't suffer from any regression due to tag + * shortage. + */ + /* no more than ~18% of tags for async I/O */ + bfqd->word_depths[1][0] = max(((1U<<bfqd->sb_shift) * 3)>>4, 1U); + /* no more than ~37% of tags for sync writes (~20% extra tags) */ + bfqd->word_depths[1][1] = max(((1U<<bfqd->sb_shift) * 6)>>4, 1U); +} + +/* + * Async I/O can easily starve sync I/O (both sync reads and sync + * writes), by consuming all tags. Similarly, storms of sync writes, + * such as those that sync(2) may trigger, can starve sync reads. + * Limit depths of async I/O and sync writes so as to counter both + * problems. + */ +static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) +{ + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct bfq_data *bfqd = data->q->elevator->elevator_data; + struct sbitmap_queue *bt; + + if (op_is_sync(op) && !op_is_write(op)) + return; + + if (data->flags & BLK_MQ_REQ_RESERVED) { + if (unlikely(!tags->nr_reserved_tags)) { + WARN_ON_ONCE(1); + return; + } + bt = &tags->breserved_tags; + } else + bt = &tags->bitmap_tags; + + if (unlikely(bfqd->sb_shift != bt->sb.shift)) + bfq_update_depths(bfqd, bt); + + data->shallow_depth = + bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; + + bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", + __func__, bfqd->wr_busy_queues, op_is_sync(op), + data->shallow_depth); +} + static struct bfq_queue * bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, sector_t sector, struct rb_node **ret_parent, @@ -5285,6 +5361,7 @@ static struct elv_fs_entry bfq_attrs[] = { static struct elevator_type iosched_bfq_mq = { .ops.mq = { + .limit_depth =
bfq_limit_depth, .prepare_request = bfq_prepare_request, .finish_request = bfq_finish_request, .exit_icq = bfq_exit_icq, diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 5d47b58d5fc8..fcd941008127 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -629,6 +629,18 @@ struct bfq_data { struct bfq_io_cq *bio_bic; /* bfqq associated with the task issuing current bio for merging */ struct bfq_queue *bio_bfqq; + + /* + * Cached sbitmap shift, used to compute depth limits in + * bfq_update_depths. + */ + unsigned int sb_shift; + + /* + * Depth limits used in bfq_limit_depth (see comments on the + * function) + */ + unsigned int word_depths[2][2]; }; enum bfqq_state_flags { -- cgit v1.2.3 From 8a8747dc01cee6f92a52c03ba686e9f60cb46c87 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sat, 13 Jan 2018 12:05:18 +0100 Subject: block, bfq: limit sectors served with interactive weight raising MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To maximise responsiveness, BFQ raises the weight, and performs device idling, for bfq_queues associated with processes deemed as interactive. In particular, weight raising has a maximum duration, equal to the time needed to start a large application. If a weight-raised process goes on doing I/O beyond this maximum duration, it loses weight-raising. This mechanism is evidently vulnerable to the following false positives: I/O-bound applications that will go on doing I/O for much longer than the duration of weight-raising. These applications have basically no benefit from being weight-raised at the beginning of their I/O. On the opposite end, while being weight-raised, these applications a) unjustly steal throughput to applications that may truly need low latency; b) make BFQ uselessly perform device idling; device idling results in loss of device throughput with most flash-based storage, and may increase latencies when used purposelessly. 
This commit adds a countermeasure to reduce both the above problems. To introduce this countermeasure, we provide the following extra piece of information (full details in the comments added by this commit). During the start-up of the large application used as a reference to set the duration of weight-raising, involved processes transfer at most ~110K sectors each. Accordingly, a process initially deemed as interactive has no right to be weight-raised any longer, once it has transferred 110K sectors or more. Based on this consideration, this commit early-ends weight-raising for a bfq_queue if the latter happens to have received an amount of service at least equal to 110K sectors (actually, a little bit more, to keep a safety margin). I/O-bound applications that reach a high throughput, such as file copy, get to this threshold much before the allowed weight-raising period finishes. Thus this early ending of weight-raising reduces the amount of time during which these applications cause the problems described above. Tested-by: Oleksandr Natalenko Tested-by: Holger Hoffstätte Signed-off-by: Paolo Valente Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++------ block/bfq-iosched.h | 5 ++++ block/bfq-wf2q.c | 3 ++ 3 files changed, 80 insertions(+), 9 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index a7ab0cb50733..47e6ec7427c4 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -209,15 +209,17 @@ static struct kmem_cache *bfq_pool; * interactive applications automatically, using the following formula: * duration = (R / r) * T, where r is the peak rate of the device, and * R and T are two reference parameters.
- * In particular, R is the peak rate of the reference device (see below), - * and T is a reference time: given the systems that are likely to be - * installed on the reference device according to its speed class, T is - * about the maximum time needed, under BFQ and while reading two files in - * parallel, to load typical large applications on these systems. - * In practice, the slower/faster the device at hand is, the more/less it - * takes to load applications with respect to the reference device. - * Accordingly, the longer/shorter BFQ grants weight raising to interactive - * applications. + * In particular, R is the peak rate of the reference device (see + * below), and T is a reference time: given the systems that are + * likely to be installed on the reference device according to its + * speed class, T is about the maximum time needed, under BFQ and + * while reading two files in parallel, to load typical large + * applications on these systems (see the comments on + * max_service_from_wr below, for more details on how T is obtained). + * In practice, the slower/faster the device at hand is, the more/less + * it takes to load applications with respect to the reference device. + * Accordingly, the longer/shorter BFQ grants weight raising to + * interactive applications. * * BFQ uses four different reference pairs (R, T), depending on: * . whether the device is rotational or non-rotational; @@ -254,6 +256,60 @@ static int T_slow[2]; static int T_fast[2]; static int device_speed_thresh[2]; +/* + * BFQ uses the above-detailed, time-based weight-raising mechanism to + * privilege interactive tasks. This mechanism is vulnerable to the + * following false positives: I/O-bound applications that will go on + * doing I/O for much longer than the duration of weight + * raising. These applications have basically no benefit from being + * weight-raised at the beginning of their I/O. 
On the opposite end, + * while being weight-raised, these applications + * a) unjustly steal throughput to applications that may actually need + * low latency; + * b) make BFQ uselessly perform device idling; device idling results + * in loss of device throughput with most flash-based storage, and may + * increase latencies when used purposelessly. + * + * BFQ tries to reduce these problems, by adopting the following + * countermeasure. To introduce this countermeasure, we need first to + * finish explaining how the duration of weight-raising for + * interactive tasks is computed. + * + * For a bfq_queue deemed as interactive, the duration of weight + * raising is dynamically adjusted, as a function of the estimated + * peak rate of the device, so as to be equal to the time needed to + * execute the 'largest' interactive task we benchmarked so far. By + * largest task, we mean the task for which each involved process has + * to do more I/O than for any of the other tasks we benchmarked. This + * reference interactive task is the start-up of LibreOffice Writer, + * and in this task each process/bfq_queue needs to have at most ~110K + * sectors transferred. + * + * This last piece of information enables BFQ to reduce the actual + * duration of weight-raising for at least one class of I/O-bound + * applications: those doing sequential or quasi-sequential I/O. An + * example is file copy. In fact, once started, the main I/O-bound + * processes of these applications usually consume the above 110K + * sectors in much less time than the processes of an application that + * is starting, because these I/O-bound processes will greedily devote + * almost all their CPU cycles only to their target, + * throughput-friendly I/O operations. This is even more true if BFQ + * happens to be underestimating the device peak rate, and thus + * overestimating the duration of weight raising. 
But, according to + * our measurements, once transferred 110K sectors, these processes + * have no right to be weight-raised any longer. + * + * Basing on the last consideration, BFQ ends weight-raising for a + * bfq_queue if the latter happens to have received an amount of + * service at least equal to the following constant. The constant is + * set to slightly more than 110K, to have a minimum safety margin. + * + * This early ending of weight-raising reduces the amount of time + * during which interactive false positives cause the two problems + * described at the beginning of these comments. + */ +static const unsigned long max_service_from_wr = 120000; + #define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0]) #define RQ_BFQQ(rq) ((rq)->elv.priv[1]) @@ -1352,6 +1408,7 @@ static void bfq_update_bfqq_wr_on_rq_arrival(struct bfq_data *bfqd, if (old_wr_coeff == 1 && wr_or_deserves_wr) { /* start a weight-raising period */ if (interactive) { + bfqq->service_from_wr = 0; bfqq->wr_coeff = bfqd->bfq_wr_coeff; bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); } else { @@ -3665,6 +3722,12 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->entity.prio_changed = 1; } } + if (bfqq->wr_coeff > 1 && + bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time && + bfqq->service_from_wr > max_service_from_wr) { + /* see comments on max_service_from_wr */ + bfq_bfqq_end_wr(bfqq); + } } /* * To improve latency (for this or other queues), immediately diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index fcd941008127..350c39ae2896 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -337,6 +337,11 @@ struct bfq_queue { * last transition from idle to backlogged. */ unsigned long service_from_backlogged; + /* + * Cumulative service received from the @bfq_queue since its + * last transition to weight-raised state. 
+ */ + unsigned long service_from_wr; /* * Value of wr start time when switching to soft rt diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 4456eda34e48..4498c43245e2 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -838,6 +838,9 @@ void bfq_bfqq_served(struct bfq_queue *bfqq, int served) if (!bfqq->service_from_backlogged) bfqq->first_IO_time = jiffies; + if (bfqq->wr_coeff > 1) + bfqq->service_from_wr += served; + bfqq->service_from_backlogged += served; for_each_entity(entity) { st = bfq_entity_service_tree(entity); -- cgit v1.2.3 From 7233982ade15eeac05c6f351e8d347406e6bcd2f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 18 Jan 2018 14:05:05 +0000 Subject: mtd: ubi: wl: Fix error return code in ubi_wl_init() Fix to return error code -ENOMEM from the kmem_cache_alloc() error handling case instead of 0, as done elsewhere in this function. Fixes: f78e5623f45b ("ubi: fastmap: Erase outdated anchor PEBs during attach") Signed-off-by: Wei Yongjun Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/wl.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 77ab49f2743b..2052a647220e 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1617,8 +1617,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + if (!e) { + err = -ENOMEM; goto out_free; + } e->pnum = aeb->pnum; e->ec = aeb->ec; @@ -1637,8 +1639,10 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) cond_resched(); e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); - if (!e) + if (!e) { + err = -ENOMEM; goto out_free; + } e->pnum = aeb->pnum; e->ec = aeb->ec; -- cgit v1.2.3 From 7f29ae9f977bcdc3654e68bc36d170223c52fd48 Mon Sep 17 00:00:00 2001 From: Bradley Bolen Date: Thu, 18 Jan 2018 08:55:20 -0500 Subject: ubi: block: Fix locking for idr_alloc/idr_remove This 
fixes a race with idr_alloc where gd->first_minor can be set to the same value for two simultaneous calls to ubiblock_create. Each instance calls device_add_disk with the same first_minor. device_add_disk calls bdi_register_owner which generates several warnings. WARNING: CPU: 1 PID: 179 at kernel-source/fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x88 sysfs: cannot create duplicate filename '/devices/virtual/bdi/252:2' WARNING: CPU: 1 PID: 179 at kernel-source/lib/kobject.c:240 kobject_add_internal+0x1ec/0x2f8 kobject_add_internal failed for 252:2 with -EEXIST, don't try to register things with the same name in the same directory WARNING: CPU: 1 PID: 179 at kernel-source/fs/sysfs/dir.c:31 sysfs_warn_dup+0x68/0x88 sysfs: cannot create duplicate filename '/dev/block/252:2' However, device_add_disk does not error out when bdi_register_owner returns an error. Control continues until reaching blk_register_queue. It then BUGs. kernel BUG at kernel-source/fs/sysfs/group.c:113! [] (internal_create_group) from [] (sysfs_create_group+0x20/0x24) [] (sysfs_create_group) from [] (blk_trace_init_sysfs+0x18/0x20) [] (blk_trace_init_sysfs) from [] (blk_register_queue+0xd8/0x154) [] (blk_register_queue) from [] (device_add_disk+0x194/0x44c) [] (device_add_disk) from [] (ubiblock_create+0x284/0x2e0) [] (ubiblock_create) from [] (vol_cdev_ioctl+0x450/0x554) [] (vol_cdev_ioctl) from [] (vfs_ioctl+0x30/0x44) [] (vfs_ioctl) from [] (do_vfs_ioctl+0xa0/0x790) [] (do_vfs_ioctl) from [] (SyS_ioctl+0x44/0x68) [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x34) Locking idr_alloc/idr_remove removes the race and keeps gd->first_minor unique. 
Fixes: 2bf50d42f3a4 ("UBI: block: Dynamically allocate minor numbers") Cc: stable@vger.kernel.org Signed-off-by: Bradley Bolen Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/block.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index b210fdb31c98..b1fc28f63882 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -99,6 +99,8 @@ struct ubiblock { /* Linked list of all ubiblock instances */ static LIST_HEAD(ubiblock_devices); +static DEFINE_IDR(ubiblock_minor_idr); +/* Protects ubiblock_devices and ubiblock_minor_idr */ static DEFINE_MUTEX(devices_mutex); static int ubiblock_major; @@ -351,8 +353,6 @@ static const struct blk_mq_ops ubiblock_mq_ops = { .init_request = ubiblock_init_request, }; -static DEFINE_IDR(ubiblock_minor_idr); - int ubiblock_create(struct ubi_volume_info *vi) { struct ubiblock *dev; @@ -365,14 +365,15 @@ int ubiblock_create(struct ubi_volume_info *vi) /* Check that the volume isn't already handled */ mutex_lock(&devices_mutex); if (find_dev_nolock(vi->ubi_num, vi->vol_id)) { - mutex_unlock(&devices_mutex); - return -EEXIST; + ret = -EEXIST; + goto out_unlock; } - mutex_unlock(&devices_mutex); dev = kzalloc(sizeof(struct ubiblock), GFP_KERNEL); - if (!dev) - return -ENOMEM; + if (!dev) { + ret = -ENOMEM; + goto out_unlock; + } mutex_init(&dev->dev_mutex); @@ -437,14 +438,13 @@ int ubiblock_create(struct ubi_volume_info *vi) goto out_free_queue; } - mutex_lock(&devices_mutex); list_add_tail(&dev->list, &ubiblock_devices); - mutex_unlock(&devices_mutex); /* Must be the last step: anyone can call file ops from now on */ add_disk(dev->gd); dev_info(disk_to_dev(dev->gd), "created from ubi%d:%d(%s)", dev->ubi_num, dev->vol_id, vi->name); + mutex_unlock(&devices_mutex); return 0; out_free_queue: @@ -457,6 +457,8 @@ out_put_disk: put_disk(dev->gd); out_free_dev: kfree(dev); +out_unlock: + 
mutex_unlock(&devices_mutex); return ret; } @@ -478,30 +480,36 @@ static void ubiblock_cleanup(struct ubiblock *dev) int ubiblock_remove(struct ubi_volume_info *vi) { struct ubiblock *dev; + int ret; mutex_lock(&devices_mutex); dev = find_dev_nolock(vi->ubi_num, vi->vol_id); if (!dev) { - mutex_unlock(&devices_mutex); - return -ENODEV; + ret = -ENODEV; + goto out_unlock; } /* Found a device, let's lock it so we can check if it's busy */ mutex_lock(&dev->dev_mutex); if (dev->refcnt > 0) { - mutex_unlock(&dev->dev_mutex); - mutex_unlock(&devices_mutex); - return -EBUSY; + ret = -EBUSY; + goto out_unlock_dev; } /* Remove from device list */ list_del(&dev->list); - mutex_unlock(&devices_mutex); - ubiblock_cleanup(dev); mutex_unlock(&dev->dev_mutex); + mutex_unlock(&devices_mutex); + kfree(dev); return 0; + +out_unlock_dev: + mutex_unlock(&dev->dev_mutex); +out_unlock: + mutex_unlock(&devices_mutex); + return ret; } static int ubiblock_resize(struct ubi_volume_info *vi) @@ -630,6 +638,7 @@ static void ubiblock_remove_all(void) struct ubiblock *next; struct ubiblock *dev; + mutex_lock(&devices_mutex); list_for_each_entry_safe(dev, next, &ubiblock_devices, list) { /* The module is being forcefully removed */ WARN_ON(dev->desc); @@ -638,6 +647,7 @@ static void ubiblock_remove_all(void) ubiblock_cleanup(dev); kfree(dev); } + mutex_unlock(&devices_mutex); } int __init ubiblock_init(void) -- cgit v1.2.3 From 872523233d640c21ce13ea51269c5c031ebb2f78 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Jan 2018 13:07:00 -0300 Subject: perf bpf: Don't warn about unavailability of builtin clang, just fallback When clang is not linked with 'perf' we should just add a debug message about that before doing the fallback to calling the external compiler. I.e. 
just the "-95" warning below gets turned into a debug message: # cat sys_enter_open.c #include "bpf.h" SEC("syscalls:sys_enter_open") int func(void *ctx) { struct { char *ptr; char path[256]; } filename = { .ptr = *((char **)(ctx + 16)), }; int len = bpf_probe_read_str(filename.path, sizeof(filename.path), filename.ptr); if (len > 0) { if (len == 1) perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr)); else if (len < 256) perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr)); } return 0; } # trace -e open,sys_enter_open.c bpf: builtin compilation failed: -95, try external compiler 0.000 ( ): __bpf_stdout__:@......./proc/self/task/11160/comm..) 0.014 ( 0.116 ms): qemu-system-x8/6721 open(filename: /proc/self/task/11160/comm, flags: RDWR) = 91 2335.411 ( ): __bpf_stdout__:FB..~.../etc/resolv.conf....) 2335.421 ( 0.030 ms): chronyd/883 open(filename: /etc/resolv.conf, flags: CLOEXEC) = 5 ^C# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-z5aak9oay448ffj37giz94yr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 72c107fcbc5a..ab2598af91eb 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -94,7 +94,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz); perf_clang__cleanup(); if (err) { - pr_warning("bpf: builtin compilation failed: %d, try external compiler\n", err); + pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err); err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); if (err) return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); -- cgit v1.2.3 From 6ea9cdf3912a8ca532dde5296a0e81fc75a40fa3 Mon Sep 17 00:00:00 
2001 From: Patrice Chotard Date: Thu, 18 Jan 2018 15:34:17 +0100 Subject: mmc: mmci: Don't pretend all variants to have MMCIMASK1 register Two mask registers are used in order to select which events have to actually generate an interrupt on each IRQ line. It seems that in the single-IRQ case it's assumed that the IRQ lines are simply OR-ed, while the two mask registers are still present. The driver still programs the two mask registers separately. However the STM32 variant has only one IRQ, and also has only one mask register. This patch prepares for STM32 variant support by making the driver use only one mask register. This patch also optimizes the MMCIMASK1 mask usage by caching it in host->mask1_reg, which avoids reading the register in mmci_irq(). Tested only on STM32 variant. RFT for variants other than STM32 Signed-off-by: Andrea Merello Signed-off-by: Patrice Chotard Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 28 ++++++++++++++++++++++++---- drivers/mmc/host/mmci.h | 1 + 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index e8a1bb1ae694..bc7669d50c38 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -82,6 +82,7 @@ static unsigned int fmax = 515633; * @qcom_fifo: enables qcom specific fifo pio read logic. * @qcom_dml: enables qcom specific dma glue for dma transfers. * @reversed_irq_handling: handle data irq before cmd irq. + * @mmcimask1: true if variant have a MMCIMASK1 register.
*/ struct variant_data { unsigned int clkreg; @@ -111,6 +112,7 @@ struct variant_data { bool qcom_fifo; bool qcom_dml; bool reversed_irq_handling; + bool mmcimask1; }; static struct variant_data variant_arm = { @@ -120,6 +122,7 @@ static struct variant_data variant_arm = { .pwrreg_powerup = MCI_PWR_UP, .f_max = 100000000, .reversed_irq_handling = true, + .mmcimask1 = true, }; static struct variant_data variant_arm_extended_fifo = { @@ -128,6 +131,7 @@ static struct variant_data variant_arm_extended_fifo = { .datalength_bits = 16, .pwrreg_powerup = MCI_PWR_UP, .f_max = 100000000, + .mmcimask1 = true, }; static struct variant_data variant_arm_extended_fifo_hwfc = { @@ -137,6 +141,7 @@ static struct variant_data variant_arm_extended_fifo_hwfc = { .datalength_bits = 16, .pwrreg_powerup = MCI_PWR_UP, .f_max = 100000000, + .mmcimask1 = true, }; static struct variant_data variant_u300 = { @@ -152,6 +157,7 @@ static struct variant_data variant_u300 = { .signal_direction = true, .pwrreg_clkgate = true, .pwrreg_nopower = true, + .mmcimask1 = true, }; static struct variant_data variant_nomadik = { @@ -168,6 +174,7 @@ static struct variant_data variant_nomadik = { .signal_direction = true, .pwrreg_clkgate = true, .pwrreg_nopower = true, + .mmcimask1 = true, }; static struct variant_data variant_ux500 = { @@ -190,6 +197,7 @@ static struct variant_data variant_ux500 = { .busy_detect_flag = MCI_ST_CARDBUSY, .busy_detect_mask = MCI_ST_BUSYENDMASK, .pwrreg_nopower = true, + .mmcimask1 = true, }; static struct variant_data variant_ux500v2 = { @@ -214,6 +222,7 @@ static struct variant_data variant_ux500v2 = { .busy_detect_flag = MCI_ST_CARDBUSY, .busy_detect_mask = MCI_ST_BUSYENDMASK, .pwrreg_nopower = true, + .mmcimask1 = true, }; static struct variant_data variant_qcom = { @@ -232,6 +241,7 @@ static struct variant_data variant_qcom = { .explicit_mclk_control = true, .qcom_fifo = true, .qcom_dml = true, + .mmcimask1 = true, }; /* Busy detection for the ST Micro variant */ @@ -396,6 
+406,7 @@ mmci_request_end(struct mmci_host *host, struct mmc_request *mrq) static void mmci_set_mask1(struct mmci_host *host, unsigned int mask) { void __iomem *base = host->base; + struct variant_data *variant = host->variant; if (host->singleirq) { unsigned int mask0 = readl(base + MMCIMASK0); @@ -406,7 +417,10 @@ static void mmci_set_mask1(struct mmci_host *host, unsigned int mask) writel(mask0, base + MMCIMASK0); } - writel(mask, base + MMCIMASK1); + if (variant->mmcimask1) + writel(mask, base + MMCIMASK1); + + host->mask1_reg = mask; } static void mmci_stop_data(struct mmci_host *host) @@ -1286,7 +1300,7 @@ static irqreturn_t mmci_irq(int irq, void *dev_id) status = readl(host->base + MMCISTATUS); if (host->singleirq) { - if (status & readl(host->base + MMCIMASK1)) + if (status & host->mask1_reg) mmci_pio_irq(irq, dev_id); status &= ~MCI_IRQ1MASK; @@ -1729,7 +1743,10 @@ static int mmci_probe(struct amba_device *dev, spin_lock_init(&host->lock); writel(0, host->base + MMCIMASK0); - writel(0, host->base + MMCIMASK1); + + if (variant->mmcimask1) + writel(0, host->base + MMCIMASK1); + writel(0xfff, host->base + MMCICLEAR); /* @@ -1809,6 +1826,7 @@ static int mmci_remove(struct amba_device *dev) if (mmc) { struct mmci_host *host = mmc_priv(mmc); + struct variant_data *variant = host->variant; /* * Undo pm_runtime_put() in probe. 
We use the _sync @@ -1819,7 +1837,9 @@ static int mmci_remove(struct amba_device *dev) mmc_remove_host(mmc); writel(0, host->base + MMCIMASK0); - writel(0, host->base + MMCIMASK1); + + if (variant->mmcimask1) + writel(0, host->base + MMCIMASK1); writel(0, host->base + MMCICOMMAND); writel(0, host->base + MMCIDATACTRL); diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h index 4a8bef1aac8f..83160a9c4c77 100644 --- a/drivers/mmc/host/mmci.h +++ b/drivers/mmc/host/mmci.h @@ -223,6 +223,7 @@ struct mmci_host { u32 clk_reg; u32 datactrl_reg; u32 busy_status; + u32 mask1_reg; bool vqmmc_enabled; struct mmci_platform_data *plat; struct variant_data *variant; -- cgit v1.2.3 From 7f7b55036c567cffbb2cea4a35a971587240e6bd Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 18 Jan 2018 15:34:18 +0100 Subject: mmc: mmci: Don't pretend all variants to have MCI_STARBITERR flag This patch prepares for supporting the STM32 variant that has no such bit in the status register. Signed-off-by: Andrea Merello Signed-off-by: Patrice Chotard Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index bc7669d50c38..91a35b8dffc2 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -83,6 +83,8 @@ static unsigned int fmax = 515633; * @qcom_dml: enables qcom specific dma glue for dma transfers. * @reversed_irq_handling: handle data irq before cmd irq. * @mmcimask1: true if variant have a MMCIMASK1 register. + * @start_err: bitmask identifying the STARTBITERR bit inside MMCISTATUS + * register. 
*/ struct variant_data { unsigned int clkreg; @@ -113,6 +115,7 @@ struct variant_data { bool qcom_dml; bool reversed_irq_handling; bool mmcimask1; + u32 start_err; }; static struct variant_data variant_arm = { @@ -123,6 +126,7 @@ static struct variant_data variant_arm = { .f_max = 100000000, .reversed_irq_handling = true, .mmcimask1 = true, + .start_err = MCI_STARTBITERR, }; static struct variant_data variant_arm_extended_fifo = { @@ -132,6 +136,7 @@ static struct variant_data variant_arm_extended_fifo = { .pwrreg_powerup = MCI_PWR_UP, .f_max = 100000000, .mmcimask1 = true, + .start_err = MCI_STARTBITERR, }; static struct variant_data variant_arm_extended_fifo_hwfc = { @@ -142,6 +147,7 @@ static struct variant_data variant_arm_extended_fifo_hwfc = { .pwrreg_powerup = MCI_PWR_UP, .f_max = 100000000, .mmcimask1 = true, + .start_err = MCI_STARTBITERR, }; static struct variant_data variant_u300 = { @@ -158,6 +164,7 @@ static struct variant_data variant_u300 = { .pwrreg_clkgate = true, .pwrreg_nopower = true, .mmcimask1 = true, + .start_err = MCI_STARTBITERR, }; static struct variant_data variant_nomadik = { @@ -175,6 +182,7 @@ static struct variant_data variant_nomadik = { .pwrreg_clkgate = true, .pwrreg_nopower = true, .mmcimask1 = true, + .start_err = MCI_STARTBITERR, }; static struct variant_data variant_ux500 = { @@ -198,6 +206,7 @@ static struct variant_data variant_ux500 = { .busy_detect_mask = MCI_ST_BUSYENDMASK, .pwrreg_nopower = true, .mmcimask1 = true, + .start_err = MCI_STARTBITERR, }; static struct variant_data variant_ux500v2 = { @@ -223,6 +232,7 @@ static struct variant_data variant_ux500v2 = { .busy_detect_mask = MCI_ST_BUSYENDMASK, .pwrreg_nopower = true, .mmcimask1 = true, + .start_err = MCI_STARTBITERR, }; static struct variant_data variant_qcom = { @@ -242,6 +252,7 @@ static struct variant_data variant_qcom = { .qcom_fifo = true, .qcom_dml = true, .mmcimask1 = true, + .start_err = MCI_STARTBITERR, }; /* Busy detection for the ST Micro variant */ @@ 
-935,8 +946,9 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data, return; /* First check for errors */ - if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR| - MCI_TXUNDERRUN|MCI_RXOVERRUN)) { + if (status & (MCI_DATACRCFAIL | MCI_DATATIMEOUT | + host->variant->start_err | + MCI_TXUNDERRUN | MCI_RXOVERRUN)) { u32 remain, success; /* Terminate the DMA transfer */ -- cgit v1.2.3 From 11dfb9701175ead45be9f6621619fc67598ed4ec Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 18 Jan 2018 15:34:19 +0100 Subject: mmc: mmci: Don't pretend all variants to have OPENDRAIN bit This patch prepares for supporting the STM32 variant, which doesn't have an opendrain bit in the MMCIPOWER register. The other ST variants (u300, nomadik and ux500) use the MCI_OD bit, whereas the remaining variants use the MCI_ROD bit. Signed-off-by: Patrice Chotard Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 91a35b8dffc2..c1123f644959 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -85,6 +85,7 @@ static unsigned int fmax = 515633; * @mmcimask1: true if variant have a MMCIMASK1 register. * @start_err: bitmask identifying the STARTBITERR bit inside MMCISTATUS * register.
+ * @opendrain: bitmask identifying the OPENDRAIN bit inside MMCIPOWER register */ struct variant_data { unsigned int clkreg; @@ -116,6 +117,7 @@ struct variant_data { bool reversed_irq_handling; bool mmcimask1; u32 start_err; + u32 opendrain; }; static struct variant_data variant_arm = { @@ -127,6 +129,7 @@ static struct variant_data variant_arm = { .reversed_irq_handling = true, .mmcimask1 = true, .start_err = MCI_STARTBITERR, + .opendrain = MCI_ROD, }; static struct variant_data variant_arm_extended_fifo = { @@ -137,6 +140,7 @@ static struct variant_data variant_arm_extended_fifo = { .f_max = 100000000, .mmcimask1 = true, .start_err = MCI_STARTBITERR, + .opendrain = MCI_ROD, }; static struct variant_data variant_arm_extended_fifo_hwfc = { @@ -148,6 +152,7 @@ static struct variant_data variant_arm_extended_fifo_hwfc = { .f_max = 100000000, .mmcimask1 = true, .start_err = MCI_STARTBITERR, + .opendrain = MCI_ROD, }; static struct variant_data variant_u300 = { @@ -165,6 +170,7 @@ static struct variant_data variant_u300 = { .pwrreg_nopower = true, .mmcimask1 = true, .start_err = MCI_STARTBITERR, + .opendrain = MCI_OD, }; static struct variant_data variant_nomadik = { @@ -183,6 +189,7 @@ static struct variant_data variant_nomadik = { .pwrreg_nopower = true, .mmcimask1 = true, .start_err = MCI_STARTBITERR, + .opendrain = MCI_OD, }; static struct variant_data variant_ux500 = { @@ -207,6 +214,7 @@ static struct variant_data variant_ux500 = { .pwrreg_nopower = true, .mmcimask1 = true, .start_err = MCI_STARTBITERR, + .opendrain = MCI_OD, }; static struct variant_data variant_ux500v2 = { @@ -233,6 +241,7 @@ static struct variant_data variant_ux500v2 = { .pwrreg_nopower = true, .mmcimask1 = true, .start_err = MCI_STARTBITERR, + .opendrain = MCI_OD, }; static struct variant_data variant_qcom = { @@ -253,6 +262,7 @@ static struct variant_data variant_qcom = { .qcom_dml = true, .mmcimask1 = true, .start_err = MCI_STARTBITERR, + .opendrain = MCI_ROD, }; /* Busy detection for the 
ST Micro variant */ @@ -1455,17 +1465,8 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) ~MCI_ST_DATA2DIREN); } - if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) { - if (host->hw_designer != AMBA_VENDOR_ST) - pwr |= MCI_ROD; - else { - /* - * The ST Micro variant use the ROD bit for something - * else and only has OD (Open Drain). - */ - pwr |= MCI_OD; - } - } + if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN && variant->opendrain) + pwr |= variant->opendrain; /* * If clock = 0 and the variant requires the MMCIPOWER to be used for -- cgit v1.2.3 From f9bb304ce855fad615c5adffae5e129941ff0b48 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 18 Jan 2018 15:34:20 +0100 Subject: mmc: mmci: Add support for setting pad type via pinctrl If a variant lacks the control bit to switch pads to opendrain mode, we can achieve the same result by asking the pinmux driver to configure the pins for us. This patch makes the mmci driver able to do this whenever needed. Signed-off-by: Andrea Merello Signed-off-by: Patrice Chotard Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 41 +++++++++++++++++++++++++++++++++++++++-- drivers/mmc/host/mmci.h | 5 +++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index c1123f644959..f8a21f3c3b51 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1465,8 +1465,19 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) ~MCI_ST_DATA2DIREN); } - if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN && variant->opendrain) - pwr |= variant->opendrain; + if (variant->opendrain) { + if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) + pwr |= variant->opendrain; + } else { + /* + * If the variant cannot configure the pads by its own, then we + * expect the pinctrl to be able to do that for us + */ + if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN) + pinctrl_select_state(host->pinctrl, host->pins_opendrain); + else + pinctrl_select_state(host->pinctrl,
host->pins_default); + } /* * If clock = 0 and the variant requires the MMCIPOWER to be used for @@ -1610,6 +1621,32 @@ static int mmci_probe(struct amba_device *dev, host = mmc_priv(mmc); host->mmc = mmc; + /* + * Some variant (STM32) doesn't have opendrain bit, nevertheless + * pins can be set accordingly using pinctrl + */ + if (!variant->opendrain) { + host->pinctrl = devm_pinctrl_get(&dev->dev); + if (IS_ERR(host->pinctrl)) { + dev_err(&dev->dev, "failed to get pinctrl"); + goto host_free; + } + + host->pins_default = pinctrl_lookup_state(host->pinctrl, + PINCTRL_STATE_DEFAULT); + if (IS_ERR(host->pins_default)) { + dev_err(mmc_dev(mmc), "Can't select default pins\n"); + goto host_free; + } + + host->pins_opendrain = pinctrl_lookup_state(host->pinctrl, + MMCI_PINCTRL_STATE_OPENDRAIN); + if (IS_ERR(host->pins_opendrain)) { + dev_err(mmc_dev(mmc), "Can't select opendrain pins\n"); + goto host_free; + } + } + host->hw_designer = amba_manf(dev); host->hw_revision = amba_rev(dev); dev_dbg(mmc_dev(mmc), "designer ID = 0x%02x\n", host->hw_designer); diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h index 83160a9c4c77..f91cdf7f6dae 100644 --- a/drivers/mmc/host/mmci.h +++ b/drivers/mmc/host/mmci.h @@ -192,6 +192,8 @@ #define NR_SG 128 +#define MMCI_PINCTRL_STATE_OPENDRAIN "opendrain" + struct clk; struct variant_data; struct dma_chan; @@ -227,6 +229,9 @@ struct mmci_host { bool vqmmc_enabled; struct mmci_platform_data *plat; struct variant_data *variant; + struct pinctrl *pinctrl; + struct pinctrl_state *pins_default; + struct pinctrl_state *pins_opendrain; u8 hw_designer; u8 hw_revision:4; -- cgit v1.2.3 From 2a9d6c8060894ce06855b09d62be64110e48f27e Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 18 Jan 2018 15:34:21 +0100 Subject: mmc: mmci: Add STM32 variant STM32F4 and STM32F7 MCUs have an SDIO controller that looks like an ARM PL180. This patch adds the STM32 variant so that the mmci driver supports it.
Signed-off-by: Andrea Merello Signed-off-by: Patrice Chotard Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index f8a21f3c3b51..6246eaada750 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -244,6 +244,23 @@ static struct variant_data variant_ux500v2 = { .opendrain = MCI_OD, }; +static struct variant_data variant_stm32 = { + .fifosize = 32 * 4, + .fifohalfsize = 8 * 4, + .clkreg = MCI_CLK_ENABLE, + .clkreg_enable = MCI_ST_UX500_HWFCEN, + .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS, + .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE, + .datalength_bits = 24, + .datactrl_mask_sdio = MCI_DPSM_ST_SDIOEN, + .st_sdio = true, + .st_clkdiv = true, + .pwrreg_powerup = MCI_PWR_ON, + .f_max = 48000000, + .pwrreg_clkgate = true, + .pwrreg_nopower = true, +}; + static struct variant_data variant_qcom = { .fifosize = 16 * 4, .fifohalfsize = 8 * 4, @@ -2021,6 +2038,11 @@ static const struct amba_id mmci_ids[] = { .mask = 0xf0ffffff, .data = &variant_ux500v2, }, + { + .id = 0x00880180, + .mask = 0x00ffffff, + .data = &variant_stm32, + }, /* Qualcomm variants */ { .id = 0x00051180, -- cgit v1.2.3 From 55edde9fff1ae4114c893c572e641620c76c9c21 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 18 Jan 2018 11:36:41 -0800 Subject: Input: synaptics-rmi4 - prevent UAF reported by KASAN KASAN found a UAF due to dangling pointer. As the report below says, rmi_f11_attention() accesses drvdata->attn_data.data, which was freed in rmi_irq_fn. 
[ 311.424062] BUG: KASAN: use-after-free in rmi_f11_attention+0x526/0x5e0 [rmi_core] [ 311.424067] Read of size 27 at addr ffff88041fd610db by task irq/131-i2c_hid/1162 [ 311.424075] CPU: 0 PID: 1162 Comm: irq/131-i2c_hid Not tainted 4.15.0-rc8+ #2 [ 311.424076] Hardware name: Razer Blade Stealth/Razer, BIOS 6.05 01/26/2017 [ 311.424078] Call Trace: [ 311.424086] dump_stack+0xae/0x12d [ 311.424090] ? _atomic_dec_and_lock+0x103/0x103 [ 311.424094] ? show_regs_print_info+0xa/0xa [ 311.424099] ? input_handle_event+0x10b/0x810 [ 311.424104] print_address_description+0x65/0x229 [ 311.424108] kasan_report.cold.5+0xa7/0x281 [ 311.424117] rmi_f11_attention+0x526/0x5e0 [rmi_core] [ 311.424123] ? memcpy+0x1f/0x50 [ 311.424132] ? rmi_f11_attention+0x526/0x5e0 [rmi_core] [ 311.424143] ? rmi_f11_probe+0x1e20/0x1e20 [rmi_core] [ 311.424153] ? rmi_process_interrupt_requests+0x220/0x2a0 [rmi_core] [ 311.424163] ? rmi_irq_fn+0x22c/0x270 [rmi_core] [ 311.424173] ? rmi_process_interrupt_requests+0x2a0/0x2a0 [rmi_core] [ 311.424177] ? free_irq+0xa0/0xa0 [ 311.424180] ? irq_finalize_oneshot.part.39+0xeb/0x180 [ 311.424190] ? rmi_process_interrupt_requests+0x2a0/0x2a0 [rmi_core] [ 311.424193] ? irq_thread_fn+0x3d/0x80 [ 311.424197] ? irq_finalize_oneshot.part.39+0x180/0x180 [ 311.424200] ? irq_thread+0x21d/0x290 [ 311.424203] ? irq_thread_check_affinity+0x170/0x170 [ 311.424207] ? remove_wait_queue+0x150/0x150 [ 311.424212] ? kasan_unpoison_shadow+0x30/0x40 [ 311.424214] ? __init_waitqueue_head+0xa0/0xd0 [ 311.424218] ? task_non_contending.cold.55+0x18/0x18 [ 311.424221] ? irq_forced_thread_fn+0xa0/0xa0 [ 311.424226] ? irq_thread_check_affinity+0x170/0x170 [ 311.424230] ? kthread+0x19e/0x1c0 [ 311.424233] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 311.424237] ? 
ret_from_fork+0x32/0x40 [ 311.424244] Allocated by task 899: [ 311.424249] kasan_kmalloc+0xbf/0xe0 [ 311.424252] __kmalloc_track_caller+0xd9/0x1f0 [ 311.424255] kmemdup+0x17/0x40 [ 311.424264] rmi_set_attn_data+0xa4/0x1b0 [rmi_core] [ 311.424269] rmi_raw_event+0x10b/0x1f0 [hid_rmi] [ 311.424278] hid_input_report+0x1a8/0x2c0 [hid] [ 311.424283] i2c_hid_irq+0x146/0x1d0 [i2c_hid] [ 311.424286] irq_thread_fn+0x3d/0x80 [ 311.424288] irq_thread+0x21d/0x290 [ 311.424291] kthread+0x19e/0x1c0 [ 311.424293] ret_from_fork+0x32/0x40 [ 311.424296] Freed by task 1162: [ 311.424300] kasan_slab_free+0x71/0xc0 [ 311.424303] kfree+0x90/0x190 [ 311.424311] rmi_irq_fn+0x1b2/0x270 [rmi_core] [ 311.424319] rmi_irq_fn+0x257/0x270 [rmi_core] [ 311.424322] irq_thread_fn+0x3d/0x80 [ 311.424324] irq_thread+0x21d/0x290 [ 311.424327] kthread+0x19e/0x1c0 [ 311.424330] ret_from_fork+0x32/0x40 [ 311.424334] The buggy address belongs to the object at ffff88041fd610c0 which belongs to the cache kmalloc-64 of size 64 [ 311.424340] The buggy address is located 27 bytes inside of 64-byte region [ffff88041fd610c0, ffff88041fd61100) [ 311.424344] The buggy address belongs to the page: [ 311.424348] page:ffffea00107f5840 count:1 mapcount:0 mapping: (null) index:0x0 [ 311.424353] flags: 0x17ffffc0000100(slab) [ 311.424358] raw: 0017ffffc0000100 0000000000000000 0000000000000000 00000001802a002a [ 311.424363] raw: dead000000000100 dead000000000200 ffff8804228036c0 0000000000000000 [ 311.424366] page dumped because: kasan: bad access detected [ 311.424369] Memory state around the buggy address: [ 311.424373] ffff88041fd60f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 311.424377] ffff88041fd61000: fb fb fb fb fb fb fb fb fc fc fc fc fb fb fb fb [ 311.424381] >ffff88041fd61080: fb fb fb fb fc fc fc fc fb fb fb fb fb fb fb fb [ 311.424384] ^ [ 311.424387] ffff88041fd61100: fc fc fc fc fb fb fb fb fb fb fb fb fc fc fc fc [ 311.424391] ffff88041fd61180: fb fb fb fb fb fb fb fb fc fc fc fc fb fb fb fb Cc: 
stable@vger.kernel.org Signed-off-by: Nick Desaulniers Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_driver.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 4f2bb5947a4e..141ea228aac6 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -230,8 +230,10 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id) rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, "Failed to process interrupt request: %d\n", ret); - if (count) + if (count) { kfree(attn_data.data); + attn_data.data = NULL; + } if (!kfifo_is_empty(&drvdata->attn_fifo)) return rmi_irq_fn(irq, dev_id); -- cgit v1.2.3 From 83d016ac86428dbca8a62d3e4fdc29e3ea39e535 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 17 Jan 2018 11:48:08 -0800 Subject: block: Unexport elv_register_queue() and elv_unregister_queue() These two functions are only called from inside the block layer so unexport them. Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk.h | 3 +++ block/elevator.c | 2 -- include/linux/elevator.h | 2 -- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/block/blk.h b/block/blk.h index c84ae0e21ebd..b1771851ed92 100644 --- a/block/blk.h +++ b/block/blk.h @@ -162,6 +162,9 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq e->type->ops.sq.elevator_deactivate_req_fn(q, rq); } +int elv_register_queue(struct request_queue *q); +void elv_unregister_queue(struct request_queue *q); + struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); #ifdef CONFIG_FAIL_IO_TIMEOUT diff --git a/block/elevator.c b/block/elevator.c index 138faeb08a7c..4f00b53cd5fd 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -886,7 +886,6 @@ int elv_register_queue(struct request_queue *q) } return error; } -EXPORT_SYMBOL(elv_register_queue); void elv_unregister_queue(struct request_queue *q) { @@ 
-900,7 +899,6 @@ void elv_unregister_queue(struct request_queue *q) wbt_enable_default(q); } } -EXPORT_SYMBOL(elv_unregister_queue); int elv_register(struct elevator_type *e) { diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 3d794b3dc532..6d9e230dffd2 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -198,8 +198,6 @@ extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); extern void elv_requeue_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); -extern int elv_register_queue(struct request_queue *q); -extern void elv_unregister_queue(struct request_queue *q); extern int elv_may_queue(struct request_queue *, unsigned int); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *q, struct request *rq, -- cgit v1.2.3 From 14a23498ba97683c6790b1bcd8b2cdfe9ad99797 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 17 Jan 2018 11:48:09 -0800 Subject: block: Document scheduler modification locking requirements This patch does not change any functionality. 
Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/elevator.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/elevator.c b/block/elevator.c index 4f00b53cd5fd..e87e9b43aba0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -869,6 +869,8 @@ int elv_register_queue(struct request_queue *q) struct elevator_queue *e = q->elevator; int error; + lockdep_assert_held(&q->sysfs_lock); + error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched"); if (!error) { struct elv_fs_entry *attr = e->type->elevator_attrs; @@ -889,6 +891,8 @@ int elv_register_queue(struct request_queue *q) void elv_unregister_queue(struct request_queue *q) { + lockdep_assert_held(&q->sysfs_lock); + if (q) { struct elevator_queue *e = q->elevator; @@ -965,6 +969,8 @@ static int elevator_switch_mq(struct request_queue *q, { int ret; + lockdep_assert_held(&q->sysfs_lock); + blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); @@ -1010,6 +1016,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) bool old_registered = false; int err; + lockdep_assert_held(&q->sysfs_lock); + if (q->mq_ops) return elevator_switch_mq(q, new_e); -- cgit v1.2.3 From 2c2086afc2b8b974fac32cb028e73dc27bfae442 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 17 Jan 2018 11:48:10 -0800 Subject: block: Protect less code with sysfs_lock in blk_{un,}register_queue() The __blk_mq_register_dev(), blk_mq_unregister_dev(), elv_register_queue() and elv_unregister_queue() calls need to be protected with sysfs_lock but other code in these functions not. Hence protect only this code with sysfs_lock. This patch fixes a locking inversion issue in blk_unregister_queue() and also in an error path of blk_register_queue(): it is not allowed to hold sysfs_lock around the kobject_del(&q->kobj) call. 
Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 4a6a40ffd78e..cbea895a5547 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -853,6 +853,10 @@ struct kobj_type blk_queue_ktype = { .release = blk_release_queue, }; +/** + * blk_register_queue - register a block layer queue with sysfs + * @disk: Disk of which the request queue should be registered with sysfs. + */ int blk_register_queue(struct gendisk *disk) { int ret; @@ -909,11 +913,12 @@ int blk_register_queue(struct gendisk *disk) if (q->request_fn || (q->mq_ops && q->elevator)) { ret = elv_register_queue(q); if (ret) { + mutex_unlock(&q->sysfs_lock); kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); blk_trace_remove_sysfs(dev); kobject_put(&dev->kobj); - goto unlock; + return ret; } } ret = 0; @@ -923,6 +928,13 @@ unlock: } EXPORT_SYMBOL_GPL(blk_register_queue); +/** + * blk_unregister_queue - counterpart of blk_register_queue() + * @disk: Disk of which the request queue should be unregistered from sysfs. + * + * Note: the caller is responsible for guaranteeing that this function is called + * after blk_register_queue() has finished. + */ void blk_unregister_queue(struct gendisk *disk) { struct request_queue *q = disk->queue; @@ -935,8 +947,9 @@ void blk_unregister_queue(struct gendisk *disk) return; /* - * Protect against the 'queue' kobj being accessed - * while/after it is removed. + * Since sysfs_remove_dir() prevents adding new directory entries + * before removal of existing entries starts, protect against + * concurrent elv_iosched_store() calls. 
*/ mutex_lock(&q->sysfs_lock); @@ -944,18 +957,24 @@ void blk_unregister_queue(struct gendisk *disk) queue_flag_clear(QUEUE_FLAG_REGISTERED, q); spin_unlock_irq(q->queue_lock); - wbt_exit(q); - + /* + * Remove the sysfs attributes before unregistering the queue data + * structures that can be modified through sysfs. + */ if (q->mq_ops) blk_mq_unregister_dev(disk_to_dev(disk), q); - - if (q->request_fn || (q->mq_ops && q->elevator)) - elv_unregister_queue(q); + mutex_unlock(&q->sysfs_lock); kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); - kobject_put(&disk_to_dev(disk)->kobj); + wbt_exit(q); + + mutex_lock(&q->sysfs_lock); + if (q->request_fn || (q->mq_ops && q->elevator)) + elv_unregister_queue(q); mutex_unlock(&q->sysfs_lock); + + kobject_put(&disk_to_dev(disk)->kobj); } -- cgit v1.2.3 From 17534c6f2c065ad8e34ff6f013e5afaa90428512 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Mon, 11 Dec 2017 22:56:25 +0800 Subject: blk-throttle: export io_serviced_recursive, io_service_bytes_recursive Export these two interfaces for cgroup-v1.
Acked-by: Tejun Heo Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-throttle.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 825bc29767e6..e8428417ac0a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1510,11 +1510,21 @@ static struct cftype throtl_legacy_files[] = { .private = (unsigned long)&blkcg_policy_throtl, .seq_show = blkg_print_stat_bytes, }, + { + .name = "throttle.io_service_bytes_recursive", + .private = (unsigned long)&blkcg_policy_throtl, + .seq_show = blkg_print_stat_bytes_recursive, + }, { .name = "throttle.io_serviced", .private = (unsigned long)&blkcg_policy_throtl, .seq_show = blkg_print_stat_ios, }, + { + .name = "throttle.io_serviced_recursive", + .private = (unsigned long)&blkcg_policy_throtl, + .seq_show = blkg_print_stat_ios_recursive, + }, { } /* terminate */ }; -- cgit v1.2.3 From 721c7fc701c71f693307d274d2b346a1ecd4a534 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 11 Jan 2018 14:09:11 +0100 Subject: block: fail op_is_write() requests to read-only partitions Regular block device writes go through blkdev_write_iter(), which does bdev_read_only(), while zeroout/discard/etc requests are never checked, both userspace- and kernel-triggered. Add a generic catch-all check to generic_make_request_checks() to actually enforce ioctl(BLKROSET) and set_disk_ro(), which is used by quite a few drivers for things like snapshots, read-only backing files/images, etc. 
Reviewed-by: Sagi Grimberg Signed-off-by: Ilya Dryomov Signed-off-by: Jens Axboe --- block/blk-core.c | 56 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 55f338020254..c21a16e9fdf9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2062,6 +2062,21 @@ static inline bool should_fail_request(struct hd_struct *part, #endif /* CONFIG_FAIL_MAKE_REQUEST */ +static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) +{ + if (part->policy && op_is_write(bio_op(bio))) { + char b[BDEVNAME_SIZE]; + + printk(KERN_ERR + "generic_make_request: Trying to write " + "to read-only block-device %s (partno %d)\n", + bio_devname(bio, b), part->partno); + return true; + } + + return false; +} + /* * Remap block n of partition p to block n+start(p) of the disk. */ @@ -2070,27 +2085,28 @@ static inline int blk_partition_remap(struct bio *bio) struct hd_struct *p; int ret = 0; + rcu_read_lock(); + p = __disk_get_part(bio->bi_disk, bio->bi_partno); + if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) || + bio_check_ro(bio, p))) { + ret = -EIO; + goto out; + } + /* * Zone reset does not include bi_size so bio_sectors() is always 0. * Include a test for the reset op code and perform the remap if needed. 
*/ - if (!bio->bi_partno || - (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET)) - return 0; + if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET) + goto out; - rcu_read_lock(); - p = __disk_get_part(bio->bi_disk, bio->bi_partno); - if (likely(p && !should_fail_request(p, bio->bi_iter.bi_size))) { - bio->bi_iter.bi_sector += p->start_sect; - bio->bi_partno = 0; - trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), - bio->bi_iter.bi_sector - p->start_sect); - } else { - printk("%s: fail for partition %d\n", __func__, bio->bi_partno); - ret = -EIO; - } - rcu_read_unlock(); + bio->bi_iter.bi_sector += p->start_sect; + bio->bi_partno = 0; + trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), + bio->bi_iter.bi_sector - p->start_sect); +out: + rcu_read_unlock(); return ret; } @@ -2149,15 +2165,19 @@ generic_make_request_checks(struct bio *bio) * For a REQ_NOWAIT based request, return -EOPNOTSUPP * if queue is not a request based queue. */ - if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q)) goto not_supported; if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size)) goto end_io; - if (blk_partition_remap(bio)) - goto end_io; + if (!bio->bi_partno) { + if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) + goto end_io; + } else { + if (blk_partition_remap(bio)) + goto end_io; + } if (bio_check_eod(bio, nr_sectors)) goto end_io; -- cgit v1.2.3 From a13553c777375009584741e7d9982e775c4b0744 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 11 Jan 2018 14:09:12 +0100 Subject: block: add bdev_read_only() checks to common helpers Similar to blkdev_write_iter(), return -EPERM if the partition is read-only. This covers ioctl(), fallocate() and most in-kernel users but isn't meant to be exhaustive -- everything else will be caught in generic_make_request_checks(), fail with -EIO and can be fixed later. 
Reviewed-by: Sagi Grimberg Signed-off-by: Ilya Dryomov Signed-off-by: Jens Axboe --- block/blk-lib.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/block/blk-lib.c b/block/blk-lib.c index 2bc544ce3d2e..a676084d4740 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -37,6 +37,9 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (!q) return -ENXIO; + if (bdev_read_only(bdev)) + return -EPERM; + if (flags & BLKDEV_DISCARD_SECURE) { if (!blk_queue_secure_erase(q)) return -EOPNOTSUPP; @@ -156,6 +159,9 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector, if (!q) return -ENXIO; + if (bdev_read_only(bdev)) + return -EPERM; + bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1; if ((sector | nr_sects) & bs_mask) return -EINVAL; @@ -233,6 +239,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev, if (!q) return -ENXIO; + if (bdev_read_only(bdev)) + return -EPERM; + /* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */ max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev); @@ -287,6 +296,9 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev, if (!q) return -ENXIO; + if (bdev_read_only(bdev)) + return -EPERM; + while (nr_sects != 0) { bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects), gfp_mask); -- cgit v1.2.3 From cd443f1e91ca600a092e780e8250cd6a2954b763 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 18 Jan 2018 14:48:03 +0800 Subject: netlink: reset extack earlier in netlink_rcv_skb Move up the extack reset/initialization in netlink_rcv_skb, so that those 'goto ack' will not skip it. Otherwise, later on netlink_ack may use the uninitialized extack and cause kernel crash. Fixes: cbbdf8433a5f ("netlink: extack needs to be reset each time through loop") Reported-by: syzbot+03bee3680a37466775e7@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: David Ahern Signed-off-by: David S. 
Miller --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 47ef2d8683d6..84a4e4c3be4b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2391,6 +2391,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, while (skb->len >= nlmsg_total_size(0)) { int msglen; + memset(&extack, 0, sizeof(extack)); nlh = nlmsg_hdr(skb); err = 0; @@ -2405,7 +2406,6 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_type < NLMSG_MIN_TYPE) goto ack; - memset(&extack, 0, sizeof(extack)); err = cb(skb, nlh, &extack); if (err == -EINTR) goto skip; -- cgit v1.2.3 From f61145f1a4bd7966aa0b15c5cd3950835b284f55 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 21 Dec 2017 14:17:22 -0800 Subject: drm/vc4: Flush the caches before the bin jobs, as well. If the frame samples from a render target that was just written, its cache flush during the binning step may have occurred before the previous frame's RCL was completed. Flush the texture caches again before starting each RCL job to make sure that the sampling of the previous RCL's output is correct. Fixes flickering in the top left of 3DMMES Taiji. 
Signed-off-by: Eric Anholt Fixes: ca26d28bbaa3 ("drm/vc4: improve throughput by pipelining binning and rendering jobs") Link: https://patchwork.freedesktop.org/patch/msgid/20171221221722.23809-1-eric@anholt.net Reviewed-by: Boris Brezillon --- drivers/gpu/drm/vc4/vc4_gem.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 638540943c61..e3e868cdee79 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -436,6 +436,19 @@ vc4_flush_caches(struct drm_device *dev) VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC)); } +static void +vc4_flush_texture_caches(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + V3D_WRITE(V3D_L2CACTL, + V3D_L2CACTL_L2CCLR); + + V3D_WRITE(V3D_SLCACTL, + VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | + VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC)); +} + /* Sets the registers for the next job to be actually be executed in * the hardware. * @@ -474,6 +487,14 @@ vc4_submit_next_render_job(struct drm_device *dev) if (!exec) return; + /* A previous RCL may have written to one of our textures, and + * our full cache flush at bin time may have occurred before + * that RCL completed. Flush the texture cache now, but not + * the instructions or uniforms (since we don't write those + * from an RCL). + */ + vc4_flush_texture_caches(dev); + submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); } -- cgit v1.2.3 From 17b11b76b87afe9f8be199d7a5f442497133e2b0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 18 Jan 2018 15:58:21 +0100 Subject: drm/vc4: Fix NULL pointer dereference in vc4_save_hang_state() When saving BOs in the hang state we skip one entry of the kernel_state->bo[] array, thus leaving it to NULL. This leads to a NULL pointer dereference when, later in this function, we iterate over all BOs to check their ->madv state. 
Fixes: ca26d28bbaa3 ("drm/vc4: improve throughput by pipelining binning and rendering jobs") Cc: Signed-off-by: Boris Brezillon Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180118145821.22344-1-boris.brezillon@free-electrons.com --- drivers/gpu/drm/vc4/vc4_gem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index e3e868cdee79..c94cce96544c 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -146,7 +146,7 @@ vc4_save_hang_state(struct drm_device *dev) struct vc4_exec_info *exec[2]; struct vc4_bo *bo; unsigned long irqflags; - unsigned int i, j, unref_list_count, prev_idx; + unsigned int i, j, k, unref_list_count; kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL); if (!kernel_state) @@ -182,7 +182,7 @@ vc4_save_hang_state(struct drm_device *dev) return; } - prev_idx = 0; + k = 0; for (i = 0; i < 2; i++) { if (!exec[i]) continue; @@ -197,7 +197,7 @@ vc4_save_hang_state(struct drm_device *dev) WARN_ON(!refcount_read(&bo->usecnt)); refcount_inc(&bo->usecnt); drm_gem_object_get(&exec[i]->bo[j]->base); - kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base; + kernel_state->bo[k++] = &exec[i]->bo[j]->base; } list_for_each_entry(bo, &exec[i]->unref_list, unref_head) { @@ -205,12 +205,12 @@ vc4_save_hang_state(struct drm_device *dev) * because they are naturally unpurgeable. 
*/ drm_gem_object_get(&bo->base.base); - kernel_state->bo[j + prev_idx] = &bo->base.base; - j++; + kernel_state->bo[k++] = &bo->base.base; } - prev_idx = j + 1; } + WARN_ON_ONCE(k != state->bo_count); + if (exec[0]) state->start_bin = exec[0]->ct0ca; if (exec[1]) -- cgit v1.2.3 From 5762d7d3eda25c03cc2d9d45227be3f5ab6bec9e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Jan 2018 23:20:22 +0100 Subject: cfg80211: fix station info handling bugs Fix two places where the structure isn't initialized to zero, and thus can't be filled properly by the driver. Fixes: 4a4b8169501b ("cfg80211: Accept multiple RSSI thresholds for CQM") Fixes: 9930380f0bd8 ("cfg80211: implement IWRATE") Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/wireless/nl80211.c | 2 +- net/wireless/wext-compat.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ed87a97fcb0b..542a4fc0a8d7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9809,7 +9809,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, */ if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && rdev->ops->get_station) { - struct station_info sinfo; + struct station_info sinfo = {}; u8 *mac_addr; mac_addr = wdev->current_bss->pub.bssid; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7ca04a7de85a..05186a47878f 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1254,8 +1254,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - /* we are under RTNL - globally locked - so can use a static struct */ - static struct station_info sinfo; + struct station_info sinfo = {}; u8 addr[ETH_ALEN]; int err; -- cgit v1.2.3 From f35764e74f0e45e1d89ca9ed9c8299f5e746a4d1 Mon Sep 17 00:00:00 2001 From: James Hogan 
Date: Mon, 15 Jan 2018 20:54:35 +0000 Subject: MIPS: Fix undefined reference to physical_memsize Since commit d41e6858ba58 ("MIPS: Kconfig: Set default MIPS system type as generic") switched the default platform to the "generic" platform, allmodconfig has been failing with the following linker error (among other errors): arch/mips/kernel/vpe-mt.o In function `vpe_run': (.text+0x59c): undefined reference to `physical_memsize' The Lantiq platform already worked around the same issue in commit 9050d50e2244 ("MIPS: lantiq: Set physical_memsize") by declaring physical_memsize with the initial value of 0 (on the assumption that the actual memory size will be hard-coded in the loaded VPE firmware), and the Malta platform already provided physical_memsize. Since all other platforms will fail to link with the VPE loader enabled, only allow Lantiq and Malta platforms to enable it, by way of a SYS_SUPPORTS_VPE_LOADER which is selected by those two platforms and which MIPS_VPE_LOADER depends on. SYS_SUPPORTS_MULTITHREADING is now a dependency of SYS_SUPPORTS_VPE_LOADER so that Kconfig emits a warning if SYS_SUPPORTS_VPE_LOADER is selected without SYS_SUPPORTS_MULTITHREADING. 
Fixes: d41e6858ba58 ("MIPS: Kconfig: Set default MIPS system type as generic") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: John Crispin Cc: Hauke Mehrtens Cc: Paul Burton Cc: Matt Redfearn Cc: Guenter Roeck Cc: linux-mips@linux-mips.org Tested-by: Guenter Roeck Patchwork: https://patchwork.linux-mips.org/patch/18453/ --- arch/mips/Kconfig | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 659e0079487f..8e0b3702f1c0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -390,6 +390,7 @@ config LANTIQ select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_MIPS16 select SYS_SUPPORTS_MULTITHREADING + select SYS_SUPPORTS_VPE_LOADER select SYS_HAS_EARLY_PRINTK select GPIOLIB select SWAP_IO_SPACE @@ -517,6 +518,7 @@ config MIPS_MALTA select SYS_SUPPORTS_MIPS16 select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_SMARTMIPS + select SYS_SUPPORTS_VPE_LOADER select SYS_SUPPORTS_ZBOOT select SYS_SUPPORTS_RELOCATABLE select USE_OF @@ -2282,9 +2284,16 @@ config MIPSR2_TO_R6_EMULATOR The only reason this is a build-time option is to save ~14K from the final kernel image. +config SYS_SUPPORTS_VPE_LOADER + bool + depends on SYS_SUPPORTS_MULTITHREADING + help + Indicates that the platform supports the VPE loader, and provides + physical_memsize. + config MIPS_VPE_LOADER bool "VPE loader support." - depends on SYS_SUPPORTS_MULTITHREADING && MODULES + depends on SYS_SUPPORTS_VPE_LOADER && MODULES select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select MIPS_MT -- cgit v1.2.3 From 18696edce11e010a1151a779490d6025b497e400 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 10 Nov 2017 21:04:31 +0000 Subject: MAINTAINERS: Add James as MIPS co-maintainer I've been taking on some co-maintainer duties already, so lets make it official in the MAINTAINERS file. 
Link: https://lkml.kernel.org/r/33db77a2-32e4-6b2c-d463-9d116ba55623@imgtec.com Link: https://lkml.kernel.org/r/20171207110549.GM27409@jhogan-linux.mipstec.com Signed-off-by: James Hogan Acked-by: Paul Burton Acked-by: Aaro Koskinen Acked-by: David Daney Acked-by: Florian Fainelli Acked-by: Matt Redfearn Cc: Ralf Baechle Cc: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/18211/ --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index d76af75a653a..018d50ed382e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9085,6 +9085,7 @@ F: drivers/usb/image/microtek.* MIPS M: Ralf Baechle +M: James Hogan L: linux-mips@linux-mips.org W: http://www.linux-mips.org/ T: git git://git.linux-mips.org/pub/scm/ralf/linux.git -- cgit v1.2.3 From 0164e0d7e803af3ee1c63770978c728f8778ad01 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 18 Jan 2018 15:42:09 -0500 Subject: ring-buffer: Fix duplicate results in mapping context to bits in recursive lock In bringing back the context checks, the code checks first if it's normal (non-interrupt) context, and then for NMI then IRQ then softirq. The final check is redundant. Since the if branch is only hit if the context is one of NMI, IRQ, or SOFTIRQ, if it's not NMI or IRQ there's no reason to check if it is SOFTIRQ. The current code returns the same result even if it's not a SOFTIRQ, which is confusing. pc & SOFTIRQ_OFFSET ? 2 : RB_CTX_SOFTIRQ Is redundant as RB_CTX_SOFTIRQ *is* 2!
Fixes: a0e3a18f4baf ("ring-buffer: Bring back context level recursive checks") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0cddf60186da..5af2842dea96 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2579,8 +2579,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) bit = RB_CTX_NORMAL; else bit = pc & NMI_MASK ? RB_CTX_NMI : - pc & HARDIRQ_MASK ? RB_CTX_IRQ : - pc & SOFTIRQ_OFFSET ? 2 : RB_CTX_SOFTIRQ; + pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; if (unlikely(val & (1 << bit))) return 1; -- cgit v1.2.3 From b200bfd6112a87283e58bcfcc4cb57a5517ae82f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 Jan 2018 07:57:32 -0800 Subject: fm10k: mark PM functions as __maybe_unused A cleanup of the PM code left an incorrect #ifdef in place, leading to a harmless build warning: drivers/net/ethernet/intel/fm10k/fm10k_pci.c:2502:12: error: 'fm10k_suspend' defined but not used [-Werror=unused-function] drivers/net/ethernet/intel/fm10k/fm10k_pci.c:2475:12: error: 'fm10k_resume' defined but not used [-Werror=unused-function] It's easier to use __maybe_unused attributes here, since you can't pick the wrong one. Fixes: 8249c47c6ba4 ("fm10k: use generic PM hooks instead of legacy PCIe power hooks") Signed-off-by: Arnd Bergmann Acked-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 7f605221a686..a434fecfdfeb 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2463,7 +2463,6 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) return err; } -#ifdef CONFIG_PM /** * fm10k_resume - Generic PM resume hook * @dev: generic device structure @@ -2472,7 +2471,7 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) * suspend or hibernation. This function does not need to handle lower PCIe * device state as the stack takes care of that for us. **/ -static int fm10k_resume(struct device *dev) +static int __maybe_unused fm10k_resume(struct device *dev) { struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; @@ -2499,7 +2498,7 @@ static int fm10k_resume(struct device *dev) * system suspend or hibernation. This function does not need to handle lower * PCIe device state as the stack takes care of that for us. 
**/ -static int fm10k_suspend(struct device *dev) +static int __maybe_unused fm10k_suspend(struct device *dev) { struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; @@ -2511,8 +2510,6 @@ static int fm10k_suspend(struct device *dev) return 0; } -#endif /* CONFIG_PM */ - /** * fm10k_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device @@ -2643,11 +2640,9 @@ static struct pci_driver fm10k_driver = { .id_table = fm10k_pci_tbl, .probe = fm10k_probe, .remove = fm10k_remove, -#ifdef CONFIG_PM .driver = { .pm = &fm10k_pm_ops, }, -#endif /* CONFIG_PM */ .sriov_configure = fm10k_iov_configure, .err_handler = &fm10k_err_handler }; -- cgit v1.2.3 From 1ebe1eaf2f02784921759992ae1fde1a9bec8fd0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 18 Jan 2018 15:53:10 -0500 Subject: tracing: Fix converting enum's from the map in trace_event_eval_update() Since enums do not get converted by the TRACE_EVENT macro into their values, the event format displays the enum name and not the value. This breaks tools like perf and trace-cmd that need to interpret the raw binary data. To solve this, an enum map was created to convert these enums into their actual numbers on boot up. This is done by TRACE_EVENTS() adding a TRACE_DEFINE_ENUM() macro. Some enums were not being converted. This was caused by an optimization that had a bug in it. All calls get checked against this enum map to see if it should be converted or not, and it compares the call's system to the system that the enum map was created under. If they match, then the call is processed.
To cut down on the number of iterations needed to find the maps with a matching system, since calls and maps are grouped by system, when a match is made, the index into the map array is saved, so that the next call, if it belongs to the same system as the previous call, could start right at that array index and not have to scan all the previous arrays. The problem was, the saved index was used as the variable to know if this is a call in a new system or not. If the index was zero, it was assumed that the call is in a new system and would keep incrementing the saved index until it found a matching system. The issue arises when the first matching system was at index zero. The next map, if it belonged to the same system, would then think it was the first match and increment the index to one. If the next call belonged to the same system, it would begin its search of the maps off by one, and miss the first enum that should be converted. This left a single enum not converted properly. Also add a comment to describe exactly what that index was for. It took me a bit too long to figure out what I was thinking when debugging this issue. 
Link: http://lkml.kernel.org/r/717BE572-2070-4C1E-9902-9F2E0FEDA4F8@oracle.com Cc: stable@vger.kernel.org Fixes: 0c564a538aa93 ("tracing: Add TRACE_DEFINE_ENUM() macro to map enums to their values") Reported-by: Chuck Lever Tested-by: Chuck Lever Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ec0f9aa4e151..1b87157edbff 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2213,6 +2213,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; + bool first = false; int last_i; int i; @@ -2220,15 +2221,28 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) list_for_each_entry_safe(call, p, &ftrace_events, list) { /* events are usually grouped together with systems */ if (!last_system || call->class->system != last_system) { + first = true; last_i = 0; last_system = call->class->system; } + /* + * Since calls are grouped by systems, the likelyhood that the + * next call in the iteration belongs to the same system as the + * previous call is high. As an optimization, we skip seaching + * for a map[] that matches the call's system if the last call + * was from the same system. That's what last_i is for. If the + * call has the same system as the previous call, then last_i + * will be the index of the first map[] that has a matching + * system. 
+ */ for (i = last_i; i < len; i++) { if (call->class->system == map[i]->system) { /* Save the first system if need be */ - if (!last_i) + if (first) { last_i = i; + first = false; + } update_event_printk(call, map[i]); } } -- cgit v1.2.3 From d0c081b49137cd3200f2023c0875723be66e7ce5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 17 Jan 2018 14:21:13 -0800 Subject: flow_dissector: properly cap thoff field syzbot reported yet another crash [1] that is caused by insufficient validation of DODGY packets. Two bugs are happening here to trigger the crash. 1) Flow dissection leaves with incorrect thoff field. 2) skb_probe_transport_header() sets transport header to this invalid thoff, even if pointing after skb valid data. 3) qdisc_pkt_len_init() reads out-of-bound data because it trusts tcp_hdrlen(skb) Possible fixes : - Full flow dissector validation before injecting bad DODGY packets in the stack. This approach was attempted here : https://patchwork.ozlabs.org/patch/861874/ - Have more robust functions in the core. This might be needed anyway for stable versions. This patch fixes the flow dissection issue. 
[1] CPU: 1 PID: 3144 Comm: syzkaller271204 Not tainted 4.15.0-rc4-mm1+ #49 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:355 [inline] kasan_report+0x23b/0x360 mm/kasan/report.c:413 __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:432 __tcp_hdrlen include/linux/tcp.h:35 [inline] tcp_hdrlen include/linux/tcp.h:40 [inline] qdisc_pkt_len_init net/core/dev.c:3160 [inline] __dev_queue_xmit+0x20d3/0x2200 net/core/dev.c:3465 dev_queue_xmit+0x17/0x20 net/core/dev.c:3554 packet_snd net/packet/af_packet.c:2943 [inline] packet_sendmsg+0x3ad5/0x60a0 net/packet/af_packet.c:2968 sock_sendmsg_nosec net/socket.c:628 [inline] sock_sendmsg+0xca/0x110 net/socket.c:638 sock_write_iter+0x31a/0x5d0 net/socket.c:907 call_write_iter include/linux/fs.h:1776 [inline] new_sync_write fs/read_write.c:469 [inline] __vfs_write+0x684/0x970 fs/read_write.c:482 vfs_write+0x189/0x510 fs/read_write.c:544 SYSC_write fs/read_write.c:589 [inline] SyS_write+0xef/0x220 fs/read_write.c:581 entry_SYSCALL_64_fastpath+0x1f/0x96 Fixes: 34fad54c2537 ("net: __skb_flow_dissect() must cap its return value") Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Acked-by: Jason Wang Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 15ce30063765..544bddf08e13 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -976,8 +976,8 @@ ip_proto_again: out_good: ret = true; - key_control->thoff = (u16)nhoff; out: + key_control->thoff = min_t(u16, nhoff, skb ? 
skb->len : hlen); key_basic->n_proto = proto; key_basic->ip_proto = ip_proto; @@ -985,7 +985,6 @@ out: out_bad: ret = false; - key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); goto out; } EXPORT_SYMBOL(__skb_flow_dissect); -- cgit v1.2.3 From b554b12addf939f826ec97c7c9ff0214a2801a0a Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 19 Jan 2018 07:24:12 +1000 Subject: drm/nouveau/drm/nouveau: Pass the proper arguments to nvif_object_map_handle() This is obviously wrong in the current code. Make sure to record the correct size of the arguments and pass the actual arguments to the nvif_object_map_handle() function. Suggested-by: Ben Skeggs Signed-off-by: Thierry Reding Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 435ff8662cfa..ef687414969e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1447,11 +1447,13 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) args.nv50.ro = 0; args.nv50.kind = mem->kind; args.nv50.comp = mem->comp; + argc = sizeof(args.nv50); break; case NVIF_CLASS_MEM_GF100: args.gf100.version = 0; args.gf100.ro = 0; args.gf100.kind = mem->kind; + argc = sizeof(args.gf100); break; default: WARN_ON(1); @@ -1459,7 +1461,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) } ret = nvif_object_map_handle(&mem->mem.object, - &argc, argc, + &args, argc, &handle, &length); if (ret != 1) return ret ? ret : -EINVAL; -- cgit v1.2.3 From e062a01e6daa2555ed13cf3f4e8cd3a05bbe474c Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 4 Jan 2018 11:29:09 +0000 Subject: drm/nouveau/bar/gk20a: Avoid bar teardown during init Commit bbb163e18960 ("drm/nouveau/bar: implement bar1 teardown") introduced add a teardown helper function for BAR1. 
During initialisation of the Nouveau, initially all the teardown helpers are called once, before calling their init counterparts. For gk20a, after the BAR1 teardown function is called, the device is hanging during the initialisation of the FB sub-device. At this point it is unclear why this is happening and this is still under investigation. However, this change is preventing Tegra124 devices from booting when Nouveau is enabled. To allow Tegra124 to boot, remove the teardown helper for gk20a. This is based upon a previous patch by Guillaume Tucker but limits the workaround to only gk20a GPUs. Fixes: bbb163e18960 ("drm/nouveau/bar: implement bar1 teardown") Reported-by: Guillaume Tucker Signed-off-by: Jon Hunter Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c | 3 ++- drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c index 9646adec57cb..243f0a5c8a62 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c @@ -73,7 +73,8 @@ static int nvkm_bar_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_bar *bar = nvkm_bar(subdev); - bar->func->bar1.fini(bar); + if (bar->func->bar1.fini) + bar->func->bar1.fini(bar); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c index b10077d38839..35878fb538f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c @@ -26,7 +26,6 @@ gk20a_bar_func = { .dtor = gf100_bar_dtor, .oneinit = gf100_bar_oneinit, .bar1.init = gf100_bar_bar1_init, - .bar1.fini = gf100_bar_bar1_fini, .bar1.wait = gf100_bar_bar1_wait, .bar1.vmm = gf100_bar_bar1_vmm, .flush = g84_bar_flush, -- cgit v1.2.3 From 2ffa64eba94fc8cc23d431cbec7365f3f07ff0ae Mon Sep 17 00:00:00 2001 From: Ben 
Skeggs Date: Fri, 19 Jan 2018 10:34:56 +1000 Subject: drm/nouveau/mmu/mcp77: fix regressions in stolen memory handling - Fixes addition of stolen memory base address to PTEs. - Removes support for compression. Signed-off-by: Ben Skeggs Tested-by: Pierre Moreau --- drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 4 +- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c | 41 ++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 10 +++++ drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c | 45 ++++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c | 16 ++++---- 7 files changed, 109 insertions(+), 10 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 975c42f620a0..542b7095b026 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -120,6 +120,7 @@ int nv41_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv44_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int nv50_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int g84_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int mcp77_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gf100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gk104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gk20a_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 00eeaaffeae5..08e77cd55e6e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1251,7 +1251,7 @@ 
nvaa_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = g84_mmu_new, + .mmu = mcp77_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, @@ -1283,7 +1283,7 @@ nvac_chipset = { .i2c = g94_i2c_new, .imem = nv50_instmem_new, .mc = g98_mc_new, - .mmu = g84_mmu_new, + .mmu = mcp77_mmu_new, .mxm = nv50_mxm_new, .pci = g94_pci_new, .therm = g84_therm_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild index 352a65f9371c..67ee983bb026 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild @@ -4,6 +4,7 @@ nvkm-y += nvkm/subdev/mmu/nv41.o nvkm-y += nvkm/subdev/mmu/nv44.o nvkm-y += nvkm/subdev/mmu/nv50.o nvkm-y += nvkm/subdev/mmu/g84.o +nvkm-y += nvkm/subdev/mmu/mcp77.o nvkm-y += nvkm/subdev/mmu/gf100.o nvkm-y += nvkm/subdev/mmu/gk104.o nvkm-y += nvkm/subdev/mmu/gk20a.o @@ -22,6 +23,7 @@ nvkm-y += nvkm/subdev/mmu/vmmnv04.o nvkm-y += nvkm/subdev/mmu/vmmnv41.o nvkm-y += nvkm/subdev/mmu/vmmnv44.o nvkm-y += nvkm/subdev/mmu/vmmnv50.o +nvkm-y += nvkm/subdev/mmu/vmmmcp77.o nvkm-y += nvkm/subdev/mmu/vmmgf100.o nvkm-y += nvkm/subdev/mmu/vmmgk104.o nvkm-y += nvkm/subdev/mmu/vmmgk20a.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c new file mode 100644 index 000000000000..0527b50730d9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "mem.h" +#include "vmm.h" + +#include + +static const struct nvkm_mmu_func +mcp77_mmu = { + .dma_bits = 40, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_NV50}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_NV50}, nv50_mem_new, nv50_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_NV50}, mcp77_vmm_new, false, 0x0200 }, + .kind = nv50_mmu_kind, + .kind_sys = true, +}; + +int +mcp77_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&mcp77_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index 6d8f61ea467a..da06e64d8a7d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -95,6 +95,9 @@ struct nvkm_vmm_desc { const struct nvkm_vmm_desc_func *func; }; +extern const struct nvkm_vmm_desc nv50_vmm_desc_12[]; +extern const struct nvkm_vmm_desc nv50_vmm_desc_16[]; + extern const struct nvkm_vmm_desc gk104_vmm_desc_16_12[]; extern const struct nvkm_vmm_desc gk104_vmm_desc_16_16[]; extern const struct nvkm_vmm_desc gk104_vmm_desc_17_12[]; @@ -169,6 +172,11 @@ int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32, const char *, struct nvkm_vmm **); int nv04_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); +int nv50_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); +void nv50_vmm_part(struct nvkm_vmm *, struct nvkm_memory *); +int nv50_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); +void nv50_vmm_flush(struct nvkm_vmm *, int); + int gf100_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); @@ -200,6 +208,8 @@ int nv44_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int nv50_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, 
struct nvkm_vmm **); +int mcp77_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); int g84_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int gf100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c new file mode 100644 index 000000000000..e63d984cbfd4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c @@ -0,0 +1,45 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "vmm.h" + +static const struct nvkm_vmm_func +mcp77_vmm = { + .join = nv50_vmm_join, + .part = nv50_vmm_part, + .valid = nv50_vmm_valid, + .flush = nv50_vmm_flush, + .page_block = 1 << 29, + .page = { + { 16, &nv50_vmm_desc_16[0], NVKM_VMM_PAGE_xVxx }, + { 12, &nv50_vmm_desc_12[0], NVKM_VMM_PAGE_xVHx }, + {} + } +}; + +int +mcp77_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&mcp77_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c index 863a2edd9861..64f75d906202 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c @@ -32,7 +32,7 @@ static inline void nv50_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) { - u64 next = addr | map->type, data; + u64 next = addr + map->type, data; u32 pten; int log2blk; @@ -69,7 +69,7 @@ nv50_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes); nvkm_kmap(pt->memory); while (ptes--) { - const u64 data = *map->dma++ | map->type; + const u64 data = *map->dma++ + map->type; VMM_WO064(pt, vmm, ptei++ * 8, data); map->type += map->ctag; } @@ -163,21 +163,21 @@ nv50_vmm_pgd = { .pde = nv50_vmm_pgd_pde, }; -static const struct nvkm_vmm_desc +const struct nvkm_vmm_desc nv50_vmm_desc_12[] = { { PGT, 17, 8, 0x1000, &nv50_vmm_pgt }, { PGD, 11, 0, 0x0000, &nv50_vmm_pgd }, {} }; -static const struct nvkm_vmm_desc +const struct nvkm_vmm_desc nv50_vmm_desc_16[] = { { PGT, 13, 8, 0x1000, &nv50_vmm_pgt }, { PGD, 11, 0, 0x0000, &nv50_vmm_pgd }, {} }; -static void +void nv50_vmm_flush(struct nvkm_vmm *vmm, int level) { struct nvkm_subdev *subdev = &vmm->mmu->subdev; @@ -223,7 +223,7 @@ nv50_vmm_flush(struct 
nvkm_vmm *vmm, int level) mutex_unlock(&subdev->mutex); } -static int +int nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, struct nvkm_vmm_map *map) { @@ -321,7 +321,7 @@ nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return 0; } -static void +void nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) { struct nvkm_vmm_join *join; @@ -335,7 +335,7 @@ nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst) } } -static int +int nv50_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) { const u32 pd_offset = vmm->mmu->func->vmm.pd_offset; -- cgit v1.2.3 From ed604c5da34d96ae289c67c46dedd7dfd9fa795e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 18 Jan 2018 15:42:10 +0100 Subject: mlxsw: spectrum_router: Free LPM tree upon failure When a new LPM tree is created, we try to replace the trees in the existing virtual routers with it. If we fail, the tree needs to be freed. Currently, this does not happen in the unlikely case where we fail to bind the tree to the first virtual router, since its reference count never transitions from 1 to 0. Fix that by taking a reference before binding the tree. Fixes: fc922bb0dd94 ("mlxsw: spectrum_router: Use one LPM tree for all virtual routers") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 434b3922b34f..6c0391c13fe0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -821,13 +821,18 @@ static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; int err; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); - if (err) - return err; fib->lpm_tree = new_tree; mlxsw_sp_lpm_tree_hold(new_tree); + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + goto err_tree_bind; mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree); return 0; + +err_tree_bind: + mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); + fib->lpm_tree = old_tree; + return err; } static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, @@ -868,11 +873,14 @@ err_tree_replace: return err; no_replace: - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); - if (err) - return err; fib->lpm_tree = new_tree; mlxsw_sp_lpm_tree_hold(new_tree); + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) { + mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); + fib->lpm_tree = NULL; + return err; + } return 0; } -- cgit v1.2.3 From 128bb975dc3c25d00de04e503e2fe0a780d04459 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 18 Jan 2018 20:51:12 +0300 Subject: ip6_gre: init dev->mtu and dev->hard_header_len correctly Commit b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") moved dev->mtu initialization from ip6gre_tunnel_setup() to ip6gre_tunnel_init(), as a result, the previously set values, before ndo_init(), are reset in the following cases: * rtnl_create_link() can update dev->mtu from IFLA_MTU parameter. 
* ip6gre_tnl_link_config() is invoked before ndo_init() in netlink and ioctl setup, so ndo_init() can reset MTU adjustments with the lower device MTU as well, dev->mtu and dev->hard_header_len. Not applicable for ip6gretap because it has one more call to ip6gre_tnl_link_config(tunnel, 1) in ip6gre_tap_init(). Fix the first case by updating dev->mtu with 'tb[IFLA_MTU]' parameter if a user sets it manually on a device creation, and fix the second one by moving ip6gre_tnl_link_config() call after register_netdevice(). Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") Fixes: db2ec95d1ba4 ("ip6_gre: Fix MTU setting") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 772695960890..873549228ccb 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, 1); if (register_netdevice(dev) < 0) goto failed_free; + ip6gre_tnl_link_config(nt, 1); + /* Can use a lockless transmit, unless we generate output sequences */ if (!(nt->parms.o_flags & TUNNEL_SEQ)) dev->features |= NETIF_F_LLTX; @@ -1303,7 +1304,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { - struct ip6_tnl *tunnel; int ret; ret = ip6gre_tunnel_init_common(dev); @@ -1312,10 +1312,6 @@ static int ip6gre_tap_init(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - tunnel = netdev_priv(dev); - - ip6gre_tnl_link_config(tunnel, 1); - return 0; } @@ -1408,12 +1404,16 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, nt->dev = dev; nt->net = dev_net(dev); - ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); err = register_netdevice(dev); if (err) goto out; + ip6gre_tnl_link_config(nt, 
!tb[IFLA_MTU]); + + if (tb[IFLA_MTU]) + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + dev_hold(dev); ip6gre_tunnel_link(ign, nt); -- cgit v1.2.3 From 591ff9ea51cec683e4cb378a3469228ba1d69010 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 18 Jan 2018 10:40:03 -0800 Subject: ipv6: don't let tb6_root node share routes with other node After commit 4512c43eac7e, if we add a route to the subtree of tb6_root which does not have any route attached to it yet, the current code will let tb6_root and the node in the subtree share the same route. This could cause problem cause tb6_root has RTN_INFO flag marked and the tree repair and clean up code will not work properly. This commit makes sure tb6_root->leaf points back to null_entry instead of sharing route with other node. It fixes the following syzkaller reported issue: BUG: KASAN: use-after-free in ipv6_prefix_equal include/net/ipv6.h:540 [inline] BUG: KASAN: use-after-free in fib6_add_1+0x165f/0x1790 net/ipv6/ip6_fib.c:618 Read of size 8 at addr ffff8801bc043498 by task syz-executor5/19819 CPU: 1 PID: 19819 Comm: syz-executor5 Not tainted 4.15.0-rc7+ #186 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x25b/0x340 mm/kasan/report.c:409 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430 ipv6_prefix_equal include/net/ipv6.h:540 [inline] fib6_add_1+0x165f/0x1790 net/ipv6/ip6_fib.c:618 fib6_add+0x5fa/0x1540 net/ipv6/ip6_fib.c:1214 __ip6_ins_rt+0x6c/0x90 net/ipv6/route.c:1003 ip6_route_add+0x141/0x190 net/ipv6/route.c:2790 ipv6_route_ioctl+0x4db/0x6b0 net/ipv6/route.c:3299 inet6_ioctl+0xef/0x1e0 net/ipv6/af_inet6.c:520 sock_do_ioctl+0x65/0xb0 net/socket.c:958 sock_ioctl+0x2c2/0x440 net/socket.c:1055 vfs_ioctl fs/ioctl.c:46 [inline] 
do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686 SYSC_ioctl fs/ioctl.c:701 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692 entry_SYSCALL_64_fastpath+0x23/0x9a RIP: 0033:0x452ac9 RSP: 002b:00007fd42b321c58 EFLAGS: 00000212 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 000000000071bea0 RCX: 0000000000452ac9 RDX: 0000000020fd7000 RSI: 000000000000890b RDI: 0000000000000013 RBP: 000000000000049e R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000212 R12: 00000000006f4f70 R13: 00000000ffffffff R14: 00007fd42b3226d4 R15: 0000000000000000 Fixes: 4512c43eac7e ("ipv6: remove null_entry before adding default route") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9dcc3924a975..217683d40f12 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1226,8 +1226,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } if (!rcu_access_pointer(fn->leaf)) { - atomic_inc(&rt->rt6i_ref); - rcu_assign_pointer(fn->leaf, rt); + if (fn->fn_flags & RTN_TL_ROOT) { + /* put back null_entry for root node */ + rcu_assign_pointer(fn->leaf, + info->nl_net->ipv6.ip6_null_entry); + } else { + atomic_inc(&rt->rt6i_ref); + rcu_assign_pointer(fn->leaf, rt); + } } fn = sn; } -- cgit v1.2.3 From f68979433deaa8a8a8b6396f944a0928a35713dc Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 18 Jan 2018 19:05:01 -0600 Subject: ibmvnic: Fix IP offload control buffer Set some missing fields in the IP control offload buffer. This buffer is used to enable checksum and TCP segmentation offload in the VNIC server. The buffer length field and the checksum offloading bits were not set properly, so fix that here. Signed-off-by: Thomas Falcon Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4b3df17c7a45..0a3a844f6473 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3346,7 +3346,11 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) return; } + adapter->ip_offload_ctrl.len = + cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); adapter->ip_offload_ctrl.version = cpu_to_be32(INITIAL_VERSION_IOB); + adapter->ip_offload_ctrl.ipv4_chksum = buf->ipv4_chksum; + adapter->ip_offload_ctrl.ipv6_chksum = buf->ipv6_chksum; adapter->ip_offload_ctrl.tcp_ipv4_chksum = buf->tcp_ipv4_chksum; adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum; adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum; -- cgit v1.2.3 From a0dca10fce42ae82651edbe682b1c637a8ecd365 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 18 Jan 2018 19:29:48 -0600 Subject: ibmvnic: Fix IPv6 packet descriptors Packet descriptor generation for IPv6 is broken. Properly set L3 and L4 protocol flags for IPv6 descriptors. Signed-off-by: Thomas Falcon Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 0a3a844f6473..ab2e1917cd04 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1276,6 +1276,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned char *dst; u64 *handle_array; int index = 0; + u8 proto = 0; int ret = 0; if (adapter->resetting) { @@ -1364,17 +1365,18 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) } if (skb->protocol == htons(ETH_P_IP)) { - if (ip_hdr(skb)->version == 4) - tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4; - else if (ip_hdr(skb)->version == 6) - tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6; - - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP; - else if (ip_hdr(skb)->protocol != IPPROTO_TCP) - tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP; + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4; + proto = ip_hdr(skb)->protocol; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6; + proto = ipv6_hdr(skb)->nexthdr; } + if (proto == IPPROTO_TCP) + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP; + else if (proto == IPPROTO_UDP) + tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP; + if (skb->ip_summed == CHECKSUM_PARTIAL) { tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD; hdrs += 2; -- cgit v1.2.3 From b889bf66d001a46a95deef18ddbe6db84645ed24 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 21 Nov 2017 09:38:30 +0800 Subject: blk-throttle: track read and write request individually In mixed read/write workload on SSD, write latency is much lower than read. But now we only track and record read latency and then use it as threshold base for both read and write io latency accounting. As a result, write io latency will always be considered as good and bad_bio_cnt is much smaller than 20% of bio_cnt. 
That is to mean, the tg to be checked will be treated as idle most of the time and still let others dispatch more ios, even it is truly running under low limit and wants its low limit to be guaranteed, which is not we expected in fact. So track read and write request individually, which can bring more precise latency control for low limit idle detection. Signed-off-by: Joseph Qi Reviewed-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-throttle.c | 134 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 79 insertions(+), 55 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e8428417ac0a..e136f5ef9577 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -216,9 +216,9 @@ struct throtl_data unsigned int scale; - struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE]; - struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE]; - struct latency_bucket __percpu *latency_buckets; + struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE]; + struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE]; + struct latency_bucket __percpu *latency_buckets[2]; unsigned long last_calculate_time; unsigned long filtered_latency; @@ -2050,10 +2050,10 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW static void throtl_update_latency_buckets(struct throtl_data *td) { - struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE]; - int i, cpu; - unsigned long last_latency = 0; - unsigned long latency; + struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE]; + int i, cpu, rw; + unsigned long last_latency[2] = { 0 }; + unsigned long latency[2]; if (!blk_queue_nonrot(td->queue)) return; @@ -2062,56 +2062,67 @@ static void throtl_update_latency_buckets(struct throtl_data *td) td->last_calculate_time = jiffies; memset(avg_latency, 0, sizeof(avg_latency)); - for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { - struct latency_bucket *tmp = &td->tmp_buckets[i]; - - 
for_each_possible_cpu(cpu) { - struct latency_bucket *bucket; - - /* this isn't race free, but ok in practice */ - bucket = per_cpu_ptr(td->latency_buckets, cpu); - tmp->total_latency += bucket[i].total_latency; - tmp->samples += bucket[i].samples; - bucket[i].total_latency = 0; - bucket[i].samples = 0; - } + for (rw = READ; rw <= WRITE; rw++) { + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { + struct latency_bucket *tmp = &td->tmp_buckets[rw][i]; + + for_each_possible_cpu(cpu) { + struct latency_bucket *bucket; + + /* this isn't race free, but ok in practice */ + bucket = per_cpu_ptr(td->latency_buckets[rw], + cpu); + tmp->total_latency += bucket[i].total_latency; + tmp->samples += bucket[i].samples; + bucket[i].total_latency = 0; + bucket[i].samples = 0; + } - if (tmp->samples >= 32) { - int samples = tmp->samples; + if (tmp->samples >= 32) { + int samples = tmp->samples; - latency = tmp->total_latency; + latency[rw] = tmp->total_latency; - tmp->total_latency = 0; - tmp->samples = 0; - latency /= samples; - if (latency == 0) - continue; - avg_latency[i].latency = latency; + tmp->total_latency = 0; + tmp->samples = 0; + latency[rw] /= samples; + if (latency[rw] == 0) + continue; + avg_latency[rw][i].latency = latency[rw]; + } } } - for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { - if (!avg_latency[i].latency) { - if (td->avg_buckets[i].latency < last_latency) - td->avg_buckets[i].latency = last_latency; - continue; - } + for (rw = READ; rw <= WRITE; rw++) { + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { + if (!avg_latency[rw][i].latency) { + if (td->avg_buckets[rw][i].latency < last_latency[rw]) + td->avg_buckets[rw][i].latency = + last_latency[rw]; + continue; + } - if (!td->avg_buckets[i].valid) - latency = avg_latency[i].latency; - else - latency = (td->avg_buckets[i].latency * 7 + - avg_latency[i].latency) >> 3; + if (!td->avg_buckets[rw][i].valid) + latency[rw] = avg_latency[rw][i].latency; + else + latency[rw] = (td->avg_buckets[rw][i].latency * 7 + + 
avg_latency[rw][i].latency) >> 3; - td->avg_buckets[i].latency = max(latency, last_latency); - td->avg_buckets[i].valid = true; - last_latency = td->avg_buckets[i].latency; + td->avg_buckets[rw][i].latency = max(latency[rw], + last_latency[rw]); + td->avg_buckets[rw][i].valid = true; + last_latency[rw] = td->avg_buckets[rw][i].latency; + } } for (i = 0; i < LATENCY_BUCKET_SIZE; i++) throtl_log(&td->service_queue, - "Latency bucket %d: latency=%ld, valid=%d", i, - td->avg_buckets[i].latency, td->avg_buckets[i].valid); + "Latency bucket %d: read latency=%ld, read valid=%d, " + "write latency=%ld, write valid=%d", i, + td->avg_buckets[READ][i].latency, + td->avg_buckets[READ][i].valid, + td->avg_buckets[WRITE][i].latency, + td->avg_buckets[WRITE][i].valid); } #else static inline void throtl_update_latency_buckets(struct throtl_data *td) @@ -2258,16 +2269,17 @@ static void throtl_track_latency(struct throtl_data *td, sector_t size, struct latency_bucket *latency; int index; - if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ || + if (!td || td->limit_index != LIMIT_LOW || + !(op == REQ_OP_READ || op == REQ_OP_WRITE) || !blk_queue_nonrot(td->queue)) return; index = request_bucket_index(size); - latency = get_cpu_ptr(td->latency_buckets); + latency = get_cpu_ptr(td->latency_buckets[op]); latency[index].total_latency += time; latency[index].samples++; - put_cpu_ptr(td->latency_buckets); + put_cpu_ptr(td->latency_buckets[op]); } void blk_throtl_stat_add(struct request *rq, u64 time_ns) @@ -2286,6 +2298,7 @@ void blk_throtl_bio_endio(struct bio *bio) unsigned long finish_time; unsigned long start_time; unsigned long lat; + int rw = bio_data_dir(bio); tg = bio->bi_cg_private; if (!tg) @@ -2314,7 +2327,7 @@ void blk_throtl_bio_endio(struct bio *bio) bucket = request_bucket_index( blk_stat_size(&bio->bi_issue_stat)); - threshold = tg->td->avg_buckets[bucket].latency + + threshold = tg->td->avg_buckets[rw][bucket].latency + tg->latency_target; if (lat > threshold) 
tg->bad_bio_cnt++; @@ -2407,9 +2420,16 @@ int blk_throtl_init(struct request_queue *q) td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); if (!td) return -ENOMEM; - td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) * + td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) * + LATENCY_BUCKET_SIZE, __alignof__(u64)); + if (!td->latency_buckets[READ]) { + kfree(td); + return -ENOMEM; + } + td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) * LATENCY_BUCKET_SIZE, __alignof__(u64)); - if (!td->latency_buckets) { + if (!td->latency_buckets[WRITE]) { + free_percpu(td->latency_buckets[READ]); kfree(td); return -ENOMEM; } @@ -2428,7 +2448,8 @@ int blk_throtl_init(struct request_queue *q) /* activate policy */ ret = blkcg_activate_policy(q, &blkcg_policy_throtl); if (ret) { - free_percpu(td->latency_buckets); + free_percpu(td->latency_buckets[READ]); + free_percpu(td->latency_buckets[WRITE]); kfree(td); } return ret; @@ -2439,7 +2460,8 @@ void blk_throtl_exit(struct request_queue *q) BUG_ON(!q->td); throtl_shutdown_wq(q); blkcg_deactivate_policy(q, &blkcg_policy_throtl); - free_percpu(q->td->latency_buckets); + free_percpu(q->td->latency_buckets[READ]); + free_percpu(q->td->latency_buckets[WRITE]); kfree(q->td); } @@ -2457,8 +2479,10 @@ void blk_throtl_register_queue(struct request_queue *q) } else { td->throtl_slice = DFL_THROTL_SLICE_HD; td->filtered_latency = LATENCY_FILTERED_HD; - for (i = 0; i < LATENCY_BUCKET_SIZE; i++) - td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY; + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { + td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY; + td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY; + } } #ifndef CONFIG_BLK_DEV_THROTTLING_LOW /* if no low limit, use previous default */ -- cgit v1.2.3 From 3214d01f139b7544e870fc0b7fcce8da13c1cb51 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jan 2018 16:06:47 +1100 Subject: KVM: PPC: Book3S: Provide 
information about hardware/firmware CVE workarounds This adds a new ioctl, KVM_PPC_GET_CPU_CHAR, that gives userspace information about the underlying machine's level of vulnerability to the recently announced vulnerabilities CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754, and whether the machine provides instructions to assist software to work around the vulnerabilities. The ioctl returns two u64 words describing characteristics of the CPU and required software behaviour respectively, plus two mask words which indicate which bits have been filled in by the kernel, for extensibility. The bit definitions are the same as for the new H_GET_CPU_CHARACTERISTICS hypercall. There is also a new capability, KVM_CAP_PPC_GET_CPU_CHAR, which indicates whether the new ioctl is available. Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 46 +++++++++++++ arch/powerpc/include/uapi/asm/kvm.h | 25 +++++++ arch/powerpc/kvm/powerpc.c | 131 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 3 + 4 files changed, 205 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 57d3ee9e4bde..fc3ae951bc07 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3403,6 +3403,52 @@ invalid, if invalid pages are written to (e.g. after the end of memory) or if no page table is present for the addresses (e.g. when using hugepages). +4.108 KVM_PPC_GET_CPU_CHAR + +Capability: KVM_CAP_PPC_GET_CPU_CHAR +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_ppc_cpu_char (out) +Returns: 0 on successful completion + -EFAULT if struct kvm_ppc_cpu_char cannot be written + +This ioctl gives userspace information about certain characteristics +of the CPU relating to speculative execution of instructions and +possible information leakage resulting from speculative execution (see +CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). 
The information is +returned in struct kvm_ppc_cpu_char, which looks like this: + +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +For extensibility, the character_mask and behaviour_mask fields +indicate which bits of character and behaviour have been filled in by +the kernel. If the set of defined bits is extended in future then +userspace will be able to tell whether it is running on a kernel that +knows about the new bits. + +The character field describes attributes of the CPU which can help +with preventing inadvertent information disclosure - specifically, +whether there is an instruction to flash-invalidate the L1 data cache +(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set +to a mode where entries can only be used by the thread that created +them, whether the bcctr[l] instruction prevents speculation, and +whether a speculation barrier instruction (ori 31,31,0) is provided. + +The behaviour field describes actions that software should take to +prevent inadvertent information disclosure, and thus describes which +vulnerabilities the hardware is subject to; specifically whether the +L1 data cache should be flushed when returning to user mode from the +kernel, and whether a speculation barrier should be placed between an +array bounds check and the array access. + +These fields use the same bit definitions as the new +H_GET_CPU_CHARACTERISTICS hypercall. + 5. 
The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 61d6049f4c1e..637b7263cb86 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info { __u32 ap_encodings[8]; }; +/* For KVM_PPC_GET_CPU_CHAR */ +struct kvm_ppc_cpu_char { + __u64 character; /* characteristics of the CPU */ + __u64 behaviour; /* recommended software behaviour */ + __u64 character_mask; /* valid bits in character */ + __u64 behaviour_mask; /* valid bits in behaviour */ +}; + +/* + * Values for character and character_mask. + * These are identical to the values used by H_GET_CPU_CHARACTERISTICS. + */ +#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63) +#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60) +#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59) +#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) +#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) +#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) + +#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) +#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) +#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1915e86cef6f..0a7c88786ec0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -39,6 +39,10 @@ #include #include #include +#ifdef CONFIG_PPC_PSERIES +#include +#include +#endif #include "timing.h" #include "irq.h" @@ -548,6 +552,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif + case KVM_CAP_PPC_GET_CPU_CHAR: r = 1; break; @@ -1759,6 +1764,124 @@ static int 
kvm_vm_ioctl_enable_cap(struct kvm *kvm, return r; } +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * These functions check whether the underlying hardware is safe + * against attacks based on observing the effects of speculatively + * executed instructions, and whether it supplies instructions for + * use in workarounds. The information comes from firmware, either + * via the device tree on powernv platforms or from an hcall on + * pseries platforms. + */ +#ifdef CONFIG_PPC_PSERIES +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct h_cpu_char_result c; + unsigned long rc; + + if (!machine_is(pseries)) + return -ENOTTY; + + rc = plpar_get_cpu_characteristics(&c); + if (rc == H_SUCCESS) { + cp->character = c.character; + cp->behaviour = c.behaviour; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | + KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + } + return 0; +} +#else +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + return -ENOTTY; +} +#endif + +static inline bool have_fw_feat(struct device_node *fw_features, + const char *state, const char *name) +{ + struct device_node *np; + bool r = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + r = of_property_read_bool(np, state); + of_node_put(np); + } + return r; +} + +static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ + struct device_node *np, *fw_features; + int r; + + memset(cp, 0, sizeof(*cp)); + r = pseries_get_cpu_char(cp); + if (r != -ENOTTY) + return r; + + np = of_find_node_by_name(NULL, "ibm,opal"); + if (np) { + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + if 
(!fw_features) + return 0; + if (have_fw_feat(fw_features, "enabled", + "inst-spec-barrier-ori31,31,0")) + cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31; + if (have_fw_feat(fw_features, "enabled", + "fw-bcctrl-serialized")) + cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-ori30,30,0")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30; + if (have_fw_feat(fw_features, "enabled", + "inst-l1d-flush-trig2")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2; + if (have_fw_feat(fw_features, "enabled", + "fw-l1d-thread-split")) + cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV; + if (have_fw_feat(fw_features, "enabled", + "fw-count-cache-disabled")) + cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | + KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | + KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | + KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | + KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + + if (have_fw_feat(fw_features, "enabled", + "speculation-policy-favor-security")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY; + if (!have_fw_feat(fw_features, "disabled", + "needs-l1d-flush-msr-pr-0-to-1")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR; + if (!have_fw_feat(fw_features, "disabled", + "needs-spec-barrier-for-bound-checks")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | + KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + + of_node_put(fw_features); + } + + return 0; +} +#endif + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1861,6 +1984,14 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_GET_CPU_CHAR: { + struct kvm_ppc_cpu_char cpuchar; + + r = kvmppc_get_cpu_char(&cpuchar); + if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar))) + r = -EFAULT; + break; + } 
default: { struct kvm *kvm = filp->private_data; r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 496e59a2738b..7a99b98cf88e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_AIS_MIGRATION 150 +#define KVM_CAP_PPC_GET_CPU_CHAR 151 #ifdef KVM_CAP_IRQ_ROUTING @@ -1261,6 +1262,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) /* Available with KVM_CAP_PPC_RADIX_MMU */ #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) +/* Available with KVM_CAP_PPC_GET_CPU_CHAR */ +#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From aa5dd6fa6f5d4bdc82a67e952bba8ad2e98d77e2 Mon Sep 17 00:00:00 2001 From: Aviad Yehezkel Date: Thu, 18 Jan 2018 15:41:51 +0200 Subject: xfrm: fix error flow in case of add state fails If add state fails in case of device offload, netdev refcount will be negative since gc task is attempting to dev_free this state. This is fixed by putting NULL in state dev field. 
Signed-off-by: Aviad Yehezkel Signed-off-by: Boris Pismeny Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 30e5746085b8..ac9477189d1c 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -102,6 +102,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { + xso->dev = NULL; dev_put(dev); return err; } -- cgit v1.2.3 From a912a7584ec39647fb032c1001eb69746f27b1d3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 17 Jan 2018 19:34:08 +0200 Subject: x86/platform/intel-mid: Move PCI initialization to arch_init() ACPI redefines x86_init.pci.init when enabled. Though we still need special treatment for MID platforms. Move our specific callback to x86_init.pci.arch_init() and, by calling acpi_noirq_set(), take back a control over IRQ assignment. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Link: http://lkml.kernel.org/r/20180117173409.88136-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/pci/intel_mid_pci.c | 1 + arch/x86/platform/intel-mid/intel-mid.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 511921045312..43867bc85368 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -300,6 +300,7 @@ int __init intel_mid_pci_init(void) pci_root_ops = intel_mid_pci_ops; pci_soc_mode = 1; /* Continue with standard init */ + acpi_noirq_set(); return 1; } diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 86676cec99a1..2c67bae6bb53 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -194,7 +194,7 @@ void __init x86_intel_mid_early_setup(void) x86_platform.calibrate_tsc = intel_mid_calibrate_tsc; x86_platform.get_nmi_reason = intel_mid_get_nmi_reason; - x86_init.pci.init = intel_mid_pci_init; + x86_init.pci.arch_init = intel_mid_pci_init; x86_init.pci.fixup_irqs = x86_init_noop; legacy_pic = &null_legacy_pic; -- cgit v1.2.3 From a5c03c31af2291f13689d11760c0b59fb70c9a5a Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 16 Jan 2018 09:10:02 +0000 Subject: x86/efi: Clarify that reset attack mitigation needs appropriate userspace Some distributions have turned on the reset attack mitigation feature, which is designed to force the platform to clear the contents of RAM if the machine is shut down uncleanly. However, in order for the platform to be able to determine whether the shutdown was clean or not, userspace has to be configured to clear the MemoryOverwriteRequest flag on shutdown - otherwise the firmware will end up clearing RAM on every reboot, which is unnecessarily time consuming. 
Add some additional clarity to the kconfig text to reduce the risk of systems being configured this way. Signed-off-by: Matthew Garrett Acked-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index aab108e82f78..6047ed4e8a3d 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -159,7 +159,10 @@ config RESET_ATTACK_MITIGATION using the TCG Platform Reset Attack Mitigation specification. This protects against an attacker forcibly rebooting the system while it still contains secrets in RAM, booting another OS and extracting the - secrets. + secrets. This should only be enabled when userland is configured to + clear the MemoryOverwriteRequest flag on clean shutdown after secrets + have been evicted, since otherwise it will trigger even on clean + reboots. endmenu -- cgit v1.2.3 From e6b90db83f8b33735fe5a008ff171f69527a5305 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 17 Jan 2018 11:25:33 +0000 Subject: mtd: onenand: omap2: Remove redundant dev_err call in omap2_onenand_probe() There is an error message within devm_ioremap_resource already, so remove the dev_err call to avoid a redundant error message.
Signed-off-by: Wei Yongjun Signed-off-by: Boris Brezillon --- drivers/mtd/onenand/omap2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index a4a2159bcfb7..87c34f607a75 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -532,10 +532,8 @@ static int omap2_onenand_probe(struct platform_device *pdev) c->phys_base = res->start; c->onenand.base = devm_ioremap_resource(dev, res); - if (IS_ERR(c->onenand.base)) { - dev_err(dev, "Cannot reserve memory region %pR\n", res); + if (IS_ERR(c->onenand.base)) return PTR_ERR(c->onenand.base); - } c->int_gpiod = devm_gpiod_get_optional(dev, "int", GPIOD_IN); if (IS_ERR(c->int_gpiod)) { -- cgit v1.2.3 From a76497dc49518bc162ebc8ee4f139a075b9f9ad0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 19 Jan 2018 07:55:31 +0000 Subject: mtd: nand: marvell: fix spelling mistake: "suceed"-> "succeed" Trivial fix to spelling mistakes in dev_err error message text. Signed-off-by: Colin Ian King Acked-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/marvell_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/marvell_nand.c b/drivers/mtd/nand/marvell_nand.c index b8fec6093b75..4bd53b360277 100644 --- a/drivers/mtd/nand/marvell_nand.c +++ b/drivers/mtd/nand/marvell_nand.c @@ -517,7 +517,7 @@ static int marvell_nfc_prepare_cmd(struct nand_chip *chip) /* Poll ND_RUN and clear NDSR before issuing any command */ ret = marvell_nfc_wait_ndrun(chip); if (ret) { - dev_err(nfc->dev, "Last operation did not suceed\n"); + dev_err(nfc->dev, "Last operation did not succeed\n"); return ret; } -- cgit v1.2.3 From e06a181b5dad6b1904e09e62c924868d7cfeacb6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 19 Jan 2018 07:59:54 +0000 Subject: mtd: nand: marvell: remove redundant variable 'oob_len' Variable oob_len is assigned and never read, hence it is redundant and can be removed. 
Cleans up clang warnings: drivers/mtd/nand/marvell_nand.c:1356:6: warning: Value stored to 'oob_len' during its initialization is never read drivers/mtd/nand/marvell_nand.c:1369:4: warning: Value stored to 'oob_len' is never read Signed-off-by: Colin Ian King Acked-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/marvell_nand.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/mtd/nand/marvell_nand.c b/drivers/mtd/nand/marvell_nand.c index 4bd53b360277..f15ab37edf4e 100644 --- a/drivers/mtd/nand/marvell_nand.c +++ b/drivers/mtd/nand/marvell_nand.c @@ -1353,7 +1353,6 @@ static int marvell_nfc_hw_ecc_bch_write_page_raw(struct mtd_info *mtd, int data_len = lt->data_bytes; int spare_len = lt->spare_bytes; int ecc_len = lt->ecc_bytes; - int oob_len = spare_len + ecc_len; int spare_offset = 0; int ecc_offset = (lt->full_chunk_cnt * lt->spare_bytes) + lt->last_spare_bytes; @@ -1366,7 +1365,6 @@ static int marvell_nfc_hw_ecc_bch_write_page_raw(struct mtd_info *mtd, data_len = lt->last_data_bytes; spare_len = lt->last_spare_bytes; ecc_len = lt->last_ecc_bytes; - oob_len = spare_len + ecc_len; } /* Point to the column of the next chunk */ -- cgit v1.2.3 From 1c9609e3a8cf5997bd35205cfda1ff2218ee793b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Jan 2018 14:18:34 +0100 Subject: ALSA: hda - Reduce the suspend time consumption for ALC256 ALC256 has its own quirk to override the shutup call, and it contains the COEF update for pulling down the headset jack control. Currently, the COEF update is called after clearing the headphone pin, and this seems to trigger a stall of the codec communication, which results in a long delay of over a second at suspend. A quick resolution is to swap the calls: first do the COEF update, then clear the headphone pin.
Fixes: 4a219ef8f370 ("ALSA: hda/realtek - Add ALC256 HP depop function") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=198503 Reported-by: Paul Menzel Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0004e282a837..23475888192b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3154,11 +3154,13 @@ static void alc256_shutup(struct hda_codec *codec) if (hp_pin_sense) msleep(85); + /* 3k pull low control for Headset jack. */ + /* NOTE: call this before clearing the pin, otherwise codec stalls */ + alc_update_coef_idx(codec, 0x46, 0, 3 << 12); + snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0); - alc_update_coef_idx(codec, 0x46, 0, 3 << 12); /* 3k pull low control for Headset jack. */ - if (hp_pin_sense) msleep(100); -- cgit v1.2.3 From 6509614fdd2d05c6926d50901a45d5dfb852b715 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 19 Jan 2018 06:38:03 -0800 Subject: hwmon: (k10temp) Add temperature offset for Ryzen 1900X Like the other CPUs from the same series, the 1900X has a temperature offset of 27 degrees C. 
Signed-off-by: Guenter Roeck --- drivers/hwmon/k10temp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 0721e175664a..06b4e1c78bd8 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -86,6 +86,7 @@ static const struct tctl_offset tctl_offset_table[] = { { 0x17, "AMD Ryzen 7 1800X", 20000 }, { 0x17, "AMD Ryzen Threadripper 1950X", 27000 }, { 0x17, "AMD Ryzen Threadripper 1920X", 27000 }, + { 0x17, "AMD Ryzen Threadripper 1900X", 27000 }, { 0x17, "AMD Ryzen Threadripper 1950", 10000 }, { 0x17, "AMD Ryzen Threadripper 1920", 10000 }, { 0x17, "AMD Ryzen Threadripper 1910", 10000 }, -- cgit v1.2.3 From c13e7f313da33d1488355440f1a10feb1897480a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 19 Jan 2018 14:32:08 +0100 Subject: ARM: sunxi_defconfig: Enable CMA The DRM driver most notably, but also out of tree drivers (for now) like the VPU or GPU drivers, are quite big consumers of large, contiguous memory buffers. However, the sunxi_defconfig doesn't enable CMA in order to mitigate that, which makes them almost unusable. Enable it to make sure it somewhat works. 
Cc: Signed-off-by: Maxime Ripard Signed-off-by: Arnd Bergmann --- arch/arm/configs/sunxi_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 5caaf971fb50..df433abfcb02 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -10,6 +10,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_AEABI=y CONFIG_HIGHMEM=y +CONFIG_CMA=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_CPU_FREQ=y @@ -33,6 +34,7 @@ CONFIG_CAN_SUN4I=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_AHCI_SUNXI=y -- cgit v1.2.3 From b7563e2796f8b23c98afcfea7363194227fa089d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Jan 2018 11:12:05 +0100 Subject: phy: work around 'phys' references to usb-nop-xceiv devices Stefan Wahren reports a problem with a warning fix that was merged for v4.15: we had lots of device nodes with a 'phys' property pointing to a device node that is not compliant with the binding documented in Documentation/devicetree/bindings/phy/phy-bindings.txt This generally works because USB HCD drivers that support both the generic phy subsystem and the older usb-phy subsystem ignore most errors from phy_get() and related calls and then use the usb-phy driver instead. However, it turns out that making the usb-nop-xceiv device compatible with the generic-phy binding changes the phy_get() return code from -EINVAL to -EPROBE_DEFER, and the dwc2 usb controller driver for bcm2835 now returns -EPROBE_DEFER from its probe function rather than ignoring the failure, breaking all USB support on raspberry-pi when CONFIG_GENERIC_PHY is enabled. The same code is used in the dwc3 driver and the usb_add_hcd() function, so a reasonable assumption would be that many other platforms are affected as well. 
I have reviewed all the related patches and concluded that "usb-nop-xceiv" is the only USB phy that is affected by the change, and since it is by far the most commonly referenced phy, all the other USB phy drivers appear to be used in ways that are either safe in DT (they don't use the 'phys' property), or in the driver (they already ignore -EPROBE_DEFER from generic-phy when usb-phy is available). To work around the problem, this adds a special case to _of_phy_get() so we ignore any PHY node that is compatible with "usb-nop-xceiv", as we know that this can never load no matter how much we defer. In the future, we might implement a generic-phy driver for "usb-nop-xceiv" and then remove this workaround. Since we generally want older kernels to also work with the fixed devicetree files, it would be good to backport the patch into stable kernels as well (3.13+ are possibly affected), even though they don't contain any of the patches that may have caused regressions. Fixes: 014d6da6cb25 ARM: dts: bcm283x: Fix DTC warnings about missing phy-cells Fixes: c5bbf358b790 arm: dts: nspire: Add missing #phy-cells to usb-nop-xceiv Fixes: 44e5dced2ef6 arm: dts: marvell: Add missing #phy-cells to usb-nop-xceiv Fixes: f568f6f554b8 ARM: dts: omap: Add missing #phy-cells to usb-nop-xceiv Fixes: d745d5f277bf ARM: dts: imx51-zii-rdu1: Add missing #phy-cells to usb-nop-xceiv Fixes: 915fbe59cbf2 ARM: dts: imx: Add missing #phy-cells to usb-nop-xceiv Link: https://marc.info/?l=linux-usb&m=151518314314753&w=2 Link: https://patchwork.kernel.org/patch/10158145/ Cc: stable@vger.kernel.org Cc: Felipe Balbi Cc: Eric Anholt Tested-by: Stefan Wahren Acked-by: Rob Herring Tested-by: Hans Verkuil Acked-by: Kishon Vijay Abraham I Signed-off-by: Arnd Bergmann --- drivers/phy/phy-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index b4964b067aec..8f6e8e28996d 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@
-410,6 +410,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index) if (ret) return ERR_PTR(-ENODEV); + /* This phy type handled by the usb-phy subsystem for now */ + if (of_device_is_compatible(args.np, "usb-nop-xceiv")) + return ERR_PTR(-ENODEV); + mutex_lock(&phy_provider_mutex); phy_provider = of_phy_provider_lookup(args.np); if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) { -- cgit v1.2.3 From 6f41c34d69eb005e7848716bbcafc979b35037d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 18 Jan 2018 16:28:26 +0100 Subject: x86/mce: Make machine check speculation protected The machine check idtentry uses an indirect branch directly from the low level code. This evades the speculation protection. Replace it by a direct call into C code and issue the indirect call there so the compiler can apply the proper speculation protection. Signed-off-by: Thomas Gleixner Reviewed-by:Borislav Petkov Reviewed-by: David Woodhouse Niced-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801181626290.1847@nanos --- arch/x86/entry/entry_64.S | 2 +- arch/x86/include/asm/traps.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 5 +++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d54a0ede61d1..63f4320602a3 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1258,7 +1258,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 #endif #ifdef CONFIG_X86_MCE -idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) +idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 31051f35cbb7..3de69330e6c5 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -88,6 +88,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); #ifdef CONFIG_X86_32 
dotraplinkage void do_iret_error(struct pt_regs *, long); #endif +dotraplinkage void do_mce(struct pt_regs *, long); static inline int get_si_code(unsigned long condition) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3b413065c613..a9e898b71208 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1788,6 +1788,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; +dotraplinkage void do_mce(struct pt_regs *regs, long error_code) +{ + machine_check_vector(regs, error_code); +} + /* * Called for each booted CPU to set up machine checks. * Must be called with preempt off: -- cgit v1.2.3 From 736e80a4213e9bbce40a7c050337047128b472ac Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:14:21 +0900 Subject: retpoline: Introduce start/end markers of indirect thunk Introduce start/end markers of __x86_indirect_thunk_* functions. To make it easy, consolidate the .text.__x86.indirect_thunk.* sections into one .text.__x86.indirect_thunk section, put it at the end of the kernel text section, and add __indirect_thunk_start/end so that other subsystems (e.g. kprobes) can identify it. 
Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/151629206178.10241.6828804696410044771.stgit@devbox --- arch/x86/include/asm/nospec-branch.h | 3 +++ arch/x86/kernel/vmlinux.lds.S | 6 ++++++ arch/x86/lib/retpoline.S | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7b45d8424150..19ba5ad19c65 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -194,6 +194,9 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +extern char __indirect_thunk_start[]; +extern char __indirect_thunk_end[]; + /* * On VMEXIT we must ensure that no RSB predictions learned in the guest * can be followed in the host, by overwriting the RSB completely. Both diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1e413a9326aa..9b138a06c1a4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -124,6 +124,12 @@ SECTIONS ASSERT(. 
- _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif +#ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; + *(.text.__x86.indirect_thunk) + __indirect_thunk_end = .; +#endif + /* End of text section */ _etext = .; } :text = 0x9090 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index cb45c6cb465f..d3415dc30f82 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -9,7 +9,7 @@ #include .macro THUNK reg - .section .text.__x86.indirect_thunk.\reg + .section .text.__x86.indirect_thunk ENTRY(__x86_indirect_thunk_\reg) CFI_STARTPROC -- cgit v1.2.3 From c1804a236894ecc942da7dc6c5abe209e56cba93 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:14:51 +0900 Subject: kprobes/x86: Blacklist indirect thunk functions for kprobes Mark __x86_indirect_thunk_* functions as blacklist for kprobes because those functions can be called from anywhere in the kernel including blacklist functions of kprobes. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/151629209111.10241.5444852823378068683.stgit@devbox --- arch/x86/lib/retpoline.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index d3415dc30f82..dfb2ba91b670 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg) * than one per register with the correct names. So we do it * the simple and nasty way... 
*/ -#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) +#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) #define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) GENERATE_THUNK(_ASM_AX) -- cgit v1.2.3 From c86a32c09f8ced67971a2310e3b0dda4d1749007 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 19 Jan 2018 01:15:20 +0900 Subject: kprobes/x86: Disable optimizing on the function jumps to indirect thunk Since indirect jump instructions will be replaced by jumps to __x86_indirect_thunk_*, those jmp instructions must be treated as indirect jumps. Since optprobe prohibits optimizing probes in a function which uses an indirect jump, it also needs to find the functions which jump to __x86_indirect_thunk_* and disable optimization for them. Add a check that the jump target address is between __indirect_thunk_start and __indirect_thunk_end when optimizing a kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Andi Kleen Cc: Peter Zijlstra Cc: Ananth N Mavinakayanahalli Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/151629212062.10241.6991266100233002273.stgit@devbox --- arch/x86/kernel/kprobes/opt.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4f98aad38237..3668f28cf5fc 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "common.h" @@ -205,7 +206,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether insn is indirect jump */ -static int insn_is_indirect_jump(struct insn *insn) +static int __insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -239,6 +240,26 
@@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) return (start <= target && target <= start + len); } +static int insn_is_indirect_jump(struct insn *insn) +{ + int ret = __insn_is_indirect_jump(insn); + +#ifdef CONFIG_RETPOLINE + /* + * Jump to x86_indirect_thunk_* is treated as an indirect jump. + * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with + * older gcc may use indirect jump. So we add this check instead of + * replace indirect-jump check. + */ + if (!ret) + ret = insn_jump_into_range(insn, + (unsigned long)__indirect_thunk_start, + (unsigned long)__indirect_thunk_end - + (unsigned long)__indirect_thunk_start); +#endif + return ret; +} + /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { -- cgit v1.2.3 From 98f0fceec7f84d80bc053e49e596088573086421 Mon Sep 17 00:00:00 2001 From: "zhenwei.pi" Date: Thu, 18 Jan 2018 09:04:52 +0800 Subject: x86/pti: Document fix wrong index In section <2. Runtime Cost>, fix wrong index. Signed-off-by: zhenwei.pi Signed-off-by: Thomas Gleixner Cc: dave.hansen@linux.intel.com Link: https://lkml.kernel.org/r/1516237492-27739-1-git-send-email-zhenwei.pi@youruncloud.com --- Documentation/x86/pti.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt index d11eff61fc9a..5cd58439ad2d 100644 --- a/Documentation/x86/pti.txt +++ b/Documentation/x86/pti.txt @@ -78,7 +78,7 @@ this protection comes at a cost: non-PTI SYSCALL entry code, so requires mapping fewer things into the userspace page tables. The downside is that stacks must be switched at entry time. - d. Global pages are disabled for all kernel structures not + c. Global pages are disabled for all kernel structures not mapped into both kernel and userspace page tables. This feature of the MMU allows different processes to share TLB entries mapping the kernel. 
Losing the feature means more -- cgit v1.2.3 From 3f7d875566d8e79c5e0b2c9a413e91b2c29e0854 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 17 Jan 2018 14:53:28 -0800 Subject: x86/retpoline: Optimize inline assembler for vmexit_fill_RSB The generated assembler for the C fill RSB inline asm operations has several issues: - The C code sets up the loop register, which is then immediately overwritten in __FILL_RETURN_BUFFER with the same value again. - The C code also passes in the iteration count in another register, which is not used at all. Remove these two unnecessary operations. Just rely on the single constant passed to the macro for the iterations. Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: dave.hansen@intel.com Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: arjan@linux.intel.com Link: https://lkml.kernel.org/r/20180117225328.15414-1-andi@firstfloor.org --- arch/x86/include/asm/nospec-branch.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 19ba5ad19c65..4ad41087ce0e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -206,16 +206,17 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops = RSB_CLEAR_LOOPS / 2; + unsigned long loops; asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE("jmp 910f", __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), X86_FEATURE_RETPOLINE) "910:" - : "=&r" (loops), ASM_CALL_CONSTRAINT - : "r" (loops) : "memory" ); + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } + #endif /* __ASSEMBLY__ */ #endif /* __NOSPEC_BRANCH_H__ */ -- cgit v1.2.3 From f80207727aaca3aa34a9cd80659393534de69cad Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 18 Jan 2018 16:33:50 -0800 Subject: mm/memory.c: release locked page in 
do_swap_page() James reported a bug in swap paging-in from his testing. It is that do_swap_page doesn't release locked page so system hang-up happens due to a deadlock on PG_locked. It was introduced by 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") because I missed swap cache hit places to update swapcache variable to work well with other logics against swapcache in do_swap_page. This patch fixes it. Debugged by James Bottomley. Link: http://lkml.kernel.org/r/<1514407817.4169.4.camel@HansenPartnership.com> Link: http://lkml.kernel.org/r/20180102235606.GA19438@bbox Signed-off-by: Minchan Kim Reported-by: James Bottomley Acked-by: Hugh Dickins Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index ca5674cbaff2..793004608332 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2857,8 +2857,11 @@ int do_swap_page(struct vm_fault *vmf) int ret = 0; bool vma_readahead = swap_use_vma_readahead(); - if (vma_readahead) + if (vma_readahead) { page = swap_readahead_detect(vmf, &swap_ra); + swapcache = page; + } + if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) { if (page) put_page(page); @@ -2889,9 +2892,12 @@ int do_swap_page(struct vm_fault *vmf) delayacct_set_flag(DELAYACCT_PF_SWAPIN); - if (!page) + if (!page) { page = lookup_swap_cache(entry, vma_readahead ? 
vma : NULL, vmf->address); + swapcache = page; + } + if (!page) { struct swap_info_struct *si = swp_swap_info(entry); -- cgit v1.2.3 From 6bec6ad77fac3d29aed0d8e0b7526daedc964970 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 18 Jan 2018 16:33:53 -0800 Subject: mm/page_owner.c: remove drain_all_pages from init_early_allocated_pages When setting page_owner = on, the following warning can be seen in the boot log: WARNING: CPU: 0 PID: 0 at mm/page_alloc.c:2537 drain_all_pages+0x171/0x1a0 Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.15.0-rc7-next-20180109-1-default+ #7 Hardware name: Dell Inc. Latitude E7470/0T6HHJ, BIOS 1.11.3 11/09/2016 RIP: 0010:drain_all_pages+0x171/0x1a0 Call Trace: init_page_owner+0x4e/0x260 start_kernel+0x3e6/0x4a6 ? set_init_arg+0x55/0x55 secondary_startup_64+0xa5/0xb0 Code: c5 ed ff 89 df 48 c7 c6 20 3b 71 82 e8 f9 4b 52 00 3b 05 d7 0b f8 00 89 c3 72 d5 5b 5d 41 5 This warning is shown because we are calling drain_all_pages() in init_early_allocated_pages(), but mm_percpu_wq is not up yet, it is being set up later on in kernel_init_freeable() -> init_mm_internals(). 
Link: http://lkml.kernel.org/r/20180109153921.GA13070@techadventures.net Signed-off-by: Oscar Salvador Acked-by: Joonsoo Kim Cc: Vlastimil Babka Cc: Michal Hocko Cc: Ayush Mittal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_owner.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index 8592543a0f15..270a8219ccd0 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -616,7 +616,6 @@ static void init_early_allocated_pages(void) { pg_data_t *pgdat; - drain_all_pages(NULL); for_each_online_pgdat(pgdat) init_zones_in_node(pgdat); } -- cgit v1.2.3 From be9fa663d325a102de53c9ab6d00a31dcb36bb73 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 Jan 2018 16:33:57 -0800 Subject: scripts/decodecode: fix decoding for AArch64 (arm64) instructions There are a couple of problems with the decodecode script and arm64: 1. AArch64 objdump refuses to disassemble .4byte directives as instructions, insisting that they are data values and displaying them as: a94153f3 .word 0xa94153f3 <-- trapping instruction This is resolved by using the .inst directive instead. 2. Disassembly of branch instructions attempts to provide the target as an offset from a symbol, e.g.: 0: 34000082 cbz w2, 10 <.text+0x10> however this falls foul of the grep -v, which matches lines containing ".text" and ends up removing all branch instructions from the dump. This patch resolves both issues by using the .inst directive for 4-byte quantities on arm64 and stripping the resulting binaries (as is done on arm already) to remove the mapping symbols. 
Link: http://lkml.kernel.org/r/1506596147-23630-1-git-send-email-will.deacon@arm.com Signed-off-by: Will Deacon Reviewed-by: Dave Martin Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/decodecode | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/decodecode b/scripts/decodecode index 438120da1361..5ea071099330 100755 --- a/scripts/decodecode +++ b/scripts/decodecode @@ -59,6 +59,14 @@ disas() { ${CROSS_COMPILE}strip $1.o fi + if [ "$ARCH" = "arm64" ]; then + if [ $width -eq 4 ]; then + type=inst + fi + + ${CROSS_COMPILE}strip $1.o + fi + ${CROSS_COMPILE}objdump $OBJDUMPFLAGS -S $1.o | \ grep -v "/tmp\|Disassembly\|\.text\|^$" > $1.dis 2>&1 } -- cgit v1.2.3 From 883d50f56d263f70fd73c0d96b09eb36c34e9305 Mon Sep 17 00:00:00 2001 From: Xi Kangjie Date: Thu, 18 Jan 2018 16:34:00 -0800 Subject: scripts/gdb/linux/tasks.py: fix get_thread_info Since kernel 4.9, thread_info has been moved into task_struct and is no longer located at the bottom of the kernel stack. See commits c65eacbe290b ("sched/core: Allow putting thread_info into task_struct") and 15f4eae70d36 ("x86: Move thread_info into task_struct"). 
Before fix: (gdb) set $current = $lx_current() (gdb) p $lx_thread_info($current) $1 = {flags = 1470918301} (gdb) p $current.thread_info $2 = {flags = 2147483648} After fix: (gdb) p $lx_thread_info($current) $1 = {flags = 2147483648} (gdb) p $current.thread_info $2 = {flags = 2147483648} Link: http://lkml.kernel.org/r/20180118210159.17223-1-imxikangjie@gmail.com Fixes: 15f4eae70d36 ("x86: Move thread_info into task_struct") Signed-off-by: Xi Kangjie Acked-by: Jan Kiszka Acked-by: Kieran Bingham Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py index 1bf949c43b76..f6ab3ccf698f 100644 --- a/scripts/gdb/linux/tasks.py +++ b/scripts/gdb/linux/tasks.py @@ -96,6 +96,8 @@ def get_thread_info(task): thread_info_addr = task.address + ia64_task_size thread_info = thread_info_addr.cast(thread_info_ptr_type) else: + if task.type.fields()[0].type == thread_info_type.get_type(): + return task['thread_info'] thread_info = task['stack'].cast(thread_info_ptr_type) return thread_info.dereference() -- cgit v1.2.3 From 8bb2ee192e482c5d500df9f2b1b26a560bd3026f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 18 Jan 2018 16:34:05 -0800 Subject: proc: fix coredump vs read /proc/*/stat race do_task_stat() accesses IP and SP of a task without bumping reference count of a stack (which became an entity with independent lifetime at some point). 
Steps to reproduce: #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <sys/time.h> #include <sys/resource.h> #include <unistd.h> #include <sys/wait.h> int main(void) { setrlimit(RLIMIT_CORE, &(struct rlimit){}); while (1) { char buf[64]; char buf2[4096]; pid_t pid; int fd; pid = fork(); if (pid == 0) { *(volatile int *)0 = 0; } snprintf(buf, sizeof(buf), "/proc/%u/stat", pid); fd = open(buf, O_RDONLY); read(fd, buf2, sizeof(buf2)); close(fd); waitpid(pid, NULL, 0); } return 0; } BUG: unable to handle kernel paging request at 0000000000003fd8 IP: do_task_stat+0x8b4/0xaf0 PGD 800000003d73e067 P4D 800000003d73e067 PUD 3d558067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 1417 Comm: a.out Not tainted 4.15.0-rc8-dirty #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc27 04/01/2014 RIP: 0010:do_task_stat+0x8b4/0xaf0 Call Trace: proc_single_show+0x43/0x70 seq_read+0xe6/0x3b0 __vfs_read+0x1e/0x120 vfs_read+0x84/0x110 SyS_read+0x3d/0xa0 entry_SYSCALL_64_fastpath+0x13/0x6c RIP: 0033:0x7f4d7928cba0 RSP: 002b:00007ffddb245158 EFLAGS: 00000246 Code: 03 b7 a0 01 00 00 4c 8b 4c 24 70 4c 8b 44 24 78 4c 89 74 24 18 e9 91 f9 ff ff f6 45 4d 02 0f 84 fd f7 ff ff 48 8b 45 40 48 89 ef <48> 8b 80 d8 3f 00 00 48 89 44 24 20 e8 9b 97 eb ff 48 89 44 24 RIP: do_task_stat+0x8b4/0xaf0 RSP: ffffc90000607cc8 CR2: 0000000000003fd8 John Ogness said: for my tests I added an else case to verify that the race is hit and correctly mitigated. 
Link: http://lkml.kernel.org/r/20180116175054.GA11513@avx2 Signed-off-by: Alexey Dobriyan Reported-by: "Kohli, Gaurav" Tested-by: John Ogness Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 79375fc115d2..d67a72dcb92c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -430,8 +430,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * safe because the task has stopped executing permanently. */ if (permitted && (task->flags & PF_DUMPCORE)) { - eip = KSTK_EIP(task); - esp = KSTK_ESP(task); + if (try_get_task_stack(task)) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + put_task_stack(task); + } } } -- cgit v1.2.3 From a3d6c976f71902388e444594daa902032b5a45fa Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 18 Jan 2018 16:34:08 -0800 Subject: sparse doesn't support struct randomization Without this patch, I drown in a sea of unknown attribute warnings Link: http://lkml.kernel.org/r/20180117024539.27354-1-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Kees Cook Cc: Ingo Molnar Cc: Josh Poimboeuf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2272ded07496..631354acfa72 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -219,7 +219,7 @@ /* Mark a function definition as prohibited from being cloned. 
*/ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) -#ifdef RANDSTRUCT_PLUGIN +#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__) #define __randomize_layout __attribute__((randomize_layout)) #define __no_randomize_layout __attribute__((no_randomize_layout)) #endif -- cgit v1.2.3 From 4b664e739f7743f91e1d12ebfb7a76307ebea702 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 18 Jan 2018 13:52:17 -0800 Subject: ia64: Rewrite atomic_add and atomic_sub Force __builtin_constant_p to evaluate whether the argument to atomic_add & atomic_sub is constant in the front-end before optimisations which can lead GCC to output a call to __bad_increment_for_ia64_fetch_and_add(). See GCC bugzilla 83653. Signed-off-by: Jakub Jelinek Signed-off-by: Matthew Wilcox Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/atomic.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 28e02c99be6d..762eeb0fcc1d 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -65,29 +65,30 @@ ia64_atomic_fetch_##op (int i, atomic_t *v) \ ATOMIC_OPS(add, +) ATOMIC_OPS(sub, -) -#define atomic_add_return(i,v) \ +#ifdef __OPTIMIZE__ +#define __ia64_atomic_const(i) __builtin_constant_p(i) ? \ + ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 || \ + (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0 + +#define atomic_add_return(i, v) \ ({ \ - int __ia64_aar_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ - || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ - || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ - || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ - ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ - : ia64_atomic_add(__ia64_aar_i, v); \ + int __i = (i); \ + static const int __ia64_atomic_p = __ia64_atomic_const(i); \ + __ia64_atomic_p ? 
ia64_fetch_and_add(__i, &(v)->counter) : \ + ia64_atomic_add(__i, v); \ }) -#define atomic_sub_return(i,v) \ +#define atomic_sub_return(i, v) \ ({ \ - int __ia64_asr_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ - || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ - || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ - || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ - ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ - : ia64_atomic_sub(__ia64_asr_i, v); \ + int __i = (i); \ + static const int __ia64_atomic_p = __ia64_atomic_const(i); \ + __ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) : \ + ia64_atomic_sub(__i, v); \ }) +#else +#define atomic_add_return(i, v) ia64_atomic_add(i, v) +#define atomic_sub_return(i, v) ia64_atomic_sub(i, v) +#endif #define atomic_fetch_add(i,v) \ ({ \ -- cgit v1.2.3 From 8c7a8d1c4b9c30a2be3b31a2e6af1cefd45574eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 19 Jan 2018 11:00:54 -0800 Subject: lib/scatterlist: Fix chaining support in sgl_alloc_order() This patch avoids that workloads with large block sizes (megabytes) can trigger the following call stack with the ib_srpt driver (that driver is the only driver that chains scatterlists allocated by sgl_alloc_order()): BUG: Bad page state in process kworker/0:1H pfn:2423a78 page:fffffb03d08e9e00 count:-3 mapcount:0 mapping: (null) index:0x0 flags: 0x57ffffc0000000() raw: 0057ffffc0000000 0000000000000000 0000000000000000 fffffffdffffffff raw: dead000000000100 dead000000000200 0000000000000000 0000000000000000 page dumped because: nonzero _count CPU: 0 PID: 733 Comm: kworker/0:1H Tainted: G I 4.15.0-rc7.bart+ #1 Hardware name: HP ProLiant DL380 G7, BIOS P67 08/16/2015 Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] Call Trace: dump_stack+0x5c/0x83 bad_page+0xf5/0x10f get_page_from_freelist+0xa46/0x11b0 __alloc_pages_nodemask+0x103/0x290 sgl_alloc_order+0x101/0x180 target_alloc_sgl+0x2c/0x40 [target_core_mod] 
srpt_alloc_rw_ctxs+0x173/0x2d0 [ib_srpt] srpt_handle_new_iu+0x61e/0x7f0 [ib_srpt] __ib_process_cq+0x55/0xa0 [ib_core] ib_cq_poll_work+0x1b/0x60 [ib_core] process_one_work+0x141/0x340 worker_thread+0x47/0x3e0 kthread+0xf5/0x130 ret_from_fork+0x1f/0x30 Fixes: e80a0af4759a ("lib/scatterlist: Introduce sgl_alloc() and sgl_free()") Reported-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Bart Van Assche Cc: Nicholas A. Bellinger Cc: Laurence Oberman Signed-off-by: Jens Axboe --- drivers/target/target_core_transport.c | 2 +- include/linux/scatterlist.h | 1 + lib/scatterlist.c | 32 +++++++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index a001ba711cca..c03a78ee26cd 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2300,7 +2300,7 @@ queue_full: void target_free_sgl(struct scatterlist *sgl, int nents) { - sgl_free(sgl); + sgl_free_n_order(sgl, nents, 0); } EXPORT_SYMBOL(target_free_sgl); diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index b8a7c1d1dbe3..22b2131bcdcd 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -282,6 +282,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, gfp_t gfp, unsigned int *nent_p); struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, unsigned int *nent_p); +void sgl_free_n_order(struct scatterlist *sgl, int nents, int order); void sgl_free_order(struct scatterlist *sgl, int order); void sgl_free(struct scatterlist *sgl); #endif /* CONFIG_SGL_ALLOC */ diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9afc9b432083..53728d391d3a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -512,7 +512,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, if (!sgl) return NULL; - sg_init_table(sgl, nent); + sg_init_table(sgl, nalloc); sg = sgl; while (length) { 
elem_len = min_t(u64, length, PAGE_SIZE << order); @@ -526,7 +526,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, length -= elem_len; sg = sg_next(sg); } - WARN_ON_ONCE(sg); + WARN_ONCE(length, "length = %lld\n", length); if (nent_p) *nent_p = nent; return sgl; @@ -549,22 +549,44 @@ struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, EXPORT_SYMBOL(sgl_alloc); /** - * sgl_free_order - free a scatterlist and its pages + * sgl_free_n_order - free a scatterlist and its pages * @sgl: Scatterlist with one or more elements + * @nents: Maximum number of elements to free * @order: Second argument for __free_pages() + * + * Notes: + * - If several scatterlists have been chained and each chain element is + * freed separately then it's essential to set nents correctly to avoid that a + * page would get freed twice. + * - All pages in a chained scatterlist can be freed at once by setting @nents + * to a high number. */ -void sgl_free_order(struct scatterlist *sgl, int order) +void sgl_free_n_order(struct scatterlist *sgl, int nents, int order) { struct scatterlist *sg; struct page *page; + int i; - for (sg = sgl; sg; sg = sg_next(sg)) { + for_each_sg(sgl, sg, nents, i) { + if (!sg) + break; page = sg_page(sg); if (page) __free_pages(page, order); } kfree(sgl); } +EXPORT_SYMBOL(sgl_free_n_order); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + sgl_free_n_order(sgl, INT_MAX, order); +} EXPORT_SYMBOL(sgl_free_order); /** -- cgit v1.2.3 From c77ff7fd03ddca8face268c4cf093c0edf4bcf1f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 19 Jan 2018 08:58:54 -0800 Subject: blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly() Most blk-mq functions have a name that follows the pattern blk_mq_${action}. 
However, the function name blk_mq_request_direct_issue is an exception. Hence rename this function. This patch does not change any functionality. Reviewed-by: Mike Snitzer Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq.c | 4 ++-- block/blk-mq.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c21a16e9fdf9..1645a1e54a37 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2520,7 +2520,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * * bypass a potential scheduler on the bottom device for * insert. */ - return blk_mq_request_direct_issue(rq); + return blk_mq_request_issue_directly(rq); } spin_lock_irqsave(q->queue_lock, flags); diff --git a/block/blk-mq.c b/block/blk-mq.c index 74a4f237ba91..0fc6c95e5a29 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1785,7 +1785,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, * RCU or SRCU read lock is needed before checking quiesced flag. * * When queue is stopped or quiesced, ignore 'bypass_insert' from - * blk_mq_request_direct_issue(), and return BLK_STS_OK to caller, + * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller, * and avoid driver to try to dispatch again. 
*/ if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) { @@ -1833,7 +1833,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, hctx_unlock(hctx, srcu_idx); } -blk_status_t blk_mq_request_direct_issue(struct request *rq) +blk_status_t blk_mq_request_issue_directly(struct request *rq) { blk_status_t ret; int srcu_idx; diff --git a/block/blk-mq.h b/block/blk-mq.h index e3ebc93646ca..88c558f71819 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -75,7 +75,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); /* Used by blk_insert_cloned_request() to issue request directly */ -blk_status_t blk_mq_request_direct_issue(struct request *rq); +blk_status_t blk_mq_request_issue_directly(struct request *rq); /* * CPU -> queue mappings -- cgit v1.2.3 From ae943d20624de0a6aac7dd0597616dce2c498029 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 19 Jan 2018 08:58:55 -0800 Subject: blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays Make sure that calling blk_mq_run_hw_queue() or blk_mq_kick_requeue_list() triggers a queue run without delay even if blk_mq_delay_run_hw_queue() has been called recently and if its delay has not yet expired. 
Reviewed-by: Mike Snitzer Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 0fc6c95e5a29..43e7449723e0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -785,7 +785,7 @@ EXPORT_SYMBOL(blk_mq_add_to_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q) { - kblockd_schedule_delayed_work(&q->requeue_work, 0); + kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0); } EXPORT_SYMBOL(blk_mq_kick_requeue_list); @@ -1401,9 +1401,8 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, put_cpu(); } - kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), - &hctx->run_work, - msecs_to_jiffies(msecs)); + kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, + msecs_to_jiffies(msecs)); } void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) -- cgit v1.2.3 From f5ced52aaa5494c1feb9f80252cb2a2cde0dace8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 19 Jan 2018 08:58:56 -0800 Subject: block: Remove kblockd_schedule_delayed_work{,_on}() The previous patch removed all users of these two functions. Hence also remove the functions themselves. 
Reviewed-by: Mike Snitzer Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-core.c | 14 -------------- include/linux/blkdev.h | 2 -- 2 files changed, 16 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 1645a1e54a37..cdae69be68e9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3441,20 +3441,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); -int kblockd_schedule_delayed_work(struct delayed_work *dwork, - unsigned long delay) -{ - return queue_delayed_work(kblockd_workqueue, dwork, delay); -} -EXPORT_SYMBOL(kblockd_schedule_delayed_work); - -int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, - unsigned long delay) -{ - return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); -} -EXPORT_SYMBOL(kblockd_schedule_delayed_work_on); - /** * blk_start_plug - initialize blk_plug and track it inside the task_struct * @plug: The &struct blk_plug that needs to be initialized diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 71a9371c8182..afc43fb63c16 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1800,8 +1800,6 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio) int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); -int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); -int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP -- cgit v1.2.3 From 475a055e62a1eb92f4358ad8a9059df973c190ac Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Sat, 20 Jan 2018 07:34:25 +0800 Subject: blk-throttle: use queue_is_rq_based use queue_is_rq_based instead of open code. 
Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e136f5ef9577..c475f0fe3530 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2489,7 +2489,7 @@ void blk_throtl_register_queue(struct request_queue *q) td->throtl_slice = DFL_THROTL_SLICE_HD; #endif - td->track_bio_latency = !q->mq_ops && !q->request_fn; + td->track_bio_latency = !queue_is_rq_based(q); if (!td->track_bio_latency) blk_stat_enable_accounting(q); } -- cgit v1.2.3 From 11f19ec025dd421c54978c69e42d86758fa310de Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 19 Jan 2018 11:06:17 +0100 Subject: x86/jailhouse: Set X86_FEATURE_TSC_KNOWN_FREQ Otherwise, Linux will not recognize precalibrated_tsc_khz and will disable the TSC as a clocksource. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: Jailhouse Link: https://lkml.kernel.org/r/975fbfc9-2a64-cc56-40d5-164992ec3916@siemens.com --- arch/x86/kernel/jailhouse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 7ade152133c7..2b7ebbe9043d 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -176,6 +176,7 @@ static void __init jailhouse_init_platform(void) pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport); precalibrated_tsc_khz = setup_data.tsc_khz; + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); pci_probe = 0; -- cgit v1.2.3 From 3b42349d56c96e144401d2317d8eeb9937511423 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 19 Jan 2018 11:06:30 +0100 Subject: x86/jailhouse: Respect pci=lastbus command line settings Limiting the scan width to the known last bus via the command line can noticeably accelerate the boot.
Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Cc: Jailhouse Link: https://lkml.kernel.org/r/51f5fe62-ca8f-9286-5cdb-39df3fad78b4@siemens.com --- arch/x86/kernel/jailhouse.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 2b7ebbe9043d..b68fd895235a 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -119,8 +119,10 @@ static int __init jailhouse_pci_arch_init(void) /* * There are no bridges on the virtual PCI root bus under Jailhouse, * thus no other way to discover all devices than a full scan. + * Respect any overrides via the command line, though. */ - pcibios_last_bus = 0xff; + if (pcibios_last_bus < 0) + pcibios_last_bus = 0xff; return 0; } -- cgit v1.2.3 From c495a9275eeca0bbc9358de7200e58184e864aeb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 19 Jan 2018 18:39:01 +0100 Subject: mtd: nand: marvell: Fix missing memory allocation modifier The function marvell_nfc_init_dma() allocates a DMA buffer without the GFP_KERNEL modifier, that triggers this warning: "marvell_nfc_init_dma() error: no modifiers for allocation." Fix this by using (GFP_KERNEL | GFP_DMA) instead of only GFP_DMA as the probe happens in non-interrupt context. Fixes: 02f26ecf8c77 ("mtd: nand: add reworked Marvell NAND controller driver") Reported-by: Dan Carpenter Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/marvell_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/marvell_nand.c b/drivers/mtd/nand/marvell_nand.c index f15ab37edf4e..2196f2a233d6 100644 --- a/drivers/mtd/nand/marvell_nand.c +++ b/drivers/mtd/nand/marvell_nand.c @@ -2649,7 +2649,7 @@ static int marvell_nfc_init_dma(struct marvell_nfc *nfc) * for DMA transfers and then copy the desired amount of data to * the provided buffer. 
*/ - nfc->dma_buf = kmalloc(MAX_CHUNK_SIZE, GFP_DMA); + nfc->dma_buf = kmalloc(MAX_CHUNK_SIZE, GFP_KERNEL | GFP_DMA); if (!nfc->dma_buf) return -ENOMEM; -- cgit v1.2.3 From c1a72e2dbb4abb90bd408480d7c48ba40cb799ce Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 19 Jan 2018 19:11:27 +0100 Subject: mtd: nand: Fix build issues due to an anonymous union GCC-4.4.4 raises errors when assigning a parameter in an anonymous union, leading to this kind of failure: drivers/mtd/nand/marvell_nand.c:1936: warning: missing braces around initializer warning: (near initialization for '(anonymous)[1].') error: unknown field 'data' specified in initializer error: unknown field 'addr' specified in initializer Work around the situation by naming these unions. Fixes: 8878b126df76 ("mtd: nand: add ->exec_op() implementation") Reported-by: Andrew Morton Signed-off-by: Miquel Raynal Tested-by: Andrew Morton Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 13 +++++++------ include/linux/mtd/rawnand.h | 8 ++++---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 3ff77bef9739..66b67014508f 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2335,23 +2335,24 @@ nand_op_parser_must_split_instr(const struct nand_op_parser_pattern_elem *pat, { switch (pat->type) { case NAND_OP_ADDR_INSTR: - if (!pat->addr.maxcycles) + if (!pat->ctx.addr.maxcycles) break; if (instr->ctx.addr.naddrs - *start_offset > - pat->addr.maxcycles) { - *start_offset += pat->addr.maxcycles; + pat->ctx.addr.maxcycles) { + *start_offset += pat->ctx.addr.maxcycles; return true; } break; case NAND_OP_DATA_IN_INSTR: case NAND_OP_DATA_OUT_INSTR: - if (!pat->data.maxlen) + if (!pat->ctx.data.maxlen) break; - if (instr->ctx.data.len - *start_offset > pat->data.maxlen) { - *start_offset += pat->data.maxlen; + if (instr->ctx.data.len - *start_offset > + pat->ctx.data.maxlen) { + *start_offset += 
pat->ctx.data.maxlen; return true; } break; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 469dc724f5df..56c5570aadbe 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -962,7 +962,7 @@ struct nand_op_parser_pattern_elem { union { struct nand_op_parser_addr_constraints addr; struct nand_op_parser_data_constraints data; - }; + } ctx; }; #define NAND_OP_PARSER_PAT_CMD_ELEM(_opt) \ @@ -975,21 +975,21 @@ struct nand_op_parser_pattern_elem { { \ .type = NAND_OP_ADDR_INSTR, \ .optional = _opt, \ - .addr.maxcycles = _maxcycles, \ + .ctx.addr.maxcycles = _maxcycles, \ } #define NAND_OP_PARSER_PAT_DATA_IN_ELEM(_opt, _maxlen) \ { \ .type = NAND_OP_DATA_IN_INSTR, \ .optional = _opt, \ - .data.maxlen = _maxlen, \ + .ctx.data.maxlen = _maxlen, \ } #define NAND_OP_PARSER_PAT_DATA_OUT_ELEM(_opt, _maxlen) \ { \ .type = NAND_OP_DATA_OUT_INSTR, \ .optional = _opt, \ - .data.maxlen = _maxlen, \ + .ctx.data.maxlen = _maxlen, \ } #define NAND_OP_PARSER_PAT_WAITRDY_ELEM(_opt) \ -- cgit v1.2.3 From 35b3fde6203b932b2b1a5b53b3d8808abc9c4f60 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 17 Jan 2018 14:44:34 +0100 Subject: KVM: s390: wire up bpb feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new firmware interfaces for branch prediction behaviour changes are transparently available for the guest. Nevertheless, there is new state attached that should be migrated and properly resetted. Provide a mechanism for handling reset, migration and VSIE. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck [Changed capability number to 152. 
- Radim] Signed-off-by: Radim Krčmář --- arch/s390/include/asm/kvm_host.h | 3 ++- arch/s390/include/uapi/asm/kvm.h | 5 ++++- arch/s390/kvm/kvm-s390.c | 12 ++++++++++++ arch/s390/kvm/vsie.c | 10 ++++++++++ include/uapi/linux/kvm.h | 1 + 5 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index e14f381757f6..c1b0a9ac1dc8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -207,7 +207,8 @@ struct kvm_s390_sie_block { __u16 ipa; /* 0x0056 */ __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ - __u8 reserved60; /* 0x0060 */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ #define ECB_GS 0x40 #define ECB_TE 0x10 #define ECB_SRSI 0x04 diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 38535a57fef8..4cdaa55fabfe 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -224,6 +224,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_GSCB (1UL << 9) +#define KVM_SYNC_BPBC (1UL << 10) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) @@ -247,7 +248,9 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding1[52]; /* riccb needs to be 64byte aligned */ + __u8 bpbc : 1; /* bp mode */ + __u8 reserved2 : 7; + __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ union { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2c93cbbcd15e..2598cf243b86 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -421,6 +421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_GS: r = test_facility(133); break; + case 
KVM_CAP_S390_BPB: + r = test_facility(82); + break; default: r = 0; } @@ -2198,6 +2201,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 82)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; if (test_kvm_facility(vcpu->kvm, 133)) vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; /* fprs can be synchronized via vrs, even if the guest has no vx. With @@ -2339,6 +2344,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) @@ -3298,6 +3304,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; vcpu->arch.gs_enabled = 1; } + if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) && + test_kvm_facility(vcpu->kvm, 82)) { + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; + vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? 
FPF_BPBC : 0; + } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); /* save host (userspace) fprs/vrs */ @@ -3344,6 +3355,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_run->s.regs.pft = vcpu->arch.pfault_token; kvm_run->s.regs.pfs = vcpu->arch.pfault_select; kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; + kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); /* Save guest register state */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 5d6ae0326d9e..751348348477 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -223,6 +223,12 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) memcpy(scb_o->gcr, scb_s->gcr, 128); scb_o->pp = scb_s->pp; + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) { + scb_o->fpf &= ~FPF_BPBC; + scb_o->fpf |= scb_s->fpf & FPF_BPBC; + } + /* interrupt intercept */ switch (scb_s->icptcode) { case ICPT_PROGI: @@ -265,6 +271,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ecb3 = 0; scb_s->ecd = 0; scb_s->fac = 0; + scb_s->fpf = 0; rc = prepare_cpuflags(vcpu, vsie_page); if (rc) @@ -324,6 +331,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix_unmapped(vsie_page); scb_s->ecb |= scb_o->ecb & ECB_TE; } + /* branch prediction */ + if (test_kvm_facility(vcpu->kvm, 82)) + scb_s->fpf |= scb_o->fpf & FPF_BPBC; /* SIMD */ if (test_kvm_facility(vcpu->kvm, 129)) { scb_s->eca |= scb_o->eca & ECA_VX; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7a99b98cf88e..8fb90a0819c3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -933,6 +933,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_VP_INDEX 149 #define KVM_CAP_S390_AIS_MIGRATION 150 #define KVM_CAP_PPC_GET_CPU_CHAR 151 +#define 
KVM_CAP_S390_BPB 152 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 86be89939d11a84800f66e2a283b915b704bf33d Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 16 Jan 2018 11:52:59 +0000 Subject: alpha/PCI: Fix noname IRQ level detection The conversion of the alpha architecture PCI host bridge legacy IRQ mapping/swizzling to the new PCI host bridge map/swizzle hooks carried out through: commit 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") implies that IRQ for devices are now allocated through pci_assign_irq() function in pci_device_probe() that is called when a driver matching a device is found in order to probe the device through the device driver. Alpha noname platforms required IRQ level programming to be executed in sio_fixup_irq_levels(), that is called in noname_init_pci(), a platform hook called within a subsys_initcall. In noname_init_pci(), present IRQs are detected through sio_collect_irq_levels() that check the struct pci_dev->irq number to detect if an IRQ has been allocated for the device. By the time sio_collect_irq_levels() is called, some devices may still have not a matching driver loaded to match them (eg loadable module) therefore their IRQ allocation is still pending - which means that sio_collect_irq_levels() does not programme the correct IRQ level for those devices, causing their IRQ handling to be broken when the device driver is actually loaded and the device is probed. Fix the issue by adding code in the noname map_irq() function (noname_map_irq()) that, whilst mapping/swizzling the IRQ line, it also ensures that the correct IRQ level programming is executed at platform level, fixing the issue. 
Fixes: 0e4c2eeb758a ("alpha/PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") Reported-by: Mikulas Patocka Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org # 4.14 Cc: Bjorn Helgaas Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Mikulas Patocka Cc: Meelis Roos Signed-off-by: Matt Turner --- arch/alpha/kernel/sys_sio.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index 37bd6d9b8eb9..a6bdc1da47ad 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -102,6 +102,15 @@ sio_pci_route(void) alpha_mv.sys.sio.route_tab); } +static bool sio_pci_dev_irq_needs_level(const struct pci_dev *dev) +{ + if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) && + (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA)) + return false; + + return true; +} + static unsigned int __init sio_collect_irq_levels(void) { @@ -110,8 +119,7 @@ sio_collect_irq_levels(void) /* Iterate through the devices, collecting IRQ levels. 
*/ for_each_pci_dev(dev) { - if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) && - (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA)) + if (!sio_pci_dev_irq_needs_level(dev)) continue; if (dev->irq) @@ -120,8 +128,7 @@ sio_collect_irq_levels(void) return level_bits; } -static void __init -sio_fixup_irq_levels(unsigned int level_bits) +static void __sio_fixup_irq_levels(unsigned int level_bits, bool reset) { unsigned int old_level_bits; @@ -139,12 +146,21 @@ sio_fixup_irq_levels(unsigned int level_bits) */ old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8); - level_bits |= (old_level_bits & 0x71ff); + if (reset) + old_level_bits &= 0x71ff; + + level_bits |= old_level_bits; outb((level_bits >> 0) & 0xff, 0x4d0); outb((level_bits >> 8) & 0xff, 0x4d1); } +static inline void +sio_fixup_irq_levels(unsigned int level_bits) +{ + __sio_fixup_irq_levels(level_bits, true); +} + static inline int noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { @@ -181,7 +197,14 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5; int irq = COMMON_TABLE_LOOKUP, tmp; tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq); - return irq >= 0 ? tmp : -1; + + irq = irq >= 0 ? tmp : -1; + + /* Fixup IRQ level if an actual IRQ mapping is detected */ + if (sio_pci_dev_irq_needs_level(dev) && irq >= 0) + __sio_fixup_irq_levels(1 << irq, false); + + return irq; } static inline int -- cgit v1.2.3 From 91cfc88c66bf8ab95937606569670cf67fa73e09 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Sat, 20 Jan 2018 17:14:02 -0800 Subject: x86: Use __nostackprotect for sme_encrypt_kernel Commit bacf6b499e11 ("x86/mm: Use a struct to reduce parameters for SME PGD mapping") moved some parameters into a structure. The structure was large enough to trigger the stack protection canary in sme_encrypt_kernel which doesn't work this early, causing reboots. Mark sme_encrypt_kernel appropriately to not use the canary. 
Fixes: bacf6b499e11 ("x86/mm: Use a struct to reduce parameters for SME PGD mapping") Signed-off-by: Laura Abbott Cc: Tom Lendacky Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/x86/mm/mem_encrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 3ef362f598e3..e1d61e8500f9 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -738,7 +738,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return total; } -void __init sme_encrypt_kernel(struct boot_params *bp) +void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; -- cgit v1.2.3 From 0c5b9b5d9adbad4b60491f9ba0d2af38904bb4b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 21 Jan 2018 13:51:26 -0800 Subject: Linux 4.15-rc9 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bf5b8cbb9469..339397b838d3 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = -rc9 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From e58edaa4863583b54409444f11b4f80dff0af1cd Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Sun, 21 Jan 2018 05:30:42 +0200 Subject: net/mlx5e: Fix fixpoint divide exception in mlx5e_am_stats_compare Helmut reported a division-by-zero bug while running traffic and doing a physical cable pull test. When the cable is unplugged, the ppms becomes zero, so when dividing the current ppms by the previous ppms in the next dim iteration there is a division by zero. This patch prevents this division for both ppms and epms. Fixes: c3164d2fc48f ("net/mlx5e: Added BW check for DIM decision mechanism") Reported-by: Helmut Grauer Signed-off-by: Talat Batheesh Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index e401d9d245f3..b69a705fd787 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -201,9 +201,15 @@ static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr, return (curr->bpms > prev->bpms) ? MLX5E_AM_STATS_BETTER : MLX5E_AM_STATS_WORSE; + if (!prev->ppms) + return curr->ppms ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_SAME; + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) return (curr->ppms > prev->ppms) ? MLX5E_AM_STATS_BETTER : MLX5E_AM_STATS_WORSE; + if (!prev->epms) + return MLX5E_AM_STATS_SAME; if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) return (curr->epms < prev->epms) ? MLX5E_AM_STATS_BETTER : -- cgit v1.2.3 From 0d665e7b109d512b7cae3ccef6e8654714887844 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 19 Jan 2018 15:49:24 +0300 Subject: mm, page_vma_mapped: Drop faulty pointer arithmetics in check_pte() Tetsuo reported random crashes under memory pressure on a 32-bit x86 system and tracked them down to the change that introduced page_vma_mapped_walk(). The root cause of the issue is the faulty pointer math in check_pte(). As ->pte may point to an arbitrary page, we have to check that the pages belong to the same section before doing math. Otherwise it may lead to weird results. It wasn't noticed until now as mem_map[] is virtually contiguous on flatmem or vmemmap sparsemem. Pointer arithmetic just works against all 'struct page' pointers. But with classic sparsemem, it doesn't because each section memmap is allocated separately and so consecutive pfns crossing two sections might have struct pages at completely unrelated addresses. Let's restructure code a bit and replace pointer arithmetic with operations on pfns. Signed-off-by: Kirill A.
Shutemov Reported-and-tested-by: Tetsuo Handa Acked-by: Michal Hocko Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 21 +++++++++++++++++ mm/page_vma_mapped.c | 63 +++++++++++++++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 9c5a2628d6ce..1d3877c39a00 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return pfn_to_page(swp_offset(entry)); @@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) return false; } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return NULL; @@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); } +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset(entry)); @@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp) { return 0; } + +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { return NULL; diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index d22b84310f6d..956015614395 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -30,10 +30,29 @@ 
static bool map_pte(struct page_vma_mapped_walk *pvmw) return true; } +/** + * check_pte - check if @pvmw->page is mapped at the @pvmw->pte + * + * page_vma_mapped_walk() found a place where @pvmw->page is *potentially* + * mapped. check_pte() has to validate this. + * + * @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary + * page. + * + * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration + * entry that points to @pvmw->page or any subpage in case of THP. + * + * If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to + * @pvmw->page or any subpage in case of THP. + * + * Otherwise, return false. + * + */ static bool check_pte(struct page_vma_mapped_walk *pvmw) { + unsigned long pfn; + if (pvmw->flags & PVMW_MIGRATION) { -#ifdef CONFIG_MIGRATION swp_entry_t entry; if (!is_swap_pte(*pvmw->pte)) return false; @@ -41,37 +60,31 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) if (!is_migration_entry(entry)) return false; - if (migration_entry_to_page(entry) - pvmw->page >= - hpage_nr_pages(pvmw->page)) { - return false; - } - if (migration_entry_to_page(entry) < pvmw->page) - return false; -#else - WARN_ON_ONCE(1); -#endif - } else { - if (is_swap_pte(*pvmw->pte)) { - swp_entry_t entry; - entry = pte_to_swp_entry(*pvmw->pte); - if (is_device_private_entry(entry) && - device_private_entry_to_page(entry) == pvmw->page) - return true; - } + pfn = migration_entry_to_pfn(entry); + } else if (is_swap_pte(*pvmw->pte)) { + swp_entry_t entry; - if (!pte_present(*pvmw->pte)) + /* Handle un-addressable ZONE_DEVICE memory */ + entry = pte_to_swp_entry(*pvmw->pte); + if (!is_device_private_entry(entry)) return false; - /* THP can be referenced by any subpage */ - if (pte_page(*pvmw->pte) - pvmw->page >= - hpage_nr_pages(pvmw->page)) { - return false; - } - if (pte_page(*pvmw->pte) < pvmw->page) + pfn = device_private_entry_to_pfn(entry); + } else { + if (!pte_present(*pvmw->pte)) return false; + + pfn = 
pte_pfn(*pvmw->pte); } + if (pfn < page_to_pfn(pvmw->page)) + return false; + + /* THP can be referenced by any subpage */ + if (pfn - page_to_pfn(pvmw->page) >= hpage_nr_pages(pvmw->page)) + return false; + return true; } -- cgit v1.2.3 From 4a09d0b86bad0999a2bb0e2ee126a3c5246d1f51 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jan 2018 15:54:44 +0100 Subject: mmc: tmio: hide unused tmio_mmc_clk_disable/tmio_mmc_clk_enable functions When CONFIG_PM is disabled, we get a warning about the clock handling being unused: drivers/mmc/host/tmio_mmc_core.c:937:13: error: 'tmio_mmc_clk_disable' defined but not used [-Werror=unused-function] static void tmio_mmc_clk_disable(struct tmio_mmc_host *host) ^~~~~~~~~~~~~~~~~~~~ drivers/mmc/host/tmio_mmc_core.c:929:12: error: 'tmio_mmc_clk_enable' defined but not used [-Werror=unused-function] static int tmio_mmc_clk_enable(struct tmio_mmc_host *host) ^~~~~~~~~~~~~~~~~~~ As the clock handling is now done elsewhere, this is only used when power management is enabled. We could make the functions as __maybe_unused, but since there is already an #ifdef section, it seems easier to move the helpers closer to their callers. 
Fixes: b21fc294387e ("mmc: tmio: move clk_enable/disable out of tmio_mmc_host_probe()") Signed-off-by: Arnd Bergmann Reviewed-by: Masahiro Yamada Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_core.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 6d8719be75a8..33494241245a 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -926,20 +926,6 @@ static void tmio_mmc_done_work(struct work_struct *work) tmio_mmc_finish_request(host); } -static int tmio_mmc_clk_enable(struct tmio_mmc_host *host) -{ - if (!host->clk_enable) - return -ENOTSUPP; - - return host->clk_enable(host); -} - -static void tmio_mmc_clk_disable(struct tmio_mmc_host *host) -{ - if (host->clk_disable) - host->clk_disable(host); -} - static void tmio_mmc_power_on(struct tmio_mmc_host *host, unsigned short vdd) { struct mmc_host *mmc = host->mmc; @@ -1337,6 +1323,20 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) EXPORT_SYMBOL_GPL(tmio_mmc_host_remove); #ifdef CONFIG_PM +static int tmio_mmc_clk_enable(struct tmio_mmc_host *host) +{ + if (!host->clk_enable) + return -ENOTSUPP; + + return host->clk_enable(host); +} + +static void tmio_mmc_clk_disable(struct tmio_mmc_host *host) +{ + if (host->clk_disable) + host->clk_disable(host); +} + int tmio_mmc_host_runtime_suspend(struct device *dev) { struct tmio_mmc_host *host = dev_get_drvdata(dev); -- cgit v1.2.3 From 6478f4e12b7663cf5ab5303c06f99e9ec8c2b859 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sun, 21 Jan 2018 14:09:36 -0600 Subject: mmc: davinci: dont' use module_platform_driver_probe() This changes module_platform_driver_probe() to module_platform_driver() in the TI DaVinci MMC driver. 
On device tree systems, we can get a -EPROBE_DEFER when using a pinmux for the CD GPIO, which results in the driver never loading because module_platform_driver_probe() prevents it from being re-probed. So, we replace module_platform_driver_probe() with module_platform_driver() and removed the __init attributes accordingly. Signed-off-by: David Lechner Signed-off-by: Ulf Hansson --- drivers/mmc/host/davinci_mmc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index 351330dfb954..c5309ccf502e 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -174,7 +174,7 @@ module_param(poll_loopcount, uint, S_IRUGO); MODULE_PARM_DESC(poll_loopcount, "Maximum polling loop count. Default = 32"); -static unsigned __initdata use_dma = 1; +static unsigned use_dma = 1; module_param(use_dma, uint, 0); MODULE_PARM_DESC(use_dma, "Whether to use DMA or not. Default = 1"); @@ -496,8 +496,7 @@ static int mmc_davinci_start_dma_transfer(struct mmc_davinci_host *host, return ret; } -static void __init_or_module -davinci_release_dma_channels(struct mmc_davinci_host *host) +static void davinci_release_dma_channels(struct mmc_davinci_host *host) { if (!host->use_dma) return; @@ -506,7 +505,7 @@ davinci_release_dma_channels(struct mmc_davinci_host *host) dma_release_channel(host->dma_rx); } -static int __init davinci_acquire_dma_channels(struct mmc_davinci_host *host) +static int davinci_acquire_dma_channels(struct mmc_davinci_host *host) { host->dma_tx = dma_request_chan(mmc_dev(host->mmc), "tx"); if (IS_ERR(host->dma_tx)) { @@ -1201,7 +1200,7 @@ static int mmc_davinci_parse_pdata(struct mmc_host *mmc) return 0; } -static int __init davinci_mmcsd_probe(struct platform_device *pdev) +static int davinci_mmcsd_probe(struct platform_device *pdev) { const struct of_device_id *match; struct mmc_davinci_host *host = NULL; @@ -1414,11 +1413,12 @@ static struct platform_driver 
davinci_mmcsd_driver = { .pm = davinci_mmcsd_pm_ops, .of_match_table = davinci_mmc_dt_ids, }, + .probe = davinci_mmcsd_probe, .remove = __exit_p(davinci_mmcsd_remove), .id_table = davinci_mmc_devtype, }; -module_platform_driver_probe(davinci_mmcsd_driver, davinci_mmcsd_probe); +module_platform_driver(davinci_mmcsd_driver); MODULE_AUTHOR("Texas Instruments India"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From f9de65fc61b51398cafb2e4db5cc787704fc49a1 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sun, 21 Jan 2018 14:28:13 -0600 Subject: mmc: davinci: suppress error message on EPROBE_DEFER This suppresses printing an error message during probe of the TI DaVinci MMC driver when the error is EPROBE_DEFER. Signed-off-by: David Lechner Signed-off-by: Ulf Hansson --- drivers/mmc/host/davinci_mmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index c5309ccf502e..8e363174f9d6 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1253,8 +1253,9 @@ static int davinci_mmcsd_probe(struct platform_device *pdev) pdev->id_entry = match->data; ret = mmc_of_parse(mmc); if (ret) { - dev_err(&pdev->dev, - "could not parse of data: %d\n", ret); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, + "could not parse of data: %d\n", ret); goto parse_fail; } } else { -- cgit v1.2.3 From 1805f2ca3f27f25c8e637edc4b40bab237f9d07c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 20 Oct 2017 17:53:41 -0600 Subject: Btrfs: remove redundant btrfs_balance_delayed_items In functions like btrfs_create(), we run both btrfs_balance_delayed_items() and btrfs_btree_balance_dirty() after the operation, but btrfs_btree_balance_dirty() is surely going to run btrfs_balance_delayed_items(). This keeps only btrfs_btree_balance_dirty(). 
Signed-off-by: Liu Bo Reviewed-by: Lu Fengqi Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e1a7f3cb5be9..2bab93f20328 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6560,7 +6560,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, out_unlock: btrfs_end_transaction(trans); - btrfs_balance_delayed_items(fs_info); btrfs_btree_balance_dirty(fs_info); if (drop_inode) { inode_dec_link_count(inode); @@ -6641,7 +6640,6 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } - btrfs_balance_delayed_items(fs_info); btrfs_btree_balance_dirty(fs_info); return err; @@ -6716,7 +6714,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); } - btrfs_balance_delayed_items(fs_info); fail: if (trans) btrfs_end_transaction(trans); @@ -6794,7 +6791,6 @@ out_fail: inode_dec_link_count(inode); iput(inode); } - btrfs_balance_delayed_items(fs_info); btrfs_btree_balance_dirty(fs_info); return err; @@ -10688,7 +10684,6 @@ out: btrfs_end_transaction(trans); if (ret) iput(inode); - btrfs_balance_delayed_items(fs_info); btrfs_btree_balance_dirty(fs_info); return ret; -- cgit v1.2.3 From 9f5316c17b0833c606b94d91beb6518c2e8a8012 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 23 Oct 2017 23:02:54 -0600 Subject: Btrfs: free btrfs_device in place It's pointless to defer it to a kthread helper as we're not under a special context. For reference, commit 1f78160ce1b1 ("Btrfs: using rcu lock in the reader side of devices list") introduced RCU freeing for device structures. Originally the blkdev_put was called from free_device and rcu_barrier had to be called. This is no longer required, bdev and our device structures are now freed separately. 
Signed-off-by: Liu Bo Reviewed-by: Anand Jain Reviewed-by: David Sterba [ enhance changelog ] Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 14 ++------------ fs/btrfs/volumes.h | 1 - 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a25684287501..f3c4ad2ddfe6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -824,26 +824,16 @@ again: mutex_unlock(&uuid_mutex); } -static void __free_device(struct work_struct *work) +static void free_device(struct rcu_head *head) { struct btrfs_device *device; - device = container_of(work, struct btrfs_device, rcu_work); + device = container_of(head, struct btrfs_device, rcu); rcu_string_free(device->name); bio_put(device->flush_bio); kfree(device); } -static void free_device(struct rcu_head *head) -{ - struct btrfs_device *device; - - device = container_of(head, struct btrfs_device, rcu); - - INIT_WORK(&device->rcu_work, __free_device); - schedule_work(&device->rcu_work); -} - static void btrfs_close_bdev(struct btrfs_device *device) { if (device->bdev && device->writeable) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ff15208344a7..bf0d53a96b4a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -133,7 +133,6 @@ struct btrfs_device { struct btrfs_work work; struct rcu_head rcu; - struct work_struct rcu_work; /* readahead state */ spinlock_t reada_lock; -- cgit v1.2.3 From 1cb34c8ecd4623909c66bccb91ee7f6483e04441 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 21 Oct 2017 01:45:33 +0800 Subject: btrfs: clean up btrfs_dev_stat_inc usage btrfs_end_bio() is using btrfs_dev_stat_inc() and then btrfs_dev_stat_print_on_error() separately instead use btrfs_dev_stat_inc_and_print() directly. As of now there isn't any bio in btrfs which is - a non-empty write and also the REQ_PREFLUSH flag is set. 
So in practice the condition if (bio->bi_opf & REQ_PREFLUSH) is never true in btrfs_end_bio(), and so there won't be any redundant error log from calling btrfs_dev_stat_inc_and_print() separately, once for write and once for flush. This consolidation will help to add the device critical error handling in the function btrfs_dev_stat_inc_and_print(), which can be renamed as needed. Signed-off-by: Anand Jain Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f3c4ad2ddfe6..baf047b8cf99 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5993,15 +5993,14 @@ static void btrfs_end_bio(struct bio *bio) dev = bbio->stripes[stripe_index].dev; if (dev->bdev) { if (bio_op(bio) == REQ_OP_WRITE) - btrfs_dev_stat_inc(dev, + btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); else - btrfs_dev_stat_inc(dev, + btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); if (bio->bi_opf & REQ_PREFLUSH) - btrfs_dev_stat_inc(dev, + btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS); - btrfs_dev_stat_print_on_error(dev); } } } -- cgit v1.2.3 From 9deae9689231964972a94bb56a79b669f9d47ac1 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 24 Oct 2017 13:47:37 +0300 Subject: btrfs: Fix memory barriers usage with device stats counters Commit addc3fa74e5b ("Btrfs: Fix the problem that the dirty flag of dev stats is cleared") reworked the way device stats changes are tracked. A new atomic dev_stats_ccnt counter was introduced which is incremented every time any of the device stats counters are changed. This serves as a flag whether there are any pending stats changes. However, this patch only partially implemented the correct memory barriers necessary: - It only ordered the stores to the counters but not the reads e.g.
btrfs_run_dev_stats - It completely omitted any comments documenting the intended design and how the memory barriers pair with each-other This patch provides the necessary comments as well as adds a missing smp_rmb in btrfs_run_dev_stats. Furthermore since dev_stats_cnt is only a snapshot at best there was no point in reading the counter twice - once in btrfs_dev_stats_dirty and then again when assigning stats_cnt. Just collapse both reads into 1. Fixes: addc3fa74e5b ("Btrfs: Fix the problem that the dirty flag of dev stats is cleared") Signed-off-by: Nikolay Borisov Reviewed-by: Mathieu Desnoyers Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 18 ++++++++++++++++-- fs/btrfs/volumes.h | 12 ++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index baf047b8cf99..c96635a26803 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7080,10 +7080,24 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry(device, &fs_devices->devices, dev_list) { - if (!device->dev_stats_valid || !btrfs_dev_stats_dirty(device)) + stats_cnt = atomic_read(&device->dev_stats_ccnt); + if (!device->dev_stats_valid || stats_cnt == 0) continue; - stats_cnt = atomic_read(&device->dev_stats_ccnt); + + /* + * There is a LOAD-LOAD control dependency between the value of + * dev_stats_ccnt and updating the on-disk values which requires + * reading the in-memory counters. Such control dependencies + * require explicit read memory barriers. 
+ * + * This memory barriers pairs with smp_mb__before_atomic in + * btrfs_dev_stat_inc/btrfs_dev_stat_set and with the full + * barrier implied by atomic_xchg in + * btrfs_dev_stats_read_and_reset + */ + smp_rmb(); + ret = update_dev_stat_item(trans, fs_info, device); if (!ret) atomic_sub(stats_cnt, &device->dev_stats_ccnt); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bf0d53a96b4a..38fd368ddfe7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -497,6 +497,12 @@ static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, int index) { atomic_inc(dev->dev_stat_values + index); + /* + * This memory barrier orders stores updating statistics before stores + * updating dev_stats_ccnt. + * + * It pairs with smp_rmb() in btrfs_run_dev_stats(). + */ smp_mb__before_atomic(); atomic_inc(&dev->dev_stats_ccnt); } @@ -522,6 +528,12 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev, int index, unsigned long val) { atomic_set(dev->dev_stat_values + index, val); + /* + * This memory barrier orders stores updating statistics before stores + * updating dev_stats_ccnt. + * + * It pairs with smp_rmb() in btrfs_run_dev_stats(). + */ smp_mb__before_atomic(); atomic_inc(&dev->dev_stats_ccnt); } -- cgit v1.2.3 From 4660c49f9b4ad50eab410de09eeacc2e3aebf1f5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 20 Oct 2017 18:10:58 +0300 Subject: btrfs: Remove redundant memory barrier in dev stats As per atomic_t.txt documentation : - RMW operations that have a return value are fully ordered; atomic_xchg is one such operation so it already includes everything it needs w.r.t memory ordering and add a comment to be more explicit about that. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 38fd368ddfe7..177814a7c758 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -519,7 +519,13 @@ static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, int ret; ret = atomic_xchg(dev->dev_stat_values + index, 0); - smp_mb__before_atomic(); + /* + * atomic_xchg implies a full memory barriers as per atomic_t.txt: + * - RMW operations that have a return value are fully ordered; + * + * This implicit memory barriers is paired with the smp_rmb in + * btrfs_run_dev_stats + */ atomic_inc(&dev->dev_stats_ccnt); return ret; } -- cgit v1.2.3 From ac244ef1da76383842a9a4c3404e35d0669f2e53 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 20 Oct 2017 18:10:59 +0300 Subject: btrfs: Remove unused function Its sole callsite was removed in a previous patch so just nuke it for good.
Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/volumes.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 177814a7c758..294c4eb6a272 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -488,11 +488,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, int btrfs_remove_chunk(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 chunk_offset); -static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev) -{ - return atomic_read(&dev->dev_stats_ccnt); -} - static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, int index) { -- cgit v1.2.3 From d3fac6ba7dccc54f52087bb9d7b9be27aa6fa622 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 24 Oct 2017 11:50:39 +0300 Subject: btrfs: Remove redundant mirror_num arg The following callpath is always invoked with mirror_num set to 0, so let's remove it as an argument and directly pass 0 to __do_readpage. No functional change.
extent_readpages __extent_readpages __do_contiguous_readpages __do_readpage Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 012d63870b99..c6e82a3e706d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3094,7 +3094,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, u64 start, u64 end, get_extent_t *get_extent, struct extent_map **em_cached, - struct bio **bio, int mirror_num, + struct bio **bio, unsigned long *bio_flags, u64 *prev_em_start) { @@ -3116,7 +3116,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, 0, prev_em_start); + 0, bio_flags, 0, prev_em_start); put_page(pages[index]); } } @@ -3125,8 +3125,7 @@ static void __extent_readpages(struct extent_io_tree *tree, struct page *pages[], int nr_pages, get_extent_t *get_extent, struct extent_map **em_cached, - struct bio **bio, int mirror_num, - unsigned long *bio_flags, + struct bio **bio, unsigned long *bio_flags, u64 *prev_em_start) { u64 start = 0; @@ -3147,7 +3146,7 @@ static void __extent_readpages(struct extent_io_tree *tree, __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, end, get_extent, em_cached, - bio, mirror_num, bio_flags, + bio, bio_flags, prev_em_start); start = page_start; end = start + PAGE_SIZE - 1; @@ -3159,8 +3158,7 @@ static void __extent_readpages(struct extent_io_tree *tree, __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, end, get_extent, em_cached, bio, - mirror_num, bio_flags, - prev_em_start); + bio_flags, prev_em_start); } static int __extent_read_full_page(struct extent_io_tree *tree, @@ -4171,12 +4169,12 @@ int 
extent_readpages(struct extent_io_tree *tree, if (nr < ARRAY_SIZE(pagepool)) continue; __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, &prev_em_start); + &bio, &bio_flags, &prev_em_start); nr = 0; } if (nr) __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, &prev_em_start); + &bio, &bio_flags, &prev_em_start); if (em_cached) free_extent_map(em_cached); -- cgit v1.2.3 From 617c54a88eaba611bfaa917918c773a91c53692a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 23 Oct 2017 13:51:48 +0300 Subject: btrfs: Make btrfs_async_run_delayed_root use a loop rather than multiple labels Currently btrfs_async_run_delayed_root's implementation uses 3 goto labels to mimic the functionality of a simple do {} while loop. Refactor the function to use a do {} while construct, making intention clear and code easier to follow. No functional changes. Signed-off-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 52 +++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 056276101c63..cf0a6a25156c 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1302,40 +1302,42 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work) if (!path) goto out; -again: - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2) - goto free_path; + do { + if (atomic_read(&delayed_root->items) < + BTRFS_DELAYED_BACKGROUND / 2) + break; - delayed_node = btrfs_first_prepared_delayed_node(delayed_root); - if (!delayed_node) - goto free_path; + delayed_node = btrfs_first_prepared_delayed_node(delayed_root); + if (!delayed_node) + break; - path->leave_spinning = 1; - root = delayed_node->root; + path->leave_spinning = 1; + root = delayed_node->root; - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) - goto release_path; 
+ trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + btrfs_release_path(path); + btrfs_release_prepared_delayed_node(delayed_node); + total_done++; + continue; + } - block_rsv = trans->block_rsv; - trans->block_rsv = &root->fs_info->delayed_block_rsv; + block_rsv = trans->block_rsv; + trans->block_rsv = &root->fs_info->delayed_block_rsv; - __btrfs_commit_inode_delayed_items(trans, path, delayed_node); + __btrfs_commit_inode_delayed_items(trans, path, delayed_node); - trans->block_rsv = block_rsv; - btrfs_end_transaction(trans); - btrfs_btree_balance_dirty_nodelay(root->fs_info); + trans->block_rsv = block_rsv; + btrfs_end_transaction(trans); + btrfs_btree_balance_dirty_nodelay(root->fs_info); -release_path: - btrfs_release_path(path); - total_done++; + btrfs_release_path(path); + btrfs_release_prepared_delayed_node(delayed_node); + total_done++; - btrfs_release_prepared_delayed_node(delayed_node); - if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) || - total_done < async_work->nr) - goto again; + } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) + || total_done < async_work->nr); -free_path: btrfs_free_path(path); out: wake_up(&delayed_root->wait); -- cgit v1.2.3 From 8577787fac828a9873064ccec986f4be6156615d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 23 Oct 2017 13:51:49 +0300 Subject: btrfs: Move checks from btrfs_wq_run_delayed_node to btrfs_balance_delayed_items btrfs_balance_delayed_items is the sole caller of btrfs_wq_run_delayed_node and already includes one of the checks whether the delayed inodes should be run. On the other hand btrfs_wq_run_delayed_node duplicates that check and performs an additional one for wq congestion. Let's remove the duplicate check and move the congestion one in btrfs_balance_delayed_items, leaving btrfs_wq_run_delayed_node to only care about setting up the wq run. No functional changes. 
Signed-off-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index cf0a6a25156c..1c0bab4080a0 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1350,10 +1350,6 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, { struct btrfs_async_delayed_work *async_work; - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND || - btrfs_workqueue_normal_congested(fs_info->delayed_workers)) - return 0; - async_work = kmalloc(sizeof(*async_work), GFP_NOFS); if (!async_work) return -ENOMEM; @@ -1389,7 +1385,8 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) { struct btrfs_delayed_root *delayed_root = fs_info->delayed_root; - if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) + if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) || + btrfs_workqueue_normal_congested(fs_info->delayed_workers)) return; if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { -- cgit v1.2.3 From 47dba17171a76ea2a2a7153f9aa9f28579bfd350 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 10 Oct 2017 15:51:02 -0600 Subject: Btrfs: remove rcu_barrier in btrfs_close_devices It was introduced because btrfs used to do blkdev_put in a deferred work, now that btrfs has blkdev_put in place, this rcu_barrier can be removed. modprobe -r btrfs will do btrfs_cleanup_fs_uuids(), where it cleanup every %fs_devices on the list, but when we do btrfs_close_devices(), we have replaced the devices on the list with dummy ones which only have the same name and uuid, so modprobe -r btrfs will free those instead of what we were using, this change won't cause a problem for it. 
Signed-off-by: Liu Bo Reviewed-by: Anand Jain Reviewed-by: David Sterba [ copied 2nd paragraph from mailinglist discussion ] Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c96635a26803..7ea81e1b361f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -936,12 +936,6 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) __btrfs_close_devices(fs_devices); free_fs_devices(fs_devices); } - /* - * Wait for rcu kworkers under __btrfs_close_devices - * to finish all blkdev_puts so device is really - * free when umount is done. - */ - rcu_barrier(); return ret; } -- cgit v1.2.3 From 6b7d6e933433a43062ce9355a4126b59bd2519e3 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 1 Nov 2017 11:32:18 +0200 Subject: btrfs: Move loop termination condition in while() Fallocating a file in btrfs goes through several stages. The one before actually inserting the fallocated extents is to create a qgroup reservation, covering the desired range. To this end there is a loop in btrfs_fallocate which checks to see if there are holes in the fallocated range or !PREALLOC extents past EOF and if so create qgroup reservations for them. Unfortunately, the main condition of the loop is buried right at the end of its body rather than in the actual while statement which makes it non-obvious. Fix this by moving the condition in the while statement where it belongs. No functional changes.
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eb1bac7c8553..89fb9eff714f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2922,7 +2922,7 @@ static long btrfs_fallocate(struct file *file, int mode, /* First, check if we exceed the qgroup limit */ INIT_LIST_HEAD(&reserve_list); - while (1) { + while (cur_offset < alloc_end) { em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, alloc_end - cur_offset, 0); if (IS_ERR(em)) { @@ -2958,8 +2958,6 @@ static long btrfs_fallocate(struct file *file, int mode, } free_extent_map(em); cur_offset = last_byte; - if (cur_offset >= alloc_end) - break; } /* -- cgit v1.2.3 From 96b09dde92515956c992c1b330f00399487f47b3 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 1 Nov 2017 11:36:05 +0200 Subject: btrfs: Use locked_end rather than open coding it Right before we go into this loop locked_end is set to alloc_end - 1 and is being used in nearby functions, no need to have exceptions. This just makes the code consistent, no functional changes. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 89fb9eff714f..559d716221df 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2896,8 +2896,8 @@ static long btrfs_fallocate(struct file *file, int mode, */ lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state); - ordered = btrfs_lookup_first_ordered_extent(inode, - alloc_end - 1); + ordered = btrfs_lookup_first_ordered_extent(inode, locked_end); + if (ordered && ordered->file_offset + ordered->len > alloc_start && ordered->file_offset < alloc_end) { -- cgit v1.2.3 From 2c9973847fa0230ea82935a9c486cb38ef599893 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 6 Nov 2017 10:28:00 +0800 Subject: btrfs: move volume_mutex into the btrfs_rm_device() A cleanup patch no functional change, we hold volume_mutex before calling btrfs_rm_device, so move it into the function itself. 
Signed-off-by: Anand Jain Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 4 ---- fs/btrfs/volumes.c | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2ef8acaac688..64b947487ca6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2675,14 +2675,12 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) goto out; } - mutex_lock(&fs_info->volume_mutex); if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) { ret = btrfs_rm_device(fs_info, NULL, vol_args->devid); } else { vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; ret = btrfs_rm_device(fs_info, vol_args->name, 0); } - mutex_unlock(&fs_info->volume_mutex); clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); if (!ret) { @@ -2726,9 +2724,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) } vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; - mutex_lock(&fs_info->volume_mutex); ret = btrfs_rm_device(fs_info, vol_args->name, 0); - mutex_unlock(&fs_info->volume_mutex); if (!ret) btrfs_info(fs_info, "disk deleted %s", vol_args->name); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7ea81e1b361f..02b2c9ec6fcd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1842,6 +1842,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, u64 num_devices; int ret = 0; + mutex_lock(&fs_info->volume_mutex); mutex_lock(&uuid_mutex); num_devices = fs_info->fs_devices->num_devices; @@ -1956,6 +1957,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, out: mutex_unlock(&uuid_mutex); + mutex_unlock(&fs_info->volume_mutex); return ret; error_undo: -- cgit v1.2.3 From 33d85fda13ea2712bb9c9e687176798583d2359c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 31 Oct 2017 14:08:16 +0800 Subject: btrfs: Don't generate UUID for non-fs tree btrfs_create_tree() will unconditionally generate UUID for any root. 
So for quota tree and data reloc tree created by kernel, they will have unique UUIDs. However UUID in root item is only referred by UUID tree, which only records UUID for fs trees. This makes unique UUIDs for quota/data reloc tree meaningless. Leave the UUID as zero for non-fs tree, making btrfs-debug-tree output less confusing. Reported-by: Misono Tomohiro Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a8ecccfc36de..ec9aa34bb6eb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1243,7 +1243,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root; struct btrfs_key key; int ret = 0; - uuid_le uuid; + uuid_le uuid = NULL_UUID_LE; root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!root) @@ -1284,7 +1284,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_root_used(&root->root_item, leaf->len); btrfs_set_root_last_snapshot(&root->root_item, 0); btrfs_set_root_dirid(&root->root_item, 0); - uuid_le_gen(&uuid); + if (is_fstree(objectid)) + uuid_le_gen(&uuid); memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE); root->root_item.drop_level = 0; -- cgit v1.2.3 From c74a0b0237c47806e43c1dc6f875c88fb9ee2525 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 6 Nov 2017 16:36:15 +0800 Subject: btrfs: rename btrfs_add_device to btrfs_add_dev_item Function btrfs_add_device() is adding the device item so rename to reflect that in the function. Similarly we have btrfs_rm_dev_item(). 
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 02b2c9ec6fcd..9d2af9f53a65 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1645,7 +1645,7 @@ error: * the device information is stored in the chunk root * the btrfs_device struct should be fully filled in */ -static int btrfs_add_device(struct btrfs_trans_handle *trans, +static int btrfs_add_dev_item(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_device *device) { @@ -2435,7 +2435,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } } - ret = btrfs_add_device(trans, fs_info, device); + ret = btrfs_add_dev_item(trans, fs_info, device); if (ret) { btrfs_abort_transaction(trans, ret); goto error_sysfs; -- cgit v1.2.3 From f5c29bd9dbd3e90e03ab7697ecc373b49394e62e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 2 Nov 2017 17:21:50 -0600 Subject: Btrfs: add __init macro to btrfs init functions Adding __init macro gives kernel a hint that this function is only used during the initialization phase and its memory resources can be freed up after. 
Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.h | 2 +- fs/btrfs/ctree.h | 6 +++--- fs/btrfs/delayed-ref.c | 2 +- fs/btrfs/delayed-ref.h | 2 +- fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 2 +- fs/btrfs/super.c | 4 ++-- fs/btrfs/sysfs.c | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 0868cc554f14..6b692903a23c 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -75,7 +75,7 @@ struct compressed_bio { u32 sums; }; -void btrfs_init_compress(void); +void __init btrfs_init_compress(void); void btrfs_exit_compress(void); int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 13c260b525a1..a56d00311578 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3197,7 +3197,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); -int btrfs_init_cachep(void); +int __init btrfs_init_cachep(void); void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, @@ -3248,7 +3248,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, struct file *dst_file, u64 dst_loff); /* file.c */ -int btrfs_auto_defrag_init(void); +int __init btrfs_auto_defrag_init(void); void btrfs_auto_defrag_exit(void); int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); @@ -3283,7 +3283,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* sysfs.c */ -int btrfs_init_sysfs(void); +int __init btrfs_init_sysfs(void); void btrfs_exit_sysfs(void); int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info); void 
btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 83be8f9fd906..a1a40cf382e3 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -937,7 +937,7 @@ void btrfs_delayed_ref_exit(void) kmem_cache_destroy(btrfs_delayed_extent_op_cachep); } -int btrfs_delayed_ref_init(void) +int __init btrfs_delayed_ref_init(void) { btrfs_delayed_ref_head_cachep = kmem_cache_create( "btrfs_delayed_ref_head", diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index a43af432f859..c4f625e5a691 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -203,7 +203,7 @@ extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; extern struct kmem_cache *btrfs_delayed_data_ref_cachep; extern struct kmem_cache *btrfs_delayed_extent_op_cachep; -int btrfs_delayed_ref_init(void); +int __init btrfs_delayed_ref_init(void); void btrfs_delayed_ref_exit(void); static inline struct btrfs_delayed_extent_op * diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 559d716221df..d1eba3394660 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3143,7 +3143,7 @@ void btrfs_auto_defrag_exit(void) kmem_cache_destroy(btrfs_inode_defrag_cachep); } -int btrfs_auto_defrag_init(void) +int __init btrfs_auto_defrag_init(void) { btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", sizeof(struct inode_defrag), 0, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2bab93f20328..c8a3e84d7114 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9569,7 +9569,7 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_free_space_cachep); } -int btrfs_init_cachep(void) +int __init btrfs_init_cachep(void) { btrfs_inode_cachep = kmem_cache_create("btrfs_inode", sizeof(struct btrfs_inode), 0, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3a4dce153645..7a7abe827ac4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2324,7 +2324,7 @@ static struct miscdevice btrfs_misc = { 
MODULE_ALIAS_MISCDEV(BTRFS_MINOR); MODULE_ALIAS("devname:btrfs-control"); -static int btrfs_interface_init(void) +static int __init btrfs_interface_init(void) { return misc_register(&btrfs_misc); } @@ -2334,7 +2334,7 @@ static void btrfs_interface_exit(void) misc_deregister(&btrfs_misc); } -static void btrfs_print_mod_info(void) +static void __init btrfs_print_mod_info(void) { pr_info("Btrfs loaded, crc32c=%s" #ifdef CONFIG_BTRFS_DEBUG diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index a28bba801264..a8bafed931f4 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -897,7 +897,7 @@ static int btrfs_init_debugfs(void) return 0; } -int btrfs_init_sysfs(void) +int __init btrfs_init_sysfs(void) { int ret; -- cgit v1.2.3 From 057aac3e628f38df984b419f7458455bf698eb7e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 7 Nov 2017 11:22:54 +0200 Subject: btrfs: Reduce scope of delayed_rsv->lock in may_commit_trans After commit 996478ca9c460886ac1 ("btrfs: change how we decide to commit transactions during flushing") there is no need to hold delayed_rsv->lock during the percpu_counter_compare call since we take a snapshot of the bytes earlier. So hold the lock only while reading delayed_rsv.
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2f4328511ac8..41770ee1313e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4945,12 +4945,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, bytes = 0; else bytes -= delayed_rsv->size; + spin_unlock(&delayed_rsv->lock); + if (percpu_counter_compare(&space_info->total_bytes_pinned, bytes) < 0) { - spin_unlock(&delayed_rsv->lock); return -ENOSPC; } - spin_unlock(&delayed_rsv->lock); commit: trans = btrfs_join_transaction(fs_info->extent_root); -- cgit v1.2.3 From 4c274bc67b8104c6aff89b4176395e368e8c231c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 1 Nov 2017 17:19:27 -0600 Subject: Btrfs: document rules about bio async submit These rules have been hidden in several if-else branches and are not straightforward to follow. For example, the dio submit hook's nocsum case had a bug, i.e. doing async submit instead of sync submit, which has been fixed recently. This is documenting the rules for reference. Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c8a3e84d7114..f259d89804b2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1951,7 +1951,21 @@ static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio, /* * extent_io.c submission hook. This does the right thing for csum calculation - * on write, or reading the csums from the tree before a read + * on write, or reading the csums from the tree before a read.
+ * + * Rules about async/sync submit, + * a) read: sync submit + * + * b) write without checksum: sync submit + * + * c) write with checksum: + * c-1) if bio is issued by fsync: sync submit + * (sync_writers != 0) + * + * c-2) if root is reloc root: sync submit + * (only in case of buffered IO) + * + * c-3) otherwise: async submit */ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio, int mirror_num, unsigned long bio_flags, @@ -8456,6 +8470,7 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, bool write = bio_op(bio) == REQ_OP_WRITE; blk_status_t ret; + /* Check btrfs_submit_bio_hook() for rules about async submit. */ if (async_submit) async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); -- cgit v1.2.3 From f06c5965abb79573a69874e2731581c7c336819f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 6 Jun 2017 17:08:23 +0200 Subject: btrfs: rename device free rcu helper to free_device_rcu Make it clear that it is an RCU helper, we want to use the name free_device for a wrapper freeing all device members. 
Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9d2af9f53a65..869048830df9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -824,7 +824,7 @@ again: mutex_unlock(&uuid_mutex); } -static void free_device(struct rcu_head *head) +static void free_device_rcu(struct rcu_head *head) { struct btrfs_device *device; @@ -906,7 +906,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) struct btrfs_device, dev_list); list_del(&device->dev_list); btrfs_close_bdev(device); - call_rcu(&device->rcu, free_device); + call_rcu(&device->rcu, free_device_rcu); } WARN_ON(fs_devices->open_devices); @@ -1938,7 +1938,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, btrfs_scratch_superblocks(device->bdev, device->name->str); btrfs_close_bdev(device); - call_rcu(&device->rcu, free_device); + call_rcu(&device->rcu, free_device_rcu); if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; @@ -2010,7 +2010,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, } btrfs_close_bdev(srcdev); - call_rcu(&srcdev->rcu, free_device); + call_rcu(&srcdev->rcu, free_device_rcu); /* if this is no devs we rather delete the fs_devices */ if (!fs_devices->num_devices) { @@ -2069,7 +2069,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); btrfs_close_bdev(tgtdev); - call_rcu(&tgtdev->rcu, free_device); + call_rcu(&tgtdev->rcu, free_device_rcu); } static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info, -- cgit v1.2.3 From 48dae9cf3f95acc4bd51b2324e10f8fd5c302be8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Oct 2017 18:10:25 +0100 Subject: btrfs: introduce free_device helper A helper to free a device and all its dynamically allocated members, like the rcu_string name or flush_bio.
This is going to replace all open coded places. Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 869048830df9..6a1186e4ea57 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -180,6 +180,13 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) return fs_devs; } +static void free_device(struct btrfs_device *device) +{ + rcu_string_free(device->name); + bio_put(device->flush_bio); + kfree(device); +} + static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; @@ -220,6 +227,11 @@ void btrfs_cleanup_fs_uuids(void) } } +/* + * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error. + * Returned struct is not linked onto any lists and must be destroyed using + * free_device. + */ static struct btrfs_device *__alloc_device(void) { struct btrfs_device *dev; @@ -6257,8 +6269,8 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, * is generated. * * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() - * on error. Returned struct is not linked onto any lists and can be - * destroyed with kfree() right away. + * on error. Returned struct is not linked onto any lists and must be + * destroyed with free_device. 
*/ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, const u64 *devid, -- cgit v1.2.3 From 55de480346cccff9c436f802de499a44b25968a2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Oct 2017 18:55:47 +0100 Subject: btrfs: use free_device where opencoded Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6a1186e4ea57..f93994a49321 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -195,9 +195,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) device = list_entry(fs_devices->devices.next, struct btrfs_device, dev_list); list_del(&device->dev_list); - rcu_string_free(device->name); - bio_put(device->flush_bio); - kfree(device); + free_device(device); } kfree(fs_devices); } @@ -589,9 +587,7 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev) } else { fs_devs->num_devices--; list_del(&dev->dev_list); - rcu_string_free(dev->name); - bio_put(dev->flush_bio); - kfree(dev); + free_device(dev); } break; } @@ -643,8 +639,7 @@ static noinline int device_list_add(const char *path, name = rcu_string_strdup(path, GFP_NOFS); if (!name) { - bio_put(device->flush_bio); - kfree(device); + free_device(device); return -ENOMEM; } rcu_assign_pointer(device->name, name); @@ -756,8 +751,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) name = rcu_string_strdup(orig_dev->name->str, GFP_KERNEL); if (!name) { - bio_put(device->flush_bio); - kfree(device); + free_device(device); goto error; } rcu_assign_pointer(device->name, name); @@ -821,9 +815,7 @@ again: } list_del_init(&device->dev_list); fs_devices->num_devices--; - rcu_string_free(device->name); - bio_put(device->flush_bio); - kfree(device); + free_device(device); } if (fs_devices->seed) { @@ -841,9 +833,7 @@ static void free_device_rcu(struct rcu_head 
*head) struct btrfs_device *device; device = container_of(head, struct btrfs_device, rcu); - rcu_string_free(device->name); - bio_put(device->flush_bio); - kfree(device); + free_device(device); } static void btrfs_close_bdev(struct btrfs_device *device) @@ -2355,8 +2345,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { - bio_put(device->flush_bio); - kfree(device); + free_device(device); ret = -ENOMEM; goto error; } @@ -2364,9 +2353,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { - rcu_string_free(device->name); - bio_put(device->flush_bio); - kfree(device); + free_device(device); ret = PTR_ERR(trans); goto error; } @@ -2508,9 +2495,7 @@ error_trans: sb->s_flags |= SB_RDONLY; if (trans) btrfs_end_transaction(trans); - rcu_string_free(device->name); - bio_put(device->flush_bio); - kfree(device); + free_device(device); error: blkdev_put(bdev, FMODE_EXCL); if (seeding_dev && !unlocked) { @@ -2576,8 +2561,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { - bio_put(device->flush_bio); - kfree(device); + free_device(device); ret = -ENOMEM; goto error; } @@ -6293,8 +6277,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, ret = find_next_devid(fs_info, &tmp); if (ret) { - bio_put(dev->flush_bio); - kfree(dev); + free_device(dev); return ERR_PTR(ret); } } -- cgit v1.2.3 From 5c4cf6c91d717b76008b0c8fef7b2947b5da2a4f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Oct 2017 19:29:46 +0100 Subject: btrfs: simplify exit paths in btrfs_init_new_device Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 
f93994a49321..3fa83ff9b30f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2345,17 +2345,15 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { - free_device(device); ret = -ENOMEM; - goto error; + goto error_free_device; } rcu_assign_pointer(device->name, name); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { - free_device(device); ret = PTR_ERR(trans); - goto error; + goto error_free_device; } q = bdev_get_queue(bdev); @@ -2495,6 +2493,7 @@ error_trans: sb->s_flags |= SB_RDONLY; if (trans) btrfs_end_transaction(trans); +error_free_device: free_device(device); error: blkdev_put(bdev, FMODE_EXCL); -- cgit v1.2.3 From 9c6b1c4de1c64cd35d029ed6e0428d0a57fa632b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 16 Jun 2017 22:30:00 +0200 Subject: btrfs: document device locking Overview of the main locks protecting various device-related structures. Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3fa83ff9b30f..372f515808d1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -145,6 +145,71 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, struct btrfs_bio **bbio_ret, int mirror_num, int need_raid_map); +/* + * Device locking + * ============== + * + * There are several mutexes that protect manipulation of devices and low-level + * structures like chunks but not block groups, extents or files + * + * uuid_mutex (global lock) + * ------------------------ + * protects the fs_uuids list that tracks all per-fs fs_devices, resulting from + * the SCAN_DEV ioctl registration or from mount either implicitly (the first + * device) or requested by the device= mount option + * + * the mutex can be very coarse and can cover long-running operations + * + * protects: updates to 
fs_devices counters like missing devices, rw devices, + * seeding, structure cloning, openning/closing devices at mount/umount time + * + * global::fs_devs - add, remove, updates to the global list + * + * does not protect: manipulation of the fs_devices::devices list! + * + * btrfs_device::name - renames (write side), read is RCU + * + * fs_devices::device_list_mutex (per-fs, with RCU) + * ------------------------------------------------ + * protects updates to fs_devices::devices, ie. adding and deleting + * + * simple list traversal with read-only actions can be done with RCU protection + * + * may be used to exclude some operations from running concurrently without any + * modifications to the list (see write_all_supers) + * + * volume_mutex + * ------------ + * coarse lock owned by a mounted filesystem; used to exclude some operations + * that cannot run in parallel and affect the higher-level properties of the + * filesystem like: device add/deleting/resize/replace, or balance + * + * balance_mutex + * ------------- + * protects balance structures (status, state) and context accessed from + * several places (internally, ioctl) + * + * chunk_mutex + * ----------- + * protects chunks, adding or removing during allocation, trim or when a new + * device is added/removed + * + * cleaner_mutex + * ------------- + * a big lock that is held by the cleaner thread and prevents running subvolume + * cleaning together with relocation or delayed iputs + * + * + * Lock nesting + * ============ + * + * uuid_mutex + * volume_mutex + * device_list_mutex + * chunk_mutex + * balance_mutex + */ + DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); struct list_head *btrfs_get_fs_uuids(void) -- cgit v1.2.3 From 08ffcae8c92e7154ba5fb78c926463a21cba6501 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 19 Jun 2017 16:55:35 +0200 Subject: btrfs: simplify btrfs_close_bdev Split the conditions a bit. 
Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 372f515808d1..96c6c0fa1531 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -903,13 +903,15 @@ static void free_device_rcu(struct rcu_head *head) static void btrfs_close_bdev(struct btrfs_device *device) { - if (device->bdev && device->writeable) { + if (!device->bdev) + return; + + if (device->writeable) { sync_blockdev(device->bdev); invalidate_bdev(device->bdev); } - if (device->bdev) - blkdev_put(device->bdev, device->mode); + blkdev_put(device->bdev, device->mode); } static void btrfs_prepare_close_one_device(struct btrfs_device *device) -- cgit v1.2.3 From c5593ca3c8a82eb397804e22a34d2c7061b55f56 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 16 Jun 2017 00:09:21 +0200 Subject: btrfs: switch to RCU for device traversal in btrfs_ioctl_dev_info We don't need to use the mutex as we do not modify the devices nor the list itself and just read some information: does not change during device lifetime: - devid - uuid - name (ie. 
the path) may change in parallel to the ioctl call, but can lead only to reporting inaccuracy: - bytes_used - total_bytes Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 64b947487ca6..f1884c075cd3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2775,7 +2775,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, { struct btrfs_ioctl_dev_info_args *di_args; struct btrfs_device *dev; - struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; int ret = 0; char *s_uuid = NULL; @@ -2786,7 +2785,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, if (!btrfs_is_empty_uuid(di_args->uuid)) s_uuid = di_args->uuid; - mutex_lock(&fs_devices->device_list_mutex); + rcu_read_lock(); dev = btrfs_find_device(fs_info, di_args->devid, s_uuid, NULL); if (!dev) { @@ -2801,17 +2800,15 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, if (dev->name) { struct rcu_string *name; - rcu_read_lock(); name = rcu_dereference(dev->name); strncpy(di_args->path, name->str, sizeof(di_args->path)); - rcu_read_unlock(); di_args->path[sizeof(di_args->path) - 1] = 0; } else { di_args->path[0] = '\0'; } out: - mutex_unlock(&fs_devices->device_list_mutex); + rcu_read_unlock(); if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) ret = -EFAULT; -- cgit v1.2.3 From d03262c75da97503d026516456da41f38aa82067 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 16 Jun 2017 00:09:21 +0200 Subject: btrfs: switch to RCU for device traversal in btrfs_ioctl_fs_info We don't need to use the mutex as we do not modify the devices nor the list itself and just read information about device counts. Move copying fsid out of the protected section, not applicable to RCU same as the rest of the retrieved information.
Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f1884c075cd3..ab7ff3efcba6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2749,16 +2749,16 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, if (!fi_args) return -ENOMEM; - mutex_lock(&fs_devices->device_list_mutex); + rcu_read_lock(); fi_args->num_devices = fs_devices->num_devices; - memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid)); - list_for_each_entry(device, &fs_devices->devices, dev_list) { + list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { if (device->devid > fi_args->max_id) fi_args->max_id = device->devid; } - mutex_unlock(&fs_devices->device_list_mutex); + rcu_read_unlock(); + memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid)); fi_args->nodesize = fs_info->nodesize; fi_args->sectorsize = fs_info->sectorsize; fi_args->clone_alignment = fs_info->sectorsize; -- cgit v1.2.3 From 1538e6c52e19175a5f4da48eb88e059d5d12d7a0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 16 Jun 2017 00:28:47 +0200 Subject: btrfs: use non-RCU list traversal in write_all_supers callees We take the fs_devices::device_list_mutex mutex in write_all_supers which will prevent any add/del changes to the device list. Therefore we don't need to use the RCU variant list_for_each_entry_rcu in any of the called functions. 
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec9aa34bb6eb..06c593775b82 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3395,9 +3395,10 @@ static int barrier_all_devices(struct btrfs_fs_info *info) int errors_wait = 0; blk_status_t ret; + lockdep_assert_held(&info->fs_devices->device_list_mutex); /* send down all the barriers */ head = &info->fs_devices->devices; - list_for_each_entry_rcu(dev, head, dev_list) { + list_for_each_entry(dev, head, dev_list) { if (dev->missing) continue; if (!dev->bdev) @@ -3410,7 +3411,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) } /* wait for all the barriers */ - list_for_each_entry_rcu(dev, head, dev_list) { + list_for_each_entry(dev, head, dev_list) { if (dev->missing) continue; if (!dev->bdev) { @@ -3509,7 +3510,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) } } - list_for_each_entry_rcu(dev, head, dev_list) { + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) { total_errors++; continue; @@ -3550,7 +3551,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) } total_errors = 0; - list_for_each_entry_rcu(dev, head, dev_list) { + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; if (!dev->in_fs_metadata || !dev->writeable) -- cgit v1.2.3 From 66b0c887bbf61555fde648587644485388dddb78 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 Oct 2017 16:30:47 +0100 Subject: btrfs: prepare to drop gfp mask parameter from clear_extent_bit Use __clear_extent_bit directly in case we want to pass unknown gfp flags. Otherwise all clear_extent_bit callers use GFP_NOFS, so we can sink them to the function and reduce argument count, at the cost that __clear_extent_bit has to be exported. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 6 +++--- fs/btrfs/extent_io.h | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c6e82a3e706d..78f7787454f7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -581,7 +581,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err) * * This takes the tree lock, and returns 0 on success and < 0 on error. */ -static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, +int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, struct extent_state **cached_state, gfp_t mask, struct extent_changeset *changeset) @@ -4232,9 +4232,9 @@ static int try_release_extent_state(struct extent_map_tree *map, * at this point we can safely clear everything except the * locked bit and the nodatasum bit */ - ret = clear_extent_bit(tree, start, end, + ret = __clear_extent_bit(tree, start, end, ~(EXTENT_LOCKED | EXTENT_NODATASUM), - 0, 0, NULL, mask); + 0, 0, NULL, mask, NULL); /* if clear_extent_bit failed for enomem reasons, * we can't allow the release to continue. 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 93dcae0c3183..2cdfc64a1356 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -301,6 +301,10 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, struct extent_state **cached, gfp_t mask); +int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + unsigned bits, int wake, int delete, + struct extent_state **cached, gfp_t mask, + struct extent_changeset *changeset); static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) { @@ -311,8 +315,8 @@ static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, - mask); + return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, + mask, NULL); } static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, @@ -342,8 +346,8 @@ static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, - cached_state, mask); + return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, + cached_state, mask, NULL); } static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start, -- cgit v1.2.3 From ae0f162534e98afccc7d055cfaa3d3e920a928f0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 Oct 2017 16:37:52 +0100 Subject: btrfs: sink gfp parameter to clear_extent_bit All callers use GFP_NOFS, we don't have to pass it as an argument. 
The built-in tests pass GFP_KERNEL, but they run only at module load time and NOFS works there as well. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 10 +++++----- fs/btrfs/extent_io.h | 10 ++++------ fs/btrfs/file.c | 2 +- fs/btrfs/free-space-cache.c | 8 +++----- fs/btrfs/inode.c | 22 ++++++++++------------ fs/btrfs/ioctl.c | 2 +- fs/btrfs/qgroup.c | 3 +-- fs/btrfs/tests/inode-tests.c | 12 ++++-------- fs/btrfs/transaction.c | 3 +-- 9 files changed, 30 insertions(+), 42 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 78f7787454f7..88131863d827 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1295,10 +1295,10 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, - struct extent_state **cached, gfp_t mask) + struct extent_state **cached) { return __clear_extent_bit(tree, start, end, bits, wake, delete, - cached, mask, NULL); + cached, GFP_NOFS, NULL); } int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, @@ -1348,7 +1348,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) if (err == -EEXIST) { if (failed_start > start) clear_extent_bit(tree, start, failed_start - 1, - EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); + EXTENT_LOCKED, 1, 0, NULL); return 0; } return 1; @@ -1744,7 +1744,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, unsigned long page_ops) { clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0, - NULL, GFP_NOFS); + NULL); __process_pages_contig(inode->i_mapping, locked_page, start >> PAGE_SHIFT, end >> PAGE_SHIFT, @@ -4207,7 +4207,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, - 1, 1, &cached_state, GFP_NOFS); + 1, 1, &cached_state); return 0; } diff --git 
a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2cdfc64a1356..b679309545d8 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -300,7 +300,7 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, struct extent_changeset *changeset); int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, - struct extent_state **cached, gfp_t mask); + struct extent_state **cached); int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, struct extent_state **cached, gfp_t mask, @@ -308,8 +308,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) { - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, - GFP_NOFS); + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL); } static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start, @@ -327,8 +326,7 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, if (bits & EXTENT_LOCKED) wake = 1; - return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, - GFP_NOFS); + return clear_extent_bit(tree, start, end, bits, wake, 0, NULL); } int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, @@ -362,7 +360,7 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start, { return clear_extent_bit(tree, start, end, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); + EXTENT_DO_ACCOUNTING, 0, 0, NULL); } int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d1eba3394660..b85b6d7d0ccd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1519,7 +1519,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, clear_extent_bit(&inode->io_tree, 
start_pos, last_pos, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, - 0, 0, cached_state, GFP_NOFS); + 0, 0, cached_state); *lockstart = start_pos; *lockend = last_pos; ret = 1; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4426d1c73e50..b8ab90c9a9fb 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -993,8 +993,7 @@ update_cache_item(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, - GFP_NOFS); + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); goto fail; } leaf = path->nodes[0]; @@ -1008,7 +1007,7 @@ update_cache_item(struct btrfs_trans_handle *trans, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, - NULL, GFP_NOFS); + NULL); btrfs_release_path(path); goto fail; } @@ -1105,8 +1104,7 @@ static int flush_dirty_cache(struct inode *inode) ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); if (ret) clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, - GFP_NOFS); + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f259d89804b2..ad7c6ddb4cbd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1203,7 +1203,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 cur_end; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, - 1, 0, NULL, GFP_NOFS); + 1, 0, NULL); while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); BUG_ON(!async_cow); /* -ENOMEM */ @@ -3000,7 +3000,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) clear_extent_bit(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, - EXTENT_DEFRAG, 0, 0, 
&cached_state, GFP_NOFS); + EXTENT_DEFRAG, 0, 0, &cached_state); } if (nolock) @@ -3070,7 +3070,7 @@ out: ordered_extent->len - 1, clear_bits, (clear_bits & EXTENT_LOCKED) ? 1 : 0, - 0, &cached_state, GFP_NOFS); + 0, &cached_state); } if (trans) @@ -4812,7 +4812,7 @@ again: clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, - 0, 0, &cached_state, GFP_NOFS); + 0, 0, &cached_state); ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0, &cached_state, 0); @@ -5248,8 +5248,7 @@ static void evict_inode_truncate_pages(struct inode *inode) clear_extent_bit(io_tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, 1, 1, - &cached_state, GFP_NOFS); + EXTENT_DEFRAG, 1, 1, &cached_state); cond_resched(); spin_lock(&io_tree->lock); @@ -7936,7 +7935,7 @@ unlock: if (lockstart < lockend) { clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, - &cached_state, GFP_NOFS); + &cached_state); } else { free_extent_state(cached_state); } @@ -7947,7 +7946,7 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - unlock_bits, 1, 0, &cached_state, GFP_NOFS); + unlock_bits, 1, 0, &cached_state); err: if (dio_data) current->journal_info = dio_data; @@ -8989,8 +8988,7 @@ again: EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | - EXTENT_DEFRAG, 1, 0, &cached_state, - GFP_NOFS); + EXTENT_DEFRAG, 1, 0, &cached_state); /* * whoever cleared the private bit is responsible * for the finish_ordered_io @@ -9047,7 +9045,7 @@ again: EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, - &cached_state, GFP_NOFS); + &cached_state); __btrfs_releasepage(page, GFP_NOFS); } @@ -9175,7 +9173,7 @@ again: clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end, EXTENT_DIRTY | EXTENT_DELALLOC | 
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, - 0, 0, &cached_state, GFP_NOFS); + 0, 0, &cached_state); ret = btrfs_set_extent_delalloc(inode, page_start, end, 0, &cached_state, 0); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ab7ff3efcba6..e7f37c46d6a9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1190,7 +1190,7 @@ again: clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, - &cached_state, GFP_NOFS); + &cached_state); if (i_done != page_cnt) { spin_lock(&BTRFS_I(inode)->lock); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 168fd03ca3ac..9e61dd624f7b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2883,8 +2883,7 @@ cleanup: ULIST_ITER_INIT(&uiter); while ((unode = ulist_next(&reserved->range_changed, &uiter))) clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, - unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, - GFP_NOFS); + unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL); extent_changeset_release(reserved); return ret; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 30affb60da51..4a0a60d3275d 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -1001,8 +1001,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) BTRFS_MAX_EXTENT_SIZE >> 1, (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, EXTENT_DELALLOC | EXTENT_DIRTY | - EXTENT_UPTODATE, 0, 0, - NULL, GFP_KERNEL); + EXTENT_UPTODATE, 0, 0, NULL); if (ret) { test_msg("clear_extent_bit returned %d\n", ret); goto out; @@ -1070,8 +1069,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) BTRFS_MAX_EXTENT_SIZE + sectorsize, BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_UPTODATE, 0, 0, - NULL, GFP_KERNEL); + EXTENT_UPTODATE, 0, 0, NULL); if (ret) { test_msg("clear_extent_bit returned %d\n", ret); goto out; @@ -1104,8 +1102,7 @@ static int test_extent_accounting(u32 
sectorsize, u32 nodesize) /* Empty */ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_UPTODATE, 0, 0, - NULL, GFP_KERNEL); + EXTENT_UPTODATE, 0, 0, NULL); if (ret) { test_msg("clear_extent_bit returned %d\n", ret); goto out; @@ -1121,8 +1118,7 @@ out: if (ret) clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_UPTODATE, 0, 0, - NULL, GFP_KERNEL); + EXTENT_UPTODATE, 0, 0, NULL); iput(inode); btrfs_free_dummy_root(root); btrfs_free_dummy_fs_info(fs_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5a8c2649af2f..dac688c696c3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1016,8 +1016,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info, * it's safe to do it (through clear_btree_io_tree()). */ err = clear_extent_bit(dirty_pages, start, end, - EXTENT_NEED_WAIT, - 0, 0, &cached_state, GFP_NOFS); + EXTENT_NEED_WAIT, 0, 0, &cached_state); if (err == -ENOMEM) err = 0; if (!err) -- cgit v1.2.3 From f08dc36f781af622be5398ac3ab2ec9c3749889d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 Oct 2017 17:02:39 +0100 Subject: btrfs: sink gfp parameter to clear_extent_uptodate There's only one callsite with GFP_NOFS. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.h | 4 ++-- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b679309545d8..9f6a5133ce8a 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -342,10 +342,10 @@ static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, } static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, - u64 end, struct extent_state **cached_state, gfp_t mask) + u64 end, struct extent_state **cached_state) { return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, - cached_state, mask, NULL); + cached_state, GFP_NOFS, NULL); } static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ad7c6ddb4cbd..f9efaecfc9d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3084,7 +3084,7 @@ out: else start = ordered_extent->file_offset; end = ordered_extent->file_offset + ordered_extent->len - 1; - clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS); + clear_extent_uptodate(io_tree, start, end, NULL); /* Drop the cache for the part of the extent we didn't write. */ btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); -- cgit v1.2.3 From 712e36c5f2a7fa561aa3876b0e466df072aa6a1e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 Oct 2017 17:08:27 +0100 Subject: btrfs: use GFP_KERNEL in btrfs_alloc_inode This callback is called directly from VFS, no locks are held at the allocation time. 
Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9efaecfc9d1..2e92c582cda0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9430,7 +9430,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) struct btrfs_inode *ei; struct inode *inode; - ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); + ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; -- cgit v1.2.3 From ad7b0368f33cffe67fecd302028915926e50ef7e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 8 Nov 2017 08:54:25 +0800 Subject: btrfs: tree-checker: Add checker for dir item Add checker for dir item, for key types DIR_ITEM, DIR_INDEX and XATTR_ITEM. This checker does comprehensive checks for: 1) dir_item header and its data size Against item boundary and maximum name/xattr length. This part is mostly the same as old verify_dir_item(). 2) dir_type Against maximum file types, and against key type. Since XATTR key should only have FT_XATTR dir item, and normal dir item type should not have XATTR key. The check between key->type and dir_type is newly introduced by this patch. 3) name hash For XATTR and DIR_ITEM key, key->offset is name hash (crc32c). Check the hash of the name against the key to ensure it's correct. The name hash check is only found in btrfs-progs before this patch. 
Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ce4ed6ec8f39..66dac0a4b01f 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -30,6 +30,7 @@ #include "tree-checker.h" #include "disk-io.h" #include "compression.h" +#include "hash.h" /* * Error message should follow the following format: @@ -222,6 +223,141 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, return 0; } +/* + * Customized reported for dir_item, only important new info is key->objectid, + * which represents inode number + */ +__printf(4, 5) +static void dir_item_err(const struct btrfs_root *root, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(root->fs_info, + "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV", + btrfs_header_level(eb) == 0 ? 
"leaf" : "node", root->objectid, + btrfs_header_bytenr(eb), slot, key.objectid, &vaf); + va_end(args); +} + +static int check_dir_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_dir_item *di; + u32 item_size = btrfs_item_size_nr(leaf, slot); + u32 cur = 0; + + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + while (cur < item_size) { + char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + u32 name_len; + u32 data_len; + u32 max_name_len; + u32 total_size; + u32 name_hash; + u8 dir_type; + + /* header itself should not cross item boundary */ + if (cur + sizeof(*di) > item_size) { + dir_item_err(root, leaf, slot, + "dir item header crosses item boundary, have %lu boundary %u", + cur + sizeof(*di), item_size); + return -EUCLEAN; + } + + /* dir type check */ + dir_type = btrfs_dir_type(leaf, di); + if (dir_type >= BTRFS_FT_MAX) { + dir_item_err(root, leaf, slot, + "invalid dir item type, have %u expect [0, %u)", + dir_type, BTRFS_FT_MAX); + return -EUCLEAN; + } + + if (key->type == BTRFS_XATTR_ITEM_KEY && + dir_type != BTRFS_FT_XATTR) { + dir_item_err(root, leaf, slot, + "invalid dir item type for XATTR key, have %u expect %u", + dir_type, BTRFS_FT_XATTR); + return -EUCLEAN; + } + if (dir_type == BTRFS_FT_XATTR && + key->type != BTRFS_XATTR_ITEM_KEY) { + dir_item_err(root, leaf, slot, + "xattr dir type found for non-XATTR key"); + return -EUCLEAN; + } + if (dir_type == BTRFS_FT_XATTR) + max_name_len = XATTR_NAME_MAX; + else + max_name_len = BTRFS_NAME_LEN; + + /* Name/data length check */ + name_len = btrfs_dir_name_len(leaf, di); + data_len = btrfs_dir_data_len(leaf, di); + if (name_len > max_name_len) { + dir_item_err(root, leaf, slot, + "dir item name len too long, have %u max %u", + name_len, max_name_len); + return -EUCLEAN; + } + if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) { + dir_item_err(root, leaf, slot, + "dir item name and data len too long, have %u max %u", + 
name_len + data_len, + BTRFS_MAX_XATTR_SIZE(root->fs_info)); + return -EUCLEAN; + } + + if (data_len && dir_type != BTRFS_FT_XATTR) { + dir_item_err(root, leaf, slot, + "dir item with invalid data len, have %u expect 0", + data_len); + return -EUCLEAN; + } + + total_size = sizeof(*di) + name_len + data_len; + + /* header and name/data should not cross item boundary */ + if (cur + total_size > item_size) { + dir_item_err(root, leaf, slot, + "dir item data crosses item boundary, have %u boundary %u", + cur + total_size, item_size); + return -EUCLEAN; + } + + /* + * Special check for XATTR/DIR_ITEM, as key->offset is name + * hash, should match its name + */ + if (key->type == BTRFS_DIR_ITEM_KEY || + key->type == BTRFS_XATTR_ITEM_KEY) { + read_extent_buffer(leaf, namebuf, + (unsigned long)(di + 1), name_len); + name_hash = btrfs_name_hash(namebuf, name_len); + if (key->offset != name_hash) { + dir_item_err(root, leaf, slot, + "name hash mismatch with key, have 0x%016x expect 0x%016llx", + name_hash, key->offset); + return -EUCLEAN; + } + } + cur += total_size; + di = (struct btrfs_dir_item *)((void *)di + total_size); + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -238,6 +374,11 @@ static int check_leaf_item(struct btrfs_root *root, case BTRFS_EXTENT_CSUM_KEY: ret = check_csum_item(root, leaf, key, slot); break; + case BTRFS_DIR_ITEM_KEY: + case BTRFS_DIR_INDEX_KEY: + case BTRFS_XATTR_ITEM_KEY: + ret = check_dir_item(root, leaf, key, slot); + break; } return ret; } -- cgit v1.2.3 From bae15d95e247f94ceb32caaf13d1d71ecbfc8735 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 8 Nov 2017 08:54:26 +0800 Subject: btrfs: Cleanup existing name_len checks Since tree-checker has verified leaf when reading from disk, we don't need the existing verify_dir_item() or btrfs_is_name_len_valid() checks. 
Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 --- fs/btrfs/dir-item.c | 108 --------------------------------------------------- fs/btrfs/export.c | 5 --- fs/btrfs/inode.c | 4 -- fs/btrfs/props.c | 7 ---- fs/btrfs/root-tree.c | 7 ---- fs/btrfs/send.c | 6 --- fs/btrfs/tree-log.c | 47 +++++----------------- fs/btrfs/xattr.c | 6 --- 9 files changed, 9 insertions(+), 186 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a56d00311578..09b72b6996ce 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3060,15 +3060,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 dir, const char *name, u16 name_len, int mod); -int verify_dir_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, int slot, - struct btrfs_dir_item *dir_item); struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info, struct btrfs_path *path, const char *name, int name_len); -bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot, - unsigned long start, u16 name_len); /* orphan.c */ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 41cb9196eaa8..cbe421605cd5 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -403,8 +403,6 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info, btrfs_dir_data_len(leaf, dir_item); name_ptr = (unsigned long)(dir_item + 1); - if (verify_dir_item(fs_info, leaf, path->slots[0], dir_item)) - return NULL; if (btrfs_dir_name_len(leaf, dir_item) == name_len && memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) return dir_item; @@ -450,109 +448,3 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, } return ret; } - -int verify_dir_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, - int slot, - struct btrfs_dir_item *dir_item) -{ 
- u16 namelen = BTRFS_NAME_LEN; - int ret; - u8 type = btrfs_dir_type(leaf, dir_item); - - if (type >= BTRFS_FT_MAX) { - btrfs_crit(fs_info, "invalid dir item type: %d", (int)type); - return 1; - } - - if (type == BTRFS_FT_XATTR) - namelen = XATTR_NAME_MAX; - - if (btrfs_dir_name_len(leaf, dir_item) > namelen) { - btrfs_crit(fs_info, "invalid dir item name len: %u", - (unsigned)btrfs_dir_name_len(leaf, dir_item)); - return 1; - } - - namelen = btrfs_dir_name_len(leaf, dir_item); - ret = btrfs_is_name_len_valid(leaf, slot, - (unsigned long)(dir_item + 1), namelen); - if (!ret) - return 1; - - /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ - if ((btrfs_dir_data_len(leaf, dir_item) + - btrfs_dir_name_len(leaf, dir_item)) > - BTRFS_MAX_XATTR_SIZE(fs_info)) { - btrfs_crit(fs_info, "invalid dir item name + data len: %u + %u", - (unsigned)btrfs_dir_name_len(leaf, dir_item), - (unsigned)btrfs_dir_data_len(leaf, dir_item)); - return 1; - } - - return 0; -} - -bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot, - unsigned long start, u16 name_len) -{ - struct btrfs_fs_info *fs_info = leaf->fs_info; - struct btrfs_key key; - u32 read_start; - u32 read_end; - u32 item_start; - u32 item_end; - u32 size; - bool ret = true; - - ASSERT(start > BTRFS_LEAF_DATA_OFFSET); - - read_start = start - BTRFS_LEAF_DATA_OFFSET; - read_end = read_start + name_len; - item_start = btrfs_item_offset_nr(leaf, slot); - item_end = btrfs_item_end_nr(leaf, slot); - - btrfs_item_key_to_cpu(leaf, &key, slot); - - switch (key.type) { - case BTRFS_DIR_ITEM_KEY: - case BTRFS_XATTR_ITEM_KEY: - case BTRFS_DIR_INDEX_KEY: - size = sizeof(struct btrfs_dir_item); - break; - case BTRFS_INODE_REF_KEY: - size = sizeof(struct btrfs_inode_ref); - break; - case BTRFS_INODE_EXTREF_KEY: - size = sizeof(struct btrfs_inode_extref); - break; - case BTRFS_ROOT_REF_KEY: - case BTRFS_ROOT_BACKREF_KEY: - size = sizeof(struct btrfs_root_ref); - break; - default: - ret = false; - goto out; - } - - if 
(read_start < item_start) { - ret = false; - goto out; - } - if (read_end > item_end) { - ret = false; - goto out; - } - - /* there shall be item(s) before name */ - if (read_start - item_start < size) { - ret = false; - goto out; - } - -out: - if (!ret) - btrfs_crit(fs_info, "invalid dir item name len: %u", - (unsigned int)name_len); - return ret; -} diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 3aeb5770f896..ddaccad469f8 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -283,11 +283,6 @@ static int btrfs_get_name(struct dentry *parent, char *name, name_len = btrfs_inode_ref_name_len(leaf, iref); } - ret = btrfs_is_name_len_valid(leaf, path->slots[0], name_ptr, name_len); - if (!ret) { - btrfs_free_path(path); - return -EIO; - } read_extent_buffer(leaf, name, name_ptr, name_len); btrfs_free_path(path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2e92c582cda0..ff91b2e3979a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5907,7 +5907,6 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx) static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_private *private = file->private_data; struct btrfs_dir_item *di; @@ -5975,9 +5974,6 @@ again: if (btrfs_should_delete_dir_index(&del_list, found_key.offset)) goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - if (verify_dir_item(fs_info, leaf, slot, di)) - goto next; - name_len = btrfs_dir_name_len(leaf, di); if ((total_len + sizeof(struct dir_entry) + name_len) >= PAGE_SIZE) { diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index f6a05f836629..c39a940d0c75 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -164,7 +164,6 @@ static int iterate_object_props(struct btrfs_root *root, size_t), void *ctx) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret; 
char *name_buf = NULL; char *value_buf = NULL; @@ -215,12 +214,6 @@ static int iterate_object_props(struct btrfs_root *root, name_ptr = (unsigned long)(di + 1); data_ptr = name_ptr + name_len; - if (verify_dir_item(fs_info, leaf, - path->slots[0], di)) { - ret = -EIO; - goto out; - } - if (name_len <= XATTR_BTRFS_PREFIX_LEN || memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX, name_ptr, diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 3338407ef0f0..aab0194efe46 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -387,13 +387,6 @@ again: WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); ptr = (unsigned long)(ref + 1); - ret = btrfs_is_name_len_valid(leaf, path->slots[0], ptr, - name_len); - if (!ret) { - err = -EIO; - goto out; - } - WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); *sequence = btrfs_root_ref_sequence(leaf, ref); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 20d3300bd268..f306c608dc28 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1059,12 +1059,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, } } - ret = btrfs_is_name_len_valid(eb, path->slots[0], - (unsigned long)(di + 1), name_len + data_len); - if (!ret) { - ret = -EIO; - goto out; - } if (name_len + data_len > buf_len) { buf_len = name_len + data_len; if (is_vmalloc_addr(buf)) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7bf9b31561db..a806182dfea6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1173,19 +1173,15 @@ next: return 0; } -static int extref_get_fields(struct extent_buffer *eb, int slot, - unsigned long ref_ptr, u32 *namelen, char **name, - u64 *index, u64 *parent_objectid) +static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, + u32 *namelen, char **name, u64 *index, + u64 *parent_objectid) { struct btrfs_inode_extref *extref; extref = (struct btrfs_inode_extref *)ref_ptr; *namelen = 
btrfs_inode_extref_name_len(eb, extref); - if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)&extref->name, - *namelen)) - return -EIO; - *name = kmalloc(*namelen, GFP_NOFS); if (*name == NULL) return -ENOMEM; @@ -1200,19 +1196,14 @@ static int extref_get_fields(struct extent_buffer *eb, int slot, return 0; } -static int ref_get_fields(struct extent_buffer *eb, int slot, - unsigned long ref_ptr, u32 *namelen, char **name, - u64 *index) +static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr, + u32 *namelen, char **name, u64 *index) { struct btrfs_inode_ref *ref; ref = (struct btrfs_inode_ref *)ref_ptr; *namelen = btrfs_inode_ref_name_len(eb, ref); - if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)(ref + 1), - *namelen)) - return -EIO; - *name = kmalloc(*namelen, GFP_NOFS); if (*name == NULL) return -ENOMEM; @@ -1287,8 +1278,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, while (ref_ptr < ref_end) { if (log_ref_ver) { - ret = extref_get_fields(eb, slot, ref_ptr, &namelen, - &name, &ref_index, &parent_objectid); + ret = extref_get_fields(eb, ref_ptr, &namelen, &name, + &ref_index, &parent_objectid); /* * parent object can change from one array * item to another. 
@@ -1300,8 +1291,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, goto out; } } else { - ret = ref_get_fields(eb, slot, ref_ptr, &namelen, - &name, &ref_index); + ret = ref_get_fields(eb, ref_ptr, &namelen, &name, + &ref_index); } if (ret) goto out; @@ -1835,7 +1826,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; u32 item_size = btrfs_item_size_nr(eb, slot); struct btrfs_dir_item *di; @@ -1848,8 +1838,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, ptr_end = ptr + item_size; while (ptr < ptr_end) { di = (struct btrfs_dir_item *)ptr; - if (verify_dir_item(fs_info, eb, slot, di)) - return -EIO; name_len = btrfs_dir_name_len(eb, di); ret = replay_one_name(trans, root, path, eb, di, key); if (ret < 0) @@ -2024,11 +2012,6 @@ again: ptr_end = ptr + item_size; while (ptr < ptr_end) { di = (struct btrfs_dir_item *)ptr; - if (verify_dir_item(fs_info, eb, slot, di)) { - ret = -EIO; - goto out; - } - name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); if (!name) { @@ -2109,7 +2092,6 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans, struct btrfs_path *path, const u64 ino) { - struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key search_key; struct btrfs_path *log_path; int i; @@ -2151,11 +2133,6 @@ process_leaf: u32 this_len = sizeof(*di) + name_len + data_len; char *name; - ret = verify_dir_item(fs_info, path->nodes[0], i, di); - if (ret) { - ret = -EIO; - goto out; - } name = kmalloc(name_len, GFP_NOFS); if (!name) { ret = -ENOMEM; @@ -4572,12 +4549,6 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb, this_len = sizeof(*extref) + this_name_len; } - ret = btrfs_is_name_len_valid(eb, slot, name_ptr, - this_name_len); - if (!ret) { - ret = -EIO; - goto out; - } if (this_name_len > name_len) { char 
*new_name; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 2c7e53f9ff1b..ad298c248da4 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -267,7 +267,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct btrfs_key key; struct inode *inode = d_inode(dentry); - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; int ret = 0; @@ -336,11 +335,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) u32 this_len = sizeof(*di) + name_len + data_len; unsigned long name_ptr = (unsigned long)(di + 1); - if (verify_dir_item(fs_info, leaf, slot, di)) { - ret = -EIO; - goto err; - } - total_size += name_len + 1; /* * We are just looking for how big our buffer needs to -- cgit v1.2.3 From 433175992c1775db6cbc7c92294345408a333bee Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 03:46:07 +0200 Subject: btrfs: sink get_extent parameter to extent_writepages There's only one caller. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 +-- fs/btrfs/extent_io.h | 1 - fs/btrfs/inode.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 88131863d827..1d7aebd020ad 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4121,14 +4121,13 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, int extent_writepages(struct extent_io_tree *tree, struct address_space *mapping, - get_extent_t *get_extent, struct writeback_control *wbc) { int ret = 0; struct extent_page_data epd = { .bio = NULL, .tree = tree, - .get_extent = get_extent, + .get_extent = btrfs_get_extent, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9f6a5133ce8a..e07f9e1c34e8 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -411,7 +411,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, int mode); int extent_writepages(struct extent_io_tree *tree, struct address_space *mapping, - get_extent_t *get_extent, struct writeback_control *wbc); int btree_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ff91b2e3979a..a49dd803fb03 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8903,7 +8903,7 @@ static int btrfs_writepages(struct address_space *mapping, struct extent_io_tree *tree; tree = &BTRFS_I(mapping->host)->io_tree; - return extent_writepages(tree, mapping, btrfs_get_extent, wbc); + return extent_writepages(tree, mapping, wbc); } static int -- cgit v1.2.3 From 916b929831a92a2a432274cd264311893f22a46d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 03:47:28 +0200 Subject: btrfs: sink get_extent parameter to extent_write_locked_range There's only one caller. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 ++--- fs/btrfs/extent_io.h | 3 +-- fs/btrfs/inode.c | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1d7aebd020ad..d9a76e33ac3c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4077,8 +4077,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, } int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, - u64 start, u64 end, get_extent_t *get_extent, - int mode) + u64 start, u64 end, int mode) { int ret = 0; struct address_space *mapping = inode->i_mapping; @@ -4089,7 +4088,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, struct extent_page_data epd = { .bio = NULL, .tree = tree, - .get_extent = get_extent, + .get_extent = btrfs_get_extent, .extent_locked = 1, .sync_io = mode == WB_SYNC_ALL, }; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e07f9e1c34e8..b67fc0153b73 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -407,8 +407,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc); int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, - u64 start, u64 end, get_extent_t *get_extent, - int mode); + u64 start, u64 end, int mode); int extent_writepages(struct extent_io_tree *tree, struct address_space *mapping, struct writeback_control *wbc); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a49dd803fb03..ae62d59edd4b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -769,7 +769,6 @@ retry: inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - btrfs_get_extent, WB_SYNC_ALL); else if (ret) unlock_page(async_cow->locked_page); -- cgit v1.2.3 From deac642d7e0fd83efd3372c4093fe60ac7436db6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 03:47:28 +0200 Subject: 
btrfs: sink get_extent parameter to extent_write_full_page There's only one caller. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 +-- fs/btrfs/extent_io.h | 1 - fs/btrfs/inode.c | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d9a76e33ac3c..7754124b07b8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4058,14 +4058,13 @@ static noinline void flush_write_bio(void *data) } int extent_write_full_page(struct extent_io_tree *tree, struct page *page, - get_extent_t *get_extent, struct writeback_control *wbc) { int ret; struct extent_page_data epd = { .bio = NULL, .tree = tree, - .get_extent = get_extent, + .get_extent = btrfs_get_extent, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b67fc0153b73..abe4feea1539 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -404,7 +404,6 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_io_tree *tree, struct page *page, - get_extent_t *get_extent, struct writeback_control *wbc); int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, u64 start, u64 end, int mode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ae62d59edd4b..7403ef8856bd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8891,7 +8891,7 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return AOP_WRITEPAGE_ACTIVATE; } tree = &BTRFS_I(page->mapping->host)->io_tree; - ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc); + ret = extent_write_full_page(tree, page, wbc); btrfs_add_delayed_iput(inode); return ret; } -- cgit v1.2.3 From 3c98c62f7a761ab132950cf36795c8001be72b47 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:01:08 +0200 
Subject: btrfs: drop get_extent from extent_page_data Previous patches cleaned up all places where extent_page_data::get_extent was set and it was btrfs_get_extent all the time, so we can simply call that instead. This also reduces size of extent_page_data by 8 bytes which has positive effect on stack consumption on various functions on the write out path. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7754124b07b8..3ecd3ab73981 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -109,8 +109,6 @@ struct tree_entry { struct extent_page_data { struct bio *bio; struct extent_io_tree *tree; - get_extent_t *get_extent; - /* tells writepage not to lock the state bits for this range * it still does the unlocking */ @@ -3373,7 +3371,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, page_end, NULL, 1); break; } - em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur, + em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur, end - cur + 1, 1); if (IS_ERR_OR_NULL(em)) { SetPageError(page); @@ -4064,7 +4062,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, struct extent_page_data epd = { .bio = NULL, .tree = tree, - .get_extent = btrfs_get_extent, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; @@ -4087,7 +4084,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, struct extent_page_data epd = { .bio = NULL, .tree = tree, - .get_extent = btrfs_get_extent, .extent_locked = 1, .sync_io = mode == WB_SYNC_ALL, }; @@ -4125,7 +4121,6 @@ int extent_writepages(struct extent_io_tree *tree, struct extent_page_data epd = { .bio = NULL, .tree = tree, - .get_extent = btrfs_get_extent, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; -- cgit v1.2.3 From 2135fb9bb4b8d05d288d994c4f9f8077ce90d890 Mon Sep 17 00:00:00 2001 From: 
David Sterba Date: Fri, 23 Jun 2017 04:09:57 +0200 Subject: btrfs: sink get_extent parameter to extent_fiemap All callers pass btrfs_get_extent_fiemap and we don't expect anything else in the context of extent_fiemap. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 6 +++--- fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3ecd3ab73981..d62e0194fc3b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4441,7 +4441,7 @@ static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info, } int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len, get_extent_t *get_extent) + __u64 start, __u64 len) { int ret = 0; u64 off = start; @@ -4524,7 +4524,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, &cached_state); em = get_extent_skip_holes(inode, start, last_for_get_extent, - get_extent); + btrfs_get_extent_fiemap); if (!em) goto out; if (IS_ERR(em)) { @@ -4613,7 +4613,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* now scan forward to see if this is really the last extent. 
*/ em = get_extent_skip_holes(inode, off, last_for_get_extent, - get_extent); + btrfs_get_extent_fiemap); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index abe4feea1539..afc169d04b22 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -417,7 +417,7 @@ int extent_readpages(struct extent_io_tree *tree, struct list_head *pages, unsigned nr_pages, get_extent_t get_extent); int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len, get_extent_t *get_extent); + __u64 start, __u64 len); void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7403ef8856bd..63cf6cd174d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8859,7 +8859,7 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (ret) return ret; - return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap); + return extent_fiemap(inode, fieinfo, start, len); } int btrfs_readpage(struct file *file, struct page *page) -- cgit v1.2.3 From e3350e16eaa61422d37e541e1468ce7476b44a4e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:09:57 +0200 Subject: btrfs: sink get_extent parameter to get_extent_skip_holes All callers pass btrfs_get_extent_fiemap and get_extent_skip_holes itself is used only as a fiemap helper. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d62e0194fc3b..3f5debc5e56f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4292,9 +4292,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, * This maps until we find something past 'last' */ static struct extent_map *get_extent_skip_holes(struct inode *inode, - u64 offset, - u64 last, - get_extent_t *get_extent) + u64 offset, u64 last) { u64 sectorsize = btrfs_inode_sectorsize(inode); struct extent_map *em; @@ -4308,7 +4306,8 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, if (len == 0) break; len = ALIGN(len, sectorsize); - em = get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0); + em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset, + len, 0); if (IS_ERR_OR_NULL(em)) return em; @@ -4523,8 +4522,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, &cached_state); - em = get_extent_skip_holes(inode, start, last_for_get_extent, - btrfs_get_extent_fiemap); + em = get_extent_skip_holes(inode, start, last_for_get_extent); if (!em) goto out; if (IS_ERR(em)) { @@ -4612,8 +4610,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } /* now scan forward to see if this is really the last extent. */ - em = get_extent_skip_holes(inode, off, last_for_get_extent, - btrfs_get_extent_fiemap); + em = get_extent_skip_holes(inode, off, last_for_get_extent); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; -- cgit v1.2.3 From 0932584b66e97aea91eb8c0b610e1d1083951b32 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:09:57 +0200 Subject: btrfs: sink get_extent parameter to extent_readpages There's only one caller that passes btrfs_get_extent. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 11 +++++------ fs/btrfs/extent_io.h | 3 +-- fs/btrfs/inode.c | 3 +-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3f5debc5e56f..f86a6aae545a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4133,8 +4133,7 @@ int extent_writepages(struct extent_io_tree *tree, int extent_readpages(struct extent_io_tree *tree, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent) + struct list_head *pages, unsigned nr_pages) { struct bio *bio = NULL; unsigned page_idx; @@ -4160,13 +4159,13 @@ int extent_readpages(struct extent_io_tree *tree, pagepool[nr++] = page; if (nr < ARRAY_SIZE(pagepool)) continue; - __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, &bio_flags, &prev_em_start); + __extent_readpages(tree, pagepool, nr, btrfs_get_extent, + &em_cached, &bio, &bio_flags, &prev_em_start); nr = 0; } if (nr) - __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, &bio_flags, &prev_em_start); + __extent_readpages(tree, pagepool, nr, btrfs_get_extent, + &em_cached, &bio, &bio_flags, &prev_em_start); if (em_cached) free_extent_map(em_cached); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index afc169d04b22..6cf4a0e5b0ea 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -414,8 +414,7 @@ int btree_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc); int extent_readpages(struct extent_io_tree *tree, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent); + struct list_head *pages, unsigned nr_pages); int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); void set_page_extent_mapped(struct page *page); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 63cf6cd174d1..72c7b38fb17e 100644 --- a/fs/btrfs/inode.c +++ 
b/fs/btrfs/inode.c @@ -8911,8 +8911,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping, { struct extent_io_tree *tree; tree = &BTRFS_I(mapping->host)->io_tree; - return extent_readpages(tree, mapping, pages, nr_pages, - btrfs_get_extent); + return extent_readpages(tree, mapping, pages, nr_pages); } static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) { -- cgit v1.2.3 From e4d17ef50719dc38ada5b4166cb670e0651ee4de Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:09:57 +0200 Subject: btrfs: sink get_extent parameter to __extent_readpages All callers pass btrfs_get_extent. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f86a6aae545a..4e7c5d572667 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3121,7 +3121,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, static void __extent_readpages(struct extent_io_tree *tree, struct page *pages[], - int nr_pages, get_extent_t *get_extent, + int nr_pages, struct extent_map **em_cached, struct bio **bio, unsigned long *bio_flags, u64 *prev_em_start) @@ -3143,7 +3143,8 @@ static void __extent_readpages(struct extent_io_tree *tree, } else { __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, - end, get_extent, em_cached, + end, btrfs_get_extent, + em_cached, bio, bio_flags, prev_em_start); start = page_start; @@ -3155,7 +3156,7 @@ static void __extent_readpages(struct extent_io_tree *tree, if (end) __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, - end, get_extent, em_cached, bio, + end, btrfs_get_extent, em_cached, bio, bio_flags, prev_em_start); } @@ -4159,13 +4160,13 @@ int extent_readpages(struct extent_io_tree *tree, pagepool[nr++] = page; if (nr < ARRAY_SIZE(pagepool)) continue; - __extent_readpages(tree, pagepool, nr, btrfs_get_extent, 
- &em_cached, &bio, &bio_flags, &prev_em_start); + __extent_readpages(tree, pagepool, nr, &em_cached, &bio, + &bio_flags, &prev_em_start); nr = 0; } if (nr) - __extent_readpages(tree, pagepool, nr, btrfs_get_extent, - &em_cached, &bio, &bio_flags, &prev_em_start); + __extent_readpages(tree, pagepool, nr, &em_cached, &bio, + &bio_flags, &prev_em_start); if (em_cached) free_extent_map(em_cached); -- cgit v1.2.3 From 4ef77695a0f28a42a67a027473d87f5cafa35674 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:09:57 +0200 Subject: btrfs: sink get_extent parameter to __do_contiguous_readpages All callers pass btrfs_get_extent. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4e7c5d572667..eda8fe363132 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3090,7 +3090,6 @@ out: static inline void __do_contiguous_readpages(struct extent_io_tree *tree, struct page *pages[], int nr_pages, u64 start, u64 end, - get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, unsigned long *bio_flags, @@ -3113,8 +3112,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, } for (index = 0; index < nr_pages; index++) { - __do_readpage(tree, pages[index], get_extent, em_cached, bio, - 0, bio_flags, 0, prev_em_start); + __do_readpage(tree, pages[index], btrfs_get_extent, em_cached, + bio, 0, bio_flags, 0, prev_em_start); put_page(pages[index]); } } @@ -3143,8 +3142,7 @@ static void __extent_readpages(struct extent_io_tree *tree, } else { __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, - end, btrfs_get_extent, - em_cached, + end, em_cached, bio, bio_flags, prev_em_start); start = page_start; @@ -3156,7 +3154,7 @@ static void __extent_readpages(struct extent_io_tree *tree, if (end) __do_contiguous_readpages(tree, &pages[first_index], index - first_index, 
start, - end, btrfs_get_extent, em_cached, bio, + end, em_cached, bio, bio_flags, prev_em_start); } -- cgit v1.2.3 From 6af49dbde9532c95f53d2c45fe9cc0012226c5e7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:09:57 +0200 Subject: btrfs: sink get_extent parameter to read_extent_buffer_pages All callers pass btree_get_extent, which needs to be exported. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/disk-io.h | 3 +++ fs/btrfs/extent_io.c | 6 +++--- fs/btrfs/extent_io.h | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 06c593775b82..cbf37df05a88 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -220,7 +220,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, * extents on the btree inode are pretty simple, there's one extent * that covers the entire device */ -static struct extent_map *btree_get_extent(struct btrfs_inode *inode, +struct extent_map *btree_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) { @@ -455,7 +455,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info, io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, - btree_get_extent, mirror_num); + mirror_num); if (!ret) { if (!verify_parent_transid(io_tree, eb, parent_transid, 0)) @@ -1012,7 +1012,7 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr) if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, WAIT_NONE, btree_get_extent, 0); + buf, WAIT_NONE, 0); free_extent_buffer(buf); } @@ -1031,7 +1031,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr, set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, - btree_get_extent, mirror_num); + mirror_num); if (ret) { 
free_extent_buffer(buf); return ret; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7f7c35d6347a..301151a50ac1 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -149,6 +149,9 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, u64 objectid); int btree_lock_page_hook(struct page *page, void *data, void (*flush_fn)(void *)); +struct extent_map *btree_get_extent(struct btrfs_inode *inode, + struct page *page, size_t pg_offset, u64 start, u64 len, + int create); int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int __init btrfs_end_io_wq_init(void); void btrfs_end_io_wq_exit(void); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eda8fe363132..932d805a81e3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -21,6 +21,7 @@ #include "locking.h" #include "rcu-string.h" #include "backref.h" +#include "disk-io.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -5248,8 +5249,7 @@ int extent_buffer_uptodate(struct extent_buffer *eb) } int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, int wait, - get_extent_t *get_extent, int mirror_num) + struct extent_buffer *eb, int wait, int mirror_num) { unsigned long i; struct page *page; @@ -5309,7 +5309,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, ClearPageError(page); err = __extent_read_full_page(tree, page, - get_extent, &bio, + btree_get_extent, &bio, mirror_num, &bio_flags, REQ_META); if (err) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6cf4a0e5b0ea..c28f5ef88f42 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -435,7 +435,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb); #define WAIT_PAGE_LOCK 2 int read_extent_buffer_pages(struct extent_io_tree *tree, struct extent_buffer *eb, int wait, - get_extent_t *get_extent, int mirror_num); + int mirror_num); void wait_on_extent_buffer_writeback(struct 
extent_buffer *eb); static inline unsigned long num_extent_pages(u64 start, u64 len) -- cgit v1.2.3 From 02cfe779ccaec421cd4ed53a574c8a6354efa7fe Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Nov 2017 16:04:40 +0100 Subject: btrfs: ref-verify: Remove unused parameter from walk_up_tree() to kill warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc-4.1.2: fs/btrfs/ref-verify.c: In function ‘btrfs_build_ref_tree’: fs/btrfs/ref-verify.c:1017: warning: ‘root’ is used uninitialized in this function The variable is indeed passed uninitialized, but it is never used by the callee. However, not all versions of gcc are smart enough to notice. Hence remove the unused parameter from walk_up_tree() to silence the compiler warning. Signed-off-by: Geert Uytterhoeven Signed-off-by: David Sterba --- fs/btrfs/ref-verify.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 34878699d363..171f3cce30e6 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -606,8 +606,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, } /* Walk up to the next node that needs to be processed */ -static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, - int *level) +static int walk_up_tree(struct btrfs_path *path, int *level) { int l; @@ -984,7 +983,6 @@ void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) { struct btrfs_path *path; - struct btrfs_root *root; struct extent_buffer *eb; u64 bytenr = 0, num_bytes = 0; int ret, level; @@ -1014,7 +1012,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) &bytenr, &num_bytes); if (ret) break; - ret = walk_up_tree(root, path, &level); + ret = walk_up_tree(path, &level); if (ret < 0) break; if (ret > 0) { -- cgit v1.2.3 From 71f8a8d2c1ee65589b3f0ec5f2306723ea76c1ee Mon Sep 17 
00:00:00 2001 From: Anand Jain Date: Thu, 9 Nov 2017 23:45:23 +0800 Subject: btrfs: set fs_devices->seeding directly This is in preparation to move a section of code in __btrfs_open_devices() into a new function so that it can be reused. Seeding is set if any of the devices has the superblock flag BTRFS_SUPER_FLAG_SEEDING, so set it in the device list loop itself. No functional changes. Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 96c6c0fa1531..51445cc496c0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1019,7 +1019,6 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, struct buffer_head *bh; struct btrfs_super_block *disk_super; u64 devid; - int seeding = 1; int ret = 0; flags |= FMODE_EXCL; @@ -1051,9 +1050,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { device->writeable = 0; + fs_devices->seeding = 1; } else { device->writeable = !bdev_read_only(bdev); - seeding = 0; } q = bdev_get_queue(bdev); @@ -1085,7 +1084,6 @@ error_brelse: ret = -EINVAL; goto out; } - fs_devices->seeding = seeding; fs_devices->opened = 1; fs_devices->latest_bdev = latest_dev->bdev; fs_devices->total_rw_bytes = 0; -- cgit v1.2.3 From 9f050db43e0f671ff558f34479b06aafd5103eab Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 9 Nov 2017 23:45:25 +0800 Subject: btrfs: move check for device generation to the last No functional changes. This helps to move the entire section into a new function.
Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 51445cc496c0..04d205226656 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1044,9 +1044,6 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, goto error_brelse; device->generation = btrfs_super_generation(disk_super); - if (!latest_dev || - device->generation > latest_dev->generation) - latest_dev = device; if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { device->writeable = 0; @@ -1073,6 +1070,11 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, &fs_devices->alloc_list); } brelse(bh); + + if (!latest_dev || + device->generation > latest_dev->generation) + latest_dev = device; + continue; error_brelse: -- cgit v1.2.3 From 0fb08bccbce2c1900d18f7ecc01ff8b8f677ce3e Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 9 Nov 2017 23:45:24 +0800 Subject: btrfs: factor __btrfs_open_devices() to create btrfs_open_one_device() No functional changes, create btrfs_open_one_device() from __btrfs_open_devices(). This is a preparatory work to add dynamic device scan. 
Signed-off-by: Anand Jain [ minor whitespace fixes ] Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 121 ++++++++++++++++++++++++++++------------------------- 1 file changed, 65 insertions(+), 56 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 04d205226656..20039d625210 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -659,6 +659,70 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev) } } +static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, + struct btrfs_device *device, fmode_t flags, + void *holder) +{ + struct request_queue *q; + struct block_device *bdev; + struct buffer_head *bh; + struct btrfs_super_block *disk_super; + u64 devid; + int ret; + + if (device->bdev) + return -EINVAL; + if (!device->name) + return -EINVAL; + + ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, + &bdev, &bh); + if (ret) + return ret; + + disk_super = (struct btrfs_super_block *)bh->b_data; + devid = btrfs_stack_device_id(&disk_super->dev_item); + if (devid != device->devid) + goto error_brelse; + + if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE)) + goto error_brelse; + + device->generation = btrfs_super_generation(disk_super); + + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { + device->writeable = 0; + fs_devices->seeding = 1; + } else { + device->writeable = !bdev_read_only(bdev); + } + + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) + device->can_discard = 1; + if (!blk_queue_nonrot(q)) + fs_devices->rotating = 1; + + device->bdev = bdev; + device->in_fs_metadata = 0; + device->mode = flags; + + fs_devices->open_devices++; + if (device->writeable && device->devid != BTRFS_DEV_REPLACE_DEVID) { + fs_devices->rw_devices++; + list_add(&device->dev_alloc_list, &fs_devices->alloc_list); + } + brelse(bh); + + return 0; + +error_brelse: + brelse(bh); + blkdev_put(bdev, flags); + + return -EINVAL; +} + /* * Add new device to list of registered devices * 
@@ -1011,76 +1075,21 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder) { - struct request_queue *q; - struct block_device *bdev; struct list_head *head = &fs_devices->devices; struct btrfs_device *device; struct btrfs_device *latest_dev = NULL; - struct buffer_head *bh; - struct btrfs_super_block *disk_super; - u64 devid; int ret = 0; flags |= FMODE_EXCL; list_for_each_entry(device, head, dev_list) { - if (device->bdev) - continue; - if (!device->name) - continue; - /* Just open everything we can; ignore failures here */ - if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, - &bdev, &bh)) + if (btrfs_open_one_device(fs_devices, device, flags, holder)) continue; - disk_super = (struct btrfs_super_block *)bh->b_data; - devid = btrfs_stack_device_id(&disk_super->dev_item); - if (devid != device->devid) - goto error_brelse; - - if (memcmp(device->uuid, disk_super->dev_item.uuid, - BTRFS_UUID_SIZE)) - goto error_brelse; - - device->generation = btrfs_super_generation(disk_super); - - if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { - device->writeable = 0; - fs_devices->seeding = 1; - } else { - device->writeable = !bdev_read_only(bdev); - } - - q = bdev_get_queue(bdev); - if (blk_queue_discard(q)) - device->can_discard = 1; - if (!blk_queue_nonrot(q)) - fs_devices->rotating = 1; - - device->bdev = bdev; - device->in_fs_metadata = 0; - device->mode = flags; - - fs_devices->open_devices++; - if (device->writeable && - device->devid != BTRFS_DEV_REPLACE_DEVID) { - fs_devices->rw_devices++; - list_add(&device->dev_alloc_list, - &fs_devices->alloc_list); - } - brelse(bh); - if (!latest_dev || device->generation > latest_dev->generation) latest_dev = device; - - continue; - -error_brelse: - brelse(bh); - blkdev_put(bdev, flags); - continue; } if (fs_devices->open_devices == 0) { ret = -EINVAL; -- cgit v1.2.3 From 
343e4fc1c60971b0734de26dbbd475d433950982 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 15 Nov 2017 16:10:28 -0700 Subject: Btrfs: set plug for fsync Setting plug can merge adjacent IOs before dispatching IOs to the disk driver. Without plug, it'd not be a problem for single disk use cases, but for multiple disks using raid profile, a large IO can be split to several IOs of stripe length, and plug can be helpful to bring them together for each disk so that we can save several disk accesses. Moreover, fsync issues synchronous writes, so plug can really take effect. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b85b6d7d0ccd..1096398e1351 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2019,10 +2019,19 @@ int btrfs_release_file(struct inode *inode, struct file *filp) static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) { int ret; + struct blk_plug plug; + /* + * This is only called in fsync, which would do synchronous writes, so + * a plug can merge adjacent IOs as much as possible. Esp. in case of + * multiple disks using raid profile, a large IO can be split to + * several segments of stripe length (currently 64K). + */ + blk_start_plug(&plug); atomic_inc(&BTRFS_I(inode)->sync_writers); ret = btrfs_fdatawrite_range(inode, start, end); atomic_dec(&BTRFS_I(inode)->sync_writers); + blk_finish_plug(&plug); return ret; } -- cgit v1.2.3 From 71a635516ca521931be418827150f782b0a03ae7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 6 Nov 2017 19:23:00 +0100 Subject: btrfs: switch to on-stack csum buffer in csum_tree_block The maximum size of a checksum buffer is known, BTRFS_CSUM_SIZE, and we don't have to allocate it dynamically. This code path is not used at all as we have only the crc32c and use an on-stack buffer already.
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cbf37df05a88..5f9430063c50 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -285,7 +285,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, int verify) { u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); - char *result = NULL; + char result[BTRFS_CSUM_SIZE]; unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; @@ -294,7 +294,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, unsigned long map_len; int err; u32 crc = ~(u32)0; - unsigned long inline_result; len = buf->len - offset; while (len > 0) { @@ -308,13 +307,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, len -= cur_len; offset += cur_len; } - if (csum_size > sizeof(inline_result)) { - result = kzalloc(csum_size, GFP_NOFS); - if (!result) - return -ENOMEM; - } else { - result = (char *)&inline_result; - } + memset(result, 0, BTRFS_CSUM_SIZE); btrfs_csum_final(crc, result); @@ -329,15 +322,12 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, "%s checksum verify failed on %llu wanted %X found %X level %d", fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); - if (result != (char *)&inline_result) - kfree(result); return -EUCLEAN; } } else { write_extent_buffer(buf, result, 0, csum_size); } - if (result != (char *)&inline_result) - kfree(result); + return 0; } -- cgit v1.2.3 From 41a1eadad719168ce5f6cfcfda6ea45134f2424a Mon Sep 17 00:00:00 2001 From: Edmund Nadolski Date: Mon, 20 Nov 2017 13:24:47 -0700 Subject: btrfs: btrfs_log_inode_parent should use defined inode_only values. Replace hardcoded numeric argument values for inode_only with the constants defined for that use.
Signed-off-by: Edmund Nadolski Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a806182dfea6..ee1aaed1330e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5403,11 +5403,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, struct dentry *parent, const loff_t start, const loff_t end, - int exists_only, + int inode_only, struct btrfs_log_ctx *ctx) { struct btrfs_fs_info *fs_info = root->fs_info; - int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; struct super_block *sb; struct dentry *old_parent = NULL; int ret = 0; @@ -5573,7 +5572,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, int ret; ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)), - parent, start, end, 0, ctx); + parent, start, end, LOG_INODE_ALL, ctx); dput(parent); return ret; @@ -5836,6 +5835,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, return 0; return btrfs_log_inode_parent(trans, root, inode, parent, 0, - LLONG_MAX, 1, NULL); + LLONG_MAX, LOG_INODE_EXISTS, NULL); } -- cgit v1.2.3 From bf46f52db96795e0c10292d55888640ab57f42c7 Mon Sep 17 00:00:00 2001 From: Edmund Nadolski Date: Mon, 20 Nov 2017 13:24:49 -0700 Subject: btrfs: remove dead code from btrfs_get_extent Due to new_inline logic, the create == 0 is always true at this point in the code, so the create != 0 branch can be removed. 
Signed-off-by: Edmund Nadolski Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 72c7b38fb17e..70a75b5f3046 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6946,7 +6946,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct btrfs_trans_handle *trans = NULL; const bool new_inline = !page || create; -again: read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) @@ -7087,7 +7086,7 @@ next: em->orig_block_len = em->len; em->orig_start = em->start; ptr = btrfs_file_extent_inline_start(item) + extent_offset; - if (create == 0 && !PageUptodate(page)) { + if (!PageUptodate(page)) { if (btrfs_file_extent_compression(leaf, item) != BTRFS_COMPRESS_NONE) { ret = uncompress_inline(path, page, pg_offset, @@ -7108,25 +7107,6 @@ next: kunmap(page); } flush_dcache_page(page); - } else if (create && PageUptodate(page)) { - BUG(); - if (!trans) { - kunmap(page); - free_extent_map(em); - em = NULL; - - btrfs_release_path(path); - trans = btrfs_join_transaction(root); - - if (IS_ERR(trans)) - return ERR_CAST(trans); - goto again; - } - map = kmap(page); - write_extent_buffer(leaf, map + pg_offset, ptr, - copy_size); - kunmap(page); - btrfs_mark_buffer_dirty(leaf); } set_extent_uptodate(io_tree, em->start, extent_map_end(em) - 1, NULL, GFP_NOFS); -- cgit v1.2.3 From 7c2871a2f4695fadc35ea463552b23180c7a7e34 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 8 Nov 2017 01:07:43 +0100 Subject: btrfs: switch btrfs_trans_handle::adding_csums to bool The semantics of adding_csums matches bool, 'short' was most likely used to save space in a698d0755adb6f2 ("Btrfs: add a type field for the transaction handle"). 
Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 ++-- fs/btrfs/transaction.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 70a75b5f3046..1c704eb82487 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2036,10 +2036,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sum; list_for_each_entry(sum, list, list) { - trans->adding_csums = 1; + trans->adding_csums = true; btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); - trans->adding_csums = 0; + trans->adding_csums = false; } return 0; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c55e44560103..a673142c003e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -118,7 +118,7 @@ struct btrfs_trans_handle { struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; short aborted; - short adding_csums; + bool adding_csums; bool allocating_chunk; bool can_flush_pending_bgs; bool reloc_reserved; -- cgit v1.2.3 From 2dbda74ed9e5497697b913c780c928e25e70d832 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 8 Nov 2017 01:32:48 +0100 Subject: btrfs: remove unused member of btrfs_trans_handle Last user was removed in a monster commit a22285a6a32390195235171 ("Btrfs: Integrate metadata reservation with start_transaction") in 2010. 
Signed-off-by: David Sterba --- fs/btrfs/transaction.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index a673142c003e..c48a4a03f1b4 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -112,7 +112,6 @@ struct btrfs_trans_handle { u64 bytes_reserved; u64 chunk_bytes_reserved; unsigned long use_count; - unsigned long blocks_reserved; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; -- cgit v1.2.3 From b50fff816cbd670ea545ce98ae374356f08f2d75 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 8 Nov 2017 01:39:58 +0100 Subject: btrfs: switch to refcount_t type for btrfs_trans_handle::use_count The use_count is a reference counter, we can use the refcount_t type, though we don't use the atomicity. This is not a performance critical code and we could catch the underflows. The type is changed from long, but the number of references will fit an int. Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 12 ++++++------ fs/btrfs/transaction.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dac688c696c3..6348573e26a7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -495,8 +495,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items, if (current->journal_info) { WARN_ON(type & TRANS_EXTWRITERS); h = current->journal_info; - h->use_count++; - WARN_ON(h->use_count > 2); + refcount_inc(&h->use_count); + WARN_ON(refcount_read(&h->use_count) > 2); h->orig_rsv = h->block_rsv; h->block_rsv = NULL; goto got_it; @@ -567,7 +567,7 @@ again: h->transid = cur_trans->transid; h->transaction = cur_trans; h->root = root; - h->use_count = 1; + refcount_set(&h->use_count, 1); h->fs_info = root->fs_info; h->type = type; @@ -837,8 +837,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, int err = 0; int must_run_delayed_refs = 0; - if 
(trans->use_count > 1) { - trans->use_count--; + if (refcount_read(&trans->use_count) > 1) { + refcount_dec(&trans->use_count); trans->block_rsv = trans->orig_rsv; return 0; } @@ -1868,7 +1868,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans = trans->transaction; DEFINE_WAIT(wait); - WARN_ON(trans->use_count > 1); + WARN_ON(refcount_read(&trans->use_count) > 1); btrfs_abort_transaction(trans, err); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c48a4a03f1b4..afa88f035654 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -111,7 +111,7 @@ struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; u64 chunk_bytes_reserved; - unsigned long use_count; + refcount_t use_count; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; -- cgit v1.2.3 From 1ca4bb63f6bcc0b4fa3cc6d5aea0a503186a3e20 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 8 Nov 2017 01:54:33 +0100 Subject: btrfs: reorder btrfs_trans_handle members for better packing Recent updates to the structure left some holes, reorder the types so the packing is tight. The size goes from 112 to 104 on 64bit. 
Signed-off-by: David Sterba --- fs/btrfs/transaction.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index afa88f035654..edf53112a6f2 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -111,11 +111,12 @@ struct btrfs_trans_handle { u64 transid; u64 bytes_reserved; u64 chunk_bytes_reserved; - refcount_t use_count; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; + refcount_t use_count; + unsigned int type; short aborted; bool adding_csums; bool allocating_chunk; @@ -123,7 +124,6 @@ struct btrfs_trans_handle { bool reloc_reserved; bool sync; bool dirty; - unsigned int type; struct btrfs_root *root; struct btrfs_fs_info *fs_info; struct list_head new_bgs; -- cgit v1.2.3 From 165c8b022c492f7eb33f7c936ac063a6fd4e90a3 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 8 Nov 2017 02:12:57 +0100 Subject: btrfs: use narrower type for btrfs_transaction::num_dirty_bgs The u64 is an overkill here, we could not possibly create that many blockgroups in one transaction. 
Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/transaction.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 41770ee1313e..c3ccd9d2e946 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2894,7 +2894,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_block_rsv *global_rsv; u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; u64 csum_bytes = trans->transaction->delayed_refs.pending_csums; - u64 num_dirty_bgs = trans->transaction->num_dirty_bgs; + unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs; u64 num_bytes, num_dirty_bgs_bytes; int ret = 0; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index edf53112a6f2..1805fd101767 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -70,7 +70,7 @@ struct btrfs_transaction { struct list_head dirty_bgs; struct list_head io_bgs; struct list_head dropped_roots; - u64 num_dirty_bgs; + unsigned int num_dirty_bgs; /* * we need to make sure block group deletion doesn't race with -- cgit v1.2.3 From 5302e0896445ac3a9c707bd42c39c58a49959980 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 8 Nov 2017 01:54:33 +0100 Subject: btrfs: reorder btrfs_transaction members for better packing There are now 20 bytes of holes, we can reduce that to 4 by minor changes. Moving 'aborted' to the status and flags is also more logical, similar for num_dirty_bgs. The size goes from 432 to 416. Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/transaction.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 1805fd101767..6beee072b1bd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -58,6 +58,7 @@ struct btrfs_transaction { /* Be protected by fs_info->trans_lock when we want to change it. 
*/ enum btrfs_trans_state state; + int aborted; struct list_head list; struct extent_io_tree dirty_pages; unsigned long start_time; @@ -70,7 +71,6 @@ struct btrfs_transaction { struct list_head dirty_bgs; struct list_head io_bgs; struct list_head dropped_roots; - unsigned int num_dirty_bgs; /* * we need to make sure block group deletion doesn't race with @@ -79,11 +79,11 @@ struct btrfs_transaction { */ struct mutex cache_write_mutex; spinlock_t dirty_bgs_lock; + unsigned int num_dirty_bgs; /* Protected by spin lock fs_info->unused_bgs_lock. */ struct list_head deleted_bgs; spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; - int aborted; struct btrfs_fs_info *fs_info; }; -- cgit v1.2.3 From 431e98226cbcaab40b77aeef2e6ba20a47649962 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 15 Nov 2017 18:27:39 +0100 Subject: btrfs: move some zstd work data from stack to workspace * ZSTD_inBuffer in_buf * ZSTD_outBuffer out_buf are used in all functions to pass the compression parameters and the local variables consume some space. 
We can move them to the workspace and reduce the stack consumption: zstd.c:zstd_decompress -24 (136 -> 112) zstd.c:zstd_decompress_bio -24 (144 -> 120) zstd.c:zstd_compress_pages -24 (264 -> 240) Signed-off-by: David Sterba Reviewed-by: Nick Terrell Signed-off-by: David Sterba --- fs/btrfs/zstd.c | 132 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 67 insertions(+), 65 deletions(-) diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 17f2dd8fddb8..01a4eab602a3 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -43,6 +43,8 @@ struct workspace { size_t size; char *buf; struct list_head list; + ZSTD_inBuffer in_buf; + ZSTD_outBuffer out_buf; }; static void zstd_free_workspace(struct list_head *ws) @@ -94,8 +96,6 @@ static int zstd_compress_pages(struct list_head *ws, int nr_pages = 0; struct page *in_page = NULL; /* The current page to read */ struct page *out_page = NULL; /* The current page to write to */ - ZSTD_inBuffer in_buf = { NULL, 0, 0 }; - ZSTD_outBuffer out_buf = { NULL, 0, 0 }; unsigned long tot_in = 0; unsigned long tot_out = 0; unsigned long len = *total_out; @@ -118,9 +118,9 @@ static int zstd_compress_pages(struct list_head *ws, /* map in the first page of input data */ in_page = find_get_page(mapping, start >> PAGE_SHIFT); - in_buf.src = kmap(in_page); - in_buf.pos = 0; - in_buf.size = min_t(size_t, len, PAGE_SIZE); + workspace->in_buf.src = kmap(in_page); + workspace->in_buf.pos = 0; + workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE); /* Allocate and map in the output buffer */ @@ -130,14 +130,15 @@ static int zstd_compress_pages(struct list_head *ws, goto out; } pages[nr_pages++] = out_page; - out_buf.dst = kmap(out_page); - out_buf.pos = 0; - out_buf.size = min_t(size_t, max_out, PAGE_SIZE); + workspace->out_buf.dst = kmap(out_page); + workspace->out_buf.pos = 0; + workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); while (1) { size_t ret2; - ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf); + ret2 = 
ZSTD_compressStream(stream, &workspace->out_buf, + &workspace->in_buf); if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_compressStream returned %d\n", ZSTD_getErrorCode(ret2)); @@ -146,22 +147,22 @@ static int zstd_compress_pages(struct list_head *ws, } /* Check to see if we are making it bigger */ - if (tot_in + in_buf.pos > 8192 && - tot_in + in_buf.pos < - tot_out + out_buf.pos) { + if (tot_in + workspace->in_buf.pos > 8192 && + tot_in + workspace->in_buf.pos < + tot_out + workspace->out_buf.pos) { ret = -E2BIG; goto out; } /* We've reached the end of our output range */ - if (out_buf.pos >= max_out) { - tot_out += out_buf.pos; + if (workspace->out_buf.pos >= max_out) { + tot_out += workspace->out_buf.pos; ret = -E2BIG; goto out; } /* Check if we need more output space */ - if (out_buf.pos == out_buf.size) { + if (workspace->out_buf.pos == workspace->out_buf.size) { tot_out += PAGE_SIZE; max_out -= PAGE_SIZE; kunmap(out_page); @@ -176,19 +177,20 @@ static int zstd_compress_pages(struct list_head *ws, goto out; } pages[nr_pages++] = out_page; - out_buf.dst = kmap(out_page); - out_buf.pos = 0; - out_buf.size = min_t(size_t, max_out, PAGE_SIZE); + workspace->out_buf.dst = kmap(out_page); + workspace->out_buf.pos = 0; + workspace->out_buf.size = min_t(size_t, max_out, + PAGE_SIZE); } /* We've reached the end of the input */ - if (in_buf.pos >= len) { - tot_in += in_buf.pos; + if (workspace->in_buf.pos >= len) { + tot_in += workspace->in_buf.pos; break; } /* Check if we need more input */ - if (in_buf.pos == in_buf.size) { + if (workspace->in_buf.pos == workspace->in_buf.size) { tot_in += PAGE_SIZE; kunmap(in_page); put_page(in_page); @@ -196,15 +198,15 @@ static int zstd_compress_pages(struct list_head *ws, start += PAGE_SIZE; len -= PAGE_SIZE; in_page = find_get_page(mapping, start >> PAGE_SHIFT); - in_buf.src = kmap(in_page); - in_buf.pos = 0; - in_buf.size = min_t(size_t, len, PAGE_SIZE); + workspace->in_buf.src = kmap(in_page); + workspace->in_buf.pos = 0; + 
workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE); } } while (1) { size_t ret2; - ret2 = ZSTD_endStream(stream, &out_buf); + ret2 = ZSTD_endStream(stream, &workspace->out_buf); if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_endStream returned %d\n", ZSTD_getErrorCode(ret2)); @@ -212,11 +214,11 @@ static int zstd_compress_pages(struct list_head *ws, goto out; } if (ret2 == 0) { - tot_out += out_buf.pos; + tot_out += workspace->out_buf.pos; break; } - if (out_buf.pos >= max_out) { - tot_out += out_buf.pos; + if (workspace->out_buf.pos >= max_out) { + tot_out += workspace->out_buf.pos; ret = -E2BIG; goto out; } @@ -235,9 +237,9 @@ static int zstd_compress_pages(struct list_head *ws, goto out; } pages[nr_pages++] = out_page; - out_buf.dst = kmap(out_page); - out_buf.pos = 0; - out_buf.size = min_t(size_t, max_out, PAGE_SIZE); + workspace->out_buf.dst = kmap(out_page); + workspace->out_buf.pos = 0; + workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE); } if (tot_out >= tot_in) { @@ -273,8 +275,6 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; unsigned long total_out = 0; - ZSTD_inBuffer in_buf = { NULL, 0, 0 }; - ZSTD_outBuffer out_buf = { NULL, 0, 0 }; stream = ZSTD_initDStream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); @@ -284,18 +284,19 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) goto done; } - in_buf.src = kmap(pages_in[page_in_index]); - in_buf.pos = 0; - in_buf.size = min_t(size_t, srclen, PAGE_SIZE); + workspace->in_buf.src = kmap(pages_in[page_in_index]); + workspace->in_buf.pos = 0; + workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE); - out_buf.dst = workspace->buf; - out_buf.pos = 0; - out_buf.size = PAGE_SIZE; + workspace->out_buf.dst = workspace->buf; + workspace->out_buf.pos = 0; + workspace->out_buf.size = PAGE_SIZE; while (1) { size_t ret2; - ret2 = 
ZSTD_decompressStream(stream, &out_buf, &in_buf); + ret2 = ZSTD_decompressStream(stream, &workspace->out_buf, + &workspace->in_buf); if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", ZSTD_getErrorCode(ret2)); @@ -303,38 +304,38 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) goto done; } buf_start = total_out; - total_out += out_buf.pos; - out_buf.pos = 0; + total_out += workspace->out_buf.pos; + workspace->out_buf.pos = 0; - ret = btrfs_decompress_buf2page(out_buf.dst, buf_start, - total_out, disk_start, orig_bio); + ret = btrfs_decompress_buf2page(workspace->out_buf.dst, + buf_start, total_out, disk_start, orig_bio); if (ret == 0) break; - if (in_buf.pos >= srclen) + if (workspace->in_buf.pos >= srclen) break; /* Check if we've hit the end of a frame */ if (ret2 == 0) break; - if (in_buf.pos == in_buf.size) { + if (workspace->in_buf.pos == workspace->in_buf.size) { kunmap(pages_in[page_in_index++]); if (page_in_index >= total_pages_in) { - in_buf.src = NULL; + workspace->in_buf.src = NULL; ret = -EIO; goto done; } srclen -= PAGE_SIZE; - in_buf.src = kmap(pages_in[page_in_index]); - in_buf.pos = 0; - in_buf.size = min_t(size_t, srclen, PAGE_SIZE); + workspace->in_buf.src = kmap(pages_in[page_in_index]); + workspace->in_buf.pos = 0; + workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE); } } ret = 0; zero_fill_bio(orig_bio); done: - if (in_buf.src) + if (workspace->in_buf.src) kunmap(pages_in[page_in_index]); return ret; } @@ -348,8 +349,6 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, ZSTD_DStream *stream; int ret = 0; size_t ret2; - ZSTD_inBuffer in_buf = { NULL, 0, 0 }; - ZSTD_outBuffer out_buf = { NULL, 0, 0 }; unsigned long total_out = 0; unsigned long pg_offset = 0; char *kaddr; @@ -364,16 +363,17 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, destlen = min_t(size_t, destlen, PAGE_SIZE); - in_buf.src = data_in; - in_buf.pos = 0; - 
in_buf.size = srclen; + workspace->in_buf.src = data_in; + workspace->in_buf.pos = 0; + workspace->in_buf.size = srclen; - out_buf.dst = workspace->buf; - out_buf.pos = 0; - out_buf.size = PAGE_SIZE; + workspace->out_buf.dst = workspace->buf; + workspace->out_buf.pos = 0; + workspace->out_buf.size = PAGE_SIZE; ret2 = 1; - while (pg_offset < destlen && in_buf.pos < in_buf.size) { + while (pg_offset < destlen + && workspace->in_buf.pos < workspace->in_buf.size) { unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; @@ -384,7 +384,8 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, ret = -EIO; goto finish; } - ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); + ret2 = ZSTD_decompressStream(stream, &workspace->out_buf, + &workspace->in_buf); if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", ZSTD_getErrorCode(ret2)); @@ -393,8 +394,8 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, } buf_start = total_out; - total_out += out_buf.pos; - out_buf.pos = 0; + total_out += workspace->out_buf.pos; + workspace->out_buf.pos = 0; if (total_out <= start_byte) continue; @@ -405,10 +406,11 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, buf_offset = 0; bytes = min_t(unsigned long, destlen - pg_offset, - out_buf.size - buf_offset); + workspace->out_buf.size - buf_offset); kaddr = kmap_atomic(dest_page); - memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes); + memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset, + bytes); kunmap_atomic(kaddr); pg_offset += bytes; -- cgit v1.2.3 From 3f2dd7a0cef4d7ff964bb5e35beb500cbf3b7bcf Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 17 Nov 2017 15:14:19 +0800 Subject: btrfs: extent-tree: Make btrfs_inode_rsv_refill function static This function is no longer used outside of extent-tree.c. Make it static. 
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c3ccd9d2e946..696275b7b66a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5738,8 +5738,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, * or return if we already have enough space. This will also handle the resreve * tracepoint for the reserved amount. */ -int btrfs_inode_rsv_refill(struct btrfs_inode *inode, - enum btrfs_reserve_flush_enum flush) +static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, + enum btrfs_reserve_flush_enum flush) { struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = &inode->block_rsv; -- cgit v1.2.3 From 4a2d25cd93cbd2e5ad6c9aabd90da362b2dd9984 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 23 Nov 2017 10:51:43 +0200 Subject: btrfs: Remove redundant FLAG_VACANCY Commit 9036c10208e1 ("Btrfs: update hole handling v2") added the FLAG_VACANCY to denote holes, however there was already a consistent way of flagging extents which represent hole - ->block_start = EXTENT_MAP_HOLE. And also the only place where this flag is checked is in the fiemap code, but the block_start value is also checked and every other place in the filesystem detects holes by using block_start value's. So remove the extra flag. This survived a full xfstest run. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 +--- fs/btrfs/extent_map.h | 1 - fs/btrfs/inode.c | 1 - fs/btrfs/tests/inode-tests.c | 5 ----- include/trace/events/btrfs.h | 1 - 5 files changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 932d805a81e3..ef72efef8b39 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4311,10 +4311,8 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, return em; /* if this isn't a hole return it */ - if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) && - em->block_start != EXTENT_MAP_HOLE) { + if (em->block_start != EXTENT_MAP_HOLE) return em; - } /* this is a hole, advance to the next extent */ offset = extent_map_end(em); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 64365bbc9b16..e9e285d45c7e 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -13,7 +13,6 @@ /* bits for the flags field */ #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ #define EXTENT_FLAG_COMPRESSED 1 -#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ #define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1c704eb82487..57785eadb95c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7118,7 +7118,6 @@ not_found: em->len = len; not_found_em: em->block_start = EXTENT_MAP_HOLE; - set_bit(EXTENT_FLAG_VACANCY, &em->flags); insert: btrfs_release_path(path); if (em->start > start || extent_map_end(em) <= start) { diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 4a0a60d3275d..13420cd19ef0 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -288,10 +288,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, 
u32 nodesize) test_msg("Expected a hole, got %llu\n", em->block_start); goto out; } - if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { - test_msg("Vacancy flag wasn't set properly\n"); - goto out; - } free_extent_map(em); btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); @@ -1130,7 +1126,6 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize) int ret; set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only); - set_bit(EXTENT_FLAG_VACANCY, &vacancy_only); set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); test_msg("Running btrfs_get_extent tests\n"); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 4342a329821f..c3ac5ec86519 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -193,7 +193,6 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, __print_flags(flag, "|", \ { (1 << EXTENT_FLAG_PINNED), "PINNED" },\ { (1 << EXTENT_FLAG_COMPRESSED), "COMPRESSED" },\ - { (1 << EXTENT_FLAG_VACANCY), "VACANCY" },\ { (1 << EXTENT_FLAG_PREALLOC), "PREALLOC" },\ { (1 << EXTENT_FLAG_LOGGING), "LOGGING" },\ { (1 << EXTENT_FLAG_FILLING), "FILLING" },\ -- cgit v1.2.3 From ccc8dc758da0deeddaec2d8a37524401654e99de Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 30 Nov 2017 12:14:47 +0000 Subject: btrfs: make function update_share_count static The function update_share_count is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: fs/btrfs/backref.c:219:6: warning: symbol 'update_share_count' was not declared. Should it be static? 
Signed-off-by: Colin Ian King Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7d0dc100a09a..e4054e533f6d 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -216,7 +216,8 @@ static int prelim_ref_compare(struct prelim_ref *ref1, return 0; } -void update_share_count(struct share_check *sc, int oldcount, int newcount) +static void update_share_count(struct share_check *sc, int oldcount, + int newcount) { if ((!sc) || (oldcount == 0 && newcount < 1)) return; -- cgit v1.2.3 From 38b5f68e98117daa221c7df813608f48c374aef4 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 29 Nov 2017 18:53:43 +0800 Subject: btrfs: drop btrfs_device::can_discard to query directly We can query the bdev directly when needed at btrfs_discard_extent() so drop btrfs_device::can_discard. Signed-off-by: Anand Jain Suggested-by: Nikolay Borisov Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 5 ++++- fs/btrfs/volumes.c | 8 -------- fs/btrfs/volumes.h | 1 - 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 696275b7b66a..7411f65099d1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2145,7 +2145,10 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, for (i = 0; i < bbio->num_stripes; i++, stripe++) { u64 bytes; - if (!stripe->dev->can_discard) + struct request_queue *req_q; + + req_q = bdev_get_queue(stripe->dev->bdev); + if (!blk_queue_discard(req_q)) continue; ret = btrfs_issue_discard(stripe->dev->bdev, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 20039d625210..3e348743271a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -698,8 +698,6 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, } q = bdev_get_queue(bdev); - if (blk_queue_discard(q)) - 
device->can_discard = 1; if (!blk_queue_nonrot(q)) fs_devices->rotating = 1; @@ -2433,8 +2431,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } q = bdev_get_queue(bdev); - if (blk_queue_discard(q)) - device->can_discard = 1; device->writeable = 1; device->generation = trans->transid; device->io_width = fs_info->sectorsize; @@ -2585,7 +2581,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev, struct btrfs_device **device_out) { - struct request_queue *q; struct btrfs_device *device; struct block_device *bdev; struct list_head *devices; @@ -2642,9 +2637,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, } rcu_assign_pointer(device->name, name); - q = bdev_get_queue(bdev); - if (blk_queue_discard(q)) - device->can_discard = 1; mutex_lock(&fs_info->fs_devices->device_list_mutex); device->writeable = 1; device->generation = 0; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 294c4eb6a272..60588c259665 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -72,7 +72,6 @@ struct btrfs_device { int writeable; int in_fs_metadata; int missing; - int can_discard; int is_tgtdev_for_dev_replace; blk_status_t last_flush_error; int flush_bio_sent; -- cgit v1.2.3 From 3c958bd23b60c22947b857d2cb13196e2cc58c58 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 28 Nov 2017 10:43:10 +0800 Subject: btrfs: add helper for device path or missing This patch creates a helper function to get either the rcu device path or missing. 
Signed-off-by: Anand Jain [ rename to btrfs_dev_name, switch to if/else ] Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7c655f9a7a50..483eb62b9b27 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -304,6 +304,14 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) dev_replace->cursor_left_last_write_of_item; } +static char* btrfs_dev_name(struct btrfs_device *device) +{ + if (device->missing) + return "<missing disk>"; + else + return rcu_str_deref(device->name); +} + int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, const char *tgtdev_name, u64 srcdevid, const char *srcdev_name, int read_src) @@ -363,8 +371,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, btrfs_info_in_rcu(fs_info, "dev_replace from %s (devid %llu) to %s started", - src_device->missing ? "<missing disk>" : - rcu_str_deref(src_device->name), + btrfs_dev_name(src_device), src_device->devid, rcu_str_deref(tgt_device->name)); @@ -538,8 +545,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, } else { btrfs_err_in_rcu(fs_info, "btrfs_scrub_dev(%s, %llu, %s) failed %d", - src_device->missing ? "<missing disk>" : - rcu_str_deref(src_device->name), + btrfs_dev_name(src_device), src_device->devid, rcu_str_deref(tgt_device->name), scrub_ret); btrfs_dev_replace_unlock(dev_replace, 1); @@ -557,8 +563,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_info_in_rcu(fs_info, "dev_replace from %s (devid %llu) to %s finished", - src_device->missing ? 
"<missing disk>" : - rcu_str_deref(src_device->name), + btrfs_dev_name(src_device), src_device->devid, rcu_str_deref(tgt_device->name)); tgt_device->is_tgtdev_for_dev_replace = 0; @@ -814,12 +819,10 @@ static int btrfs_dev_replace_kthread(void *data) progress = btrfs_dev_replace_progress(fs_info); progress = div_u64(progress, 10); btrfs_info_in_rcu(fs_info, - "continuing dev_replace from %s (devid %llu) to %s @%u%%", - dev_replace->srcdev->missing ? "<missing disk>" - : rcu_str_deref(dev_replace->srcdev->name), + "continuing dev_replace from %s (devid %llu) to target %s @%u%%", + btrfs_dev_name(dev_replace->srcdev), dev_replace->srcdev->devid, - dev_replace->tgtdev ? rcu_str_deref(dev_replace->tgtdev->name) - : "<missing target disk>", + btrfs_dev_name(dev_replace->tgtdev), (unsigned int)progress); btrfs_dev_replace_continue_on_mount(fs_info); -- cgit v1.2.3 From ebbede42d47dc77d1c20e7468418826e5efa6b29 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 4 Dec 2017 12:54:52 +0800 Subject: btrfs: cleanup device states define BTRFS_DEV_STATE_WRITEABLE Currently device state is being managed by each individual int variable such as struct btrfs_device::writeable. Instead of that declare device state BTRFS_DEV_STATE_WRITEABLE and use the bit operations.
Signed-off-by: Anand Jain [ whitespace adjustments ] Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 12 +++++++---- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/extent_io.c | 3 ++- fs/btrfs/ioctl.c | 2 +- fs/btrfs/scrub.c | 3 ++- fs/btrfs/volumes.c | 57 ++++++++++++++++++++++++++++---------------------- fs/btrfs/volumes.h | 4 +++- 7 files changed, 49 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5f9430063c50..396f8183ee86 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3393,7 +3393,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) continue; if (!dev->bdev) continue; - if (!dev->in_fs_metadata || !dev->writeable) + if (!dev->in_fs_metadata || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; write_dev_flush(dev); @@ -3408,7 +3409,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) errors_wait++; continue; } - if (!dev->in_fs_metadata || !dev->writeable) + if (!dev->in_fs_metadata || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; ret = wait_dev_flush(dev); @@ -3505,7 +3507,8 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) total_errors++; continue; } - if (!dev->in_fs_metadata || !dev->writeable) + if (!dev->in_fs_metadata || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; btrfs_set_stack_device_generation(dev_item, 0); @@ -3544,7 +3547,8 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; - if (!dev->in_fs_metadata || !dev->writeable) + if (!dev->in_fs_metadata || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; ret = wait_dev_supers(dev, max_mirrors); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7411f65099d1..53cb2a1d025f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10878,7 +10878,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, *trimmed 
= 0; /* Not writeable = nothing to do. */ - if (!device->writeable) + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) return 0; /* No free space = nothing to do. */ diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ef72efef8b39..16ae832bdb5d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2026,7 +2026,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, bio->bi_iter.bi_sector = sector; dev = bbio->stripes[bbio->mirror_num - 1].dev; btrfs_put_bbio(bbio); - if (!dev || !dev->bdev || !dev->writeable) { + if (!dev || !dev->bdev || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { btrfs_bio_counter_dec(fs_info); bio_put(bio); return -EIO; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e7f37c46d6a9..8c75e555e3f8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1503,7 +1503,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, goto out_free; } - if (!device->writeable) { + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { btrfs_info(fs_info, "resizer unable to apply on readonly device %llu", devid); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b2f871d80982..7927307652e9 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4117,7 +4117,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return -ENODEV; } - if (!is_dev_replace && !readonly && !dev->writeable) { + if (!is_dev_replace && !readonly && + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); rcu_read_lock(); name = rcu_dereference(dev->name); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3e348743271a..c6db1ae4ac56 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -691,10 +691,13 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, device->generation = btrfs_super_generation(disk_super); if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { - 
device->writeable = 0; + clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); fs_devices->seeding = 1; } else { - device->writeable = !bdev_read_only(bdev); + if (bdev_read_only(bdev)) + clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); + else + set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); } q = bdev_get_queue(bdev); @@ -706,7 +709,8 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, device->mode = flags; fs_devices->open_devices++; - if (device->writeable && device->devid != BTRFS_DEV_REPLACE_DEVID) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && + device->devid != BTRFS_DEV_REPLACE_DEVID) { fs_devices->rw_devices++; list_add(&device->dev_alloc_list, &fs_devices->alloc_list); } @@ -934,9 +938,9 @@ again: device->bdev = NULL; fs_devices->open_devices--; } - if (device->writeable) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); - device->writeable = 0; + clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); if (!device->is_tgtdev_for_dev_replace) fs_devices->rw_devices--; } @@ -968,7 +972,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) if (!device->bdev) return; - if (device->writeable) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { sync_blockdev(device->bdev); invalidate_bdev(device->bdev); } @@ -985,7 +989,7 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device) if (device->bdev) fs_devices->open_devices--; - if (device->writeable && + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && device->devid != BTRFS_DEV_REPLACE_DEVID) { list_del_init(&device->dev_alloc_list); fs_devices->rw_devices--; @@ -1943,12 +1947,13 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, goto out; } - if (device->writeable && fs_info->fs_devices->rw_devices == 1) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && + fs_info->fs_devices->rw_devices == 1) 
{ ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; goto out; } - if (device->writeable) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { mutex_lock(&fs_info->chunk_mutex); list_del_init(&device->dev_alloc_list); device->fs_devices->rw_devices--; @@ -2010,7 +2015,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, * the devices list. All that's left is to zero out the old * supers and free the device. */ - if (device->writeable) + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) btrfs_scratch_superblocks(device->bdev, device->name->str); btrfs_close_bdev(device); @@ -2037,7 +2042,7 @@ out: return ret; error_undo: - if (device->writeable) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { mutex_lock(&fs_info->chunk_mutex); list_add(&device->dev_alloc_list, &fs_info->fs_devices->alloc_list); @@ -2068,7 +2073,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->missing) fs_devices->missing_devices--; - if (srcdev->writeable) + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) fs_devices->rw_devices--; if (srcdev->bdev) @@ -2080,7 +2085,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, { struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; - if (srcdev->writeable) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) { /* zero out the old super if it is writable */ btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); } @@ -2431,7 +2436,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } q = bdev_get_queue(bdev); - device->writeable = 1; + set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); device->generation = trans->transid; device->io_width = fs_info->sectorsize; device->io_align = fs_info->sectorsize; @@ -2638,7 +2643,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, rcu_assign_pointer(device->name, name); 
mutex_lock(&fs_info->fs_devices->device_list_mutex); - device->writeable = 1; + set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); device->generation = 0; device->io_width = fs_info->sectorsize; device->io_align = fs_info->sectorsize; @@ -2738,7 +2743,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, u64 old_total; u64 diff; - if (!device->writeable) + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) return -EACCES; new_size = round_down(new_size, fs_info->sectorsize); @@ -3558,7 +3563,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) old_size = btrfs_device_get_total_bytes(device); size_to_free = div_factor(old_size, 1); size_to_free = min_t(u64, size_to_free, SZ_1M); - if (!device->writeable || + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) || btrfs_device_get_total_bytes(device) - btrfs_device_get_bytes_used(device) > size_to_free || device->is_tgtdev_for_dev_replace) @@ -4441,7 +4446,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) mutex_lock(&fs_info->chunk_mutex); btrfs_device_set_total_bytes(device, new_size); - if (device->writeable) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { device->fs_devices->total_rw_bytes -= diff; atomic64_sub(diff, &fs_info->free_chunk_space); } @@ -4566,7 +4571,7 @@ done: if (ret) { mutex_lock(&fs_info->chunk_mutex); btrfs_device_set_total_bytes(device, old_size); - if (device->writeable) + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) device->fs_devices->total_rw_bytes += diff; atomic64_add(diff, &fs_info->free_chunk_space); mutex_unlock(&fs_info->chunk_mutex); @@ -4726,7 +4731,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 max_avail; u64 dev_offset; - if (!device->writeable) { + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { WARN(1, KERN_ERR "BTRFS: read-only device in alloc_list\n"); continue; @@ -5085,8 +5090,8 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, 
u64 chunk_offset) miss_ndevs++; continue; } - - if (!map->stripes[i].dev->writeable) { + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, + &map->stripes[i].dev->dev_state)) { readonly = 1; goto end; } @@ -6255,7 +6260,8 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, for (dev_nr = 0; dev_nr < total_devs; dev_nr++) { dev = bbio->stripes[dev_nr].dev; if (!dev || !dev->bdev || - (bio_op(first_bio) == REQ_OP_WRITE && !dev->writeable)) { + (bio_op(first_bio) == REQ_OP_WRITE && + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) { bbio_error(bbio, first_bio, logical); continue; } @@ -6691,7 +6697,7 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, } if (device->fs_devices != fs_info->fs_devices) { - BUG_ON(device->writeable); + BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)); if (device->generation != btrfs_device_generation(leaf, dev_item)) return -EINVAL; @@ -6699,7 +6705,8 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, fill_device_from_item(leaf, dev_item, device); device->in_fs_metadata = 1; - if (device->writeable && !device->is_tgtdev_for_dev_replace) { + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && + !device->is_tgtdev_for_dev_replace) { device->fs_devices->total_rw_bytes += device->total_bytes; atomic64_add(device->total_bytes - device->bytes_used, &fs_info->free_chunk_space); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 60588c259665..893e283c7f15 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -47,6 +47,8 @@ struct btrfs_pending_bios { #define btrfs_device_data_ordered_init(device) do { } while (0) #endif +#define BTRFS_DEV_STATE_WRITEABLE (0) + struct btrfs_device { struct list_head dev_list; struct list_head dev_alloc_list; @@ -69,7 +71,7 @@ struct btrfs_device { /* the mode sent to blkdev_get */ fmode_t mode; - int writeable; + unsigned long dev_state; int in_fs_metadata; int missing; int is_tgtdev_for_dev_replace; -- cgit v1.2.3 From 
e12c96214d28f9211b4035cf20e76d677ff5611f Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 4 Dec 2017 12:54:53 +0800 Subject: btrfs: cleanup device states define BTRFS_DEV_STATE_IN_FS_METADATA Currently device state is being managed by each individual int variable such as struct btrfs_device::in_fs_metadata. Instead of that declare device state BTRFS_DEV_STATE_IN_FS_METADATA and use the bit operations. Signed-off-by: Anand Jain Reviewed-by: Nikolay Borisov [ whitespace adjustments ] Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/scrub.c | 3 ++- fs/btrfs/super.c | 5 +++-- fs/btrfs/volumes.c | 29 +++++++++++++++++------------ fs/btrfs/volumes.h | 2 +- 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 396f8183ee86..44573cdda1fd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3393,7 +3393,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) continue; if (!dev->bdev) continue; - if (!dev->in_fs_metadata || + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; @@ -3409,7 +3409,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) errors_wait++; continue; } - if (!dev->in_fs_metadata || + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; @@ -3507,7 +3507,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) total_errors++; continue; } - if (!dev->in_fs_metadata || + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; @@ -3547,7 +3547,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; - if (!dev->in_fs_metadata || + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || 
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 7927307652e9..dc58005cb9f6 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4129,7 +4129,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, } mutex_lock(&fs_info->scrub_lock); - if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + dev->is_tgtdev_for_dev_replace) { mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); return -EIO; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7a7abe827ac4..bc55c5e6badd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1972,8 +1972,9 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, rcu_read_lock(); list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { - if (!device->in_fs_metadata || !device->bdev || - device->is_tgtdev_for_dev_replace) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, + &device->dev_state) || + !device->bdev || device->is_tgtdev_for_dev_replace) continue; if (i >= nr_devices) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c6db1ae4ac56..e782f6853757 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -705,7 +705,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, fs_devices->rotating = 1; device->bdev = bdev; - device->in_fs_metadata = 0; + clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); device->mode = flags; fs_devices->open_devices++; @@ -909,7 +909,8 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step) again: /* This is the initialized path, it is safe to release the devices. 
*/ list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { - if (device->in_fs_metadata) { + if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, + &device->dev_state)) { if (!device->is_tgtdev_for_dev_replace && (!latest_dev || device->generation > latest_dev->generation)) { @@ -1634,7 +1635,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; - WARN_ON(!device->in_fs_metadata); + WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)); WARN_ON(device->is_tgtdev_for_dev_replace); path = btrfs_alloc_path(); if (!path) @@ -1975,7 +1976,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, if (ret) goto error_undo; - device->in_fs_metadata = 0; + clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); btrfs_scrub_cancel_dev(fs_info, device); /* @@ -2195,7 +2196,8 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, * is held by the caller. */ list_for_each_entry(tmp, devices, dev_list) { - if (tmp->in_fs_metadata && !tmp->bdev) { + if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, + &tmp->dev_state) && !tmp->bdev) { *device = tmp; break; } @@ -2447,7 +2449,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path device->commit_total_bytes = device->total_bytes; device->fs_info = fs_info; device->bdev = bdev; - device->in_fs_metadata = 1; + set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); device->is_tgtdev_for_dev_replace = 0; device->mode = FMODE_EXCL; device->dev_stats_valid = 1; @@ -2656,7 +2658,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->commit_bytes_used = device->bytes_used; device->fs_info = fs_info; device->bdev = bdev; - device->in_fs_metadata = 1; + set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); device->is_tgtdev_for_dev_replace = 1; device->mode = FMODE_EXCL; device->dev_stats_valid = 1; @@ -2685,7 +2687,7 @@ void 
btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, tgtdev->io_align = sectorsize; tgtdev->sector_size = sectorsize; tgtdev->fs_info = fs_info; - tgtdev->in_fs_metadata = 1; + set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &tgtdev->dev_state); } static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, @@ -4737,8 +4739,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, continue; } - if (!device->in_fs_metadata || - device->is_tgtdev_for_dev_replace) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, + &device->dev_state) || + device->is_tgtdev_for_dev_replace) continue; if (device->total_bytes > device->bytes_used) @@ -6529,7 +6532,9 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, } btrfs_report_missing_device(fs_info, devid, uuid, false); } - map->stripes[i].dev->in_fs_metadata = 1; + set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, + &(map->stripes[i].dev->dev_state)); + } write_lock(&map_tree->map_tree.lock); @@ -6704,7 +6709,7 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, } fill_device_from_item(leaf, dev_item, device); - device->in_fs_metadata = 1; + set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && !device->is_tgtdev_for_dev_replace) { device->fs_devices->total_rw_bytes += device->total_bytes; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 893e283c7f15..d290641658cc 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -48,6 +48,7 @@ struct btrfs_pending_bios { #endif #define BTRFS_DEV_STATE_WRITEABLE (0) +#define BTRFS_DEV_STATE_IN_FS_METADATA (1) struct btrfs_device { struct list_head dev_list; @@ -72,7 +73,6 @@ struct btrfs_device { fmode_t mode; unsigned long dev_state; - int in_fs_metadata; int missing; int is_tgtdev_for_dev_replace; blk_status_t last_flush_error; -- cgit v1.2.3 From e6e674bd4d54fe8d47a06914f3b90752785b4882 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 4 Dec 
2017 12:54:54 +0800 Subject: btrfs: cleanup device states define BTRFS_DEV_STATE_MISSING Currently device state is being managed by each individual int variable such as struct btrfs_device::missing. Instead of that declare btrfs_device::dev_state BTRFS_DEV_STATE_MISSING and use the bit operations. Signed-off-by: Anand Jain Reviewed-by : Nikolay Borisov [ whitespace adjustments ] Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/scrub.c | 7 ++++--- fs/btrfs/super.c | 2 +- fs/btrfs/volumes.c | 32 +++++++++++++++++++------------- fs/btrfs/volumes.h | 2 +- 6 files changed, 28 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 483eb62b9b27..d2c820c6f91e 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -306,7 +306,7 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) static char* btrfs_dev_name(struct btrfs_device *device) { - if (device->missing) + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) return ""; else return rcu_str_deref(device->name); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 44573cdda1fd..4f7d2a38865c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3389,7 +3389,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) /* send down all the barriers */ head = &info->fs_devices->devices; list_for_each_entry(dev, head, dev_list) { - if (dev->missing) + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) continue; @@ -3403,7 +3403,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) /* wait for all the barriers */ list_for_each_entry(dev, head, dev_list) { - if (dev->missing) + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) { errors_wait++; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index dc58005cb9f6..f8a073ce490b 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2535,7 +2535,7 @@ leave_nomem: } 
WARN_ON(sblock->page_count == 0); - if (dev->missing) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) { /* * This case should only be hit for RAID 5/6 device replace. See * the comment in scrub_missing_raid56_pages() for details. @@ -2870,7 +2870,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, u8 csum[BTRFS_CSUM_SIZE]; u32 blocksize; - if (dev->missing) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) { scrub_parity_mark_sectors_error(sparity, logical, len); return 0; } @@ -4112,7 +4112,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, mutex_lock(&fs_info->fs_devices->device_list_mutex); dev = btrfs_find_device(fs_info, devid, NULL, NULL); - if (!dev || (dev->missing && !is_dev_replace)) { + if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) && + !is_dev_replace)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); return -ENODEV; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bc55c5e6badd..ba5324523995 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2270,7 +2270,7 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root) while (cur_devices) { head = &cur_devices->devices; list_for_each_entry(dev, head, dev_list) { - if (dev->missing) + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->name) continue; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e782f6853757..a102e77b35b0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -825,9 +825,9 @@ static noinline int device_list_add(const char *path, return -ENOMEM; rcu_string_free(device->name); rcu_assign_pointer(device->name, name); - if (device->missing) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { fs_devices->missing_devices--; - device->missing = 0; + clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); } } @@ -996,7 +996,7 @@ static void btrfs_prepare_close_one_device(struct btrfs_device *device) fs_devices->rw_devices--; 
} - if (device->missing) + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) fs_devices->missing_devices--; new_device = btrfs_alloc_device(NULL, &device->devid, @@ -1882,7 +1882,8 @@ static struct btrfs_device * btrfs_find_next_active_device( list_for_each_entry(next_device, &fs_devs->devices, dev_list) { if (next_device != device && - !next_device->missing && next_device->bdev) + !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state) + && next_device->bdev) return next_device; } @@ -1996,7 +1997,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, device->fs_devices->num_devices--; device->fs_devices->total_devices--; - if (device->missing) + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) device->fs_devices->missing_devices--; btrfs_assign_next_active_device(fs_info, device, NULL); @@ -2071,7 +2072,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, list_del_rcu(&srcdev->dev_list); list_del(&srcdev->dev_alloc_list); fs_devices->num_devices--; - if (srcdev->missing) + if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state)) fs_devices->missing_devices--; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) @@ -5089,7 +5090,8 @@ int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset) map = em->map_lookup; for (i = 0; i < map->num_stripes; i++) { - if (map->stripes[i].dev->missing) { + if (test_bit(BTRFS_DEV_STATE_MISSING, + &map->stripes[i].dev->dev_state)) { miss_ndevs++; continue; } @@ -6117,7 +6119,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_device *device, int should_queue = 1; struct btrfs_pending_bios *pending_bios; - if (device->missing || !device->bdev) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) || + !device->bdev) { bio_io_error(bio); return; } @@ -6313,7 +6316,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, device->fs_devices = fs_devices; fs_devices->num_devices++; - 
device->missing = 1; + set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); fs_devices->missing_devices++; return device; @@ -6675,7 +6678,8 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, dev_uuid, false); } - if(!device->bdev && !device->missing) { + if (!device->bdev && + !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { /* * this happens when a device that was properly setup * in the device info lists suddenly goes bad. @@ -6683,12 +6687,13 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, * device->missing to one here */ device->fs_devices->missing_devices++; - device->missing = 1; + set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); } /* Move the device to its own fs_devices */ if (device->fs_devices != fs_devices) { - ASSERT(device->missing); + ASSERT(test_bit(BTRFS_DEV_STATE_MISSING, + &device->dev_state)); list_move(&device->dev_list, &fs_devices->devices); device->fs_devices->num_devices--; @@ -6874,7 +6879,8 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info) for (i = 0; i < map->num_stripes; i++) { struct btrfs_device *dev = map->stripes[i].dev; - if (!dev || !dev->bdev || dev->missing || + if (!dev || !dev->bdev || + test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) || dev->last_flush_error) missing++; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d290641658cc..68affb06502a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -49,6 +49,7 @@ struct btrfs_pending_bios { #define BTRFS_DEV_STATE_WRITEABLE (0) #define BTRFS_DEV_STATE_IN_FS_METADATA (1) +#define BTRFS_DEV_STATE_MISSING (2) struct btrfs_device { struct list_head dev_list; @@ -73,7 +74,6 @@ struct btrfs_device { fmode_t mode; unsigned long dev_state; - int missing; int is_tgtdev_for_dev_replace; blk_status_t last_flush_error; int flush_bio_sent; -- cgit v1.2.3 From 401e29c124eac2b9373eb72f36f202561da339d2 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 4 Dec 2017 12:54:55 +0800 Subject: btrfs: cleanup device states define 
BTRFS_DEV_STATE_REPLACE_TGT Currently device state is being managed by each individual int variable such as struct btrfs_device::is_tgtdev_for_dev_replace. Instead of that declare btrfs_device::dev_state BTRFS_DEV_STATE_REPLACE_TGT and use the bit operations. Signed-off-by: Anand Jain [ whitespace adjustments ] Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 5 +++-- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/scrub.c | 2 +- fs/btrfs/super.c | 3 ++- fs/btrfs/volumes.c | 39 ++++++++++++++++++++++----------------- fs/btrfs/volumes.h | 2 +- 7 files changed, 31 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index d2c820c6f91e..7efbc4d1128b 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -172,7 +172,8 @@ no_valid_dev_replace_entry_found: dev_replace->tgtdev->commit_bytes_used = dev_replace->srcdev->commit_bytes_used; } - dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; + set_bit(BTRFS_DEV_STATE_REPLACE_TGT, + &dev_replace->tgtdev->dev_state); btrfs_init_dev_replace_tgtdev_for_resume(fs_info, dev_replace->tgtdev); } @@ -566,7 +567,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_dev_name(src_device), src_device->devid, rcu_str_deref(tgt_device->name)); - tgt_device->is_tgtdev_for_dev_replace = 0; + clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &tgt_device->dev_state); tgt_device->devid = src_device->devid; src_device->devid = BTRFS_DEV_REPLACE_DEVID; memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 53cb2a1d025f..8d51e4bb67c1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9693,7 +9693,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr) * space to fit our block group in. 
*/ if (device->total_bytes > device->bytes_used + min_free && - !device->is_tgtdev_for_dev_replace) { + !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { ret = find_free_dev_extent(trans, device, min_free, &dev_offset, NULL); if (!ret) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8c75e555e3f8..a1fd5f7f8298 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1528,7 +1528,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, } } - if (device->is_tgtdev_for_dev_replace) { + if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { ret = -EPERM; goto out_free; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f8a073ce490b..03da807c43cf 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4131,7 +4131,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, mutex_lock(&fs_info->scrub_lock); if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || - dev->is_tgtdev_for_dev_replace) { + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) { mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); return -EIO; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ba5324523995..84707e77c051 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1974,7 +1974,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) || - !device->bdev || device->is_tgtdev_for_dev_replace) + !device->bdev || + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) continue; if (i >= nr_devices) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a102e77b35b0..3690822d469a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -911,9 +911,10 @@ again: list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)) { - if 
(!device->is_tgtdev_for_dev_replace && - (!latest_dev || - device->generation > latest_dev->generation)) { + if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, + &device->dev_state) && + (!latest_dev || + device->generation > latest_dev->generation)) { latest_dev = device; } continue; @@ -930,7 +931,8 @@ again: * not, which means whether this device is * used or whether it should be removed. */ - if (step == 0 || device->is_tgtdev_for_dev_replace) { + if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, + &device->dev_state)) { continue; } } @@ -942,7 +944,8 @@ again: if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); - if (!device->is_tgtdev_for_dev_replace) + if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, + &device->dev_state)) fs_devices->rw_devices--; } list_del_init(&device->dev_list); @@ -1250,7 +1253,8 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, *length = 0; - if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace) + if (start >= device->total_bytes || + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) return 0; path = btrfs_alloc_path(); @@ -1428,7 +1432,8 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction, max_hole_size = 0; again: - if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { + if (search_start >= search_end || + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { ret = -ENOSPC; goto out; } @@ -1636,7 +1641,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_key key; WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)); - WARN_ON(device->is_tgtdev_for_dev_replace); + WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)); path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1944,7 +1949,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, if (ret) 
goto out; - if (device->is_tgtdev_for_dev_replace) { + if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { ret = BTRFS_ERROR_DEV_TGT_REPLACE; goto out; } @@ -2451,7 +2456,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path device->fs_info = fs_info; device->bdev = bdev; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); - device->is_tgtdev_for_dev_replace = 0; + clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); device->mode = FMODE_EXCL; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); @@ -2660,7 +2665,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->fs_info = fs_info; device->bdev = bdev; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); - device->is_tgtdev_for_dev_replace = 1; + set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); device->mode = FMODE_EXCL; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); @@ -2756,7 +2761,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, diff = round_down(new_size - device->total_bytes, fs_info->sectorsize); if (new_size <= device->total_bytes || - device->is_tgtdev_for_dev_replace) { + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { mutex_unlock(&fs_info->chunk_mutex); return -EINVAL; } @@ -3569,7 +3574,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) || btrfs_device_get_total_bytes(device) - btrfs_device_get_bytes_used(device) > size_to_free || - device->is_tgtdev_for_dev_replace) + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) continue; ret = btrfs_shrink_device(device, old_size - size_to_free); @@ -4437,7 +4442,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) new_size = round_down(new_size, fs_info->sectorsize); diff = round_down(old_size - new_size, fs_info->sectorsize); - if (device->is_tgtdev_for_dev_replace) + if 
(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) return -EINVAL; path = btrfs_alloc_path(); @@ -4742,7 +4747,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) || - device->is_tgtdev_for_dev_replace) + test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) continue; if (device->total_bytes > device->bytes_used) @@ -6566,7 +6571,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, device->io_width = btrfs_device_io_width(leaf, dev_item); device->sector_size = btrfs_device_sector_size(leaf, dev_item); WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); - device->is_tgtdev_for_dev_replace = 0; + clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); ptr = btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); @@ -6716,7 +6721,7 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, fill_device_from_item(leaf, dev_item, device); set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && - !device->is_tgtdev_for_dev_replace) { + !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { device->fs_devices->total_rw_bytes += device->total_bytes; atomic64_add(device->total_bytes - device->bytes_used, &fs_info->free_chunk_space); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 68affb06502a..2ac123154c8d 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -50,6 +50,7 @@ struct btrfs_pending_bios { #define BTRFS_DEV_STATE_WRITEABLE (0) #define BTRFS_DEV_STATE_IN_FS_METADATA (1) #define BTRFS_DEV_STATE_MISSING (2) +#define BTRFS_DEV_STATE_REPLACE_TGT (3) struct btrfs_device { struct list_head dev_list; @@ -74,7 +75,6 @@ struct btrfs_device { fmode_t mode; unsigned long dev_state; - int is_tgtdev_for_dev_replace; blk_status_t last_flush_error; int flush_bio_sent; -- cgit v1.2.3 From 1c3063b6dbfa03e469a53371fae149a022a41bfd Mon Sep 17 
00:00:00 2001 From: Anand Jain Date: Mon, 4 Dec 2017 12:54:56 +0800 Subject: btrfs: cleanup device states define BTRFS_DEV_STATE_FLUSH_SENT Currently device state is being managed by each individual int variable such as struct btrfs_device::flush_bio_sent. Instead of that declare btrfs_device::dev_state BTRFS_DEV_STATE_FLUSH_SENT and use the bit operations. Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 +++--- fs/btrfs/volumes.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4f7d2a38865c..392d6cde4308 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3348,7 +3348,7 @@ static void write_dev_flush(struct btrfs_device *device) bio->bi_private = &device->flush_wait; btrfsic_submit_bio(bio); - device->flush_bio_sent = 1; + set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); } /* @@ -3358,10 +3358,10 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) { struct bio *bio = device->flush_bio; - if (!device->flush_bio_sent) + if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) return BLK_STS_OK; - device->flush_bio_sent = 0; + clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); wait_for_completion_io(&device->flush_wait); return bio->bi_status; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2ac123154c8d..3e3ae44b6ccc 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -51,6 +51,7 @@ struct btrfs_pending_bios { #define BTRFS_DEV_STATE_IN_FS_METADATA (1) #define BTRFS_DEV_STATE_MISSING (2) #define BTRFS_DEV_STATE_REPLACE_TGT (3) +#define BTRFS_DEV_STATE_FLUSH_SENT (4) struct btrfs_device { struct list_head dev_list; -- cgit v1.2.3 From 440c840cb49f7de91e68a4cc7bca79a75cd298ae Mon Sep 17 00:00:00 2001 From: Timofey Titovets Date: Mon, 4 Dec 2017 00:30:33 +0300 Subject: Btrfs: compression heuristic: replace heap sort with radix sort The slowest part of the heuristic for now is the kernel heap sort(). It can take
up to 55% of runtime on sorting bucket items. As sorting will always be called on most data sets to get a correct byte_core_set_size, the only way to speed up the heuristic is to speed up the sort on the bucket. Add a general radix_sort function. Radix sort requires 2 buffers, one the full size of the input array and one to store counters (jump addresses). That increases usage per heuristic workspace by 1KiB: 8KiB + 1KiB -> 8KiB + 2KiB. This is an LSD radix sort; I use 4 bits as a base for calculating, to make the counters array acceptably small (16 elements * 8 byte). The radix sort implementation has several points to adjust; I added them to make radix sort generally usable in the kernel, like heap sort, if needed. Performance tested in userspace copy of heuristic code, throughput: - average <-> random data: ~3500 MiB/s - heap sort - average <-> random data: ~6000 MiB/s - radix sort Signed-off-by: Timofey Titovets [ coding style fixes ] Signed-off-by: David Sterba --- fs/btrfs/compression.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 123 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 5982c8a71f02..8cd48d7c3f76 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include "ctree.h" #include "disk-io.h" @@ -752,6 +751,8 @@ struct heuristic_ws { u32 sample_size; /* Buckets store counters for each byte value */ struct bucket_item *bucket; + /* Sorting buffer */ + struct bucket_item *bucket_b; struct list_head list; }; @@ -763,6 +764,7 @@ static void free_heuristic_ws(struct list_head *ws) kvfree(workspace->sample); kfree(workspace->bucket); + kfree(workspace->bucket_b); kfree(workspace); } @@ -782,6 +784,10 @@ static struct list_head *alloc_heuristic_ws(void) if (!ws->bucket) goto fail; + ws->bucket_b = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket_b), GFP_KERNEL); + if (!ws->bucket_b) + goto fail; + INIT_LIST_HEAD(&ws->list); return &ws->list; fail: @@ -1278,13
+1284,122 @@ static u32 shannon_entropy(struct heuristic_ws *ws) return entropy_sum * 100 / entropy_max; } -/* Compare buckets by size, ascending */ -static int bucket_comp_rev(const void *lv, const void *rv) +#define RADIX_BASE 4U +#define COUNTERS_SIZE (1U << RADIX_BASE) + +static u8 get4bits(u64 num, int shift) { + u8 low4bits; + + num >>= shift; + /* Reverse order */ + low4bits = (COUNTERS_SIZE - 1) - (num % COUNTERS_SIZE); + return low4bits; +} + +static void copy_cell(void *dst, int dest_i, void *src, int src_i) { - const struct bucket_item *l = (const struct bucket_item *)lv; - const struct bucket_item *r = (const struct bucket_item *)rv; + struct bucket_item *dstv = (struct bucket_item *)dst; + struct bucket_item *srcv = (struct bucket_item *)src; + dstv[dest_i] = srcv[src_i]; +} + +static u64 get_num(const void *a, int i) +{ + struct bucket_item *av = (struct bucket_item *)a; + return av[i].count; +} - return r->count - l->count; +/* + * Use 4 bits as radix base + * Use 16 u32 counters for calculating new possition in buf array + * + * @array - array that will be sorted + * @array_buf - buffer array to store sorting results + * must be equal in size to @array + * @num - array size + * @get_num - function to extract number from array + * @copy_cell - function to copy data from array to array_buf and vice versa + * @get4bits - function to get 4 bits from number at specified offset + */ +static void radix_sort(void *array, void *array_buf, int num, + u64 (*get_num)(const void *, int i), + void (*copy_cell)(void *dest, int dest_i, + void* src, int src_i), + u8 (*get4bits)(u64 num, int shift)) +{ + u64 max_num; + u64 buf_num; + u32 counters[COUNTERS_SIZE]; + u32 new_addr; + u32 addr; + int bitlen; + int shift; + int i; + + /* + * Try avoid useless loop iterations for small numbers stored in big + * counters. Example: 48 33 4 ... 
in 64bit array + */ + max_num = get_num(array, 0); + for (i = 1; i < num; i++) { + buf_num = get_num(array, i); + if (buf_num > max_num) + max_num = buf_num; + } + + buf_num = ilog2(max_num); + bitlen = ALIGN(buf_num, RADIX_BASE * 2); + + shift = 0; + while (shift < bitlen) { + memset(counters, 0, sizeof(counters)); + + for (i = 0; i < num; i++) { + buf_num = get_num(array, i); + addr = get4bits(buf_num, shift); + counters[addr]++; + } + + for (i = 1; i < COUNTERS_SIZE; i++) + counters[i] += counters[i - 1]; + + for (i = num - 1; i >= 0; i--) { + buf_num = get_num(array, i); + addr = get4bits(buf_num, shift); + counters[addr]--; + new_addr = counters[addr]; + copy_cell(array_buf, new_addr, array, i); + } + + shift += RADIX_BASE; + + /* + * Normal radix expects to move data from a temporary array, to + * the main one. But that requires some CPU time. Avoid that + * by doing another sort iteration to original array instead of + * memcpy() + */ + memset(counters, 0, sizeof(counters)); + + for (i = 0; i < num; i ++) { + buf_num = get_num(array_buf, i); + addr = get4bits(buf_num, shift); + counters[addr]++; + } + + for (i = 1; i < COUNTERS_SIZE; i++) + counters[i] += counters[i - 1]; + + for (i = num - 1; i >= 0; i--) { + buf_num = get_num(array_buf, i); + addr = get4bits(buf_num, shift); + counters[addr]--; + new_addr = counters[addr]; + copy_cell(array, new_addr, array_buf, i); + } + + shift += RADIX_BASE; + } } /* @@ -1314,7 +1429,8 @@ static int byte_core_set_size(struct heuristic_ws *ws) struct bucket_item *bucket = ws->bucket; /* Sort in reverse order */ - sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL); + radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, get_num, copy_cell, + get4bits); for (i = 0; i < BYTE_CORE_SET_LOW; i++) coreset_sum += bucket[i].count; -- cgit v1.2.3 From e9679de3fdcb11a13cfe3873c9923b761c8067ab Mon Sep 17 00:00:00 2001 From: Timofey Titovets Date: Tue, 24 Oct 2017 01:29:48 +0300 Subject: Btrfs: compress_file_range() 
change page dirty status once We need to call extent_range_clear_dirty_for_io() on compression range to prevent application from changing page content, while pages are being compressed. extent_range_clear_dirty_for_io() runs on each loop iteration, "(end - start)" can be much (up to 1024 times) bigger than the compression range (BTRFS_MAX_UNCOMPRESSED). The start pointer is advanced each time we manage to compress part of the range. The end pointer does not change so we could redirty the remaining parts repeatedly. Fix that behaviour by calling extent_range_clear_dirty_for_io() only once, the first time it happens. This is the safest but probably not the best behaviour. Previous iterations of the patch tried to redirty only the range that we were not able to compress. This has been refused by David for safety reasons, the writeout callchain is complex and there could be some path that relies on redirtying the entire unwritten range. Signed-off-by: Timofey Titovets Reviewed-by: David Sterba [ enhance changelog, the history and safety concerns, add comment ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 57785eadb95c..e87ec11c0986 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -536,9 +536,14 @@ again: * * If the compression fails for any reason, we set the pages * dirty again later on. + * + * Note that the remaining part is redirtied, the start pointer + * has moved, the end is the original one.
*/ - extent_range_clear_dirty_for_io(inode, start, end); - redirty = 1; + if (!redirty) { + extent_range_clear_dirty_for_io(inode, start, end); + redirty = 1; + } /* Compression level is applied here and only here */ ret = btrfs_compress_pages( -- cgit v1.2.3 From c9f540fa6f4cab5a506b157e7bf2e5373c0241c2 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 4 Dec 2017 18:09:42 -0700 Subject: Btrfs: remove unused variable wait in lock_stripe_add The defined wait is not used anywhere. Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index a7f79254ecca..3940906533a2 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -670,7 +670,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) struct btrfs_raid_bio *cur; struct btrfs_raid_bio *pending; unsigned long flags; - DEFINE_WAIT(wait); struct btrfs_raid_bio *freeit = NULL; struct btrfs_raid_bio *cache_drop = NULL; int ret = 0; -- cgit v1.2.3 From b4ff5ad72e9331aabfa92456403946d17c4959d5 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 30 Nov 2017 17:26:39 -0700 Subject: Btrfs: use struct completion in scrub_submit_raid56_bio_wait This changes to use struct completion directly and removes 'struct scrub_bio_ret' along with the code using it. This struct is used to get the return value from bio, but the caller can access bio to get the return value directly and is holding a reference on it so it won't go away underneath us and can be removed safely. 
Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 03da807c43cf..d766c73eb29a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1666,17 +1666,9 @@ leave_nomem: return 0; } -struct scrub_bio_ret { - struct completion event; - blk_status_t status; -}; - static void scrub_bio_wait_endio(struct bio *bio) { - struct scrub_bio_ret *ret = bio->bi_private; - - ret->status = bio->bi_status; - complete(&ret->event); + complete(bio->bi_private); } static inline int scrub_is_page_on_raid56(struct scrub_page *page) @@ -1689,11 +1681,9 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, struct bio *bio, struct scrub_page *page) { - struct scrub_bio_ret done; + DECLARE_COMPLETION_ONSTACK(done); int ret; - init_completion(&done.event); - done.status = 0; bio->bi_iter.bi_sector = page->logical >> 9; bio->bi_private = &done; bio->bi_end_io = scrub_bio_wait_endio; @@ -1704,11 +1694,8 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, if (ret) return ret; - wait_for_completion_io(&done.event); - if (done.status) - return -EIO; - - return 0; + wait_for_completion_io(&done); + return blk_status_to_errno(bio->bi_status); } /* -- cgit v1.2.3 From 7cfad65297bfe0aa2996cd72d21c898aa84436d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Dec 2017 15:18:14 +0100 Subject: btrfs: tree-checker: use %zu format string for size_t The return value of sizeof() is of type size_t, so we must print it using the %z format modifier rather than %l to avoid this warning on some architectures: fs/btrfs/tree-checker.c: In function 'check_dir_item': fs/btrfs/tree-checker.c:273:50: error: format '%lu' expects argument of type 'long unsigned int', but argument 5 has type 'u32' {aka 'unsigned int'} [-Werror=format=] Fixes: 005887f2e3e0 ("btrfs: tree-checker: Add checker for dir 
item") Signed-off-by: Arnd Bergmann Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 66dac0a4b01f..7c55e3ba5a6c 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -270,7 +270,7 @@ static int check_dir_item(struct btrfs_root *root, /* header itself should not cross item boundary */ if (cur + sizeof(*di) > item_size) { dir_item_err(root, leaf, slot, - "dir item header crosses item boundary, have %lu boundary %u", + "dir item header crosses item boundary, have %zu boundary %u", cur + sizeof(*di), item_size); return -EUCLEAN; } -- cgit v1.2.3 From 5c9a702ed1724f1c856c7299fe1faf9b3b5b4eb0 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 1 Dec 2017 11:19:40 +0200 Subject: btrfs: Remove unused variable in btrfs_get_extent trans was statically assigned to NULL and this never changed over the course of btrfs_get_extent. So remove any code which checks whether trans != NULL and just hardcode the fact trans is always NULL. 
Resolves-coverity-id: 112806 Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e87ec11c0986..8b6c59a068d2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6948,7 +6948,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct extent_map *em = NULL; struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_io_tree *io_tree = &inode->io_tree; - struct btrfs_trans_handle *trans = NULL; const bool new_inline = !page || create; read_lock(&em_tree->lock); @@ -6989,8 +6988,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, path->reada = READA_FORWARD; } - ret = btrfs_lookup_file_extent(trans, root, path, - objectid, start, trans != NULL); + ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0); if (ret < 0) { err = ret; goto out; @@ -7186,11 +7184,6 @@ out: trace_btrfs_get_extent(root, inode, em); btrfs_free_path(path); - if (trans) { - ret = btrfs_end_transaction(trans); - if (!err) - err = ret; - } if (err) { free_extent_map(em); return ERR_PTR(err); -- cgit v1.2.3 From bf8d32b9b3c5fb183aa4ac5cb7bae9580dcb0dfb Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 1 Dec 2017 11:19:43 +0200 Subject: btrfs: remove redundant check in btrfs_get_extent_fiemap Before returning hole_em in btrfs_get_fiemap_extent we check if it's different than null. However, by the time this null check is triggered we already know hole_em is not null because it means it points to the em we found and it has already been dereferenced. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8b6c59a068d2..034d7333b14d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7305,7 +7305,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, em->block_start = EXTENT_MAP_DELALLOC; em->block_len = found; } - } else if (hole_em) { + } else { return hole_em; } out: -- cgit v1.2.3 From e128f9c3f7242318e1c76d204c7ae32bc878b8c7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 Oct 2017 17:24:26 +0100 Subject: btrfs: compression: add helper for type to string conversion There are several places opencoding this conversion, add a helper now that we have 3 compression algorithms. Signed-off-by: David Sterba --- fs/btrfs/compression.c | 15 +++++++++++++++ fs/btrfs/compression.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 8cd48d7c3f76..28c3940062b7 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -44,6 +44,21 @@ #include "extent_io.h" #include "extent_map.h" +static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" }; + +const char* btrfs_compress_type2str(enum btrfs_compression_type type) +{ + switch (type) { + case BTRFS_COMPRESS_ZLIB: + case BTRFS_COMPRESS_LZO: + case BTRFS_COMPRESS_ZSTD: + case BTRFS_COMPRESS_NONE: + return btrfs_compress_types[type]; + } + + return NULL; +} + static int btrfs_decompress_bio(struct compressed_bio *cb); static inline int compressed_bio_size(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 6b692903a23c..677fa4aa0bd7 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -137,6 +137,8 @@ extern const struct btrfs_compress_op btrfs_zlib_compress; extern const struct btrfs_compress_op btrfs_lzo_compress; extern const struct 
btrfs_compress_op btrfs_zstd_compress; +const char* btrfs_compress_type2str(enum btrfs_compression_type type); + int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end); #endif -- cgit v1.2.3 From 93370509c24cc41f994d467d145811ba9c9a48f7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 Oct 2017 17:32:41 +0100 Subject: btrfs: SETFLAGS ioctl: use helper for compression type conversion Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a1fd5f7f8298..be5bd81b3669 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -307,12 +307,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_COMPRESS; ip->flags &= ~BTRFS_INODE_NOCOMPRESS; - if (fs_info->compress_type == BTRFS_COMPRESS_LZO) - comp = "lzo"; - else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB) - comp = "zlib"; - else - comp = "zstd"; + comp = btrfs_compress_type2str(fs_info->compress_type); + if (!comp || comp[0] == 0) + comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); + ret = btrfs_set_prop(inode, "btrfs.compression", comp, strlen(comp), 0); if (ret) -- cgit v1.2.3 From 802a5c69584a0e48ab9797e743fb087c7b56a264 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 Oct 2017 17:55:14 +0100 Subject: btrfs: prop: use common helper for type to string conversion Use the helper for conversion, keep the semantics. 
Signed-off-by: David Sterba --- fs/btrfs/props.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index c39a940d0c75..b30a056963ab 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -423,11 +423,11 @@ static const char *prop_compression_extract(struct inode *inode) { switch (BTRFS_I(inode)->prop_compress) { case BTRFS_COMPRESS_ZLIB: - return "zlib"; case BTRFS_COMPRESS_LZO: - return "lzo"; case BTRFS_COMPRESS_ZSTD: - return "zstd"; + return btrfs_compress_type2str(BTRFS_I(inode)->prop_compress); + default: + break; } return NULL; -- cgit v1.2.3 From 0f628c632d4009527aef597dac3a4a09e628b051 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 Oct 2017 18:06:34 +0100 Subject: btrfs: show options: use helper to convert compression type string Use the helper, if the COMPRESS option is set, the result is always defined and not empty. Signed-off-by: David Sterba --- fs/btrfs/super.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 84707e77c051..baa8add64681 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1243,7 +1243,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); - char *compress_type; + const char *compress_type; if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); @@ -1259,12 +1259,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); if (btrfs_test_opt(info, COMPRESS)) { - if (info->compress_type == BTRFS_COMPRESS_ZLIB) - compress_type = "zlib"; - else if (info->compress_type == BTRFS_COMPRESS_LZO) - compress_type = "lzo"; - else - compress_type = "zstd"; + compress_type = btrfs_compress_type2str(info->compress_type); if (btrfs_test_opt(info, FORCE_COMPRESS)) 
seq_printf(seq, ",compress-force=%s", compress_type); else -- cgit v1.2.3 From ad8bc4d005576e3f380ba2dab24c183519f4e9fa Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 6 Dec 2017 11:40:10 +0800 Subject: btrfs: put btrfs_ioctl_vol_args_v2 related defines together Just a code spatial rearrangement, no functional change. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index ce615b75e855..c8d99b9ca550 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -33,7 +33,12 @@ struct btrfs_ioctl_vol_args { char name[BTRFS_PATH_NAME_MAX + 1]; }; -#define BTRFS_DEVICE_PATH_NAME_MAX 1024 +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 +#define BTRFS_SUBVOL_NAME_MAX 4039 + +#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) +#define BTRFS_SUBVOL_RDONLY (1ULL << 1) +#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) #define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3) @@ -101,11 +106,7 @@ struct btrfs_ioctl_qgroup_limit_args { * - BTRFS_IOC_SUBVOL_GETFLAGS * - BTRFS_IOC_SUBVOL_SETFLAGS */ -#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) -#define BTRFS_SUBVOL_RDONLY (1ULL << 1) -#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) -#define BTRFS_SUBVOL_NAME_MAX 4039 struct btrfs_ioctl_vol_args_v2 { __s64 fd; __u64 transid; -- cgit v1.2.3 From f3038ee3a3f1017a1cbe9907e31fa12d366c5dcb Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 5 Dec 2017 09:29:19 +0200 Subject: btrfs: Handle btrfs_set_extent_delalloc failure in fixup worker This function was introduced by 247e743cbe6e ("Btrfs: Use async helpers to deal with pages that have been improperly dirtied") and it didn't do any error handling then. This function might very well fail in ENOMEM situation, yet it's not handled, this could lead to inconsistent state. So let's handle the failure by setting the mapping error bit. 
Cc: stable@vger.kernel.org Signed-off-by: Nikolay Borisov Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 034d7333b14d..9ad8c9321c8f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2116,8 +2116,15 @@ again: goto out; } - btrfs_set_extent_delalloc(inode, page_start, page_end, 0, &cached_state, - 0); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0, + &cached_state, 0); + if (ret) { + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); + goto out; + } + ClearPageChecked(page); set_page_dirty(page); btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); -- cgit v1.2.3 From 87c46ec70068cd28442954ec3fd0784eb90ca19b Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 22:14:31 +0530 Subject: btrfs: remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. Signed-off-by: Pravin Shedge Signed-off-by: David Sterba --- fs/btrfs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index baa8add64681..428841a17acb 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,7 +61,6 @@ #include "tests/btrfs-tests.h" #include "qgroup.h" -#include "backref.h" #define CREATE_TRACE_POINTS #include -- cgit v1.2.3 From 9ea2c7c9da13c9073e371c046cbbc45481ecb459 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 12 Dec 2017 11:14:49 +0200 Subject: btrfs: Fix out of bounds access in btrfs_search_slot When modifying a tree where the root is at BTRFS_MAX_LEVEL - 1 then the level variable is going to be 7 (this is the max height of the tree). On the other hand btrfs_cow_block is always called with "level + 1" as an index into the nodes and slots arrays. 
This leads to an out of bounds access. Admittedly this will be benign since an OOB access of the nodes array will likely read the 0th element from the slots array, which in this case is going to be 0 (since we start CoW at the top of the tree). The OOB access into the slots array in turn will read the 0th and 1st values of the locks array, which would both be 0 at the time. However, this benign behavior relies on the fact that the path being passed hasn't been initialised; if it has already been used to query a btree then it could potentially have populated the nodes/slots arrays. Fix it by explicitly checking if we are at level 7 (the maximum allowed index in nodes/slots arrays) and explicitly call the CoW routine with NULL for parent's node/slot. Signed-off-by: Nikolay Borisov Fixes-coverity-id: 711515 Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1e74cf826532..5361f69433a3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2774,6 +2774,8 @@ again: * contention with the cow code */ if (cow) { + bool last_level = (level == (BTRFS_MAX_LEVEL - 1)); + /* * if we don't really need to cow this block * then we don't want to set the path blocking, @@ -2798,9 +2800,13 @@ again: } btrfs_set_path_blocking(p); - err = btrfs_cow_block(trans, root, b, - p->nodes[level + 1], - p->slots[level + 1], &b); + if (last_level) + err = btrfs_cow_block(trans, root, b, NULL, 0, + &b); + else + err = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], &b); if (err) { ret = err; goto done; -- cgit v1.2.3 From 3e798068a8ef400049cc3bb4f01f9701a29e1f86 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 11 Dec 2017 16:38:48 +0200 Subject: btrfs: Remove pair of bio_get/put in btrfs_schedule_bio This code was added in 492bb6deee34 ("Btrfs: Hold a reference on bios during submit_bio, add some extra bio
checks"). However, holding a reference on a bio is necessary only if it's going to be referenced after the submit_bio returns and the bio is completed. In this particular instance this is not the case so there is no need to hold an extra reference since we directly return. Signed-off-by: Nikolay Borisov Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3690822d469a..9a04245003ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6132,9 +6132,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_device *device, /* don't bother with additional async steps for reads, right now */ if (bio_op(bio) == REQ_OP_READ) { - bio_get(bio); btrfsic_submit_bio(bio); - bio_put(bio); return; } -- cgit v1.2.3 From 5e3ee23648a20dfaf72eeb88f884aae25ea7d8fb Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 8 Dec 2017 15:55:58 +0200 Subject: btrfs: sink extent_write_locked_range tree parameter This function is called only from submit_compressed_extents and the io tree being passed is always that of the inode. But we are also passing the inode, so just move getting the io tree pointer in extent_write_locked_range to simplify the signature. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 +++-- fs/btrfs/extent_io.h | 4 ++-- fs/btrfs/inode.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 16ae832bdb5d..c0b2bf65d6b0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4073,11 +4073,12 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, return ret; } -int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, - u64 start, u64 end, int mode) +int extent_write_locked_range(struct inode *inode, u64 start, u64 end, + int mode) { int ret = 0; struct address_space *mapping = inode->i_mapping; + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *page; unsigned long nr_pages = (end - start + PAGE_SIZE) >> PAGE_SHIFT; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c28f5ef88f42..f2cbabb2306a 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -405,8 +405,8 @@ int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_io_tree *tree, struct page *page, struct writeback_control *wbc); -int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, - u64 start, u64 end, int mode); +int extent_write_locked_range(struct inode *inode, u64 start, u64 end, + int mode); int extent_writepages(struct extent_io_tree *tree, struct address_space *mapping, struct writeback_control *wbc); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9ad8c9321c8f..8a7da59292b7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -770,8 +770,8 @@ retry: * all those pages down to the drive. 
*/ if (!page_started && !ret) - extent_write_locked_range(io_tree, - inode, async_extent->start, + extent_write_locked_range(inode, + async_extent->start, async_extent->start + async_extent->ram_size - 1, WB_SYNC_ALL); -- cgit v1.2.3 From 0a9b0e5351818d43ac013c00a1474cc3601fc5bb Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 8 Dec 2017 15:55:59 +0200 Subject: btrfs: sink extent_write_full_page tree argument The tree argument passed to extent_write_full_page is referenced from the page being passed to the same function. Since we already have enough information to get the reference, remove the function parameter. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 ++--- fs/btrfs/extent_io.h | 3 +-- fs/btrfs/inode.c | 4 +--- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c0b2bf65d6b0..6cd3da16f114 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4056,13 +4056,12 @@ static noinline void flush_write_bio(void *data) flush_epd_write_bio(epd); } -int extent_write_full_page(struct extent_io_tree *tree, struct page *page, - struct writeback_control *wbc) +int extent_write_full_page(struct page *page, struct writeback_control *wbc) { int ret; struct extent_page_data epd = { .bio = NULL, - .tree = tree, + .tree = &BTRFS_I(page->mapping->host)->io_tree, .extent_locked = 0, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f2cbabb2306a..db2558b0cad4 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -403,8 +403,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, struct extent_state **cached_state); int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); -int extent_write_full_page(struct extent_io_tree *tree, struct page *page, - struct writeback_control *wbc); +int extent_write_full_page(struct 
page *page, struct writeback_control *wbc); int extent_write_locked_range(struct inode *inode, u64 start, u64 end, int mode); int extent_writepages(struct extent_io_tree *tree, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8a7da59292b7..46df5e2a64e7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8855,7 +8855,6 @@ int btrfs_readpage(struct file *file, struct page *page) static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_io_tree *tree; struct inode *inode = page->mapping->host; int ret; @@ -8874,8 +8873,7 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } - tree = &BTRFS_I(page->mapping->host)->io_tree; - ret = extent_write_full_page(tree, page, wbc); + ret = extent_write_full_page(page, wbc); btrfs_add_delayed_iput(inode); return ret; } -- cgit v1.2.3 From a74b35ec876df1c2d11b980bb5e4f697ea4fba94 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 8 Dec 2017 16:27:43 +0200 Subject: btrfs: Rename bin_search -> btrfs_bin_search Currently there are 2 functions doing binary search on btrfs nodes: bin_search and btrfs_bin_search. The latter is a simple wrapper for the former. So eliminate the wrapper and just rename bin_search to btrfs_bin_search.
No functional changes Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5361f69433a3..2a09577580b8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1807,8 +1807,8 @@ static noinline int generic_bin_search(struct extent_buffer *eb, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key, - int level, int *slot) +int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, + int level, int *slot) { if (level == 0) return generic_bin_search(eb, @@ -1824,12 +1824,6 @@ static int bin_search(struct extent_buffer *eb, const struct btrfs_key *key, slot); } -int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, - int level, int *slot) -{ - return bin_search(eb, key, level, slot); -} - static void root_add_used(struct btrfs_root *root, u32 size) { spin_lock(&root->accounting_lock); @@ -2614,7 +2608,7 @@ static int key_search(struct extent_buffer *b, const struct btrfs_key *key, int level, int *prev_cmp, int *slot) { if (*prev_cmp != 0) { - *prev_cmp = bin_search(b, key, level, slot); + *prev_cmp = btrfs_bin_search(b, key, level, slot); return *prev_cmp; } @@ -5181,7 +5175,7 @@ again: while (1) { nritems = btrfs_header_nritems(cur); level = btrfs_header_level(cur); - sret = bin_search(cur, min_key, level, &slot); + sret = btrfs_bin_search(cur, min_key, level, &slot); /* at the lowest level, we're done, setup the path and exit */ if (level == path->lowest_level) { -- cgit v1.2.3 From e2932ee08e46629b0d39eda920e1d795e6d83946 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:16:17 +0200 Subject: btrfs: merge two flush_write_bio helpers flush_epd_write_bio is same as flush_write_bio, no point having two such functions. 
Merge them to flush_write_bio. The 'noinline' attribute is removed as it does not have any meaning. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6cd3da16f114..f1842d59fffe 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -138,7 +138,8 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits, BUG_ON(ret < 0); } -static noinline void flush_write_bio(void *data); +static void flush_write_bio(void *data); + static inline struct btrfs_fs_info * tree_fs_info(struct extent_io_tree *tree) { @@ -4039,8 +4040,10 @@ retry: return ret; } -static void flush_epd_write_bio(struct extent_page_data *epd) +static void flush_write_bio(void *data) { + struct extent_page_data *epd = data; + if (epd->bio) { int ret; @@ -4050,12 +4053,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd) } } -static noinline void flush_write_bio(void *data) -{ - struct extent_page_data *epd = data; - flush_epd_write_bio(epd); -} - int extent_write_full_page(struct page *page, struct writeback_control *wbc) { int ret; @@ -4068,7 +4065,7 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc) ret = __extent_writepage(page, wbc, &epd); - flush_epd_write_bio(&epd); + flush_write_bio(&epd); return ret; } @@ -4110,7 +4107,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, start += PAGE_SIZE; } - flush_epd_write_bio(&epd); + flush_write_bio(&epd); return ret; } @@ -4128,7 +4125,7 @@ int extent_writepages(struct extent_io_tree *tree, ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd, flush_write_bio); - flush_epd_write_bio(&epd); + flush_write_bio(&epd); return ret; } -- cgit v1.2.3 From 25b860e038915ffb6c3d8d3fe8c46272a8d471fe Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:30:28 +0200 Subject: btrfs: sink flush_fn to 
extent_write_cache_pages All callers pass the same value flush_write_bio. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f1842d59fffe..cf106d46a2fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3906,8 +3906,7 @@ retry: */ static int extent_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, - writepage_t writepage, void *data, - void (*flush_fn)(void *)) + writepage_t writepage, void *data) { struct inode *inode = mapping->host; int ret = 0; @@ -3971,7 +3970,7 @@ retry: * mapping */ if (!trylock_page(page)) { - flush_fn(data); + flush_write_bio(data); lock_page(page); } @@ -3982,7 +3981,7 @@ retry: if (wbc->sync_mode != WB_SYNC_NONE) { if (PageWriteback(page)) - flush_fn(data); + flush_write_bio(data); wait_on_page_writeback(page); } @@ -4123,8 +4122,7 @@ int extent_writepages(struct extent_io_tree *tree, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; - ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd, - flush_write_bio); + ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd); flush_write_bio(&epd); return ret; } -- cgit v1.2.3 From 935db8531fa4d6cc951729a2b5063b6406b71432 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 23 Jun 2017 04:30:28 +0200 Subject: btrfs: sink writepage parameter to extent_write_cache_pages The function extent_write_cache_pages is modelled after write_cache_pages which is a generic interface and the writepage parameter makes sense there. In btrfs we know exactly which callback we're going to use, so we can pass it directly. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index cf106d46a2fd..0ceb427a8e63 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3893,8 +3893,7 @@ retry: * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write - * @writepage: function called for each page - * @data: data passed to writepage function + * @data: data passed to __extent_writepage function * * If a page is already under I/O, write_cache_pages() skips it, even * if it's dirty. This is desirable behaviour for memory-cleaning writeback, @@ -3906,7 +3905,7 @@ retry: */ static int extent_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, - writepage_t writepage, void *data) + void *data) { struct inode *inode = mapping->host; int ret = 0; @@ -3991,7 +3990,7 @@ retry: continue; } - ret = (*writepage)(page, wbc, data); + ret = __extent_writepage(page, wbc, data); if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { unlock_page(page); @@ -4122,7 +4121,7 @@ int extent_writepages(struct extent_io_tree *tree, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; - ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd); + ret = extent_write_cache_pages(mapping, wbc, &epd); flush_write_bio(&epd); return ret; } -- cgit v1.2.3 From aab6e9edf07f2f4747fbc4aa04e14683fbe0c8ac Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 30 Nov 2017 18:00:02 +0100 Subject: btrfs: unify extent_page_data type passed as void Functions called from extent_write_cache_pages used void* as generic callback data, but all of them convert it to extent_page_data, or use it directly. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0ceb427a8e63..eee9cc5db9ff 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -138,7 +138,7 @@ static void add_extent_changeset(struct extent_state *state, unsigned bits, BUG_ON(ret < 0); } -static void flush_write_bio(void *data); +static void flush_write_bio(struct extent_page_data *epd); static inline struct btrfs_fs_info * tree_fs_info(struct extent_io_tree *tree) @@ -3456,10 +3456,9 @@ done: * and the end_io handler clears the writeback ranges */ static int __extent_writepage(struct page *page, struct writeback_control *wbc, - void *data) + struct extent_page_data *epd) { struct inode *inode = page->mapping->host; - struct extent_page_data *epd = data; u64 start = page_offset(page); u64 page_end = start + PAGE_SIZE - 1; int ret; @@ -3905,7 +3904,7 @@ retry: */ static int extent_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, - void *data) + struct extent_page_data *epd) { struct inode *inode = mapping->host; int ret = 0; @@ -3969,7 +3968,7 @@ retry: * mapping */ if (!trylock_page(page)) { - flush_write_bio(data); + flush_write_bio(epd); lock_page(page); } @@ -3980,7 +3979,7 @@ retry: if (wbc->sync_mode != WB_SYNC_NONE) { if (PageWriteback(page)) - flush_write_bio(data); + flush_write_bio(epd); wait_on_page_writeback(page); } @@ -3990,7 +3989,7 @@ retry: continue; } - ret = __extent_writepage(page, wbc, data); + ret = __extent_writepage(page, wbc, epd); if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { unlock_page(page); @@ -4038,10 +4037,8 @@ retry: return ret; } -static void flush_write_bio(void *data) +static void flush_write_bio(struct extent_page_data *epd) { - struct extent_page_data *epd = data; - if (epd->bio) { int ret; -- cgit v1.2.3 From 72fa39f5c7a1c9d95c24ddf6605581ea05d6081c Mon Sep 17 00:00:00 2001 From: "Misono, 
Tomohiro" Date: Thu, 14 Dec 2017 17:24:30 +0900 Subject: btrfs: add btrfs_mount_root() and new file_system_type Add btrfs_mount_root() and new file_system_type for preparation of cleanup of btrfs_mount(). Code path is not changed yet. btrfs_mount_root() is almost the same as current btrfs_mount(), but doesn't have subvolume related part. Signed-off-by: Tomohiro Misono Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 428841a17acb..ce78906bc7d7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -65,7 +65,15 @@ #include static const struct super_operations btrfs_super_ops; + +/* + * Types for mounting the default subvolume and a subvolume explicitly + * requested by subvol=/path. That way the callchain is straightforward and we + * don't have to play tricks with the mount options and recursive calls to + * btrfs_mount. 
+ */ static struct file_system_type btrfs_fs_type; +static struct file_system_type btrfs_root_fs_type; static int btrfs_remount(struct super_block *sb, int *flags, char *data); @@ -1549,6 +1557,112 @@ static int setup_security_options(struct btrfs_fs_info *fs_info, return ret; } +static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, + int flags, const char *device_name, void *data) +{ + struct block_device *bdev = NULL; + struct super_block *s; + struct btrfs_fs_devices *fs_devices = NULL; + struct btrfs_fs_info *fs_info = NULL; + struct security_mnt_opts new_sec_opts; + fmode_t mode = FMODE_READ; + char *subvol_name = NULL; + u64 subvol_objectid = 0; + int error = 0; + + if (!(flags & SB_RDONLY)) + mode |= FMODE_WRITE; + + error = btrfs_parse_early_options(data, mode, fs_type, + &subvol_name, &subvol_objectid, + &fs_devices); + if (error) { + kfree(subvol_name); + return ERR_PTR(error); + } + + security_init_mnt_opts(&new_sec_opts); + if (data) { + error = parse_security_options(data, &new_sec_opts); + if (error) + return ERR_PTR(error); + } + + error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); + if (error) + goto error_sec_opts; + + /* + * Setup a dummy root and fs_info for test/set super. This is because + * we don't actually fill this stuff out until open_ctree, but we need + * it for searching for existing supers, so this lets us do that and + * then open_ctree will properly initialize everything later. 
+ */ + fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); + if (!fs_info) { + error = -ENOMEM; + goto error_sec_opts; + } + + fs_info->fs_devices = fs_devices; + + fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); + fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); + security_init_mnt_opts(&fs_info->security_opts); + if (!fs_info->super_copy || !fs_info->super_for_commit) { + error = -ENOMEM; + goto error_fs_info; + } + + error = btrfs_open_devices(fs_devices, mode, fs_type); + if (error) + goto error_fs_info; + + if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) { + error = -EACCES; + goto error_close_devices; + } + + bdev = fs_devices->latest_bdev; + s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC, + fs_info); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto error_close_devices; + } + + if (s->s_root) { + btrfs_close_devices(fs_devices); + free_fs_info(fs_info); + if ((flags ^ s->s_flags) & SB_RDONLY) + error = -EBUSY; + } else { + snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); + btrfs_sb(s)->bdev_holder = fs_type; + error = btrfs_fill_super(s, fs_devices, data); + } + if (error) { + deactivate_locked_super(s); + goto error_sec_opts; + } + + fs_info = btrfs_sb(s); + error = setup_security_options(fs_info, s, &new_sec_opts); + if (error) { + deactivate_locked_super(s); + goto error_sec_opts; + } + + return dget(s->s_root); + +error_close_devices: + btrfs_close_devices(fs_devices); +error_fs_info: + free_fs_info(fs_info); +error_sec_opts: + security_free_mnt_opts(&new_sec_opts); + return ERR_PTR(error); +} /* * Find a superblock for the given device / mount point. 
* @@ -2170,6 +2284,15 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, }; + +static struct file_system_type btrfs_root_fs_type = { + .owner = THIS_MODULE, + .name = "btrfs", + .mount = btrfs_mount_root, + .kill_sb = btrfs_kill_super, + .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, +}; + MODULE_ALIAS_FS("btrfs"); static int btrfs_control_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From 312c89fbca06896cb25a0daf4fa5f44c29bbb1b1 Mon Sep 17 00:00:00 2001 From: "Misono, Tomohiro" Date: Thu, 14 Dec 2017 17:25:01 +0900 Subject: btrfs: cleanup btrfs_mount() using btrfs_mount_root() Cleanup btrfs_mount() by using btrfs_mount_root(). This avoids getting btrfs_mount() called twice in mount path. Old btrfs_mount() will do: 0. VFS layer calls vfs_kern_mount() with registered file_system_type (for btrfs, btrfs_fs_type). btrfs_mount() is called on the way. 1. btrfs_parse_early_options() parses "subvolid=" mount option and set the value to subvol_objectid. Otherwise, subvol_objectid has the initial value of 0 2. check subvol_objectid is 5 or not. Assume this time id is not 5, then btrfs_mount() returns by calling mount_subvol() 3. In mount_subvol(), original mount options are modified to contain "subvolid=0" in setup_root_args(). Then, vfs_kern_mount() is called with btrfs_fs_type and new options 4. btrfs_mount() is called again 5. btrfs_parse_early_options() parses "subvolid=0" and set 5 (instead of 0) to subvol_objectid 6. check subvol_objectid is 5 or not. This time id is 5 and mount_subvol() is not called. btrfs_mount() finishes mounting a root 7. (in mount_subvol()) with using a return value of vfs_kern_mount(), it calls mount_subtree() 8. return subvolume's dentry Reusing the same file_system_type (and btrfs_mount()) for vfs_kern_mount() is the cause of complication. Instead, new btrfs_mount() will do: 1. parse subvol id related options for later use in mount_subvol() 2.
mount device's root by calling vfs_kern_mount() with btrfs_root_fs_type, which is not registered to VFS by register_filesystem(). As a result, btrfs_mount_root() is called 3. return by calling mount_subvol() The code of 2. is moved from the first part of mount_subvol(). The semantics of device holder changes from btrfs_fs_type to btrfs_root_fs_type and has to be used in all contexts. Otherwise we'd get wrong results when mount and dev scan would not check the same thing. (this has been found independently and the fix is folded into this patch) Signed-off-by: Tomohiro Misono Reviewed-by: David Sterba [ fold the btrfs_control_ioctl fixup, extend the comment ] Signed-off-by: David Sterba --- fs/btrfs/super.c | 193 ++++++++++++++++++------------------------------------- 1 file changed, 63 insertions(+), 130 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ce78906bc7d7..adf0f8b8829c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -71,6 +71,8 @@ static const struct super_operations btrfs_super_ops; * requested by subvol=/path. That way the callchain is straightforward and we * don't have to play tricks with the mount options and recursive calls to * btrfs_mount. + * + * The new btrfs_root_fs_type also servers as a tag for the bdev_holder.
*/ static struct file_system_type btrfs_fs_type; static struct file_system_type btrfs_root_fs_type; @@ -1405,48 +1407,11 @@ static char *setup_root_args(char *args) static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, int flags, const char *device_name, - char *data) + char *data, struct vfsmount *mnt) { struct dentry *root; - struct vfsmount *mnt = NULL; - char *newargs; int ret; - newargs = setup_root_args(data); - if (!newargs) { - root = ERR_PTR(-ENOMEM); - goto out; - } - - mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); - if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) { - if (flags & SB_RDONLY) { - mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY, - device_name, newargs); - } else { - mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY, - device_name, newargs); - if (IS_ERR(mnt)) { - root = ERR_CAST(mnt); - mnt = NULL; - goto out; - } - - down_write(&mnt->mnt_sb->s_umount); - ret = btrfs_remount(mnt->mnt_sb, &flags, NULL); - up_write(&mnt->mnt_sb->s_umount); - if (ret < 0) { - root = ERR_PTR(ret); - goto out; - } - } - } - if (IS_ERR(mnt)) { - root = ERR_CAST(mnt); - mnt = NULL; - goto out; - } - if (!subvol_name) { if (!subvol_objectid) { ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), @@ -1502,7 +1467,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, out: mntput(mnt); - kfree(newargs); kfree(subvol_name); return root; } @@ -1557,6 +1521,12 @@ static int setup_security_options(struct btrfs_fs_info *fs_info, return ret; } +/* + * Find a superblock for the given device / mount point. + * + * Note: This is based on mount_bdev from fs/super.c with a few additions + * for multiple device setup. Make sure to keep it in sync. 
+ */ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, int flags, const char *device_name, void *data) { @@ -1663,20 +1633,35 @@ error_sec_opts: security_free_mnt_opts(&new_sec_opts); return ERR_PTR(error); } + /* - * Find a superblock for the given device / mount point. + * Mount function which is called by VFS layer. * - * Note: This is based on get_sb_bdev from fs/super.c with a few additions - * for multiple device setup. Make sure to keep it in sync. + * In order to allow mounting a subvolume directly, btrfs uses mount_subtree() + * which needs vfsmount* of device's root (/). This means device's root has to + * be mounted internally in any case. + * + * Operation flow: + * 1. Parse subvol id related options for later use in mount_subvol(). + * + * 2. Mount device's root (/) by calling vfs_kern_mount(). + * + * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the + * first place. In order to avoid calling btrfs_mount() again, we use + * different file_system_type which is not registered to VFS by + * register_filesystem() (btrfs_root_fs_type). As a result, + * btrfs_mount_root() is called. The return value will be used by + * mount_subtree() in mount_subvol(). + * + * 3. Call mount_subvol() to get the dentry of subvolume. Since there is + * "btrfs subvolume set-default", mount_subvol() is called always. 
*/ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, const char *device_name, void *data) { - struct block_device *bdev = NULL; - struct super_block *s; struct btrfs_fs_devices *fs_devices = NULL; - struct btrfs_fs_info *fs_info = NULL; - struct security_mnt_opts new_sec_opts; + struct vfsmount *mnt_root; + struct dentry *root; fmode_t mode = FMODE_READ; char *subvol_name = NULL; u64 subvol_objectid = 0; @@ -1693,93 +1678,41 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, return ERR_PTR(error); } - if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) { - /* mount_subvol() will free subvol_name. */ - return mount_subvol(subvol_name, subvol_objectid, flags, - device_name, data); - } - - security_init_mnt_opts(&new_sec_opts); - if (data) { - error = parse_security_options(data, &new_sec_opts); - if (error) - return ERR_PTR(error); - } - - error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); - if (error) - goto error_sec_opts; - - /* - * Setup a dummy root and fs_info for test/set super. This is because - * we don't actually fill this stuff out until open_ctree, but we need - * it for searching for existing supers, so this lets us do that and - * then open_ctree will properly initialize everything later. 
- */ - fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); - if (!fs_info) { - error = -ENOMEM; - goto error_sec_opts; - } - - fs_info->fs_devices = fs_devices; - - fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); - fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); - security_init_mnt_opts(&fs_info->security_opts); - if (!fs_info->super_copy || !fs_info->super_for_commit) { - error = -ENOMEM; - goto error_fs_info; - } - - error = btrfs_open_devices(fs_devices, mode, fs_type); - if (error) - goto error_fs_info; - - if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) { - error = -EACCES; - goto error_close_devices; - } - - bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC, - fs_info); - if (IS_ERR(s)) { - error = PTR_ERR(s); - goto error_close_devices; - } + /* mount device's root (/) */ + mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data); + if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) { + if (flags & SB_RDONLY) { + mnt_root = vfs_kern_mount(&btrfs_root_fs_type, + flags & ~SB_RDONLY, device_name, data); + } else { + mnt_root = vfs_kern_mount(&btrfs_root_fs_type, + flags | SB_RDONLY, device_name, data); + if (IS_ERR(mnt_root)) { + root = ERR_CAST(mnt_root); + goto out; + } - if (s->s_root) { - btrfs_close_devices(fs_devices); - free_fs_info(fs_info); - if ((flags ^ s->s_flags) & SB_RDONLY) - error = -EBUSY; - } else { - snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); - btrfs_sb(s)->bdev_holder = fs_type; - error = btrfs_fill_super(s, fs_devices, data); - } - if (error) { - deactivate_locked_super(s); - goto error_sec_opts; + down_write(&mnt_root->mnt_sb->s_umount); + error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL); + up_write(&mnt_root->mnt_sb->s_umount); + if (error < 0) { + root = ERR_PTR(error); + mntput(mnt_root); + goto out; + } + } } - - fs_info = btrfs_sb(s); - error = setup_security_options(fs_info, s, &new_sec_opts); - if 
(error) { - deactivate_locked_super(s); - goto error_sec_opts; + if (IS_ERR(mnt_root)) { + root = ERR_CAST(mnt_root); + goto out; } - return dget(s->s_root); + /* mount_subvol() will free subvol_name and mnt_root */ + root = mount_subvol(subvol_name, subvol_objectid, flags, device_name, + data, mnt_root); -error_close_devices: - btrfs_close_devices(fs_devices); -error_fs_info: - free_fs_info(fs_info); -error_sec_opts: - security_free_mnt_opts(&new_sec_opts); - return ERR_PTR(error); +out: + return root; } static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, @@ -2326,11 +2259,11 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case BTRFS_IOC_SCAN_DEV: ret = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_fs_type, &fs_devices); + &btrfs_root_fs_type, &fs_devices); break; case BTRFS_IOC_DEVICES_READY: ret = btrfs_scan_one_device(vol->name, FMODE_READ, - &btrfs_fs_type, &fs_devices); + &btrfs_root_fs_type, &fs_devices); if (ret) break; ret = !(fs_devices->num_devices == fs_devices->total_devices); -- cgit v1.2.3 From d7407606564c595e4b9775cc6b14913e92217782 Mon Sep 17 00:00:00 2001 From: "Misono, Tomohiro" Date: Thu, 14 Dec 2017 17:25:28 +0900 Subject: btrfs: split parse_early_options() in two Now parse_early_options() is used by both btrfs_mount() and btrfs_mount_root(). However, the former only needs subvol related part and the latter needs the others. Therefore extract the subvol related parts from parse_early_options() and move it to new parse function (parse_subvol_options()). 
Signed-off-by: Tomohiro Misono Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 82 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index adf0f8b8829c..0d615d02eb4a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -463,7 +463,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, case Opt_subvolrootid: case Opt_device: /* - * These are parsed by btrfs_parse_early_options + * These are parsed by btrfs_parse_subvol_options + * and btrfs_parse_early_options * and can be happily ignored here. */ break; @@ -897,11 +898,60 @@ out: * only when we need to allocate a new super block. */ static int btrfs_parse_early_options(const char *options, fmode_t flags, - void *holder, char **subvol_name, u64 *subvol_objectid, - struct btrfs_fs_devices **fs_devices) + void *holder, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; char *device_name, *opts, *orig, *p; + int error = 0; + + if (!options) + return 0; + + /* + * strsep changes the string, duplicate it because btrfs_parse_options + * gets called later + */ + opts = kstrdup(options, GFP_KERNEL); + if (!opts) + return -ENOMEM; + orig = opts; + + while ((p = strsep(&opts, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + if (token == Opt_device) { + device_name = match_strdup(&args[0]); + if (!device_name) { + error = -ENOMEM; + goto out; + } + error = btrfs_scan_one_device(device_name, + flags, holder, fs_devices); + kfree(device_name); + if (error) + goto out; + } + } + +out: + kfree(orig); + return error; +} + +/* + * Parse mount options that are related to subvolume id + * + * The value is later passed to mount_subvol() + */ +static int btrfs_parse_subvol_options(const char *options, fmode_t flags, + void *holder, char **subvol_name, u64 *subvol_objectid) +{ + substring_t args[MAX_OPT_ARGS]; + char 
*opts, *orig, *p; char *num = NULL; int error = 0; @@ -909,8 +959,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, return 0; /* - * strsep changes the string, duplicate it because parse_options - * gets called twice + * strsep changes the string, duplicate it because + * btrfs_parse_early_options gets called later */ opts = kstrdup(options, GFP_KERNEL); if (!opts) @@ -949,18 +999,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, case Opt_subvolrootid: pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n"); break; - case Opt_device: - device_name = match_strdup(&args[0]); - if (!device_name) { - error = -ENOMEM; - goto out; - } - error = btrfs_scan_one_device(device_name, - flags, holder, fs_devices); - kfree(device_name); - if (error) - goto out; - break; default: break; } @@ -1536,18 +1574,14 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, struct btrfs_fs_info *fs_info = NULL; struct security_mnt_opts new_sec_opts; fmode_t mode = FMODE_READ; - char *subvol_name = NULL; - u64 subvol_objectid = 0; int error = 0; if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; error = btrfs_parse_early_options(data, mode, fs_type, - &subvol_name, &subvol_objectid, &fs_devices); if (error) { - kfree(subvol_name); return ERR_PTR(error); } @@ -1659,7 +1693,6 @@ error_sec_opts: static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, const char *device_name, void *data) { - struct btrfs_fs_devices *fs_devices = NULL; struct vfsmount *mnt_root; struct dentry *root; fmode_t mode = FMODE_READ; @@ -1670,9 +1703,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - error = btrfs_parse_early_options(data, mode, fs_type, - &subvol_name, &subvol_objectid, - &fs_devices); + error = btrfs_parse_subvol_options(data, mode, fs_type, + &subvol_name, &subvol_objectid); if (error) { 
kfree(subvol_name); return ERR_PTR(error); -- cgit v1.2.3 From 83085935cc38b9752215556c02b2f080c96bf1be Mon Sep 17 00:00:00 2001 From: "Misono, Tomohiro" Date: Thu, 14 Dec 2017 17:25:54 +0900 Subject: btrfs: remove unused setup_root_args() Since setup_root_args() is not used anymore, just remove it. Signed-off-by: Tomohiro Misono Signed-off-by: David Sterba --- fs/btrfs/super.c | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0d615d02eb4a..89333e118c7a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1407,42 +1407,6 @@ static inline int is_subvolume_inode(struct inode *inode) return 0; } -/* - * This will add subvolid=0 to the argument string while removing any subvol= - * and subvolid= arguments to make sure we get the top-level root for path - * walking to the subvol we want. - */ -static char *setup_root_args(char *args) -{ - char *buf, *dst, *sep; - - if (!args) - return kstrdup("subvolid=0", GFP_KERNEL); - - /* The worst case is that we add ",subvolid=0" to the end. 
*/ - buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, - GFP_KERNEL); - if (!buf) - return NULL; - - while (1) { - sep = strchrnul(args, ','); - if (!strstarts(args, "subvol=") && - !strstarts(args, "subvolid=")) { - memcpy(dst, args, sep - args); - dst += sep - args; - *dst++ = ','; - } - if (*sep) - args = sep + 1; - else - break; - } - strcpy(dst, "subvolid=0"); - - return buf; -} - static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, int flags, const char *device_name, char *data, struct vfsmount *mnt) -- cgit v1.2.3 From 78f6beacd024e3ab8091a2a5c12ee7031f9ccc38 Mon Sep 17 00:00:00 2001 From: "Misono, Tomohiro" Date: Wed, 17 Jan 2018 17:38:31 +0900 Subject: btrfs: remove unused arg from parse_subvol_options() Remove unused arg 'holder' from parse_subvol_options(), which has been forgotten to be cleaned in the commit b99beb110e2d ("btrfs: split parse_early_options() in two"). Signed-off-by: Tomohiro Misono Signed-off-by: David Sterba --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 89333e118c7a..03367a64b5c1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -948,7 +948,7 @@ out: * The value is later passed to mount_subvol() */ static int btrfs_parse_subvol_options(const char *options, fmode_t flags, - void *holder, char **subvol_name, u64 *subvol_objectid) + char **subvol_name, u64 *subvol_objectid) { substring_t args[MAX_OPT_ARGS]; char *opts, *orig, *p; @@ -1667,7 +1667,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, if (!(flags & SB_RDONLY)) mode |= FMODE_WRITE; - error = btrfs_parse_subvol_options(data, mode, fs_type, + error = btrfs_parse_subvol_options(data, mode, &subvol_name, &subvol_objectid); if (error) { kfree(subvol_name); -- cgit v1.2.3 From 23ae8c63aaf82967536cba8893e5166b80b6d99a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 12 Dec 2017 20:35:02 +0100 Subject: btrfs: 
heuristic: open code get_num callback of radix sort The callback is trivial and we don't need the abstraction for our purposes. Let's open code it and also make the array types explicit. Reviewed-by: Timofey Titovets Signed-off-by: David Sterba --- fs/btrfs/compression.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 28c3940062b7..37a69d4b04ce 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1318,12 +1318,6 @@ static void copy_cell(void *dst, int dest_i, void *src, int src_i) dstv[dest_i] = srcv[src_i]; } -static u64 get_num(const void *a, int i) -{ - struct bucket_item *av = (struct bucket_item *)a; - return av[i].count; -} - /* * Use 4 bits as radix base * Use 16 u32 counters for calculating new possition in buf array @@ -1332,12 +1326,11 @@ static u64 get_num(const void *a, int i) * @array_buf - buffer array to store sorting results * must be equal in size to @array * @num - array size - * @get_num - function to extract number from array * @copy_cell - function to copy data from array to array_buf and vice versa * @get4bits - function to get 4 bits from number at specified offset */ -static void radix_sort(void *array, void *array_buf, int num, - u64 (*get_num)(const void *, int i), +static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf, + int num, void (*copy_cell)(void *dest, int dest_i, void* src, int src_i), u8 (*get4bits)(u64 num, int shift)) @@ -1355,9 +1348,9 @@ static void radix_sort(void *array, void *array_buf, int num, * Try avoid useless loop iterations for small numbers stored in big * counters. Example: 48 33 4 ... 
in 64bit array */ - max_num = get_num(array, 0); + max_num = array[0].count; for (i = 1; i < num; i++) { - buf_num = get_num(array, i); + buf_num = array[i].count; if (buf_num > max_num) max_num = buf_num; } @@ -1370,7 +1363,7 @@ static void radix_sort(void *array, void *array_buf, int num, memset(counters, 0, sizeof(counters)); for (i = 0; i < num; i++) { - buf_num = get_num(array, i); + buf_num = array[i].count; addr = get4bits(buf_num, shift); counters[addr]++; } @@ -1379,7 +1372,7 @@ static void radix_sort(void *array, void *array_buf, int num, counters[i] += counters[i - 1]; for (i = num - 1; i >= 0; i--) { - buf_num = get_num(array, i); + buf_num = array[i].count; addr = get4bits(buf_num, shift); counters[addr]--; new_addr = counters[addr]; @@ -1397,7 +1390,7 @@ static void radix_sort(void *array, void *array_buf, int num, memset(counters, 0, sizeof(counters)); for (i = 0; i < num; i ++) { - buf_num = get_num(array_buf, i); + buf_num = array_buf[i].count; addr = get4bits(buf_num, shift); counters[addr]++; } @@ -1406,7 +1399,7 @@ static void radix_sort(void *array, void *array_buf, int num, counters[i] += counters[i - 1]; for (i = num - 1; i >= 0; i--) { - buf_num = get_num(array_buf, i); + buf_num = array_buf[i].count; addr = get4bits(buf_num, shift); counters[addr]--; new_addr = counters[addr]; @@ -1444,7 +1437,7 @@ static int byte_core_set_size(struct heuristic_ws *ws) struct bucket_item *bucket = ws->bucket; /* Sort in reverse order */ - radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, get_num, copy_cell, + radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, copy_cell, get4bits); for (i = 0; i < BYTE_CORE_SET_LOW; i++) -- cgit v1.2.3 From 7add17befcfc0811b583e4c3c70849a3095f0080 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 12 Dec 2017 20:35:02 +0100 Subject: btrfs: heuristic: open code copy_call callback of radix sort The callback is trivial and we don't need the abstraction for our purposes. Let's open code it. 
Reviewed-by: Timofey Titovets Signed-off-by: David Sterba --- fs/btrfs/compression.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 37a69d4b04ce..935acabc0ea7 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1311,13 +1311,6 @@ static u8 get4bits(u64 num, int shift) { return low4bits; } -static void copy_cell(void *dst, int dest_i, void *src, int src_i) -{ - struct bucket_item *dstv = (struct bucket_item *)dst; - struct bucket_item *srcv = (struct bucket_item *)src; - dstv[dest_i] = srcv[src_i]; -} - /* * Use 4 bits as radix base * Use 16 u32 counters for calculating new possition in buf array @@ -1326,13 +1319,10 @@ static void copy_cell(void *dst, int dest_i, void *src, int src_i) * @array_buf - buffer array to store sorting results * must be equal in size to @array * @num - array size - * @copy_cell - function to copy data from array to array_buf and vice versa * @get4bits - function to get 4 bits from number at specified offset */ static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf, int num, - void (*copy_cell)(void *dest, int dest_i, - void* src, int src_i), u8 (*get4bits)(u64 num, int shift)) { u64 max_num; @@ -1376,7 +1366,7 @@ static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf, addr = get4bits(buf_num, shift); counters[addr]--; new_addr = counters[addr]; - copy_cell(array_buf, new_addr, array, i); + array_buf[new_addr] = array[i]; } shift += RADIX_BASE; @@ -1403,7 +1393,7 @@ static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf, addr = get4bits(buf_num, shift); counters[addr]--; new_addr = counters[addr]; - copy_cell(array, new_addr, array_buf, i); + array[new_addr] = array_buf[i]; } shift += RADIX_BASE; @@ -1437,8 +1427,7 @@ static int byte_core_set_size(struct heuristic_ws *ws) struct bucket_item *bucket = ws->bucket; /* Sort in reverse order */ - 
radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, copy_cell, - get4bits); + radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, get4bits); for (i = 0; i < BYTE_CORE_SET_LOW; i++) coreset_sum += bucket[i].count; -- cgit v1.2.3 From 36243c9199d6df63a0fbebd4fc49a1af21f3d8a8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 12 Dec 2017 20:35:02 +0100 Subject: btrfs: heuristic: call get4bits directly As it's a single instance and local to the file, we don't need to pass it as an argument. Reviewed-by: Timofey Titovets Signed-off-by: David Sterba --- fs/btrfs/compression.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 935acabc0ea7..208334aa6c6e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1319,11 +1319,9 @@ static u8 get4bits(u64 num, int shift) { * @array_buf - buffer array to store sorting results * must be equal in size to @array * @num - array size - * @get4bits - function to get 4 bits from number at specified offset */ static void radix_sort(struct bucket_item *array, struct bucket_item *array_buf, - int num, - u8 (*get4bits)(u64 num, int shift)) + int num) { u64 max_num; u64 buf_num; @@ -1427,7 +1425,7 @@ static int byte_core_set_size(struct heuristic_ws *ws) struct bucket_item *bucket = ws->bucket; /* Sort in reverse order */ - radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE, get4bits); + radix_sort(ws->bucket, ws->bucket_b, BUCKET_SIZE); for (i = 0; i < BYTE_CORE_SET_LOW; i++) coreset_sum += bucket[i].count; -- cgit v1.2.3 From 4271ecea64f40683d28d83ad433ddc43e5ca2ee9 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 13 Dec 2017 09:38:14 +0200 Subject: btrfs: Improve btrfs_search_slot description Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2a09577580b8..b88a79e69ddf 100644 
--- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2654,17 +2654,29 @@ int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, } /* - * look for key in the tree. path is filled in with nodes along the way - * if key is found, we return zero and you can find the item in the leaf - * level of the path (level 0) + * btrfs_search_slot - look for a key in a tree and perform necessary + * modifications to preserve tree invariants. * - * If the key isn't found, the path points to the slot where it should - * be inserted, and 1 is returned. If there are other errors during the - * search a negative error number is returned. + * @trans: Handle of transaction, used when modifying the tree + * @p: Holds all btree nodes along the search path + * @root: The root node of the tree + * @key: The key we are looking for + * @ins_len: Indicates purpose of search, for inserts it is 1, for + * deletions it's -1. 0 for plain searches + * @cow: boolean should CoW operations be performed. Must always be 1 + * when modifying the tree. * - * if ins_len > 0, nodes and leaves will be split as we walk down the - * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if - * possible) + * If @ins_len > 0, nodes and leaves will be split as we walk down the tree. 
+ * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible) + * + * If @key is found, 0 is returned and you can find the item in the leaf level + * of the path (level 0) + * + * If @key isn't found, 1 is returned and the leaf level of the path (level 0) + * points to the slot where it should be inserted + * + * If an error is encountered while searching the tree a negative error number + * is returned */ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, const struct btrfs_key *key, struct btrfs_path *p, -- cgit v1.2.3 From 32506af595dcaa8b71b7858bb54ccaa310d274fb Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 13 Dec 2017 10:25:37 +0200 Subject: btrfs: Remove redundant bio_get/set calls in compressed read/write paths bio_get/set is necessary only if the bio is going to be referenced following submissions. In the code paths where such calls are made we don't really need them since the bio is referenced only if btrfs_map_bio returns an error. And this function can return an error prior to submission only. So referencing the bio is safe. Furthermore we do call bio_endio which will consume the last reference. So let's remove the redundant calls. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 208334aa6c6e..5abcc0461ee1 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -362,8 +362,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, page->mapping = NULL; if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_get(bio); - /* * inc the count before we submit the bio so * we know the end IO handler won't happen before @@ -386,8 +384,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_endio(bio); } - bio_put(bio); - bio = btrfs_bio_alloc(bdev, first_byte); bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; @@ -403,7 +399,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, first_byte += PAGE_SIZE; cond_resched(); } - bio_get(bio); ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ @@ -419,7 +414,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_endio(bio); } - bio_put(bio); return 0; } @@ -652,8 +646,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, page->mapping = NULL; if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_get(comp_bio); - ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ @@ -680,8 +672,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_endio(comp_bio); } - bio_put(comp_bio); - comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte); bio_set_op_attrs(comp_bio, REQ_OP_READ, 0); comp_bio->bi_private = cb; @@ -691,7 +681,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, } cur_disk_byte += PAGE_SIZE; } - bio_get(comp_bio); ret = 
btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); BUG_ON(ret); /* -ENOMEM */ @@ -707,7 +696,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_endio(comp_bio); } - bio_put(comp_bio); return 0; fail2: -- cgit v1.2.3 From ea057f6dafcb6d21dd2c8be0e45ba3979b98e2b3 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 13 Dec 2017 10:25:38 +0200 Subject: btrfs: Remove redundant bio_get/set from submit_dio_repair_bio The bio that is passed is the newly created repair bio which already has a reference count of 1, which is going to be consumed by the endio routine on successful submission. On error the handler also calls bio_put. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 46df5e2a64e7..bacf71ccf0f2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7941,15 +7941,12 @@ static inline blk_status_t submit_dio_repair_bio(struct inode *inode, BUG_ON(bio_op(bio) == REQ_OP_WRITE); - bio_get(bio); - ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR); if (ret) - goto err; + return ret; ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); -err: - bio_put(bio); + return ret; } -- cgit v1.2.3 From ffc9c8dd7d5c902b31a3caae3e55f019a85c90ea Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 13 Dec 2017 10:25:39 +0200 Subject: btrfs: Remove redundant bio_get/bio_set pair from submit_one_bio The bio is never referenced after it has been submitted so there is no point in getting an extra reference. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eee9cc5db9ff..c40591486f23 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2733,7 +2733,6 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, start = page_offset(page) + bvec->bv_offset; bio->bi_private = NULL; - bio_get(bio); if (tree->ops) ret = tree->ops->submit_bio_hook(tree->private_data, bio, @@ -2741,7 +2740,6 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, else btrfsic_submit_bio(bio); - bio_put(bio); return blk_status_to_errno(ret); } -- cgit v1.2.3 From 36f7894f66f286e454bf00d308b989c3423ba0da Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 13 Dec 2017 10:25:40 +0200 Subject: btrfs: Remove redundant pair of bio_get/set in __btrfs_submit_dio_bio The bio is not referenced after it has been submitted and the endio is going to consume the sole reference on successful submission. On error, the callers of __btrfs_submit_dio_bio do invoke bio_put so we don't leak it either. 
Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bacf71ccf0f2..0c0e61c6b927 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8449,8 +8449,6 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, if (async_submit) async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); - bio_get(bio); - if (!write) { ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA); if (ret) @@ -8483,7 +8481,6 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, map: ret = btrfs_map_bio(fs_info, bio, 0, 0); err: - bio_put(bio); return ret; } -- cgit v1.2.3 From 203e02d934ed0570551b87c8d0a0a9cf917487cb Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 22 Dec 2017 16:23:01 -0700 Subject: Btrfs: remove unused wait in btrfs_stripe_hash In fact nobody is waiting on @wait's waitqueue, it can be safely removed. 
Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 1 - fs/btrfs/raid56.c | 10 ---------- 2 files changed, 11 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 09b72b6996ce..1a462ab85c49 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -679,7 +679,6 @@ enum btrfs_orphan_cleanup_state { /* used by the raid56 code to lock stripes for read/modify/write */ struct btrfs_stripe_hash { struct list_head hash_list; - wait_queue_head_t wait; spinlock_t lock; }; diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 3940906533a2..9fa45e061953 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -231,7 +231,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) cur = h + i; INIT_LIST_HEAD(&cur->hash_list); spin_lock_init(&cur->lock); - init_waitqueue_head(&cur->wait); } x = cmpxchg(&info->stripe_hash_table, NULL, table); @@ -815,15 +814,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) } goto done_nolock; - /* - * The barrier for this waitqueue_active is not needed, - * we're protected by h->lock and can't miss a wakeup. - */ - } else if (waitqueue_active(&h->wait)) { - spin_unlock(&rbio->bio_list_lock); - spin_unlock_irqrestore(&h->lock, flags); - wake_up(&h->wait); - goto done_nolock; } } done: -- cgit v1.2.3 From e215772cd2abcf33adad8857c0f5a8214a6e7d22 Mon Sep 17 00:00:00 2001 From: "Misono, Tomohiro" Date: Thu, 14 Dec 2017 17:28:00 +0900 Subject: btrfs: cleanup unnecessary string dup in btrfs_parse_options() Long ago, commit edf24abe51493 ("btrfs: sanity mount option parsing and early mount code") split the btrfs_parse_options() into two parts (btrfs_parse_early_options() and btrfs_parse_options()). As a result, btrfs_parse_options no longer gets called twice and is the last one to parse mount option string. Therefore there is no need to dup it. 
Signed-off-by: Tomohiro Misono Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 03367a64b5c1..1d33744a9326 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -414,7 +414,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, unsigned long new_flags) { substring_t args[MAX_OPT_ARGS]; - char *p, *num, *orig = NULL; + char *p, *num; u64 cache_gen; int intarg; int ret = 0; @@ -437,16 +437,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, if (!options) goto check; - /* - * strsep changes the string, duplicate it because parse_options - * gets called twice - */ - options = kstrdup(options, GFP_KERNEL); - if (!options) - return -ENOMEM; - - orig = options; - while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) @@ -887,7 +877,6 @@ out: btrfs_info(info, "disk space caching is enabled"); if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE)) btrfs_info(info, "using free space tree"); - kfree(orig); return ret; } -- cgit v1.2.3 From 5bedc48a8f9e1d62fb693c4171ffddb990d034f6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 2 Jan 2018 18:19:50 +0100 Subject: btrfs: drop unused parameters from mount_subvol Recent patches reworking the mount path left some unused parameters. We pass a vfsmount to mount_subvol, the flags and data (ie. mount options) have been already applied and we will not need them. 
Signed-off-by: David Sterba --- fs/btrfs/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1d33744a9326..f40352843c0b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1397,8 +1397,7 @@ static inline int is_subvolume_inode(struct inode *inode) } static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, - int flags, const char *device_name, - char *data, struct vfsmount *mnt) + const char *device_name, struct vfsmount *mnt) { struct dentry *root; int ret; @@ -1693,8 +1692,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } /* mount_subvol() will free subvol_name and mnt_root */ - root = mount_subvol(subvol_name, subvol_objectid, flags, device_name, - data, mnt_root); + root = mount_subvol(subvol_name, subvol_objectid, device_name, mnt_root); out: return root; -- cgit v1.2.3 From d810a4be1a625aafb2602c56c1256047f1e27380 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 7 Dec 2017 18:52:54 +0100 Subject: btrfs: add separate helper for unlock_extent_cached with GFP_ATOMIC There's only one instance where we pass different gfp mask to unlock_extent_cached. Add a separate helper for that and then we can drop the gfp parameter from unlock_extent_cached. 
Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c40591486f23..87809458b3ac 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2493,7 +2493,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, if (uptodate && tree->track_uptodate) set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC); - unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC); + unlock_extent_cached_atomic(tree, start, end, &cached); } /* diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index db2558b0cad4..978351e8e8dc 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -318,6 +318,13 @@ static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start, mask, NULL); } +static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree, + u64 start, u64 end, struct extent_state **cached) +{ + return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, + GFP_ATOMIC, NULL); +} + static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits) { -- cgit v1.2.3 From e43bbe5e16d87b40f3b382b3a43b0142d6d1193d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 12 Dec 2017 21:43:52 +0100 Subject: btrfs: sink unlock_extent parameter gfp_flags All callers pass either GFP_NOFS or GFP_KERNEL now, so we can sink the parameter to the function, though we lose some of the slightly better semantics of GFP_KERNEL in some places, it's worth cleaning up the callchains. 
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_io.c | 10 ++++------ fs/btrfs/extent_io.h | 4 ++-- fs/btrfs/file.c | 15 +++++++-------- fs/btrfs/free-space-cache.c | 5 ++--- fs/btrfs/inode.c | 26 ++++++++++++-------------- fs/btrfs/ioctl.c | 7 +++---- fs/btrfs/scrub.c | 3 +-- 8 files changed, 32 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 392d6cde4308..41d151b9ffd2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -381,7 +381,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, clear_extent_buffer_uptodate(eb); out: unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, - &cached_state, GFP_NOFS); + &cached_state); if (need_lock) btrfs_tree_read_unlock_blocking(eb); return ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 87809458b3ac..930c1ea77e91 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1648,7 +1648,7 @@ again: EXTENT_DELALLOC, 1, cached_state); if (!ret) { unlock_extent_cached(tree, delalloc_start, delalloc_end, - &cached_state, GFP_NOFS); + &cached_state); __unlock_for_delalloc(inode, locked_page, delalloc_start, delalloc_end); cond_resched(); @@ -2941,8 +2941,7 @@ static int __do_readpage(struct extent_io_tree *tree, set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); unlock_extent_cached(tree, cur, - cur + iosize - 1, - &cached, GFP_NOFS); + cur + iosize - 1, &cached); break; } em = __get_extent_map(inode, page, pg_offset, cur, @@ -3035,8 +3034,7 @@ static int __do_readpage(struct extent_io_tree *tree, set_extent_uptodate(tree, cur, cur + iosize - 1, &cached, GFP_NOFS); unlock_extent_cached(tree, cur, - cur + iosize - 1, - &cached, GFP_NOFS); + cur + iosize - 1, &cached); cur = cur + iosize; pg_offset += iosize; continue; @@ -4621,7 +4619,7 @@ out_free: out: btrfs_free_path(path); unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, - &cached_state, GFP_NOFS); + 
&cached_state); return ret; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 978351e8e8dc..72e5af2965a8 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -312,10 +312,10 @@ static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) } static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start, - u64 end, struct extent_state **cached, gfp_t mask) + u64 end, struct extent_state **cached) { return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, - mask, NULL); + GFP_NOFS, NULL); } static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1096398e1351..1ed2e6e9e204 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1504,7 +1504,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, ordered->file_offset + ordered->len > start_pos && ordered->file_offset <= last_pos) { unlock_extent_cached(&inode->io_tree, start_pos, - last_pos, cached_state, GFP_NOFS); + last_pos, cached_state); for (i = 0; i < num_pages; i++) { unlock_page(pages[i]); put_page(pages[i]); @@ -1758,8 +1758,7 @@ again: pos, copied, NULL); if (extents_locked) unlock_extent_cached(&BTRFS_I(inode)->io_tree, - lockstart, lockend, &cached_state, - GFP_NOFS); + lockstart, lockend, &cached_state); btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); if (ret) { btrfs_drop_pages(pages, num_pages); @@ -2600,7 +2599,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (ordered) btrfs_put_ordered_extent(ordered); unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, - lockend, &cached_state, GFP_NOFS); + lockend, &cached_state); ret = btrfs_wait_ordered_range(inode, lockstart, lockend - lockstart + 1); if (ret) { @@ -2751,7 +2750,7 @@ out_free: btrfs_free_block_rsv(fs_info, rsv); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, - &cached_state, GFP_NOFS); + 
&cached_state); out_only_mutex: if (!updated_inode && truncated_block && !ret && !err) { /* @@ -2913,7 +2912,7 @@ static long btrfs_fallocate(struct file *file, int mode, btrfs_put_ordered_extent(ordered); unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - &cached_state, GFP_KERNEL); + &cached_state); /* * we can't wait on the range with the transaction * running or with the extent lock held @@ -3015,7 +3014,7 @@ static long btrfs_fallocate(struct file *file, int mode, } out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - &cached_state, GFP_KERNEL); + &cached_state); out: inode_unlock(inode); /* Let go of our reservation. */ @@ -3088,7 +3087,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) *offset = min_t(loff_t, start, inode->i_size); } unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, - &cached_state, GFP_NOFS); + &cached_state); return ret; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index b8ab90c9a9fb..014f3c090231 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1125,8 +1125,7 @@ cleanup_write_cache_enospc(struct inode *inode, { io_ctl_drop_pages(io_ctl); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, cached_state, - GFP_NOFS); + i_size_read(inode) - 1, cached_state); } static int __btrfs_wait_cache_io(struct btrfs_root *root, @@ -1320,7 +1319,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, io_ctl_drop_pages(io_ctl); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, &cached_state, GFP_NOFS); + i_size_read(inode) - 1, &cached_state); /* * at this point the pages are under IO and we're happy, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0c0e61c6b927..eebfe2615428 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2100,7 +2100,7 @@ again: PAGE_SIZE); if (ordered) { 
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, - page_end, &cached_state, GFP_NOFS); + page_end, &cached_state); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); @@ -2130,7 +2130,7 @@ again: btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, - &cached_state, GFP_NOFS); + &cached_state); out_page: unlock_page(page); put_page(page); @@ -2722,7 +2722,7 @@ out_free_path: btrfs_end_transaction(trans); out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end, - &cached, GFP_NOFS); + &cached); iput(inode); return ret; } @@ -4812,7 +4812,7 @@ again: ordered = btrfs_lookup_ordered_extent(inode, block_start); if (ordered) { unlock_extent_cached(io_tree, block_start, block_end, - &cached_state, GFP_NOFS); + &cached_state); unlock_page(page); put_page(page); btrfs_start_ordered_extent(inode, ordered, 1); @@ -4829,7 +4829,7 @@ again: &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, block_start, block_end, - &cached_state, GFP_NOFS); + &cached_state); goto out_unlock; } @@ -4848,8 +4848,7 @@ again: } ClearPageChecked(page); set_page_dirty(page); - unlock_extent_cached(io_tree, block_start, block_end, &cached_state, - GFP_NOFS); + unlock_extent_cached(io_tree, block_start, block_end, &cached_state); out_unlock: if (ret) @@ -4950,7 +4949,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) if (!ordered) break; unlock_extent_cached(io_tree, hole_start, block_end - 1, - &cached_state, GFP_NOFS); + &cached_state); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); } @@ -5015,8 +5014,7 @@ next: break; } free_extent_map(em); - unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, - GFP_NOFS); + unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state); return err; } @@ -7629,7 +7627,7 @@ static int lock_extent_direct(struct 
inode *inode, u64 lockstart, u64 lockend, break; unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, - cached_state, GFP_NOFS); + cached_state); if (ordered) { /* @@ -9116,7 +9114,7 @@ again: PAGE_SIZE); if (ordered) { unlock_extent_cached(io_tree, page_start, page_end, - &cached_state, GFP_NOFS); + &cached_state); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); @@ -9149,7 +9147,7 @@ again: &cached_state, 0); if (ret) { unlock_extent_cached(io_tree, page_start, page_end, - &cached_state, GFP_NOFS); + &cached_state); ret = VM_FAULT_SIGBUS; goto out_unlock; } @@ -9175,7 +9173,7 @@ again: BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; - unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, &cached_state); out_unlock: if (!ret) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index be5bd81b3669..f6d4d5810cc1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -977,7 +977,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) /* get the big lock and read metadata off disk */ lock_extent_bits(io_tree, start, end, &cached); em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); - unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); + unlock_extent_cached(io_tree, start, end, &cached); if (IS_ERR(em)) return NULL; @@ -1128,7 +1128,7 @@ again: ordered = btrfs_lookup_ordered_extent(inode, page_start); unlock_extent_cached(tree, page_start, page_end, - &cached_state, GFP_NOFS); + &cached_state); if (!ordered) break; @@ -1204,8 +1204,7 @@ again: &cached_state); unlock_extent_cached(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, &cached_state, - GFP_NOFS); + page_start, page_end - 1, &cached_state); for (i = 0; i < i_done; i++) { clear_page_dirty_for_io(pages[i]); diff --git 
a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index d766c73eb29a..ecfe3118d9dd 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4468,8 +4468,7 @@ static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len, free_extent_map(em); out_unlock: - unlock_extent_cached(io_tree, lockstart, lockend, &cached_state, - GFP_NOFS); + unlock_extent_cached(io_tree, lockstart, lockend, &cached_state); return ret; } -- cgit v1.2.3 From 6528b99d3d20795ff947d9b3fd736affe901acef Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Mon, 18 Dec 2017 17:08:59 +0800 Subject: btrfs: factor btrfs_check_rw_degradable() to check given device Update btrfs_check_rw_degradable() to check against the given device if its lost. We can use this function to know if the volume is going to be in degraded mode OR failed state, when the given device fails. Which is needed when we are handling the device failed state. A preparatory patch does not affect the flow as such. Signed-off-by: Anand Jain Reviewed-by: Qu Wenruo [ enhance comment ] Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/super.c | 2 +- fs/btrfs/volumes.c | 10 ++++++++-- fs/btrfs/volumes.h | 4 ++-- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41d151b9ffd2..73ab44159d82 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2866,7 +2866,7 @@ retry_root_backup: goto fail_sysfs; } - if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) { + if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writeable mount is not allowed due to too many missing devices"); goto fail_sysfs; @@ -3369,7 +3369,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) static int check_barrier_error(struct btrfs_fs_info *fs_info) { - if (!btrfs_check_rw_degradable(fs_info)) + if (!btrfs_check_rw_degradable(fs_info, NULL)) return -EIO; return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 
f40352843c0b..8af7590a5638 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1844,7 +1844,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) goto restore; } - if (!btrfs_check_rw_degradable(fs_info)) { + if (!btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "too many missing devices, writeable remount is not allowed"); ret = -EACCES; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9a04245003ab..f85991a2585c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6851,10 +6851,13 @@ out_short_read: /* * Check if all chunks in the fs are OK for read-write degraded mount * + * If the @failing_dev is specified, it's accounted as missing. + * * Return true if all chunks meet the minimal RW mount requirements. * Return false if any chunk doesn't meet the minimal RW mount requirements. */ -bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info) +bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, + struct btrfs_device *failing_dev) { struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct extent_map *em; @@ -6886,9 +6889,12 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info) test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) || dev->last_flush_error) missing++; + else if (failing_dev && failing_dev == dev) + missing++; } if (missing > max_tolerated) { - btrfs_warn(fs_info, + if (!failing_dev) + btrfs_warn(fs_info, "chunk %llu missing %d devices, max tolerance is %d for writeable mount", em->start, missing, max_tolerated); free_extent_map(em); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3e3ae44b6ccc..a339bcf72feb 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -554,7 +554,7 @@ void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info, struct list_head *btrfs_get_fs_uuids(void); void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); - -bool 
btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info); +bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, + struct btrfs_device *failing_dev); #endif -- cgit v1.2.3 From 762221f095e3932669093466aaf4b85ed9ad2ac1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 2 Jan 2018 13:36:42 -0700 Subject: Btrfs: fix scrub to repair raid6 corruption The raid6 corruption is that, suppose that all disks can be read without problems and if the content that was read out doesn't match its checksum, currently for raid6 btrfs at most retries twice, - the 1st retry is to rebuild with all other stripes, it'll eventually be a raid5 xor rebuild, - if the 1st fails, the 2nd retry will deliberately fail parity p so that it will do raid6 style rebuild, however, the chances are that another non-parity stripe content also has something corrupted, so that the above retries are not able to return correct content. We've fixed normal reads to rebuild raid6 correctly with more retries in Patch "Btrfs: make raid6 rebuild retry more"[1], this is to fix scrub to do the exactly same rebuild process. 
[1]: https://patchwork.kernel.org/patch/10091755/ Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ecfe3118d9dd..b5b299b7113f 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -301,6 +301,11 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_put_ctx(struct scrub_ctx *sctx); +static inline int scrub_is_page_on_raid56(struct scrub_page *page) +{ + return page->recover && + (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); +} static void scrub_pending_bio_inc(struct scrub_ctx *sctx) { @@ -1323,15 +1328,34 @@ nodatasum_case: * could happen otherwise that a correct page would be * overwritten by a bad one). */ - for (mirror_index = 0; - mirror_index < BTRFS_MAX_MIRRORS && - sblocks_for_recheck[mirror_index].page_count > 0; - mirror_index++) { + for (mirror_index = 0; ;mirror_index++) { struct scrub_block *sblock_other; if (mirror_index == failed_mirror_index) continue; - sblock_other = sblocks_for_recheck + mirror_index; + + /* raid56's mirror can be more than BTRFS_MAX_MIRRORS */ + if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) { + if (mirror_index >= BTRFS_MAX_MIRRORS) + break; + if (!sblocks_for_recheck[mirror_index].page_count) + break; + + sblock_other = sblocks_for_recheck + mirror_index; + } else { + struct scrub_recover *r = sblock_bad->pagev[0]->recover; + int max_allowed = r->bbio->num_stripes - + r->bbio->num_tgtdevs; + + if (mirror_index >= max_allowed) + break; + if (!sblocks_for_recheck[1].page_count) + break; + + ASSERT(failed_mirror_index == 0); + sblock_other = sblocks_for_recheck + 1; + sblock_other->pagev[0]->mirror_num = 1 + mirror_index; + } /* build and submit the bios, check checksums */ scrub_recheck_block(fs_info, sblock_other, 0); @@ 
-1671,26 +1695,22 @@ static void scrub_bio_wait_endio(struct bio *bio) complete(bio->bi_private); } -static inline int scrub_is_page_on_raid56(struct scrub_page *page) -{ - return page->recover && - (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK); -} - static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info, struct bio *bio, struct scrub_page *page) { DECLARE_COMPLETION_ONSTACK(done); int ret; + int mirror_num; bio->bi_iter.bi_sector = page->logical >> 9; bio->bi_private = &done; bio->bi_end_io = scrub_bio_wait_endio; + mirror_num = page->sblock->pagev[0]->mirror_num; ret = raid56_parity_recover(fs_info, bio, page->recover->bbio, page->recover->map_length, - page->mirror_num, 0); + mirror_num, 0); if (ret) return ret; -- cgit v1.2.3 From 8810f7517a3bc4ca2d41d022446d3f5fd6b77c09 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 2 Jan 2018 13:36:41 -0700 Subject: Btrfs: make raid6 rebuild retry more There is a scenario that can end up with the rebuild process failing to return good content, i.e. suppose that all disks can be read without problems and if the content that was read out doesn't match its checksum, currently for raid6 btrfs at most retries twice, - the 1st retry is to rebuild with all other stripes, it'll eventually be a raid5 xor rebuild, - if the 1st fails, the 2nd retry will deliberately fail parity p so that it will do raid6 style rebuild, however, the chances are that another non-parity stripe content also has something corrupted, so that the above retries are not able to return correct content, and users will think of this as data loss. More seriously, if the loss happens on some important internal btree roots, it could refuse to mount. This extends btrfs to do more retries and each retry fails only one stripe. Since raid6 can tolerate 2 disk failures, if there is one more failure besides the failure on which we're recovering, this can always work.
The worst case is to retry as many times as the number of raid6 disks, but given the fact that such a scenario is really rare in practice, it's still acceptable. Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 18 ++++++++++++++---- fs/btrfs/volumes.c | 9 ++++++++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 9fa45e061953..a1dfdd35e7ab 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2159,11 +2159,21 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, } /* - * reconstruct from the q stripe if they are - * asking for mirror 3 + * Loop retry: + * for 'mirror == 2', reconstruct from all other stripes. + * for 'mirror_num > 2', select a stripe to fail on every retry. */ - if (mirror_num == 3) - rbio->failb = rbio->real_stripes - 2; + if (mirror_num > 2) { + /* + * 'mirror == 3' is to fail the p stripe and + * reconstruct from the q stripe. 'mirror > 3' is to + * fail a data stripe and reconstruct from p+q stripe. + */ + rbio->failb = rbio->real_stripes - (mirror_num - 1); + ASSERT(rbio->failb > 0); + if (rbio->failb <= rbio->faila) + rbio->failb--; + } ret = lock_stripe_add(rbio); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f85991a2585c..7ebf53cb4269 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5167,7 +5167,14 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) else if (map->type & BTRFS_BLOCK_GROUP_RAID5) ret = 2; else if (map->type & BTRFS_BLOCK_GROUP_RAID6) - ret = 3; + /* + * There could be two corrupted data stripes, we need + * to loop retry in order to rebuild the correct data. + * + * Fail a stripe at a time on every retry except the + * stripe under reconstruction. 
+ */ + ret = map->num_stripes; else ret = 1; free_extent_map(em); -- cgit v1.2.3 From 61ecda68652591c3a7131e6bdb51639612a1244c Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 4 Jan 2018 18:01:54 +0800 Subject: btrfs: remove check for BTRFS_FS_STATE_ERROR which we just set __btrfs_handle_fs_error() sets BTRFS_FS_STATE_ERROR, and calls btrfs_handle_error() so no need to check if the BTRFS_FS_STATE_ERROR is set in btrfs_handle_error(). And there is no other user of btrfs_handle_error() as well. Signed-off-by: Anand Jain Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/super.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8af7590a5638..786e8bc04f9c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -115,20 +115,18 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) if (sb_rdonly(sb)) return; - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { - sb->s_flags |= SB_RDONLY; - btrfs_info(fs_info, "forced readonly"); - /* - * Note that a running device replace operation is not - * canceled here although there is no way to update - * the progress. It would add the risk of a deadlock, - * therefore the canceling is omitted. The only penalty - * is that some I/O remains active until the procedure - * completes. The next time when the filesystem is - * mounted writeable again, the device replace - * operation continues. - */ - } + sb->s_flags |= SB_RDONLY; + btrfs_info(fs_info, "forced readonly"); + /* + * Note that a running device replace operation is not + * canceled here although there is no way to update + * the progress. It would add the risk of a deadlock, + * therefore the canceling is omitted. The only penalty + * is that some I/O remains active until the procedure + * completes. The next time when the filesystem is + * mounted writeable again, the device replace + * operation continues. 
+ */ } /* -- cgit v1.2.3 From 922ea8994a39ae56d1babbdd98228aef0543bf52 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 4 Jan 2018 18:01:55 +0800 Subject: btrfS: collapse btrfs_handle_error() into __btrfs_handle_fs_error() There is no other consumer for btrfs_handle_error() other than __btrfs_handle_fs_error(), and furthermore this function is quite small. Merge it into its parent. Signed-off-by: Anand Jain Reviewed-by: Nikolay Borisov [ reformat comment ] Signed-off-by: David Sterba --- fs/btrfs/super.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 786e8bc04f9c..6e71a2a78363 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -107,28 +107,6 @@ const char *btrfs_decode_error(int errno) return errstr; } -/* btrfs handle error by forcing the filesystem readonly */ -static void btrfs_handle_error(struct btrfs_fs_info *fs_info) -{ - struct super_block *sb = fs_info->sb; - - if (sb_rdonly(sb)) - return; - - sb->s_flags |= SB_RDONLY; - btrfs_info(fs_info, "forced readonly"); - /* - * Note that a running device replace operation is not - * canceled here although there is no way to update - * the progress. It would add the risk of a deadlock, - * therefore the canceling is omitted. The only penalty - * is that some I/O remains active until the procedure - * completes. The next time when the filesystem is - * mounted writeable again, the device replace - * operation continues. - */ -} - /* * __btrfs_handle_fs_error decodes expected errors from the caller and * invokes the approciate error response.
@@ -175,8 +153,23 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); /* Don't go through full error handling during mount */ - if (sb->s_flags & SB_BORN) - btrfs_handle_error(fs_info); + if (!(sb->s_flags & SB_BORN)) + return; + + if (sb_rdonly(sb)) + return; + + /* btrfs handle error by forcing the filesystem readonly */ + sb->s_flags |= SB_RDONLY; + btrfs_info(fs_info, "forced readonly"); + /* + * Note that a running device replace operation is not canceled here + * although there is no way to update the progress. It would add the + * risk of a deadlock, therefore the canceling is omitted. The only + * penalty is that some I/O remains active until the procedure + * completes. The next time when the filesystem is mounted writeable + * again, the device replace operation continues. + */ } #ifdef CONFIG_PRINTK -- cgit v1.2.3 From cadbc0a0670e32e42282b388ac8b7539100ac0f2 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 3 Jan 2018 16:08:30 +0800 Subject: btrfs: rename btrfs_device::scrub_device to scrub_ctx btrfs_device::scrub_device is not a device which is being scrubbed, but it holds the scrub context, so rename to reflect the same. No functional changes here. 
Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 14 +++++++------- fs/btrfs/volumes.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b5b299b7113f..ec56f33feea9 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4145,7 +4145,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, } btrfs_dev_replace_lock(&fs_info->dev_replace, 0); - if (dev->scrub_device || + if (dev->scrub_ctx || (!is_dev_replace && btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); @@ -4170,7 +4170,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return PTR_ERR(sctx); } sctx->readonly = readonly; - dev->scrub_device = sctx; + dev->scrub_ctx = sctx; mutex_unlock(&fs_info->fs_devices->device_list_mutex); /* @@ -4205,7 +4205,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, memcpy(progress, &sctx->stat, sizeof(*progress)); mutex_lock(&fs_info->scrub_lock); - dev->scrub_device = NULL; + dev->scrub_ctx = NULL; scrub_workers_put(fs_info); mutex_unlock(&fs_info->scrub_lock); @@ -4262,16 +4262,16 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, struct scrub_ctx *sctx; mutex_lock(&fs_info->scrub_lock); - sctx = dev->scrub_device; + sctx = dev->scrub_ctx; if (!sctx) { mutex_unlock(&fs_info->scrub_lock); return -ENOTCONN; } atomic_inc(&sctx->cancel_req); - while (dev->scrub_device) { + while (dev->scrub_ctx) { mutex_unlock(&fs_info->scrub_lock); wait_event(fs_info->scrub_pause_wait, - dev->scrub_device == NULL); + dev->scrub_ctx == NULL); mutex_lock(&fs_info->scrub_lock); } mutex_unlock(&fs_info->scrub_lock); @@ -4288,7 +4288,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, mutex_lock(&fs_info->fs_devices->device_list_mutex); dev = btrfs_find_device(fs_info, devid, NULL, NULL); if (dev) - sctx = dev->scrub_device; + sctx = dev->scrub_ctx; if 
(sctx) memcpy(progress, &sctx->stat, sizeof(*progress)); mutex_unlock(&fs_info->fs_devices->device_list_mutex); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a339bcf72feb..15216fed918b 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -131,7 +131,7 @@ struct btrfs_device { struct completion flush_wait; /* per-device scrub information */ - struct scrub_ctx *scrub_device; + struct scrub_ctx *scrub_ctx; struct btrfs_work work; struct rcu_head rcu; -- cgit v1.2.3 From c1f32b7c1f3be98386f3d268b786660030a44437 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 20 Dec 2017 14:42:26 +0800 Subject: btrfs: simplify mutex unlocking code in btrfs_commit_transaction No functional change rearrange the mutex_unlock. Signed-off-by: Anand Jain Reviewed-by: Nikolay Borisov [ edit subject ] Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6348573e26a7..04f07144b45c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2265,16 +2265,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) } ret = write_all_supers(fs_info, 0); - if (ret) { - mutex_unlock(&fs_info->tree_log_mutex); - goto scrub_continue; - } - /* * the super is written, we can safely allow the tree-loggers * to go about their business */ mutex_unlock(&fs_info->tree_log_mutex); + if (ret) + goto scrub_continue; btrfs_finish_extent_commit(trans, fs_info); -- cgit v1.2.3 From 05a5c55dfc308544200752f2f0860c6bdd9b7281 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 15 Dec 2017 15:40:16 +0800 Subject: btrfs: minor style cleanups in btrfs_scan_one_device Assign ret = -EINVAL where it is actually required. Remove { } around single line if else code. 
Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7ebf53cb4269..2333d48ca1d7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1186,7 +1186,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, struct btrfs_super_block *disk_super; struct block_device *bdev; struct page *page; - int ret = -EINVAL; + int ret; u64 devid; u64 transid; u64 total_devices; @@ -1208,8 +1208,10 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, goto error; } - if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) + if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) { + ret = -EINVAL; goto error_bdev_put; + } devid = btrfs_stack_device_id(&disk_super->dev_item); transid = btrfs_super_generation(disk_super); @@ -1217,11 +1219,10 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, ret = device_list_add(path, disk_super, devid, fs_devices_ret); if (ret > 0) { - if (disk_super->label[0]) { + if (disk_super->label[0]) pr_info("BTRFS: device label %s ", disk_super->label); - } else { + else pr_info("BTRFS: device fsid %pU ", disk_super->fsid); - } pr_cont("devid %llu transid %llu %s\n", devid, transid, path); ret = 0; -- cgit v1.2.3 From db34be19c4f0f5512a4adf63713e4f5494618ead Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 4 Dec 2017 15:40:35 -0700 Subject: Btrfs: remove redundant check in rbio_can_merge Given the above ' if (last->operation != cur->operation) return 0; ', it's guaranteed that two operations are same. 
Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index a1dfdd35e7ab..945e940d2368 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -594,12 +594,10 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, * bio list here, anyone else that wants to * change this stripe needs to do their own rmw. */ - if (last->operation == BTRFS_RBIO_PARITY_SCRUB || - cur->operation == BTRFS_RBIO_PARITY_SCRUB) + if (last->operation == BTRFS_RBIO_PARITY_SCRUB) return 0; - if (last->operation == BTRFS_RBIO_REBUILD_MISSING || - cur->operation == BTRFS_RBIO_REBUILD_MISSING) + if (last->operation == BTRFS_RBIO_REBUILD_MISSING) return 0; return 1; -- cgit v1.2.3 From cc54ff626a56fdd7c22febd55ed24676fda8ed83 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 11 Dec 2017 14:56:31 -0700 Subject: Btrfs: do not merge rbios if their fail stripe index are not identical Since the fail stripe index in an rbio is used to decide which reconstruction algorithm is run, we cannot merge rbios if their fail stripe indexes are different; otherwise, one of the two reconstructions would fail.
Signed-off-by: Liu Bo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 945e940d2368..7b382eeada72 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -600,6 +600,25 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, if (last->operation == BTRFS_RBIO_REBUILD_MISSING) return 0; + if (last->operation == BTRFS_RBIO_READ_REBUILD) { + int fa = last->faila; + int fb = last->failb; + int cur_fa = cur->faila; + int cur_fb = cur->failb; + + if (last->faila >= last->failb) { + fa = last->failb; + fb = last->faila; + } + + if (cur->faila >= cur->failb) { + cur_fa = cur->failb; + cur_fb = cur->faila; + } + + if (fa != cur_fa || fb != cur_fb) + return 0; + } return 1; } -- cgit v1.2.3 From f27451f229966874a8793995b8e6b74326d125df Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 25 Oct 2017 11:55:28 +0100 Subject: Btrfs: add support for fallocate's zero range operation This implements support for the zero range operation of fallocate. For now at least it's as simple as possible while reusing most of the existing fallocate and hole punching infrastructure.
Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 338 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 276 insertions(+), 62 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1ed2e6e9e204..16c8031db645 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2458,6 +2458,46 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) return ret; } +static int btrfs_punch_hole_lock_range(struct inode *inode, + const u64 lockstart, + const u64 lockend, + struct extent_state **cached_state) +{ + while (1) { + struct btrfs_ordered_extent *ordered; + int ret; + + truncate_pagecache_range(inode, lockstart, lockend); + + lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, + cached_state); + ordered = btrfs_lookup_first_ordered_extent(inode, lockend); + + /* + * We need to make sure we have no ordered extents in this range + * and nobody raced in and read a page in this range, if we did + * we need to try again. 
+ */ + if ((!ordered || + (ordered->file_offset + ordered->len <= lockstart || + ordered->file_offset > lockend)) && + !btrfs_page_exists_in_range(inode, lockstart, lockend)) { + if (ordered) + btrfs_put_ordered_extent(ordered); + break; + } + if (ordered) + btrfs_put_ordered_extent(ordered); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, + lockend, cached_state); + ret = btrfs_wait_ordered_range(inode, lockstart, + lockend - lockstart + 1); + if (ret) + return ret; + } + return 0; +} + static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -2574,38 +2614,11 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) goto out_only_mutex; } - while (1) { - struct btrfs_ordered_extent *ordered; - - truncate_pagecache_range(inode, lockstart, lockend); - - lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, - &cached_state); - ordered = btrfs_lookup_first_ordered_extent(inode, lockend); - - /* - * We need to make sure we have no ordered extents in this range - * and nobody raced in and read a page in this range, if we did - * we need to try again. 
- */ - if ((!ordered || - (ordered->file_offset + ordered->len <= lockstart || - ordered->file_offset > lockend)) && - !btrfs_page_exists_in_range(inode, lockstart, lockend)) { - if (ordered) - btrfs_put_ordered_extent(ordered); - break; - } - if (ordered) - btrfs_put_ordered_extent(ordered); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, - lockend, &cached_state); - ret = btrfs_wait_ordered_range(inode, lockstart, - lockend - lockstart + 1); - if (ret) { - inode_unlock(inode); - return ret; - } + ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend, + &cached_state); + if (ret) { + inode_unlock(inode); + goto out_only_mutex; } path = btrfs_alloc_path(); @@ -2814,6 +2827,217 @@ insert: return 0; } +static int btrfs_fallocate_update_isize(struct inode *inode, + const u64 end, + const int mode) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + int ret2; + + if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode)) + return 0; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + inode->i_ctime = current_time(inode); + i_size_write(inode, end); + btrfs_ordered_update_i_size(inode, end, NULL); + ret = btrfs_update_inode(trans, root, inode); + ret2 = btrfs_end_transaction(trans); + + return ret ? 
ret : ret2; +} + +static int btrfs_zero_range_check_range_boundary(struct inode *inode, + u64 offset) +{ + const u64 sectorsize = btrfs_inode_sectorsize(inode); + struct extent_map *em; + int ret = 0; + + offset = round_down(offset, sectorsize); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); + if (IS_ERR(em)) + return PTR_ERR(em); + + if (em->block_start == EXTENT_MAP_HOLE) + ret = 1; + + free_extent_map(em); + return ret; +} + +static int btrfs_zero_range(struct inode *inode, + loff_t offset, + loff_t len, + const int mode) +{ + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct extent_map *em; + struct extent_changeset *data_reserved = NULL; + int ret; + u64 alloc_hint = 0; + const u64 sectorsize = btrfs_inode_sectorsize(inode); + u64 alloc_start = round_down(offset, sectorsize); + u64 alloc_end = round_up(offset + len, sectorsize); + u64 bytes_to_reserve = 0; + bool space_reserved = false; + + inode_dio_wait(inode); + + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, + alloc_start, alloc_end - alloc_start, 0); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out; + } + + /* + * Avoid hole punching and extent allocation for some cases. More cases + * could be considered, but these are unlikely common and we keep things + * as simple as possible for now. Also, intentionally, if the target + * range contains one or more prealloc extents together with regular + * extents and holes, we drop all the existing extents and allocate a + * new prealloc extent, so that we get a larger contiguous disk extent. + */ + if (em->start <= alloc_start && + test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { + const u64 em_end = em->start + em->len; + + if (em_end >= offset + len) { + /* + * The whole range is already a prealloc extent, + * do nothing except updating the inode's i_size if + * needed. 
+ */ + free_extent_map(em); + ret = btrfs_fallocate_update_isize(inode, offset + len, + mode); + goto out; + } + /* + * Part of the range is already a prealloc extent, so operate + * only on the remaining part of the range. + */ + alloc_start = em_end; + ASSERT(IS_ALIGNED(alloc_start, sectorsize)); + len = offset + len - alloc_start; + offset = alloc_start; + alloc_hint = em->block_start + em->len; + } + free_extent_map(em); + + if (BTRFS_BYTES_TO_BLKS(fs_info, offset) == + BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) { + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, + alloc_start, sectorsize, 0); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out; + } + + if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { + free_extent_map(em); + ret = btrfs_fallocate_update_isize(inode, offset + len, + mode); + goto out; + } + if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) { + free_extent_map(em); + ret = btrfs_truncate_block(inode, offset, len, 0); + if (!ret) + ret = btrfs_fallocate_update_isize(inode, + offset + len, + mode); + return ret; + } + free_extent_map(em); + alloc_start = round_down(offset, sectorsize); + alloc_end = alloc_start + sectorsize; + goto reserve_space; + } + + alloc_start = round_up(offset, sectorsize); + alloc_end = round_down(offset + len, sectorsize); + + /* + * For unaligned ranges, check the pages at the boundaries, they might + * map to an extent, in which case we need to partially zero them, or + * they might map to a hole, in which case we need our allocation range + * to cover them. 
+ */ + if (!IS_ALIGNED(offset, sectorsize)) { + ret = btrfs_zero_range_check_range_boundary(inode, offset); + if (ret < 0) + goto out; + if (ret) { + alloc_start = round_down(offset, sectorsize); + ret = 0; + } else { + ret = btrfs_truncate_block(inode, offset, 0, 0); + if (ret) + goto out; + } + } + + if (!IS_ALIGNED(offset + len, sectorsize)) { + ret = btrfs_zero_range_check_range_boundary(inode, + offset + len); + if (ret < 0) + goto out; + if (ret) { + alloc_end = round_up(offset + len, sectorsize); + ret = 0; + } else { + ret = btrfs_truncate_block(inode, offset + len, 0, 1); + if (ret) + goto out; + } + } + +reserve_space: + if (alloc_start < alloc_end) { + struct extent_state *cached_state = NULL; + const u64 lockstart = alloc_start; + const u64 lockend = alloc_end - 1; + + bytes_to_reserve = alloc_end - alloc_start; + ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), + bytes_to_reserve); + if (ret < 0) + goto out; + space_reserved = true; + ret = btrfs_qgroup_reserve_data(inode, &data_reserved, + alloc_start, bytes_to_reserve); + if (ret) + goto out; + ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend, + &cached_state); + if (ret) + goto out; + ret = btrfs_prealloc_file_range(inode, mode, alloc_start, + alloc_end - alloc_start, + i_blocksize(inode), + offset + len, &alloc_hint); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, + lockend, &cached_state); + /* btrfs_prealloc_file_range releases reserved space on error */ + if (ret) + space_reserved = false; + } + out: + if (ret && space_reserved) + btrfs_free_reserved_data_space(inode, data_reserved, + alloc_start, bytes_to_reserve); + extent_changeset_free(data_reserved); + + return ret; +} + static long btrfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { @@ -2839,7 +3063,8 @@ static long btrfs_fallocate(struct file *file, int mode, cur_offset = alloc_start; /* Make sure we aren't being give some crap mode */ - if (mode & ~(FALLOC_FL_KEEP_SIZE | 
FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; if (mode & FALLOC_FL_PUNCH_HOLE) @@ -2850,10 +3075,12 @@ static long btrfs_fallocate(struct file *file, int mode, * * For qgroup space, it will be checked later. */ - ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), - alloc_end - alloc_start); - if (ret < 0) - return ret; + if (!(mode & FALLOC_FL_ZERO_RANGE)) { + ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), + alloc_end - alloc_start); + if (ret < 0) + return ret; + } inode_lock(inode); @@ -2895,6 +3122,12 @@ static long btrfs_fallocate(struct file *file, int mode, if (ret) goto out; + if (mode & FALLOC_FL_ZERO_RANGE) { + ret = btrfs_zero_range(inode, offset, len, mode); + inode_unlock(inode); + return ret; + } + locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; @@ -2988,37 +3221,18 @@ static long btrfs_fallocate(struct file *file, int mode, if (ret < 0) goto out_unlock; - if (actual_end > inode->i_size && - !(mode & FALLOC_FL_KEEP_SIZE)) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - - /* - * We didn't need to allocate any more space, but we - * still extended the size of the file so we need to - * update i_size and the inode item. - */ - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - } else { - inode->i_ctime = current_time(inode); - i_size_write(inode, actual_end); - btrfs_ordered_update_i_size(inode, actual_end, NULL); - ret = btrfs_update_inode(trans, root, inode); - if (ret) - btrfs_end_transaction(trans); - else - ret = btrfs_end_transaction(trans); - } - } + /* + * We didn't need to allocate any more space, but we still extended the + * size of the file so we need to update i_size and the inode item. 
+ */ + ret = btrfs_fallocate_update_isize(inode, actual_end, mode); out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state); out: inode_unlock(inode); /* Let go of our reservation. */ - if (ret != 0) + if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE)) btrfs_free_reserved_data_space(inode, data_reserved, alloc_start, alloc_end - cur_offset); extent_changeset_free(data_reserved); -- cgit v1.2.3 From 94f450712ac9cb4e165b5115e5eb0ab10055a64b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 31 Oct 2017 17:59:54 +0000 Subject: Btrfs: use cached state when dirtying pages during buffered write During a buffered IO write, we can have an extent state that we got when we locked the range (if the range starts at an offset lower than eof), so always pass it to btrfs_dirty_pages() so that setting the delalloc bit in the range does not need to do a full search in the inode's io tree, saving time and reducing the amount of time we hold the io tree's lock. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 16c8031db645..cba2ac371ce0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1755,7 +1755,7 @@ again: if (copied > 0) ret = btrfs_dirty_pages(inode, pages, dirty_pages, - pos, copied, NULL); + pos, copied, &cached_state); if (extents_locked) unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); -- cgit v1.2.3 From 9f13ce743b1bd4e764193980e6311bfcdf424bb2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 18 Jan 2018 11:34:20 +0000 Subject: Btrfs: fix missing inode i_size update after zero range operation For a fallocate's zero range operation that targets a range with an end that is not aligned to the sector size, we can end up not updating the inode's i_size. 
This happens when the last page of the range maps to an unwritten (prealloc) extent and before that last page we have either a hole or a written extent. This is because in this scenario we relied on a call to btrfs_prealloc_file_range() to update the inode's i_size, however it can only update the i_size to the "down aligned" end of the range. Example: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ xfs_io -f -c "pwrite -S 0xff 0 428K" /mnt/foobar $ xfs_io -c "falloc -k 428K 4K" /mnt/foobar $ xfs_io -c "fzero 0 430K" /mnt/foobar $ du --bytes /mnt/foobar 438272 /mnt/foobar The inode's i_size was left as 428Kb (438272 bytes) when it should have been updated to 430Kb (440320 bytes). Fix this by always updating the inode's i_size explicitly after zeroing the range. Fixes: ba6d5887946ff86d93dc ("Btrfs: add support for fallocate's zero range operation") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cba2ac371ce0..baad81c1f9a3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3026,9 +3026,12 @@ reserve_space: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); /* btrfs_prealloc_file_range releases reserved space on error */ - if (ret) + if (ret) { space_reserved = false; + goto out; + } } + ret = btrfs_fallocate_update_isize(inode, offset + len, mode); out: if (ret && space_reserved) btrfs_free_reserved_data_space(inode, data_reserved, -- cgit v1.2.3 From 81fdf6382b3b92f6fc5f34f9c8cd9074b25f6c0e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 18 Jan 2018 11:34:31 +0000 Subject: Btrfs: fix space leak after fallocate and zero range operations If we do a buffered write after a zero range operation that has an unaligned (with the filesystem's sector size) end which also falls within an unwritten (prealloc) extent that is currently beyond the inode's i_size, and the zero range operation has the 
flag FALLOC_FL_KEEP_SIZE, we end up leaking data and metadata space. This happens because when zeroing a range we call btrfs_truncate_block(), which does delalloc (loads the page and partially zeroes its content), and in the buffered write path we only clear existing delalloc space reservation for the range we are writing into if that range starts at an offset smaller then the inode's i_size, which makes sense since we can not have delalloc extents beyond the i_size, only unwritten extents are allowed. Example reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "falloc -k 428K 4K" /mnt/foobar $ xfs_io -c "fzero -k 0 430K" /mnt/foobar $ xfs_io -c "pwrite -S 0xaa 428K 4K" /mnt/foobar $ umount /mnt After the unmount we get the metadata and data space leaks reported in dmesg/syslog: [95794.602253] ------------[ cut here ]------------ [95794.603322] WARNING: CPU: 0 PID: 31496 at fs/btrfs/inode.c:9561 btrfs_destroy_inode+0x4e/0x206 [btrfs] [95794.605167] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs] [95794.613000] CPU: 0 PID: 31496 Comm: umount Tainted: G W 4.14.0-rc6-btrfs-next-54+ #1 [95794.614448] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [95794.615972] task: ffff880075aa0240 task.stack: ffffc90001734000 [95794.617114] RIP: 0010:btrfs_destroy_inode+0x4e/0x206 [btrfs] [95794.618001] RSP: 0018:ffffc90001737d00 EFLAGS: 00010202 [95794.618721] RAX: 0000000000000000 RBX: 
ffff880070fa1418 RCX: ffffc90001737c7c [95794.619645] RDX: 0000000175aa0240 RSI: 0000000000000001 RDI: ffff880070fa1418 [95794.620711] RBP: ffffc90001737d38 R08: 0000000000000000 R09: 0000000000000000 [95794.621932] R10: ffffc90001737c48 R11: ffff88007123e158 R12: ffff880075b6a000 [95794.623124] R13: ffff88006145c000 R14: ffff880070fa1418 R15: ffff880070c3b4a0 [95794.624188] FS: 00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 [95794.625578] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [95794.626522] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0 [95794.627647] Call Trace: [95794.628128] destroy_inode+0x3d/0x55 [95794.628573] evict+0x177/0x17e [95794.629010] dispose_list+0x50/0x71 [95794.629478] evict_inodes+0x132/0x141 [95794.630289] generic_shutdown_super+0x3f/0x10b [95794.630864] kill_anon_super+0x12/0x1c [95794.631383] btrfs_kill_super+0x16/0x21 [btrfs] [95794.631930] deactivate_locked_super+0x30/0x68 [95794.632539] deactivate_super+0x36/0x39 [95794.633200] cleanup_mnt+0x49/0x67 [95794.633818] __cleanup_mnt+0x12/0x14 [95794.634416] task_work_run+0x82/0xa6 [95794.634902] prepare_exit_to_usermode+0xe1/0x10c [95794.635525] syscall_return_slowpath+0x18c/0x1af [95794.636122] entry_SYSCALL_64_fastpath+0xab/0xad [95794.636834] RIP: 0033:0x7fa678cb99a7 [95794.637370] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [95794.638672] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7 [95794.639596] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90 [95794.640703] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015 [95794.641773] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64 [95794.643150] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160 [95794.644249] Code: ff 4c 8b a8 80 06 00 00 48 8b 87 c0 01 00 00 48 85 c0 74 02 0f ff 48 83 bb e0 02 00 00 00 74 02 0f ff 83 bb 3c ff ff ff 00 74 02 <0f> ff 83 bb 40 ff ff ff 00 74 02 0f ff 48 
83 bb f8 fe ff ff 00 [95794.646929] ---[ end trace e95877675c6ec007 ]--- [95794.647751] ------------[ cut here ]------------ [95794.648509] WARNING: CPU: 0 PID: 31496 at fs/btrfs/inode.c:9562 btrfs_destroy_inode+0x59/0x206 [btrfs] [95794.649842] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs] [95794.654659] CPU: 0 PID: 31496 Comm: umount Tainted: G W 4.14.0-rc6-btrfs-next-54+ #1 [95794.655894] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [95794.657546] task: ffff880075aa0240 task.stack: ffffc90001734000 [95794.658433] RIP: 0010:btrfs_destroy_inode+0x59/0x206 [btrfs] [95794.659279] RSP: 0018:ffffc90001737d00 EFLAGS: 00010202 [95794.660054] RAX: 0000000000000000 RBX: ffff880070fa1418 RCX: ffffc90001737c7c [95794.660753] RDX: 0000000175aa0240 RSI: 0000000000000001 RDI: ffff880070fa1418 [95794.661513] RBP: ffffc90001737d38 R08: 0000000000000000 R09: 0000000000000000 [95794.662289] R10: ffffc90001737c48 R11: ffff88007123e158 R12: ffff880075b6a000 [95794.663393] R13: ffff88006145c000 R14: ffff880070fa1418 R15: ffff880070c3b4a0 [95794.664342] FS: 00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 [95794.665673] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [95794.666593] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0 [95794.667629] Call Trace: [95794.668065] destroy_inode+0x3d/0x55 [95794.668637] evict+0x177/0x17e [95794.669179] dispose_list+0x50/0x71 
[95794.669830] evict_inodes+0x132/0x141 [95794.670416] generic_shutdown_super+0x3f/0x10b [95794.671103] kill_anon_super+0x12/0x1c [95794.671786] btrfs_kill_super+0x16/0x21 [btrfs] [95794.672552] deactivate_locked_super+0x30/0x68 [95794.673393] deactivate_super+0x36/0x39 [95794.674107] cleanup_mnt+0x49/0x67 [95794.674706] __cleanup_mnt+0x12/0x14 [95794.675279] task_work_run+0x82/0xa6 [95794.675795] prepare_exit_to_usermode+0xe1/0x10c [95794.676507] syscall_return_slowpath+0x18c/0x1af [95794.677275] entry_SYSCALL_64_fastpath+0xab/0xad [95794.678006] RIP: 0033:0x7fa678cb99a7 [95794.678600] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [95794.679739] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7 [95794.680779] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90 [95794.681837] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015 [95794.682867] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64 [95794.683891] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160 [95794.684843] Code: c0 01 00 00 48 85 c0 74 02 0f ff 48 83 bb e0 02 00 00 00 74 02 0f ff 83 bb 3c ff ff ff 00 74 02 0f ff 83 bb 40 ff ff ff 00 74 02 <0f> ff 48 83 bb f8 fe ff ff 00 74 02 0f ff 48 83 bb 00 ff ff ff [95794.687156] ---[ end trace e95877675c6ec008 ]--- [95794.687876] ------------[ cut here ]------------ [95794.688579] WARNING: CPU: 0 PID: 31496 at fs/btrfs/inode.c:9565 btrfs_destroy_inode+0x7d/0x206 [btrfs] [95794.689735] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy 
virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs] [95794.695015] CPU: 0 PID: 31496 Comm: umount Tainted: G W 4.14.0-rc6-btrfs-next-54+ #1 [95794.696396] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [95794.697956] task: ffff880075aa0240 task.stack: ffffc90001734000 [95794.698925] RIP: 0010:btrfs_destroy_inode+0x7d/0x206 [btrfs] [95794.699763] RSP: 0018:ffffc90001737d00 EFLAGS: 00010206 [95794.700434] RAX: 0000000000000000 RBX: ffff880070fa1418 RCX: ffffc90001737c7c [95794.701445] RDX: 0000000175aa0240 RSI: 0000000000000001 RDI: ffff880070fa1418 [95794.702448] RBP: ffffc90001737d38 R08: 0000000000000000 R09: 0000000000000000 [95794.703557] R10: ffffc90001737c48 R11: ffff88007123e158 R12: ffff880075b6a000 [95794.704441] R13: ffff88006145c000 R14: ffff880070fa1418 R15: ffff880070c3b4a0 [95794.705270] FS: 00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 [95794.706341] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [95794.707001] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0 [95794.708030] Call Trace: [95794.708466] destroy_inode+0x3d/0x55 [95794.709071] evict+0x177/0x17e [95794.709497] dispose_list+0x50/0x71 [95794.709973] evict_inodes+0x132/0x141 [95794.710564] generic_shutdown_super+0x3f/0x10b [95794.711200] kill_anon_super+0x12/0x1c [95794.711633] btrfs_kill_super+0x16/0x21 [btrfs] [95794.712139] deactivate_locked_super+0x30/0x68 [95794.712608] deactivate_super+0x36/0x39 [95794.713093] cleanup_mnt+0x49/0x67 [95794.713514] __cleanup_mnt+0x12/0x14 [95794.713933] task_work_run+0x82/0xa6 [95794.714543] prepare_exit_to_usermode+0xe1/0x10c [95794.715247] syscall_return_slowpath+0x18c/0x1af [95794.715952] entry_SYSCALL_64_fastpath+0xab/0xad [95794.716653] RIP: 0033:0x7fa678cb99a7 [95794.721100] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [95794.722052] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 
00007fa678cb99a7 [95794.722856] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90 [95794.723698] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015 [95794.724736] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64 [95794.725928] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160 [95794.726728] Code: 40 ff ff ff 00 74 02 0f ff 48 83 bb f8 fe ff ff 00 74 02 0f ff 48 83 bb 00 ff ff ff 00 74 02 0f ff 48 83 bb 30 ff ff ff 00 74 02 <0f> ff 48 83 bb 08 ff ff ff 00 74 02 0f ff 4d 85 e4 0f 84 52 01 [95794.729203] ---[ end trace e95877675c6ec009 ]--- [95794.841054] ------------[ cut here ]------------ [95794.841829] WARNING: CPU: 0 PID: 31496 at fs/btrfs/extent-tree.c:5831 btrfs_free_block_groups+0x235/0x36a [btrfs] [95794.843425] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs] [95794.850658] CPU: 0 PID: 31496 Comm: umount Tainted: G W 4.14.0-rc6-btrfs-next-54+ #1 [95794.852590] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [95794.854752] task: ffff880075aa0240 task.stack: ffffc90001734000 [95794.855812] RIP: 0010:btrfs_free_block_groups+0x235/0x36a [btrfs] [95794.856811] RSP: 0018:ffffc90001737d70 EFLAGS: 00010206 [95794.857805] RAX: 0000000080000000 RBX: ffff88006145c000 RCX: 0000000000000001 [95794.859014] RDX: 00000001810af668 RSI: 0000000000000002 RDI: 00000000ffffffff [95794.860270] RBP: ffffc90001737d98 R08: 0000000000000000 R09: 
ffffffff817e22b9 [95794.861525] R10: ffffc90001737c80 R11: 00000000000337fd R12: 0000000000000000 [95794.862700] R13: ffff88006145c0c0 R14: ffff88021b61a800 R15: ffff88006145c100 [95794.863810] FS: 00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 [95794.865149] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [95794.866099] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0 [95794.867198] Call Trace: [95794.867626] close_ctree+0x1db/0x2b8 [btrfs] [95794.868188] ? evict_inodes+0x132/0x141 [95794.869037] btrfs_put_super+0x15/0x17 [btrfs] [95794.870400] generic_shutdown_super+0x6a/0x10b [95794.871262] kill_anon_super+0x12/0x1c [95794.872046] btrfs_kill_super+0x16/0x21 [btrfs] [95794.872746] deactivate_locked_super+0x30/0x68 [95794.873687] deactivate_super+0x36/0x39 [95794.874639] cleanup_mnt+0x49/0x67 [95794.875504] __cleanup_mnt+0x12/0x14 [95794.876126] task_work_run+0x82/0xa6 [95794.876788] prepare_exit_to_usermode+0xe1/0x10c [95794.877777] syscall_return_slowpath+0x18c/0x1af [95794.878381] entry_SYSCALL_64_fastpath+0xab/0xad [95794.878888] RIP: 0033:0x7fa678cb99a7 [95794.879307] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [95794.880204] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7 [95794.881640] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90 [95794.882690] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015 [95794.883538] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64 [95794.884562] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160 [95794.885664] Code: 89 ef e8 07 ec 32 e1 e8 9d c0 ea e0 48 8d b3 28 02 00 00 48 83 c9 ff 31 d2 48 89 df e8 29 c5 ff ff 48 83 bb 80 02 00 00 00 74 02 <0f> ff 48 83 bb 88 02 00 00 00 74 02 0f ff 48 83 bb d8 02 00 00 [95794.887980] ---[ end trace e95877675c6ec00a ]--- [95794.888739] ------------[ cut here ]------------ [95794.889405] WARNING: CPU: 0 PID: 31496 at 
fs/btrfs/extent-tree.c:5832 btrfs_free_block_groups+0x241/0x36a [btrfs] [95794.891020] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs] [95794.897551] CPU: 0 PID: 31496 Comm: umount Tainted: G W 4.14.0-rc6-btrfs-next-54+ #1 [95794.898509] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [95794.899685] task: ffff880075aa0240 task.stack: ffffc90001734000 [95794.900592] RIP: 0010:btrfs_free_block_groups+0x241/0x36a [btrfs] [95794.901387] RSP: 0018:ffffc90001737d70 EFLAGS: 00010206 [95794.902300] RAX: 0000000080000000 RBX: ffff88006145c000 RCX: 0000000000000001 [95794.903260] RDX: 00000001810af668 RSI: 0000000000000002 RDI: 00000000ffffffff [95794.904332] RBP: ffffc90001737d98 R08: 0000000000000000 R09: ffffffff817e22b9 [95794.905300] R10: ffffc90001737c80 R11: 00000000000337fd R12: 0000000000000000 [95794.906439] R13: ffff88006145c0c0 R14: ffff88021b61a800 R15: ffff88006145c100 [95794.907459] FS: 00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 [95794.908625] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [95794.909511] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0 [95794.910630] Call Trace: [95794.911153] close_ctree+0x1db/0x2b8 [btrfs] [95794.911837] ? 
evict_inodes+0x132/0x141 [95794.912344] btrfs_put_super+0x15/0x17 [btrfs] [95794.912975] generic_shutdown_super+0x6a/0x10b [95794.913788] kill_anon_super+0x12/0x1c [95794.914424] btrfs_kill_super+0x16/0x21 [btrfs] [95794.915142] deactivate_locked_super+0x30/0x68 [95794.915831] deactivate_super+0x36/0x39 [95794.916433] cleanup_mnt+0x49/0x67 [95794.917045] __cleanup_mnt+0x12/0x14 [95794.917665] task_work_run+0x82/0xa6 [95794.918309] prepare_exit_to_usermode+0xe1/0x10c [95794.919021] syscall_return_slowpath+0x18c/0x1af [95794.919722] entry_SYSCALL_64_fastpath+0xab/0xad [95794.920426] RIP: 0033:0x7fa678cb99a7 [95794.921039] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [95794.922303] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7 [95794.923335] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90 [95794.924364] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015 [95794.925435] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64 [95794.926533] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160 [95794.927557] Code: 48 8d b3 28 02 00 00 48 83 c9 ff 31 d2 48 89 df e8 29 c5 ff ff 48 83 bb 80 02 00 00 00 74 02 0f ff 48 83 bb 88 02 00 00 00 74 02 <0f> ff 48 83 bb d8 02 00 00 00 74 02 0f ff 48 83 bb e0 02 00 00 [95794.930166] ---[ end trace e95877675c6ec00b ]--- [95794.930961] ------------[ cut here ]------------ [95794.931727] WARNING: CPU: 0 PID: 31496 at fs/btrfs/extent-tree.c:9953 btrfs_free_block_groups+0x2bc/0x36a [btrfs] [95794.932729] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi 
ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs] [95794.938394] CPU: 0 PID: 31496 Comm: umount Tainted: G W 4.14.0-rc6-btrfs-next-54+ #1 [95794.939842] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [95794.941455] task: ffff880075aa0240 task.stack: ffffc90001734000 [95794.942336] RIP: 0010:btrfs_free_block_groups+0x2bc/0x36a [btrfs] [95794.943268] RSP: 0018:ffffc90001737d70 EFLAGS: 00010206 [95794.944127] RAX: ffff8802004fd0e8 RBX: ffff88006145c000 RCX: 0000000000000001 [95794.945211] RDX: 00000001810af668 RSI: 0000000000000002 RDI: 00000000ffffffff [95794.946316] RBP: ffffc90001737d98 R08: 0000000000000000 R09: ffffffff817e22b9 [95794.947271] R10: ffffc90001737c80 R11: 00000000000337fd R12: ffff8802004fd0e8 [95794.948219] R13: ffff88006145c0c0 R14: ffff88006145e598 R15: ffff88006145c100 [95794.949193] FS: 00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 [95794.950495] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [95794.951338] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0 [95794.952361] Call Trace: [95794.952811] close_ctree+0x1db/0x2b8 [btrfs] [95794.953522] ? 
evict_inodes+0x132/0x141 [95794.954543] btrfs_put_super+0x15/0x17 [btrfs] [95794.955231] generic_shutdown_super+0x6a/0x10b [95794.955916] kill_anon_super+0x12/0x1c [95794.956414] btrfs_kill_super+0x16/0x21 [btrfs] [95794.956953] deactivate_locked_super+0x30/0x68 [95794.957635] deactivate_super+0x36/0x39 [95794.958256] cleanup_mnt+0x49/0x67 [95794.958701] __cleanup_mnt+0x12/0x14 [95794.959181] task_work_run+0x82/0xa6 [95794.959635] prepare_exit_to_usermode+0xe1/0x10c [95794.960182] syscall_return_slowpath+0x18c/0x1af [95794.960731] entry_SYSCALL_64_fastpath+0xab/0xad [95794.961438] RIP: 0033:0x7fa678cb99a7 [95794.961990] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [95794.963111] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7 [95794.963975] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90 [95794.964680] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015 [95794.965763] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64 [95794.966868] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160 [95794.967800] Code: 00 00 00 4c 8b a3 98 25 00 00 49 83 bc 24 60 ff ff ff 00 75 16 49 83 bc 24 68 ff ff ff 00 75 0b 49 83 bc 24 70 ff ff ff 00 74 16 <0f> ff 49 8d b4 24 18 ff ff ff 31 c9 31 d2 48 89 df e8 93 7a ff [95794.970629] ---[ end trace e95877675c6ec00c ]--- [95794.971451] BTRFS info (device sdi): space_info 1 has 7680000 free, is not full [95794.972351] BTRFS info (device sdi): space_info total=8388608, used=704512, pinned=0, reserved=0, may_use=4096, readonly=0 [95794.973595] ------------[ cut here ]------------ [95794.974353] WARNING: CPU: 0 PID: 31496 at fs/btrfs/extent-tree.c:9953 btrfs_free_block_groups+0x2bc/0x36a [btrfs] [95794.980163] Modules linked in: btrfs xfs ppdev ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper parport_pc psmouse sg i2c_piix4 parport i2c_core evdev pcspkr button serio_raw sunrpc loop autofs4 ext4 crc16 mbcache 
jbd2 zstd_decompress zstd_compress xxhash raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod sd_mod virtio_scsi ata_generic crc32c_intel ata_piix floppy virtio_pci virtio_ring virtio libata scsi_mod e1000 [last unloaded: btrfs] [95794.986461] CPU: 0 PID: 31496 Comm: umount Tainted: G W 4.14.0-rc6-btrfs-next-54+ #1 [95794.987591] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [95794.988929] task: ffff880075aa0240 task.stack: ffffc90001734000 [95794.989922] RIP: 0010:btrfs_free_block_groups+0x2bc/0x36a [btrfs] [95794.990715] RSP: 0018:ffffc90001737d70 EFLAGS: 00010206 [95794.991431] RAX: ffff88020f6e70e8 RBX: ffff88006145c000 RCX: ffffffff8115a906 [95794.992455] RDX: ffffffff8115a902 RSI: ffff880075aa0b40 RDI: ffff880075aa0b40 [95794.993535] RBP: ffffc90001737d98 R08: 0000000000000020 R09: fffffffffffffff7 [95794.994573] R10: 00000000ffffffc4 R11: ffff8800633b1bc0 R12: ffff88020f6e70e8 [95794.996250] R13: 0000000000000038 R14: ffff88006145e598 R15: 0000000000000000 [95794.997233] FS: 00007fa6793c92c0(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 [95794.998592] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [95794.999484] CR2: 000056338670d048 CR3: 00000000610dc005 CR4: 00000000001606f0 [95795.000542] Call Trace: [95795.001138] close_ctree+0x1db/0x2b8 [btrfs] [95795.001885] ? 
evict_inodes+0x132/0x141 [95795.002407] btrfs_put_super+0x15/0x17 [btrfs] [95795.003093] generic_shutdown_super+0x6a/0x10b [95795.003720] kill_anon_super+0x12/0x1c [95795.004353] btrfs_kill_super+0x16/0x21 [btrfs] [95795.005095] deactivate_locked_super+0x30/0x68 [95795.005716] deactivate_super+0x36/0x39 [95795.006388] cleanup_mnt+0x49/0x67 [95795.006939] __cleanup_mnt+0x12/0x14 [95795.007512] task_work_run+0x82/0xa6 [95795.008124] prepare_exit_to_usermode+0xe1/0x10c [95795.008994] syscall_return_slowpath+0x18c/0x1af [95795.009831] entry_SYSCALL_64_fastpath+0xab/0xad [95795.010610] RIP: 0033:0x7fa678cb99a7 [95795.011193] RSP: 002b:00007ffccf0aaed8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 [95795.012327] RAX: 0000000000000000 RBX: 0000563386706030 RCX: 00007fa678cb99a7 [95795.013432] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000056338670ca90 [95795.014558] RBP: 000056338670ca90 R08: 000056338670c740 R09: 0000000000000015 [95795.015577] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fa6791bae64 [95795.016569] R13: 0000000000000000 R14: 0000563386706210 R15: 00007ffccf0ab160 [95795.017662] Code: 00 00 00 4c 8b a3 98 25 00 00 49 83 bc 24 60 ff ff ff 00 75 16 49 83 bc 24 68 ff ff ff 00 75 0b 49 83 bc 24 70 ff ff ff 00 74 16 <0f> ff 49 8d b4 24 18 ff ff ff 31 c9 31 d2 48 89 df e8 93 7a ff [95795.020538] ---[ end trace e95877675c6ec00d ]--- [95795.021259] BTRFS info (device sdi): space_info 4 has 1072775168 free, is not full [95795.022390] BTRFS info (device sdi): space_info total=1073741824, used=114688, pinned=0, reserved=0, may_use=786432, readonly=65536 Fix this by ensuring the zero range operation does not call btrfs_truncate_block() if the corresponding extent is an unwritten one (it's pointless anyway, since reading from an unwritten extent yields zeroes). 
Signed-off-by: Filipe Manana Tested-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index baad81c1f9a3..06a631f89b1e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2852,12 +2852,18 @@ static int btrfs_fallocate_update_isize(struct inode *inode, return ret ? ret : ret2; } +enum { + RANGE_BOUNDARY_WRITTEN_EXTENT = 0, + RANGE_BOUNDARY_PREALLOC_EXTENT = 1, + RANGE_BOUNDARY_HOLE = 2, +}; + static int btrfs_zero_range_check_range_boundary(struct inode *inode, u64 offset) { const u64 sectorsize = btrfs_inode_sectorsize(inode); struct extent_map *em; - int ret = 0; + int ret; offset = round_down(offset, sectorsize); em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); @@ -2865,7 +2871,11 @@ static int btrfs_zero_range_check_range_boundary(struct inode *inode, return PTR_ERR(em); if (em->block_start == EXTENT_MAP_HOLE) - ret = 1; + ret = RANGE_BOUNDARY_HOLE; + else if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + ret = RANGE_BOUNDARY_PREALLOC_EXTENT; + else + ret = RANGE_BOUNDARY_WRITTEN_EXTENT; free_extent_map(em); return ret; @@ -2974,13 +2984,15 @@ static int btrfs_zero_range(struct inode *inode, ret = btrfs_zero_range_check_range_boundary(inode, offset); if (ret < 0) goto out; - if (ret) { + if (ret == RANGE_BOUNDARY_HOLE) { alloc_start = round_down(offset, sectorsize); ret = 0; - } else { + } else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) { ret = btrfs_truncate_block(inode, offset, 0, 0); if (ret) goto out; + } else { + ret = 0; } } @@ -2989,13 +3001,15 @@ static int btrfs_zero_range(struct inode *inode, offset + len); if (ret < 0) goto out; - if (ret) { + if (ret == RANGE_BOUNDARY_HOLE) { alloc_end = round_up(offset + len, sectorsize); ret = 0; - } else { + } else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) { ret = btrfs_truncate_block(inode, offset + len, 0, 1); if (ret) goto out; + } else { + ret 
= 0; } } -- cgit v1.2.3 From a6f93c71d412ba8ed743152c3a54ad0b78dcd9c7 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 15 Nov 2017 16:28:11 -0700 Subject: Btrfs: avoid losing data raid profile when deleting a device We've avoided data losing raid profile when doing balance, but it turns out that deleting a device could also result in the same problem. Say we have 3 disks, and they're created with '-d raid1' profile. - We have chunk P (the only data chunk on the empty btrfs). - Suppose that chunk P's two raid1 copies reside in disk A and disk B. - Now, 'btrfs device remove disk B' btrfs_rm_device() -> btrfs_shrink_device() -> btrfs_relocate_chunk() #relocate any chunk on disk B to other places. - Chunk P will be removed and a new chunk will be created to hold those data, but as chunk P is the only one holding raid1 profile, after it goes away, the new chunk will be created as single profile which is our default profile. This fixes the problem by creating an empty data chunk before relocating the data chunk. Metadata/System chunk are supposed to have non-zero bytes all the time so their raid profile is preserved. Reported-by: James Alandt Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 84 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2333d48ca1d7..c831a089471f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3106,6 +3106,48 @@ error: return ret; } +/* + * return 1 : allocate a data chunk successfully, + * return <0: errors during allocating a data chunk, + * return 0 : no need to allocate a data chunk. 
+ */ +static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info, + u64 chunk_offset) +{ + struct btrfs_block_group_cache *cache; + u64 bytes_used; + u64 chunk_type; + + cache = btrfs_lookup_block_group(fs_info, chunk_offset); + ASSERT(cache); + chunk_type = cache->flags; + btrfs_put_block_group(cache); + + if (chunk_type & BTRFS_BLOCK_GROUP_DATA) { + spin_lock(&fs_info->data_sinfo->lock); + bytes_used = fs_info->data_sinfo->bytes_used; + spin_unlock(&fs_info->data_sinfo->lock); + + if (!bytes_used) { + struct btrfs_trans_handle *trans; + int ret; + + trans = btrfs_join_transaction(fs_info->tree_root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + ret = btrfs_force_chunk_alloc(trans, fs_info, + BTRFS_BLOCK_GROUP_DATA); + btrfs_end_transaction(trans); + if (ret < 0) + return ret; + + return 1; + } + } + return 0; +} + static int insert_balance_item(struct btrfs_fs_info *fs_info, struct btrfs_balance_control *bctl) { @@ -3564,7 +3606,6 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) u32 count_meta = 0; u32 count_sys = 0; int chunk_reserved = 0; - u64 bytes_used = 0; /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; @@ -3723,28 +3764,21 @@ again: goto loop; } - ASSERT(fs_info->data_sinfo); - spin_lock(&fs_info->data_sinfo->lock); - bytes_used = fs_info->data_sinfo->bytes_used; - spin_unlock(&fs_info->data_sinfo->lock); - - if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && - !chunk_reserved && !bytes_used) { - trans = btrfs_start_transaction(chunk_root, 0); - if (IS_ERR(trans)) { - mutex_unlock(&fs_info->delete_unused_bgs_mutex); - ret = PTR_ERR(trans); - goto error; - } - - ret = btrfs_force_chunk_alloc(trans, fs_info, - BTRFS_BLOCK_GROUP_DATA); - btrfs_end_transaction(trans); + if (!chunk_reserved) { + /* + * We may be relocating the only data chunk we have, + * which could potentially end up with losing data's + * raid profile, so lets allocate an empty one in + * advance. 
+ */ + ret = btrfs_may_alloc_data_chunk(fs_info, + found_key.offset); if (ret < 0) { mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto error; + } else if (ret == 1) { + chunk_reserved = 1; } - chunk_reserved = 1; } ret = btrfs_relocate_chunk(fs_info, found_key.offset); @@ -4507,6 +4541,18 @@ again: chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); btrfs_release_path(path); + /* + * We may be relocating the only data chunk we have, + * which could potentially end up with losing data's + * raid profile, so lets allocate an empty one in + * advance. + */ + ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset); + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); + goto done; + } + ret = btrfs_relocate_chunk(fs_info, chunk_offset); mutex_unlock(&fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) -- cgit v1.2.3 From e2731e55884f2138a252b0a3d7b24d57e49c3c59 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 9 Jan 2018 09:05:41 +0800 Subject: btrfs: define SUPER_FLAG_METADUMP_V2 btrfs-progs uses super flag bit BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34). So just define that in the kernel so that we know it's been used.
Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 ++- include/uapi/linux/btrfs_tree.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 73ab44159d82..1916016e9fbb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -61,7 +61,8 @@ BTRFS_HEADER_FLAG_RELOC |\ BTRFS_SUPER_FLAG_ERROR |\ BTRFS_SUPER_FLAG_SEEDING |\ - BTRFS_SUPER_FLAG_METADUMP) + BTRFS_SUPER_FLAG_METADUMP |\ + BTRFS_SUPER_FLAG_METADUMP_V2) static const struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 6d6e5da51527..38ab0e06259a 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -456,6 +456,7 @@ struct btrfs_free_space_header { #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) +#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) /* -- cgit v1.2.3 From 98820a7e244b17b8a4d9e9d1ff9d3b4e5bfca58b Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 9 Jan 2018 09:05:42 +0800 Subject: btrfs: add support for SUPER_FLAG_CHANGING_FSID The UUID change by btrfstune sets SUPER_FLAG_CHANGING_FSID and resets it only when changing fsid is complete. It's not a good idea to mount the device at any point in between, as reading metadata blocks would fail with a UUID mismatch. This patch doesn't add SUPER_FLAG_CHANGING_FSID into BTRFS_SUPER_FLAG_SUPP list, so mount will fail (along with the fix in the next patch) when SUPER_FLAG_CHANGING_FSID is set. 
Signed-off-by: Anand Jain Reviewed-by: Qu Wenruo Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 38ab0e06259a..aff1356c2bb8 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -457,6 +457,7 @@ struct btrfs_free_space_header { #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) #define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) +#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) /* -- cgit v1.2.3 From 6f794e3c5c8f8fdd3b5bb20d9ded894e685b5bbe Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Tue, 9 Jan 2018 09:05:43 +0800 Subject: btrfs: fail mount when sb flag is not in BTRFS_SUPER_FLAG_SUPP It appears from the original commit [1] that there isn't any design specific reason not to fail the mount instead of just warning. This patch will change it to fail. 
[1] commit 319e4d0661e5323c9f9945f0f8fb5905e5fe74c3 btrfs: Enhance super validation check Fixes: 319e4d0661e5323 ("btrfs: Enhance super validation check") Signed-off-by: Anand Jain Reviewed-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1916016e9fbb..ed095202942f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3907,9 +3907,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) btrfs_err(fs_info, "no valid FS found"); ret = -EINVAL; } - if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) - btrfs_warn(fs_info, "unrecognized super flag: %llu", + if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) { + btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu", btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); + ret = -EINVAL; + } if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "tree_root level too big: %d >= %d", btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); -- cgit v1.2.3 From 6670d4c2d9b7d352cbf90cd18c516bb6444acf34 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 8 Jan 2018 19:51:22 +0800 Subject: btrfs: use correct string length in DEV_INFO ioctl gcc-8 reports: fs/btrfs/ioctl.c: In function 'btrfs_ioctl': ./include/linux/string.h:245:9: warning: '__builtin_strncpy' specified bound 1024 equals destination size [-Wstringop-truncation] We need one less byte or call strlcpy() to make it a nul-terminated string. This is done on the next line anyway, but we want to avoid the warning. 
Signed-off-by: Xiongfeng Wang Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f6d4d5810cc1..f573cad72b7e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2798,7 +2798,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, struct rcu_string *name; name = rcu_dereference(dev->name); - strncpy(di_args->path, name->str, sizeof(di_args->path)); + strncpy(di_args->path, name->str, sizeof(di_args->path) - 1); di_args->path[sizeof(di_args->path) - 1] = 0; } else { di_args->path[0] = '\0'; -- cgit v1.2.3 From e2683fc9d219430f5b78889b50cde7f40efeba7b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 10 Jan 2018 15:13:07 +0100 Subject: btrfs: tree-check: reduce stack consumption in check_dir_item I've noticed that the updated item checker stack consumption increased dramatically in 542f5385e20cf97447 ("btrfs: tree-checker: Add checker for dir item") tree-checker.c:check_leaf +552 (176 -> 728) The array is 255 bytes long, dynamic allocation would slow down the sanity checks so it's more reasonable to keep it on-stack. 
Moving the variable to the scope of use reduces the stack usage again tree-checker.c:check_leaf -264 (728 -> 464) Reviewed-by: Josef Bacik Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 7c55e3ba5a6c..c3c8d48f6618 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -259,7 +259,6 @@ static int check_dir_item(struct btrfs_root *root, di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); while (cur < item_size) { - char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; u32 name_len; u32 data_len; u32 max_name_len; @@ -342,6 +341,8 @@ static int check_dir_item(struct btrfs_root *root, */ if (key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_XATTR_ITEM_KEY) { + char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + read_extent_buffer(leaf, namebuf, (unsigned long)(di + 1), name_len); name_hash = btrfs_name_hash(namebuf, name_len); -- cgit v1.2.3 From 1c94da9dd913e69c67fbc77603d56ffa61e454c2 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 10 Jan 2018 13:15:18 +0800 Subject: btrfs: cleanup btrfs_free_stale_device() usage We call btrfs_free_stale_device() only when we alloc a new struct btrfs_device (ret=1), so move it closer to where we alloc the new device. Also drop the comments. Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c831a089471f..11fccf2fcb1f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -782,6 +782,7 @@ static noinline int device_list_add(const char *path, ret = 1; device->fs_devices = fs_devices; + btrfs_free_stale_device(device); } else if (!device->name || strcmp(device->name->str, path)) { /* * When FS is already mounted. 
@@ -840,13 +841,6 @@ static noinline int device_list_add(const char *path, if (!fs_devices->opened) device->generation = found_transid; - /* - * if there is new btrfs on an already registered device, - * then remove the stale device entry. - */ - if (ret > 0) - btrfs_free_stale_device(device); - *fs_devices_ret = fs_devices; return ret; -- cgit v1.2.3 From d2560ebd231e1f8f6f125d1721cf14bc8d5422b3 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 11 Jan 2018 15:46:44 +0200 Subject: btrfs: Make btrfs_inode_rsv_release static It's not used outside of extent-tree so there is no reason to not be static. Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8d51e4bb67c1..05751a677da4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5773,7 +5773,7 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode, * This is the same as btrfs_block_rsv_release, except that it handles the * tracepoint for the reservation. */ -void btrfs_inode_rsv_release(struct btrfs_inode *inode) +static void btrfs_inode_rsv_release(struct btrfs_inode *inode) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; -- cgit v1.2.3 From df6703e15c493fcc7b7f7576dff17d82e460945b Mon Sep 17 00:00:00 2001 From: Su Yue Date: Fri, 12 Jan 2018 11:08:02 +0800 Subject: btrfs: correct wrong comment about magic number of index_cnt There is no function named btrfs_get_inode_index_count. Explanation for magic number index_cnt=2 in btrfs_new_inode() is actually located in btrfs_set_inode_index_count(). So replace 'btrfs_get_inode_index_count' in the comment by 'btrfs_set_inode_index_count'. 
Signed-off-by: Su Yue Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eebfe2615428..633b83b709a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6315,7 +6315,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, } /* * index_cnt is ignored for everything but a dir, - * btrfs_get_inode_index_count has an explanation for the magic + * btrfs_set_inode_index_count has an explanation for the magic * number */ BTRFS_I(inode)->index_cnt = 2; -- cgit v1.2.3 From 0198e5b707cfeb5defbd1b71b1ec6e71580d7db9 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 12 Jan 2018 18:07:01 -0700 Subject: Btrfs: raid56: iterate raid56 internal bio with bio_for_each_segment_all Bio iterated by set_bio_pages_uptodate() is raid56 internal one, so it will never be a BIO_CLONED bio, and since this is called by end_io functions, bio->bi_iter.bi_size is zero, we mustn't use bio_for_each_segment() as that is a no-op if bi_size is zero. 
Fixes: 6592e58c6b68e61f003a01ba29a3716e7e2e9484 ("Btrfs: fix write corruption due to bio cloning on raid5/6") Cc: # v4.12-rc6+ Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 7b382eeada72..f86ba6a319fe 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1441,14 +1441,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio, */ static void set_bio_pages_uptodate(struct bio *bio) { - struct bio_vec bvec; - struct bvec_iter iter; + struct bio_vec *bvec; + int i; - if (bio_flagged(bio, BIO_CLONED)) - bio->bi_iter = btrfs_io_bio(bio)->iter; + ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment(bvec, bio, iter) - SetPageUptodate(bvec.bv_page); + bio_for_each_segment_all(bvec, bio, i) + SetPageUptodate(bvec->bv_page); } /* -- cgit v1.2.3 From 44ac474def527a9c595a88400f4717dacae03b8a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 12 Jan 2018 18:07:02 -0700 Subject: Btrfs: do not cache rbio pages if using raid6 recover Since raid6 recover tries all possible combinations of failed stripes, - when raid6 rebuild algorithm is used, i.e. raid6_datap_recov() and raid6_2data_recov(), it may change the in-memory content of failed stripes, if such a raid bio is cached, a later raid write rmw or recover can steal @stripe_pages from it instead of reading from disks, such that it carries the wrong content to do write rmw or recovery and ends up with corruption or recovery failures. - when raid5 rebuild algorithm is used, i.e. xor, raid bio can be cached because the only failed stripe which contains @rbio->bio_pages gets modified, others remain the same so that their in-memory content is consistent with their on-disk content. This adds a check to skip caching rbio if using raid6 recover. 
Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f86ba6a319fe..2f1ff7007280 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1974,7 +1974,22 @@ cleanup: cleanup_io: if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { - if (err == BLK_STS_OK) + /* + * - In case of two failures, where rbio->failb != -1: + * + * Do not cache this rbio since the above read reconstruction + * (raid6_datap_recov() or raid6_2data_recov()) may have + * changed some content of stripes which are not identical to + * on-disk content any more, otherwise, a later write/recover + * may steal stripe_pages from this rbio and end up with + * corruptions or rebuild failures. + * + * - In case of single failure, where rbio->failb == -1: + * + * Cache this rbio iff the above read reconstruction is + * excuted without problems. + */ + if (err == BLK_STS_OK && rbio->failb < 0) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); -- cgit v1.2.3 From 7583d8d088ff2c323b1d4f15b191ca2c23d32558 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 9 Jan 2018 18:36:25 -0700 Subject: Btrfs: raid56: fix race between merge_bio and rbio_orig_end_io MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before rbio_orig_end_io() goes to free rbio, rbio may get merged with more bios from other rbios and rbio->bio_list becomes non-empty, in that case, these newly merged bios don't end properly. Once unlock_stripe() is done, rbio->bio_list will not be updated any more and we can call bio_endio() on all queued bios. It should only happen in error-out cases, the normal path of recover and full stripe write have already set RBIO_RMW_LOCKED_BIT to disable merge before doing IO, so rbio_orig_end_io() called by them doesn't have the above issue. 
Reported-by: Jérôme Carretero Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 2f1ff7007280..dec0907dfb8a 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -864,10 +864,17 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) kfree(rbio); } -static void free_raid_bio(struct btrfs_raid_bio *rbio) +static void rbio_endio_bio_list(struct bio *cur, blk_status_t err) { - unlock_stripe(rbio); - __free_raid_bio(rbio); + struct bio *next; + + while (cur) { + next = cur->bi_next; + cur->bi_next = NULL; + cur->bi_status = err; + bio_endio(cur); + cur = next; + } } /* @@ -877,20 +884,26 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio) static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err) { struct bio *cur = bio_list_get(&rbio->bio_list); - struct bio *next; + struct bio *extra; if (rbio->generic_bio_cnt) btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); - free_raid_bio(rbio); + /* + * At this moment, rbio->bio_list is empty, however since rbio does not + * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the + * hash list, rbio may be merged with others so that rbio->bio_list + * becomes non-empty. + * Once unlock_stripe() is done, rbio->bio_list will not be updated any + * more and we can call bio_endio() on all queued bios. 
+ */ + unlock_stripe(rbio); + extra = bio_list_get(&rbio->bio_list); + __free_raid_bio(rbio); - while (cur) { - next = cur->bi_next; - cur->bi_next = NULL; - cur->bi_status = err; - bio_endio(cur); - cur = next; - } + rbio_endio_bio_list(cur, err); + if (extra) + rbio_endio_bio_list(extra, err); } /* -- cgit v1.2.3 From 3cbf26da5ec85f1d3177eee534e377c707ed89e1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 17 Jan 2018 12:21:49 -0800 Subject: btrfs: Remove unused readahead spinlock The reada_lock in struct btrfs_device was only initialised, and not actually used. That's good because there's another lock also called reada_lock in the btrfs_fs_info that was quite heavily used. Remove this one. Signed-off-by: Matthew Wilcox Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 1 - fs/btrfs/volumes.h | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 11fccf2fcb1f..5750cd9df417 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -319,7 +319,6 @@ static struct btrfs_device *__alloc_device(void) spin_lock_init(&dev->io_lock); - spin_lock_init(&dev->reada_lock); atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); btrfs_device_data_ordered_init(dev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 15216fed918b..28c28eeadff3 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -137,7 +137,6 @@ struct btrfs_device { struct rcu_head rcu; /* readahead state */ - spinlock_t reada_lock; atomic_t reada_in_flight; u64 reada_next; struct reada_zone *reada_curr_zone; -- cgit v1.2.3 From a520a7e0b548690ea8eb77bb45800459351a7cf5 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:08 -0700 Subject: Btrfs: fix incorrect block_len in merge_extent_mapping %block_len could be checked on deciding if two em are mergeable. 
merge_extent_mapping() has only added the front pad if the front part of em gets truncated, but it's possible that the end part gets truncated. For both compressed extent and inline extent, em->block_len is not adjusted accordingly, and for regular extent, em->block_len always equals to em->len, hence this sets em->block_len with em->len. Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 633b83b709a1..f9fd8b14ef08 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6874,7 +6874,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, if (em->block_start < EXTENT_MAP_LAST_BYTE && !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { em->block_start += start_diff; - em->block_len -= start_diff; + em->block_len = em->len; } return add_extent_mapping(em_tree, em, 0); } -- cgit v1.2.3 From 18e83ac75bfe67009c4ddcdd581bba8eb16f4030 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:09 -0700 Subject: Btrfs: fix unexpected EEXIST from btrfs_get_extent This fixes a corner case that is caused by a race of dio write vs dio read/write. Here is how the race could happen. Suppose that no extent map has been loaded into memory yet. There is a file extent [0, 32K), two jobs are running concurrently against it, t1 is doing dio write to [8K, 32K) and t2 is doing dio read from [0, 4K) or [4K, 8K). t1 goes ahead of t2 and splits em [0, 32K) to em [0K, 8K) and [8K 32K). 
------------------------------------------------------ t1 t2 btrfs_get_blocks_direct() btrfs_get_blocks_direct() -> btrfs_get_extent() -> btrfs_get_extent() -> lookup_extent_mapping() -> add_extent_mapping() -> lookup_extent_mapping() # load [0, 32K) -> btrfs_new_extent_direct() -> btrfs_drop_extent_cache() # split [0, 32K) and # drop [8K, 32K) -> add_extent_mapping() # add [8K, 32K) -> add_extent_mapping() # handle -EEXIST when adding # [0, 32K) ------------------------------------------------------ About how t2(dio read/write) runs into -EEXIST: a) add_extent_mapping() gets -EEXIST for adding em [0, 32k), b) search_extent_mapping() then returns [0, 8k) as the existing em, even though start == existing->start, em is [0, 32k) so that extent_map_end(em) > extent_map_end(existing), i.e. 32k > 8k, c) then it goes thru merge_extent_mapping() which tries to add a [8k, 8k) (with a length 0) and returns -EEXIST as [8k, 32k) is already in tree, d) so btrfs_get_extent() ends up returning -EEXIST to dio read/write, which is confusing applications. Here I conclude all the possible situations, 1) start < existing->start +-----------+em+-----------+ +--prev---+ | +-------------+ | | | | | | | +---------+ + +---+existing++ ++ + | + start 2) start == existing->start +------------em------------+ | +-------------+ | | | | | + +----existing-+ + | | + start 3) start > existing->start && start < (existing->start + existing->len) +------------em------------+ | +-------------+ | | | | | + +----existing-+ + | | + start 4) start >= (existing->start + existing->len) +-----------+em+-----------+ | +-------------+ | +--next---+ | | | | | | + +---+existing++ + +---------+ + | + start As we can see, it turns out that if start is within existing em (front inclusive), then the existing em should be returned as is, otherwise, we try our best to merge candidate em with sibling ems to form a larger em (in order to reduce the total number of em). 
Reported-by: David Vallender Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f9fd8b14ef08..4ab713bd4139 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7153,19 +7153,12 @@ insert: * existing will always be non-NULL, since there must be * extent causing the -EEXIST. */ - if (existing->start == em->start && - extent_map_end(existing) >= extent_map_end(em) && - em->block_start == existing->block_start) { - /* - * The existing extent map already encompasses the - * entire extent map we tried to add. - */ + if (start >= existing->start && + start < extent_map_end(existing)) { free_extent_map(em); em = existing; err = 0; - - } else if (start >= extent_map_end(existing) || - start <= existing->start) { + } else { /* * The existing extent map is the one nearest to * the [start, start + len) range which overlaps @@ -7177,10 +7170,6 @@ insert: free_extent_map(em); em = NULL; } - } else { - free_extent_map(em); - em = existing; - err = 0; } } write_unlock(&em_tree->lock); -- cgit v1.2.3 From 7b4df058b051fb67db61ea371f7d278131cb6e7b Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:10 -0700 Subject: Btrfs: add helper for em merge logic This is a prepare work for the following extent map selftest, which runs tests against em merge logic. 
Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/inode.c | 80 ++++++++++++++++++++++++++++++++------------------------ 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1a462ab85c49..1e05fc7e0e35 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3143,6 +3143,8 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, int delay_iput); void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); +int btrfs_add_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map **em_in, u64 start, u64 len); struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4ab713bd4139..c6a05ee3d74b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6925,6 +6925,51 @@ static noinline int uncompress_inline(struct btrfs_path *path, return ret; } +int btrfs_add_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map **em_in, u64 start, u64 len) +{ + int ret; + struct extent_map *em = *em_in; + + ret = add_extent_mapping(em_tree, em, 0); + /* it is possible that someone inserted the extent into the tree + * while we had the lock dropped. It is also possible that + * an overlapping map exists in the tree + */ + if (ret == -EEXIST) { + struct extent_map *existing; + + ret = 0; + + existing = search_extent_mapping(em_tree, start, len); + /* + * existing will always be non-NULL, since there must be + * extent causing the -EEXIST. 
+ */ + if (start >= existing->start && + start < extent_map_end(existing)) { + free_extent_map(em); + *em_in = existing; + ret = 0; + } else { + /* + * The existing extent map is the one nearest to + * the [start, start + len) range which overlaps + */ + ret = merge_extent_mapping(em_tree, existing, + em, start); + free_extent_map(existing); + if (ret) { + free_extent_map(em); + *em_in = NULL; + } + } + } + + ASSERT(ret == 0 || ret == -EEXIST); + return ret; +} + /* * a bit scary, this does extent mapping from logical file offset to the disk. * the ugly parts come from merging extents from the disk with the in-ram @@ -7138,40 +7183,7 @@ insert: err = 0; write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em, 0); - /* it is possible that someone inserted the extent into the tree - * while we had the lock dropped. It is also possible that - * an overlapping map exists in the tree - */ - if (ret == -EEXIST) { - struct extent_map *existing; - - ret = 0; - - existing = search_extent_mapping(em_tree, start, len); - /* - * existing will always be non-NULL, since there must be - * extent causing the -EEXIST. - */ - if (start >= existing->start && - start < extent_map_end(existing)) { - free_extent_map(em); - em = existing; - err = 0; - } else { - /* - * The existing extent map is the one nearest to - * the [start, start + len) range which overlaps - */ - err = merge_extent_mapping(em_tree, existing, - em, start); - free_extent_map(existing); - if (err) { - free_extent_map(em); - em = NULL; - } - } - } + err = btrfs_add_extent_mapping(em_tree, &em, start, len); write_unlock(&em_tree->lock); out: -- cgit v1.2.3 From c04e61b5e41b0e8ace4aa4b67685fbe68ac37a46 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:11 -0700 Subject: Btrfs: move extent map specific code to extent_map.c These helpers are extent map specific, move them to extent_map.c. 
Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 - fs/btrfs/extent_map.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_map.h | 2 + fs/btrfs/inode.c | 107 ------------------------------------------ 4 files changed, 127 insertions(+), 109 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1e05fc7e0e35..1a462ab85c49 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3143,8 +3143,6 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, int delay_iput); void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); -int btrfs_add_extent_mapping(struct extent_map_tree *em_tree, - struct extent_map **em_in, u64 start, u64 len); struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2e348fb0b280..6fe8b14e11cf 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -454,3 +454,128 @@ void replace_extent_mapping(struct extent_map_tree *tree, setup_extent_mapping(tree, new, modified); } + +static struct extent_map *next_extent_map(struct extent_map *em) +{ + struct rb_node *next; + + next = rb_next(&em->rb_node); + if (!next) + return NULL; + return container_of(next, struct extent_map, rb_node); +} + +static struct extent_map *prev_extent_map(struct extent_map *em) +{ + struct rb_node *prev; + + prev = rb_prev(&em->rb_node); + if (!prev) + return NULL; + return container_of(prev, struct extent_map, rb_node); +} + +/* helper for btfs_get_extent. Given an existing extent in the tree, + * the existing extent is the nearest extent to map_start, + * and an extent that you want to insert, deal with overlap and insert + * the best fitted new extent into the tree. 
+ */ +static int merge_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map *existing, + struct extent_map *em, + u64 map_start) +{ + struct extent_map *prev; + struct extent_map *next; + u64 start; + u64 end; + u64 start_diff; + + BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); + + if (existing->start > map_start) { + next = existing; + prev = prev_extent_map(next); + } else { + prev = existing; + next = next_extent_map(prev); + } + + start = prev ? extent_map_end(prev) : em->start; + start = max_t(u64, start, em->start); + end = next ? next->start : extent_map_end(em); + end = min_t(u64, end, extent_map_end(em)); + start_diff = start - em->start; + em->start = start; + em->len = end - start; + if (em->block_start < EXTENT_MAP_LAST_BYTE && + !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { + em->block_start += start_diff; + em->block_len = em->len; + } + return add_extent_mapping(em_tree, em, 0); +} + +/** + * btrfs_add_extent_mapping - add extent mapping into em_tree + * @em_tree - the extent tree into which we want to insert the extent mapping + * @em_in - extent we are inserting + * @start - start of the logical range btrfs_get_extent() is requesting + * @len - length of the logical range btrfs_get_extent() is requesting + * + * Note that @em_in's range may be different from [start, start+len), + * but they must be overlapped. + * + * Insert @em_in into @em_tree. In case there is an overlapping range, handle + * the -EEXIST by either: + * a) Returning the existing extent in @em_in if @start is within the + * existing em. + * b) Merge the existing extent with @em_in passed in. + * + * Return 0 on success, otherwise -EEXIST. 
+ * + */ +int btrfs_add_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map **em_in, u64 start, u64 len) +{ + int ret; + struct extent_map *em = *em_in; + + ret = add_extent_mapping(em_tree, em, 0); + /* it is possible that someone inserted the extent into the tree + * while we had the lock dropped. It is also possible that + * an overlapping map exists in the tree + */ + if (ret == -EEXIST) { + struct extent_map *existing; + + ret = 0; + + existing = search_extent_mapping(em_tree, start, len); + /* + * existing will always be non-NULL, since there must be + * extent causing the -EEXIST. + */ + if (start >= existing->start && + start < extent_map_end(existing)) { + free_extent_map(em); + *em_in = existing; + ret = 0; + } else { + /* + * The existing extent map is the one nearest to + * the [start, start + len) range which overlaps + */ + ret = merge_extent_mapping(em_tree, existing, + em, start); + free_extent_map(existing); + if (ret) { + free_extent_map(em); + *em_in = NULL; + } + } + } + + ASSERT(ret == 0 || ret == -EEXIST); + return ret; +} diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index e9e285d45c7e..b29f77bc0732 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -91,4 +91,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em); struct extent_map *search_extent_mapping(struct extent_map_tree *tree, u64 start, u64 len); +int btrfs_add_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map **em_in, u64 start, u64 len); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c6a05ee3d74b..d55aef46e8ae 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6817,68 +6817,6 @@ out_fail_inode: goto out_fail; } -/* Find next extent map of a given extent map, caller needs to ensure locks */ -static struct extent_map *next_extent_map(struct extent_map *em) -{ - struct rb_node *next; - - next = 
rb_next(&em->rb_node); - if (!next) - return NULL; - return container_of(next, struct extent_map, rb_node); -} - -static struct extent_map *prev_extent_map(struct extent_map *em) -{ - struct rb_node *prev; - - prev = rb_prev(&em->rb_node); - if (!prev) - return NULL; - return container_of(prev, struct extent_map, rb_node); -} - -/* helper for btfs_get_extent. Given an existing extent in the tree, - * the existing extent is the nearest extent to map_start, - * and an extent that you want to insert, deal with overlap and insert - * the best fitted new extent into the tree. - */ -static int merge_extent_mapping(struct extent_map_tree *em_tree, - struct extent_map *existing, - struct extent_map *em, - u64 map_start) -{ - struct extent_map *prev; - struct extent_map *next; - u64 start; - u64 end; - u64 start_diff; - - BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); - - if (existing->start > map_start) { - next = existing; - prev = prev_extent_map(next); - } else { - prev = existing; - next = next_extent_map(prev); - } - - start = prev ? extent_map_end(prev) : em->start; - start = max_t(u64, start, em->start); - end = next ? 
next->start : extent_map_end(em); - end = min_t(u64, end, extent_map_end(em)); - start_diff = start - em->start; - em->start = start; - em->len = end - start; - if (em->block_start < EXTENT_MAP_LAST_BYTE && - !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - em->block_start += start_diff; - em->block_len = em->len; - } - return add_extent_mapping(em_tree, em, 0); -} - static noinline int uncompress_inline(struct btrfs_path *path, struct page *page, size_t pg_offset, u64 extent_offset, @@ -6925,51 +6863,6 @@ static noinline int uncompress_inline(struct btrfs_path *path, return ret; } -int btrfs_add_extent_mapping(struct extent_map_tree *em_tree, - struct extent_map **em_in, u64 start, u64 len) -{ - int ret; - struct extent_map *em = *em_in; - - ret = add_extent_mapping(em_tree, em, 0); - /* it is possible that someone inserted the extent into the tree - * while we had the lock dropped. It is also possible that - * an overlapping map exists in the tree - */ - if (ret == -EEXIST) { - struct extent_map *existing; - - ret = 0; - - existing = search_extent_mapping(em_tree, start, len); - /* - * existing will always be non-NULL, since there must be - * extent causing the -EEXIST. - */ - if (start >= existing->start && - start < extent_map_end(existing)) { - free_extent_map(em); - *em_in = existing; - ret = 0; - } else { - /* - * The existing extent map is the one nearest to - * the [start, start + len) range which overlaps - */ - ret = merge_extent_mapping(em_tree, existing, - em, start); - free_extent_map(existing); - if (ret) { - free_extent_map(em); - *em_in = NULL; - } - } - } - - ASSERT(ret == 0 || ret == -EEXIST); - return ret; -} - /* * a bit scary, this does extent mapping from logical file offset to the disk. 
* the ugly parts come from merging extents from the disk with the in-ram -- cgit v1.2.3 From 72b28077a20a6a1f14494602466c219241f45d89 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:12 -0700 Subject: Btrfs: add extent map selftests We've observed that btrfs_get_extent() and merge_extent_mapping() could return -EEXIST in several cases, and they are caused by some racy condition, e.g dio read vs dio write, which makes the problem very tricky to reproduce. This adds extent map selftests in order to simulate those racy situations. Signed-off-by: Liu Bo Reviewed-by: Josef Bacik [ minor string adjustments ] Signed-off-by: David Sterba --- fs/btrfs/Makefile | 2 +- fs/btrfs/tests/btrfs-tests.c | 3 + fs/btrfs/tests/btrfs-tests.h | 1 + fs/btrfs/tests/extent-map-tests.c | 203 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/tests/extent-map-tests.c diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 6fe881d5cb38..0c4373628eb4 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -19,4 +19,4 @@ btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \ tests/extent-buffer-tests.o tests/btrfs-tests.o \ tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \ - tests/free-space-tree-tests.o + tests/free-space-tree-tests.o tests/extent-map-tests.o diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index d3f25376a0f8..9786d8cd0aa6 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -277,6 +277,9 @@ int btrfs_run_sanity_tests(void) goto out; } } + ret = btrfs_test_extent_map(); + if (ret) + goto out; out: btrfs_destroy_test_fs(); return ret; diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index 266f1e3d1784..bc0615bac3cc 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -33,6 +33,7 @@ int 
btrfs_test_extent_io(u32 sectorsize, u32 nodesize); int btrfs_test_inodes(u32 sectorsize, u32 nodesize); int btrfs_test_qgroups(u32 sectorsize, u32 nodesize); int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize); +int btrfs_test_extent_map(void); struct inode *btrfs_new_test_inode(void); struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize); void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c new file mode 100644 index 000000000000..e6f0dd18392e --- /dev/null +++ b/fs/btrfs/tests/extent-map-tests.c @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2017 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include +#include "btrfs-tests.h" +#include "../ctree.h" + +static void free_extent_map_tree(struct extent_map_tree *em_tree) +{ + struct extent_map *em; + struct rb_node *node; + + while (!RB_EMPTY_ROOT(&em_tree->map)) { + node = rb_first(&em_tree->map); + em = rb_entry(node, struct extent_map, rb_node); + remove_extent_mapping(em_tree, em); + +#ifdef CONFIG_BTRFS_DEBUG + if (refcount_read(&em->refs) != 1) { + test_msg( +"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d\n", + em->start, em->len, em->block_start, + em->block_len, refcount_read(&em->refs)); + + refcount_set(&em->refs, 1); + } +#endif + free_extent_map(em); + } +} + +/* + * Test scenario: + * + * Suppose that no extent map has been loaded into memory yet, there is a file + * extent [0, 16K), followed by another file extent [16K, 20K), two dio reads + * are entering btrfs_get_extent() concurrently, t1 is reading [8K, 16K), t2 is + * reading [0, 8K) + * + * t1 t2 + * btrfs_get_extent() btrfs_get_extent() + * -> lookup_extent_mapping() ->lookup_extent_mapping() + * -> add_extent_mapping(0, 16K) + * -> return em + * ->add_extent_mapping(0, 16K) + * -> #handle -EEXIST + */ +static void test_case_1(struct extent_map_tree *em_tree) +{ + struct extent_map *em; + u64 start = 0; + u64 len = SZ_8K; + int ret; + + em = alloc_extent_map(); + if (!em) + /* Skip the test on error. 
*/ + return; + + /* Add [0, 16K) */ + em->start = 0; + em->len = SZ_16K; + em->block_start = 0; + em->block_len = SZ_16K; + ret = add_extent_mapping(em_tree, em, 0); + ASSERT(ret == 0); + free_extent_map(em); + + /* Add [16K, 20K) following [0, 16K) */ + em = alloc_extent_map(); + if (!em) + goto out; + + em->start = SZ_16K; + em->len = SZ_4K; + em->block_start = SZ_32K; /* avoid merging */ + em->block_len = SZ_4K; + ret = add_extent_mapping(em_tree, em, 0); + ASSERT(ret == 0); + free_extent_map(em); + + em = alloc_extent_map(); + if (!em) + goto out; + + /* Add [0, 8K), should return [0, 16K) instead. */ + em->start = start; + em->len = len; + em->block_start = start; + em->block_len = len; + ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len); + if (ret) + test_msg("case1 [%llu %llu]: ret %d\n", start, start + len, ret); + if (em && + (em->start != 0 || extent_map_end(em) != SZ_16K || + em->block_start != 0 || em->block_len != SZ_16K)) + test_msg( +"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n", + start, start + len, ret, em->start, em->len, + em->block_start, em->block_len); + free_extent_map(em); +out: + /* free memory */ + free_extent_map_tree(em_tree); +} + +/* + * Test scenario: + * + * Reading the inline ending up with EEXIST, ie. read an inline + * extent and discard page cache and read it again. + */ +static void test_case_2(struct extent_map_tree *em_tree) +{ + struct extent_map *em; + int ret; + + em = alloc_extent_map(); + if (!em) + /* Skip the test on error. 
*/ + return; + + /* Add [0, 1K) */ + em->start = 0; + em->len = SZ_1K; + em->block_start = EXTENT_MAP_INLINE; + em->block_len = (u64)-1; + ret = add_extent_mapping(em_tree, em, 0); + ASSERT(ret == 0); + free_extent_map(em); + + /* Add [4K, 4K) following [0, 1K) */ + em = alloc_extent_map(); + if (!em) + goto out; + + em->start = SZ_4K; + em->len = SZ_4K; + em->block_start = SZ_4K; + em->block_len = SZ_4K; + ret = add_extent_mapping(em_tree, em, 0); + ASSERT(ret == 0); + free_extent_map(em); + + em = alloc_extent_map(); + if (!em) + goto out; + + /* Add [0, 1K) */ + em->start = 0; + em->len = SZ_1K; + em->block_start = EXTENT_MAP_INLINE; + em->block_len = (u64)-1; + ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len); + if (ret) + test_msg("case2 [0 1K]: ret %d\n", ret); + if (em && + (em->start != 0 || extent_map_end(em) != SZ_1K || + em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1)) + test_msg( +"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n", + ret, em->start, em->len, em->block_start, + em->block_len); + free_extent_map(em); +out: + /* free memory */ + free_extent_map_tree(em_tree); +} + +int btrfs_test_extent_map() +{ + struct extent_map_tree *em_tree; + + test_msg("Running extent_map tests\n"); + + em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL); + if (!em_tree) + /* Skip the test on error. */ + return 0; + + extent_map_tree_init(em_tree); + + test_case_1(em_tree); + test_case_2(em_tree); + + kfree(em_tree); + return 0; +} -- cgit v1.2.3 From fd87526fada701295656b3c695ae20cb037fdd95 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:13 -0700 Subject: Btrfs: extent map selftest: buffered write vs dio read This test case simulates the racy situation of buffered write vs dio read, and see if btrfs_get_extent() would return -EEXIST. 
Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 74 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index e6f0dd18392e..16830bf88f46 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -182,6 +182,79 @@ out: free_extent_map_tree(em_tree); } +static void __test_case_3(struct extent_map_tree *em_tree, u64 start) +{ + struct extent_map *em; + u64 len = SZ_4K; + int ret; + + em = alloc_extent_map(); + if (!em) + /* Skip this test on error. */ + return; + + /* Add [4K, 8K) */ + em->start = SZ_4K; + em->len = SZ_4K; + em->block_start = SZ_4K; + em->block_len = SZ_4K; + ret = add_extent_mapping(em_tree, em, 0); + ASSERT(ret == 0); + free_extent_map(em); + + em = alloc_extent_map(); + if (!em) + goto out; + + /* Add [0, 16K) */ + em->start = 0; + em->len = SZ_16K; + em->block_start = 0; + em->block_len = SZ_16K; + ret = btrfs_add_extent_mapping(em_tree, &em, start, len); + if (ret) + test_msg("case3 [0x%llx 0x%llx): ret %d\n", + start, start + len, ret); + /* + * Since bytes within em are contiguous, em->block_start is identical to + * em->start. + */ + if (em && + (start < em->start || start + len > extent_map_end(em) || + em->start != em->block_start || em->len != em->block_len)) + test_msg( +"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n", + start, start + len, ret, em->start, em->len, + em->block_start, em->block_len); + free_extent_map(em); +out: + /* free memory */ + free_extent_map_tree(em_tree); +} + +/* + * Test scenario: + * + * Suppose that no extent map has been loaded into memory yet. + * There is a file extent [0, 16K), two jobs are running concurrently + * against it, t1 is buffered writing to [4K, 8K) and t2 is doing dio + * read from [0, 4K) or [8K, 12K) or [12K, 16K). 
+ * + * t1 goes ahead of t2 and adds em [4K, 8K) into tree. + * + * t1 t2 + * cow_file_range() btrfs_get_extent() + * -> lookup_extent_mapping() + * -> add_extent_mapping() + * -> add_extent_mapping() + */ +static void test_case_3(struct extent_map_tree *em_tree) +{ + __test_case_3(em_tree, 0); + __test_case_3(em_tree, SZ_8K); + __test_case_3(em_tree, (12 * 1024ULL)); +} + int btrfs_test_extent_map() { struct extent_map_tree *em_tree; @@ -197,6 +270,7 @@ int btrfs_test_extent_map() test_case_1(em_tree); test_case_2(em_tree); + test_case_3(em_tree); kfree(em_tree); return 0; -- cgit v1.2.3 From cd77f4f8363602e5fbee481f38241110e65ff014 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:14 -0700 Subject: Btrfs: extent map selftest: dio write vs dio read This test case simulates the racy situation of dio write vs dio read, and see if btrfs_get_extent() would return -EEXIST. Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/tests/extent-map-tests.c | 89 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 16830bf88f46..70c993f01670 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -255,6 +255,94 @@ static void test_case_3(struct extent_map_tree *em_tree) __test_case_3(em_tree, (12 * 1024ULL)); } +static void __test_case_4(struct extent_map_tree *em_tree, u64 start) +{ + struct extent_map *em; + u64 len = SZ_4K; + int ret; + + em = alloc_extent_map(); + if (!em) + /* Skip this test on error. 
*/ + return; + + /* Add [0K, 8K) */ + em->start = 0; + em->len = SZ_8K; + em->block_start = 0; + em->block_len = SZ_8K; + ret = add_extent_mapping(em_tree, em, 0); + ASSERT(ret == 0); + free_extent_map(em); + + em = alloc_extent_map(); + if (!em) + goto out; + + /* Add [8K, 24K) */ + em->start = SZ_8K; + em->len = 24 * 1024ULL; + em->block_start = SZ_16K; /* avoid merging */ + em->block_len = 24 * 1024ULL; + ret = add_extent_mapping(em_tree, em, 0); + ASSERT(ret == 0); + free_extent_map(em); + + em = alloc_extent_map(); + if (!em) + goto out; + /* Add [0K, 32K) */ + em->start = 0; + em->len = SZ_32K; + em->block_start = 0; + em->block_len = SZ_32K; + ret = btrfs_add_extent_mapping(em_tree, &em, start, len); + if (ret) + test_msg("case4 [0x%llx 0x%llx): ret %d\n", + start, len, ret); + if (em && + (start < em->start || start + len > extent_map_end(em))) + test_msg( +"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n", + start, len, ret, em->start, em->len, em->block_start, + em->block_len); + free_extent_map(em); +out: + /* free memory */ + free_extent_map_tree(em_tree); +} + +/* + * Test scenario: + * + * Suppose that no extent map has been loaded into memory yet. + * There is a file extent [0, 32K), two jobs are running concurrently + * against it, t1 is doing dio write to [8K, 32K) and t2 is doing dio + * read from [0, 4K) or [4K, 8K). + * + * t1 goes ahead of t2 and splits em [0, 32K) to em [0K, 8K) and [8K 32K). 
+ * + * t1 t2 + * btrfs_get_blocks_direct() btrfs_get_blocks_direct() + * -> btrfs_get_extent() -> btrfs_get_extent() + * -> lookup_extent_mapping() + * -> add_extent_mapping() -> lookup_extent_mapping() + * # load [0, 32K) + * -> btrfs_new_extent_direct() + * -> btrfs_drop_extent_cache() + * # split [0, 32K) + * -> add_extent_mapping() + * # add [8K, 32K) + * -> add_extent_mapping() + * # handle -EEXIST when adding + * # [0, 32K) + */ +static void test_case_4(struct extent_map_tree *em_tree) +{ + __test_case_4(em_tree, 0); + __test_case_4(em_tree, SZ_4K); +} + int btrfs_test_extent_map() { struct extent_map_tree *em_tree; @@ -271,6 +359,7 @@ int btrfs_test_extent_map() test_case_1(em_tree); test_case_2(em_tree); test_case_3(em_tree); + test_case_4(em_tree); kfree(em_tree); return 0; -- cgit v1.2.3 From 9a7e10e7ba66ce23c8fdc1cac18cade7a0f6840d Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:15 -0700 Subject: Btrfs: add WARN_ONCE to detect unexpected error from merge_extent_mapping This is a subtle case, so in order to understand the problem, it'd be good to know the content of existing and em when any error occurs. 
Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 6fe8b14e11cf..914662428dbd 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -562,17 +562,24 @@ int btrfs_add_extent_mapping(struct extent_map_tree *em_tree, *em_in = existing; ret = 0; } else { + u64 orig_start = em->start; + u64 orig_len = em->len; + /* * The existing extent map is the one nearest to * the [start, start + len) range which overlaps */ ret = merge_extent_mapping(em_tree, existing, em, start); - free_extent_map(existing); if (ret) { free_extent_map(em); *em_in = NULL; + WARN_ONCE(ret, +"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n", + ret, existing->start, existing->len, + orig_start, orig_len); } + free_extent_map(existing); } } -- cgit v1.2.3 From 5f4791f4a6479cdeb8caae1a3e5895c8a6e99c09 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 5 Jan 2018 12:51:17 -0700 Subject: Btrfs: noinline merge_extent_mapping In order to debug subtle bugs around merge_extent_mapping(), perf probe can be used to check the arguments, but sometimes merge_extent_mapping() got inlined by compiler and couldn't be probed. This is adding noinline attribute to merge_extent_mapping(). Signed-off-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_map.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 914662428dbd..d3bd02105d1c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -480,10 +480,10 @@ static struct extent_map *prev_extent_map(struct extent_map *em) * and an extent that you want to insert, deal with overlap and insert * the best fitted new extent into the tree. 
*/ -static int merge_extent_mapping(struct extent_map_tree *em_tree, - struct extent_map *existing, - struct extent_map *em, - u64 map_start) +static noinline int merge_extent_mapping(struct extent_map_tree *em_tree, + struct extent_map *existing, + struct extent_map *em, + u64 map_start) { struct extent_map *prev; struct extent_map *next; -- cgit v1.2.3 From b03ebd992ff329889d2e1f342cb75ee3ebc21adc Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 18 Jan 2018 14:47:06 +0200 Subject: btrfs: Use IS_ALIGNED in btrfs_truncate_block instead of opencoding it No functional changes, just makes the code more readable Signed-off-by: Nikolay Borisov Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d55aef46e8ae..c5f31817778b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4769,8 +4769,8 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, u64 block_start; u64 block_end; - if ((offset & (blocksize - 1)) == 0 && - (!len || ((len & (blocksize - 1)) == 0))) + if (IS_ALIGNED(offset, blocksize) && + (!len || IS_ALIGNED(len, blocksize))) goto out; block_start = round_down(from, blocksize); -- cgit v1.2.3 From a848b3e547d167692483ff08622bb56dd3e4a9d6 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:00:33 +0800 Subject: btrfs: no need to check for btrfs_fs_devices::seeding There is no need to check for btrfs_fs_devices::seeding when we have checked for btrfs_fs_devices::opened, because we can't sprout without its seed FS being opened. 
Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5750cd9df417..011bb9b116e6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -618,8 +618,6 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev) if (fs_devs->opened) continue; - if (fs_devs->seeding) - continue; list_for_each_entry(dev, &fs_devs->devices, dev_list) { -- cgit v1.2.3 From 38cf665d338fca33af4b16f9ec7cad6637fc0fec Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:00:34 +0800 Subject: btrfs: make btrfs_free_stale_device() to iterate all stales Let the list iterator iterate further and find other stale devices and delete it. This is in preparation to add support for user land request-able stale devices cleanup. Also rename btrfs_free_stale_device() to btrfs_free_stale_devices(). Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 011bb9b116e6..207014fce7ea 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -605,21 +605,22 @@ static void pending_bios_fn(struct btrfs_work *work) } -static void btrfs_free_stale_device(struct btrfs_device *cur_dev) +static void btrfs_free_stale_devices(struct btrfs_device *cur_dev) { - struct btrfs_fs_devices *fs_devs; - struct btrfs_device *dev; + struct btrfs_fs_devices *fs_devs, *tmp_fs_devs; + struct btrfs_device *dev, *tmp_dev; if (!cur_dev->name) return; - list_for_each_entry(fs_devs, &fs_uuids, list) { - int del = 1; + list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) { if (fs_devs->opened) continue; - list_for_each_entry(dev, &fs_devs->devices, dev_list) { + list_for_each_entry_safe(dev, tmp_dev, + &fs_devs->devices, dev_list) { + int not_found; if (dev == cur_dev) 
continue; @@ -633,14 +634,12 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev) * either use mapper or non mapper path throughout. */ rcu_read_lock(); - del = strcmp(rcu_str_deref(dev->name), + not_found = strcmp(rcu_str_deref(dev->name), rcu_str_deref(cur_dev->name)); rcu_read_unlock(); - if (!del) - break; - } + if (not_found) + continue; - if (!del) { /* delete the stale device */ if (fs_devs->num_devices == 1) { btrfs_sysfs_remove_fsid(fs_devs); @@ -651,7 +650,6 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev) list_del(&dev->dev_list); free_device(dev); } - break; } } } @@ -779,7 +777,7 @@ static noinline int device_list_add(const char *path, ret = 1; device->fs_devices = fs_devices; - btrfs_free_stale_device(device); + btrfs_free_stale_devices(device); } else if (!device->name || strcmp(device->name->str, path)) { /* * When FS is already mounted. -- cgit v1.2.3 From 522f1b45e41d893e4d1debd7f80c1e25e060c137 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:00:35 +0800 Subject: btrfs: make btrfs_free_stale_devices() argument optional This updates btrfs_free_stale_devices() helper function to delete all unmounted devices, when arg is NULL. 
Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 207014fce7ea..9712377862d9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -610,9 +610,6 @@ static void btrfs_free_stale_devices(struct btrfs_device *cur_dev) struct btrfs_fs_devices *fs_devs, *tmp_fs_devs; struct btrfs_device *dev, *tmp_dev; - if (!cur_dev->name) - return; - list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) { if (fs_devs->opened) @@ -620,11 +617,9 @@ static void btrfs_free_stale_devices(struct btrfs_device *cur_dev) list_for_each_entry_safe(dev, tmp_dev, &fs_devs->devices, dev_list) { - int not_found; + int not_found = 0; - if (dev == cur_dev) - continue; - if (!dev->name) + if (cur_dev && (cur_dev == dev || !dev->name)) continue; /* @@ -634,8 +629,9 @@ static void btrfs_free_stale_devices(struct btrfs_device *cur_dev) * either use mapper or non mapper path throughout. */ rcu_read_lock(); - not_found = strcmp(rcu_str_deref(dev->name), - rcu_str_deref(cur_dev->name)); + if (cur_dev) + not_found = strcmp(rcu_str_deref(dev->name), + rcu_str_deref(cur_dev->name)); rcu_read_unlock(); if (not_found) continue; -- cgit v1.2.3 From 0d34097f66831843a0e27f65fed23c41d07d44aa Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:00:36 +0800 Subject: btrfs: rename btrfs_free_stale_devices() arg to skip_dev No functional changes. Rename btrfs_free_stale_devices() arg to skip_dev, so that it reflects what that arg is for. 
Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9712377862d9..7d33bd2a0fb4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -605,7 +605,7 @@ static void pending_bios_fn(struct btrfs_work *work) } -static void btrfs_free_stale_devices(struct btrfs_device *cur_dev) +static void btrfs_free_stale_devices(struct btrfs_device *skip_dev) { struct btrfs_fs_devices *fs_devs, *tmp_fs_devs; struct btrfs_device *dev, *tmp_dev; @@ -619,7 +619,7 @@ static void btrfs_free_stale_devices(struct btrfs_device *cur_dev) &fs_devs->devices, dev_list) { int not_found = 0; - if (cur_dev && (cur_dev == dev || !dev->name)) + if (skip_dev && (skip_dev == dev || !dev->name)) continue; /* @@ -629,9 +629,9 @@ static void btrfs_free_stale_devices(struct btrfs_device *cur_dev) * either use mapper or non mapper path throughout. */ rcu_read_lock(); - if (cur_dev) + if (skip_dev) not_found = strcmp(rcu_str_deref(dev->name), - rcu_str_deref(cur_dev->name)); + rcu_str_deref(skip_dev->name)); rcu_read_unlock(); if (not_found) continue; -- cgit v1.2.3 From d8367db30a101123f82a9408c8c6dd3fb287653f Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:00:37 +0800 Subject: btrfs: make btrfs_free_stale_devices() to match the path The btrfs_free_stale_devices() is updated to match for the given device path and delete it. (It searches for only unmounted list of devices.) Also drop the comment about different path being used for the same device, since now we will have cli to clean any device that's not a concern any more. 
Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7d33bd2a0fb4..fe5ecc1d84ad 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -604,8 +604,16 @@ static void pending_bios_fn(struct btrfs_work *work) run_scheduled_bios(device); } - -static void btrfs_free_stale_devices(struct btrfs_device *skip_dev) +/* + * Search and remove all stale (devices which are not mounted) devices. + * When both inputs are NULL, it will search and release all stale devices. + * path: Optional. When provided will it release all unmounted devices + * matching this path only. + * skip_dev: Optional. Will skip this device when searching for the stale + * devices. + */ +static void btrfs_free_stale_devices(const char *path, + struct btrfs_device *skip_dev) { struct btrfs_fs_devices *fs_devs, *tmp_fs_devs; struct btrfs_device *dev, *tmp_dev; @@ -619,19 +627,15 @@ static void btrfs_free_stale_devices(struct btrfs_device *skip_dev) &fs_devs->devices, dev_list) { int not_found = 0; - if (skip_dev && (skip_dev == dev || !dev->name)) + if (skip_dev && skip_dev == dev) + continue; + if (path && !dev->name) continue; - /* - * Todo: This won't be enough. What if the same device - * comes back (with new uuid and) with its mapper path? - * But for now, this does help as mostly an admin will - * either use mapper or non mapper path throughout. - */ rcu_read_lock(); - if (skip_dev) + if (path) not_found = strcmp(rcu_str_deref(dev->name), - rcu_str_deref(skip_dev->name)); + path); rcu_read_unlock(); if (not_found) continue; @@ -773,7 +777,7 @@ static noinline int device_list_add(const char *path, ret = 1; device->fs_devices = fs_devices; - btrfs_free_stale_devices(device); + btrfs_free_stale_devices(path, device); } else if (!device->name || strcmp(device->name->str, path)) { /* * When FS is already mounted. 
-- cgit v1.2.3 From 327f18cc7f44bc09170200514f42e61bce25d022 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:02:33 +0800 Subject: btrfs: move pr_info into device_list_add Commit 60999ca4b403 ("btrfs: make device scan less noisy") adds return value 1 to device_list_add(), so that parent function can call pr_info only when new device is added. Move the pr_info() part into device_list_add() so that this function can be kept simple. Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fe5ecc1d84ad..46228e9312b2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -724,8 +724,7 @@ error_brelse: * Add new device to list of registered devices * * Returns: - * 1 - first time device is seen - * 0 - device already known + * 0 - device already known or newly added * < 0 - error */ static noinline int device_list_add(const char *path, @@ -735,7 +734,6 @@ static noinline int device_list_add(const char *path, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; struct rcu_string *name; - int ret = 0; u64 found_transid = btrfs_super_generation(disk_super); fs_devices = find_fsid(disk_super->fsid); @@ -775,9 +773,16 @@ static noinline int device_list_add(const char *path, fs_devices->num_devices++; mutex_unlock(&fs_devices->device_list_mutex); - ret = 1; device->fs_devices = fs_devices; btrfs_free_stale_devices(path, device); + + if (disk_super->label[0]) + pr_info("BTRFS: device label %s devid %llu transid %llu %s\n", + disk_super->label, devid, found_transid, path); + else + pr_info("BTRFS: device fsid %pU devid %llu transid %llu %s\n", + disk_super->fsid, devid, found_transid, path); + } else if (!device->name || strcmp(device->name->str, path)) { /* * When FS is already mounted. 
@@ -838,7 +843,7 @@ static noinline int device_list_add(const char *path, *fs_devices_ret = fs_devices; - return ret; + return 0; } static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) @@ -1177,7 +1182,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, struct page *page; int ret; u64 devid; - u64 transid; u64 total_devices; u64 bytenr; @@ -1203,19 +1207,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, } devid = btrfs_stack_device_id(&disk_super->dev_item); - transid = btrfs_super_generation(disk_super); total_devices = btrfs_super_num_devices(disk_super); ret = device_list_add(path, disk_super, devid, fs_devices_ret); - if (ret > 0) { - if (disk_super->label[0]) - pr_info("BTRFS: device label %s ", disk_super->label); - else - pr_info("BTRFS: device fsid %pU ", disk_super->fsid); - - pr_cont("devid %llu transid %llu %s\n", devid, transid, path); - ret = 0; - } if (!ret && fs_devices_ret) (*fs_devices_ret)->total_devices = total_devices; -- cgit v1.2.3 From f2788d2f76cafef160e959999624c6a87e71d26d Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:02:34 +0800 Subject: btrfs: set the total_devices in device_list_add() There is no other parent for device_list_add() except for btrfs_scan_one_device(), which would set btrfs_fs_devices::total_devices if device_list_add is successful and this can be done within device_list_add() itself.
Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 46228e9312b2..4224a735493b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -841,6 +841,8 @@ static noinline int device_list_add(const char *path, if (!fs_devices->opened) device->generation = found_transid; + fs_devices->total_devices = btrfs_super_num_devices(disk_super); + *fs_devices_ret = fs_devices; return 0; @@ -1182,7 +1184,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, struct page *page; int ret; u64 devid; - u64 total_devices; u64 bytenr; /* @@ -1207,11 +1208,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, } devid = btrfs_stack_device_id(&disk_super->dev_item); - total_devices = btrfs_super_num_devices(disk_super); ret = device_list_add(path, disk_super, devid, fs_devices_ret); - if (!ret && fs_devices_ret) - (*fs_devices_ret)->total_devices = total_devices; btrfs_release_disk_super(page); -- cgit v1.2.3 From f30fefd8949290f505b3af2ba3c7c4c831f199b3 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Mon, 22 Jan 2018 11:23:27 -0800 Subject: Input: stmfts,s6sy671 - add SPDX identifier Replace the original license statement with the SPDX identifier. Update also the copyright owner adding myself as co-owner of the copyright. Signed-off-by: Andi Shyti Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/s6sy761.c | 15 +++++---------- drivers/input/touchscreen/stmfts.c | 15 +++++---------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/drivers/input/touchscreen/s6sy761.c b/drivers/input/touchscreen/s6sy761.c index 26b1cb8a88ec..675efa93d444 100644 --- a/drivers/input/touchscreen/s6sy761.c +++ b/drivers/input/touchscreen/s6sy761.c @@ -1,13 +1,8 @@ -/* - * Copyright (c) 2017 Samsung Electronics Co., Ltd. 
- * Author: Andi Shyti - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Samsung S6SY761 Touchscreen device driver - */ +// SPDX-License-Identifier: GPL-2.0 +// Samsung S6SY761 Touchscreen device driver +// +// Copyright (c) 2017 Samsung Electronics Co., Ltd. +// Copyright (c) 2017 Andi Shyti #include #include diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c index c12d01899939..2a123e20a42e 100644 --- a/drivers/input/touchscreen/stmfts.c +++ b/drivers/input/touchscreen/stmfts.c @@ -1,13 +1,8 @@ -/* - * Copyright (c) 2017 Samsung Electronics Co., Ltd. - * Author: Andi Shyti - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * STMicroelectronics FTS Touchscreen device driver - */ +// SPDX-License-Identifier: GPL-2.0 +// STMicroelectronics FTS Touchscreen device driver +// +// Copyright (c) 2017 Samsung Electronics Co., Ltd. +// Copyright (c) 2017 Andi Shyti #include #include -- cgit v1.2.3 From e5c9c6a885fad00aa559b49d8fc23a60e290824e Mon Sep 17 00:00:00 2001 From: Mark Furneaux Date: Mon, 22 Jan 2018 11:24:17 -0800 Subject: Input: xpad - add support for PDP Xbox One controllers Adds support for the current lineup of Xbox One controllers from PDP (Performance Designed Products). These controllers are very picky with their initialization sequence and require an additional 2 packets before they send any input reports. 
Signed-off-by: Mark Furneaux Reviewed-by: Cameron Gutman Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index d86e59515b9c..d88d3e0f59fb 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -229,6 +229,7 @@ static const struct xpad_device { { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, + { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, @@ -475,6 +476,22 @@ static const u8 xboxone_hori_init[] = { 0x00, 0x00, 0x00, 0x80, 0x00 }; +/* + * This packet is required for some of the PDP pads to start + * sending input reports. One of those pads is (0x0e6f:0x02ab). + */ +static const u8 xboxone_pdp_init1[] = { + 0x0a, 0x20, 0x00, 0x03, 0x00, 0x01, 0x14 +}; + +/* + * This packet is required for some of the PDP pads to start + * sending input reports. One of those pads is (0x0e6f:0x02ab). + */ +static const u8 xboxone_pdp_init2[] = { + 0x06, 0x20, 0x00, 0x02, 0x01, 0x00 +}; + /* * A specific rumble packet is required for some PowerA pads to start * sending input reports. One of those pads is (0x24c6:0x543a). 
@@ -505,6 +522,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x0165, xboxone_hori_init), XBOXONE_INIT_PKT(0x0f0d, 0x0067, xboxone_hori_init), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_fw2015_init), + XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init1), + XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), -- cgit v1.2.3 From 7222708e823afc98dfb769d36bf2f26b6420b7ce Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 22 Jan 2018 12:22:30 +0300 Subject: mm, page_vma_mapped: Introduce pfn_in_hpage() The new helper would check if the pfn belongs to the page. For huge pages it checks if the PFN is within range covered by the huge page. The helper is used in check_pte(). The original code the helper replaces had two calls to page_to_pfn(). page_to_pfn() is relatively costly. Although current GCC is able to optimize code to have one call, it's better to do this explicitly. Signed-off-by: Kirill A.
Shutemov Signed-off-by: Linus Torvalds --- mm/page_vma_mapped.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 956015614395..ae3c2a35d61b 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -30,6 +30,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw) return true; } +static inline bool pfn_in_hpage(struct page *hpage, unsigned long pfn) +{ + unsigned long hpage_pfn = page_to_pfn(hpage); + + /* THP can be referenced by any subpage */ + return pfn >= hpage_pfn && pfn - hpage_pfn < hpage_nr_pages(hpage); +} + /** * check_pte - check if @pvmw->page is mapped at the @pvmw->pte * @@ -78,14 +86,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) pfn = pte_pfn(*pvmw->pte); } - if (pfn < page_to_pfn(pvmw->page)) - return false; - - /* THP can be referenced by any subpage */ - if (pfn - page_to_pfn(pvmw->page) >= hpage_nr_pages(pvmw->page)) - return false; - - return true; + return pfn_in_hpage(pvmw->page, pfn); } /** -- cgit v1.2.3 From d728f13102cd4b2afdbb183babde2b43ac2c3130 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 17 Jan 2018 12:13:33 +0530 Subject: cxgb4: set filter type to 1 for ETH_P_IPV6 For ethtype_key = ETH_P_IPV6, set filter type as 1 in cxgb4_tc_flower code when processing flow match parameters. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index d4a548a6a55c..276edcbb3259 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -111,6 +111,9 @@ static void cxgb4_process_flow_match(struct net_device *dev, ethtype_mask = 0; } + if (ethtype_key == ETH_P_IPV6) + fs->type = 1; + fs->val.ethtype = ethtype_key; fs->mask.ethtype = ethtype_mask; fs->val.proto = key->ip_proto; -- cgit v1.2.3 From 100d39af5048aecb00522a128ec264781ba7ecfa Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Wed, 17 Jan 2018 12:13:34 +0530 Subject: cxgb4: fix endianness for vlan value in cxgb4_tc_flower Don't change endianness when assigning vlan value in cxgb4_tc_flower code when processing flow match parameters. The value gets converted to network order as part of filtering code in set_filter_wr. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 276edcbb3259..a452d5a1b0f3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -208,8 +208,8 @@ static void cxgb4_process_flow_match(struct net_device *dev, VLAN_PRIO_SHIFT); vlan_tci_mask = mask->vlan_id | (mask->vlan_priority << VLAN_PRIO_SHIFT); - fs->val.ivlan = cpu_to_be16(vlan_tci); - fs->mask.ivlan = cpu_to_be16(vlan_tci_mask); + fs->val.ivlan = vlan_tci; + fs->mask.ivlan = vlan_tci_mask; /* Chelsio adapters use ivlan_vld bit to match vlan packets * as 802.1Q. 
Also, when vlan tag is present in packets, -- cgit v1.2.3 From ab18a9c9ca27f8208c4960a1b25e8475b9f9dda5 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 17 Jan 2018 16:33:54 +0100 Subject: usbnet: silence an unnecessary warning That a kevent could not be scheduled is not an error. Such handlers must be able to deal with multiple events anyway. As the successful scheduling of a work is a debug event, make the failure debug priority, too. V2: coding style Signed-off-by: Oliver Neukum Reported-by: Cristian Caravena Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index d56fe32bf48d..8a22ff67b026 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -457,12 +457,10 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb, void usbnet_defer_kevent (struct usbnet *dev, int work) { set_bit (work, &dev->flags); - if (!schedule_work (&dev->kevent)) { - if (net_ratelimit()) - netdev_err(dev->net, "kevent %d may have been dropped\n", work); - } else { + if (!schedule_work (&dev->kevent)) + netdev_dbg(dev->net, "kevent %d may have been dropped\n", work); + else netdev_dbg(dev->net, "kevent %d scheduled\n", work); - } } EXPORT_SYMBOL_GPL(usbnet_defer_kevent); -- cgit v1.2.3 From b589513e6354a5fd6934823b7fd66bffad41137a Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 18 Jan 2018 13:11:07 -0800 Subject: rds: tcp: compute m_ack_seq as offset from ->write_seq rds-tcp uses m_ack_seq to track the tcp ack# that indicates that the peer has received a rds_message. The m_ack_seq is used in rds_tcp_is_acked() to figure out when it is safe to drop the rds_message from the RDS retransmit queue. The m_ack_seq must be calculated as an offset from the right edge of the in-flight tcp buffer, i.e., it should be based on the ->write_seq, not the ->snd_nxt. 
Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 5 +++-- net/rds/tcp.h | 2 +- net/rds/tcp_send.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 6b7ee71f40c6..ab7356e0ba83 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock) sizeof(val)); } -u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) +u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) { - return tcp_sk(tc->t_sock->sk)->snd_nxt; + /* seq# of the last byte of data in tcp send buffer */ + return tcp_sk(tc->t_sock->sk)->write_seq; } u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 1aafbf7c3011..864ca7d8f019 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -54,7 +54,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); -u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); +u32 rds_tcp_write_seq(struct rds_tcp_connection *tc); u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); extern struct rds_transport rds_tcp_transport; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index dc860d1bb608..9b76e0fa1722 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, * m_ack_seq is set to the sequence number of the last byte of * header and data. see rds_tcp_is_acked(). 
*/ - tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc); + tc->t_last_sent_nxt = rds_tcp_write_seq(tc); rm->m_ack_seq = tc->t_last_sent_nxt + sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; @@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", - rm, rds_tcp_snd_nxt(tc), + rm, rds_tcp_write_seq(tc), (unsigned long long)rm->m_ack_seq); } -- cgit v1.2.3 From 896d86959fee58113fc510c70cd8d10e82aa3e6a Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 18 Jan 2018 16:26:31 -0600 Subject: ibmvnic: Modify buffer size and number of queues on failover Using newer backing devices can cause the required padding at the end of buffer as well as the number of queues to change after a failover. Since we currently assume that these values never change, after a failover to a backing device with different capabilities, we can get errors from the vnic server, attempt to free long term buffers that are no longer there, or not free long term buffers that should be freed. This patch resolves the issue by checking whether any of these values change, and if so perform the necessary re-allocations. Signed-off-by: John Allen Reviewed-by: Nathan Fontenot Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 45 ++++++++++++++++++++++++++++++++------ drivers/net/ethernet/ibm/ibmvnic.h | 2 ++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index ab2e1917cd04..7f75d01432ef 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -410,6 +410,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) struct ibmvnic_rx_pool *rx_pool; int rx_scrqs; int i, j, rc; + u64 *size_array; + + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); for (i = 0; i < rx_scrqs; i++) { @@ -417,7 +421,17 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i); - rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff); + if (rx_pool->buff_size != be64_to_cpu(size_array[i])) { + free_long_term_buff(adapter, &rx_pool->long_term_buff); + rx_pool->buff_size = be64_to_cpu(size_array[i]); + alloc_long_term_buff(adapter, &rx_pool->long_term_buff, + rx_pool->size * + rx_pool->buff_size); + } else { + rc = reset_long_term_buff(adapter, + &rx_pool->long_term_buff); + } + if (rc) return rc; @@ -439,14 +453,12 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) static void release_rx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_rx_pool *rx_pool; - int rx_scrqs; int i, j; if (!adapter->rx_pool) return; - rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); - for (i = 0; i < rx_scrqs; i++) { + for (i = 0; i < adapter->num_active_rx_pools; i++) { rx_pool = &adapter->rx_pool[i]; netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i); @@ -469,6 +481,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->rx_pool); adapter->rx_pool = NULL; + adapter->num_active_rx_pools = 0; } 
static int init_rx_pools(struct net_device *netdev) @@ -493,6 +506,8 @@ static int init_rx_pools(struct net_device *netdev) return -1; } + adapter->num_active_rx_pools = 0; + for (i = 0; i < rxadd_subcrqs; i++) { rx_pool = &adapter->rx_pool[i]; @@ -536,6 +551,8 @@ static int init_rx_pools(struct net_device *netdev) rx_pool->next_free = 0; } + adapter->num_active_rx_pools = rxadd_subcrqs; + return 0; } @@ -586,13 +603,12 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter) static void release_tx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_tx_pool *tx_pool; - int i, tx_scrqs; + int i; if (!adapter->tx_pool) return; - tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); - for (i = 0; i < tx_scrqs; i++) { + for (i = 0; i < adapter->num_active_tx_pools; i++) { netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i); tx_pool = &adapter->tx_pool[i]; kfree(tx_pool->tx_buff); @@ -603,6 +619,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter) kfree(adapter->tx_pool); adapter->tx_pool = NULL; + adapter->num_active_tx_pools = 0; } static int init_tx_pools(struct net_device *netdev) @@ -619,6 +636,8 @@ static int init_tx_pools(struct net_device *netdev) if (!adapter->tx_pool) return -1; + adapter->num_active_tx_pools = 0; + for (i = 0; i < tx_subcrqs; i++) { tx_pool = &adapter->tx_pool[i]; @@ -666,6 +685,8 @@ static int init_tx_pools(struct net_device *netdev) tx_pool->producer_index = 0; } + adapter->num_active_tx_pools = tx_subcrqs; + return 0; } @@ -1548,6 +1569,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) static int do_reset(struct ibmvnic_adapter *adapter, struct ibmvnic_rwi *rwi, u32 reset_state) { + u64 old_num_rx_queues, old_num_tx_queues; struct net_device *netdev = adapter->netdev; int i, rc; @@ -1557,6 +1579,9 @@ static int do_reset(struct ibmvnic_adapter *adapter, netif_carrier_off(netdev); adapter->reset_reason = rwi->reset_reason; + old_num_rx_queues = adapter->req_rx_queues; + 
old_num_tx_queues = adapter->req_tx_queues; + if (rwi->reset_reason == VNIC_RESET_MOBILITY) { rc = ibmvnic_reenable_crq_queue(adapter); if (rc) @@ -1601,6 +1626,12 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = init_resources(adapter); if (rc) return rc; + } else if (adapter->req_rx_queues != old_num_rx_queues || + adapter->req_tx_queues != old_num_tx_queues) { + release_rx_pools(adapter); + release_tx_pools(adapter); + init_rx_pools(netdev); + init_tx_pools(netdev); } else { rc = reset_tx_pools(adapter); if (rc) diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 4487f1e2c266..3aec42118db2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1091,6 +1091,8 @@ struct ibmvnic_adapter { u64 opt_rxba_entries_per_subcrq; __be64 tx_rx_desc_req; u8 map_id; + u64 num_active_rx_pools; + u64 num_active_tx_pools; struct tasklet_struct tasklet; enum vnic_state state; -- cgit v1.2.3 From e791380340685698dbdd38c7e3f3fcbf70a3c832 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 18 Jan 2018 16:27:12 -0600 Subject: ibmvnic: Revert to previous mtu when unsupported value requested If we request an unsupported mtu value, the vnic server will suggest a different value. Currently we take the suggested value without question and login with that value. However, the behavior doesn't seem completely sane as attempting to change the mtu to some specific value will change the mtu to some completely different value most of the time. This patch fixes the issue by logging in with the previously used mtu value and printing an error message saying that the given mtu is unsupported. Signed-off-by: John Allen Reviewed-by: Nathan Fontenot Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 7f75d01432ef..cc36b254917c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3623,7 +3623,17 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, *req_value, (long int)be64_to_cpu(crq->request_capability_rsp. number), name); - *req_value = be64_to_cpu(crq->request_capability_rsp.number); + + if (be16_to_cpu(crq->request_capability_rsp.capability) == + REQ_MTU) { + pr_err("mtu of %llu is not supported. Reverting.\n", + *req_value); + *req_value = adapter->fallback.mtu; + } else { + *req_value = + be64_to_cpu(crq->request_capability_rsp.number); + } + ibmvnic_send_req_caps(adapter, 1); return; default: -- cgit v1.2.3 From 69d08dcbbe34347cbc044629cf6f25d062593dfe Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 18 Jan 2018 16:27:58 -0600 Subject: ibmvnic: Allocate and request vpd in init_resources In reset events in which our memory allocations need to be reallocated, VPD data is being freed, but never reallocated. This can cause issues if we later attempt to access that memory or reset and attempt to free the memory. This patch moves the allocation of the VPD data to init_resources so that it will be symmetrically freed during release resources. Signed-off-by: John Allen Reviewed-by: Nathan Fontenot Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cc36b254917c..b65f5f3ac034 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -881,7 +881,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) if (adapter->vpd->buff) len = adapter->vpd->len; - reinit_completion(&adapter->fw_done); + init_completion(&adapter->fw_done); crq.get_vpd_size.first = IBMVNIC_CRQ_CMD; crq.get_vpd_size.cmd = GET_VPD_SIZE; ibmvnic_send_crq(adapter, &crq); @@ -943,6 +943,13 @@ static int init_resources(struct ibmvnic_adapter *adapter) if (!adapter->vpd) return -ENOMEM; + /* Vital Product Data (VPD) */ + rc = ibmvnic_get_vpd(adapter); + if (rc) { + netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); + return rc; + } + adapter->map_id = 1; adapter->napi = kcalloc(adapter->req_rx_queues, sizeof(struct napi_struct), GFP_KERNEL); @@ -1016,7 +1023,7 @@ static int __ibmvnic_open(struct net_device *netdev) static int ibmvnic_open(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int rc, vpd; + int rc; mutex_lock(&adapter->reset_lock); @@ -1039,11 +1046,6 @@ static int ibmvnic_open(struct net_device *netdev) rc = __ibmvnic_open(netdev); netif_carrier_on(netdev); - /* Vital Product Data (VPD) */ - vpd = ibmvnic_get_vpd(adapter); - if (vpd) - netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); - mutex_unlock(&adapter->reset_lock); return rc; -- cgit v1.2.3 From 7c68d1a6b4db9012790af7ac0f0fdc0d2083422a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jan 2018 19:59:19 -0800 Subject: net: qdisc_pkt_len_init() should be more robust Without proper validation of DODGY packets, we might very well feed qdisc_pkt_len_init() with invalid GSO packets. 
tcp_hdrlen() might access out-of-bound data, so let's use skb_header_pointer() and proper checks. Whole story is described in commit d0c081b49137 ("flow_dissector: properly cap thoff field") We have the goal of validating DODGY packets earlier in the stack, so we might very well revert this fix in the future. Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Jason Wang Reported-by: syzbot+9da69ebac7dddd804552@syzkaller.appspotmail.com Acked-by: Jason Wang Signed-off-by: David S. Miller --- net/core/dev.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 0e0ba36eeac9..613fb4066be7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3151,10 +3151,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct tcphdr *th; + struct tcphdr _tcphdr; + + th = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else { + struct udphdr _udphdr; + + if (skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); + } if (shinfo->gso_type & SKB_GSO_DODGY) gso_segs = DIV_ROUND_UP(skb->len - hdr_len, -- cgit v1.2.3 From 121d57af308d0cf943f08f4738d24d3966c38cd9 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 19 Jan 2018 09:29:18 -0500 Subject: gso: validate gso_type in GSO handlers Validate gso_type during segmentation as SKB_GSO_DODGY sources may pass packets where the gso_type does not match the contents. Syzkaller was able to enter the SCTP gso handler with a packet of gso_type SKB_GSO_TCPV4. 
On entry of transport layer gso handlers, verify that the gso_type matches the transport protocol. Fixes: 90017accff61 ("sctp: Add GSO support") Link: http://lkml.kernel.org/r/<001a1137452496ffc305617e5fe0@google.com> Reported-by: syzbot+fee64147a25aecd48055@syzkaller.appspotmail.com Signed-off-by: Willem de Bruijn Acked-by: Jason Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv4/esp4_offload.c | 3 +++ net/ipv4/tcp_offload.c | 3 +++ net/ipv4/udp_offload.c | 3 +++ net/ipv6/esp6_offload.c | 3 +++ net/ipv6/tcpv6_offload.c | 3 +++ net/ipv6/udp_offload.c | 3 +++ net/sctp/offload.c | 3 +++ 7 files changed, 21 insertions(+) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index b1338e576d00..29b333a62ab0 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -122,6 +122,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if (!xo) goto out; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) + goto out; + seq = xo->seq.low; x = skb->sp->xvec[skb->sp->len - 1]; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b6a2aa1dcf56..4d58e2ce0b5b 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 01801b77bd0d..ea6e6e7df0ee 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 
dd9627490c7c..f52c314d4c97 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -149,6 +149,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, if (!xo) goto out; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) + goto out; + seq = xo->seq.low; x = skb->sp->xvec[skb->sp->len - 1]; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d883c9204c01..278e49cd67d4 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, { struct tcphdr *th; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a0f89ad76f9d..2a04dc9c781b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, const struct ipv6hdr *ipv6h; struct udphdr *uh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 275925b93b29..35bc7106d182 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + goto out; + sh = sctp_hdr(skb); if (!pskb_may_pull(skb, sizeof(*sh))) goto out; -- cgit v1.2.3 From ad23b750933ea7bf962678972a286c78a8fa36aa Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 19 Jan 2018 11:50:46 +0100 Subject: net: igmp: fix source address check for IGMPv3 reports Commit "net: igmp: Use correct source address on IGMPv3 reports" introduced a check to validate the source address of locally generated IGMPv3 packets. 
Instead of checking the local interface address directly, it uses inet_ifa_match(fl4->saddr, ifa), which checks if the address is on the local subnet (or equal to the point-to-point address if used). This breaks for point-to-point interfaces, so check against ifa->ifa_local directly. Cc: Kevin Cernekee Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") Reported-by: Sebastian Gottschall Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 726f6b608274..2d49717a7421 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev, return htonl(INADDR_ANY); for_ifa(in_dev) { - if (inet_ifa_match(fl4->saddr, ifa)) + if (fl4->saddr == ifa->ifa_local) return fl4->saddr; } endfor_ifa(in_dev); -- cgit v1.2.3 From 52acf06451930eb4cefabd5ecea56e2d46c32f76 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 19 Jan 2018 20:23:50 +0100 Subject: be2net: restore properly promisc mode after queues reconfiguration The commit 622190669403 ("be2net: Request RSS capability of Rx interface depending on number of Rx rings") modified be_update_queues() so the IFACE (HW representation of the netdevice) is destroyed and then re-created. This causes a regression because potential promiscuous mode is not restored properly during be_open() because the driver thinks that the HW has promiscuous mode already enabled. Note that Lancer is not affected by this bug because RX-filter flags are disabled during be_close() for this chipset. Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Fixes: 622190669403 ("be2net: Request RSS capability of Rx interface depending on number of Rx rings") Signed-off-by: Ivan Vecera Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index c6e859a27ee6..e180657a02ef 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4634,6 +4634,15 @@ int be_update_queues(struct be_adapter *adapter) be_schedule_worker(adapter); + /* + * The IF was destroyed and re-created. We need to clear + * all promiscuous flags valid for the destroyed IF. + * Without this promisc mode is not restored during + * be_open() because the driver thinks that it is + * already enabled in HW. + */ + adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; + if (netif_running(netdev)) status = be_open(netdev); -- cgit v1.2.3 From 7a8c4dd9be91a7e8f8f0e0419a560663adc694a3 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Fri, 19 Jan 2018 12:30:13 -0800 Subject: tls: Correct length of scatterlist in tls_sw_sendpage The scatterlist is reused by both sendmsg and sendfile. If a sendmsg of smaller number of pages is followed by a sendfile of larger number of pages, the scatterlist may be too short, resulting in a crash in gcm_encrypt. Add sg_unmark_end to make the list the correct length. tls_sw_sendmsg already calls sg_unmark_end correctly when it allocates memory in alloc_sg, or in zerocopy_from_iter. Signed-off-by: Dave Watson Signed-off-by: David S. 
Miller --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 61f394d369bf..0a9b72fbd761 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -577,6 +577,8 @@ alloc_payload: get_page(page); sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; sg_set_page(sg, page, copy, offset); + sg_unmark_end(sg); + ctx->sg_plaintext_num_elem++; sk_mem_charge(sk, copy); -- cgit v1.2.3 From 0afc0decf247f65b7aba666a76a0a68adf4bc435 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 22 Jan 2018 15:44:51 -0500 Subject: orangefs: use list_for_each_entry_safe in purge_waiting_ops set_op_state_purged can delete the op. Signed-off-by: Martin Brandenburg Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/orangefs/waitqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 835c6e148afc..0577d6dba8c8 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -29,10 +29,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s */ void purge_waiting_ops(void) { - struct orangefs_kernel_op_s *op; + struct orangefs_kernel_op_s *op, *tmp; spin_lock(&orangefs_request_list_lock); - list_for_each_entry(op, &orangefs_request_list, list) { + list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) { gossip_debug(GOSSIP_WAIT_DEBUG, "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), -- cgit v1.2.3 From a0ec1ded22e6a6bc41981fae22406835b006a66e Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Mon, 22 Jan 2018 15:44:52 -0500 Subject: orangefs: initialize op on loop restart in orangefs_devreq_read In orangefs_devreq_read, there is a loop which picks an op off the list of pending ops. If the loop fails to find an op, there is nothing to read, and it returns EAGAIN. If the op has been given up on, the loop is restarted via a goto. 
The bug is that the variable which the found op is written to is not reinitialized, so if there are no more eligible ops on the list, the code runs again on the already handled op. This is triggered by interrupting a process while the op is being copied to the client-core. It's a fairly small window, but it's there. Signed-off-by: Martin Brandenburg Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/orangefs/devorangefs-req.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index ded456f17de6..c584ad8d023c 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -162,7 +162,7 @@ static ssize_t orangefs_devreq_read(struct file *file, struct orangefs_kernel_op_s *op, *temp; __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; static __s32 magic = ORANGEFS_DEVREQ_MAGIC; - struct orangefs_kernel_op_s *cur_op = NULL; + struct orangefs_kernel_op_s *cur_op; unsigned long ret; /* We do not support blocking IO. */ @@ -186,6 +186,7 @@ static ssize_t orangefs_devreq_read(struct file *file, return -EAGAIN; restart: + cur_op = NULL; /* Get next op (if any) from top of list. */ spin_lock(&orangefs_request_list_lock); list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { -- cgit v1.2.3 From f5d07b9e98022d50720e38aa936fc11c67868ece Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 19 Jan 2018 09:43:39 -0800 Subject: Input: trackpoint - force 3 buttons if 0 button is reported Lenovo introduced trackpoint compatible sticks with minimum PS/2 commands. They are supposed to reply with 0x02, 0x03, or 0x04 in response to the "Read Extended ID" command, so we would know not to try certain extended commands.
Unfortunately even some trackpoints reporting the original IBM version (0x01 firmware 0x0e) now respond with incorrect data to the "Get Extended Buttons" command: thinkpad_acpi: ThinkPad BIOS R0DET87W (1.87 ), EC unknown thinkpad_acpi: Lenovo ThinkPad E470, model 20H1004SGE psmouse serio2: trackpoint: IBM TrackPoint firmware: 0x0e, buttons: 0/0 Since there are no trackpoints without buttons, let's assume the trackpoint has 3 buttons when we get 0 response to the extended buttons query. Signed-off-by: Aaron Ma Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=196253 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/trackpoint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 0871010f18d5..92a8898682a6 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -383,6 +383,9 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties) if (trackpoint_read(ps2dev, TP_EXT_BTN, &button_info)) { psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n"); button_info = 0x33; + } else if (!button_info) { + psmouse_warn(psmouse, "got 0 in extended button data, assuming 3 buttons\n"); + button_info = 0x33; } psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL); -- cgit v1.2.3 From 2a924d71794c530e55e73d0ce2cc77233307eaa9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 5 Jan 2018 13:28:47 -0800 Subject: Input: trackpoint - only expose supported controls for Elan, ALPS and NXP The newer trackpoints from ALPS, Elan and NXP implement a very limited subset of extended commands and controls that the original trackpoints implemented, so we should not be exposing not working controls in sysfs. The newer trackpoints also do not implement "Power On Reset" or "Read Extended Button Status", so we should not be using these commands during initialization. 
While we are at it, let's change "unsigned char" to u8 for byte data or bool for booleans and use better suited error codes instead of -1. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/trackpoint.c | 248 ++++++++++++++++++++++++--------------- drivers/input/mouse/trackpoint.h | 34 ++++-- 2 files changed, 172 insertions(+), 110 deletions(-) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 92a8898682a6..bbd29220dbe9 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -19,6 +19,13 @@ #include "psmouse.h" #include "trackpoint.h" +static const char * const trackpoint_variants[] = { + [TP_VARIANT_IBM] = "IBM", + [TP_VARIANT_ALPS] = "ALPS", + [TP_VARIANT_ELAN] = "Elan", + [TP_VARIANT_NXP] = "NXP", +}; + /* * Power-on Reset: Resets all trackpoint parameters, including RAM values, * to defaults. @@ -26,7 +33,7 @@ */ static int trackpoint_power_on_reset(struct ps2dev *ps2dev) { - unsigned char results[2]; + u8 results[2]; int tries = 0; /* Issue POR command, and repeat up to once if 0xFC00 received */ @@ -38,7 +45,7 @@ static int trackpoint_power_on_reset(struct ps2dev *ps2dev) /* Check for success response -- 0xAA00 */ if (results[0] != 0xAA || results[1] != 0x00) - return -1; + return -ENODEV; return 0; } @@ -46,8 +53,7 @@ static int trackpoint_power_on_reset(struct ps2dev *ps2dev) /* * Device IO: read, write and toggle bit */ -static int trackpoint_read(struct ps2dev *ps2dev, - unsigned char loc, unsigned char *results) +static int trackpoint_read(struct ps2dev *ps2dev, u8 loc, u8 *results) { if (ps2_command(ps2dev, NULL, MAKE_PS2_CMD(0, 0, TP_COMMAND)) || ps2_command(ps2dev, results, MAKE_PS2_CMD(0, 1, loc))) { @@ -57,8 +63,7 @@ static int trackpoint_read(struct ps2dev *ps2dev, return 0; } -static int trackpoint_write(struct ps2dev *ps2dev, - unsigned char loc, unsigned char val) +static int trackpoint_write(struct ps2dev *ps2dev, u8 loc, u8 val) { if 
(ps2_command(ps2dev, NULL, MAKE_PS2_CMD(0, 0, TP_COMMAND)) || ps2_command(ps2dev, NULL, MAKE_PS2_CMD(0, 0, TP_WRITE_MEM)) || @@ -70,8 +75,7 @@ static int trackpoint_write(struct ps2dev *ps2dev, return 0; } -static int trackpoint_toggle_bit(struct ps2dev *ps2dev, - unsigned char loc, unsigned char mask) +static int trackpoint_toggle_bit(struct ps2dev *ps2dev, u8 loc, u8 mask) { /* Bad things will happen if the loc param isn't in this range */ if (loc < 0x20 || loc >= 0x2F) @@ -87,11 +91,11 @@ static int trackpoint_toggle_bit(struct ps2dev *ps2dev, return 0; } -static int trackpoint_update_bit(struct ps2dev *ps2dev, unsigned char loc, - unsigned char mask, unsigned char value) +static int trackpoint_update_bit(struct ps2dev *ps2dev, + u8 loc, u8 mask, u8 value) { int retval = 0; - unsigned char data; + u8 data; trackpoint_read(ps2dev, loc, &data); if (((data & mask) == mask) != !!value) @@ -105,17 +109,18 @@ static int trackpoint_update_bit(struct ps2dev *ps2dev, unsigned char loc, */ struct trackpoint_attr_data { size_t field_offset; - unsigned char command; - unsigned char mask; - unsigned char inverted; - unsigned char power_on_default; + u8 command; + u8 mask; + bool inverted; + u8 power_on_default; }; -static ssize_t trackpoint_show_int_attr(struct psmouse *psmouse, void *data, char *buf) +static ssize_t trackpoint_show_int_attr(struct psmouse *psmouse, + void *data, char *buf) { struct trackpoint_data *tp = psmouse->private; struct trackpoint_attr_data *attr = data; - unsigned char value = *(unsigned char *)((char *)tp + attr->field_offset); + u8 value = *(u8 *)((void *)tp + attr->field_offset); if (attr->inverted) value = !value; @@ -128,8 +133,8 @@ static ssize_t trackpoint_set_int_attr(struct psmouse *psmouse, void *data, { struct trackpoint_data *tp = psmouse->private; struct trackpoint_attr_data *attr = data; - unsigned char *field = (unsigned char *)((char *)tp + attr->field_offset); - unsigned char value; + u8 *field = (void *)tp + attr->field_offset; + 
u8 value; int err; err = kstrtou8(buf, 10, &value); @@ -157,17 +162,14 @@ static ssize_t trackpoint_set_bit_attr(struct psmouse *psmouse, void *data, { struct trackpoint_data *tp = psmouse->private; struct trackpoint_attr_data *attr = data; - unsigned char *field = (unsigned char *)((char *)tp + attr->field_offset); - unsigned int value; + bool *field = (void *)tp + attr->field_offset; + bool value; int err; - err = kstrtouint(buf, 10, &value); + err = kstrtobool(buf, &value); if (err) return err; - if (value > 1) - return -EINVAL; - if (attr->inverted) value = !value; @@ -193,30 +195,6 @@ PSMOUSE_DEFINE_ATTR(_name, S_IWUSR | S_IRUGO, \ &trackpoint_attr_##_name, \ trackpoint_show_int_attr, trackpoint_set_bit_attr) -#define TRACKPOINT_UPDATE_BIT(_psmouse, _tp, _name) \ -do { \ - struct trackpoint_attr_data *_attr = &trackpoint_attr_##_name; \ - \ - trackpoint_update_bit(&_psmouse->ps2dev, \ - _attr->command, _attr->mask, _tp->_name); \ -} while (0) - -#define TRACKPOINT_UPDATE(_power_on, _psmouse, _tp, _name) \ -do { \ - if (!_power_on || \ - _tp->_name != trackpoint_attr_##_name.power_on_default) { \ - if (!trackpoint_attr_##_name.mask) \ - trackpoint_write(&_psmouse->ps2dev, \ - trackpoint_attr_##_name.command, \ - _tp->_name); \ - else \ - TRACKPOINT_UPDATE_BIT(_psmouse, _tp, _name); \ - } \ -} while (0) - -#define TRACKPOINT_SET_POWER_ON_DEFAULT(_tp, _name) \ - (_tp->_name = trackpoint_attr_##_name.power_on_default) - TRACKPOINT_INT_ATTR(sensitivity, TP_SENS, TP_DEF_SENS); TRACKPOINT_INT_ATTR(speed, TP_SPEED, TP_DEF_SPEED); TRACKPOINT_INT_ATTR(inertia, TP_INERTIA, TP_DEF_INERTIA); @@ -229,13 +207,33 @@ TRACKPOINT_INT_ATTR(ztime, TP_Z_TIME, TP_DEF_Z_TIME); TRACKPOINT_INT_ATTR(jenks, TP_JENKS_CURV, TP_DEF_JENKS_CURV); TRACKPOINT_INT_ATTR(drift_time, TP_DRIFT_TIME, TP_DEF_DRIFT_TIME); -TRACKPOINT_BIT_ATTR(press_to_select, TP_TOGGLE_PTSON, TP_MASK_PTSON, 0, +TRACKPOINT_BIT_ATTR(press_to_select, TP_TOGGLE_PTSON, TP_MASK_PTSON, false, TP_DEF_PTSON); 
-TRACKPOINT_BIT_ATTR(skipback, TP_TOGGLE_SKIPBACK, TP_MASK_SKIPBACK, 0, +TRACKPOINT_BIT_ATTR(skipback, TP_TOGGLE_SKIPBACK, TP_MASK_SKIPBACK, false, TP_DEF_SKIPBACK); -TRACKPOINT_BIT_ATTR(ext_dev, TP_TOGGLE_EXT_DEV, TP_MASK_EXT_DEV, 1, +TRACKPOINT_BIT_ATTR(ext_dev, TP_TOGGLE_EXT_DEV, TP_MASK_EXT_DEV, true, TP_DEF_EXT_DEV); +static bool trackpoint_is_attr_available(struct psmouse *psmouse, + struct attribute *attr) +{ + struct trackpoint_data *tp = psmouse->private; + + return tp->variant_id == TP_VARIANT_IBM || + attr == &psmouse_attr_sensitivity.dattr.attr || + attr == &psmouse_attr_press_to_select.dattr.attr; +} + +static umode_t trackpoint_is_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct serio *serio = to_serio_port(dev); + struct psmouse *psmouse = serio_get_drvdata(serio); + + return trackpoint_is_attr_available(psmouse, attr) ? attr->mode : 0; +} + static struct attribute *trackpoint_attrs[] = { &psmouse_attr_sensitivity.dattr.attr, &psmouse_attr_speed.dattr.attr, @@ -255,24 +253,56 @@ static struct attribute *trackpoint_attrs[] = { }; static struct attribute_group trackpoint_attr_group = { - .attrs = trackpoint_attrs, + .is_visible = trackpoint_is_attr_visible, + .attrs = trackpoint_attrs, }; -static int trackpoint_start_protocol(struct psmouse *psmouse, unsigned char *firmware_id) -{ - unsigned char param[2] = { 0 }; +#define TRACKPOINT_UPDATE(_power_on, _psmouse, _tp, _name) \ +do { \ + struct trackpoint_attr_data *_attr = &trackpoint_attr_##_name; \ + \ + if ((!_power_on || _tp->_name != _attr->power_on_default) && \ + trackpoint_is_attr_available(_psmouse, \ + &psmouse_attr_##_name.dattr.attr)) { \ + if (!_attr->mask) \ + trackpoint_write(&_psmouse->ps2dev, \ + _attr->command, _tp->_name); \ + else \ + trackpoint_update_bit(&_psmouse->ps2dev, \ + _attr->command, _attr->mask, \ + _tp->_name); \ + } \ +} while (0) - if (ps2_command(&psmouse->ps2dev, param, 
MAKE_PS2_CMD(0, 2, TP_READ_ID))) - return -1; +#define TRACKPOINT_SET_POWER_ON_DEFAULT(_tp, _name) \ +do { \ + _tp->_name = trackpoint_attr_##_name.power_on_default; \ +} while (0) - /* add new TP ID. */ - if (!(param[0] & TP_MAGIC_IDENT)) - return -1; +static int trackpoint_start_protocol(struct psmouse *psmouse, + u8 *variant_id, u8 *firmware_id) +{ + u8 param[2] = { 0 }; + int error; - if (firmware_id) - *firmware_id = param[1]; + error = ps2_command(&psmouse->ps2dev, + param, MAKE_PS2_CMD(0, 2, TP_READ_ID)); + if (error) + return error; + + switch (param[0]) { + case TP_VARIANT_IBM: + case TP_VARIANT_ALPS: + case TP_VARIANT_ELAN: + case TP_VARIANT_NXP: + if (variant_id) + *variant_id = param[0]; + if (firmware_id) + *firmware_id = param[1]; + return 0; + } - return 0; + return -ENODEV; } /* @@ -285,7 +315,7 @@ static int trackpoint_sync(struct psmouse *psmouse, bool in_power_on_state) { struct trackpoint_data *tp = psmouse->private; - if (!in_power_on_state) { + if (!in_power_on_state && tp->variant_id == TP_VARIANT_IBM) { /* * Disable features that may make device unusable * with this driver. 
@@ -347,7 +377,8 @@ static void trackpoint_defaults(struct trackpoint_data *tp) static void trackpoint_disconnect(struct psmouse *psmouse) { - sysfs_remove_group(&psmouse->ps2dev.serio->dev.kobj, &trackpoint_attr_group); + device_remove_group(&psmouse->ps2dev.serio->dev, + &trackpoint_attr_group); kfree(psmouse->private); psmouse->private = NULL; @@ -355,14 +386,20 @@ static void trackpoint_disconnect(struct psmouse *psmouse) static int trackpoint_reconnect(struct psmouse *psmouse) { - int reset_fail; + struct trackpoint_data *tp = psmouse->private; + int error; + bool was_reset; - if (trackpoint_start_protocol(psmouse, NULL)) - return -1; + error = trackpoint_start_protocol(psmouse, NULL, NULL); + if (error) + return error; - reset_fail = trackpoint_power_on_reset(&psmouse->ps2dev); - if (trackpoint_sync(psmouse, !reset_fail)) - return -1; + was_reset = tp->variant_id == TP_VARIANT_IBM && + trackpoint_power_on_reset(&psmouse->ps2dev) == 0; + + error = trackpoint_sync(psmouse, was_reset); + if (error) + return error; return 0; } @@ -370,49 +407,66 @@ static int trackpoint_reconnect(struct psmouse *psmouse) int trackpoint_detect(struct psmouse *psmouse, bool set_properties) { struct ps2dev *ps2dev = &psmouse->ps2dev; - unsigned char firmware_id; - unsigned char button_info; + struct trackpoint_data *tp; + u8 variant_id; + u8 firmware_id; + u8 button_info; int error; - if (trackpoint_start_protocol(psmouse, &firmware_id)) - return -1; + error = trackpoint_start_protocol(psmouse, &variant_id, &firmware_id); + if (error) + return error; if (!set_properties) return 0; - if (trackpoint_read(ps2dev, TP_EXT_BTN, &button_info)) { - psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n"); - button_info = 0x33; - } else if (!button_info) { - psmouse_warn(psmouse, "got 0 in extended button data, assuming 3 buttons\n"); - button_info = 0x33; - } - - psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL); - if (!psmouse->private) + tp = 
kzalloc(sizeof(*tp), GFP_KERNEL); + if (!tp) return -ENOMEM; - psmouse->vendor = "IBM"; + trackpoint_defaults(tp); + tp->variant_id = variant_id; + tp->firmware_id = firmware_id; + + psmouse->private = tp; + + psmouse->vendor = trackpoint_variants[variant_id]; psmouse->name = "TrackPoint"; psmouse->reconnect = trackpoint_reconnect; psmouse->disconnect = trackpoint_disconnect; + if (variant_id != TP_VARIANT_IBM) { + /* Newer variants do not support extended button query. */ + button_info = 0x33; + } else { + error = trackpoint_read(ps2dev, TP_EXT_BTN, &button_info); + if (error) { + psmouse_warn(psmouse, + "failed to get extended button data, assuming 3 buttons\n"); + button_info = 0x33; + } else if (!button_info) { + psmouse_warn(psmouse, + "got 0 in extended button data, assuming 3 buttons\n"); + button_info = 0x33; + } + } + if ((button_info & 0x0f) >= 3) - __set_bit(BTN_MIDDLE, psmouse->dev->keybit); + input_set_capability(psmouse->dev, EV_KEY, BTN_MIDDLE); __set_bit(INPUT_PROP_POINTER, psmouse->dev->propbit); __set_bit(INPUT_PROP_POINTING_STICK, psmouse->dev->propbit); - trackpoint_defaults(psmouse->private); - - error = trackpoint_power_on_reset(ps2dev); - - /* Write defaults to TP only if reset fails. */ - if (error) + if (variant_id != TP_VARIANT_IBM || + trackpoint_power_on_reset(ps2dev) != 0) { + /* + * Write defaults to TP if we did not reset the trackpoint. 
+ */ trackpoint_sync(psmouse, false); + } - error = sysfs_create_group(&ps2dev->serio->dev.kobj, &trackpoint_attr_group); + error = device_add_group(&ps2dev->serio->dev, &trackpoint_attr_group); if (error) { psmouse_err(psmouse, "failed to create sysfs attributes, error: %d\n", @@ -423,8 +477,8 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties) } psmouse_info(psmouse, - "IBM TrackPoint firmware: 0x%02x, buttons: %d/%d\n", - firmware_id, + "%s TrackPoint firmware: 0x%02x, buttons: %d/%d\n", + psmouse->vendor, firmware_id, (button_info & 0xf0) >> 4, button_info & 0x0f); return 0; diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h index 88055755f82e..10a039148234 100644 --- a/drivers/input/mouse/trackpoint.h +++ b/drivers/input/mouse/trackpoint.h @@ -21,10 +21,16 @@ #define TP_COMMAND 0xE2 /* Commands start with this */ #define TP_READ_ID 0xE1 /* Sent for device identification */ -#define TP_MAGIC_IDENT 0x03 /* Sent after a TP_READ_ID followed */ - /* by the firmware ID */ - /* Firmware ID includes 0x1, 0x2, 0x3 */ +/* + * Valid first byte responses to the "Read Secondary ID" (0xE1) command. + * 0x01 was the original IBM trackpoint, others implement very limited + * subset of trackpoint features. 
+ */ +#define TP_VARIANT_IBM 0x01 +#define TP_VARIANT_ALPS 0x02 +#define TP_VARIANT_ELAN 0x03 +#define TP_VARIANT_NXP 0x04 /* * Commands @@ -136,18 +142,20 @@ #define MAKE_PS2_CMD(params, results, cmd) ((params<<12) | (results<<8) | (cmd)) -struct trackpoint_data -{ - unsigned char sensitivity, speed, inertia, reach; - unsigned char draghys, mindrag; - unsigned char thresh, upthresh; - unsigned char ztime, jenks; - unsigned char drift_time; +struct trackpoint_data { + u8 variant_id; + u8 firmware_id; + + u8 sensitivity, speed, inertia, reach; + u8 draghys, mindrag; + u8 thresh, upthresh; + u8 ztime, jenks; + u8 drift_time; /* toggles */ - unsigned char press_to_select; - unsigned char skipback; - unsigned char ext_dev; + bool press_to_select; + bool skipback; + bool ext_dev; }; #ifdef CONFIG_MOUSE_PS2_TRACKPOINT -- cgit v1.2.3 From 1995266727fa8143897e89b55f5d3c79aa828420 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 22 Jan 2018 20:11:06 +0000 Subject: nfsd: auth: Fix gid sorting when rootsquash enabled Commit bdcf0a423ea1 ("kernel: make groups_sort calling a responsibility group_info allocators") appears to break nfsd rootsquash in a pretty major way. It adds a call to groups_sort() inside the loop that copies/squashes gids, which means the valid gids are sorted along with the following garbage. The net result is that the highest numbered valid gids are replaced with any lower-valued garbage gids, possibly including 0. We should sort only once, after filling in all the gids. Fixes: bdcf0a423ea1 ("kernel: make groups_sort calling a responsibility ...") Signed-off-by: Ben Hutchings Acked-by: J. 
Bruce Fields Signed-off-by: Linus Torvalds --- fs/nfsd/auth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index f650e475d8f0..fdf2aad73470 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -60,10 +60,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) gi->gid[i] = exp->ex_anon_gid; else gi->gid[i] = rqgi->gid[i]; - - /* Each thread allocates its own gi, no race */ - groups_sort(gi); } + + /* Each thread allocates its own gi, no race */ + groups_sort(gi); } else { gi = get_group_info(rqgi); } -- cgit v1.2.3 From 5efec5c655dd31944af440ff2b0a93facc4a7762 Mon Sep 17 00:00:00 2001 From: Yossi Kuperman Date: Tue, 23 Jan 2018 00:16:21 +0200 Subject: xfrm: Fix eth_hdr(skb)->h_proto to reflect inner IP version IPSec tunnel mode supports encapsulation of IPv4 over IPv6 and vice-versa. The outer IP header is stripped and the inner IP inherits the original Ethernet header. Tcpdump fails to properly decode the inner packet in case that h_proto is different than the inner IP version. Fix h_proto to reflect the inner IP version. 
Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_mode_tunnel.c | 1 + net/ipv6/xfrm6_mode_tunnel.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index e6265e2c274e..20ca486b3cad 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 02556e356f87..dc93002ff9d1 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); + eth_hdr(skb)->h_proto = skb->protocol; err = 0; -- cgit v1.2.3 From 545d8ae7affff7fb4f8bfd327c7c7790056535c4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 22 Jan 2018 16:34:09 -0600 Subject: xfrm: fix boolean assignment in xfrm_get_type_offload Assign true or false to boolean variables instead of an integer value. This issue was detected with the help of Coccinelle. Fixes: ffdb5211da1c ("xfrm: Auto-load xfrm offload modules") Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2d486492acdb..a3785f538018 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -317,7 +317,7 @@ retry: if (!type && try_load) { request_module("xfrm-offload-%d-%d", family, proto); - try_load = 0; + try_load = false; goto retry; } -- cgit v1.2.3 From 5c61d70e55ab1cb917752f4a0d5f4683b4869e32 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 17:11:30 -0300 Subject: perf tools: Move conditional O_CLOEXEC to util.h To be more generally available and get the build on centos:5 to work after we use O_CLOEXEC in the next patch, in the util/dso.c file. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Wang YanQing Link: https://lkml.kernel.org/n/tip-vsjbiydh15pfqomxw1kx64ex@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 10 ---------- tools/perf/util/util.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 48094fde0a68..d8cfc19ddb10 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -12,16 +12,6 @@ #include "util.h" #include "debug.h" -#ifndef O_CLOEXEC -#ifdef __sparc__ -#define O_CLOEXEC 0x400000 -#elif defined(__alpha__) || defined(__hppa__) -#define O_CLOEXEC 010000000 -#else -#define O_CLOEXEC 02000000 -#endif -#endif - static bool check_pipe(struct perf_data *data) { struct stat st; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 01434509c2e9..9496365da3d7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -68,4 +68,14 @@ extern bool perf_singlethreaded; void perf_set_singlethreaded(void); void perf_set_multithreaded(void); +#ifndef O_CLOEXEC +#ifdef __sparc__ +#define O_CLOEXEC 0x400000 +#elif defined(__alpha__) || 
defined(__hppa__) +#define O_CLOEXEC 010000000 +#else +#define O_CLOEXEC 02000000 +#endif +#endif + #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3 From 4c0d8d27954d9efb2a02ec9fc16f39b02f248bb7 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Mon, 15 Jan 2018 13:04:48 +0800 Subject: perf symbols: Using O_CLOEXEC in do_open I've met a strange behavior with these commands on my gentoo box: 1: perf kmem record 2: CTRL-C to stop 1 3: perf report 4: "Enter", "Enter", "Run scripts for all samples", "event_analyzing_sample". Then 'perf report' says: " No kallsyms or vmlinux with build-id xxxx was found /lib/modules/4.10.0+/build/vmlinux with build id xxxx not found, continuing without symbols ". It is strange because I am sure /lib/modules/4.10.0+/build/vmlinux is right for perf.data. After digging, I found out the reason is that "perf report" generates many open fds, then "script_browse" uses popen to run "perf script" which runs out of open files. The gentoo box has a small default value for "max open files", 1024. Yes, "ulimit -n " with a bigger number could fix it, but I think that using O_CLOEXEC in do_open is a better way.
Signed-off-by: Wang YanQing Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180115050448.GA20759@udknight [ Make sure O_CLOEXEC is available in old systems by adding a patch just before this one, to keep this bisectable in such systems ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d5b6f7f5baff..36ef45b2e89d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -446,7 +446,7 @@ static int do_open(char *name) char sbuf[STRERR_BUFSIZE]; do { - fd = open(name, O_RDONLY); + fd = open(name, O_RDONLY|O_CLOEXEC); if (fd >= 0) return fd; -- cgit v1.2.3 From 631e8f0a9748d7ef1eb6a84d0d5b9e81a79433ef Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 10 Jan 2018 13:31:52 -0700 Subject: perf report: Fix regression when decoding intel_pt traces Commit (93d10af26bb7 perf tools: Optimize sample parsing for ordered events) breaks intelPT trace decoding by invariably returning an error if the event type isn't a PERF_SAMPLE_TIME. With this patch the timestamp is initialised and processing is allowed to continue if the error returned by function perf_evlist__parse_sample_timestamp() is not a fault. 
Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Fixes: 93d10af26bb7 ("perf tools: Optimize sample parsing for ordered events") Link: http://lkml.kernel.org/r/1515616312-27645-1-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8d0fa2f8da16..c71ced7db152 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1508,10 +1508,10 @@ static s64 perf_session__process_event(struct perf_session *session, return perf_session__process_user_event(session, event, file_offset); if (tool->ordered_events) { - u64 timestamp; + u64 timestamp = -1ULL; ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); - if (ret) + if (ret && ret != -1) return ret; ret = perf_session__queue_event(session, event, timestamp, file_offset); -- cgit v1.2.3 From 99402e0683ecea11db44fd1c59a65b4eb3bd2672 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jan 2018 10:26:46 +0100 Subject: perf build: Display EXTRA features for VF=1 build Display the state of the rest of the features (FEATURE_TESTS_EXTRA) on a 'make VF=1' build. These features are detected manually by perf's Makefile.config so they can't be displayed with the main list, but only after we're done in Makefile.config. $ make VF=1 BUILD: Doing 'make -j4' parallel build Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... gtk2: [ on ] SNIP ... timerfd: [ on ] ... sched_getcpu: [ on ] ... sdt: [ on ] ... setns: [ on ] extra features: ... bionic: [ OFF ] ... compile-32: [ on ] ... compile-x32: [ OFF ] ... cplus-demangle: [ on ] ... hello: [ OFF ] ... libbabeltrace: [ on ] ... liberty: [ on ] ... liberty-z: [ on ] ... libunwind-debug-frame: [ OFF ] ... libunwind-debug-frame-arm: [ OFF ] ...
libunwind-debug-frame-aarch64: [ OFF ] SNIP Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180109092646.GB11520@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 12dec6ea5ed2..92265b32dddd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -947,6 +947,10 @@ define print_var_code endef ifeq ($(VF),1) + # Display EXTRA features which are detected manualy + # from here with feature_check call and thus cannot + # be partof global state output. + $(foreach feat,$(FEATURE_TESTS_EXTRA),$(call feature_print_status,$(feat),)) $(call print_var,prefix) $(call print_var,bindir) $(call print_var,libdir) -- cgit v1.2.3 From 95f28190aa012b18eab14799b905b6db3cf31529 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:13 +0100 Subject: tools include arch: Grab a copy of errno.h for arch's supported by perf For each arch in tools/perf/arch, grab a copy of errno.h. This is a pre-req to generate an architecture specific mapping of errno numbers to their names. This errno mapping can be used by perf trace to support cross-architecture trace reports and to get rid of the audit-libs dependency. 
Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-73azjhrzpjsskwi129020i2u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/alpha/include/uapi/asm/errno.h | 128 +++++++++++++++++++++++++++ tools/arch/mips/include/asm/errno.h | 17 ++++ tools/arch/mips/include/uapi/asm/errno.h | 130 ++++++++++++++++++++++++++++ tools/arch/parisc/include/uapi/asm/errno.h | 128 +++++++++++++++++++++++++++ tools/arch/powerpc/include/uapi/asm/errno.h | 10 +++ tools/arch/sparc/include/uapi/asm/errno.h | 118 +++++++++++++++++++++++++ tools/arch/x86/include/uapi/asm/errno.h | 1 + tools/perf/check-headers.sh | 7 ++ 8 files changed, 539 insertions(+) create mode 100644 tools/arch/alpha/include/uapi/asm/errno.h create mode 100644 tools/arch/mips/include/asm/errno.h create mode 100644 tools/arch/mips/include/uapi/asm/errno.h create mode 100644 tools/arch/parisc/include/uapi/asm/errno.h create mode 100644 tools/arch/powerpc/include/uapi/asm/errno.h create mode 100644 tools/arch/sparc/include/uapi/asm/errno.h create mode 100644 tools/arch/x86/include/uapi/asm/errno.h diff --git a/tools/arch/alpha/include/uapi/asm/errno.h b/tools/arch/alpha/include/uapi/asm/errno.h new file mode 100644 index 000000000000..3d265f6babaf --- /dev/null +++ b/tools/arch/alpha/include/uapi/asm/errno.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_ERRNO_H +#define _ALPHA_ERRNO_H + +#include <asm-generic/errno-base.h> + +#undef EAGAIN /* 11 in errno-base.h */ + +#define EDEADLK 11 /* Resource deadlock would occur */ + +#define EAGAIN 35 /* Try again */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ +#define EALREADY 37 /* Operation already in progress */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define 
EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ +#define ENETDOWN 50 /* Network is down */ +#define ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Network dropped connection because of reset */ +#define ECONNABORTED 53 /* Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Transport endpoint is already connected */ +#define ENOTCONN 57 /* Transport endpoint is not connected */ +#define ESHUTDOWN 58 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 59 /* Too many references: cannot splice */ +#define ETIMEDOUT 60 /* Connection timed out */ +#define ECONNREFUSED 61 /* Connection refused */ +#define ELOOP 62 /* Too many symbolic links encountered */ +#define ENAMETOOLONG 63 /* File name too long */ +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#define ENOTEMPTY 66 /* Directory not empty */ + +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale file handle */ +#define EREMOTE 71 /* Object is remote */ + +#define ENOLCK 77 /* No record locks available */ +#define ENOSYS 78 /* Function not implemented */ + +#define ENOMSG 80 /* No message of desired type */ +#define EIDRM 81 /* Identifier removed */ +#define ENOSR 82 /* Out of 
streams resources */ +#define ETIME 83 /* Timer expired */ +#define EBADMSG 84 /* Not a data message */ +#define EPROTO 85 /* Protocol error */ +#define ENODATA 86 /* No data available */ +#define ENOSTR 87 /* Device not a stream */ + +#define ENOPKG 92 /* Package not installed */ + +#define EILSEQ 116 /* Illegal byte sequence */ + +/* The following are just random noise.. */ +#define ECHRNG 88 /* Channel number out of range */ +#define EL2NSYNC 89 /* Level 2 not synchronized */ +#define EL3HLT 90 /* Level 3 halted */ +#define EL3RST 91 /* Level 3 reset */ + +#define ELNRNG 93 /* Link number out of range */ +#define EUNATCH 94 /* Protocol driver not attached */ +#define ENOCSI 95 /* No CSI structure available */ +#define EL2HLT 96 /* Level 2 halted */ +#define EBADE 97 /* Invalid exchange */ +#define EBADR 98 /* Invalid request descriptor */ +#define EXFULL 99 /* Exchange full */ +#define ENOANO 100 /* No anode */ +#define EBADRQC 101 /* Invalid request code */ +#define EBADSLT 102 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 104 /* Bad font file format */ +#define ENONET 105 /* Machine is not on the network */ +#define ENOLINK 106 /* Link has been severed */ +#define EADV 107 /* Advertise error */ +#define ESRMNT 108 /* Srmount error */ +#define ECOMM 109 /* Communication error on send */ +#define EMULTIHOP 110 /* Multihop attempted */ +#define EDOTDOT 111 /* RFS specific error */ +#define EOVERFLOW 112 /* Value too large for defined data type */ +#define ENOTUNIQ 113 /* Name not unique on network */ +#define EBADFD 114 /* File descriptor in bad state */ +#define EREMCHG 115 /* Remote address changed */ + +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ + +#define ELIBACC 122 /* Can not access a needed shared library */ +#define ELIBBAD 
123 /* Accessing a corrupted shared library */ +#define ELIBSCN 124 /* .lib section in a.out corrupted */ +#define ELIBMAX 125 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 126 /* Cannot exec a shared library directly */ +#define ERESTART 127 /* Interrupted system call should be restarted */ +#define ESTRPIPE 128 /* Streams pipe error */ + +#define ENOMEDIUM 129 /* No medium found */ +#define EMEDIUMTYPE 130 /* Wrong medium type */ +#define ECANCELED 131 /* Operation Cancelled */ +#define ENOKEY 132 /* Required key not available */ +#define EKEYEXPIRED 133 /* Key has expired */ +#define EKEYREVOKED 134 /* Key has been revoked */ +#define EKEYREJECTED 135 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 136 /* Owner died */ +#define ENOTRECOVERABLE 137 /* State not recoverable */ + +#define ERFKILL 138 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 139 /* Memory page has hardware error */ + +#endif diff --git a/tools/arch/mips/include/asm/errno.h b/tools/arch/mips/include/asm/errno.h new file mode 100644 index 000000000000..21d91cdfe3c9 --- /dev/null +++ b/tools/arch/mips/include/asm/errno.h @@ -0,0 +1,17 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995, 1999, 2001, 2002 by Ralf Baechle + */ +#ifndef _ASM_ERRNO_H +#define _ASM_ERRNO_H + +#include <uapi/asm/errno.h> + + +/* The biggest error number defined here or in <linux/errno.h>. 
*/ +#define EMAXERRNO 1133 + +#endif /* _ASM_ERRNO_H */ diff --git a/tools/arch/mips/include/uapi/asm/errno.h b/tools/arch/mips/include/uapi/asm/errno.h new file mode 100644 index 000000000000..2fb714e2d6d8 --- /dev/null +++ b/tools/arch/mips/include/uapi/asm/errno.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995, 1999, 2001, 2002 by Ralf Baechle + */ +#ifndef _UAPI_ASM_ERRNO_H +#define _UAPI_ASM_ERRNO_H + +/* + * These error numbers are intended to be MIPS ABI compatible + */ + +#include <asm-generic/errno-base.h> + +#define ENOMSG 35 /* No message of desired type */ +#define EIDRM 36 /* Identifier removed */ +#define ECHRNG 37 /* Channel number out of range */ +#define EL2NSYNC 38 /* Level 2 not synchronized */ +#define EL3HLT 39 /* Level 3 halted */ +#define EL3RST 40 /* Level 3 reset */ +#define ELNRNG 41 /* Link number out of range */ +#define EUNATCH 42 /* Protocol driver not attached */ +#define ENOCSI 43 /* No CSI structure available */ +#define EL2HLT 44 /* Level 2 halted */ +#define EDEADLK 45 /* Resource deadlock would occur */ +#define ENOLCK 46 /* No record locks available */ +#define EBADE 50 /* Invalid exchange */ +#define EBADR 51 /* Invalid request descriptor */ +#define EXFULL 52 /* Exchange full */ +#define ENOANO 53 /* No anode */ +#define EBADRQC 54 /* Invalid request code */ +#define EBADSLT 55 /* Invalid slot */ +#define EDEADLOCK 56 /* File locking deadlock error */ +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ 
+#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EDOTDOT 73 /* RFS specific error */ +#define EMULTIHOP 74 /* Multihop attempted */ +#define EBADMSG 77 /* Not a data message */ +#define ENAMETOOLONG 78 /* File name too long */ +#define EOVERFLOW 79 /* Value too large for defined data type */ +#define ENOTUNIQ 80 /* Name not unique on network */ +#define EBADFD 81 /* File descriptor in bad state */ +#define EREMCHG 82 /* Remote address changed */ +#define ELIBACC 83 /* Can not access a needed shared library */ +#define ELIBBAD 84 /* Accessing a corrupted shared library */ +#define ELIBSCN 85 /* .lib section in a.out corrupted */ +#define ELIBMAX 86 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 87 /* Cannot exec a shared library directly */ +#define EILSEQ 88 /* Illegal byte sequence */ +#define ENOSYS 89 /* Function not implemented */ +#define ELOOP 90 /* Too many symbolic links encountered */ +#define ERESTART 91 /* Interrupted system call should be restarted */ +#define ESTRPIPE 92 /* Streams pipe error */ +#define ENOTEMPTY 93 /* Directory not empty */ +#define EUSERS 94 /* Too many users */ +#define ENOTSOCK 95 /* Socket operation on non-socket */ +#define EDESTADDRREQ 96 /* Destination address required */ +#define EMSGSIZE 97 /* Message too long */ +#define EPROTOTYPE 98 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 99 /* Protocol not available */ +#define EPROTONOSUPPORT 120 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 121 /* Socket type not supported */ +#define EOPNOTSUPP 122 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 123 /* Protocol family not supported */ +#define EAFNOSUPPORT 124 /* Address family not supported by protocol */ +#define EADDRINUSE 125 /* Address already in use */ +#define EADDRNOTAVAIL 126 
/* Cannot assign requested address */ +#define ENETDOWN 127 /* Network is down */ +#define ENETUNREACH 128 /* Network is unreachable */ +#define ENETRESET 129 /* Network dropped connection because of reset */ +#define ECONNABORTED 130 /* Software caused connection abort */ +#define ECONNRESET 131 /* Connection reset by peer */ +#define ENOBUFS 132 /* No buffer space available */ +#define EISCONN 133 /* Transport endpoint is already connected */ +#define ENOTCONN 134 /* Transport endpoint is not connected */ +#define EUCLEAN 135 /* Structure needs cleaning */ +#define ENOTNAM 137 /* Not a XENIX named type file */ +#define ENAVAIL 138 /* No XENIX semaphores available */ +#define EISNAM 139 /* Is a named type file */ +#define EREMOTEIO 140 /* Remote I/O error */ +#define EINIT 141 /* Reserved */ +#define EREMDEV 142 /* Error 142 */ +#define ESHUTDOWN 143 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 144 /* Too many references: cannot splice */ +#define ETIMEDOUT 145 /* Connection timed out */ +#define ECONNREFUSED 146 /* Connection refused */ +#define EHOSTDOWN 147 /* Host is down */ +#define EHOSTUNREACH 148 /* No route to host */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EALREADY 149 /* Operation already in progress */ +#define EINPROGRESS 150 /* Operation now in progress */ +#define ESTALE 151 /* Stale file handle */ +#define ECANCELED 158 /* AIO operation canceled */ + +/* + * These error are Linux extensions. 
+ */ +#define ENOMEDIUM 159 /* No medium found */ +#define EMEDIUMTYPE 160 /* Wrong medium type */ +#define ENOKEY 161 /* Required key not available */ +#define EKEYEXPIRED 162 /* Key has expired */ +#define EKEYREVOKED 163 /* Key has been revoked */ +#define EKEYREJECTED 164 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 165 /* Owner died */ +#define ENOTRECOVERABLE 166 /* State not recoverable */ + +#define ERFKILL 167 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 168 /* Memory page has hardware error */ + +#define EDQUOT 1133 /* Quota exceeded */ + + +#endif /* _UAPI_ASM_ERRNO_H */ diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h new file mode 100644 index 000000000000..fc0df353ff0d --- /dev/null +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _PARISC_ERRNO_H +#define _PARISC_ERRNO_H + +#include <asm-generic/errno-base.h> + +#define ENOMSG 35 /* No message of desired type */ +#define EIDRM 36 /* Identifier removed */ +#define ECHRNG 37 /* Channel number out of range */ +#define EL2NSYNC 38 /* Level 2 not synchronized */ +#define EL3HLT 39 /* Level 3 halted */ +#define EL3RST 40 /* Level 3 reset */ +#define ELNRNG 41 /* Link number out of range */ +#define EUNATCH 42 /* Protocol driver not attached */ +#define ENOCSI 43 /* No CSI structure available */ +#define EL2HLT 44 /* Level 2 halted */ +#define EDEADLK 45 /* Resource deadlock would occur */ +#define EDEADLOCK EDEADLK +#define ENOLCK 46 /* No record locks available */ +#define EILSEQ 47 /* Illegal byte sequence */ + +#define ENONET 50 /* Machine is not on the network */ +#define ENODATA 51 /* No data available */ +#define ETIME 52 /* Timer expired */ +#define ENOSR 53 /* Out of streams resources */ +#define ENOSTR 54 /* Device not a stream */ +#define ENOPKG 55 /* Package not installed */ + +#define ENOLINK 57 /* Link has been severed */ 
+#define EADV 58 /* Advertise error */ +#define ESRMNT 59 /* Srmount error */ +#define ECOMM 60 /* Communication error on send */ +#define EPROTO 61 /* Protocol error */ + +#define EMULTIHOP 64 /* Multihop attempted */ + +#define EDOTDOT 66 /* RFS specific error */ +#define EBADMSG 67 /* Not a data message */ +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale file handle */ +#define EREMOTE 71 /* Object is remote */ +#define EOVERFLOW 72 /* Value too large for defined data type */ + +/* these errnos are defined by Linux but not HPUX. */ + +#define EBADE 160 /* Invalid exchange */ +#define EBADR 161 /* Invalid request descriptor */ +#define EXFULL 162 /* Exchange full */ +#define ENOANO 163 /* No anode */ +#define EBADRQC 164 /* Invalid request code */ +#define EBADSLT 165 /* Invalid slot */ +#define EBFONT 166 /* Bad font file format */ +#define ENOTUNIQ 167 /* Name not unique on network */ +#define EBADFD 168 /* File descriptor in bad state */ +#define EREMCHG 169 /* Remote address changed */ +#define ELIBACC 170 /* Can not access a needed shared library */ +#define ELIBBAD 171 /* Accessing a corrupted shared library */ +#define ELIBSCN 172 /* .lib section in a.out corrupted */ +#define ELIBMAX 173 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 174 /* Cannot exec a shared library directly */ +#define ERESTART 175 /* Interrupted system call should be restarted */ +#define ESTRPIPE 176 /* Streams pipe error */ +#define EUCLEAN 177 /* Structure needs cleaning */ +#define ENOTNAM 178 /* Not a XENIX named type file */ +#define ENAVAIL 179 /* No XENIX semaphores available */ +#define EISNAM 180 /* Is a named type file */ +#define EREMOTEIO 181 /* Remote I/O error */ +#define ENOMEDIUM 182 /* No medium found */ +#define EMEDIUMTYPE 183 /* Wrong medium type */ +#define ENOKEY 184 /* Required key not available */ +#define EKEYEXPIRED 185 /* Key has expired */ +#define EKEYREVOKED 186 /* Key 
has been revoked */ +#define EKEYREJECTED 187 /* Key was rejected by service */ + +/* We now return you to your regularly scheduled HPUX. */ + +#define ENOSYM 215 /* symbol does not exist in executable */ +#define ENOTSOCK 216 /* Socket operation on non-socket */ +#define EDESTADDRREQ 217 /* Destination address required */ +#define EMSGSIZE 218 /* Message too long */ +#define EPROTOTYPE 219 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 220 /* Protocol not available */ +#define EPROTONOSUPPORT 221 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 222 /* Socket type not supported */ +#define EOPNOTSUPP 223 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 224 /* Protocol family not supported */ +#define EAFNOSUPPORT 225 /* Address family not supported by protocol */ +#define EADDRINUSE 226 /* Address already in use */ +#define EADDRNOTAVAIL 227 /* Cannot assign requested address */ +#define ENETDOWN 228 /* Network is down */ +#define ENETUNREACH 229 /* Network is unreachable */ +#define ENETRESET 230 /* Network dropped connection because of reset */ +#define ECONNABORTED 231 /* Software caused connection abort */ +#define ECONNRESET 232 /* Connection reset by peer */ +#define ENOBUFS 233 /* No buffer space available */ +#define EISCONN 234 /* Transport endpoint is already connected */ +#define ENOTCONN 235 /* Transport endpoint is not connected */ +#define ESHUTDOWN 236 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 237 /* Too many references: cannot splice */ +#define ETIMEDOUT 238 /* Connection timed out */ +#define ECONNREFUSED 239 /* Connection refused */ +#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ +#define EREMOTERELEASE 240 /* Remote peer released connection */ +#define EHOSTDOWN 241 /* Host is down */ +#define EHOSTUNREACH 242 /* No route to host */ + +#define EALREADY 244 /* Operation already in progress */ +#define EINPROGRESS 245 /* Operation now in progress */ +#define 
EWOULDBLOCK EAGAIN /* Operation would block (Not HPUX compliant) */ +#define ENOTEMPTY 247 /* Directory not empty */ +#define ENAMETOOLONG 248 /* File name too long */ +#define ELOOP 249 /* Too many symbolic links encountered */ +#define ENOSYS 251 /* Function not implemented */ + +#define ENOTSUP 252 /* Function not implemented (POSIX.4 / HPUX) */ +#define ECANCELLED 253 /* aio request was canceled before complete (POSIX.4 / HPUX) */ +#define ECANCELED ECANCELLED /* SuSv3 and Solaris wants one 'L' */ + +/* for robust mutexes */ +#define EOWNERDEAD 254 /* Owner died */ +#define ENOTRECOVERABLE 255 /* State not recoverable */ + +#define ERFKILL 256 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 257 /* Memory page has hardware error */ + +#endif diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h new file mode 100644 index 000000000000..cc79856896a1 --- /dev/null +++ b/tools/arch/powerpc/include/uapi/asm/errno.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_POWERPC_ERRNO_H +#define _ASM_POWERPC_ERRNO_H + +#include <asm-generic/errno.h> + +#undef EDEADLOCK +#define EDEADLOCK 58 /* File locking deadlock error */ + +#endif /* _ASM_POWERPC_ERRNO_H */ diff --git a/tools/arch/sparc/include/uapi/asm/errno.h b/tools/arch/sparc/include/uapi/asm/errno.h new file mode 100644 index 000000000000..81a732b902ee --- /dev/null +++ b/tools/arch/sparc/include/uapi/asm/errno.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _SPARC_ERRNO_H +#define _SPARC_ERRNO_H + +/* These match the SunOS error numbering scheme. 
*/ + +#include <asm-generic/errno-base.h> + +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ +#define EALREADY 37 /* Operation already in progress */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Op not supported on transport endpoint */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ +#define ENETDOWN 50 /* Network is down */ +#define ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Net dropped connection because of reset */ +#define ECONNABORTED 53 /* Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Transport endpoint is already connected */ +#define ENOTCONN 57 /* Transport endpoint is not connected */ +#define ESHUTDOWN 58 /* No send after transport endpoint shutdown */ +#define ETOOMANYREFS 59 /* Too many references: cannot splice */ +#define ETIMEDOUT 60 /* Connection timed out */ +#define ECONNREFUSED 61 /* Connection refused */ +#define ELOOP 62 /* Too many symbolic links encountered */ +#define ENAMETOOLONG 63 /* File name too long */ +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#define ENOTEMPTY 66 /* Directory not empty */ +#define EPROCLIM 67 /* SUNOS: Too many processes */ +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* 
Stale file handle */ +#define EREMOTE 71 /* Object is remote */ +#define ENOSTR 72 /* Device not a stream */ +#define ETIME 73 /* Timer expired */ +#define ENOSR 74 /* Out of streams resources */ +#define ENOMSG 75 /* No message of desired type */ +#define EBADMSG 76 /* Not a data message */ +#define EIDRM 77 /* Identifier removed */ +#define EDEADLK 78 /* Resource deadlock would occur */ +#define ENOLCK 79 /* No record locks available */ +#define ENONET 80 /* Machine is not on the network */ +#define ERREMOTE 81 /* SunOS: Too many lvls of remote in path */ +#define ENOLINK 82 /* Link has been severed */ +#define EADV 83 /* Advertise error */ +#define ESRMNT 84 /* Srmount error */ +#define ECOMM 85 /* Communication error on send */ +#define EPROTO 86 /* Protocol error */ +#define EMULTIHOP 87 /* Multihop attempted */ +#define EDOTDOT 88 /* RFS specific error */ +#define EREMCHG 89 /* Remote address changed */ +#define ENOSYS 90 /* Function not implemented */ + +/* The rest have no SunOS equivalent. 
*/ +#define ESTRPIPE 91 /* Streams pipe error */ +#define EOVERFLOW 92 /* Value too large for defined data type */ +#define EBADFD 93 /* File descriptor in bad state */ +#define ECHRNG 94 /* Channel number out of range */ +#define EL2NSYNC 95 /* Level 2 not synchronized */ +#define EL3HLT 96 /* Level 3 halted */ +#define EL3RST 97 /* Level 3 reset */ +#define ELNRNG 98 /* Link number out of range */ +#define EUNATCH 99 /* Protocol driver not attached */ +#define ENOCSI 100 /* No CSI structure available */ +#define EL2HLT 101 /* Level 2 halted */ +#define EBADE 102 /* Invalid exchange */ +#define EBADR 103 /* Invalid request descriptor */ +#define EXFULL 104 /* Exchange full */ +#define ENOANO 105 /* No anode */ +#define EBADRQC 106 /* Invalid request code */ +#define EBADSLT 107 /* Invalid slot */ +#define EDEADLOCK 108 /* File locking deadlock error */ +#define EBFONT 109 /* Bad font file format */ +#define ELIBEXEC 110 /* Cannot exec a shared library directly */ +#define ENODATA 111 /* No data available */ +#define ELIBBAD 112 /* Accessing a corrupted shared library */ +#define ENOPKG 113 /* Package not installed */ +#define ELIBACC 114 /* Can not access a needed shared library */ +#define ENOTUNIQ 115 /* Name not unique on network */ +#define ERESTART 116 /* Interrupted syscall should be restarted */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EILSEQ 122 /* Illegal byte sequence */ +#define ELIBMAX 123 /* Atmpt to link in too many shared libs */ +#define ELIBSCN 124 /* .lib section in a.out corrupted */ + +#define ENOMEDIUM 125 /* No medium found */ +#define EMEDIUMTYPE 126 /* Wrong medium type */ +#define ECANCELED 127 /* Operation Cancelled */ +#define ENOKEY 128 /* Required key not available */ +#define EKEYEXPIRED 129 /* Key has expired */ 
+#define EKEYREVOKED 130 /* Key has been revoked */ +#define EKEYREJECTED 131 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 132 /* Owner died */ +#define ENOTRECOVERABLE 133 /* State not recoverable */ + +#define ERFKILL 134 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 135 /* Memory page has hardware error */ + +#endif diff --git a/tools/arch/x86/include/uapi/asm/errno.h b/tools/arch/x86/include/uapi/asm/errno.h new file mode 100644 index 000000000000..4c82b503d92f --- /dev/null +++ b/tools/arch/x86/include/uapi/asm/errno.h @@ -0,0 +1 @@ +#include <asm-generic/errno.h> diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index f81ca508700c..83a65d305298 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -36,6 +36,13 @@ arch/s390/include/uapi/asm/sie.h arch/s390/include/uapi/asm/unistd.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h +arch/alpha/include/uapi/asm/errno.h +arch/mips/include/asm/errno.h +arch/mips/include/uapi/asm/errno.h +arch/parisc/include/uapi/asm/errno.h +arch/powerpc/include/uapi/asm/errno.h +arch/sparc/include/uapi/asm/errno.h +arch/x86/include/uapi/asm/errno.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/__fls.h -- cgit v1.2.3 From 28b8f954003e59dbf0b56be6df0d81f83e64f36b Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:14 +0100 Subject: tools include asm-generic: Grab errno.h and errno-base.h This is a pre-req to generate an architecture specific mapping of errno numbers to their names. This errno mapping can be used by perf trace to support cross-architecture trace reports and to get rid of the audit-libs dependency. 
Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-3-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-q13ystrw4sjz4wyvd3654cnm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/errno-base.h | 40 +++++++++ tools/include/uapi/asm-generic/errno.h | 123 ++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 2 + 3 files changed, 165 insertions(+) create mode 100644 tools/include/uapi/asm-generic/errno-base.h create mode 100644 tools/include/uapi/asm-generic/errno.h diff --git a/tools/include/uapi/asm-generic/errno-base.h b/tools/include/uapi/asm-generic/errno-base.h new file mode 100644 index 000000000000..9653140bff92 --- /dev/null +++ b/tools/include/uapi/asm-generic/errno-base.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_GENERIC_ERRNO_BASE_H +#define _ASM_GENERIC_ERRNO_BASE_H + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Argument list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ 
+#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ + +#endif diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h new file mode 100644 index 000000000000..cf9c51ac49f9 --- /dev/null +++ b/tools/include/uapi/asm-generic/errno.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_GENERIC_ERRNO_H +#define _ASM_GENERIC_ERRNO_H + +#include <asm-generic/errno-base.h> + +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ + +/* + * This error code is special: arch syscall entry code will return + * -ENOSYS if users try to call a syscall that doesn't exist. To keep + * failures of syscalls that really do exist distinguishable from + * failures due to attempts to use a nonexistent syscall, syscall + * implementations should refrain from returning -ENOSYS. 
+ */ +#define ENOSYS 38 /* Invalid system call number */ + +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define 
ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 
114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ +#define ECANCELED 125 /* Operation Canceled */ +#define ENOKEY 126 /* Required key not available */ +#define EKEYEXPIRED 127 /* Key has expired */ +#define EKEYREVOKED 128 /* Key has been revoked */ +#define EKEYREJECTED 129 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 130 /* Owner died */ +#define ENOTRECOVERABLE 131 /* State not recoverable */ + +#define ERFKILL 132 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 133 /* Memory page has hardware error */ + +#endif diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 83a65d305298..51abdb0a4047 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -49,6 +49,8 @@ include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h include/asm-generic/bitops/fls64.h include/linux/coresight-pmu.h +include/uapi/asm-generic/errno.h +include/uapi/asm-generic/errno-base.h include/uapi/asm-generic/ioctls.h include/uapi/asm-generic/mman-common.h ' -- cgit v1.2.3 From 0337cf74ccf2a43437bff2e23b278e4f2dc4c6e2 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:15 +0100 Subject: perf util: Introduce architecture specific errno/name mapping Introduce a script that generates a mapping of errno numbers to their names for each architecture that is supported by perf (i.e. has a subdirectory in tools/perf/arch/). 
The errno mapping is generated as part of the trace beautifiers and can be used by including the trace/beauty/arch_errno_names.c file. Then, use arch_syscalls__strerrno() to look up an errno value to obtain the errno name (e.g. ENOENT) for a particular architecture. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Suggested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-4-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-8zlsjnuoep2ww39aq5z41fno@git.kernel.org [ Make x86 be the first arch, most common, add newline to last line, fixing build on centos:5 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 13 +++- tools/perf/trace/beauty/arch_errno_names.c | 1 + tools/perf/trace/beauty/arch_errno_names.sh | 100 ++++++++++++++++++++++++++++ tools/perf/trace/beauty/beauty.h | 2 + 4 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 tools/perf/trace/beauty/arch_errno_names.c create mode 100755 tools/perf/trace/beauty/arch_errno_names.sh diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9fdefd748e2e..9a9b528a88bb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -462,6 +462,13 @@ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh $(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl) $(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@ +arch_errno_name_array := $(beauty_outdir)/arch_errno_name_array.c +arch_errno_hdr_dir := $(srctree)/tools +arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh + +$(arch_errno_name_array): $(arch_errno_tbl) + $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -565,7 +572,8 @@ prepare: 
$(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ $(perf_ioctl_array) \ - $(prctl_option_array) + $(prctl_option_array) \ + $(arch_errno_name_array) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -847,7 +855,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(kcmp_type_array) \ $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) \ - $(OUTPUT)$(prctl_option_array) + $(OUTPUT)$(prctl_option_array) \ + $(OUTPUT)$(arch_errno_name_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean # diff --git a/tools/perf/trace/beauty/arch_errno_names.c b/tools/perf/trace/beauty/arch_errno_names.c new file mode 100644 index 000000000000..ede031c3a9e0 --- /dev/null +++ b/tools/perf/trace/beauty/arch_errno_names.c @@ -0,0 +1 @@ +#include "trace/beauty/generated/arch_errno_name_array.c" diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh new file mode 100755 index 000000000000..22c9fc900c84 --- /dev/null +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -0,0 +1,100 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate C file mapping errno codes to errno names. +# +# Copyright IBM Corp. 
2018 +# Author(s): Hendrik Brueckner + +gcc="$1" +toolsdir="$2" +include_path="-I$toolsdir/include/uapi" + +arch_string() +{ + echo "$1" |sed -e 'y/- /__/' |tr '[[:upper:]]' '[[:lower:]]' +} + +asm_errno_file() +{ + local arch="$1" + local header + + header="$toolsdir/arch/$arch/include/uapi/asm/errno.h" + if test -r "$header"; then + echo "$header" + else + echo "$toolsdir/include/uapi/asm-generic/errno.h" + fi +} + +create_errno_lookup_func() +{ + local arch=$(arch_string "$1") + local nr name + + cat < + +EoHEADER + +# Create list of architectures and ignore those that do not appear +# in tools/perf/arch +archlist="" +for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do + test -d arch/$arch && archlist="$archlist $arch" +done + +for arch in x86 $archlist generic; do + process_arch "$arch" +done +create_arch_errno_table_func "x86 $archlist" "generic" diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index a6dfd04beaee..d8f6b2ec7fc5 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -114,4 +114,6 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size); void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)); +const char *arch_syscalls__strerrno(const char *arch, int err); + #endif /* _PERF_TRACE_BEAUTY_H */ -- cgit v1.2.3 From 092bd3cd7169085b09e4a5307de95e461d0581d7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:16 +0100 Subject: perf trace: Obtain errno strings by using arch_syscalls__strerrno() Replace the errno_to_name() from the audit-libs with the newly introduced arch_syscalls__strerrno() function. With this change: 1. With replacing errno_to_name() from audit-libs, perf trace does no longer require audit-lib interfaces. 2. 
In addition to 1, the audit-libs dependency can be removed for architectures that support syscall tables in perf. This is achieved in a follow-up commit. 3. With the architecture specific errno number/name mapping, perf trace reports can work across architectures. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-5-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-xjvoqzhwmu4wn4kl9ng11rvs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 531d43bf57e1..7dece5e0cdbb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -21,6 +21,7 @@ #include "builtin.h" #include "util/color.h" #include "util/debug.h" +#include "util/env.h" #include "util/event.h" #include "util/evlist.h" #include @@ -45,14 +46,12 @@ #include #include -#include /* FIXME: Still needed for audit_errno_to_name */ #include #include #include #include #include #include -#include #include #include #include @@ -545,6 +544,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, { .scnprintf = SCA_STRARRAY, \ .parm = &strarray__##array, } +#include "trace/beauty/arch_errno_names.c" #include "trace/beauty/eventfd.c" #include "trace/beauty/flock.c" #include "trace/beauty/futex_op.c" @@ -1660,6 +1660,14 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); } +static const char *errno_to_name(struct perf_evsel *evsel, int err) +{ + struct perf_env *env = perf_evsel__env(evsel); + const char *arch_name = perf_env__arch(env); + + return arch_syscalls__strerrno(arch_name, err); +} + static int 
trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1730,7 +1738,7 @@ signed_print: errno_print: { char bf[STRERR_BUFSIZE]; const char *emsg = str_error_r(-ret, bf, sizeof(bf)), - *e = audit_errno_to_name(-ret); + *e = errno_to_name(evsel, -ret); fprintf(trace->output, ") = -1 %s %s", e, emsg); } -- cgit v1.2.3 From b3fa38963a6a95bef888350ff3125182462c523c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:17 +0100 Subject: perf trace: Remove audit-libs dependency if syscall tables are present Change the Makefile and build process to no longer require audit-libs interfaces when the architecture provides system call tables. Committer notes: Its not enough to hook into the NO_LIBAUDIT makefile block, we need to define a CONFIG_TRACE that gets selected by both architectures generating the syscall tables from the kernel headers and from detecting the availability of libaudit. With that in place we will not link against libaudit even if the necessary files are available for that, in fact we will not even try to detect its availability, speeding up a bit the feature detection phase. 
Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-6-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-j68lub6ipm8apvy52vd3l4cm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 4 ++-- tools/perf/Makefile.config | 20 ++++++++++++-------- tools/perf/builtin-help.c | 2 +- tools/perf/perf.c | 4 ++-- tools/perf/util/Build | 2 +- tools/perf/util/generate-cmdlist.sh | 2 +- 6 files changed, 19 insertions(+), 15 deletions(-) diff --git a/tools/perf/Build b/tools/perf/Build index b48ca40fccf9..e5232d567611 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -25,7 +25,7 @@ perf-y += builtin-data.o perf-y += builtin-version.o perf-y += builtin-c2c.o -perf-$(CONFIG_AUDIT) += builtin-trace.o +perf-$(CONFIG_TRACE) += builtin-trace.o perf-$(CONFIG_LIBELF) += builtin-probe.o perf-y += bench/ @@ -50,6 +50,6 @@ libperf-y += util/ libperf-y += arch/ libperf-y += ui/ libperf-y += scripts/ -libperf-$(CONFIG_AUDIT) += trace/beauty/ +libperf-$(CONFIG_TRACE) += trace/beauty/ gtk-y += ui/gtk/ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 92265b32dddd..a042ccca4e93 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -531,14 +531,18 @@ ifndef NO_LIBUNWIND EXTLIBS += $(EXTLIBS_LIBUNWIND) endif -ifndef NO_LIBAUDIT - ifneq ($(feature-libaudit), 1) - msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); - NO_LIBAUDIT := 1 - else - CFLAGS += -DHAVE_LIBAUDIT_SUPPORT - EXTLIBS += -laudit - $(call detected,CONFIG_AUDIT) +ifeq ($(NO_SYSCALL_TABLE),0) + $(call detected,CONFIG_TRACE) +else + ifndef NO_LIBAUDIT + ifneq ($(feature-libaudit), 1) + msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); + NO_LIBAUDIT := 1 + else + CFLAGS += 
-DHAVE_LIBAUDIT_SUPPORT + EXTLIBS += -laudit + $(call detected,CONFIG_TRACE) + endif endif endif diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index a0f7ed2b869b..4aca13f23b9d 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -439,7 +439,7 @@ int cmd_help(int argc, const char **argv) #ifdef HAVE_LIBELF_SUPPORT "probe", #endif -#ifdef HAVE_LIBAUDIT_SUPPORT +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) "trace", #endif NULL }; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 62b13518bc6e..1b3fc8ec0fa2 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -73,7 +73,7 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#ifdef HAVE_LIBAUDIT_SUPPORT +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, @@ -485,7 +485,7 @@ int main(int argc, const char **argv) argv[0] = cmd; } if (strstarts(cmd, "trace")) { -#ifdef HAVE_LIBAUDIT_SUPPORT +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 7c6a8b461e24..4eef0c243306 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -44,7 +44,7 @@ libperf-y += machine.o libperf-y += map.o libperf-y += pstack.o libperf-y += session.o -libperf-$(CONFIG_AUDIT) += syscalltbl.o +libperf-$(CONFIG_TRACE) += syscalltbl.o libperf-y += ordered-events.o libperf-y += namespaces.o libperf-y += comm.o diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index 9bbcec4e3365..ff17920a5ebc 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#ifdef HAVE_LIBAUDIT_SUPPORT" +echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)" sed 
-n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd -- cgit v1.2.3 From 20d59023c5ec4426284af492808bcea1f39787ef Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 23 Jan 2018 09:10:19 -0700 Subject: block: Set BIO_TRACE_COMPLETION on new bio during split We inadvertently set it again on the source bio, but we need to set it on the new split bio instead. Fixes: fbbaf700e7b1 ("block: trace completion of all bios.") Signed-off-by: Goldwyn Rodrigues Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index fe1efbeaf4aa..77993fb4bac6 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1808,7 +1808,7 @@ struct bio *bio_split(struct bio *bio, int sectors, bio_advance(bio, split->bi_iter.bi_size); if (bio_flagged(bio, BIO_TRACE_COMPLETION)) - bio_set_flag(bio, BIO_TRACE_COMPLETION); + bio_set_flag(split, BIO_TRACE_COMPLETION); return split; } -- cgit v1.2.3 From e2ac83d74a4d753cea88407e65136c84a0cb60b2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 22 Jan 2018 22:07:46 -0600 Subject: x86/ftrace: Fix ORC unwinding from ftrace handlers Steven Rostedt discovered that the ftrace stack tracer is broken when it's used with the ORC unwinder. The problem is that objtool is instructed by the Makefile to ignore the ftrace_64.S code, so it doesn't generate any ORC data for it. Fix it by making the asm code objtool-friendly: - Objtool doesn't like the fact that save_mcount_regs pushes RBP at the beginning, but it's never restored (directly, at least). So just skip the original RBP push, which is only needed for frame pointers anyway. - Annotate some functions as normal callable functions with ENTRY/ENDPROC. - Add an empty unwind hint to return_to_handler(). The return address isn't on the stack, so there's nothing ORC can do there. It will just punt in the unlikely case it tries to unwind from that code. 
With all that fixed, remove the OBJECT_FILES_NON_STANDARD Makefile annotation so objtool can read the file. Link: http://lkml.kernel.org/r/20180123040746.ih4ep3tk4pbjvg7c@treble Reported-by: Steven Rostedt Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/Makefile | 5 ++++- arch/x86/kernel/ftrace_64.S | 24 +++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 81bb565f4497..7e2baf7304ae 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,10 +29,13 @@ KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y -OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y OBJECT_FILES_NON_STANDARD_test_nx.o := y OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y +ifdef CONFIG_FRAME_POINTER +OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y +endif + # If instrumentation of this dir is enabled, boot hangs during first second. 
# Probably could be more selective here, but note that files related to irqs, # boot, dumpstack/stacktrace, etc are either non-interesting or can lead to diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 7cb8ba08beb9..ef61f540cf0a 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -8,6 +8,7 @@ #include #include #include +#include .code64 .section .entry.text, "ax" @@ -20,7 +21,6 @@ EXPORT_SYMBOL(__fentry__) EXPORT_SYMBOL(mcount) #endif -/* All cases save the original rbp (8 bytes) */ #ifdef CONFIG_FRAME_POINTER # ifdef CC_USING_FENTRY /* Save parent and function stack frames (rip and rbp) */ @@ -31,7 +31,7 @@ EXPORT_SYMBOL(mcount) # endif #else /* No need to save a stack frame */ -# define MCOUNT_FRAME_SIZE 8 +# define MCOUNT_FRAME_SIZE 0 #endif /* CONFIG_FRAME_POINTER */ /* Size of stack used to save mcount regs in save_mcount_regs */ @@ -64,10 +64,10 @@ EXPORT_SYMBOL(mcount) */ .macro save_mcount_regs added=0 - /* Always save the original rbp */ +#ifdef CONFIG_FRAME_POINTER + /* Save the original rbp */ pushq %rbp -#ifdef CONFIG_FRAME_POINTER /* * Stack traces will stop at the ftrace trampoline if the frame pointer * is not set up properly. If fentry is used, we need to save a frame @@ -105,7 +105,11 @@ EXPORT_SYMBOL(mcount) * Save the original RBP. Even though the mcount ABI does not * require this, it helps out callers. 
*/ +#ifdef CONFIG_FRAME_POINTER movq MCOUNT_REG_SIZE-8(%rsp), %rdx +#else + movq %rbp, %rdx +#endif movq %rdx, RBP(%rsp) /* Copy the parent address into %rsi (second parameter) */ @@ -148,7 +152,7 @@ EXPORT_SYMBOL(mcount) ENTRY(function_hook) retq -END(function_hook) +ENDPROC(function_hook) ENTRY(ftrace_caller) /* save_mcount_regs fills in first two parameters */ @@ -184,7 +188,7 @@ GLOBAL(ftrace_graph_call) /* This is weak to keep gas from relaxing the jumps */ WEAK(ftrace_stub) retq -END(ftrace_caller) +ENDPROC(ftrace_caller) ENTRY(ftrace_regs_caller) /* Save the current flags before any operations that can change them */ @@ -255,7 +259,7 @@ GLOBAL(ftrace_regs_caller_end) jmp ftrace_epilogue -END(ftrace_regs_caller) +ENDPROC(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ @@ -313,9 +317,10 @@ ENTRY(ftrace_graph_caller) restore_mcount_regs retq -END(ftrace_graph_caller) +ENDPROC(ftrace_graph_caller) -GLOBAL(return_to_handler) +ENTRY(return_to_handler) + UNWIND_HINT_EMPTY subq $24, %rsp /* Save the return values */ @@ -330,4 +335,5 @@ GLOBAL(return_to_handler) movq (%rsp), %rax addq $24, %rsp JMP_NOSPEC %rdi +END(return_to_handler) #endif -- cgit v1.2.3 From 62314e405fa101dbb82563394f9dfc225e3f1167 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 23 Jan 2018 09:16:19 -0700 Subject: nvme-pci: Fix queue double allocations The queue count says the highest queue that's been allocated, so don't reallocate a queue lower than that. 
Fixes: 147b27e4bd0 ("nvme-pci: allocate device queues storage space at probe") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a2ffb557b616..c46c239cc1ff 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1384,6 +1384,9 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, { struct nvme_queue *nvmeq = &dev->queues[qid]; + if (dev->ctrl.queue_count > qid) + return 0; + nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth), &nvmeq->cq_dma_addr, GFP_KERNEL); if (!nvmeq->cqes) -- cgit v1.2.3 From 6be7fa3c74d1e0cd50f2157b5c1524f152bf641e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 22 Jan 2018 22:32:51 -0500 Subject: ftrace, orc, x86: Handle ftrace dynamically allocated trampolines The function tracer can create a dynamically allocated trampoline that is called by the function mcount or fentry hook that is used to call the function callback that is registered. The problem is that the orc unwinder will bail if it encounters one of these trampolines. This breaks the stack trace of function callbacks, which include the stack tracer and setting the stack trace for individual functions. Since these dynamic trampolines are basically copies of the static ftrace trampolines defined in ftrace_*.S, we do not need to create new orc entries for the dynamic trampolines. Finding the return address on the stack will be identical as the functions that were copied to create the dynamic trampolines. When encountering a ftrace dynamic trampoline, we can just use the orc entry of the ftrace static function that was copied for that trampoline. 
Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/unwind_orc.c | 48 +++++++++++++++++++++++++++++++++++++++++++- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 29 +++++++++++++++----------- 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index be86a865087a..1f9188f5357c 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -74,8 +74,50 @@ static struct orc_entry *orc_module_find(unsigned long ip) } #endif +#ifdef CONFIG_DYNAMIC_FTRACE +static struct orc_entry *orc_find(unsigned long ip); + +/* + * Ftrace dynamic trampolines do not have orc entries of their own. + * But they are copies of the ftrace entries that are static and + * defined in ftrace_*.S, which do have orc entries. + * + * If the unwinder comes across a ftrace trampoline, then find the + * ftrace function that was used to create it, and use that ftrace + * function's orc entry, as the placement of the return code in + * the stack will be identical. 
+ */ +static struct orc_entry *orc_ftrace_find(unsigned long ip) +{ + struct ftrace_ops *ops; + unsigned long caller; + + ops = ftrace_ops_trampoline(ip); + if (!ops) + return NULL; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + caller = (unsigned long)ftrace_regs_call; + else + caller = (unsigned long)ftrace_call; + + /* Prevent unlikely recursion */ + if (ip == caller) + return NULL; + + return orc_find(caller); +} +#else +static struct orc_entry *orc_ftrace_find(unsigned long ip) +{ + return NULL; +} +#endif + static struct orc_entry *orc_find(unsigned long ip) { + static struct orc_entry *orc; + if (!orc_init) return NULL; @@ -111,7 +153,11 @@ static struct orc_entry *orc_find(unsigned long ip) __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); /* Module lookup: */ - return orc_module_find(ip); + orc = orc_module_find(ip); + if (orc) + return orc; + + return orc_ftrace_find(ip); } static void orc_sort_swap(void *_a, void *_b, int size) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2bab81951ced..3319df9727aa 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -332,6 +332,8 @@ extern int ftrace_text_reserved(const void *start, const void *end); extern int ftrace_nr_registered_ops(void); +struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr); + bool is_ftrace_trampoline(unsigned long addr); /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ccdf3664e4a9..554b517c61a0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1119,15 +1119,11 @@ static struct ftrace_ops global_ops = { }; /* - * This is used by __kernel_text_address() to return true if the - * address is on a dynamically allocated trampoline that would - * not return true for either core_kernel_text() or - * is_module_text_address(). + * Used by the stack unwinder to know about dynamic ftrace trampolines. 
*/ -bool is_ftrace_trampoline(unsigned long addr) +struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) { - struct ftrace_ops *op; - bool ret = false; + struct ftrace_ops *op = NULL; /* * Some of the ops may be dynamically allocated, @@ -1144,15 +1140,24 @@ bool is_ftrace_trampoline(unsigned long addr) if (op->trampoline && op->trampoline_size) if (addr >= op->trampoline && addr < op->trampoline + op->trampoline_size) { - ret = true; - goto out; + preempt_enable_notrace(); + return op; } } while_for_each_ftrace_op(op); - - out: preempt_enable_notrace(); - return ret; + return NULL; +} + +/* + * This is used by __kernel_text_address() to return true if the + * address is on a dynamically allocated trampoline that would + * not return true for either core_kernel_text() or + * is_module_text_address(). + */ +bool is_ftrace_trampoline(unsigned long addr) +{ + return ftrace_ops_trampoline(addr) != NULL; } struct ftrace_page { -- cgit v1.2.3 From 2ee5b92a2598d9e403337185fdf88f661dee8616 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 23 Jan 2018 13:25:04 -0500 Subject: tracing: Update stack trace skipping for ORC unwinder With the addition of ORC unwinder and FRAME POINTER unwinder, the stack trace skipping requirements have changed. I went through the tracing stack trace dumps with ORC and with frame pointers and recalculated the proper values. 
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 34 ++++++++++++++----------- kernel/trace/trace_events_trigger.c | 13 ++++++++-- kernel/trace/trace_functions.c | 49 +++++++++++++++++++++++++++---------- 3 files changed, 67 insertions(+), 29 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a8d8a294345..8e3f20a18a06 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2374,6 +2374,15 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); +/* + * Skip 3: + * + * trace_buffer_unlock_commit_regs() + * trace_event_buffer_commit() + * trace_event_raw_event_xxx() +*/ +# define STACK_SKIP 3 + void trace_buffer_unlock_commit_regs(struct trace_array *tr, struct ring_buffer *buffer, struct ring_buffer_event *event, @@ -2383,16 +2392,12 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, __buffer_unlock_commit(buffer, event); /* - * If regs is not set, then skip the following callers: - * trace_buffer_unlock_commit_regs - * event_trigger_unlock_commit - * trace_event_buffer_commit - * trace_event_raw_event_sched_switch + * If regs is not set, then skip the necessary functions. * Note, we can still get here via blktrace, wakeup tracer * and mmiotrace, but that's ok if they lose a function or - * two. They are that meaningful. + * two. They are not that meaningful. */ - ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs); + ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } @@ -2579,11 +2584,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, trace.skip = skip; /* - * Add two, for this function and the call to save_stack_trace() + * Add one, for this function and the call to save_stack_trace() * If regs is set, then these functions will not be in the way. 
*/ +#ifndef CONFIG_UNWINDER_ORC if (!regs) - trace.skip += 2; + trace.skip++; +#endif /* * Since events can happen in NMIs there's no safe way to @@ -2711,11 +2718,10 @@ void trace_dump_stack(int skip) local_save_flags(flags); - /* - * Skip 3 more, seems to get us at the caller of - * this function. - */ - skip += 3; +#ifndef CONFIG_UNWINDER_ORC + /* Skip 1 to skip this function. */ + skip++; +#endif __ftrace_trace_stack(global_trace.trace_buffer.buffer, flags, skip, preempt_count(), NULL); } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index f2ac9d44f6c4..87411482a46f 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1123,13 +1123,22 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; } #endif /* CONFIG_TRACER_SNAPSHOT */ #ifdef CONFIG_STACKTRACE +#ifdef CONFIG_UNWINDER_ORC +/* Skip 2: + * event_triggers_post_call() + * trace_event_raw_event_xxx() + */ +# define STACK_SKIP 2 +#else /* - * Skip 3: + * Skip 4: * stacktrace_trigger() * event_triggers_post_call() + * trace_event_buffer_commit() * trace_event_raw_event_xxx() */ -#define STACK_SKIP 3 +#define STACK_SKIP 4 +#endif static void stacktrace_trigger(struct event_trigger_data *data, void *rec) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 27f7ad12c4b1..b611cd36e22d 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -154,6 +154,24 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, preempt_enable_notrace(); } +#ifdef CONFIG_UNWINDER_ORC +/* + * Skip 2: + * + * function_stack_trace_call() + * ftrace_call() + */ +#define STACK_SKIP 2 +#else +/* + * Skip 3: + * __trace_stack() + * function_stack_trace_call() + * ftrace_call() + */ +#define STACK_SKIP 3 +#endif + static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) @@ -180,15 +198,7 @@ 
function_stack_trace_call(unsigned long ip, unsigned long parent_ip, if (likely(disabled == 1)) { pc = preempt_count(); trace_function(tr, ip, parent_ip, flags, pc); - /* - * skip over 5 funcs: - * __ftrace_trace_stack, - * __trace_stack, - * function_stack_trace_call - * ftrace_list_func - * ftrace_call - */ - __trace_stack(tr, flags, 5, pc); + __trace_stack(tr, flags, STACK_SKIP, pc); } atomic_dec(&data->disabled); @@ -367,14 +377,27 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, tracer_tracing_off(tr); } +#ifdef CONFIG_UNWINDER_ORC /* - * Skip 4: + * Skip 3: + * + * function_trace_probe_call() + * ftrace_ops_assist_func() + * ftrace_call() + */ +#define FTRACE_STACK_SKIP 3 +#else +/* + * Skip 5: + * + * __trace_stack() * ftrace_stacktrace() * function_trace_probe_call() - * ftrace_ops_list_func() + * ftrace_ops_assist_func() * ftrace_call() */ -#define STACK_SKIP 4 +#define FTRACE_STACK_SKIP 5 +#endif static __always_inline void trace_stack(struct trace_array *tr) { @@ -384,7 +407,7 @@ static __always_inline void trace_stack(struct trace_array *tr) local_save_flags(flags); pc = preempt_count(); - __trace_stack(tr, flags, STACK_SKIP, pc); + __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); } static void -- cgit v1.2.3 From 02612bb05e51df8489db5e94d0cf8d1c81f87b0c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 22 Jan 2018 18:06:37 +0100 Subject: pppoe: take ->needed_headroom of lower device into account on xmit In pppoe_sendmsg(), reserving dev->hard_header_len bytes of headroom was probably fine before the introduction of ->needed_headroom in commit f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom"). But now, virtual devices typically advertise the size of their overhead in dev->needed_headroom, so we must also take it into account in skb_reserve(). Allocation size of skb is also updated to take dev->needed_tailroom into account and replace the arbitrary 32 bytes with the real size of a PPPoE header. 
This issue was discovered by syzbot, who connected a pppoe socket to a gre device which had dev->header_ops->create == ipgre_header and dev->hard_header_len == 0. Therefore, PPPoE didn't reserve any headroom, and dev_hard_header() crashed when ipgre_header() tried to prepend its header to skb->data. skbuff: skb_under_panic: text:000000001d390b3a len:31 put:24 head:00000000d8ed776f data:000000008150e823 tail:0x7 end:0xc0 dev:gre0 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:104! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 3670 Comm: syzkaller801466 Not tainted 4.15.0-rc7-next-20180115+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:skb_panic+0x162/0x1f0 net/core/skbuff.c:100 RSP: 0018:ffff8801d9bd7840 EFLAGS: 00010282 RAX: 0000000000000083 RBX: ffff8801d4f083c0 RCX: 0000000000000000 RDX: 0000000000000083 RSI: 1ffff1003b37ae92 RDI: ffffed003b37aefc RBP: ffff8801d9bd78a8 R08: 1ffff1003b37ae8a R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff86200de0 R13: ffffffff84a981ad R14: 0000000000000018 R15: ffff8801d2d34180 FS: 00000000019c4880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000208bc000 CR3: 00000001d9111001 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_under_panic net/core/skbuff.c:114 [inline] skb_push+0xce/0xf0 net/core/skbuff.c:1714 ipgre_header+0x6d/0x4e0 net/ipv4/ip_gre.c:879 dev_hard_header include/linux/netdevice.h:2723 [inline] pppoe_sendmsg+0x58e/0x8b0 drivers/net/ppp/pppoe.c:890 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg+0xca/0x110 net/socket.c:640 sock_write_iter+0x31a/0x5d0 net/socket.c:909 call_write_iter include/linux/fs.h:1775 [inline] do_iter_readv_writev+0x525/0x7f0 
fs/read_write.c:653 do_iter_write+0x154/0x540 fs/read_write.c:932 vfs_writev+0x18a/0x340 fs/read_write.c:977 do_writev+0xfc/0x2a0 fs/read_write.c:1012 SYSC_writev fs/read_write.c:1085 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1082 entry_SYSCALL_64_fastpath+0x29/0xa0 Admittedly PPPoE shouldn't be allowed to run on non Ethernet-like interfaces, but reserving space for ->needed_headroom is a more fundamental issue that needs to be addressed first. Same problem exists for __pppoe_xmit(), which also needs to take dev->needed_headroom into account in skb_cow_head(). Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom") Reported-by: syzbot+ed0838d0fa4c4f2b528e20286e6dc63effc7c14d@syzkaller.appspotmail.com Signed-off-by: Guillaume Nault Reviewed-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 4e1da1645b15..5aa59f41bf8c 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, struct pppoe_hdr *ph; struct net_device *dev; char *start; + int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { @@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, if (total_len > (dev->mtu + dev->hard_header_len)) goto end; - - skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, - 0, GFP_KERNEL); + hlen = LL_RESERVED_SPACE(dev); + skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. 
*/ - skb_reserve(skb, dev->hard_header_len); + skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; @@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) /* Copy the data if there is no space for the header or if it's * read-only. */ - if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) + if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); -- cgit v1.2.3 From e9191ffb65d8e159680ce0ad2224e1acbde6985c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 22 Jan 2018 20:06:42 +0000 Subject: ipv6: Fix getsockopt() for sockets with default IPV6_AUTOFLOWLABEL Commit 513674b5a2c9 ("net: reevalulate autoflowlabel setting after sysctl setting") removed the initialisation of ipv6_pinfo::autoflowlabel and added a second flag to indicate whether this field or the net namespace default should be used. The getsockopt() handling for this case was not updated, so it currently returns 0 for all sockets for which IPV6_AUTOFLOWLABEL is not explicitly enabled. Fix it to return the effective value, whether that has been set at the socket or net namespace level. Fixes: 513674b5a2c9 ("net: reevalulate autoflowlabel setting after sysctl ...") Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller --- include/net/ipv6.h | 1 + net/ipv6/ip6_output.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f73797e2fa60..221238254eb7 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -331,6 +331,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, int flags); int ip6_flowlabel_init(void); void ip6_flowlabel_cleanup(void); +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); static inline void fl6_sock_release(struct ip6_flowlabel *fl) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4f7d8de56611..3763dc01e374 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -166,7 +166,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) { if (!np->autoflowlabel_set) return ip6_default_np_autolabel(net); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2d4680e0376f..e8ffb5b5d84e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1336,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_AUTOFLOWLABEL: - val = np->autoflowlabel; + val = ip6_autoflowlabel(sock_net(sk), np); break; case IPV6_RECVFRAGSIZE: -- cgit v1.2.3 From 848b159835ddef99cc4193083f7e786c3992f580 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 22 Jan 2018 16:06:37 -0500 Subject: vmxnet3: repair memory leak with the introduction of commit b0eb57cb97e7837ebb746404c2c58c6f536f23fa, it appears that rq->buf_info is improperly handled. 
While it is heap allocated when an rx queue is set up, and freed when torn down, an old line of code in vmxnet3_rq_destroy was not properly removed, leading to rq->buf_info[0] being set to NULL prior to its being freed, causing a memory leak, which eventually exhausts the system on repeated create/destroy operations (for example, when the mtu of a vmxnet3 interface is changed frequently). Fix is pretty straightforward, just move the NULL set to after the free. Tested by myself with successful results Applies to net, and should likely be queued for stable, please Signed-off-by: Neil Horman Reported-By: boyang@redhat.com CC: boyang@redhat.com CC: Shrikrishna Khare CC: "VMware, Inc." CC: David S. Miller Acked-by: Shrikrishna Khare Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index d1c7029ded7c..cf95290b160c 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, rq->rx_ring[i].basePA); rq->rx_ring[i].base = NULL; } - rq->buf_info[i] = NULL; } if (rq->data_ring.base) { @@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, (rq->rx_ring[0].size + rq->rx_ring[1].size); dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], rq->buf_info_pa); + rq->buf_info[0] = rq->buf_info[1] = NULL; } } -- cgit v1.2.3 From a97cb0e7b3f4c6297fd857055ae8e895f402f501 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jan 2018 11:39:47 +0100 Subject: futex: Fix OWNER_DEAD fixup Both Geert and DaveJ reported that the recent futex commit: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex") introduced a problem with setting OWNER_DEAD. We set the bit on an uninitialized variable and then entirely optimize it away as a dead-store.
Move the setting of the bit to where it is more useful. Reported-by: Geert Uytterhoeven Reported-by: Dave Jones Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex") Link: http://lkml.kernel.org/r/20180122103947.GD2228@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/futex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 8c5424dd5924..7f719d110908 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2311,9 +2311,6 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); oldowner = pi_state->owner; - /* Owner died? */ - if (!pi_state->owner) - newtid |= FUTEX_OWNER_DIED; /* * We are here because either: @@ -2374,6 +2371,9 @@ retry: } newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; + /* Owner died? */ + if (!pi_state->owner) + newtid |= FUTEX_OWNER_DIED; if (get_futex_value_locked(&uval, uaddr)) goto handle_fault; -- cgit v1.2.3 From 88f1c87de11a86d839f4ce5313e552d96709b990 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Jan 2018 14:00:55 -0800 Subject: locking/lockdep: Avoid triggering hardlockup from debug_show_all_locks() debug_show_all_locks() iterates all tasks and prints held locks while holding tasklist_lock. This can take a while on a slow console device and may end up triggering the NMI hardlockup detector if someone else ends up waiting for tasklist_lock. Touch the NMI watchdog while printing the held locks to avoid spuriously triggering the hardlockup detector.
Signed-off-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20180122220055.GB1771050@devbig577.frc2.facebook.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 5fa1324a4f29..521659044719 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -4490,6 +4491,7 @@ retry: if (!unlock) if (read_trylock(&tasklist_lock)) unlock = 1; + touch_nmi_watchdog(); } while_each_thread(g, p); pr_warn("\n"); -- cgit v1.2.3 From ce48c146495a1a50e48cdbfbfaba3e708be7c07c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jan 2018 22:53:28 +0100 Subject: sched/core: Fix cpu.max vs. cpuhotplug deadlock Tejun reported the following cpu-hotplug lock (percpu-rwsem) read recursion: tg_set_cfs_bandwidth() get_online_cpus() cpus_read_lock() cfs_bandwidth_usage_inc() static_key_slow_inc() cpus_read_lock() Reported-by: Tejun Heo Tested-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180122215328.GP3397@worktop Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 7 +++++++ kernel/jump_label.c | 12 +++++++++--- kernel/sched/fair.c | 4 ++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index c7b368c734af..e0340ca08d98 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry, extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); +extern void static_key_slow_inc_cpuslocked(struct 
static_key *key); +extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); @@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key) atomic_dec(&key->enabled); } +#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) +#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) + static inline int jump_label_text_reserved(void *start, void *end) { return 0; @@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void); #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) +#define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) +#define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 8594d24e4adc..b4517095db6a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -79,7 +79,7 @@ int static_key_count(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_count); -static void static_key_slow_inc_cpuslocked(struct static_key *key) +void static_key_slow_inc_cpuslocked(struct static_key *key) { int v, v1; @@ -180,7 +180,7 @@ void static_key_disable(struct static_key *key) } EXPORT_SYMBOL_GPL(static_key_disable); -static void static_key_slow_dec_cpuslocked(struct static_key *key, +static void __static_key_slow_dec_cpuslocked(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { @@ -211,7 +211,7 @@ static void __static_key_slow_dec(struct static_key *key, struct delayed_work *work) { cpus_read_lock(); - static_key_slow_dec_cpuslocked(key, rate_limit, work); + __static_key_slow_dec_cpuslocked(key, rate_limit, work); cpus_read_unlock(); } @@ -229,6 +229,12 @@ void static_key_slow_dec(struct static_key *key) 
} EXPORT_SYMBOL_GPL(static_key_slow_dec); +void static_key_slow_dec_cpuslocked(struct static_key *key) +{ + STATIC_KEY_CHECK_USE(key); + __static_key_slow_dec_cpuslocked(key, 0, NULL); +} + void static_key_slow_dec_deferred(struct static_key_deferred *key) { STATIC_KEY_CHECK_USE(key); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2fe3aa853e4d..26a71ebcd3c2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4365,12 +4365,12 @@ static inline bool cfs_bandwidth_used(void) void cfs_bandwidth_usage_inc(void) { - static_key_slow_inc(&__cfs_bandwidth_used); + static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used); } void cfs_bandwidth_usage_dec(void) { - static_key_slow_dec(&__cfs_bandwidth_used); + static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used); } #else /* HAVE_JUMP_LABEL */ static bool cfs_bandwidth_used(void) -- cgit v1.2.3 From 1df37383a8aeabb9b418698f0bcdffea01f4b1b2 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 22 Jan 2018 17:09:34 -0500 Subject: x86/retpoline: Remove the esp/rsp thunk It doesn't make sense to have an indirect call thunk with esp/rsp as retpoline code won't work correctly with the stack pointer register. Removing it will help compiler writers to catch error in case such a thunk call is emitted incorrectly. 
Fixes: 76b043848fd2 ("x86/retpoline: Add initial retpoline support") Suggested-by: Jeff Law Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Acked-by: David Woodhouse Cc: Tom Lendacky Cc: Kees Cook Cc: Andi Kleen Cc: Tim Chen Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1516658974-27852-1-git-send-email-longman@redhat.com --- arch/x86/include/asm/asm-prototypes.h | 1 - arch/x86/lib/retpoline.S | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 0927cdc4f946..1908214b9125 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,5 +38,4 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) -INDIRECT_THUNK(sp) #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index dfb2ba91b670..c909961e678a 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -36,7 +36,6 @@ GENERATE_THUNK(_ASM_DX) GENERATE_THUNK(_ASM_SI) GENERATE_THUNK(_ASM_DI) GENERATE_THUNK(_ASM_BP) -GENERATE_THUNK(_ASM_SP) #ifdef CONFIG_64BIT GENERATE_THUNK(r8) GENERATE_THUNK(r9) -- cgit v1.2.3 From c5baa1be8f559d5f33c412d00cc1c86762a8bbbf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Jan 2018 14:26:47 +0000 Subject: irqdomain: Kill CONFIG_IRQ_DOMAIN_DEBUG CONFIG_IRQ_DOMAIN_DEBUG is similar to CONFIG_GENERIC_IRQ_DEBUGFS, just with less information. Spring cleanup time. 
Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Yang Shunyong Link: https://lkml.kernel.org/r/20180117142647.23622-1-marc.zyngier@arm.com --- Documentation/IRQ-domain.txt | 36 +-------- arch/arm/configs/aspeed_g4_defconfig | 1 - arch/arm/configs/aspeed_g5_defconfig | 1 - arch/arm/configs/hisi_defconfig | 1 - arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/configs/mvebu_v7_defconfig | 1 - arch/arm/configs/pxa_defconfig | 1 - arch/arm/configs/sama5_defconfig | 1 - arch/arm/configs/tegra_defconfig | 1 - arch/arm/configs/vt8500_v6_v7_defconfig | 1 - arch/powerpc/configs/fsl-emb-nonhw.config | 1 - arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/configs/pseries_defconfig | 1 - arch/xtensa/configs/audio_kc705_defconfig | 1 - arch/xtensa/configs/cadence_csp_defconfig | 1 - arch/xtensa/configs/generic_kc705_defconfig | 1 - arch/xtensa/configs/nommu_kc705_defconfig | 1 - arch/xtensa/configs/smp_lx200_defconfig | 1 - kernel/irq/Kconfig | 10 --- kernel/irq/irqdomain.c | 118 ---------------------------- 21 files changed, 2 insertions(+), 180 deletions(-) diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt index 4a1cd7645d85..507775cce753 100644 --- a/Documentation/IRQ-domain.txt +++ b/Documentation/IRQ-domain.txt @@ -265,37 +265,5 @@ support other architectures, such as ARM, ARM64 etc. === Debugging === -If you switch on CONFIG_IRQ_DOMAIN_DEBUG (which depends on -CONFIG_IRQ_DOMAIN and CONFIG_DEBUG_FS), you will find a new file in -your debugfs mount point, called irq_domain_mapping. 
This file -contains a live snapshot of all the IRQ domains in the system: - - name mapped linear-max direct-max devtree-node - pl061 8 8 0 /smb/gpio@e0080000 - pl061 8 8 0 /smb/gpio@e1050000 - pMSI 0 0 0 /interrupt-controller@e1101000/v2m@e0080000 - MSI 37 0 0 /interrupt-controller@e1101000/v2m@e0080000 - GICv2m 37 0 0 /interrupt-controller@e1101000/v2m@e0080000 - GICv2 448 448 0 /interrupt-controller@e1101000 - -it also iterates over the interrupts to display their mapping in the -domains, and makes the domain stacking visible: - - -irq hwirq chip name chip data active type domain - 1 0x00019 GICv2 0xffff00000916bfd8 * LINEAR GICv2 - 2 0x0001d GICv2 0xffff00000916bfd8 LINEAR GICv2 - 3 0x0001e GICv2 0xffff00000916bfd8 * LINEAR GICv2 - 4 0x0001b GICv2 0xffff00000916bfd8 * LINEAR GICv2 - 5 0x0001a GICv2 0xffff00000916bfd8 LINEAR GICv2 -[...] - 96 0x81808 MSI 0x (null) RADIX MSI - 96+ 0x00063 GICv2m 0xffff8003ee116980 RADIX GICv2m - 96+ 0x00063 GICv2 0xffff00000916bfd8 LINEAR GICv2 - 97 0x08800 MSI 0x (null) * RADIX MSI - 97+ 0x00064 GICv2m 0xffff8003ee116980 * RADIX GICv2m - 97+ 0x00064 GICv2 0xffff00000916bfd8 * LINEAR GICv2 - -Here, interrupts 1-5 are only using a single domain, while 96 and 97 -are build out of a stack of three domain, each level performing a -particular function. +Most of the internals of the IRQ subsystem are exposed in debugfs by +turning CONFIG_GENERIC_IRQ_DEBUGFS on. 
diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig index d23b9d56a88b..95946dee9c77 100644 --- a/arch/arm/configs/aspeed_g4_defconfig +++ b/arch/arm/configs/aspeed_g4_defconfig @@ -1,7 +1,6 @@ CONFIG_KERNEL_XZ=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig index c0ad7b82086b..8c7ea033cdc2 100644 --- a/arch/arm/configs/aspeed_g5_defconfig +++ b/arch/arm/configs/aspeed_g5_defconfig @@ -1,7 +1,6 @@ CONFIG_KERNEL_XZ=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 diff --git a/arch/arm/configs/hisi_defconfig b/arch/arm/configs/hisi_defconfig index b2e340b272ee..74d611e41e02 100644 --- a/arch/arm/configs/hisi_defconfig +++ b/arch/arm/configs/hisi_defconfig @@ -1,4 +1,3 @@ -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 61509c4b769f..b659244902cd 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_CGROUPS=y diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig index 69553704f2dc..ee61be093633 100644 --- a/arch/arm/configs/mvebu_v7_defconfig +++ b/arch/arm/configs/mvebu_v7_defconfig @@ -1,6 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index 830e817a028a..837d0c9c8b0e 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -1,7 +1,6 @@ 
CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig index 6529cb43e0fd..2080025556b5 100644 --- a/arch/arm/configs/sama5_defconfig +++ b/arch/arm/configs/sama5_defconfig @@ -2,7 +2,6 @@ # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 6678f2929356..c819be04187e 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -1,5 +1,4 @@ CONFIG_SYSVIPC=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y diff --git a/arch/arm/configs/vt8500_v6_v7_defconfig b/arch/arm/configs/vt8500_v6_v7_defconfig index 1bfaa7bfc392..9b85326ba287 100644 --- a/arch/arm/configs/vt8500_v6_v7_defconfig +++ b/arch/arm/configs/vt8500_v6_v7_defconfig @@ -1,4 +1,3 @@ -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index cc49c95494da..e0567dc41968 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -71,7 +71,6 @@ CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_SCTP=m CONFIG_IPV6=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_ISO9660_FS=m CONFIG_JFFS2_FS_DEBUG=1 CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 4891bbed6258..73dab7a37386 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -4,7 +4,6 @@ CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y 
CONFIG_TASKSTATS=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 6ddca80c52c3..5033e630afea 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -1,7 +1,6 @@ CONFIG_PPC64=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index bde2cd1005a2..0dd5cf7b566d 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -3,7 +3,6 @@ CONFIG_NR_CPUS=2048 CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_TASKSTATS=y diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig index 8d16925765cb..2bf964df37ba 100644 --- a/arch/xtensa/configs/audio_kc705_defconfig +++ b/arch/xtensa/configs/audio_kc705_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig index f2d3094aa1d1..3221b7053fa3 100644 --- a/arch/xtensa/configs/cadence_csp_defconfig +++ b/arch/xtensa/configs/cadence_csp_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_USELIB=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig index 744adeaf2945..985fa8546e4e 100644 --- a/arch/xtensa/configs/generic_kc705_defconfig +++ b/arch/xtensa/configs/generic_kc705_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y 
CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig index 78c2529d0459..624f9b3a3878 100644 --- a/arch/xtensa/configs/nommu_kc705_defconfig +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig index 14e3ca353ac8..11fed6c06a7c 100644 --- a/arch/xtensa/configs/smp_lx200_defconfig +++ b/arch/xtensa/configs/smp_lx200_defconfig @@ -1,7 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IRQ_TIME_ACCOUNTING=y diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 89e355866450..6fc87ccda1d7 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -103,16 +103,6 @@ config GENERIC_IRQ_MATRIX_ALLOCATOR config GENERIC_IRQ_RESERVATION_MODE bool -config IRQ_DOMAIN_DEBUG - bool "Expose hardware/virtual IRQ mapping via debugfs" - depends on IRQ_DOMAIN && DEBUG_FS - help - This option will show the mapping relationship between hardware irq - numbers and Linux irq numbers. The mapping is exposed via debugfs - in the file "irq_domain_mapping". - - If you don't know what this means you don't need it. 
- # Support forced irq threading config IRQ_FORCED_THREADING bool diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 62068ad46930..e6a9c36470ee 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -897,124 +897,6 @@ unsigned int irq_find_mapping(struct irq_domain *domain, } EXPORT_SYMBOL_GPL(irq_find_mapping); -#ifdef CONFIG_IRQ_DOMAIN_DEBUG -static void virq_debug_show_one(struct seq_file *m, struct irq_desc *desc) -{ - struct irq_domain *domain; - struct irq_data *data; - - domain = desc->irq_data.domain; - data = &desc->irq_data; - - while (domain) { - unsigned int irq = data->irq; - unsigned long hwirq = data->hwirq; - struct irq_chip *chip; - bool direct; - - if (data == &desc->irq_data) - seq_printf(m, "%5d ", irq); - else - seq_printf(m, "%5d+ ", irq); - seq_printf(m, "0x%05lx ", hwirq); - - chip = irq_data_get_irq_chip(data); - seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none"); - - seq_printf(m, "0x%p ", irq_data_get_irq_chip_data(data)); - - seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' '); - direct = (irq == hwirq) && (irq < domain->revmap_direct_max_irq); - seq_printf(m, "%6s%-8s ", - (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX", - direct ? 
"(DIRECT)" : ""); - seq_printf(m, "%s\n", domain->name); -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - domain = domain->parent; - data = data->parent_data; -#else - domain = NULL; -#endif - } -} - -static int virq_debug_show(struct seq_file *m, void *private) -{ - unsigned long flags; - struct irq_desc *desc; - struct irq_domain *domain; - struct radix_tree_iter iter; - void __rcu **slot; - int i; - - seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", - "name", "mapped", "linear-max", "direct-max", "devtree-node"); - mutex_lock(&irq_domain_mutex); - list_for_each_entry(domain, &irq_domain_list, link) { - struct device_node *of_node; - const char *name; - - int count = 0; - - of_node = irq_domain_get_of_node(domain); - if (of_node) - name = of_node_full_name(of_node); - else if (is_fwnode_irqchip(domain->fwnode)) - name = container_of(domain->fwnode, struct irqchip_fwid, - fwnode)->name; - else - name = ""; - - radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0) - count++; - seq_printf(m, "%c%-16s %6u %10u %10u %s\n", - domain == irq_default_domain ? 
'*' : ' ', domain->name, - domain->revmap_size + count, domain->revmap_size, - domain->revmap_direct_max_irq, - name); - } - mutex_unlock(&irq_domain_mutex); - - seq_printf(m, "%-5s %-7s %-15s %-*s %6s %-14s %s\n", "irq", "hwirq", - "chip name", (int)(2 * sizeof(void *) + 2), "chip data", - "active", "type", "domain"); - - for (i = 1; i < nr_irqs; i++) { - desc = irq_to_desc(i); - if (!desc) - continue; - - raw_spin_lock_irqsave(&desc->lock, flags); - virq_debug_show_one(m, desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - } - - return 0; -} - -static int virq_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, virq_debug_show, inode->i_private); -} - -static const struct file_operations virq_debug_fops = { - .open = virq_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init irq_debugfs_init(void) -{ - if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL, - NULL, &virq_debug_fops) == NULL) - return -ENOMEM; - - return 0; -} -__initcall(irq_debugfs_init); -#endif /* CONFIG_IRQ_DOMAIN_DEBUG */ - /** * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings * -- cgit v1.2.3 From 40d4071ce2d20840d224b4a77b5dc6f752c9ab15 Mon Sep 17 00:00:00 2001 From: Xiao Liang Date: Mon, 22 Jan 2018 14:12:52 +0800 Subject: perf/x86/amd/power: Do not load AMD power module on !AMD platforms The AMD power module can be loaded on non AMD platforms, but unload fails with the following Oops: BUG: unable to handle kernel NULL pointer dereference at (null) IP: __list_del_entry_valid+0x29/0x90 Call Trace: perf_pmu_unregister+0x25/0xf0 amd_power_pmu_exit+0x1c/0xd23 [power] SyS_delete_module+0x1a8/0x2b0 ? exit_to_usermode_loop+0x8f/0xb0 entry_SYSCALL_64_fastpath+0x20/0x83 Return -ENODEV instead of 0 from the module init function if the CPU does not match. 
Fixes: c7ab62bfbe0e ("perf/x86/amd/power: Add AMD accumulated power reporting mechanism") Signed-off-by: Xiao Liang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180122061252.6394-1-xiliang@redhat.com --- arch/x86/events/amd/power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index a6eee5ac4f58..2aefacf5c5b2 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void) int ret; if (!x86_match_cpu(cpu_match)) - return 0; + return -ENODEV; if (!boot_cpu_has(X86_FEATURE_ACC_POWER)) return -ENODEV; -- cgit v1.2.3 From 7e702d17ed138cf4ae7c00e8c00681ed464587c7 Mon Sep 17 00:00:00 2001 From: Jia Zhang Date: Tue, 23 Jan 2018 11:41:32 +0100 Subject: x86/microcode/intel: Extend BDW late-loading further with LLC size check Commit b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check") reduced the impact of erratum BDF90 for Broadwell model 79. The impact can be reduced further by checking the size of the last level cache portion per core. Tony: "The erratum says the problem only occurs on the large-cache SKUs. So we only need to avoid the update if we are on a big cache SKU that is also running old microcode." For more details, see erratum BDF90 in document #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family Specification Update) from September 2017. 
Fixes: b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check") Signed-off-by: Jia Zhang Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Tony Luck Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1516321542-31161-1-git-send-email-zhang.jia@linux.alibaba.com --- arch/x86/kernel/cpu/microcode/intel.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index d9e460fc7a3b..f7c55b0e753a 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; /* Current microcode patch used in early patching on the APs. */ static struct microcode_intel *intel_ucode_patch; +/* last level cache size per core */ +static int llc_size_per_core; + static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, unsigned int s2, unsigned int p2) { @@ -912,12 +915,14 @@ static bool is_blacklisted(unsigned int cpu) /* * Late loading on model 79 with microcode revision less than 0x0b000021 - * may result in a system hang. This behavior is documented in item - * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). + * and LLC size per core bigger than 2.5MB may result in a system hang. + * This behavior is documented in item BDF90, #334165 (Intel Xeon + * Processor E7-8800/4800 v4 Product Family). 
*/ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X && c->x86_mask == 0x01 && + llc_size_per_core > 2621440 && c->microcode < 0x0b000021) { pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); @@ -975,6 +980,15 @@ static struct microcode_ops microcode_intel_ops = { .apply_microcode = apply_microcode_intel, }; +static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) +{ + u64 llc_size = c->x86_cache_size * 1024; + + do_div(llc_size, c->x86_max_cores); + + return (int)llc_size; +} + struct microcode_ops * __init init_intel_microcode(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -985,5 +999,7 @@ struct microcode_ops * __init init_intel_microcode(void) return NULL; } + llc_size_per_core = calc_llc_size_per_core(c); + return &microcode_intel_ops; } -- cgit v1.2.3 From 1d080f096fe33f031d26e19b3ef0146f66b8b0f1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 23 Jan 2018 11:41:33 +0100 Subject: x86/microcode: Fix again accessing initrd after having been freed Commit 24c2503255d3 ("x86/microcode: Do not access the initrd after it has been freed") fixed attempts to access initrd from the microcode loader after it has been freed. However, a similar KASAN warning was reported (stack trace edited): smpboot: Booting Node 0 Processor 1 APIC 0x11 ================================================================== BUG: KASAN: use-after-free in find_cpio_data+0x9b5/0xa50 Read of size 1 at addr ffff880035ffd000 by task swapper/1/0 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.14.8-slack #7 Hardware name: System manufacturer System Product Name/A88X-PLUS, BIOS 3003 03/10/2016 Call Trace: dump_stack print_address_description kasan_report ?
find_cpio_data __asan_report_load1_noabort find_cpio_data find_microcode_in_initrd __load_ucode_amd load_ucode_amd_ap load_ucode_ap After some investigation, it turned out that a merge was done using the wrong side to resolve, leading to picking up the previous state, before the 24c2503255d3 fix. Therefore the Fixes tag below contains a merge commit. Revert the mismerge by catching the save_microcode_in_initrd_amd() retval and thus letting the function exit with the last return statement so that initrd_gone can be set to true. Fixes: f26483eaedec ("Merge branch 'x86/urgent' into x86/microcode, to resolve conflicts") Reported-by: Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=198295 Link: https://lkml.kernel.org/r/20180123104133.918-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index c4fa4a85d4cb..e4fc595cd6ea 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -239,7 +239,7 @@ static int __init save_microcode_in_initrd(void) break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - return save_microcode_in_initrd_amd(cpuid_eax(1)); + ret = save_microcode_in_initrd_amd(cpuid_eax(1)); break; default: break; -- cgit v1.2.3 From fe6daab1ee9dfe7f89974ee6c486cccb0f18a61d Mon Sep 17 00:00:00 2001 From: davidwang Date: Mon, 22 Jan 2018 18:14:17 +0800 Subject: x86/centaur: Mark TSC invariant Centaur CPU has a constant frequency TSC and that TSC does not stop in C-States. But because the corresponding TSC feature flags are not set for that CPU, the TSC is treated as not constant frequency and assumed to stop in C-States, which makes it an unreliable and unusable clock source. Setting those flags tells the kernel that the TSC is usable, so it will select it over HPET. 
The effect of this is that reading time stamps (from kernel or user space) will be faster and more efficient. Signed-off-by: davidwang Signed-off-by: Thomas Gleixner Cc: qiyuanwang@zhaoxin.com Cc: linux-pm@vger.kernel.org Cc: brucechang@via-alliance.com Cc: cooperyan@zhaoxin.com Cc: benjaminpan@viatech.com Link: https://lkml.kernel.org/r/1516616057-5158-1-git-send-email-davidwang@zhaoxin.com --- arch/x86/kernel/cpu/centaur.c | 4 ++++ drivers/acpi/processor_idle.c | 1 + 2 files changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 68bc6d9b3132..c578cd29c2d2 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -106,6 +106,10 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif + if (c->x86_power & (1 << 8)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + } } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index d50a7b6ccddd..5f0071c7e2e1 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -207,6 +207,7 @@ static void tsc_check_state(int state) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: case X86_VENDOR_INTEL: + case X86_VENDOR_CENTAUR: /* * AMD Fam10h TSC will tick in all * C/P/S0/S1 states when this bit is set. -- cgit v1.2.3 From 310eb252a78307fc2ac4c4c755290a578c0304d0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Jan 2018 02:09:13 +0000 Subject: mmc: mmci: fix error return code in mmci_probe() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function.
Fixes: f9bb304ce855 ("mmc: mmci: Add support for setting pad type via pinctrl") Signed-off-by: Wei Yongjun Reviewed-by: Patrice Chotard Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 6246eaada750..70b0df8b9c78 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1646,6 +1646,7 @@ static int mmci_probe(struct amba_device *dev, host->pinctrl = devm_pinctrl_get(&dev->dev); if (IS_ERR(host->pinctrl)) { dev_err(&dev->dev, "failed to get pinctrl"); + ret = PTR_ERR(host->pinctrl); goto host_free; } @@ -1653,6 +1654,7 @@ static int mmci_probe(struct amba_device *dev, PINCTRL_STATE_DEFAULT); if (IS_ERR(host->pins_default)) { dev_err(mmc_dev(mmc), "Can't select default pins\n"); + ret = PTR_ERR(host->pins_default); goto host_free; } @@ -1660,6 +1662,7 @@ static int mmci_probe(struct amba_device *dev, MMCI_PINCTRL_STATE_OPENDRAIN); if (IS_ERR(host->pins_opendrain)) { dev_err(mmc_dev(mmc), "Can't select opendrain pins\n"); + ret = PTR_ERR(host->pins_opendrain); goto host_free; } } -- cgit v1.2.3 From 1de1ea7efeb9e8543212210e34518b4049ccd285 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 22 Dec 2017 10:54:20 +0100 Subject: KVM: s390: add proper locking for CMMA migration bitmap Some parts of the cmma migration bitmap are already protected with the kvm->lock (e.g. the migration start). On the other hand the read of the cmma bits is not protected against a concurrent free, neither is the emulation of the ESSA instruction. Let's extend the locking to all related ioctls by using the slots lock for - kvm_s390_vm_start_migration - kvm_s390_vm_stop_migration - kvm_s390_set_cmma_bits - kvm_s390_get_cmma_bits In addition to that, we use synchronize_srcu before freeing the migration structure as all users hold kvm->srcu for read. (e.g. the ESSA handler).
Reported-by: David Hildenbrand Signed-off-by: Christian Borntraeger Cc: stable@vger.kernel.org # 4.13+ Fixes: 190df4a212a7 (KVM: s390: CMMA tracking, ESSA emulation, migration mode) Reviewed-by: Claudio Imbrenda Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index abcd24fdde3f..52880e980a33 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -766,7 +766,7 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req) /* * Must be called with kvm->srcu held to avoid races on memslots, and with - * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration. + * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration. */ static int kvm_s390_vm_start_migration(struct kvm *kvm) { @@ -822,7 +822,7 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) } /* - * Must be called with kvm->lock to avoid races with ourselves and + * Must be called with kvm->slots_lock to avoid races with ourselves and * kvm_s390_vm_start_migration. 
*/ static int kvm_s390_vm_stop_migration(struct kvm *kvm) @@ -837,6 +837,8 @@ static int kvm_s390_vm_stop_migration(struct kvm *kvm) if (kvm->arch.use_cmma) { kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION); + /* We have to wait for the essa emulation to finish */ + synchronize_srcu(&kvm->srcu); vfree(mgs->pgste_bitmap); } kfree(mgs); @@ -846,14 +848,12 @@ static int kvm_s390_vm_stop_migration(struct kvm *kvm) static int kvm_s390_vm_set_migration(struct kvm *kvm, struct kvm_device_attr *attr) { - int idx, res = -ENXIO; + int res = -ENXIO; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->slots_lock); switch (attr->attr) { case KVM_S390_VM_MIGRATION_START: - idx = srcu_read_lock(&kvm->srcu); res = kvm_s390_vm_start_migration(kvm); - srcu_read_unlock(&kvm->srcu, idx); break; case KVM_S390_VM_MIGRATION_STOP: res = kvm_s390_vm_stop_migration(kvm); @@ -861,7 +861,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm, default: break; } - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->slots_lock); return res; } @@ -1751,7 +1751,9 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&args, argp, sizeof(args))) break; + mutex_lock(&kvm->slots_lock); r = kvm_s390_get_cmma_bits(kvm, &args); + mutex_unlock(&kvm->slots_lock); if (!r) { r = copy_to_user(argp, &args, sizeof(args)); if (r) @@ -1765,7 +1767,9 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&args, argp, sizeof(args))) break; + mutex_lock(&kvm->slots_lock); r = kvm_s390_set_cmma_bits(kvm, &args); + mutex_unlock(&kvm->slots_lock); break; } default: -- cgit v1.2.3 From 162f8debc01f48ac984ed6d7291743053ec90271 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 24 Jan 2018 15:10:47 +0100 Subject: spi: a3700: Remove endianness swapping functions when accessing FIFOs Fixes the following sparse warnings : line 504: warning: incorrect type in assignment (different base types) line 504: expected unsigned int [unsigned] [usertype] val line 504: got 
restricted __le32 [usertype] line 527: warning: cast to restricted __le32 This is solved by removing endian-conversion functions, since the converted values are going through readl/writel anyway, which take care of the conversion. Fixes: 6fd6fd68c9e2 ("spi: armada-3700: Fix padding when sending not 4-byte aligned data") Signed-off-by: Maxime Chevallier Reviewed-by: Gregory CLEMENT Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index fdc35dabcda2..f32b83c7209f 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -493,7 +493,7 @@ static int a3700_spi_fifo_write(struct a3700_spi *a3700_spi) u32 val; while (!a3700_is_wfifo_full(a3700_spi) && a3700_spi->buf_len) { - val = cpu_to_le32(*(u32 *)a3700_spi->tx_buf); + val = *(u32 *)a3700_spi->tx_buf; spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val); a3700_spi->buf_len -= 4; a3700_spi->tx_buf += 4; @@ -516,9 +516,8 @@ static int a3700_spi_fifo_read(struct a3700_spi *a3700_spi) while (!a3700_is_rfifo_empty(a3700_spi) && a3700_spi->buf_len) { val = spireg_read(a3700_spi, A3700_SPI_DATA_IN_REG); if (a3700_spi->buf_len >= 4) { - u32 data = le32_to_cpu(val); - memcpy(a3700_spi->rx_buf, &data, 4); + memcpy(a3700_spi->rx_buf, &val, 4); a3700_spi->buf_len -= 4; a3700_spi->rx_buf += 4; -- cgit v1.2.3 From 34b1fcaeb21de2a64004a95a1dc52d7e9998b733 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 24 Jan 2018 15:10:48 +0100 Subject: spi: a3700: Remove endianness swapping for full-duplex transfers Fixes the following sparse warnings : line 767: warning: incorrect type in assignment (different base types) line 767: expected unsigned int [unsigned] [assigned] [usertype] val_out line 767: got restricted __le32 [usertype] line 776: warning: cast to restricted __le32 This takes advantage of readl/writel to do the endianness reordering, and removes an extra
variable in the function. Fixes: f68a7dcb91b7 ("spi: a3700: Add full-duplex support") Signed-off-by: Maxime Chevallier Reviewed-by: Gregory CLEMENT Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index f32b83c7209f..1f42bd04e630 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -739,7 +739,7 @@ static int a3700_spi_transfer_one_full_duplex(struct spi_master *master, struct spi_transfer *xfer) { struct a3700_spi *a3700_spi = spi_master_get_devdata(master); - u32 val_in, val_out; + u32 val; /* Disable FIFO mode */ a3700_spi_fifo_mode_set(a3700_spi, false); @@ -753,21 +753,20 @@ static int a3700_spi_transfer_one_full_duplex(struct spi_master *master, a3700_spi_bytelen_set(a3700_spi, 1); if (a3700_spi->byte_len == 1) - val_out = *a3700_spi->tx_buf; + val = *a3700_spi->tx_buf; else - val_out = cpu_to_le32(*(u32 *)a3700_spi->tx_buf); + val = *(u32 *)a3700_spi->tx_buf; - spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val_out); + spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val); /* Wait for all the data to be shifted in / out */ while (!(spireg_read(a3700_spi, A3700_SPI_IF_CTRL_REG) & A3700_SPI_XFER_DONE)) cpu_relax(); - val_in = le32_to_cpu(spireg_read(a3700_spi, - A3700_SPI_DATA_IN_REG)); + val = spireg_read(a3700_spi, A3700_SPI_DATA_IN_REG); - memcpy(a3700_spi->rx_buf, &val_in, a3700_spi->byte_len); + memcpy(a3700_spi->rx_buf, &val, a3700_spi->byte_len); a3700_spi->buf_len -= a3700_spi->byte_len; a3700_spi->tx_buf += a3700_spi->byte_len; -- cgit v1.2.3 From 1ecdaea02ca6bfacf2ecda500dc1af51e9780c42 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Wed, 24 Jan 2018 10:02:09 +0100 Subject: mlxsw: spectrum_router: Don't log an error on missing neighbor Driver periodically samples all neighbors configured in device in order to update the kernel regarding their state. 
When finding an entry configured in HW that doesn't show in neigh_lookup() driver logs an error message. This introduces a race when removing multiple neighbors - it's possible that a given entry would still be configured in HW as its removal is still being processed but is already removed from the kernel's neighbor tables. Simply remove the error message and gracefully accept such events. Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") Fixes: 60f040ca11b9 ("mlxsw: spectrum_router: Periodically dump active IPv6 neighbours") Signed-off-by: Yuval Mintz Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6c0391c13fe0..7042c855a5d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1942,11 +1942,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, dipn = htonl(dip); dev = mlxsw_sp->router->rifs[rif]->dev; n = neigh_lookup(&arp_tbl, &dipn, dev); - if (!n) { - netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", - &dip); + if (!n) return; - } netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); neigh_event_send(n, NULL); @@ -1973,11 +1970,8 @@ static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, dev = mlxsw_sp->router->rifs[rif]->dev; n = neigh_lookup(&nd_tbl, &dip, dev); - if (!n) { - netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", - &dip); + if (!n) return; - } netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); neigh_event_send(n, NULL); -- cgit v1.2.3 From 6b136a24b05c81a24e0b648a4bd938bcd0c4f69e Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Wed, 24 Jan 2018 
01:20:00 +0800 Subject: blk-mq-debugfs: don't allow write on attributes with seq_operations set Attributes that only implement .seq_ops are read-only, any write to them should be rejected. But currently kernel would crash when writing to such debugfs entries, e.g. chmod +w /sys/kernel/debug/block//requeue_list echo 0 > /sys/kernel/debug/block//requeue_list chmod -w /sys/kernel/debug/block//requeue_list Fix it by returning -EPERM in blk_mq_debugfs_write() when writing to such attributes. Cc: Ming Lei Signed-off-by: Eryu Guan Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index fa31ceaa8de6..21cbc1f071c6 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -697,7 +697,11 @@ static ssize_t blk_mq_debugfs_write(struct file *file, const char __user *buf, const struct blk_mq_debugfs_attr *attr = m->private; void *data = d_inode(file->f_path.dentry->d_parent)->i_private; - if (!attr->write) + /* + * Attributes that only implement .seq_ops are read-only and 'attr' is + * the same with 'data' in this case. + */ + if (attr == data || !attr->write) return -EPERM; return attr->write(data, buf, count, ppos); -- cgit v1.2.3 From 3124b65dad946c20feaaf08959ee38ec27361da9 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 24 Jan 2018 09:50:06 -0700 Subject: bsg: use pr_debug instead of hand crafted macros Use pr_debug instead of hand crafted macros. This way it is not needed to re-compile the kernel to enable bsg debug outputs and it's possible to selectively enable specific prints. 
Cc: Joe Perches Reviewed-by: Bart Van Assche Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/bsg.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 452f94f1c5d4..a1bcbb6ba50b 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -32,6 +32,9 @@ #define BSG_DESCRIPTION "Block layer SCSI generic (bsg) driver" #define BSG_VERSION "0.4" +#define bsg_dbg(bd, fmt, ...) \ + pr_debug("%s: " fmt, (bd)->name, ##__VA_ARGS__) + struct bsg_device { struct request_queue *queue; spinlock_t lock; @@ -55,14 +58,6 @@ enum { #define BSG_DEFAULT_CMDS 64 #define BSG_MAX_DEVS 32768 -#undef BSG_DEBUG - -#ifdef BSG_DEBUG -#define dprintk(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ##args) -#else -#define dprintk(fmt, args...) -#endif - static DEFINE_MUTEX(bsg_mutex); static DEFINE_IDR(bsg_minor_idr); @@ -123,7 +118,7 @@ static struct bsg_command *bsg_alloc_command(struct bsg_device *bd) bc->bd = bd; INIT_LIST_HEAD(&bc->list); - dprintk("%s: returning free cmd %p\n", bd->name, bc); + bsg_dbg(bd, "returning free cmd %p\n", bc); return bc; out: spin_unlock_irq(&bd->lock); @@ -222,7 +217,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode) if (!bcd->class_dev) return ERR_PTR(-ENXIO); - dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp, + bsg_dbg(bd, "map hdr %llx/%u %llx/%u\n", + (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp, hdr->din_xfer_len); @@ -299,8 +295,8 @@ static void bsg_rq_end_io(struct request *rq, blk_status_t status) struct bsg_device *bd = bc->bd; unsigned long flags; - dprintk("%s: finished rq %p bc %p, bio %p\n", - bd->name, rq, bc, bc->bio); + bsg_dbg(bd, "finished rq %p bc %p, bio %p\n", + rq, bc, bc->bio); bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration); @@ -333,7 +329,7 @@ static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, 
list_add_tail(&bc->list, &bd->busy_list); spin_unlock_irq(&bd->lock); - dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc); + bsg_dbg(bd, "queueing rq %p, bc %p\n", rq, bc); rq->end_io_data = bc; blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io); @@ -379,7 +375,7 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd) } } while (1); - dprintk("%s: returning done %p\n", bd->name, bc); + bsg_dbg(bd, "returning done %p\n", bc); return bc; } @@ -390,7 +386,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, struct scsi_request *req = scsi_req(rq); int ret = 0; - dprintk("rq %p bio %p 0x%x\n", rq, bio, req->result); + pr_debug("rq %p bio %p 0x%x\n", rq, bio, req->result); /* * fill in all the output members */ @@ -469,7 +465,7 @@ static int bsg_complete_all_commands(struct bsg_device *bd) struct bsg_command *bc; int ret, tret; - dprintk("%s: entered\n", bd->name); + bsg_dbg(bd, "entered\n"); /* * wait for all commands to complete @@ -572,7 +568,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) int ret; ssize_t bytes_read; - dprintk("%s: read %zd bytes\n", bd->name, count); + bsg_dbg(bd, "read %zd bytes\n", count); bsg_set_block(bd, file); @@ -646,7 +642,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) ssize_t bytes_written; int ret; - dprintk("%s: write %zd bytes\n", bd->name, count); + bsg_dbg(bd, "write %zd bytes\n", count); if (unlikely(uaccess_kernel())) return -EINVAL; @@ -664,7 +660,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) if (!bytes_written || err_block_err(ret)) bytes_written = ret; - dprintk("%s: returning %zd\n", bd->name, bytes_written); + bsg_dbg(bd, "returning %zd\n", bytes_written); return bytes_written; } @@ -717,7 +713,7 @@ static int bsg_put_device(struct bsg_device *bd) hlist_del(&bd->dev_list); mutex_unlock(&bsg_mutex); - dprintk("%s: tearing down\n", bd->name); + bsg_dbg(bd, 
"tearing down\n"); /* * close can always block @@ -744,9 +740,7 @@ static struct bsg_device *bsg_add_device(struct inode *inode, struct file *file) { struct bsg_device *bd; -#ifdef BSG_DEBUG unsigned char buf[32]; -#endif if (!blk_queue_scsi_passthrough(rq)) { WARN_ONCE(true, "Attempt to register a non-SCSI queue\n"); @@ -771,7 +765,7 @@ static struct bsg_device *bsg_add_device(struct inode *inode, hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1); - dprintk("bound to <%s>, max queue %d\n", + bsg_dbg(bd, "bound to <%s>, max queue %d\n", format_dev_t(buf, inode->i_rdev), bd->max_queue); mutex_unlock(&bsg_mutex); -- cgit v1.2.3 From 5132ede0fe8092b043dae09a7cc32b8ae7272baa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 24 Jan 2018 15:28:17 +0100 Subject: Revert "module: Add retpoline tag to VERMAGIC" This reverts commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12. Turns out distros do not want to make retpoline as part of their "ABI", so this patch should not have been merged. Sorry Andi, this was my fault, I suggested it when your original patch was the "correct" way of doing this instead. 
Reported-by: Jiri Kosina Fixes: 6cfb521ac0d5 ("module: Add retpoline tag to VERMAGIC") Acked-by: Andi Kleen Cc: Thomas Gleixner Cc: David Woodhouse Cc: rusty@rustcorp.com.au Cc: arjan.van.de.ven@intel.com Cc: jeyu@kernel.org Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/linux/vermagic.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 853291714ae0..bae807eb2933 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -31,17 +31,11 @@ #else #define MODULE_RANDSTRUCT_PLUGIN #endif -#ifdef RETPOLINE -#define MODULE_VERMAGIC_RETPOLINE "retpoline " -#else -#define MODULE_VERMAGIC_RETPOLINE "" -#endif #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ MODULE_ARCH_VERMAGIC \ - MODULE_RANDSTRUCT_PLUGIN \ - MODULE_VERMAGIC_RETPOLINE + MODULE_RANDSTRUCT_PLUGIN -- cgit v1.2.3 From 9d4853322ffcc76a2eb62d720ec3903d427b39bc Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 29 Nov 2017 22:02:46 +0800 Subject: regulator: qcom_spmi: Use regmap helpers for enable/disable/is_enabled callback Setup .enable_reg/.enable_mask/.enable_val fields, then we can use the regmap helpers for enable/disable/is_enabled callback implementation. 
Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- drivers/regulator/qcom_spmi-regulator.c | 84 ++++++++++++--------------------- 1 file changed, 31 insertions(+), 53 deletions(-) diff --git a/drivers/regulator/qcom_spmi-regulator.c b/drivers/regulator/qcom_spmi-regulator.c index 0241ada47d04..63c7a0c17777 100644 --- a/drivers/regulator/qcom_spmi-regulator.c +++ b/drivers/regulator/qcom_spmi-regulator.c @@ -486,24 +486,6 @@ static int spmi_vreg_update_bits(struct spmi_regulator *vreg, u16 addr, u8 val, return regmap_update_bits(vreg->regmap, vreg->base + addr, mask, val); } -static int spmi_regulator_common_is_enabled(struct regulator_dev *rdev) -{ - struct spmi_regulator *vreg = rdev_get_drvdata(rdev); - u8 reg; - - spmi_vreg_read(vreg, SPMI_COMMON_REG_ENABLE, &reg, 1); - - return (reg & SPMI_COMMON_ENABLE_MASK) == SPMI_COMMON_ENABLE; -} - -static int spmi_regulator_common_enable(struct regulator_dev *rdev) -{ - struct spmi_regulator *vreg = rdev_get_drvdata(rdev); - - return spmi_vreg_update_bits(vreg, SPMI_COMMON_REG_ENABLE, - SPMI_COMMON_ENABLE, SPMI_COMMON_ENABLE_MASK); -} - static int spmi_regulator_vs_enable(struct regulator_dev *rdev) { struct spmi_regulator *vreg = rdev_get_drvdata(rdev); @@ -513,7 +495,7 @@ static int spmi_regulator_vs_enable(struct regulator_dev *rdev) vreg->vs_enable_time = ktime_get(); } - return spmi_regulator_common_enable(rdev); + return regulator_enable_regmap(rdev); } static int spmi_regulator_vs_ocp(struct regulator_dev *rdev) @@ -524,14 +506,6 @@ static int spmi_regulator_vs_ocp(struct regulator_dev *rdev) return spmi_vreg_write(vreg, SPMI_VS_REG_OCP, &reg, 1); } -static int spmi_regulator_common_disable(struct regulator_dev *rdev) -{ - struct spmi_regulator *vreg = rdev_get_drvdata(rdev); - - return spmi_vreg_update_bits(vreg, SPMI_COMMON_REG_ENABLE, - SPMI_COMMON_DISABLE, SPMI_COMMON_ENABLE_MASK); -} - static int spmi_regulator_select_voltage(struct spmi_regulator *vreg, int min_uV, int max_uV) { @@ -1062,9 +1036,9 @@ static
irqreturn_t spmi_regulator_vs_ocp_isr(int irq, void *data) } static struct regulator_ops spmi_smps_ops = { - .enable = spmi_regulator_common_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = spmi_regulator_common_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_voltage_sel = spmi_regulator_common_set_voltage, .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel, .get_voltage_sel = spmi_regulator_common_get_voltage, @@ -1077,9 +1051,9 @@ static struct regulator_ops spmi_smps_ops = { }; static struct regulator_ops spmi_ldo_ops = { - .enable = spmi_regulator_common_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = spmi_regulator_common_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_voltage_sel = spmi_regulator_common_set_voltage, .get_voltage_sel = spmi_regulator_common_get_voltage, .map_voltage = spmi_regulator_common_map_voltage, @@ -1094,9 +1068,9 @@ static struct regulator_ops spmi_ldo_ops = { }; static struct regulator_ops spmi_ln_ldo_ops = { - .enable = spmi_regulator_common_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = spmi_regulator_common_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_voltage_sel = spmi_regulator_common_set_voltage, .get_voltage_sel = spmi_regulator_common_get_voltage, .map_voltage = spmi_regulator_common_map_voltage, @@ -1107,8 +1081,8 @@ static struct regulator_ops spmi_ln_ldo_ops = { static struct regulator_ops spmi_vs_ops = { .enable = spmi_regulator_vs_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = spmi_regulator_common_is_enabled, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_pull_down = spmi_regulator_common_set_pull_down, .set_soft_start = 
spmi_regulator_common_set_soft_start, .set_over_current_protection = spmi_regulator_vs_ocp, @@ -1117,9 +1091,9 @@ static struct regulator_ops spmi_vs_ops = { }; static struct regulator_ops spmi_boost_ops = { - .enable = spmi_regulator_common_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = spmi_regulator_common_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_voltage_sel = spmi_regulator_single_range_set_voltage, .get_voltage_sel = spmi_regulator_single_range_get_voltage, .map_voltage = spmi_regulator_single_map_voltage, @@ -1128,9 +1102,9 @@ static struct regulator_ops spmi_boost_ops = { }; static struct regulator_ops spmi_ftsmps_ops = { - .enable = spmi_regulator_common_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = spmi_regulator_common_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_voltage_sel = spmi_regulator_common_set_voltage, .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel, .get_voltage_sel = spmi_regulator_common_get_voltage, @@ -1143,9 +1117,9 @@ static struct regulator_ops spmi_ftsmps_ops = { }; static struct regulator_ops spmi_ult_lo_smps_ops = { - .enable = spmi_regulator_common_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = spmi_regulator_common_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_voltage_sel = spmi_regulator_ult_lo_smps_set_voltage, .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel, .get_voltage_sel = spmi_regulator_ult_lo_smps_get_voltage, @@ -1157,9 +1131,9 @@ static struct regulator_ops spmi_ult_lo_smps_ops = { }; static struct regulator_ops spmi_ult_ho_smps_ops = { - .enable = spmi_regulator_common_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = 
spmi_regulator_common_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_voltage_sel = spmi_regulator_single_range_set_voltage, .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel, .get_voltage_sel = spmi_regulator_single_range_get_voltage, @@ -1172,9 +1146,9 @@ static struct regulator_ops spmi_ult_ho_smps_ops = { }; static struct regulator_ops spmi_ult_ldo_ops = { - .enable = spmi_regulator_common_enable, - .disable = spmi_regulator_common_disable, - .is_enabled = spmi_regulator_common_is_enabled, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, .set_voltage_sel = spmi_regulator_single_range_set_voltage, .get_voltage_sel = spmi_regulator_single_range_get_voltage, .map_voltage = spmi_regulator_single_map_voltage, @@ -1711,6 +1685,9 @@ static int qcom_spmi_regulator_probe(struct platform_device *pdev) vreg->desc.id = -1; vreg->desc.owner = THIS_MODULE; vreg->desc.type = REGULATOR_VOLTAGE; + vreg->desc.enable_reg = reg->base + SPMI_COMMON_REG_ENABLE; + vreg->desc.enable_mask = SPMI_COMMON_ENABLE_MASK; + vreg->desc.enable_val = SPMI_COMMON_ENABLE; vreg->desc.name = name = reg->name; vreg->desc.supply_name = reg->supply; vreg->desc.of_match = reg->name; @@ -1723,6 +1700,7 @@ static int qcom_spmi_regulator_probe(struct platform_device *pdev) config.dev = dev; config.driver_data = vreg; + config.regmap = regmap; rdev = devm_regulator_register(dev, &vreg->desc, &config); if (IS_ERR(rdev)) { dev_err(dev, "failed to register %s\n", name); -- cgit v1.2.3 From ce30f264b33d9e3d27e34638976c52b578648b92 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 4 Jan 2018 14:31:25 +0100 Subject: MAINTAINERS: clarify that only verified bugs should be submitted to security@ We're seeing a raise of automated reports from testing tools and reports about address leaks that are not really exploitable as-is, many of which do 
not represent an immediate risk justifying to work in closed places. Signed-off-by: Willy Tarreau Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- MAINTAINERS | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e3581413420c..fec88c5ccedf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -62,7 +62,15 @@ trivial patch so apply some common sense. 7. When sending security related changes or reports to a maintainer please Cc: security@kernel.org, especially if the maintainer - does not respond. + does not respond. Please keep in mind that the security team is + a small set of people who can be efficient only when working on + verified bugs. Please only Cc: this list when you have identified + that the bug would present a short-term risk to other users if it + were publicly disclosed. For example, reports of address leaks do + not represent an immediate threat and are better handled publicly, + and ideally, should come with a patch proposal. Please do not send + automated reports to this list either. Such bugs will be handled + better and faster in the usual public places. 8. Happy hacking. -- cgit v1.2.3 From e4fd493c0541d36953f7b9d3bfced67a1321792f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Jan 2018 15:17:05 -0500 Subject: Btrfs: fix stale entries in readdir In fixing the readdir+pagefault deadlock I accidentally introduced a stale entry regression in readdir. If we get close to full for the temporary buffer, and then skip a few delayed deletions, and then try to add another entry that won't fit, we will emit the entries we found and retry. Unfortunately we delete entries from our del_list as we find them, assuming we won't need them. However our pos will be with whatever our last entry was, which could be before the delayed deletions we skipped, so the next search will add the deleted entries back into our readdir buffer. 
So instead don't delete entries we find in our del_list so we can make sure we always find our delayed deletions. This is a slight perf hit for readdir with lots of pending deletions, but hopefully this isn't a common occurrence. If it is we can revisit this and optimize it. cc: stable@vger.kernel.org Fixes: 23b5ec74943f ("btrfs: fix readdir deadlock with pagefault") Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 056276101c63..a6226cd6063c 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1633,28 +1633,18 @@ void btrfs_readdir_put_delayed_items(struct inode *inode, int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index) { - struct btrfs_delayed_item *curr, *next; - int ret; - - if (list_empty(del_list)) - return 0; + struct btrfs_delayed_item *curr; + int ret = 0; - list_for_each_entry_safe(curr, next, del_list, readdir_list) { + list_for_each_entry(curr, del_list, readdir_list) { if (curr->key.offset > index) break; - - list_del(&curr->readdir_list); - ret = (curr->key.offset == index); - - if (refcount_dec_and_test(&curr->refs)) - kfree(curr); - - if (ret) - return 1; - else - continue; + if (curr->key.offset == index) { + ret = 1; + break; + } } - return 0; + return ret; } /* -- cgit v1.2.3 From 560a66075d694e6ec24c60967b4d93d97cbb33d1 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Thu, 18 Jan 2018 11:32:35 +0100 Subject: net: sched: em_nbyte: don't add the data offset twice 'ptr' is shifted by the offset and then validated, the memcmp should not add it a second time. Signed-off-by: Wolfgang Bumiller Signed-off-by: David S.
Miller --- net/sched/em_nbyte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index df3110d69585..07c10bac06a0 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em, if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) return 0; - return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len); + return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len); } static struct tcf_ematch_ops em_nbyte_ops = { -- cgit v1.2.3 From d3303a65a00c94372ddab831570647488e6c06e2 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Thu, 18 Jan 2018 11:32:36 +0100 Subject: net: sched: fix TCF_LAYER_LINK case in tcf_get_base_ptr TCF_LAYER_LINK and TCF_LAYER_NETWORK returned the same pointer as skb->data points to the network header. Use skb_mac_header instead. Signed-off-by: Wolfgang Bumiller Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 8e08b6da72f3..753ac9361154 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -522,7 +522,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer) { switch (layer) { case TCF_LAYER_LINK: - return skb->data; + return skb_mac_header(skb); case TCF_LAYER_NETWORK: return skb_network_header(skb); case TCF_LAYER_TRANSPORT: -- cgit v1.2.3 From 3eab2ad9162e7467c988b91f50395eac51a1e650 Mon Sep 17 00:00:00 2001 From: James Morris Date: Thu, 25 Jan 2018 07:53:57 +1100 Subject: MAINTAINERS: update email address for James Morris Update my email address. 
Signed-off-by: James Morris --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fec88c5ccedf..810d5d990f4a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12241,7 +12241,7 @@ M: Security Officers S: Supported SECURITY SUBSYSTEM -M: James Morris +M: James Morris M: "Serge E. Hallyn" L: linux-security-module@vger.kernel.org (suggested Cc:) T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git -- cgit v1.2.3 From 581e7226a5d43f629eb6399a121f85f6a15f81be Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 24 Jan 2018 12:35:40 -0800 Subject: kcm: Only allow TCP sockets to be attached to a KCM mux TCP sockets for IPv4 and IPv6 that are not listeners or in closed stated are allowed to be attached to a KCM mux. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+8865eaff7f9acd593945@syzkaller.appspotmail.com Signed-off-by: Tom Herbert Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index d4e98f20fc2a..7632797fb68e 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock, if (!csk) return -EINVAL; - /* We must prevent loops or risk deadlock ! 
*/ - if (csk->sk_family == PF_KCM) + /* Only allow TCP sockets to be attached for now */ + if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || + csk->sk_protocol != IPPROTO_TCP) + return -EOPNOTSUPP; + + /* Don't allow listeners or closed sockets */ + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) return -EOPNOTSUPP; psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); -- cgit v1.2.3 From e5571240236c5652f3e079b1d5866716a7ad819c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 24 Jan 2018 12:35:41 -0800 Subject: kcm: Check if sk_user_data already set in kcm_attach This is needed to prevent sk_user_data being overwritten. The check is done under the callback lock. This should prevent a socket from being attached twice to a KCM mux. It also prevents a socket from being attached for other use cases of sk_user_data as long as the other cases set sk_user_data under the lock. Followup work is needed to unify all the use cases of sk_user_data to use the same locking. Reported-by: syzbot+114b15f2be420a8886c3@syzkaller.appspotmail.com Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Signed-off-by: Tom Herbert Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 7632797fb68e..4a8d407f8902 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1410,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, return err; } - sock_hold(csk); - write_lock_bh(&csk->sk_callback_lock); + + /* Check if sk_user_data is aready by KCM or someone else. + * Must be done under lock to prevent race conditions. 
+ */ + if (csk->sk_user_data) { + write_unlock_bh(&csk->sk_callback_lock); + strp_done(&psock->strp); + kmem_cache_free(kcm_psockp, psock); + return -EALREADY; + } + psock->save_data_ready = csk->sk_data_ready; psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; @@ -1420,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock, csk->sk_data_ready = psock_data_ready; csk->sk_write_space = psock_write_space; csk->sk_state_change = psock_state_change; + write_unlock_bh(&csk->sk_callback_lock); + sock_hold(csk); + /* Finished initialization, now add the psock to the MUX. */ spin_lock_bh(&mux->lock); head = &mux->psocks; -- cgit v1.2.3 From 4de49474b18936d62797d1dd451c6c4db1a7b119 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 23 Jan 2018 11:33:46 +0200 Subject: qed: Remove reserveration of dpi for kernel Double reservation for kernel dedicated dpi was performed. Once in the core module and once in qedr. Remove the reservation from core. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index c8c4b3940564..9d6e2d43d4de 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -615,9 +615,6 @@ static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn) { struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; - /* The first DPI is reserved for the Kernel */ - __set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap); - /* Tid 0 will be used as the key for "reserved MR". * The driver should allocate memory for it so it can be loaded but no * ramrod should be passed on it. 
-- cgit v1.2.3 From 1fe280a056dff50774bd59c3e61187cf8c0ccf10 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 23 Jan 2018 11:33:47 +0200 Subject: qed: Free reserved MR tid A tid was allocated for reserved MR during initialization but not freed. This lead to an annoying output message during rdma unload flow. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 9d6e2d43d4de..b7abb8205d3a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -358,10 +358,27 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) kfree(p_rdma_info); } +static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +static void qed_rdma_free_reserved_lkey(struct qed_hwfn *p_hwfn) +{ + qed_rdma_free_tid(p_hwfn, p_hwfn->p_rdma_info->dev->reserved_lkey); +} + static void qed_rdma_free(struct qed_hwfn *p_hwfn) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n"); + qed_rdma_free_reserved_lkey(p_hwfn); qed_rdma_resc_free(p_hwfn); } @@ -794,17 +811,6 @@ static struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) return p_hwfn->p_rdma_info->dev; } -static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) -{ - struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); - - spin_lock_bh(&p_hwfn->p_rdma_info->lock); - qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); - 
spin_unlock_bh(&p_hwfn->p_rdma_info->lock); -} - static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) { struct qed_hwfn *p_hwfn; -- cgit v1.2.3 From aebb48f5e465772576359c1705c4a84abea92027 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 23 Jan 2018 14:33:14 +0000 Subject: sparc64: fix typo in CONFIG_CRYPTO_DES_SPARC64 => CONFIG_CRYPTO_CAMELLIA_SPARC64 This patch fixes the typo CONFIG_CRYPTO_DES_SPARC64 => CONFIG_CRYPTO_CAMELLIA_SPARC64 Fixes: 81658ad0d923 ("sparc64: Add CAMELLIA driver making use of the new camellia opcodes.") Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- arch/sparc/crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index 818d3aa5172e..d257186c27d1 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o -obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o +obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o -- cgit v1.2.3 From b7051cb8dadd69f85da5989017af2bb35b418950 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 23 Jan 2018 00:08:40 -0800 Subject: i40e: flower: check if TC offload is enabled on a netdev Since TC block changes drivers are required to check if the TC hw offload flag is set on the interface themselves. Fixes: 2f4b411a3d67 ("i40e: Enable cloud filters via tc-flower") Fixes: 44ae12a768b7 ("net: sched: move the can_offload check from binding phase to rule insertion phase") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Jiri Pirko Acked-by: Amritha Nambiar Acked-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 42dcaefc4c19..af792112a2d3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7505,6 +7505,8 @@ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, { struct i40e_vsi *vsi = np->vsi; + if (!tc_can_offload(vsi->netdev)) + return -EOPNOTSUPP; if (cls_flower->common.chain_index) return -EOPNOTSUPP; -- cgit v1.2.3 From e9cb4239134c860e5f92c75bf5321bd377bb505b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 23 Jan 2018 17:27:25 +0800 Subject: vhost: use mutex_lock_nested() in vhost_dev_lock_vqs() We used to call mutex_lock() in vhost_dev_lock_vqs() which tries to hold mutexes of all virtqueues. This may confuse lockdep to report a possible deadlock because of trying to hold locks belong to same class. Switch to use mutex_lock_nested() to avoid false positive. Fixes: 6b1e6cc7855b0 ("vhost: new device IOTLB API") Reported-by: syzbot+dbb7c1161485e61b0241@syzkaller.appspotmail.com Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. 
Miller --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 33ac2b186b85..549771a0cd8b 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -904,7 +904,7 @@ static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; for (i = 0; i < d->nvqs; ++i) - mutex_lock(&d->vqs[i]->mutex); + mutex_lock_nested(&d->vqs[i]->mutex, i); } static void vhost_dev_unlock_vqs(struct vhost_dev *d) -- cgit v1.2.3 From 6f3180afbb22106d96a1320e175562f36a4d3506 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 23 Jan 2018 17:27:26 +0800 Subject: vhost: do not try to access device IOTLB when not initialized The code will try to access dev->iotlb when processing VHOST_IOTLB_INVALIDATE even if it was not initialized which may lead to NULL pointer dereference. Fixes this by check dev->iotlb before. Fixes: 6b1e6cc7855b0 ("vhost: new device IOTLB API") Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/vhost/vhost.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 549771a0cd8b..5727b186b3ca 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1015,6 +1015,10 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, vhost_iotlb_notify_vq(dev, msg); break; case VHOST_IOTLB_INVALIDATE: + if (!dev->iotlb) { + ret = -EFAULT; + break; + } vhost_vq_meta_reset(dev); vhost_del_umem_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1); -- cgit v1.2.3 From 060403f34008af90e310d7e0e7531ebb3acf9557 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Wed, 24 Jan 2018 13:46:04 -0800 Subject: Revert "Input: synaptics_rmi4 - use devm_device_add_group() for attributes in F01" Since the sysfs attribute hangs off the RMI bus, which doesn't go away during firmware flash, it needs to be explicitly removed, otherwise we would try and register the same attribute twice. 
This reverts commit 36a44af5c176d619552d99697433261141dd1296. Signed-off-by: Nick Dyer Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f01.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/input/rmi4/rmi_f01.c b/drivers/input/rmi4/rmi_f01.c index ae966e333a2f..8a07ae147df6 100644 --- a/drivers/input/rmi4/rmi_f01.c +++ b/drivers/input/rmi4/rmi_f01.c @@ -570,14 +570,19 @@ static int rmi_f01_probe(struct rmi_function *fn) dev_set_drvdata(&fn->dev, f01); - error = devm_device_add_group(&fn->rmi_dev->dev, &rmi_f01_attr_group); + error = sysfs_create_group(&fn->rmi_dev->dev.kobj, &rmi_f01_attr_group); if (error) - dev_warn(&fn->dev, - "Failed to create attribute group: %d\n", error); + dev_warn(&fn->dev, "Failed to create sysfs group: %d\n", error); return 0; } +static void rmi_f01_remove(struct rmi_function *fn) +{ + /* Note that the bus device is used, not the F01 device */ + sysfs_remove_group(&fn->rmi_dev->dev.kobj, &rmi_f01_attr_group); +} + static int rmi_f01_config(struct rmi_function *fn) { struct f01_data *f01 = dev_get_drvdata(&fn->dev); @@ -717,6 +722,7 @@ struct rmi_function_handler rmi_f01_handler = { }, .func = 0x01, .probe = rmi_f01_probe, + .remove = rmi_f01_remove, .config = rmi_f01_config, .attention = rmi_f01_attention, .suspend = rmi_f01_suspend, -- cgit v1.2.3 From 45d6e545505fd32edb812f085be7de45b6a5c0af Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 24 Jan 2018 15:53:24 +0300 Subject: net/ibm/emac: add 8192 rx/tx fifo size emac4syn chips has availability to use 8192 rx/tx fifo buffer sizes, in current state if we set it up in dts 8192 as example, we will get only 2048 which may impact on network speed. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/emac/core.c | 6 ++++++ drivers/net/ethernet/ibm/emac/emac.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 7feff2450ed6..241db3199b88 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -494,6 +494,9 @@ static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_s case 16384: ret |= EMAC_MR1_RFS_16K; break; + case 8192: + ret |= EMAC4_MR1_RFS_8K; + break; case 4096: ret |= EMAC_MR1_RFS_4K; break; @@ -516,6 +519,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_ case 16384: ret |= EMAC4_MR1_TFS_16K; break; + case 8192: + ret |= EMAC4_MR1_TFS_8K; + break; case 4096: ret |= EMAC4_MR1_TFS_4K; break; diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h index 5afcc27ceebb..d0a0e3b3f283 100644 --- a/drivers/net/ethernet/ibm/emac/emac.h +++ b/drivers/net/ethernet/ibm/emac/emac.h @@ -151,9 +151,11 @@ struct emac_regs { #define EMAC4_MR1_RFS_2K 0x00100000 #define EMAC4_MR1_RFS_4K 0x00180000 +#define EMAC4_MR1_RFS_8K 0x00200000 #define EMAC4_MR1_RFS_16K 0x00280000 #define EMAC4_MR1_TFS_2K 0x00020000 #define EMAC4_MR1_TFS_4K 0x00030000 +#define EMAC4_MR1_TFS_8K 0x00040000 #define EMAC4_MR1_TFS_16K 0x00050000 #define EMAC4_MR1_TR 0x00008000 #define EMAC4_MR1_MWSW_001 0x00001000 -- cgit v1.2.3 From 624ca9c33c8a853a4a589836e310d776620f4ab9 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 24 Jan 2018 15:53:25 +0300 Subject: net/ibm/emac: wrong bit is used for STA control register write STA control register has areas of mode and opcodes for opeations. 18 bit is using for mode selection, where 0 is old MIO/MDIO access method and 1 is indirect access mode. 19-20 bits are using for setting up read/write operation(STA opcodes). 
In current state 'read' is set into old MIO/MDIO mode with 19 bit and write operation is set into 18 bit which is mode selection, not a write operation. To correlate write with read we set it into 20 bit. All those bit operations are MSB 0 based. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/emac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h index d0a0e3b3f283..c26d2631ca30 100644 --- a/drivers/net/ethernet/ibm/emac/emac.h +++ b/drivers/net/ethernet/ibm/emac/emac.h @@ -244,7 +244,7 @@ struct emac_regs { #define EMAC_STACR_PHYE 0x00004000 #define EMAC_STACR_STAC_MASK 0x00003000 #define EMAC_STACR_STAC_READ 0x00001000 -#define EMAC_STACR_STAC_WRITE 0x00002000 +#define EMAC_STACR_STAC_WRITE 0x00000800 #define EMAC_STACR_OPBC_MASK 0x00000C00 #define EMAC_STACR_OPBC_50 0x00000000 #define EMAC_STACR_OPBC_66 0x00000400 -- cgit v1.2.3 From 83b7739180de16a0bbf5bfeb0a3b48733e56ccc9 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 9 Nov 2017 12:14:15 +1100 Subject: cifs: Add smb2_send_recv This function is similar to SendReceive2 except it does not expect a 4 byte rfc1002 length header in the first io vector. 
Signed-off-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 4 ++++ fs/cifs/transport.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 4143c9dec463..93d565186698 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -106,6 +106,10 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */, const int flags, struct kvec * /* resp vec */); +extern int smb2_send_recv(const unsigned int xid, struct cifs_ses *pses, + struct kvec *pkvec, int nvec_to_send, + int *pbuftype, const int flags, + struct kvec *presp); extern int SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *ptcon, struct smb_hdr *in_buf , diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 7efbab013957..e678307bb7a0 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -827,6 +827,44 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, return rc; } +/* Like SendReceive2 but iov[0] does not contain an rfc1002 header */ +int +smb2_send_recv(const unsigned int xid, struct cifs_ses *ses, + struct kvec *iov, int n_vec, int *resp_buf_type /* ret */, + const int flags, struct kvec *resp_iov) +{ + struct smb_rqst rqst; + struct kvec *new_iov; + int rc; + int i; + __u32 count; + __be32 rfc1002_marker; + + new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), GFP_KERNEL); + if (!new_iov) + return -ENOMEM; + + /* 1st iov is an RFC1002 Session Message length */ + memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); + + count = 0; + for (i = 1; i < n_vec + 1; i++) + count += new_iov[i].iov_len; + + rfc1002_marker = cpu_to_be32(count); + + new_iov[0].iov_base = &rfc1002_marker; + new_iov[0].iov_len = 4; + + memset(&rqst, 0, sizeof(struct smb_rqst)); + rqst.rq_iov = new_iov; + rqst.rq_nvec = n_vec + 1; + + rc = cifs_send_recv(xid, ses, 
&rqst, resp_buf_type, flags, resp_iov); + kfree(new_iov); + return rc; +} + int SendReceive(const unsigned int xid, struct cifs_ses *ses, struct smb_hdr *in_buf, struct smb_hdr *out_buf, -- cgit v1.2.3 From 13cacea7bb20fbbd3cb400953b3142cde139abaa Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:30 +1100 Subject: cifs: remove rfc1002 header from smb2_negotiate_req Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 38 ++++++++++++++++++++------------------ fs/cifs/smb2pdu.h | 2 +- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 01346b8b6edb..b1c2cc8ef55f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -398,8 +398,8 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, } #ifdef CONFIG_CIFS_SMB311 -/* offset is sizeof smb2_negotiate_req - 4 but rounded up to 8 bytes */ -#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) - 4 */ +/* offset is sizeof smb2_negotiate_req but rounded up to 8 bytes */ +#define OFFSET_OF_NEG_CONTEXT 0x68 /* sizeof(struct smb2_negotiate_req) */ #define SMB2_PREAUTH_INTEGRITY_CAPABILITIES cpu_to_le16(1) @@ -427,23 +427,25 @@ build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt) } static void -assemble_neg_contexts(struct smb2_negotiate_req *req) +assemble_neg_contexts(struct smb2_negotiate_req *req, + unsigned int *total_len) { - - /* +4 is to account for the RFC1001 len field */ - char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT + 4; + char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT; build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt); /* Add 2 to size to round to 8 byte boundary */ + pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context); build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt); req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); req->NegotiateContextCount = cpu_to_le16(2); - inc_rfc1001_len(req, 4 + 
sizeof(struct smb2_preauth_neg_context) - + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */ + + *total_len += 4 + sizeof(struct smb2_preauth_neg_context) + + sizeof(struct smb2_encryption_neg_context); } #else -static void assemble_neg_contexts(struct smb2_negotiate_req *req) +static void assemble_neg_contexts(struct smb2_negotiate_req *req, + unsigned int *total_len) { return; } @@ -477,6 +479,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) int blob_offset, blob_length; char *security_blob; int flags = CIFS_NEG_OP; + unsigned int total_len; cifs_dbg(FYI, "Negotiate protocol\n"); @@ -485,30 +488,30 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) return -EIO; } - rc = small_smb2_init(SMB2_NEGOTIATE, NULL, (void **) &req); + rc = smb2_plain_req_init(SMB2_NEGOTIATE, NULL, (void **) &req, &total_len); if (rc) return rc; - req->hdr.sync_hdr.SessionId = 0; + req->sync_hdr.SessionId = 0; if (strcmp(ses->server->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID); req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID); req->DialectCount = cpu_to_le16(2); - inc_rfc1001_len(req, 4); + total_len += 4; } else if (strcmp(ses->server->vals->version_string, SMBDEFAULT_VERSION_STRING) == 0) { req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); req->DialectCount = cpu_to_le16(3); - inc_rfc1001_len(req, 6); + total_len += 6; } else { /* otherwise send specific dialect */ req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); req->DialectCount = cpu_to_le16(1); - inc_rfc1001_len(req, 2); + total_len += 2; } /* only one of SMB2 signing flags may be set in SMB2 request */ @@ -528,13 +531,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) memcpy(req->ClientGUID, server->client_guid, SMB2_CLIENT_GUID_SIZE); if (ses->server->vals->protocol_id == SMB311_PROT_ID) - 
assemble_neg_contexts(req); + assemble_neg_contexts(req, &total_len); } iov[0].iov_base = (char *)req; - /* 4 for rfc1002 length field */ - iov[0].iov_len = get_rfc1002_length(req) + 4; + iov[0].iov_len = total_len; - rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_negotiate_rsp *)rsp_iov.iov_base; /* diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c2ec934be968..0fe2382597ad 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -195,7 +195,7 @@ struct smb2_symlink_err_rsp { #define SMB2_CLIENT_GUID_SIZE 16 struct smb2_negotiate_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 36 */ __le16 DialectCount; __le16 SecurityMode; -- cgit v1.2.3 From 45305eda6bfb5b90624f8cf525eb88d037eafe02 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 9 Nov 2017 12:14:17 +1100 Subject: cifs: remove rfc1002 header from smb2_logoff_req Signed-off-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 17 +++++++++++++---- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b1c2cc8ef55f..4295cb535c85 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1204,6 +1204,10 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) int rc = 0; struct TCP_Server_Info *server; int flags = 0; + unsigned int total_len; + struct kvec iov[1]; + struct kvec rsp_iov; + int resp_buf_type; cifs_dbg(FYI, "disconnect session %p\n", ses); @@ -1216,19 +1220,24 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses) if (ses->need_reconnect) goto smb2_session_already_dead; - rc = small_smb2_init(SMB2_LOGOFF, NULL, (void **) &req); + rc = smb2_plain_req_init(SMB2_LOGOFF, NULL, (void **) &req, &total_len); if (rc) return rc; /* since no tcon, 
smb2_init can not do this, so do here */ - req->hdr.sync_hdr.SessionId = ses->Suid; + req->sync_hdr.SessionId = ses->Suid; if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) flags |= CIFS_TRANSFORM_REQ; else if (server->sign) - req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; + req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; + + flags |= CIFS_NO_RESP; + + iov[0].iov_base = (char *)req; + iov[0].iov_len = total_len; - rc = SendReceiveNoRsp(xid, ses, (char *) req, flags); + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); /* * No tcon so can't do diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 0fe2382597ad..0799e0957499 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -308,7 +308,7 @@ struct smb2_sess_setup_rsp { } __packed; struct smb2_logoff_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 4 */ __le16 Reserved; } __packed; -- cgit v1.2.3 From 4eecf4cfe168cd0cf18eab6580b3eb27a2d26a1f Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 9 Nov 2017 12:14:18 +1100 Subject: cifs: remove rfc1002 header from smb2_tree_disconnect_req Signed-off-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 14 ++++++++++++-- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 4295cb535c85..0c58bff30de0 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1400,6 +1400,10 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) int rc = 0; struct cifs_ses *ses = tcon->ses; int flags = 0; + unsigned int total_len; + struct kvec iov[1]; + struct kvec rsp_iov; + int resp_buf_type; cifs_dbg(FYI, "Tree Disconnect\n"); @@ -1409,14 +1413,20 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon) if ((tcon->need_reconnect) || (tcon->ses->need_reconnect)) return 0; - rc = small_smb2_init(SMB2_TREE_DISCONNECT, 
tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, (void **) &req, + &total_len); if (rc) return rc; if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - rc = SendReceiveNoRsp(xid, ses, (char *)req, flags); + flags |= CIFS_NO_RESP; + + iov[0].iov_base = (char *)req; + iov[0].iov_len = total_len; + + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); if (rc) cifs_stats_fail_inc(tcon, SMB2_TREE_DISCONNECT_HE); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 0799e0957499..8b7aadefd4a5 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -375,7 +375,7 @@ struct smb2_tree_connect_rsp { #define SMB2_SHARE_CAP_ASYMMETRIC cpu_to_le32(0x00000080) /* 3.02 */ struct smb2_tree_disconnect_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 4 */ __le16 Reserved; } __packed; -- cgit v1.2.3 From afcccefdc3c53c79abada8d556b3c14604cd30fc Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 9 Nov 2017 12:14:19 +1100 Subject: cifs: remove rfc1002 header from smb2_close_req Signed-off-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 8 ++++---- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0c58bff30de0..5e3fa87da541 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2094,13 +2094,14 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, int resp_buftype; int rc = 0; int flags = 0; + unsigned int total_len; cifs_dbg(FYI, "Close\n"); if (!ses || !(ses->server)) return -EIO; - rc = small_smb2_init(SMB2_CLOSE, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_CLOSE, tcon, (void **) &req, &total_len); if (rc) return rc; @@ -2111,10 +2112,9 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, req->VolatileFileId = volatile_fid; iov[0].iov_base = (char *)req; - 
/* 4 for rfc1002 length field */ - iov[0].iov_len = get_rfc1002_length(req) + 4; + iov[0].iov_len = total_len; - rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_close_rsp *)rsp_iov.iov_base; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 8b7aadefd4a5..5404207d9ee7 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -789,7 +789,7 @@ struct smb2_ioctl_rsp { /* Currently defined values for close flags */ #define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) struct smb2_close_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 24 */ __le16 Flags; __le32 Reserved; -- cgit v1.2.3 From 9775468020dad9c4e39d78b3d2d361136abecce0 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 9 Nov 2017 12:14:20 +1100 Subject: cifs: remove rfc1002 header from smb2_ioctl_req Signed-off-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 22 +++++++++++----------- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5e3fa87da541..904766a79548 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1912,6 +1912,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, int n_iov; int rc = 0; int flags = 0; + unsigned int total_len; cifs_dbg(FYI, "SMB2 IOCTL\n"); @@ -1930,7 +1931,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (!ses || !(ses->server)) return -EIO; - rc = small_smb2_init(SMB2_IOCTL, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_IOCTL, tcon, (void **) &req, &total_len); if (rc) return rc; @@ -1941,8 +1942,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, } cifs_dbg(FYI, "replacing tid 0x%x with IPC tid 
0x%x\n", - req->hdr.sync_hdr.TreeId, ses->ipc_tid); - req->hdr.sync_hdr.TreeId = ses->ipc_tid; + req->sync_hdr.TreeId, ses->ipc_tid); + req->sync_hdr.TreeId = ses->ipc_tid; } if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -1955,7 +1956,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, req->InputCount = cpu_to_le32(indatalen); /* do not set InputOffset if no input data */ req->InputOffset = - cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer) - 4); + cpu_to_le32(offsetof(struct smb2_ioctl_req, Buffer)); iov[1].iov_base = in_data; iov[1].iov_len = indatalen; n_iov = 2; @@ -1990,21 +1991,20 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, * but if input data passed to ioctl, we do not * want to double count this, so we do not send * the dummy one byte of data in iovec[0] if sending - * input data (in iovec[1]). We also must add 4 bytes - * in first iovec to allow for rfc1002 length field. + * input data (in iovec[1]). 
*/ if (indatalen) { - iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; - inc_rfc1001_len(req, indatalen - 1); + iov[0].iov_len = total_len - 1; } else - iov[0].iov_len = get_rfc1002_length(req) + 4; + iov[0].iov_len = total_len; /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) - req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; + req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; - rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags, + &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 5404207d9ee7..2c743d338a11 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -753,7 +753,7 @@ struct duplicate_extents_to_file { } __packed; struct smb2_ioctl_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 57 */ __u16 Reserved; __le32 CtlCode; -- cgit v1.2.3 From 7f7ae759fb6ec6fd4cd3d23ac712d45171f87615 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 9 Nov 2017 12:14:21 +1100 Subject: cifs: remove rfc1002 header from smb2_echo_req Signed-off-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 14 ++++++++------ fs/cifs/smb2pdu.h | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 904766a79548..d00d7da93bd2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2397,6 +2397,8 @@ SMB2_echo(struct TCP_Server_Info *server) struct kvec iov[2]; struct smb_rqst rqst = { .rq_iov = iov, .rq_nvec = 2 }; + unsigned int total_len; + __be32 rfc1002_marker; cifs_dbg(FYI, "In echo request\n"); @@ -2406,17 +2408,17 @@ SMB2_echo(struct TCP_Server_Info *server) return rc; } - rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req); + rc = smb2_plain_req_init(SMB2_ECHO, 
NULL, (void **)&req, &total_len); if (rc) return rc; - req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1); + req->sync_hdr.CreditRequest = cpu_to_le16(1); - /* 4 for rfc1002 length field */ iov[0].iov_len = 4; - iov[0].iov_base = (char *)req; - iov[1].iov_len = get_rfc1002_length(req); - iov[1].iov_base = (char *)req + 4; + rfc1002_marker = cpu_to_be32(total_len); + iov[0].iov_base = &rfc1002_marker; + iov[1].iov_len = total_len; + iov[1].iov_base = (char *)req; rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, NULL, server, CIFS_ECHO_OP); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 2c743d338a11..b22bf8c6753e 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -924,7 +924,7 @@ struct smb2_lock_rsp { } __packed; struct smb2_echo_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 4 */ __u16 Reserved; } __packed; -- cgit v1.2.3 From 661bb943a98de5b3cf7c7ffc2d96141df4ac842e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 9 Nov 2017 12:14:23 +1100 Subject: cifs: remove rfc1002 header from smb2_tree_connect_req Signed-off-by: Ronnie Sahlberg Reviewed-by: Aurelien Aptel Signed-off-by: Steve French Acked-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 18 +++++++++--------- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index d00d7da93bd2..db7410462dc3 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1276,6 +1276,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, int unc_path_len; __le16 *unc_path = NULL; int flags = 0; + unsigned int total_len; cifs_dbg(FYI, "TCON\n"); @@ -1297,7 +1298,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, if (tcon) tcon->tid = 0; - rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_TREE_CONNECT, tcon, (void **) &req, + &total_len); if (rc) { kfree(unc_path); return rc; @@ 
-1308,26 +1310,24 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, flags |= CIFS_TRANSFORM_REQ; /* since no tcon, smb2_init can not do this, so do here */ - req->hdr.sync_hdr.SessionId = ses->Suid; + req->sync_hdr.SessionId = ses->Suid; if (ses->server->sign) - req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; + req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; } else if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; iov[0].iov_base = (char *)req; - /* 4 for rfc1002 length field and 1 for pad */ - iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + /* 1 for pad */ + iov[0].iov_len = total_len - 1; /* Testing shows that buffer offset must be at location of Buffer[0] */ req->PathOffset = cpu_to_le16(sizeof(struct smb2_tree_connect_req) - - 1 /* pad */ - 4 /* do not count rfc1001 len field */); + - 1 /* pad */); req->PathLength = cpu_to_le16(unc_path_len - 2); iov[1].iov_base = unc_path; iov[1].iov_len = unc_path_len; - inc_rfc1001_len(req, unc_path_len - 1 /* pad */); - - rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index b22bf8c6753e..f708f978dcf1 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -323,7 +323,7 @@ struct smb2_logoff_rsp { #define SMB2_SHAREFLAG_CLUSTER_RECONNECT 0x0001 struct smb2_tree_connect_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 9 */ __le16 Reserved; /* Flags in SMB3.1.1 */ __le16 PathOffset; -- cgit v1.2.3 From 88ea5cb7d4cc816b1e629cfc3477ceeb99fd248c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:36 +1100 Subject: cifs: remove rfc1002 header from smb2_sess_setup_req Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Acked-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 
27 +++++++++++++-------------- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index db7410462dc3..544daec74de0 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -808,20 +808,22 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) struct cifs_ses *ses = sess_data->ses; struct smb2_sess_setup_req *req; struct TCP_Server_Info *server = ses->server; + unsigned int total_len; - rc = small_smb2_init(SMB2_SESSION_SETUP, NULL, (void **) &req); + rc = smb2_plain_req_init(SMB2_SESSION_SETUP, NULL, (void **) &req, + &total_len); if (rc) return rc; /* First session, not a reauthenticate */ - req->hdr.sync_hdr.SessionId = 0; + req->sync_hdr.SessionId = 0; /* if reconnect, we need to send previous sess id, otherwise it is 0 */ req->PreviousSessionId = sess_data->previous_session; req->Flags = 0; /* MBZ */ /* to enable echos and oplocks */ - req->hdr.sync_hdr.CreditRequest = cpu_to_le16(3); + req->sync_hdr.CreditRequest = cpu_to_le16(3); /* only one of SMB2 signing flags may be set in SMB2 request */ if (server->sign) @@ -835,8 +837,8 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) req->Channel = 0; /* MBZ */ sess_data->iov[0].iov_base = (char *)req; - /* 4 for rfc1002 length field and 1 for pad */ - sess_data->iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + /* 1 for pad */ + sess_data->iov[0].iov_len = total_len - 1; /* * This variable will be used to clear the buffer * allocated above in case of any error in the calling function. 
@@ -862,18 +864,15 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data) /* Testing shows that buffer offset must be at location of Buffer[0] */ req->SecurityBufferOffset = - cpu_to_le16(sizeof(struct smb2_sess_setup_req) - - 1 /* pad */ - 4 /* rfc1001 len */); + cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 1 /* pad */); req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len); - inc_rfc1001_len(req, sess_data->iov[1].iov_len - 1 /* pad */); - /* BB add code to build os and lm fields */ - rc = SendReceive2(sess_data->xid, sess_data->ses, - sess_data->iov, 2, - &sess_data->buf0_type, - CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov); + rc = smb2_send_recv(sess_data->xid, sess_data->ses, + sess_data->iov, 2, + &sess_data->buf0_type, + CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov); cifs_small_buf_release(sess_data->iov[0].iov_base); memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec)); @@ -1094,7 +1093,7 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) goto out; req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base; - req->hdr.sync_hdr.SessionId = ses->Suid; + req->sync_hdr.SessionId = ses->Suid; rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses, sess_data->nls_cp); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f708f978dcf1..4f80b95d02ae 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -282,7 +282,7 @@ struct smb2_negotiate_rsp { #define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA 0x04 struct smb2_sess_setup_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 25 */ __u8 Flags; __u8 SecurityMode; -- cgit v1.2.3 From 4f33bc35875ae6df5058f5f646fd3f0d3f7c8b04 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:38 +1100 Subject: cifs: remove rfc1002 header from smb2_create_req Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/smb2pdu.c | 30 
++++++++++++------------------ fs/cifs/smb2pdu.h | 2 +- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 544daec74de0..d0616398503a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1525,11 +1525,10 @@ add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; if (!req->CreateContextsOffset) req->CreateContextsOffset = cpu_to_le32( - sizeof(struct smb2_create_req) - 4 + + sizeof(struct smb2_create_req) + iov[num - 1].iov_len); le32_add_cpu(&req->CreateContextsLength, server->vals->create_lease_size); - inc_rfc1001_len(&req->hdr, server->vals->create_lease_size); *num_iovec = num + 1; return 0; } @@ -1609,10 +1608,9 @@ add_durable_v2_context(struct kvec *iov, unsigned int *num_iovec, iov[num].iov_len = sizeof(struct create_durable_v2); if (!req->CreateContextsOffset) req->CreateContextsOffset = - cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + cpu_to_le32(sizeof(struct smb2_create_req) + iov[1].iov_len); le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable_v2)); - inc_rfc1001_len(&req->hdr, sizeof(struct create_durable_v2)); *num_iovec = num + 1; return 0; } @@ -1633,12 +1631,10 @@ add_durable_reconnect_v2_context(struct kvec *iov, unsigned int *num_iovec, iov[num].iov_len = sizeof(struct create_durable_handle_reconnect_v2); if (!req->CreateContextsOffset) req->CreateContextsOffset = - cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + cpu_to_le32(sizeof(struct smb2_create_req) + iov[1].iov_len); le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable_handle_reconnect_v2)); - inc_rfc1001_len(&req->hdr, - sizeof(struct create_durable_handle_reconnect_v2)); *num_iovec = num + 1; return 0; } @@ -1669,10 +1665,9 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, iov[num].iov_len = sizeof(struct create_durable); if (!req->CreateContextsOffset) req->CreateContextsOffset = - 
cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + cpu_to_le32(sizeof(struct smb2_create_req) + iov[1].iov_len); le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable)); - inc_rfc1001_len(&req->hdr, sizeof(struct create_durable)); *num_iovec = num + 1; return 0; } @@ -1743,6 +1738,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u32 file_attributes = 0; char *dhc_buf = NULL, *lc_buf = NULL; int flags = 0; + unsigned int total_len; cifs_dbg(FYI, "create/open\n"); @@ -1751,7 +1747,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, else return -EIO; - rc = small_smb2_init(SMB2_CREATE, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_CREATE, tcon, (void **) &req, &total_len); + if (rc) return rc; @@ -1772,12 +1769,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); iov[0].iov_base = (char *)req; - /* 4 for rfc1002 length field */ - iov[0].iov_len = get_rfc1002_length(req) + 4; /* -1 since last byte is buf[0] which is sent below (path) */ - iov[0].iov_len--; + iov[0].iov_len = total_len - 1; - req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4); + req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req)); /* [MS-SMB2] 2.2.13 NameOffset: * If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of @@ -1790,7 +1785,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (tcon->share_flags & SHI1005_FLAGS_DFS) { int name_len; - req->hdr.sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; + req->sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; rc = alloc_path_with_tree_prefix(&copy_path, &copy_size, &name_len, tcon->treeName, path); @@ -1817,8 +1812,6 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, iov[1].iov_len = uni_path_len; iov[1].iov_base = path; - /* -1 since last byte is buf[0] which was counted in
smb2_buf_len */ - inc_rfc1001_len(req, uni_path_len - 1); if (!server->oplocks) *oplock = SMB2_OPLOCK_LEVEL_NONE; @@ -1856,7 +1849,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, dhc_buf = iov[n_iov-1].iov_base; } - rc = SendReceive2(xid, ses, iov, n_iov, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags, + &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_create_rsp *)rsp_iov.iov_base; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4f80b95d02ae..6e1874a81014 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -496,7 +496,7 @@ struct smb2_tree_disconnect_rsp { #define SVHDX_OPEN_DEVICE_CONTEXT 0x83CE6F1AD851E0986E34401CC9BCFCE9 struct smb2_create_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 57 */ __u8 SecurityFlags; __u8 RequestedOplockLevel; -- cgit v1.2.3 From 1f444e4c06e76e6f463a1c1a19f05a14b100e07b Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:39 +1100 Subject: cifs: remove rfc1002 header from smb2_flush_req Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/smb2pdu.c | 8 ++++---- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index d0616398503a..057d9e5e769a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2433,13 +2433,14 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, int resp_buftype; int rc = 0; int flags = 0; + unsigned int total_len; cifs_dbg(FYI, "Flush\n"); if (!ses || !(ses->server)) return -EIO; - rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_FLUSH, tcon, (void **) &req, &total_len); if (rc) return rc; @@ -2450,10 +2451,9 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, req->VolatileFileId = 
volatile_fid; iov[0].iov_base = (char *)req; - /* 4 for rfc1002 length field */ - iov[0].iov_len = get_rfc1002_length(req) + 4; + iov[0].iov_len = total_len; - rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); if (rc != 0) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6e1874a81014..a8102e5f4ebb 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -812,7 +812,7 @@ struct smb2_close_rsp { } __packed; struct smb2_flush_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 24 */ __le16 Reserved1; __le32 Reserved2; -- cgit v1.2.3 From ced93679cb1634846c93854b9993e11ce0315428 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 21 Nov 2017 10:07:27 +1100 Subject: cifs: remove rfc1002 header from smb2_lock_req Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 13 ++++++------- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 057d9e5e769a..1b0f3c9d1426 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3521,34 +3521,33 @@ smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, int resp_buf_type; unsigned int count; int flags = CIFS_NO_RESP; + unsigned int total_len; cifs_dbg(FYI, "smb2_lockv num lock %d\n", num_lock); - rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_LOCK, tcon, (void **) &req, &total_len); if (rc) return rc; if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid); + req->sync_hdr.ProcessId = cpu_to_le32(pid); req->LockCount = cpu_to_le16(num_lock); req->PersistentFileId = persist_fid; req->VolatileFileId = volatile_fid; count = num_lock * sizeof(struct smb2_lock_element); - inc_rfc1001_len(req, count - sizeof(struct smb2_lock_element)); iov[0].iov_base = (char 
*)req; - /* 4 for rfc1002 length field and count for all locks */ - iov[0].iov_len = get_rfc1002_length(req) + 4 - count; + iov[0].iov_len = total_len - sizeof(struct smb2_lock_element); iov[1].iov_base = (char *)buf; iov[1].iov_len = count; cifs_stats_inc(&tcon->stats.cifs_stats.num_locks); - rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, flags, - &rsp_iov); + rc = smb2_send_recv(xid, tcon->ses, iov, 2, &resp_buf_type, flags, + &rsp_iov); cifs_small_buf_release(req); if (rc) { cifs_dbg(FYI, "Send error in smb2_lockv = %d\n", rc); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index a8102e5f4ebb..0c33fc8cce71 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -907,7 +907,7 @@ struct smb2_lock_element { } __packed; struct smb2_lock_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 48 */ __le16 LockCount; __le32 Reserved; -- cgit v1.2.3 From f5688a6d7c5c52ce3e9f787694ae4e3c75aa6d5a Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:41 +1100 Subject: cifs: remove rfc1002 header from smb2 read/write requests Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/smb2pdu.c | 51 ++++++++++++++++++++++----------------------------- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1b0f3c9d1426..9cce02d20576 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2666,10 +2666,9 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, struct smb2_read_plain_req *req = NULL; struct smb2_read_rsp *rsp = NULL; struct smb2_sync_hdr *shdr; - struct kvec iov[2]; + struct kvec iov[1]; struct kvec rsp_iov; unsigned int total_len; - __be32 req_len; struct smb_rqst rqst = { .rq_iov = iov, .rq_nvec = 2 }; int flags = CIFS_LOG_ERROR; @@ -2683,14 +2682,10 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, if 
(encryption_required(io_parms->tcon)) flags |= CIFS_TRANSFORM_REQ; - req_len = cpu_to_be32(total_len); - - iov[0].iov_base = &req_len; - iov[0].iov_len = sizeof(__be32); - iov[1].iov_base = req; - iov[1].iov_len = total_len; + iov[0].iov_base = (char *)req; + iov[0].iov_len = total_len; - rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; @@ -2792,8 +2787,10 @@ smb2_async_writev(struct cifs_writedata *wdata, struct TCP_Server_Info *server = tcon->ses->server; struct kvec iov[2]; struct smb_rqst rqst = { }; + unsigned int total_len; + __be32 rfc1002_marker; - rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_WRITE, tcon, (void **) &req, &total_len); if (rc) { if (rc == -EAGAIN && wdata->credits) { /* credits was reset by reconnect */ @@ -2809,7 +2806,7 @@ smb2_async_writev(struct cifs_writedata *wdata, if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - shdr = get_sync_hdr(req); + shdr = (struct smb2_sync_hdr *)req; shdr->ProcessId = cpu_to_le32(wdata->cfile->pid); req->PersistentFileId = wdata->cfile->fid.persistent_fid; @@ -2818,16 +2815,16 @@ smb2_async_writev(struct cifs_writedata *wdata, req->WriteChannelInfoLength = 0; req->Channel = 0; req->Offset = cpu_to_le64(wdata->offset); - /* 4 for rfc1002 length field */ req->DataOffset = cpu_to_le16( - offsetof(struct smb2_write_req, Buffer) - 4); + offsetof(struct smb2_write_req, Buffer)); req->RemainingBytes = 0; /* 4 for rfc1002 length field and 1 for Buffer */ iov[0].iov_len = 4; - iov[0].iov_base = req; - iov[1].iov_len = get_rfc1002_length(req) - 1; - iov[1].iov_base = (char *)req + 4; + rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes); + iov[0].iov_base = &rfc1002_marker; + iov[1].iov_len = total_len - 1; + iov[1].iov_base = (char *)req; rqst.rq_iov = iov; rqst.rq_nvec = 2; @@ -2841,8 
+2838,6 @@ smb2_async_writev(struct cifs_writedata *wdata, req->Length = cpu_to_le32(wdata->bytes); - inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); - if (wdata->credits) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); @@ -2885,13 +2880,15 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, int resp_buftype; struct kvec rsp_iov; int flags = 0; + unsigned int total_len; *nbytes = 0; if (n_vec < 1) return rc; - rc = small_smb2_init(SMB2_WRITE, io_parms->tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_WRITE, io_parms->tcon, (void **) &req, + &total_len); if (rc) return rc; @@ -2901,7 +2898,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, if (encryption_required(io_parms->tcon)) flags |= CIFS_TRANSFORM_REQ; - req->hdr.sync_hdr.ProcessId = cpu_to_le32(io_parms->pid); + req->sync_hdr.ProcessId = cpu_to_le32(io_parms->pid); req->PersistentFileId = io_parms->persistent_fid; req->VolatileFileId = io_parms->volatile_fid; @@ -2910,20 +2907,16 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms, req->Channel = 0; req->Length = cpu_to_le32(io_parms->length); req->Offset = cpu_to_le64(io_parms->offset); - /* 4 for rfc1002 length field */ req->DataOffset = cpu_to_le16( - offsetof(struct smb2_write_req, Buffer) - 4); + offsetof(struct smb2_write_req, Buffer)); req->RemainingBytes = 0; iov[0].iov_base = (char *)req; - /* 4 for rfc1002 length field and 1 for Buffer */ - iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; - - /* length of entire message including data to be written */ - inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */); + /* 1 for Buffer */ + iov[0].iov_len = total_len - 1; - rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1, - &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, io_parms->tcon->ses, iov, n_vec + 1, + &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_write_rsp *)rsp_iov.iov_base; diff 
--git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 0c33fc8cce71..3c856f058be7 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -868,7 +868,7 @@ struct smb2_read_rsp { #define SMB2_WRITEFLAG_WRITE_UNBUFFERED 0x00000002 /* SMB3.02 or later */ struct smb2_write_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 49 */ __le16 DataOffset; /* offset from start of SMB2 header to write data */ __le32 Length; -- cgit v1.2.3 From 2fc803efe6141675ce59e4c30f78320a3ff30294 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:44 +1100 Subject: cifs: remove rfc1002 header from smb2_set_info_req Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/smb2pdu.c | 19 +++++++++---------- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 9cce02d20576..d9a0fabe61d1 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3119,6 +3119,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, unsigned int i; struct cifs_ses *ses = tcon->ses; int flags = 0; + unsigned int total_len; if (!ses || !(ses->server)) return -EIO; @@ -3130,7 +3131,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, if (!iov) return -ENOMEM; - rc = small_smb2_init(SMB2_SET_INFO, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_SET_INFO, tcon, (void **) &req, &total_len); if (rc) { kfree(iov); return rc; @@ -3139,7 +3140,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - req->hdr.sync_hdr.ProcessId = cpu_to_le32(pid); + req->sync_hdr.ProcessId = cpu_to_le32(pid); req->InfoType = info_type; req->FileInfoClass = info_class; @@ -3147,27 +3148,25 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, req->VolatileFileId = volatile_fid; req->AdditionalInformation = 
cpu_to_le32(additional_info); - /* 4 for RFC1001 length and 1 for Buffer */ req->BufferOffset = - cpu_to_le16(sizeof(struct smb2_set_info_req) - 1 - 4); + cpu_to_le16(sizeof(struct smb2_set_info_req) - 1); req->BufferLength = cpu_to_le32(*size); - inc_rfc1001_len(req, *size - 1 /* Buffer */); - memcpy(req->Buffer, *data, *size); + total_len += *size; iov[0].iov_base = (char *)req; - /* 4 for RFC1001 length */ - iov[0].iov_len = get_rfc1002_length(req) + 4; + /* 1 for Buffer */ + iov[0].iov_len = total_len - 1; for (i = 1; i < num; i++) { - inc_rfc1001_len(req, size[i]); le32_add_cpu(&req->BufferLength, size[i]); iov[i].iov_base = (char *)data[i]; iov[i].iov_len = size[i]; } - rc = SendReceive2(xid, ses, iov, num, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, num, &resp_buftype, flags, + &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 3c856f058be7..831745a0a496 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1013,7 +1013,7 @@ struct smb2_query_info_rsp { } __packed; struct smb2_set_info_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 33 */ __u8 InfoType; __u8 FileInfoClass; -- cgit v1.2.3 From 7c00c3a625f818fe81573bec1fc27f19122a198d Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:45 +1100 Subject: cifs: remove rfc1002 header from smb2_query_directory_req Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/smb2pdu.c | 14 +++++++------- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index d9a0fabe61d1..eb8f79d83336 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2993,13 +2993,15 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, unsigned int output_size = CIFSMaxBufSize; size_t 
info_buf_size; int flags = 0; + unsigned int total_len; if (ses && (ses->server)) server = ses->server; else return -EIO; - rc = small_smb2_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req, + &total_len); if (rc) return rc; @@ -3031,7 +3033,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, memcpy(bufptr, &asteriks, len); req->FileNameOffset = - cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1 - 4); + cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1); req->FileNameLength = cpu_to_le16(len); /* * BB could be 30 bytes or so longer if we used SMB2 specific @@ -3042,15 +3044,13 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, req->OutputBufferLength = cpu_to_le32(output_size); iov[0].iov_base = (char *)req; - /* 4 for RFC1001 length and 1 for Buffer */ - iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; + /* 1 for Buffer */ + iov[0].iov_len = total_len - 1; iov[1].iov_base = (char *)(req->Buffer); iov[1].iov_len = len; - inc_rfc1001_len(req, len - 1 /* Buffer */); - - rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_query_directory_rsp *)rsp_iov.iov_base; diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 831745a0a496..0b215bb4c58b 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -942,7 +942,7 @@ struct smb2_echo_rsp { #define SMB2_REOPEN 0x10 struct smb2_query_directory_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 33 */ __u8 FileInformationClass; __u8 Flags; -- cgit v1.2.3 From b2fb7fecc9357c307b2ffe7695ea529cd3b15474 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:46 +1100 Subject: cifs: remove rfc1002 header from smb2_query_info_req Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Acked-by: Pavel 
Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/smb2pdu.c | 26 ++++++++++++++------------ fs/cifs/smb2pdu.h | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index eb8f79d83336..9421393a5c88 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2194,13 +2194,15 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, int resp_buftype; struct cifs_ses *ses = tcon->ses; int flags = 0; + unsigned int total_len; cifs_dbg(FYI, "Query Info\n"); if (!ses || !(ses->server)) return -EIO; - rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req, + &total_len); if (rc) return rc; @@ -2217,15 +2219,14 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, * We do not use the input buffer (do not send extra byte) */ req->InputBufferOffset = 0; - inc_rfc1001_len(req, -1); req->OutputBufferLength = cpu_to_le32(output_len); iov[0].iov_base = (char *)req; - /* 4 for rfc1002 length field */ - iov[0].iov_len = get_rfc1002_length(req) + 4; + /* 1 for Buffer */ + iov[0].iov_len = total_len - 1; - rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(req); rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base; @@ -3363,13 +3364,15 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, { int rc; struct smb2_query_info_req *req; + unsigned int total_len; cifs_dbg(FYI, "Query FSInfo level %d\n", level); if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) return -EIO; - rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_QUERY_INFO, tcon, (void **) &req, + &total_len); if (rc) return rc; @@ -3377,15 +3380,14 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level, req->FileInfoClass = level; req->PersistentFileId = persistent_fid; req->VolatileFileId = 
volatile_fid; - /* 4 for rfc1002 length field and 1 for pad */ + /* 1 for pad */ req->InputBufferOffset = - cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); + cpu_to_le16(sizeof(struct smb2_query_info_req) - 1); req->OutputBufferLength = cpu_to_le32( outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4); iov->iov_base = (char *)req; - /* 4 for rfc1002 length field */ - iov->iov_len = get_rfc1002_length(req) + 4; + iov->iov_len = total_len; return 0; } @@ -3411,7 +3413,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(iov.iov_base); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); @@ -3467,7 +3469,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov); + rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov); cifs_small_buf_release(iov.iov_base); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 0b215bb4c58b..9157d95d0c4e 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -989,7 +989,7 @@ struct smb2_query_directory_rsp { #define SL_INDEX_SPECIFIED 0x00000004 struct smb2_query_info_req { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 41 */ __u8 InfoType; __u8 FileInfoClass; -- cgit v1.2.3 From 21ad9487ca3250c90ec36d68a8a3ee9f659450e3 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 20 Nov 2017 11:24:43 +1100 Subject: cifs: remove rfc1002 header from smb2_oplock_break we get from server Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/smb2misc.c | 2 +- 
fs/cifs/smb2pdu.c | 19 +++++++++++++++---- fs/cifs/smb2pdu.h | 14 +++++++++++++- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 7b08a1446a7f..76d03abaa38c 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -578,7 +578,7 @@ smb2_is_valid_lease_break(char *buffer) bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) { - struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer; + struct smb2_oplock_break_rsp *rsp = (struct smb2_oplock_break_rsp *)buffer; struct list_head *tmp, *tmp1, *tmp2; struct cifs_ses *ses; struct cifs_tcon *tcon; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 9421393a5c88..08e78ce5156f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3319,11 +3319,17 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, __u8 oplock_level) { int rc; - struct smb2_oplock_break *req = NULL; + struct smb2_oplock_break_req *req = NULL; + struct cifs_ses *ses = tcon->ses; int flags = CIFS_OBREAK_OP; + unsigned int total_len; + struct kvec iov[1]; + struct kvec rsp_iov; + int resp_buf_type; cifs_dbg(FYI, "SMB2_oplock_break\n"); - rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req, + &total_len); if (rc) return rc; @@ -3333,9 +3339,14 @@ SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, req->VolatileFid = volatile_fid; req->PersistentFid = persistent_fid; req->OplockLevel = oplock_level; - req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1); + req->sync_hdr.CreditRequest = cpu_to_le16(1); - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, flags); + flags |= CIFS_NO_RESP; + + iov[0].iov_base = (char *)req; + iov[0].iov_len = total_len; + + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); if (rc) { diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 9157d95d0c4e..da6a8ec885d1 100644 
--- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1031,7 +1031,19 @@ struct smb2_set_info_rsp { __le16 StructureSize; /* Must be 2 */ } __packed; -struct smb2_oplock_break { +/* oplock break without an rfc1002 header */ +struct smb2_oplock_break_req { + struct smb2_sync_hdr sync_hdr; + __le16 StructureSize; /* Must be 24 */ + __u8 OplockLevel; + __u8 Reserved; + __le32 Reserved2; + __u64 PersistentFid; + __u64 VolatileFid; +} __packed; + +/* oplock break with an rfc1002 header */ +struct smb2_oplock_break_rsp { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 24 */ __u8 OplockLevel; -- cgit v1.2.3 From 5dfe69a407dccae64a18c49149479b221a648cc5 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 21 Nov 2017 09:57:45 +1100 Subject: cifs: remove unused variable from SMB2_read Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 08e78ce5156f..3e5546c7d0b8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2670,8 +2670,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, struct kvec iov[1]; struct kvec rsp_iov; unsigned int total_len; - struct smb_rqst rqst = { .rq_iov = iov, - .rq_nvec = 2 }; int flags = CIFS_LOG_ERROR; struct cifs_ses *ses = io_parms->tcon->ses; -- cgit v1.2.3 From 8eb7998e791fdba366f3c2b7d95b40daf313509f Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 21 Nov 2017 11:04:37 +1100 Subject: cifs: remove rfc1002 header from smb2_lease_ack Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Acked-by: Pavel Shilovsky --- fs/cifs/smb2pdu.c | 19 +++++++++++++++---- fs/cifs/smb2pdu.h | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3e5546c7d0b8..5cdcf1aafdf1 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3583,24 +3583,35 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon 
*tcon, { int rc; struct smb2_lease_ack *req = NULL; + struct cifs_ses *ses = tcon->ses; int flags = CIFS_OBREAK_OP; + unsigned int total_len; + struct kvec iov[1]; + struct kvec rsp_iov; + int resp_buf_type; cifs_dbg(FYI, "SMB2_lease_break\n"); - rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req); + rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req, + &total_len); if (rc) return rc; if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; - req->hdr.sync_hdr.CreditRequest = cpu_to_le16(1); + req->sync_hdr.CreditRequest = cpu_to_le16(1); req->StructureSize = cpu_to_le16(36); - inc_rfc1001_len(req, 12); + total_len += 12; memcpy(req->LeaseKey, lease_key, 16); req->LeaseState = lease_state; - rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, flags); + flags |= CIFS_NO_RESP; + + iov[0].iov_base = (char *)req; + iov[0].iov_len = total_len; + + rc = smb2_send_recv(xid, ses, iov, 1, &resp_buf_type, flags, &rsp_iov); cifs_small_buf_release(req); if (rc) { diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index da6a8ec885d1..19d34881815f 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1069,7 +1069,7 @@ struct smb2_lease_break { } __packed; struct smb2_lease_ack { - struct smb2_hdr hdr; + struct smb2_sync_hdr sync_hdr; __le16 StructureSize; /* Must be 36 */ __le16 Reserved; __le32 Flags; -- cgit v1.2.3 From 305428acf02490dfaf68d2052d03fe2d74cad241 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 21 Nov 2017 11:04:42 +1100 Subject: cifs: remove small_smb2_init Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/smb2pdu.c | 53 ++++++----------------------------------------------- 1 file changed, 6 insertions(+), 47 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5cdcf1aafdf1..9f883f4db23d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -319,54 +319,16 @@ fill_small_buf(__le16 smb2_command, struct cifs_tcon *tcon, void 
*buf, *total_len = parmsize + sizeof(struct smb2_sync_hdr); } -/* init request without RFC1001 length at the beginning */ -static int -smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, - void **request_buf, unsigned int *total_len) -{ - int rc; - struct smb2_sync_hdr *shdr; - - rc = smb2_reconnect(smb2_command, tcon); - if (rc) - return rc; - - /* BB eventually switch this to SMB2 specific small buf size */ - *request_buf = cifs_small_buf_get(); - if (*request_buf == NULL) { - /* BB should we add a retry in here if not a writepage? */ - return -ENOMEM; - } - - shdr = (struct smb2_sync_hdr *)(*request_buf); - - fill_small_buf(smb2_command, tcon, shdr, total_len); - - if (tcon != NULL) { -#ifdef CONFIG_CIFS_STATS2 - uint16_t com_code = le16_to_cpu(smb2_command); - - cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_sent[com_code]); -#endif - cifs_stats_inc(&tcon->num_smbs_sent); - } - - return rc; -} - /* * Allocate and return pointer to an SMB request hdr, and set basic * SMB information in the SMB header. If the return code is zero, this - * function must have filled in request_buf pointer. The returned buffer - * has RFC1001 length at the beginning. + * function must have filled in request_buf pointer. 
*/ static int -small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, - void **request_buf) +smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, + void **request_buf, unsigned int *total_len) { int rc; - unsigned int total_len; - struct smb2_pdu *pdu; rc = smb2_reconnect(smb2_command, tcon); if (rc) @@ -379,12 +341,9 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon, return -ENOMEM; } - pdu = (struct smb2_pdu *)(*request_buf); - - fill_small_buf(smb2_command, tcon, get_sync_hdr(pdu), &total_len); - - /* Note this is only network field converted to big endian */ - pdu->hdr.smb2_buf_length = cpu_to_be32(total_len); + fill_small_buf(smb2_command, tcon, + (struct smb2_sync_hdr *)(*request_buf), + total_len); if (tcon != NULL) { #ifdef CONFIG_CIFS_STATS2 -- cgit v1.2.3 From 3cecf4865cd3ea31272750edf38e73c59ff7540c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 21 Nov 2017 15:08:07 +1100 Subject: cifs: avoid a kmalloc in smb2_send_recv/SendReceive2 for the common case In both functions, use an array of 8 (arbitrary but should be big enough for all current uses) iov and avoid having to kmalloc the array for the common case. If 8 is too small, then fall back to the original behaviour and use kmalloc/kfree. This should not change any behaviour but should save us a tiny amount of cpu cycles. Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/transport.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e678307bb7a0..510f41a435c8 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -38,6 +38,9 @@ #include "cifsproto.h" #include "cifs_debug.h" +/* Max number of iovectors we can use off the stack when sending requests. 
*/ +#define CIFS_MAX_IOV_SIZE 8 + void cifs_wake_up_task(struct mid_q_entry *mid) { @@ -803,12 +806,16 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, const int flags, struct kvec *resp_iov) { struct smb_rqst rqst; - struct kvec *new_iov; + struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov; int rc; - new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), GFP_KERNEL); - if (!new_iov) - return -ENOMEM; + if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { + new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), + GFP_KERNEL); + if (!new_iov) + return -ENOMEM; + } else + new_iov = s_iov; /* 1st iov is a RFC1001 length followed by the rest of the packet */ memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); @@ -823,7 +830,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, rqst.rq_nvec = n_vec + 1; rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov); - kfree(new_iov); + if (n_vec + 1 > CIFS_MAX_IOV_SIZE) + kfree(new_iov); return rc; } @@ -834,15 +842,19 @@ smb2_send_recv(const unsigned int xid, struct cifs_ses *ses, const int flags, struct kvec *resp_iov) { struct smb_rqst rqst; - struct kvec *new_iov; + struct kvec s_iov[CIFS_MAX_IOV_SIZE], *new_iov; int rc; int i; __u32 count; __be32 rfc1002_marker; - new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), GFP_KERNEL); - if (!new_iov) - return -ENOMEM; + if (n_vec + 1 > CIFS_MAX_IOV_SIZE) { + new_iov = kmalloc(sizeof(struct kvec) * (n_vec + 1), + GFP_KERNEL); + if (!new_iov) + return -ENOMEM; + } else + new_iov = s_iov; /* 1st iov is an RFC1002 Session Message length */ memcpy(new_iov + 1, iov, (sizeof(struct kvec) * n_vec)); @@ -861,7 +873,8 @@ smb2_send_recv(const unsigned int xid, struct cifs_ses *ses, rqst.rq_nvec = n_vec + 1; rc = cifs_send_recv(xid, ses, &rqst, resp_buf_type, flags, resp_iov); - kfree(new_iov); + if (n_vec + 1 > CIFS_MAX_IOV_SIZE) + kfree(new_iov); return rc; } -- cgit v1.2.3 From 2dabfd5baba641588b82ba499ef81c9a378bbb23 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 7 
Nov 2017 01:54:53 -0700 Subject: CIFS: SMBD: Add parameter rdata to smb2_new_read_req This patch is for preparing upper layer for doing SMB read via RDMA write. When we assemble the SMB read packet header, we need to know the I/O layout if this request is to use a RDMA write. rdata has all the information we need for memory registration. Add rdata to smb2_new_read_req. Signed-off-by: Long Li Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Acked-by: Ronnie Sahlberg --- fs/cifs/smb2pdu.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 9f883f4db23d..c0dc0491af93 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2429,18 +2429,21 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, */ static int smb2_new_read_req(void **buf, unsigned int *total_len, - struct cifs_io_parms *io_parms, unsigned int remaining_bytes, - int request_type) + struct cifs_io_parms *io_parms, struct cifs_readdata *rdata, + unsigned int remaining_bytes, int request_type) { int rc = -EACCES; struct smb2_read_plain_req *req = NULL; struct smb2_sync_hdr *shdr; + struct TCP_Server_Info *server; rc = smb2_plain_req_init(SMB2_READ, io_parms->tcon, (void **) &req, total_len); if (rc) return rc; - if (io_parms->tcon->ses->server == NULL) + + server = io_parms->tcon->ses->server; + if (server == NULL) return -ECONNABORTED; shdr = &req->sync_hdr; @@ -2568,7 +2571,8 @@ smb2_async_readv(struct cifs_readdata *rdata) server = io_parms.tcon->ses->server; - rc = smb2_new_read_req((void **) &buf, &total_len, &io_parms, 0, 0); + rc = smb2_new_read_req( + (void **) &buf, &total_len, &io_parms, rdata, 0, 0); if (rc) { if (rc == -EAGAIN && rdata->credits) { /* credits was reset by reconnect */ @@ -2633,7 +2637,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms, struct cifs_ses *ses = io_parms->tcon->ses; *nbytes = 0; - rc = smb2_new_read_req((void **)&req, &total_len, io_parms, 
0, 0); + rc = smb2_new_read_req((void **)&req, &total_len, io_parms, NULL, 0, 0); if (rc) return rc; -- cgit v1.2.3 From 2b6ed88037cf11fadbf74b4a676aed5e1f6f39c3 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 7 Nov 2017 01:54:54 -0700 Subject: CIFS: SMBD: Introduce kernel config option CONFIG_CIFS_SMB_DIRECT Build SMB Direct code when this option is set. Signed-off-by: Long Li Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Acked-by: Ronnie Sahlberg --- fs/cifs/Kconfig | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index d5b2e12b5d02..500fd69fb58b 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -196,6 +196,14 @@ config CIFS_SMB311 This dialect includes improved security negotiation features. If unsure, say N +config CIFS_SMB_DIRECT + bool "SMB Direct support (Experimental)" + depends on CIFS && INFINIBAND + help + Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1. + SMB Direct allows transferring SMB packets over RDMA. If unsure, + say N. + config CIFS_FSCACHE bool "Provide CIFS client caching support" depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y -- cgit v1.2.3 From 8339dd32fbad71834d61b9103e8884ada9bf3e1c Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 7 Nov 2017 01:54:55 -0700 Subject: CIFS: SMBD: Add rdma mount option Add "rdma" to CIFS mount options to connect to SMB Direct. Add checks to validate this is used on SMB 3.X dialects. To connect to SMBDirect, use "mount.cifs -o rdma,vers=3.x". At the time of this patch, 3.x can be 3.0, 3.02 or 3.1.1. 
Signed-off-by: Long Li Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Acked-by: Ronnie Sahlberg --- fs/cifs/cifs_debug.c | 2 ++ fs/cifs/cifsfs.c | 2 ++ fs/cifs/cifsglob.h | 5 +++++ fs/cifs/connect.c | 15 ++++++++++++++- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index cbb9534b89b4..42a57ebde630 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -176,6 +176,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) ses->ses_count, ses->serverOS, ses->serverNOS, ses->capabilities, ses->status); } + if (server->rdma) + seq_printf(m, "RDMA\n\t"); seq_printf(m, "TCP status: %d\n\tLocal Users To " "Server: %d SecMode: 0x%x Req On Wire: %d", server->tcpStatus, server->srv_count, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 31b7565b1617..801b63b7900f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -327,6 +327,8 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) default: seq_puts(s, "(unknown)"); } + if (server->rdma) + seq_puts(s, ",rdma"); } static void diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b16583594d1a..573a7bef8817 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -532,6 +532,7 @@ struct smb_vol { bool nopersistent:1; bool resilient:1; /* noresilient not required since not fored for CA */ bool domainauto:1; + bool rdma:1; unsigned int rsize; unsigned int wsize; bool sockopt_tcp_nodelay:1; @@ -648,6 +649,10 @@ struct TCP_Server_Info { bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ bool large_buf; /* is current buffer large? 
*/ + /* use SMBD connection instead of socket */ + bool rdma; + /* point to the SMBD connection if RDMA is used instead of socket */ + struct smbd_connection *smbd_conn; struct delayed_work echo; /* echo ping workqueue job */ char *smallbuf; /* pointer to current "small" buffer */ char *bigbuf; /* pointer to current "big" buffer */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0bfc2280436d..64be6f9e54a2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -92,7 +92,7 @@ enum { Opt_multiuser, Opt_sloppy, Opt_nosharesock, Opt_persistent, Opt_nopersistent, Opt_resilient, Opt_noresilient, - Opt_domainauto, + Opt_domainauto, Opt_rdma, /* Mount options which take numeric value */ Opt_backupuid, Opt_backupgid, Opt_uid, @@ -183,6 +183,7 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_resilient, "resilienthandles"}, { Opt_noresilient, "noresilienthandles"}, { Opt_domainauto, "domainauto"}, + { Opt_rdma, "rdma"}, { Opt_backupuid, "backupuid=%s" }, { Opt_backupgid, "backupgid=%s" }, @@ -1550,6 +1551,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_domainauto: vol->domainauto = true; break; + case Opt_rdma: + vol->rdma = true; + break; /* Numeric Values */ case Opt_backupuid: @@ -1951,6 +1955,11 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } + if (vol->rdma && vol->vals->protocol_id < SMB30_PROT_ID) { + cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n"); + goto cifs_parse_mount_err; + } + #ifndef CONFIG_KEYS /* Muliuser mounts require CONFIG_KEYS support */ if (vol->multiuser) { @@ -2162,6 +2171,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) if (server->echo_interval != vol->echo_interval * HZ) return 0; + if (server->rdma != vol->rdma) + return 0; + return 1; } @@ -2260,6 +2272,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->noblocksnd = volume_info->noblocksnd; tcp_ses->noautotune = 
volume_info->noautotune; tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay; + tcp_ses->rdma = volume_info->rdma; tcp_ses->in_flight = 0; tcp_ses->credits = 1; init_waitqueue_head(&tcp_ses->response_q); -- cgit v1.2.3 From 03bee01d6215d0438e8a36b4ecb1a002625d0713 Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 7 Nov 2017 01:54:56 -0700 Subject: CIFS: SMBD: Add SMB Direct protocol initial values and constants To prepare for protocol implementation, add constants and user-configurable values for the SMB Direct protocol. Signed-off-by: Long Li Signed-off-by: Steve French Acked-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky --- fs/cifs/smbdirect.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smbdirect.h | 21 +++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 fs/cifs/smbdirect.c create mode 100644 fs/cifs/smbdirect.h diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c new file mode 100644 index 000000000000..d3c16f872fe1 --- /dev/null +++ b/fs/cifs/smbdirect.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2017, Microsoft Corporation. + * + * Author(s): Long Li + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ */ +#include "smbdirect.h" + +/* SMBD version number */ +#define SMBD_V1 0x0100 + +/* Port numbers for SMBD transport */ +#define SMB_PORT 445 +#define SMBD_PORT 5445 + +/* Address lookup and resolve timeout in ms */ +#define RDMA_RESOLVE_TIMEOUT 5000 + +/* SMBD negotiation timeout in seconds */ +#define SMBD_NEGOTIATE_TIMEOUT 120 + +/* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */ +#define SMBD_MIN_RECEIVE_SIZE 128 +#define SMBD_MIN_FRAGMENTED_SIZE 131072 + +/* + * Default maximum number of RDMA read/write outstanding on this connection + * This value is possibly decreased during QP creation on hardware limit + */ +#define SMBD_CM_RESPONDER_RESOURCES 32 + +/* Maximum number of retries on data transfer operations */ +#define SMBD_CM_RETRY 6 +/* No need to retry on Receiver Not Ready since SMBD manages credits */ +#define SMBD_CM_RNR_RETRY 0 + +/* + * User configurable initial values per SMBD transport connection + * as defined in [MS-SMBD] 3.1.1.1 + * Those may change after a SMBD negotiation + */ +/* The local peer's maximum number of credits to grant to the peer */ +int smbd_receive_credit_max = 255; + +/* The remote peer's credit request of local peer */ +int smbd_send_credit_target = 255; + +/* The maximum single message size can be sent to remote peer */ +int smbd_max_send_size = 1364; + +/* The maximum fragmented upper-layer payload receive size supported */ +int smbd_max_fragmented_recv_size = 1024 * 1024; + +/* The maximum single-message size which can be received */ +int smbd_max_receive_size = 8192; + +/* The timeout to initiate send of a keepalive message on idle */ +int smbd_keep_alive_interval = 120; + +/* + * User configurable initial values for RDMA transport + * The actual values used may be lower and are limited to hardware capabilities + */ +/* Default maximum number of SGEs in a RDMA write/read */ +int smbd_max_frmr_depth = 2048; + +/* If payload is less than this byte, use RDMA send/recv not read/write */ +int 
rdma_readwrite_threshold = 4096; diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h new file mode 100644 index 000000000000..c55f28bf7a7f --- /dev/null +++ b/fs/cifs/smbdirect.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2017, Microsoft Corporation. + * + * Author(s): Long Li + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + */ +#ifndef _SMBDIRECT_H +#define _SMBDIRECT_H + +/* Default maximum number of SGEs in a RDMA send/recv */ +#define SMBDIRECT_MAX_SGE 16 +#endif -- cgit v1.2.3 From f198186aa9bbd60fae7a2061f4feec614d880299 Mon Sep 17 00:00:00 2001 From: Long Li Date: Sat, 4 Nov 2017 18:17:24 -0700 Subject: CIFS: SMBD: Establish SMB Direct connection Add code to implement the core functions to establish a SMB Direct connection. 1. Establish an RDMA connection to SMB server. 2. Negotiate and setup SMB Direct protocol. 3. Implement idle connection timer and credit management. SMB Direct is enabled by setting CONFIG_CIFS_SMB_DIRECT. Add to Makefile to enable building SMB Direct. 
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/Makefile | 2 + fs/cifs/smbdirect.c | 1571 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smbdirect.h | 230 ++++++++ 3 files changed, 1803 insertions(+) diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 7134f182720b..7e4a1e2f0696 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -19,3 +19,5 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o + +cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index d3c16f872fe1..2ecd5c19d313 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -13,7 +13,33 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. */ +#include #include "smbdirect.h" +#include "cifs_debug.h" + +static struct smbd_response *get_empty_queue_buffer( + struct smbd_connection *info); +static struct smbd_response *get_receive_buffer( + struct smbd_connection *info); +static void put_receive_buffer( + struct smbd_connection *info, + struct smbd_response *response); +static int allocate_receive_buffers(struct smbd_connection *info, int num_buf); +static void destroy_receive_buffers(struct smbd_connection *info); + +static void put_empty_packet( + struct smbd_connection *info, struct smbd_response *response); +static void enqueue_reassembly( + struct smbd_connection *info, + struct smbd_response *response, int data_length); +static struct smbd_response *_get_first_reassembly( + struct smbd_connection *info); + +static int smbd_post_recv( + struct smbd_connection *info, + struct smbd_response *response); + +static int smbd_post_send_empty(struct smbd_connection *info); /* SMBD version number */ #define SMBD_V1 0x0100 @@ -75,3 +101,1548 @@ int smbd_max_frmr_depth = 2048; /* If payload is less than this byte, use RDMA send/recv 
not read/write */ int rdma_readwrite_threshold = 4096; + +/* Transport logging functions + * Logging are defined as classes. They can be OR'ed to define the actual + * logging level via module parameter smbd_logging_class + * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and + * log_rdma_event() + */ +#define LOG_OUTGOING 0x1 +#define LOG_INCOMING 0x2 +#define LOG_READ 0x4 +#define LOG_WRITE 0x8 +#define LOG_RDMA_SEND 0x10 +#define LOG_RDMA_RECV 0x20 +#define LOG_KEEP_ALIVE 0x40 +#define LOG_RDMA_EVENT 0x80 +#define LOG_RDMA_MR 0x100 +static unsigned int smbd_logging_class; +module_param(smbd_logging_class, uint, 0644); +MODULE_PARM_DESC(smbd_logging_class, + "Logging class for SMBD transport 0x0 to 0x100"); + +#define ERR 0x0 +#define INFO 0x1 +static unsigned int smbd_logging_level = ERR; +module_param(smbd_logging_level, uint, 0644); +MODULE_PARM_DESC(smbd_logging_level, + "Logging level for SMBD transport, 0 (default): error, 1: info"); + +#define log_rdma(level, class, fmt, args...) \ +do { \ + if (level <= smbd_logging_level || class & smbd_logging_class) \ + cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\ +} while (0) + +#define log_outgoing(level, fmt, args...) \ + log_rdma(level, LOG_OUTGOING, fmt, ##args) +#define log_incoming(level, fmt, args...) \ + log_rdma(level, LOG_INCOMING, fmt, ##args) +#define log_read(level, fmt, args...) log_rdma(level, LOG_READ, fmt, ##args) +#define log_write(level, fmt, args...) log_rdma(level, LOG_WRITE, fmt, ##args) +#define log_rdma_send(level, fmt, args...) \ + log_rdma(level, LOG_RDMA_SEND, fmt, ##args) +#define log_rdma_recv(level, fmt, args...) \ + log_rdma(level, LOG_RDMA_RECV, fmt, ##args) +#define log_keep_alive(level, fmt, args...) \ + log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args) +#define log_rdma_event(level, fmt, args...) \ + log_rdma(level, LOG_RDMA_EVENT, fmt, ##args) +#define log_rdma_mr(level, fmt, args...) 
\ + log_rdma(level, LOG_RDMA_MR, fmt, ##args) + +/* + * Destroy the transport and related RDMA and memory resources + * Need to go through all the pending counters and make sure on one is using + * the transport while it is destroyed + */ +static void smbd_destroy_rdma_work(struct work_struct *work) +{ + struct smbd_response *response; + struct smbd_connection *info = + container_of(work, struct smbd_connection, destroy_work); + unsigned long flags; + + log_rdma_event(INFO, "destroying qp\n"); + ib_drain_qp(info->id->qp); + rdma_destroy_qp(info->id); + + /* Unblock all I/O waiting on the send queue */ + wake_up_interruptible_all(&info->wait_send_queue); + + log_rdma_event(INFO, "cancelling idle timer\n"); + cancel_delayed_work_sync(&info->idle_timer_work); + log_rdma_event(INFO, "cancelling send immediate work\n"); + cancel_delayed_work_sync(&info->send_immediate_work); + + log_rdma_event(INFO, "wait for all recv to finish\n"); + wake_up_interruptible(&info->wait_reassembly_queue); + + log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); + wait_event(info->wait_send_pending, + atomic_read(&info->send_pending) == 0); + wait_event(info->wait_send_payload_pending, + atomic_read(&info->send_payload_pending) == 0); + + /* It's not posssible for upper layer to get to reassembly */ + log_rdma_event(INFO, "drain the reassembly queue\n"); + do { + spin_lock_irqsave(&info->reassembly_queue_lock, flags); + response = _get_first_reassembly(info); + if (response) { + list_del(&response->list); + spin_unlock_irqrestore( + &info->reassembly_queue_lock, flags); + put_receive_buffer(info, response); + } + } while (response); + spin_unlock_irqrestore(&info->reassembly_queue_lock, flags); + info->reassembly_data_length = 0; + + log_rdma_event(INFO, "free receive buffers\n"); + wait_event(info->wait_receive_queues, + info->count_receive_queue + info->count_empty_packet_queue + == info->receive_credit_max); + destroy_receive_buffers(info); + + ib_free_cq(info->send_cq); 
+ ib_free_cq(info->recv_cq); + ib_dealloc_pd(info->pd); + rdma_destroy_id(info->id); + + /* free mempools */ + mempool_destroy(info->request_mempool); + kmem_cache_destroy(info->request_cache); + + mempool_destroy(info->response_mempool); + kmem_cache_destroy(info->response_cache); + + info->transport_status = SMBD_DESTROYED; + wake_up_all(&info->wait_destroy); +} + +static int smbd_process_disconnected(struct smbd_connection *info) +{ + schedule_work(&info->destroy_work); + return 0; +} + +static void smbd_disconnect_rdma_work(struct work_struct *work) +{ + struct smbd_connection *info = + container_of(work, struct smbd_connection, disconnect_work); + + if (info->transport_status == SMBD_CONNECTED) { + info->transport_status = SMBD_DISCONNECTING; + rdma_disconnect(info->id); + } +} + +static void smbd_disconnect_rdma_connection(struct smbd_connection *info) +{ + queue_work(info->workqueue, &info->disconnect_work); +} + +/* Upcall from RDMA CM */ +static int smbd_conn_upcall( + struct rdma_cm_id *id, struct rdma_cm_event *event) +{ + struct smbd_connection *info = id->context; + + log_rdma_event(INFO, "event=%d status=%d\n", + event->event, event->status); + + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + case RDMA_CM_EVENT_ROUTE_RESOLVED: + info->ri_rc = 0; + complete(&info->ri_done); + break; + + case RDMA_CM_EVENT_ADDR_ERROR: + info->ri_rc = -EHOSTUNREACH; + complete(&info->ri_done); + break; + + case RDMA_CM_EVENT_ROUTE_ERROR: + info->ri_rc = -ENETUNREACH; + complete(&info->ri_done); + break; + + case RDMA_CM_EVENT_ESTABLISHED: + log_rdma_event(INFO, "connected event=%d\n", event->event); + info->transport_status = SMBD_CONNECTED; + wake_up_interruptible(&info->conn_wait); + break; + + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + log_rdma_event(INFO, "connecting failed event=%d\n", event->event); + info->transport_status = SMBD_DISCONNECTED; + wake_up_interruptible(&info->conn_wait); + 
break;
+
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+	case RDMA_CM_EVENT_DISCONNECTED:
+		/* This happens when we fail the negotiation */
+		if (info->transport_status == SMBD_NEGOTIATE_FAILED) {
+			info->transport_status = SMBD_DISCONNECTED;
+			wake_up(&info->conn_wait);
+			break;
+		}
+
+		info->transport_status = SMBD_DISCONNECTED;
+		smbd_process_disconnected(info);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Upcall from RDMA QP
+ * Every asynchronous event is logged at error level. A CQ error or a fatal
+ * QP error additionally schedules a disconnect of the connection; all other
+ * events are only logged.
+ * event: the asynchronous IB event being reported
+ * context: the struct smbd_connection this QP belongs to
+ */
+static void
+smbd_qp_async_error_upcall(struct ib_event *event, void *context)
+{
+	struct smbd_connection *info = context;
+
+	log_rdma_event(ERR, "%s on device %s info %p\n",
+		ib_event_msg(event->event), event->device->name, info);
+
+	switch (event->event) {
+	case IB_EVENT_CQ_ERR:
+	case IB_EVENT_QP_FATAL:
+		smbd_disconnect_rdma_connection(info);
+		/* explicit break: do not rely on falling into default */
+		break;
+
+	default:
+		break;
+	}
+}
+
+static inline void *smbd_request_payload(struct smbd_request *request)
+{
+	return (void *)request->packet;
+}
+
+static inline void *smbd_response_payload(struct smbd_response *response)
+{
+	return (void *)response->packet;
+}
+
+/* Called when a RDMA send is done */
+static void send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	int i;
+	struct smbd_request *request =
+		container_of(wc->wr_cqe, struct smbd_request, cqe);
+
+	log_rdma_send(INFO, "smbd_request %p completed wc->status=%d\n",
+		request, wc->status);
+
+	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
+		log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
+			wc->status, wc->opcode);
+		smbd_disconnect_rdma_connection(request->info);
+	}
+
+	for (i = 0; i < request->num_sge; i++)
+		ib_dma_unmap_single(request->info->id->device,
+			request->sge[i].addr,
+			request->sge[i].length,
+			DMA_TO_DEVICE);
+
+	if (request->has_payload) {
+		if (atomic_dec_and_test(&request->info->send_payload_pending))
+			wake_up(&request->info->wait_send_payload_pending);
+	} else {
+		if (atomic_dec_and_test(&request->info->send_pending))
+			wake_up(&request->info->wait_send_pending);
+	}
+
+
mempool_free(request, request->info->request_mempool); +} + +static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp) +{ + log_rdma_event(INFO, "resp message min_version %u max_version %u " + "negotiated_version %u credits_requested %u " + "credits_granted %u status %u max_readwrite_size %u " + "preferred_send_size %u max_receive_size %u " + "max_fragmented_size %u\n", + resp->min_version, resp->max_version, resp->negotiated_version, + resp->credits_requested, resp->credits_granted, resp->status, + resp->max_readwrite_size, resp->preferred_send_size, + resp->max_receive_size, resp->max_fragmented_size); +} + +/* + * Process a negotiation response message, according to [MS-SMBD]3.1.5.7 + * response, packet_length: the negotiation response message + * return value: true if negotiation is a success, false if failed + */ +static bool process_negotiation_response( + struct smbd_response *response, int packet_length) +{ + struct smbd_connection *info = response->info; + struct smbd_negotiate_resp *packet = smbd_response_payload(response); + + if (packet_length < sizeof(struct smbd_negotiate_resp)) { + log_rdma_event(ERR, + "error: packet_length=%d\n", packet_length); + return false; + } + + if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) { + log_rdma_event(ERR, "error: negotiated_version=%x\n", + le16_to_cpu(packet->negotiated_version)); + return false; + } + info->protocol = le16_to_cpu(packet->negotiated_version); + + if (packet->credits_requested == 0) { + log_rdma_event(ERR, "error: credits_requested==0\n"); + return false; + } + info->receive_credit_target = le16_to_cpu(packet->credits_requested); + + if (packet->credits_granted == 0) { + log_rdma_event(ERR, "error: credits_granted==0\n"); + return false; + } + atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted)); + + atomic_set(&info->receive_credits, 0); + + if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) { + log_rdma_event(ERR, "error: 
preferred_send_size=%d\n", + le32_to_cpu(packet->preferred_send_size)); + return false; + } + info->max_receive_size = le32_to_cpu(packet->preferred_send_size); + + if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { + log_rdma_event(ERR, "error: max_receive_size=%d\n", + le32_to_cpu(packet->max_receive_size)); + return false; + } + info->max_send_size = min_t(int, info->max_send_size, + le32_to_cpu(packet->max_receive_size)); + + if (le32_to_cpu(packet->max_fragmented_size) < + SMBD_MIN_FRAGMENTED_SIZE) { + log_rdma_event(ERR, "error: max_fragmented_size=%d\n", + le32_to_cpu(packet->max_fragmented_size)); + return false; + } + info->max_fragmented_send_size = + le32_to_cpu(packet->max_fragmented_size); + + return true; +} + +/* + * Check and schedule to send an immediate packet + * This is used to extend credtis to remote peer to keep the transport busy + */ +static void check_and_send_immediate(struct smbd_connection *info) +{ + if (info->transport_status != SMBD_CONNECTED) + return; + + info->send_immediate = true; + + /* + * Promptly send a packet if our peer is running low on receive + * credits + */ + if (atomic_read(&info->receive_credits) < + info->receive_credit_target - 1) + queue_delayed_work( + info->workqueue, &info->send_immediate_work, 0); +} + +static void smbd_post_send_credits(struct work_struct *work) +{ + int ret = 0; + int use_receive_queue = 1; + int rc; + struct smbd_response *response; + struct smbd_connection *info = + container_of(work, struct smbd_connection, + post_send_credits_work); + + if (info->transport_status != SMBD_CONNECTED) { + wake_up(&info->wait_receive_queues); + return; + } + + if (info->receive_credit_target > + atomic_read(&info->receive_credits)) { + while (true) { + if (use_receive_queue) + response = get_receive_buffer(info); + else + response = get_empty_queue_buffer(info); + if (!response) { + /* now switch to emtpy packet queue */ + if (use_receive_queue) { + use_receive_queue = 0; + continue; + } 
else + break; + } + + response->type = SMBD_TRANSFER_DATA; + response->first_segment = false; + rc = smbd_post_recv(info, response); + if (rc) { + log_rdma_recv(ERR, + "post_recv failed rc=%d\n", rc); + put_receive_buffer(info, response); + break; + } + + ret++; + } + } + + spin_lock(&info->lock_new_credits_offered); + info->new_credits_offered += ret; + spin_unlock(&info->lock_new_credits_offered); + + atomic_add(ret, &info->receive_credits); + + /* Check if we can post new receive and grant credits to peer */ + check_and_send_immediate(info); +} + +static void smbd_recv_done_work(struct work_struct *work) +{ + struct smbd_connection *info = + container_of(work, struct smbd_connection, recv_done_work); + + /* + * We may have new send credits granted from remote peer + * If any sender is blcoked on lack of credets, unblock it + */ + if (atomic_read(&info->send_credits)) + wake_up_interruptible(&info->wait_send_queue); + + /* + * Check if we need to send something to remote peer to + * grant more credits or respond to KEEP_ALIVE packet + */ + check_and_send_immediate(info); +} + +/* Called from softirq, when recv is done */ +static void recv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbd_data_transfer *data_transfer; + struct smbd_response *response = + container_of(wc->wr_cqe, struct smbd_response, cqe); + struct smbd_connection *info = response->info; + int data_length = 0; + + log_rdma_recv(INFO, "response=%p type=%d wc status=%d wc opcode %d " + "byte_len=%d pkey_index=%x\n", + response, response->type, wc->status, wc->opcode, + wc->byte_len, wc->pkey_index); + + if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { + log_rdma_recv(INFO, "wc->status=%d opcode=%d\n", + wc->status, wc->opcode); + smbd_disconnect_rdma_connection(info); + goto error; + } + + ib_dma_sync_single_for_cpu( + wc->qp->device, + response->sge.addr, + response->sge.length, + DMA_FROM_DEVICE); + + switch (response->type) { + /* SMBD negotiation response */ + case 
SMBD_NEGOTIATE_RESP: + dump_smbd_negotiate_resp(smbd_response_payload(response)); + info->full_packet_received = true; + info->negotiate_done = + process_negotiation_response(response, wc->byte_len); + complete(&info->negotiate_completion); + break; + + /* SMBD data transfer packet */ + case SMBD_TRANSFER_DATA: + data_transfer = smbd_response_payload(response); + data_length = le32_to_cpu(data_transfer->data_length); + + /* + * If this is a packet with data playload place the data in + * reassembly queue and wake up the reading thread + */ + if (data_length) { + if (info->full_packet_received) + response->first_segment = true; + + if (le32_to_cpu(data_transfer->remaining_data_length)) + info->full_packet_received = false; + else + info->full_packet_received = true; + + enqueue_reassembly( + info, + response, + data_length); + } else + put_empty_packet(info, response); + + if (data_length) + wake_up_interruptible(&info->wait_reassembly_queue); + + atomic_dec(&info->receive_credits); + info->receive_credit_target = + le16_to_cpu(data_transfer->credits_requested); + atomic_add(le16_to_cpu(data_transfer->credits_granted), + &info->send_credits); + + log_incoming(INFO, "data flags %d data_offset %d " + "data_length %d remaining_data_length %d\n", + le16_to_cpu(data_transfer->flags), + le32_to_cpu(data_transfer->data_offset), + le32_to_cpu(data_transfer->data_length), + le32_to_cpu(data_transfer->remaining_data_length)); + + /* Send a KEEP_ALIVE response right away if requested */ + info->keep_alive_requested = KEEP_ALIVE_NONE; + if (le16_to_cpu(data_transfer->flags) & + SMB_DIRECT_RESPONSE_REQUESTED) { + info->keep_alive_requested = KEEP_ALIVE_PENDING; + } + + queue_work(info->workqueue, &info->recv_done_work); + return; + + default: + log_rdma_recv(ERR, + "unexpected response type=%d\n", response->type); + } + +error: + put_receive_buffer(info, response); +} + +static struct rdma_cm_id *smbd_create_id( + struct smbd_connection *info, + struct sockaddr *dstaddr, int 
port) +{ + struct rdma_cm_id *id; + int rc; + __be16 *sport; + + id = rdma_create_id(&init_net, smbd_conn_upcall, info, + RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(id)) { + rc = PTR_ERR(id); + log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc); + return id; + } + + if (dstaddr->sa_family == AF_INET6) + sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port; + else + sport = &((struct sockaddr_in *)dstaddr)->sin_port; + + *sport = htons(port); + + init_completion(&info->ri_done); + info->ri_rc = -ETIMEDOUT; + + rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr, + RDMA_RESOLVE_TIMEOUT); + if (rc) { + log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); + goto out; + } + wait_for_completion_interruptible_timeout( + &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + rc = info->ri_rc; + if (rc) { + log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); + goto out; + } + + info->ri_rc = -ETIMEDOUT; + rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); + if (rc) { + log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); + goto out; + } + wait_for_completion_interruptible_timeout( + &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + rc = info->ri_rc; + if (rc) { + log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); + goto out; + } + + return id; + +out: + rdma_destroy_id(id); + return ERR_PTR(rc); +} + +/* + * Test if FRWR (Fast Registration Work Requests) is supported on the device + * This implementation requries FRWR on RDMA read/write + * return value: true if it is supported + */ +static bool frwr_is_supported(struct ib_device_attr *attrs) +{ + if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) + return false; + if (attrs->max_fast_reg_page_list_len == 0) + return false; + return true; +} + +static int smbd_ia_open( + struct smbd_connection *info, + struct sockaddr *dstaddr, int port) +{ + int rc; + + info->id = smbd_create_id(info, dstaddr, port); + if (IS_ERR(info->id)) { + rc = PTR_ERR(info->id); 
+ goto out1; + } + + if (!frwr_is_supported(&info->id->device->attrs)) { + log_rdma_event(ERR, + "Fast Registration Work Requests " + "(FRWR) is not supported\n"); + log_rdma_event(ERR, + "Device capability flags = %llx " + "max_fast_reg_page_list_len = %u\n", + info->id->device->attrs.device_cap_flags, + info->id->device->attrs.max_fast_reg_page_list_len); + rc = -EPROTONOSUPPORT; + goto out2; + } + + info->pd = ib_alloc_pd(info->id->device, 0); + if (IS_ERR(info->pd)) { + rc = PTR_ERR(info->pd); + log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); + goto out2; + } + + return 0; + +out2: + rdma_destroy_id(info->id); + info->id = NULL; + +out1: + return rc; +} + +/* + * Send a negotiation request message to the peer + * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3 + * After negotiation, the transport is connected and ready for + * carrying upper layer SMB payload + */ +static int smbd_post_send_negotiate_req(struct smbd_connection *info) +{ + struct ib_send_wr send_wr, *send_wr_fail; + int rc = -ENOMEM; + struct smbd_request *request; + struct smbd_negotiate_req *packet; + + request = mempool_alloc(info->request_mempool, GFP_KERNEL); + if (!request) + return rc; + + request->info = info; + + packet = smbd_request_payload(request); + packet->min_version = cpu_to_le16(SMBD_V1); + packet->max_version = cpu_to_le16(SMBD_V1); + packet->reserved = 0; + packet->credits_requested = cpu_to_le16(info->send_credit_target); + packet->preferred_send_size = cpu_to_le32(info->max_send_size); + packet->max_receive_size = cpu_to_le32(info->max_receive_size); + packet->max_fragmented_size = + cpu_to_le32(info->max_fragmented_recv_size); + + request->num_sge = 1; + request->sge[0].addr = ib_dma_map_single( + info->id->device, (void *)packet, + sizeof(*packet), DMA_TO_DEVICE); + if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { + rc = -EIO; + goto dma_mapping_failed; + } + + request->sge[0].length = sizeof(*packet); + request->sge[0].lkey = 
info->pd->local_dma_lkey; + + ib_dma_sync_single_for_device( + info->id->device, request->sge[0].addr, + request->sge[0].length, DMA_TO_DEVICE); + + request->cqe.done = send_done; + + send_wr.next = NULL; + send_wr.wr_cqe = &request->cqe; + send_wr.sg_list = request->sge; + send_wr.num_sge = request->num_sge; + send_wr.opcode = IB_WR_SEND; + send_wr.send_flags = IB_SEND_SIGNALED; + + log_rdma_send(INFO, "sge addr=%llx length=%x lkey=%x\n", + request->sge[0].addr, + request->sge[0].length, request->sge[0].lkey); + + request->has_payload = false; + atomic_inc(&info->send_pending); + rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail); + if (!rc) + return 0; + + /* if we reach here, post send failed */ + log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); + atomic_dec(&info->send_pending); + ib_dma_unmap_single(info->id->device, request->sge[0].addr, + request->sge[0].length, DMA_TO_DEVICE); + +dma_mapping_failed: + mempool_free(request, info->request_mempool); + return rc; +} + +/* + * Extend the credits to remote peer + * This implements [MS-SMBD] 3.1.5.9 + * The idea is that we should extend credits to remote peer as quickly as + * it's allowed, to maintain data flow. We allocate as much receive + * buffer as possible, and extend the receive credits to remote peer + * return value: the new credtis being granted. + */ +static int manage_credits_prior_sending(struct smbd_connection *info) +{ + int new_credits; + + spin_lock(&info->lock_new_credits_offered); + new_credits = info->new_credits_offered; + info->new_credits_offered = 0; + spin_unlock(&info->lock_new_credits_offered); + + return new_credits; +} + +/* + * Check if we need to send a KEEP_ALIVE message + * The idle connection timer triggers a KEEP_ALIVE message when expires + * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send + * back a response. 
+ * return value:
+ * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
+ * 0: otherwise
+ */
+static int manage_keep_alive_before_sending(struct smbd_connection *info)
+{
+	if (info->keep_alive_requested == KEEP_ALIVE_PENDING) {
+		info->keep_alive_requested = KEEP_ALIVE_SENT;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Build and prepare the SMBD packet header
+ * This function waits for available send credits and builds a SMBD packet
+ * header. The caller then optionally appends payload to the packet after
+ * the header
+ * input values
+ * size: the size of the payload
+ * remaining_data_length: remaining data to send if this is part of a
+ * fragmented packet
+ * output values
+ * request_out: the request allocated from this function
+ * return values: 0 on success, otherwise actual error code returned
+ *
+ * One send credit is consumed on success. The header is mapped
+ * DMA_TO_DEVICE, the same direction used by smbd_destroy_header() and by
+ * the send completion handler when they unmap it.
+ */
+static int smbd_create_header(struct smbd_connection *info,
+		int size, int remaining_data_length,
+		struct smbd_request **request_out)
+{
+	struct smbd_request *request;
+	struct smbd_data_transfer *packet;
+	int header_length;
+	int rc;
+
+	/* Wait for send credits. A SMBD packet needs one credit */
+	rc = wait_event_interruptible(info->wait_send_queue,
+		atomic_read(&info->send_credits) > 0 ||
+		info->transport_status != SMBD_CONNECTED);
+	if (rc)
+		return rc;
+
+	if (info->transport_status != SMBD_CONNECTED) {
+		log_outgoing(ERR, "disconnected not sending\n");
+		return -ENOENT;
+	}
+	atomic_dec(&info->send_credits);
+
+	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
+	if (!request) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	request->info = info;
+
+	/* Fill in the packet header */
+	packet = smbd_request_payload(request);
+	packet->credits_requested = cpu_to_le16(info->send_credit_target);
+	packet->credits_granted =
+		cpu_to_le16(manage_credits_prior_sending(info));
+	info->send_immediate = false;
+
+	packet->flags = 0;
+	if (manage_keep_alive_before_sending(info))
+		packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
+
+	packet->reserved = 0;
+	if (!size)
+		packet->data_offset = 0;
+	else
+		packet->data_offset = cpu_to_le32(24);
+	packet->data_length = cpu_to_le32(size);
+	packet->remaining_data_length = cpu_to_le32(remaining_data_length);
+	packet->padding = 0;
+
+	log_outgoing(INFO, "credits_requested=%d credits_granted=%d "
+		"data_offset=%d data_length=%d remaining_data_length=%d\n",
+		le16_to_cpu(packet->credits_requested),
+		le16_to_cpu(packet->credits_granted),
+		le32_to_cpu(packet->data_offset),
+		le32_to_cpu(packet->data_length),
+		le32_to_cpu(packet->remaining_data_length));
+
+	/* Map the packet to DMA */
+	header_length = sizeof(struct smbd_data_transfer);
+	/* If this is a packet without payload, don't send padding */
+	if (!size)
+		header_length = offsetof(struct smbd_data_transfer, padding);
+
+	request->num_sge = 1;
+	/* Direction must match the DMA_TO_DEVICE unmap/sync done later */
+	request->sge[0].addr = ib_dma_map_single(info->id->device,
+						 (void *)packet,
+						 header_length,
+						 DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
+		mempool_free(request, info->request_mempool);
+		rc = -EIO;
+		goto err;
+	}
+
+	request->sge[0].length = header_length;
+	request->sge[0].lkey = info->pd->local_dma_lkey;
+
+	*request_out = request;
+	return 0;
+
+err:
+	/* Give back the credit taken above */
+	atomic_inc(&info->send_credits);
+	return rc;
+}
+
+/*
+ * Undo smbd_create_header(): unmap the header SGE, free the request and
+ * give back the send credit that was taken for it
+ */
+static void smbd_destroy_header(struct smbd_connection *info,
+		struct smbd_request *request)
+{
+	ib_dma_unmap_single(info->id->device,
+			    request->sge[0].addr,
+			    request->sge[0].length,
+			    DMA_TO_DEVICE);
+	mempool_free(request, info->request_mempool);
+	atomic_inc(&info->send_credits);
+}
+
+/*
+ * Post the send request
+ * All SGEs of the request are synced for the device and posted as one
+ * signaled IB_WR_SEND completed by send_done(). The request is accounted in
+ * send_payload_pending or send_pending depending on has_payload.
+ * return value: 0 on success (the idle connection timer is re-armed),
+ * otherwise the error from ib_post_send(); the pending counter is dropped
+ * again but the request itself is left for the caller to release
+ */
+static int smbd_post_send(struct smbd_connection *info,
+		struct smbd_request *request, bool has_payload)
+{
+	struct ib_send_wr send_wr, *send_wr_fail;
+	int rc, i;
+
+	for (i = 0; i < request->num_sge; i++) {
+		/* Log the SGE being synced, not always the header SGE */
+		log_rdma_send(INFO,
+			"rdma_request sge[%d] addr=%llu length=%u\n",
+			i, request->sge[i].addr, request->sge[i].length);
+		ib_dma_sync_single_for_device(
+			info->id->device,
+			request->sge[i].addr,
+			request->sge[i].length,
+			DMA_TO_DEVICE);
+	}
+
+	request->cqe.done = send_done;
+
+	send_wr.next = NULL;
+	send_wr.wr_cqe = &request->cqe;
+	send_wr.sg_list = request->sge;
+	send_wr.num_sge = request->num_sge;
+	send_wr.opcode = IB_WR_SEND;
+	send_wr.send_flags = IB_SEND_SIGNALED;
+
+	if (has_payload) {
+		request->has_payload = true;
+		atomic_inc(&info->send_payload_pending);
+	} else {
+		request->has_payload = false;
+		atomic_inc(&info->send_pending);
+	}
+
+	rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
+	if (rc) {
+		log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
+		if (has_payload) {
+			if (atomic_dec_and_test(&info->send_payload_pending))
+				wake_up(&info->wait_send_payload_pending);
+		} else {
+			if (atomic_dec_and_test(&info->send_pending))
+				wake_up(&info->wait_send_pending);
+		}
+	} else
+		/* Reset timer for idle connection after packet is sent */
+		mod_delayed_work(info->workqueue, &info->idle_timer_work,
+			info->keep_alive_interval*HZ);
+
+	return rc;
+}
+
+/*
+ * Send a SMBD packet made of a header and an optional scatterlist payload
+ * sgl: the payload, NULL if there is no payload
+ * data_length: the size of the payload in this packet
+ * remaining_data_length: remaining data to send if this is part of a
+ * fragmented packet
+ * Each scatterlist entry is mapped into its own SGE following the header
+ * SGE. The pages are only read by the device, so they are mapped
+ * DMA_TO_DEVICE like the header.
+ * return value: 0 on success, otherwise actual error code returned; on
+ * failure all payload mappings and the header are released
+ */
+static int smbd_post_send_sgl(struct smbd_connection *info,
+	struct scatterlist *sgl, int data_length, int remaining_data_length)
+{
+	int num_sgs;
+	int i, rc;
+	struct smbd_request *request;
+	struct scatterlist *sg;
+
+	rc = smbd_create_header(
+		info, data_length, remaining_data_length, &request);
+	if (rc)
+		return rc;
+
+	num_sgs = sgl ? sg_nents(sgl) : 0;
+	for_each_sg(sgl, sg, num_sgs, i) {
+		request->sge[i+1].addr =
+			ib_dma_map_page(info->id->device, sg_page(sg),
+			       sg->offset, sg->length, DMA_TO_DEVICE);
+		if (ib_dma_mapping_error(
+				info->id->device, request->sge[i+1].addr)) {
+			rc = -EIO;
+			request->sge[i+1].addr = 0;
+			goto dma_mapping_failure;
+		}
+		request->sge[i+1].length = sg->length;
+		request->sge[i+1].lkey = info->pd->local_dma_lkey;
+		request->num_sge++;
+	}
+
+	rc = smbd_post_send(info, request, data_length);
+	if (!rc)
+		return 0;
+
+dma_mapping_failure:
+	/* sge[0] is the header, released by smbd_destroy_header() below */
+	for (i = 1; i < request->num_sge; i++)
+		if (request->sge[i].addr)
+			ib_dma_unmap_single(info->id->device,
+					    request->sge[i].addr,
+					    request->sge[i].length,
+					    DMA_TO_DEVICE);
+	smbd_destroy_header(info, request);
+	return rc;
+}
+
+/*
+ * Send an empty message
+ * An empty message is used to extend credits to the peer or as a keep alive
+ * while there is no upper layer payload to send at the time
+ */
+static int smbd_post_send_empty(struct smbd_connection *info)
+{
+	info->count_send_empty++;
+	return smbd_post_send_sgl(info, NULL, 0, 0);
+}
+
+/*
+ * Post a receive request to the transport
+ * The remote peer can only send data when a receive request is posted
+ * The interaction is controlled by send/receive credit system
+ */
+static int smbd_post_recv(
+		struct smbd_connection *info, struct smbd_response *response)
+{
+	struct ib_recv_wr recv_wr, *recv_wr_fail = NULL;
+	int rc = -EIO;
+
+	response->sge.addr = ib_dma_map_single(
+				info->id->device, response->packet,
+				info->max_receive_size, DMA_FROM_DEVICE);
+	if (ib_dma_mapping_error(info->id->device, response->sge.addr))
+		return rc;
+
+	response->sge.length = info->max_receive_size;
+	response->sge.lkey
= info->pd->local_dma_lkey;
+
+	response->cqe.done = recv_done;
+
+	recv_wr.wr_cqe = &response->cqe;
+	recv_wr.next = NULL;
+	recv_wr.sg_list = &response->sge;
+	recv_wr.num_sge = 1;
+
+	rc = ib_post_recv(info->id->qp, &recv_wr, &recv_wr_fail);
+	if (rc) {
+		ib_dma_unmap_single(info->id->device, response->sge.addr,
+				    response->sge.length, DMA_FROM_DEVICE);
+
+		log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2
+ * A receive is posted for the negotiate response before the negotiate
+ * request is sent, then the caller sleeps until the response has been
+ * processed or SMBD_NEGOTIATE_TIMEOUT seconds have passed.
+ * return value:
+ * 0 if the negotiation succeeded
+ * -ENOMEM if no receive buffer is available
+ * -ETIMEDOUT if no response arrived in time
+ * -EINTR if the wait was interrupted
+ * -ENOTCONN if a response arrived but was rejected
+ * otherwise the error from posting the receive or the request
+ */
+static int smbd_negotiate(struct smbd_connection *info)
+{
+	int rc;
+	struct smbd_response *response = get_receive_buffer(info);
+
+	/* get_receive_buffer() returns NULL when it has nothing to hand out */
+	if (!response)
+		return -ENOMEM;
+
+	response->type = SMBD_NEGOTIATE_RESP;
+	rc = smbd_post_recv(info, response);
+	log_rdma_event(INFO,
+		"smbd_post_recv rc=%d iov.addr=%llx iov.length=%x "
+		"iov.lkey=%x\n",
+		rc, response->sge.addr,
+		response->sge.length, response->sge.lkey);
+	/*
+	 * NOTE(review): the buffer is not handed back on this error path,
+	 * while smbd_post_send_credits() calls put_receive_buffer() in the
+	 * same situation - confirm which of the two is intended
+	 */
+	if (rc)
+		return rc;
+
+	init_completion(&info->negotiate_completion);
+	info->negotiate_done = false;
+	rc = smbd_post_send_negotiate_req(info);
+	if (rc)
+		return rc;
+
+	rc = wait_for_completion_interruptible_timeout(
+		&info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ);
+	log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc);
+
+	if (info->negotiate_done)
+		return 0;
+
+	if (rc == 0)
+		rc = -ETIMEDOUT;
+	else if (rc == -ERESTARTSYS)
+		rc = -EINTR;
+	else
+		rc = -ENOTCONN;
+
+	return rc;
+}
+
+static void put_empty_packet(
+		struct smbd_connection *info, struct smbd_response *response)
+{
+	spin_lock(&info->empty_packet_queue_lock);
+	list_add_tail(&response->list, &info->empty_packet_queue);
+	info->count_empty_packet_queue++;
+	spin_unlock(&info->empty_packet_queue_lock);
+
+	queue_work(info->workqueue, &info->post_send_credits_work);
+}
+
+/*
+ * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
+ * This is a queue for reassembling upper layer payload and present to upper
+ * layer.
All the inncoming payload go to the reassembly queue, regardless of + * if reassembly is required. The uuper layer code reads from the queue for all + * incoming payloads. + * Put a received packet to the reassembly queue + * response: the packet received + * data_length: the size of payload in this packet + */ +static void enqueue_reassembly( + struct smbd_connection *info, + struct smbd_response *response, + int data_length) +{ + spin_lock(&info->reassembly_queue_lock); + list_add_tail(&response->list, &info->reassembly_queue); + info->reassembly_queue_length++; + /* + * Make sure reassembly_data_length is updated after list and + * reassembly_queue_length are updated. On the dequeue side + * reassembly_data_length is checked without a lock to determine + * if reassembly_queue_length and list is up to date + */ + virt_wmb(); + info->reassembly_data_length += data_length; + spin_unlock(&info->reassembly_queue_lock); + info->count_reassembly_queue++; + info->count_enqueue_reassembly_queue++; +} + +/* + * Get the first entry at the front of reassembly queue + * Caller is responsible for locking + * return value: the first entry if any, NULL if queue is empty + */ +static struct smbd_response *_get_first_reassembly(struct smbd_connection *info) +{ + struct smbd_response *ret = NULL; + + if (!list_empty(&info->reassembly_queue)) { + ret = list_first_entry( + &info->reassembly_queue, + struct smbd_response, list); + } + return ret; +} + +static struct smbd_response *get_empty_queue_buffer( + struct smbd_connection *info) +{ + struct smbd_response *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&info->empty_packet_queue_lock, flags); + if (!list_empty(&info->empty_packet_queue)) { + ret = list_first_entry( + &info->empty_packet_queue, + struct smbd_response, list); + list_del(&ret->list); + info->count_empty_packet_queue--; + } + spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags); + + return ret; +} + +/* + * Get a receive buffer + * For each remote 
send, we need to post a receive. The receive buffers are + * pre-allocated in advance. + * return value: the receive buffer, NULL if none is available + */ +static struct smbd_response *get_receive_buffer(struct smbd_connection *info) +{ + struct smbd_response *ret = NULL; + unsigned long flags; + + spin_lock_irqsave(&info->receive_queue_lock, flags); + if (!list_empty(&info->receive_queue)) { + ret = list_first_entry( + &info->receive_queue, + struct smbd_response, list); + list_del(&ret->list); + info->count_receive_queue--; + info->count_get_receive_buffer++; + } + spin_unlock_irqrestore(&info->receive_queue_lock, flags); + + return ret; +} + +/* + * Return a receive buffer + * Upon returning of a receive buffer, we can post new receive and extend + * more receive credits to remote peer. This is done immediately after a + * receive buffer is returned. + */ +static void put_receive_buffer( + struct smbd_connection *info, struct smbd_response *response) +{ + unsigned long flags; + + ib_dma_unmap_single(info->id->device, response->sge.addr, + response->sge.length, DMA_FROM_DEVICE); + + spin_lock_irqsave(&info->receive_queue_lock, flags); + list_add_tail(&response->list, &info->receive_queue); + info->count_receive_queue++; + info->count_put_receive_buffer++; + spin_unlock_irqrestore(&info->receive_queue_lock, flags); + + queue_work(info->workqueue, &info->post_send_credits_work); +} + +/* Preallocate all receive buffer on transport establishment */ +static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) +{ + int i; + struct smbd_response *response; + + INIT_LIST_HEAD(&info->reassembly_queue); + spin_lock_init(&info->reassembly_queue_lock); + info->reassembly_data_length = 0; + info->reassembly_queue_length = 0; + + INIT_LIST_HEAD(&info->receive_queue); + spin_lock_init(&info->receive_queue_lock); + info->count_receive_queue = 0; + + INIT_LIST_HEAD(&info->empty_packet_queue); + spin_lock_init(&info->empty_packet_queue_lock); + 
info->count_empty_packet_queue = 0; + + init_waitqueue_head(&info->wait_receive_queues); + + for (i = 0; i < num_buf; i++) { + response = mempool_alloc(info->response_mempool, GFP_KERNEL); + if (!response) + goto allocate_failed; + + response->info = info; + list_add_tail(&response->list, &info->receive_queue); + info->count_receive_queue++; + } + + return 0; + +allocate_failed: + while (!list_empty(&info->receive_queue)) { + response = list_first_entry( + &info->receive_queue, + struct smbd_response, list); + list_del(&response->list); + info->count_receive_queue--; + + mempool_free(response, info->response_mempool); + } + return -ENOMEM; +} + +static void destroy_receive_buffers(struct smbd_connection *info) +{ + struct smbd_response *response; + + while ((response = get_receive_buffer(info))) + mempool_free(response, info->response_mempool); + + while ((response = get_empty_queue_buffer(info))) + mempool_free(response, info->response_mempool); +} + +/* + * Check and send an immediate or keep alive packet + * The condition to send those packets are defined in [MS-SMBD] 3.1.1.1 + * Connection.KeepaliveRequested and Connection.SendImmediate + * The idea is to extend credits to server as soon as it becomes available + */ +static void send_immediate_work(struct work_struct *work) +{ + struct smbd_connection *info = container_of( + work, struct smbd_connection, + send_immediate_work.work); + + if (info->keep_alive_requested == KEEP_ALIVE_PENDING || + info->send_immediate) { + log_keep_alive(INFO, "send an empty message\n"); + smbd_post_send_empty(info); + } +} + +/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ +static void idle_connection_timer(struct work_struct *work) +{ + struct smbd_connection *info = container_of( + work, struct smbd_connection, + idle_timer_work.work); + + if (info->keep_alive_requested != KEEP_ALIVE_NONE) { + log_keep_alive(ERR, + "error status info->keep_alive_requested=%d\n", + info->keep_alive_requested); + 
smbd_disconnect_rdma_connection(info); + return; + } + + log_keep_alive(INFO, "about to send an empty idle message\n"); + smbd_post_send_empty(info); + + /* Setup the next idle timeout work */ + queue_delayed_work(info->workqueue, &info->idle_timer_work, + info->keep_alive_interval*HZ); +} + +static void destroy_caches_and_workqueue(struct smbd_connection *info) +{ + destroy_receive_buffers(info); + destroy_workqueue(info->workqueue); + mempool_destroy(info->response_mempool); + kmem_cache_destroy(info->response_cache); + mempool_destroy(info->request_mempool); + kmem_cache_destroy(info->request_cache); +} + +#define MAX_NAME_LEN 80 +static int allocate_caches_and_workqueue(struct smbd_connection *info) +{ + char name[MAX_NAME_LEN]; + int rc; + + snprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); + info->request_cache = + kmem_cache_create( + name, + sizeof(struct smbd_request) + + sizeof(struct smbd_data_transfer), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!info->request_cache) + return -ENOMEM; + + info->request_mempool = + mempool_create(info->send_credit_target, mempool_alloc_slab, + mempool_free_slab, info->request_cache); + if (!info->request_mempool) + goto out1; + + snprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); + info->response_cache = + kmem_cache_create( + name, + sizeof(struct smbd_response) + + info->max_receive_size, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!info->response_cache) + goto out2; + + info->response_mempool = + mempool_create(info->receive_credit_max, mempool_alloc_slab, + mempool_free_slab, info->response_cache); + if (!info->response_mempool) + goto out3; + + snprintf(name, MAX_NAME_LEN, "smbd_%p", info); + info->workqueue = create_workqueue(name); + if (!info->workqueue) + goto out4; + + rc = allocate_receive_buffers(info, info->receive_credit_max); + if (rc) { + log_rdma_event(ERR, "failed to allocate receive buffers\n"); + goto out5; + } + + return 0; + +out5: + destroy_workqueue(info->workqueue); +out4: + 
mempool_destroy(info->response_mempool); +out3: + kmem_cache_destroy(info->response_cache); +out2: + mempool_destroy(info->request_mempool); +out1: + kmem_cache_destroy(info->request_cache); + return -ENOMEM; +} + +/* Create a SMBD connection, called by upper layer */ +struct smbd_connection *_smbd_get_connection( + struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) +{ + int rc; + struct smbd_connection *info; + struct rdma_conn_param conn_param; + struct ib_qp_init_attr qp_attr; + struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; + + info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); + if (!info) + return NULL; + + info->transport_status = SMBD_CONNECTING; + rc = smbd_ia_open(info, dstaddr, port); + if (rc) { + log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); + goto create_id_failed; + } + + if (smbd_send_credit_target > info->id->device->attrs.max_cqe || + smbd_send_credit_target > info->id->device->attrs.max_qp_wr) { + log_rdma_event(ERR, + "consider lowering send_credit_target = %d. " + "Possible CQE overrun, device " + "reporting max_cpe %d max_qp_wr %d\n", + smbd_send_credit_target, + info->id->device->attrs.max_cqe, + info->id->device->attrs.max_qp_wr); + goto config_failed; + } + + if (smbd_receive_credit_max > info->id->device->attrs.max_cqe || + smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) { + log_rdma_event(ERR, + "consider lowering receive_credit_max = %d. 
" + "Possible CQE overrun, device " + "reporting max_cpe %d max_qp_wr %d\n", + smbd_receive_credit_max, + info->id->device->attrs.max_cqe, + info->id->device->attrs.max_qp_wr); + goto config_failed; + } + + info->receive_credit_max = smbd_receive_credit_max; + info->send_credit_target = smbd_send_credit_target; + info->max_send_size = smbd_max_send_size; + info->max_fragmented_recv_size = smbd_max_fragmented_recv_size; + info->max_receive_size = smbd_max_receive_size; + info->keep_alive_interval = smbd_keep_alive_interval; + + if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) { + log_rdma_event(ERR, "warning: device max_sge = %d too small\n", + info->id->device->attrs.max_sge); + log_rdma_event(ERR, "Queue Pair creation may fail\n"); + } + + info->send_cq = NULL; + info->recv_cq = NULL; + info->send_cq = ib_alloc_cq(info->id->device, info, + info->send_credit_target, 0, IB_POLL_SOFTIRQ); + if (IS_ERR(info->send_cq)) { + info->send_cq = NULL; + goto alloc_cq_failed; + } + + info->recv_cq = ib_alloc_cq(info->id->device, info, + info->receive_credit_max, 0, IB_POLL_SOFTIRQ); + if (IS_ERR(info->recv_cq)) { + info->recv_cq = NULL; + goto alloc_cq_failed; + } + + memset(&qp_attr, 0, sizeof(qp_attr)); + qp_attr.event_handler = smbd_qp_async_error_upcall; + qp_attr.qp_context = info; + qp_attr.cap.max_send_wr = info->send_credit_target; + qp_attr.cap.max_recv_wr = info->receive_credit_max; + qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE; + qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE; + qp_attr.cap.max_inline_data = 0; + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + qp_attr.qp_type = IB_QPT_RC; + qp_attr.send_cq = info->send_cq; + qp_attr.recv_cq = info->recv_cq; + qp_attr.port_num = ~0; + + rc = rdma_create_qp(info->id, info->pd, &qp_attr); + if (rc) { + log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); + goto create_qp_failed; + } + + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.initiator_depth = 0; + + conn_param.retry_count = SMBD_CM_RETRY; + 
conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; + conn_param.flow_control = 0; + init_waitqueue_head(&info->wait_destroy); + + log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", + &addr_in->sin_addr, port); + + init_waitqueue_head(&info->conn_wait); + rc = rdma_connect(info->id, &conn_param); + if (rc) { + log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); + goto rdma_connect_failed; + } + + wait_event_interruptible( + info->conn_wait, info->transport_status != SMBD_CONNECTING); + + if (info->transport_status != SMBD_CONNECTED) { + log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); + goto rdma_connect_failed; + } + + log_rdma_event(INFO, "rdma_connect connected\n"); + + rc = allocate_caches_and_workqueue(info); + if (rc) { + log_rdma_event(ERR, "cache allocation failed\n"); + goto allocate_cache_failed; + } + + init_waitqueue_head(&info->wait_send_queue); + init_waitqueue_head(&info->wait_reassembly_queue); + + INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); + INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work); + queue_delayed_work(info->workqueue, &info->idle_timer_work, + info->keep_alive_interval*HZ); + + init_waitqueue_head(&info->wait_send_pending); + atomic_set(&info->send_pending, 0); + + init_waitqueue_head(&info->wait_send_payload_pending); + atomic_set(&info->send_payload_pending, 0); + + INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); + INIT_WORK(&info->destroy_work, smbd_destroy_rdma_work); + INIT_WORK(&info->recv_done_work, smbd_recv_done_work); + INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); + info->new_credits_offered = 0; + spin_lock_init(&info->lock_new_credits_offered); + + rc = smbd_negotiate(info); + if (rc) { + log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); + goto negotiation_failed; + } + + return info; + +negotiation_failed: + cancel_delayed_work_sync(&info->idle_timer_work); + destroy_caches_and_workqueue(info); + info->transport_status = 
SMBD_NEGOTIATE_FAILED; + init_waitqueue_head(&info->conn_wait); + rdma_disconnect(info->id); + wait_event(info->conn_wait, + info->transport_status == SMBD_DISCONNECTED); + +allocate_cache_failed: +rdma_connect_failed: + rdma_destroy_qp(info->id); + +create_qp_failed: +alloc_cq_failed: + if (info->send_cq) + ib_free_cq(info->send_cq); + if (info->recv_cq) + ib_free_cq(info->recv_cq); + +config_failed: + ib_dealloc_pd(info->pd); + rdma_destroy_id(info->id); + +create_id_failed: + kfree(info); + return NULL; +} diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index c55f28bf7a7f..e4e65d3c6de8 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -16,6 +16,236 @@ #ifndef _SMBDIRECT_H #define _SMBDIRECT_H +#ifdef CONFIG_CIFS_SMB_DIRECT +#define cifs_rdma_enabled(server) ((server)->rdma) + +#include "cifsglob.h" +#include +#include +#include + +enum keep_alive_status { + KEEP_ALIVE_NONE, + KEEP_ALIVE_PENDING, + KEEP_ALIVE_SENT, +}; + +enum smbd_connection_status { + SMBD_CREATED, + SMBD_CONNECTING, + SMBD_CONNECTED, + SMBD_NEGOTIATE_FAILED, + SMBD_DISCONNECTING, + SMBD_DISCONNECTED, + SMBD_DESTROYED +}; + +/* + * The context for the SMBDirect transport + * Everything related to the transport is here. It has several logical parts + * 1. RDMA related structures + * 2. SMBDirect connection parameters + * 3. Memory registrations + * 4. Receive and reassembly queues for data receive path + * 5. 
mempools for allocating packets + */ +struct smbd_connection { + enum smbd_connection_status transport_status; + + /* RDMA related */ + struct rdma_cm_id *id; + struct ib_qp_init_attr qp_attr; + struct ib_pd *pd; + struct ib_cq *send_cq, *recv_cq; + struct ib_device_attr dev_attr; + int ri_rc; + struct completion ri_done; + wait_queue_head_t conn_wait; + wait_queue_head_t wait_destroy; + + struct completion negotiate_completion; + bool negotiate_done; + + struct work_struct destroy_work; + struct work_struct disconnect_work; + struct work_struct recv_done_work; + struct work_struct post_send_credits_work; + + spinlock_t lock_new_credits_offered; + int new_credits_offered; + + /* Connection parameters defined in [MS-SMBD] 3.1.1.1 */ + int receive_credit_max; + int send_credit_target; + int max_send_size; + int max_fragmented_recv_size; + int max_fragmented_send_size; + int max_receive_size; + int keep_alive_interval; + int max_readwrite_size; + enum keep_alive_status keep_alive_requested; + int protocol; + atomic_t send_credits; + atomic_t receive_credits; + int receive_credit_target; + int fragment_reassembly_remaining; + + /* Activity accoutning */ + + atomic_t send_pending; + wait_queue_head_t wait_send_pending; + atomic_t send_payload_pending; + wait_queue_head_t wait_send_payload_pending; + + /* Receive queue */ + struct list_head receive_queue; + int count_receive_queue; + spinlock_t receive_queue_lock; + + struct list_head empty_packet_queue; + int count_empty_packet_queue; + spinlock_t empty_packet_queue_lock; + + wait_queue_head_t wait_receive_queues; + + /* Reassembly queue */ + struct list_head reassembly_queue; + spinlock_t reassembly_queue_lock; + wait_queue_head_t wait_reassembly_queue; + + /* total data length of reassembly queue */ + int reassembly_data_length; + int reassembly_queue_length; + /* the offset to first buffer in reassembly queue */ + int first_entry_offset; + + bool send_immediate; + + wait_queue_head_t wait_send_queue; + + /* + * 
Indicate if we have received a full packet on the connection + * This is used to identify the first SMBD packet of a assembled + * payload (SMB packet) in reassembly queue so we can return a + * RFC1002 length to upper layer to indicate the length of the SMB + * packet received + */ + bool full_packet_received; + + struct workqueue_struct *workqueue; + struct delayed_work idle_timer_work; + struct delayed_work send_immediate_work; + + /* Memory pool for preallocating buffers */ + /* request pool for RDMA send */ + struct kmem_cache *request_cache; + mempool_t *request_mempool; + + /* response pool for RDMA receive */ + struct kmem_cache *response_cache; + mempool_t *response_mempool; + + /* for debug purposes */ + unsigned int count_get_receive_buffer; + unsigned int count_put_receive_buffer; + unsigned int count_reassembly_queue; + unsigned int count_enqueue_reassembly_queue; + unsigned int count_dequeue_reassembly_queue; + unsigned int count_send_empty; +}; + +enum smbd_message_type { + SMBD_NEGOTIATE_RESP, + SMBD_TRANSFER_DATA, +}; + +#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001 + +/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ +struct smbd_negotiate_req { + __le16 min_version; + __le16 max_version; + __le16 reserved; + __le16 credits_requested; + __le32 preferred_send_size; + __le32 max_receive_size; + __le32 max_fragmented_size; +} __packed; + +/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */ +struct smbd_negotiate_resp { + __le16 min_version; + __le16 max_version; + __le16 negotiated_version; + __le16 reserved; + __le16 credits_requested; + __le16 credits_granted; + __le32 status; + __le32 max_readwrite_size; + __le32 preferred_send_size; + __le32 max_receive_size; + __le32 max_fragmented_size; +} __packed; + +/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */ +struct smbd_data_transfer { + __le16 credits_requested; + __le16 credits_granted; + __le16 flags; + __le16 reserved; + __le32 remaining_data_length; + __le32 data_offset; + 
__le32 data_length; + __le32 padding; + __u8 buffer[]; +} __packed; + +/* The packet fields for a registered RDMA buffer */ +struct smbd_buffer_descriptor_v1 { + __le64 offset; + __le32 token; + __le32 length; +} __packed; + /* Default maximum number of SGEs in a RDMA send/recv */ #define SMBDIRECT_MAX_SGE 16 +/* The context for a SMBD request */ +struct smbd_request { + struct smbd_connection *info; + struct ib_cqe cqe; + + /* true if this request carries upper layer payload */ + bool has_payload; + + /* the SGE entries for this packet */ + struct ib_sge sge[SMBDIRECT_MAX_SGE]; + int num_sge; + + /* SMBD packet header follows this structure */ + u8 packet[]; +}; + +/* The context for a SMBD response */ +struct smbd_response { + struct smbd_connection *info; + struct ib_cqe cqe; + struct ib_sge sge; + + enum smbd_message_type type; + + /* Link to receive queue or reassembly queue */ + struct list_head list; + + /* Indicate if this is the 1st packet of a payload */ + bool first_segment; + + /* SMBD packet header and payload follows this structure */ + u8 packet[]; +}; + +#else +#define cifs_rdma_enabled(server) 0 +struct smbd_connection {}; +#endif + #endif -- cgit v1.2.3 From 399f9539d951adf26a1078e38c1b0f10cf6c3e71 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 17 Nov 2017 17:26:52 -0800 Subject: CIFS: SMBD: Implement function to create a SMB Direct connection The upper layer calls this function to connect to peer through SMB Direct. Each SMB Direct connection is based on a RDMA RC Queue Pair. 
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smbdirect.c | 17 +++++++++++++++++ fs/cifs/smbdirect.h | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 2ecd5c19d313..862cdf9424cb 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1646,3 +1646,20 @@ create_id_failed: kfree(info); return NULL; } + +struct smbd_connection *smbd_get_connection( + struct TCP_Server_Info *server, struct sockaddr *dstaddr) +{ + struct smbd_connection *ret; + int port = SMBD_PORT; + +try_again: + ret = _smbd_get_connection(server, dstaddr, port); + + /* Try SMB_PORT if SMBD_PORT doesn't work */ + if (!ret && port == SMBD_PORT) { + port = SMB_PORT; + goto try_again; + } + return ret; +} diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index e4e65d3c6de8..25b3782cc692 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -243,9 +243,15 @@ struct smbd_response { u8 packet[]; }; +/* Create a SMBDirect session */ +struct smbd_connection *smbd_get_connection( + struct TCP_Server_Info *server, struct sockaddr *dstaddr); + #else #define cifs_rdma_enabled(server) 0 struct smbd_connection {}; +static inline void *smbd_get_connection( + struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;} #endif #endif -- cgit v1.2.3 From d8ec913b178156661c2b941f94ec22487225d3dc Mon Sep 17 00:00:00 2001 From: Long Li Date: Tue, 7 Nov 2017 01:54:58 -0700 Subject: CIFS: SMBD: export protocol initial values For use-configurable SMB Direct protocol values, export them to /proc/fs/cifs. 
Signed-off-by: Long Li Signed-off-by: Steve French Acked-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky --- fs/cifs/cifs_debug.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 42a57ebde630..e6045fd5c856 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -30,6 +30,9 @@ #include "cifsproto.h" #include "cifs_debug.h" #include "cifsfs.h" +#ifdef CONFIG_CIFS_SMB_DIRECT +#include "smbdirect.h" +#endif void cifs_dump_mem(char *label, void *data, int length) @@ -376,6 +379,54 @@ static const struct file_operations cifs_stats_proc_fops = { }; #endif /* STATS */ +#ifdef CONFIG_CIFS_SMB_DIRECT +#define PROC_FILE_DEFINE(name) \ +static ssize_t name##_write(struct file *file, const char __user *buffer, \ + size_t count, loff_t *ppos) \ +{ \ + int rc; \ + rc = kstrtoint_from_user(buffer, count, 10, & name); \ + if (rc) \ + return rc; \ + return count; \ +} \ +static int name##_proc_show(struct seq_file *m, void *v) \ +{ \ + seq_printf(m, "%d\n", name ); \ + return 0; \ +} \ +static int name##_open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, name##_proc_show, NULL); \ +} \ +\ +static const struct file_operations cifs_##name##_proc_fops = { \ + .open = name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + .write = name##_write, \ +} + +extern int rdma_readwrite_threshold; +extern int smbd_max_frmr_depth; +extern int smbd_keep_alive_interval; +extern int smbd_max_receive_size; +extern int smbd_max_fragmented_recv_size; +extern int smbd_max_send_size; +extern int smbd_send_credit_target; +extern int smbd_receive_credit_max; + +PROC_FILE_DEFINE(rdma_readwrite_threshold); +PROC_FILE_DEFINE(smbd_max_frmr_depth); +PROC_FILE_DEFINE(smbd_keep_alive_interval); +PROC_FILE_DEFINE(smbd_max_receive_size); +PROC_FILE_DEFINE(smbd_max_fragmented_recv_size); +PROC_FILE_DEFINE(smbd_max_send_size); 
+PROC_FILE_DEFINE(smbd_send_credit_target); +PROC_FILE_DEFINE(smbd_receive_credit_max); +#endif + static struct proc_dir_entry *proc_fs_cifs; static const struct file_operations cifsFYI_proc_fops; static const struct file_operations cifs_lookup_cache_proc_fops; @@ -403,6 +454,24 @@ cifs_proc_init(void) &cifs_security_flags_proc_fops); proc_create("LookupCacheEnabled", 0, proc_fs_cifs, &cifs_lookup_cache_proc_fops); +#ifdef CONFIG_CIFS_SMB_DIRECT + proc_create("rdma_readwrite_threshold", 0, proc_fs_cifs, + &cifs_rdma_readwrite_threshold_proc_fops); + proc_create("smbd_max_frmr_depth", 0, proc_fs_cifs, + &cifs_smbd_max_frmr_depth_proc_fops); + proc_create("smbd_keep_alive_interval", 0, proc_fs_cifs, + &cifs_smbd_keep_alive_interval_proc_fops); + proc_create("smbd_max_receive_size", 0, proc_fs_cifs, + &cifs_smbd_max_receive_size_proc_fops); + proc_create("smbd_max_fragmented_recv_size", 0, proc_fs_cifs, + &cifs_smbd_max_fragmented_recv_size_proc_fops); + proc_create("smbd_max_send_size", 0, proc_fs_cifs, + &cifs_smbd_max_send_size_proc_fops); + proc_create("smbd_send_credit_target", 0, proc_fs_cifs, + &cifs_smbd_send_credit_target_proc_fops); + proc_create("smbd_receive_credit_max", 0, proc_fs_cifs, + &cifs_smbd_receive_credit_max_proc_fops); +#endif } void @@ -420,6 +489,16 @@ cifs_proc_clean(void) remove_proc_entry("SecurityFlags", proc_fs_cifs); remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); +#ifdef CONFIG_CIFS_SMB_DIRECT + remove_proc_entry("rdma_readwrite_threshold", proc_fs_cifs); + remove_proc_entry("smbd_max_frmr_depth", proc_fs_cifs); + remove_proc_entry("smbd_keep_alive_interval", proc_fs_cifs); + remove_proc_entry("smbd_max_receive_size", proc_fs_cifs); + remove_proc_entry("smbd_max_fragmented_recv_size", proc_fs_cifs); + remove_proc_entry("smbd_max_send_size", proc_fs_cifs); + remove_proc_entry("smbd_send_credit_target", proc_fs_cifs); + remove_proc_entry("smbd_receive_credit_max", 
proc_fs_cifs); +#endif remove_proc_entry("fs/cifs", NULL); } -- cgit v1.2.3 From f04a703c3d613845ae3141bfaf223489de8ab3eb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 15 Dec 2017 12:48:32 -0800 Subject: cifs: Fix missing put_xid in cifs_file_strict_mmap If cifs_zap_mapping() returned an error, we would return without putting the xid that we got earlier. Restructure cifs_file_strict_mmap() and cifs_file_mmap() to be more similar to each other and have a single point of return that always puts the xid. Signed-off-by: Matthew Wilcox Signed-off-by: Steve French CC: Stable --- fs/cifs/file.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index df9f682708c6..3a85df2a9baf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3471,20 +3471,18 @@ static const struct vm_operations_struct cifs_file_vm_ops = { int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) { - int rc, xid; + int xid, rc = 0; struct inode *inode = file_inode(file); xid = get_xid(); - if (!CIFS_CACHE_READ(CIFS_I(inode))) { + if (!CIFS_CACHE_READ(CIFS_I(inode))) rc = cifs_zap_mapping(inode); - if (rc) - return rc; - } - - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } @@ -3494,16 +3492,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) int rc, xid; xid = get_xid(); + rc = cifs_revalidate_file(file); - if (rc) { + if (rc) cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", rc); - free_xid(xid); - return rc; - } - rc = generic_file_mmap(file, vma); - if (rc == 0) + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) vma->vm_ops = &cifs_file_vm_ops; + free_xid(xid); return rc; } -- cgit v1.2.3 From 0933d6fa748b6283be6242f7f99ad3f4dc70e143 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 2 Jan 2018 09:43:10 -0800 Subject: cifs: fix build 
errors for SMB_DIRECT Prevent build errors when CIFS=y and INFINIBAND=m. fs/cifs/smbdirect.o: In function `smbd_qp_async_error_upcall': smbdirect.c:(.text+0x28c): undefined reference to `ib_event_msg' fs/cifs/smbdirect.o: In function `smbd_destroy_rdma_work': smbdirect.c:(.text+0xfde): undefined reference to `ib_drain_qp' smbdirect.c:(.text+0xfea): undefined reference to `rdma_destroy_qp' smbdirect.c:(.text+0x12a0): undefined reference to `ib_free_cq' smbdirect.c:(.text+0x12ac): undefined reference to `ib_free_cq' smbdirect.c:(.text+0x12b8): undefined reference to `ib_dealloc_pd' smbdirect.c:(.text+0x12c4): undefined reference to `rdma_destroy_id' fs/cifs/smbdirect.o: In function `_smbd_get_connection': smbdirect.c:(.text+0x168c): undefined reference to `rdma_create_id' smbdirect.c:(.text+0x1713): undefined reference to `rdma_resolve_addr' smbdirect.c:(.text+0x1780): undefined reference to `rdma_resolve_route' smbdirect.c:(.text+0x17e3): undefined reference to `rdma_destroy_id' smbdirect.c:(.text+0x183d): undefined reference to `rdma_destroy_id' smbdirect.c:(.text+0x199d): undefined reference to `ib_alloc_cq' smbdirect.c:(.text+0x19d9): undefined reference to `ib_alloc_cq' smbdirect.c:(.text+0x1a89): undefined reference to `rdma_create_qp' smbdirect.c:(.text+0x1b3c): undefined reference to `rdma_connect' smbdirect.c:(.text+0x2538): undefined reference to `rdma_destroy_qp' smbdirect.c:(.text+0x2549): undefined reference to `ib_free_cq' smbdirect.c:(.text+0x255a): undefined reference to `ib_free_cq' smbdirect.c:(.text+0x2563): undefined reference to `ib_dealloc_pd' smbdirect.c:(.text+0x256c): undefined reference to `rdma_destroy_id' smbdirect.c:(.text+0x25f0): undefined reference to `__ib_alloc_pd' smbdirect.c:(.text+0x26bb): undefined reference to `rdma_disconnect' fs/cifs/smbdirect.o: In function `smbd_disconnect_rdma_work': smbdirect.c:(.text+0x62): undefined reference to `rdma_disconnect' Signed-off-by: Randy Dunlap Cc: Steve French Cc: linux-cifs@vger.kernel.org 
Cc: samba-technical@lists.samba.org (moderated for non-subscribers) Signed-off-by: Steve French --- fs/cifs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 500fd69fb58b..c71971c01c63 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -198,7 +198,7 @@ config CIFS_SMB311 config CIFS_SMB_DIRECT bool "SMB Direct support (Experimental)" - depends on CIFS && INFINIBAND + depends on CIFS=m && INFINIBAND || CIFS=y && INFINIBAND=y help Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1. SMB Direct allows transferring SMB packets over RDMA. If unsure, -- cgit v1.2.3 From 2f8946464b11822169b9f10c7cf58a2440b51d54 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:34 -0700 Subject: CIFS: SMBD: Upper layer connects to SMBDirect session When "rdma" is specified in the mount option, make CIFS connect to SMB Direct. Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/connect.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 64be6f9e54a2..fafaecb5fb18 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -44,7 +44,6 @@ #include #include #include - #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -56,6 +55,7 @@ #include "rfc1002pdu.h" #include "fscache.h" #include "smb2proto.h" +#include "smbdirect.h" #define CIFS_PORT 445 #define RFC1001_PORT 139 @@ -2310,13 +2310,29 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->echo_interval = volume_info->echo_interval * HZ; else tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ; - + if (tcp_ses->rdma) { +#ifndef CONFIG_CIFS_SMB_DIRECT + cifs_dbg(VFS, "CONFIG_CIFS_SMB_DIRECT is not enabled\n"); + rc = -ENOENT; + goto out_err_crypto_release; +#endif + tcp_ses->smbd_conn = smbd_get_connection( + tcp_ses, (struct sockaddr *)&volume_info->dstaddr); + if 
(tcp_ses->smbd_conn) { + cifs_dbg(VFS, "RDMA transport established\n"); + rc = 0; + goto smbd_connected; + } else { + rc = -ENOENT; + goto out_err_crypto_release; + } + } rc = ip_connect(tcp_ses); if (rc < 0) { cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n"); goto out_err_crypto_release; } - +smbd_connected: /* * since we're in a cifs function already, we know that * this will succeed. No need for try_module_get(). -- cgit v1.2.3 From ad57b8e1726ed1b61f74c278c602cd5aab21bd95 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:35 -0700 Subject: CIFS: SMBD: Implement function to reconnect to a SMB Direct transport Add function to implement a reconnect to SMB Direct. This involves tearing down the current connection and establishing/negotiating a new connection. Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/smbdirect.c | 36 ++++++++++++++++++++++++++++++++++++ fs/cifs/smbdirect.h | 4 ++++ 2 files changed, 40 insertions(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 862cdf9424cb..a96058a3ad87 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1387,6 +1387,42 @@ static void idle_connection_timer(struct work_struct *work) info->keep_alive_interval*HZ); } +/* + * Reconnect this SMBD connection, called from upper layer + * return value: 0 on success, or actual error code + */ +int smbd_reconnect(struct TCP_Server_Info *server) +{ + log_rdma_event(INFO, "reconnecting rdma session\n"); + + if (!server->smbd_conn) { + log_rdma_event(ERR, "rdma session already destroyed\n"); + return -EINVAL; + } + + /* + * This is possible if transport is disconnected and we haven't received + * notification from RDMA, but upper layer has detected timeout + */ + if (server->smbd_conn->transport_status == SMBD_CONNECTED) { + log_rdma_event(INFO, "disconnecting transport\n"); + smbd_disconnect_rdma_connection(server->smbd_conn); + } + + /* wait until the 
transport is destroyed */ + wait_event(server->smbd_conn->wait_destroy, + server->smbd_conn->transport_status == SMBD_DESTROYED); + + destroy_workqueue(server->smbd_conn->workqueue); + kfree(server->smbd_conn); + + log_rdma_event(INFO, "creating rdma session\n"); + server->smbd_conn = smbd_get_connection( + server, (struct sockaddr *) &server->dstaddr); + + return server->smbd_conn ? 0 : -ENOENT; +} + static void destroy_caches_and_workqueue(struct smbd_connection *info) { destroy_receive_buffers(info); diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index 25b3782cc692..f1db2ee7c8c2 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -247,11 +247,15 @@ struct smbd_response { struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); +/* Reconnect SMBDirect session */ +int smbd_reconnect(struct TCP_Server_Info *server); + #else #define cifs_rdma_enabled(server) 0 struct smbd_connection {}; static inline void *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;} +static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } #endif #endif -- cgit v1.2.3 From 781a8050f2a8e1474a75122b7d940959cc579e14 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:36 -0700 Subject: CIFS: SMBD: Upper layer reconnects to SMB Direct session Do a reconnect on SMB Direct when it is used as the connection. Reconnect can happen for many reasons and it's mostly the decision of SMB2 upper layer. 
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky --- fs/cifs/connect.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index fafaecb5fb18..fc460663b308 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -406,7 +406,10 @@ cifs_reconnect(struct TCP_Server_Info *server) /* we should try only the port we connected to before */ mutex_lock(&server->srv_mutex); - rc = generic_ip_connect(server); + if (cifs_rdma_enabled(server)) + rc = smbd_reconnect(server); + else + rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); mutex_unlock(&server->srv_mutex); -- cgit v1.2.3 From 8ef130f9ec27973f7b49e20c5a3b9134ca33026c Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:37 -0700 Subject: CIFS: SMBD: Implement function to destroy a SMB Direct connection Add function to tear down a SMB Direct connection. This is used by upper layer to free all SMB Direct connection and transport resources. 
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/smbdirect.c | 16 ++++++++++++++++ fs/cifs/smbdirect.h | 3 +++ 2 files changed, 19 insertions(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index a96058a3ad87..b462a2f3863b 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1387,6 +1387,22 @@ static void idle_connection_timer(struct work_struct *work) info->keep_alive_interval*HZ); } +/* Destroy this SMBD connection, called from upper layer */ +void smbd_destroy(struct smbd_connection *info) +{ + log_rdma_event(INFO, "destroying rdma session\n"); + + /* Kick off the disconnection process */ + smbd_disconnect_rdma_connection(info); + + log_rdma_event(INFO, "wait for transport being destroyed\n"); + wait_event(info->wait_destroy, + info->transport_status == SMBD_DESTROYED); + + destroy_workqueue(info->workqueue); + kfree(info); +} + /* * Reconnect this SMBD connection, called from upper layer * return value: 0 on success, or actual error code diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index f1db2ee7c8c2..f0ce934650c1 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -249,6 +249,8 @@ struct smbd_connection *smbd_get_connection( /* Reconnect SMBDirect session */ int smbd_reconnect(struct TCP_Server_Info *server); +/* Destroy SMBDirect session */ +void smbd_destroy(struct smbd_connection *info); #else #define cifs_rdma_enabled(server) 0 @@ -256,6 +258,7 @@ struct smbd_connection {}; static inline void *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;} static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } +static inline void smbd_destroy(struct smbd_connection *info) {} #endif #endif -- cgit v1.2.3 From bce9ce7cc0766f5fe532cb89567ca2ac5ad80a1f Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:38 -0700 Subject: CIFS: SMBD: Upper layer destroys SMB Direct session on 
shutdown or umount When upper layer wants to umount, make it call shutdown on transport when SMB Direct is used. Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/connect.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index fc460663b308..d8bfa89161e2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -704,7 +704,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) wake_up_all(&server->request_q); /* give those requests time to exit */ msleep(125); - + if (cifs_rdma_enabled(server) && server->smbd_conn) { + smbd_destroy(server->smbd_conn); + server->smbd_conn = NULL; + } if (server->ssocket) { sock_release(server->ssocket); server->ssocket = NULL; -- cgit v1.2.3 From 09902f8dc849fd9d3f1258a5c926c5d5472646b1 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:39 -0700 Subject: CIFS: SMBD: Set SMB Direct maximum read or write size for I/O When connecting over SMB Direct, the transport negotiates its maximum I/O sizes with the server and determines how to choose to do RDMA send/recv vs read/write. Expose these maximum I/O sizes to upper layer so we will get the correct sized payloads. Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/smb2ops.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ed88ab8a4774..c8fa4c4ccc50 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -32,6 +32,7 @@ #include "smb2status.h" #include "smb2glob.h" #include "cifs_ioctl.h" +#include "smbdirect.h" static int change_conf(struct TCP_Server_Info *server) @@ -250,7 +251,11 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified wsize, or default */ wsize = volume_info->wsize ? 
volume_info->wsize : CIFS_DEFAULT_IOSIZE; wsize = min_t(unsigned int, wsize, server->max_write); - +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->rdma) + wsize = min_t(unsigned int, + wsize, server->smbd_conn->max_readwrite_size); +#endif if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); @@ -266,6 +271,11 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) /* start with specified rsize, or default */ rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; rsize = min_t(unsigned int, rsize, server->max_read); +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->rdma) + rsize = min_t(unsigned int, + rsize, server->smbd_conn->max_readwrite_size); +#endif if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); -- cgit v1.2.3 From f64b78fd1835d1d764685b0c80c292c5d3daaa07 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:40 -0700 Subject: CIFS: SMBD: Implement function to receive data via RDMA receive On the receive path, the transport maintains receive buffers and a reassembly queue for transferring payload via RDMA recv. There is data copy in the transport on recv when it copies the payload to upper layer. The transport recognizes the RFC1002 header length used in the SMB upper layer payloads in CIFS. Because this length is mainly used for TCP and not applicable to RDMA, it is handled as out-of-band information and is never sent over the wire, and the transport behaves like TCP to upper layer by processing and exposing the length correctly on data payloads.
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/smbdirect.c | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smbdirect.h | 7 ++ 2 files changed, 235 insertions(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index b462a2f3863b..d8c5fea3707c 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -14,6 +14,7 @@ * the GNU General Public License for more details. */ #include +#include #include "smbdirect.h" #include "cifs_debug.h" @@ -178,6 +179,8 @@ static void smbd_destroy_rdma_work(struct work_struct *work) log_rdma_event(INFO, "wait for all recv to finish\n"); wake_up_interruptible(&info->wait_reassembly_queue); + wait_event(info->wait_smbd_recv_pending, + info->smbd_recv_pending == 0); log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); wait_event(info->wait_send_pending, @@ -1649,6 +1652,9 @@ struct smbd_connection *_smbd_get_connection( queue_delayed_work(info->workqueue, &info->idle_timer_work, info->keep_alive_interval*HZ); + init_waitqueue_head(&info->wait_smbd_recv_pending); + info->smbd_recv_pending = 0; + init_waitqueue_head(&info->wait_send_pending); atomic_set(&info->send_pending, 0); @@ -1715,3 +1721,225 @@ try_again: } return ret; } + +/* + * Receive data from receive reassembly queue + * All the incoming data packets are placed in reassembly queue + * buf: the buffer to read data into + * size: the length of data to read + * return value: actual data read + * Note: this implementation copies the data from reassebmly queue to receive + * buffers used by upper layer. This is not the optimal code path. A better way + * to do it is to not have upper layer allocate its receive buffers but rather + * borrow the buffer from reassembly queue, and return it after data is + * consumed. But this will require more changes to upper layer code, and also + * need to consider packet boundaries while they still being reassembled. 
+ */ +int smbd_recv_buf(struct smbd_connection *info, char *buf, unsigned int size) +{ + struct smbd_response *response; + struct smbd_data_transfer *data_transfer; + int to_copy, to_read, data_read, offset; + u32 data_length, remaining_data_length, data_offset; + int rc; + unsigned long flags; + +again: + if (info->transport_status != SMBD_CONNECTED) { + log_read(ERR, "disconnected\n"); + return -ENODEV; + } + + /* + * No need to hold the reassembly queue lock all the time as we are + * the only one reading from the front of the queue. The transport + * may add more entries to the back of the queue at the same time + */ + log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size, + info->reassembly_data_length); + if (info->reassembly_data_length >= size) { + int queue_length; + int queue_removed = 0; + + /* + * Need to make sure reassembly_data_length is read before + * reading reassembly_queue_length and calling + * _get_first_reassembly. This call is lock free + * as we never read at the end of the queue which are being + * updated in SOFTIRQ as more data is received + */ + virt_rmb(); + queue_length = info->reassembly_queue_length; + data_read = 0; + to_read = size; + offset = info->first_entry_offset; + while (data_read < size) { + response = _get_first_reassembly(info); + data_transfer = smbd_response_payload(response); + data_length = le32_to_cpu(data_transfer->data_length); + remaining_data_length = + le32_to_cpu( + data_transfer->remaining_data_length); + data_offset = le32_to_cpu(data_transfer->data_offset); + + /* + * The upper layer expects RFC1002 length at the + * beginning of the payload. Return it to indicate + * the total length of the packet. This minimize the + * change to upper layer packet processing logic. 
This + * will be eventually remove when an intermediate + * transport layer is added + */ + if (response->first_segment && size == 4) { + unsigned int rfc1002_len = + data_length + remaining_data_length; + *((__be32 *)buf) = cpu_to_be32(rfc1002_len); + data_read = 4; + response->first_segment = false; + log_read(INFO, "returning rfc1002 length %d\n", + rfc1002_len); + goto read_rfc1002_done; + } + + to_copy = min_t(int, data_length - offset, to_read); + memcpy( + buf + data_read, + (char *)data_transfer + data_offset + offset, + to_copy); + + /* move on to the next buffer? */ + if (to_copy == data_length - offset) { + queue_length--; + /* + * No need to lock if we are not at the + * end of the queue + */ + if (!queue_length) + spin_lock_irqsave( + &info->reassembly_queue_lock, + flags); + list_del(&response->list); + queue_removed++; + if (!queue_length) + spin_unlock_irqrestore( + &info->reassembly_queue_lock, + flags); + + info->count_reassembly_queue--; + info->count_dequeue_reassembly_queue++; + put_receive_buffer(info, response); + offset = 0; + log_read(INFO, "put_receive_buffer offset=0\n"); + } else + offset += to_copy; + + to_read -= to_copy; + data_read += to_copy; + + log_read(INFO, "_get_first_reassembly memcpy %d bytes " + "data_transfer_length-offset=%d after that " + "to_read=%d data_read=%d offset=%d\n", + to_copy, data_length - offset, + to_read, data_read, offset); + } + + spin_lock_irqsave(&info->reassembly_queue_lock, flags); + info->reassembly_data_length -= data_read; + info->reassembly_queue_length -= queue_removed; + spin_unlock_irqrestore(&info->reassembly_queue_lock, flags); + + info->first_entry_offset = offset; + log_read(INFO, "returning to thread data_read=%d " + "reassembly_data_length=%d first_entry_offset=%d\n", + data_read, info->reassembly_data_length, + info->first_entry_offset); +read_rfc1002_done: + return data_read; + } + + log_read(INFO, "wait_event on more data\n"); + rc = wait_event_interruptible( + 
info->wait_reassembly_queue, + info->reassembly_data_length >= size || + info->transport_status != SMBD_CONNECTED); + /* Don't return any data if interrupted */ + if (rc) + return -ENODEV; + + goto again; +} + +/* + * Receive a page from receive reassembly queue + * page: the page to read data into + * to_read: the length of data to read + * return value: actual data read + */ +int smbd_recv_page(struct smbd_connection *info, + struct page *page, unsigned int to_read) +{ + int ret; + char *to_address; + + /* make sure we have the page ready for read */ + ret = wait_event_interruptible( + info->wait_reassembly_queue, + info->reassembly_data_length >= to_read || + info->transport_status != SMBD_CONNECTED); + if (ret) + return 0; + + /* now we can read from reassembly queue and not sleep */ + to_address = kmap_atomic(page); + + log_read(INFO, "reading from page=%p address=%p to_read=%d\n", + page, to_address, to_read); + + ret = smbd_recv_buf(info, to_address, to_read); + kunmap_atomic(to_address); + + return ret; +} + +/* + * Receive data from transport + * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC + * return: total bytes read, or 0. SMB Direct will not do partial read. 
+ */ +int smbd_recv(struct smbd_connection *info, struct msghdr *msg) +{ + char *buf; + struct page *page; + unsigned int to_read; + int rc; + + info->smbd_recv_pending++; + + switch (msg->msg_iter.type) { + case READ | ITER_KVEC: + buf = msg->msg_iter.kvec->iov_base; + to_read = msg->msg_iter.kvec->iov_len; + rc = smbd_recv_buf(info, buf, to_read); + break; + + case READ | ITER_BVEC: + page = msg->msg_iter.bvec->bv_page; + to_read = msg->msg_iter.bvec->bv_len; + rc = smbd_recv_page(info, page, to_read); + break; + + default: + /* It's a bug in upper layer to get there */ + cifs_dbg(VFS, "CIFS: invalid msg type %d\n", + msg->msg_iter.type); + rc = -EIO; + } + + info->smbd_recv_pending--; + wake_up(&info->wait_smbd_recv_pending); + + /* SMBDirect will read it all or nothing */ + if (rc > 0) + msg->msg_iter.count = 0; + return rc; +} diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index f0ce934650c1..c072a68e1321 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -91,6 +91,9 @@ struct smbd_connection { int fragment_reassembly_remaining; /* Activity accoutning */ + /* Pending reqeusts issued from upper layer */ + int smbd_recv_pending; + wait_queue_head_t wait_smbd_recv_pending; atomic_t send_pending; wait_queue_head_t wait_send_pending; @@ -252,6 +255,9 @@ int smbd_reconnect(struct TCP_Server_Info *server); /* Destroy SMBDirect session */ void smbd_destroy(struct smbd_connection *info); +/* Interface for carrying upper layer I/O through send/recv */ +int smbd_recv(struct smbd_connection *info, struct msghdr *msg); + #else #define cifs_rdma_enabled(server) 0 struct smbd_connection {}; @@ -259,6 +265,7 @@ static inline void *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;} static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } static inline void smbd_destroy(struct smbd_connection *info) {} +static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; } 
#endif #endif -- cgit v1.2.3 From 2fef137a2e6a2e5a7984f991e6b9546ddd93c6f2 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:41 -0700 Subject: CIFS: SMBD: Upper layer receives data via RDMA receive With SMB Direct connected, use it for receiving data via RDMA receive. Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/connect.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d8bfa89161e2..1677401660d0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -542,8 +542,10 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) if (server_unresponsive(server)) return -ECONNABORTED; - - length = sock_recvmsg(server->ssocket, smb_msg, 0); + if (cifs_rdma_enabled(server) && server->smbd_conn) + length = smbd_recv(server->smbd_conn, smb_msg); + else + length = sock_recvmsg(server->ssocket, smb_msg, 0); if (server->tcpStatus == CifsExiting) return -ESHUTDOWN; -- cgit v1.2.3 From d649e1bba3caee93bb000ff5ac6a65dfc115f8c2 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:42 -0700 Subject: CIFS: SMBD: Implement function to send data via RDMA send The transport doesn't maintain send buffers or send queue for transferring payload via RDMA send. There is no data copy in the transport on send. 
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/smbdirect.c | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smbdirect.h | 5 ++ 2 files changed, 251 insertions(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index d8c5fea3707c..3351873db93f 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -41,6 +41,12 @@ static int smbd_post_recv( struct smbd_response *response); static int smbd_post_send_empty(struct smbd_connection *info); +static int smbd_post_send_data( + struct smbd_connection *info, + struct kvec *iov, int n_vec, int remaining_data_length); +static int smbd_post_send_page(struct smbd_connection *info, + struct page *page, unsigned long offset, + size_t size, int remaining_data_length); /* SMBD version number */ #define SMBD_V1 0x0100 @@ -177,6 +183,10 @@ static void smbd_destroy_rdma_work(struct work_struct *work) log_rdma_event(INFO, "cancelling send immediate work\n"); cancel_delayed_work_sync(&info->send_immediate_work); + log_rdma_event(INFO, "wait for all send to finish\n"); + wait_event(info->wait_smbd_send_pending, + info->smbd_send_pending == 0); + log_rdma_event(INFO, "wait for all recv to finish\n"); wake_up_interruptible(&info->wait_reassembly_queue); wait_event(info->wait_smbd_recv_pending, @@ -1077,6 +1087,24 @@ dma_mapping_failure: return rc; } +/* + * Send a page + * page: the page to send + * offset: offset in the page to send + * size: length in the page to send + * remaining_data_length: remaining data to send in this payload + */ +static int smbd_post_send_page(struct smbd_connection *info, struct page *page, + unsigned long offset, size_t size, int remaining_data_length) +{ + struct scatterlist sgl; + + sg_init_table(&sgl, 1); + sg_set_page(&sgl, page, size, offset); + + return smbd_post_send_sgl(info, &sgl, size, remaining_data_length); +} + /* * Send an empty message * Empty message is used to extend credits to peer to for 
keep live @@ -1088,6 +1116,35 @@ static int smbd_post_send_empty(struct smbd_connection *info) return smbd_post_send_sgl(info, NULL, 0, 0); } +/* + * Send a data buffer + * iov: the iov array describing the data buffers + * n_vec: number of iov array + * remaining_data_length: remaining data to send following this packet + * in segmented SMBD packet + */ +static int smbd_post_send_data( + struct smbd_connection *info, struct kvec *iov, int n_vec, + int remaining_data_length) +{ + int i; + u32 data_length = 0; + struct scatterlist sgl[SMBDIRECT_MAX_SGE]; + + if (n_vec > SMBDIRECT_MAX_SGE) { + cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec); + return -ENOMEM; + } + + sg_init_table(sgl, n_vec); + for (i = 0; i < n_vec; i++) { + data_length += iov[i].iov_len; + sg_set_buf(&sgl[i], iov[i].iov_base, iov[i].iov_len); + } + + return smbd_post_send_sgl(info, sgl, data_length, remaining_data_length); +} + /* * Post a receive request to the transport * The remote peer can only send data when a receive request is posted @@ -1652,6 +1709,9 @@ struct smbd_connection *_smbd_get_connection( queue_delayed_work(info->workqueue, &info->idle_timer_work, info->keep_alive_interval*HZ); + init_waitqueue_head(&info->wait_smbd_send_pending); + info->smbd_send_pending = 0; + init_waitqueue_head(&info->wait_smbd_recv_pending); info->smbd_recv_pending = 0; @@ -1943,3 +2003,189 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) msg->msg_iter.count = 0; return rc; } + +/* + * Send data to transport + * Each rqst is transported as a SMBDirect payload + * rqst: the data to write + * return value: 0 if successfully write, otherwise error code + */ +int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) +{ + struct kvec vec; + int nvecs; + int size; + int buflen = 0, remaining_data_length; + int start, i, j; + int max_iov_size = + info->max_send_size - sizeof(struct smbd_data_transfer); + struct kvec iov[SMBDIRECT_MAX_SGE]; + int rc; + + 
info->smbd_send_pending++; + if (info->transport_status != SMBD_CONNECTED) { + rc = -ENODEV; + goto done; + } + + /* + * This usually means a configuration error + * We use RDMA read/write for packet size > rdma_readwrite_threshold + * as long as it's properly configured we should never get into this + * situation + */ + if (rqst->rq_nvec + rqst->rq_npages > SMBDIRECT_MAX_SGE) { + log_write(ERR, "maximum send segment %x exceeding %x\n", + rqst->rq_nvec + rqst->rq_npages, SMBDIRECT_MAX_SGE); + rc = -EINVAL; + goto done; + } + + /* + * Remove the RFC1002 length defined in MS-SMB2 section 2.1 + * It is used only for TCP transport + * In future we may want to add a transport layer under protocol + * layer so this will only be issued to TCP transport + */ + iov[0].iov_base = (char *)rqst->rq_iov[0].iov_base + 4; + iov[0].iov_len = rqst->rq_iov[0].iov_len - 4; + buflen += iov[0].iov_len; + + /* total up iov array first */ + for (i = 1; i < rqst->rq_nvec; i++) { + iov[i].iov_base = rqst->rq_iov[i].iov_base; + iov[i].iov_len = rqst->rq_iov[i].iov_len; + buflen += iov[i].iov_len; + } + + /* add in the page array if there is one */ + if (rqst->rq_npages) { + buflen += rqst->rq_pagesz * (rqst->rq_npages - 1); + buflen += rqst->rq_tailsz; + } + + if (buflen + sizeof(struct smbd_data_transfer) > + info->max_fragmented_send_size) { + log_write(ERR, "payload size %d > max size %d\n", + buflen, info->max_fragmented_send_size); + rc = -EINVAL; + goto done; + } + + remaining_data_length = buflen; + + log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d " + "rq_tailsz=%d buflen=%d\n", + rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz, + rqst->rq_tailsz, buflen); + + start = i = iov[0].iov_len ? 
0 : 1; + buflen = 0; + while (true) { + buflen += iov[i].iov_len; + if (buflen > max_iov_size) { + if (i > start) { + remaining_data_length -= + (buflen-iov[i].iov_len); + log_write(INFO, "sending iov[] from start=%d " + "i=%d nvecs=%d " + "remaining_data_length=%d\n", + start, i, i-start, + remaining_data_length); + rc = smbd_post_send_data( + info, &iov[start], i-start, + remaining_data_length); + if (rc) + goto done; + } else { + /* iov[start] is too big, break it */ + nvecs = (buflen+max_iov_size-1)/max_iov_size; + log_write(INFO, "iov[%d] iov_base=%p buflen=%d" + " break to %d vectors\n", + start, iov[start].iov_base, + buflen, nvecs); + for (j = 0; j < nvecs; j++) { + vec.iov_base = + (char *)iov[start].iov_base + + j*max_iov_size; + vec.iov_len = max_iov_size; + if (j == nvecs-1) + vec.iov_len = + buflen - + max_iov_size*(nvecs-1); + remaining_data_length -= vec.iov_len; + log_write(INFO, + "sending vec j=%d iov_base=%p" + " iov_len=%zu " + "remaining_data_length=%d\n", + j, vec.iov_base, vec.iov_len, + remaining_data_length); + rc = smbd_post_send_data( + info, &vec, 1, + remaining_data_length); + if (rc) + goto done; + } + i++; + } + start = i; + buflen = 0; + } else { + i++; + if (i == rqst->rq_nvec) { + /* send out all remaining vecs */ + remaining_data_length -= buflen; + log_write(INFO, + "sending iov[] from start=%d i=%d " + "nvecs=%d remaining_data_length=%d\n", + start, i, i-start, + remaining_data_length); + rc = smbd_post_send_data(info, &iov[start], + i-start, remaining_data_length); + if (rc) + goto done; + break; + } + } + log_write(INFO, "looping i=%d buflen=%d\n", i, buflen); + } + + /* now sending pages if there are any */ + for (i = 0; i < rqst->rq_npages; i++) { + buflen = (i == rqst->rq_npages-1) ? 
+ rqst->rq_tailsz : rqst->rq_pagesz; + nvecs = (buflen + max_iov_size - 1) / max_iov_size; + log_write(INFO, "sending pages buflen=%d nvecs=%d\n", + buflen, nvecs); + for (j = 0; j < nvecs; j++) { + size = max_iov_size; + if (j == nvecs-1) + size = buflen - j*max_iov_size; + remaining_data_length -= size; + log_write(INFO, "sending pages i=%d offset=%d size=%d" + " remaining_data_length=%d\n", + i, j*max_iov_size, size, remaining_data_length); + rc = smbd_post_send_page( + info, rqst->rq_pages[i], j*max_iov_size, + size, remaining_data_length); + if (rc) + goto done; + } + } + +done: + /* + * As an optimization, we don't wait for individual I/O to finish + * before sending the next one. + * Send them all and wait for pending send count to get to 0 + * that means all the I/Os have been out and we are good to return + */ + + wait_event(info->wait_send_payload_pending, + atomic_read(&info->send_payload_pending) == 0); + + info->smbd_send_pending--; + wake_up(&info->wait_smbd_send_pending); + + return rc; +} diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index c072a68e1321..27453ef2181f 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -92,6 +92,9 @@ struct smbd_connection { /* Activity accoutning */ /* Pending reqeusts issued from upper layer */ + int smbd_send_pending; + wait_queue_head_t wait_smbd_send_pending; + int smbd_recv_pending; wait_queue_head_t wait_smbd_recv_pending; @@ -257,6 +260,7 @@ void smbd_destroy(struct smbd_connection *info); /* Interface for carrying upper layer I/O through send/recv */ int smbd_recv(struct smbd_connection *info, struct msghdr *msg); +int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst); #else #define cifs_rdma_enabled(server) 0 @@ -266,6 +270,7 @@ static inline void *smbd_get_connection( static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } static inline void smbd_destroy(struct smbd_connection *info) {} static inline int smbd_recv(struct smbd_connection *info, struct 
msghdr *msg) {return -1; } +static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; } #endif #endif -- cgit v1.2.3 From 9762c2d080926b7c292cb7c64ca6030e88d6a6e4 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:43 -0700 Subject: CIFS: SMBD: Upper layer sends data via RDMA send With SMB Direct connected, use it for sending data via RDMA send. Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/transport.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 510f41a435c8..9779b3292d8e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -37,6 +37,7 @@ #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" +#include "smbdirect.h" /* Max number of iovectors we can use off the stack when sending requests. */ #define CIFS_MAX_IOV_SIZE 8 @@ -232,7 +233,10 @@ __smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct socket *ssocket = server->ssocket; struct msghdr smb_msg; int val = 1; - + if (cifs_rdma_enabled(server) && server->smbd_conn) { + rc = smbd_send(server->smbd_conn, rqst); + goto smbd_done; + } if (ssocket == NULL) return -ENOTSOCK; @@ -301,7 +305,7 @@ uncork: */ server->tcpStatus = CifsNeedReconnect; } - +smbd_done: if (rc < 0 && rc != -EINTR) cifs_dbg(VFS, "Error %d sending data on socket to server\n", rc); -- cgit v1.2.3 From c7398583340a6d82b8bb7f7f21edcde27dc6a898 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:44 -0700 Subject: CIFS: SMBD: Implement RDMA memory registration Memory registration is used for transferring payload via RDMA read or write. After I/O is done, memory registrations are recovered and reused. This process can be time consuming and is done in a work queue. 
Signed-off-by: Long Li Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smbdirect.c | 421 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smbdirect.h | 53 +++++++ 2 files changed, 474 insertions(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 3351873db93f..731577d4317f 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -48,6 +48,9 @@ static int smbd_post_send_page(struct smbd_connection *info, struct page *page, unsigned long offset, size_t size, int remaining_data_length); +static void destroy_mr_list(struct smbd_connection *info); +static int allocate_mr_list(struct smbd_connection *info); + /* SMBD version number */ #define SMBD_V1 0x0100 @@ -198,6 +201,12 @@ static void smbd_destroy_rdma_work(struct work_struct *work) wait_event(info->wait_send_payload_pending, atomic_read(&info->send_payload_pending) == 0); + log_rdma_event(INFO, "freeing mr list\n"); + wake_up_interruptible_all(&info->wait_mr); + wait_event(info->wait_for_mr_cleanup, + atomic_read(&info->mr_used_count) == 0); + destroy_mr_list(info); + /* It's not posssible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { @@ -453,6 +462,16 @@ static bool process_negotiation_response( } info->max_fragmented_send_size = le32_to_cpu(packet->max_fragmented_size); + info->rdma_readwrite_threshold = + rdma_readwrite_threshold > info->max_fragmented_send_size ? 
+ info->max_fragmented_send_size : + rdma_readwrite_threshold; + + + info->max_readwrite_size = min_t(u32, + le32_to_cpu(packet->max_readwrite_size), + info->max_frmr_depth * PAGE_SIZE); + info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE; return true; } @@ -748,6 +767,12 @@ static int smbd_ia_open( rc = -EPROTONOSUPPORT; goto out2; } + info->max_frmr_depth = min_t(int, + smbd_max_frmr_depth, + info->id->device->attrs.max_fast_reg_page_list_len); + info->mr_type = IB_MR_TYPE_MEM_REG; + if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) + info->mr_type = IB_MR_TYPE_SG_GAPS; info->pd = ib_alloc_pd(info->id->device, 0); if (IS_ERR(info->pd)) { @@ -1582,6 +1607,8 @@ struct smbd_connection *_smbd_get_connection( struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; + struct ib_port_immutable port_immutable; + u32 ird_ord_hdr[2]; info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); if (!info) @@ -1670,6 +1697,28 @@ struct smbd_connection *_smbd_get_connection( memset(&conn_param, 0, sizeof(conn_param)); conn_param.initiator_depth = 0; + conn_param.responder_resources = + info->id->device->attrs.max_qp_rd_atom + < SMBD_CM_RESPONDER_RESOURCES ? 
+ info->id->device->attrs.max_qp_rd_atom : + SMBD_CM_RESPONDER_RESOURCES; + info->responder_resources = conn_param.responder_resources; + log_rdma_mr(INFO, "responder_resources=%d\n", + info->responder_resources); + + /* Need to send IRD/ORD in private data for iWARP */ + info->id->device->get_port_immutable( + info->id->device, info->id->port_num, &port_immutable); + if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { + ird_ord_hdr[0] = info->responder_resources; + ird_ord_hdr[1] = 1; + conn_param.private_data = ird_ord_hdr; + conn_param.private_data_len = sizeof(ird_ord_hdr); + } else { + conn_param.private_data = NULL; + conn_param.private_data_len = 0; + } + conn_param.retry_count = SMBD_CM_RETRY; conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; conn_param.flow_control = 0; @@ -1734,8 +1783,19 @@ struct smbd_connection *_smbd_get_connection( goto negotiation_failed; } + rc = allocate_mr_list(info); + if (rc) { + log_rdma_mr(ERR, "memory registration allocation failed\n"); + goto allocate_mr_failed; + } + return info; +allocate_mr_failed: + /* At this point, need to a full transport shutdown */ + smbd_destroy(info); + return NULL; + negotiation_failed: cancel_delayed_work_sync(&info->idle_timer_work); destroy_caches_and_workqueue(info); @@ -2189,3 +2249,364 @@ done: return rc; } + +static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbd_mr *mr; + struct ib_cqe *cqe; + + if (wc->status) { + log_rdma_mr(ERR, "status=%d\n", wc->status); + cqe = wc->wr_cqe; + mr = container_of(cqe, struct smbd_mr, cqe); + smbd_disconnect_rdma_connection(mr->conn); + } +} + +/* + * The work queue function that recovers MRs + * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used + * again. Both calls are slow, so finish them in a workqueue. This will not + * block I/O path. 
+ * There is one workqueue that recovers MRs, there is no need to lock as the + * I/O requests calling smbd_register_mr will never update the links in the + * mr_list. + */ +static void smbd_mr_recovery_work(struct work_struct *work) +{ + struct smbd_connection *info = + container_of(work, struct smbd_connection, mr_recovery_work); + struct smbd_mr *smbdirect_mr; + int rc; + + list_for_each_entry(smbdirect_mr, &info->mr_list, list) { + if (smbdirect_mr->state == MR_INVALIDATED || + smbdirect_mr->state == MR_ERROR) { + + if (smbdirect_mr->state == MR_INVALIDATED) { + ib_dma_unmap_sg( + info->id->device, smbdirect_mr->sgl, + smbdirect_mr->sgl_count, + smbdirect_mr->dir); + smbdirect_mr->state = MR_READY; + } else if (smbdirect_mr->state == MR_ERROR) { + + /* recover this MR entry */ + rc = ib_dereg_mr(smbdirect_mr->mr); + if (rc) { + log_rdma_mr(ERR, + "ib_dereg_mr faield rc=%x\n", + rc); + smbd_disconnect_rdma_connection(info); + } + + smbdirect_mr->mr = ib_alloc_mr( + info->pd, info->mr_type, + info->max_frmr_depth); + if (IS_ERR(smbdirect_mr->mr)) { + log_rdma_mr(ERR, + "ib_alloc_mr failed mr_type=%x " + "max_frmr_depth=%x\n", + info->mr_type, + info->max_frmr_depth); + smbd_disconnect_rdma_connection(info); + } + + smbdirect_mr->state = MR_READY; + } + /* smbdirect_mr->state is updated by this function + * and is read and updated by I/O issuing CPUs trying + * to get a MR, the call to atomic_inc_return + * implicates a memory barrier and guarantees this + * value is updated before waking up any calls to + * get_mr() from the I/O issuing CPUs + */ + if (atomic_inc_return(&info->mr_ready_count) == 1) + wake_up_interruptible(&info->wait_mr); + } + } +} + +static void destroy_mr_list(struct smbd_connection *info) +{ + struct smbd_mr *mr, *tmp; + + cancel_work_sync(&info->mr_recovery_work); + list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { + if (mr->state == MR_INVALIDATED) + ib_dma_unmap_sg(info->id->device, mr->sgl, + mr->sgl_count, mr->dir); + 
ib_dereg_mr(mr->mr); + kfree(mr->sgl); + kfree(mr); + } +} + +/* + * Allocate MRs used for RDMA read/write + * The number of MRs will not exceed hardware capability in responder_resources + * All MRs are kept in mr_list. The MR can be recovered after it's used + * Recovery is done in smbd_mr_recovery_work. The content of list entry changes + * as MRs are used and recovered for I/O, but the list links will not change + */ +static int allocate_mr_list(struct smbd_connection *info) +{ + int i; + struct smbd_mr *smbdirect_mr, *tmp; + + INIT_LIST_HEAD(&info->mr_list); + init_waitqueue_head(&info->wait_mr); + spin_lock_init(&info->mr_list_lock); + atomic_set(&info->mr_ready_count, 0); + atomic_set(&info->mr_used_count, 0); + init_waitqueue_head(&info->wait_for_mr_cleanup); + /* Allocate more MRs (2x) than hardware responder_resources */ + for (i = 0; i < info->responder_resources * 2; i++) { + smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); + if (!smbdirect_mr) + goto out; + smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type, + info->max_frmr_depth); + if (IS_ERR(smbdirect_mr->mr)) { + log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x " + "max_frmr_depth=%x\n", + info->mr_type, info->max_frmr_depth); + goto out; + } + smbdirect_mr->sgl = kcalloc( + info->max_frmr_depth, + sizeof(struct scatterlist), + GFP_KERNEL); + if (!smbdirect_mr->sgl) { + log_rdma_mr(ERR, "failed to allocate sgl\n"); + ib_dereg_mr(smbdirect_mr->mr); + goto out; + } + smbdirect_mr->state = MR_READY; + smbdirect_mr->conn = info; + + list_add_tail(&smbdirect_mr->list, &info->mr_list); + atomic_inc(&info->mr_ready_count); + } + INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work); + return 0; + +out: + kfree(smbdirect_mr); + + list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { + ib_dereg_mr(smbdirect_mr->mr); + kfree(smbdirect_mr->sgl); + kfree(smbdirect_mr); + } + return -ENOMEM; +} + +/* + * Get a MR from mr_list. 
This function waits until there is at least one + * MR available in the list. It may access the list while the + * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock + * as they never modify the same places. However, there may be several CPUs + * issueing I/O trying to get MR at the same time, mr_list_lock is used to + * protect this situation. + */ +static struct smbd_mr *get_mr(struct smbd_connection *info) +{ + struct smbd_mr *ret; + int rc; +again: + rc = wait_event_interruptible(info->wait_mr, + atomic_read(&info->mr_ready_count) || + info->transport_status != SMBD_CONNECTED); + if (rc) { + log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); + return NULL; + } + + if (info->transport_status != SMBD_CONNECTED) { + log_rdma_mr(ERR, "info->transport_status=%x\n", + info->transport_status); + return NULL; + } + + spin_lock(&info->mr_list_lock); + list_for_each_entry(ret, &info->mr_list, list) { + if (ret->state == MR_READY) { + ret->state = MR_REGISTERED; + spin_unlock(&info->mr_list_lock); + atomic_dec(&info->mr_ready_count); + atomic_inc(&info->mr_used_count); + return ret; + } + } + + spin_unlock(&info->mr_list_lock); + /* + * It is possible that we could fail to get MR because other processes may + * try to acquire a MR at the same time. If this is the case, retry it. + */ + goto again; +} + +/* + * Register memory for RDMA read/write + * pages[]: the list of pages to register memory with + * num_pages: the number of pages to register + * tailsz: if non-zero, the bytes to register in the last page + * writing: true if this is a RDMA write (SMB read), false for RDMA read + * need_invalidate: true if this MR needs to be locally invalidated after I/O + * return value: the MR registered, NULL if failed. 
+ */ +struct smbd_mr *smbd_register_mr( + struct smbd_connection *info, struct page *pages[], int num_pages, + int tailsz, bool writing, bool need_invalidate) +{ + struct smbd_mr *smbdirect_mr; + int rc, i; + enum dma_data_direction dir; + struct ib_reg_wr *reg_wr; + struct ib_send_wr *bad_wr; + + if (num_pages > info->max_frmr_depth) { + log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", + num_pages, info->max_frmr_depth); + return NULL; + } + + smbdirect_mr = get_mr(info); + if (!smbdirect_mr) { + log_rdma_mr(ERR, "get_mr returning NULL\n"); + return NULL; + } + smbdirect_mr->need_invalidate = need_invalidate; + smbdirect_mr->sgl_count = num_pages; + sg_init_table(smbdirect_mr->sgl, num_pages); + + for (i = 0; i < num_pages - 1; i++) + sg_set_page(&smbdirect_mr->sgl[i], pages[i], PAGE_SIZE, 0); + + sg_set_page(&smbdirect_mr->sgl[i], pages[i], + tailsz ? tailsz : PAGE_SIZE, 0); + + dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + smbdirect_mr->dir = dir; + rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgl, num_pages, dir); + if (!rc) { + log_rdma_mr(INFO, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", + num_pages, dir, rc); + goto dma_map_error; + } + + rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgl, num_pages, + NULL, PAGE_SIZE); + if (rc != num_pages) { + log_rdma_mr(INFO, + "ib_map_mr_sg failed rc = %x num_pages = %x\n", + rc, num_pages); + goto map_mr_error; + } + + ib_update_fast_reg_key(smbdirect_mr->mr, + ib_inc_rkey(smbdirect_mr->mr->rkey)); + reg_wr = &smbdirect_mr->wr; + reg_wr->wr.opcode = IB_WR_REG_MR; + smbdirect_mr->cqe.done = register_mr_done; + reg_wr->wr.wr_cqe = &smbdirect_mr->cqe; + reg_wr->wr.num_sge = 0; + reg_wr->wr.send_flags = IB_SEND_SIGNALED; + reg_wr->mr = smbdirect_mr->mr; + reg_wr->key = smbdirect_mr->mr->rkey; + reg_wr->access = writing ? + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ; + + /* + * There is no need for waiting for complemtion on ib_post_send + * on IB_WR_REG_MR. 
Hardware enforces a barrier and order of execution + * on the next ib_post_send when we actually send I/O to remote peer + */ + rc = ib_post_send(info->id->qp, &reg_wr->wr, &bad_wr); + if (!rc) + return smbdirect_mr; + + log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", + rc, reg_wr->key); + + /* If all failed, attempt to recover this MR by setting it MR_ERROR*/ +map_mr_error: + ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgl, + smbdirect_mr->sgl_count, smbdirect_mr->dir); + +dma_map_error: + smbdirect_mr->state = MR_ERROR; + if (atomic_dec_and_test(&info->mr_used_count)) + wake_up(&info->wait_for_mr_cleanup); + + return NULL; +} + +static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct smbd_mr *smbdirect_mr; + struct ib_cqe *cqe; + + cqe = wc->wr_cqe; + smbdirect_mr = container_of(cqe, struct smbd_mr, cqe); + smbdirect_mr->state = MR_INVALIDATED; + if (wc->status != IB_WC_SUCCESS) { + log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status); + smbdirect_mr->state = MR_ERROR; + } + complete(&smbdirect_mr->invalidate_done); +} + +/* + * Deregister a MR after I/O is done + * This function may wait if remote invalidation is not used + * and we have to locally invalidate the buffer to prevent data is being + * modified by remote peer after upper layer consumes it + */ +int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) +{ + struct ib_send_wr *wr, *bad_wr; + struct smbd_connection *info = smbdirect_mr->conn; + int rc = 0; + + if (smbdirect_mr->need_invalidate) { + /* Need to finish local invalidation before returning */ + wr = &smbdirect_mr->inv_wr; + wr->opcode = IB_WR_LOCAL_INV; + smbdirect_mr->cqe.done = local_inv_done; + wr->wr_cqe = &smbdirect_mr->cqe; + wr->num_sge = 0; + wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey; + wr->send_flags = IB_SEND_SIGNALED; + + init_completion(&smbdirect_mr->invalidate_done); + rc = ib_post_send(info->id->qp, wr, &bad_wr); + if (rc) { + log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); +
smbd_disconnect_rdma_connection(info); + goto done; + } + wait_for_completion(&smbdirect_mr->invalidate_done); + smbdirect_mr->need_invalidate = false; + } else + /* + * For remote invalidation, just set it to MR_INVALIDATED + * and defer to mr_recovery_work to recover the MR for next use + */ + smbdirect_mr->state = MR_INVALIDATED; + + /* + * Schedule the work to do MR recovery for future I/Os + * MR recovery is slow and we don't want it to block the current I/O + */ + queue_work(info->workqueue, &info->mr_recovery_work); + +done: + if (atomic_dec_and_test(&info->mr_used_count)) + wake_up(&info->wait_for_mr_cleanup); + + return rc; +} diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index 27453ef2181f..fdb8df84fa1f 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -90,6 +90,29 @@ struct smbd_connection { int receive_credit_target; int fragment_reassembly_remaining; + /* Memory registrations */ + /* Maximum number of RDMA read/write outstanding on this connection */ + int responder_resources; + /* Maximum number of SGEs in a RDMA write/read */ + int max_frmr_depth; + /* + * If payload is less than or equal to the threshold, + * use RDMA send/recv to send upper layer I/O. + * If payload is more than the threshold, + * use RDMA read/write through memory registration for I/O. 
+ */ + int rdma_readwrite_threshold; + enum ib_mr_type mr_type; + struct list_head mr_list; + spinlock_t mr_list_lock; + /* The number of available MRs ready for memory registration */ + atomic_t mr_ready_count; + atomic_t mr_used_count; + wait_queue_head_t wait_mr; + struct work_struct mr_recovery_work; + /* Used by transport to wait until all MRs are returned */ + wait_queue_head_t wait_for_mr_cleanup; + /* Activity accoutning */ /* Pending reqeusts issued from upper layer */ int smbd_send_pending; @@ -262,6 +285,36 @@ void smbd_destroy(struct smbd_connection *info); int smbd_recv(struct smbd_connection *info, struct msghdr *msg); int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst); +enum mr_state { + MR_READY, + MR_REGISTERED, + MR_INVALIDATED, + MR_ERROR +}; + +struct smbd_mr { + struct smbd_connection *conn; + struct list_head list; + enum mr_state state; + struct ib_mr *mr; + struct scatterlist *sgl; + int sgl_count; + enum dma_data_direction dir; + union { + struct ib_reg_wr wr; + struct ib_send_wr inv_wr; + }; + struct ib_cqe cqe; + bool need_invalidate; + struct completion invalidate_done; +}; + +/* Interfaces to register and deregister MR for RDMA read/write */ +struct smbd_mr *smbd_register_mr( + struct smbd_connection *info, struct page *pages[], int num_pages, + int tailsz, bool writing, bool need_invalidate); +int smbd_deregister_mr(struct smbd_mr *mr); + #else #define cifs_rdma_enabled(server) 0 struct smbd_connection {}; -- cgit v1.2.3 From db223a590d45b5e584a430c46a778699a5825cdd Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:45 -0700 Subject: CIFS: SMBD: Upper layer performs SMB write via RDMA read through memory registration When sending I/O, if size is larger than rdma_readwrite_threshold we prepare to send SMB write packet for a RDMA read via memory registration. The actual I/O is done by remote peer through local RDMA hardware. 
Modify the relevant fields in the packet accordingly, and append a smbd_buffer_descriptor_v1 to the end of the SMB write packet. On write I/O finish, deregister the memory region if this was for a RDMA read. If remote invalidation is not used, the call to smbd_deregister_mr will do local invalidation and possibly wait. Memory region is normally deregistered in MID callback as soon as it's used. There are situations where the MID may not be created on I/O failure, under which memory region is deregistered when write data context is released. Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/cifssmb.c | 7 ++++++ fs/cifs/smb2pdu.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 573a7bef8817..29b5a9bd3128 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1174,6 +1174,9 @@ struct cifs_writedata { pid_t pid; unsigned int bytes; int result; +#ifdef CONFIG_CIFS_SMB_DIRECT + struct smbd_mr *mr; +#endif unsigned int pagesz; unsigned int tailsz; unsigned int credits; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 35dc5bf01ee2..66d1ebfe75dd 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -43,6 +43,7 @@ #include "cifs_unicode.h" #include "cifs_debug.h" #include "fscache.h" +#include "smbdirect.h" #ifdef CONFIG_CIFS_POSIX static struct { @@ -1923,6 +1924,12 @@ cifs_writedata_release(struct kref *refcount) { struct cifs_writedata *wdata = container_of(refcount, struct cifs_writedata, refcount); +#ifdef CONFIG_CIFS_SMB_DIRECT + if (wdata->mr) { + smbd_deregister_mr(wdata->mr); + wdata->mr = NULL; + } +#endif if (wdata->cfile) cifsFileInfo_put(wdata->cfile); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c0dc0491af93..908d7770d15a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -48,6 +48,7 @@ #include "smb2glob.h" 
#include "cifspdu.h" #include "cifs_spnego.h" +#include "smbdirect.h" /* * The following table defines the expected "StructureSize" of SMB2 requests @@ -2728,7 +2729,19 @@ smb2_writev_callback(struct mid_q_entry *mid) wdata->result = -EIO; break; } - +#ifdef CONFIG_CIFS_SMB_DIRECT + /* + * If this wdata has a memory registered, the MR can be freed + * The number of MRs available is limited, it's important to recover + * used MR as soon as I/O is finished. Hold MR longer in the later + * I/O process can possibly result in I/O deadlock due to lack of MR + * to send request on I/O retry + */ + if (wdata->mr) { + smbd_deregister_mr(wdata->mr); + wdata->mr = NULL; + } +#endif if (wdata->result) cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); @@ -2780,7 +2793,42 @@ smb2_async_writev(struct cifs_writedata *wdata, req->DataOffset = cpu_to_le16( offsetof(struct smb2_write_req, Buffer)); req->RemainingBytes = 0; - +#ifdef CONFIG_CIFS_SMB_DIRECT + /* + * If we want to do a server RDMA read, fill in and append + * smbd_buffer_descriptor_v1 to the end of write request + */ + if (server->rdma && wdata->bytes >= + server->smbd_conn->rdma_readwrite_threshold) { + + struct smbd_buffer_descriptor_v1 *v1; + bool need_invalidate = server->dialect == SMB30_PROT_ID; + + wdata->mr = smbd_register_mr( + server->smbd_conn, wdata->pages, + wdata->nr_pages, wdata->tailsz, + false, need_invalidate); + if (!wdata->mr) { + rc = -ENOBUFS; + goto async_writev_out; + } + req->Length = 0; + req->DataOffset = 0; + req->RemainingBytes = + (wdata->nr_pages-1)*PAGE_SIZE + wdata->tailsz; + req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE; + if (need_invalidate) + req->Channel = SMB2_CHANNEL_RDMA_V1; + req->WriteChannelInfoOffset = + offsetof(struct smb2_write_req, Buffer); + req->WriteChannelInfoLength = + sizeof(struct smbd_buffer_descriptor_v1); + v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; + v1->offset = wdata->mr->mr->iova; + v1->token = wdata->mr->mr->rkey; + v1->length = 
wdata->mr->mr->length; + } +#endif /* 4 for rfc1002 length field and 1 for Buffer */ iov[0].iov_len = 4; rfc1002_marker = cpu_to_be32(total_len - 1 + wdata->bytes); @@ -2794,11 +2842,22 @@ smb2_async_writev(struct cifs_writedata *wdata, rqst.rq_npages = wdata->nr_pages; rqst.rq_pagesz = wdata->pagesz; rqst.rq_tailsz = wdata->tailsz; - +#ifdef CONFIG_CIFS_SMB_DIRECT + if (wdata->mr) { + iov[1].iov_len += sizeof(struct smbd_buffer_descriptor_v1); + rqst.rq_npages = 0; + } +#endif cifs_dbg(FYI, "async write at %llu %u bytes\n", wdata->offset, wdata->bytes); +#ifdef CONFIG_CIFS_SMB_DIRECT + /* For RDMA read, I/O size is in RemainingBytes not in Length */ + if (!wdata->mr) + req->Length = cpu_to_le32(wdata->bytes); +#else req->Length = cpu_to_le32(wdata->bytes); +#endif if (wdata->credits) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, -- cgit v1.2.3 From 74dcf418fe344657b0e48a5a4b75f935443fa53f Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:46 -0700 Subject: CIFS: SMBD: Read correct returned data length for RDMA write (SMB read) I/O This patch is for preparing upper layer doing SMB read via RDMA write. When RDMA write is used for SMB read, the returned data length is in DataRemaining in the response packet. Reading it properly by adding a parameter to specify where the returned data length is. Add the definition for memory registration to wdata and return the correct length based on if RDMA write is used.
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/cifsglob.h | 13 +++++++++++-- fs/cifs/cifssmb.c | 8 ++++++-- fs/cifs/smb1ops.c | 4 +++- fs/cifs/smb2ops.c | 12 ++++++++++-- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 29b5a9bd3128..678e638c1e69 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -230,8 +230,14 @@ struct smb_version_operations { __u64 (*get_next_mid)(struct TCP_Server_Info *); /* data offset from read response message */ unsigned int (*read_data_offset)(char *); - /* data length from read response message */ - unsigned int (*read_data_length)(char *); + /* + * Data length from read response message + * When in_remaining is true, the returned data length is in + * message field DataRemaining for out-of-band data read (e.g through + * Memory Registration RDMA write in SMBD). + * Otherwise, the returned data length is in message field DataLength. 
+ */ + unsigned int (*read_data_length)(char *, bool in_remaining); /* map smb to linux error */ int (*map_error)(char *, bool); /* find mid corresponding to the response message */ @@ -1152,6 +1158,9 @@ struct cifs_readdata { struct cifs_readdata *rdata, struct iov_iter *iter); struct kvec iov[2]; +#ifdef CONFIG_CIFS_SMB_DIRECT + struct smbd_mr *mr; +#endif unsigned int pagesz; unsigned int tailsz; unsigned int credits; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 66d1ebfe75dd..49cf999f3d46 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1455,6 +1455,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) struct cifs_readdata *rdata = mid->callback_data; char *buf = server->smallbuf; unsigned int buflen = get_rfc1002_length(buf) + 4; + bool use_rdma_mr = false; cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n", __func__, mid->mid, rdata->offset, rdata->bytes); @@ -1543,8 +1544,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) rdata->iov[0].iov_base, server->total_read); /* how much data is in the response? 
*/ - data_len = server->ops->read_data_length(buf); - if (data_offset + data_len > buflen) { +#ifdef CONFIG_CIFS_SMB_DIRECT + use_rdma_mr = rdata->mr; +#endif + data_len = server->ops->read_data_length(buf, use_rdma_mr); + if (!use_rdma_mr && (data_offset + data_len > buflen)) { /* data_len is corrupt -- discard frame */ rdata->result = -EIO; return cifs_readv_discard(server, mid); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index a723df3e0197..3d495e440c87 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -87,9 +87,11 @@ cifs_read_data_offset(char *buf) } static unsigned int -cifs_read_data_length(char *buf) +cifs_read_data_length(char *buf, bool in_remaining) { READ_RSP *rsp = (READ_RSP *)buf; + /* It's a bug reading remaining data for SMB1 packets */ + WARN_ON(in_remaining); return (le16_to_cpu(rsp->DataLengthHigh) << 16) + le16_to_cpu(rsp->DataLength); } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c8fa4c4ccc50..e3393ff5d458 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -957,9 +957,13 @@ smb2_read_data_offset(char *buf) } static unsigned int -smb2_read_data_length(char *buf) +smb2_read_data_length(char *buf, bool in_remaining) { struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf; + + if (in_remaining) + return le32_to_cpu(rsp->DataRemaining); + return le32_to_cpu(rsp->DataLength); } @@ -2421,6 +2425,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, struct iov_iter iter; struct kvec iov; int length; + bool use_rdma_mr = false; if (shdr->Command != SMB2_READ) { cifs_dbg(VFS, "only big read responses are supported\n"); @@ -2447,7 +2452,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, } data_offset = server->ops->read_data_offset(buf) + 4; - data_len = server->ops->read_data_length(buf); +#ifdef CONFIG_CIFS_SMB_DIRECT + use_rdma_mr = rdata->mr; +#endif + data_len = server->ops->read_data_length(buf, use_rdma_mr); if (data_offset < server->vals->read_rsp_size) { 
/* -- cgit v1.2.3 From bd3dcc6a22a9186ed78da51ce09e889803552189 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:47 -0700 Subject: CIFS: SMBD: Upper layer performs SMB read via RDMA write through memory registration If I/O size is larger than rdma_readwrite_threshold, use RDMA write for SMB read by specifying channel SMB2_CHANNEL_RDMA_V1 or SMB2_CHANNEL_RDMA_V1_INVALIDATE in the SMB packet, depending on SMB dialect used. Append a smbd_buffer_descriptor_v1 to the end of the SMB packet and fill in other values to indicate this SMB read uses RDMA write. There is no need to read from the transport for incoming payload. At the time SMB read response comes back, the data is already transferred and placed in the pages by RDMA hardware. When SMB read is finished, deregister the memory regions if RDMA write is used for this SMB read. smbd_deregister_mr may need to do local invalidation and sleep, if server remote invalidation is not used. There are situations where the MID may not be created on I/O failure, under which memory region is deregistered when read data context is released. 
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/file.c | 17 +++++++++++++++-- fs/cifs/smb2pdu.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3a85df2a9baf..7cee97b93a61 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -42,7 +42,7 @@ #include "cifs_debug.h" #include "cifs_fs_sb.h" #include "fscache.h" - +#include "smbdirect.h" static inline int cifs_convert_flags(unsigned int flags) { @@ -2902,7 +2902,12 @@ cifs_readdata_release(struct kref *refcount) { struct cifs_readdata *rdata = container_of(refcount, struct cifs_readdata, refcount); - +#ifdef CONFIG_CIFS_SMB_DIRECT + if (rdata->mr) { + smbd_deregister_mr(rdata->mr); + rdata->mr = NULL; + } +#endif if (rdata->cfile) cifsFileInfo_put(rdata->cfile); @@ -3031,6 +3036,10 @@ uncached_fill_pages(struct TCP_Server_Info *server, } if (iter) result = copy_page_from_iter(page, 0, n, iter); +#ifdef CONFIG_CIFS_SMB_DIRECT + else if (rdata->mr) + result = n; +#endif else result = cifs_read_page_from_socket(server, page, n); if (result < 0) @@ -3598,6 +3607,10 @@ readpages_fill_pages(struct TCP_Server_Info *server, if (iter) result = copy_page_from_iter(page, 0, n, iter); +#ifdef CONFIG_CIFS_SMB_DIRECT + else if (rdata->mr) + result = n; +#endif else result = cifs_read_page_from_socket(server, page, n); if (result < 0) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 908d7770d15a..bee0871d6dda 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2458,7 +2458,40 @@ smb2_new_read_req(void **buf, unsigned int *total_len, req->MinimumCount = 0; req->Length = cpu_to_le32(io_parms->length); req->Offset = cpu_to_le64(io_parms->offset); +#ifdef CONFIG_CIFS_SMB_DIRECT + /* + * If we want to do a RDMA write, fill in and append + * smbd_buffer_descriptor_v1 to the end of read request + */ + if (server->rdma && rdata && + rdata->bytes >= 
server->smbd_conn->rdma_readwrite_threshold) { + + struct smbd_buffer_descriptor_v1 *v1; + bool need_invalidate = + io_parms->tcon->ses->server->dialect == SMB30_PROT_ID; + + rdata->mr = smbd_register_mr( + server->smbd_conn, rdata->pages, + rdata->nr_pages, rdata->tailsz, + true, need_invalidate); + if (!rdata->mr) + return -ENOBUFS; + + req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE; + if (need_invalidate) + req->Channel = SMB2_CHANNEL_RDMA_V1; + req->ReadChannelInfoOffset = + offsetof(struct smb2_read_plain_req, Buffer); + req->ReadChannelInfoLength = + sizeof(struct smbd_buffer_descriptor_v1); + v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; + v1->offset = rdata->mr->mr->iova; + v1->token = rdata->mr->mr->rkey; + v1->length = rdata->mr->mr->length; + *total_len += sizeof(*v1) - 1; + } +#endif if (request_type & CHAINED_REQUEST) { if (!(request_type & END_OF_CHAIN)) { /* next 8-byte aligned request */ @@ -2537,7 +2570,17 @@ smb2_readv_callback(struct mid_q_entry *mid) if (rdata->result != -ENODATA) rdata->result = -EIO; } - +#ifdef CONFIG_CIFS_SMB_DIRECT + /* + * If this rdata has a memmory registered, the MR can be freed + * MR needs to be freed as soon as I/O finishes to prevent deadlock + * because they have limited number and are used for future I/Os + */ + if (rdata->mr) { + smbd_deregister_mr(rdata->mr); + rdata->mr = NULL; + } +#endif if (rdata->result) cifs_stats_fail_inc(tcon, SMB2_READ_HE); -- cgit v1.2.3 From 08a3b9690fd9e923663a32e314b0098af1393b84 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:48 -0700 Subject: CIFS: SMBD: Add SMB Direct debug counters For debugging and troubleshooting, export SMBDirect debug counters to /proc/fs/cifs/DebugData. 
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/cifs_debug.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index e6045fd5c856..05be9b47eb0c 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -155,6 +155,72 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) list_for_each(tmp1, &cifs_tcp_ses_list) { server = list_entry(tmp1, struct TCP_Server_Info, tcp_ses_list); + +#ifdef CONFIG_CIFS_SMB_DIRECT + if (!server->rdma) + goto skip_rdma; + + seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " + "transport status: %x", + server->smbd_conn->protocol, + server->smbd_conn->transport_status); + seq_printf(m, "\nConn receive_credit_max: %x " + "send_credit_target: %x max_send_size: %x", + server->smbd_conn->receive_credit_max, + server->smbd_conn->send_credit_target, + server->smbd_conn->max_send_size); + seq_printf(m, "\nConn max_fragmented_recv_size: %x " + "max_fragmented_send_size: %x max_receive_size:%x", + server->smbd_conn->max_fragmented_recv_size, + server->smbd_conn->max_fragmented_send_size, + server->smbd_conn->max_receive_size); + seq_printf(m, "\nConn keep_alive_interval: %x " + "max_readwrite_size: %x rdma_readwrite_threshold: %x", + server->smbd_conn->keep_alive_interval, + server->smbd_conn->max_readwrite_size, + server->smbd_conn->rdma_readwrite_threshold); + seq_printf(m, "\nDebug count_get_receive_buffer: %x " + "count_put_receive_buffer: %x count_send_empty: %x", + server->smbd_conn->count_get_receive_buffer, + server->smbd_conn->count_put_receive_buffer, + server->smbd_conn->count_send_empty); + seq_printf(m, "\nRead Queue count_reassembly_queue: %x " + "count_enqueue_reassembly_queue: %x " + "count_dequeue_reassembly_queue: %x " + "fragment_reassembly_remaining: %x " + "reassembly_data_length: %x " + "reassembly_queue_length: %x", + 
server->smbd_conn->count_reassembly_queue, + server->smbd_conn->count_enqueue_reassembly_queue, + server->smbd_conn->count_dequeue_reassembly_queue, + server->smbd_conn->fragment_reassembly_remaining, + server->smbd_conn->reassembly_data_length, + server->smbd_conn->reassembly_queue_length); + seq_printf(m, "\nCurrent Credits send_credits: %x " + "receive_credits: %x receive_credit_target: %x", + atomic_read(&server->smbd_conn->send_credits), + atomic_read(&server->smbd_conn->receive_credits), + server->smbd_conn->receive_credit_target); + seq_printf(m, "\nPending send_pending: %x send_payload_pending:" + " %x smbd_send_pending: %x smbd_recv_pending: %x", + atomic_read(&server->smbd_conn->send_pending), + atomic_read(&server->smbd_conn->send_payload_pending), + server->smbd_conn->smbd_send_pending, + server->smbd_conn->smbd_recv_pending); + seq_printf(m, "\nReceive buffers count_receive_queue: %x " + "count_empty_packet_queue: %x", + server->smbd_conn->count_receive_queue, + server->smbd_conn->count_empty_packet_queue); + seq_printf(m, "\nMR responder_resources: %x " + "max_frmr_depth: %x mr_type: %x", + server->smbd_conn->responder_resources, + server->smbd_conn->max_frmr_depth, + server->smbd_conn->mr_type); + seq_printf(m, "\nMR mr_ready_count: %x mr_used_count: %x", + atomic_read(&server->smbd_conn->mr_ready_count), + atomic_read(&server->smbd_conn->mr_used_count)); +skip_rdma: +#endif seq_printf(m, "\nNumber of credits: %d", server->credits); i++; list_for_each(tmp2, &server->smb_ses_list) { -- cgit v1.2.3 From 8801e902337151039a87d87789d1408c4eccc3e7 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 22 Nov 2017 17:38:49 -0700 Subject: CIFS: SMBD: Disable signing on SMB direct transport Currently the CIFS SMB Direct implementation (experimental) doesn't properly support signing. Disable it when SMB Direct is in use for transport. Signing will be enabled in future after it is implemented. 
Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Reviewed-by: Ronnie Sahlberg --- fs/cifs/connect.c | 8 ++++++++ fs/cifs/smb2pdu.c | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1677401660d0..0efd22e75ac7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1968,6 +1968,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } +#ifdef CONFIG_CIFS_SMB_DIRECT + if (vol->rdma && vol->sign) { + cifs_dbg(VFS, "Currently SMB direct doesn't support signing." + " This is being fixed\n"); + goto cifs_parse_mount_err; + } +#endif + #ifndef CONFIG_KEYS /* Muliuser mounts require CONFIG_KEYS support */ if (vol->multiuser) { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index bee0871d6dda..a3e67beb73e2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -616,6 +616,11 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) cifs_dbg(FYI, "validate negotiate\n"); +#ifdef CONFIG_CIFS_SMB_DIRECT + if (tcon->ses->server->rdma) + return 0; +#endif + /* * validation ioctl must be signed, so no point sending this if we * can not sign it (ie are not known user). 
Even if signing is not -- cgit v1.2.3 From 9084432c316b76f697732ac806f706a5a436aae1 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Mon, 18 Dec 2017 21:30:06 +0800 Subject: CIFS: SMBD: _smbd_get_connection() can be static Fixes: 07495ff5d9bc ("CIFS: SMBD: Establish SMB Direct connection") Signed-off-by: Fengguang Wu Signed-off-by: Steve French Acked-by: Long Li --- fs/cifs/smbdirect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 731577d4317f..f527e22650f5 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1599,7 +1599,7 @@ out1: } /* Create a SMBD connection, called by upper layer */ -struct smbd_connection *_smbd_get_connection( +static struct smbd_connection *_smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port) { int rc; -- cgit v1.2.3 From 9aca7e454415f7878b28524e76bebe1170911a88 Mon Sep 17 00:00:00 2001 From: Daniel N Pettersson Date: Thu, 11 Jan 2018 16:00:12 +0100 Subject: cifs: Fix autonegotiate security settings mismatch Autonegotiation gives a security settings mismatch error if the SMB server selects an SMBv3 dialect that isn't SMB3.02. The exact error is "protocol revalidation - security settings mismatch". This can be tested using Samba v4.2 or by setting the global Samba setting max protocol = SMB3_00. The check that fails in smb3_validate_negotiate is the dialect verification of the negotiate info response. This is because it tries to verify against the protocol_id in the global smbdefault_values. The protocol_id in smbdefault_values is SMB3.02. In SMB2_negotiate the protocol_id in smbdefault_values isn't updated, it is global so it probably shouldn't be, but server->dialect is. This patch changes the check in smb3_validate_negotiate to use server->dialect instead of server->vals->protocol_id. The patch works with autonegotiate and when using a specific version in the vers mount option. 
Signed-off-by: Daniel N Pettersson Signed-off-by: Steve French CC: Stable --- fs/cifs/smb2pdu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a3e67beb73e2..7900aec7f92f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -700,8 +700,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } /* check validate negotiate info response matches what we got earlier */ - if (pneg_rsp->Dialect != - cpu_to_le16(tcon->ses->server->vals->protocol_id)) + if (pneg_rsp->Dialect != cpu_to_le16(tcon->ses->server->dialect)) goto vneg_out; if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) -- cgit v1.2.3 From e36c048a9bd853b64c2b32a2ed90be2eff9bbd62 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jan 2018 21:51:05 +0100 Subject: CIFS: SMBD: work around gcc -Wmaybe-uninitialized warning GCC versions from 4.9 to 6.3 produce a false-positive warning when dealing with a conditional spin_lock_irqsave(): fs/cifs/smbdirect.c: In function 'smbd_recv_buf': include/linux/spinlock.h:260:3: warning: 'flags' may be used uninitialized in this function [-Wmaybe-uninitialized] This function calls some sleeping interfaces, so it is clear that it does not get called with interrupts disabled and there is no need to save the irq state before taking the spinlock. This lets us remove the variable, which makes the function slightly more efficient and avoids the warning. A further cleanup could do the same change for other functions in this file, but I did not want to take this too far for now. 
Fixes: ac69f66e54ca ("CIFS: SMBD: Implement function to receive data via RDMA receive") Signed-off-by: Arnd Bergmann Signed-off-by: Steve French --- fs/cifs/smbdirect.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index f527e22650f5..f9234ed83a60 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1862,7 +1862,6 @@ int smbd_recv_buf(struct smbd_connection *info, char *buf, unsigned int size) int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; - unsigned long flags; again: if (info->transport_status != SMBD_CONNECTED) { @@ -1935,15 +1934,13 @@ again: * end of the queue */ if (!queue_length) - spin_lock_irqsave( - &info->reassembly_queue_lock, - flags); + spin_lock_irq( + &info->reassembly_queue_lock); list_del(&response->list); queue_removed++; if (!queue_length) - spin_unlock_irqrestore( - &info->reassembly_queue_lock, - flags); + spin_unlock_irq( + &info->reassembly_queue_lock); info->count_reassembly_queue--; info->count_dequeue_reassembly_queue++; @@ -1963,10 +1960,10 @@ again: to_read, data_read, offset); } - spin_lock_irqsave(&info->reassembly_queue_lock, flags); + spin_lock_irq(&info->reassembly_queue_lock); info->reassembly_data_length -= data_read; info->reassembly_queue_length -= queue_removed; - spin_unlock_irqrestore(&info->reassembly_queue_lock, flags); + spin_unlock_irq(&info->reassembly_queue_lock); info->first_entry_offset = offset; log_read(INFO, "returning to thread data_read=%d " -- cgit v1.2.3 From 37e12f55515218c69284600e32dcb4fcacc45f8b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 17 Jan 2018 09:52:39 +0000 Subject: cifs: remove redundant duplicated assignment of pointer 'node' Node is assigned twice to rb_first(root), first during declaration time and second after taking a spin lock, so we have a duplicated assignment.
Remove the first assignment because it is redundant and also not protected by the spin lock. Cleans up clang warning: fs/cifs/connect.c:4435:18: warning: Value stored to 'node' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0efd22e75ac7..63c5d85fe25e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4432,7 +4432,7 @@ cifs_prune_tlinks(struct work_struct *work) struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info, prune_tlinks.work); struct rb_root *root = &cifs_sb->tlink_tree; - struct rb_node *node = rb_first(root); + struct rb_node *node; struct rb_node *tmp; struct tcon_link *tlink; -- cgit v1.2.3 From 4c7e95b1b3286a2ba704790018a35510344958f2 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 23 Jan 2018 11:13:17 +0100 Subject: mtd: nand: gpmi: Fix subpage reads Commit 25f815f66a14 ("mtd: nand: force drivers to explicitly send READ/PROG commands") added a call to nand_read_page_op() in gpmi_ecc_read_page(), which means this function now sends a READ0 command and places the data pointer at the beginning of the page. This logic is breaking gpmi_ecc_read_subpage() which was calling gpmi_ecc_read_page() and expected it to only retrieve the data without sending the READ0 command. Create a gpmi_ecc_read_page_data() helper which only does the data retrieval and ECC correction steps and implement gpmi_ecc_read_page() as a wrapper that calls nand_read_page_op()+gpmi_ecc_read_page_data(). This way, gpmi_ecc_read_subpage() can call gpmi_ecc_read_page_data() which restores the logic we had before commit 25f815f66a14 ("mtd: nand: force drivers to explicitly send READ/PROG commands").
Fixes: 25f815f66a14 ("mtd: nand: force drivers to explicitly send READ/PROG commands") Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal Acked-by: Han Xu --- drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index b51db8c85405..ab9a0a2ed3b2 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -1029,11 +1029,13 @@ static void block_mark_swapping(struct gpmi_nand_data *this, p[1] = (p[1] & mask) | (from_oob >> (8 - bit)); } -static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, - uint8_t *buf, int oob_required, int page) +static int gpmi_ecc_read_page_data(struct nand_chip *chip, + uint8_t *buf, int oob_required, + int page) { struct gpmi_nand_data *this = nand_get_controller_data(chip); struct bch_geometry *nfc_geo = &this->bch_geometry; + struct mtd_info *mtd = nand_to_mtd(chip); void *payload_virt; dma_addr_t payload_phys; void *auxiliary_virt; @@ -1043,8 +1045,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, unsigned int max_bitflips = 0; int ret; - nand_read_page_op(chip, page, 0, NULL, 0); - dev_dbg(this->dev, "page number is : %d\n", page); ret = read_page_prepare(this, buf, nfc_geo->payload_size, this->payload_virt, this->payload_phys, @@ -1178,6 +1178,14 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, return max_bitflips; } +static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, + uint8_t *buf, int oob_required, int page) +{ + nand_read_page_op(chip, page, 0, NULL, 0); + + return gpmi_ecc_read_page_data(chip, buf, oob_required, page); +} + /* Fake a virtual small page for the subpage read */ static int gpmi_ecc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offs, uint32_t len, uint8_t *buf, int page) @@ -1256,7 +1264,7 @@ 
static int gpmi_ecc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, /* Read the subpage now */ this->swap_block_mark = false; - max_bitflips = gpmi_ecc_read_page(mtd, chip, buf, 0, page); + max_bitflips = gpmi_ecc_read_page_data(chip, buf, 0, page); /* Restore */ writel(r1_old, bch_regs + HW_BCH_FLASH0LAYOUT0); -- cgit v1.2.3 From f4c6cd1a7f2275d5bc0e494b21fff26f8dde80f0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 24 Jan 2018 23:49:31 +0100 Subject: mtd: nand: sunxi: Fix ECC strength choice When the requested ECC strength does not exactly match the strengths supported by the ECC engine, the driver is selecting the closest strength meeting the 'selected_strength > requested_strength' constraint. Fix the fact that, in this particular case, ecc->strength value was not updated to match the 'selected_strength'. For instance, one can encounter this issue when no ECC requirement is filled in the device tree while the NAND chip minimum requirement is not a strength/step_size combo natively supported by the ECC engine. Fixes: 1fef62c1423b ("mtd: nand: add sunxi NAND flash controller support") Cc: Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/sunxi_nand.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 2275fbedfb2a..f5a55c63935c 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1858,8 +1858,14 @@ static int sunxi_nand_hw_common_ecc_ctrl_init(struct mtd_info *mtd, /* Add ECC info retrieval from DT */ for (i = 0; i < ARRAY_SIZE(strengths); i++) { - if (ecc->strength <= strengths[i]) + if (ecc->strength <= strengths[i]) { + /* + * Update ecc->strength value with the actual strength + * that will be used by the ECC engine. 
+ */ + ecc->strength = strengths[i]; break; + } } if (i >= ARRAY_SIZE(strengths)) { -- cgit v1.2.3 From 617ab45c9a8900e64a78b43696c02598b8cad68b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 24 Jan 2018 11:36:29 +0100 Subject: x86/hyperv: Stop suppressing X86_FEATURE_PCID When hypercall-based TLB flush was enabled for Hyper-V guests PCID feature was deliberately suppressed as a precaution: back then PCID was never exposed to Hyper-V guests and it wasn't clear what will happen if some day it becomes available. The day came and PCID/INVPCID features are already exposed on certain Hyper-V hosts. From TLFS (as of 5.0b) it is unclear how TLB flush hypercalls combine with PCID. In particular the usage of PCID is per-cpu based: the same mm gets different CR3 values on different CPUs. If the hypercall does exact matching this will fail. However, this is not the case. David Zhang explains: "In practice, the AddressSpace argument is ignored on any VM that supports PCIDs. Architecturally, the AddressSpace argument must match the CR3 with PCID bits stripped out (i.e., the low 12 bits of AddressSpace should be 0 in long mode). The flush hypercalls flush all PCIDs for the specified AddressSpace." With this, PCID can be enabled. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Cc: David Zhang Cc: Stephen Hemminger Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: "K. Y. Srinivasan" Cc: Aditya Bhandari Link: https://lkml.kernel.org/r/20180124103629.29980-1-vkuznets@redhat.com --- arch/x86/hyperv/mmu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 9cc9e1c1e2db..56c9ebac946f 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -137,7 +137,12 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, } if (info->mm) { + /* + * AddressSpace argument must match the CR3 with PCID bits + * stripped out. 
+ */ flush->address_space = virt_to_phys(info->mm->pgd); + flush->address_space &= CR3_ADDR_MASK; flush->flags = 0; } else { flush->address_space = 0; @@ -219,7 +224,12 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, } if (info->mm) { + /* + * AddressSpace argument must match the CR3 with PCID bits + * stripped out. + */ flush->address_space = virt_to_phys(info->mm->pgd); + flush->address_space &= CR3_ADDR_MASK; flush->flags = 0; } else { flush->address_space = 0; @@ -278,8 +288,6 @@ void hyperv_setup_mmu_ops(void) if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) return; - setup_clear_cpu_cap(X86_FEATURE_PCID); - if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) { pr_info("Using hypercall for remote TLB flush\n"); pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others; -- cgit v1.2.3 From 782bf20c2a1795f35dcd526aa8005cd1870745da Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 24 Jan 2018 20:29:14 +0000 Subject: x86: Remove unused IOMMU_STRESS Kconfig Last use of IOMMU_STRESS was removed in commit 29b68415e335 ("x86: amd_iommu: move to drivers/iommu/"). 6 years later the Kconfig entry is definitely due for removal. Signed-off-by: Corentin Labbe Signed-off-by: Thomas Gleixner Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/1516825754-28415-1-git-send-email-clabbe@baylibre.com --- arch/x86/Kconfig.debug | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 6293a8768a91..49fb85a5a2b7 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -169,14 +169,6 @@ config IOMMU_DEBUG options. See Documentation/x86/x86_64/boot-options.txt for more details. -config IOMMU_STRESS - bool "Enable IOMMU stress-test mode" - ---help--- - This option disables various optimizations in IOMMU related - code to do real stress testing of the IOMMU code. This option - will cause a performance drop and should only be enabled for - testing. 
- config IOMMU_LEAK bool "IOMMU leak tracing" depends on IOMMU_DEBUG && DMA_API_DEBUG -- cgit v1.2.3 From b3ab8adc8b5a72c2cc60ea936a870143a9b8c100 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:43:03 -0800 Subject: perf vendor events intel: Update Broadwell events to V22 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/broadwell/cache.json | 555 ++++++--- .../arch/x86/broadwell/floating-point.json | 108 +- .../pmu-events/arch/x86/broadwell/frontend.json | 138 +-- .../perf/pmu-events/arch/x86/broadwell/memory.json | 210 +++- .../perf/pmu-events/arch/x86/broadwell/other.json | 20 +- .../pmu-events/arch/x86/broadwell/pipeline.json | 1216 ++++++++++---------- .../arch/x86/broadwell/virtual-memory.json | 150 +-- 7 files changed, 1401 insertions(+), 996 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwell/cache.json b/tools/perf/pmu-events/arch/x86/broadwell/cache.json index 73688a9dab2a..bba3152ec54a 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/cache.json @@ -10,13 +10,30 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. 
Only not rejected loads are counted.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "UMask": "0x22", + "EventName": "L2_RQSTS.RFO_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "BriefDescription": "RFO requests that miss L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache misses when fetching instructions.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x27", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests that miss L2 cache.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -29,6 +46,43 @@ "BriefDescription": "L2 prefetch requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x3f", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "All requests that miss L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. 
Only not rejected loads are counted.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x41", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x42", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x44", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.", "EventCode": "0x24", @@ -69,6 +123,15 @@ "BriefDescription": "L2 code requests", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xe7", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests to L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.", "EventCode": "0x24", @@ -79,6 +142,15 @@ "BriefDescription": "Requests from L2 hardware prefetchers", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xff", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "All L2 requests.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of WB requests that hit L2 cache.", "EventCode": "0x27", @@ -130,6 +202,27 @@ "CounterMask": "1", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "Counter": "2", + "UMask": 
"0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", "EventCode": "0x51", @@ -152,7 +245,30 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). 
A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "BDM76", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "BDM76", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x2", @@ -174,26 +290,26 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", + "UMask": "0x4", "Errata": "BDM76", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). 
See corresponding Umask under OFFCORE_REQUESTS.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", + "UMask": "0x8", "Errata": "BDM76", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -208,18 +324,6 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x4", - "Errata": "BDM76", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).", "EventCode": "0x63", @@ -261,7 +365,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable \"Demands\" and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "PublicDescription": "This event counts the demand and prefetch data reads. 
All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "EventCode": "0xB0", "Counter": "0,1,2,3", "UMask": "0x8", @@ -280,153 +384,162 @@ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "CounterHTOff": "0,1,2,3" + }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x11", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts store uops with true STLB miss retired to the architected path. 
True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x12", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with locked access retired to the architected path.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x21", "Errata": "BDM35", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x41", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x42", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" }, { "PEBS": "1", - "PublicDescription": "This event counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. 
This event also counts SW prefetches.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x81", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This event counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x82", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. 
This event also counts SW prefetches independent of the actual data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "2000003", - "BriefDescription": "Retired load uops with L1 cache hits as data sources.", + "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x2", "Errata": "BDM35", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with L2 cache hits as data sources.", + "BriefDescription": "Retired load uops with L2 cache hits as data sources. 
(Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x4", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops misses in L1 cache as data sources.", + "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. 
Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "50021", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -438,84 +551,83 @@ "Errata": "BDM100, BDE70", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS", "SampleAfterValue": "100007", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. 
Data source is reported for the Low-half load.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. 
(Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. 
(Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x8", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.", + "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI).", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. 
This is a precise event.", "EventCode": "0xD3", "Counter": "0,1,2,3", "UMask": "0x1", "Errata": "BDE70, BDM100", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "SampleAfterValue": "100007", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -659,119 +771,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x22", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x44", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x27", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe7", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests to L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": 
"200003", - "BriefDescription": "All requests that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "All L2 requests.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "BDM76", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "PublicDescription": "Counts demand data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010001 ", "Counter": "0,1,2,3", @@ -784,6 +784,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020001 ", "Counter": "0,1,2,3", @@ -796,6 +797,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020001 ", "Counter": "0,1,2,3", @@ -808,6 +810,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020001 ", "Counter": "0,1,2,3", @@ -820,6 +823,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020001 ", "Counter": "0,1,2,3", @@ -832,6 +836,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, 
and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020001 ", "Counter": "0,1,2,3", @@ -844,6 +849,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020001 ", "Counter": "0,1,2,3", @@ -856,6 +862,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0001 ", "Counter": "0,1,2,3", @@ -868,6 +875,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0001 ", "Counter": "0,1,2,3", @@ -880,6 +888,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0001 ", "Counter": "0,1,2,3", @@ -892,6 +901,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0001 ", "Counter": "0,1,2,3", @@ -904,6 +914,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0001 ", "Counter": "0,1,2,3", @@ -916,6 +927,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0001 ", "Counter": "0,1,2,3", @@ -928,6 +940,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010002 ", "Counter": "0,1,2,3", @@ -940,6 +953,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0002 ", "Counter": "0,1,2,3", @@ -952,6 +966,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0002 ", "Counter": "0,1,2,3", @@ -964,6 +979,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0002 ", "Counter": "0,1,2,3", @@ -976,6 +992,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0002 ", "Counter": "0,1,2,3", @@ -988,6 +1005,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0002 ", "Counter": "0,1,2,3", @@ -1000,6 +1018,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0002 ", "Counter": "0,1,2,3", @@ -1012,6 +1031,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010004 ", "Counter": "0,1,2,3", @@ -1024,6 +1044,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020004 ", "Counter": "0,1,2,3", @@ -1036,6 +1057,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020004 ", "Counter": "0,1,2,3", @@ -1048,6 +1070,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020004 ", "Counter": "0,1,2,3", @@ -1060,6 +1083,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020004 ", "Counter": "0,1,2,3", @@ -1072,6 +1096,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020004 ", "Counter": "0,1,2,3", @@ -1084,6 +1109,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020004 ", "Counter": "0,1,2,3", @@ -1096,6 +1122,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0004 ", "Counter": "0,1,2,3", @@ -1108,6 +1135,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0004 ", "Counter": "0,1,2,3", @@ -1120,6 +1148,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0004 ", "Counter": "0,1,2,3", @@ -1132,6 +1161,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0004 ", "Counter": "0,1,2,3", @@ -1144,6 +1174,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0004 ", "Counter": "0,1,2,3", @@ -1156,6 +1187,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0004 ", "Counter": "0,1,2,3", @@ -1168,6 +1200,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010008 ", "Counter": "0,1,2,3", @@ -1180,6 +1213,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020008 ", "Counter": "0,1,2,3", @@ -1192,6 +1226,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020008 ", "Counter": "0,1,2,3", @@ -1204,6 +1239,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020008 ", "Counter": "0,1,2,3", @@ -1216,6 +1252,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020008 ", "Counter": "0,1,2,3", @@ -1228,6 +1265,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020008 ", "Counter": "0,1,2,3", @@ -1240,6 +1278,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020008 ", "Counter": "0,1,2,3", @@ -1252,6 +1291,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0008 ", "Counter": "0,1,2,3", @@ -1264,6 +1304,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0008 ", "Counter": "0,1,2,3", @@ -1276,6 +1317,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0008 ", "Counter": "0,1,2,3", @@ -1288,6 +1330,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0008 ", "Counter": "0,1,2,3", @@ -1300,6 +1343,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0008 ", "Counter": "0,1,2,3", @@ -1312,6 +1356,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0008 ", "Counter": "0,1,2,3", @@ -1324,6 +1369,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010010 ", "Counter": "0,1,2,3", @@ -1336,6 +1382,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020010 ", "Counter": "0,1,2,3", @@ -1348,6 +1395,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020010 ", "Counter": "0,1,2,3", @@ -1360,6 +1408,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020010 ", "Counter": "0,1,2,3", @@ -1372,6 +1421,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020010 ", "Counter": "0,1,2,3", @@ -1384,6 +1434,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020010 ", "Counter": "0,1,2,3", @@ -1396,6 +1447,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020010 ", "Counter": "0,1,2,3", @@ -1408,6 +1460,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0010 ", "Counter": "0,1,2,3", @@ -1420,6 +1473,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0010 ", "Counter": "0,1,2,3", @@ -1432,6 +1486,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0010 ", "Counter": "0,1,2,3", @@ -1444,6 +1499,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0010 ", "Counter": "0,1,2,3", @@ -1456,6 +1512,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0010 ", "Counter": "0,1,2,3", @@ -1468,6 +1525,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0010 ", "Counter": "0,1,2,3", @@ -1480,6 +1538,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010020 ", "Counter": "0,1,2,3", @@ -1492,6 +1551,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020020 ", "Counter": "0,1,2,3", @@ -1504,6 +1564,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020020 ", "Counter": "0,1,2,3", @@ -1516,6 +1577,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020020 ", "Counter": "0,1,2,3", @@ -1528,6 +1590,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020020 ", "Counter": "0,1,2,3", @@ -1540,6 +1603,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020020 ", "Counter": "0,1,2,3", @@ -1552,6 +1616,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020020 ", "Counter": "0,1,2,3", @@ -1564,6 +1629,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0020 ", "Counter": "0,1,2,3", @@ -1576,6 +1642,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0020 ", "Counter": "0,1,2,3", @@ -1588,6 +1655,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0020 ", "Counter": "0,1,2,3", @@ -1600,6 +1668,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0020 ", "Counter": "0,1,2,3", @@ -1612,6 +1681,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0020 ", "Counter": "0,1,2,3", @@ -1624,6 +1694,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0020 ", "Counter": "0,1,2,3", @@ -1636,6 +1707,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010040 ", "Counter": "0,1,2,3", @@ -1648,6 +1720,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020040 ", "Counter": "0,1,2,3", @@ -1660,6 +1733,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020040 ", "Counter": "0,1,2,3", @@ -1672,6 +1746,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020040 ", "Counter": "0,1,2,3", @@ -1684,6 +1759,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020040 ", "Counter": "0,1,2,3", @@ -1696,6 +1772,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020040 ", "Counter": "0,1,2,3", @@ -1708,6 +1785,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020040 ", "Counter": "0,1,2,3", @@ -1720,6 +1798,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0040 ", "Counter": "0,1,2,3", @@ -1732,6 +1811,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0040 ", "Counter": "0,1,2,3", @@ -1744,6 +1824,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0040 ", "Counter": "0,1,2,3", @@ -1756,6 +1837,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0040 ", "Counter": "0,1,2,3", @@ -1768,6 +1850,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0040 ", "Counter": "0,1,2,3", @@ -1780,6 +1863,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0040 ", "Counter": "0,1,2,3", @@ -1792,6 +1876,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010080 ", "Counter": "0,1,2,3", @@ -1804,6 +1889,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020080 ", "Counter": "0,1,2,3", @@ -1816,6 +1902,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020080 ", "Counter": "0,1,2,3", @@ -1828,6 +1915,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020080 ", "Counter": "0,1,2,3", @@ -1840,6 +1928,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020080 ", "Counter": "0,1,2,3", @@ -1852,6 +1941,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020080 ", "Counter": "0,1,2,3", @@ -1864,6 +1954,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020080 ", "Counter": "0,1,2,3", @@ -1876,6 +1967,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0080 ", "Counter": "0,1,2,3", @@ -1888,6 +1980,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0080 ", "Counter": "0,1,2,3", @@ -1900,6 +1993,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0080 ", "Counter": "0,1,2,3", @@ -1912,6 +2006,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0080 ", "Counter": "0,1,2,3", @@ -1924,6 +2019,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0080 ", "Counter": "0,1,2,3", @@ -1936,6 +2032,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0080 ", "Counter": "0,1,2,3", @@ -1948,6 +2045,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010100 ", "Counter": "0,1,2,3", @@ -1960,6 +2058,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020100 ", "Counter": "0,1,2,3", @@ -1972,6 +2071,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020100 ", "Counter": "0,1,2,3", @@ -1984,6 +2084,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020100 ", "Counter": "0,1,2,3", @@ -1996,6 +2097,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020100 ", "Counter": "0,1,2,3", @@ -2008,6 +2110,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020100 ", "Counter": "0,1,2,3", @@ -2020,6 +2123,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020100 ", "Counter": "0,1,2,3", @@ -2032,6 +2136,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0100 ", "Counter": "0,1,2,3", @@ -2044,6 +2149,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0100 ", "Counter": "0,1,2,3", @@ -2056,6 +2162,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0100 ", "Counter": "0,1,2,3", @@ -2068,6 +2175,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0100 ", "Counter": "0,1,2,3", @@ -2080,6 +2188,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0100 ", "Counter": "0,1,2,3", @@ -2092,6 +2201,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0100 ", "Counter": "0,1,2,3", @@ -2104,6 +2214,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010200 ", "Counter": "0,1,2,3", @@ -2116,6 +2227,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020200 ", "Counter": "0,1,2,3", @@ -2128,6 +2240,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020200 ", "Counter": "0,1,2,3", @@ -2140,6 +2253,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020200 ", "Counter": "0,1,2,3", @@ -2152,6 +2266,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020200 ", "Counter": "0,1,2,3", @@ -2164,6 +2279,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020200 ", "Counter": "0,1,2,3", @@ -2176,6 +2292,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020200 ", "Counter": "0,1,2,3", @@ -2188,6 +2305,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0200 ", "Counter": "0,1,2,3", @@ -2200,6 +2318,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0200 ", "Counter": "0,1,2,3", @@ -2212,6 +2331,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0200 ", "Counter": "0,1,2,3", @@ -2224,6 +2344,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0200 ", "Counter": "0,1,2,3", @@ -2236,6 +2357,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0200 ", "Counter": "0,1,2,3", @@ -2248,6 +2370,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0200 ", "Counter": "0,1,2,3", @@ -2260,6 +2383,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000018000 ", "Counter": "0,1,2,3", @@ -2272,6 +2396,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080028000 ", "Counter": "0,1,2,3", @@ -2284,6 +2409,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100028000 ", "Counter": "0,1,2,3", @@ -2296,6 +2422,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200028000 ", "Counter": "0,1,2,3", @@ -2308,6 +2435,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400028000 ", "Counter": "0,1,2,3", @@ -2320,6 +2448,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000028000 ", "Counter": "0,1,2,3", @@ -2332,6 +2461,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80028000 ", "Counter": "0,1,2,3", @@ -2344,6 +2474,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c8000 ", "Counter": "0,1,2,3", @@ -2356,6 +2487,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c8000 ", "Counter": "0,1,2,3", @@ -2368,6 +2500,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c8000 ", "Counter": "0,1,2,3", @@ -2380,6 +2513,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c8000 ", "Counter": "0,1,2,3", @@ -2392,6 +2526,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c8000 ", "Counter": "0,1,2,3", @@ -2404,6 +2539,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c8000 ", "Counter": "0,1,2,3", @@ -2416,6 +2552,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010090 ", "Counter": "0,1,2,3", @@ -2428,6 +2565,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020090 ", "Counter": "0,1,2,3", @@ -2440,6 +2578,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020090 ", "Counter": "0,1,2,3", @@ -2452,6 +2591,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020090 ", "Counter": "0,1,2,3", @@ -2464,6 +2604,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020090 ", "Counter": "0,1,2,3", @@ -2476,6 +2617,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020090 ", "Counter": "0,1,2,3", @@ -2488,6 +2630,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020090 ", "Counter": "0,1,2,3", @@ -2500,6 +2643,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0090 ", "Counter": "0,1,2,3", @@ -2512,6 +2656,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0090 ", "Counter": "0,1,2,3", @@ -2524,6 +2669,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0090 ", "Counter": "0,1,2,3", @@ -2536,6 +2682,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0090 ", "Counter": "0,1,2,3", @@ -2548,6 +2695,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0090 ", "Counter": "0,1,2,3", @@ -2560,6 +2708,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0090 ", "Counter": "0,1,2,3", @@ -2572,6 +2721,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010120 ", "Counter": "0,1,2,3", @@ -2584,6 +2734,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020120 ", "Counter": "0,1,2,3", @@ -2596,6 +2747,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020120 ", "Counter": "0,1,2,3", @@ -2608,6 +2760,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020120 ", "Counter": "0,1,2,3", @@ -2620,6 +2773,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020120 ", "Counter": "0,1,2,3", @@ -2632,6 +2786,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020120 ", "Counter": "0,1,2,3", @@ -2644,6 +2799,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020120 ", "Counter": "0,1,2,3", @@ -2656,6 +2812,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0120 ", "Counter": "0,1,2,3", @@ -2668,6 +2825,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0120 ", "Counter": "0,1,2,3", @@ -2680,6 +2838,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0120 ", "Counter": "0,1,2,3", @@ -2692,6 +2851,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0120 ", "Counter": "0,1,2,3", @@ -2704,6 +2864,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0120 ", "Counter": "0,1,2,3", @@ -2716,6 +2877,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0120 ", "Counter": "0,1,2,3", @@ -2728,6 +2890,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010240 ", "Counter": "0,1,2,3", @@ -2740,6 +2903,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020240 ", "Counter": "0,1,2,3", @@ -2752,6 +2916,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020240 ", "Counter": "0,1,2,3", @@ -2764,6 +2929,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020240 ", "Counter": "0,1,2,3", @@ -2776,6 +2942,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020240 ", "Counter": "0,1,2,3", @@ -2788,6 +2955,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020240 ", "Counter": "0,1,2,3", @@ -2800,6 +2968,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020240 ", "Counter": "0,1,2,3", @@ -2812,6 +2981,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0240 ", "Counter": "0,1,2,3", @@ -2824,6 +2994,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0240 ", "Counter": "0,1,2,3", @@ -2836,6 +3007,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0240 ", "Counter": "0,1,2,3", @@ -2848,6 +3020,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0240 ", "Counter": "0,1,2,3", @@ -2860,6 +3033,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0240 ", "Counter": "0,1,2,3", @@ -2872,6 +3046,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0240 ", "Counter": "0,1,2,3", @@ -2884,6 +3059,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010091 ", "Counter": "0,1,2,3", @@ -2896,6 +3072,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020091 ", "Counter": "0,1,2,3", @@ -2908,6 +3085,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020091 ", "Counter": "0,1,2,3", @@ -2920,6 +3098,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020091 ", "Counter": "0,1,2,3", @@ -2932,6 +3111,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020091 ", "Counter": "0,1,2,3", @@ -2944,6 +3124,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020091 ", "Counter": "0,1,2,3", @@ -2956,6 +3137,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020091 ", "Counter": "0,1,2,3", @@ -2968,6 +3150,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0091 ", "Counter": "0,1,2,3", @@ -2980,6 +3163,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0091 ", "Counter": "0,1,2,3", @@ -2992,6 +3176,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0091 ", "Counter": "0,1,2,3", @@ -3004,6 +3189,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0091 ", "Counter": "0,1,2,3", @@ -3016,6 +3202,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0091 ", "Counter": "0,1,2,3", @@ -3028,6 +3215,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0091 ", "Counter": "0,1,2,3", @@ -3040,6 +3228,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010122 ", "Counter": "0,1,2,3", @@ -3052,6 +3241,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020122 ", "Counter": "0,1,2,3", @@ -3064,6 +3254,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020122 ", "Counter": "0,1,2,3", @@ -3076,6 +3267,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020122 ", "Counter": "0,1,2,3", @@ -3088,6 +3280,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020122 ", "Counter": "0,1,2,3", @@ -3100,6 +3293,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020122 ", "Counter": "0,1,2,3", @@ -3112,6 +3306,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020122 ", "Counter": "0,1,2,3", @@ -3124,6 +3319,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0122 ", "Counter": "0,1,2,3", @@ -3136,6 +3332,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0122 ", "Counter": "0,1,2,3", @@ -3148,6 +3345,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0122 ", "Counter": "0,1,2,3", @@ -3160,6 +3358,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0122 ", "Counter": "0,1,2,3", @@ -3172,6 +3371,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0122 ", "Counter": "0,1,2,3", @@ -3184,6 +3384,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0122 ", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json index 102bfb808199..689d478dae93 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json @@ -1,6 +1,6 @@ [ { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x8", @@ -11,7 +11,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x10", @@ -22,7 +22,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x1", @@ -32,7 +31,6 @@ "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x2", @@ -42,7 +40,15 @@ "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", + "EventCode": "0xC7", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions 
retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "CounterHTOff": "0,1,2,3" + }, + { "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x4", @@ -52,7 +58,6 @@ "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x8", @@ -62,7 +67,6 @@ "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x10", @@ -72,7 +76,43 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", + "EventCode": "0xC7", + "Counter": "0,1,2,3", + "UMask": "0x15", + "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", + "SampleAfterValue": "2000006", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xc7", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "Counter": "0,1,2,3", + "UMask": "0x2a", + "EventName": "FP_ARITH_INST_RETIRED.SINGLE", + "SampleAfterValue": "2000005", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "Counter": "0,1,2,3", + "UMask": "0x3c", + "EventName": "FP_ARITH_INST_RETIRED.PACKED", + "SampleAfterValue": "2000004", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x2", @@ -82,7 +122,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", + "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x4", @@ -92,7 +132,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", + "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. 
Counting covers only cases involving penalties that require micro-code assist intervention.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x8", @@ -102,7 +142,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", + "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x10", @@ -121,51 +161,5 @@ "BriefDescription": "Cycles with any input/output SSE or FP assist", "CounterMask": "1", "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "EventCode": "0xc7", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "Counter": "0,1,2,3", - "UMask": "0x3c", - "EventName": "FP_ARITH_INST_RETIRED.PACKED", - "SampleAfterValue": "2000004", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "Counter": "0,1,2,3", - "UMask": "0x2a", - "EventName": "FP_ARITH_INST_RETIRED.SINGLE", - "SampleAfterValue": "2000005", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "Counter": "0,1,2,3", - "UMask": "0x15", - "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", - "SampleAfterValue": "2000006", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 
?.", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json index b0cdf1f097a0..7142c76d7f11 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json @@ -10,7 +10,7 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x4", @@ -20,80 +20,49 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. 
Counting includes uops that may \"bypass\" the IDQ.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. 
Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "CounterMask": "1", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x10", @@ -104,7 +73,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. 
Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x10", @@ -116,7 +85,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x18", @@ -127,7 +96,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x18", @@ -138,7 +107,17 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. 
Counting includes uops that may bypass the IDQ.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x24", @@ -149,7 +128,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x24", @@ -160,7 +139,39 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. 
Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x3c", @@ -200,7 +211,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding ?4 ? x? 
when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -263,7 +274,7 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. 
Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0?2 cycles.", + "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. 
Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "EventCode": "0xAB", "Counter": "0,1,2,3", "UMask": "0x2", @@ -271,16 +282,5 @@ "SampleAfterValue": "2000003", "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwell/memory.json b/tools/perf/pmu-events/arch/x86/broadwell/memory.json index ff5416d29d0d..c9154cebbdf0 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/memory.json @@ -90,7 +90,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Unfriendly TSX abort triggered by a flowmarker.", "EventCode": "0x5d", "Counter": "0,1,2,3", "UMask": "0x1", @@ -170,13 +169,13 @@ }, { "PEBS": "1", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered (PEBS).", "EventCode": "0xc8", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "HLE_RETIRED.ABORTED", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times HLE abort was triggered", + "BriefDescription": "Number of times HLE abort was triggered (PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -251,13 +250,13 @@ }, { "PEBS": "1", - "PublicDescription": "Number of times RTM abort was triggered .", + "PublicDescription": "Number of times RTM abort was triggered (PEBS).", "EventCode": "0xc9", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "RTM_RETIRED.ABORTED", "SampleAfterValue": "2000003", - 
"BriefDescription": "Number of times RTM abort was triggered", + "BriefDescription": "Number of times RTM abort was triggered (PEBS)", "CounterHTOff": "0,1,2,3" }, { @@ -431,6 +430,7 @@ "CounterHTOff": "3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020001 ", "Counter": "0,1,2,3", @@ -443,6 +443,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0001 ", "Counter": "0,1,2,3", @@ -455,6 +456,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000001 ", "Counter": "0,1,2,3", @@ -467,6 +469,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000001 ", "Counter": "0,1,2,3", @@ -479,6 +482,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value 
in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000001 ", "Counter": "0,1,2,3", @@ -491,6 +495,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000001 ", "Counter": "0,1,2,3", @@ -503,6 +508,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000001 ", "Counter": "0,1,2,3", @@ -515,6 +521,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000001 ", "Counter": "0,1,2,3", @@ -527,6 +534,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000001 ", "Counter": "0,1,2,3", @@ -539,6 +547,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000001 ", "Counter": "0,1,2,3", @@ -551,6 +560,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000001 ", "Counter": "0,1,2,3", @@ -563,6 +573,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000001 ", "Counter": "0,1,2,3", @@ -575,6 +586,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000001 ", "Counter": "0,1,2,3", @@ -587,6 +599,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0002 ", "Counter": "0,1,2,3", @@ -599,6 +612,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000002 ", "Counter": "0,1,2,3", @@ -611,6 +625,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000002 ", "Counter": "0,1,2,3", @@ -623,6 +638,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000002 ", "Counter": "0,1,2,3", @@ -635,6 +651,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000002 ", "Counter": "0,1,2,3", @@ -647,6 +664,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000002 ", "Counter": "0,1,2,3", @@ -659,6 +677,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020004 ", "Counter": "0,1,2,3", @@ -671,6 +690,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0004 ", "Counter": "0,1,2,3", @@ -683,6 +703,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000004 ", "Counter": "0,1,2,3", @@ -695,6 +716,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000004 ", "Counter": "0,1,2,3", @@ -707,6 +729,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000004 ", "Counter": "0,1,2,3", @@ -719,6 +742,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000004 ", "Counter": "0,1,2,3", @@ -731,6 +755,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, 
and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000004 ", "Counter": "0,1,2,3", @@ -743,6 +768,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000004 ", "Counter": "0,1,2,3", @@ -755,6 +781,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000004 ", "Counter": "0,1,2,3", @@ -767,6 +794,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000004 ", "Counter": "0,1,2,3", @@ -779,6 +807,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000004 ", "Counter": "0,1,2,3", @@ -791,6 +820,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000004 ", "Counter": "0,1,2,3", @@ -803,6 +833,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000004 ", "Counter": "0,1,2,3", @@ -815,6 +846,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020008 ", "Counter": "0,1,2,3", @@ -827,6 +859,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0008 ", "Counter": "0,1,2,3", @@ -839,6 +872,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000008 ", "Counter": "0,1,2,3", @@ -851,6 +885,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000008 ", "Counter": "0,1,2,3", @@ -863,6 +898,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000008 ", "Counter": "0,1,2,3", @@ -875,6 +911,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000008 ", "Counter": "0,1,2,3", @@ -887,6 +924,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, 
and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000008 ", "Counter": "0,1,2,3", @@ -899,6 +937,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000008 ", "Counter": "0,1,2,3", @@ -911,6 +950,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000008 ", "Counter": "0,1,2,3", @@ -923,6 +963,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000008 ", "Counter": "0,1,2,3", @@ -935,6 +976,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000008 ", "Counter": "0,1,2,3", @@ -947,6 +989,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000008 ", "Counter": "0,1,2,3", @@ -959,6 +1002,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000008 ", "Counter": "0,1,2,3", @@ -971,6 +1015,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020010 ", "Counter": "0,1,2,3", @@ -983,6 +1028,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0010 ", "Counter": "0,1,2,3", @@ -995,6 +1041,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000010 ", "Counter": "0,1,2,3", @@ -1007,6 +1054,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000010 ", "Counter": "0,1,2,3", @@ -1019,6 +1067,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000010 ", "Counter": "0,1,2,3", @@ -1031,6 +1080,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000010 ", "Counter": "0,1,2,3", @@ -1043,6 +1093,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000010 ", "Counter": "0,1,2,3", @@ -1055,6 +1106,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000010 ", "Counter": "0,1,2,3", @@ -1067,6 +1119,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000010 ", "Counter": "0,1,2,3", @@ -1079,6 +1132,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000010 ", "Counter": "0,1,2,3", @@ -1091,6 +1145,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000010 ", "Counter": "0,1,2,3", @@ -1103,6 +1158,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000010 ", "Counter": "0,1,2,3", @@ -1115,6 +1171,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000010 ", "Counter": "0,1,2,3", @@ -1127,6 +1184,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020020 ", "Counter": "0,1,2,3", @@ -1139,6 +1197,7 @@ "CounterHTOff": "0,1,2,3" }, { + 
"PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0020 ", "Counter": "0,1,2,3", @@ -1151,6 +1210,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000020 ", "Counter": "0,1,2,3", @@ -1163,6 +1223,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000020 ", "Counter": "0,1,2,3", @@ -1175,6 +1236,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000020 ", "Counter": "0,1,2,3", @@ -1187,6 +1249,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000020 ", "Counter": "0,1,2,3", @@ -1199,6 +1262,7 @@ 
"CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000020 ", "Counter": "0,1,2,3", @@ -1211,6 +1275,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000020 ", "Counter": "0,1,2,3", @@ -1223,6 +1288,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000020 ", "Counter": "0,1,2,3", @@ -1235,6 +1301,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000020 ", "Counter": "0,1,2,3", @@ -1247,6 +1314,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000020 ", "Counter": "0,1,2,3", @@ -1259,6 +1327,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000020 ", "Counter": "0,1,2,3", @@ -1271,6 +1340,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000020 ", "Counter": "0,1,2,3", @@ -1283,6 +1353,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020040 ", "Counter": "0,1,2,3", @@ -1295,6 +1366,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": 
"Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0040 ", "Counter": "0,1,2,3", @@ -1307,6 +1379,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000040 ", "Counter": "0,1,2,3", @@ -1319,6 +1392,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000040 ", "Counter": "0,1,2,3", @@ -1331,6 +1405,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000040 ", "Counter": "0,1,2,3", @@ -1343,6 +1418,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000040 ", "Counter": "0,1,2,3", @@ -1355,6 +1431,7 @@ "CounterHTOff": 
"0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000040 ", "Counter": "0,1,2,3", @@ -1367,6 +1444,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000040 ", "Counter": "0,1,2,3", @@ -1379,6 +1457,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000040 ", "Counter": "0,1,2,3", @@ -1391,6 +1470,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000040 ", "Counter": "0,1,2,3", @@ -1403,6 +1483,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000040 ", "Counter": "0,1,2,3", @@ -1415,6 +1496,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000040 ", "Counter": "0,1,2,3", @@ -1427,6 +1509,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000040 ", "Counter": "0,1,2,3", @@ -1439,6 +1522,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020080 ", "Counter": "0,1,2,3", @@ -1451,6 +1535,7 @@ "CounterHTOff": "0,1,2,3" }, { + 
"PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0080 ", "Counter": "0,1,2,3", @@ -1463,6 +1548,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000080 ", "Counter": "0,1,2,3", @@ -1475,6 +1561,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000080 ", "Counter": "0,1,2,3", @@ -1487,6 +1574,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000080 ", "Counter": "0,1,2,3", @@ -1499,6 +1587,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000080 ", "Counter": "0,1,2,3", @@ -1511,6 +1600,7 
@@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000080 ", "Counter": "0,1,2,3", @@ -1523,6 +1613,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000080 ", "Counter": "0,1,2,3", @@ -1535,6 +1626,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000080 ", "Counter": "0,1,2,3", @@ -1547,6 +1639,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000080 ", "Counter": "0,1,2,3", @@ -1559,6 +1652,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000080 ", "Counter": "0,1,2,3", @@ -1571,6 +1665,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000080 ", "Counter": "0,1,2,3", @@ -1583,6 +1678,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000080 ", "Counter": "0,1,2,3", @@ -1595,6 +1691,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020100 ", "Counter": "0,1,2,3", @@ -1607,6 +1704,7 @@ "CounterHTOff": "0,1,2,3" }, { + 
"PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0100 ", "Counter": "0,1,2,3", @@ -1619,6 +1717,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000100 ", "Counter": "0,1,2,3", @@ -1631,6 +1730,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000100 ", "Counter": "0,1,2,3", @@ -1643,6 +1743,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000100 ", "Counter": "0,1,2,3", @@ -1655,6 +1756,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000100 ", "Counter": "0,1,2,3", @@ -1667,6 +1769,7 @@ 
"CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000100 ", "Counter": "0,1,2,3", @@ -1679,6 +1782,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000100 ", "Counter": "0,1,2,3", @@ -1691,6 +1795,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000100 ", "Counter": "0,1,2,3", @@ -1703,6 +1808,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000100 ", "Counter": "0,1,2,3", @@ -1715,6 +1821,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000100 ", "Counter": "0,1,2,3", @@ -1727,6 +1834,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000100 ", "Counter": "0,1,2,3", @@ -1739,6 +1847,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000100 ", "Counter": "0,1,2,3", @@ -1751,6 +1860,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020200 ", "Counter": "0,1,2,3", @@ -1763,6 +1873,7 @@ "CounterHTOff": "0,1,2,3" }, { + 
"PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0200 ", "Counter": "0,1,2,3", @@ -1775,6 +1886,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000200 ", "Counter": "0,1,2,3", @@ -1787,6 +1899,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000200 ", "Counter": "0,1,2,3", @@ -1799,6 +1912,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000200 ", "Counter": "0,1,2,3", @@ -1811,6 +1925,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000200 ", "Counter": "0,1,2,3", @@ -1823,6 +1938,7 @@ 
"CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000200 ", "Counter": "0,1,2,3", @@ -1835,6 +1951,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000200 ", "Counter": "0,1,2,3", @@ -1847,6 +1964,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000200 ", "Counter": "0,1,2,3", @@ -1859,6 +1977,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000200 ", "Counter": "0,1,2,3", @@ -1871,6 +1990,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000200 ", "Counter": "0,1,2,3", @@ -1883,6 +2003,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000200 ", "Counter": "0,1,2,3", @@ -1895,6 +2016,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000200 ", "Counter": "0,1,2,3", @@ -1907,6 +2029,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000028000 ", "Counter": "0,1,2,3", @@ -1919,6 +2042,7 @@ "CounterHTOff": "0,1,2,3" }, { + 
"PublicDescription": "Counts any other requests that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c8000 ", "Counter": "0,1,2,3", @@ -1931,6 +2055,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084008000 ", "Counter": "0,1,2,3", @@ -1943,6 +2068,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104008000 ", "Counter": "0,1,2,3", @@ -1955,6 +2081,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204008000 ", "Counter": "0,1,2,3", @@ -1967,6 +2094,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404008000 ", "Counter": "0,1,2,3", @@ -1979,6 +2107,7 @@ "CounterHTOff": "0,1,2,3" }, { + 
"PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004008000 ", "Counter": "0,1,2,3", @@ -1991,6 +2120,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004008000 ", "Counter": "0,1,2,3", @@ -2003,6 +2133,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84008000 ", "Counter": "0,1,2,3", @@ -2015,6 +2146,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc008000 ", "Counter": "0,1,2,3", @@ -2027,6 +2159,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c008000 ", "Counter": "0,1,2,3", @@ -2039,6 +2172,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c008000 ", "Counter": "0,1,2,3", @@ -2051,6 +2185,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c008000 ", "Counter": "0,1,2,3", @@ -2063,6 +2198,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020090 ", "Counter": "0,1,2,3", @@ -2075,6 +2211,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch 
data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0090 ", "Counter": "0,1,2,3", @@ -2087,6 +2224,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000090 ", "Counter": "0,1,2,3", @@ -2099,6 +2237,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000090 ", "Counter": "0,1,2,3", @@ -2111,6 +2250,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000090 ", "Counter": "0,1,2,3", @@ -2123,6 +2263,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000090 ", "Counter": "0,1,2,3", @@ -2135,6 +2276,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore 
response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000090 ", "Counter": "0,1,2,3", @@ -2147,6 +2289,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000090 ", "Counter": "0,1,2,3", @@ -2159,6 +2302,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000090 ", "Counter": "0,1,2,3", @@ -2171,6 +2315,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000090 ", "Counter": "0,1,2,3", @@ -2183,6 +2328,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000090 ", "Counter": "0,1,2,3", @@ -2195,6 +2341,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000090 ", "Counter": "0,1,2,3", @@ -2207,6 +2354,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000090 ", "Counter": "0,1,2,3", @@ -2219,6 +2367,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020120 ", "Counter": "0,1,2,3", @@ -2231,6 +2380,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch 
RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0120 ", "Counter": "0,1,2,3", @@ -2243,6 +2393,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000120 ", "Counter": "0,1,2,3", @@ -2255,6 +2406,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000120 ", "Counter": "0,1,2,3", @@ -2267,6 +2419,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000120 ", "Counter": "0,1,2,3", @@ -2279,6 +2432,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000120 ", "Counter": "0,1,2,3", @@ -2291,6 +2445,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response 
can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000120 ", "Counter": "0,1,2,3", @@ -2303,6 +2458,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000120 ", "Counter": "0,1,2,3", @@ -2315,6 +2471,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000120 ", "Counter": "0,1,2,3", @@ -2327,6 +2484,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000120 ", "Counter": "0,1,2,3", @@ -2339,6 +2497,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000120 ", "Counter": "0,1,2,3", @@ -2351,6 +2510,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000120 ", "Counter": "0,1,2,3", @@ -2363,6 +2523,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000120 ", "Counter": "0,1,2,3", @@ -2375,6 +2536,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020240 ", "Counter": "0,1,2,3", @@ -2387,6 +2549,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code 
reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0240 ", "Counter": "0,1,2,3", @@ -2399,6 +2562,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000240 ", "Counter": "0,1,2,3", @@ -2411,6 +2575,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000240 ", "Counter": "0,1,2,3", @@ -2423,6 +2588,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000240 ", "Counter": "0,1,2,3", @@ -2435,6 +2601,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000240 ", "Counter": "0,1,2,3", @@ -2447,6 +2614,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore 
response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000240 ", "Counter": "0,1,2,3", @@ -2459,6 +2627,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000240 ", "Counter": "0,1,2,3", @@ -2471,6 +2640,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000240 ", "Counter": "0,1,2,3", @@ -2483,6 +2653,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000240 ", "Counter": "0,1,2,3", @@ -2495,6 +2666,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000240 ", "Counter": "0,1,2,3", @@ -2507,6 +2679,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000240 ", "Counter": "0,1,2,3", @@ -2519,6 +2692,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000240 ", "Counter": "0,1,2,3", @@ -2531,6 +2705,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020091 ", "Counter": "0,1,2,3", @@ -2543,6 +2718,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all 
demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0091 ", "Counter": "0,1,2,3", @@ -2555,6 +2731,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000091 ", "Counter": "0,1,2,3", @@ -2567,6 +2744,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000091 ", "Counter": "0,1,2,3", @@ -2579,6 +2757,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000091 ", "Counter": "0,1,2,3", @@ -2591,6 +2770,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000091 ", "Counter": "0,1,2,3", @@ -2603,6 +2783,7 @@ "CounterHTOff": "0,1,2,3" }, { + 
"PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000091 ", "Counter": "0,1,2,3", @@ -2615,6 +2796,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000091 ", "Counter": "0,1,2,3", @@ -2627,6 +2809,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000091 ", "Counter": "0,1,2,3", @@ -2639,6 +2822,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000091 ", "Counter": "0,1,2,3", @@ -2651,6 +2835,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000091 ", "Counter": "0,1,2,3", @@ -2663,6 +2848,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000091 ", "Counter": "0,1,2,3", @@ -2675,6 +2861,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000091 ", "Counter": "0,1,2,3", @@ -2687,6 +2874,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020122 ", "Counter": "0,1,2,3", @@ -2699,6 +2887,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts 
all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0122 ", "Counter": "0,1,2,3", @@ -2711,6 +2900,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000122 ", "Counter": "0,1,2,3", @@ -2723,6 +2913,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000122 ", "Counter": "0,1,2,3", @@ -2735,6 +2926,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000122 ", "Counter": "0,1,2,3", @@ -2747,6 +2939,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000122 ", "Counter": "0,1,2,3", @@ -2759,6 +2952,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": 
"tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000122 ", "Counter": "0,1,2,3", @@ -2771,6 +2965,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000122 ", "Counter": "0,1,2,3", @@ -2783,6 +2978,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000122 ", "Counter": "0,1,2,3", @@ -2795,6 +2991,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000122 ", "Counter": "0,1,2,3", @@ -2807,6 +3004,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000122 ", "Counter": "0,1,2,3", @@ -2819,6 +3017,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000122 ", "Counter": "0,1,2,3", @@ -2831,6 +3030,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000122 ", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/other.json b/tools/perf/pmu-events/arch/x86/broadwell/other.json index edf14f0d0eaf..4f829c5febbe 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/other.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/other.json @@ -9,16 +9,6 @@ "BriefDescription": "Unhalted core cycles when the thread is in ring 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "This event counts unhalted core cycles 
during which the thread is in rings 1, 2, or 3.", - "EventCode": "0x5C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPL_CYCLES.RING123", - "SampleAfterValue": "2000003", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "This event counts when there is a transition from ring 1,2 or 3 to ring0.", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", + "EventCode": "0x5C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPL_CYCLES.RING123", + "SampleAfterValue": "2000003", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.", "EventCode": "0x63", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json index 78913ae87703..97c5d0784c6c 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json @@ -2,32 +2,42 @@ { "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. 
\nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. 
It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "Fixed counter 1" }, { "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. 
The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap);\n - store forwarding is impossible due to u-arch limitations;\n - preceding lock RMW operations are not forwarded;\n - store has the no-forward bit set (uncacheable/page-split/masked stores);\n - all-blocking stores are used (mostly, fences and port I/O);\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.", @@ -59,26 +69,37 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. 
This also includes the cycles during which the Allocator is serving another thread.", + "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "INT_MISC.RAT_STALL_CYCLES", + "UMask": "0x3", + "EventName": "INT_MISC.RECOVERY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "EventCode": "0x0D", "Counter": "0,1,2,3", "UMask": "0x3", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. 
This also includes the cycles during which the Allocator is serving another thread.", + "EventCode": "0x0D", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "INT_MISC.RAT_STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).", "EventCode": "0x0E", @@ -89,6 +110,18 @@ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "EventCode": "0x0E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "Number of flags-merge uops being allocated. 
Such uops considered perf sensitive\n added by GSR u-arch.", "EventCode": "0x0E", @@ -117,18 +150,6 @@ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "EventCode": "0x0E", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" - }, { "PublicDescription": "This event counts the number of the divide operations executed. Uses edge-detect and a cmask value of 1 on ARITH.FPU_DIV_ACTIVE to get the number of the divide operations executed.", "EventCode": "0x14", @@ -139,6 +160,26 @@ "BriefDescription": "Cycles when divider is busy executing divide operations", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. 
For this reason, this event may have a changing ratio with regards to wall clock time.", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Thread cycles when thread is not in halt state", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This is a fixed-frequency event programmed to general counters. It counts when the core is unhalted at 100 Mhz.", "EventCode": "0x3C", @@ -149,6 +190,36 @@ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + 
"CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "Counter": "0,1,2,3", @@ -158,6 +229,15 @@ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.", "EventCode": "0x4c", @@ -224,6 +304,18 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", + "SampleAfterValue": "200003", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. 
This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", "EventCode": "0x87", @@ -404,6 +496,15 @@ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "Counter": "0,1,2,3", + "UMask": "0xa0", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.", "EventCode": "0x89", @@ -434,6 +535,16 @@ "BriefDescription": "Speculative and retired mispredicted macro conditional branches", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. 
See the Broadwell Optimization Guide for more information.", + "EventCode": "0xA0", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", + "SampleAfterValue": "2000003", + "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", "EventCode": "0xA1", @@ -445,602 +556,472 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Cycles per core when uops are exectuted in port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED_PORT.PORT_0", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation 
Station (RS) to port 3.", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "BriefDescription": "Cycles per core when uops are exectuted in port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 
6.", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "UMask": "0x4", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "UMask": "0x4", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RESOURCE_STALLS.ANY", + "UMask": "0x4", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "RESOURCE_STALLS.RS", + "UMask": "0x8", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "RESOURCE_STALLS.ROB", + "UMask": "0x8", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", - "EventCode": "0xA3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", - "EventCode": "0xA3", - 
"Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "UMask": "0x10", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles per thread when uops are executed in port 4", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", + "UMask": "0x10", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "2", + "BriefDescription": "Cycles per core when uops are exectuted in port 4.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", - "EventCode": "0xA3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", + "UMask": "0x10", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per thread when uops are executed in port 4", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch 
requests that may or may not be required by demands.", - "EventCode": "0xA3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "UMask": "0x20", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", + "UMask": "0x20", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per core when uops are exectuted in port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 
cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "2" + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of Uops delivered by the LSD. ", - "EventCode": "0xA8", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.UOPS", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", "SampleAfterValue": "2000003", - "BriefDescription": "Number of Uops delivered by the LSD.", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of uops to be executed per-thread each cycle.", - "EventCode": "0xB1", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.THREAD", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "BriefDescription": "Cycles per core when uops are exectuted in port 6.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of uops executed from any thread.", - "EventCode": "0xB1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE", + "UMask": "0x40", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", "SampleAfterValue": "2000003", - "BriefDescription": "Number of uops executed on the core.", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles 
during which no uops were dispatched from the Reservation Station (RS) per thread.", - "EventCode": "0xB1", - "Invert": "1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per thread when uops are executed in port 7", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", - "EventCode": "0xC0", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "BDM61", - "EventName": "INST_RETIRED.ANY_P", + "UMask": "0x80", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", - "EventCode": "0xC0", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "INST_RETIRED.X87", + "UMask": "0x80", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", "SampleAfterValue": "2000003", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", - "EventCode": "0xC0", - "Counter": "1", + "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventCode": "0xA2", + "Counter": "0,1,2,3", "UMask": "0x1", - "Errata": "BDM11, BDM55", - "EventName": "INST_RETIRED.PREC_DIST", + "EventName": "RESOURCE_STALLS.ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "CounterHTOff": "1" + "BriefDescription": "Resource-related stall cycles", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", + "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). 
This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "SampleAfterValue": "100003", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "UMask": "0x4", + "EventName": "RESOURCE_STALLS.RS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", - "EventCode": "0xC2", + "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_RETIRED.ALL", + "UMask": "0x8", + "EventName": "RESOURCE_STALLS.SB", "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops.", - "CounterHTOff": "0,1,2,3,4,5,6,7", - "Data_LA": "1" + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.", - "EventCode": "0xC2", + "PublicDescription": "This event counts ROB full stall cycles. 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "UMask": "0x10", + "EventName": "RESOURCE_STALLS.ROB", "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Cycles stalled due to re-order buffer full.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", - "EventCode": "0xC2", - "Invert": "1", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", + "EventCode": "0xA3", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", - "EventCode": "0xC2", - "Invert": "1", + "EventCode": "0xA3", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", - "CounterMask": "10", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", - "EventCode": "0xC3", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand 
load request (that is cycles with non-completed load waiting for its data from memory subsystem).", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MACHINE_CLEARS.CYCLES", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", - "EventCode": "0xC3", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MACHINE_CLEARS.SMC", - "SampleAfterValue": "100003", - "BriefDescription": "Self-modifying code (SMC) detected.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", - "EventCode": "0xC3", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "MACHINE_CLEARS.MASKMOV", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", + "SampleAfterValue": "2000003", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this 
thread.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts all (macro) branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch 
instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x40", - "Errata": "BDW98", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "EventCode": "0xC4", + "EventCode": "0xA3", "Counter": "0,1,2,3", "UMask": "0x4", - "Errata": "BDW98", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired. 
(Precise Event - PEBS)", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "BR_MISP_RETIRED.RET", - "SampleAfterValue": "100007", - "BriefDescription": "This event counts the number of mispredicted ret instructions retired. 
Non PEBS", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "2000003", + "BriefDescription": "Total execution stalls.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "CounterMask": "5", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", - "EventCode": "0xCC", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Count cases of saving new LBR", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "CounterMask": "5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", - "EventCode": "0x3C", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x89", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0xa0", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "Counter": 
"0,1,2,3", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x4", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x8", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x10", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", + "UMask": "0x1", + "EventName": "LSD.UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are 
exectuted in port 4.", + "BriefDescription": "Number of Uops delivered by the LSD.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x20", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "UMask": "0x1", + "EventName": "LSD.CYCLES_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x80", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.THREAD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "EventCode": "0xC5", + "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventCode": "0xB1", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - 
"SampleAfterValue": "400009", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xB1", @@ -1083,335 +1064,364 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xe6", + "PublicDescription": "Number of uops executed from any thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x1f", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of uops executed on the core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CounterMask": "1", + 
"BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "EventCode": "0xb1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "2" + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "This event counts the number of instructions (EOMs) retired. 
Counting covers macro-fused instructions individually (that is, increments by two).", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "UMask": "0x0", + "Errata": "BDM61", + "EventName": "INST_RETIRED.ANY_P", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CounterMask": "5", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PEBS": "2", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "EventCode": "0xC0", + "Counter": "1", + "UMask": "0x1", + "Errata": "BDM11, BDM55", + "EventName": "INST_RETIRED.PREC_DIST", + "SampleAfterValue": "2000003", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "CounterHTOff": "1" + }, + { + "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "UMask": "0x2", + "EventName": "INST_RETIRED.X87", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "6", + "BriefDescription": "FP operations retired. 
X87 FP operations that have no exceptions:", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", + "EventCode": "0xC1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", + "UMask": "0x40", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "EventCode": "0xC2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", + "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Actually retired uops. (Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Data_LA": "1" }, { - "EventCode": "0x5E", + "PublicDescription": "This event counts cycles without actually retired uops.", + "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "200003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", - "EventCode": "0xA1", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_EXECUTED_PORT.PORT_0", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 0", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles with less than 10 actually retired uops.", + "CounterMask": "10", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", - "EventCode": "0xA1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.", + "EventCode": "0xC2", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Retirement slots used. 
(Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", - "EventCode": "0xA1", + "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "MACHINE_CLEARS.CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", - "EventCode": "0xA1", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", - "EventCode": "0xA1", + "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "UMask": "0x4", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "100003", + 
"BriefDescription": "Self-modifying code (SMC) detected.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", - "EventCode": "0xA1", + "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", + "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x20", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "EventName": "MACHINE_CLEARS.MASKMOV", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", - "EventCode": "0xA1", + "PublicDescription": "This event counts all (macro) branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", - "EventCode": "0xA1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "EventCode": "0xC4", 
"Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "UMask": "0x1", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.", - "EventCode": "0xA0", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", - "SampleAfterValue": "2000003", - "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect near call instructions retired. 
(Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", - "Counter": "Fixed counter 2", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).", + "EventCode": "0xC4", + "Counter": "0,1,2,3", "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "Errata": "BDW98", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired. 
(Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "UMask": "0x8", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "SampleAfterValue": "100007", + "BriefDescription": "Return instructions retired. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "PublicDescription": "This event counts not taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "CounterMask": "1", + "UMask": "0x10", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", + "UMask": "0x20", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Taken branch instructions retired. 
(Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", + "PublicDescription": "This event counts far branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", + "UMask": "0x40", + "Errata": "BDW98", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "SampleAfterValue": "100007", + "BriefDescription": "Far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", + "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", + "UMask": "0x0", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", + "UMask": "0x1", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted conditional branch instructions retired. 
(Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "Invert": "1", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "UMask": "0x4", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3" + }, + { + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "BR_MISP_RETIRED.RET", + "SampleAfterValue": "100007", + "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. 
(Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", + "EventCode": "0xCC", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "UMask": "0x20", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "BriefDescription": "Count cases of saving new LBR", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xe6", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "UMask": "0x1f", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json index 4301e6fbc5eb..2a015e4c7e21 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json @@ -43,6 +43,16 @@ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. 
(1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0xe", + "Errata": "BDM69", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.", "EventCode": "0x08", @@ -72,6 +82,15 @@ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).", "EventCode": "0x49", @@ -116,6 +135,16 @@ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0xe", + "Errata": "BDM69", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.", "EventCode": "0x49", @@ -145,6 +174,15 @@ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": 
"Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles for an extended page table walk. The Extended Page directory cache differs from standard TLB caches by the operating system that use it. Virtual machine operating systems use the extended page directory cache, while guest operating systems use the standard TLB caches.", "EventCode": "0x4F", @@ -199,6 +237,16 @@ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0xe", + "Errata": "BDM69", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.", "EventCode": "0x85", @@ -228,6 +276,15 @@ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. 
Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", "EventCode": "0xAE", @@ -251,61 +308,61 @@ { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x21", + "UMask": "0x12", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", + "BriefDescription": "Number of DTLB page walker hits in the L2.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x12", + "UMask": "0x14", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in the L2.", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x22", + "UMask": "0x18", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.ITLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L2.", + "BriefDescription": "Number of DTLB page walker hits in Memory.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x14", + "UMask": "0x21", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x24", + "UMask": "0x22", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page 
walker hits in the L3 + XSNP.", + "BriefDescription": "Number of ITLB page walker hits in the L2.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x18", + "UMask": "0x24", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in Memory.", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", "CounterHTOff": "0,1,2,3" }, { @@ -327,62 +384,5 @@ "SampleAfterValue": "100007", "BriefDescription": "STLB flush attempts", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0xe", - "Errata": "BDM69", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "Counter": "0,1,2,3", - "UMask": "0xe", - "Errata": "BDM69", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "Counter": "0,1,2,3", - 
"UMask": "0xe", - "Errata": "BDM69", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3 From 97d00f2d10ddd6b3ca89641d4a2fe6922a535c3f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:44:03 -0800 Subject: perf vendor events intel: Update BroadwellX events to V13 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/broadwellx/cache.json | 383 +++--- .../arch/x86/broadwellx/floating-point.json | 108 +- .../pmu-events/arch/x86/broadwellx/frontend.json | 138 +-- .../pmu-events/arch/x86/broadwellx/memory.json | 40 +- .../perf/pmu-events/arch/x86/broadwellx/other.json | 20 +- .../pmu-events/arch/x86/broadwellx/pipeline.json | 1214 ++++++++++---------- .../arch/x86/broadwellx/virtual-memory.json | 150 +-- 7 files changed, 1055 insertions(+), 998 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json index d1d043829b95..bf0c51272068 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json @@ -11,11 +11,28 @@ }, { "EventCode": "0x24", - "UMask": "0x41", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "UMask": "0x22", + "BriefDescription": "RFO requests that miss L2 cache.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "This event counts the 
number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x24", + "BriefDescription": "L2 cache misses when fetching instructions.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x27", + "BriefDescription": "Demand requests that miss L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -29,6 +46,43 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0x3f", + "BriefDescription": "All requests that miss L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x41", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. 
Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x42", + "BriefDescription": "RFO requests that hit L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x44", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0x50", @@ -69,6 +123,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xe7", + "BriefDescription": "Demand requests to L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0xf8", @@ -79,6 +142,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xff", + "BriefDescription": "All L2 requests.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x27", "UMask": "0x50", @@ -130,6 +202,27 @@ "SampleAfterValue": "2000003", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "Counter": "2", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "UMask": "0x2", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.FB_FULL", + "CounterMask": "1", + 
"SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x51", "UMask": "0x1", @@ -151,6 +244,29 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "Errata": "BDM76", + "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "CounterMask": "6", + "Errata": "BDM76", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x60", "UMask": "0x2", @@ -158,7 +274,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. 
The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -175,24 +291,24 @@ }, { "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "UMask": "0x4", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "CounterMask": "1", "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "UMask": "0x8", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "Errata": "BDM76", - "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -208,18 +324,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "CounterMask": "1", - "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. 
The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x63", "UMask": "0x2", @@ -266,7 +370,7 @@ "BriefDescription": "Demand and prefetch data reads", "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable \"Demands\" and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -280,27 +384,36 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with true STLB miss retired to the architected path. 
True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -308,37 +421,37 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. 
(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "Errata": "BDM35", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with locked access retired to the architected path.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted store uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -346,24 +459,24 @@ { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "This event counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store uops.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. 
(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "This event counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -371,69 +484,69 @@ { "EventCode": "0xD1", "UMask": "0x1", - "BriefDescription": "Retired load uops with L1 cache hits as data sources.", + "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. 
This event also counts SW prefetches independent of the actual data source.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x2", - "BriefDescription": "Retired load uops with L2 cache hits as data sources.", + "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "Errata": "BDM35", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x8", - "BriefDescription": "Retired load uops misses in L1 cache as data sources.", + "BriefDescription": "Retired load uops misses in L1 cache as data sources. 
Uses PEBS.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x20", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. 
(Precise Event - PEBS).", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -445,84 +558,83 @@ { "EventCode": "0xD1", "UMask": "0x40", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x1", - "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. 
(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. 
(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x8", - "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.", + "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x1", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "BDE70, BDM100", - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI).", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. 
This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x4", - "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI)", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -534,7 +646,7 @@ { "EventCode": "0xD3", "UMask": "0x10", - "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -546,7 +658,7 @@ { "EventCode": "0xD3", "UMask": "0x20", - "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -694,119 +806,6 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x24", - "UMask": "0x42", - "BriefDescription": "RFO requests that hit L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x22", - "BriefDescription": "RFO requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x44", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x24", - "BriefDescription": "L2 cache misses when 
fetching instructions.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x27", - "BriefDescription": "Demand requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe7", - "BriefDescription": "Demand requests to L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x3f", - "BriefDescription": "All requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xff", - "BriefDescription": "All L2 requests.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "Errata": "BDM76", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses 
outstanding from any thread on physical core.", - "Counter": "2", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "UMask": "0x2", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.FB_FULL", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "Offcore": "1", "EventCode": "0xB7, 0xBB", @@ -816,6 +815,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -828,6 +828,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -840,6 +841,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be 
programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -852,6 +854,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -864,6 +867,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -876,6 +880,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", 
"CounterHTOff": "0,1,2,3" }, @@ -888,6 +893,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -900,6 +906,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -912,6 +919,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -924,6 +932,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be 
programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -936,6 +945,20 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3", + "MSRValue": "0x3f803c0002", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json index 4ae1ea24f22f..d7b9d9c9c518 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json @@ -6,7 +6,7 @@ "Counter": "0,1,2,3", "EventName": "OTHER_ASSISTS.AVX_TO_SSE", "Errata": "BDM30", - "PublicDescription": "This is a non-precise version (that is, 
does not use PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -17,7 +17,7 @@ "Counter": "0,1,2,3", "EventName": "OTHER_ASSISTS.SSE_TO_AVX", "Errata": "BDM30", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -25,7 +25,6 @@ "EventCode": "0xC7", "UMask": "0x1", "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "SampleAfterValue": "2000003", @@ -35,17 +34,24 @@ "EventCode": "0xC7", "UMask": "0x2", "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC7", + "UMask": "0x3", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xC7", "UMask": "0x4", "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", @@ -55,7 +61,6 @@ "EventCode": "0xC7", "UMask": "0x8", "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", @@ -65,19 +70,54 @@ "EventCode": "0xC7", "UMask": "0x10", "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC7", + "UMask": "0x15", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", + "SampleAfterValue": "2000006", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xc7", + "UMask": "0x20", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "UMask": "0x2a", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SINGLE", + "SampleAfterValue": "2000005", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "UMask": "0x3c", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.PACKED", + "SampleAfterValue": "2000004", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xCA", "UMask": "0x2", "BriefDescription": "Number of X87 assists due to output value.", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.X87_OUTPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", + "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -87,7 +127,7 @@ "BriefDescription": "Number of X87 assists due to input value.", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.X87_INPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", + "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -97,7 +137,7 @@ "BriefDescription": "Number of SIMD FP assists due to Output values", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.SIMD_OUTPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) 
when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", + "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -107,7 +147,7 @@ "BriefDescription": "Number of SIMD FP assists due to input values", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.SIMD_INPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", + "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -121,51 +161,5 @@ "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xc7", - "UMask": "0x20", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x3", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x3c", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.PACKED", - "SampleAfterValue": "2000004", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x2a", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SINGLE", - "SampleAfterValue": "2000005", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x15", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. 
Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", - "SampleAfterValue": "2000006", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json b/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json index 06bf0a40e568..72781e1e3362 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json @@ -15,80 +15,49 @@ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x10", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_DSB_UOPS", - "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x20", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_MITE_UOPS", - "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x4", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. 
Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventName": "IDQ.MITE_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "UMask": "0x8", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_CYCLES", + "EventName": "IDQ.DSB_CYCLES", "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. 
Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "UMask": "0x10", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "EventName": "IDQ.MS_DSB_UOPS", + "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -99,7 +68,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.MS_DSB_CYCLES", "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. 
Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -111,7 +80,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.MS_DSB_OCCUR", "CounterMask": "1", - "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -122,7 +91,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", "CounterMask": "4", - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -133,7 +102,17 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", "CounterMask": "1", - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Counting includes uops that may bypass the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x20", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -144,7 +123,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", "CounterMask": "4", - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -155,7 +134,39 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", "CounterMask": "1", - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. 
This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -165,7 +176,7 @@ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_ALL_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. 
Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -205,7 +216,7 @@ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding ?4 ? x? when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. 
Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -268,18 +279,7 @@ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0?2 cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", + "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. 
DSB-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0-2 cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json index 1204ea8ff30d..d79a5cfea44b 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json @@ -95,7 +95,6 @@ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. 
Since this is the count of execution, it may not always cause a transactional abort.", "Counter": "0,1,2,3", "EventName": "TX_EXEC.MISC1", - "PublicDescription": "Unfriendly TSX abort triggered by a flowmarker.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -171,11 +170,11 @@ { "EventCode": "0xc8", "UMask": "0x4", - "BriefDescription": "Number of times HLE abort was triggered", + "BriefDescription": "Number of times HLE abort was triggered (PEBS)", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered (PEBS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -252,11 +251,11 @@ { "EventCode": "0xc9", "UMask": "0x4", - "BriefDescription": "Number of times RTM abort was triggered", + "BriefDescription": "Number of times RTM abort was triggered (PEBS)", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered .", + "PublicDescription": "Number of times RTM abort was triggered (PEBS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -439,6 +438,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -451,6 +451,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote 
cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -463,6 +464,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -475,6 +477,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -487,6 +490,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -499,6 +503,7 @@ "Counter": "0,1,2,3", 
"EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -511,6 +516,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -523,6 +529,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -535,6 +542,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", 
"SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -547,6 +555,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -559,6 +568,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -571,6 +581,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -583,6 +594,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event 
select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -595,6 +607,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -607,6 +620,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -619,6 +633,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -631,6 +646,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that 
miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -643,6 +659,20 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3", + "MSRValue": "0x3fbfc00002", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/other.json b/tools/perf/pmu-events/arch/x86/broadwellx/other.json index 718fcb1db2ee..4475249ea9da 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/other.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/other.json @@ -9,16 +9,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x5C", - "UMask": "0x2", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 
2, or 3", - "Counter": "0,1,2,3", - "EventName": "CPL_CYCLES.RING123", - "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EdgeDetect": "1", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5C", + "UMask": "0x2", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "Counter": "0,1,2,3", + "EventName": "CPL_CYCLES.RING123", + "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x63", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json index 02b4e1035f2d..920c89da9111 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json @@ -3,31 +3,41 @@ "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. 
\nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "UMask": "0x2", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. 
This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "EventCode": "0x03", @@ -60,22 +70,33 @@ }, { "EventCode": "0x0D", - "UMask": "0x8", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "UMask": "0x3", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. 
misprediction or memory nuke)", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RAT_STALL_CYCLES", - "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "CounterMask": "1", + "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x0D", "UMask": "0x3", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "AnyThread": "1", "CounterMask": "1", - "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0D", + "UMask": "0x8", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.RAT_STALL_CYCLES", + "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. 
This also includes the cycles during which the Allocator is serving another thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -89,6 +110,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0x0E", "UMask": "0x10", @@ -117,18 +150,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, { "EventCode": "0x14", "UMask": "0x1", @@ -139,6 +160,26 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Thread cycles when thread is not in halt state", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3C", "UMask": "0x1", @@ -149,6 +190,36 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "UMask": "0x2", @@ -158,6 +229,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "UMask": "0x2", + "BriefDescription": "Count XClk pulses when this thread is unhalted and 
the other thread is halted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4c", "UMask": "0x1", @@ -224,6 +304,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EdgeDetect": "1", + "Invert": "1", + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_END", + "CounterMask": "1", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "UMask": "0x1", @@ -404,6 +496,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "UMask": "0xa0", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x89", "UMask": "0xc1", @@ -434,6 +535,16 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xA0", + "UMask": "0x3", + "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "Counter": "0,1,2,3", + "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", + "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. 
See the Broadwell Optimization Guide for more information.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xA1", "UMask": "0x1", @@ -446,601 +557,471 @@ }, { "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "UMask": "0x1", + "BriefDescription": "Cycles per core when uops are exectuted in port 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "PublicDescription": "This event counts, on the 
per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "UMask": "0x2", + "BriefDescription": "Cycles per core when uops are exectuted in port 1.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "PublicDescription": "This event counts, on the per-thread basis, 
cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "UMask": "0x4", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x1", - "BriefDescription": "Resource-related stall cycles", + "EventCode": "0xA1", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x4", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.RS", - "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", "UMask": "0x8", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.SB", - "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x10", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ROB", - "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", - "CounterMask": "1", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", - "CounterMask": "8", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 
data cache.", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "UMask": "0x2", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per core when uops are exectuted in port 4.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", - "CounterMask": "2", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", - "CounterMask": "4", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", - "CounterMask": "5", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch 
requests that may or may not be required by demands.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per core when uops are exectuted in port 5.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", - "CounterMask": "6", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", - "CounterMask": "12", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Number of Uops delivered by the LSD.", + "EventCode": "0xA1", + "UMask": 
"0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "EventName": "LSD.UOPS", - "PublicDescription": "Number of Uops delivered by the LSD. ", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per core when uops are exectuted in port 6.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.THREAD", - "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Number of uops executed on the core.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE", - "PublicDescription": "Number of uops executed from any thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "CounterMask": "1", - 
"PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", - "UMask": "0x0", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.ANY_P", - "Errata": "BDM61", - "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", - "UMask": "0x2", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.X87", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", + "EventCode": "0xA2", "UMask": "0x1", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "PEBS": "2", - "Counter": "1", - "EventName": "INST_RETIRED.PREC_DIST", - "Errata": "BDM11, BDM55", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "BriefDescription": "Resource-related stall cycles", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", - "CounterHTOff": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", - "UMask": "0x40", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "EventCode": "0xA2", + "UMask": "0x4", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", "Counter": "0,1,2,3", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "SampleAfterValue": "100003", + "EventName": "RESOURCE_STALLS.RS", + "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "UMask": "0x1", - "BriefDescription": "Actually retired uops.", - "Data_LA": "1", - "PEBS": "1", + "EventCode": "0xA2", + "UMask": "0x8", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "This event counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Retirement slots used.", - "PEBS": "1", + "EventCode": "0xA2", + "UMask": "0x10", + "BriefDescription": "Cycles stalled due to re-order buffer full.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.", + "EventName": "RESOURCE_STALLS.ROB", + "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xC2", + "EventCode": "0xA3", "UMask": "0x1", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", "CounterMask": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xC2", + "EventCode": "0xA3", "UMask": "0x1", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "CounterMask": "10", - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to 
non PEBS uops retired event.", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "CounterMask": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", - "UMask": "0x1", - "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.CYCLES", - "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", + "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", + "CounterMask": "2", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "CounterMask": "2", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", "UMask": "0x4", - "BriefDescription": "Self-modifying code (SMC) detected.", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.SMC", - "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", + "CounterMask": "4", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC3", - "UMask": "0x20", - 
"BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "EventCode": "0xA3", + "UMask": "0x4", + "BriefDescription": "Total execution stalls.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.MASKMOV", - "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", - "SampleAfterValue": "100003", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "CounterMask": "4", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", - "UMask": "0x1", - "BriefDescription": "Conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x2", - "BriefDescription": "Direct and indirect near call instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x0", - "BriefDescription": "All (macro) branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "This event counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x8", - "BriefDescription": "Return instructions retired.", - "PEBS": "1", - 
"Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x10", - "BriefDescription": "Not taken branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x20", - "BriefDescription": "Taken branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x40", - "BriefDescription": "Far branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "Errata": "BDW98", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired. 
(Precise Event - PEBS)", - "PEBS": "2", + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "Errata": "BDW98", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "CounterMask": "5", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC5", - "UMask": "0x1", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x0", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x8", - "BriefDescription": "This event counts the number of mispredicted ret instructions retired. 
Non PEBS", - "PEBS": "1", + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.RET", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", - "SampleAfterValue": "100007", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "CounterMask": "5", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC5", - "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", - "PEBS": "2", + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", + "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", + "CounterMask": "6", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xCC", - "UMask": "0x20", - "BriefDescription": "Count cases of saving new LBR", + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "PublicDescription": "This event counts cases of saving new LBR records by hardware. 
This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "CounterMask": "6", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Thread cycles when thread is not in halt state", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "CounterMask": "8", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x89", - "UMask": "0xa0", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "CounterMask": "8", "SampleAfterValue": "2000003", - 
"CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "CounterMask": "12", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "CounterMask": "12", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Number of Uops delivered by the LSD.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", - "AnyThread": "1", + "EventName": "LSD.UOPS", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": 
"UOPS_EXECUTED_PORT.PORT_4_CORE", - "AnyThread": "1", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", - "AnyThread": "1", + "EventName": "LSD.CYCLES_ACTIVE", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", - "AnyThread": "1", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "Invert": "1", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", - "AnyThread": "1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x20", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "PEBS": "1", - 
"Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xB1", @@ -1083,335 +1064,364 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xe6", - "UMask": "0x1f", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "CounterMask": "8", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "UMask": "0x2", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": 
"CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", "CounterMask": "2", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls.", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "CounterMask": "4", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterMask": "3", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "CounterMask": "12", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "Invert": "1", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "CounterMask": "5", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "EventCode": "0xC0", + "UMask": "0x0", + "BriefDescription": "Number of instructions retired. 
General Counter - architectural event", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "CounterMask": "6", + "EventName": "INST_RETIRED.ANY_P", + "Errata": "BDM61", + "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", - "EventCode": "0xC3", + "EventCode": "0xC0", "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type.", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "PEBS": "2", + "Counter": "1", + "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "BDM11, BDM55", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "SampleAfterValue": "2000003", + "CounterHTOff": "1" + }, + { + "EventCode": "0xC0", + "UMask": "0x2", + "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.COUNT", - "CounterMask": "1", - "SampleAfterValue": "100003", + "EventName": "INST_RETIRED.X87", + "PublicDescription": "This event counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "EventCode": "0xC1", + "UMask": "0x40", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", - "CounterMask": "4", - "SampleAfterValue": "2000003", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", - "Invert": "1", - "EventCode": "0x5E", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "BriefDescription": "Actually retired uops. (Precise Event - PEBS)", + "Data_LA": "1", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_END", - "CounterMask": "1", - "SampleAfterValue": "200003", + "EventName": "UOPS_RETIRED.ALL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. 
Maximal increment value for one cycle is eight.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "Invert": "1", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_ACTIVE", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "CounterMask": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "Invert": "1", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Cycles per thread when uops are executed in port 0", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterMask": "10", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Retirement slots used. 
(Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventName": "MACHINE_CLEARS.CYCLES", + "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "EdgeDetect": "1", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.COUNT", + "CounterMask": "1", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "EventCode": "0xC3", + "UMask": "0x4", + 
"BriefDescription": "Self-modifying code (SMC) detected.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xC3", "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.MASKMOV", + "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "EventCode": "0xC4", + "UMask": "0x0", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "This event counts all (macro) branch instructions 
retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "EventCode": "0xC4", + "UMask": "0x1", + "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA0", - "UMask": "0x3", - "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "EventCode": "0xC4", + "UMask": "0x2", + "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", - "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. 
See the Broadwell Optimization Guide for more information.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", + "EventCode": "0xC4", "UMask": "0x2", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0xC4", + "UMask": "0x4", + "BriefDescription": "All (macro) branch instructions retired. 
(Precise Event - PEBS)", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "Errata": "BDW98", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0xC4", + "UMask": "0x8", + "BriefDescription": "Return instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", - "UMask": "0x3", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke).", + "EventCode": "0xC4", + "UMask": "0x10", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "EventCode": "0xC4", + "UMask": "0x20", + "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "CounterMask": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "EventCode": "0xC4", + "UMask": "0x40", + "BriefDescription": "Far branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "Errata": "BDW98", + "PublicDescription": "This event counts far branch instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x0", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - 
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "CounterMask": "3", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x1", + "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x4", + "BriefDescription": "Mispredicted macro branch instructions retired. 
(Precise Event - PEBS)", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC5", + "UMask": "0x8", + "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.RET", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "EventCode": "0xC5", + "UMask": "0x20", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. 
(Precise Event - PEBS).", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0xCC", + "UMask": "0x20", + "BriefDescription": "Count cases of saving new LBR", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "EventCode": "0xe6", + "UMask": "0x1f", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json index 5ce8b67ba076..7d79c707c6d1 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json @@ -43,6 +43,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0xe", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + 
"Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "UMask": "0x10", @@ -72,6 +82,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0x60", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x1", @@ -116,6 +135,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0xe", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x10", @@ -145,6 +174,15 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0x60", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4F", "UMask": "0x10", @@ -199,6 +237,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0xe", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x85", "UMask": "0x10", @@ -228,6 +276,15 @@ "SampleAfterValue": "100003", "CounterHTOff": 
"0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0x60", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xAE", "UMask": "0x1", @@ -250,60 +307,60 @@ }, { "EventCode": "0xBC", - "UMask": "0x21", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", + "UMask": "0x12", + "BriefDescription": "Number of DTLB page walker hits in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x12", - "BriefDescription": "Number of DTLB page walker hits in the L2.", + "UMask": "0x14", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x22", - "BriefDescription": "Number of ITLB page walker hits in the L2.", + "UMask": "0x18", + "BriefDescription": "Number of DTLB page walker hits in Memory.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x14", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", + "UMask": "0x21", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": 
"0xBC", - "UMask": "0x24", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", + "UMask": "0x22", + "BriefDescription": "Number of ITLB page walker hits in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L2", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x18", - "BriefDescription": "Number of DTLB page walker hits in Memory.", + "UMask": "0x24", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" @@ -327,62 +384,5 @@ "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0xe", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x60", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0xe", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - 
"EventCode": "0x49", - "UMask": "0x60", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0xe", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x60", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3 From 03da89c5516ea7be3afce2bd86b0a886877db835 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:46:30 -0800 Subject: perf vendor events intel: Update Goldmont events to V12 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/goldmont/cache.json | 1244 +++++++++++++++++--- .../perf/pmu-events/arch/x86/goldmont/memory.json | 280 ++++- tools/perf/pmu-events/arch/x86/goldmont/other.json | 54 +- .../pmu-events/arch/x86/goldmont/pipeline.json | 506 ++++---- .../arch/x86/goldmont/virtual-memory.json | 60 +- 5 files changed, 1687 insertions(+), 457 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/goldmont/cache.json b/tools/perf/pmu-events/arch/x86/goldmont/cache.json index 4e02e1e5e70d..f8bbe087b0f8 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/cache.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/cache.json @@ -1,4 +1,24 @@ [ + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts memory 
requests originating from the core that miss in the L2 cache.", + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x41", + "EventName": "LONGEST_LAT_CACHE.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache request misses" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.", + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x4f", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache requests" + }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the intra-die interconnect (IDI) fabric. The XQ may reject transactions from the L2Q (non-cacheable requests), L2 misses and L2 write-back victims.", @@ -11,120 +31,119 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to insure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.", + "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. 
The L2Q may also reject transactions from a core to ensure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.", "EventCode": "0x31", "Counter": "0,1,2,3", "UMask": "0x0", "EventName": "CORE_REJECT_L2Q.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Requests rejected by the L2Q " + "BriefDescription": "Requests rejected by the L2Q" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.", - "EventCode": "0x2E", + "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory. No count will occur if the evicted line is clean, and hence does not require a writeback.", + "EventCode": "0x51", "Counter": "0,1,2,3", - "UMask": "0x4f", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "UMask": "0x1", + "EventName": "DL1.DIRTY_EVICTION", "SampleAfterValue": "200003", - "BriefDescription": "L2 cache requests" + "BriefDescription": "L1 Cache evictions for dirty data" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.", - "EventCode": "0x2E", + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.", + "EventCode": "0x86", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "LONGEST_LAT_CACHE.MISS", + "UMask": "0x2", + "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", "SampleAfterValue": "200003", - "BriefDescription": "L2 cache request misses" + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss." 
}, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts cycles that an ICache miss is outstanding, and instruction fetch is stalled. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes, while an Icache miss outstanding. Note this event is not the same as cycles to retrieve an instruction due to an Icache miss. Rather, it is the part of the Instruction Cache (ICache) miss time where no bytes are available for the decoder.", - "EventCode": "0x86", + "EventCode": "0xB7", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", - "SampleAfterValue": "200003", - "BriefDescription": "Cycles where code-fetch is stalled and an ICache miss is outstanding. This is not the same as an ICache Miss." + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100007", + "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts the number of load uops retired.", + "PublicDescription": "Counts locked memory uops retired. This includes regular locks and bus locks. (To specifically count bus locks only, see the Offcore response event.) A locked access is one with a lock prefix, or an exchange to memory. 
See the SDM for a complete description of which memory load accesses are locks.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "UMask": "0x21", + "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired (Precise event capable)" + "BriefDescription": "Locked load uops retired (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts the number of store uops retired.", + "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "UMask": "0x41", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired (Precise event capable)" + "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts the number of memory uops retired that is either a loads or a store or both.", + "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x83", - "EventName": "MEM_UOPS_RETIRED.ALL", + "UMask": "0x42", + "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired (Precise event capable)" + "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts locked memory uops retired. This includes \"regular\" locks and bus locks. (To specifically count bus locks only, see the Offcore response event.) A locked access is one with a lock prefix, or an exchange to memory. 
See the SDM for a complete description of which memory load accesses are locks.", + "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", + "UMask": "0x43", + "EventName": "MEM_UOPS_RETIRED.SPLIT", "SampleAfterValue": "200003", - "BriefDescription": "Locked load uops retired (Precise event capable)" + "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.", + "PublicDescription": "Counts the number of load uops retired.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "UMask": "0x81", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Load uops retired (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.", + "PublicDescription": "Counts the number of store uops retired.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", + "UMask": "0x82", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Store uops retired (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.", + "PublicDescription": "Counts the number of memory uops retired that is either a loads or a store or both.", 
"EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x43", - "EventName": "MEM_UOPS_RETIRED.SPLIT", + "UMask": "0x83", + "EventName": "MEM_UOPS_RETIRED.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Memory uops retired (Precise event capable)" }, { "PEBS": "2", @@ -140,24 +159,24 @@ { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts load uops retired that miss the L1 data cache.", + "PublicDescription": "Counts load uops retired that hit in the L2 cache.", "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", + "UMask": "0x2", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts load uops retired that hit in the L2 cache.", + "PublicDescription": "Counts load uops retired that miss the L1 data cache.", "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "UMask": "0x8", + "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" + "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" }, { "PEBS": "2", @@ -205,24 +224,20 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory. 
No count will occur if the evicted line is clean, and hence does not require a writeback.", - "EventCode": "0x51", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "DL1.DIRTY_EVICTION", - "SampleAfterValue": "200003", - "BriefDescription": "L1 Cache evictions for dirty data" - }, - { - "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", + "MSRValue": "0x40000032b7 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", + "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)" + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x36000032b7 ", "Counter": "0,1,2,3", @@ -234,6 +249,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x10000032b7 ", "Counter": "0,1,2,3", @@ -245,6 +262,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x04000032b7 ", "Counter": "0,1,2,3", @@ -256,6 +275,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x02000032b7 ", "Counter": "0,1,2,3", @@ -267,6 +288,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x00000432b7 ", "Counter": "0,1,2,3", @@ -278,6 +301,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x00000132b7 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000022 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000022 ", "Counter": "0,1,2,3", @@ -289,6 +340,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000022 ", "Counter": "0,1,2,3", @@ -300,6 +353,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000022 ", "Counter": "0,1,2,3", @@ -311,6 +366,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000022 ", "Counter": "0,1,2,3", @@ -322,6 +379,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040022 ", "Counter": "0,1,2,3", @@ -333,6 +392,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010022 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000003091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600003091", "Counter": "0,1,2,3", @@ -344,6 +431,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000003091", "Counter": "0,1,2,3", @@ -355,6 +444,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400003091", "Counter": "0,1,2,3", @@ -366,6 +457,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200003091", "Counter": "0,1,2,3", @@ -377,6 +470,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000043091", "Counter": "0,1,2,3", @@ -388,6 +483,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000013091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000003010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600003010 ", "Counter": "0,1,2,3", @@ -399,6 +522,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000003010 ", "Counter": "0,1,2,3", @@ -410,6 +535,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400003010 ", "Counter": "0,1,2,3", @@ -421,6 +548,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200003010 ", "Counter": "0,1,2,3", @@ -432,347 +561,957 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000043010 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000013010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that hit the L2 cache. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000048000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000018000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000044800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000014800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000044000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000014000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000042000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000012000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000041000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000011000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000008000 ", + "MSRValue": "0x0000010800 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400008000 ", + "MSRValue": "0x4000000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200008000 ", + "MSRValue": "0x3600000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000048000 ", + "MSRValue": "0x1000000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that hit the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000018000 ", + "MSRValue": "0x0400000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600004800 ", + "MSRValue": "0x0200000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. ", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000044800 ", + "MSRValue": "0x0000040400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600004000 ", + "MSRValue": "0x0000010400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000004000 ", + "MSRValue": "0x4000000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400004000 ", + "MSRValue": "0x3600000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200004000 ", + "MSRValue": "0x1000000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000044000 ", + "MSRValue": "0x0400000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600002000 ", + "MSRValue": "0x0200000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000002000 ", + "MSRValue": "0x0000040200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400002000 ", + "MSRValue": "0x0000010200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200002000 ", + "MSRValue": "0x4000000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000042000 ", + "MSRValue": "0x3600000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600001000 ", + "MSRValue": "0x1000000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000001000 ", + "MSRValue": "0x0400000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400001000 ", + "MSRValue": "0x0200000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. ", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200001000 ", + "MSRValue": "0x0000040100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000041000 ", + "MSRValue": "0x0000010100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000800 ", + "MSRValue": "0x4000000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000800 ", + "MSRValue": "0x3600000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000800 ", + "MSRValue": "0x1000000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000800 ", + "MSRValue": "0x0400000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. 
", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040800 ", + "MSRValue": "0x0200000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. ", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000010400 ", + "MSRValue": "0x0000040080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000100 ", + "MSRValue": "0x0000010080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. 
Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000080 ", + "MSRValue": "0x4000000020 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000020 ", "Counter": "0,1,2,3", @@ -784,6 +1523,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000020 ", "Counter": "0,1,2,3", @@ -795,6 +1536,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000020 ", "Counter": "0,1,2,3", @@ -806,6 +1549,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000020 ", "Counter": "0,1,2,3", @@ -817,6 +1562,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040020 ", "Counter": "0,1,2,3", @@ -828,6 +1575,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010020 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000010 ", "Counter": "0,1,2,3", @@ -839,6 +1614,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000010 ", "Counter": "0,1,2,3", @@ -850,6 +1627,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000010 ", "Counter": "0,1,2,3", @@ -861,6 +1640,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000010 ", "Counter": "0,1,2,3", @@ -872,6 +1653,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040010 ", "Counter": "0,1,2,3", @@ -883,6 +1666,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000008 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000008 ", "Counter": "0,1,2,3", @@ -894,6 +1705,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000008 ", "Counter": "0,1,2,3", @@ -905,6 +1718,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000008 ", "Counter": "0,1,2,3", @@ -916,6 +1731,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000008 ", "Counter": "0,1,2,3", @@ -927,6 +1744,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040008 ", "Counter": "0,1,2,3", @@ -938,6 +1757,21 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010008 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x4000000004 ", "Counter": "0,1,2,3", @@ -949,6 +1783,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000004 ", "Counter": "0,1,2,3", @@ -960,6 +1796,21 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000004 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000004 ", "Counter": "0,1,2,3", @@ -971,6 +1822,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000004 ", "Counter": "0,1,2,3", @@ -982,6 +1835,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040004 ", "Counter": "0,1,2,3", @@ -993,6 +1848,21 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010004 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x4000000002 ", "Counter": "0,1,2,3", @@ -1004,6 +1874,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000002 ", "Counter": "0,1,2,3", @@ -1015,6 +1887,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000002 ", "Counter": "0,1,2,3", @@ -1026,6 +1900,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000002 ", "Counter": "0,1,2,3", @@ -1037,6 +1913,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000002 ", "Counter": "0,1,2,3", @@ -1048,6 +1926,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040002 ", "Counter": "0,1,2,3", @@ -1059,6 +1939,21 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010002 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x4000000001 ", "Counter": "0,1,2,3", @@ -1070,6 +1965,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000001 ", "Counter": "0,1,2,3", @@ -1081,6 +1978,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000001 ", "Counter": "0,1,2,3", @@ -1092,6 +1991,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000001 ", "Counter": "0,1,2,3", @@ -1103,6 +2004,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000001 ", "Counter": "0,1,2,3", @@ -1114,6 +2017,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040001 ", "Counter": "0,1,2,3", @@ -1123,5 +2028,18 @@ "SampleAfterValue": "100007", "BriefDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache.", "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010001 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem.", + "Offcore": "1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/memory.json b/tools/perf/pmu-events/arch/x86/goldmont/memory.json index ac8b0d365a19..690cebd12a94 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/memory.json @@ -1,14 +1,4 @@ [ - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts machine clears due to memory ordering issues. This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved - as another core is in the process of modifying the data.", - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "SampleAfterValue": "200003", - "BriefDescription": "Machine clears due to memory ordering issue" - }, { "PEBS": "2", "CollectPEBSRecord": "2", @@ -30,5 +20,275 @@ "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT", "SampleAfterValue": "200003", "BriefDescription": "Store uops that split a page (Precise event capable)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts machine clears due to memory ordering issues. 
This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved as another core is in the process of modifying the data.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "SampleAfterValue": "200003", + "BriefDescription": "Machine clears due to memory ordering issue" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x20000032b7 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000022 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000003091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000003010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000400 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000200 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000100 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000080 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000020 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000008 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000004 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000002 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. 
(duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000001 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/other.json b/tools/perf/pmu-events/arch/x86/goldmont/other.json index df25ca9542f1..959cadd7cb0e 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/other.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/other.json @@ -1,23 +1,23 @@ [ { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. If a uop is not available (Instruction Queue is empty), this event will not count.", - "EventCode": "0xCA", + "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.", + "EventCode": "0x86", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL", + "UMask": "0x0", + "EventName": "FETCH_STALL.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend" + "BriefDescription": "Cycles code-fetch stalled due to any reason." 
}, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.", - "EventCode": "0xCA", + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.", + "EventCode": "0x86", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY", + "UMask": "0x1", + "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES", "SampleAfterValue": "200003", - "BriefDescription": "Unfilled issue slots per cycle to recover" + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ITLB miss." }, { "CollectPEBSRecord": "1", @@ -29,6 +29,26 @@ "SampleAfterValue": "200003", "BriefDescription": "Unfilled issue slots per cycle" }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. 
If a uop is not available (Instruction Queue is empty), this event will not count.", + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL", + "SampleAfterValue": "200003", + "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.", + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY", + "SampleAfterValue": "200003", + "BriefDescription": "Unfilled issue slots per cycle to recover" + }, { "CollectPEBSRecord": "2", "PublicDescription": "Counts hardware interrupts received by the processor.", @@ -36,8 +56,18 @@ "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "HW_INTERRUPTS.RECEIVED", + "SampleAfterValue": "203", + "BriefDescription": "Hardware interrupts received" + }, + { + "CollectPEBSRecord": "2", + "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). 
Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.", + "EventCode": "0xCB", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "HW_INTERRUPTS.MASKED", "SampleAfterValue": "200003", - "BriefDescription": "Hardware interrupts received (Precise event capable)" + "BriefDescription": "Cycles hardware interrupts are masked" }, { "CollectPEBSRecord": "2", @@ -47,6 +77,6 @@ "UMask": "0x4", "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED", "SampleAfterValue": "200003", - "BriefDescription": "Cycles pending interrupts are masked (Precise event capable)" + "BriefDescription": "Cycles pending interrupts are masked" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json index 07f00041f56f..254788af8ab6 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json @@ -1,168 +1,136 @@ [ { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts branch instructions retired for all branch types. This is an architectural performance event.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "200003", - "BriefDescription": "Retired branch instructions (Precise event capable)" - }, - { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x7e", - "EventName": "BR_INST_RETIRED.JCC", - "SampleAfterValue": "200003", - "BriefDescription": "Retired conditional branch instructions (Precise event capable)" + "PublicDescription": "Counts the number of instructions that retire execution. 
For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBS record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 0", + "UMask": "0x1", + "EventName": "INST_RETIRED.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Instructions retired (Fixed event)" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction were not taken.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0xfe", - "EventName": "BR_INST_RETIRED.TAKEN_JCC", - "SampleAfterValue": "200003", - "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)" + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.
You cannot collect a PEBS record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when core is not halted (Fixed event)" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts near CALL branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0xf9", - "EventName": "BR_INST_RETIRED.CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Retired near call instructions (Precise event capable)" + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBS record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 2", + "UMask": "0x3", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when core is not halted (Fixed event)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts near relative CALL branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts a load blocked from using a store forward, but did not occur because the store data was not available at the right time.
The forward might occur subsequently when the data is available.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xfd", - "EventName": "BR_INST_RETIRED.REL_CALL", + "UMask": "0x1", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", "SampleAfterValue": "200003", - "BriefDescription": "Retired near relative call instructions (Precise event capable)" + "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts near indirect CALL branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xfb", - "EventName": "BR_INST_RETIRED.IND_CALL", + "UMask": "0x2", + "EventName": "LD_BLOCKS.STORE_FORWARD", "SampleAfterValue": "200003", - "BriefDescription": "Retired near indirect call instructions (Precise event capable)" + "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts near return branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xf7", - "EventName": "BR_INST_RETIRED.RETURN", + "UMask": "0x4", + "EventName": "LD_BLOCKS.4K_ALIAS", "SampleAfterValue": "200003", - "BriefDescription": "Retired near return instructions (Precise event capable)" + "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts loads blocked 
because they are unable to find their physical address in the micro TLB (UTLB).", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xeb", - "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "UMask": "0x8", + "EventName": "LD_BLOCKS.UTLB_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)" + "BriefDescription": "Loads blocked because address in not in the UTLB (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts far branch instructions retired. This includes far jump, far call and return, and Interrupt call and return.", - "EventCode": "0xC4", + "PublicDescription": "Counts anytime a load that retires is blocked for any reason.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xbf", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "UMask": "0x10", + "EventName": "LD_BLOCKS.ALL_BLOCK", "SampleAfterValue": "200003", - "BriefDescription": "Retired far branch instructions (Precise event capable)" + "BriefDescription": "Loads blocked (Precise event capable)" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine. This event counts uops that retire as well as uops that were speculatively executed but didn't retire. 
The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a miss-predicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.", + "EventCode": "0x0E", "Counter": "0,1,2,3", "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)" - }, - { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x7e", - "EventName": "BR_MISP_RETIRED.JCC", + "EventName": "UOPS_ISSUED.ANY", "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)" + "BriefDescription": "Uops issued to the back end per cycle" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "Core cycles when core is not halted. 
This event uses a (_P)rogrammable general purpose performance counter.", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xfe", - "EventName": "BR_MISP_RETIRED.TAKEN_JCC", - "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)" + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when core is not halted" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "Reference cycles when core is not halted. This event uses a programmable general purpose performance counter.", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xfb", - "EventName": "BR_MISP_RETIRED.IND_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)" + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when core is not halted" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "This event used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end has is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. 
Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources. When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all. Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end efficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots. These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. For example, the INC instruction has one uop that requires 2 issue slots. A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock. 
The low uop issue rate for a stream of INC instructions is considered to be a back end issue.", + "EventCode": "0x9C", "Counter": "0,1,2,3", - "UMask": "0xf7", - "EventName": "BR_MISP_RETIRED.RETURN", + "UMask": "0x0", + "EventName": "UOPS_NOT_DELIVERED.ANY", "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)" + "BriefDescription": "Uops requested but not-delivered to the back-end per cycle" }, { "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0xeb", - "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", - "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call. 
(Precise event capable)" + "UMask": "0x0", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Instructions retired (Precise event capable)" }, { "PEBS": "2", @@ -186,9 +154,41 @@ "SampleAfterValue": "2000003", "BriefDescription": "MS uops retired (Precise event capable)" }, + { + "PEBS": "2", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of floating point divide uops retired.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "UOPS_RETIRED.FPDIV", + "SampleAfterValue": "2000003", + "BriefDescription": "Floating point divide uops retired. (Precise Event Capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "UOPS_RETIRED.IDIV", + "SampleAfterValue": "2000003", + "BriefDescription": "Integer divide uops retired. (Precise Event Capable)" + }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel? architecture processors.", + "PublicDescription": "Counts machine clears for any reason.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "MACHINE_CLEARS.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "All machine clears" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. 
Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.", "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x1", @@ -217,217 +217,239 @@ "BriefDescription": "Machine clears due to memory disambiguation" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts machine clears for any reason.", - "EventCode": "0xC3", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts branch instructions retired for all branch types. This is an architectural performance event.", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x0", - "EventName": "MACHINE_CLEARS.ALL", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", "SampleAfterValue": "200003", - "BriefDescription": "All machine clears" + "BriefDescription": "Retired branch instructions (Precise event capable)" }, { "PEBS": "2", - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. 
Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.", - "EventCode": "0xC0", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "INST_RETIRED.ANY_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Instructions retired (Precise event capable)" + "UMask": "0x7e", + "EventName": "BR_INST_RETIRED.JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired conditional branch instructions (Precise event capable)" }, { + "PEBS": "2", "CollectPEBSRecord": "1", - "PublicDescription": "This event used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end has is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources. When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. 
This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all. Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end efficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots. These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. For example, the INC instruction has one uop that requires 2 issue slots. A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock. The low uop issue rate for a stream of INC instructions is considered to be a back end issue.", - "EventCode": "0x9C", + "PublicDescription": "Counts the number of taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "UOPS_NOT_DELIVERED.ANY", + "UMask": "0x80", + "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES", "SampleAfterValue": "200003", - "BriefDescription": "Uops requested but not-delivered to the back-end per cycle" + "BriefDescription": "Retired taken branch instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine. This event counts uops that retire as well as uops that were speculatively executed but didn't retire. 
The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a miss-predicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.", - "EventCode": "0x0E", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts far branch instructions retired. This includes far jump, far call and return, and Interrupt call and return.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "UOPS_ISSUED.ANY", + "UMask": "0xbf", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "200003", - "BriefDescription": "Uops issued to the back end per cycle" + "BriefDescription": "Retired far branch instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts core cycles if either divide unit is busy.", - "EventCode": "0xCD", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CYCLES_DIV_BUSY.ALL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a divider is busy" + "UMask": "0xeb", + "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "SampleAfterValue": "200003", + "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts core cycles the integer divide unit is busy.", - "EventCode": "0xCD", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near return branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLES_DIV_BUSY.IDIV", + "UMask": "0xf7", + "EventName": "BR_INST_RETIRED.RETURN", "SampleAfterValue": "200003", - "BriefDescription": "Cycles the 
integer divide unit is busy" + "BriefDescription": "Retired near return instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts core cycles the floating point divide unit is busy.", - "EventCode": "0xCD", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near CALL branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLES_DIV_BUSY.FPDIV", + "UMask": "0xf9", + "EventName": "BR_INST_RETIRED.CALL", "SampleAfterValue": "200003", - "BriefDescription": "Cycles the FP divide unit is busy" + "BriefDescription": "Retired near call instructions (Precise event capable)" }, { - "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", - "Counter": "Fixed counter 1", - "UMask": "0x1", - "EventName": "INST_RETIRED.ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Instructions retired (Fixed event)" + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near indirect CALL branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfb", + "EventName": "BR_INST_RETIRED.IND_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Retired near indirect call instructions (Precise event capable)" }, { - "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. 
You cannot collect a PEBs record for this event.", - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when core is not halted (Fixed event)" + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near relative CALL branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfd", + "EventName": "BR_INST_RETIRED.REL_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Retired near relative call instructions (Precise event capable)" }, { - "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", - "Counter": "Fixed counter 3", - "UMask": "0x3", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when core is not halted (Fixed event)" + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction were not taken.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfe", + "EventName": "BR_INST_RETIRED.TAKEN_JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Core cycles when core is not halted. 
This event uses a (_P)rogrammable general purpose performance counter.", - "EventCode": "0x3C", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.CORE_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when core is not halted" + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Reference cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.", - "EventCode": "0x3C", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when core is not halted" + "UMask": "0x7e", + "EventName": "BR_MISP_RETIRED.JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call, Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.", - "EventCode": "0xE6", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where 
the target address taken was not what the processor predicted.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BACLEARS.ALL", + "UMask": "0xeb", + "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", "SampleAfterValue": "200003", - "BriefDescription": "BACLEARs asserted for any branch type" + "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call. (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts BACLEARS on return instructions.", - "EventCode": "0xE6", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "BACLEARS.RETURN", + "UMask": "0xf7", + "EventName": "BR_MISP_RETIRED.RETURN", "SampleAfterValue": "200003", - "BriefDescription": "BACLEARs asserted for return branch" + "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.", - "EventCode": "0xE6", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "BACLEARS.COND", + "UMask": "0xfb", + "EventName": "BR_MISP_RETIRED.IND_CALL", "SampleAfterValue": "200003", - "BriefDescription": "BACLEARs asserted for conditional branch" + "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts anytime a load that retires is blocked for any reason.", - "EventCode": "0x03", + 
"PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "LD_BLOCKS.ALL_BLOCK", + "UMask": "0xfe", + "EventName": "BR_MISP_RETIRED.TAKEN_JCC", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked (Precise event capable)" + "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).", - "EventCode": "0x03", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles if either divide unit is busy.", + "EventCode": "0xCD", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS.UTLB_MISS", + "UMask": "0x0", + "EventName": "CYCLES_DIV_BUSY.ALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a divider is busy" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles the integer divide unit is busy.", + "EventCode": "0xCD", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CYCLES_DIV_BUSY.IDIV", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked because address in not in the UTLB (Precise event capable)" + "BriefDescription": "Cycles the integer divide unit is busy" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.", - "EventCode": "0x03", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles the floating point divide unit is busy.", + "EventCode": "0xCD", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": 
"LD_BLOCKS.STORE_FORWARD", + "EventName": "CYCLES_DIV_BUSY.FPDIV", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)" + "BriefDescription": "Cycles the FP divide unit is busy" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts a load blocked from using a store forward, but did not occur because the store data was not available at the right time. The forward might occur subsequently when the data is available.", - "EventCode": "0x03", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call, Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.", + "EventCode": "0xE6", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "EventName": "BACLEARS.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)" + "BriefDescription": "BACLEARs asserted for any branch type" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.", - "EventCode": "0x03", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts BACLEARS on return instructions.", + "EventCode": "0xE6", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "LD_BLOCKS.4K_ALIAS", + "UMask": "0x8", + "EventName": "BACLEARS.RETURN", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)" + "BriefDescription": "BACLEARs asserted for return branch" }, { - "PEBS": "2", "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of taken branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if 
Condition is Met) branches.", + "EventCode": "0xE6", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES", + "UMask": "0x10", + "EventName": "BACLEARS.COND", "SampleAfterValue": "200003", - "BriefDescription": "Retired taken branch instructions (Precise event capable)" + "BriefDescription": "BACLEARs asserted for conditional branch" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json index 3202c4478836..9805198d3f5f 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json @@ -1,4 +1,34 @@ [ + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts every core cycle when a Data-side (walks due to a data operation) page walk is in progress.", + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "PAGE_WALKS.D_SIDE_CYCLES", + "SampleAfterValue": "200003", + "BriefDescription": "Duration of D-side page-walks in cycles" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts every core cycle when a Instruction-side (walks due to an instruction fetch) page walk is in progress.", + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "PAGE_WALKS.I_SIDE_CYCLES", + "SampleAfterValue": "200003", + "BriefDescription": "Duration of I-side pagewalks in cycles" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts every core cycle a page-walk is in progress due to either a data memory operation or an instruction fetch.", + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "PAGE_WALKS.CYCLES", + "SampleAfterValue": "200003", + "BriefDescription": "Duration of page-walks in cycles" + }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation 
Lookaside Buffer (ITLB) for a linear address of an instruction fetch. It counts when new translation are filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.", @@ -41,35 +71,5 @@ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", "SampleAfterValue": "200003", "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts every core cycle when a Data-side (walks due to a data operation) page walk is in progress.", - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "PAGE_WALKS.D_SIDE_CYCLES", - "SampleAfterValue": "200003", - "BriefDescription": "Duration of D-side page-walks in cycles" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts every core cycle when a Instruction-side (walks due to an instruction fetch) page walk is in progress.", - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "PAGE_WALKS.I_SIDE_CYCLES", - "SampleAfterValue": "200003", - "BriefDescription": "Duration of I-side pagewalks in cycles" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts every core cycle a page-walk is in progress due to either a data memory operation or an instruction fetch.", - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "PAGE_WALKS.CYCLES", - "SampleAfterValue": "200003", - "BriefDescription": "Duration of page-walks in cycles" } ] \ No newline at end of file -- cgit v1.2.3 From ca3a2d055d86e6a731c783f2081deb9b03e36d2e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:48:12 -0800 Subject: perf vendor events intel: Update Haswell events to V27 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- 
tools/perf/pmu-events/arch/x86/haswell/cache.json | 365 ++++--- .../arch/x86/haswell/floating-point.json | 20 +- .../perf/pmu-events/arch/x86/haswell/frontend.json | 132 +-- tools/perf/pmu-events/arch/x86/haswell/memory.json | 21 + tools/perf/pmu-events/arch/x86/haswell/other.json | 20 +- .../perf/pmu-events/arch/x86/haswell/pipeline.json | 1131 ++++++++++---------- .../arch/x86/haswell/virtual-memory.json | 212 ++-- 7 files changed, 977 insertions(+), 924 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/haswell/cache.json b/tools/perf/pmu-events/arch/x86/haswell/cache.json index bfb5ebf48c54..da4d6ddd4f92 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/cache.json +++ b/tools/perf/pmu-events/arch/x86/haswell/cache.json @@ -11,14 +11,34 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Demand data read requests that hit L2 cache.", + "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", + "UMask": "0x22", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that miss L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of instruction fetches that missed the L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache misses when fetching instructions", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Demand requests that miss L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x27", "Errata": "HSD78", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "BriefDescription": "Demand requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -31,6 +51,48 @@ 
"BriefDescription": "L2 prefetch requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "All requests that missed L2.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x3f", + "Errata": "HSD78", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "All requests that miss L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Demand data read requests that hit L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x41", + "Errata": "HSD78", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x42", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of instruction fetches that hit the L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x44", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.", "EventCode": "0x24", @@ -72,6 +134,17 @@ "BriefDescription": "L2 code requests", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Demand requests to L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xe7", + "Errata": "HSD78", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts all L2 HW prefetcher requests.", 
"EventCode": "0x24", @@ -82,6 +155,17 @@ "BriefDescription": "Requests from L2 hardware prefetchers", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "All requests to L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xff", + "Errata": "HSD78", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "All L2 requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Not rejected writebacks that hit L2 cache.", "EventCode": "0x27", @@ -122,6 +206,27 @@ "BriefDescription": "L1D miss oustandings duration in cycles", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "2" + }, { "EventCode": "0x48", "Counter": "0,1,2,3", @@ -133,13 +238,13 @@ }, { "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding.", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", "CounterMask": "1", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "This event counts when new data lines are brought into the L1 Data cache, which cause other lines to be evicted from the cache.", @@ -162,6 +267,28 @@ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore 
queue.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "HSD78, HSD62, HSD61", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "HSD78, HSD62, HSD61", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Offcore outstanding Demand code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", @@ -185,46 +312,35 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. 
Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", + "UMask": "0x4", "Errata": "HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD78, HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. 
Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x8", "Errata": "HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", + "UMask": "0x8", "Errata": "HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -288,6 +404,15 @@ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "CounterHTOff": "0,1,2,3" + }, { "PEBS": "1", "EventCode": "0xD0", @@ -296,7 +421,7 @@ "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", + 
"BriefDescription": "Retired load uops that miss the STLB. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -308,7 +433,7 @@ "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" @@ -321,31 +446,33 @@ "Errata": "HSD76, HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", + "PublicDescription": "This event counts load uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x41", "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", + "PublicDescription": "This event counts store uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x42", "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. 
(precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" @@ -358,19 +485,20 @@ "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", + "PublicDescription": "This event counts all store uops retired. This is a precise event.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x82", "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" @@ -401,20 +529,20 @@ }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L3 cache hits as data sources.", + "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x4", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "Retired load uops missed L1 cache as data sources.", + "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x8", @@ -427,20 +555,18 @@ }, { "PEBS": "1", - "PublicDescription": "Retired load uops missed L2. 
Unknown data source excluded.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x10", "Errata": "HSD29, HSM30", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "50021", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "Retired load uops missed L3. Excludes unknown data source .", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x20", @@ -477,25 +603,27 @@ }, { "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", "Errata": "HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", "Errata": "HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. 
", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -513,14 +641,13 @@ }, { "PEBS": "1", - "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches.", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD3", "Counter": "0,1,2,3", "UMask": "0x1", "Errata": "HSD74, HSD29, HSD25, HSM30", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "SampleAfterValue": "100003", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -665,6 +792,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "", "EventCode": "0xf4", "Counter": "0,1,2,3", "UMask": "0x10", @@ -674,131 +802,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x22", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of instruction fetches that hit the L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x44", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of instruction fetches that missed the L2 cache.", - 
"EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Demand requests that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x27", - "Errata": "HSD78", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Demand requests to L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe7", - "Errata": "HSD78", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "All requests that missed L2.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3f", - "Errata": "HSD78", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "All requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "All requests to L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xff", - "Errata": "HSD78", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "All L2 requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - 
"Errata": "HSD78, HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c8fff", "Counter": "0,1,2,3", @@ -811,6 +815,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c07f7", "Counter": "0,1,2,3", @@ -823,6 +828,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to 
sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c07f7", "Counter": "0,1,2,3", @@ -835,6 +841,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0244", "Counter": "0,1,2,3", @@ -847,6 +854,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0122", "Counter": "0,1,2,3", @@ -859,6 +867,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0122", "Counter": "0,1,2,3", @@ -871,6 +880,7 @@ "CounterHTOff": "0,1,2,3" }, { + 
"PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0091", "Counter": "0,1,2,3", @@ -883,6 +893,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0091", "Counter": "0,1,2,3", @@ -895,6 +906,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0200", "Counter": "0,1,2,3", @@ -907,6 +919,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0100", "Counter": "0,1,2,3", @@ -919,6 +932,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that 
bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0080", "Counter": "0,1,2,3", @@ -931,6 +945,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0040", "Counter": "0,1,2,3", @@ -943,6 +958,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0020", "Counter": "0,1,2,3", @@ -955,6 +971,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0010", "Counter": "0,1,2,3", @@ -967,6 +984,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0004", "Counter": "0,1,2,3", @@ -979,6 +997,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0004", "Counter": "0,1,2,3", @@ -991,6 +1010,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0002", "Counter": "0,1,2,3", @@ -1003,6 +1023,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0002", "Counter": "0,1,2,3", @@ -1015,6 +1036,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore 
response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0001", "Counter": "0,1,2,3", @@ -1027,6 +1049,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0001", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json index 1732fa49c6d2..f9843e5a9b42 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json @@ -19,6 +19,16 @@ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.", + "EventCode": "0xC6", + "Counter": "0,1,2,3", + "UMask": "0x7", + "EventName": "AVX_INSTS.ALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. 
May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of X87 FP assists due to output values.", "EventCode": "0xCA", @@ -69,15 +79,5 @@ "BriefDescription": "Cycles with any input/output SSE or FP assist", "CounterMask": "1", "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.", - "EventCode": "0xC6", - "Counter": "0,1,2,3", - "UMask": "0x7", - "EventName": "AVX_INSTS.ALL", - "SampleAfterValue": "2000003", - "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswell/frontend.json b/tools/perf/pmu-events/arch/x86/haswell/frontend.json index 57a1ce46971f..c0a5bedcc15c 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/frontend.json +++ b/tools/perf/pmu-events/arch/x86/haswell/frontend.json @@ -21,74 +21,43 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. 
Set Cmask = 1 to count cycles.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. 
Using other instructions, if possible, will usually improve performance.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Increment each cycle # of uops 
delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", - "CounterMask": "1", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -134,6 +103,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.", "EventCode": "0x79", @@ -156,6 +135,38 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. 
Using other instructions, if possible, will usually improve performance.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of uops delivered to IDQ from any path.", "EventCode": "0x79", @@ -194,6 +205,15 @@ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x80", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "ICACHE.IFDATA_STALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event count the number of undelivered (unallocated) uops from the Front-end to the 
Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. The Front-end can allocate up to 4 uops per cycle so this event can increment 0-4 times per cycle depending on the number of unallocated uops. This event is counted on a per-core basis.", "EventCode": "0x9C", @@ -270,25 +290,5 @@ "SampleAfterValue": "2000003", "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x80", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ICACHE.IFDATA_STALL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswell/memory.json b/tools/perf/pmu-events/arch/x86/haswell/memory.json index aab981b42339..e5f9fa6655b3 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/memory.json +++ b/tools/perf/pmu-events/arch/x86/haswell/memory.json @@ -401,6 +401,7 @@ "CounterHTOff": "3" }, { + "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc08fff", "Counter": "0,1,2,3", @@ -413,6 +414,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01004007f7", "Counter": "0,1,2,3", @@ -425,6 +427,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc007f7", "Counter": "0,1,2,3", @@ -437,6 +440,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400244", "Counter": "0,1,2,3", @@ -449,6 +453,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00244", "Counter": "0,1,2,3", @@ -461,6 +466,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to 
specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400122", "Counter": "0,1,2,3", @@ -473,6 +479,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00122", "Counter": "0,1,2,3", @@ -485,6 +492,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400091", "Counter": "0,1,2,3", @@ -497,6 +505,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00091", "Counter": "0,1,2,3", @@ -509,6 +518,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00200", "Counter": "0,1,2,3", @@ -521,6 +531,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": 
"Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00100", "Counter": "0,1,2,3", @@ -533,6 +544,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00080", "Counter": "0,1,2,3", @@ -545,6 +557,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00040", "Counter": "0,1,2,3", @@ -557,6 +570,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00020", "Counter": "0,1,2,3", @@ -569,6 +583,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event 
codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00010", "Counter": "0,1,2,3", @@ -581,6 +596,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400004", "Counter": "0,1,2,3", @@ -593,6 +609,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00004", "Counter": "0,1,2,3", @@ -605,6 +622,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400002", "Counter": "0,1,2,3", @@ -617,6 +635,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00002", "Counter": "0,1,2,3", @@ -629,6 +648,7 @@ 
"CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400001", "Counter": "0,1,2,3", @@ -641,6 +661,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00001", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/haswell/other.json b/tools/perf/pmu-events/arch/x86/haswell/other.json index 85d6a14baf9d..8a4d898d76c1 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/other.json +++ b/tools/perf/pmu-events/arch/x86/haswell/other.json @@ -9,16 +9,6 @@ "BriefDescription": "Unhalted core cycles when the thread is in ring 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", - "EventCode": "0x5C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPL_CYCLES.RING123", - "SampleAfterValue": "2000003", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x5C", "Counter": "0,1,2,3", @@ -30,6 +20,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", + "EventCode": "0x5C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPL_CYCLES.RING123", + "SampleAfterValue": "2000003", + "BriefDescription": "Unhalted core cycles when thread is in 
rings 1, 2, or 3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.", "EventCode": "0x63", diff --git a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json index 0099848607ad..a4dcfce4a512 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json @@ -2,33 +2,43 @@ { "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "Errata": "HSD140, HSD143", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling.", "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "Fixed counter 1" }, { "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.", "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. 
The penalty for blocked store forwarding is that the load must wait for the store to write its value to the cache before it can be issued.", @@ -67,7 +77,19 @@ "UMask": "0x3", "EventName": "INT_MISC.RECOVERY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "EventCode": "0x0D", + "Counter": "0,1,2,3", + "UMask": "0x3", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke)", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -81,6 +103,29 @@ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x0E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0x0E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "Number of flags-merge uops allocated. Such uops add delay.", "EventCode": "0x0E", @@ -112,35 +157,32 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0E", - "Invert": "1", + "EventCode": "0x14", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", + "UMask": "0x2", + "EventName": "ARITH.DIVIDER_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0E", - "Invert": "1", + "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling.", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Thread cycles when thread is not in halt state", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x14", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ARITH.DIVIDER_UOPS", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -153,6 +195,38 @@ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the thread is unhalted. 
(counts at 100 MHz rate)", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "Counter": "0,1,2,3", @@ -162,6 +236,15 @@ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.", "EventCode": "0x4c", @@ -232,6 +315,18 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", + "SampleAfterValue": "200003", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles where the decoder is stalled on an instruction with a length changing prefix (LCP).", "EventCode": "0x87", @@ -406,6 +501,15 @@ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "Counter": "0,1,2,3", + "UMask": "0xa0", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x89", "Counter": "0,1,2,3", @@ -445,136 +549,282 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.", + "PublicDescription": "Cycles per core when uops are exectuted in port 0.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Cycles per core when uops are executed in port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "BriefDescription": "Cycles per thread when uops are executed in port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.", + 
"PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.", + "PublicDescription": "Cycles per core when uops are exectuted in port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "BriefDescription": "Cycles per core when uops are executed in port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "Cycles per thread when uops are executed in port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.", + "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "UMask": "0x4", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "BriefDescription": 
"Cycles per thread when uops are executed in port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "UMask": "0x4", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles allocation is stalled due to resource related reason.", - "EventCode": "0xA2", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD135", - "EventName": "RESOURCE_STALLS.ANY", + "UMask": "0x4", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles", + "BriefDescription": "Cycles per thread when uops are executed in port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "RESOURCE_STALLS.RS", + "UMask": "0x8", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.", - "EventCode": "0xA2", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no 
store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 3.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "RESOURCE_STALLS.ROB", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.", - "EventCode": "0xA3", + "PublicDescription": "Cycles per core when uops are exectuted in port 4.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD78", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "UMask": "0x10", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with pending L2 cache miss loads.", - "CounterMask": "1", + "BriefDescription": "Cycles per core when uops are executed in port 4.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with pending L1 data cache miss loads. 
Set Cmask=8 to count cycle.", - "EventCode": "0xA3", - "Counter": "2", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 4.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles per core when uops are exectuted in port 5.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per core when uops are executed in port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles per core when uops are exectuted in port 6.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per core when uops are executed in port 6.", + 
"CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 6.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 7", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 7.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles allocation is stalled due to resource related reason.", + "EventCode": "0xA2", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "HSD135", + "EventName": "RESOURCE_STALLS.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource-related stall cycles", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "RESOURCE_STALLS.RS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.", + "EventCode": "0xA2", + "Counter": 
"0,1,2,3", "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "EventName": "RESOURCE_STALLS.SB", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with pending L1 cache miss loads.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "RESOURCE_STALLS.ROB", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to re-order buffer full.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.", + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "HSD78", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with pending L2 cache miss loads.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Cycles with pending memory loads. Set Cmask=2 to count cycle.", @@ -594,7 +844,7 @@ "UMask": "0x4", "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", "CounterMask": "4", "CounterHTOff": "0,1,2,3" }, @@ -620,6 +870,17 @@ "CounterMask": "6", "CounterHTOff": "0,1,2,3" }, + { + "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with pending L1 cache miss loads.", + "CounterMask": "8", + "CounterHTOff": "2" + }, { "PublicDescription": "Execution stalls due to L1 data cache miss loads. 
Set Cmask=0CH.", "EventCode": "0xA3", @@ -642,14 +903,23 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", - "EventCode": "0xB1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE", + "UMask": "0x1", + "EventName": "LSD.CYCLES_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of uops executed on the core.", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA8", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -665,71 +935,172 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of instructions at retirement.", - "EventCode": "0xC0", + "PublicDescription": "This events counts the cycles where at least one uop was executed. It is counted per thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "HSD11, HSD140", - "EventName": "INST_RETIRED.ANY_P", + "UMask": "0x1", + "Errata": "HSD144, HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", - "EventCode": "0xC0", + "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "INST_RETIRED.X87", - "SampleAfterValue": "2000003", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.", - "EventCode": "0xC0", - "Counter": "1", "UMask": "0x1", - "Errata": "HSD140", - "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "HSD144, HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "CounterHTOff": "1" + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", - "EventCode": "0xC1", + "PublicDescription": "This events counts the cycles where at least three uop were executed. 
It is counted per thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "SampleAfterValue": "100003", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "Errata": "HSD144, HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", - "PublicDescription": "Counts the number of micro-ops retired. Use Cmask=1 and invert to count active cycles or stalled cycles.", - "EventCode": "0xC2", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.ALL", + "Errata": "HSD144, HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops.", - "CounterHTOff": "0,1,2,3,4,5,6,7", - "Data_LA": "1" + "BriefDescription": "Cycles where at least 4 uops were executed per-thread.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of retirement slots used each cycle. 
There are potentially 4 slots that can be used each cycle - meaning, 4 uops or 4 instructions could retire each cycle.", - "EventCode": "0xC2", + "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Number of uops executed on the core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "Invert": "1", + "Counter": 
"0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of instructions at retirement.", + "EventCode": "0xC0", + "Counter": "0,1,2,3", + "UMask": "0x0", + "Errata": "HSD11, HSD140", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.", + "EventCode": "0xC0", + "Counter": "1", + "UMask": "0x1", + "Errata": "HSD140", + "EventName": "INST_RETIRED.PREC_DIST", + "SampleAfterValue": "2000003", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "CounterHTOff": "1" + }, + { + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventCode": "0xC0", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "INST_RETIRED.X87", + "SampleAfterValue": "2000003", + "BriefDescription": "FP operations retired. 
X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", + "EventCode": "0xC1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", + "SampleAfterValue": "100003", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "1", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_RETIRED.ALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Actually retired uops.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Data_LA": "1" + }, { "EventCode": "0xC2", "Invert": "1", @@ -764,6 +1135,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "SampleAfterValue": "2000003", + "BriefDescription": "Retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xC3", "Counter": "0,1,2,3", @@ -773,6 +1154,17 @@ "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. 
Machine clears can have a significant performance impact if they are happening frequently.", "EventCode": "0xC3", @@ -792,9 +1184,18 @@ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Branch instructions at retirement.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PEBS": "1", - "PublicDescription": "Counts the number of conditional branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", @@ -814,18 +1215,27 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Branch instructions at retirement.", + "PEBS": "1", "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100003", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Counts the number of near return instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x8", @@ -846,7 +1256,6 @@ }, { "PEBS": "1", - "PublicDescription": "Number of near taken branches retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x20", @@ -866,14 +1275,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - 
"EventCode": "0xC4", + "PublicDescription": "Mispredicted branch instructions at retirement.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "UMask": "0x0", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "All mispredicted macro branch instructions retired.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", @@ -885,16 +1294,6 @@ "BriefDescription": "Mispredicted conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Mispredicted branch instructions at retirement.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PEBS": "2", "PublicDescription": "This event counts all mispredicted branch instructions retired. This is a precise event.", @@ -903,427 +1302,37 @@ "UMask": "0x4", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired. ", + "BriefDescription": "Mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Count cases of saving new LBR records by hardware.", - "EventCode": "0xCC", + "PEBS": "1", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Count cases of saving new LBR", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling.", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x89", - "Counter": "0,1,2,3", - "UMask": "0xa0", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x4", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "PublicDescription": "Count cases of saving new LBR records by hardware.", + "EventCode": "0xCC", "Counter": "0,1,2,3", - "UMask": "0x8", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "UMask": "0x20", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", "SampleAfterValue": "2000003", - 
"BriefDescription": "Cycles per core when uops are dispatched to port 3.", + "BriefDescription": "Count cases of saving new LBR", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x10", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x20", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x80", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "Number of near branch instructions retired that were taken but mispredicted.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This events counts the cycles where at least one uop was executed. 
It is counted per thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD144, HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD144, HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "This events counts the cycles where at least three uop were executed. It is counted per thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD144, HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD144, HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Number of front end re-steers due to BPU misprediction.", - "EventCode": "0xe6", + "PublicDescription": "Number of front end re-steers due to BPU misprediction.", + "EventCode": "0xe6", "Counter": "0,1,2,3", "UMask": "0x1f", "EventName": "BACLEARS.ANY", "SampleAfterValue": "100003", "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a 
correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5E", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "200003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - 
{ - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 2" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "EventCode": "0x0D", - "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke)", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the thread is unhalted. 
(counts at 100 MHz rate)", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json index ce80a08d0f08..777b500a5c9f 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json @@ -38,6 +38,16 @@ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. 
(1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.", "EventCode": "0x08", @@ -68,6 +78,16 @@ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Number of cache load STLB hits. No page walk.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed.", "EventCode": "0x08", @@ -117,6 +137,16 @@ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks. 
(1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB store misses.", "EventCode": "0x49", @@ -147,6 +177,16 @@ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "DTLB store misses with low part of linear-to-physical address translation missed.", "EventCode": "0x49", @@ -205,6 +245,16 @@ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. 
(1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Completed page walks in ITLB of any page size.", + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by ITLB misses.", "EventCode": "0x85", @@ -235,6 +285,16 @@ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "ITLB misses that hit STLB. No page walk.", + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.", "EventCode": "0xae", @@ -256,41 +316,45 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.", + "PublicDescription": "Number of DTLB page walker loads that hit in the L2.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "UMask": "0x12", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB", + "BriefDescription": "Number of DTLB page walker hits in the L2", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Number of DTLB page walker loads that hit in the L3.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1", + "UMask": "0x14", + "Errata": "HSD25", + "EventName": 
"PAGE_WALKER_LOADS.DTLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Number of DTLB page walker loads from memory.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1", + "UMask": "0x18", + "Errata": "HSD25", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.", + "BriefDescription": "Number of DTLB page walker hits in Memory", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of DTLB page walker loads that hit in the L2.", + "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x12", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "UMask": "0x21", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in the L2", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB", "CounterHTOff": "0,1,2,3" }, { @@ -304,43 +368,43 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Number of ITLB page walker loads that hit in the L3.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2", + "UMask": "0x24", + "Errata": "HSD25", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Number of ITLB page walker loads from memory.", "EventCode": "0xBC", "Counter": 
"0,1,2,3", - "UMask": "0x82", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2", + "UMask": "0x28", + "Errata": "HSD25", + "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", + "BriefDescription": "Number of ITLB page walker hits in Memory", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of DTLB page walker loads that hit in the L3.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x14", - "Errata": "HSD25", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "UMask": "0x41", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of ITLB page walker loads that hit in the L3.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x24", - "Errata": "HSD25", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "UMask": "0x42", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.", "CounterHTOff": "0,1,2,3" }, { @@ -355,41 +419,37 @@ { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3", + "UMask": "0x48", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of DTLB page walker loads from memory.", 
"EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x18", - "Errata": "HSD25", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "UMask": "0x81", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in Memory", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of ITLB page walker loads from memory.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x28", - "Errata": "HSD25", - "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY", + "UMask": "0x82", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in Memory", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x48", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY", + "UMask": "0x84", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", "CounterHTOff": "0,1,2,3" }, { @@ -420,65 +480,5 @@ "SampleAfterValue": "100003", "BriefDescription": "STLB flush attempts", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": 
"Number of cache load STLB hits. No page walk.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).", - "EventCode": "0x49", - "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "EventCode": "0x49", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Completed page walks in ITLB of any page size.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "ITLB misses that hit STLB. 
No page walk.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3 From 032c16b296d512d151a3276ab3c53d4a4d65e2ed Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:49:26 -0800 Subject: perf vendor events intel: Update HaswellX events to V19 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/haswellx/cache.json | 377 ++++--- .../arch/x86/haswellx/floating-point.json | 20 +- .../pmu-events/arch/x86/haswellx/frontend.json | 132 +-- .../perf/pmu-events/arch/x86/haswellx/memory.json | 28 + tools/perf/pmu-events/arch/x86/haswellx/other.json | 20 +- .../pmu-events/arch/x86/haswellx/pipeline.json | 1133 ++++++++++---------- .../arch/x86/haswellx/virtual-memory.json | 212 ++-- 7 files changed, 991 insertions(+), 931 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/haswellx/cache.json b/tools/perf/pmu-events/arch/x86/haswellx/cache.json index f1bae0817a6f..b2fbd617306a 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/cache.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/cache.json @@ -12,12 +12,32 @@ }, { "EventCode": "0x24", - "UMask": "0x41", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "UMask": "0x22", + "BriefDescription": "RFO requests that miss L2 cache", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x24", + "BriefDescription": "L2 cache misses when 
fetching instructions", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Number of instruction fetches that missed the L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x27", + "BriefDescription": "Demand requests that miss L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "Errata": "HSD78", - "PublicDescription": "Demand data read requests that hit L2 cache.", + "PublicDescription": "Demand requests that miss L2 cache.", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -31,6 +51,48 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0x3f", + "BriefDescription": "All requests that miss L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.MISS", + "Errata": "HSD78", + "PublicDescription": "All requests that missed L2.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x41", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "Errata": "HSD78", + "PublicDescription": "Demand data read requests that hit L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x42", + "BriefDescription": "RFO requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x44", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Number of instruction fetches that hit the L2 cache.", + "SampleAfterValue": 
"200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0x50", @@ -72,6 +134,17 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xe7", + "BriefDescription": "Demand requests to L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "Errata": "HSD78", + "PublicDescription": "Demand requests to L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0xf8", @@ -82,6 +155,17 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xff", + "BriefDescription": "All L2 requests", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.REFERENCES", + "Errata": "HSD78", + "PublicDescription": "All requests to L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x27", "UMask": "0x50", @@ -122,6 +206,27 @@ "SampleAfterValue": "2000003", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "Counter": "2", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "Counter": "2", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, { "EventCode": "0x48", "UMask": "0x2", @@ -133,13 +238,13 @@ }, { "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding.", - "Counter": "2", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "UMask": "0x2", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "Counter": "0,1,2,3", + "EventName": 
"L1D_PEND_MISS.FB_FULL", "CounterMask": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x51", @@ -162,6 +267,28 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "Errata": "HSD78, HSD62, HSD61", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "CounterMask": "6", + "Errata": "HSD78, HSD62, HSD61", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x60", "UMask": "0x2", @@ -186,23 +313,23 @@ }, { "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "UMask": "0x4", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "CounterMask": "1", "Errata": "HSD62, HSD61", - "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. 
Set Cmask=1 to count cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "UMask": "0x8", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", - "Errata": "HSD78, HSD62, HSD61", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "Errata": "HSD62, HSD61", + "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -217,17 +344,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "CounterMask": "1", - "Errata": "HSD62, HSD61", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x63", "UMask": "0x2", @@ -288,10 +404,19 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": 
"Retired load uops that miss the STLB. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -303,20 +428,20 @@ { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "Errata": "HSD29, HSM30", - "SampleAfterValue": "100003", "L1_Hit_Indication": "1", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -328,32 +453,34 @@ { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "Errata": "HSD29, HSM30", + "PublicDescription": "This event counts load uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "Errata": "HSD29, HSM30", - "SampleAfterValue": "100003", "L1_Hit_Indication": "1", + "PublicDescription": "This event counts store uops retired which had memory addresses spilt across 2 cache lines. 
A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -365,14 +492,15 @@ { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "Errata": "HSD29, HSM30", - "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", + "PublicDescription": "This event counts all store uops retired. This is a precise event.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { @@ -402,13 +530,13 @@ { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "Retired load uops with L3 cache hits as data sources.", + "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -421,20 +549,19 @@ "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "Errata": "HSM30", - "PublicDescription": "Retired load uops missed L1 cache as data sources.", + "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. 
This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "Errata": "HSD29, HSM30", - "PublicDescription": "Retired load uops missed L2. Unknown data source excluded.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -447,7 +574,6 @@ "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "Retired load uops missed L3. Excludes unknown data source .", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -478,24 +604,26 @@ { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "HSD29, HSD25, HSM26, HSM30", + "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. 
", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "HSD29, HSD25, HSM26, HSM30", + "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, @@ -514,20 +642,19 @@ { "EventCode": "0xD3", "UMask": "0x1", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "HSD74, HSD29, HSD25, HSM30", - "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches.", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. 
This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x4", - "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI)", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -539,7 +666,7 @@ { "EventCode": "0xD3", "UMask": "0x10", - "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -551,7 +678,7 @@ { "EventCode": "0xD3", "UMask": "0x20", - "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -706,134 +833,10 @@ "BriefDescription": "Split locks in SQ", "Counter": "0,1,2,3", "EventName": "SQ_MISC.SPLIT_LOCK", + "PublicDescription": "", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x24", - "UMask": "0x42", - "BriefDescription": "RFO requests that hit L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_HIT", - "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x22", - "BriefDescription": "RFO requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_MISS", - "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x44", - "BriefDescription": "L2 cache hits when 
fetching instructions, code reads.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "PublicDescription": "Number of instruction fetches that hit the L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x24", - "BriefDescription": "L2 cache misses when fetching instructions", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "PublicDescription": "Number of instruction fetches that missed the L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x27", - "BriefDescription": "Demand requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "Errata": "HSD78", - "PublicDescription": "Demand requests that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe7", - "BriefDescription": "Demand requests to L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "Errata": "HSD78", - "PublicDescription": "Demand requests to L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x3f", - "BriefDescription": "All requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.MISS", - "Errata": "HSD78", - "PublicDescription": "All requests that missed L2.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xff", - "BriefDescription": "All L2 requests", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.REFERENCES", - "Errata": "HSD78", - "PublicDescription": "All requests to L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "Errata": "HSD78, HSD62, HSD61", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "Counter": "2", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "UMask": "0x2", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.FB_FULL", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "Offcore": "1", "EventCode": "0xB7, 0xBB", @@ -843,6 +846,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -855,6 +859,7 @@ "Counter": "0,1,2,3", "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -867,6 +872,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -879,6 +885,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -891,6 +898,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the 
line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -903,6 +911,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -915,6 +924,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -927,6 +937,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -939,6 +950,7 @@ "Counter": "0,1,2,3", "EventName": 
"OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -951,6 +963,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -963,6 +976,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -975,6 +989,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": 
"100003", "CounterHTOff": "0,1,2,3" }, @@ -987,6 +1002,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -999,6 +1015,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1011,6 +1028,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1023,6 +1041,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand 
& prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1035,6 +1054,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1047,6 +1067,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1059,6 +1080,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific 
pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1071,6 +1093,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json b/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json index 6282aed6e090..bc08cc1f2f7e 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json @@ -19,6 +19,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xC6", + "UMask": "0x7", + "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. 
May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.", + "Counter": "0,1,2,3", + "EventName": "AVX_INSTS.ALL", + "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xCA", "UMask": "0x2", @@ -69,15 +79,5 @@ "PublicDescription": "Cycles with any input/output SSE* or FP assists.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x7", - "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.", - "Counter": "0,1,2,3", - "EventName": "AVX_INSTS.ALL", - "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswellx/frontend.json b/tools/perf/pmu-events/arch/x86/haswellx/frontend.json index 2d0c7aac1e61..a4d9f1fcf940 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/frontend.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/frontend.json @@ -22,72 +22,41 @@ }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "Increment each cycle. 
# of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x10", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_DSB_UOPS", - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x20", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_MITE_UOPS", - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x4", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. 
Using other instructions, if possible, will usually improve performance.", + "EventName": "IDQ.MITE_CYCLES", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. 
Set Cmask = 1 to count cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "UMask": "0x8", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_CYCLES", + "EventName": "IDQ.DSB_CYCLES", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "UMask": "0x10", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_CYCLES", - "CounterMask": "1", + "EventName": "IDQ.MS_DSB_UOPS", + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -134,6 +103,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x79", + "UMask": "0x20", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. 
Set Cmask = 1 to count cycles.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x79", "UMask": "0x24", @@ -156,6 +135,38 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. 
Using other instructions, if possible, will usually improve performance.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x79", "UMask": "0x3c", @@ -194,6 +205,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x80", + "UMask": "0x4", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", + "Counter": "0,1,2,3", + "EventName": "ICACHE.IFDATA_STALL", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x9C", "UMask": "0x1", @@ -270,25 +290,5 @@ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x80", - "UMask": "0x4", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", - "Counter": "0,1,2,3", - "EventName": "ICACHE.IFDATA_STALL", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswellx/memory.json b/tools/perf/pmu-events/arch/x86/haswellx/memory.json index 0886cc000d22..56b0f24b8029 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/memory.json +++ 
b/tools/perf/pmu-events/arch/x86/haswellx/memory.json @@ -409,6 +409,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -421,6 +422,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -433,6 +435,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -445,6 +448,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to 
specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -457,6 +461,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -469,6 +474,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -481,6 +487,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -493,6 +500,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and 
counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -505,6 +513,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -517,6 +526,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -529,6 +539,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -541,6 +552,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -553,6 +565,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -565,6 +578,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -577,6 +591,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -589,6 +604,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", + 
"PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -601,6 +617,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -613,6 +630,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -625,6 +643,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", 
"CounterHTOff": "0,1,2,3" }, @@ -637,6 +656,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -649,6 +669,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -661,6 +682,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -673,6 +695,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated 
MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -685,6 +708,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -697,6 +721,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -709,6 +734,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -721,6 +747,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that 
miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -733,6 +760,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/other.json b/tools/perf/pmu-events/arch/x86/haswellx/other.json index 4e1b6ce96ca3..800e65df31bc 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/other.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/other.json @@ -9,16 +9,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x5C", - "UMask": "0x2", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "Counter": "0,1,2,3", - "EventName": "CPL_CYCLES.RING123", - "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EdgeDetect": "1", "EventCode": "0x5C", @@ -30,6 +20,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5C", + "UMask": "0x2", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "Counter": "0,1,2,3", + "EventName": "CPL_CYCLES.RING123", + "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" 
+ }, { "EventCode": "0x63", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json index c3a163d34bd7..8a18bfe9e3e4 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json @@ -3,32 +3,42 @@ "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "Errata": "HSD140, HSD143", "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state.", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "UMask": "0x2", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "EventCode": "0x03", @@ -63,7 +73,7 @@ { "EventCode": "0x0D", "UMask": "0x3", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. 
misprediction or memory nuke)", "Counter": "0,1,2,3", "EventName": "INT_MISC.RECOVERY_CYCLES", "CounterMask": "1", @@ -71,6 +81,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x0D", + "UMask": "0x3", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x0E", "UMask": "0x1", @@ -81,6 +103,29 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0x0E", "UMask": "0x10", @@ -112,34 +157,31 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", + "EventCode": "0x14", + "UMask": "0x2", + 
"BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", + "EventName": "ARITH.DIVIDER_UOPS", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Thread cycles when thread is not in halt state", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", - "AnyThread": "1", - "CounterMask": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x14", - "UMask": "0x2", - "BriefDescription": "Any uop executed by the Divider. 
(This includes all divide uops, sqrt, ...)", + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "0,1,2,3", - "EventName": "ARITH.DIVIDER_UOPS", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -153,6 +195,38 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "PublicDescription": "Reference cycles when the thread is unhalted. 
(counts at 100 MHz rate)", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "PublicDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "UMask": "0x2", @@ -162,6 +236,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "UMask": "0x2", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4c", "UMask": "0x1", @@ -232,6 +315,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EdgeDetect": "1", + "Invert": "1", + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_END", + "CounterMask": "1", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "UMask": "0x1", @@ -406,6 +501,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "UMask": "0xa0", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x89", "UMask": "0xc1", @@ -446,135 +550,281 @@ }, { "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "UMask": "0x1", + "BriefDescription": "Cycles per core when uops are executed in port 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", - "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 0.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when uops are executed in port 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", - "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - 
"EventName": "UOPS_EXECUTED_PORT.PORT_3", - "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "UMask": "0x2", + "BriefDescription": "Cycles per core when uops are executed in port 1.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", - "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", - "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", - "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x80", - 
"BriefDescription": "Cycles per thread when uops are executed in port 7", + "UMask": "0x4", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", - "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x1", - "BriefDescription": "Resource-related stall cycles", + "EventCode": "0xA1", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2.", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ANY", - "Errata": "HSD135", - "PublicDescription": "Cycles allocation is stalled due to resource related reason.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x4", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.RS", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", + "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", "UMask": "0x8", - "BriefDescription": "Cycles stalled due to no store buffers available. 
(not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.SB", - "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", "UMask": "0x10", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ROB", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", + "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles with pending L2 cache miss loads.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per core when uops are executed in port 4.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", - "CounterMask": "1", - "Errata": "HSD78", - "PublicDescription": "Cycles with pending L2 miss loads. 
Set Cmask=2 to count cycle.", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 4.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per core when uops are executed in port 5.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 5.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per core when uops are executed in port 6.", + "Counter": 
"0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are executed in port 6.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "UMask": "0x1", + "BriefDescription": "Resource-related stall cycles", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ANY", + "Errata": "HSD135", + "PublicDescription": "Cycles allocation is stalled due to resource related reason.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "UMask": "0x4", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.RS", + "SampleAfterValue": "2000003", + "CounterHTOff": 
"0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", "UMask": "0x8", - "BriefDescription": "Cycles with pending L1 cache miss loads.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", - "CounterMask": "8", - "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "UMask": "0x10", + "BriefDescription": "Cycles stalled due to re-order buffer full.", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ROB", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x1", + "BriefDescription": "Cycles with pending L2 cache miss loads.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "CounterMask": "1", + "Errata": "HSD78", + "PublicDescription": "Cycles with pending L2 miss loads. 
Set Cmask=2 to count cycle.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA3", @@ -590,7 +840,7 @@ { "EventCode": "0xA3", "UMask": "0x4", - "BriefDescription": "Total execution stalls", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", "Counter": "0,1,2,3", "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", "CounterMask": "4", @@ -620,6 +870,17 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles with pending L1 cache miss loads.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "CounterMask": "8", + "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, { "EventCode": "0xA3", "UMask": "0xc", @@ -642,13 +903,22 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Number of uops executed on the core.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE", - "Errata": "HSD30, HSM31", - "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", + "EventName": "LSD.CYCLES_ACTIVE", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "Counter": "0,1,2,3", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -665,68 +935,169 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC0", - "UMask": "0x0", - "BriefDescription": "Number of instructions retired. 
General Counter - architectural event", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.ANY_P", - "Errata": "HSD11, HSD140", - "PublicDescription": "Number of instructions at retirement.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "CounterMask": "1", + "Errata": "HSD144, HSD30, HSM31", + "PublicDescription": "This event counts the cycles where at least one uop was executed. It is counted per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC0", - "UMask": "0x2", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.X87", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "CounterMask": "2", + "Errata": "HSD144, HSD30, HSM31", + "PublicDescription": "This event counts the cycles where at least two uops were executed. 
It is counted per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC0", + "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "PEBS": "2", - "Counter": "1", - "EventName": "INST_RETIRED.PREC_DIST", - "Errata": "HSD140", - "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.", - "SampleAfterValue": "2000003", - "CounterHTOff": "1" - }, - { - "EventCode": "0xC1", - "UMask": "0x40", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "CounterMask": "3", + "Errata": "HSD144, HSD30, HSM31", + "PublicDescription": "This event counts the cycles where at least three uops were executed. It is counted per thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC2", + "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Actually retired uops.", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "Counts the number of micro-ops retired. 
Use Cmask=1 and invert to count active cycles or stalled cycles.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "CounterMask": "4", + "Errata": "HSD144, HSD30, HSM31", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC2", + "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Retirement slots used.", - "PEBS": "1", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 uops or 4 instructions could retire each cycle.", + "EventName": "UOPS_EXECUTED.CORE", + "Errata": "HSD30, HSM31", + "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "CounterMask": "1", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "CounterMask": "2", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterMask": "3", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + 
"EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Invert": "1", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC0", + "UMask": "0x0", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "Counter": "0,1,2,3", + "EventName": "INST_RETIRED.ANY_P", + "Errata": "HSD11, HSD140", + "PublicDescription": "Number of instructions at retirement.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC0", + "UMask": "0x1", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "PEBS": "2", + "Counter": "1", + "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "HSD140", + "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.", + "SampleAfterValue": "2000003", + "CounterHTOff": "1" + }, + { + "EventCode": "0xC0", + "UMask": "0x2", + "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.", + "Counter": "0,1,2,3", + "EventName": "INST_RETIRED.X87", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC1", + "UMask": "0x40", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "Counter": "0,1,2,3", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", + "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC2", + "UMask": "0x1", + "BriefDescription": "Actually retired uops.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -764,6 +1135,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC2", + "UMask": "0x2", + "BriefDescription": "Retirement slots used.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xC3", "UMask": "0x1", @@ -773,6 +1154,17 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EdgeDetect": "1", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "Counter": "0,1,2,3", + "EventName": "MACHINE_CLEARS.COUNT", + "CounterMask": "1", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xC3", "UMask": "0x4", @@ -792,6 +1184,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xC4", + "UMask": "0x0", + "BriefDescription": "All (macro) branch instructions retired.", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Branch instructions at retirement.", + 
"SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xC4", "UMask": "0x1", @@ -799,7 +1201,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.CONDITIONAL", - "PublicDescription": "Counts the number of conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -815,13 +1216,23 @@ }, { "EventCode": "0xC4", - "UMask": "0x0", + "UMask": "0x2", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x4", "BriefDescription": "All (macro) branch instructions retired.", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "Branch instructions at retirement.", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xC4", @@ -830,7 +1241,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "Counts the number of near return instructions retired.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -851,7 +1261,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near taken branches retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -866,14 +1275,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", - "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired.", - "PEBS": "2", + "EventCode": "0xC5", + "UMask": "0x0", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + 
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Mispredicted branch instructions at retirement.", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC5", @@ -885,20 +1294,10 @@ "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0xC5", - "UMask": "0x0", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "Mispredicted branch instructions at retirement.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0xC5", "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired. ", + "BriefDescription": "Mispredicted macro branch instructions retired.", "PEBS": "2", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", @@ -907,423 +1306,33 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xCC", + "EventCode": "0xC5", "UMask": "0x20", - "BriefDescription": "Count cases of saving new LBR", - "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "PublicDescription": "Count cases of saving new LBR records by hardware.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Thread cycles when thread is not in halt state", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x89", - "UMask": "0xa0", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", + "EventCode": "0xCC", + "UMask": "0x20", + "BriefDescription": "Count cases of saving new LBR", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", - "AnyThread": "1", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "Count cases of saving new LBR records by hardware.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - 
"EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x20", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were taken but mispredicted.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "CounterMask": "1", - "Errata": "HSD144, HSD30, HSM31", - "PublicDescription": "This events counts the cycles where at least one uop was executed. 
It is counted per thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "CounterMask": "2", - "Errata": "HSD144, HSD30, HSM31", - "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "CounterMask": "3", - "Errata": "HSD144, HSD30, HSM31", - "PublicDescription": "This events counts the cycles where at least three uop were executed. It is counted per thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "CounterMask": "4", - "Errata": "HSD144, HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xe6", - "UMask": "0x1f", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "EventCode": "0xe6", + "UMask": "0x1f", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", "EventName": "BACLEARS.ANY", "PublicDescription": "Number of front end re-steers due to BPU misprediction.", "SampleAfterValue": "100003", 
"CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0xC3", - "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.COUNT", - "CounterMask": "1", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_ACTIVE", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", - "CounterMask": "4", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "Invert": "1", - "EventCode": "0x5E", - "UMask": "0x1", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", - "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_END", - "CounterMask": "1", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per thread when uops are executed in port 0.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6.", - "Counter": "0,1,2,3", - "EventName": 
"UOPS_DISPATCHED_PORT.PORT_6", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x00", - "UMask": "0x2", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" - }, - { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "UMask": "0x3", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)", - "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "CounterMask": "1", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "CounterMask": "3", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "Invert": "1", - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "Counter": "0,1,2,3", - "EventName": 
"CPU_CLK_UNHALTED.REF_XCLK", - "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json b/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json index 9c00f8ef6a07..168df552b1a8 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json @@ -38,6 +38,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0xe", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "UMask": "0x10", @@ -68,6 +78,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": 
"0x60", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "PublicDescription": "Number of cache load STLB hits. No page walk.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "UMask": "0x80", @@ -117,6 +137,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0xe", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x10", @@ -147,6 +177,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0x60", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x80", @@ -205,6 +245,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0xe", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Completed page walks in ITLB of any page size.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x85", "UMask": "0x10", @@ -235,6 +285,16 @@ "SampleAfterValue": "100003", "CounterHTOff": 
"0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0x60", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.STLB_HIT", + "PublicDescription": "ITLB misses that hit STLB. No page walk.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xae", "UMask": "0x1", @@ -257,39 +317,43 @@ }, { "EventCode": "0xBC", - "UMask": "0x21", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB", + "UMask": "0x12", + "BriefDescription": "Number of DTLB page walker hits in the L2", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", - "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "PublicDescription": "Number of DTLB page walker loads that hit in the L2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x41", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.", + "UMask": "0x14", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "Errata": "HSD25", + "PublicDescription": "Number of DTLB page walker loads that hit in the L3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x81", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.", + "UMask": "0x18", + "BriefDescription": "Number of DTLB page walker hits in Memory", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "Errata": "HSD25", + "PublicDescription": "Number of DTLB page walker loads from memory.", "SampleAfterValue": "2000003", 
"CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x12", - "BriefDescription": "Number of DTLB page walker hits in the L2", + "UMask": "0x21", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", - "PublicDescription": "Number of DTLB page walker loads that hit in the L2.", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -305,41 +369,41 @@ }, { "EventCode": "0xBC", - "UMask": "0x42", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.", + "UMask": "0x24", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "Errata": "HSD25", + "PublicDescription": "Number of ITLB page walker loads that hit in the L3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x82", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", + "UMask": "0x28", + "BriefDescription": "Number of ITLB page walker hits in Memory", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2", + "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY", + "Errata": "HSD25", + "PublicDescription": "Number of ITLB page walker loads from memory.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x14", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP", + "UMask": "0x41", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", - "Errata": "HSD25", - "PublicDescription": "Number of DTLB page walker loads that 
hit in the L3.", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x24", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP", + "UMask": "0x42", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", - "Errata": "HSD25", - "PublicDescription": "Number of ITLB page walker loads that hit in the L3.", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -354,41 +418,37 @@ }, { "EventCode": "0xBC", - "UMask": "0x84", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", + "UMask": "0x48", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x18", - "BriefDescription": "Number of DTLB page walker hits in Memory", + "UMask": "0x81", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", - "Errata": "HSD25", - "PublicDescription": "Number of DTLB page walker loads from memory.", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x28", - "BriefDescription": "Number of ITLB page walker hits in Memory", + "UMask": "0x82", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY", - "Errata": "HSD25", - "PublicDescription": "Number of ITLB 
page walker loads from memory.", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x48", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.", + "UMask": "0x84", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -420,65 +480,5 @@ "PublicDescription": "Count number of STLB flush attempts.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0xe", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x60", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "PublicDescription": "Number of cache load STLB hits. 
No page walk.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0xe", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x60", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0xe", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "PublicDescription": "Completed page walks in ITLB of any page size.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x60", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.STLB_HIT", - "PublicDescription": "ITLB misses that hit STLB. 
No page walk.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3 From c955cd2b04200cae6a00284d6c6cda09c6eeaced Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:50:11 -0800 Subject: perf vendor events intel: Update IvyBridge events to V20 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/ivybridge/cache.json | 567 +++----------- .../pmu-events/arch/x86/ivybridge/frontend.json | 122 +-- .../perf/pmu-events/arch/x86/ivybridge/memory.json | 92 +-- .../perf/pmu-events/arch/x86/ivybridge/other.json | 20 +- .../pmu-events/arch/x86/ivybridge/pipeline.json | 822 +++++++++++---------- .../arch/x86/ivybridge/virtual-memory.json | 60 +- 6 files changed, 634 insertions(+), 1049 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json index f1ee6d4853c5..3c0a28e27d73 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json @@ -9,6 +9,16 @@ "BriefDescription": "Demand Data Read requests that hit L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "RFO requests that hit L2 cache.", "EventCode": "0x24", @@ -29,6 +39,16 @@ "BriefDescription": "RFO requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts all L2 store RFO requests.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xc", + "EventName": "L2_RQSTS.ALL_RFO", + "SampleAfterValue": 
"200003", + "BriefDescription": "RFO requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of instruction fetches that hit the L2 cache.", "EventCode": "0x24", @@ -49,6 +69,16 @@ "BriefDescription": "L2 cache misses when fetching instructions", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts all L2 code requests.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "SampleAfterValue": "200003", + "BriefDescription": "L2 code requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.", "EventCode": "0x24", @@ -69,36 +99,6 @@ "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all L2 store RFO requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all L2 code requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "Counts all L2 HW prefetcher requests.", "EventCode": "0x24", @@ -218,6 +218,29 @@ "CounterMask": "1", "CounterHTOff": "2" }, + { + "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + 
"EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "EventCode": "0x48", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts the number of lines brought into the L1 data cache.", "EventCode": "0x51", @@ -239,76 +262,87 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. 
Set Cmask=1 to count cycles.", + "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue", + "CounterMask": "6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data 
Read transactions are present in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", + "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "PublicDescription": 
"Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -379,7 +413,7 @@ "UMask": "0x11", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -389,7 +423,7 @@ "UMask": "0x12", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -399,7 +433,7 @@ "UMask": "0x21", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. 
(Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -409,7 +443,7 @@ "UMask": "0x41", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -419,7 +453,7 @@ "UMask": "0x42", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -429,7 +463,7 @@ "UMask": "0x81", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -439,67 +473,61 @@ "UMask": "0x82", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L1 cache hits as data sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "2000003", - "BriefDescription": "Retired load uops with L1 cache hits as data sources. ", + "BriefDescription": "Retired load uops with L1 cache hits as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L2 cache hits as data sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with L2 cache hits as data sources. 
", + "BriefDescription": "Retired load uops with L2 cache hits as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was LLC hit with no snoop required.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT", "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required. ", + "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source followed an L1 miss.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources following L1 data-cache miss", + "BriefDescription": "Retired load uops which data sources following L1 data-cache miss.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops that missed L2, excluding unknown sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "50021", - "BriefDescription": "Miss in mid-level (L2) cache. 
Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source is LLC miss.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x20", @@ -510,61 +538,56 @@ }, { "PEBS": "1", - "PublicDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. ", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package core cache LLC hit and cross-core snoop missed.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache. ", + "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package LLC hit and cross-core snoop hits.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache. 
", + "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package core cache with HitM responses.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC. ", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was LLC hit with no snoop required.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required. 
", + "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI)", + "PublicDescription": "Retired load uops whose data source was local memory (cross-socket snoop not needed or missed).", "EventCode": "0xD3", "Counter": "0,1,2,3", "UMask": "0x1", @@ -751,373 +774,5 @@ "SampleAfterValue": "100003", "BriefDescription": "Split locks in SQ", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Retired load uops whose data source was local memory (cross-socket snoop not needed or missed).", - "EventCode": "0xD3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops which data sources missed LLC but serviced from local dram.", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "EventCode": "0x48", - 
"Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", 
- "MSRValue": "0x10003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0122", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0122", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10008", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": 
"100003", - "BriefDescription": "Counts all writebacks from the core to the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": 
"0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in 
multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x18000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses. It also includes L2 hints sent to LLC to keep a line from being evicted out of the core caches", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10400", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address ", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10800", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts non-temporal stores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00010001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data reads ", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00010002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand rfo's ", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 
0xBB", - "MSRValue": "0x00010004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x000105B3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00010122", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch prefetch RFOs ", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x000107F7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo references (demand & prefetch) ", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json b/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json index de72b84b3536..efaa949ead31 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json @@ -20,76 +20,45 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. 
Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. 
Set Cmask = 1 to count cycles.", + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream 
Buffer (DSB) path", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "CounterMask": "1", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -137,6 +106,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.", "EventCode": "0x79", @@ -159,6 +138,39 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. 
Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of uops delivered to IDQ from any path.", "EventCode": "0x79", @@ -206,7 +218,7 @@ "UMask": "0x1", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled ", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "CounterHTOff": "0,1,2,3" }, { @@ -289,17 +301,5 @@ "SampleAfterValue": "2000003", "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - 
{ - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json index e1c6a1d4a4d5..6005b364c580 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json @@ -37,18 +37,6 @@ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PEBS": "2", - "EventCode": "0xCD", - "Counter": "3", - "UMask": "0x2", - "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Sample stores and collect precise store operation via PEBS record. 
PMC3 only.", - "PRECISE_STORE": "1", - "TakenAlone": "1", - "CounterHTOff": "3" - }, { "PEBS": "2", "PublicDescription": "Loads with latency value being above 4.", @@ -162,75 +150,15 @@ "CounterHTOff": "3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x300400244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x300400091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3004003f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x300400004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand code reads that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x300400001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - 
"SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x6004001b3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts LLC replacements", - "CounterHTOff": "0,1,2,3" + "PEBS": "2", + "EventCode": "0xCD", + "Counter": "3", + "UMask": "0x2", + "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.", + "PRECISE_STORE": "1", + "TakenAlone": "1", + "CounterHTOff": "3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/other.json b/tools/perf/pmu-events/arch/x86/ivybridge/other.json index 9c2dd0511a32..4eb83ee40412 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/other.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/other.json @@ -9,16 +9,6 @@ "BriefDescription": "Unhalted core cycles when the thread is in ring 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", - "EventCode": "0x5C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPL_CYCLES.RING123", - "SampleAfterValue": "2000003", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "Number of intervals between processor halts while thread is in ring 0.", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", + "EventCode": "0x5C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPL_CYCLES.RING123", + "SampleAfterValue": 
"2000003", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.", "EventCode": "0x63", diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json index 2145c28193f7..0afbfd95ea30 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json @@ -1,30 +1,41 @@ [ { "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "PublicDescription": "Loads blocked by overlapping with 
store buffer that cannot be forwarded.", @@ -77,6 +88,17 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x0D", + "Counter": "0,1,2,3", + "UMask": "0x3", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Increments each cycle the # of Uops issued by the RAT to RS. Set Cmask = 1, Inv = 1, Any= 1to count stalled cycles of this core.", "EventCode": "0x0E", @@ -174,6 +196,17 @@ "BriefDescription": "Thread cycles when thread is not in halt state", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.", "EventCode": "0x3C", @@ -184,6 +217,36 @@ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the thread is unhalted. 
(counts at 100 MHz rate)", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3C", "Counter": "0,1,2,3", @@ -193,6 +256,15 @@ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.", "EventCode": "0x4C", @@ -216,37 +288,37 @@ { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED", + "UMask": "0x1", + "EventName": "MOVE_ELIMINATION.INT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.", + "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED", + "UMask": "0x2", + "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of SIMD Move 
Elimination candidate uops that were not eliminated.", + "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MOVE_ELIMINATION.INT_ELIMINATED", + "UMask": "0x4", + "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.", + "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED", + "UMask": "0x8", + "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.", + "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -259,6 +331,18 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", + "SampleAfterValue": "200003", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "Counter": "0,1,2,3", @@ -498,118 +582,118 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 1.", + "PublicDescription": "Cycles per core when uops are dispatched to port 0.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 1", + "BriefDescription": "Cycles per core when uops are dispatched to port 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 4.", + "PublicDescription": "Cycles which a Uop is dispatched on port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 4", + "BriefDescription": "Cycles per thread when uops are dispatched to port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 5.", + "PublicDescription": "Cycles per core when uops are dispatched to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 5", + "BriefDescription": "Cycles per core when uops are dispatched to port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 0.", + 
"PublicDescription": "Cycles which a Uop is dispatched on port 2.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", + "UMask": "0xc", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 0", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", + "UMask": "0xc", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 1", + "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 4.", + "PublicDescription": "Cycles which a Uop is dispatched on port 3.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", + "UMask": "0x30", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 4", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 5.", + "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", + "UMask": "0x30", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", "SampleAfterValue": 
"2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 5", + "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 2.", + "PublicDescription": "Cycles which a Uop is dispatched on port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2", + "BriefDescription": "Cycles per thread when uops are dispatched to port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 3.", + "PublicDescription": "Cycles per core when uops are dispatched to port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3", + "BriefDescription": "Cycles per core when uops are dispatched to port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles which a Uop is dispatched on port 5.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0xc", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).", + "BriefDescription": "Cycles per thread when uops are dispatched to port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.", + "PublicDescription": "Cycles per core 
when uops are dispatched to port 5.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x30", + "UMask": "0x80", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3", + "BriefDescription": "Cycles per core when uops are dispatched to port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -662,15 +746,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.", "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with pending L1 cache miss loads.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles while L2 cache miss load* is outstanding.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Cycles with pending memory loads. 
Set AnyThread to count per core.", @@ -683,6 +766,16 @@ "CounterMask": "2", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "Total execution stalls.", "EventCode": "0xA3", @@ -690,7 +783,17 @@ "UMask": "0x4", "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "2000003", + "BriefDescription": "Total execution stalls.", "CounterMask": "4", "CounterHTOff": "0,1,2,3" }, @@ -705,6 +808,16 @@ "CounterMask": "5", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.", + "CounterMask": "5", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xA3", "Counter": "0,1,2,3", @@ -716,16 +829,57 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Execution stalls due to L1 data cache miss loads. 
Set Cmask=0CH.", "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", - "SampleAfterValue": "2000003", + "Counter": "0,1,2,3", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with pending L1 cache miss loads.", + "CounterMask": "8", + "CounterHTOff": "2" + }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "CounterHTOff": "2" + }, + { + "PublicDescription": "Execution stalls due to L1 data cache miss loads. 
Set Cmask=0CH.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "SampleAfterValue": "2000003", "BriefDescription": "Execution stalls due to L1 data cache misses", "CounterMask": "12", "CounterHTOff": "2" }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "CounterHTOff": "2" + }, { "EventCode": "0xA8", "Counter": "0,1,2,3", @@ -746,6 +900,17 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "EventCode": "0xA8", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts total number of uops to be executed per-thread each cycle. 
Set Cmask = 1, INV =1 to count stall cycles.", "EventCode": "0xB1", @@ -756,6 +921,61 @@ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + 
"CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", "EventCode": "0xB1", @@ -767,15 +987,59 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "EventCode": "0xB1", - "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core", "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on 
physical core", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "EventCode": "0xB1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Number of instructions at retirement.", @@ -809,24 +1073,12 @@ }, { "PEBS": "1", - "PublicDescription": "Counts the number of micro-ops retired, Use cmask=1 and invert to count active cycles or stalled cycles.", "EventCode": "0xC2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "Counts the number of retirement slots used each cycle.", - "EventCode": "0xC2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used. 
", + "BriefDescription": "Retired uops.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -863,6 +1115,27 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "SampleAfterValue": "2000003", + "BriefDescription": "Retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of self-modifying-code machine clears detected.", "EventCode": "0xC3", @@ -880,50 +1153,67 @@ "UMask": "0x20", "EventName": "MACHINE_CLEARS.MASKMOV", "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0. ", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Branch instructions at retirement.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Counts the number of conditional branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "BR_INST_RETIRED.CONDITIONAL", "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired. 
", + "BriefDescription": "Conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Direct and indirect near call instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "BR_INST_RETIRED.NEAR_CALL", "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired. ", + "BriefDescription": "Direct and indirect near call instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Branch instructions at retirement.", + "PEBS": "1", "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Counts the number of near return instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "BR_INST_RETIRED.NEAR_RETURN", "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired. ", + "BriefDescription": "Return instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -933,18 +1223,17 @@ "UMask": "0x10", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired. 
", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Number of near taken branches retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x20", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired. ", + "BriefDescription": "Taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -954,28 +1243,7 @@ "UMask": "0x40", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "Mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired. ", + "BriefDescription": "Far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -990,13 +1258,12 @@ }, { "PEBS": "1", - "PublicDescription": "Mispredicted taken branch instructions retired.", "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "UMask": "0x1", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", "SampleAfterValue": "400009", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. 
", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1009,6 +1276,16 @@ "BriefDescription": "Mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Count cases of saving new LBR records by hardware.", "EventCode": "0xCC", @@ -1028,280 +1305,5 @@ "SampleAfterValue": "100003", "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were 
executed per-thread", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5E", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "200003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": 
"Cycles while L2 cache miss load* is outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", - "CounterHTOff": "Fixed counter 2" - }, - { - "PublicDescription": "Core cycles when at 
least one thread on the physical core is not in halt state.", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke).", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "EventCode": "0xB1", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - 
"BriefDescription": "Cycles with no micro-ops executed from any thread on physical core", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json b/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json index f036f5398906..f243551b4d12 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json @@ -1,4 +1,34 @@ [ + { + "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x81", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Misses in all TLB levels 
that caused page walk completed of any size by demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x82", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x84", + "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", + "SampleAfterValue": "2000003", + "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "Counter": "0,1,2,3", @@ -146,35 +176,5 @@ "SampleAfterValue": "100007", "BriefDescription": "STLB flush attempts", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": 
"DTLB_LOAD_MISSES.WALK_DURATION", - "SampleAfterValue": "2000003", - "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3 From 194b6fa41a06273166c5016980a67121bd1460c6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:51:14 -0800 Subject: perf vendor events intel: Update IvyTown events to V20 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/ivytown/cache.json | 692 +++-------------- .../perf/pmu-events/arch/x86/ivytown/frontend.json | 122 +-- tools/perf/pmu-events/arch/x86/ivytown/memory.json | 368 +-------- tools/perf/pmu-events/arch/x86/ivytown/other.json | 20 +- .../perf/pmu-events/arch/x86/ivytown/pipeline.json | 822 +++++++++++---------- .../arch/x86/ivytown/virtual-memory.json | 60 +- 6 files changed, 635 insertions(+), 1449 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/ivytown/cache.json b/tools/perf/pmu-events/arch/x86/ivytown/cache.json index ff27a620edd8..d8cc93b3a04c 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/cache.json @@ -9,6 +9,16 @@ "BriefDescription": "Demand Data Read requests that hit L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "RFO requests that hit L2 cache.", "EventCode": "0x24", @@ -29,6 +39,16 @@ "BriefDescription": "RFO requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts all L2 store RFO requests.", + 
"EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xc", + "EventName": "L2_RQSTS.ALL_RFO", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of instruction fetches that hit the L2 cache.", "EventCode": "0x24", @@ -49,6 +69,16 @@ "BriefDescription": "L2 cache misses when fetching instructions", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts all L2 code requests.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "SampleAfterValue": "200003", + "BriefDescription": "L2 code requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.", "EventCode": "0x24", @@ -69,36 +99,6 @@ "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all L2 store RFO requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all L2 code requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "Counts all L2 HW prefetcher requests.", "EventCode": "0x24", @@ -218,6 +218,29 @@ "CounterMask": "1", "CounterHTOff": "2" 
}, + { + "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "EventCode": "0x48", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts the number of lines brought into the L1 data cache.", "EventCode": "0x51", @@ -239,76 +262,87 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. 
Set Cmask=1 to count cycles.", + "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue", + "CounterMask": "6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data 
Read transactions are present in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", + "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "PublicDescription": 
"Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -379,7 +413,7 @@ "UMask": "0x11", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -389,7 +423,7 @@ "UMask": "0x12", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -399,7 +433,7 @@ "UMask": "0x21", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. 
(Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -409,7 +443,7 @@ "UMask": "0x41", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -419,7 +453,7 @@ "UMask": "0x42", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -429,7 +463,7 @@ "UMask": "0x81", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -439,67 +473,61 @@ "UMask": "0x82", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L1 cache hits as data sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "2000003", - "BriefDescription": "Retired load uops with L1 cache hits as data sources. ", + "BriefDescription": "Retired load uops with L1 cache hits as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L2 cache hits as data sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with L2 cache hits as data sources. 
", + "BriefDescription": "Retired load uops with L2 cache hits as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was LLC hit with no snoop required.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT", "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required. ", + "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source followed an L1 miss.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources following L1 data-cache miss", + "BriefDescription": "Retired load uops which data sources following L1 data-cache miss.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops that missed L2, excluding unknown sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "50021", - "BriefDescription": "Miss in mid-level (L2) cache. 
Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source is LLC miss.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x20", @@ -510,67 +538,61 @@ }, { "PEBS": "1", - "PublicDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. ", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package core cache LLC hit and cross-core snoop missed.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache. ", + "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package LLC hit and cross-core snoop hits.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache. 
", + "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package core cache with HitM responses.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC. ", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was LLC hit with no snoop required.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required. 
", + "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI)", "EventCode": "0xD3", "Counter": "0,1,2,3", - "UMask": "0x1", + "UMask": "0x3", "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM", "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops which data sources missed LLC but serviced from local dram.", + "BriefDescription": "Retired load uops whose data source was local DRAM (Snoop not needed, Snoop Miss, or Snoop Hit data not forwarded).", "CounterHTOff": "0,1,2,3" }, { @@ -778,495 +800,5 @@ "SampleAfterValue": "100003", "BriefDescription": "Split locks in SQ", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request 
was blocked due to Fill Buffers inavailability", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoop returned a clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x3f803c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts 
prefetch data reads that hit in the LLC and sibling core snoop returned a clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x2003c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoop returned a clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10008", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all writebacks from the core to the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0001", - "Counter": "0,1,2,3", - 
"UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoop returned a clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x803c8000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts L2 hints sent to LLC 
to keep a line from being evicted out of the core caches", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x23ffc08000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0040", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - 
"BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0200", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x4003c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10400", - "Counter": "0,1,2,3", - "UMask": "0x1", 
- "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10800", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts non-temporal stores", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/frontend.json b/tools/perf/pmu-events/arch/x86/ivytown/frontend.json index de72b84b3536..efaa949ead31 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/frontend.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/frontend.json @@ -20,76 +20,45 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. 
Add Edge=1 to count # of delivery.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.", + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. 
Set Cmask = 1 to count cycles.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. 
Add Edge=1 to count # of delivery.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "CounterMask": "1", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -137,6 +106,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.", "EventCode": "0x79", @@ -159,6 +138,39 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. 
Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of uops delivered to IDQ from any path.", "EventCode": "0x79", @@ -206,7 +218,7 @@ "UMask": "0x1", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled ", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "CounterHTOff": "0,1,2,3" }, { @@ -289,17 +301,5 @@ "SampleAfterValue": "2000003", "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - 
{ - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/memory.json b/tools/perf/pmu-events/arch/x86/ivytown/memory.json index 437d98f3e344..4ec94df8d70b 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/memory.json @@ -28,18 +28,6 @@ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PEBS": "2", - "EventCode": "0xCD", - "Counter": "3", - "UMask": "0x2", - "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Sample stores and collect precise store operation via PEBS record. 
PMC3 only.", - "PRECISE_STORE": "1", - "TakenAlone": "1", - "CounterHTOff": "3" - }, { "PEBS": "2", "PublicDescription": "Loads with latency value being above 4.", @@ -153,351 +141,15 @@ "CounterHTOff": "3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67f800244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from remote dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f800244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hits the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc203f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - 
"SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x6004003f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f8203f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc003f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC the data is found in M state in remote cache and forwarded from there", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x600400004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM", - 
"MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67f800004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from remote dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f820004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc00004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC the data is found in M state in remote cache and forwarded from there", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67fc00001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote & local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x600400001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67f800001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f820001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc00001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC the data is found in M state in remote cache and forwarded from there", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc20002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": 
"OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the LLC and the data is found in M state in remote cache and forwarded from there.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20040", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC and the data returned from remote & local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67fc00010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote & local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x600400010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x67f800010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f820010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc00010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC the data is found in M state in remote cache and forwarded from there", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20200", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that 
miss in the LLC", - "CounterHTOff": "0,1,2,3" + "PEBS": "2", + "EventCode": "0xCD", + "Counter": "3", + "UMask": "0x2", + "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.", + "PRECISE_STORE": "1", + "TakenAlone": "1", + "CounterHTOff": "3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/other.json b/tools/perf/pmu-events/arch/x86/ivytown/other.json index 9c2dd0511a32..4eb83ee40412 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/other.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/other.json @@ -9,16 +9,6 @@ "BriefDescription": "Unhalted core cycles when the thread is in ring 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", - "EventCode": "0x5C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPL_CYCLES.RING123", - "SampleAfterValue": "2000003", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "Number of intervals between processor halts while thread is in ring 0.", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", + "EventCode": "0x5C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPL_CYCLES.RING123", + "SampleAfterValue": "2000003", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.", "EventCode": "0x63", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json index 2145c28193f7..0afbfd95ea30 100644 --- 
a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json @@ -1,30 +1,41 @@ [ { "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "PublicDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.", @@ -77,6 +88,17 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x0D", + "Counter": "0,1,2,3", + "UMask": "0x3", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical 
core (e.g. misprediction or memory nuke).", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Increments each cycle the # of Uops issued by the RAT to RS. Set Cmask = 1, Inv = 1, Any= 1to count stalled cycles of this core.", "EventCode": "0x0E", @@ -174,6 +196,17 @@ "BriefDescription": "Thread cycles when thread is not in halt state", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.", "EventCode": "0x3C", @@ -184,6 +217,36 @@ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the thread is unhalted. 
(counts at 100 MHz rate)", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3C", "Counter": "0,1,2,3", @@ -193,6 +256,15 @@ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.", "EventCode": "0x4C", @@ -216,37 +288,37 @@ { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED", + "UMask": "0x1", + "EventName": "MOVE_ELIMINATION.INT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.", + "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED", + "UMask": "0x2", + "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of SIMD Move 
Elimination candidate uops that were not eliminated.", + "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MOVE_ELIMINATION.INT_ELIMINATED", + "UMask": "0x4", + "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.", + "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED", + "UMask": "0x8", + "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.", + "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -259,6 +331,18 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", + "SampleAfterValue": "200003", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "Counter": "0,1,2,3", @@ -498,118 +582,118 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 1.", + "PublicDescription": "Cycles per core when uops are dispatched to port 0.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 1", + "BriefDescription": "Cycles per core when uops are dispatched to port 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 4.", + "PublicDescription": "Cycles which a Uop is dispatched on port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 4", + "BriefDescription": "Cycles per thread when uops are dispatched to port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 5.", + "PublicDescription": "Cycles per core when uops are dispatched to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 5", + "BriefDescription": "Cycles per core when uops are dispatched to port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 0.", + 
"PublicDescription": "Cycles which a Uop is dispatched on port 2.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", + "UMask": "0xc", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 0", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", + "UMask": "0xc", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 1", + "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 4.", + "PublicDescription": "Cycles which a Uop is dispatched on port 3.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", + "UMask": "0x30", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 4", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 5.", + "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", + "UMask": "0x30", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", "SampleAfterValue": 
"2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 5", + "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 2.", + "PublicDescription": "Cycles which a Uop is dispatched on port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2", + "BriefDescription": "Cycles per thread when uops are dispatched to port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 3.", + "PublicDescription": "Cycles per core when uops are dispatched to port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3", + "BriefDescription": "Cycles per core when uops are dispatched to port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles which a Uop is dispatched on port 5.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0xc", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).", + "BriefDescription": "Cycles per thread when uops are dispatched to port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.", + "PublicDescription": "Cycles per core 
when uops are dispatched to port 5.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x30", + "UMask": "0x80", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3", + "BriefDescription": "Cycles per core when uops are dispatched to port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -662,15 +746,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.", "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with pending L1 cache miss loads.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles while L2 cache miss load* is outstanding.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Cycles with pending memory loads. 
Set AnyThread to count per core.", @@ -683,6 +766,16 @@ "CounterMask": "2", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "Total execution stalls.", "EventCode": "0xA3", @@ -690,7 +783,17 @@ "UMask": "0x4", "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "2000003", + "BriefDescription": "Total execution stalls.", "CounterMask": "4", "CounterHTOff": "0,1,2,3" }, @@ -705,6 +808,16 @@ "CounterMask": "5", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.", + "CounterMask": "5", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xA3", "Counter": "0,1,2,3", @@ -716,16 +829,57 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Execution stalls due to L1 data cache miss loads. 
Set Cmask=0CH.", "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", - "SampleAfterValue": "2000003", + "Counter": "0,1,2,3", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with pending L1 cache miss loads.", + "CounterMask": "8", + "CounterHTOff": "2" + }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "CounterHTOff": "2" + }, + { + "PublicDescription": "Execution stalls due to L1 data cache miss loads. 
Set Cmask=0CH.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "SampleAfterValue": "2000003", "BriefDescription": "Execution stalls due to L1 data cache misses", "CounterMask": "12", "CounterHTOff": "2" }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "CounterHTOff": "2" + }, { "EventCode": "0xA8", "Counter": "0,1,2,3", @@ -746,6 +900,17 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "EventCode": "0xA8", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts total number of uops to be executed per-thread each cycle. 
Set Cmask = 1, INV =1 to count stall cycles.", "EventCode": "0xB1", @@ -756,6 +921,61 @@ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + 
"CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", "EventCode": "0xB1", @@ -767,15 +987,59 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "EventCode": "0xB1", - "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core", "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on 
physical core", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "EventCode": "0xB1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Number of instructions at retirement.", @@ -809,24 +1073,12 @@ }, { "PEBS": "1", - "PublicDescription": "Counts the number of micro-ops retired, Use cmask=1 and invert to count active cycles or stalled cycles.", "EventCode": "0xC2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "Counts the number of retirement slots used each cycle.", - "EventCode": "0xC2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used. 
", + "BriefDescription": "Retired uops.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -863,6 +1115,27 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "SampleAfterValue": "2000003", + "BriefDescription": "Retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of self-modifying-code machine clears detected.", "EventCode": "0xC3", @@ -880,50 +1153,67 @@ "UMask": "0x20", "EventName": "MACHINE_CLEARS.MASKMOV", "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0. ", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Branch instructions at retirement.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Counts the number of conditional branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "BR_INST_RETIRED.CONDITIONAL", "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired. 
", + "BriefDescription": "Conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Direct and indirect near call instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "BR_INST_RETIRED.NEAR_CALL", "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired. ", + "BriefDescription": "Direct and indirect near call instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Branch instructions at retirement.", + "PEBS": "1", "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Counts the number of near return instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "BR_INST_RETIRED.NEAR_RETURN", "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired. ", + "BriefDescription": "Return instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -933,18 +1223,17 @@ "UMask": "0x10", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired. 
", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Number of near taken branches retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x20", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired. ", + "BriefDescription": "Taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -954,28 +1243,7 @@ "UMask": "0x40", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "Mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired. ", + "BriefDescription": "Far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -990,13 +1258,12 @@ }, { "PEBS": "1", - "PublicDescription": "Mispredicted taken branch instructions retired.", "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "UMask": "0x1", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", "SampleAfterValue": "400009", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. 
", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1009,6 +1276,16 @@ "BriefDescription": "Mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Count cases of saving new LBR records by hardware.", "EventCode": "0xCC", @@ -1028,280 +1305,5 @@ "SampleAfterValue": "100003", "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were 
executed per-thread", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5E", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "200003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": 
"Cycles while L2 cache miss load* is outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", - "CounterHTOff": "Fixed counter 2" - }, - { - "PublicDescription": "Core cycles when at 
least one thread on the physical core is not in halt state.", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke).", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "EventCode": "0xB1", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - 
"BriefDescription": "Cycles with no micro-ops executed from any thread on physical core", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json b/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json index c8de548b78fa..4645e9d3f460 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json @@ -1,4 +1,14 @@ [ + { + "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x81", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "Counter": "0,1,2,3", @@ -8,6 +18,16 @@ 
"BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x82", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "Counter": "0,1,2,3", @@ -17,6 +37,16 @@ "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x84", + "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", + "SampleAfterValue": "2000003", + "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "Counter": "0,1,2,3", @@ -164,35 +194,5 @@ "SampleAfterValue": "100007", "BriefDescription": "STLB flush attempts", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": 
"DTLB_LOAD_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", - "SampleAfterValue": "2000003", - "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3 From ffaa6f274201cb40721f85a143f6bbd6fc8ef601 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:52:48 -0800 Subject: perf vendor events intel: Update Silvermont events to V14 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/silvermont/cache.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/arch/x86/silvermont/cache.json b/tools/perf/pmu-events/arch/x86/silvermont/cache.json index 0bd1bc5302de..82be7d1b8b81 100644 --- a/tools/perf/pmu-events/arch/x86/silvermont/cache.json +++ b/tools/perf/pmu-events/arch/x86/silvermont/cache.json @@ -36,12 +36,13 @@ "BriefDescription": "L2 cache request misses" }, { + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.\r\nCounts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. 
This will include cycles due to an ITLB miss, ICache miss and other events. \r\n", "EventCode": "0x86", "Counter": "0,1", "UMask": "0x4", "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of cycles the NIP stalls because of an icache miss. This is a cumulative count of cycles the NIP stalled for all icache misses." + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss." }, { "PEBS": "1", -- cgit v1.2.3 From c93240a72418ccd440b0c472f26ad00c8746d518 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:53:50 -0800 Subject: perf vendor events intel: Update Skylake events to V36 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/skylake/cache.json | 4390 +++----------------- .../arch/x86/skylake/floating-point.json | 5 +- .../perf/pmu-events/arch/x86/skylake/frontend.json | 232 +- tools/perf/pmu-events/arch/x86/skylake/memory.json | 2118 +--------- tools/perf/pmu-events/arch/x86/skylake/other.json | 40 +- .../perf/pmu-events/arch/x86/skylake/pipeline.json | 973 ++--- .../arch/x86/skylake/virtual-memory.json | 262 +- 7 files changed, 1517 insertions(+), 6503 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/skylake/cache.json b/tools/perf/pmu-events/arch/x86/skylake/cache.json index 0551a9ba865d..54bfe9e4045c 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylake/cache.json @@ -1,4206 +1,852 @@ [ { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x11", - "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions that miss the STLB.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x12", - 
"EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store instructions that miss the STLB.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", + "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", + "EventCode": "0x24", "Counter": "0,1,2,3", "UMask": "0x21", - "EventName": "MEM_INST_RETIRED.LOCK_LOADS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions with locked access.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions that split across a cacheline boundary.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "MEM_INST_RETIRED.SPLIT_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store instructions that split across a cacheline boundary.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "MEM_INST_RETIRED.ALL_LOADS", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired load instructions.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "MEM_INST_RETIRED.ALL_STORES", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired store instructions.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L1 cache hits as data sources.", - 
"EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Retired load instructions with L1 cache hits as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_RETIRED.L2_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions with L2 cache hits as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load instructions with L3 cache hits as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L1 cache as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_RETIRED.L1_MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions missed L1 cache as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L2 cache as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "MEM_LOAD_RETIRED.L2_MISS", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load instructions missed L2 cache as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L3 cache as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": 
"MEM_LOAD_RETIRED.L3_MISS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions missed L3 cache as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD2", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", - "EventCode": "0xD2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read miss L2, no rejects", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", - "EventCode": "0xD2", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "EventCode": "0x24", 
"Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "UMask": "0x22", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that miss L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", - "EventCode": "0xD2", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "UMask": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache misses when fetching instructions", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD4", + "PublicDescription": "Demand requests that miss L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_MISC_RETIRED.UC", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "UMask": "0x27", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests that miss L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", - "EventCode": "0x51", + "PublicDescription": "Counts requests 
from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D.REPLACEMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D data line replacements", + "UMask": "0x38", + "EventName": "L2_RQSTS.PF_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand\n from the demand Hit FB, if it is allocated by hardware or software prefetch.\nNote: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", - "EventCode": "0x48", + "PublicDescription": "All requests that miss L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D miss outstandings duration in cycles", + "UMask": "0x3f", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "All requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x48", + "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. 
Only non rejected loads are counted.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", + "UMask": "0x41", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests that hit L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts duration of L1D miss outstanding in cycles.", - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the Demand Data Read requests sent to uncore. 
Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Demand Data Read requests sent to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts both cacheable and noncachaeble code read requests.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Cacheable and noncachaeble code read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", - "SampleAfterValue": "100003", - "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). 
Counting also covers reads due to page walks resulted from any request type.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Demand and prefetch data reads", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, and so on.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", - "SampleAfterValue": "100003", - "BriefDescription": "Any memory transaction that reached the SQ.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.\nNote: A prefetch promoted to Demand is counted from the promotion point.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). 
A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of cases when the offcore requests buffer cannot take more entries for the core. 
This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.\nNote: Writeback pending FIFO has six entries.", - "EventCode": "0xB2", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts L2 writebacks that access L2 cache.", - "EventCode": "0xF0", - "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "L2_TRANS.L2_WB", - "SampleAfterValue": "200003", - "BriefDescription": "L2 writebacks that access L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts core-originated cacheable demand requests that miss the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.", - "EventCode": "0x2E", - "Counter": "0,1,2,3", - "UMask": "0x41", - "Errata": "SKL057", - "EventName": "LONGEST_LAT_CACHE.MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests missed L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts core-originated cacheable demand requests that refer to the last level cache (LLC). 
Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.", - "EventCode": "0x2E", - "Counter": "0,1,2,3", - "UMask": "0x4f", - "Errata": "SKL057", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests that refer to L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of cache line split locks sent to the uncore.", - "EventCode": "0xF4", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "SQ_MISC.SPLIT_LOCK", - "SampleAfterValue": "100003", - "BriefDescription": "Number of cache line split locks sent to uncore.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "EventCode": "0xB7, 0xBB", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "This event counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read miss L2, no rejects", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. 
Only not rejected loads are counted.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe1", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe2", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of L2 code requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe4", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xf8", - "EventName": "L2_RQSTS.ALL_PF", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", - "EventCode": 
"0x24", - "Counter": "0,1,2,3", - "UMask": "0x38", - "EventName": "L2_RQSTS.PF_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xd8", - "EventName": "L2_RQSTS.PF_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "RFO requests that hit L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "RFO requests that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x22", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x44", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "L2 cache misses when fetching instructions.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Demand requests that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x27", - 
"EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Demand requests to L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe7", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "All requests that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "All requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "All L2 requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "All L2 requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xF2", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L2_LINES_OUT.SILENT", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xF2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L2_LINES_OUT.NON_SILENT", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. 
Modified lines are written back to L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", - "EventCode": "0xF2", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_LINES_OUT.USELESS_PREF", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", - "EventCode": "0xF1", - "Counter": "0,1,2,3", - "UMask": "0x1f", - "EventName": "L2_LINES_IN.ALL", - "SampleAfterValue": "100003", - "BriefDescription": "L2 cache lines filling L2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xF2", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_LINES_OUT.USELESS_HWPF", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - 
"BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200088000 ", - "Counter": "0,1,2,3", - "UMask": 
"0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400048000 ", - "Counter": "0,1,2,3", - 
"UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x1000028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SPL_HIT", - "Offcore": 
"1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000018000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - 
"EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts streaming stores 
that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts streaming stores that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts streaming stores that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": 
"100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100800 ", - 
"Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": 
"STREAMING_STORES & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - 
"EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & 
SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010800 ", - "Counter": "0,1,2,3", 
- "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts streaming stores that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & 
L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x02001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100100 ", - "Counter": 
"0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - 
"EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & 
L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - 
"SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - 
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & 
ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400080 ", - "Counter": "0,1,2,3", - 
"UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - 
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": 
"0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080080 ", - "Counter": 
"0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SPL_HIT", - 
"Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NONE", - 
"MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", 
- "MSRValue": "0x0100020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - 
"MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - 
"CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the 
shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { 
- "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": 
"100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040004 ", - "Counter": "0,1,2,3", - 
"UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD 
& SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - 
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & ANY_SNOOP", - "Offcore": 
"1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0002 ", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x42", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0002 ", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x44", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0002 ", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + 
"UMask": "0xd8", + "EventName": "L2_RQSTS.PF_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0002 ", + "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xe1", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0002 ", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. 
L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xe2", + "EventName": "L2_RQSTS.ALL_RFO", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0002 ", + "PublicDescription": "Counts the total number of L2 code requests.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xe4", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "SampleAfterValue": "200003", + "BriefDescription": "L2 code requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100002 ", + "PublicDescription": "Demand requests to L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xe7", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100002 ", + "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xf8", + "EventName": "L2_RQSTS.ALL_PF", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100002 ", + "PublicDescription": "All L2 requests.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xff", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "All L2 requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100002 ", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. 
It does not include all misses to the L3.", + "EventCode": "0x2E", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x41", + "Errata": "SKL057", + "EventName": "LONGEST_LAT_CACHE.MISS", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Core-originated cacheable demand requests missed L3", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100002 ", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", + "EventCode": "0x2E", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x4f", + "Errata": "SKL057", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Core-originated cacheable demand requests that refer to L3", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100002 ", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. 
The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "EventCode": "0x48", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "L1D_PEND_MISS.PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "L1D miss outstandings duration in cycles", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100002 ", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "EventCode": "0x48", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080002 ", + "EventCode": "0x48", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": 
"Cycles with L1D load Misses outstanding from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080002 ", + "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", + "EventCode": "0x48", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080002 ", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "EventCode": "0x51", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "L1D.REPLACEMENT", + "SampleAfterValue": "2000003", + "BriefDescription": "L1D data line replacements", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080002 ", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) 
every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080002 ", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080002 ", + "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - 
"BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080002 ", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040002 ", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040002 ", + "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040002 ", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040002 ", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040002 ", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). 
See corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040002 ", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "EventCode": "0xB0", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Demand Data Read requests sent to uncore", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040002 ", + "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", + "EventCode": "0xB0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cacheable and noncachaeble code read requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": 
"0xB7, 0xBB", - "MSRValue": "0x3fc0020002 ", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "EventCode": "0xB0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020002 ", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "EventCode": "0xB0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Demand and prefetch data reads", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020002 ", + "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", + "EventCode": "0xB0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x80", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": 
"0,1,2,3" + "BriefDescription": "Any memory transaction that reached the SQ.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020002 ", + "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", + "EventCode": "0xB2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020002 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020002 ", + "PEBS": "1", + "PublicDescription": "Retired load 
instructions that miss the STLB.", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x11", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020002 ", + "PEBS": "1", + "PublicDescription": "Retired store instructions that miss the STLB.", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x12", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "L1_Hit_Indication": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010002 ", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x21", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load instructions with locked access. 
(Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400001 ", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x41", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400001 ", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x42", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "L1_Hit_Indication": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400001 ", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x81", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "SampleAfterValue": "2000003", + "BriefDescription": "All retired load instructions. 
(Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400001 ", + "PEBS": "1", + "PublicDescription": "All retired store instructions.", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x82", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "SampleAfterValue": "2000003", + "BriefDescription": "All retired store instructions. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "L1_Hit_Indication": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400001 ", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n", + "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "SampleAfterValue": "2000003", + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x2", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", 
"SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "SampleAfterValue": "50021", + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0001 ", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load instructions missed L1 cache as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions missed L2 cache as data sources.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - 
"MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "SampleAfterValue": "50021", + "BriefDescription": "Retired load instructions missed L2 cache as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions missed L3 cache as data sources.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x20", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load instructions missed L3 cache as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0001 ", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. 
\r\n", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x40", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0001 ", + "PEBS": "1", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "EventCode": "0xD2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": 
"MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", + "EventCode": "0xD2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", + "EventCode": "0xD2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x8", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100001 ", + "PEBS": "1", + "EventCode": "0xD4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": 
"DEMAND_DATA_RD & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "SampleAfterValue": "100007", + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100001 ", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", + "EventCode": "0xF0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x40", + "EventName": "L2_TRANS.L2_WB", + "SampleAfterValue": "200003", + "BriefDescription": "L2 writebacks that access L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100001 ", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. 
Counting does not cover rejects.", + "EventCode": "0xF1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x1f", + "EventName": "L2_LINES_IN.ALL", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "L2 cache lines filling L2", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100001 ", + "EventCode": "0xF2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "L2_LINES_OUT.SILENT", + "SampleAfterValue": "200003", + "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100001 ", + "EventCode": "0xF2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "L2_LINES_OUT.NON_SILENT", + "SampleAfterValue": "200003", + "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. 
Modified lines are written back to L3", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100001 ", + "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", + "EventCode": "0xF2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "L2_LINES_OUT.USELESS_PREF", + "SampleAfterValue": "200003", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080001 ", + "EventCode": "0xF2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "L2_LINES_OUT.USELESS_HWPF", + "SampleAfterValue": "200003", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080001 ", + "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", + "EventCode": "0xF4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x10", + "EventName": "SQ_MISC.SPLIT_LOCK", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Number of 
cache line split locks sent to uncore.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080001 ", + "MSRValue": "0x3fc0400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_HIT_NO_FWD", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080001 ", + "MSRValue": "0x1000400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080001 ", + "MSRValue": "0x0400400001 ", "Counter": "0,1,2,3", "UMask": 
"0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_NOT_NEEDED", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080001 ", + "MSRValue": "0x0200400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080001 ", + "MSRValue": "0x0100400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SPL_HIT", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response 
can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040001 ", + "MSRValue": "0x0080400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040001 ", + "MSRValue": "0x3fc01c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_HITM", + "BriefDescription": "DEMAND_DATA_RD & L3_HIT & ANY_SNOOP", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040001 ", + "MSRValue": "0x10001c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", "MSRIndex": 
"0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_HIT_NO_FWD", + "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040001 ", + "MSRValue": "0x04001c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040001 ", + "MSRValue": "0x02001c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040001 ", + "MSRValue": "0x01001c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event 
codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040001 ", + "MSRValue": "0x00801c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SPL_HIT", + "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NONE", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fc0020001 ", "Counter": "0,1,2,3", @@ -4213,6 +859,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020001 ", "Counter": "0,1,2,3", @@ -4225,6 +872,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020001 ", "Counter": "0,1,2,3", @@ -4237,6 +885,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", 
"EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020001 ", "Counter": "0,1,2,3", @@ -4249,6 +898,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020001 ", "Counter": "0,1,2,3", @@ -4261,6 +911,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020001 ", "Counter": "0,1,2,3", @@ -4273,18 +924,7 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { + "PublicDescription": "Counts demand data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010001 ", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json index 3c6b59af5d54..213dd6230cf2 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json @@ -27,13 +27,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -55,7 +54,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", + "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x1e", diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json index e697dbd63e6e..578dff5bd823 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json @@ -1,62 +1,81 @@ [ { - "EventCode": "0x80", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "ICACHE_16B.IFDATA_STALL", + "EventName": "IDQ.MITE_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x83", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. 
Counting includes uops that may 'bypass' the IDQ.", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ICACHE_64B.IFTAG_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x83", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ICACHE_64B.IFTAG_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x83", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Counting includes uops that may 'bypass' the IDQ.", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ICACHE_64B.IFTAG_STALL", - "SampleAfterValue": "200003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_UOPS", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Count includes uops that may 'bypass' the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", + "UMask": "0x18", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x18", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x20", @@ -66,95 +85,99 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. 
Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x24", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Cycles MITE is delivering 4 Uops", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x24", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles MITE is delivering any Uop", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. 
Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_CYCLES", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. 
Some instructions such as transcendentals may additionally generate uops from the MS.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", - "CounterMask": "4", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", + "EventCode": "0x80", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "UMask": "0x4", + "EventName": "ICACHE_16B.IFDATA_STALL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", - "CounterMask": "1", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. 
This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "EventCode": "0x83", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering 4 Uops", - "CounterMask": "4", + "UMask": "0x1", + "EventName": "ICACHE_64B.IFTAG_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "EventCode": "0x83", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering any Uop", - "CounterMask": "1", + "UMask": "0x2", + "EventName": "ICACHE_64B.IFTAG_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x83", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "ICACHE_64B.IFTAG_STALL", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding ?4 ? x? when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). 
Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread\n\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions)\n \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -164,7 +187,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -175,7 +198,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). 
IDQ_Uops_Not_Delivered.core >= 3.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -186,6 +209,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -196,6 +220,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -217,7 +242,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0?2 cycles.", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. 
MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "EventCode": "0xAB", "Counter": "0,1,2,3", "UMask": "0x2", @@ -228,6 +253,7 @@ }, { "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n", "EventCode": "0xC6", "MSRValue": "0x11", "Counter": "0,1,2,3", @@ -235,7 +261,7 @@ "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -248,7 +274,7 @@ "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -261,12 +287,13 @@ "EventName": "FRONTEND_RETIRED.L2_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. 
Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", "EventCode": "0xC6", "MSRValue": "0x14", "Counter": "0,1,2,3", @@ -274,12 +301,13 @@ "EventName": "FRONTEND_RETIRED.ITLB_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced iTLB true miss.", + "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "EventCode": "0xC6", "MSRValue": "0x15", "Counter": "0,1,2,3", @@ -287,7 +315,7 @@ "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -300,7 +328,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. 
Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -313,7 +341,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -326,34 +354,13 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, - { - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may 'bypass' the IDQ. 
Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n", "EventCode": "0xC6", "MSRValue": "0x400806", "Counter": "0,1,2,3", @@ -367,6 +374,7 @@ }, { "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n", "EventCode": "0xC6", "MSRValue": "0x401006", "Counter": "0,1,2,3", @@ -374,12 +382,13 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. 
During this period the front-end delivered no uops.\r\n", "EventCode": "0xC6", "MSRValue": "0x402006", "Counter": "0,1,2,3", @@ -387,7 +396,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -400,7 +409,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -413,7 +422,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. 
Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -426,7 +435,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -439,12 +448,13 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. 
A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n", "EventCode": "0xC6", "MSRValue": "0x100206", "Counter": "0,1,2,3", @@ -452,7 +462,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -465,7 +475,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. 
Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/memory.json b/tools/perf/pmu-events/arch/x86/skylake/memory.json index d7fd5b06825b..3bd8b712c889 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/memory.json @@ -1,6 +1,74 @@ [ { - "PublicDescription": "Unfriendly TSX abort triggered by a flowmarker.", + "PublicDescription": "Number of times a TSX line had a cache conflict.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "TX_MEM.ABORT_CONFLICT", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "TX_MEM.ABORT_CAPACITY", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times an HLE transactional execution aborted due to 
NoAllocatedElisionBuffer being non-zero.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times we could not allocate Lock Buffer.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "EventCode": "0x5d", "Counter": "0,1,2,3", "UMask": "0x1", @@ -10,7 +78,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", + "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", "EventCode": "0x5d", "Counter": "0,1,2,3", "UMask": "0x2", @@ -50,7 +118,77 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of times we entered an HLE region\n does not count nested transactions.", + "EventCode": "0x60", + 
"Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Demand Data Read requests who miss L3 cache.", + "EventCode": "0xB0", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "100003", + "BriefDescription": "Demand Data Read requests who miss L3 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the 
number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "SKL089", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "SampleAfterValue": "100003", + "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", "EventCode": "0xC8", "Counter": "0,1,2,3", "UMask": "0x1", @@ -71,7 +209,7 @@ }, { "PEBS": "1", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered. (PEBS)", "EventCode": "0xC8", "Counter": "0,1,2,3", "UMask": "0x4", @@ -99,13 +237,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "EventCode": "0xC8", "Counter": "0,1,2,3", "UMask": "0x20", "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.). ", + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -128,7 +265,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of times we entered an RTM region\n does not count nested transactions.", + "PublicDescription": "Number of times we entered an RTM region. 
Does not count nested transactions.", "EventCode": "0xC9", "Counter": "0,1,2,3", "UMask": "0x1", @@ -149,7 +286,7 @@ }, { "PEBS": "1", - "PublicDescription": "Number of times RTM abort was triggered.", + "PublicDescription": "Number of times RTM abort was triggered. (PEBS)", "EventCode": "0xC9", "Counter": "0,1,2,3", "UMask": "0x4", @@ -207,17 +344,6 @@ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:\n1. memory disambiguation,\n2. external snoop, or\n3. cross SMT-HW-thread snoop (stores) hitting load buffer.", - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "SKL089", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "SampleAfterValue": "100003", - "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PEBS": "2", "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. 
Reported latency may be longer than just the memory latency.", @@ -331,1816 +457,87 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX line had a cache conflict.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3ffc000001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "TX_MEM.ABORT_CONFLICT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x54", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "TX_MEM.ABORT_CAPACITY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & ANY_SNOOP", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103c000001 ", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a 
HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HITM", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x043c000001 ", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x023c000001 ", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", - 
"SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_MISS", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x013c000001 ", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times we could not allocate Lock Buffer.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00bc000001 ", "Counter": "0,1,2,3", - 
"UMask": "0x40", - "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NONE", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Demand Data Read requests who miss L3 cache.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Demand Data Read requests who miss L3 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": 
"Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004008000 ", - 
"Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & 
L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": 
"0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x013c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - 
"SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & 
SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - 
"EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - 
"EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - 
"BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x2000400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & 
SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - 
"Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400080 ", - "Counter": 
"0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": 
"PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & 
SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": 
"0x2000400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": 
"100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": 
"0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": 
"OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": 
"0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - 
"SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000001 ", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fc4000001 ", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", @@ -2151,18 +548,7 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the 
offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000001 ", "Counter": "0,1,2,3", @@ -2175,6 +561,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000001 ", "Counter": "0,1,2,3", @@ -2187,6 +574,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000001 ", "Counter": "0,1,2,3", @@ -2199,6 +587,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000001 ", "Counter": "0,1,2,3", @@ -2211,6 +600,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000001 ", "Counter": "0,1,2,3", @@ -2221,89 +611,5 @@ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", "Offcore": "1", "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - 
"SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - 
"MSRValue": "0x2000020001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/other.json b/tools/perf/pmu-events/arch/x86/skylake/other.json index cfdc323acc82..84a316d380ac 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/other.json +++ b/tools/perf/pmu-events/arch/x86/skylake/other.json @@ -1,11 +1,47 @@ [ { - "PublicDescription": "This event counts the number of hardware interruptions received by the processor.", + "EventCode": "0x32", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "SW_PREFETCH_ACCESS.T0", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of PREFETCHT0 instructions executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of PREFETCHW instructions executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of hardware interruptions received by the processor.", "EventCode": "0xCB", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "HW_INTERRUPTS.RECEIVED", - 
"SampleAfterValue": "100003", + "SampleAfterValue": "203", "BriefDescription": "Number of hardware interrupts received by the processor.", "CounterHTOff": "0,1,2,3,4,5,6,7" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json index 0f7adb809be3..bc6d2afbcd8a 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json @@ -1,80 +1,92 @@ [ { - "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", + "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. 
Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { - "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. 
It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "Fixed counter 1" }, { - "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. 
The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. 
Software should ignore this case.", "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", - "EventCode": "0x3C", + "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. 
See the table of not supported store forwards in the Optimization Guide.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state", + "UMask": "0x2", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "SampleAfterValue": "100003", + "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xE6", + "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BACLEARS.ANY", + "UMask": "0x8", + "EventName": "LD_BLOCKS.NO_SR", "SampleAfterValue": "100003", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. 
Loose net checks can fail when loads and stores are 4k aliased.", + "EventCode": "0x07", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "LSD.UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of Uops delivered by the LSD.", + "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "SampleAfterValue": "100003", + "BriefDescription": "False dependencies in MOB due to partial compare on address.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", - "EventCode": "0x87", + "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", + "EventCode": "0x0D", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "ILD_STALL.LCP", + "EventName": "INT_MISC.RECOVERY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "EventCode": "0x0D", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. 
misprediction or memory nuke)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -87,33 +99,35 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "EventCode": "0x0E", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "RESOURCE_STALLS.ANY", + "EventName": "UOPS_ISSUED.ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "EventCode": "0x0E", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "UMask": "0x1", + "EventName": "UOPS_ISSUED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. 
(not including draining form sync).", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", "EventCode": "0x0E", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.ANY", + "UMask": "0x2", + "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", "SampleAfterValue": "2000003", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", + "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -126,361 +140,318 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "EventCode": "0x0E", - "Invert": "1", + "EventCode": "0x14", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", + "EventName": "ARITH.DIVIDER_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. 
Accounts for integer and floating-point operations.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which the reservation station (RS) is empty for the thread.\nNote: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", - "EventCode": "0x5E", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RS_EVENTS.EMPTY_CYCLES", + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "BriefDescription": "Thread cycles when thread is not in halt state", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5E", - "Invert": "1", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). 
The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", - "EventCode": "0xCC", + "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Increments whenever there is an update to the LBR array.", + "UMask": "0x0", + "EdgeDetect": "1", + "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", + "SampleAfterValue": "100007", + "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of machine clears (nukes) of any type.", - "EventCode": "0xC3", + "EventCode": "0x3C", "Counter": "0,1,2,3", "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type. ", - "CounterMask": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", - "EventCode": "0xC3", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MACHINE_CLEARS.SMC", - "SampleAfterValue": "100003", - "BriefDescription": "Self-modifying code (SMC) detected.", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of instructions (EOMs) retired. 
Counting covers macro-fused instructions individually (that is, increments by two).", - "EventCode": "0xC0", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "SKL091, SKL044", - "EventName": "INST_RETIRED.ANY_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", - "EventCode": "0xC0", - "Counter": "1", + "EventCode": "0x3C", + "Counter": "0,1,2,3", "UMask": "0x1", - "Errata": "SKL091, SKL044", - "EventName": "INST_RETIRED.PREC_DIST", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "CounterHTOff": "1" + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.", - "EventCode": "0xC2", + "EventCode": "0x3C", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles 
when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "EventCode": "0x4C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "LOAD_HIT_PRE.SW_PF", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "EventCode": "0x5E", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "RS_EVENTS.EMPTY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", - "EventCode": "0xC2", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", + "EventCode": "0x5E", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. 
Could be useful to precisely locate Frontend Latency Bound issues.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", - "EventCode": "0xC2", - "Invert": "1", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "EventCode": "0x87", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "EventName": "ILD_STALL.LCP", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", - "CounterMask": "10", + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x1", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that 
counts both direct and indirect near call instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x2", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts all (macro) branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", + "UMask": "0x4", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x8", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "SampleAfterValue": 
"2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x10", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x20", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", + "EventCode": "0xA1", "Counter": 
"0,1,2,3", "UMask": "0x40", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x4", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired. ", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", + "EventCode": 
"0xA1", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows:a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "UMask": "0x1", + "EventName": "RESOURCE_STALLS.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource-related stall cycles", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired. ", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Number of uops to be executed per-thread each cycle.", - "EventCode": "0xB1", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. 
This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.THREAD", + "UMask": "0x8", + "EventName": "RESOURCE_STALLS.SB", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of uops executed from any thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE", + "UMask": "0x1", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Number of uops executed on the core.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_EXECUTED.X87", + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of x87 uops dispatched.", + "BriefDescription": "Total execution stalls.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", - "EventCode": "0xB1", - "Invert": "1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", - "CounterMask": "1", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + 
"CounterMask": "5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "UMask": "0x10", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CounterMask": "3", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "16", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "UMask": "0x14", + 
"EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "20", + "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x1", @@ -490,6 +461,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x2", @@ -499,6 +471,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x4", @@ -508,6 +481,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x8", @@ -517,6 +491,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x10", @@ -535,212 +510,196 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", - "EventCode": "0xA1", + "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", + "EventCode": "0xA8", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + 
"EventName": "LSD.UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 0", + "BriefDescription": "Number of Uops delivered by the LSD.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", - "EventCode": "0xA1", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0x1", + "EventName": "LSD.CYCLES_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", - "EventCode": "0xA1", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", - "EventCode": "0xA1", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": 
"UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.THREAD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", - "EventCode": "0xA1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventCode": "0xB1", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", - "EventCode": "0xA1", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", - "EventCode": "0xA1", + "PublicDescription": "Cycles where at least 2 uops 
were executed per-thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", - "EventCode": "0xA1", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "Number of uops executed from any thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load 
is outstanding.", - "CounterMask": "8", + "BriefDescription": "Number of uops executed on the core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.", - "EventCode": "0x4C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LOAD_HIT_PRE.SW_PF", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap)\n\n - store forwarding is impossible due to u-arch limitations\n\n - preceding lock RMW operations are not forwarded\n\n - store has the no-forward bit set (uncacheable/page-split/masked stores)\n\n - all-blocking stores are used (mostly, fences and port I/O)\n\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. 
Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.", - "EventCode": "0x03", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "LD_BLOCKS.STORE_FORWARD", - "SampleAfterValue": "100003", - "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", - "EventCode": "0x03", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS.NO_SR", - "SampleAfterValue": "100003", - "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. 
Loose net checks can fail when loads and stores are 4k aliased.", - "EventCode": "0x07", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", - "SampleAfterValue": "100003", - "BriefDescription": "False dependencies in MOB due to partial compare on address.", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "CounterMask": "3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CounterMask": "1", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xB1", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CounterMask": "5", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "Counts the number of x87 uops executed.", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "EventName": "UOPS_EXECUTED.X87", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "16", + "BriefDescription": "Counts the number of 
x87 uops dispatched.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0x14", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "UMask": "0x0", + "Errata": "SKL091, SKL044", + "EventName": "INST_RETIRED.ANY_P", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "20", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "PEBS": "2", + "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. 
It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", + "EventCode": "0xC0", + "Counter": "1", + "UMask": "0x1", + "Errata": "SKL091, SKL044", + "EventName": "INST_RETIRED.PREC_DIST", "SampleAfterValue": "2000003", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "CounterHTOff": "1" }, { "PEBS": "2", @@ -757,183 +716,235 @@ "CounterHTOff": "0,2,3" }, { - "EventCode": "0x14", + "EventCode": "0xC1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ARITH.DIVIDER_ACTIVE", + "UMask": "0x3f", + "EventName": "OTHER_ASSISTS.ANY", + "SampleAfterValue": "100003", + "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the retirement slots used.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. 
Accounts for integer and floating-point operations.", - "CounterMask": "1", + "BriefDescription": "Retirement slots used.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", + "BriefDescription": "Cycles with less than 10 actually retired uops.", + "CounterMask": "10", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "OTHER_ASSISTS.ANY", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", "SampleAfterValue": "100003", - "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. 
Examples include AD (page Access Dirty) and AVX* related assists.", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register.\r\nFor more information, refer to ?Mixing Intel AVX and Intel SSE Code? section of the Optimization Guide.", - "EventCode": "0x0E", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", + "UMask": "0x4", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "100003", + "BriefDescription": "Self-modifying code (SMC) detected.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 2" - }, - { - "EventCode": "0x3C", + "PublicDescription": "Counts all (macro) branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Errata": "SKL091", + "EventName": 
"BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke).", + "UMask": "0x2", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect near call instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", + "UMask": "0x8", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "SampleAfterValue": "100007", + "BriefDescription": "Return instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread 
on physical core.", - "CounterMask": "3", + "UMask": "0x10", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", + "UMask": "0x20", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "Invert": "1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "CounterMask": "1", + "UMask": "0x40", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", - "EventCode": "0x3C", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. 
When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x0", - "EdgeDetect": "1", - "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", - "SampleAfterValue": "100007", - "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", - "CounterMask": "1", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "UMask": "0x2", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": 
"2", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "UMask": "0x4", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted macro branch instructions retired.", + "CounterHTOff": "0,1,2,3" + }, + { + "PEBS": "1", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", + "EventCode": "0xCC", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "SampleAfterValue": "2000003", + "BriefDescription": "Increments whenever there is an update to the LBR array.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. 
This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "EventCode": "0xE6", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json index 02f32cbf6789..2bcba7daca14 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json @@ -1,83 +1,6 @@ [ { - "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", - "EventCode": "0xAE", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB.ITLB_FLUSH", - "SampleAfterValue": "100007", - "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x4F", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "EPT.WALK_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Misses at all ITLB levels that cause page walks", - 
"CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "ITLB_MISSES.WALK_PENDING", - "SampleAfterValue": "100003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake. 
", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).", + "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x1", @@ -87,45 +10,68 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", "SampleAfterValue": "2000003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).", + "BriefDescription": "Page walk completed due to a demand data load to a 4K page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. 
The page walks can end with or without a page fault.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", "SampleAfterValue": "2000003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).", + "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", "SampleAfterValue": "2000003", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)", + "BriefDescription": "Page walk completed due to a demand data load to a 1G page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. 
EPT page walk duration are excluded in Skylake microarchitecture.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "SampleAfterValue": "100003", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x20", @@ -135,7 +81,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).", + "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x1", @@ -145,45 +91,68 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. 
The page walks can end with or without a page fault.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", "SampleAfterValue": "100003", - "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)", + "BriefDescription": "Page walk completed due to a demand data store to a 4K page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)", + "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. 
The page walks can end with or without a page fault.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)", + "BriefDescription": "Page walk completed due to a demand data store to a 1G page", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "DTLB_STORE_MISSES.WALK_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "SampleAfterValue": "100003", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. 
EPT page walk duration are excluded in Skylake.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x20", @@ -193,73 +162,77 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of DTLB flush attempts of the thread-specific entries.", - "EventCode": "0xBD", + "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", + "EventCode": "0x4F", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "TLB_FLUSH.DTLB_THREAD", - "SampleAfterValue": "100007", - "BriefDescription": "DTLB flush attempts of the thread-specific entries", + "UMask": "0x10", + "EventName": "EPT.WALK_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).", - "EventCode": "0xBD", + "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", + "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "TLB_FLUSH.STLB_ANY", - "SampleAfterValue": "100007", - "BriefDescription": "STLB flush attempts", + "UMask": "0x1", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "SampleAfterValue": "100003", + "BriefDescription": "Misses at all ITLB levels that cause page walks", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. 
The page walk can end with or without a fault.", "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "ITLB_MISSES.WALK_COMPLETED", + "UMask": "0x2", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x08", + "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "UMask": "0x4", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", "SampleAfterValue": "100003", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (1G page size). The page walk can end with or without a fault.", + "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "UMask": "0x8", + "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. 
The page walk can end with or without a fault.", + "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "UMask": "0xe", + "EventName": "ITLB_MISSES.WALK_COMPLETED", "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", - "CounterMask": "1", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x08", + "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake microarchitecture.", + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "EventName": "ITLB_MISSES.WALK_PENDING", "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", - "CounterMask": "1", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x10", @@ -268,5 +241,44 @@ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. 
EPT page walk duration are excluded in Skylake.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", + "EventCode": "0xAE", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "ITLB.ITLB_FLUSH", + "SampleAfterValue": "100007", + "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", + "EventCode": "0xBD", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "TLB_FLUSH.DTLB_THREAD", + "SampleAfterValue": "100007", + "BriefDescription": "DTLB flush attempts of the thread-specific entries", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", + "EventCode": "0xBD", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "TLB_FLUSH.STLB_ANY", + "SampleAfterValue": "100007", + "BriefDescription": "STLB flush attempts", + "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3 From 1716021e2e88a284cc737db40524734dffea2de5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:54:25 -0800 Subject: perf vendor events intel: Update SkylakeX events to V1.06 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/skylakex/cache.json | 257 +++++++++++++-------- 
.../arch/x86/skylakex/floating-point.json | 3 +- .../pmu-events/arch/x86/skylakex/frontend.json | 48 ++-- .../perf/pmu-events/arch/x86/skylakex/memory.json | 231 ++++++++++++------ tools/perf/pmu-events/arch/x86/skylakex/other.json | 94 +++++++- .../pmu-events/arch/x86/skylakex/pipeline.json | 44 ++-- .../arch/x86/skylakex/virtual-memory.json | 42 ++-- 7 files changed, 482 insertions(+), 237 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index b5bc742b6fbc..5c9940866acd 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -265,7 +265,7 @@ { "EventCode": "0x60", "UMask": "0x2", - "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle. ", + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", @@ -398,22 +398,24 @@ { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load instructions that miss the STLB.", + "BriefDescription": "Retired load instructions that miss the STLB. 
(Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PublicDescription": "Retired load instructions that miss the STLB.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store instructions that miss the STLB.", + "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "PublicDescription": "Retired store instructions that miss the STLB.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -421,7 +423,7 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load instructions with locked access.", + "BriefDescription": "Retired load instructions with locked access. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -432,24 +434,22 @@ { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load instructions that split across a cacheline boundary.", + "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store instructions that split across a cacheline boundary.", + "BriefDescription": "Retired store instructions that split across a cacheline boundary. 
(Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.SPLIT_STORES", - "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -457,7 +457,7 @@ { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load instructions.", + "BriefDescription": "All retired load instructions. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -468,11 +468,12 @@ { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store instructions.", + "BriefDescription": "All retired store instructions. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.ALL_STORES", + "PublicDescription": "All retired store instructions.", "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -485,7 +486,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -509,7 +510,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. 
", + "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -545,7 +546,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.L3_MISS", - "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. ", + "PublicDescription": "Retired load instructions missed L3 cache as data sources.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -557,7 +558,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. ", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. \r\n", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -616,7 +617,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", - "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -639,7 +639,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM", - "PublicDescription": "Retired load instructions whose data sources was remote HITM.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -648,9 +647,9 @@ "UMask": "0x8", "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache", "Data_LA": "1", + "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD", - "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -697,7 
+696,7 @@ { "EventCode": "0xF2", "UMask": "0x2", - "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped ", + "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped", "Counter": "0,1,2,3", "EventName": "L2_LINES_OUT.NON_SILENT", "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.", @@ -742,7 +741,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -755,7 +754,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -768,7 +767,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -781,7 +780,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -794,7 +793,7 @@ "Counter": 
"0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -807,7 +806,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -820,7 +819,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -833,7 +832,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -846,7 +845,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -859,7 +858,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -872,7 
+871,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -885,7 +884,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -898,7 +897,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -911,7 +910,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -924,7 +923,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -937,7 +936,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -950,7 +949,7 @@ 
"Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -963,7 +962,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -976,7 +975,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -989,7 +988,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1002,7 +1001,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1015,7 +1014,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1028,7 +1027,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1041,7 +1040,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1054,7 +1053,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1067,7 +1066,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1080,7 +1079,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1093,7 +1092,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1106,7 +1105,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1119,7 +1118,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1132,7 +1131,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1145,7 +1144,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1158,7 +1157,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1171,7 +1170,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1184,7 +1183,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1197,7 +1196,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1210,7 +1209,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1223,7 +1222,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1236,7 +1235,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1249,7 +1248,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1262,7 +1261,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1275,7 +1274,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1288,7 +1287,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1301,7 +1300,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1314,7 +1313,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1327,7 +1326,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1340,7 +1339,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1353,7 +1352,85 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that have any response type.", + "MSRValue": "0x0000018000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "OTHER & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that hit in the L3.", + "MSRValue": "0x3f803c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1366,7 +1443,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1379,7 +1456,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1392,7 +1469,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1405,7 +1482,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1418,7 
+1495,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1431,7 +1508,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1444,7 +1521,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1457,7 +1534,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1470,7 +1547,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1483,7 +1560,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1496,7 +1573,7 @@ "Counter": "0,1,2,3", 
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1509,7 +1586,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1522,7 +1599,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1535,7 +1612,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1548,7 +1625,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1561,7 +1638,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1574,7 +1651,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1587,7 +1664,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1600,7 +1677,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1613,7 +1690,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1626,7 +1703,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1639,7 +1716,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1652,7 +1729,7 @@ 
"Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1665,7 +1742,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json index 1c09a328df36..286ed1a37ec9 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -29,10 +29,9 @@ { "EventCode": "0xC7", "UMask": "0x8", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", - "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 40abc0852cd6..403a4f89e9b2 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -182,7 +182,7 @@ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 - x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. 
Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -247,20 +247,20 @@ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. 
Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs. Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0-2 cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", "PEBS": "1", "MSRValue": "0x11", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. ", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -268,7 +268,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", "PEBS": "1", "MSRValue": "0x12", "Counter": "0,1,2,3", @@ -281,7 +281,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. 
Precise Event.", "PEBS": "1", "MSRValue": "0x13", "Counter": "0,1,2,3", @@ -294,7 +294,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced iTLB true miss.", + "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", "PEBS": "1", "MSRValue": "0x14", "Counter": "0,1,2,3", @@ -308,13 +308,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", "PEBS": "1", "MSRValue": "0x15", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. ", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -322,7 +322,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x400206", "Counter": "0,1,2,3", @@ -335,7 +335,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. 
Precise Event.", "PEBS": "1", "MSRValue": "0x200206", "Counter": "0,1,2,3", @@ -348,7 +348,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x400406", "Counter": "0,1,2,3", @@ -367,7 +367,7 @@ "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -375,13 +375,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x401006", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. 
During this period the front-end delivered no uops.", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -389,13 +389,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x402006", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.\r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -403,7 +403,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. 
Precise Event.", "PEBS": "1", "MSRValue": "0x404006", "Counter": "0,1,2,3", @@ -416,7 +416,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x408006", "Counter": "0,1,2,3", @@ -429,7 +429,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x410006", "Counter": "0,1,2,3", @@ -442,7 +442,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. 
Precise Event.", "PEBS": "1", "MSRValue": "0x420006", "Counter": "0,1,2,3", @@ -455,13 +455,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x100206", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -469,7 +469,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. 
Precise Event.", "PEBS": "1", "MSRValue": "0x300206", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json index ca22a22c1abd..e7f1aa31226d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -214,7 +214,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered. (PEBS)", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -239,10 +239,9 @@ { "EventCode": "0xC8", "UMask": "0x20", - "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.). ", + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", - "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -292,7 +291,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered.", + "PublicDescription": "Number of times RTM abort was triggered. (PEBS)", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -466,7 +465,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss in the L3. ", + "PublicDescription": "Counts demand data reads that miss in the L3. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -479,7 +478,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -492,7 +491,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -505,7 +504,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. 
", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -518,7 +517,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -531,7 +530,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -544,7 +543,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. 
", + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -557,7 +556,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -570,7 +569,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -583,7 +582,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -596,7 +595,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -609,7 +608,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. 
", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -622,7 +621,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss in the L3. ", + "PublicDescription": "Counts all demand code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -635,7 +634,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -648,7 +647,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. 
", + "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -661,7 +660,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -674,7 +673,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -687,7 +686,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -700,7 +699,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -713,7 +712,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. 
", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -726,7 +725,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -739,7 +738,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -752,7 +751,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -765,7 +764,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -778,7 +777,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. 
", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -791,7 +790,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -804,7 +803,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -817,7 +816,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -830,7 +829,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -843,7 +842,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. 
", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -856,7 +855,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -869,7 +868,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -882,7 +881,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -895,7 +894,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -908,7 +907,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -921,7 +920,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -934,7 +933,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. 
", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -947,7 +946,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -960,7 +959,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -973,7 +972,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -986,7 +985,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -999,7 +998,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1012,7 +1011,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1025,7 +1024,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. 
", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1038,7 +1037,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1051,7 +1050,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1064,7 +1063,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1077,7 +1076,85 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss in the L3.", + "MSRValue": "0x3fbc008000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc08000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc08000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc08000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b808000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604008000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1090,7 +1167,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss in the L3. 
", + "PublicDescription": "Counts all prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1103,7 +1180,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1116,7 +1193,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1129,7 +1206,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1142,7 +1219,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1155,7 +1232,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. 
", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1168,7 +1245,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss in the L3. ", + "PublicDescription": "Counts prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1181,7 +1258,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1194,7 +1271,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1207,7 +1284,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1220,7 +1297,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1233,7 +1310,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1246,7 +1323,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1259,7 +1336,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1272,7 +1349,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1285,7 +1362,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1298,7 +1375,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1311,7 +1388,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. 
", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1324,7 +1401,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1337,7 +1414,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1350,7 +1427,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. 
", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1363,7 +1440,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1376,7 +1453,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. 
Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1389,8 +1466,8 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json index 70243b0b0586..778a541463eb 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/other.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json @@ -39,6 +39,42 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x32", + "UMask": "0x1", + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "Counter": "0,1,2,3", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "UMask": "0x2", + "BriefDescription": "Number of PREFETCHT0 instructions executed.", + "Counter": "0,1,2,3", + "EventName": "SW_PREFETCH_ACCESS.T0", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "UMask": "0x4", + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "Counter": 
"0,1,2,3", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "UMask": "0x8", + "BriefDescription": "Number of PREFETCHW instructions executed.", + "Counter": "0,1,2,3", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xCB", "UMask": "0x1", @@ -49,6 +85,62 @@ "SampleAfterValue": "203", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xEF", + "UMask": "0x1", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITI", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x2", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITFSE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x4", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_SHITFSE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x8", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDM", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x10", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDM", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x20", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDFE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x40", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDFE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xFE", "UMask": "0x2", @@ -69,4 +161,4 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" } -] +] \ No newline at 
end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index 0895d1e52a4a..f99f7ae27820 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -3,41 +3,41 @@ "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. 
When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", "AnyThread": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. 
After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "EventCode": "0x03", @@ -126,7 +126,7 @@ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", "Counter": "0,1,2,3", "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. 
For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -762,11 +762,10 @@ "EdgeDetect": "1", "EventCode": "0xC3", "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type. ", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", "EventName": "MACHINE_CLEARS.COUNT", "CounterMask": "1", - "PublicDescription": "Number of machine clears (nukes) of any type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -799,7 +798,7 @@ "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.CONDITIONAL", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -811,14 +810,14 @@ "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_CALL", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC4", "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired. 
", + "BriefDescription": "All (macro) branch instructions retired.", "PEBS": "2", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", @@ -835,7 +834,7 @@ "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_RETURN", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -858,19 +857,19 @@ "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC4", "UMask": "0x40", - "BriefDescription": "Far branch instructions retired.", + "BriefDescription": "Counts the number of far branch instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -891,7 +890,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of 
the event that counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -902,14 +901,14 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC5", "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired. ", + "BriefDescription": "Mispredicted macro branch instructions retired.", "PEBS": "2", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", @@ -920,10 +919,11 @@ { "EventCode": "0xC5", "UMask": "0x20", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. 
", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json index 70750dab7ead..7f466c97e485 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json @@ -12,30 +12,30 @@ { "EventCode": "0x08", "UMask": "0x2", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).", + "BriefDescription": "Page walk completed due to a demand data load to a 4K page", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts demand data loads that caused a completed page walk (4K page size). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x08", "UMask": "0x4", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).", + "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts demand data loads that caused a completed page walk (2M and 4M page sizes). This implies it missed in all TLB levels. 
The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x08", "UMask": "0x8", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)", + "BriefDescription": "Page walk completed due to a demand data load to a 1G page", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts load misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -52,17 +52,17 @@ { "EventCode": "0x08", "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture. ", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. 
EPT page walk duration are excluded in Skylake microarchitecture.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x08", "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", "CounterMask": "1", @@ -93,30 +93,30 @@ { "EventCode": "0x49", "UMask": "0x2", - "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)", + "BriefDescription": "Page walk completed due to a demand data store to a 4K page", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts demand data stores that caused a completed page walk (4K page size). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x49", "UMask": "0x4", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)", + "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts demand data stores that caused a completed page walk (2M and 4M page sizes). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. 
The page walks can end with or without a page fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x49", "UMask": "0x8", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)", + "BriefDescription": "Page walk completed due to a demand data store to a 1G page", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -133,17 +133,17 @@ { "EventCode": "0x49", "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture. ", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x49", "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. 
EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", "CounterMask": "1", @@ -197,7 +197,7 @@ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", "Counter": "0,1,2,3", "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts completed page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", + "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -224,10 +224,10 @@ { "EventCode": "0x85", "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "ITLB_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture. ", + "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. 
EPT page walk duration are excluded in Skylake microarchitecture.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, -- cgit v1.2.3 From fae0a4df1cc6bb1772fa653a2c8eb422d82b824d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 10:10:16 -0800 Subject: perf vendor events intel: Update BroadwellDE events to V7 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/broadwellde/cache.json | 389 ++++--- .../arch/x86/broadwellde/floating-point.json | 108 +- .../pmu-events/arch/x86/broadwellde/frontend.json | 138 +-- .../pmu-events/arch/x86/broadwellde/memory.json | 9 +- .../pmu-events/arch/x86/broadwellde/other.json | 20 +- .../pmu-events/arch/x86/broadwellde/pipeline.json | 1214 ++++++++++---------- .../arch/x86/broadwellde/virtual-memory.json | 150 +-- 7 files changed, 1033 insertions(+), 995 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json index 36fe398029b9..bf243fe2a0ec 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json @@ -11,11 +11,28 @@ }, { "EventCode": "0x24", - "UMask": "0x41", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "UMask": "0x22", + "BriefDescription": "RFO requests that miss L2 cache.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. 
Only not rejected loads are counted.", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x24", + "BriefDescription": "L2 cache misses when fetching instructions.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x27", + "BriefDescription": "Demand requests that miss L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -29,6 +46,43 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0x3f", + "BriefDescription": "All requests that miss L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x41", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. 
Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x42", + "BriefDescription": "RFO requests that hit L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x44", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0x50", @@ -69,6 +123,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xe7", + "BriefDescription": "Demand requests to L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0xf8", @@ -79,6 +142,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xff", + "BriefDescription": "All L2 requests.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x27", "UMask": "0x50", @@ -130,6 +202,27 @@ "SampleAfterValue": "2000003", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "Counter": "2", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "UMask": "0x2", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers unavailability.", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.FB_FULL", + "CounterMask": "1", + 
"SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x51", "UMask": "0x1", @@ -151,6 +244,29 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "Errata": "BDM76", + "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "CounterMask": "6", + "Errata": "BDM76", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x60", "UMask": "0x2", @@ -158,7 +274,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. 
The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -175,24 +291,24 @@ }, { "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "UMask": "0x4", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "CounterMask": "1", "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). 
See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "UMask": "0x8", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "Errata": "BDM76", - "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -208,18 +324,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "CounterMask": "1", - "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. 
The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x63", "UMask": "0x2", @@ -266,7 +370,7 @@ "BriefDescription": "Demand and prefetch data reads", "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable \"Demands\" and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -280,27 +384,36 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with true STLB miss retired to the architected path. 
True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -308,37 +421,37 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. 
(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "Errata": "BDM35", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with locked access retired to the architected path.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted store uops retired to the architected path. 
A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -346,24 +459,24 @@ { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "This event counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store uops.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. 
(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "This event counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -371,69 +484,69 @@ { "EventCode": "0xD1", "UMask": "0x1", - "BriefDescription": "Retired load uops with L1 cache hits as data sources.", + "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. 
This event also counts SW prefetches independent of the actual data source.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x2", - "BriefDescription": "Retired load uops with L2 cache hits as data sources.", + "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "Errata": "BDM35", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x8", - "BriefDescription": "Retired load uops misses in L1 cache as data sources.", + "BriefDescription": "Retired load uops misses in L1 cache as data sources. 
Uses PEBS.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x20", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. 
(Precise Event - PEBS).", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -445,77 +558,112 @@ { "EventCode": "0xD1", "UMask": "0x40", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x1", - "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. 
(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. 
(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x8", - "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.", + "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x1", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "BDE70, BDM100", - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI).", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. 
This does not include hardware prefetches. This is a precise event.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x4", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM", + "Errata": "BDE70", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x10", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM", + "Errata": "BDE70", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x20", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD", + "Errata": "BDE70", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -657,118 +805,5 @@ "PublicDescription": "This event counts the number of split locks in the super queue.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x42", - "BriefDescription": "RFO requests that hit L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x22", - "BriefDescription": "RFO requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x44", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - 
"Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x24", - "BriefDescription": "L2 cache misses when fetching instructions.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x27", - "BriefDescription": "Demand requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe7", - "BriefDescription": "Demand requests to L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x3f", - "BriefDescription": "All requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xff", - "BriefDescription": "All L2 requests.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "Counter": "0,1,2,3", - "EventName": 
"OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "Errata": "BDM76", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "Counter": "2", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "UMask": "0x2", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.FB_FULL", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json index 4ae1ea24f22f..d7b9d9c9c518 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json @@ -6,7 +6,7 @@ "Counter": "0,1,2,3", "EventName": "OTHER_ASSISTS.AVX_TO_SSE", "Errata": "BDM30", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -17,7 +17,7 @@ "Counter": "0,1,2,3", "EventName": "OTHER_ASSISTS.SSE_TO_AVX", "Errata": "BDM30", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 
when penalty is applicable.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -25,7 +25,6 @@ "EventCode": "0xC7", "UMask": "0x1", "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "SampleAfterValue": "2000003", @@ -35,17 +34,24 @@ "EventCode": "0xC7", "UMask": "0x2", "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC7", + "UMask": "0x3", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xC7", "UMask": "0x4", "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. 
Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", @@ -55,7 +61,6 @@ "EventCode": "0xC7", "UMask": "0x8", "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", @@ -65,19 +70,54 @@ "EventCode": "0xC7", "UMask": "0x10", "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC7", + "UMask": "0x15", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 
?.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", + "SampleAfterValue": "2000006", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xc7", + "UMask": "0x20", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "UMask": "0x2a", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SINGLE", + "SampleAfterValue": "2000005", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "UMask": "0x3c", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.PACKED", + "SampleAfterValue": "2000004", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xCA", "UMask": "0x2", "BriefDescription": "Number of X87 assists due to output value.", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.X87_OUTPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", + "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -87,7 +127,7 @@ "BriefDescription": "Number of X87 assists due to input value.", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.X87_INPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", + "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -97,7 +137,7 @@ "BriefDescription": "Number of SIMD FP assists due to Output values", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.SIMD_OUTPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) 
when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", + "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -107,7 +147,7 @@ "BriefDescription": "Number of SIMD FP assists due to input values", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.SIMD_INPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", + "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -121,51 +161,5 @@ "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xc7", - "UMask": "0x20", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x3", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x3c", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.PACKED", - "SampleAfterValue": "2000004", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x2a", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SINGLE", - "SampleAfterValue": "2000005", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x15", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. 
Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", - "SampleAfterValue": "2000006", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json b/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json index 06bf0a40e568..72781e1e3362 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json @@ -15,80 +15,49 @@ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x10", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_DSB_UOPS", - "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x20", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_MITE_UOPS", - "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x4", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. 
Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventName": "IDQ.MITE_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "UMask": "0x8", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_CYCLES", + "EventName": "IDQ.DSB_CYCLES", "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. 
Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "UMask": "0x10", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "EventName": "IDQ.MS_DSB_UOPS", + "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -99,7 +68,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.MS_DSB_CYCLES", "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. 
Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -111,7 +80,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.MS_DSB_OCCUR", "CounterMask": "1", - "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -122,7 +91,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", "CounterMask": "4", - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -133,7 +102,17 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", "CounterMask": "1", - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. 
Counting includes uops that may bypass the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x20", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -144,7 +123,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", "CounterMask": "4", - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -155,7 +134,39 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", "CounterMask": "1", - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. 
This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -165,7 +176,7 @@ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_ALL_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. 
Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -205,7 +216,7 @@ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding ?4 ? x? when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. 
Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -268,18 +279,7 @@ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0?2 cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", + "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. 
SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/memory.json index cfa1e5876ec3..e44f73c24ac8 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/memory.json @@ -95,7 +95,6 @@ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. 
Since this is the count of execution, it may not always cause a transactional abort.", "Counter": "0,1,2,3", "EventName": "TX_EXEC.MISC1", - "PublicDescription": "Unfriendly TSX abort triggered by a flowmarker.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -171,11 +170,11 @@ { "EventCode": "0xc8", "UMask": "0x4", - "BriefDescription": "Number of times HLE abort was triggered", + "BriefDescription": "Number of times HLE abort was triggered (PEBS)", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered (PEBS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -252,11 +251,11 @@ { "EventCode": "0xc9", "UMask": "0x4", - "BriefDescription": "Number of times RTM abort was triggered", + "BriefDescription": "Number of times RTM abort was triggered (PEBS)", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered .", + "PublicDescription": "Number of times RTM abort was triggered (PEBS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/other.json b/tools/perf/pmu-events/arch/x86/broadwellde/other.json index 718fcb1db2ee..4475249ea9da 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/other.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/other.json @@ -9,16 +9,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x5C", - "UMask": "0x2", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "Counter": "0,1,2,3", - "EventName": "CPL_CYCLES.RING123", - "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EdgeDetect": "1", 
"EventCode": "0x5C", @@ -31,6 +21,16 @@ "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5C", + "UMask": "0x2", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "Counter": "0,1,2,3", + "EventName": "CPL_CYCLES.RING123", + "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x63", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json index 02b4e1035f2d..920c89da9111 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json @@ -3,31 +3,41 @@ "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. 
\nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "UMask": "0x2", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. 
This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "EventCode": "0x03", @@ -60,22 +70,33 @@ }, { "EventCode": "0x0D", - "UMask": "0x8", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "UMask": "0x3", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. 
misprediction or memory nuke)", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RAT_STALL_CYCLES", - "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "CounterMask": "1", + "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x0D", "UMask": "0x3", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "AnyThread": "1", "CounterMask": "1", - "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0D", + "UMask": "0x8", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.RAT_STALL_CYCLES", + "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. 
This also includes the cycles during which the Allocator is serving another thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -89,6 +110,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0x0E", "UMask": "0x10", @@ -117,18 +150,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, { "EventCode": "0x14", "UMask": "0x1", @@ -139,6 +160,26 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Thread cycles when thread is not in halt state", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. 
The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3C", "UMask": "0x1", @@ -149,6 +190,36 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "UMask": "0x2", @@ -158,6 +229,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "UMask": "0x2", + "BriefDescription": "Count XClk pulses when this thread is unhalted and
the other thread is halted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4c", "UMask": "0x1", @@ -224,6 +304,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EdgeDetect": "1", + "Invert": "1", + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_END", + "CounterMask": "1", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "UMask": "0x1", @@ -404,6 +496,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "UMask": "0xa0", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x89", "UMask": "0xc1", @@ -434,6 +535,16 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xA0", + "UMask": "0x3", + "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "Counter": "0,1,2,3", + "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", + "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. 
See the Broadwell Optimization Guide for more information.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xA1", "UMask": "0x1", @@ -446,601 +557,471 @@ }, { "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "UMask": "0x1", + "BriefDescription": "Cycles per core when uops are executed in port 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "PublicDescription": "This event counts, on the
per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "UMask": "0x2", + "BriefDescription": "Cycles per core when uops are executed in port 1.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "PublicDescription": "This event counts, on the per-thread basis,
cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "UMask": "0x4", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x1", - "BriefDescription": "Resource-related stall cycles", + "EventCode": "0xA1", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x4", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.RS", - "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", "UMask": "0x8", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.SB", - "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x10", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ROB", - "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", - "CounterMask": "1", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", - "CounterMask": "8", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 
data cache.", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "UMask": "0x2", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per core when uops are executed in port 4.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", - "CounterMask": "2", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", - "CounterMask": "4", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", - "CounterMask": "5", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch
requests that may or may not be required by demands.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per core when uops are executed in port 5.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", - "CounterMask": "6", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", - "CounterMask": "12", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Number of Uops delivered by the LSD.", + "EventCode": "0xA1", + "UMask":
"0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "EventName": "LSD.UOPS", - "PublicDescription": "Number of Uops delivered by the LSD. ", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per core when uops are executed in port 6.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.THREAD", - "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Number of uops executed on the core.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE", - "PublicDescription": "Number of uops executed from any thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "CounterMask": "1", -
"PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", - "UMask": "0x0", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.ANY_P", - "Errata": "BDM61", - "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", - "UMask": "0x2", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.X87", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", + "EventCode": "0xA2", "UMask": "0x1", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "PEBS": "2", - "Counter": "1", - "EventName": "INST_RETIRED.PREC_DIST", - "Errata": "BDM11, BDM55", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "BriefDescription": "Resource-related stall cycles", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. 
This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", - "CounterHTOff": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", - "UMask": "0x40", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "EventCode": "0xA2", + "UMask": "0x4", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", "Counter": "0,1,2,3", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "SampleAfterValue": "100003", + "EventName": "RESOURCE_STALLS.RS", + "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "UMask": "0x1", - "BriefDescription": "Actually retired uops.", - "Data_LA": "1", - "PEBS": "1", + "EventCode": "0xA2", + "UMask": "0x8", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining from sync).", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "This event counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch).
This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Retirement slots used.", - "PEBS": "1", + "EventCode": "0xA2", + "UMask": "0x10", + "BriefDescription": "Cycles stalled due to re-order buffer full.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.", + "EventName": "RESOURCE_STALLS.ROB", + "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xC2", + "EventCode": "0xA3", "UMask": "0x1", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", "CounterMask": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xC2", + "EventCode": "0xA3", "UMask": "0x1", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "CounterMask": "10", - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to 
non PEBS uops retired event.", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "CounterMask": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", - "UMask": "0x1", - "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.CYCLES", - "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", + "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", + "CounterMask": "2", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "CounterMask": "2", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", "UMask": "0x4", - "BriefDescription": "Self-modifying code (SMC) detected.", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.SMC", - "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", + "CounterMask": "4", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC3", - "UMask": "0x20", - 
"BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "EventCode": "0xA3", + "UMask": "0x4", + "BriefDescription": "Total execution stalls.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.MASKMOV", - "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", - "SampleAfterValue": "100003", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "CounterMask": "4", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", - "UMask": "0x1", - "BriefDescription": "Conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x2", - "BriefDescription": "Direct and indirect near call instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x0", - "BriefDescription": "All (macro) branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "This event counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x8", - "BriefDescription": "Return instructions retired.", - "PEBS": "1", - 
"Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x10", - "BriefDescription": "Not taken branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x20", - "BriefDescription": "Taken branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x40", - "BriefDescription": "Far branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "Errata": "BDW98", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired. 
(Precise Event - PEBS)", - "PEBS": "2", + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "Errata": "BDW98", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "CounterMask": "5", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC5", - "UMask": "0x1", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x0", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x8", - "BriefDescription": "This event counts the number of mispredicted ret instructions retired. 
Non PEBS", - "PEBS": "1", + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.RET", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", - "SampleAfterValue": "100007", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "CounterMask": "5", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC5", - "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", - "PEBS": "2", + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", + "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", + "CounterMask": "6", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xCC", - "UMask": "0x20", - "BriefDescription": "Count cases of saving new LBR", + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "PublicDescription": "This event counts cases of saving new LBR records by hardware. 
This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "CounterMask": "6", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Thread cycles when thread is not in halt state", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "CounterMask": "8", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x89", - "UMask": "0xa0", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "CounterMask": "8", "SampleAfterValue": "2000003", - 
"CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "CounterMask": "12", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "CounterMask": "12", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Number of Uops delivered by the LSD.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", - "AnyThread": "1", + "EventName": "LSD.UOPS", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": 
"UOPS_EXECUTED_PORT.PORT_4_CORE", - "AnyThread": "1", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", - "AnyThread": "1", + "EventName": "LSD.CYCLES_ACTIVE", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", - "AnyThread": "1", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "Invert": "1", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", - "AnyThread": "1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x20", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "PEBS": "1", - 
"Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xB1", @@ -1083,335 +1064,364 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xe6", - "UMask": "0x1f", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "CounterMask": "8", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "UMask": "0x2", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": 
"CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", "CounterMask": "2", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls.", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "CounterMask": "4", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterMask": "3", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "CounterMask": "12", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "Invert": "1", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "CounterMask": "5", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "EventCode": "0xC0", + "UMask": "0x0", + "BriefDescription": "Number of instructions retired. 
General Counter - architectural event", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "CounterMask": "6", + "EventName": "INST_RETIRED.ANY_P", + "Errata": "BDM61", + "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", - "EventCode": "0xC3", + "EventCode": "0xC0", "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type.", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "PEBS": "2", + "Counter": "1", + "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "BDM11, BDM55", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "SampleAfterValue": "2000003", + "CounterHTOff": "1" + }, + { + "EventCode": "0xC0", + "UMask": "0x2", + "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.COUNT", - "CounterMask": "1", - "SampleAfterValue": "100003", + "EventName": "INST_RETIRED.X87", + "PublicDescription": "This event counts FP operations retired. 
For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "EventCode": "0xC1", + "UMask": "0x40", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", - "CounterMask": "4", - "SampleAfterValue": "2000003", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", - "Invert": "1", - "EventCode": "0x5E", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "BriefDescription": "Actually retired uops. (Precise Event - PEBS)", + "Data_LA": "1", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_END", - "CounterMask": "1", - "SampleAfterValue": "200003", + "EventName": "UOPS_RETIRED.ALL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. 
Maximal increment value for one cycle is eight.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "Invert": "1", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_ACTIVE", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "CounterMask": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "Invert": "1", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Cycles per thread when uops are executed in port 0", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterMask": "10", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Retirement slots used. 
(Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventName": "MACHINE_CLEARS.CYCLES", + "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "EdgeDetect": "1", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.COUNT", + "CounterMask": "1", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "EventCode": "0xC3", + "UMask": "0x4", + 
"BriefDescription": "Self-modifying code (SMC) detected.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xC3", "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.MASKMOV", + "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "EventCode": "0xC4", + "UMask": "0x0", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "This event counts all (macro) branch instructions 
retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "EventCode": "0xC4", + "UMask": "0x1", + "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA0", - "UMask": "0x3", - "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "EventCode": "0xC4", + "UMask": "0x2", + "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", - "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. 
See the Broadwell Optimization Guide for more information.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", + "EventCode": "0xC4", "UMask": "0x2", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0xC4", + "UMask": "0x4", + "BriefDescription": "All (macro) branch instructions retired. 
(Precise Event - PEBS)", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "Errata": "BDW98", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0xC4", + "UMask": "0x8", + "BriefDescription": "Return instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", - "UMask": "0x3", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. 
misprediction or memory nuke).", + "EventCode": "0xC4", + "UMask": "0x10", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "EventCode": "0xC4", + "UMask": "0x20", + "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "CounterMask": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "EventCode": "0xC4", + "UMask": "0x40", + "BriefDescription": "Far branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "Errata": "BDW98", + "PublicDescription": "This event counts far branch instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x0", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - 
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "CounterMask": "3", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x1", + "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x4", + "BriefDescription": "Mispredicted macro branch instructions retired. 
(Precise Event - PEBS)", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC5", + "UMask": "0x8", + "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.RET", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "EventCode": "0xC5", + "UMask": "0x20", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. 
(Precise Event - PEBS).", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0xCC", + "UMask": "0x20", + "BriefDescription": "Count cases of saving new LBR", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "EventCode": "0xe6", + "UMask": "0x1f", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json index 5ce8b67ba076..7d79c707c6d1 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json @@ -43,6 +43,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0xe", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", 
+ "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "UMask": "0x10", @@ -72,6 +82,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0x60", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x1", @@ -116,6 +135,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0xe", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x10", @@ -145,6 +174,15 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0x60", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4F", "UMask": "0x10", @@ -199,6 +237,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0xe", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x85", "UMask": "0x10", @@ -228,6 +276,15 @@ "SampleAfterValue": "100003", "CounterHTOff": 
"0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0x60", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xAE", "UMask": "0x1", @@ -250,60 +307,60 @@ }, { "EventCode": "0xBC", - "UMask": "0x21", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", + "UMask": "0x12", + "BriefDescription": "Number of DTLB page walker hits in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x12", - "BriefDescription": "Number of DTLB page walker hits in the L2.", + "UMask": "0x14", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x22", - "BriefDescription": "Number of ITLB page walker hits in the L2.", + "UMask": "0x18", + "BriefDescription": "Number of DTLB page walker hits in Memory.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x14", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", + "UMask": "0x21", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": 
"0xBC", - "UMask": "0x24", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", + "UMask": "0x22", + "BriefDescription": "Number of ITLB page walker hits in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L2", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x18", - "BriefDescription": "Number of DTLB page walker hits in Memory.", + "UMask": "0x24", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" @@ -327,62 +384,5 @@ "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0xe", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x60", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0xe", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - 
"EventCode": "0x49", - "UMask": "0x60", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0xe", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x60", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3 From f5b5bdd92f62248fd354e34986b7a49f30159af9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 15:38:00 -0800 Subject: perf vendor events intel: Update IvyBridge files to V20 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/ivybridge/cache.json | 324 +++++++++++++++++++++ .../perf/pmu-events/arch/x86/ivybridge/memory.json | 72 +++++ 2 files changed, 396 insertions(+) diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json index 3c0a28e27d73..999a01bc6467 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json @@ -774,5 +774,329 @@ "SampleAfterValue": "100003", "BriefDescription": "Split locks in SQ", "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": 
"OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + 
"CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0122", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0122", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all writebacks from the core to the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all 
demand code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": 
"OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x18000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable 
memory accesses. It also includes L2 hints sent to LLC to keep a line from being evicted out of the core caches", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address ", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts non-temporal stores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00010001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data reads ", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00010002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand rfo's ", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00010004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x000105B3", + "Counter": 
"0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch data reads", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00010122", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch prefetch RFOs ", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x000107F7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo references (demand & prefetch) ", + "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json index 6005b364c580..a74d54f56192 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json @@ -160,5 +160,77 @@ "PRECISE_STORE": "1", "TakenAlone": "1", "CounterHTOff": "3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x300400244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x300400091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + 
"BriefDescription": "Counts all demand & prefetch data reads that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3004003f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x300400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand code reads that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x300400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x6004001b3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts LLC replacements", + "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file -- cgit v1.2.3 From 5b50758c4b5e2b122309ee307bd2ebd7fcd8871f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 15:38:33 -0800 Subject: perf vendor events intel: Update IvyTown files to V20 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo 
Carvalho de Melo --- tools/perf/pmu-events/arch/x86/ivytown/cache.json | 456 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/ivytown/memory.json | 348 ++++++++++++++++ 2 files changed, 804 insertions(+) diff --git a/tools/perf/pmu-events/arch/x86/ivytown/cache.json b/tools/perf/pmu-events/arch/x86/ivytown/cache.json index d8cc93b3a04c..6dad3ad6b102 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/cache.json @@ -800,5 +800,461 @@ "SampleAfterValue": "100003", "BriefDescription": "Split locks in SQ", "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + 
"CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoop returned a clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch data reads that hit the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": 
"Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoop returned a clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x1003c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoop returned a clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all writebacks from the core to the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": 
"0x4003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoop returned a clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": 
"100003", + "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x803c8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x23ffc08000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0040", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": 
"100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0200", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that 
hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0080", + 
"Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts non-temporal stores", + "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/memory.json b/tools/perf/pmu-events/arch/x86/ivytown/memory.json index 4ec94df8d70b..3a7b86af8816 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/memory.json @@ -151,5 +151,353 @@ "PRECISE_STORE": "1", "TakenAlone": "1", "CounterHTOff": "3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc00244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67f800244", + "Counter": 
"0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from remote dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f800244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc203f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x6004003f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f8203f7", + "Counter": "0,1,2,3", +
"UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc003f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC the data is found in M state in remote cache and forwarded from there", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x600400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67f800004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from remote dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f820004", 
+ "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc00004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC the data is found in M state in remote cache and forwarded from there", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67fc00001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote & local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x600400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67f800001", + 
"Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f820001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc00001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC the data is found in M state in remote cache and forwarded from there", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc20002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the LLC and the data is found in M state in remote cache and forwarded from there.", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20040", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC and the data returned from remote & local dram", + 
"CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67fc00010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote & local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x600400010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67f800010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f820010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) 
data reads that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc00010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC the data is found in M state in remote cache and forwarded from there", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20200", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that miss in the LLC", + "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file -- cgit v1.2.3 From aa6292f4845e7921fca60b146403ea6682b8f65d Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:10 -0700 Subject: perf tools: Integrating the CoreSight decoding library The Open CoreSight Decoding Library (openCSD) is a free and open library to decode traces collected by the CoreSight hardware infrastructure. This patch adds the required mechanic to recognise the presence of the openCSD library on a system and set up miscellaneous flags to be used in the compilation of the trace decoding feature. 
Signed-off-by: Mathieu Poirier Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kim Phillips Cc: linux-arm-kernel@lists.infradead.org Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Link: http://lkml.kernel.org/r/1516211539-5166-2-git-send-email-mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1516635644-24819-1-git-send-email-mathieu.poirier@linaro.org [ Merged missing test-libopencsd.c file, provided later by Mathieu ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 3 ++- tools/build/feature/Makefile | 7 ++++++- tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-libopencsd.c | 8 ++++++++ tools/perf/Makefile.config | 25 +++++++++++++++++++++++++ tools/perf/Makefile.perf | 2 ++ 6 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 tools/build/feature/test-libopencsd.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index e52fcefee379..c378f003b007 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -66,7 +66,8 @@ FEATURE_TESTS_BASIC := \ bpf \ sched_getcpu \ sdt \ - setns + setns \ + libopencsd # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index cff38f342283..59585fe20221 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -52,7 +52,8 @@ FILES= \ test-cxx.bin \ test-jvmti.bin \ test-sched_getcpu.bin \ - test-setns.bin + test-setns.bin \ + test-libopencsd.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -104,6 +105,10 @@ $(OUTPUT)test-sched_getcpu.bin: $(OUTPUT)test-setns.bin: $(BUILD) +$(OUTPUT)test-libopencsd.bin: + $(BUILD) # -lopencsd_c_api -lopencsd provided by + # $(FEATURE_CHECK_LDFLAGS-libopencsd) + DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 diff --git a/tools/build/feature/test-all.c 
b/tools/build/feature/test-all.c index 6fdf83263ab7..8dc20a61341f 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -162,6 +162,10 @@ # include "test-setns.c" #undef main +#define main main_test_libopencsd +# include "test-libopencsd.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -199,6 +203,7 @@ int main(int argc, char *argv[]) main_test_sched_getcpu(); main_test_sdt(); main_test_setns(); + main_test_libopencsd(); return 0; } diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c new file mode 100644 index 000000000000..5ff1246e6194 --- /dev/null +++ b/tools/build/feature/test-libopencsd.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + (void)ocsd_get_version(); + return 0; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a042ccca4e93..0dfdaa9fa81e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -105,6 +105,16 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) +ifdef CSINCLUDES + LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) +endif +OPENCSDLIBS := -lopencsd_c_api -lopencsd +ifdef CSLIBS + LIBOPENCSD_LDFLAGS := -L$(CSLIBS) +endif +FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS) +FEATURE_CHECK_LDFLAGS-libopencsd := $(LIBOPENCSD_LDFLAGS) $(OPENCSDLIBS) + ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif @@ -353,6 +363,21 @@ ifeq ($(feature-setns), 1) $(call detected,CONFIG_SETNS) endif +ifndef NO_CORESIGHT + ifeq ($(feature-libopencsd), 1) + CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) + LDFLAGS += $(LIBOPENCSD_LDFLAGS) + EXTLIBS += $(OPENCSDLIBS) + $(call detected,CONFIG_LIBOPENCSD) + ifdef CSTRACE_RAW + CFLAGS += -DCS_DEBUG_RAW + ifeq (${CSTRACE_RAW}, packed) + CFLAGS += 
-DCS_RAW_PACKED + endif + endif + endif +endif + ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9a9b528a88bb..9b0351d3ce34 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -98,6 +98,8 @@ include ../scripts/utilities.mak # When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if # llvm-config is not in $PATH. +# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding. + # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL LC_COLLATE=C -- cgit v1.2.3 From 440a23b34c06104bd92b876b40efa45c2d7a0e27 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:11 -0700 Subject: perf tools: Add initial entry point for decoder CoreSight traces This patch adds the entry point for CoreSight trace decoding, serving as a jumping board for further expansions. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-3-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 5 ++ tools/perf/util/auxtrace.c | 2 + tools/perf/util/cs-etm.c | 213 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cs-etm.h | 15 ++++ 4 files changed, 235 insertions(+) create mode 100644 tools/perf/util/cs-etm.c diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4eef0c243306..c054ff802efb 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -88,6 +88,11 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-$(CONFIG_AUXTRACE) += arm-spe.o libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o + +ifdef CONFIG_LIBOPENCSD +libperf-$(CONFIG_AUXTRACE) += cs-etm.o
+endif + libperf-y += parse-branch-options.o libperf-y += dump-insn.o libperf-y += parse-regs-options.o diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 3bba9947ab7f..9faf3b5367db 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -52,6 +52,7 @@ #include "debug.h" #include +#include "cs-etm.h" #include "intel-pt.h" #include "intel-bts.h" #include "arm-spe.h" @@ -914,6 +915,7 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, case PERF_AUXTRACE_ARM_SPE: return arm_spe_process_auxtrace_info(event, session); case PERF_AUXTRACE_CS_ETM: + return cs_etm__process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c new file mode 100644 index 000000000000..f47797101857 --- /dev/null +++ b/tools/perf/util/cs-etm.c @@ -0,0 +1,213 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. + * + * Author: Tor Jeremiassen + * Author: Mathieu Poirier + */ + +#include +#include +#include +#include +#include + +#include + +#include "auxtrace.h" +#include "color.h" +#include "cs-etm.h" +#include "debug.h" +#include "evlist.h" +#include "intlist.h" +#include "machine.h" +#include "map.h" +#include "perf.h" +#include "thread.h" +#include "thread_map.h" +#include "thread-stack.h" +#include "util.h" + +#define MAX_TIMESTAMP (~0ULL) + +struct cs_etm_auxtrace { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + struct itrace_synth_opts synth_opts; + struct perf_session *session; + struct machine *machine; + struct thread *unknown_thread; + + u8 timeless_decoding; + u8 snapshot_mode; + u8 data_queued; + u8 sample_branches; + + int num_cpu; + u32 auxtrace_type; + u64 branches_sample_type; + u64 branches_id; + u64 **metadata; + u64 kernel_start; + unsigned int pmu_type; +}; + +struct cs_etm_queue { + struct cs_etm_auxtrace *etm; + struct thread 
*thread; + struct cs_etm_decoder *decoder; + struct auxtrace_buffer *buffer; + const struct cs_etm_state *state; + union perf_event *event_buf; + unsigned int queue_nr; + pid_t pid, tid; + int cpu; + u64 time; + u64 timestamp; + u64 offset; +}; + +static int cs_etm__flush_events(struct perf_session *session, + struct perf_tool *tool) +{ + (void) session; + (void) tool; + return 0; +} + +static void cs_etm__free_queue(void *priv) +{ + struct cs_etm_queue *etmq = priv; + + free(etmq); +} + +static void cs_etm__free_events(struct perf_session *session) +{ + unsigned int i; + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + struct auxtrace_queues *queues = &aux->queues; + + for (i = 0; i < queues->nr_queues; i++) { + cs_etm__free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + + auxtrace_queues__free(queues); +} + +static void cs_etm__free(struct perf_session *session) +{ + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + cs_etm__free_events(session); + session->auxtrace = NULL; + + zfree(&aux); +} + +static int cs_etm__process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + (void) session; + (void) event; + (void) sample; + (void) tool; + return 0; +} + +static int cs_etm__process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool) +{ + (void) session; + (void) event; + (void) tool; + return 0; +} + +static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) +{ + struct perf_evsel *evsel; + struct perf_evlist *evlist = etm->session->evlist; + bool timeless_decoding = true; + + /* + * Circle through the list of event and complain if we find one + * with the time bit set. 
+ */ + evlist__for_each_entry(evlist, evsel) { + if ((evsel->attr.sample_type & PERF_SAMPLE_TIME)) + timeless_decoding = false; + } + + return timeless_decoding; +} + +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + struct cs_etm_auxtrace *etm = NULL; + int event_header_size = sizeof(struct perf_event_header); + int info_header_size; + int total_size = auxtrace_info->header.size; + int err = 0; + + /* + * sizeof(auxtrace_info_event::type) + + * sizeof(auxtrace_info_event::reserved) == 8 + */ + info_header_size = 8; + + if (total_size < (event_header_size + info_header_size)) + return -EINVAL; + + etm = zalloc(sizeof(*etm)); + + if (!etm) + err = -ENOMEM; + + err = auxtrace_queues__init(&etm->queues); + if (err) + goto err_free_etm; + + etm->session = session; + etm->machine = &session->machines.host; + + etm->auxtrace_type = auxtrace_info->type; + etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); + + etm->auxtrace.process_event = cs_etm__process_event; + etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; + etm->auxtrace.flush_events = cs_etm__flush_events; + etm->auxtrace.free_events = cs_etm__free_events; + etm->auxtrace.free = cs_etm__free; + session->auxtrace = &etm->auxtrace; + + if (dump_trace) + return 0; + + err = auxtrace_queues__process_index(&etm->queues, session); + if (err) + goto err_free_queues; + + etm->data_queued = etm->queues.populated; + + return 0; + +err_free_queues: + auxtrace_queues__free(&etm->queues); + session->auxtrace = NULL; +err_free_etm: + zfree(&etm); + + return -EINVAL; +} diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 3cc6bc3263fe..5ab6a8ef1b32 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -18,6 +18,9 @@ #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ #define INCLUDE__UTIL_PERF_CS_ETM_H__ +#include "util/event.h" +#include "util/session.h" + /* 
Versionning header in case things need tro change in the future. That way * decoding of old snapshot is still possible. */ @@ -71,4 +74,16 @@ static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) +#ifdef HAVE_CSTRACE_SUPPORT +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session); +#else +static inline int +cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + return -1; +} +#endif + #endif -- cgit v1.2.3 From cd8bfd8c973eaff8e15260cf1f02e2b84c9b1cb9 Mon Sep 17 00:00:00 2001 From: Tor Jeremiassen Date: Wed, 17 Jan 2018 10:52:12 -0700 Subject: perf tools: Add processing of coresight metadata The auxtrace_info section contains metadata that describes the number of trace capable CPUs, their ETM version and trace configuration, including trace id values. This information is required by the trace decoder in order to properly decode the compressed trace packets. This patch adds code to read and parse this metadata, and store it for use in configuring instances of the cs-etm trace decoder. 
Co-authored-by: Mathieu Poirier Signed-off-by: Tor Jeremiassen Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-4-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/cs-etm.h | 3 + 2 files changed, 194 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f47797101857..18894ee7aa0b 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -102,12 +102,24 @@ static void cs_etm__free_events(struct perf_session *session) static void cs_etm__free(struct perf_session *session) { + int i; + struct int_node *inode, *tmp; struct cs_etm_auxtrace *aux = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); cs_etm__free_events(session); session->auxtrace = NULL; + /* First remove all traceID/CPU# nodes for the RB tree */ + intlist__for_each_entry_safe(inode, tmp, traceid_list) + intlist__remove(traceid_list, inode); + /* Then the RB tree itself */ + intlist__delete(traceid_list); + + for (i = 0; i < aux->num_cpu; i++) + zfree(&aux->metadata[i]); + + zfree(&aux->metadata); zfree(&aux); } @@ -151,15 +163,69 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) return timeless_decoding; } +static const char * const cs_etm_global_header_fmts[] = { + [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", + [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", +}; + +static const char * const cs_etm_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_ETMCR] = " ETMCR %llx\n", + [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", + [CS_ETM_ETMCCER] = " ETMCCER %llx\n", + [CS_ETM_ETMIDR] = " ETMIDR %llx\n", 
+}; + +static const char * const cs_etmv4_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", + [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", + [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", + [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n", + [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", + [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", + [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", +}; + +static void cs_etm__print_auxtrace_info(u64 *val, int num) +{ + int i, j, cpu = 0; + + for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { + if (val[i] == __perf_cs_etmv3_magic) + for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + else if (val[i] == __perf_cs_etmv4_magic) + for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + else + /* failure.. 
return */ + return; + } +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; struct cs_etm_auxtrace *etm = NULL; + struct int_node *inode; + unsigned int pmu_type; int event_header_size = sizeof(struct perf_event_header); int info_header_size; int total_size = auxtrace_info->header.size; - int err = 0; + int priv_size = 0; + int num_cpu; + int err = 0, idx = -1; + int i, j, k; + u64 *ptr, *hdr = NULL; + u64 **metadata = NULL; /* * sizeof(auxtrace_info_event::type) + @@ -170,10 +236,117 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (total_size < (event_header_size + info_header_size)) return -EINVAL; + priv_size = total_size - event_header_size - info_header_size; + + /* First the global part */ + ptr = (u64 *) auxtrace_info->priv; + + /* Look for version '0' of the header */ + if (ptr[0] != 0) + return -EINVAL; + + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + if (!hdr) + return -ENOMEM; + + /* Extract header information - see cs-etm.h for format */ + for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + hdr[i] = ptr[i]; + num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; + pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & + 0xffffffff); + + /* + * Create an RB tree for traceID-CPU# tuple. Since the conversion has + * to be made for each packet that gets decoded, optimizing access in + * anything other than a sequential array is worth doing. + */ + traceid_list = intlist__new(NULL); + if (!traceid_list) { + err = -ENOMEM; + goto err_free_hdr; + } + + metadata = zalloc(sizeof(*metadata) * num_cpu); + if (!metadata) { + err = -ENOMEM; + goto err_free_traceid_list; + } + + /* + * The metadata is stored in the auxtrace_info section and encodes + * the configuration of the ARM embedded trace macrocell which is + * required by the trace decoder to properly decode the trace due + * to its highly compressed nature. 
+ */ + for (j = 0; j < num_cpu; j++) { + if (ptr[i] == __perf_cs_etmv3_magic) { + metadata[j] = zalloc(sizeof(*metadata[j]) * + CS_ETM_PRIV_MAX); + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; + } + for (k = 0; k < CS_ETM_PRIV_MAX; k++) + metadata[j][k] = ptr[i + k]; + + /* The traceID is our handle */ + idx = metadata[j][CS_ETM_ETMTRACEIDR]; + i += CS_ETM_PRIV_MAX; + } else if (ptr[i] == __perf_cs_etmv4_magic) { + metadata[j] = zalloc(sizeof(*metadata[j]) * + CS_ETMV4_PRIV_MAX); + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; + } + for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) + metadata[j][k] = ptr[i + k]; + + /* The traceID is our handle */ + idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; + i += CS_ETMV4_PRIV_MAX; + } + + /* Get an RB node for this CPU */ + inode = intlist__findnew(traceid_list, idx); + + /* Something went wrong, no need to continue */ + if (!inode) { + err = PTR_ERR(inode); + goto err_free_metadata; + } + + /* + * The node for that CPU should not be taken. + * Back out if that's the case. + */ + if (inode->priv) { + err = -EINVAL; + goto err_free_metadata; + } + /* All good, associate the traceID with the CPU# */ + inode->priv = &metadata[j][CS_ETM_CPU]; + } + + /* + * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * CS_ETMV4_PRIV_MAX mark how many double words are in the + * global metadata, and each cpu's metadata respectively. + * The following tests if the correct number of double words was + * present in the auxtrace info section. 
+ */ + if (i * 8 != priv_size) { + err = -EINVAL; + goto err_free_metadata; + } + etm = zalloc(sizeof(*etm)); - if (!etm) + if (!etm) { err = -ENOMEM; + goto err_free_metadata; + } err = auxtrace_queues__init(&etm->queues); if (err) @@ -182,6 +355,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->session = session; etm->machine = &session->machines.host; + etm->num_cpu = num_cpu; + etm->pmu_type = pmu_type; + etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); + etm->metadata = metadata; etm->auxtrace_type = auxtrace_info->type; etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); @@ -192,8 +369,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->auxtrace.free = cs_etm__free; session->auxtrace = &etm->auxtrace; - if (dump_trace) + if (dump_trace) { + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return 0; + } err = auxtrace_queues__process_index(&etm->queues, session); if (err) @@ -208,6 +387,15 @@ err_free_queues: session->auxtrace = NULL; err_free_etm: zfree(&etm); +err_free_metadata: + /* No need to check @metadata[j], free(NULL) is supported */ + for (j = 0; j < num_cpu; j++) + free(metadata[j]); + zfree(&metadata); +err_free_traceid_list: + intlist__delete(traceid_list); +err_free_hdr: + zfree(&hdr); return -EINVAL; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 5ab6a8ef1b32..5864d5dca616 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -64,6 +64,9 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* RB tree for quick conversion between traceID and CPUs */ +struct intlist *traceid_list; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) -- cgit v1.2.3 From 68ffe39028982d08bf382700642ed46cc0539e1b Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:13 -0700 Subject: perf tools: Add decoder mechanic to support dumping trace data This patch adds the required interface to the openCSD library to support dumping CoreSight trace packet using 
the "report --dump" command. The information conveyed is related to the type of packets gathered by a trace session rather than full decoding. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-5-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/cs-etm-decoder/Build | 1 + tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 334 ++++++++++++++++++++++++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 96 +++++++ tools/perf/util/cs-etm.c | 108 +++++++- 5 files changed, 536 insertions(+), 4 deletions(-) create mode 100644 tools/perf/util/cs-etm-decoder/Build create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-decoder.c create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-decoder.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index c054ff802efb..ea0a452550b0 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -91,6 +91,7 @@ libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o ifdef CONFIG_LIBOPENCSD libperf-$(CONFIG_AUXTRACE) += cs-etm.o +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ endif libperf-y += parse-branch-options.o diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build new file mode 100644 index 000000000000..bc22c39c727f --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c new file mode 100644 index 000000000000..6a4c86b1431f --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -0,0 +1,334 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro 
Limited. + * + * Author: Tor Jeremiassen + * Author: Mathieu Poirier + */ + +#include +#include +#include +#include +#include +#include + +#include "cs-etm.h" +#include "cs-etm-decoder.h" +#include "intlist.h" +#include "util.h" + +#define MAX_BUFFER 1024 + +/* use raw logging */ +#ifdef CS_DEBUG_RAW +#define CS_LOG_RAW_FRAMES +#ifdef CS_RAW_PACKED +#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT | \ + OCSD_DFRMTR_PACKED_RAW_OUT) +#else +#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT) +#endif +#endif + +struct cs_etm_decoder { + void *data; + void (*packet_printer)(const char *msg); + bool trace_on; + dcd_tree_handle_t dcd_tree; + cs_etm_mem_cb_type mem_access; + ocsd_datapath_resp_t prev_return; + u32 packet_count; + u32 head; + u32 tail; + struct cs_etm_packet packet_buffer[MAX_BUFFER]; +}; + +static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, + ocsd_etmv4_cfg *config) +{ + config->reg_configr = params->etmv4.reg_configr; + config->reg_traceidr = params->etmv4.reg_traceidr; + config->reg_idr0 = params->etmv4.reg_idr0; + config->reg_idr1 = params->etmv4.reg_idr1; + config->reg_idr2 = params->etmv4.reg_idr2; + config->reg_idr8 = params->etmv4.reg_idr8; + config->reg_idr9 = 0; + config->reg_idr10 = 0; + config->reg_idr11 = 0; + config->reg_idr12 = 0; + config->reg_idr13 = 0; + config->arch_ver = ARCH_V8; + config->core_prof = profile_CortexA; +} + +static void cs_etm_decoder__print_str_cb(const void *p_context, + const char *msg, + const int str_len) +{ + if (p_context && str_len) + ((struct cs_etm_decoder *)p_context)->packet_printer(msg); +} + +static int +cs_etm_decoder__init_def_logger_printing(struct cs_etm_decoder_params *d_params, + struct cs_etm_decoder *decoder) +{ + int ret = 0; + + if (d_params->packet_printer == NULL) + return -1; + + decoder->packet_printer = d_params->packet_printer; + + /* + * Set up a library default logger to process any printers + * (packet/raw frame) we add later. 
+ */ + ret = ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1); + if (ret != 0) + return -1; + + /* no stdout / err / file output */ + ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL); + if (ret != 0) + return -1; + + /* + * Set the string CB for the default logger, passes strings to + * perf print logger. + */ + ret = ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree, + (void *)decoder, + cs_etm_decoder__print_str_cb); + if (ret != 0) + ret = -1; + + return 0; +} + +#ifdef CS_LOG_RAW_FRAMES +static void +cs_etm_decoder__init_raw_frame_logging(struct cs_etm_decoder_params *d_params, + struct cs_etm_decoder *decoder) +{ + /* Only log these during a --dump operation */ + if (d_params->operation == CS_ETM_OPERATION_PRINT) { + /* set up a library default logger to process the + * raw frame printer we add later + */ + ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1); + + /* no stdout / err / file output */ + ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL); + + /* set the string CB for the default logger, + * passes strings to perf print logger. 
+ */ + ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree, + (void *)decoder, + cs_etm_decoder__print_str_cb); + + /* use the built in library printer for the raw frames */ + ocsd_dt_set_raw_frame_printer(decoder->dcd_tree, + CS_RAW_DEBUG_FLAGS); + } +} +#else +static void +cs_etm_decoder__init_raw_frame_logging( + struct cs_etm_decoder_params *d_params __maybe_unused, + struct cs_etm_decoder *decoder __maybe_unused) +{ +} +#endif + +static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder, + const char *decoder_name, + void *trace_config) +{ + u8 csid; + + if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, + OCSD_CREATE_FLG_PACKET_PROC, + trace_config, &csid)) + return -1; + + if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) + return -1; + + return 0; +} + +static int +cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + const char *decoder_name; + ocsd_etmv4_cfg trace_config_etmv4; + void *trace_config; + + switch (t_params->protocol) { + case CS_ETM_PROTO_ETMV4i: + cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); + decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + trace_config = &trace_config_etmv4; + break; + default: + return -1; + } + + return cs_etm_decoder__create_packet_printer(decoder, + decoder_name, + trace_config); +} + +static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) +{ + int i; + + decoder->head = 0; + decoder->tail = 0; + decoder->packet_count = 0; + for (i = 0; i < MAX_BUFFER; i++) { + decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].exc = false; + decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; + } +} + +static int +cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + 
if (d_params->operation == CS_ETM_OPERATION_PRINT) + return cs_etm_decoder__create_etm_packet_printer(t_params, + decoder); + return -1; +} + +struct cs_etm_decoder * +cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params t_params[]) +{ + struct cs_etm_decoder *decoder; + ocsd_dcd_tree_src_t format; + u32 flags; + int i, ret; + + if ((!t_params) || (!d_params)) + return NULL; + + decoder = zalloc(sizeof(*decoder)); + + if (!decoder) + return NULL; + + decoder->data = d_params->data; + decoder->prev_return = OCSD_RESP_CONT; + cs_etm_decoder__clear_buffer(decoder); + format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : + OCSD_TRC_SRC_SINGLE); + flags = 0; + flags |= (d_params->fsyncs ? OCSD_DFRMTR_HAS_FSYNCS : 0); + flags |= (d_params->hsyncs ? OCSD_DFRMTR_HAS_HSYNCS : 0); + flags |= (d_params->frame_aligned ? OCSD_DFRMTR_FRAME_MEM_ALIGN : 0); + + /* + * Drivers may add barrier frames when used with perf, set up to + * handle this. Barriers consist of an FSYNC packet repeated 4 times. 
+ */ + flags |= OCSD_DFRMTR_RESET_ON_4X_FSYNC; + + /* Create decode tree for the data source */ + decoder->dcd_tree = ocsd_create_dcd_tree(format, flags); + + if (decoder->dcd_tree == 0) + goto err_free_decoder; + + /* init library print logging support */ + ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder); + if (ret != 0) + goto err_free_decoder_tree; + + /* init raw frame logging if required */ + cs_etm_decoder__init_raw_frame_logging(d_params, decoder); + + for (i = 0; i < num_cpu; i++) { + ret = cs_etm_decoder__create_etm_decoder(d_params, + &t_params[i], + decoder); + if (ret != 0) + goto err_free_decoder_tree; + } + + return decoder; + +err_free_decoder_tree: + ocsd_destroy_dcd_tree(decoder->dcd_tree); +err_free_decoder: + free(decoder); + return NULL; +} + +int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, + u64 indx, const u8 *buf, + size_t len, size_t *consumed) +{ + int ret = 0; + ocsd_datapath_resp_t cur = OCSD_RESP_CONT; + ocsd_datapath_resp_t prev_return = decoder->prev_return; + size_t processed = 0; + u32 count; + + while (processed < len) { + if (OCSD_DATA_RESP_IS_WAIT(prev_return)) { + cur = ocsd_dt_process_data(decoder->dcd_tree, + OCSD_OP_FLUSH, + 0, + 0, + NULL, + NULL); + } else if (OCSD_DATA_RESP_IS_CONT(prev_return)) { + cur = ocsd_dt_process_data(decoder->dcd_tree, + OCSD_OP_DATA, + indx + processed, + len - processed, + &buf[processed], + &count); + processed += count; + } else { + ret = -EINVAL; + break; + } + + /* + * Return to the input code if the packet buffer is full. + * Flushing will get done once the packet buffer has been + * processed. 
+ */ + if (OCSD_DATA_RESP_IS_WAIT(cur)) + break; + + prev_return = cur; + } + + decoder->prev_return = cur; + *consumed = processed; + + return ret; +} + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder) +{ + if (!decoder) + return; + + ocsd_destroy_dcd_tree(decoder->dcd_tree); + decoder->dcd_tree = NULL; + free(decoder); +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h new file mode 100644 index 000000000000..a1e9b0ac5965 --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. + * + * Author: Tor Jeremiassen + * Author: Mathieu Poirier + */ + +#ifndef INCLUDE__CS_ETM_DECODER_H__ +#define INCLUDE__CS_ETM_DECODER_H__ + +#include +#include + +struct cs_etm_decoder; + +struct cs_etm_buffer { + const unsigned char *buf; + size_t len; + u64 offset; + u64 ref_timestamp; +}; + +enum cs_etm_sample_type { + CS_ETM_RANGE = 1 << 0, +}; + +struct cs_etm_packet { + enum cs_etm_sample_type sample_type; + u64 start_addr; + u64 end_addr; + u8 exc; + u8 exc_ret; + int cpu; +}; + +struct cs_etm_queue; + +typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, + size_t, u8 *); + +struct cs_etmv4_trace_params { + u32 reg_idr0; + u32 reg_idr1; + u32 reg_idr2; + u32 reg_idr8; + u32 reg_configr; + u32 reg_traceidr; +}; + +struct cs_etm_trace_params { + int protocol; + union { + struct cs_etmv4_trace_params etmv4; + }; +}; + +struct cs_etm_decoder_params { + int operation; + void (*packet_printer)(const char *msg); + cs_etm_mem_cb_type mem_acc_cb; + u8 formatted; + u8 fsyncs; + u8 hsyncs; + u8 frame_aligned; + void *data; +}; + +/* + * The following enums are indexed starting with 1 to align with the + * open source coresight trace decoder library. 
+ */ +enum { + CS_ETM_PROTO_ETMV3 = 1, + CS_ETM_PROTO_ETMV4i, + CS_ETM_PROTO_ETMV4d, +}; + +enum { + CS_ETM_OPERATION_PRINT = 1, + CS_ETM_OPERATION_DECODE, +}; + +int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, + u64 indx, const u8 *buf, + size_t len, size_t *consumed); + +struct cs_etm_decoder * +cs_etm_decoder__new(int num_cpu, + struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params t_params[]); + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder); + +#endif /* INCLUDE__CS_ETM_DECODER_H__ */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 18894ee7aa0b..cad429ce3c00 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -18,6 +18,7 @@ #include "auxtrace.h" #include "color.h" #include "cs-etm.h" +#include "cs-etm-decoder/cs-etm-decoder.h" #include "debug.h" #include "evlist.h" #include "intlist.h" @@ -69,6 +70,78 @@ struct cs_etm_queue { u64 offset; }; +static void cs_etm__packet_dump(const char *pkt_string) +{ + const char *color = PERF_COLOR_BLUE; + int len = strlen(pkt_string); + + if (len && (pkt_string[len-1] == '\n')) + color_fprintf(stdout, color, " %s", pkt_string); + else + color_fprintf(stdout, color, " %s\n", pkt_string); + + fflush(stdout); +} + +static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, + struct auxtrace_buffer *buffer) +{ + int i, ret; + const char *color = PERF_COLOR_BLUE; + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + struct cs_etm_decoder *decoder; + size_t buffer_used = 0; + + fprintf(stdout, "\n"); + color_fprintf(stdout, color, + ". ... 
CoreSight ETM Trace data: size %zu bytes\n", + buffer->size); + + /* Use metadata to fill in trace parameters for trace decoder */ + t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + for (i = 0; i < etm->num_cpu; i++) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].etmv4.reg_configr = + etm->metadata[i][CS_ETMV4_TRCCONFIGR]; + t_params[i].etmv4.reg_traceidr = + etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + } + + /* Set decoder parameters to simply print the trace packets */ + d_params.packet_printer = cs_etm__packet_dump; + d_params.operation = CS_ETM_OPERATION_PRINT; + d_params.formatted = true; + d_params.fsyncs = false; + d_params.hsyncs = false; + d_params.frame_aligned = true; + + decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + + zfree(&t_params); + + if (!decoder) + return; + do { + size_t consumed; + + ret = cs_etm_decoder__process_data_block( + decoder, buffer->offset, + &((u8 *)buffer->data)[buffer_used], + buffer->size - buffer_used, &consumed); + if (ret) + break; + + buffer_used += consumed; + } while (buffer_used < buffer->size); + + cs_etm_decoder__free(decoder); +} + static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { @@ -137,11 +210,38 @@ static int cs_etm__process_event(struct perf_session *session, static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool) + struct perf_tool *tool __maybe_unused) { - (void) session; - (void) event; - (void) tool; + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + if (!etm->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = 
perf_data__fd(session->data); + bool is_pipe = perf_data__is_pipe(session->data); + int err; + + if (is_pipe) + data_offset = 0; + else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&etm->queues, session, + event, data_offset, &buffer); + if (err) + return err; + + if (dump_trace) + if (auxtrace_buffer__get_data(buffer, fd)) { + cs_etm__dump_event(etm, buffer); + auxtrace_buffer__put_data(buffer); + } + } + return 0; } -- cgit v1.2.3 From c9a01a11dfe63e5b981b1fe8c99435e12c758007 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:14 -0700 Subject: perf tools: Add support for decoding CoreSight trace data Adding functionality to create a CoreSight trace decoder capable of decoding trace data pushed by a client application. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-6-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 119 ++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 6a4c86b1431f..57b020b0b36f 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -200,6 +200,121 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) } } +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, + const u8 trace_chan_id, + enum cs_etm_sample_type sample_type) +{ + u32 et = 0; + struct int_node *inode = NULL; + + if (decoder->packet_count >= MAX_BUFFER - 1) + return 
OCSD_RESP_FATAL_SYS_ERR; + + /* Search the RB tree for the cpu associated with this traceID */ + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return OCSD_RESP_FATAL_SYS_ERR; + + et = decoder->tail; + decoder->packet_buffer[et].sample_type = sample_type; + decoder->packet_buffer[et].start_addr = elem->st_addr; + decoder->packet_buffer[et].end_addr = elem->en_addr; + decoder->packet_buffer[et].exc = false; + decoder->packet_buffer[et].exc_ret = false; + decoder->packet_buffer[et].cpu = *((int *)inode->priv); + + /* Wrap around if need be */ + et = (et + 1) & (MAX_BUFFER - 1); + + decoder->tail = et; + decoder->packet_count++; + + if (decoder->packet_count == MAX_BUFFER - 1) + return OCSD_RESP_WAIT; + + return OCSD_RESP_CONT; +} + +static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( + const void *context, + const ocsd_trc_index_t indx __maybe_unused, + const u8 trace_chan_id __maybe_unused, + const ocsd_generic_trace_elem *elem) +{ + ocsd_datapath_resp_t resp = OCSD_RESP_CONT; + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + + switch (elem->elem_type) { + case OCSD_GEN_TRC_ELEM_UNKNOWN: + break; + case OCSD_GEN_TRC_ELEM_NO_SYNC: + decoder->trace_on = false; + break; + case OCSD_GEN_TRC_ELEM_TRACE_ON: + decoder->trace_on = true; + break; + case OCSD_GEN_TRC_ELEM_INSTR_RANGE: + resp = cs_etm_decoder__buffer_packet(decoder, elem, + trace_chan_id, + CS_ETM_RANGE); + break; + case OCSD_GEN_TRC_ELEM_EXCEPTION: + decoder->packet_buffer[decoder->tail].exc = true; + break; + case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: + decoder->packet_buffer[decoder->tail].exc_ret = true; + break; + case OCSD_GEN_TRC_ELEM_PE_CONTEXT: + case OCSD_GEN_TRC_ELEM_EO_TRACE: + case OCSD_GEN_TRC_ELEM_ADDR_NACC: + case OCSD_GEN_TRC_ELEM_TIMESTAMP: + case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: + case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: + case OCSD_GEN_TRC_ELEM_EVENT: + case OCSD_GEN_TRC_ELEM_SWTRACE: + case OCSD_GEN_TRC_ELEM_CUSTOM: + default: + break; + } + 
+ return resp; +} + +static int cs_etm_decoder__create_etm_packet_decoder( + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + const char *decoder_name; + ocsd_etmv4_cfg trace_config_etmv4; + void *trace_config; + u8 csid; + + switch (t_params->protocol) { + case CS_ETM_PROTO_ETMV4i: + cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); + decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + trace_config = &trace_config_etmv4; + break; + default: + return -1; + } + + if (ocsd_dt_create_decoder(decoder->dcd_tree, + decoder_name, + OCSD_CREATE_FLG_FULL_DECODER, + trace_config, &csid)) + return -1; + + if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, + cs_etm_decoder__gen_trace_elem_printer, + decoder)) + return -1; + + return 0; +} + static int cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params *t_params, @@ -208,6 +323,10 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, if (d_params->operation == CS_ETM_OPERATION_PRINT) return cs_etm_decoder__create_etm_packet_printer(t_params, decoder); + else if (d_params->operation == CS_ETM_OPERATION_DECODE) + return cs_etm_decoder__create_etm_packet_decoder(t_params, + decoder); + return -1; } -- cgit v1.2.3 From 290598be0e84badee2ce93b32e4146184720b2f4 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:15 -0700 Subject: perf tools: Add functionality to communicate with the openCSD decoder This patch adds functions to communicate with the openCSD trace decoder, more specifically to access program memory, fetch trace packets and reset the decoder. 
Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-7-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 60 +++++++++++++++++++++++++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 9 ++++ 2 files changed, 69 insertions(+) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 57b020b0b36f..1fb01849f1c7 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -45,6 +45,66 @@ struct cs_etm_decoder { struct cs_etm_packet packet_buffer[MAX_BUFFER]; }; +static u32 +cs_etm_decoder__mem_access(const void *context, + const ocsd_vaddr_t address, + const ocsd_mem_space_acc_t mem_space __maybe_unused, + const u32 req_size, + u8 *buffer) +{ + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + + return decoder->mem_access(decoder->data, + address, + req_size, + buffer); +} + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, + u64 start, u64 end, + cs_etm_mem_cb_type cb_func) +{ + decoder->mem_access = cb_func; + + if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end, + OCSD_MEM_SPACE_ANY, + cs_etm_decoder__mem_access, decoder)) + return -1; + + return 0; +} + +int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) +{ + ocsd_datapath_resp_t dp_ret; + + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, + 0, 0, NULL, NULL); + if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) + return -1; + + return 0; +} + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, + struct cs_etm_packet *packet) +{ + if (!decoder || !packet) + return -EINVAL; + + /* Nothing to do, might as 
well just return */ + if (decoder->packet_count == 0) + return 0; + + *packet = decoder->packet_buffer[decoder->head]; + + decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); + + decoder->packet_count--; + + return 1; +} + static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, ocsd_etmv4_cfg *config) { diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index a1e9b0ac5965..3d2e6205d186 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -93,4 +93,13 @@ cs_etm_decoder__new(int num_cpu, void cs_etm_decoder__free(struct cs_etm_decoder *decoder); +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, + u64 start, u64 end, + cs_etm_mem_cb_type cb_func); + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, + struct cs_etm_packet *packet); + +int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); + #endif /* INCLUDE__CS_ETM_DECODER_H__ */ -- cgit v1.2.3 From 20d9c478b01aa1a652db54c1fe867dc92636bc70 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:16 -0700 Subject: perf tools: Add queue management functionality Add functionality to set up trace queues so that traces associated with CoreSight auxtrace events found in the perf.data file can be classified properly. The decoder and memory callback associated with each queue are then used to decode the traces that have been assigned to that queue. 
Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-8-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 204 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index cad429ce3c00..83eb676274b5 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -196,15 +196,215 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, + size_t size, u8 *buffer) +{ + u8 cpumode; + u64 offset; + int len; + struct thread *thread; + struct machine *machine; + struct addr_location al; + + if (!etmq) + return -1; + + machine = etmq->etm->machine; + if (address >= etmq->etm->kernel_start) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = etmq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) + return -EINVAL; + thread = etmq->etm->unknown_thread; + } + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al); + + if (!al.map || !al.map->dso) + return 0; + + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE)) + return 0; + + offset = al.map->map_ip(al.map, address); + + map__load(al.map); + + len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); + + if (len <= 0) + return 0; + + return len; +} + +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + unsigned int queue_nr) +{ + int i; + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + struct cs_etm_queue 
*etmq; + + etmq = zalloc(sizeof(*etmq)); + if (!etmq) + return NULL; + + etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!etmq->event_buf) + goto out_free; + + etmq->etm = etm; + etmq->queue_nr = queue_nr; + etmq->pid = -1; + etmq->tid = -1; + etmq->cpu = -1; + + /* Use metadata to fill in trace parameters for trace decoder */ + t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + + if (!t_params) + goto out_free; + + for (i = 0; i < etm->num_cpu; i++) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].etmv4.reg_configr = + etm->metadata[i][CS_ETMV4_TRCCONFIGR]; + t_params[i].etmv4.reg_traceidr = + etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + } + + /* Set decoder parameters to simply print the trace packets */ + d_params.packet_printer = cs_etm__packet_dump; + d_params.operation = CS_ETM_OPERATION_DECODE; + d_params.formatted = true; + d_params.fsyncs = false; + d_params.hsyncs = false; + d_params.frame_aligned = true; + d_params.data = etmq; + + etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + + zfree(&t_params); + + if (!etmq->decoder) + goto out_free; + + /* + * Register a function to handle all memory accesses required by + * the trace decoder library. 
+ */ + if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, + 0x0L, ((u64) -1L), + cs_etm__mem_access)) + goto out_free_decoder; + + etmq->offset = 0; + + return etmq; + +out_free_decoder: + cs_etm_decoder__free(etmq->decoder); +out_free: + zfree(&etmq->event_buf); + free(etmq); + + return NULL; +} + +static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct cs_etm_queue *etmq = queue->priv; + + if (list_empty(&queue->head) || etmq) + return 0; + + etmq = cs_etm__alloc_queue(etm, queue_nr); + + if (!etmq) + return -ENOMEM; + + queue->priv = etmq; + + if (queue->cpu != -1) + etmq->cpu = queue->cpu; + + etmq->tid = queue->tid; + + return 0; +} + +static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) +{ + unsigned int i; + int ret; + + for (i = 0; i < etm->queues.nr_queues; i++) { + ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); + if (ret) + return ret; + } + + return 0; +} + +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) +{ + if (etm->queues.new_data) { + etm->queues.new_data = false; + return cs_etm__setup_queues(etm); + } + + return 0; +} + static int cs_etm__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool) { - (void) session; - (void) event; - (void) sample; - (void) tool; + int err = 0; + u64 timestamp; + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + /* Keep compiler happy */ + (void)event; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("CoreSight ETM Trace requires ordered events\n"); + return -EINVAL; + } + + if (!etm->timeless_decoding) + return -EINVAL; + + if (sample->time && (sample->time != (u64) -1)) + timestamp = sample->time; + else + timestamp = 0; + + if (timestamp || etm->timeless_decoding) { + err = cs_etm__update_queues(etm); + if (err) + return err; + } + return 0; } -- cgit v1.2.3 
From 9f878b29da969e195e106992ea8572da3d244811 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:17 -0700 Subject: perf tools: Add full support for CoreSight trace decoding This patch adds support for complete packet decoding, allowing traces collected during a trace session to be decoded from the "report" infrastructure. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-9-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 166 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 160 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 83eb676274b5..407095af1456 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -70,6 +70,10 @@ struct cs_etm_queue { u64 offset; }; +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, + pid_t tid, u64 time_); + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -145,9 +149,25 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { - (void) session; - (void) tool; - return 0; + int ret; + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + if (!etm->timeless_decoding) + return -EINVAL; + + ret = cs_etm__update_queues(etm); + + if (ret < 0) + return ret; + + return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); } static void cs_etm__free_queue(void *priv) @@ 
-369,6 +389,138 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) return 0; } +static int +cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) +{ + struct auxtrace_buffer *aux_buffer = etmq->buffer; + struct auxtrace_buffer *old_buffer = aux_buffer; + struct auxtrace_queue *queue; + + queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; + + aux_buffer = auxtrace_buffer__next(queue, aux_buffer); + + /* If no more data, drop the previous auxtrace_buffer and return */ + if (!aux_buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + buff->len = 0; + return 0; + } + + etmq->buffer = aux_buffer; + + /* If the aux_buffer doesn't have data associated, try to load it */ + if (!aux_buffer->data) { + /* get the file desc associated with the perf data file */ + int fd = perf_data__fd(etmq->etm->session->data); + + aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); + if (!aux_buffer->data) + return -ENOMEM; + } + + /* If valid, drop the previous buffer */ + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + + buff->offset = aux_buffer->offset; + buff->len = aux_buffer->size; + buff->buf = aux_buffer->data; + + buff->ref_timestamp = aux_buffer->reference; + + return buff->len; +} + +static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue) +{ + struct cs_etm_queue *etmq = queue->priv; + + /* CPU-wide tracing isn't supported yet */ + if (queue->tid == -1) + return; + + if ((!etmq->thread) && (etmq->tid != -1)) + etmq->thread = machine__find_thread(etm->machine, -1, + etmq->tid); + + if (etmq->thread) { + etmq->pid = etmq->thread->pid_; + if (queue->cpu == -1) + etmq->cpu = etmq->thread->cpu; + } +} + +static int cs_etm__run_decoder(struct cs_etm_queue *etmq) +{ + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_buffer buffer; + size_t buffer_used, processed; + int err = 0; + + if (!etm->kernel_start) + etm->kernel_start = machine__kernel_start(etm->machine); 
+ + /* Go through each buffer in the queue and decode them one by one */ +more: + buffer_used = 0; + memset(&buffer, 0, sizeof(buffer)); + err = cs_etm__get_trace(&buffer, etmq); + if (err <= 0) + return err; + /* + * We cannot assume consecutive blocks in the data file are contiguous, + * reset the decoder to force re-sync. + */ + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) + return err; + + /* Run trace decoder until buffer consumed or end of trace */ + do { + processed = 0; + + err = cs_etm_decoder__process_data_block( + etmq->decoder, + etmq->offset, + &buffer.buf[buffer_used], + buffer.len - buffer_used, + &processed); + + if (err) + return err; + + etmq->offset += processed; + buffer_used += processed; + } while (buffer.len > buffer_used); + +goto more; + + return err; +} + +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, + pid_t tid, u64 time_) +{ + unsigned int i; + struct auxtrace_queues *queues = &etm->queues; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &etm->queues.queue_array[i]; + struct cs_etm_queue *etmq = queue->priv; + + if (etmq && ((tid == -1) || (etmq->tid == tid))) { + etmq->time = time_; + cs_etm__set_pid_tid_cpu(etm, queue); + cs_etm__run_decoder(etmq); + } + } + + return 0; +} + static int cs_etm__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -380,9 +532,6 @@ static int cs_etm__process_event(struct perf_session *session, struct cs_etm_auxtrace, auxtrace); - /* Keep compiler happy */ - (void)event; - if (dump_trace) return 0; @@ -405,6 +554,11 @@ static int cs_etm__process_event(struct perf_session *session, return err; } + if (event->header.type == PERF_RECORD_EXIT) + return cs_etm__process_timeless_queues(etm, + event->fork.tid, + sample->time); + return 0; } -- cgit v1.2.3 From b12235b113cfd7e4a31f0c8bdb0d8e8588ba6683 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:18 -0700 Subject: 
perf tools: Add mechanic to synthesise CoreSight trace packets Once decoded from trace packets, information on the trace range needs to be communicated to the perf synthesis infrastructure so that it is available to the perf tools built-in rendering tools and scripts. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-10-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 407095af1456..b9f0a53dfa65 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -453,6 +453,157 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, } } +/* + * The cs etm packet encodes an instruction range between a branch target + * and the next taken branch. Generate sample accordingly. 
+ */ +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, + struct cs_etm_packet *packet) +{ + int ret = 0; + struct cs_etm_auxtrace *etm = etmq->etm; + struct perf_sample sample = {.ip = 0,}; + union perf_event *event = etmq->event_buf; + u64 start_addr = packet->start_addr; + u64 end_addr = packet->end_addr; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + sample.ip = start_addr; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.addr = end_addr; + sample.id = etmq->etm->branches_id; + sample.stream_id = etmq->etm->branches_id; + sample.period = 1; + sample.cpu = packet->cpu; + sample.flags = 0; + sample.cpumode = PERF_RECORD_MISC_USER; + + ret = perf_session__deliver_synth_event(etm->session, event, &sample); + + if (ret) + pr_err( + "CS ETM Trace: failed to deliver instruction event, error %d\n", + ret); + + return ret; +} + +struct cs_etm_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int cs_etm__event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct cs_etm_synth *cs_etm_synth = + container_of(tool, struct cs_etm_synth, dummy_tool); + + return perf_session__deliver_synth_event(cs_etm_synth->session, + event, NULL); +} + +static int cs_etm__synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct cs_etm_synth cs_etm_synth; + + memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); + cs_etm_synth.session = session; + + return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, + &id, cs_etm__event_synth); +} + +static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = 
false; + u64 id; + int err; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == etm->pmu_type) { + found = true; + break; + } + } + + if (!found) { + pr_debug("No selected events with CoreSight Trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (etm->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + /* create new id val to be a fixed offset from evsel id */ + id = evsel->id[0] + 1000000000; + + if (!id) + id = 1; + + if (etm->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + err = cs_etm__synth_event(session, &attr, id); + if (err) + return err; + etm->sample_branches = true; + etm->branches_sample_type = attr.sample_type; + etm->branches_id = id; + } + + return 0; +} + +static int cs_etm__sample(struct cs_etm_queue *etmq) +{ + int ret; + struct cs_etm_packet packet; + + while (1) { + ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); + if (ret <= 0) + return ret; + + /* + * If the packet contains an instruction range, generate an + * instruction sequence event. 
+ */ + if (packet.sample_type & CS_ETM_RANGE) + cs_etm__synth_branch_sample(etmq, &packet); + } + + return 0; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { struct cs_etm_auxtrace *etm = etmq->etm; @@ -494,6 +645,12 @@ more: etmq->offset += processed; buffer_used += processed; + + /* + * Nothing to do with an error condition, let's hope the next + * chunk will be better. + */ + err = cs_etm__sample(etmq); } while (buffer.len > buffer_used); goto more; @@ -828,6 +985,17 @@ int cs_etm__process_auxtrace_info(union perf_event *event, return 0; } + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + etm->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&etm->synth_opts); + etm->synth_opts.callchain = false; + } + + err = cs_etm__synth_events(etm, session); + if (err) + goto err_free_queues; + err = auxtrace_queues__process_index(&etm->queues, session); if (err) goto err_free_queues; -- cgit v1.2.3 From 8046bf0cd7636e06e65fec3d1b67706db29b19dd Mon Sep 17 00:00:00 2001 From: Tor Jeremiassen Date: Wed, 17 Jan 2018 10:52:19 -0700 Subject: MAINTAINERS: Adding entry for CoreSight trace decoding Adding maintainers for Coresight trace decoding via perf tools. 
Signed-off-by: Tor Jeremiassen Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-11-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d76af75a653a..7eafa087dda3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1313,7 +1313,8 @@ F: tools/perf/arch/arm/util/pmu.c F: tools/perf/arch/arm/util/auxtrace.c F: tools/perf/arch/arm/util/cs-etm.c F: tools/perf/arch/arm/util/cs-etm.h -F: tools/perf/util/cs-etm.h +F: tools/perf/util/cs-etm.* +F: tools/perf/util/cs-etm-decoder/* ARM/CORGI MACHINE SUPPORT M: Richard Purdie -- cgit v1.2.3 From 78c436907c94660edc76f499b80dbebbbe6fd572 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 10:42:55 -0300 Subject: perf bpf: Remove misplaced __maybe_unused attribute The bpf__setup_stdout() function uses that evlist argument, remove the misleading __maybe_unused attribute. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-7vbhhzbd33nvdm7l35gdfryt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index ab2598af91eb..af7ad814b2c3 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1533,7 +1533,7 @@ int bpf__apply_obj_config(void) (strcmp("__bpf_stdout__", \ bpf_map__name(pos)) == 0)) -int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) +int bpf__setup_stdout(struct perf_evlist *evlist) { struct bpf_map_priv *tmpl_priv = NULL; struct bpf_object *obj, *tmp; -- cgit v1.2.3 From 591421e151ddf95e43d690a5c9b291d8e1cb8065 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 11:38:54 -0300 Subject: perf trace: Add --print-sample To help with debugging, like the interrupted out of order issue that will be dealt with in the next patch in this series, changing the code to deal with: raw_syscalls:sys_enter 411967179.269 Timer 9609/9626 [2] raw_syscalls:sys_enter 411967179.213 file:// Content 9609/9609 [3] 328.038 (18446744073709.496 ms): Timer/9626 futex(uaddr: 0x7fc0d4027044, op: WAIT|PRIV, utime: 0x7fc0b0ffdb50 ) ... raw_syscalls:sys_exit 411967179.225 file:// Content 9609/9609 [3] 327.982 ( 0.012 ms): file:// Conten/9609 futex(uaddr: 0x7fc0d4027040, op: WAKE|PRIV, val: 1 ) = 1 That long duration is the bug. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-fljqiibjn7wet24jd1ed7abc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 4 ++++ tools/perf/builtin-trace.c | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 6909cf1e0eea..33a88e984e66 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -163,6 +163,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Implies '--call-graph dwarf' when --call-graph not present on the command line, on systems where DWARF unwinding was built in. +--print-sample:: + Print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info for the + raw_syscalls:sys_{enter,exit} tracepoints, for debugging. + --proc-map-timeout:: When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7dece5e0cdbb..322c2b15e407 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -110,6 +110,7 @@ struct trace { bool summary; bool summary_only; bool show_comm; + bool print_sample; bool show_tool_stats; bool trace_syscalls; bool kernel_syscallchains; @@ -1578,6 +1579,23 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp return printed; } +static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, + struct perf_sample *sample, struct thread *thread) +{ + int printed = 0; + + if (trace->print_sample) { + double ts = (double)sample->time / NSEC_PER_MSEC; + + printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n", + perf_evsel__name(evsel), ts, + thread__comm_str(thread), + sample->pid, sample->tid, sample->cpu); + } + + return printed; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1598,6 +1616,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (ttrace == NULL) goto out_put; + trace__fprintf_sample(trace, evsel, sample, thread); + args = perf_evsel__sc_tp_ptr(evsel, args, sample); if (ttrace->entry_str == NULL) { @@ -1688,6 +1708,8 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (ttrace == NULL) goto out_put; + trace__fprintf_sample(trace, evsel, sample, thread); + if (trace->summary) thread__update_stats(ttrace, id, sample); @@ -3034,6 +3056,8 @@ int cmd_trace(int argc, const char **argv) "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. 
" "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), + OPT_BOOLEAN(0, "print-sample", &trace.print_sample, + "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"), OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, -- cgit v1.2.3 From 522283fec7d3f312224360da48057e923ee22765 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 11:42:11 -0300 Subject: perf trace: Do not print from time delta for interrupted syscall lines We were calculating the delta from a in-flight syscall that got its output interrupted by another syscall, which doesn't seem like useful information, we will print the syscall duration (sys_exit - sys_enter) when the raw_syscalls:sys_exit event happens. The problem here is how we're consuming the multiple ring buffers, without using the ordered_events code used by perf_session, which may cause some reordering of syscalls for diferent CPUs, so just stop printing that delta, to avoid things like: # trace --print-sample -p 9626 -e futex raw_syscalls:sys_enter 411967179.269 Timer 9609/9626 [2] raw_syscalls:sys_enter 411967179.213 file:// Content 9609/9609 [3] 328.038 (18446744073709.496 ms): Timer/9626 futex(uaddr: 0x7fc0d4027044, op: WAIT|PRIV, utime: 0x7fc0b0ffdb50 ) ... raw_syscalls:sys_exit 411967179.225 file:// Content 9609/9609 [3] 327.982 ( 0.012 ms): file:// Conten/9609 futex(uaddr: 0x7fc0d4027040, op: WAKE|PRIV, val: 1 ) = 1 This is a bandaid, we should better try and use the ordered_events code, possibly with some refactoring prep work, but for now at least we don't show those false long deltas for the lines ending in '...'. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-q6xgsqrju1sr6ltud9kjjhmb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 322c2b15e407..ab00096328e4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -821,7 +821,7 @@ static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) size_t printed = fprintf(fp, "("); if (!calculated) - printed += fprintf(fp, " ? "); + printed += fprintf(fp, " "); else if (duration >= 1.0) printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); else if (duration >= 0.01) @@ -1556,10 +1556,9 @@ static void thread__update_stats(struct thread_trace *ttrace, update_stats(stats, duration); } -static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) +static int trace__printf_interrupted_entry(struct trace *trace) { struct thread_trace *ttrace; - u64 duration; size_t printed; if (trace->current == NULL) @@ -1570,9 +1569,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp if (!ttrace->entry_pending) return 0; - duration = sample->time - ttrace->entry_time; - - printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1627,7 +1624,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) - trace__printf_interrupted_entry(trace, sample); + trace__printf_interrupted_entry(trace); ttrace->entry_time = sample->time; msg = 
ttrace->entry_str; @@ -1941,7 +1938,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, } } - trace__printf_interrupted_entry(trace, sample); + trace__printf_interrupted_entry(trace); trace__fprintf_tstamp(trace, sample->time, trace->output); if (trace->trace_syscalls) -- cgit v1.2.3 From 3258abe0991590a182be0a20ef6b79b65fe2c9cd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 12:56:59 -0300 Subject: perf trace beauty futex: Beautify FUTEX_BITSET_MATCH_ANY E.g.: # strace -e futex -p 14437 strace: Process 14437 attached futex(0x7f46f4808d70, FUTEX_WAKE_PRIVATE, 1) = 0 futex(0x7f46f24e68b0, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, {tv_sec=1516636744, tv_nsec=221969000}, 0xffffffff) = -1 ETIMEDOUT (Connection timed out) # Should pretty print that 0xffffffff value, like: # trace -e futex --tid 14437 0.028 ( 0.005 ms): futex(uaddr: 0x7f46f4808d70, op: WAKE|PRIV, val: 1 ) = 0 0.037 (1000.092 ms): futex(uaddr: 0x7f46f24e68b0, op: WAIT_BITSET|PRIV|CLKRT, utime: 0x7f46f23fedf0, val3: MATCH_ANY) = -1 ETIMEDOUT Connection timed out ^C# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-raef6e352la90600yksthao1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 +++- tools/perf/trace/beauty/futex_val3.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tools/perf/trace/beauty/futex_val3.c diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ab00096328e4..46d3ff09440c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -549,6 +549,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #include "trace/beauty/eventfd.c" #include "trace/beauty/flock.c" #include "trace/beauty/futex_op.c" +#include "trace/beauty/futex_val3.c" #include "trace/beauty/mmap.c" #include "trace/beauty/mode_t.c" #include 
"trace/beauty/msg_flags.c" @@ -611,7 +612,8 @@ static struct syscall_fmt { { .name = "fstat", .alias = "newfstat", }, { .name = "fstatat", .alias = "newfstatat", }, { .name = "futex", - .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, }, + .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, + [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, }, { .name = "futimesat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "getitimer", diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c new file mode 100644 index 000000000000..26f6b3253511 --- /dev/null +++ b/tools/perf/trace/beauty/futex_val3.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#ifndef FUTEX_BITSET_MATCH_ANY +#define FUTEX_BITSET_MATCH_ANY 0xffffffff +#endif + +static size_t syscall_arg__scnprintf_futex_val3(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned int bitset = arg->val; + + if (bitset == FUTEX_BITSET_MATCH_ANY) + return scnprintf(bf, size, "MATCH_ANY"); + + return scnprintf(bf, size, "%#xd", bitset); +} + +#define SCA_FUTEX_VAL3 syscall_arg__scnprintf_futex_val3 -- cgit v1.2.3 From bafae98e7a95df74ce4529ae96251cb12c86fdf3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 16:42:16 -0300 Subject: perf evlist: Remove fcntl.h from evlist.h Not needed there, fixup the places where it is needed and was getting only by luck via evlist.h. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-yxjpetn64z8vjuguu84gr6x6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 3 +++ tools/perf/builtin-script.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/bpf.c | 1 + tools/perf/tests/openat-syscall-tp-fields.c | 3 +++ tools/perf/util/cgroup.c | 3 +++ tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 1 - tools/perf/util/parse-events.c | 3 +++ 9 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 98853162eae9..55d919dc5bc6 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -26,6 +26,9 @@ #include #endif #include +#include +#include +#include #include #include diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3499d68e1d70..ab19a6ee4093 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include "sane_ctype.h" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 46d3ff09440c..868306ccd8b8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "sane_ctype.h" diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 8e709c9d512c..e8399beca62b 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 97c9407d02a0..43519267b93b 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include +#include #include "perf.h" #include "evlist.h" #include "evsel.h" diff --git 
a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index d9ffc1e6eb39..984f69144f87 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -6,6 +6,9 @@ #include "cgroup.h" #include "evlist.h" #include +#include +#include +#include int nr_cgroups; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 120efd85f2c8..ac35cd214feb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -25,6 +25,7 @@ #include "parse-events.h" #include +#include #include #include diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e7fbca69cbac..75f8e0ad5d76 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "../perf.h" #include "event.h" diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 170316795a18..34589c427e52 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #include #include "term.h" #include "../perf.h" -- cgit v1.2.3 From c19d0847b2dce3dc4219fa1a21f4ad2256b42d9d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 16:50:16 -0300 Subject: perf trace beauty flock: Move to separate object file To resolve some header conflicts that were preventing the build to succeed in the Alpine Linux distribution. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-bvud0dvzvip3kibeplupdbmc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 - tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/flock.c | 10 +++++----- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 868306ccd8b8..17d11deeb88d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -548,7 +548,6 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #include "trace/beauty/arch_errno_names.c" #include "trace/beauty/eventfd.c" -#include "trace/beauty/flock.c" #include "trace/beauty/futex_op.c" #include "trace/beauty/futex_val3.c" #include "trace/beauty/mmap.c" diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 066bbf0f4a74..66330d4b739b 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1,5 +1,6 @@ libperf-y += clone.o libperf-y += fcntl.o +libperf-y += flock.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index d8f6b2ec7fc5..984a504d335c 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -79,6 +79,9 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_ARG syscall_arg__scnprintf_fcntl_arg +size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_FLOCK syscall_arg__scnprintf_flock + size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd diff --git a/tools/perf/trace/beauty/flock.c 
b/tools/perf/trace/beauty/flock.c index f9707f57566c..c4ff6ad30b06 100644 --- a/tools/perf/trace/beauty/flock.c +++ b/tools/perf/trace/beauty/flock.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include + +#include "trace/beauty/beauty.h" +#include +#include #ifndef LOCK_MAND #define LOCK_MAND 32 @@ -17,8 +20,7 @@ #define LOCK_RW 192 #endif -static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, - struct syscall_arg *arg) +size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg) { int printed = 0, op = arg->val; @@ -45,5 +47,3 @@ static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, return printed; } - -#define SCA_FLOCK syscall_arg__scnprintf_flock -- cgit v1.2.3 From 1a29b5b7f347a1a9230c1e0af5b37e3e571588ab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jan 2018 10:58:13 +0100 Subject: KVM: x86: Make indirect calls in emulator speculation safe Replace the indirect calls with CALL_NOSPEC. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: David Woodhouse Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: David Woodhouse Cc: Linus Torvalds Cc: rga@amazon.de Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Jason Baron Cc: Paolo Bonzini Cc: Dan Williams Cc: Arjan Van De Ven Cc: Tim Chen Link: https://lkml.kernel.org/r/20180125095843.595615683@infradead.org --- arch/x86/kvm/emulate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d90cdc77e077..453d8c990108 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "x86.h" #include "tss.h" @@ -1021,8 +1022,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("push 
%[flags]; popf; call *%[fastop]" - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); + asm("push %[flags]; popf; " CALL_NOSPEC + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); return rc; } @@ -5305,9 +5306,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) if (!(ctxt->d & ByteOp)) fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [fastop]"+S"(fop), ASM_CALL_CONSTRAINT + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT : "c"(ctxt->src2.val)); ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); -- cgit v1.2.3 From c940a3fb1e2e9b7d03228ab28f375fb5a47ff699 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jan 2018 10:58:14 +0100 Subject: KVM: VMX: Make indirect call speculation safe Replace indirect call with CALL_NOSPEC. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: David Woodhouse Cc: Andrea Arcangeli Cc: Andi Kleen Cc: Ashok Raj Cc: Greg KH Cc: Jun Nakajima Cc: David Woodhouse Cc: Linus Torvalds Cc: rga@amazon.de Cc: Dave Hansen Cc: Asit Mallick Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Jason Baron Cc: Paolo Bonzini Cc: Dan Williams Cc: Arjan Van De Ven Cc: Tim Chen Link: https://lkml.kernel.org/r/20180125095843.645776917@infradead.org --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d1e25dba3112..924589c53422 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9064,14 +9064,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) #endif "pushf\n\t" __ASM_SIZE(push) " $%c[cs]\n\t" - "call *%[entry]\n\t" + CALL_NOSPEC : #ifdef CONFIG_X86_64 [sp]"=&r"(tmp), #endif ASM_CALL_CONSTRAINT : - [entry]"r"(entry), + THUNK_TARGET(entry), [ss]"i"(__KERNEL_DS), 
[cs]"i"(__KERNEL_CS) ); -- cgit v1.2.3 From 82d94856fa221b5173eefd56bcd1057c037e9b07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Jan 2018 13:10:30 +0100 Subject: perf/core: Fix lock inversion between perf,trace,cpuhp Lockdep gifted us with noticing the following 4-way lockup scenario: perf_trace_init() #0 mutex_lock(&event_mutex) perf_trace_event_init() perf_trace_event_reg() tp_event->class->reg() := tracepoint_probe_register #1 mutex_lock(&tracepoints_mutex) trace_point_add_func() #2 static_key_enable() #2 do_cpu_up() perf_event_init_cpu() #3 mutex_lock(&pmus_lock) #4 mutex_lock(&ctx->mutex) perf_event_task_disable() mutex_lock(&current->perf_event_mutex) #4 ctx = perf_event_ctx_lock() #5 perf_event_for_each_child() do_exit() task_work_run() __fput() perf_release() perf_event_release_kernel() #4 mutex_lock(&ctx->mutex) #5 mutex_lock(&event->child_mutex) free_event() _free_event() event->destroy() := perf_trace_destroy #0 mutex_lock(&event_mutex); Fix that by moving the free_event() out from under the locks.
Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4df5b695bf0d..2d80824298a7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1231,6 +1231,10 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::lock * perf_event::mmap_mutex * mmap_sem + * + * cpu_hotplug_lock + * pmus_lock + * cpuctx->mutex / perf_event_context::mutex */ static struct perf_event_context * perf_event_ctx_lock_nested(struct perf_event *event, int nesting) @@ -4196,6 +4200,7 @@ int perf_event_release_kernel(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct perf_event *child, *tmp; + LIST_HEAD(free_list); /* * If we got here through err_file: fput(event_file); we will not have @@ -4268,8 +4273,7 @@ again: struct perf_event, child_list); if (tmp == child) { perf_remove_from_context(child, DETACH_GROUP); - list_del(&child->child_list); - free_event(child); + list_move(&child->child_list, &free_list); /* * This matches the refcount bump in inherit_event(); * this can't be the last reference. 
@@ -4284,6 +4288,11 @@ again: } mutex_unlock(&event->child_mutex); + list_for_each_entry_safe(child, tmp, &free_list, child_list) { + list_del(&child->child_list); + free_event(child); + } + no_ctx: put_event(event); /* Must be the 'last' reference */ return 0; -- cgit v1.2.3 From 43fa87f7deed52e8c8420182e0c133bc4cf395f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Jan 2018 17:07:59 +0100 Subject: perf/core: Fix another perf,trace,cpuhp lock inversion Lockdep noticed the following 3-way lockup race: perf_trace_init() #0 mutex_lock(&event_mutex) perf_trace_event_init() perf_trace_event_reg() tp_event->class->reg() := tracepoint_probe_register #1 mutex_lock(&tracepoints_mutex) trace_point_add_func() #2 static_key_enable() #2 do_cpu_up() perf_event_init_cpu() #3 mutex_lock(&pmus_lock) #4 mutex_lock(&ctx->mutex) perf_ioctl() #4 ctx = perf_event_ctx_lock() _perf_iotcl() ftrace_profile_set_filter() #0 mutex_lock(&event_mutex) Fudge it for now by noting that the tracepoint state does not depend on the event <-> context relation. Ugly though :/ Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/events/core.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 2d80824298a7..816f83d70fc6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8525,6 +8525,29 @@ fail_clear_files: return ret; } +static int +perf_tracepoint_set_filter(struct perf_event *event, char *filter_str) +{ + struct perf_event_context *ctx = event->ctx; + int ret; + + /* + * Beware, here be dragons!! + * + * the tracepoint muck will deadlock against ctx->mutex, but the tracepoint + * stuff does not actually need it. So temporarily drop ctx->mutex. As per + * perf_event_ctx_lock() we already have a reference on ctx. 
+ * + * This can result in event getting moved to a different ctx, but that + * does not affect the tracepoint state. + */ + mutex_unlock(&ctx->mutex); + ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); + mutex_lock(&ctx->mutex); + + return ret; +} + static int perf_event_set_filter(struct perf_event *event, void __user *arg) { char *filter_str; @@ -8541,8 +8564,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg) if (IS_ENABLED(CONFIG_EVENT_TRACING) && event->attr.type == PERF_TYPE_TRACEPOINT) - ret = ftrace_profile_set_filter(event, event->attr.config, - filter_str); + ret = perf_tracepoint_set_filter(event, filter_str); else if (has_addr_filter(event)) ret = perf_event_set_addr_filter(event, filter_str); -- cgit v1.2.3 From 0c7296cad651a3a40286d70ff37e73bd6fa4e4da Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Jan 2018 21:23:02 +0100 Subject: perf/core: Fix ctx::mutex deadlock Lockdep noticed the following 3-way lockup scenario: sys_perf_event_open() perf_event_alloc() perf_try_init_event() #0 ctx = perf_event_ctx_lock_nested(1) perf_swevent_init() swevent_hlist_get() #1 mutex_lock(&pmus_lock) perf_event_init_cpu() #1 mutex_lock(&pmus_lock) #2 mutex_lock(&ctx->mutex) sys_perf_event_open() mutex_lock_double() #2 mutex_lock() #0 mutex_lock_nested() And while we need that perf_event_ctx_lock_nested() for HW PMUs such that they can iterate the sibling list, trying to match it to the available counters, the software PMUs need do no such thing. Exclude them. In particular the swevent triggers the above inversion, while the tpevent PMU triggers a more elaborate one through their event_mutex.
Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 816f83d70fc6..5d8f4031f8d5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9199,7 +9199,13 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) if (!try_module_get(pmu->module)) return -ENODEV; - if (event->group_leader != event) { + /* + * A number of pmu->event_init() methods iterate the sibling_list to, + * for example, validate if the group fits on the PMU. Therefore, + * if this is a sibling event, acquire the ctx->mutex to protect + * the sibling_list. + */ + if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) { /* * This ctx->mutex can nest when we're called through * inheritance. See the perf_event_ctx_lock_nested() comment. 
-- cgit v1.2.3 From efe951d3de9141626a494bcb1efb0650eaef6491 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jan 2018 19:23:08 +0100 Subject: perf/x86: Fix perf,x86,cpuhp deadlock More lockdep gifts, a 5-way lockup race: perf_event_create_kernel_counter() perf_event_alloc() perf_try_init_event() x86_pmu_event_init() __x86_pmu_event_init() x86_reserve_hardware() #0 mutex_lock(&pmc_reserve_mutex); reserve_ds_buffer() #1 get_online_cpus() perf_event_release_kernel() _free_event() hw_perf_event_destroy() x86_release_hardware() #0 mutex_lock(&pmc_reserve_mutex) release_ds_buffer() #1 get_online_cpus() #1 do_cpu_up() perf_event_init_cpu() #2 mutex_lock(&pmus_lock) #3 mutex_lock(&ctx->mutex) sys_perf_event_open() mutex_lock_double() #3 mutex_lock(ctx->mutex) #4 mutex_lock_nested(ctx->mutex, 1); perf_try_init_event() #4 mutex_lock_nested(ctx->mutex, 1) x86_pmu_event_init() intel_pmu_hw_config() x86_add_exclusive() #0 mutex_lock(&pmc_reserve_mutex) Fix it by using ordering constructs instead of locking. 
Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8156e47da7ba..18c25ab28557 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -372,10 +372,9 @@ static int alloc_pebs_buffer(int cpu) static void release_pebs_buffer(int cpu) { struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); - struct debug_store *ds = hwev->ds; void *cea; - if (!ds || !x86_pmu.pebs) + if (!x86_pmu.pebs) return; kfree(per_cpu(insn_buffer, cpu)); @@ -384,7 +383,6 @@ static void release_pebs_buffer(int cpu) /* Clear the fixmap */ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; ds_clear_cea(cea, x86_pmu.pebs_buffer_size); - ds->pebs_buffer_base = 0; dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); hwev->ds_pebs_vaddr = NULL; } @@ -419,16 +417,14 @@ static int alloc_bts_buffer(int cpu) static void release_bts_buffer(int cpu) { struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); - struct debug_store *ds = hwev->ds; void *cea; - if (!ds || !x86_pmu.bts) + if (!x86_pmu.bts) return; /* Clear the fixmap */ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; ds_clear_cea(cea, BTS_BUFFER_SIZE); - ds->bts_buffer_base = 0; dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); hwev->ds_bts_vaddr = NULL; } @@ -454,16 +450,22 @@ void release_ds_buffers(void) if (!x86_pmu.bts && !x86_pmu.pebs) return; - get_online_cpus(); - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) + release_ds_buffer(cpu); + + for_each_possible_cpu(cpu) { + /* + * Again, ignore errors from offline CPUs, they will no longer + * observe cpu_hw_events.ds and 
not program the DS_AREA when + * they come up. + */ fini_debug_store_on_cpu(cpu); + } for_each_possible_cpu(cpu) { release_pebs_buffer(cpu); release_bts_buffer(cpu); - release_ds_buffer(cpu); } - put_online_cpus(); } void reserve_ds_buffers(void) @@ -483,8 +485,6 @@ void reserve_ds_buffers(void) if (!x86_pmu.pebs) pebs_err = 1; - get_online_cpus(); - for_each_possible_cpu(cpu) { if (alloc_ds_buffer(cpu)) { bts_err = 1; @@ -521,11 +521,14 @@ void reserve_ds_buffers(void) if (x86_pmu.pebs && !pebs_err) x86_pmu.pebs_active = 1; - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) { + /* + * Ignores wrmsr_on_cpu() errors for offline CPUs they + * will get this call through intel_pmu_cpu_starting(). + */ init_debug_store_on_cpu(cpu); + } } - - put_online_cpus(); } /* -- cgit v1.2.3 From ee9aebb27cbdac677525e0e56e8844cf4bf46461 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 24 Jan 2018 14:55:12 -0700 Subject: nvme-pci: Suspend queues after deleting them The driver had been abusing the cq_vector state to know if new submissions were safe, but that was before we could quiesce blk-mq. If the controller happens to get an interrupt through while we're suspending those queues, 'no irq handler' warnings may occur. This patch will disable the interrupts only after the queues are deleted. 
Reported-by: Jianchao Wang Tested-by: Jianchao Wang Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c46c239cc1ff..e2342d365d3c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1324,9 +1324,6 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; - if (nvme_suspend_queue(nvmeq)) - return; - if (shutdown) nvme_shutdown_ctrl(&dev->ctrl); else @@ -2011,9 +2008,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static void nvme_disable_io_queues(struct nvme_dev *dev, int queues) +static void nvme_disable_io_queues(struct nvme_dev *dev) { - int pass; + int pass, queues = dev->online_queues - 1; unsigned long timeout; u8 opcode = nvme_admin_delete_sq; @@ -2164,7 +2161,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { - int i, queues; + int i; bool dead = true; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -2199,21 +2196,13 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) } nvme_stop_queues(&dev->ctrl); - queues = dev->online_queues - 1; - for (i = dev->ctrl.queue_count - 1; i > 0; i--) - nvme_suspend_queue(&dev->queues[i]); - - if (dead) { - /* A device might become IO incapable very soon during - * probe, before the admin queue is configured. Thus, - * queue_count can be 0 here. 
- */ - if (dev->ctrl.queue_count) - nvme_suspend_queue(&dev->queues[0]); - } else { - nvme_disable_io_queues(dev, queues); + if (!dead) { + nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } + for (i = dev->ctrl.queue_count - 1; i >= 0; i--) + nvme_suspend_queue(&dev->queues[i]); + nvme_pci_disable(dev); blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); -- cgit v1.2.3 From 4ee806d51176ba7b8ff1efd81f271d7252e03a1d Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 18 Jan 2018 16:14:26 -0500 Subject: net: tcp: close sock if net namespace is exiting When a tcp socket is closed, if it detects that its net namespace is exiting, close immediately and do not wait for FIN sequence. For normal sockets, a reference is taken to their net namespace, so it will never exit while the socket is open. However, kernel sockets do not take a reference to their net namespace, so it may begin exiting while the kernel socket is still open. In this case if the kernel socket is a tcp socket, it will stay open trying to complete its close sequence. The sock's dst(s) hold a reference to their interface, which are all transferred to the namespace's loopback interface when the real interfaces are taken down. When the namespace tries to take down its loopback interface, it hangs waiting for all references to the loopback interface to release, which results in messages like: unregister_netdevice: waiting for lo to become free. Usage count = 1 These messages continue until the socket finally times out and closes. Since the net namespace cleanup holds the net_mutex while calling its registered pernet callbacks, any new net namespace initialization is blocked until the current net namespace finishes exiting. After this change, the tcp socket notices the exiting net namespace, and closes immediately, releasing its dst(s) and their reference to the loopback interface, which lets the net namespace continue exiting. 
Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811 Signed-off-by: Dan Streetman Signed-off-by: David S. Miller --- include/net/net_namespace.h | 10 ++++++++++ net/ipv4/tcp.c | 3 +++ net/ipv4/tcp_timer.c | 15 +++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 10f99dafd5ac..049008493faf 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -223,6 +223,11 @@ int net_eq(const struct net *net1, const struct net *net2) return net1 == net2; } +static inline int check_net(const struct net *net) +{ + return atomic_read(&net->count) != 0; +} + void net_drop_ns(void *); #else @@ -247,6 +252,11 @@ int net_eq(const struct net *net1, const struct net *net2) return 1; } +static inline int check_net(const struct net *net) +{ + return 1; +} + #define net_drop_ns NULL #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f08eebe60446..8e053ad7cae2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2298,6 +2298,9 @@ adjudge_to_death: tcp_send_active_reset(sk, GFP_ATOMIC); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + } else if (!check_net(sock_net(sk))) { + /* Not possible to send reset; just close */ + tcp_set_state(sk, TCP_CLOSE); } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 968fda198376..388158c9d9f6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -48,11 +48,19 @@ static void tcp_write_err(struct sock *sk) * to prevent DoS attacks. It is called when a retransmission timeout * or zero probe timeout occurs on orphaned socket. * + * Also close if our net namespace is exiting; in that case there is no + * hope of ever communicating again since all netns interfaces are already + * down (or about to be down), and we need to release our dst references, + * which have been moved to the netns loopback interface, so the namespace + * can finish exiting. 
This condition is only possible if we are a kernel + * socket, as those do not hold references to the namespace. + * * Criteria is still not confirmed experimentally and may change. * We kill the socket, if: * 1. If number of orphaned sockets exceeds an administratively configured * limit. * 2. If we have strong memory pressure. + * 3. If our net namespace is exiting. */ static int tcp_out_of_resources(struct sock *sk, bool do_reset) { @@ -81,6 +89,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; } + + if (!check_net(sock_net(sk))) { + /* Not possible to send reset; just close */ + tcp_done(sk); + return 1; + } + return 0; } -- cgit v1.2.3 From 6e49412016f5f28ae36c3eaa5a36ec787b788951 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 25 Jan 2018 09:09:25 +0100 Subject: nvme: don't free uuid pointer before printing it Commit df351ef73789 ("nvme-fabrics: fix memory leak when parsing host ID option") fixed the leak of 'p' but in case uuid_parse() fails the memory is freed before the error print that is using it. Free it after printing eventual errors. 
Signed-off-by: Johannes Thumshirn Fixes: df351ef73789 ("nvme-fabrics: fix memory leak when parsing host ID option") Reported-by: Dan Carpenter Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index eb46967bb0d5..9cee72a80472 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -739,12 +739,13 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, goto out; } ret = uuid_parse(p, &hostid); - kfree(p); if (ret) { pr_err("Invalid hostid %s\n", p); ret = -EINVAL; + kfree(p); goto out; } + kfree(p); break; case NVMF_OPT_DUP_CONNECT: opts->duplicate_connect = true; -- cgit v1.2.3 From 1dad3a67fbb03d88c68ca20a4f89296e50600710 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 6 Dec 2017 18:21:20 +0200 Subject: nvme-rdma: remove redundant boolean for inline_data Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 38e183461d9d..6c2fdfa4c86a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -66,7 +66,6 @@ struct nvme_rdma_request { struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; u32 num_sge; int nents; - bool inline_data; struct ib_reg_wr reg_wr; struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; @@ -1086,7 +1085,6 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, sg->length = cpu_to_le32(sg_dma_len(req->sg_table.sgl)); sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; - req->inline_data = true; req->num_sge++; return 0; } @@ -1158,7 +1156,6 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, int count, ret; req->num_sge = 1; - req->inline_data = false; refcount_set(&req->ref, 2); /* send and 
recv completions */ c->common.flags |= NVME_CMD_SGL_METABUF; -- cgit v1.2.3 From f15ca723c1ebe6c1a06bc95fda6b62cd87b44559 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Jan 2018 19:03:03 +0100 Subject: net: don't call update_pmtu unconditionally Some dst_ops (e.g. md_dst_ops)) doesn't set this handler. It may result to: "BUG: unable to handle kernel NULL pointer dereference at (null)" Let's add a helper to check if update_pmtu is available before calling it. Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path") Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path") CC: Roman Kapl CC: Xin Long Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 +-- drivers/net/geneve.c | 4 ++-- drivers/net/vxlan.c | 6 ++---- include/net/dst.h | 8 ++++++++ net/ipv4/ip_tunnel.c | 3 +-- net/ipv4/ip_vti.c | 2 +- net/ipv6/ip6_tunnel.c | 6 ++---- net/ipv6/ip6_vti.c | 2 +- net/ipv6/sit.c | 4 ++-- 9 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 2c13123bfd69..71ea9e26666c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1456,8 +1456,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, struct ipoib_dev_priv *priv = ipoib_priv(dev); int e = skb_queue_empty(&priv->cm.skb_queue); - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 0a48b3073d3d..64fda2e1040e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -829,7 +829,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - GENEVE_BASE_HLEN - info->options_len - 14; - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, 
skb, mtu); + skb_dst_update_pmtu(skb, mtu); } sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); @@ -875,7 +875,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - GENEVE_BASE_HLEN - info->options_len - 14; - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); } sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 31f4b7911ef8..c3e34e3c82a7 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2158,8 +2158,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (skb_dst(skb)) { int mtu = dst_mtu(ndst) - VXLAN_HEADROOM; - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, - skb, mtu); + skb_dst_update_pmtu(skb, mtu); } tos = ip_tunnel_ecn_encap(tos, old_iph, skb); @@ -2200,8 +2199,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (skb_dst(skb)) { int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM; - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, - skb, mtu); + skb_dst_update_pmtu(skb, mtu); } tos = ip_tunnel_ecn_encap(tos, old_iph, skb); diff --git a/include/net/dst.h b/include/net/dst.h index b091fd536098..d49d607dd2b3 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -521,4 +521,12 @@ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) } #endif +static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst->ops->update_pmtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu); +} + #endif /* _NET_DST_H */ diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 5ddb1cb52bd4..6d21068f9b55 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -520,8 +520,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, else mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 949f432a5f04..51b1669334fe 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -200,7 +200,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9a7cf355bc8c..1ee5584c3555 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -642,8 +642,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, - rel_info); + skb_dst_update_pmtu(skb2, rel_info); } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -1131,8 +1130,7 @@ route_lookup: mtu = 576; } - if (skb_dst(skb) && !t->parms.collect_md) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index dbb74f3c57a7..8c184f84f353 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) mtu = dst_mtu(dst); if (!skb->ignore_df && skb->len > mtu) { - skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); + skb_dst_update_pmtu(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d7dc23c1b2ca..3873d3877135 100644 --- a/net/ipv6/sit.c +++ 
b/net/ipv6/sit.c @@ -934,8 +934,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, df = 0; } - if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (tunnel->parms.iph.daddr) + skb_dst_update_pmtu(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -- cgit v1.2.3 From 0fd189a95fdbc631737df5f27a0fc0a3dd31b75e Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 25 Jan 2018 18:29:53 -0500 Subject: drm/nouveau: Move irq setup/teardown to pci ctor/dtor For a while we've been having issues with seemingly random interrupts coming from nvidia cards when resuming them. Originally the fix for this was thought to be just re-arming the MSI interrupt registers right after re-allocating our IRQs, however it seems a lot of what we do is both wrong and not even necessary. This was made apparent by what appeared to be a regression in the mainline kernel that started introducing suspend/resume issues for nouveau: a0c9259dc4e1 (irq/matrix: Spread interrupts on allocation) After this commit was introduced, we started getting interrupts from the GPU before we actually re-allocated our own IRQ (see references below) and assigned the IRQ handler. Investigating this turned out that the problem was not with the commit, but the fact that nouveau even free/allocates its irqs before and after suspend/resume. For starters: drivers in the linux kernel haven't had to handle freeing/re-allocating their IRQs during suspend/resume cycles for quite a while now. Nouveau seems to be one of the few drivers left that still does this, despite the fact there's no reason we actually need to since disabling interrupts from the device side should be enough, as the kernel is already smart enough to know to disable host-side interrupts for us before going into suspend.
Since we were tearing down our IRQs by hand however, that means there was a short period during resume where interrupts could be received before we re-allocated our IRQ which would lead to us getting an unhandled IRQ. Since we never handle said IRQ and re-arm the interrupt registers, this would cause us to miss all of the interrupts from the GPU and cause our init process to start timing out on anything requiring interrupts. So, since this whole setup/teardown every suspend/resume cycle is useless anyway, move irq setup/teardown into the pci subdev's ctor/dtor functions instead so they're only called at driver load and driver unload. This should fix most of the issues with pending interrupts on resume, along with getting suspend/resume for nouveau to work again. As well, this probably means we can also just remove the msi rearm call inside nvkm_pci_init(). But since our main focus here is to fix suspend/resume before 4.15, we'll save that for a later patch. Signed-off-by: Lyude Paul Cc: Karol Herbst Cc: Thomas Gleixner Cc: Mike Galbraith Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c | 46 +++++++++++++++++--------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c index deb96de54b00..ee2431a7804e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c @@ -71,6 +71,10 @@ nvkm_pci_intr(int irq, void *arg) struct nvkm_pci *pci = arg; struct nvkm_device *device = pci->subdev.device; bool handled = false; + + if (pci->irq < 0) + return IRQ_HANDLED; + nvkm_mc_intr_unarm(device); if (pci->msi) pci->func->msi_rearm(pci); @@ -84,11 +88,6 @@ nvkm_pci_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_pci *pci = nvkm_pci(subdev); - if (pci->irq >= 0) { - free_irq(pci->irq, pci); - pci->irq = -1; - } - if (pci->agp.bridge) nvkm_agp_fini(pci); @@ 
-108,8 +107,20 @@ static int nvkm_pci_oneinit(struct nvkm_subdev *subdev) { struct nvkm_pci *pci = nvkm_pci(subdev); - if (pci_is_pcie(pci->pdev)) - return nvkm_pcie_oneinit(pci); + struct pci_dev *pdev = pci->pdev; + int ret; + + if (pci_is_pcie(pci->pdev)) { + ret = nvkm_pcie_oneinit(pci); + if (ret) + return ret; + } + + ret = request_irq(pdev->irq, nvkm_pci_intr, IRQF_SHARED, "nvkm", pci); + if (ret) + return ret; + + pci->irq = pdev->irq; return 0; } @@ -117,7 +128,6 @@ static int nvkm_pci_init(struct nvkm_subdev *subdev) { struct nvkm_pci *pci = nvkm_pci(subdev); - struct pci_dev *pdev = pci->pdev; int ret; if (pci->agp.bridge) { @@ -131,28 +141,34 @@ nvkm_pci_init(struct nvkm_subdev *subdev) if (pci->func->init) pci->func->init(pci); - ret = request_irq(pdev->irq, nvkm_pci_intr, IRQF_SHARED, "nvkm", pci); - if (ret) - return ret; - - pci->irq = pdev->irq; - /* Ensure MSI interrupts are armed, for the case where there are * already interrupts pending (for whatever reason) at load time. */ if (pci->msi) pci->func->msi_rearm(pci); - return ret; + return 0; } static void * nvkm_pci_dtor(struct nvkm_subdev *subdev) { struct nvkm_pci *pci = nvkm_pci(subdev); + nvkm_agp_dtor(pci); + + if (pci->irq >= 0) { + /* freq_irq() will call the handler, we use pci->irq == -1 + * to signal that it's been torn down and should be a noop. + */ + int irq = pci->irq; + pci->irq = -1; + free_irq(irq, pci); + } + if (pci->msi) pci_disable_msi(pci->pdev); + return nvkm_pci(subdev); } -- cgit v1.2.3 From 6793f1c450b1533a5e9c2493490de771d38b24f9 Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 25 Jan 2018 19:39:44 -0500 Subject: orangefs: fix deadlock; do not write i_size in read_iter After do_readv_writev, the inode cache is invalidated anyway, so i_size will never be read. It will be fetched from the server which will also know about updates from other machines. Fixes deadlock on 32-bit SMP. 
See https://marc.info/?l=linux-fsdevel&m=151268557427760&w=2 Signed-off-by: Martin Brandenburg Cc: Al Viro Cc: Mike Marshall Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/orangefs/file.c | 7 ++----- fs/orangefs/orangefs-kernel.h | 11 ----------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 1668fd645c45..0d228cd087e6 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -452,7 +452,7 @@ ssize_t orangefs_inode_read(struct inode *inode, static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; - loff_t pos = *(&iocb->ki_pos); + loff_t pos = iocb->ki_pos; ssize_t rc = 0; BUG_ON(iocb->private); @@ -492,9 +492,6 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite } } - if (file->f_pos > i_size_read(file->f_mapping->host)) - orangefs_i_size_write(file->f_mapping->host, file->f_pos); - rc = generic_write_checks(iocb, iter); if (rc <= 0) { @@ -508,7 +505,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite * pos to the end of the file, so we will wait till now to set * pos... 
*/ - pos = *(&iocb->ki_pos); + pos = iocb->ki_pos; rc = do_readv_writev(ORANGEFS_IO_WRITE, file, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 97adf7d100b5..2595453fe737 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -533,17 +533,6 @@ do { \ sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \ } while (0) -static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) -{ -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - inode_lock(inode); -#endif - i_size_write(inode, i_size); -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - inode_unlock(inode); -#endif -} - static inline void orangefs_set_timeout(struct dentry *dentry) { unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; -- cgit v1.2.3 From a78e93661c5fd30b9e1dee464b2f62f966883ef7 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Fri, 26 Jan 2018 01:53:26 +0100 Subject: r8169: fix memory corruption on retrieval of hardware statistics. Hardware statistics retrieval hurts in tight invocation loops. Avoid extraneous write and enforce strict ordering of writes targeted to the tally counters dump area address registers. Signed-off-by: Francois Romieu Tested-by: Oliver Freyermuth Signed-off-by: David S. 
Miller --- drivers/net/ethernet/realtek/r8169.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index fc0d5fa65ad4..734286ebe5ef 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -2244,19 +2244,14 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd) void __iomem *ioaddr = tp->mmio_addr; dma_addr_t paddr = tp->counters_phys_addr; u32 cmd; - bool ret; RTL_W32(CounterAddrHigh, (u64)paddr >> 32); + RTL_R32(CounterAddrHigh); cmd = (u64)paddr & DMA_BIT_MASK(32); RTL_W32(CounterAddrLow, cmd); RTL_W32(CounterAddrLow, cmd | counter_cmd); - ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); - - RTL_W32(CounterAddrLow, 0); - RTL_W32(CounterAddrHigh, 0); - - return ret; + return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); } static bool rtl8169_reset_counters(struct net_device *dev) -- cgit v1.2.3 From 1e19c4d689dc1e95bafd23ef68fbc0c6b9e05180 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Jan 2018 19:37:37 -0800 Subject: net: vrf: Add support for sends to local broadcast address Sukumar reported that sends to the local broadcast address (255.255.255.255) are broken. Check for the address in vrf driver and do not redirect to the VRF device - similar to multicast packets. With this change sockets can use SO_BINDTODEVICE to specify an egress interface and receive responses. Note: the egress interface can not be a VRF device but needs to be the enslaved device. https://bugzilla.kernel.org/show_bug.cgi?id=198521 Reported-by: Sukumar Gopalakrishnan Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index feb1b2e15c2e..139c61c8244a 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -673,8 +673,9 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { - /* don't divert multicast */ - if (ipv4_is_multicast(ip_hdr(skb)->daddr)) + /* don't divert multicast or local broadcast */ + if (ipv4_is_multicast(ip_hdr(skb)->daddr) || + ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; if (qdisc_tx_is_default(vrf_dev)) -- cgit v1.2.3 From ad70062cdb4002c74db4fbed4e2b34daffccacc2 Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Mon, 22 Jan 2018 22:03:16 +0800 Subject: nvme-pci: introduce RECONNECTING state to mark initializing procedure After Sagi's commit (nvme-rdma: fix concurrent reset and reconnect), both nvme-fc/rdma have following pattern: RESETTING - quiesce blk-mq queues, teardown and delete queues/ connections, clear out outstanding IO requests... RECONNECTING - establish new queues/connections and some other initializing things. Introduce RECONNECTING to nvme-pci transport to do the same mark. Then we get a coherent state definition among nvme pci/rdma/fc transports. 
Suggested-by: James Smart Reviewed-by: James Smart Reviewed-by: Reviewed-by: Keith Busch Signed-off-by: Jianchao Wang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/pci.c | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fde6fd2e7eef..63c2c469112d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -260,7 +260,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (new_state) { case NVME_CTRL_ADMIN_ONLY: switch (old_state) { - case NVME_CTRL_RESETTING: + case NVME_CTRL_RECONNECTING: changed = true; /* FALLTHRU */ default: diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e2342d365d3c..0bc6a9e48c8e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1140,9 +1140,14 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) */ bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO); - /* If there is a reset ongoing, we shouldn't reset again. */ - if (dev->ctrl.state == NVME_CTRL_RESETTING) + /* If there is a reset/reinit ongoing, we shouldn't reset again. */ + switch (dev->ctrl.state) { + case NVME_CTRL_RESETTING: + case NVME_CTRL_RECONNECTING: return false; + default: + break; + } /* We shouldn't reset unless the controller is on fatal error state * _or_ if we lost the communication with it. @@ -2284,6 +2289,16 @@ static void nvme_reset_work(struct work_struct *work) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) nvme_dev_disable(dev, false); + /* + * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the + * initializing procedure here. 
+ */ + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RECONNECTING)) { + dev_warn(dev->ctrl.device, + "failed to mark controller RECONNECTING\n"); + goto out; + } + result = nvme_pci_enable(dev); if (result) goto out; -- cgit v1.2.3 From 3d030e41d96f46c14faf79f19c3cf1b9961815c8 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 26 Jan 2018 11:21:37 +0100 Subject: nvme: add tracepoint for nvme_setup_cmd Add tracepoints for nvme_setup_cmd() for tracing admin and/or nvm commands. Examples of the two tracepoints are as follows for trace_nvme_setup_admin_cmd(): kworker/u8:0-5 [003] .... 2.998792: nvme_setup_admin_cmd: cmdid=14, flags=0x0, meta=0x0, cmd=(nvme_admin_create_cq cqid=1, qsize=1023, cq_flags=0x3, irq_vector=0) and trace_nvme_setup_nvm_cmd(): dd-205 [001] .... 3.503929: nvme_setup_nvm_cmd: qid=1, nsid=1, cmdid=989, flags=0x0, meta=0x0, cmd=(nvme_cmd_read slba=4096, len=2047, ctrl=0x0, dsmgmt=0, reftag=0) Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/Makefile | 4 ++ drivers/nvme/host/core.c | 7 +++ drivers/nvme/host/trace.c | 130 +++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/trace.h | 140 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 281 insertions(+) create mode 100644 drivers/nvme/host/trace.c create mode 100644 drivers/nvme/host/trace.h diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index a25fd43650ad..441e67e3a9d7 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 + +ccflags-y += -I$(src) + obj-$(CONFIG_NVME_CORE) += nvme-core.o obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o @@ -6,6 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o obj-$(CONFIG_NVME_FC) += nvme-fc.o nvme-core-y := core.o +nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 63c2c469112d..f1430ca79a5b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -29,6 +29,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + #include "nvme.h" #include "fabrics.h" @@ -628,6 +631,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, } cmd->common.command_id = req->tag; + if (ns) + trace_nvme_setup_nvm_cmd(req->q->id, cmd); + else + trace_nvme_setup_admin_cmd(cmd); return ret; } EXPORT_SYMBOL_GPL(nvme_setup_cmd); diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c new file mode 100644 index 000000000000..41944bbef835 --- /dev/null +++ b/drivers/nvme/host/trace.c @@ -0,0 +1,130 @@ +/* + * NVM Express device driver tracepoints + * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH + * + * This program is free software; you can redistribute it and/or modify it + * 
under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include "trace.h" + +static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u16 sqid = get_unaligned_le16(cdw10); + u16 qsize = get_unaligned_le16(cdw10 + 2); + u16 sq_flags = get_unaligned_le16(cdw10 + 4); + u16 cqid = get_unaligned_le16(cdw10 + 6); + + + trace_seq_printf(p, "sqid=%u, qsize=%u, sq_flags=0x%x, cqid=%u", + sqid, qsize, sq_flags, cqid); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u16 cqid = get_unaligned_le16(cdw10); + u16 qsize = get_unaligned_le16(cdw10 + 2); + u16 cq_flags = get_unaligned_le16(cdw10 + 4); + u16 irq_vector = get_unaligned_le16(cdw10 + 6); + + trace_seq_printf(p, "cqid=%u, qsize=%u, cq_flags=0x%x, irq_vector=%u", + cqid, qsize, cq_flags, irq_vector); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvme_trace_admin_identify(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u8 cns = cdw10[0]; + u16 ctrlid = get_unaligned_le16(cdw10 + 2); + + trace_seq_printf(p, "cns=%u, ctrlid=%u", cns, ctrlid); + trace_seq_putc(p, 0); + + return ret; +} + + + +static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + u64 slba = get_unaligned_le64(cdw10); + u16 length = get_unaligned_le16(cdw10 + 8); + u16 control = get_unaligned_le16(cdw10 + 10); + u32 dsmgmt = get_unaligned_le32(cdw10 + 12); + u32 reftag = get_unaligned_le32(cdw10 + 16); + + trace_seq_printf(p, + 
"slba=%llu, len=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u", + slba, length, control, dsmgmt, reftag); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "nr=%u, attributes=%u", + get_unaligned_le32(cdw10), + get_unaligned_le32(cdw10 + 4)); + trace_seq_putc(p, 0); + + return ret; +} + +static const char *nvme_trace_common(struct trace_seq *p, u8 *cdw10) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, "cdw10=%*ph", 24, cdw10); + trace_seq_putc(p, 0); + + return ret; +} + +const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, + u8 opcode, u8 *cdw10) +{ + switch (opcode) { + case nvme_admin_create_sq: + return nvme_trace_create_sq(p, cdw10); + case nvme_admin_create_cq: + return nvme_trace_create_cq(p, cdw10); + case nvme_admin_identify: + return nvme_trace_admin_identify(p, cdw10); + default: + return nvme_trace_common(p, cdw10); + } +} + +const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, + u8 opcode, u8 *cdw10) +{ + switch (opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + case nvme_cmd_write_zeroes: + return nvme_trace_read_write(p, cdw10); + case nvme_cmd_dsm: + return nvme_trace_dsm(p, cdw10); + default: + return nvme_trace_common(p, cdw10); + } +} diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h new file mode 100644 index 000000000000..feadf0b57d17 --- /dev/null +++ b/drivers/nvme/host/trace.h @@ -0,0 +1,140 @@ +/* + * NVM Express device driver tracepoints + * Copyright (c) 2018 Johannes Thumshirn, SUSE Linux GmbH + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nvme + +#if !defined(_TRACE_NVME_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NVME_H + +#include +#include +#include + +#include "nvme.h" + +#define nvme_admin_opcode_name(opcode) { opcode, #opcode } +#define show_admin_opcode_name(val) \ + __print_symbolic(val, \ + nvme_admin_opcode_name(nvme_admin_delete_sq), \ + nvme_admin_opcode_name(nvme_admin_create_sq), \ + nvme_admin_opcode_name(nvme_admin_get_log_page), \ + nvme_admin_opcode_name(nvme_admin_delete_cq), \ + nvme_admin_opcode_name(nvme_admin_create_cq), \ + nvme_admin_opcode_name(nvme_admin_identify), \ + nvme_admin_opcode_name(nvme_admin_abort_cmd), \ + nvme_admin_opcode_name(nvme_admin_set_features), \ + nvme_admin_opcode_name(nvme_admin_get_features), \ + nvme_admin_opcode_name(nvme_admin_async_event), \ + nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ + nvme_admin_opcode_name(nvme_admin_activate_fw), \ + nvme_admin_opcode_name(nvme_admin_download_fw), \ + nvme_admin_opcode_name(nvme_admin_ns_attach), \ + nvme_admin_opcode_name(nvme_admin_keep_alive), \ + nvme_admin_opcode_name(nvme_admin_directive_send), \ + nvme_admin_opcode_name(nvme_admin_directive_recv), \ + nvme_admin_opcode_name(nvme_admin_dbbuf), \ + nvme_admin_opcode_name(nvme_admin_format_nvm), \ + nvme_admin_opcode_name(nvme_admin_security_send), \ + nvme_admin_opcode_name(nvme_admin_security_recv), \ + nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) + +const char *nvme_trace_parse_admin_cmd(struct trace_seq *p, u8 opcode, + u8 *cdw10); +#define __parse_nvme_admin_cmd(opcode, cdw10) \ + nvme_trace_parse_admin_cmd(p, opcode, cdw10) + +#define nvme_opcode_name(opcode) { opcode, #opcode } +#define show_opcode_name(val) \ + __print_symbolic(val, 
\ + nvme_opcode_name(nvme_cmd_flush), \ + nvme_opcode_name(nvme_cmd_write), \ + nvme_opcode_name(nvme_cmd_read), \ + nvme_opcode_name(nvme_cmd_write_uncor), \ + nvme_opcode_name(nvme_cmd_compare), \ + nvme_opcode_name(nvme_cmd_write_zeroes), \ + nvme_opcode_name(nvme_cmd_dsm), \ + nvme_opcode_name(nvme_cmd_resv_register), \ + nvme_opcode_name(nvme_cmd_resv_report), \ + nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_resv_release)) + +const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p, u8 opcode, + u8 *cdw10); +#define __parse_nvme_cmd(opcode, cdw10) \ + nvme_trace_parse_nvm_cmd(p, opcode, cdw10) + +TRACE_EVENT(nvme_setup_admin_cmd, + TP_PROTO(struct nvme_command *cmd), + TP_ARGS(cmd), + TP_STRUCT__entry( + __field(u8, opcode) + __field(u8, flags) + __field(u16, cid) + __field(u64, metadata) + __array(u8, cdw10, 24) + ), + TP_fast_assign( + __entry->opcode = cmd->common.opcode; + __entry->flags = cmd->common.flags; + __entry->cid = cmd->common.command_id; + __entry->metadata = le64_to_cpu(cmd->common.metadata); + memcpy(__entry->cdw10, cmd->common.cdw10, + sizeof(__entry->cdw10)); + ), + TP_printk(" cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", + __entry->cid, __entry->flags, __entry->metadata, + show_admin_opcode_name(__entry->opcode), + __parse_nvme_admin_cmd(__entry->opcode, __entry->cdw10)) +); + + +TRACE_EVENT(nvme_setup_nvm_cmd, + TP_PROTO(int qid, struct nvme_command *cmd), + TP_ARGS(qid, cmd), + TP_STRUCT__entry( + __field(int, qid) + __field(u8, opcode) + __field(u8, flags) + __field(u16, cid) + __field(u32, nsid) + __field(u64, metadata) + __array(u8, cdw10, 24) + ), + TP_fast_assign( + __entry->qid = qid; + __entry->opcode = cmd->common.opcode; + __entry->flags = cmd->common.flags; + __entry->cid = cmd->common.command_id; + __entry->nsid = le32_to_cpu(cmd->common.nsid); + __entry->metadata = le64_to_cpu(cmd->common.metadata); + memcpy(__entry->cdw10, cmd->common.cdw10, + sizeof(__entry->cdw10)); + ), + TP_printk("qid=%d, 
nsid=%u, cmdid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", + __entry->qid, __entry->nsid, __entry->cid, + __entry->flags, __entry->metadata, + show_opcode_name(__entry->opcode), + __parse_nvme_cmd(__entry->opcode, __entry->cdw10)) +); + +#endif /* _TRACE_NVME_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From ca5554a696dce37852f6d6721520b4f13fc295c3 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 26 Jan 2018 11:21:38 +0100 Subject: nvme: add tracepoint for nvme_complete_rq Add a tracepoint in nvme_complete_rq() for completions of NVMe commands. An example output of the trace-point is as follows: -0 [001] d.h. 3.505266: nvme_complete_rq: cmdid=989, qid=1, res=0, retries=0, flags=0x0, status=0 Signed-off-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/trace.h | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f1430ca79a5b..b3af8e914570 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -220,6 +220,8 @@ void nvme_complete_rq(struct request *req) { blk_status_t status = nvme_error_status(req); + trace_nvme_complete_rq(req); + if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { if (nvme_req_needs_failover(req, status)) { nvme_failover_req(req); diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index feadf0b57d17..ea91fccd1bc0 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -129,6 +129,31 @@ TRACE_EVENT(nvme_setup_nvm_cmd, __parse_nvme_cmd(__entry->opcode, __entry->cdw10)) ); +TRACE_EVENT(nvme_complete_rq, + TP_PROTO(struct request *req), + TP_ARGS(req), + TP_STRUCT__entry( +
__field(int, qid) + __field(int, cid) + __field(u64, result) + __field(u8, retries) + __field(u8, flags) + __field(u16, status) + ), + TP_fast_assign( + __entry->qid = req->q->id; + __entry->cid = req->tag; + __entry->result = le64_to_cpu(nvme_req(req)->result.u64); + __entry->retries = nvme_req(req)->retries; + __entry->flags = nvme_req(req)->flags; + __entry->status = nvme_req(req)->status; + ), + TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u", + __entry->cid, __entry->qid, __entry->result, + __entry->retries, __entry->flags, __entry->status) + +); + #endif /* _TRACE_NVME_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 479c03a7173d07b2967f9b521159271543d99e2f Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Thu, 25 Jan 2018 21:16:17 +0100 Subject: spi: orion: Fix a resource leak if the optional "axi" clk is deferred If the optional "axi" clk is deferred, we still need to undo some initialisation. Especially 'master' must be released. It will be reallocated the next time 'orion_spi_probe()' is called. Add a new label to clean what needs to be cleaned and rename another label to improve the names used. 
Fixes: 92ae112e477a ("spi: orion: Fix clock resource by adding an optional bus clock") Signed-off-by: Christophe JAILLET Acked-by: Gregory CLEMENT Signed-off-by: Mark Brown --- drivers/spi/spi-orion.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index 482a0cf3b7aa..deca63e82ff6 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -638,8 +638,10 @@ static int orion_spi_probe(struct platform_device *pdev) /* The following clock is only used by some SoCs */ spi->axi_clk = devm_clk_get(&pdev->dev, "axi"); if (IS_ERR(spi->axi_clk) && - PTR_ERR(spi->axi_clk) == -EPROBE_DEFER) - return -EPROBE_DEFER; + PTR_ERR(spi->axi_clk) == -EPROBE_DEFER) { + status = -EPROBE_DEFER; + goto out_rel_clk; + } if (!IS_ERR(spi->axi_clk)) clk_prepare_enable(spi->axi_clk); @@ -667,7 +669,7 @@ static int orion_spi_probe(struct platform_device *pdev) spi->base = devm_ioremap_resource(&pdev->dev, r); if (IS_ERR(spi->base)) { status = PTR_ERR(spi->base); - goto out_rel_clk; + goto out_rel_axi_clk; } /* Scan all SPI devices of this controller for direct mapped devices */ @@ -705,7 +707,7 @@ static int orion_spi_probe(struct platform_device *pdev) PAGE_SIZE); if (!spi->direct_access[cs].vaddr) { status = -ENOMEM; - goto out_rel_clk; + goto out_rel_axi_clk; } spi->direct_access[cs].size = PAGE_SIZE; @@ -733,8 +735,9 @@ static int orion_spi_probe(struct platform_device *pdev) out_rel_pm: pm_runtime_disable(&pdev->dev); -out_rel_clk: +out_rel_axi_clk: clk_disable_unprepare(spi->axi_clk); +out_rel_clk: clk_disable_unprepare(spi->clk); out: spi_master_put(master); -- cgit v1.2.3 From caf7501a1b4ec964190f31f9c3f163de252273b8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 25 Jan 2018 15:50:28 -0800 Subject: module/retpoline: Warn about missing retpoline in module There's a risk that a kernel which has full retpoline mitigations becomes vulnerable when a module gets loaded that hasn't been compiled 
with the right compiler or the right option. To enable detection of that mismatch at module load time, add a module info string "retpoline" at build time when the module was compiled with retpoline support. This only covers compiled C source, but assembler source or prebuilt object files are not checked. If a retpoline enabled kernel detects a non retpoline protected module at load time, print a warning and report it in the sysfs vulnerability file. [ tglx: Massaged changelog ] Signed-off-by: Andi Kleen Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: gregkh@linuxfoundation.org Cc: torvalds@linux-foundation.org Cc: jeyu@kernel.org Cc: arjan@linux.intel.com Link: https://lkml.kernel.org/r/20180125235028.31211-1-andi@firstfloor.org --- arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- include/linux/module.h | 9 +++++++++ kernel/module.c | 11 +++++++++++ scripts/mod/modpost.c | 9 +++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 390b3dc3d438..4a39d7bb4bd8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -93,6 +94,19 @@ static const char *spectre_v2_strings[] = { #define pr_fmt(fmt) "Spectre V2 mitigation: " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static bool spectre_v2_bad_module; + +#ifdef RETPOLINE +bool retpoline_module_ok(bool has_retpoline) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) + return true; + + pr_err("System may be vunerable to spectre v2\n"); + spectre_v2_bad_module = true; + return false; +} +#endif static void __init spec2_print_if_insecure(const char *reason) { @@ -278,6 +292,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); + return sprintf(buf, 
"%s%s\n", spectre_v2_strings[spectre_v2_enabled], + spectre_v2_bad_module ? " - vulnerable module loaded" : ""); } #endif diff --git a/include/linux/module.h b/include/linux/module.h index fe5aa3736707..b1cc541f2ddf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -794,6 +794,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef RETPOLINE +extern bool retpoline_module_ok(bool has_retpoline); +#else +static inline bool retpoline_module_ok(bool has_retpoline) +{ + return true; +} +#endif + #ifdef CONFIG_MODULE_SIG static inline bool module_sig_ok(struct module *module) { diff --git a/kernel/module.c b/kernel/module.c index de66ec825992..690c0651c40f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2855,6 +2855,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) } #endif /* CONFIG_LIVEPATCH */ +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) +{ + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) + return; + + pr_warn("%s: loading module not compiled with retpoline compiler.\n", + mod->name); +} + /* Sets info->hdr and info->len. 
*/ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) @@ -3021,6 +3030,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); } + check_modinfo_retpoline(mod, info); + if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); pr_warn("%s: module is from the staging directory, the quality " diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 98314b400a95..54deaa1066cf 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2165,6 +2165,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); } +/* Cannot check for assembler */ +static void add_retpoline(struct buffer *b) +{ + buf_printf(b, "\n#ifdef RETPOLINE\n"); + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); + buf_printf(b, "#endif\n"); +} + static void add_staging_flag(struct buffer *b, const char *name) { static const char *staging_dir = "drivers/staging"; @@ -2506,6 +2514,7 @@ int main(int argc, char **argv) err |= check_modname_len(mod); add_header(&buf, mod); add_intree_flag(&buf, !external_module); + add_retpoline(&buf); add_staging_flag(&buf, mod->name); err |= add_versions(&buf, mod); add_depends(&buf, mod, modules); -- cgit v1.2.3 From 057c76440c791eb31eb68f3b003f67d00fcce51a Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 26 Jan 2018 21:08:43 +0800 Subject: regulator: added support for suspend states Some systems need to set regulators to specific states when they enter low power modes, especially around CPUs. There are many of these modes depending on the particular runtime state. Currently the regulator consumers are not granted permission to change suspend state of regulator devices, the constraints are configured at startup. 
In order to allow changes in a voltage range, we need to add new properties for voltage range and a flag to give permission to change the suspend voltage and suspend on/off in suspend mode. Signed-off-by: Chunyan Zhang Reviewed-by: Rob Herring Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/regulator/regulator.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt index 378f6dc8b8bd..e459226dfac9 100644 --- a/Documentation/devicetree/bindings/regulator/regulator.txt +++ b/Documentation/devicetree/bindings/regulator/regulator.txt @@ -42,8 +42,16 @@ Optional properties: - regulator-state-[mem/disk] node has following common properties: - regulator-on-in-suspend: regulator should be on in suspend state. - regulator-off-in-suspend: regulator should be off in suspend state. - - regulator-suspend-microvolt: regulator should be set to this voltage - in suspend. + - regulator-suspend-min-microvolt: minimum voltage may be set in + suspend state. + - regulator-suspend-max-microvolt: maximum voltage may be set in + suspend state. + - regulator-suspend-microvolt: the default voltage which regulator + would be set in suspend. This property is now deprecated, instead + setting voltage for suspend mode via the API which regulator + driver provides is recommended. + - regulator-changeable-in-suspend: whether the default voltage and + the regulator on/off in suspend can be changed in runtime. - regulator-mode: operating mode in the given suspend state.
The set of possible operating modes depends on the capabilities of every hardware so the valid modes are documented on each regulator -- cgit v1.2.3 From c360a6df02cdba47c0590ffc7d15ec6687183e8c Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 26 Jan 2018 21:08:44 +0800 Subject: regulator: make regulator voltage be an array to support more states Some regulator consumers would like the regulator device to keep its output within a voltage range when the system enters suspend states. Making the regulator voltage an array allows consumers to set the voltage for the normal state as well as for suspend states through the same code. Signed-off-by: Chunyan Zhang Signed-off-by: Mark Brown --- drivers/regulator/core.c | 63 ++++++++++++++++++++++++-------------------- drivers/regulator/internal.h | 18 +++++++++++-- 2 files changed, 51 insertions(+), 30 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b64b7916507f..97bc9f7adf2f 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -240,22 +240,25 @@ static int regulator_check_voltage(struct regulator_dev *rdev, * regulator consumers */ static int regulator_check_consumers(struct regulator_dev *rdev, - int *min_uV, int *max_uV) + int *min_uV, int *max_uV, + suspend_state_t state) { struct regulator *regulator; + struct regulator_voltage *voltage; list_for_each_entry(regulator, &rdev->consumer_list, list) { + voltage = &regulator->voltage[state]; /* * Assume consumers that didn't say anything are OK * with anything in the constraint range.
*/ - if (!regulator->min_uV && !regulator->max_uV) + if (!voltage->min_uV && !voltage->max_uV) continue; - if (*max_uV > regulator->max_uV) - *max_uV = regulator->max_uV; - if (*min_uV < regulator->min_uV) - *min_uV = regulator->min_uV; + if (*max_uV > voltage->max_uV) + *max_uV = voltage->max_uV; + if (*min_uV < voltage->min_uV) + *min_uV = voltage->min_uV; } if (*min_uV > *max_uV) { @@ -1356,9 +1359,9 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, debugfs_create_u32("uA_load", 0444, regulator->debugfs, &regulator->uA_load); debugfs_create_u32("min_uV", 0444, regulator->debugfs, - &regulator->min_uV); + &regulator->voltage[PM_SUSPEND_ON].min_uV); debugfs_create_u32("max_uV", 0444, regulator->debugfs, - &regulator->max_uV); + &regulator->voltage[PM_SUSPEND_ON].max_uV); debugfs_create_file("constraint_flags", 0444, regulator->debugfs, regulator, &constraint_flags_fops); @@ -2898,9 +2901,11 @@ out: } static int regulator_set_voltage_unlocked(struct regulator *regulator, - int min_uV, int max_uV) + int min_uV, int max_uV, + suspend_state_t state) { struct regulator_dev *rdev = regulator->rdev; + struct regulator_voltage *voltage = &regulator->voltage[state]; int ret = 0; int old_min_uV, old_max_uV; int current_uV; @@ -2911,7 +2916,7 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, * should be a noop (some cpufreq implementations use the same * voltage for multiple frequencies, for example).
*/ - if (regulator->min_uV == min_uV && regulator->max_uV == max_uV) + if (voltage->min_uV == min_uV && voltage->max_uV == max_uV) goto out; /* If we're trying to set a range that overlaps the current voltage, @@ -2921,8 +2926,8 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) { current_uV = _regulator_get_voltage(rdev); if (min_uV <= current_uV && current_uV <= max_uV) { - regulator->min_uV = min_uV; - regulator->max_uV = max_uV; + voltage->min_uV = min_uV; + voltage->max_uV = max_uV; goto out; } } @@ -2940,12 +2945,12 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, goto out; /* restore original values in case of error */ - old_min_uV = regulator->min_uV; - old_max_uV = regulator->max_uV; - regulator->min_uV = min_uV; - regulator->max_uV = max_uV; + old_min_uV = voltage->min_uV; + old_max_uV = voltage->max_uV; + voltage->min_uV = min_uV; + voltage->max_uV = max_uV; - ret = regulator_check_consumers(rdev, &min_uV, &max_uV); + ret = regulator_check_consumers(rdev, &min_uV, &max_uV, state); if (ret < 0) goto out2; @@ -2982,7 +2987,7 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, if (supply_change_uV > 0) { ret = regulator_set_voltage_unlocked(rdev->supply, - best_supply_uV, INT_MAX); + best_supply_uV, INT_MAX, state); if (ret) { dev_err(&rdev->dev, "Failed to increase supply voltage: %d\n", ret); @@ -2996,7 +3001,7 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, if (supply_change_uV < 0) { ret = regulator_set_voltage_unlocked(rdev->supply, - best_supply_uV, INT_MAX); + best_supply_uV, INT_MAX, state); if (ret) dev_warn(&rdev->dev, "Failed to decrease supply voltage: %d\n", ret); @@ -3007,8 +3012,8 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, out: return ret; out2: - regulator->min_uV = old_min_uV; - regulator->max_uV = old_max_uV; + voltage->min_uV = old_min_uV; + 
voltage->max_uV = old_max_uV; return ret; } @@ -3037,7 +3042,8 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) regulator_lock_supply(regulator->rdev); - ret = regulator_set_voltage_unlocked(regulator, min_uV, max_uV); + ret = regulator_set_voltage_unlocked(regulator, min_uV, max_uV, + PM_SUSPEND_ON); regulator_unlock_supply(regulator->rdev); @@ -3138,6 +3144,7 @@ EXPORT_SYMBOL_GPL(regulator_set_voltage_time_sel); int regulator_sync_voltage(struct regulator *regulator) { struct regulator_dev *rdev = regulator->rdev; + struct regulator_voltage *voltage = &regulator->voltage[PM_SUSPEND_ON]; int ret, min_uV, max_uV; mutex_lock(&rdev->mutex); @@ -3149,20 +3156,20 @@ int regulator_sync_voltage(struct regulator *regulator) } /* This is only going to work if we've had a voltage configured. */ - if (!regulator->min_uV && !regulator->max_uV) { + if (!voltage->min_uV && !voltage->max_uV) { ret = -EINVAL; goto out; } - min_uV = regulator->min_uV; - max_uV = regulator->max_uV; + min_uV = voltage->min_uV; + max_uV = voltage->max_uV; /* This should be a paranoia check...
*/ ret = regulator_check_voltage(rdev, &min_uV, &max_uV); if (ret < 0) goto out; - ret = regulator_check_consumers(rdev, &min_uV, &max_uV); + ret = regulator_check_consumers(rdev, &min_uV, &max_uV, 0); if (ret < 0) goto out; @@ -4424,8 +4431,8 @@ static void regulator_summary_show_subtree(struct seq_file *s, switch (rdev->desc->type) { case REGULATOR_VOLTAGE: seq_printf(s, "%37dmV %5dmV", - consumer->min_uV / 1000, - consumer->max_uV / 1000); + consumer->voltage[PM_SUSPEND_ON].min_uV / 1000, + consumer->voltage[PM_SUSPEND_ON].max_uV / 1000); break; case REGULATOR_CURRENT: break; diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index 66a8ea0c8386..aba8e4149838 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -16,10 +16,25 @@ #ifndef __REGULATOR_INTERNAL_H #define __REGULATOR_INTERNAL_H +#include + +#define REGULATOR_STATES_NUM (PM_SUSPEND_MAX + 1) + +struct regulator_voltage { + int min_uV; + int max_uV; +}; + /* * struct regulator * * One for each consumer device. + * @voltage - a voltage array for each state of runtime, i.e.: + * PM_SUSPEND_ON + * PM_SUSPEND_TO_IDLE + * PM_SUSPEND_STANDBY + * PM_SUSPEND_MEM + * PM_SUSPEND_MAX */ struct regulator { struct device *dev; @@ -27,8 +42,7 @@ struct regulator { unsigned int always_on:1; unsigned int bypass:1; int uA_load; - int min_uV; - int max_uV; + struct regulator_voltage voltage[REGULATOR_STATES_NUM]; const char *supply_name; struct device_attribute dev_attr; struct regulator_dev *rdev; -- cgit v1.2.3 From 72069f9957a11896e79e95c8b55ec815e97c2187 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 26 Jan 2018 21:08:45 +0800 Subject: regulator: leave one item to record whether regulator is enabled The items "disabled" and "enabled" are a little redundant, since only one of them would be set to record if the regulator device should keep on or be switched to off in suspend states. 
So in this patch, the "disabled" was removed, only leave the "enabled": - enabled == 1 for regulator-on-in-suspend - enabled == 0 for regulator-off-in-suspend - enabled == -1 means do nothing when entering suspend mode. Signed-off-by: Chunyan Zhang Signed-off-by: Mark Brown --- drivers/regulator/core.c | 14 ++++++-------- drivers/regulator/of_regulator.c | 6 ++++-- include/linux/regulator/machine.h | 19 +++++++++++++++---- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 97bc9f7adf2f..5ea80e94eb69 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -742,21 +742,19 @@ static int suspend_set_state(struct regulator_dev *rdev, * only warn if the driver implements set_suspend_voltage or * set_suspend_mode callback. */ - if (!rstate->enabled && !rstate->disabled) { + if (rstate->enabled != ENABLE_IN_SUSPEND && + rstate->enabled != DISABLE_IN_SUSPEND) { if (rdev->desc->ops->set_suspend_voltage || rdev->desc->ops->set_suspend_mode) rdev_warn(rdev, "No configuration\n"); return 0; } - if (rstate->enabled && rstate->disabled) { - rdev_err(rdev, "invalid configuration\n"); - return -EINVAL; - } - - if (rstate->enabled && rdev->desc->ops->set_suspend_enable) + if (rstate->enabled == ENABLE_IN_SUSPEND && + rdev->desc->ops->set_suspend_enable) ret = rdev->desc->ops->set_suspend_enable(rdev); - else if (rstate->disabled && rdev->desc->ops->set_suspend_disable) + else if (rstate->enabled == DISABLE_IN_SUSPEND && + rdev->desc->ops->set_suspend_disable) ret = rdev->desc->ops->set_suspend_disable(rdev); else /* OK if set_suspend_enable or set_suspend_disable is NULL */ ret = 0; diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 14637a01ba2d..41dad42b18f0 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -177,10 +177,12 @@ static void of_get_regulation_constraints(struct device_node *np, if 
(of_property_read_bool(suspend_np, "regulator-on-in-suspend")) - suspend_state->enabled = true; + suspend_state->enabled = ENABLE_IN_SUSPEND; else if (of_property_read_bool(suspend_np, "regulator-off-in-suspend")) - suspend_state->disabled = true; + suspend_state->enabled = DISABLE_IN_SUSPEND; + else + suspend_state->enabled = DO_NOTHING_IN_SUSPEND; if (!of_property_read_u32(suspend_np, "regulator-suspend-microvolt", &pval)) diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 9cd4fef37203..ce89c5548c89 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -42,6 +42,16 @@ struct regulator; #define REGULATOR_CHANGE_DRMS 0x10 #define REGULATOR_CHANGE_BYPASS 0x20 +/* + * operations in suspend mode + * DO_NOTHING_IN_SUSPEND - the default value + * DISABLE_IN_SUSPEND - turn off regulator in suspend states + * ENABLE_IN_SUSPEND - keep regulator on in suspend states + */ +#define DO_NOTHING_IN_SUSPEND (-1) +#define DISABLE_IN_SUSPEND 0 +#define ENABLE_IN_SUSPEND 1 + /* Regulator active discharge flags */ enum regulator_active_discharge { REGULATOR_ACTIVE_DISCHARGE_DEFAULT, @@ -58,14 +68,15 @@ enum regulator_active_discharge { * * @uV: Operating voltage during suspend. * @mode: Operating mode during suspend. - * @enabled: Enabled during suspend. - * @disabled: Disabled during suspend. + * @enabled: operations during suspend. 
+ * - DO_NOTHING_IN_SUSPEND + * - DISABLE_IN_SUSPEND + * - ENABLE_IN_SUSPEND */ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ - int enabled; /* is regulator enabled in this suspend state */ - int disabled; /* is the regulator disabled in this suspend state */ + int enabled; }; /** -- cgit v1.2.3 From aa27bbc6c6c60227c096d515f55ffe6cdfef7d2b Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 26 Jan 2018 21:08:46 +0800 Subject: regulator: empty the old suspend functions Regulator suspend/resume functions should only be called by PM suspend core via registering dev_pm_ops, and regulator devices should implement the callback functions. Thus, any regulator consumer shouldn't call the regulator suspend/resume functions directly. In order to avoid compile errors, two empty functions with the same names are still left for the time being. Signed-off-by: Chunyan Zhang Signed-off-by: Mark Brown --- drivers/regulator/core.c | 74 --------------------------------------- include/linux/regulator/machine.h | 5 ++- 2 files changed, 2 insertions(+), 77 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5ea80e94eb69..080c2334edc5 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4179,80 +4179,6 @@ void regulator_unregister(struct regulator_dev *rdev) } EXPORT_SYMBOL_GPL(regulator_unregister); -static int _regulator_suspend_prepare(struct device *dev, void *data) -{ - struct regulator_dev *rdev = dev_to_rdev(dev); - const suspend_state_t *state = data; - int ret; - - mutex_lock(&rdev->mutex); - ret = suspend_prepare(rdev, *state); - mutex_unlock(&rdev->mutex); - - return ret; -} - -/** - * regulator_suspend_prepare - prepare regulators for system wide suspend - * @state: system suspend state - * - * Configure each regulator with it's suspend operating parameters for state. - * This will usually be called by machine suspend code prior to supending.
- */ -int regulator_suspend_prepare(suspend_state_t state) -{ - /* ON is handled by regulator active state */ - if (state == PM_SUSPEND_ON) - return -EINVAL; - - return class_for_each_device(&regulator_class, NULL, &state, - _regulator_suspend_prepare); -} -EXPORT_SYMBOL_GPL(regulator_suspend_prepare); - -static int _regulator_suspend_finish(struct device *dev, void *data) -{ - struct regulator_dev *rdev = dev_to_rdev(dev); - int ret; - - mutex_lock(&rdev->mutex); - if (rdev->use_count > 0 || rdev->constraints->always_on) { - if (!_regulator_is_enabled(rdev)) { - ret = _regulator_do_enable(rdev); - if (ret) - dev_err(dev, - "Failed to resume regulator %d\n", - ret); - } - } else { - if (!have_full_constraints()) - goto unlock; - if (!_regulator_is_enabled(rdev)) - goto unlock; - - ret = _regulator_do_disable(rdev); - if (ret) - dev_err(dev, "Failed to suspend regulator %d\n", ret); - } -unlock: - mutex_unlock(&rdev->mutex); - - /* Keep processing regulators in spite of any errors */ - return 0; -} - -/** - * regulator_suspend_finish - resume regulators from system wide suspend - * - * Turn on regulators that might be turned off by regulator_suspend_prepare - * and that should be turned on according to the regulators properties.
- */ -int regulator_suspend_finish(void) -{ - return class_for_each_device(&regulator_class, NULL, NULL, - _regulator_suspend_finish); -} -EXPORT_SYMBOL_GPL(regulator_suspend_finish); /** * regulator_has_full_constraints - the system has fully specified constraints diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ce89c5548c89..c4a56df8931b 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -236,12 +236,12 @@ struct regulator_init_data { #ifdef CONFIG_REGULATOR void regulator_has_full_constraints(void); -int regulator_suspend_prepare(suspend_state_t state); -int regulator_suspend_finish(void); #else static inline void regulator_has_full_constraints(void) { } +#endif + static inline int regulator_suspend_prepare(suspend_state_t state) { return 0; @@ -250,6 +250,5 @@ static inline int regulator_suspend_finish(void) { return 0; } -#endif #endif -- cgit v1.2.3 From f7efad10b5c492892b1e5decf5d3ebb29fa5c9af Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 26 Jan 2018 21:08:47 +0800 Subject: regulator: add PM suspend and resume hooks In this patch, consumers are allowed to set the suspend voltage; this actually just sets the "uV" in constraint::regulator_state. When regulator_suspend_late() is called by the PM core through its callback as the system enters suspend, the regulator device then applies its suspend configuration. It is assumed that if any consumer has set a suspend voltage, the regulator device should be enabled in the suspend state, and that if the suspend voltage of a regulator device was set to zero for all consumers, the regulator device should be off in the suspend state. This patch also provides a new function hook to regulator devices for resuming from suspend states.
Signed-off-by: Chunyan Zhang Signed-off-by: Mark Brown --- drivers/regulator/core.c | 255 +++++++++++++++++++++++++++++++++----- drivers/regulator/of_regulator.c | 14 +++ include/linux/regulator/driver.h | 2 + include/linux/regulator/machine.h | 13 +- 4 files changed, 251 insertions(+), 33 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 080c2334edc5..949e317e4d6e 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -236,6 +236,12 @@ static int regulator_check_voltage(struct regulator_dev *rdev, return 0; } +/* return 0 if the state is valid */ +static int regulator_check_states(suspend_state_t state) +{ + return (state > PM_SUSPEND_MAX || state == PM_SUSPEND_TO_IDLE); +} + /* Make sure we select a voltage that suits the needs of all * regulator consumers */ @@ -327,6 +333,24 @@ static int regulator_mode_constrain(struct regulator_dev *rdev, return -EINVAL; } +static inline struct regulator_state * +regulator_get_suspend_state(struct regulator_dev *rdev, suspend_state_t state) +{ + if (rdev->constraints == NULL) + return NULL; + + switch (state) { + case PM_SUSPEND_STANDBY: + return &rdev->constraints->state_standby; + case PM_SUSPEND_MEM: + return &rdev->constraints->state_mem; + case PM_SUSPEND_MAX: + return &rdev->constraints->state_disk; + default: + return NULL; + } +} + static ssize_t regulator_uV_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -734,9 +758,14 @@ static int drms_uA_update(struct regulator_dev *rdev) } static int suspend_set_state(struct regulator_dev *rdev, - struct regulator_state *rstate) + suspend_state_t state) { int ret = 0; + struct regulator_state *rstate; + + rstate = regulator_get_suspend_state(rdev, state); + if (rstate == NULL) + return -EINVAL; /* If we have no suspend mode configration don't set anything; * only warn if the driver implements set_suspend_voltage or @@ -779,28 +808,8 @@ static int suspend_set_state(struct regulator_dev *rdev, return ret; } 
} - return ret; -} -/* locks held by caller */ -static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state) -{ - if (!rdev->constraints) - return -EINVAL; - - switch (state) { - case PM_SUSPEND_STANDBY: - return suspend_set_state(rdev, - &rdev->constraints->state_standby); - case PM_SUSPEND_MEM: - return suspend_set_state(rdev, - &rdev->constraints->state_mem); - case PM_SUSPEND_MAX: - return suspend_set_state(rdev, - &rdev->constraints->state_disk); - default: - return -EINVAL; - } + return ret; } static void print_constraints(struct regulator_dev *rdev) @@ -1069,7 +1078,7 @@ static int set_machine_constraints(struct regulator_dev *rdev, /* do we need to setup our suspend state */ if (rdev->constraints->initial_state) { - ret = suspend_prepare(rdev, rdev->constraints->initial_state); + ret = suspend_set_state(rdev, rdev->constraints->initial_state); if (ret < 0) { rdev_err(rdev, "failed to set suspend state\n"); return ret; @@ -2898,6 +2907,32 @@ out: return ret; } +static int _regulator_do_set_suspend_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV, suspend_state_t state) +{ + struct regulator_state *rstate; + int uV, sel; + + rstate = regulator_get_suspend_state(rdev, state); + if (rstate == NULL) + return -EINVAL; + + if (min_uV < rstate->min_uV) + min_uV = rstate->min_uV; + if (max_uV > rstate->max_uV) + max_uV = rstate->max_uV; + + sel = regulator_map_voltage(rdev, min_uV, max_uV); + if (sel < 0) + return sel; + + uV = rdev->desc->ops->list_voltage(rdev, sel); + if (uV >= min_uV && uV <= max_uV) + rstate->uV = uV; + + return 0; +} + static int regulator_set_voltage_unlocked(struct regulator *regulator, int min_uV, int max_uV, suspend_state_t state) @@ -2993,7 +3028,11 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, } } - ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); + if (state == PM_SUSPEND_ON) + ret = _regulator_do_set_voltage(rdev, min_uV, max_uV); + else + ret = 
_regulator_do_set_suspend_voltage(rdev, min_uV, + max_uV, state); if (ret < 0) goto out2; @@ -3049,6 +3088,89 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) } EXPORT_SYMBOL_GPL(regulator_set_voltage); +static inline int regulator_suspend_toggle(struct regulator_dev *rdev, + suspend_state_t state, bool en) +{ + struct regulator_state *rstate; + + rstate = regulator_get_suspend_state(rdev, state); + if (rstate == NULL) + return -EINVAL; + + if (!rstate->changeable) + return -EPERM; + + rstate->enabled = en; + + return 0; +} + +int regulator_suspend_enable(struct regulator_dev *rdev, + suspend_state_t state) +{ + return regulator_suspend_toggle(rdev, state, true); +} +EXPORT_SYMBOL_GPL(regulator_suspend_enable); + +int regulator_suspend_disable(struct regulator_dev *rdev, + suspend_state_t state) +{ + struct regulator *regulator; + struct regulator_voltage *voltage; + + /* + * if any consumer wants this regulator device keeping on in + * suspend states, don't set it as disabled. 
+ */ + list_for_each_entry(regulator, &rdev->consumer_list, list) { + voltage = &regulator->voltage[state]; + if (voltage->min_uV || voltage->max_uV) + return 0; + } + + return regulator_suspend_toggle(rdev, state, false); +} +EXPORT_SYMBOL_GPL(regulator_suspend_disable); + +static int _regulator_set_suspend_voltage(struct regulator *regulator, + int min_uV, int max_uV, + suspend_state_t state) +{ + struct regulator_dev *rdev = regulator->rdev; + struct regulator_state *rstate; + + rstate = regulator_get_suspend_state(rdev, state); + if (rstate == NULL) + return -EINVAL; + + if (rstate->min_uV == rstate->max_uV) { + rdev_err(rdev, "The suspend voltage can't be changed!\n"); + return -EPERM; + } + + return regulator_set_voltage_unlocked(regulator, min_uV, max_uV, state); +} + +int regulator_set_suspend_voltage(struct regulator *regulator, int min_uV, + int max_uV, suspend_state_t state) +{ + int ret = 0; + + /* PM_SUSPEND_ON is handled by regulator_set_voltage() */ + if (regulator_check_states(state) || state == PM_SUSPEND_ON) + return -EINVAL; + + regulator_lock_supply(regulator->rdev); + + ret = _regulator_set_suspend_voltage(regulator, min_uV, + max_uV, state); + + regulator_unlock_supply(regulator->rdev); + + return ret; +} +EXPORT_SYMBOL_GPL(regulator_set_suspend_voltage); + /** * regulator_set_voltage_time - get raise/fall time * @regulator: regulator source @@ -3923,12 +4045,6 @@ static void regulator_dev_release(struct device *dev) kfree(rdev); } -static struct class regulator_class = { - .name = "regulator", - .dev_release = regulator_dev_release, - .dev_groups = regulator_dev_groups, -}; - static void rdev_init_debugfs(struct regulator_dev *rdev) { struct device *parent = rdev->dev.parent; @@ -4179,7 +4295,86 @@ void regulator_unregister(struct regulator_dev *rdev) } EXPORT_SYMBOL_GPL(regulator_unregister); +#ifdef CONFIG_SUSPEND +static int _regulator_suspend_late(struct device *dev, void *data) +{ + struct regulator_dev *rdev = dev_to_rdev(dev); +
suspend_state_t *state = data; + int ret; + + mutex_lock(&rdev->mutex); + ret = suspend_set_state(rdev, *state); + mutex_unlock(&rdev->mutex); + + return ret; +} +/** + * regulator_suspend_late - prepare regulators for system wide suspend + * @state: system suspend state + * + * Configure each regulator with it's suspend operating parameters for state. + */ +static int regulator_suspend_late(struct device *dev) +{ + suspend_state_t state = pm_suspend_target_state; + + return class_for_each_device(&regulator_class, NULL, &state, + _regulator_suspend_late); +} +static int _regulator_resume_early(struct device *dev, void *data) +{ + int ret = 0; + struct regulator_dev *rdev = dev_to_rdev(dev); + suspend_state_t *state = data; + struct regulator_state *rstate; + + rstate = regulator_get_suspend_state(rdev, *state); + if (rstate == NULL) + return -EINVAL; + + mutex_lock(&rdev->mutex); + + if (rdev->desc->ops->resume_early && + (rstate->enabled == ENABLE_IN_SUSPEND || + rstate->enabled == DISABLE_IN_SUSPEND)) + ret = rdev->desc->ops->resume_early(rdev); + + mutex_unlock(&rdev->mutex); + + return ret; +} + +static int regulator_resume_early(struct device *dev) +{ + suspend_state_t state = pm_suspend_target_state; + + return class_for_each_device(&regulator_class, NULL, &state, + _regulator_resume_early); +} + +#else /* !CONFIG_SUSPEND */ + +#define regulator_suspend_late NULL +#define regulator_resume_early NULL + +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_PM +static const struct dev_pm_ops __maybe_unused regulator_pm_ops = { + .suspend_late = regulator_suspend_late, + .resume_early = regulator_resume_early, +}; +#endif + +static struct class regulator_class = { + .name = "regulator", + .dev_release = regulator_dev_release, + .dev_groups = regulator_dev_groups, +#ifdef CONFIG_PM + .pm = &regulator_pm_ops, +#endif +}; /** * regulator_has_full_constraints - the system has fully specified constraints * diff --git a/drivers/regulator/of_regulator.c
b/drivers/regulator/of_regulator.c index 41dad42b18f0..a09ef6cc2e9c 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -184,9 +184,23 @@ static void of_get_regulation_constraints(struct device_node *np, else suspend_state->enabled = DO_NOTHING_IN_SUSPEND; + if (!of_property_read_u32(np, "regulator-suspend-min-microvolt", + &pval)) + suspend_state->min_uV = pval; + + if (!of_property_read_u32(np, "regulator-suspend-max-microvolt", + &pval)) + suspend_state->max_uV = pval; + if (!of_property_read_u32(suspend_np, "regulator-suspend-microvolt", &pval)) suspend_state->uV = pval; + else /* otherwise use min_uV as default suspend voltage */ + suspend_state->uV = suspend_state->min_uV; + + if (of_property_read_bool(suspend_np, + "regulator-changeable-in-suspend")) + suspend_state->changeable = true; if (i == PM_SUSPEND_MEM) constraints->initial_state = PM_SUSPEND_MEM; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 94417b4226bd..4c00486b7a78 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -214,6 +214,8 @@ struct regulator_ops { /* set regulator suspend operating mode (defined in consumer.h) */ int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode); + int (*resume_early)(struct regulator_dev *rdev); + int (*set_pull_down) (struct regulator_dev *); }; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index c4a56df8931b..93a04893c739 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -66,17 +66,24 @@ enum regulator_active_discharge { * state. One of enabled or disabled must be set for the * configuration to be applied. * - * @uV: Operating voltage during suspend. + * @uV: Default operating voltage during suspend, it can be adjusted + * among <min_uV, max_uV>. + * @min_uV: Minimum suspend voltage may be set. + * @max_uV: Maximum suspend voltage may be set. * @mode: Operating mode during suspend.
* @enabled: operations during suspend. * - DO_NOTHING_IN_SUSPEND * - DISABLE_IN_SUSPEND * - ENABLE_IN_SUSPEND + * @changeable: Is this state can be switched between enabled/disabled, */ struct regulator_state { - int uV; /* suspend voltage */ - unsigned int mode; /* suspend regulator operating mode */ + int uV; + int min_uV; + int max_uV; + unsigned int mode; int enabled; + bool changeable; }; /** -- cgit v1.2.3 From 993181e151311936d7a343fccd6cc4cebe83ad81 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 26 Jan 2018 16:24:49 +0200 Subject: spi: dw: Remove unused members from struct chip_data Local struct chip_data has two members that are not used: - cs. Looks like was never used - enable_dma. Became unused by the commit f89a6d8f43eb ("spi: dw-mid: move to use core SPI DMA mappings"). Signed-off-by: Jarkko Nikula Acked-by: Andy Shevchenko Signed-off-by: Mark Brown --- drivers/spi/spi-dw.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index b217c22ff72f..211cc7d75bf8 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -30,13 +30,11 @@ /* Slave spi_dev related */ struct chip_data { - u8 cs; /* chip select pin */ u8 tmode; /* TR/TO/RO/EEPROM */ u8 type; /* SPI/SSP/MicroWire */ u8 poll_mode; /* 1 means use poll mode */ - u8 enable_dma; u16 clk_div; /* baud rate divider */ u32 speed_hz; /* baud rate */ void (*cs_control)(u32 command); -- cgit v1.2.3 From 95ca0ee8636059ea2800dfbac9ecac6212d6b38f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:09 +0000 Subject: x86/cpufeatures: Add CPUID_7_EDX CPUID leaf This is a pure feature bits leaf. There are two AVX512 feature bits in it already which were handled as scattered bits, and three more from this leaf are going to be added for speculation control features. 
Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/cpufeature.h | 7 +++++-- arch/x86/include/asm/cpufeatures.h | 8 +++++--- arch/x86/include/asm/disabled-features.h | 3 ++- arch/x86/include/asm/required-features.h | 3 ++- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/scattered.c | 2 -- 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ea9a7dde62e5..70eddb3922ff 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -29,6 +29,7 @@ enum cpuid_leafs CPUID_8000_000A_EDX, CPUID_7_ECX, CPUID_8000_0007_EBX, + CPUID_7_EDX, }; #ifdef CONFIG_X86_FEATURE_NAMES @@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 
18, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 25b9375c1484..7b25cf30d25d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 18 /* N 32-bit words worth of info */ +#define NCAPINTS 19 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -206,8 +206,6 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ @@ -319,6 +317,10 @@ #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index e428e16dd822..c6a3af198294 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -71,6 +71,7 @@ #define DISABLED_MASK15 0 #define 
DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57) #define DISABLED_MASK17 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define DISABLED_MASK18 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index d91ba04dd007..fb3a6de7440b 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -106,6 +106,7 @@ #define REQUIRED_MASK15 0 #define REQUIRED_MASK16 (NEED_LA57) #define REQUIRED_MASK17 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) +#define REQUIRED_MASK18 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 372ba3fb400f..e5d66e93ed81 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -745,6 +745,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; + c->x86_capability[CPUID_7_EDX] = edx; } /* Extended state features: level 0x0000000d */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index d0e69769abfd..df11f5d604be 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -21,8 +21,6 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, - { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, -- cgit v1.2.3 From 
fc67dd70adb711a45d2ef34e12d1a8be75edde61 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:10 +0000 Subject: x86/cpufeatures: Add Intel feature bits for Speculation Control Add three feature bits exposed by new microcode on Intel CPUs for speculation control. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-3-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 7b25cf30d25d..0a5107002716 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -320,6 +320,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* * BUG word(s) -- cgit v1.2.3 From 5d10cbc91d9eb5537998b65608441b592eec65e7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:11 +0000 Subject: x86/cpufeatures: Add AMD feature bits for Speculation Control AMD exposes the PRED_CMD/SPEC_CTRL MSRs slightly differently to Intel. 
See http://lkml.kernel.org/r/2b3e25cc-286d-8bd0-aeaf-9ac4aae39de8@amd.com Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: Tom Lendacky Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-4-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/cpufeatures.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0a5107002716..ae3212f14dec 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -269,6 +269,9 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ +#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ +#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -- cgit v1.2.3 From 1e340c60d0dd3ae07b5bedc16a0469c14b9f3410 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:12 +0000 Subject: x86/msr: Add definitions for new speculation control MSRs Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES. 
See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-5-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/msr-index.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fa11fb1fa570..eb83ff1bae8f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -39,6 +39,13 @@ /* Intel MSRs. Some also available on other CPUs */ +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ + +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ + #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -57,6 +64,11 @@ #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe + +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ + #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e -- cgit v1.2.3 From fec9434a12f38d3aeafeb75711b71d8a1fdef621 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:13 +0000 Subject: x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown Also, for CPUs which don't speculate at all, don't report that they're 
vulnerable to the Spectre variants either. Leave the cpu_no_meltdown[] match table with just X86_VENDOR_AMD in it for now, even though that could be done with a simple comparison, on the assumption that we'll have more to add. Based on suggestions from Dave Hansen and Alan Cox. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Reviewed-by: Borislav Petkov Acked-by: Dave Hansen Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-6-git-send-email-dwmw@amazon.co.uk --- arch/x86/kernel/cpu/common.c | 48 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e5d66e93ed81..970ee06dc8aa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -853,6 +855,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } +static const __initdata struct x86_cpu_id cpu_no_speculation[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, + { X86_VENDOR_CENTAUR, 5 }, + { X86_VENDOR_INTEL, 5 }, + { X86_VENDOR_NSC, 5 }, + { X86_VENDOR_ANY, 4 }, + {} +}; + +static const __initdata struct x86_cpu_id cpu_no_meltdown[] = { + { X86_VENDOR_AMD }, + {} +}; + +static bool __init cpu_vulnerable_to_meltdown(struct 
cpuinfo_x86 *c) +{ + u64 ia32_cap = 0; + + if (x86_match_cpu(cpu_no_meltdown)) + return false; + + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + /* Rogue Data Cache Load? No! */ + if (ia32_cap & ARCH_CAP_RDCL_NO) + return false; + + return true; +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -900,11 +937,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (c->x86_vendor != X86_VENDOR_AMD) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + if (!x86_match_cpu(cpu_no_speculation)) { + if (cpu_vulnerable_to_meltdown(c)) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + } fpu__init_system(c); -- cgit v1.2.3 From a5b2966364538a0e68c9fa29bc0a3a1651799035 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:14 +0000 Subject: x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes This doesn't refuse to load the affected microcodes; it just refuses to use the Spectre v2 mitigation features if they're detected, by clearing the appropriate feature bits. The AMD CPUID bits are handled here too, because hypervisors *may* have been exposing those bits even on Intel chips, for fine-grained control of what's available. It is non-trivial to use x86_match_cpu() for this table because that doesn't handle steppings. And the approach taken in commit bd9240a18 almost made me lose my lunch. 
Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-7-git-send-email-dwmw@amazon.co.uk --- arch/x86/kernel/cpu/intel.c | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b720dacac051..5faa487d0477 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) ELF_HWCAP2 |= HWCAP2_RING3MWAIT; } +/* + * Early microcode releases for the Spectre v2 mitigation were broken. + * Information taken from; + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://kb.vmware.com/s/article/52345 + * - Microcode revisions observed in the wild + * - Release note from 20180108 microcode release + */ +struct sku_microcode { + u8 model; + u8 stepping; + u32 microcode; +}; +static const struct sku_microcode spectre_bad_microcodes[] = { + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, + { 
INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + /* Updated in the 20180108 release; blacklist until we know otherwise */ + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, + /* Observed in the wild */ + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, +}; + +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { + if (c->x86_model == spectre_bad_microcodes[i].model && + c->x86_mask == spectre_bad_microcodes[i].stepping) + return (c->microcode <= spectre_bad_microcodes[i].microcode); + } + return false; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_STIBP) || + cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || + cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || + cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_STIBP); + clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); + clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + } + /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * -- cgit v1.2.3 From 20ffa1caecca4db8f79fe665acdeaa5af815a24d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 25 Jan 2018 16:14:15 +0000 Subject: x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support Expose indirect_branch_prediction_barrier() for use in subsequent 
patches. [ tglx: Add IBPB status to spectre_v2 sysfs file ] Co-developed-by: KarimAllah Ahmed Signed-off-by: KarimAllah Ahmed Signed-off-by: David Woodhouse Cc: gnomes@lxorguk.ukuu.org.uk Cc: ak@linux.intel.com Cc: ashok.raj@intel.com Cc: dave.hansen@intel.com Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-8-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ arch/x86/kernel/cpu/bugs.c | 10 +++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ae3212f14dec..07934b2f8df2 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -210,6 +210,8 @@ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4ad41087ce0e..34e384c7208f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -218,5 +218,18 @@ static inline void vmexit_fill_RSB(void) #endif } +static inline void indirect_branch_prediction_barrier(void) +{ + asm volatile(ALTERNATIVE("", + "movl %[msr], %%ecx\n\t" + "movl %[val], %%eax\n\t" + "movl $0, %%edx\n\t" + "wrmsr", + X86_FEATURE_IBPB) + : : [msr] "i" (MSR_IA32_PRED_CMD), + [val] "i" (PRED_CMD_IBPB) + : "eax", "ecx", "edx", "memory"); +} + #endif /* __ASSEMBLY__ */ #endif /* 
__NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4a39d7bb4bd8..bac7a3558db2 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -263,6 +263,13 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Filling RSB on context switch\n"); } + + /* Initialize Indirect Branch Prediction Barrier if supported */ + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { + setup_force_cpu_cap(X86_FEATURE_IBPB); + pr_info("Enabling Indirect Branch Prediction Barrier\n"); + } } #undef pr_fmt @@ -292,7 +299,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", spectre_v2_bad_module ? " - vulnerable module loaded" : ""); } #endif -- cgit v1.2.3 From 0e6c16c652cadaffd25a6bb326ec10da5bcec6b4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:36 +0100 Subject: x86/alternative: Print unadorned pointers After commit ad67b74d2469 ("printk: hash addresses printed with %p") pointers are being hashed when printed. However, this makes the alternative debug output completely useless. Switch to %px in order to see the unadorned kernel pointers. 
Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-2-bp@alien8.de --- arch/x86/kernel/alternative.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e0b97e4d1db5..14a52c7d23d4 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -298,7 +298,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) tgt_rip = next_rip + o_dspl; n_dspl = tgt_rip - orig_insn; - DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); + DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl); if (tgt_rip - orig_insn >= 0) { if (n_dspl - 2 <= 127) @@ -355,7 +355,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins add_nops(instr + (a->instrlen - a->padlen), a->padlen); local_irq_restore(flags); - DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", + DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); } @@ -376,7 +376,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, u8 *instr, *replacement; u8 insnbuf[MAX_PATCH_LEN]; - DPRINTK("alt table %p -> %p", start, end); + DPRINTK("alt table %px, -> %px", start, end); /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. 
@@ -400,14 +400,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, continue; } - DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d", + DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", a->cpuid >> 5, a->cpuid & 0x1f, instr, a->instrlen, replacement, a->replacementlen, a->padlen); - DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); - DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); + DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); + DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); memcpy(insnbuf, replacement, a->replacementlen); insnbuf_sz = a->replacementlen; @@ -433,7 +433,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, a->instrlen - a->replacementlen); insnbuf_sz += a->instrlen - a->replacementlen; } - DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); + DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr); text_poke_early(instr, insnbuf, insnbuf_sz); } -- cgit v1.2.3 From 7a32fc51ca938e67974cbb9db31e1a43f98345a9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:37 +0100 Subject: x86/nospec: Fix header guards names ... to adhere to the _ASM_X86_ naming scheme. No functional change. 
Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-3-bp@alien8.de --- arch/x86/include/asm/nospec-branch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 34e384c7208f..865192a2cc31 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NOSPEC_BRANCH_H__ -#define __NOSPEC_BRANCH_H__ +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ +#define _ASM_X86_NOSPEC_BRANCH_H_ #include #include @@ -232,4 +232,4 @@ static inline void indirect_branch_prediction_barrier(void) } #endif /* __ASSEMBLY__ */ -#endif /* __NOSPEC_BRANCH_H__ */ +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ -- cgit v1.2.3 From 55fa19d3e51f33d9cd4056d25836d93abf9438db Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 26 Jan 2018 13:11:39 +0100 Subject: x86/bugs: Drop one "mitigation" from dmesg Make [ 0.031118] Spectre V2 mitigation: Mitigation: Full generic retpoline into [ 0.031118] Spectre V2: Mitigation: Full generic retpoline to reduce the mitigation mitigations strings. 
Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Cc: riel@redhat.com Cc: ak@linux.intel.com Cc: peterz@infradead.org Cc: David Woodhouse Cc: jikos@kernel.org Cc: luto@amacapital.net Cc: dave.hansen@intel.com Cc: torvalds@linux-foundation.org Cc: keescook@google.com Cc: Josh Poimboeuf Cc: tim.c.chen@linux.intel.com Cc: pjt@google.com Link: https://lkml.kernel.org/r/20180126121139.31959-5-bp@alien8.de --- arch/x86/kernel/cpu/bugs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bac7a3558db2..c988a8acb0d5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -91,7 +91,7 @@ static const char *spectre_v2_strings[] = { }; #undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; static bool spectre_v2_bad_module; -- cgit v1.2.3 From 5beda7d54eafece4c974cfa9fbb9f60fb18fd20a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jan 2018 13:12:14 -0800 Subject: x86/mm/64: Fix vmapped stack syncing on very-large-memory 4-level systems Neil Berrington reported a double-fault on a VM with 768GB of RAM that uses large amounts of vmalloc space with PTI enabled. The cause is that load_new_mm_cr3() was never fixed to take the 5-level pgd folding code into account, so, on a 4-level kernel, the pgd synchronization logic compiles away to exactly nothing. Interestingly, the problem doesn't trigger with nopti. I assume this is because the kernel is mapped with global pages if we boot with nopti. The sequence of operations when we create a new task is that we first load its mm while still running on the old stack (which crashes if the old stack is unmapped in the new mm unless the TLB saves us), then we call prepare_switch_to(), and then we switch to the new stack. 
prepare_switch_to() pokes the new stack directly, which will populate the mapping through vmalloc_fault(). I assume that we're getting lucky on non-PTI systems -- the old stack's TLB entry stays alive long enough to make it all the way through prepare_switch_to() and switch_to() so that we make it to a valid stack. Fixes: b50858ce3e2a ("x86/mm/vmalloc: Add 5-level paging support") Reported-and-tested-by: Neil Berrington Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Konstantin Khlebnikov Cc: stable@vger.kernel.org Cc: Dave Hansen Cc: Borislav Petkov Link: https://lkml.kernel.org/r/346541c56caed61abbe693d7d2742b4a380c5001.1516914529.git.luto@kernel.org --- arch/x86/mm/tlb.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index a1561957dccb..5bfe61a5e8e3 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -151,6 +151,34 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, local_irq_restore(flags); } +static void sync_current_stack_to_mm(struct mm_struct *mm) +{ + unsigned long sp = current_stack_pointer; + pgd_t *pgd = pgd_offset(mm, sp); + + if (CONFIG_PGTABLE_LEVELS > 4) { + if (unlikely(pgd_none(*pgd))) { + pgd_t *pgd_ref = pgd_offset_k(sp); + + set_pgd(pgd, *pgd_ref); + } + } else { + /* + * "pgd" is faked. The top level entries are "p4d"s, so sync + * the p4d. This compiles to approximately the same code as + * the 5-level case. + */ + p4d_t *p4d = p4d_offset(pgd, sp); + + if (unlikely(p4d_none(*p4d))) { + pgd_t *pgd_ref = pgd_offset_k(sp); + p4d_t *p4d_ref = p4d_offset(pgd_ref, sp); + + set_p4d(p4d, *p4d_ref); + } + } +} + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -226,11 +254,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * mapped in the new pgd, we'll double-fault. Forcibly * map it. 
*/ - unsigned int index = pgd_index(current_stack_pointer); - pgd_t *pgd = next->pgd + index; - - if (unlikely(pgd_none(*pgd))) - set_pgd(pgd, init_mm.pgd[index]); + sync_current_stack_to_mm(next); } /* Stop remote flushes for the previous mm */ -- cgit v1.2.3 From 36b3a7726886f24c4209852a58e64435bde3af98 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jan 2018 13:12:15 -0800 Subject: x86/mm/64: Tighten up vmalloc_fault() sanity checks on 5-level kernels On a 5-level kernel, if a non-init mm has a top-level entry, it needs to match init_mm's, but the vmalloc_fault() code skipped over the BUG_ON() that would have checked it. While we're at it, get rid of the rather confusing 4-level folded "pgd" logic. Cleans-up: b50858ce3e2a ("x86/mm/vmalloc: Add 5-level paging support") Signed-off-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Cc: Konstantin Khlebnikov Cc: Dave Hansen Cc: Borislav Petkov Cc: Neil Berrington Link: https://lkml.kernel.org/r/2ae598f8c279b0a29baf75df207e6f2fdddc0a1b.1516914529.git.luto@kernel.org --- arch/x86/mm/fault.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b3e40773dce0..800de815519c 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -439,18 +439,13 @@ static noinline int vmalloc_fault(unsigned long address) if (pgd_none(*pgd_ref)) return -1; - if (pgd_none(*pgd)) { - set_pgd(pgd, *pgd_ref); - arch_flush_lazy_mmu_mode(); - } else if (CONFIG_PGTABLE_LEVELS > 4) { - /* - * With folded p4d, pgd_none() is always false, so the pgd may - * point to an empty page table entry and pgd_page_vaddr() - * will return garbage. - * - * We will do the correct sanity check on the p4d level. 
- */ - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + if (CONFIG_PGTABLE_LEVELS > 4) { + if (pgd_none(*pgd)) { + set_pgd(pgd, *pgd_ref); + arch_flush_lazy_mmu_mode(); + } else { + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + } } /* With 4-level paging, copying happens on the p4d level. */ @@ -459,7 +454,7 @@ static noinline int vmalloc_fault(unsigned long address) if (p4d_none(*p4d_ref)) return -1; - if (p4d_none(*p4d)) { + if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) { set_p4d(p4d, *p4d_ref); arch_flush_lazy_mmu_mode(); } else { @@ -470,6 +465,7 @@ static noinline int vmalloc_fault(unsigned long address) * Below here mismatches are bugs because these lower tables * are shared: */ + BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); pud = pud_offset(p4d, address); pud_ref = pud_offset(p4d_ref, address); -- cgit v1.2.3 From 148096af0bf381c78afe253c07ef1c77778f0e68 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Mon, 22 Jan 2018 15:30:06 +0100 Subject: regulator: core: Move of_find_regulator_by_node() to of_regulator.c As of_find_regulator_by_node() is an of function it should be moved from core.c to of_regulator.c. It provides better separation of device tree functions from the core and allows other of_functions in of_regulator.c to resolve device_node to regulator_dev. This will be useful for implementation of parsing coupled regulators properties. Declare of_find_regulator_by_node() function in internal.h as well as regulator_class and dev_to_rdev(), as they are needed by of_find_regulator_by_node(). 
Signed-off-by: Maciej Purski Signed-off-by: Mark Brown --- drivers/regulator/core.c | 23 +---------------------- drivers/regulator/internal.h | 9 +++++++++ drivers/regulator/of_regulator.c | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 365b32e3f505..5f7678292cef 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -58,8 +58,6 @@ static bool has_full_constraints; static struct dentry *debugfs_root; -static struct class regulator_class; - /* * struct regulator_map * @@ -112,11 +110,6 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, const char *supply_name); static void _regulator_put(struct regulator *regulator); -static struct regulator_dev *dev_to_rdev(struct device *dev) -{ - return container_of(dev, struct regulator_dev, dev); -} - static const char *rdev_get_name(struct regulator_dev *rdev) { if (rdev->constraints && rdev->constraints->name) @@ -1417,20 +1410,6 @@ static void regulator_supply_alias(struct device **dev, const char **supply) } } -static int of_node_match(struct device *dev, const void *data) -{ - return dev->of_node == data; -} - -static struct regulator_dev *of_find_regulator_by_node(struct device_node *np) -{ - struct device *dev; - - dev = class_find_device(&regulator_class, NULL, np, of_node_match); - - return dev ?
dev_to_rdev(dev) : NULL; -} - static int regulator_match(struct device *dev, const void *data) { struct regulator_dev *r = dev_to_rdev(dev); @@ -3918,7 +3897,7 @@ static void regulator_dev_release(struct device *dev) kfree(rdev); } -static struct class regulator_class = { +struct class regulator_class = { .name = "regulator", .dev_release = regulator_dev_release, .dev_groups = regulator_dev_groups, diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index 66a8ea0c8386..2f3218be5b8d 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -35,6 +35,15 @@ struct regulator { struct dentry *debugfs; }; +extern struct class regulator_class; + +static inline struct regulator_dev *dev_to_rdev(struct device *dev) +{ + return container_of(dev, struct regulator_dev, dev); +} + +struct regulator_dev *of_find_regulator_by_node(struct device_node *np); + #ifdef CONFIG_OF struct regulator_init_data *regulator_of_get_init_data(struct device *dev, const struct regulator_desc *desc, diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index 14637a01ba2d..54e810ae93d6 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -376,3 +376,17 @@ struct regulator_init_data *regulator_of_get_init_data(struct device *dev, return init_data; } + +static int of_node_match(struct device *dev, const void *data) +{ + return dev->of_node == data; +} + +struct regulator_dev *of_find_regulator_by_node(struct device_node *np) +{ + struct device *dev; + + dev = class_find_device(&regulator_class, NULL, np, of_node_match); + + return dev ? dev_to_rdev(dev) : NULL; +} -- cgit v1.2.3 From 3d67fe950707a930664c5673ecc026f1bb497136 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Mon, 22 Jan 2018 15:30:07 +0100 Subject: regulator: core: Refactor regulator_list_voltage() Change _regulator_list_voltage() argument from regulator to regulator_dev in order to provide better separation of core layers.
Allow calling _regulator_list_voltage() from functions, with regulator_dev argument. This refactoring is needed in order to implement setting voltage of coupled regulators. Signed-off-by: Maciej Purski Signed-off-by: Mark Brown --- drivers/regulator/core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5f7678292cef..fd8eacfea422 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2447,10 +2447,9 @@ static int _regulator_is_enabled(struct regulator_dev *rdev) return rdev->desc->ops->is_enabled(rdev); } -static int _regulator_list_voltage(struct regulator *regulator, - unsigned selector, int lock) +static int _regulator_list_voltage(struct regulator_dev *rdev, + unsigned selector, int lock) { - struct regulator_dev *rdev = regulator->rdev; const struct regulator_ops *ops = rdev->desc->ops; int ret; @@ -2466,7 +2465,8 @@ static int _regulator_list_voltage(struct regulator *regulator, if (lock) mutex_unlock(&rdev->mutex); } else if (rdev->is_switch && rdev->supply) { - ret = _regulator_list_voltage(rdev->supply, selector, lock); + ret = _regulator_list_voltage(rdev->supply->rdev, + selector, lock); } else { return -EINVAL; } @@ -2542,7 +2542,7 @@ EXPORT_SYMBOL_GPL(regulator_count_voltages); */ int regulator_list_voltage(struct regulator *regulator, unsigned selector) { - return _regulator_list_voltage(regulator, selector, 1); + return _regulator_list_voltage(regulator->rdev, selector, 1); } EXPORT_SYMBOL_GPL(regulator_list_voltage); -- cgit v1.2.3 From 6572cc2bf2e7b10378eaa5a94a0c717dca1289c9 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 24 Jan 2018 13:26:11 -0800 Subject: Update the RISC-V MAINTAINERS file Now that we're upstream in Linux we've been able to make some infrastructure changes so our port works a bit more like other ports. Specifically: * We now have a mailing list specific to the RISC-V Linux port, hosted at lists.infradead.org.
* We now have a kernel.org git tree where work on our port is coordinated. This patch changes the RISC-V maintainers entry to reflect these new bits of infrastructure. Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e3581413420c..a3c25b18bd2d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11651,8 +11651,8 @@ F: drivers/mtd/nand/r852.h RISC-V ARCHITECTURE M: Palmer Dabbelt M: Albert Ou -L: patches@groups.riscv.org -T: git https://github.com/riscv/riscv-linux +L: linux-riscv@lists.infradead.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/palmer/riscv-linux.git S: Supported F: arch/riscv/ K: riscv -- cgit v1.2.3 From dd5684ecae3bd8e44b644f50e2c12c7e57fdfef5 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 26 Jan 2018 15:14:16 +0300 Subject: dccp: don't restart ccid2_hc_tx_rto_expire() if sk in closed state ccid2_hc_tx_rto_expire() timer callback always restarts the timer again and can run indefinitely (unless it is stopped outside), and after commit 120e9dabaf55 ("dccp: defer ccid_hc_tx_delete() at dismantle time"), which moved ccid_hc_tx_delete() (also includes sk_stop_timer()) from dccp_destroy_sock() to sk_destruct(), this started to happen quite often. The timer prevents releasing the socket, as a result, sk_destruct() won't be called. Found with LTP/dccp_ipsec tests running on the bonding device, which later couldn't be unloaded after the tests were completed: unregister_netdevice: waiting for bond0 to become free. Usage count = 148 Fixes: 2a91aa396739 ("[DCCP] CCID2: Initial CCID2 (TCP-Like) implementation") Signed-off-by: Alexey Kodanev Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/dccp/ccids/ccid2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 1c75cd1255f6..92d016e87816 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t) ccid2_pr_debug("RTO_EXPIRE\n"); + if (sk->sk_state == DCCP_CLOSED) + goto out; + /* back-off timer */ hc->tx_rto <<= 1; if (hc->tx_rto > DCCP_RTO_MAX) -- cgit v1.2.3 From ba3169fc7548759be986b168d662e0ba64c2fd88 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 26 Jan 2018 11:48:25 +0000 Subject: VSOCK: set POLLOUT | POLLWRNORM for TCP_CLOSING select(2) with wfds but no rfds must return when the socket is shut down by the peer. This way userspace notices socket activity and gets -EPIPE from the next write(2). Currently select(2) does not return for virtio-vsock when a SEND+RCV shutdown packet is received. This is because vsock_poll() only sets POLLOUT | POLLWRNORM for TCP_CLOSE, not the TCP_CLOSING state that the socket is in when the shutdown is received. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5d28abf87fbf..c9473d698525 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -951,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, * POLLOUT|POLLWRNORM when peer is closed and nothing to read, * but local send is not shutdown. 
*/ - if (sk->sk_state == TCP_CLOSE) { + if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) mask |= POLLOUT | POLLWRNORM; -- cgit v1.2.3 From 00cb9f4f5ef5e98653d4726836a4482180bd2efd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 26 Jan 2018 17:55:30 +0000 Subject: regulator: Fix build error 3d67fe950707 (regulator: core: Refactor regulator_list_voltage()) missed one user of regulator_list_voltage(), update for that. Signed-off-by: Mark Brown --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 2dccc4b3766e..42681c10cbe4 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2980,7 +2980,7 @@ static int regulator_set_voltage_unlocked(struct regulator *regulator, goto out2; } - best_supply_uV = _regulator_list_voltage(regulator, selector, 0); + best_supply_uV = _regulator_list_voltage(rdev, selector, 0); if (best_supply_uV < 0) { ret = best_supply_uV; goto out2; -- cgit v1.2.3 From 796baeeef85a40b3495a907fb7425086e7010102 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Fri, 26 Jan 2018 20:12:41 +0000 Subject: block: remove smart1,2.h smart1,2.h is unused since commit d436641439e0 ("cpqarray: remove it from the kernel") Remove it from tree. 
Signed-off-by: Corentin Labbe Signed-off-by: Jens Axboe --- drivers/block/smart1,2.h | 278 ----------------------------------------------- 1 file changed, 278 deletions(-) delete mode 100644 drivers/block/smart1,2.h diff --git a/drivers/block/smart1,2.h b/drivers/block/smart1,2.h deleted file mode 100644 index e5565fbaeb30..000000000000 --- a/drivers/block/smart1,2.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Disk Array driver for Compaq SMART2 Controllers - * Copyright 1998 Compaq Computer Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Questions/Comments/Bugfixes to iss_storagedev@hp.com - * - * If you want to make changes, improve or add functionality to this - * driver, you'll probably need the Compaq Array Controller Interface - * Specificiation (Document number ECG086/1198) - */ - -/* - * This file contains the controller communication implementation for - * Compaq SMART-1 and SMART-2 controllers. 
To the best of my knowledge, - * this should support: - * - * PCI: - * SMART-2/P, SMART-2DH, SMART-2SL, SMART-221, SMART-3100ES, SMART-3200 - * Integerated SMART Array Controller, SMART-4200, SMART-4250ES - * - * EISA: - * SMART-2/E, SMART, IAES, IDA-2, IDA - */ - -/* - * Memory mapped FIFO interface (SMART 42xx cards) - */ -static void smart4_submit_command(ctlr_info_t *h, cmdlist_t *c) -{ - writel(c->busaddr, h->vaddr + S42XX_REQUEST_PORT_OFFSET); -} - -/* - * This card is the opposite of the other cards. - * 0 turns interrupts on... - * 0x08 turns them off... - */ -static void smart4_intr_mask(ctlr_info_t *h, unsigned long val) -{ - if (val) - { /* Turn interrupts on */ - writel(0, h->vaddr + S42XX_REPLY_INTR_MASK_OFFSET); - } else /* Turn them off */ - { - writel( S42XX_INTR_OFF, - h->vaddr + S42XX_REPLY_INTR_MASK_OFFSET); - } -} - -/* - * For older cards FIFO Full = 0. - * On this card 0 means there is room, anything else FIFO Full. - * - */ -static unsigned long smart4_fifo_full(ctlr_info_t *h) -{ - - return (!readl(h->vaddr + S42XX_REQUEST_PORT_OFFSET)); -} - -/* This type of controller returns -1 if the fifo is empty, - * Not 0 like the others. - * And we need to let it know we read a value out - */ -static unsigned long smart4_completed(ctlr_info_t *h) -{ - long register_value - = readl(h->vaddr + S42XX_REPLY_PORT_OFFSET); - - /* Fifo is empty */ - if( register_value == 0xffffffff) - return 0; - - /* Need to let it know we got the reply */ - /* We do this by writing a 0 to the port we just read from */ - writel(0, h->vaddr + S42XX_REPLY_PORT_OFFSET); - - return ((unsigned long) register_value); -} - - /* - * This hardware returns interrupt pending at a different place and - * it does not tell us if the fifo is empty, we will have check - * that by getting a 0 back from the command_completed call. 
- */ -static unsigned long smart4_intr_pending(ctlr_info_t *h) -{ - unsigned long register_value = - readl(h->vaddr + S42XX_INTR_STATUS); - - if( register_value & S42XX_INTR_PENDING) - return FIFO_NOT_EMPTY; - return 0 ; -} - -static struct access_method smart4_access = { - smart4_submit_command, - smart4_intr_mask, - smart4_fifo_full, - smart4_intr_pending, - smart4_completed, -}; - -/* - * Memory mapped FIFO interface (PCI SMART2 and SMART 3xxx cards) - */ -static void smart2_submit_command(ctlr_info_t *h, cmdlist_t *c) -{ - writel(c->busaddr, h->vaddr + COMMAND_FIFO); -} - -static void smart2_intr_mask(ctlr_info_t *h, unsigned long val) -{ - writel(val, h->vaddr + INTR_MASK); -} - -static unsigned long smart2_fifo_full(ctlr_info_t *h) -{ - return readl(h->vaddr + COMMAND_FIFO); -} - -static unsigned long smart2_completed(ctlr_info_t *h) -{ - return readl(h->vaddr + COMMAND_COMPLETE_FIFO); -} - -static unsigned long smart2_intr_pending(ctlr_info_t *h) -{ - return readl(h->vaddr + INTR_PENDING); -} - -static struct access_method smart2_access = { - smart2_submit_command, - smart2_intr_mask, - smart2_fifo_full, - smart2_intr_pending, - smart2_completed, -}; - -/* - * IO access for SMART-2/E cards - */ -static void smart2e_submit_command(ctlr_info_t *h, cmdlist_t *c) -{ - outl(c->busaddr, h->io_mem_addr + COMMAND_FIFO); -} - -static void smart2e_intr_mask(ctlr_info_t *h, unsigned long val) -{ - outl(val, h->io_mem_addr + INTR_MASK); -} - -static unsigned long smart2e_fifo_full(ctlr_info_t *h) -{ - return inl(h->io_mem_addr + COMMAND_FIFO); -} - -static unsigned long smart2e_completed(ctlr_info_t *h) -{ - return inl(h->io_mem_addr + COMMAND_COMPLETE_FIFO); -} - -static unsigned long smart2e_intr_pending(ctlr_info_t *h) -{ - return inl(h->io_mem_addr + INTR_PENDING); -} - -static struct access_method smart2e_access = { - smart2e_submit_command, - smart2e_intr_mask, - smart2e_fifo_full, - smart2e_intr_pending, - smart2e_completed, -}; - -/* - * IO access for older 
SMART-1 type cards - */ -#define SMART1_SYSTEM_MASK 0xC8E -#define SMART1_SYSTEM_DOORBELL 0xC8F -#define SMART1_LOCAL_MASK 0xC8C -#define SMART1_LOCAL_DOORBELL 0xC8D -#define SMART1_INTR_MASK 0xC89 -#define SMART1_LISTADDR 0xC90 -#define SMART1_LISTLEN 0xC94 -#define SMART1_TAG 0xC97 -#define SMART1_COMPLETE_ADDR 0xC98 -#define SMART1_LISTSTATUS 0xC9E - -#define CHANNEL_BUSY 0x01 -#define CHANNEL_CLEAR 0x02 - -static void smart1_submit_command(ctlr_info_t *h, cmdlist_t *c) -{ - /* - * This __u16 is actually a bunch of control flags on SMART - * and below. We want them all to be zero. - */ - c->hdr.size = 0; - - outb(CHANNEL_CLEAR, h->io_mem_addr + SMART1_SYSTEM_DOORBELL); - - outl(c->busaddr, h->io_mem_addr + SMART1_LISTADDR); - outw(c->size, h->io_mem_addr + SMART1_LISTLEN); - - outb(CHANNEL_BUSY, h->io_mem_addr + SMART1_LOCAL_DOORBELL); -} - -static void smart1_intr_mask(ctlr_info_t *h, unsigned long val) -{ - if (val == 1) { - outb(0xFD, h->io_mem_addr + SMART1_SYSTEM_DOORBELL); - outb(CHANNEL_BUSY, h->io_mem_addr + SMART1_LOCAL_DOORBELL); - outb(0x01, h->io_mem_addr + SMART1_INTR_MASK); - outb(0x01, h->io_mem_addr + SMART1_SYSTEM_MASK); - } else { - outb(0, h->io_mem_addr + 0xC8E); - } -} - -static unsigned long smart1_fifo_full(ctlr_info_t *h) -{ - unsigned char chan; - chan = inb(h->io_mem_addr + SMART1_SYSTEM_DOORBELL) & CHANNEL_CLEAR; - return chan; -} - -static unsigned long smart1_completed(ctlr_info_t *h) -{ - unsigned char status; - unsigned long cmd; - - if (inb(h->io_mem_addr + SMART1_SYSTEM_DOORBELL) & CHANNEL_BUSY) { - outb(CHANNEL_BUSY, h->io_mem_addr + SMART1_SYSTEM_DOORBELL); - - cmd = inl(h->io_mem_addr + SMART1_COMPLETE_ADDR); - status = inb(h->io_mem_addr + SMART1_LISTSTATUS); - - outb(CHANNEL_CLEAR, h->io_mem_addr + SMART1_LOCAL_DOORBELL); - - /* - * this is x86 (actually compaq x86) only, so it's ok - */ - if (cmd) ((cmdlist_t*)bus_to_virt(cmd))->req.hdr.rcode = status; - } else { - cmd = 0; - } - return cmd; -} - -static unsigned long 
smart1_intr_pending(ctlr_info_t *h) -{ - unsigned char chan; - chan = inb(h->io_mem_addr + SMART1_SYSTEM_DOORBELL) & CHANNEL_BUSY; - return chan; -} - -static struct access_method smart1_access = { - smart1_submit_command, - smart1_intr_mask, - smart1_fifo_full, - smart1_intr_pending, - smart1_completed, -}; -- cgit v1.2.3 From b327a717e506980399464e304e363f94f95eb7a1 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Wed, 24 Jan 2018 13:46:10 +0100 Subject: CIFS: make IPC a regular tcon * Remove ses->ipc_tid. * Make IPC$ regular tcon. * Add a direct pointer to it in ses->tcon_ipc. * Distinguish PIPE tcon from IPC tcon by adding a tcon->pipe flag. All IPC tcons are pipes but not all pipes are IPC. * All TreeConnect functions now cannot take a NULL tcon object. The IPC tcon has the same lifetime as the session it belongs to. It is created when the session is created and destroyed when the session is destroyed. Since no mounts directly refer to the IPC tcon, its refcount should always be set to initialisation value (1). Thus we make sure cifs_put_tcon() skips it. If the mount request resulting in a new session being created requires encryption, try to require it too for IPC. * set SERVER_NAME_LENGTH to serverName actual size The maximum length of an ipv6 string representation is defined in INET6_ADDRSTRLEN as 45+1 for null but lets keep what we know works. 
Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/cifsglob.h | 14 ++--- fs/cifs/cifssmb.c | 7 +-- fs/cifs/connect.c | 150 ++++++++++++++++++++++++++++++++++++++++------------- fs/cifs/inode.c | 2 +- fs/cifs/smb2pdu.c | 36 +++---------- 5 files changed, 133 insertions(+), 76 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 678e638c1e69..48f7c197cd2d 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -64,8 +64,8 @@ #define RFC1001_NAME_LEN 15 #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) -/* currently length of NIP6_FMT */ -#define SERVER_NAME_LENGTH 40 +/* maximum length of ip addr as a string (including ipv6 and sctp) */ +#define SERVER_NAME_LENGTH 80 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) /* echo interval in seconds */ @@ -833,12 +833,12 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) struct cifs_ses { struct list_head smb_ses_list; struct list_head tcon_list; + struct cifs_tcon *tcon_ipc; struct mutex session_mutex; struct TCP_Server_Info *server; /* pointer to server info */ int ses_count; /* reference counter */ enum statusEnum status; unsigned overrideSecFlg; /* if non-zero override global sec flags */ - __u32 ipc_tid; /* special tid for connection to IPC share */ char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ @@ -846,8 +846,7 @@ struct cifs_ses { kuid_t linux_uid; /* overriding owner of files on the mount */ kuid_t cred_uid; /* owner of credentials */ unsigned int capabilities; - char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for - TCP names - will ipv6 and sctp addresses fit? 
*/ + char serverName[SERVER_NAME_LEN_WITH_NULL]; char *user_name; /* must not be null except during init of sess and after mount option parsing we fill it */ char *domainName; @@ -942,7 +941,9 @@ struct cifs_tcon { FILE_SYSTEM_DEVICE_INFO fsDevInfo; FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ FILE_SYSTEM_UNIX_INFO fsUnixInfo; - bool ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */ + bool ipc:1; /* set if connection to IPC$ share (always also pipe) */ + bool pipe:1; /* set if connection to pipe share */ + bool print:1; /* set if connection to printer share */ bool retry:1; bool nocase:1; bool seal:1; /* transport encryption for this mounted share */ @@ -955,7 +956,6 @@ struct cifs_tcon { bool need_reopen_files:1; /* need to reopen tcon file handles */ bool use_resilient:1; /* use resilient instead of durable handles */ bool use_persistent:1; /* use persistent instead of durable handles */ - bool print:1; /* set if connection to printer share */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 49cf999f3d46..4e0922d24eb2 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4833,10 +4833,11 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, *target_nodes = NULL; cifs_dbg(FYI, "In GetDFSRefer the path %s\n", search_name); - if (ses == NULL) + if (ses == NULL || ses->tcon_ipc == NULL) return -ENODEV; + getDFSRetry: - rc = smb_init(SMB_COM_TRANSACTION2, 15, NULL, (void **) &pSMB, + rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB, (void **) &pSMBr); if (rc) return rc; @@ -4844,7 +4845,7 @@ getDFSRetry: /* server pointer checked in called function, but should never be null here anyway */ pSMB->hdr.Mid = get_next_mid(ses->server); - pSMB->hdr.Tid = ses->ipc_tid; + pSMB->hdr.Tid = ses->tcon_ipc->tid; pSMB->hdr.Uid = ses->Suid; if (ses->capabilities & CAP_STATUS32) pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS; diff --git a/fs/cifs/connect.c 
b/fs/cifs/connect.c index 63c5d85fe25e..8b5e401f547a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -354,11 +354,12 @@ cifs_reconnect(struct TCP_Server_Info *server) list_for_each(tmp, &server->smb_ses_list) { ses = list_entry(tmp, struct cifs_ses, smb_ses_list); ses->need_reconnect = true; - ses->ipc_tid = 0; list_for_each(tmp2, &ses->tcon_list) { tcon = list_entry(tmp2, struct cifs_tcon, tcon_list); tcon->need_reconnect = true; } + if (ses->tcon_ipc) + ses->tcon_ipc->need_reconnect = true; } spin_unlock(&cifs_tcp_ses_lock); @@ -2426,6 +2427,93 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol) return 1; } +/** + * cifs_setup_ipc - helper to setup the IPC tcon for the session + * + * A new IPC connection is made and stored in the session + * tcon_ipc. The IPC tcon has the same lifetime as the session. + */ +static int +cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info) +{ + int rc = 0, xid; + struct cifs_tcon *tcon; + struct nls_table *nls_codepage; + char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0}; + bool seal = false; + + /* + * If the mount request that resulted in the creation of the + * session requires encryption, force IPC to be encrypted too. 
+ */ + if (volume_info->seal) { + if (ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) + seal = true; + else { + cifs_dbg(VFS, + "IPC: server doesn't support encryption\n"); + return -EOPNOTSUPP; + } + } + + tcon = tconInfoAlloc(); + if (tcon == NULL) + return -ENOMEM; + + snprintf(unc, sizeof(unc), "\\\\%s\\IPC$", ses->serverName); + + /* cannot fail */ + nls_codepage = load_nls_default(); + + xid = get_xid(); + tcon->ses = ses; + tcon->ipc = true; + tcon->seal = seal; + rc = ses->server->ops->tree_connect(xid, ses, unc, tcon, nls_codepage); + free_xid(xid); + + if (rc) { + cifs_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc); + tconInfoFree(tcon); + goto out; + } + + cifs_dbg(FYI, "IPC tcon rc = %d ipc tid = %d\n", rc, tcon->tid); + + ses->tcon_ipc = tcon; +out: + unload_nls(nls_codepage); + return rc; +} + +/** + * cifs_free_ipc - helper to release the session IPC tcon + * + * Needs to be called everytime a session is destroyed + */ +static int +cifs_free_ipc(struct cifs_ses *ses) +{ + int rc = 0, xid; + struct cifs_tcon *tcon = ses->tcon_ipc; + + if (tcon == NULL) + return 0; + + if (ses->server->ops->tree_disconnect) { + xid = get_xid(); + rc = ses->server->ops->tree_disconnect(xid, tcon); + free_xid(xid); + } + + if (rc) + cifs_dbg(FYI, "failed to disconnect IPC tcon (rc=%d)\n", rc); + + tconInfoFree(tcon); + ses->tcon_ipc = NULL; + return rc; +} + static struct cifs_ses * cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) { @@ -2466,6 +2554,8 @@ cifs_put_smb_ses(struct cifs_ses *ses) ses->status = CifsExiting; spin_unlock(&cifs_tcp_ses_lock); + cifs_free_ipc(ses); + if (ses->status == CifsExiting && server->ops->logoff) { xid = get_xid(); rc = server->ops->logoff(xid, ses); @@ -2710,6 +2800,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) spin_unlock(&cifs_tcp_ses_lock); free_xid(xid); + + cifs_setup_ipc(ses, volume_info); + return ses; get_ses_fail: @@ -2754,8 +2847,16 @@ void 
cifs_put_tcon(struct cifs_tcon *tcon) { unsigned int xid; - struct cifs_ses *ses = tcon->ses; + struct cifs_ses *ses; + /* + * IPC tcon share the lifetime of their session and are + * destroyed in the session put function + */ + if (tcon == NULL || tcon->ipc) + return; + + ses = tcon->ses; cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); spin_lock(&cifs_tcp_ses_lock); if (--tcon->tc_count > 0) { @@ -3031,39 +3132,17 @@ get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const char *old_path, const struct nls_table *nls_codepage, unsigned int *num_referrals, struct dfs_info3_param **referrals, int remap) { - char *temp_unc; int rc = 0; - if (!ses->server->ops->tree_connect || !ses->server->ops->get_dfs_refer) + if (!ses->server->ops->get_dfs_refer) return -ENOSYS; *num_referrals = 0; *referrals = NULL; - if (ses->ipc_tid == 0) { - temp_unc = kmalloc(2 /* for slashes */ + - strnlen(ses->serverName, SERVER_NAME_LEN_WITH_NULL * 2) - + 1 + 4 /* slash IPC$ */ + 2, GFP_KERNEL); - if (temp_unc == NULL) - return -ENOMEM; - temp_unc[0] = '\\'; - temp_unc[1] = '\\'; - strcpy(temp_unc + 2, ses->serverName); - strcpy(temp_unc + 2 + strlen(ses->serverName), "\\IPC$"); - rc = ses->server->ops->tree_connect(xid, ses, temp_unc, NULL, - nls_codepage); - cifs_dbg(FYI, "Tcon rc = %d ipc_tid = %d\n", rc, ses->ipc_tid); - kfree(temp_unc); - } - if (rc == 0) - rc = ses->server->ops->get_dfs_refer(xid, ses, old_path, - referrals, num_referrals, - nls_codepage, remap); - /* - * BB - map targetUNCs to dfs_info3 structures, here or in - * ses->server->ops->get_dfs_refer. 
- */ - + rc = ses->server->ops->get_dfs_refer(xid, ses, old_path, + referrals, num_referrals, + nls_codepage, remap); return rc; } @@ -3828,7 +3907,7 @@ try_mount_again: tcon->unix_ext = 0; /* server does not support them */ /* do not care if a following call succeed - informational */ - if (!tcon->ipc && server->ops->qfs_tcon) + if (!tcon->pipe && server->ops->qfs_tcon) server->ops->qfs_tcon(xid, tcon); cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info); @@ -3958,8 +4037,7 @@ out: } /* - * Issue a TREE_CONNECT request. Note that for IPC$ shares, that the tcon - * pointer may be NULL. + * Issue a TREE_CONNECT request. */ int CIFSTCon(const unsigned int xid, struct cifs_ses *ses, @@ -3995,7 +4073,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, pSMB->AndXCommand = 0xFF; pSMB->Flags = cpu_to_le16(TCON_EXTENDED_SECINFO); bcc_ptr = &pSMB->Password[0]; - if (!tcon || (ses->server->sec_mode & SECMODE_USER)) { + if (tcon->pipe || (ses->server->sec_mode & SECMODE_USER)) { pSMB->PasswordLength = cpu_to_le16(1); /* minimum */ *bcc_ptr = 0; /* password is null byte */ bcc_ptr++; /* skip password */ @@ -4067,7 +4145,7 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, 0); /* above now done in SendReceive */ - if ((rc == 0) && (tcon != NULL)) { + if (rc == 0) { bool is_unicode; tcon->tidStatus = CifsGood; @@ -4087,7 +4165,8 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') && (bcc_ptr[2] == 'C')) { cifs_dbg(FYI, "IPC connection\n"); - tcon->ipc = 1; + tcon->ipc = true; + tcon->pipe = true; } } else if (length == 2) { if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) { @@ -4114,9 +4193,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, else tcon->Flags = 0; cifs_dbg(FYI, "Tcon flags: 0x%x\n", tcon->Flags); - } else if ((rc == 0) && tcon == NULL) { - /* all we need to save for IPC$ connection */ - ses->ipc_tid = smb_buffer_response->Tid; } cifs_buf_release(smb_buffer); diff --git 
a/fs/cifs/inode.c b/fs/cifs/inode.c index ecb99079363a..8f9a8cc7cc62 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1049,7 +1049,7 @@ iget_no_retry: tcon->resource_id = CIFS_I(inode)->uniqueid; #endif - if (rc && tcon->ipc) { + if (rc && tcon->pipe) { cifs_dbg(FYI, "ipc connection - fake read inode\n"); spin_lock(&inode->i_lock); inode->i_mode |= S_IFDIR; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7900aec7f92f..2943adc754e4 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1258,8 +1258,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, } /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */ - if (tcon) - tcon->tid = 0; + tcon->tid = 0; rc = smb2_plain_req_init(SMB2_TREE_CONNECT, tcon, (void **) &req, &total_len); @@ -1268,15 +1267,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, return rc; } - if (tcon == NULL) { - if ((ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)) - flags |= CIFS_TRANSFORM_REQ; - - /* since no tcon, smb2_init can not do this, so do here */ - req->sync_hdr.SessionId = ses->Suid; - if (ses->server->sign) - req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; - } else if (encryption_required(tcon)) + if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; iov[0].iov_base = (char *)req; @@ -1302,21 +1293,16 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, goto tcon_error_exit; } - if (tcon == NULL) { - ses->ipc_tid = rsp->hdr.sync_hdr.TreeId; - goto tcon_exit; - } - switch (rsp->ShareType) { case SMB2_SHARE_TYPE_DISK: cifs_dbg(FYI, "connection to disk share\n"); break; case SMB2_SHARE_TYPE_PIPE: - tcon->ipc = true; + tcon->pipe = true; cifs_dbg(FYI, "connection to pipe share\n"); break; case SMB2_SHARE_TYPE_PRINT: - tcon->ipc = true; + tcon->print = true; cifs_dbg(FYI, "connection to printer\n"); break; default: @@ -1892,16 +1878,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (rc) 
return rc; - if (use_ipc) { - if (ses->ipc_tid == 0) { - cifs_small_buf_release(req); - return -ENOTCONN; - } - - cifs_dbg(FYI, "replacing tid 0x%x with IPC tid 0x%x\n", - req->sync_hdr.TreeId, ses->ipc_tid); - req->sync_hdr.TreeId = ses->ipc_tid; - } if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -2317,6 +2293,10 @@ void smb2_reconnect_server(struct work_struct *work) tcon_exist = true; } } + if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) { + list_add_tail(&ses->tcon_ipc->rlist, &tmp_list); + tcon_exist = true; + } } /* * Get the reference to server struct to be sure that the last call of -- cgit v1.2.3 From 63a83b861c47dba9e0f46b98423723a6a3d97fb1 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Wed, 24 Jan 2018 13:46:11 +0100 Subject: CIFS: use tcon_ipc instead of use_ipc parameter of SMB2_ioctl Since IPC now has a tcon object, the caller can just pass it. This allows domain-based DFS requests to work with smb2+. Link: https://bugzilla.samba.org/show_bug.cgi?id=12917 Fixes: 9d49640a21bf ("CIFS: implement get_dfs_refer for SMB2+") Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2file.c | 2 +- fs/cifs/smb2ops.c | 53 ++++++++++++++++++++++------------------------------- fs/cifs/smb2pdu.c | 4 +--- fs/cifs/smb2proto.h | 3 +-- 4 files changed, 25 insertions(+), 37 deletions(-) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index b4b1f0305f29..12af5dba742b 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -74,7 +74,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, nr_ioctl_req.Reserved = 0; rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, - true /* is_fsctl */, false /* use_ipc */, + true /* is_fsctl */, (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), NULL, NULL /* no return info */); if (rc == -EOPNOTSUPP) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e3393ff5d458..eb68e2fcc500 100644 
--- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -293,7 +293,6 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, - false /* use_ipc */, NULL /* no data input */, 0 /* no data input */, (char **)&out_buf, &ret_data_len); if (rc != 0) @@ -792,7 +791,6 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, - false /* use_ipc */, NULL, 0 /* no input */, (char **)&res_key, &ret_data_len); @@ -858,8 +856,7 @@ smb2_copychunk_range(const unsigned int xid, /* Request server copy to target from src identified by key */ rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, - true /* is_fsctl */, false /* use_ipc */, - (char *)pcchunk, + true /* is_fsctl */, (char *)pcchunk, sizeof(struct copychunk_ioctl), (char **)&retbuf, &ret_data_len); if (rc == 0) { @@ -1020,7 +1017,7 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_SPARSE, - true /* is_fctl */, false /* use_ipc */, + true /* is_fctl */, &setsparse, 1, NULL, NULL); if (rc) { tcon->broken_sparse_sup = true; @@ -1091,7 +1088,7 @@ smb2_duplicate_extents(const unsigned int xid, rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_DUPLICATE_EXTENTS_TO_FILE, - true /* is_fsctl */, false /* use_ipc */, + true /* is_fsctl */, (char *)&dup_ext_buf, sizeof(struct duplicate_extents_to_file), NULL, @@ -1126,7 +1123,7 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_INTEGRITY_INFORMATION, - true /* is_fsctl */, false /* use_ipc */, + true /* is_fsctl */, 
(char *)&integr_info, sizeof(struct fsctl_set_integrity_information_req), NULL, @@ -1146,7 +1143,7 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SRV_ENUMERATE_SNAPSHOTS, - true /* is_fsctl */, false /* use_ipc */, + true /* is_fsctl */, NULL, 0 /* no input data */, (char **)&retbuf, &ret_data_len); @@ -1365,16 +1362,20 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, cifs_dbg(FYI, "smb2_get_dfs_refer path <%s>\n", search_name); /* - * Use any tcon from the current session. Here, the first one. + * Try to use the IPC tcon, otherwise just use any */ - spin_lock(&cifs_tcp_ses_lock); - tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon, - tcon_list); - if (tcon) - tcon->tc_count++; - spin_unlock(&cifs_tcp_ses_lock); + tcon = ses->tcon_ipc; + if (tcon == NULL) { + spin_lock(&cifs_tcp_ses_lock); + tcon = list_first_entry_or_null(&ses->tcon_list, + struct cifs_tcon, + tcon_list); + if (tcon) + tcon->tc_count++; + spin_unlock(&cifs_tcp_ses_lock); + } - if (!tcon) { + if (tcon == NULL) { cifs_dbg(VFS, "session %p has no tcon available for a dfs referral request\n", ses); rc = -ENOTCONN; @@ -1403,20 +1404,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, memcpy(dfs_req->RequestFileName, utf16_path, utf16_path_len); do { - /* try first with IPC */ rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_DFS_GET_REFERRALS, - true /* is_fsctl */, true /* use_ipc */, + true /* is_fsctl */, (char *)dfs_req, dfs_req_size, (char **)&dfs_rsp, &dfs_rsp_size); - if (rc == -ENOTCONN) { - /* try with normal tcon */ - rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, - FSCTL_DFS_GET_REFERRALS, - true /* is_fsctl */, false /*use_ipc*/, - (char *)dfs_req, dfs_req_size, - (char **)&dfs_rsp, &dfs_rsp_size); - } } while (rc == -EAGAIN); if (rc) { @@ -1435,7 +1427,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses 
*ses, } out: - if (tcon) { + if (tcon && !tcon->ipc) { + /* ipc tcons are not refcounted */ spin_lock(&cifs_tcp_ses_lock); tcon->tc_count--; spin_unlock(&cifs_tcp_ses_lock); @@ -1727,8 +1720,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - true /* is_fctl */, false /* use_ipc */, - (char *)&fsctl_buf, + true /* is_fctl */, (char *)&fsctl_buf, sizeof(struct file_zero_data_information), NULL, NULL); free_xid(xid); return rc; @@ -1762,8 +1754,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - true /* is_fctl */, false /* use_ipc */, - (char *)&fsctl_buf, + true /* is_fctl */, (char *)&fsctl_buf, sizeof(struct file_zero_data_information), NULL, NULL); free_xid(xid); return rc; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2943adc754e4..17b7f3aed195 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -680,7 +680,6 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, - false /* use_ipc */, (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), (char **)&pneg_rsp, &rsplen); @@ -1841,7 +1840,7 @@ creat_exit: */ int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, - u64 volatile_fid, u32 opcode, bool is_fsctl, bool use_ipc, + u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, u32 indatalen, char **out_data, u32 *plen /* returned data len */) { @@ -2006,7 +2005,6 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SET_COMPRESSION, true /* is_fsctl */, - false /* use_ipc */, (char *)&fsctl_input /* data input */, 2 /* in data len */, &ret_data /* out 
data */, NULL); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index e9ab5227e7a8..05287b01f596 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -125,8 +125,7 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, struct smb2_err_rsp **err_buf); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, bool use_ipc, - char *in_data, u32 indatalen, + bool is_fsctl, char *in_data, u32 indatalen, char **out_data, u32 *plen /* returned data len */); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); -- cgit v1.2.3 From 02cf5905e35df7e08691b6becda167858486da9a Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Wed, 24 Jan 2018 13:46:12 +0100 Subject: CIFS: dump IPC tcon in debug proc file dump it as first share with an "IPC: " prefix. Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/cifs_debug.c | 61 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 05be9b47eb0c..f491340f32ad 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -110,6 +110,32 @@ void cifs_dump_mids(struct TCP_Server_Info *server) } #ifdef CONFIG_PROC_FS +static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon) +{ + __u32 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); + + seq_printf(m, "%s Mounts: %d ", tcon->treeName, tcon->tc_count); + if (tcon->nativeFileSystem) + seq_printf(m, "Type: %s ", tcon->nativeFileSystem); + seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x\n\tPathComponentMax: %d Status: %d", + le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), + le32_to_cpu(tcon->fsAttrInfo.Attributes), + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), + tcon->tidStatus); + if (dev_type == FILE_DEVICE_DISK) + 
seq_puts(m, " type: DISK "); + else if (dev_type == FILE_DEVICE_CD_ROM) + seq_puts(m, " type: CDROM "); + else + seq_printf(m, " type: %d ", dev_type); + if (tcon->ses->server->ops->dump_share_caps) + tcon->ses->server->ops->dump_share_caps(m, tcon); + + if (tcon->need_reconnect) + seq_puts(m, "\tDISCONNECTED "); + seq_putc(m, '\n'); +} + static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct list_head *tmp1, *tmp2, *tmp3; @@ -118,7 +144,6 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) struct cifs_ses *ses; struct cifs_tcon *tcon; int i, j; - __u32 dev_type; seq_puts(m, "Display Internal CIFS Data Structures for Debugging\n" @@ -260,35 +285,19 @@ skip_rdma: seq_puts(m, "\n\tShares:"); j = 0; + + seq_printf(m, "\n\t%d) IPC: ", j); + if (ses->tcon_ipc) + cifs_debug_tcon(m, ses->tcon_ipc); + else + seq_puts(m, "none\n"); + list_for_each(tmp3, &ses->tcon_list) { tcon = list_entry(tmp3, struct cifs_tcon, tcon_list); ++j; - dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); - seq_printf(m, "\n\t%d) %s Mounts: %d ", j, - tcon->treeName, tcon->tc_count); - if (tcon->nativeFileSystem) { - seq_printf(m, "Type: %s ", - tcon->nativeFileSystem); - } - seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" - "\n\tPathComponentMax: %d Status: %d", - le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), - le32_to_cpu(tcon->fsAttrInfo.Attributes), - le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), - tcon->tidStatus); - if (dev_type == FILE_DEVICE_DISK) - seq_puts(m, " type: DISK "); - else if (dev_type == FILE_DEVICE_CD_ROM) - seq_puts(m, " type: CDROM "); - else - seq_printf(m, " type: %d ", dev_type); - if (server->ops->dump_share_caps) - server->ops->dump_share_caps(m, tcon); - - if (tcon->need_reconnect) - seq_puts(m, "\tDISCONNECTED "); - seq_putc(m, '\n'); + seq_printf(m, "\n\t%d) ", j); + cifs_debug_tcon(m, tcon); } seq_puts(m, "\n\tMIDs:\n"); -- cgit v1.2.3 From 2026b06e9ce8521dae1a71654dc5a39e7ce3b871 Mon Sep 17 00:00:00 2001 From: 
Steve French Date: Wed, 24 Jan 2018 23:07:41 -0600 Subject: Cleanup some minor endian issues in smb3 rdma Minor cleanup of some sparse warnings (including a few misc endian fixes for the new smb3 rdma code) Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/smb2pdu.c | 22 +++++++++++----------- fs/cifs/smb2pdu.h | 14 +++++++------- fs/cifs/smbdirect.c | 5 +++-- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 17b7f3aed195..63778ac22fd9 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2463,13 +2463,13 @@ smb2_new_read_req(void **buf, unsigned int *total_len, if (need_invalidate) req->Channel = SMB2_CHANNEL_RDMA_V1; req->ReadChannelInfoOffset = - offsetof(struct smb2_read_plain_req, Buffer); + cpu_to_le16(offsetof(struct smb2_read_plain_req, Buffer)); req->ReadChannelInfoLength = - sizeof(struct smbd_buffer_descriptor_v1); + cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; - v1->offset = rdata->mr->mr->iova; - v1->token = rdata->mr->mr->rkey; - v1->length = rdata->mr->mr->length; + v1->offset = cpu_to_le64(rdata->mr->mr->iova); + v1->token = cpu_to_le32(rdata->mr->mr->rkey); + v1->length = cpu_to_le32(rdata->mr->mr->length); *total_len += sizeof(*v1) - 1; } @@ -2840,18 +2840,18 @@ smb2_async_writev(struct cifs_writedata *wdata, req->Length = 0; req->DataOffset = 0; req->RemainingBytes = - (wdata->nr_pages-1)*PAGE_SIZE + wdata->tailsz; + cpu_to_le32((wdata->nr_pages-1)*PAGE_SIZE + wdata->tailsz); req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE; if (need_invalidate) req->Channel = SMB2_CHANNEL_RDMA_V1; req->WriteChannelInfoOffset = - offsetof(struct smb2_write_req, Buffer); + cpu_to_le16(offsetof(struct smb2_write_req, Buffer)); req->WriteChannelInfoLength = - sizeof(struct smbd_buffer_descriptor_v1); + cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; - 
v1->offset = wdata->mr->mr->iova; - v1->token = wdata->mr->mr->rkey; - v1->length = wdata->mr->mr->length; + v1->offset = cpu_to_le64(wdata->mr->mr->iova); + v1->token = cpu_to_le32(wdata->mr->mr->rkey); + v1->length = cpu_to_le32(wdata->mr->mr->length); } #endif /* 4 for rfc1002 length field and 1 for Buffer */ diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 19d34881815f..6eb9f9691ed4 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -830,9 +830,9 @@ struct smb2_flush_rsp { #define SMB2_READFLAG_READ_UNBUFFERED 0x01 /* Channel field for read and write: exactly one of following flags can be set*/ -#define SMB2_CHANNEL_NONE 0x00000000 -#define SMB2_CHANNEL_RDMA_V1 0x00000001 /* SMB3 or later */ -#define SMB2_CHANNEL_RDMA_V1_INVALIDATE 0x00000002 /* SMB3.02 or later */ +#define SMB2_CHANNEL_NONE cpu_to_le32(0x00000000) +#define SMB2_CHANNEL_RDMA_V1 cpu_to_le32(0x00000001) /* SMB3 or later */ +#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002) /* >= SMB3.02 */ /* SMB2 read request without RFC1001 length at the beginning */ struct smb2_read_plain_req { @@ -847,8 +847,8 @@ struct smb2_read_plain_req { __le32 MinimumCount; __le32 Channel; /* MBZ except for SMB3 or later */ __le32 RemainingBytes; - __le16 ReadChannelInfoOffset; /* Reserved MBZ */ - __le16 ReadChannelInfoLength; /* Reserved MBZ */ + __le16 ReadChannelInfoOffset; + __le16 ReadChannelInfoLength; __u8 Buffer[1]; } __packed; @@ -877,8 +877,8 @@ struct smb2_write_req { __u64 VolatileFileId; /* opaque endianness */ __le32 Channel; /* Reserved MBZ */ __le32 RemainingBytes; - __le16 WriteChannelInfoOffset; /* Reserved MBZ */ - __le16 WriteChannelInfoLength; /* Reserved MBZ */ + __le16 WriteChannelInfoOffset; + __le16 WriteChannelInfoLength; __le32 Flags; __u8 Buffer[1]; } __packed; diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index f9234ed83a60..5130492847eb 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1855,7 +1855,8 @@ try_again: * consumed. 
But this will require more changes to upper layer code, and also * need to consider packet boundaries while they still being reassembled. */ -int smbd_recv_buf(struct smbd_connection *info, char *buf, unsigned int size) +static int smbd_recv_buf(struct smbd_connection *info, char *buf, + unsigned int size) { struct smbd_response *response; struct smbd_data_transfer *data_transfer; @@ -1992,7 +1993,7 @@ read_rfc1002_done: * to_read: the length of data to read * return value: actual data read */ -int smbd_recv_page(struct smbd_connection *info, +static int smbd_recv_page(struct smbd_connection *info, struct page *page, unsigned int to_read) { int ret; -- cgit v1.2.3 From 97f4b7276b829a8927ac903a119bef2f963ccc58 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 25 Jan 2018 15:59:39 +0100 Subject: CIFS: zero sensitive data when freeing also replaces memset()+kfree() by kzfree(). Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky Cc: --- fs/cifs/cifsencrypt.c | 3 +-- fs/cifs/connect.c | 6 +++--- fs/cifs/misc.c | 14 ++++---------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 68abbb0db608..f2b0a7f124da 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -325,9 +325,8 @@ int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, { int i; int rc; - char password_with_pad[CIFS_ENCPWD_SIZE]; + char password_with_pad[CIFS_ENCPWD_SIZE] = {0}; - memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); if (password) strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8b5e401f547a..ee2ab86bff5b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1720,7 +1720,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, tmp_end++; if (!(tmp_end < end && tmp_end[1] == delim)) { /* No it is not. 
Set the password to NULL */ - kfree(vol->password); + kzfree(vol->password); vol->password = NULL; break; } @@ -1758,7 +1758,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, options = end; } - kfree(vol->password); + kzfree(vol->password); /* Now build new password string */ temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); @@ -4356,7 +4356,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) reset_cifs_unix_caps(0, tcon, NULL, vol_info); out: kfree(vol_info->username); - kfree(vol_info->password); + kzfree(vol_info->password); kfree(vol_info); return tcon; diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index eea93ac15ef0..a0dbced4a45c 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -98,14 +98,11 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->serverOS); kfree(buf_to_free->serverDomain); kfree(buf_to_free->serverNOS); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); - kfree(buf_to_free->auth_key.response); - kfree(buf_to_free); + kzfree(buf_to_free->auth_key.response); + kzfree(buf_to_free); } struct cifs_tcon * @@ -136,10 +133,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free) } atomic_dec(&tconInfoAllocCount); kfree(buf_to_free->nativeFileSystem); - if (buf_to_free->password) { - memset(buf_to_free->password, 0, strlen(buf_to_free->password)); - kfree(buf_to_free->password); - } + kzfree(buf_to_free->password); kfree(buf_to_free); } -- cgit v1.2.3 From 6b314714ff6337465a730195952e1b1f10d97063 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Jan 2018 09:28:25 -0600 Subject: move a few externs to smbdirect.h to eliminate warning Quiet minor sparse warnings in new SMB3 rdma patch series ("symbol was not declared ...") by moving these externs to smbdirect.h Signed-off-by: Steve French Reviewed-by: 
Ronnie Sahlberg --- fs/cifs/cifs_debug.c | 9 --------- fs/cifs/smbdirect.h | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index f491340f32ad..c7a863219fa3 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -483,15 +483,6 @@ static const struct file_operations cifs_##name##_proc_fops = { \ .write = name##_write, \ } -extern int rdma_readwrite_threshold; -extern int smbd_max_frmr_depth; -extern int smbd_keep_alive_interval; -extern int smbd_max_receive_size; -extern int smbd_max_fragmented_recv_size; -extern int smbd_max_send_size; -extern int smbd_send_credit_target; -extern int smbd_receive_credit_max; - PROC_FILE_DEFINE(rdma_readwrite_threshold); PROC_FILE_DEFINE(smbd_max_frmr_depth); PROC_FILE_DEFINE(smbd_keep_alive_interval); diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index fdb8df84fa1f..f9038daea194 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -24,6 +24,15 @@ #include #include +extern int rdma_readwrite_threshold; +extern int smbd_max_frmr_depth; +extern int smbd_keep_alive_interval; +extern int smbd_max_receive_size; +extern int smbd_max_fragmented_recv_size; +extern int smbd_max_send_size; +extern int smbd_send_credit_target; +extern int smbd_receive_credit_max; + enum keep_alive_status { KEEP_ALIVE_NONE, KEEP_ALIVE_PENDING, -- cgit v1.2.3 From 4a1360d01d7b6bb9d9c295378850b6d1ae134a0f Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Thu, 25 Jan 2018 18:47:52 +0100 Subject: CIFS: document tcon/ses/server refcount dance Signed-off-by: Aurelien Aptel Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/connect.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ee2ab86bff5b..a726f524fb84 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2704,6 +2704,13 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)), } #endif /* 
CONFIG_KEYS */ +/** + * cifs_get_smb_ses - get a session matching @volume_info data from @server + * + * This function assumes it is being called from cifs_mount() where we + * already got a server reference (server refcount +1). See + * cifs_get_tcon() for refcount explanations. + */ static struct cifs_ses * cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) { @@ -2877,6 +2884,26 @@ cifs_put_tcon(struct cifs_tcon *tcon) cifs_put_smb_ses(ses); } +/** + * cifs_get_tcon - get a tcon matching @volume_info data from @ses + * + * - tcon refcount is the number of mount points using the tcon. + * - ses refcount is the number of tcon using the session. + * + * 1. This function assumes it is being called from cifs_mount() where + * we already got a session reference (ses refcount +1). + * + * 2. Since we're in the context of adding a mount point, the end + * result should be either: + * + * a) a new tcon already allocated with refcount=1 (1 mount point) and + * its session refcount incremented (1 new tcon). This +1 was + * already done in (1). + * + * b) an existing tcon with refcount+1 (add a mount point to it) and + * identical ses refcount (no new tcon). Because of (1) we need to + * decrement the ses refcount. 
+ */ static struct cifs_tcon * cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) { @@ -2885,8 +2912,11 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) tcon = cifs_find_tcon(ses, volume_info); if (tcon) { + /* + * tcon has refcount already incremented but we need to + * decrement extra ses reference gotten by caller (case b) + */ cifs_dbg(FYI, "Found match on UNC path\n"); - /* existing tcon already has a reference */ cifs_put_smb_ses(ses); return tcon; } -- cgit v1.2.3 From cd1aca29fa020b6e6edcd3d5b3e49ab877d1bed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Souto?= Date: Thu, 28 Dec 2017 14:23:08 +0100 Subject: cifs: add .splice_write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add splice_write support in cifs vfs using iter_file_splice_write Signed-off-by: Andrés Souto Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 801b63b7900f..a7be591d8e18 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1070,6 +1070,7 @@ const struct file_operations cifs_file_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, @@ -1088,6 +1089,7 @@ const struct file_operations cifs_file_strict_ops = { .flush = cifs_flush, .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, @@ -1107,6 +1109,7 @@ const struct file_operations cifs_file_direct_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, .clone_file_range = 
cifs_clone_file_range, @@ -1124,6 +1127,7 @@ const struct file_operations cifs_file_nobrl_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, @@ -1141,6 +1145,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .flush = cifs_flush, .mmap = cifs_file_strict_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, @@ -1159,6 +1164,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .flush = cifs_flush, .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .unlocked_ioctl = cifs_ioctl, .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, -- cgit v1.2.3 From ab2c64330963a9d96c43751d83d133df20b04b19 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 26 Jan 2018 16:58:32 -0600 Subject: update internal version number for cifs.ko To version 2.11 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 5a10e566f0e6..013ba2aed8d9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.10" +#define CIFS_VERSION "2.11" #endif /* _CIFSFS_H */ -- cgit v1.2.3 From 8a95b74d50825067fb6c8af7f9db03e711b1cb9d Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Thu, 25 Jan 2018 11:59:34 -0800 Subject: x86: Mark hpa as a "Designated Reviewer" for the time being Due to some unfortunate events, I have not been directly involved in the x86 kernel patch flow for a while now. I have also not been able to ramp back up by now like I had hoped to, and after reviewing what I will need to work on both internally at Intel and elsewhere in the near term, it is clear that I am not going to be able to ramp back up until late 2018 at the very earliest. It is not acceptable to not recognize that this load is currently taken by Ingo and Thomas without my direct participation, so I mark myself as R: (designated reviewer) rather than M: (maintainer) until further notice. This is in fact recognizing the de facto situation for the past few years. I have obviously no intention of going away, and I will do everything within my power to improve Linux on x86 and x86 for Linux. This, however, puts credit where it is due and reflects a change of focus. This patch also removes stale entries for portions of the x86 architecture which have not been maintained separately from arch/x86 for a long time. If there is a reason to re-introduce them then that can happen later. Signed-off-by: H. Peter Anvin Signed-off-by: Thomas Gleixner Cc: Bruce Schlobohm Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180125195934.5253-1-hpa@zytor.com Signed-off-by: Ingo Molnar --- MAINTAINERS | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e3581413420c..94976349ff61 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6609,16 +6609,6 @@ L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/i2c-stub.c -i386 BOOT CODE -M: "H. Peter Anvin" -S: Maintained -F: arch/x86/boot/ - -i386 SETUP CODE / CPU ERRATA WORKAROUNDS -M: "H. 
Peter Anvin" -T: git git://git.kernel.org/pub/scm/linux/kernel/git/hpa/linux-2.6-x86setup.git -S: Maintained - IA64 (Itanium) PLATFORM M: Tony Luck M: Fenghua Yu @@ -14858,7 +14848,7 @@ F: net/x25/ X86 ARCHITECTURE (32-BIT AND 64-BIT) M: Thomas Gleixner M: Ingo Molnar -M: "H. Peter Anvin" +R: "H. Peter Anvin" M: x86@kernel.org L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core -- cgit v1.2.3 From d5421ea43d30701e03cadc56a38854c36a8b4433 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Jan 2018 14:54:32 +0100 Subject: hrtimer: Reset hrtimer cpu base proper on CPU hotplug The hrtimer interrupt code contains a hang detection and mitigation mechanism, which prevents that a long delayed hrtimer interrupt causes a continuous retriggering of interrupts which prevent the system from making progress. If a hang is detected then the timer hardware is programmed with a certain delay into the future and a flag is set in the hrtimer cpu base which prevents newly enqueued timers from reprogramming the timer hardware prior to the chosen delay. The subsequent hrtimer interrupt after the delay clears the flag and resumes normal operation. If such a hang happens in the last hrtimer interrupt before a CPU is unplugged then the hang_detected flag is set and stays that way when the CPU is plugged in again. At that point the timer hardware is not armed and it cannot be armed because the hang_detected flag is still active, so nothing clears that flag. As a consequence the CPU does not receive hrtimer interrupts and no timers expire on that CPU which results in RCU stalls and other malfunctions. Clear the flag along with some other less critical members of the hrtimer cpu base to ensure starting from a clean state when a CPU is plugged in. Thanks to Paul, Sebastian and Anna-Maria for their help to get down to the root cause of that hard to reproduce heisenbug.
Once understood it's trivial and certainly justifies a brown paperbag. Fixes: 41d2e4949377 ("hrtimer: Tune hrtimer_interrupt hang logic") Reported-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Sebastian Sewior Cc: Anna-Maria Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801261447590.2067@nanos --- kernel/time/hrtimer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d32520840fde..aa9d2a2b1210 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -655,7 +655,9 @@ static void hrtimer_reprogram(struct hrtimer *timer, static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next = KTIME_MAX; + base->hang_detected = 0; base->hres_active = 0; + base->next_timer = NULL; } /* @@ -1589,6 +1591,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) timerqueue_init_head(&cpu_base->clock_base[i].active); } + cpu_base->active_bases = 0; cpu_base->cpu = cpu; hrtimer_init_hres(cpu_base); return 0; -- cgit v1.2.3 From e383095c7fe8d218e00ec0f83e4b95ed4e627b02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 27 Jan 2018 15:45:14 +0100 Subject: x86/cpu/bugs: Make retpoline module warning conditional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If sysfs is disabled and RETPOLINE not defined: arch/x86/kernel/cpu/bugs.c:97:13: warning: ‘spectre_v2_bad_module’ defined but not used [-Wunused-variable] static bool spectre_v2_bad_module; Hide it. 
Fixes: caf7501a1b4e ("module/retpoline: Warn about missing retpoline in module") Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: David Woodhouse --- arch/x86/kernel/cpu/bugs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c988a8acb0d5..b0b7157df89e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -94,9 +94,10 @@ static const char *spectre_v2_strings[] = { #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; -static bool spectre_v2_bad_module; #ifdef RETPOLINE +static bool spectre_v2_bad_module; + bool retpoline_module_ok(bool has_retpoline) { if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) @@ -106,6 +107,13 @@ bool retpoline_module_ok(bool has_retpoline) spectre_v2_bad_module = true; return false; } + +static inline const char *spectre_v2_module_string(void) +{ + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; +} +#else +static inline const char *spectre_v2_module_string(void) { return ""; } #endif static void __init spec2_print_if_insecure(const char *reason) @@ -300,7 +308,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IPBP" : "", - spectre_v2_bad_module ? " - vulnerable module loaded" : ""); + boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + spectre_v2_module_string()); } #endif -- cgit v1.2.3 From 836ad11235d04f7a6fcd41b1abceaeacc70153c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 27 Jan 2018 17:22:01 +0100 Subject: hwmon: (dell-smm) Enable broken functionality via "force" module param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Dell machines are broken and some functionality is disabled. 
Show warning into dmesg about this fact and allow user via "force" module param to override brokenness and enable broken functionality. Signed-off-by: Pali Rohár Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index c7c9e95e58a8..aef4f8317ae2 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -1060,8 +1060,11 @@ static int __init i8k_probe(void) i8k_get_dmi_data(DMI_BIOS_VERSION)); } - if (dmi_check_system(i8k_blacklist_fan_type_dmi_table)) - disallow_fan_type_call = true; + if (dmi_check_system(i8k_blacklist_fan_type_dmi_table)) { + pr_warn("broken Dell BIOS detected, disallow fan type call\n"); + if (!force) + disallow_fan_type_call = true; + } strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION), sizeof(bios_version)); -- cgit v1.2.3 From f480ea90b9dd7dfcf9031c82abc294f86d7db435 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sat, 27 Jan 2018 17:28:34 +0100 Subject: hwmon: (dell-smm) Disable fan support for Dell Inspiron 7720 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling fan related SMM functions implemented by Dell BIOS firmware on Dell Inspiron 7720 freeze kernel for about 500ms. Until Dell fixes it we need to disable fan support for Dell Inspiron 7720 as it makes system unusable. Via "force" module param fan support can be enabled. 
Reported-by: vova7890@mail.ru Signed-off-by: Pali Rohár Link: https://bugzilla.kernel.org/show_bug.cgi?id=195751 Cc: stable@vger.kernel.org # v4.0+, will need backport Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index aef4f8317ae2..3f8b4e482b64 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -76,6 +76,7 @@ static uint i8k_fan_mult = I8K_FAN_MULT; static uint i8k_pwm_mult; static uint i8k_fan_max = I8K_FAN_HIGH; static bool disallow_fan_type_call; +static bool disallow_fan_support; #define I8K_HWMON_HAVE_TEMP1 (1 << 0) #define I8K_HWMON_HAVE_TEMP2 (1 << 1) @@ -242,6 +243,9 @@ static int i8k_get_fan_status(int fan) { struct smm_regs regs = { .eax = I8K_SMM_GET_FAN, }; + if (disallow_fan_support) + return -EINVAL; + regs.ebx = fan & 0xff; return i8k_smm(&regs) ? : regs.eax & 0xff; } @@ -253,6 +257,9 @@ static int i8k_get_fan_speed(int fan) { struct smm_regs regs = { .eax = I8K_SMM_GET_SPEED, }; + if (disallow_fan_support) + return -EINVAL; + regs.ebx = fan & 0xff; return i8k_smm(&regs) ? : (regs.eax & 0xffff) * i8k_fan_mult; } @@ -264,7 +271,7 @@ static int _i8k_get_fan_type(int fan) { struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, }; - if (disallow_fan_type_call) + if (disallow_fan_support || disallow_fan_type_call) return -EINVAL; regs.ebx = fan & 0xff; @@ -289,6 +296,9 @@ static int i8k_get_fan_nominal_speed(int fan, int speed) { struct smm_regs regs = { .eax = I8K_SMM_GET_NOM_SPEED, }; + if (disallow_fan_support) + return -EINVAL; + regs.ebx = (fan & 0xff) | (speed << 8); return i8k_smm(&regs) ? : (regs.eax & 0xffff) * i8k_fan_mult; } @@ -300,6 +310,9 @@ static int i8k_set_fan(int fan, int speed) { struct smm_regs regs = { .eax = I8K_SMM_SET_FAN, }; + if (disallow_fan_support) + return -EINVAL; + speed = (speed < 0) ?
0 : ((speed > i8k_fan_max) ? i8k_fan_max : speed); regs.ebx = (fan & 0xff) | (speed << 8); @@ -772,6 +785,8 @@ static struct attribute *i8k_attrs[] = { static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr, int index) { + if (disallow_fan_support && index >= 8) + return 0; if (disallow_fan_type_call && (index == 9 || index == 12 || index == 15)) return 0; @@ -1038,6 +1053,23 @@ static const struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initconst { } }; +/* + * On some machines all fan related SMM functions implemented by Dell BIOS + * firmware freeze kernel for about 500ms. Until Dell fixes these problems fan + * support for affected blacklisted Dell machines stay disabled. + * See bug: https://bugzilla.kernel.org/show_bug.cgi?id=195751 + */ +static struct dmi_system_id i8k_blacklist_fan_support_dmi_table[] __initdata = { + { + .ident = "Dell Inspiron 7720", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 7720"), + }, + }, + { } +}; + /* * Probe for the presence of a supported laptop. */ @@ -1060,6 +1092,12 @@ static int __init i8k_probe(void) i8k_get_dmi_data(DMI_BIOS_VERSION)); } + if (dmi_check_system(i8k_blacklist_fan_support_dmi_table)) { + pr_warn("broken Dell BIOS detected, disallow fan support\n"); + if (!force) + disallow_fan_support = true; + } + if (dmi_check_system(i8k_blacklist_fan_type_dmi_table)) { pr_warn("broken Dell BIOS detected, disallow fan type call\n"); if (!force) -- cgit v1.2.3 From 6fbc4232a5ac944531bda397f3644d1cf66bdd13 Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Sat, 27 Jan 2018 17:23:29 +0100 Subject: hwmon: (dell-smm) Disable fan support for Dell Vostro 3360 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling fan related SMM functions implemented by Dell BIOS firmware on Dell Vostro 3360 freeze kernel for about 500ms. 
Unfortunately, it is unlikely for Dell to fix this since the machine is pretty old, so this commit just disables fan support to make the system usable again. Via "force" module param fan support can be enabled. Link: https://bugzilla.kernel.org/show_bug.cgi?id=195751 Link: http://lkml.iu.edu/hypermail/linux/kernel/1711.2/06083.html Reviewed-by: Pali Rohár Signed-off-by: Oleksandr Natalenko Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 3f8b4e482b64..bf3bb7e1adab 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -1067,6 +1067,13 @@ static struct dmi_system_id i8k_blacklist_fan_support_dmi_table[] __initdata = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 7720"), }, }, + { + .ident = "Dell Vostro 3360", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Vostro 3360"), + }, + }, { } }; -- cgit v1.2.3 From 2961298efe1ea1b6fc0d7ee8b76018fa6c0bcef2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 27 Jan 2018 16:24:32 +0000 Subject: x86/cpufeatures: Clean up Spectre v2 related CPUID flags We want to expose the hardware features simply in /proc/cpuinfo as "ibrs", "ibpb" and "stibp". Since AMD has separate CPUID bits for those, use them as the user-visible bits. When the Intel SPEC_CTRL bit is set which indicates both IBRS and IBPB capability, set those (AMD) bits accordingly. Likewise if the Intel STIBP bit is set, set the AMD STIBP that's used for the generic hardware capability. Hide the rest from /proc/cpuinfo by putting "" in the comments. Including RETPOLINE and RETPOLINE_AMD which shouldn't be visible there. There are patches to make the sysfs vulnerabilities information non-readable by non-root, and the same should apply to all information about which mitigations are actually in use. Those *shouldn't* appear in /proc/cpuinfo. 
The feature bit for whether IBPB is actually used, which is needed for ALTERNATIVEs, is renamed to X86_FEATURE_USE_IBPB. Originally-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-2-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/cpufeatures.h | 18 +++++++++--------- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 7 +++---- arch/x86/kernel/cpu/intel.c | 31 +++++++++++++++++++++---------- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 07934b2f8df2..73b5fff159a4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -203,14 +203,14 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define 
X86_FEATURE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled*/ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -271,9 +271,9 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_AMD_PRED_CMD (13*32+12) /* Prediction Command MSR (AMD) */ -#define X86_FEATURE_AMD_SPEC_CTRL (13*32+14) /* Speculation Control MSR only (AMD) */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors (AMD) */ +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -325,8 +325,8 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ /* diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 
865192a2cc31..19ecb5446b30 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -225,7 +225,7 @@ static inline void indirect_branch_prediction_barrier(void) "movl %[val], %%eax\n\t" "movl $0, %%edx\n\t" "wrmsr", - X86_FEATURE_IBPB) + X86_FEATURE_USE_IBPB) : : [msr] "i" (MSR_IA32_PRED_CMD), [val] "i" (PRED_CMD_IBPB) : "eax", "ecx", "edx", "memory"); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b0b7157df89e..32d8e6cdc09e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -273,9 +273,8 @@ retpoline_auto: } /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) || - boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) { - setup_force_cpu_cap(X86_FEATURE_IBPB); + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Enabling Indirect Branch Prediction Barrier\n"); } } @@ -308,7 +307,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Not affected\n"); return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBPB) ? 
", IBPB" : "", spectre_v2_module_string()); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5faa487d0477..0c8b916abced 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -175,17 +175,28 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); - if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_STIBP) || - cpu_has(c, X86_FEATURE_AMD_SPEC_CTRL) || - cpu_has(c, X86_FEATURE_AMD_PRED_CMD) || - cpu_has(c, X86_FEATURE_AMD_STIBP)) && bad_spectre_microcode(c)) { - pr_warn("Intel Spectre v2 broken microcode detected; disabling SPEC_CTRL\n"); - clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + /* + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, + * and they also have a different bit for STIBP support. Also, + * a hypervisor might have set the individual AMD bits even on + * Intel CPUs, for finer-grained selection of what's available. 
+ */ + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_IBPB); + } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) + set_cpu_cap(c, X86_FEATURE_STIBP); + + /* Now if any of them are set, check the blacklist and clear the lot */ + if ((cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); + clear_cpu_cap(c, X86_FEATURE_IBRS); + clear_cpu_cap(c, X86_FEATURE_IBPB); clear_cpu_cap(c, X86_FEATURE_STIBP); - clear_cpu_cap(c, X86_FEATURE_AMD_SPEC_CTRL); - clear_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD); - clear_cpu_cap(c, X86_FEATURE_AMD_STIBP); + clear_cpu_cap(c, X86_FEATURE_SPEC_CTRL); + clear_cpu_cap(c, X86_FEATURE_INTEL_STIBP); } /* -- cgit v1.2.3 From 1dde7415e99933bb7293d6b2843752cbdb43ec11 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 27 Jan 2018 16:24:33 +0000 Subject: x86/retpoline: Simplify vmexit_fill_RSB() Simplify it to call an asm-function instead of pasting 41 insn bytes at every call site. 
Also, add alignment to the macro as suggested here: https://support.google.com/faqs/answer/7625886 [dwmw2: Clean up comments, let it clobber %ebx and just tell the compiler] Signed-off-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-3-git-send-email-dwmw@amazon.co.uk --- arch/x86/entry/entry_32.S | 3 +- arch/x86/entry/entry_64.S | 3 +- arch/x86/include/asm/asm-prototypes.h | 3 ++ arch/x86/include/asm/nospec-branch.h | 70 ++++------------------------------- arch/x86/lib/Makefile | 1 + arch/x86/lib/retpoline.S | 56 ++++++++++++++++++++++++++++ 6 files changed, 71 insertions(+), 65 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 60c4c342316c..2a35b1e0fb90 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,7 +252,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %ebx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 63f4320602a3..b4f00984089e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -495,7 +495,8 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. 
*/ - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + /* Clobbers %rbx */ + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 1908214b9125..4d111616524b 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,4 +38,7 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) +asmlinkage void __fill_rsb(void); +asmlinkage void __clear_rsb(void); + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 19ecb5446b30..df4ececa6ebf 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -7,50 +7,6 @@ #include #include -/* - * Fill the CPU return stack buffer. - * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * We define a CPP macro such that it can be used from both .S files and - * inline assembly. It's possible to do a .macro and then include that - * from C via asm(".include ") but let's not go there. - */ - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -/* - * Google experimented with loop-unrolling and this turned out to be - * the optimal version — two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. 
- */ -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ - mov $(nr/2), reg; \ -771: \ - call 772f; \ -773: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 773b; \ -772: \ - call 774f; \ -775: /* speculation trap */ \ - pause; \ - lfence; \ - jmp 775b; \ -774: \ - dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; - #ifdef __ASSEMBLY__ /* @@ -121,17 +77,10 @@ #endif .endm - /* - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP - * monstrosity above, manually. - */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +/* This clobbers the BX register */ +.macro FILL_RETURN_BUFFER nr:req ftr:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr -.Lskip_rsb_\@: + ALTERNATIVE "", "call __clear_rsb", \ftr #endif .endm @@ -206,15 +155,10 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - unsigned long loops; - - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE("jmp 910f", - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), - X86_FEATURE_RETPOLINE) - "910:" - : "=r" (loops), ASM_CALL_CONSTRAINT - : : "memory" ); + alternative_input("", + "call __fill_rsb", + X86_FEATURE_RETPOLINE, + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); #endif } diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index d435c89875c1..d0a3170e6804 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_RETPOLINE) += retpoline.o +OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index c909961e678a..480edc3a5e03 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,6 +7,7 @@ #include #include 
#include +#include .macro THUNK reg .section .text.__x86.indirect_thunk @@ -46,3 +47,58 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * Google experimented with loop-unrolling and this turned out to be + * the optimal version - two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +.macro STUFF_RSB nr:req sp:req + mov $(\nr / 2), %_ASM_BX + .align 16 +771: + call 772f +773: /* speculation trap */ + pause + lfence + jmp 773b + .align 16 +772: + call 774f +775: /* speculation trap */ + pause + lfence + jmp 775b + .align 16 +774: + dec %_ASM_BX + jnz 771b + add $((BITS_PER_LONG/8) * \nr), \sp +.endm + +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +ENTRY(__fill_rsb) + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP + ret +END(__fill_rsb) +EXPORT_SYMBOL_GPL(__fill_rsb) + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ + +ENTRY(__clear_rsb) + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP + ret +END(__clear_rsb) +EXPORT_SYMBOL_GPL(__clear_rsb) -- cgit v1.2.3 From 64e16720ea0879f8ab4547e3b9758936d483909b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 27 Jan 2018 16:24:34 +0000 Subject: x86/speculation: Simplify indirect_branch_prediction_barrier() Make it all a function which does the WRMSR instead of having a hairy inline asm. 
[dwmw2: export it, fix CONFIG_RETPOLINE issues] Signed-off-by: Borislav Petkov Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Cc: ak@linux.intel.com Cc: dave.hansen@intel.com Cc: karahmed@amazon.de Cc: arjan@linux.intel.com Cc: torvalds@linux-foundation.org Cc: peterz@infradead.org Cc: bp@alien8.de Cc: pbonzini@redhat.com Cc: tim.c.chen@linux.intel.com Cc: gregkh@linux-foundation.org Link: https://lkml.kernel.org/r/1517070274-12128-4-git-send-email-dwmw@amazon.co.uk --- arch/x86/include/asm/nospec-branch.h | 13 ++++--------- arch/x86/include/asm/processor.h | 3 +++ arch/x86/kernel/cpu/bugs.c | 6 ++++++ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index df4ececa6ebf..d15d471348b8 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -164,15 +164,10 @@ static inline void vmexit_fill_RSB(void) static inline void indirect_branch_prediction_barrier(void) { - asm volatile(ALTERNATIVE("", - "movl %[msr], %%ecx\n\t" - "movl %[val], %%eax\n\t" - "movl $0, %%edx\n\t" - "wrmsr", - X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); + alternative_input("", + "call __ibp_barrier", + X86_FEATURE_USE_IBPB, + ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9c18da64daa9..881ca3b1d6d4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -970,4 +970,7 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); + +void __ibp_barrier(void); + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 32d8e6cdc09e..3bfb2b23d79c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ 
-311,3 +311,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, spectre_v2_module_string()); } #endif + +void __ibp_barrier(void) +{ + __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); +} +EXPORT_SYMBOL_GPL(__ibp_barrier); -- cgit v1.2.3 From dd085168a74c99c3ebe7f813069e412eb8444243 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 27 Jan 2018 20:21:50 -0600 Subject: x86/ftrace: Add one more ENDPROC annotation When ORC support was added for the ftrace_64.S code, an ENDPROC for function_hook() was missed. This results in the following warning: arch/x86/kernel/ftrace_64.o: warning: objtool: .entry.text+0x0: unreachable instruction Fixes: e2ac83d74a4d ("x86/ftrace: Fix ORC unwinding from ftrace handlers") Reported-by: Steven Rostedt Reported-by: Borislav Petkov Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20180128022150.dqierscqmt3uwwsr@treble --- arch/x86/kernel/ftrace_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 7cb8ba08beb9..8774fd2ed390 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -291,7 +291,7 @@ trace: restore_mcount_regs jmp fgraph_trace -END(function_hook) +ENDPROC(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v1.2.3 From 36c7ce4a17f220398e12e588ea3484265df4c41c Mon Sep 17 00:00:00 2001 From: Achilles Gaikwad Date: Sun, 28 Jan 2018 13:39:48 +0530 Subject: fs/cifs/cifsacl.c Fixes typo in a comment Signed-off-by: Achilles Gaikwad Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index b98436f5c7c7..13a8a77322c9 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1125,7 +1125,7 @@ out: return rc; } -/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ +/* Translate the 
CIFS ACL (similar to NTFS ACL) for a file into mode bits */ int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, struct inode *inode, const char *path, -- cgit v1.2.3 From d8a5b80568a9cb66810e75b182018e9edb68e8ff Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Jan 2018 13:20:33 -0800 Subject: Linux 4.15 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 339397b838d3..c8b8e902d5a4 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc9 +EXTRAVERSION = NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From ae5e165d855dd978a461b22175531b07f54fb61f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 29 Jan 2018 06:41:30 -0500 Subject: fs: new API for handling inode->i_version Add a documentation blob that explains what the i_version field is, how it is expected to work, and how it is currently implemented by various filesystems. We already have inode_inc_iversion. Add several other functions for manipulating and accessing the i_version counter. For now, the implementation is trivial and basically works the way that all of the open-coded i_version accesses work today. Future patches will convert existing users of i_version to use the new API, and then convert the backend implementation to do things more efficiently. 
Signed-off-by: Jeff Layton Reviewed-by: Jan Kara --- fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 1 + fs/btrfs/ioctl.c | 1 + fs/btrfs/xattr.c | 1 + fs/ext4/inode.c | 1 + fs/ext4/namei.c | 1 + fs/inode.c | 1 + include/linux/fs.h | 15 --- include/linux/iversion.h | 242 +++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 249 insertions(+), 15 deletions(-) create mode 100644 include/linux/iversion.h diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eb1bac7c8553..c95d7b2efefb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e1a7f3cb5be9..27f008b33fc1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2ef8acaac688..aa452c9e2eff 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 2c7e53f9ff1b..5258c1714830 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "ctree.h" #include "btrfs_inode.h" #include "transaction.h" diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7df2c5644e59..fa5d8bc52d2d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "xattr.h" diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 798b3ac680db..bcf0dff517be 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "ext4.h" #include "ext4_jbd2.h" diff --git a/fs/inode.c b/fs/inode.c index 03102d6ef044..19e72f500f71 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ 
-18,6 +18,7 @@ #include /* for inode_has_buffers */ #include #include +#include #include #include "internal.h" diff --git a/include/linux/fs.h b/include/linux/fs.h index 511fbaabf624..76382c24e9d0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2036,21 +2036,6 @@ static inline void inode_dec_link_count(struct inode *inode) mark_inode_dirty(inode); } -/** - * inode_inc_iversion - increments i_version - * @inode: inode that need to be updated - * - * Every time the inode is modified, the i_version field will be incremented. - * The filesystem has to be mounted with i_version flag - */ - -static inline void inode_inc_iversion(struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode->i_version++; - spin_unlock(&inode->i_lock); -} - enum file_time_flags { S_ATIME = 1, S_MTIME = 2, diff --git a/include/linux/iversion.h b/include/linux/iversion.h new file mode 100644 index 000000000000..d61e063f7c75 --- /dev/null +++ b/include/linux/iversion.h @@ -0,0 +1,242 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IVERSION_H +#define _LINUX_IVERSION_H + +#include + +/* + * The change attribute (i_version) is mandated by NFSv4 and is mostly for + * knfsd, but is also used for other purposes (e.g. IMA). The i_version must + * appear different to observers if there was a change to the inode's data or + * metadata since it was last queried. + * + * Observers see the i_version as a 64-bit number that never decreases. If it + * remains the same since it was last checked, then nothing has changed in the + * inode. If it's different then something has changed. Observers cannot infer + * anything about the nature or magnitude of the changes from the value, only + * that the inode has changed in some fashion. + * + * Not all filesystems properly implement the i_version counter. Subsystems that + * want to use i_version field on an inode should first check whether the + * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro). 
+ * + * Those that set SB_I_VERSION will automatically have their i_version counter + * incremented on writes to normal files. If the SB_I_VERSION is not set, then + * the VFS will not touch it on writes, and the filesystem can use it how it + * wishes. Note that the filesystem is always responsible for updating the + * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.). + * We consider these sorts of filesystems to have a kernel-managed i_version. + * + * It may be impractical for filesystems to keep i_version updates atomic with + * respect to the changes that cause them. They should, however, guarantee + * that i_version updates are never visible before the changes that caused + * them. Also, i_version updates should never be delayed longer than it takes + * the original change to reach disk. + * + * Note that some filesystems (e.g. NFS and AFS) just use the field to store + * a server-provided value (for the most part). For that reason, those + * filesystems do not set SB_I_VERSION. These filesystems are considered to + * have a self-managed i_version. + */ + +/** + * inode_set_iversion_raw - set i_version to the specified raw value + * @inode: inode to set + * @new: new i_version value to set + * + * Set @inode's i_version field to @new. This function is for use by + * filesystems that self-manage the i_version. + * + * For example, the NFS client stores its NFSv4 change attribute in this way, + * and the AFS client stores the data_version from the server here. + */ +static inline void +inode_set_iversion_raw(struct inode *inode, u64 new) +{ + inode->i_version = new; +} + +/** + * inode_set_iversion - set i_version to a particular value + * @inode: inode to set + * @new: new i_version value to set + * + * Set @inode's i_version field to @new. This function is for filesystems with + * a kernel-managed i_version. + * + * For now, this just does the same thing as the _raw variant. 
+ */ +static inline void +inode_set_iversion(struct inode *inode, u64 new) +{ + inode_set_iversion_raw(inode, new); +} + +/** + * inode_set_iversion_queried - set i_version to a particular value and set + * flag to indicate that it has been viewed + * @inode: inode to set + * @new: new i_version value to set + * + * When loading in an i_version value from a backing store, we typically don't + * know whether it was previously viewed before being stored or not. Thus, we + * must assume that it was, to ensure that any changes will result in the + * value changing. + * + * This function will set the inode's i_version, and possibly flag the value + * as if it has already been viewed at least once. + * + * For now, this just does what inode_set_iversion does. + */ +static inline void +inode_set_iversion_queried(struct inode *inode, u64 new) +{ + inode_set_iversion(inode, new); +} + +/** + * inode_maybe_inc_iversion - increments i_version + * @inode: inode with the i_version that should be updated + * @force: increment the counter even if it's not necessary + * + * Every time the inode is modified, the i_version field must be seen to have + * changed by any observer. + * + * In this implementation, we always increment it after taking the i_lock to + * ensure that we don't race with other incrementors. + * + * Returns true if counter was bumped, and false if it wasn't. + */ +static inline bool +inode_maybe_inc_iversion(struct inode *inode, bool force) +{ + spin_lock(&inode->i_lock); + inode->i_version++; + spin_unlock(&inode->i_lock); + return true; +} + +/** + * inode_inc_iversion - forcibly increment i_version + * @inode: inode that needs to be updated + * + * Forcibly increment the i_version field. This always results in a change to + * the observable value. + */ +static inline void +inode_inc_iversion(struct inode *inode) +{ + inode_maybe_inc_iversion(inode, true); +} + +/** + * inode_iversion_need_inc - is the i_version in need of being incremented?
+ * @inode: inode to check + * + * Returns whether the inode->i_version counter needs incrementing on the next + * change. + * + * For now, we assume that it always does. + */ +static inline bool +inode_iversion_need_inc(struct inode *inode) +{ + return true; +} + +/** + * inode_peek_iversion_raw - grab a "raw" iversion value + * @inode: inode from which i_version should be read + * + * Grab a "raw" inode->i_version value and return it. The i_version is not + * flagged or converted in any way. This is mostly used to access a self-managed + * i_version. + * + * With those filesystems, we want to treat the i_version as an entirely + * opaque value. + */ +static inline u64 +inode_peek_iversion_raw(const struct inode *inode) +{ + return inode->i_version; +} + +/** + * inode_inc_iversion_raw - forcibly increment raw i_version + * @inode: inode that needs to be updated + * + * Forcibly increment the raw i_version field. This always results in a change + * to the raw value. + * + * NFS will use the i_version field to store the value from the server. It + * mostly treats it as opaque, but in the case where it holds a write + * delegation, it must increment the value itself. This function does that. + */ +static inline void +inode_inc_iversion_raw(struct inode *inode) +{ + inode_inc_iversion(inode); +} + +/** + * inode_peek_iversion - read i_version without flagging it to be incremented + * @inode: inode from which i_version should be read + * + * Read the inode i_version counter for an inode without registering it as a + * query. + * + * This is typically used by local filesystems that need to store an i_version + * on disk. In that situation, it's not necessary to flag it as having been + * viewed, as the result won't be used to gauge changes from that point.
+ */ +static inline u64 +inode_peek_iversion(const struct inode *inode) +{ + return inode_peek_iversion_raw(inode); +} + +/** + * inode_query_iversion - read i_version for later use + * @inode: inode from which i_version should be read + * + * Read the inode i_version counter. This should be used by callers that wish + * to store the returned i_version for later comparison. This will guarantee + * that a later query of the i_version will result in a different value if + * anything has changed. + * + * This implementation just does a peek. + */ +static inline u64 +inode_query_iversion(struct inode *inode) +{ + return inode_peek_iversion(inode); +} + +/** + * inode_cmp_iversion_raw - check whether the raw i_version counter has changed + * @inode: inode to check + * @old: old value to check against its i_version + * + * Compare the current raw i_version counter with a previous one. Returns 0 if + * they are the same or non-zero if they are different. + */ +static inline s64 +inode_cmp_iversion_raw(const struct inode *inode, u64 old) +{ + return (s64)inode_peek_iversion_raw(inode) - (s64)old; +} + +/** + * inode_cmp_iversion - check whether the i_version counter has changed + * @inode: inode to check + * @old: old value to check against its i_version + * + * Compare an i_version counter with a previous one. Returns 0 if they are + * the same or non-zero if they are different. + */ +static inline s64 +inode_cmp_iversion(const struct inode *inode, u64 old) +{ + return (s64)inode_peek_iversion(inode) - (s64)old; +} +#endif -- cgit v1.2.3 From 7594c461161745f0d38a4346d4f895e0837b8094 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 18 Dec 2017 06:25:31 -0500 Subject: fs: don't take the i_lock in inode_inc_iversion The rationale for taking the i_lock when incrementing this value is lost in antiquity. The readers of the field don't take it (at least not universally), so my assumption is that it was only done here to serialize incrementors. 
If that is indeed the case, then we can drop the i_lock from this codepath and treat it as an atomic64_t for the purposes of incrementing it. This allows us to use inode_inc_iversion without any danger of lock inversion. Note that the read side is not fetched atomically with this change. The assumption here is that that is not a critical issue since the i_version is not fully synchronized with anything else anyway. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara --- include/linux/iversion.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/iversion.h b/include/linux/iversion.h index d61e063f7c75..f268828f9f7e 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -110,12 +110,13 @@ inode_set_iversion_queried(struct inode *inode, u64 new) static inline bool inode_maybe_inc_iversion(struct inode *inode, bool force) { - spin_lock(&inode->i_lock); - inode->i_version++; - spin_unlock(&inode->i_lock); + atomic64_t *ivp = (atomic64_t *)&inode->i_version; + + atomic64_inc(ivp); return true; } + /** * inode_inc_iversion - forcibly increment i_version * @inode: inode that needs to be updated -- cgit v1.2.3 From 2489dbabea80e8c075eb01bf195d8bb0b1440dd2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:09 -0500 Subject: fat: convert to new i_version API Signed-off-by: Jeff Layton --- fs/fat/dir.c | 3 ++- fs/fat/inode.c | 9 +++++---- fs/fat/namei_msdos.c | 7 ++++--- fs/fat/namei_vfat.c | 22 +++++++++++----------- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index b833ffeee1e1..8e100c3bf72c 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "fat.h" /* @@ -1055,7 +1056,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) brelse(bh); if (err) return err; - dir->i_version++; + inode_inc_iversion(dir); if (nr_slots) { /* diff --git a/fs/fat/inode.c b/fs/fat/inode.c index
20a0a89eaca5..ffbbf0520d9e 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "fat.h" #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -507,7 +508,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) MSDOS_I(inode)->i_pos = 0; inode->i_uid = sbi->options.fs_uid; inode->i_gid = sbi->options.fs_gid; - inode->i_version++; + inode_inc_iversion(inode); inode->i_generation = get_seconds(); if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { @@ -590,7 +591,7 @@ struct inode *fat_build_inode(struct super_block *sb, goto out; } inode->i_ino = iunique(sb, MSDOS_ROOT_INO); - inode->i_version = 1; + inode_set_iversion(inode, 1); err = fat_fill_inode(inode, de); if (err) { iput(inode); @@ -1377,7 +1378,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO; inode->i_uid = sbi->options.fs_uid; inode->i_gid = sbi->options.fs_gid; - inode->i_version++; + inode_inc_iversion(inode); inode->i_generation = 0; inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; @@ -1828,7 +1829,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (!root_inode) goto out_fail; root_inode->i_ino = MSDOS_ROOT_INO; - root_inode->i_version = 1; + inode_set_iversion(root_inode, 1); error = fat_read_root(root_inode); if (error < 0) { iput(root_inode); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index d24d2758a363..582ca731a6c9 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -7,6 +7,7 @@ */ #include +#include #include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ @@ -480,7 +481,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, } else mark_inode_dirty(old_inode); - old_dir->i_version++; + inode_inc_iversion(old_dir); old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); if (IS_DIRSYNC(old_dir)) (void)fat_sync_inode(old_dir); @@ -508,7 +509,7 @@ static 
int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, goto out; new_i_pos = sinfo.i_pos; } - new_dir->i_version++; + inode_inc_iversion(new_dir); fat_detach(old_inode); fat_attach(old_inode, new_i_pos); @@ -540,7 +541,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, old_sinfo.bh = NULL; if (err) goto error_dotdot; - old_dir->i_version++; + inode_inc_iversion(old_dir); old_dir->i_ctime = old_dir->i_mtime = ts; if (IS_DIRSYNC(old_dir)) (void)fat_sync_inode(old_dir); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 02c066663a3a..cefea792cde8 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -20,7 +20,7 @@ #include #include #include - +#include #include "fat.h" static inline unsigned long vfat_d_version(struct dentry *dentry) @@ -46,7 +46,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry) { int ret = 1; spin_lock(&dentry->d_lock); - if (vfat_d_version(dentry) != d_inode(dentry->d_parent)->i_version) + if (inode_cmp_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry))) ret = 0; spin_unlock(&dentry->d_lock); return ret; @@ -759,7 +759,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: mutex_unlock(&MSDOS_SB(sb)->s_lock); if (!inode) - vfat_d_version_set(dentry, dir->i_version); + vfat_d_version_set(dentry, inode_query_iversion(dir)); return d_splice_alias(inode, dentry); error: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -781,7 +781,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); if (err) goto out; - dir->i_version++; + inode_inc_iversion(dir); inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); @@ -789,7 +789,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, err = PTR_ERR(inode); goto out; } - inode->i_version++; + inode_inc_iversion(inode); inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* 
timestamp is already written, so mark_inode_dirty() is unneeded. */ @@ -823,7 +823,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = current_time(inode); fat_detach(inode); - vfat_d_version_set(dentry, dir->i_version); + vfat_d_version_set(dentry, inode_query_iversion(dir)); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -849,7 +849,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = current_time(inode); fat_detach(inode); - vfat_d_version_set(dentry, dir->i_version); + vfat_d_version_set(dentry, inode_query_iversion(dir)); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -875,7 +875,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo); if (err) goto out_free; - dir->i_version++; + inode_inc_iversion(dir); inc_nlink(dir); inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); @@ -885,7 +885,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) /* the directory was completed, just return a error */ goto out; } - inode->i_version++; + inode_inc_iversion(inode); set_nlink(inode, 2); inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. 
*/ @@ -951,7 +951,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; new_i_pos = sinfo.i_pos; } - new_dir->i_version++; + inode_inc_iversion(new_dir); fat_detach(old_inode); fat_attach(old_inode, new_i_pos); @@ -979,7 +979,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, old_sinfo.bh = NULL; if (err) goto error_dotdot; - old_dir->i_version++; + inode_inc_iversion(old_dir); old_dir->i_ctime = old_dir->i_mtime = ts; if (IS_DIRSYNC(old_dir)) (void)fat_sync_inode(old_dir); -- cgit v1.2.3 From 9dffe569d942a57cfd27ee961f8fb6facc6ba86a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:10 -0500 Subject: affs: convert to new i_version API Signed-off-by: Jeff Layton --- fs/affs/amigaffs.c | 5 +++-- fs/affs/dir.c | 5 +++-- fs/affs/super.c | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 0f0e6925e97d..14a6c1b90c9f 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -10,6 +10,7 @@ */ #include +#include #include "affs.h" /* @@ -60,7 +61,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh) affs_brelse(dir_bh); dir->i_mtime = dir->i_ctime = current_time(dir); - dir->i_version++; + inode_inc_iversion(dir); mark_inode_dirty(dir); return 0; @@ -114,7 +115,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) affs_brelse(bh); dir->i_mtime = dir->i_ctime = current_time(dir); - dir->i_version++; + inode_inc_iversion(dir); mark_inode_dirty(dir); return retval; diff --git a/fs/affs/dir.c b/fs/affs/dir.c index a105e77df2c1..d180b46453cf 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -14,6 +14,7 @@ * */ +#include #include "affs.h" static int affs_readdir(struct file *, struct dir_context *); @@ -80,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) * we can jump directly to where we left off. 
*/ ino = (u32)(long)file->private_data; - if (ino && file->f_version == inode->i_version) { + if (ino && inode_cmp_iversion(inode, file->f_version) == 0) { pr_debug("readdir() left off=%d\n", ino); goto inside; } @@ -130,7 +131,7 @@ inside: } while (ino); } done: - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); file->private_data = (void *)(long)ino; affs_brelse(fh_bh); diff --git a/fs/affs/super.c b/fs/affs/super.c index 1117e36134cc..e602619aed9d 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "affs.h" static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); @@ -102,7 +103,7 @@ static struct inode *affs_alloc_inode(struct super_block *sb) if (!i) return NULL; - i->vfs_inode.i_version = 1; + inode_set_iversion(&i->vfs_inode, 1); i->i_lc = NULL; i->i_ext_bh = NULL; i->i_pa_cnt = 0; -- cgit v1.2.3 From a01179e6eb5aecf5d8ca0df2598e199eedb59578 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:11 -0500 Subject: afs: convert to new i_version API For AFS, it's generally treated as an opaque value, so we use the *_raw variants of the API here. Note that AFS has quite a different definition for this counter. AFS only increments it on changes to the data in regular files and contents of the directories. Inode metadata changes do not result in a version increment. We'll need to reconcile that somehow if we ever want to present this to userspace via statx.
Signed-off-by: Jeff Layton --- fs/afs/fsclient.c | 3 ++- fs/afs/inode.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index b90ef39ae914..88ec38c2d83c 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "internal.h" #include "afs_fs.h" @@ -124,7 +125,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; - vnode->vfs_inode.i_version = data_version; + inode_set_iversion_raw(&vnode->vfs_inode, data_version); } expected_version = status->data_version; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 3415eb7484f6..dcd2e08d6cdb 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "internal.h" static const struct inode_operations afs_symlink_inode_operations = { @@ -89,7 +90,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; inode->i_generation = vnode->fid.unique; - inode->i_version = vnode->status.data_version; + inode_set_iversion_raw(inode, vnode->status.data_version); inode->i_mapping->a_ops = &afs_fs_aops; read_sequnlock_excl(&vnode->cb_lock); @@ -218,7 +219,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; - inode->i_version = 0; + inode_set_iversion_raw(inode, 0); inode->i_generation = 0; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); -- cgit v1.2.3 From c7f88c4e78f517a023febc6ef618b4d634d12c73 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:12 -0500 Subject: btrfs: convert to new i_version API Signed-off-by: Jeff Layton Acked-by: David Sterba --- 
fs/btrfs/delayed-inode.c | 7 +++++-- fs/btrfs/inode.c | 6 ++++-- fs/btrfs/tree-log.c | 4 +++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 5d73f79ded8b..6a246ae2bcb2 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -18,6 +18,7 @@ */ #include +#include #include "delayed-inode.h" #include "disk-io.h" #include "transaction.h" @@ -1700,7 +1701,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); btrfs_set_stack_inode_generation(inode_item, BTRFS_I(inode)->generation); - btrfs_set_stack_inode_sequence(inode_item, inode->i_version); + btrfs_set_stack_inode_sequence(inode_item, + inode_peek_iversion(inode)); btrfs_set_stack_inode_transid(inode_item, trans->transid); btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); @@ -1754,7 +1756,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item); - inode->i_version = btrfs_stack_inode_sequence(inode_item); + inode_set_iversion_queried(inode, + btrfs_stack_inode_sequence(inode_item)); inode->i_rdev = 0; *rdev = btrfs_stack_inode_rdev(inode_item); BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 27f008b33fc1..ac8692849a81 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3778,7 +3778,8 @@ static int btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); - inode->i_version = btrfs_inode_sequence(leaf, inode_item); + inode_set_iversion_queried(inode, + btrfs_inode_sequence(leaf, inode_item)); inode->i_generation = BTRFS_I(inode)->generation; 
inode->i_rdev = 0; rdev = btrfs_inode_rdev(leaf, inode_item); @@ -3946,7 +3947,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, &token); btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, &token); - btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); + btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode), + &token); btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7bf9b31561db..1b7d92075c1f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "tree-log.h" #include "disk-io.h" #include "locking.h" @@ -3609,7 +3610,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), &token); - btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); + btrfs_set_token_inode_sequence(leaf, item, + inode_peek_iversion(inode), &token); btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); -- cgit v1.2.3 From 317bc9478000b859a683564649d4937ca69c25d4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:13 -0500 Subject: exofs: switch to new i_version API Signed-off-by: Jeff Layton --- fs/exofs/dir.c | 9 +++++---- fs/exofs/super.c | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 98233a97b7b8..c5a53fcc43ea 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -31,6 +31,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include "exofs.h" static inline unsigned exofs_chunk_size(struct 
inode *inode) @@ -60,7 +61,7 @@ static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len) struct inode *dir = mapping->host; int err = 0; - dir->i_version++; + inode_inc_iversion(dir); if (!PageUptodate(page)) SetPageUptodate(page); @@ -241,7 +242,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); - int need_revalidate = (file->f_version != inode->i_version); + bool need_revalidate = inode_cmp_iversion(inode, file->f_version); if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1)) return 0; @@ -264,8 +265,8 @@ exofs_readdir(struct file *file, struct dir_context *ctx) chunk_mask); ctx->pos = (n<f_version = inode->i_version; - need_revalidate = 0; + file->f_version = inode_query_iversion(inode); + need_revalidate = false; } de = (struct exofs_dir_entry *)(kaddr + offset); limit = kaddr + exofs_last_byte(inode, n) - diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 819624cfc8da..7e244093c0e5 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "exofs.h" @@ -159,7 +160,7 @@ static struct inode *exofs_alloc_inode(struct super_block *sb) if (!oi) return NULL; - oi->vfs_inode.i_version = 1; + inode_set_iversion(&oi->vfs_inode, 1); return &oi->vfs_inode; } -- cgit v1.2.3 From e1d747d9b6728cc01d5bcbe784a16ba726df4553 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:14 -0500 Subject: ext2: convert to new i_version API Signed-off-by: Jeff Layton Reviewed-by: Jan Kara --- fs/ext2/dir.c | 9 +++++---- fs/ext2/super.c | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 987647986f47..4111085a129f 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -26,6 +26,7 @@ #include #include #include +#include typedef struct ext2_dir_entry_2 ext2_dirent; @@ -92,7 +93,7 @@ static int 
ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) struct inode *dir = mapping->host; int err = 0; - dir->i_version++; + inode_inc_iversion(dir); block_write_end(NULL, mapping, pos, len, len, page, NULL); if (pos+len > dir->i_size) { @@ -293,7 +294,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); unsigned char *types = NULL; - int need_revalidate = file->f_version != inode->i_version; + bool need_revalidate = inode_cmp_iversion(inode, file->f_version); if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) return 0; @@ -319,8 +320,8 @@ ext2_readdir(struct file *file, struct dir_context *ctx) offset = ext2_validate_entry(kaddr, offset, chunk_mask); ctx->pos = (n<f_version = inode->i_version; - need_revalidate = 0; + file->f_version = inode_query_iversion(inode); + need_revalidate = false; } de = (ext2_dirent *)(kaddr+offset); limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7646818ab266..554c98b8a93a 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -184,7 +185,7 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; + inode_set_iversion(&ei->vfs_inode, 1); #ifdef CONFIG_QUOTA memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); #endif @@ -1569,7 +1570,7 @@ out: return err; if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); - inode->i_version++; + inode_inc_iversion(inode); inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; -- cgit v1.2.3 From ee73f9a52a34377887acfa3b76169709e80d577c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jan 2018 08:21:39 -0500 Subject: ext4: convert to new i_version API Signed-off-by: 
Jeff Layton Acked-by: Theodore Ts'o --- fs/ext4/dir.c | 9 +++++---- fs/ext4/inline.c | 7 ++++--- fs/ext4/inode.c | 12 ++++++++---- fs/ext4/ioctl.c | 3 ++- fs/ext4/namei.c | 4 ++-- fs/ext4/super.c | 3 ++- fs/ext4/xattr.c | 5 +++-- 7 files changed, 26 insertions(+), 17 deletions(-) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d5babc9f222b..afda0a0499ce 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "ext4.h" #include "xattr.h" @@ -208,7 +209,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (file->f_version != inode->i_version) { + if (inode_cmp_iversion(inode, file->f_version)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ext4_dir_entry_2 *) (bh->b_data + i); @@ -227,7 +228,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) offset = i; ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) | offset; - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); } while (ctx->pos < inode->i_size @@ -568,10 +569,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) * cached entries. 
*/ if ((!info->curr_node) || - (file->f_version != inode->i_version)) { + inode_cmp_iversion(inode, file->f_version)) { info->curr_node = NULL; free_rb_tree_fname(&info->root); - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); ret = ext4_htree_fill_tree(file, info->curr_hash, info->curr_minor_hash, &info->next_hash); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1367553c43bb..a8b987b71173 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -14,6 +14,7 @@ #include #include +#include #include "ext4_jbd2.h" #include "ext4.h" @@ -1042,7 +1043,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, */ dir->i_mtime = dir->i_ctime = current_time(dir); ext4_update_dx_flag(dir); - dir->i_version++; + inode_inc_iversion(dir); return 1; } @@ -1494,7 +1495,7 @@ int ext4_read_inline_dir(struct file *file, * dirent right now. Scan from the start of the inline * dir to make sure. */ - if (file->f_version != inode->i_version) { + if (inode_cmp_iversion(inode, file->f_version)) { for (i = 0; i < extra_size && i < offset;) { /* * "." 
is with offset 0 and @@ -1526,7 +1527,7 @@ int ext4_read_inline_dir(struct file *file, } offset = i; ctx->pos = offset; - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); } while (ctx->pos < extra_size) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index fa5d8bc52d2d..1b0d54b372f2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4874,12 +4874,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - inode->i_version = le32_to_cpu(raw_inode->i_disk_version); + u64 ivers = le32_to_cpu(raw_inode->i_disk_version); + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) - inode->i_version |= + ivers |= (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; } + inode_set_iversion_queried(inode, ivers); } ret = 0; @@ -5165,11 +5167,13 @@ static int ext4_do_update_inode(handle_t *handle, } if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - raw_inode->i_disk_version = cpu_to_le32(inode->i_version); + u64 ivers = inode_peek_iversion(inode); + + raw_inode->i_disk_version = cpu_to_le32(ivers); if (ei->i_extra_isize) { if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) raw_inode->i_version_hi = - cpu_to_le32(inode->i_version >> 32); + cpu_to_le32(ivers >> 32); raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1eec25014f62..7e99ad02f1ba 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "ext4.h" #include @@ -144,7 +145,7 @@ static long swap_inode_boot_loader(struct super_block *sb, i_gid_write(inode_bl, 0); inode_bl->i_flags = 0; ei_bl->i_flags = 0; - inode_bl->i_version = 1; + inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { diff 
--git a/fs/ext4/namei.c b/fs/ext4/namei.c index bcf0dff517be..55f6e38de5ba 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2956,7 +2956,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) "empty directory '%.*s' has too many links (%u)", dentry->d_name.len, dentry->d_name.name, inode->i_nlink); - inode->i_version++; + inode_inc_iversion(inode); clear_nlink(inode); /* There's no need to set i_disksize: the fact that i_nlink is * zero will ensure that the right thing happens during any @@ -3362,7 +3362,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, ent->de->inode = cpu_to_le32(ino); if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; - ent->dir->i_version++; + inode_inc_iversion(ent->dir); ent->dir->i_ctime = ent->dir->i_mtime = current_time(ent->dir); ext4_mark_inode_dirty(handle, ent->dir); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7c46693a14d7..5de959fb0244 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -967,7 +968,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) if (!ei) return NULL; - ei->vfs_inode.i_version = 1; + inode_set_iversion(&ei->vfs_inode, 1); spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 218a7ba57819..63656dbafdc4 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" @@ -294,13 +295,13 @@ ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size) static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode) { return ((u64)ea_inode->i_ctime.tv_sec << 32) | - ((u32)ea_inode->i_version); + (u32) inode_peek_iversion_raw(ea_inode); } static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count) { 
ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32); - ea_inode->i_version = (u32)ref_count; + inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff); } static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode) -- cgit v1.2.3 From 1eb5d98f16f6d71af8781436d2b0bcf9236f7fc6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jan 2018 08:21:17 -0500 Subject: nfs: convert to new i_version API For NFS, we just use the "raw" API since the i_version is mostly managed by the server. The exception there is when the client holds a write delegation, but we only need to bump it once there anyway to handle CB_GETATTR. Tested-by: Krzysztof Kozlowski Signed-off-by: Jeff Layton --- fs/nfs/delegation.c | 3 ++- fs/nfs/fscache-index.c | 5 +++-- fs/nfs/inode.c | 18 +++++++++--------- fs/nfs/nfs4proc.c | 10 ++++++---- fs/nfs/nfstrace.h | 5 +++-- fs/nfs/write.c | 8 +++----- 6 files changed, 26 insertions(+), 23 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ade44ca0c66c..d8b47624fee2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -347,7 +348,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs4_stateid_copy(&delegation->stateid, &res->delegation); delegation->type = res->delegation_type; delegation->pagemod_limit = res->pagemod_limit; - delegation->change_attr = inode->i_version; + delegation->change_attr = inode_peek_iversion_raw(inode); delegation->cred = get_rpccred(cred); delegation->inode = inode; delegation->flags = 1< #include #include +#include #include "internal.h" #include "fscache.h" @@ -211,7 +212,7 @@ static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, auxdata.ctime = nfsi->vfs_inode.i_ctime; if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = nfsi->vfs_inode.i_version; + auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); if (bufmax > 
sizeof(auxdata)) bufmax = sizeof(auxdata); @@ -243,7 +244,7 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, auxdata.ctime = nfsi->vfs_inode.i_ctime; if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = nfsi->vfs_inode.i_version; + auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); if (memcmp(data, &auxdata, datalen) != 0) return FSCACHE_CHECKAUX_OBSOLETE; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b992d2382ffa..93552c482992 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -38,8 +38,8 @@ #include #include #include - #include +#include #include "nfs4_fs.h" #include "callback.h" @@ -483,7 +483,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st memset(&inode->i_atime, 0, sizeof(inode->i_atime)); memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); - inode->i_version = 0; + inode_set_iversion_raw(inode, 0); inode->i_size = 0; clear_nlink(inode); inode->i_uid = make_kuid(&init_user_ns, -2); @@ -508,7 +508,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st else if (nfs_server_capable(inode, NFS_CAP_CTIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) - inode->i_version = fattr->change_attr; + inode_set_iversion_raw(inode, fattr->change_attr); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE); @@ -1289,8 +1289,8 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) - && inode->i_version == fattr->pre_change_attr) { - inode->i_version = fattr->change_attr; + && !inode_cmp_iversion_raw(inode, fattr->pre_change_attr)) { + inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); ret |= 
NFS_INO_INVALID_ATTR; @@ -1348,7 +1348,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ - if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode->i_version != fattr->change_attr) + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode_cmp_iversion_raw(inode, fattr->change_attr)) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE; if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) @@ -1642,7 +1642,7 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { - fattr->pre_change_attr = inode->i_version; + fattr->pre_change_attr = inode_peek_iversion_raw(inode); fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && @@ -1778,7 +1778,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { - if (inode->i_version != fattr->change_attr) { + if (inode_cmp_iversion_raw(inode, fattr->change_attr)) { dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); /* Could it be a race with writeback? 
*/ @@ -1790,7 +1790,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); } - inode->i_version = fattr->change_attr; + inode_set_iversion_raw(inode, fattr->change_attr); } } else { nfsi->cache_validity |= save_cache_validity; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 56fa5a16e097..17a03f2c4330 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -1045,16 +1046,16 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (cinfo->atomic && cinfo->before == dir->i_version) { + if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) { nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; nfsi->attrtimeo_timestamp = jiffies; } else { nfs_force_lookup_revalidate(dir); - if (cinfo->before != dir->i_version) + if (cinfo->before != inode_peek_iversion_raw(dir)) nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; } - dir->i_version = cinfo->after; + inode_set_iversion_raw(dir, cinfo->after); nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); @@ -2454,7 +2455,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) data->file_created = true; else if (o_res->cinfo.before != o_res->cinfo.after) data->file_created = true; - if (data->file_created || dir->i_version != o_res->cinfo.after) + if (data->file_created || + inode_peek_iversion_raw(dir) != o_res->cinfo.after) update_changeattr(dir, &o_res->cinfo, o_res->f_attr->time_start); } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 093290c42d7c..610d89d8942e 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -9,6 +9,7 @@ #define _TRACE_NFS_H #include +#include #define 
nfs_show_file_type(ftype) \ __print_symbolic(ftype, \ @@ -61,7 +62,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event, __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); - __entry->version = inode->i_version; + __entry->version = inode_peek_iversion_raw(inode); ), TP_printk( @@ -100,7 +101,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->type = nfs_umode_to_dtype(inode->i_mode); - __entry->version = inode->i_version; + __entry->version = inode_peek_iversion_raw(inode); __entry->size = i_size_read(inode); __entry->nfsi_flags = nfsi->flags; __entry->cache_validity = nfsi->cache_validity; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5b5f464f6f2a..f87cbe126fa0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -753,11 +754,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) */ spin_lock(&mapping->private_lock); if (!nfs_have_writebacks(inode) && - NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) { - spin_lock(&inode->i_lock); - inode->i_version++; - spin_unlock(&inode->i_lock); - } + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + inode_inc_iversion_raw(inode); if (likely(!PageSwapCache(req->wb_page))) { set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); -- cgit v1.2.3 From 1f15a550f5f3e328b7693f664ae21f5a71a7a636 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:16 -0500 Subject: nfsd: convert to new i_version API Mostly just making sure we use the "get" wrappers so we know when it is being fetched for later use. 
Signed-off-by: Jeff Layton --- fs/nfsd/nfsfh.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 43f31cf49bae..b8444189223b 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -11,6 +11,7 @@ #include #include #include +#include static inline __u32 ino_t_to_u32(ino_t ino) { @@ -259,7 +260,7 @@ static inline u64 nfsd4_change_attribute(struct inode *inode) chattr = inode->i_ctime.tv_sec; chattr <<= 30; chattr += inode->i_ctime.tv_nsec; - chattr += inode->i_version; + chattr += inode_query_iversion(inode); return chattr; } -- cgit v1.2.3 From cc56c33e783fcd8ea0a84941cab4f919609b4835 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:17 -0500 Subject: ocfs2: convert to new i_version API Signed-off-by: Jeff Layton Reviewed-by: Jan Kara --- fs/ocfs2/dir.c | 15 ++++++++------- fs/ocfs2/inode.c | 3 ++- fs/ocfs2/namei.c | 3 ++- fs/ocfs2/quota_global.c | 3 ++- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index febe6312ceff..32f9c72dff17 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -1174,7 +1175,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, le16_add_cpu(&pde->rec_len, le16_to_cpu(de->rec_len)); de->inode = 0; - dir->i_version++; + inode_inc_iversion(dir); ocfs2_journal_dirty(handle, bh); goto bail; } @@ -1729,7 +1730,7 @@ int __ocfs2_add_entry(handle_t *handle, if (ocfs2_dir_indexed(dir)) ocfs2_recalc_free_list(dir, handle, lookup); - dir->i_version++; + inode_inc_iversion(dir); ocfs2_journal_dirty(handle, insert_bh); retval = 0; goto bail; @@ -1775,7 +1776,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. 
*/ - if (*f_version != inode->i_version) { + if (inode_cmp_iversion(inode, *f_version)) { for (i = 0; i < i_size_read(inode) && i < offset; ) { de = (struct ocfs2_dir_entry *) (data->id_data + i); @@ -1791,7 +1792,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, i += le16_to_cpu(de->rec_len); } ctx->pos = offset = i; - *f_version = inode->i_version; + *f_version = inode_query_iversion(inode); } de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos); @@ -1869,7 +1870,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (*f_version != inode->i_version) { + if (inode_cmp_iversion(inode, *f_version)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ocfs2_dir_entry *) (bh->b_data + i); /* It's too expensive to do a full @@ -1886,7 +1887,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, offset = i; ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) | offset; - *f_version = inode->i_version; + *f_version = inode_query_iversion(inode); } while (ctx->pos < i_size_read(inode) @@ -1940,7 +1941,7 @@ static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version, */ int ocfs2_dir_foreach(struct inode *inode, struct dir_context *ctx) { - u64 version = inode->i_version; + u64 version = inode_query_iversion(inode); ocfs2_dir_foreach_blk(inode, &version, ctx, true); return 0; } diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 1a1e0078ab38..d51b80edd972 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -302,7 +303,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); - inode->i_version = 1; + inode_set_iversion(inode, 1); inode->i_generation = le32_to_cpu(fe->i_generation); 
inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); inode->i_mode = le16_to_cpu(fe->i_mode); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3b0a10d9b36f..c801eddc4bf3 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -1520,7 +1521,7 @@ static int ocfs2_rename(struct inode *old_dir, mlog_errno(status); goto bail; } - new_dir->i_version++; + inode_inc_iversion(new_dir); if (S_ISDIR(new_inode->i_mode)) ocfs2_set_links_count(newfe, 0); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b39d14cbfa34..7a922190a8c7 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -289,7 +290,7 @@ out: mlog_errno(err); return err; } - gqinode->i_version++; + inode_inc_iversion(gqinode); ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); return len; } -- cgit v1.2.3 From bb8c2d66bc6f7dd6e00fc25203efe0858cf91a5e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:18 -0500 Subject: ufs: use new i_version API Signed-off-by: Jeff Layton --- fs/ufs/dir.c | 9 +++++---- fs/ufs/inode.c | 3 ++- fs/ufs/super.c | 3 ++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 2edc1755b7c5..50dfce000864 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "ufs_fs.h" #include "ufs.h" @@ -47,7 +48,7 @@ static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len) struct inode *dir = mapping->host; int err = 0; - dir->i_version++; + inode_inc_iversion(dir); block_write_end(NULL, mapping, pos, len, len, page, NULL); if (pos+len > dir->i_size) { i_size_write(dir, pos+len); @@ -428,7 +429,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 
1); - int need_revalidate = file->f_version != inode->i_version; + bool need_revalidate = inode_cmp_iversion(inode, file->f_version); unsigned flags = UFS_SB(sb)->s_flags; UFSD("BEGIN\n"); @@ -455,8 +456,8 @@ ufs_readdir(struct file *file, struct dir_context *ctx) offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); ctx->pos = (n<f_version = inode->i_version; - need_revalidate = 0; + file->f_version = inode_query_iversion(inode); + need_revalidate = false; } de = (struct ufs_dir_entry *)(kaddr+offset); limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index afb601c0dda0..c843ec858cf7 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ufs_fs.h" #include "ufs.h" @@ -693,7 +694,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino) if (err) goto bad_inode; - inode->i_version++; + inode_inc_iversion(inode); ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; ufsi->i_dir_start_lookup = 0; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 4d497e9c6883..b6ba80e05bff 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -88,6 +88,7 @@ #include #include #include +#include #include "ufs_fs.h" #include "ufs.h" @@ -1440,7 +1441,7 @@ static struct inode *ufs_alloc_inode(struct super_block *sb) if (!ei) return NULL; - ei->vfs_inode.i_version = 1; + inode_set_iversion(&ei->vfs_inode, 1); seqlock_init(&ei->meta_lock); mutex_init(&ei->truncate_mutex); return &ei->vfs_inode; -- cgit v1.2.3 From f0e28280629e0ec7921f3179409a179b1ea41f24 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:19 -0500 Subject: xfs: convert to new i_version API Signed-off-by: Jeff Layton Acked-by: Darrick J. 
Wong Acked-by: Dave Chinner --- fs/xfs/libxfs/xfs_inode_buf.c | 7 +++++-- fs/xfs/xfs_icache.c | 5 +++-- fs/xfs/xfs_inode.c | 3 ++- fs/xfs/xfs_inode_item.c | 3 ++- fs/xfs/xfs_trans_inode.c | 4 +++- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 6b7989038d75..b9c0bf80669c 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -32,6 +32,8 @@ #include "xfs_ialloc.h" #include "xfs_dir2.h" +#include + /* * Check that none of the inode's in the buffer have a next * unlinked field of 0. @@ -264,7 +266,8 @@ xfs_inode_from_disk( to->di_flags = be16_to_cpu(from->di_flags); if (to->di_version == 3) { - inode->i_version = be64_to_cpu(from->di_changecount); + inode_set_iversion_queried(inode, + be64_to_cpu(from->di_changecount)); to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); to->di_flags2 = be64_to_cpu(from->di_flags2); @@ -314,7 +317,7 @@ xfs_inode_to_disk( to->di_flags = cpu_to_be16(from->di_flags); if (from->di_version == 3) { - to->di_changecount = cpu_to_be64(inode->i_version); + to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 43005fbe8b1e..4c315adb05e6 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -37,6 +37,7 @@ #include #include +#include /* * Allocate and initialise an xfs_inode. 
@@ -293,14 +294,14 @@ xfs_reinit_inode( int error; uint32_t nlink = inode->i_nlink; uint32_t generation = inode->i_generation; - uint64_t version = inode->i_version; + uint64_t version = inode_peek_iversion(inode); umode_t mode = inode->i_mode; error = inode_init_always(mp->m_super, inode); set_nlink(inode, nlink); inode->i_generation = generation; - inode->i_version = version; + inode_set_iversion_queried(inode, version); inode->i_mode = mode; return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 801274126648..dfc5e60d8af3 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -16,6 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include +#include #include "xfs.h" #include "xfs_fs.h" @@ -833,7 +834,7 @@ xfs_ialloc( ip->i_d.di_flags = 0; if (ip->i_d.di_version == 3) { - inode->i_version = 1; + inode_set_iversion(inode, 1); ip->i_d.di_flags2 = 0; ip->i_d.di_cowextsize = 0; ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6ee5c3bf19ad..7571abf5dfb3 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -30,6 +30,7 @@ #include "xfs_buf_item.h" #include "xfs_log.h" +#include kmem_zone_t *xfs_ili_zone; /* inode log item zone */ @@ -354,7 +355,7 @@ xfs_inode_to_log_dinode( to->di_next_unlinked = NULLAGINO; if (from->di_version == 3) { - to->di_changecount = inode->i_version; + to->di_changecount = inode_peek_iversion(inode); to->di_crtime.t_sec = from->di_crtime.t_sec; to->di_crtime.t_nsec = from->di_crtime.t_nsec; to->di_flags2 = from->di_flags2; diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index daa7615497f9..225544327c4f 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -28,6 +28,8 @@ #include "xfs_inode_item.h" #include "xfs_trace.h" +#include + /* * Add a locked inode to the transaction. 
* @@ -117,7 +119,7 @@ xfs_trans_log_inode( */ if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && IS_I_VERSION(VFS_I(ip))) { - VFS_I(ip)->i_version++; + inode_inc_iversion(VFS_I(ip)); flags |= XFS_ILOG_CORE; } -- cgit v1.2.3 From 3b370b215f4b45c0be87d7a15b0d0e2e8b238aee Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:21 -0500 Subject: IMA: switch IMA over to new i_version API Signed-off-by: Jeff Layton --- security/integrity/ima/ima_api.c | 3 ++- security/integrity/ima/ima_main.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index c7e8db0ea4c0..c6ae42266270 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "ima.h" @@ -215,7 +216,7 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, * which do not support i_version, support is limited to an initial * measurement/appraisal/audit. 
*/ - i_version = file_inode(file)->i_version; + i_version = inode_query_iversion(inode); hash.hdr.algo = algo; /* Initialize hash digest to 0's in case of failure */ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 50b82599994d..06a70c5a2329 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "ima.h" @@ -128,7 +129,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint, inode_lock(inode); if (atomic_read(&inode->i_writecount) == 1) { if (!IS_I_VERSION(inode) || - (iint->version != inode->i_version) || + inode_cmp_iversion(inode, iint->version) || (iint->flags & IMA_NEW_FILE)) { iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); iint->measured_pcrs = 0; -- cgit v1.2.3 From e38cf302b2e92a870f23e07c4390e04685779340 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:22 -0500 Subject: fs: only set S_VERSION when updating times if necessary We only really need to update i_version if someone has queried for it since we last incremented it. By doing that, we can avoid having to update the inode if the times haven't changed. If the times have changed, then we go ahead and forcibly increment the counter, under the assumption that we'll be going to the storage anyway, and the increment itself is relatively cheap. 
Signed-off-by: Jeff Layton Reviewed-by: Jan Kara --- fs/inode.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 19e72f500f71..e2ca0f4b5151 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1635,17 +1635,21 @@ static int relatime_need_update(const struct path *path, struct inode *inode, int generic_update_time(struct inode *inode, struct timespec *time, int flags) { int iflags = I_DIRTY_TIME; + bool dirty = false; if (flags & S_ATIME) inode->i_atime = *time; if (flags & S_VERSION) - inode_inc_iversion(inode); + dirty = inode_maybe_inc_iversion(inode, false); if (flags & S_CTIME) inode->i_ctime = *time; if (flags & S_MTIME) inode->i_mtime = *time; + if ((flags & (S_ATIME | S_CTIME | S_MTIME)) && + !(inode->i_sb->s_flags & SB_LAZYTIME)) + dirty = true; - if (!(inode->i_sb->s_flags & SB_LAZYTIME) || (flags & S_VERSION)) + if (dirty) iflags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, iflags); return 0; @@ -1864,7 +1868,7 @@ int file_update_time(struct file *file) if (!timespec_equal(&inode->i_ctime, &now)) sync_it |= S_CTIME; - if (IS_I_VERSION(inode)) + if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) sync_it |= S_VERSION; if (!sync_it) -- cgit v1.2.3 From d17260fd5f9cd5c112cbcbbfd3024186d34c02d7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:23 -0500 Subject: xfs: avoid setting XFS_ILOG_CORE if i_version doesn't need incrementing If XFS_ILOG_CORE is already set then go ahead and increment it. Signed-off-by: Jeff Layton Acked-by: Darrick J. 
Wong Acked-by: Dave Chinner --- fs/xfs/xfs_trans_inode.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 225544327c4f..4a89da4b6fe7 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -112,15 +112,17 @@ xfs_trans_log_inode( /* * First time we log the inode in a transaction, bump the inode change - * counter if it is configured for this to occur. We don't use - * inode_inc_version() because there is no need for extra locking around - * i_version as we already hold the inode locked exclusively for - * metadata modification. + * counter if it is configured for this to occur. While we have the + * inode locked exclusively for metadata modification, we can usually + * avoid setting XFS_ILOG_CORE if no one has queried the value since + * the last time it was incremented. If we have XFS_ILOG_CORE already + * set however, then go ahead and bump the i_version counter + * unconditionally. */ if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && IS_I_VERSION(VFS_I(ip))) { - inode_inc_iversion(VFS_I(ip)); - flags |= XFS_ILOG_CORE; + if (inode_maybe_inc_iversion(VFS_I(ip), flags & XFS_ILOG_CORE)) + flags |= XFS_ILOG_CORE; } tp->t_flags |= XFS_TRANS_DIRTY; -- cgit v1.2.3 From 3a8c7231d53641a21d794c7406044e19ad299a00 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Dec 2017 06:35:24 -0500 Subject: btrfs: only dirty the inode in btrfs_update_time if something was changed At this point, we know that "now" and the file times may differ, and we suspect that the i_version has been flagged to be bumped. Attempt to bump the i_version, and only mark the inode dirty if that actually occurred or if one of the times was updated. 
Signed-off-by: Jeff Layton Acked-by: David Sterba Reviewed-by: Liu Bo --- fs/btrfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ac8692849a81..76245323a7c8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6107,19 +6107,20 @@ static int btrfs_update_time(struct inode *inode, struct timespec *now, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; + bool dirty = flags & ~S_VERSION; if (btrfs_root_readonly(root)) return -EROFS; if (flags & S_VERSION) - inode_inc_iversion(inode); + dirty |= inode_maybe_inc_iversion(inode, dirty); if (flags & S_CTIME) inode->i_ctime = *now; if (flags & S_MTIME) inode->i_mtime = *now; if (flags & S_ATIME) inode->i_atime = *now; - return btrfs_dirty_inode(inode); + return dirty ? btrfs_dirty_inode(inode) : 0; } /* -- cgit v1.2.3 From f02a9ad1f15daf4378afeda025a53455f72645dd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 21 Dec 2017 07:45:44 -0500 Subject: fs: handle inode->i_version more efficiently Since i_version is mostly treated as an opaque value, we can exploit that fact to avoid incrementing it when no one is watching. With that change, we can avoid incrementing the counter on writes, unless someone has queried for it since it was last incremented. If the a/c/mtime don't change, and the i_version hasn't changed, then there's no need to dirty the inode metadata on a write. Convert the i_version counter to an atomic64_t, and use the lowest order bit to hold a flag that will tell whether anyone has queried the value since it was last incremented. When we go to maybe increment it, we fetch the value and check the flag bit. If it's clear then we don't need to do anything if the update isn't being forced. If we do need to update, then we increment the counter by 2, and clear the flag bit, and then use a CAS op to swap it into place. If that works, we return true. 
If it doesn't then do it again with the value that we fetch from the CAS operation. On the query side, if the flag is already set, then we just shift the value down by 1 bit and return it. Otherwise, we set the flag in our on-stack value and again use cmpxchg to swap it into place if it hasn't changed. If it has, then we use the value from the cmpxchg as the new "old" value and try again. This method allows us to avoid incrementing the counter on writes (and dirtying the metadata) under typical workloads. We only need to increment if it has been queried since it was last changed. Signed-off-by: Jeff Layton Reviewed-by: Jan Kara Acked-by: Dave Chinner Tested-by: Krzysztof Kozlowski --- include/linux/fs.h | 2 +- include/linux/iversion.h | 208 ++++++++++++++++++++++++++++++++++------------- 2 files changed, 154 insertions(+), 56 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 76382c24e9d0..6804d075933e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,7 +639,7 @@ struct inode { struct hlist_head i_dentry; struct rcu_head i_rcu; }; - u64 i_version; + atomic64_t i_version; atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; diff --git a/include/linux/iversion.h b/include/linux/iversion.h index f268828f9f7e..858463fca249 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -5,6 +5,8 @@ #include /* + * The inode->i_version field: + * --------------------------- * The change attribute (i_version) is mandated by NFSv4 and is mostly for * knfsd, but is also used for other purposes (e.g. IMA). The i_version must * appear different to observers if there was a change to the inode's data or @@ -33,86 +35,171 @@ * them. Also, i_version updates should never be delayed longer than it takes * the original change to reach disk. * + * This implementation uses the low bit in the i_version field as a flag to + * track when the value has been queried. 
If it has not been queried since it + * was last incremented, we can skip the increment in most cases. + * + * In the event that we're updating the ctime, we will usually go ahead and + * bump the i_version anyway. Since that has to go to stable storage in some + * fashion, we might as well increment it as well. + * + * With this implementation, the value should always appear to observers to + * increase over time if the file has changed. It's recommended to use + * inode_cmp_iversion() helper to compare values. + * * Note that some filesystems (e.g. NFS and AFS) just use the field to store * a server-provided value (for the most part). For that reason, those * filesystems do not set SB_I_VERSION. These filesystems are considered to * have a self-managed i_version. + * + * Persistently storing the i_version + * ---------------------------------- + * Queries of the i_version field are not gated on them hitting the backing + * store. It's always possible that the host could crash after allowing + * a query of the value but before it has made it to disk. + * + * To mitigate this problem, filesystems should always use + * inode_set_iversion_queried when loading an existing inode from disk. This + * ensures that the next attempted inode increment will result in the value + * changing. + * + * Storing the value to disk therefore does not count as a query, so those + * filesystems should use inode_peek_iversion to grab the value to be stored. + * There is no need to flag the value as having been queried in that case. */ +/* + * We borrow the lowest bit in the i_version to use as a flag to tell whether + * it has been queried since we last incremented it. If it has, then we must + * increment it on the next change. After that, we can clear the flag and + * avoid incrementing it again until it has again been queried. 
+ */ +#define I_VERSION_QUERIED_SHIFT (1) +#define I_VERSION_QUERIED (1ULL << (I_VERSION_QUERIED_SHIFT - 1)) +#define I_VERSION_INCREMENT (1ULL << I_VERSION_QUERIED_SHIFT) + /** * inode_set_iversion_raw - set i_version to the specified raw value * @inode: inode to set - * @new: new i_version value to set + * @val: new i_version value to set * - * Set @inode's i_version field to @new. This function is for use by + * Set @inode's i_version field to @val. This function is for use by * filesystems that self-manage the i_version. * * For example, the NFS client stores its NFSv4 change attribute in this way, * and the AFS client stores the data_version from the server here. */ static inline void -inode_set_iversion_raw(struct inode *inode, u64 new) +inode_set_iversion_raw(struct inode *inode, u64 val) +{ + atomic64_set(&inode->i_version, val); +} + +/** + * inode_peek_iversion_raw - grab a "raw" iversion value + * @inode: inode from which i_version should be read + * + * Grab a "raw" inode->i_version value and return it. The i_version is not + * flagged or converted in any way. This is mostly used to access a self-managed + * i_version. + * + * With those filesystems, we want to treat the i_version as an entirely + * opaque value. + */ +static inline u64 +inode_peek_iversion_raw(const struct inode *inode) { - inode->i_version = new; + return atomic64_read(&inode->i_version); } /** * inode_set_iversion - set i_version to a particular value * @inode: inode to set - * @new: new i_version value to set + * @val: new i_version value to set * - * Set @inode's i_version field to @new. This function is for filesystems with - * a kernel-managed i_version. + * Set @inode's i_version field to @val. This function is for filesystems with + * a kernel-managed i_version, for initializing a newly-created inode from + * scratch. * - * For now, this just does the same thing as the _raw variant. 
+ * In this case, we do not set the QUERIED flag since we know that this value + * has never been queried. */ static inline void -inode_set_iversion(struct inode *inode, u64 new) +inode_set_iversion(struct inode *inode, u64 val) { - inode_set_iversion_raw(inode, new); + inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT); } /** - * inode_set_iversion_queried - set i_version to a particular value and set - * flag to indicate that it has been viewed + * inode_set_iversion_queried - set i_version to a particular value as quereied * @inode: inode to set - * @new: new i_version value to set + * @val: new i_version value to set * - * When loading in an i_version value from a backing store, we typically don't - * know whether it was previously viewed before being stored or not. Thus, we - * must assume that it was, to ensure that any changes will result in the - * value changing. + * Set @inode's i_version field to @val, and flag it for increment on the next + * change. * - * This function will set the inode's i_version, and possibly flag the value - * as if it has already been viewed at least once. + * Filesystems that persistently store the i_version on disk should use this + * when loading an existing inode from disk. * - * For now, this just does what inode_set_iversion does. + * When loading in an i_version value from a backing store, we can't be certain + * that it wasn't previously viewed before being stored. Thus, we must assume + * that it was, to ensure that we don't end up handing out the same value for + * different versions of the same inode. 
*/ static inline void -inode_set_iversion_queried(struct inode *inode, u64 new) +inode_set_iversion_queried(struct inode *inode, u64 val) { - inode_set_iversion(inode, new); + inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) | + I_VERSION_QUERIED); } /** * inode_maybe_inc_iversion - increments i_version * @inode: inode with the i_version that should be updated - * @force: increment the counter even if it's not necessary + * @force: increment the counter even if it's not necessary? * * Every time the inode is modified, the i_version field must be seen to have * changed by any observer. * - * In this implementation, we always increment it after taking the i_lock to - * ensure that we don't race with other incrementors. + * If "force" is set or the QUERIED flag is set, then ensure that we increment + * the value, and clear the queried flag. * - * Returns true if counter was bumped, and false if it wasn't. + * In the common case where neither is set, then we can return "false" without + * updating i_version. + * + * If this function returns false, and no other metadata has changed, then we + * can avoid logging the metadata. */ static inline bool inode_maybe_inc_iversion(struct inode *inode, bool force) { - atomic64_t *ivp = (atomic64_t *)&inode->i_version; + u64 cur, old, new; + + /* + * The i_version field is not strictly ordered with any other inode + * information, but the legacy inode_inc_iversion code used a spinlock + * to serialize increments. + * + * Here, we add full memory barriers to ensure that any de-facto + * ordering with other info is preserved. 
+ * + * This barrier pairs with the barrier in inode_query_iversion() + */ + smp_mb(); + cur = inode_peek_iversion_raw(inode); + for (;;) { + /* If flag is clear then we needn't do anything */ + if (!force && !(cur & I_VERSION_QUERIED)) + return false; - atomic64_inc(ivp); + /* Since lowest bit is flag, add 2 to avoid it */ + new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; + + old = atomic64_cmpxchg(&inode->i_version, cur, new); + if (likely(old == cur)) + break; + cur = old; + } return true; } @@ -135,31 +222,12 @@ inode_inc_iversion(struct inode *inode) * @inode: inode to check * * Returns whether the inode->i_version counter needs incrementing on the next - * change. - * - * For now, we assume that it always does. + * change. Just fetch the value and check the QUERIED flag. */ static inline bool inode_iversion_need_inc(struct inode *inode) { - return true; -} - -/** - * inode_peek_iversion_raw - grab a "raw" iversion value - * @inode: inode from which i_version should be read - * - * Grab a "raw" inode->i_version value and return it. The i_version is not - * flagged or converted in any way. This is mostly used to access a self-managed - * i_version. - * - * With those filesystems, we want to treat the i_version as an entirely - * opaque value. 
- */ -static inline u64 -inode_peek_iversion_raw(const struct inode *inode) -{ - return inode->i_version; + return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED; } /** @@ -176,7 +244,7 @@ inode_peek_iversion_raw(const struct inode *inode) static inline void inode_inc_iversion_raw(struct inode *inode) { - inode_inc_iversion(inode); + atomic64_inc(&inode->i_version); } /** @@ -193,7 +261,7 @@ inode_inc_iversion_raw(struct inode *inode) static inline u64 inode_peek_iversion(const struct inode *inode) { - return inode_peek_iversion_raw(inode); + return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; } /** @@ -205,12 +273,35 @@ inode_peek_iversion(const struct inode *inode) * that a later query of the i_version will result in a different value if * anything has changed. * - * This implementation just does a peek. + * In this implementation, we fetch the current value, set the QUERIED flag and + * then try to swap it into place with a cmpxchg, if it wasn't already set. If + * that fails, we try again with the newly fetched value from the cmpxchg. */ static inline u64 inode_query_iversion(struct inode *inode) { - return inode_peek_iversion(inode); + u64 cur, old, new; + + cur = inode_peek_iversion_raw(inode); + for (;;) { + /* If flag is already set, then no need to swap */ + if (cur & I_VERSION_QUERIED) { + /* + * This barrier (and the implicit barrier in the + * cmpxchg below) pairs with the barrier in + * inode_maybe_inc_iversion(). + */ + smp_mb(); + break; + } + + new = cur | I_VERSION_QUERIED; + old = atomic64_cmpxchg(&inode->i_version, cur, new); + if (likely(old == cur)) + break; + cur = old; + } + return cur >> I_VERSION_QUERIED_SHIFT; } /** @@ -233,11 +324,18 @@ inode_cmp_iversion_raw(const struct inode *inode, u64 old) * @old: old value to check against its i_version * * Compare an i_version counter with a previous one. Returns 0 if they are - * the same or non-zero if they are different. 
+ * the same, a positive value if the one in the inode appears newer than @old, + * and a negative value if @old appears to be newer than the one in the + * inode. + * + * Note that we don't need to set the QUERIED flag in this case, as the value + * in the inode is not being recorded for later use. */ + static inline s64 inode_cmp_iversion(const struct inode *inode, u64 old) { - return (s64)inode_peek_iversion(inode) - (s64)old; + return (s64)(inode_peek_iversion_raw(inode) & ~I_VERSION_QUERIED) - + (s64)(old << I_VERSION_QUERIED_SHIFT); } #endif -- cgit v1.2.3 From e124ece53edf3a80d594fa537d5c0a37b26f4eb7 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:02:35 +0800 Subject: btrfs: get device pointer from device_list_add() Instead of pointer to btrfs_fs_devices as an arg in device_list_add() better to get pointer to btrfs_device as return value, then we have both, pointer to btrfs_device and btrfs_fs_devices. btrfs_device is needed to handle reappearing missing device. 
Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4224a735493b..34c889a66240 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -724,12 +724,11 @@ error_brelse: * Add new device to list of registered devices * * Returns: - * 0 - device already known or newly added - * < 0 - error + * device pointer which was just added or updated when successful + * error pointer when failed */ -static noinline int device_list_add(const char *path, - struct btrfs_super_block *disk_super, - u64 devid, struct btrfs_fs_devices **fs_devices_ret) +static noinline struct btrfs_device *device_list_add(const char *path, + struct btrfs_super_block *disk_super, u64 devid) { struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; @@ -740,7 +739,7 @@ static noinline int device_list_add(const char *path, if (!fs_devices) { fs_devices = alloc_fs_devices(disk_super->fsid); if (IS_ERR(fs_devices)) - return PTR_ERR(fs_devices); + return ERR_CAST(fs_devices); list_add(&fs_devices->list, &fs_uuids); @@ -752,19 +751,19 @@ static noinline int device_list_add(const char *path, if (!device) { if (fs_devices->opened) - return -EBUSY; + return ERR_PTR(-EBUSY); device = btrfs_alloc_device(NULL, &devid, disk_super->dev_item.uuid); if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - return PTR_ERR(device); + return device; } name = rcu_string_strdup(path, GFP_NOFS); if (!name) { free_device(device); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } rcu_assign_pointer(device->name, name); @@ -818,12 +817,12 @@ static noinline int device_list_add(const char *path, * with larger generation number or the last-in if * generation are equal. 
*/ - return -EEXIST; + return ERR_PTR(-EEXIST); } name = rcu_string_strdup(path, GFP_NOFS); if (!name) - return -ENOMEM; + return ERR_PTR(-ENOMEM); rcu_string_free(device->name); rcu_assign_pointer(device->name, name); if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { @@ -843,9 +842,7 @@ static noinline int device_list_add(const char *path, fs_devices->total_devices = btrfs_super_num_devices(disk_super); - *fs_devices_ret = fs_devices; - - return 0; + return device; } static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) @@ -1180,9 +1177,10 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, struct btrfs_fs_devices **fs_devices_ret) { struct btrfs_super_block *disk_super; + struct btrfs_device *device; struct block_device *bdev; struct page *page; - int ret; + int ret = 0; u64 devid; u64 bytenr; @@ -1209,7 +1207,11 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, devid = btrfs_stack_device_id(&disk_super->dev_item); - ret = device_list_add(path, disk_super, devid, fs_devices_ret); + device = device_list_add(path, disk_super, devid); + if (IS_ERR(device)) + ret = PTR_ERR(device); + else + *fs_devices_ret = device->fs_devices; btrfs_release_disk_super(page); -- cgit v1.2.3 From 3acbcbfc8f06d4ade2aab2ebba0a2542a05ce90c Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 18 Jan 2018 22:02:36 +0800 Subject: btrfs: drop devid as device_list_add() arg As struct btrfs_disk_super is being passed, so it can get devid the same way its parent does. 
Signed-off-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 34c889a66240..b5036bd69e6a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -728,12 +728,13 @@ error_brelse: * error pointer when failed */ static noinline struct btrfs_device *device_list_add(const char *path, - struct btrfs_super_block *disk_super, u64 devid) + struct btrfs_super_block *disk_super) { struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; struct rcu_string *name; u64 found_transid = btrfs_super_generation(disk_super); + u64 devid = btrfs_stack_device_id(&disk_super->dev_item); fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { @@ -1181,7 +1182,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, struct block_device *bdev; struct page *page; int ret = 0; - u64 devid; u64 bytenr; /* @@ -1205,9 +1205,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, goto error_bdev_put; } - devid = btrfs_stack_device_id(&disk_super->dev_item); - - device = device_list_add(path, disk_super, devid); + device = device_list_add(path, disk_super); if (IS_ERR(device)) ret = PTR_ERR(device); else -- cgit v1.2.3 From 89a8f6d4904c8cf3ff8fee9fdaff392a6bbb8bf6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 24 Jan 2018 14:23:31 +0100 Subject: x86/hyperv: Check for required priviliges in hyperv_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In hyperv_init() it's presumed that it always has access to VP index and hypercall MSRs while according to the specification it should be checked if it's allowed to access the corresponding MSRs before accessing them.
Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Cc: Stephen Hemminger Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Roman Kagan Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: Paolo Bonzini Cc: "K. Y. Srinivasan" Cc: Cathy Avery Cc: Mohammed Gamal Link: https://lkml.kernel.org/r/20180124132337.30138-2-vkuznets@redhat.com --- arch/x86/hyperv/hv_init.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 189a398290db..21f9d53d9f00 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -110,12 +110,19 @@ static int hv_cpu_init(unsigned int cpu) */ void hyperv_init(void) { - u64 guest_id; + u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; + /* Absolutely required MSRs */ + required_msrs = HV_X64_MSR_HYPERCALL_AVAILABLE | + HV_X64_MSR_VP_INDEX_AVAILABLE; + + if ((ms_hyperv.features & required_msrs) != required_msrs) + return; + /* Allocate percpu VP index */ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), GFP_KERNEL); -- cgit v1.2.3 From e2768eaa1ca4fbb7b778da5615cce3dd310352e6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 24 Jan 2018 14:23:32 +0100 Subject: x86/hyperv: Add a function to read both TSC and TSC page value simulateneously MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is going to be used from KVM code where both TSC and TSC page value are needed. Nothing is supposed to use the function when Hyper-V code is compiled out, just BUG(). Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Cc: Stephen Hemminger Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Roman Kagan Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: Paolo Bonzini Cc: "K. Y. 
Srinivasan" Cc: Cathy Avery Cc: Mohammed Gamal Link: https://lkml.kernel.org/r/20180124132337.30138-3-vkuznets@redhat.com --- arch/x86/hyperv/hv_init.c | 1 + arch/x86/include/asm/mshyperv.h | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 21f9d53d9f00..1a6c63f721bc 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -37,6 +37,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void) { return tsc_pg; } +EXPORT_SYMBOL_GPL(hv_get_tsc_page); static u64 read_hv_clock_tsc(struct clocksource *arg) { diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 8bf450b13d9f..6b1d4ea78270 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -325,9 +325,10 @@ static inline void hyperv_setup_mmu_ops(void) {} #ifdef CONFIG_HYPERV_TSCPAGE struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) +static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, + u64 *cur_tsc) { - u64 scale, offset, cur_tsc; + u64 scale, offset; u32 sequence; /* @@ -358,7 +359,7 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) scale = READ_ONCE(tsc_pg->tsc_scale); offset = READ_ONCE(tsc_pg->tsc_offset); - cur_tsc = rdtsc_ordered(); + *cur_tsc = rdtsc_ordered(); /* * Make sure we read sequence after we read all other values @@ -368,7 +369,14 @@ static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - return mul_u64_u64_shr(cur_tsc, scale, 64) + offset; + return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; +} + +static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) +{ + u64 cur_tsc; + + return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); } #else @@ -376,5 +384,12 @@ static inline struct ms_hyperv_tsc_page 
*hv_get_tsc_page(void) { return NULL; } + +static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, + u64 *cur_tsc) +{ + BUG(); + return U64_MAX; +} #endif #endif -- cgit v1.2.3 From 93286261de1b46339aa27cd4c639b21778f6cade Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 24 Jan 2018 14:23:33 +0100 Subject: x86/hyperv: Reenlightenment notifications support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hyper-V supports Live Migration notification. This is supposed to be used in conjunction with TSC emulation: when a VM is migrated to a host with different TSC frequency for some short period the host emulates the accesses to TSC and sends an interrupt to notify about the event. When the guest is done updating everything it can disable TSC emulation and everything will start working fast again. These notifications weren't required until now as Hyper-V guests are not supposed to use TSC as a clocksource: in Linux the TSC is even marked as unstable on boot. Guests normally use 'tsc page' clocksource and host updates its values on migrations automatically. Things change with nested virtualization: even when the PV clocksources (kvm-clock or tsc page) are passed through to the nested guests the TSC frequency and frequency changes need to be known. Hyper-V Top Level Functional Specification (as of v5.0b) wrongly specifies EAX:BIT(12) of CPUID:0x40000009 as the feature identification bit. The right one to check is EAX:BIT(13) of CPUID:0x40000003. I was assured that the fix is on the way. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Cc: Stephen Hemminger Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Roman Kagan Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: Paolo Bonzini Cc: "K. Y.
Srinivasan" Cc: Cathy Avery Cc: Mohammed Gamal Link: https://lkml.kernel.org/r/20180124132337.30138-4-vkuznets@redhat.com --- arch/x86/entry/entry_32.S | 3 ++ arch/x86/entry/entry_64.S | 3 ++ arch/x86/hyperv/hv_init.c | 89 ++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/irq_vectors.h | 7 ++- arch/x86/include/asm/mshyperv.h | 9 ++++ arch/x86/include/uapi/asm/hyperv.h | 27 ++++++++++++ arch/x86/kernel/cpu/mshyperv.c | 6 +++ 7 files changed, 143 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2a35b1e0fb90..7a796eeddf99 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -895,6 +895,9 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, hyperv_vector_handler) +BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR, + hyperv_reenlightenment_intr) + #endif /* CONFIG_HYPERV */ ENTRY(page_fault) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a83570495162..553aa49909ce 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1245,6 +1245,9 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ #if IS_ENABLED(CONFIG_HYPERV) apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler + +apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ + hyperv_reenlightenment_vector hyperv_reenlightenment_intr #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1a6c63f721bc..712ac40081f7 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -18,6 +18,8 @@ */ #include +#include +#include #include #include #include @@ -102,6 +104,93 @@ static int hv_cpu_init(unsigned int cpu) return 0; } +static void (*hv_reenlightenment_cb)(void); + +static void hv_reenlightenment_notify(struct work_struct 
*dummy) +{ + struct hv_tsc_emulation_status emu_status; + + rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + + /* Don't issue the callback if TSC accesses are not emulated */ + if (hv_reenlightenment_cb && emu_status.inprogress) + hv_reenlightenment_cb(); +} +static DECLARE_DELAYED_WORK(hv_reenlightenment_work, hv_reenlightenment_notify); + +void hyperv_stop_tsc_emulation(void) +{ + u64 freq; + struct hv_tsc_emulation_status emu_status; + + rdmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + emu_status.inprogress = 0; + wrmsrl(HV_X64_MSR_TSC_EMULATION_STATUS, *(u64 *)&emu_status); + + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); + tsc_khz = div64_u64(freq, 1000); +} +EXPORT_SYMBOL_GPL(hyperv_stop_tsc_emulation); + +static inline bool hv_reenlightenment_available(void) +{ + /* + * Check for required features and priviliges to make TSC frequency + * change notifications work. + */ + return ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && + ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE && + ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT; +} + +__visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs) +{ + entering_ack_irq(); + + schedule_delayed_work(&hv_reenlightenment_work, HZ/10); + + exiting_irq(); +} + +void set_hv_tscchange_cb(void (*cb)(void)) +{ + struct hv_reenlightenment_control re_ctrl = { + .vector = HYPERV_REENLIGHTENMENT_VECTOR, + .enabled = 1, + .target_vp = hv_vp_index[smp_processor_id()] + }; + struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; + + if (!hv_reenlightenment_available()) { + pr_warn("Hyper-V: reenlightenment support is unavailable\n"); + return; + } + + hv_reenlightenment_cb = cb; + + /* Make sure callback is registered before we write to MSRs */ + wmb(); + + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); +} +EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); + +void clear_hv_tscchange_cb(void) 
+{ + struct hv_reenlightenment_control re_ctrl; + + if (!hv_reenlightenment_available()) + return; + + rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); + re_ctrl.enabled = 0; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *(u64 *)&re_ctrl); + + hv_reenlightenment_cb = NULL; +} +EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb); + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 67421f649cfa..e71c1120426b 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -103,7 +103,12 @@ #endif #define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef -#define LOCAL_TIMER_VECTOR 0xee + +#if IS_ENABLED(CONFIG_HYPERV) +#define HYPERV_REENLIGHTENMENT_VECTOR 0xee +#endif + +#define LOCAL_TIMER_VECTOR 0xed #define NR_VECTORS 256 diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6b1d4ea78270..1790002a2052 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -160,6 +160,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_set_synint_state(int_num, val) wrmsrl(int_num, val) void hyperv_callback_vector(void); +void hyperv_reenlightenment_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector #endif @@ -316,11 +317,19 @@ void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); bool hv_is_hypercall_page_setup(void); void hyperv_cleanup(void); + +void hyperv_reenlightenment_intr(struct pt_regs *regs); +void set_hv_tscchange_cb(void (*cb)(void)); +void clear_hv_tscchange_cb(void); +void hyperv_stop_tsc_emulation(void); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline bool hv_is_hypercall_page_setup(void) { return false; } static inline void hyperv_cleanup(void) {} static inline void hyperv_setup_mmu_ops(void) {} 
+static inline void set_hv_tscchange_cb(void (*cb)(void)) {} +static inline void clear_hv_tscchange_cb(void) {} +static inline void hyperv_stop_tsc_emulation(void) {}; #endif /* CONFIG_HYPERV */ #ifdef CONFIG_HYPERV_TSCPAGE diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 1a5bfead93b4..197c2e6c7376 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -40,6 +40,9 @@ */ #define HV_X64_ACCESS_FREQUENCY_MSRS (1 << 11) +/* AccessReenlightenmentControls privilege */ +#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) + /* * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available @@ -234,6 +237,30 @@ #define HV_X64_MSR_CRASH_PARAMS \ (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) +/* TSC emulation after migration */ +#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 + +struct hv_reenlightenment_control { + u64 vector:8; + u64 reserved1:8; + u64 enabled:1; + u64 reserved2:15; + u64 target_vp:32; +}; + +#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 +#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 + +struct hv_tsc_emulation_control { + u64 enabled:1; + u64 reserved:63; +}; + +struct hv_tsc_emulation_status { + u64 inprogress:1; + u64 reserved:63; +}; + #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 85eb5fc180c8..9340f41ce8d3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -251,6 +251,12 @@ static void __init ms_hyperv_init_platform(void) hyperv_setup_mmu_ops(); /* Setup the IDT for hypervisor callback */ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); + + /* Setup the IDT for reenlightenment notifications */ + if (ms_hyperv.features & HV_X64_ACCESS_REENLIGHTENMENT) + 
alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, + hyperv_reenlightenment_vector); + #endif } -- cgit v1.2.3 From e7c4e36c447daca2b7df49024f6bf230871cb155 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 24 Jan 2018 14:23:34 +0100 Subject: x86/hyperv: Redirect reenlightment notifications on CPU offlining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is very unlikely for CPUs to get offlined when running on Hyper-V as there is a protection in the vmbus module which prevents it when the guest has any VMBus devices assigned. This, however, may change in future if an option to reassign an already active channel will be added. It is also possible to run without any Hyper-V devices or to have a CPU with no assigned channels. Reassign reenlightenment notifications to some other active CPU when the CPU which is assigned to them goes offline. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Cc: Stephen Hemminger Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Roman Kagan Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: Paolo Bonzini Cc: "K. Y. 
Srinivasan" Cc: Cathy Avery Cc: Mohammed Gamal Link: https://lkml.kernel.org/r/20180124132337.30138-5-vkuznets@redhat.com --- arch/x86/hyperv/hv_init.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 712ac40081f7..e4377e2f2a10 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -191,6 +191,26 @@ void clear_hv_tscchange_cb(void) } EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb); +static int hv_cpu_die(unsigned int cpu) +{ + struct hv_reenlightenment_control re_ctrl; + unsigned int new_cpu; + + if (hv_reenlightenment_cb == NULL) + return 0; + + rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + if (re_ctrl.target_vp == hv_vp_index[cpu]) { + /* Reassign to some other online CPU */ + new_cpu = cpumask_any_but(cpu_online_mask, cpu); + + re_ctrl.target_vp = hv_vp_index[new_cpu]; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); + } + + return 0; +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -220,7 +240,7 @@ void hyperv_init(void) return; if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", - hv_cpu_init, NULL) < 0) + hv_cpu_init, hv_cpu_die) < 0) goto free_vp_index; /* -- cgit v1.2.3 From 51d4e5daa32808df4d50db511d167fde19fa114e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 24 Jan 2018 14:23:35 +0100 Subject: x86/irq: Count Hyper-V reenlightenment interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hyper-V reenlightenment interrupts arrive when the VM is migrated. While they are not interesting in general, they are important when L2 nested guests are running.
Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Reviewed-by: Thomas Gleixner Cc: Stephen Hemminger Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Roman Kagan Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: Paolo Bonzini Cc: "K. Y. Srinivasan" Cc: Cathy Avery Cc: Mohammed Gamal Link: https://lkml.kernel.org/r/20180124132337.30138-6-vkuznets@redhat.com --- arch/x86/hyperv/hv_init.c | 2 ++ arch/x86/include/asm/hardirq.h | 3 +++ arch/x86/kernel/irq.c | 9 +++++++++ 3 files changed, 14 insertions(+) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e4377e2f2a10..a3adece392f1 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -147,6 +147,8 @@ __visible void __irq_entry hyperv_reenlightenment_intr(struct pt_regs *regs) { entering_ack_irq(); + inc_irq_stat(irq_hv_reenlightenment_count); + schedule_delayed_work(&hv_reenlightenment_work, HZ/10); exiting_irq(); diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 51cc979dd364..7c341a74ec8c 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -38,6 +38,9 @@ typedef struct { #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) unsigned int irq_hv_callback_count; #endif +#if IS_ENABLED(CONFIG_HYPERV) + unsigned int irq_hv_reenlightenment_count; +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 68e1867cca80..45fb4d2565f8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -141,6 +141,15 @@ int arch_show_interrupts(struct seq_file *p, int prec) irq_stats(j)->irq_hv_callback_count); seq_puts(p, " Hypervisor callback interrupts\n"); } +#endif +#if IS_ENABLED(CONFIG_HYPERV) + if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { + seq_printf(p, "%*s: ", prec, "HRE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + 
irq_stats(j)->irq_hv_reenlightenment_count); + seq_puts(p, " Hyper-V reenlightenment interrupts\n"); + } #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) -- cgit v1.2.3 From b0c39dc68e3b3d22bf9d2984f62f6c86788a49e7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 24 Jan 2018 14:23:36 +0100 Subject: x86/kvm: Pass stable clocksource to guests when running nested on Hyper-V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, KVM is able to work in 'masterclock' mode passing PVCLOCK_TSC_STABLE_BIT to guests when the clocksource which is used on the host is TSC. When running nested on Hyper-V the guest normally uses a different one: TSC page which is resistant to TSC frequency changes on events like L1 migration. Add support for it in KVM. The only non-trivial change is in vgettsc(): when updating the gtod copy both the clock readout and tsc value have to be updated now. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini Cc: Stephen Hemminger Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Roman Kagan Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: "K. Y. 
Srinivasan" Cc: Cathy Avery Cc: Mohammed Gamal Link: https://lkml.kernel.org/r/20180124132337.30138-7-vkuznets@redhat.com --- arch/x86/kvm/x86.c | 93 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 25 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c53298dfbf50..b1ce368a07af 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -67,6 +67,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -1377,6 +1378,11 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) return tsc; } +static inline int gtod_is_based_on_tsc(int mode) +{ + return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK; +} + static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 @@ -1396,7 +1402,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) * perform request to enable masterclock. */ if (ka->use_master_clock || - (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched)) + (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched)) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc, @@ -1459,6 +1465,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) vcpu->arch.tsc_offset = offset; } +static inline bool kvm_check_tsc_unstable(void) +{ +#ifdef CONFIG_X86_64 + /* + * TSC is marked unstable when we're running on Hyper-V, + * 'TSC page' clocksource is good. 
+ */ + if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK) + return false; +#endif + return check_tsc_unstable(); +} + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -1504,7 +1523,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) */ if (synchronizing && vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { - if (!check_tsc_unstable()) { + if (!kvm_check_tsc_unstable()) { offset = kvm->arch.cur_tsc_offset; pr_debug("kvm: matched tsc offset for %llu\n", data); } else { @@ -1604,18 +1623,43 @@ static u64 read_tsc(void) return last; } -static inline u64 vgettsc(u64 *cycle_now) +static inline u64 vgettsc(u64 *tsc_timestamp, int *mode) { long v; struct pvclock_gtod_data *gtod = &pvclock_gtod_data; + u64 tsc_pg_val; + + switch (gtod->clock.vclock_mode) { + case VCLOCK_HVCLOCK: + tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(), + tsc_timestamp); + if (tsc_pg_val != U64_MAX) { + /* TSC page valid */ + *mode = VCLOCK_HVCLOCK; + v = (tsc_pg_val - gtod->clock.cycle_last) & + gtod->clock.mask; + } else { + /* TSC page invalid */ + *mode = VCLOCK_NONE; + } + break; + case VCLOCK_TSC: + *mode = VCLOCK_TSC; + *tsc_timestamp = read_tsc(); + v = (*tsc_timestamp - gtod->clock.cycle_last) & + gtod->clock.mask; + break; + default: + *mode = VCLOCK_NONE; + } - *cycle_now = read_tsc(); + if (*mode == VCLOCK_NONE) + *tsc_timestamp = v = 0; - v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask; return v * gtod->clock.mult; } -static int do_monotonic_boot(s64 *t, u64 *cycle_now) +static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp) { struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; @@ -1624,9 +1668,8 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now) do { seq = read_seqcount_begin(&gtod->seq); - mode = gtod->clock.vclock_mode; ns = gtod->nsec_base; - ns += vgettsc(cycle_now); + ns += vgettsc(tsc_timestamp, &mode); ns >>= gtod->clock.shift; ns += gtod->boot_ns; } while
(unlikely(read_seqcount_retry(&gtod->seq, seq))); @@ -1635,7 +1678,7 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now) return mode; } -static int do_realtime(struct timespec *ts, u64 *cycle_now) +static int do_realtime(struct timespec *ts, u64 *tsc_timestamp) { struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; @@ -1644,10 +1687,9 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now) do { seq = read_seqcount_begin(&gtod->seq); - mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ns = gtod->nsec_base; - ns += vgettsc(cycle_now); + ns += vgettsc(tsc_timestamp, &mode); ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); @@ -1657,25 +1699,26 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now) return mode; } -/* returns true if host is using tsc clocksource */ -static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) +/* returns true if host is using TSC based clocksource */ +static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp) { /* checked again under seqlock below */ - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) + if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) return false; - return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC; + return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns, + tsc_timestamp)); } -/* returns true if host is using tsc clocksource */ +/* returns true if host is using TSC based clocksource */ static bool kvm_get_walltime_and_clockread(struct timespec *ts, - u64 *cycle_now) + u64 *tsc_timestamp) { /* checked again under seqlock below */ - if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) + if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode)) return false; - return do_realtime(ts, cycle_now) == VCLOCK_TSC; + return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp)); } #endif @@ -2869,13 +2912,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } - if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { + if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) { s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : rdtsc() - vcpu->arch.last_host_tsc; if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); - if (check_tsc_unstable()) { + if (kvm_check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_vcpu_write_tsc_offset(vcpu, offset); @@ -6110,9 +6153,9 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, update_pvclock_gtod(tk); /* disable master clock if host does not trust, or does not - * use, TSC clocksource + * use, TSC based clocksource. */ - if (gtod->clock.vclock_mode != VCLOCK_TSC && + if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && atomic_read(&kvm_guest_has_master_clock) != 0) queue_work(system_long_wq, &pvclock_gtod_work); @@ -7767,7 +7810,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, { struct kvm_vcpu *vcpu; - if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) + if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) printk_once(KERN_WARNING "kvm: SMP vm created on host with unstable TSC; " "guest TSC will not be reliable\n"); @@ -7924,7 +7967,7 @@ int kvm_arch_hardware_enable(void) return ret; local_tsc = rdtsc(); - stable = !check_tsc_unstable(); + stable = !kvm_check_tsc_unstable(); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (!stable && vcpu->cpu == smp_processor_id()) -- cgit v1.2.3 From 0092e4346f49558e5fe5a927c6d78d401dc4ed73 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 24 Jan 2018 14:23:37 +0100 Subject: x86/kvm: Support Hyper-V reenlightenment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running nested KVM on Hyper-V guests it's required to update masterclocks for all guests when L1 migrates to a host
with different TSC frequency. Implement the procedure in the following way: - Pause all guests. - Tell the host (Hyper-V) to stop emulating TSC accesses. - Update the gtod copy, recompute clocks. - Unpause all guests. This is somewhat similar to cpufreq but there are two important differences: - TSC emulation can only be disabled globally (on all CPUs) - The new TSC frequency is not known until emulation is turned off so there is no way to 'prepare' for the event upfront. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini Cc: Stephen Hemminger Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Roman Kagan Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: "K. Y. Srinivasan" Cc: Cathy Avery Cc: Mohammed Gamal Link: https://lkml.kernel.org/r/20180124132337.30138-8-vkuznets@redhat.com --- arch/x86/kvm/x86.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b1ce368a07af..879a99987401 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -68,6 +68,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -5932,6 +5933,43 @@ static void tsc_khz_changed(void *data) __this_cpu_write(cpu_tsc_khz, khz); } +static void kvm_hyperv_tsc_notifier(void) +{ +#ifdef CONFIG_X86_64 + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int cpu; + + spin_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_make_mclock_inprogress_request(kvm); + + hyperv_stop_tsc_emulation(); + + /* TSC frequency always matches when on Hyper-V */ + for_each_present_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + kvm_max_guest_tsc_khz = tsc_khz; + + list_for_each_entry(kvm, &vm_list, vm_list) { + struct kvm_arch *ka = &kvm->arch; + + spin_lock(&ka->pvclock_gtod_sync_lock); + + pvclock_update_vm_gtod_copy(kvm); + + kvm_for_each_vcpu(cpu, vcpu, kvm) + 
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + + kvm_for_each_vcpu(cpu, vcpu, kvm) + kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu); + + spin_unlock(&ka->pvclock_gtod_sync_lock); + } + spin_unlock(&kvm_lock); +#endif +} + static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) { @@ -6217,6 +6255,9 @@ int kvm_arch_init(void *opaque) kvm_lapic_init(); #ifdef CONFIG_X86_64 pvclock_gtod_register_notifier(&pvclock_gtod_notifier); + + if (x86_hyper_type == X86_HYPER_MS_HYPERV) + set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); #endif return 0; @@ -6229,6 +6270,10 @@ out: void kvm_arch_exit(void) { +#ifdef CONFIG_X86_64 + if (x86_hyper_type == X86_HYPER_MS_HYPERV) + clear_hv_tscchange_cb(); +#endif kvm_lapic_exit(); perf_unregister_guest_info_callbacks(&kvm_guest_cbs); -- cgit v1.2.3 From 5fa4ec9cb2e6679e2f828033726f758ea314b9c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 31 Jan 2018 09:41:40 +0100 Subject: x86/kvm: Make it compile on 32bit and with HYPERVISOR_GUEST=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reenlightenment support for hyperv slapped a direct reference to x86_hyper_type into the kvm code which results in the following build failure when CONFIG_HYPERVISOR_GUEST=n: arch/x86/kvm/x86.c:6259:6: error: ‘x86_hyper_type’ undeclared (first use in this function) arch/x86/kvm/x86.c:6259:6: note: each undeclared identifier is reported only once for each function it appears in Use the proper helper function to cure that. The 32bit compile fails because of: arch/x86/kvm/x86.c:5936:13: warning: ‘kvm_hyperv_tsc_notifier’ defined but not used [-Wunused-function] which is a real trainwreck engineering artwork. The callsite is wrapped into #ifdef CONFIG_X86_64, but the function itself has the #ifdef inside the function body. Make the function itself wrapped into the ifdef to cure that. Qualiteee....
Fixes: 0092e4346f49 ("x86/kvm: Support Hyper-V reenlightenment") Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: Vitaly Kuznetsov Cc: Paolo Bonzini Cc: Stephen Hemminger Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Haiyang Zhang Cc: "Michael Kelley (EOSG)" Cc: Roman Kagan Cc: Andy Lutomirski Cc: devel@linuxdriverproject.org Cc: "K. Y. Srinivasan" Cc: Cathy Avery Cc: Mohammed Gamal --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 879a99987401..cd3b3bc67c5a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5933,9 +5933,9 @@ static void tsc_khz_changed(void *data) __this_cpu_write(cpu_tsc_khz, khz); } +#ifdef CONFIG_X86_64 static void kvm_hyperv_tsc_notifier(void) { -#ifdef CONFIG_X86_64 struct kvm *kvm; struct kvm_vcpu *vcpu; int cpu; @@ -5967,8 +5967,8 @@ static void kvm_hyperv_tsc_notifier(void) spin_unlock(&ka->pvclock_gtod_sync_lock); } spin_unlock(&kvm_lock); -#endif } +#endif static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) @@ -6256,7 +6256,7 @@ int kvm_arch_init(void *opaque) #ifdef CONFIG_X86_64 pvclock_gtod_register_notifier(&pvclock_gtod_notifier); - if (x86_hyper_type == X86_HYPER_MS_HYPERV) + if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) set_hv_tscchange_cb(kvm_hyperv_tsc_notifier); #endif @@ -6271,7 +6271,7 @@ out: void kvm_arch_exit(void) { #ifdef CONFIG_X86_64 - if (x86_hyper_type == X86_HYPER_MS_HYPERV) + if (hypervisor_is_type(X86_HYPER_MS_HYPERV)) clear_hv_tscchange_cb(); #endif kvm_lapic_exit(); -- cgit v1.2.3